package edu.umd.cloud9.collection.clue;

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/umd/cloud9/collection/clue/DemoCountClueWarcRecords.class */
public class DemoCountClueWarcRecords extends Configured implements Tool {
    private static final Logger sLogger = Logger.getLogger(DemoCountClueWarcRecords.class);

    /* loaded from: input_file:edu/umd/cloud9/collection/clue/DemoCountClueWarcRecords$MyMapper.class */
    private static class MyMapper extends MapReduceBase implements Mapper<Writable, ClueWarcRecord, Writable, Text> {
        ClueWarcDocnoMapping mDocMapping = new ClueWarcDocnoMapping();

        private MyMapper() {
        }

        public void configure(JobConf jobConf) {
            try {
                this.mDocMapping.loadMapping(DistributedCache.getLocalCacheFiles(jobConf)[0], FileSystem.getLocal(jobConf));
            } catch (Exception e) {
                e.printStackTrace();
                throw new RuntimeException("Error initializing DocnoMapping!");
            }
        }

        public void map(Writable writable, ClueWarcRecord clueWarcRecord, OutputCollector<Writable, Text> outputCollector, Reporter reporter) throws IOException {
            reporter.incrCounter(Records.TOTAL, 1L);
            String headerMetadataItem = clueWarcRecord.getHeaderMetadataItem("WARC-TREC-ID");
            int docno = this.mDocMapping.getDocno(headerMetadataItem);
            if (headerMetadataItem == null || docno == -1) {
                return;
            }
            reporter.incrCounter(Records.PAGES, 1L);
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((Writable) obj, (ClueWarcRecord) obj2, (OutputCollector<Writable, Text>) outputCollector, reporter);
        }
    }

    /**
     * Hadoop counters reported by this tool's mapper.
     *
     * <p>Decompiler note: the access modifier of this enum was changed from
     * {@code private}.
     */
    public enum Records {
        /** Incremented once for every record passed to the mapper. */
        TOTAL,
        /** Incremented for records whose {@code WARC-TREC-ID} resolves to a docno. */
        PAGES;

        /**
         * Returns the constants of this enum in declaration order.
         *
         * <p>Decompiler-generated companion of {@link #values()} (renamed to avoid a
         * clash with the built-in enum method).
         *
         * @return a newly allocated array that the caller may modify freely
         */
        public static Records[] valuesCustom() {
            return values().clone();
        }
    }

    /**
     * Writes the command-line synopsis, followed by Hadoop's generic option help,
     * to standard output.
     *
     * @return always {@code -1}, so callers can return it directly as an error code
     */
    private static int printUsage() {
        final String synopsis = "usage: [original|repacked] [base-path] [segment-num] [mapping-file]";
        System.out.println(synopsis);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    /**
     * Configures and runs the map-only record-counting job.
     *
     * <p>Expected arguments, in order: the collection version ({@code original} for
     * source WARC files or {@code repacked} for SequenceFiles), the base path, the
     * segment number, and the docno mapping file (shipped to tasks through the
     * distributed cache). Job output goes to a timestamped directory under
     * {@code /tmp}, which is deleted before the job starts and again after it
     * finishes, whether or not the job succeeded.
     *
     * @param strArr the command-line arguments remaining after generic option parsing
     * @return {@code 0} on success, {@code -1} if the arguments are invalid
     * @throws Exception if the job cannot be configured or fails while running
     */
    @Override
    public int run(String[] strArr) throws Exception {
        if (strArr.length != 4) {
            return printUsage();
        }

        final String version = strArr[0];
        final boolean repacked;
        if ("original".equals(version)) {
            repacked = false;
        } else if ("repacked".equals(version)) {
            repacked = true;
        } else {
            System.err.println("Expecting either 'original' or 'repacked' as first argument.");
            System.err.println("  'original' = original source WARC files");
            System.err.println("  'repacked' = repacked SequenceFiles");
            // Report the error through the return code; a Tool must not terminate the JVM itself.
            return -1;
        }

        final String basePath = strArr[1];
        int segment;
        try {
            segment = Integer.parseInt(strArr[2]);
        } catch (NumberFormatException e) {
            System.err.println("Expecting an integer segment number but got '" + strArr[2] + "'.");
            return printUsage();
        }
        final String mappingFile = strArr[3];

        sLogger.info("Tool name: DemoCountClueWarcRecords");
        sLogger.info(" - version: " + version);
        sLogger.info(" - base path: " + basePath);
        sLogger.info(" - segment number: " + segment);
        sLogger.info(" - mapping file: " + mappingFile);

        final Path tmpOutput = new Path("/tmp/" + System.currentTimeMillis());

        // Start from the configuration populated by ToolRunner so generic options
        // (-D, -conf, -fs, ...) take effect; fall back to defaults if none was set.
        final Configuration baseConf = getConf();
        final JobConf jobConf = baseConf == null
                ? new JobConf(DemoCountClueWarcRecords.class)
                : new JobConf(baseConf, DemoCountClueWarcRecords.class);
        jobConf.setJobName("DemoCountClueWarcRecords:segment" + segment);
        jobConf.setNumMapTasks(10);
        jobConf.setNumReduceTasks(0);

        if (repacked) {
            FileInputFormat.addInputPath(jobConf, new Path(basePath));
            jobConf.setInputFormat(SequenceFileInputFormat.class);
        } else {
            ClueCollectionPathConstants.addEnglishCollectionPart(jobConf, basePath, segment);
            jobConf.setInputFormat(ClueWarcInputFormat.class);
        }

        DistributedCache.addCacheFile(new URI(mappingFile), jobConf);
        FileOutputFormat.setOutputPath(jobConf, tmpOutput);
        FileOutputFormat.setCompressOutput(jobConf, false);
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(IntWritable.class);
        jobConf.setMapperClass(MyMapper.class);

        final FileSystem fs = FileSystem.get(jobConf);
        // Clear any stale directory with the same name before the job starts.
        fs.delete(tmpOutput, true);
        try {
            JobClient.runJob(jobConf);
        } finally {
            // The results are the job counters; the output directory is scratch
            // space and is removed even when the job fails.
            fs.delete(tmpOutput, true);
        }
        return 0;
    }

    /**
     * Command-line entry point: runs this tool through {@link ToolRunner} and exits
     * the JVM with the tool's return code.
     *
     * @param strArr the raw command-line arguments
     * @throws Exception if the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        Configuration conf = new Configuration();
        Tool tool = new DemoCountClueWarcRecords();
        int exitCode = ToolRunner.run(conf, tool, strArr);
        System.exit(exitCode);
    }
}
