package edu.umd.cloud9.collection.trec;

import edu.umd.cloud9.collection.DocnoMapping;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/umd/cloud9/collection/trec/BuildTrecForwardIndex2.class */
public class BuildTrecForwardIndex2 extends Configured implements Tool {
    private static final Logger LOG = Logger.getLogger(BuildTrecForwardIndex2.class);
    private static final String DOCNO_MAPPING_FILE_PROPERTY = "DocnoMappingFile";

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/umd/cloud9/collection/trec/BuildTrecForwardIndex2$Count.class */
    public enum Count {
        /** Incremented once per document handled by the mapper; read back by the driver after the job. */
        DOCS;

        /**
         * Returns a fresh array holding every constant of this enum, in declaration order.
         * Callers may modify the returned array without affecting later calls.
         *
         * @return a newly allocated copy of the enum constants
         */
        public static Count[] valuesCustom() {
            return values().clone();
        }
    }

    /* loaded from: input_file:edu/umd/cloud9/collection/trec/BuildTrecForwardIndex2$MyMapper.class */
    private static class MyMapper extends Mapper<LongWritable, TrecDocument, IntWritable, Text> {
        private static final IntWritable docno = new IntWritable(1);
        private static final Text text = new Text();
        private DocnoMapping docMapping;

        private MyMapper() {
        }

        public void setup(Mapper<LongWritable, TrecDocument, IntWritable, Text>.Context context) {
            try {
                Configuration configuration = context.getConfiguration();
                this.docMapping = new TrecDocnoMapping();
                if (!configuration.get("mapred.job.tracker").equals("local")) {
                    this.docMapping.loadMapping(DistributedCache.getLocalCacheFiles(configuration)[0], FileSystem.getLocal(configuration));
                } else {
                    this.docMapping.loadMapping(new Path(configuration.get(BuildTrecForwardIndex2.DOCNO_MAPPING_FILE_PROPERTY)), FileSystem.get(configuration));
                }
            } catch (Exception e) {
                e.printStackTrace();
                throw new RuntimeException("Error initializing DocnoMapping!");
            }
        }

        public void map(LongWritable longWritable, TrecDocument trecDocument, Mapper<LongWritable, TrecDocument, IntWritable, Text>.Context context) throws IOException, InterruptedException {
            context.getCounter(Count.DOCS).increment(1L);
            int length = trecDocument.getContent().getBytes().length;
            docno.set(this.docMapping.getDocno(trecDocument.getDocid()));
            text.set(longWritable + "\t" + length);
            context.write(docno, text);
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
            map((LongWritable) obj, (TrecDocument) obj2, (Mapper<LongWritable, TrecDocument, IntWritable, Text>.Context) context);
        }
    }

    /**
     * Prints the expected command-line arguments followed by Hadoop's generic option help.
     *
     * @return always {@code -1}, suitable for use as a failure exit code
     */
    private static int printUsage() {
        final String usage = "usage: [collection-path] [output-path] [index-file] [docno-mapping-file]";
        System.out.println(usage);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    /**
     * Builds the forward index for a TREC collection.
     *
     * <p>Runs a single-reducer MapReduce job over the collection, then converts the job's
     * text output ({@code part-r-00000}) into a binary index file laid out as: the
     * {@code TrecForwardIndex} class name (UTF), the collection path (UTF), the document
     * count (int), followed by one (long, int) pair per document.
     *
     * @param strArr exactly four arguments: collection path, output path, index file,
     *        docno mapping file
     * @return {@code 0} on success, {@code -1} on bad arguments or if the MapReduce job fails
     * @throws RuntimeException if the number of index entries written differs from the
     *         job's document counter
     * @throws Exception on any job-submission or file-system failure
     */
    public int run(String[] strArr) throws Exception {
        if (strArr.length != 4) {
            return printUsage();
        }
        String collectionPath = strArr[0];
        String outputPath = strArr[1];
        String indexFile = strArr[2];
        String mappingFile = strArr[3];

        Job job = new Job(getConf(), BuildTrecForwardIndex2.class.getCanonicalName());
        job.setJarByClass(BuildTrecForwardIndex2.class);
        Configuration jobConf = job.getConfiguration();
        FileSystem fileSystem = FileSystem.get(getConf());

        LOG.info("Tool name: " + BuildTrecForwardIndex2.class.getSimpleName());
        LOG.info(" - collection path: " + collectionPath);
        LOG.info(" - output path: " + outputPath);
        LOG.info(" - index file: " + indexFile);
        LOG.info(" - mapping file: " + mappingFile);

        jobConf.set("mapred.child.java.opts", "-Xmx1024m");
        // A single reducer yields exactly one output file (part-r-00000) to convert below.
        job.setNumReduceTasks(1);

        // Constant-first comparison: an unset property must not cause an NPE.
        if ("local".equals(jobConf.get("mapred.job.tracker"))) {
            jobConf.set(DOCNO_MAPPING_FILE_PROPERTY, mappingFile);
        } else {
            DistributedCache.addCacheFile(new URI(mappingFile), jobConf);
        }

        FileInputFormat.setInputPaths(job, new Path(collectionPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        FileOutputFormat.setCompressOutput(job, false);
        job.setInputFormatClass(TrecDocumentInputFormat2.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(MyMapper.class);

        // Remove any previous output so the job does not fail on an existing directory.
        fileSystem.delete(new Path(outputPath), true);

        if (!job.waitForCompletion(true)) {
            // Without this check a failed job would surface later as a confusing missing-file error.
            LOG.error("Job failed; forward index not written to " + indexFile);
            return -1;
        }

        int expectedDocs = (int) job.getCounters().findCounter(Count.DOCS).getValue();
        LOG.info("Writing " + expectedDocs + " doc offsets to " + indexFile);

        int written = writeIndexFile(fileSystem, new Path(outputPath + "/part-r-00000"),
                new Path(indexFile), collectionPath, expectedDocs);
        LOG.info(written + " docs total. Done!");

        if (expectedDocs != written) {
            throw new RuntimeException("Unexpected number of documents in building forward index!");
        }
        return 0;
    }

    /**
     * Converts the job's tab-separated text output into the binary forward-index file,
     * closing both streams even if conversion fails part-way.
     *
     * @param fileSystem file system holding both the job output and the index file
     * @param jobOutputFile the reducer output; each line is {@code key\tlong\tint}
     * @param indexPath destination index file (overwritten if it exists)
     * @param collectionPath collection path recorded in the index header
     * @param expectedDocs document count recorded in the index header
     * @return the number of index entries written
     * @throws IOException if reading the job output or writing the index fails
     */
    private static int writeIndexFile(FileSystem fileSystem, Path jobOutputFile, Path indexPath,
            String collectionPath, int expectedDocs) throws IOException {
        int written = 0;
        LineReader reader = new LineReader(fileSystem.open(jobOutputFile));
        try {
            FSDataOutputStream out = fileSystem.create(indexPath, true);
            try {
                out.writeUTF(TrecForwardIndex.class.getCanonicalName());
                out.writeUTF(collectionPath);
                out.writeInt(expectedDocs);

                Text line = new Text();
                while (reader.readLine(line) > 0) {
                    // Field 0 is the job's output key; fields 1 and 2 are the values stored per document.
                    String[] fields = line.toString().split("\\t");
                    out.writeLong(Long.parseLong(fields[1]));
                    out.writeInt(Integer.parseInt(fields[2]));
                    written++;
                    if (written % 100000 == 0) {
                        LOG.info(written + " docs");
                    }
                }
            } finally {
                out.close();
            }
        } finally {
            reader.close();
        }
        return written;
    }

    /**
     * Command-line entry point: runs this tool through {@code ToolRunner} with a fresh
     * configuration and exits the JVM with the tool's return code.
     *
     * @param strArr command-line arguments, passed through to {@code ToolRunner}
     * @throws Exception if the tool run fails with an exception
     */
    public static void main(String[] strArr) throws Exception {
        Configuration conf = new Configuration();
        int exitCode = ToolRunner.run(conf, new BuildTrecForwardIndex2(), strArr);
        System.exit(exitCode);
    }
}
