/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.vectorizer.term;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.net.URI;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.StringTuple;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.common.lucene.IteratorTokenStream;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.map.OpenObjectIntHashMap;

public class TFPartialVectorReducer
extends Reducer<Text, StringTuple, Text, VectorWritable> {
    private final OpenObjectIntHashMap<String> dictionary = new OpenObjectIntHashMap();
    private int dimension;
    private boolean sequentialAccess;
    private boolean namedVector;
    private int maxNGramSize = 1;

    protected void reduce(Text key, Iterable<StringTuple> values, Reducer.Context context) throws IOException, InterruptedException {
        Iterator<StringTuple> it = values.iterator();
        if (!it.hasNext()) {
            return;
        }
        StringTuple value = it.next();
        RandomAccessSparseVector vector = new RandomAccessSparseVector(this.dimension, value.length());
        if (this.maxNGramSize >= 2) {
            ShingleFilter sf = new ShingleFilter((TokenStream)new IteratorTokenStream(value.getEntries().iterator()), this.maxNGramSize);
            do {
                String term;
                if ((term = ((CharTermAttribute)sf.getAttribute(CharTermAttribute.class)).toString()).length() <= 0 || !this.dictionary.containsKey((Object)term)) continue;
                int termId = this.dictionary.get((Object)term);
                vector.setQuick(termId, vector.getQuick(termId) + 1.0);
            } while (sf.incrementToken());
            sf.end();
            sf.close();
        } else {
            for (String term : value.getEntries()) {
                if (term.length() <= 0 || !this.dictionary.containsKey((Object)term)) continue;
                int termId = this.dictionary.get((Object)term);
                vector.setQuick(termId, vector.getQuick(termId) + 1.0);
            }
        }
        if (this.sequentialAccess) {
            vector = new SequentialAccessSparseVector((Vector)vector);
        }
        if (this.namedVector) {
            vector = new NamedVector((Vector)vector, key.toString());
        }
        if (vector.getNumNondefaultElements() > 0) {
            VectorWritable vectorWritable = new VectorWritable((Vector)vector);
            context.write((Object)key, (Object)vectorWritable);
        } else {
            context.getCounter("TFParticalVectorReducer", "emptyVectorCount").increment(1L);
        }
    }

    protected void setup(Reducer.Context context) throws IOException, InterruptedException {
        super.setup(context);
        Configuration conf = context.getConfiguration();
        URI[] localFiles = DistributedCache.getCacheFiles((Configuration)conf);
        Preconditions.checkArgument((localFiles != null && localFiles.length >= 1 ? 1 : 0) != 0, (Object)"missing paths from the DistributedCache");
        this.dimension = conf.getInt("vector.dimension", Integer.MAX_VALUE);
        this.sequentialAccess = conf.getBoolean("vector.sequentialAccess", false);
        this.namedVector = conf.getBoolean("vector.named", false);
        this.maxNGramSize = conf.getInt("max.ngrams", this.maxNGramSize);
        Path dictionaryFile = new Path(localFiles[0].getPath());
        for (Pair record : new SequenceFileIterable(dictionaryFile, true, conf)) {
            this.dictionary.put((Object)((Writable)record.getFirst()).toString(), ((IntWritable)record.getSecond()).get());
        }
    }
}

