/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.vectorizer.tfidf;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.net.URI;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.map.OpenIntLongHashMap;
import org.apache.mahout.vectorizer.TFIDF;

public class TFIDFPartialVectorReducer
extends Reducer<WritableComparable<?>, VectorWritable, WritableComparable<?>, VectorWritable> {
    private final OpenIntLongHashMap dictionary = new OpenIntLongHashMap();
    private final TFIDF tfidf = new TFIDF();
    private int minDf = 1;
    private int maxDfPercent = 99;
    private long vectorCount = 1L;
    private long featureCount;
    private boolean sequentialAccess;
    private boolean namedVector;

    protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Reducer.Context context) throws IOException, InterruptedException {
        Iterator<VectorWritable> it = values.iterator();
        if (!it.hasNext()) {
            return;
        }
        Vector value = it.next().get();
        Iterator it1 = value.iterateNonZero();
        RandomAccessSparseVector vector = new RandomAccessSparseVector((int)this.featureCount, value.getNumNondefaultElements());
        while (it1.hasNext()) {
            long df;
            Vector.Element e = (Vector.Element)it1.next();
            if (!this.dictionary.containsKey(e.index()) || (double)(df = this.dictionary.get(e.index())) * 100.0 / (double)this.vectorCount > (double)this.maxDfPercent) continue;
            if (df < (long)this.minDf) {
                df = this.minDf;
            }
            vector.setQuick(e.index(), this.tfidf.calculate((int)e.get(), (int)df, (int)this.featureCount, (int)this.vectorCount));
        }
        if (this.sequentialAccess) {
            vector = new SequentialAccessSparseVector((Vector)vector);
        }
        if (this.namedVector) {
            vector = new NamedVector((Vector)vector, key.toString());
        }
        VectorWritable vectorWritable = new VectorWritable((Vector)vector);
        context.write(key, (Object)vectorWritable);
    }

    protected void setup(Reducer.Context context) throws IOException, InterruptedException {
        super.setup(context);
        Configuration conf = context.getConfiguration();
        URI[] localFiles = DistributedCache.getCacheFiles((Configuration)conf);
        Preconditions.checkArgument((localFiles != null && localFiles.length >= 1 ? 1 : 0) != 0, (Object)"missing paths from the DistributedCache");
        this.vectorCount = conf.getLong("vector.count", 1L);
        this.featureCount = conf.getLong("feature.count", 1L);
        this.minDf = conf.getInt("min.df", 1);
        this.maxDfPercent = conf.getInt("max.df.percentage", 99);
        this.sequentialAccess = conf.getBoolean("vector.sequentialAccess", false);
        this.namedVector = conf.getBoolean("vector.named", false);
        Path dictionaryFile = new Path(localFiles[0].getPath());
        for (Pair record : new SequenceFileIterable(dictionaryFile, true, conf)) {
            this.dictionary.put(((IntWritable)record.getFirst()).get(), ((LongWritable)record.getSecond()).get());
        }
    }
}

