/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.vectorizer.collocations.llr;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.math.stats.LogLikelihood;
import org.apache.mahout.vectorizer.collocations.llr.Gram;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reducer that scores n-grams by log-likelihood ratio (LLR).
 *
 * <p>For every n-gram key, the values are expected to contain exactly one head gram and one
 * tail gram. Their frequencies, the n-gram's own frequency and the configured n-gram total are
 * turned into four counts that are handed to an {@link LLCallback}; the n-gram text and its
 * score are written out when the score is at least the configured minimum. Keys that cannot be
 * scored are skipped and accounted for in the {@link Skipped} counters.
 *
 * <p>When the {@code emit-unigrams} configuration flag is set, unigram keys are written out
 * directly with their frequency as the value.
 */
public class LLRReducer extends Reducer<Gram, Gram, Text, DoubleWritable> {
    private static final Logger log = LoggerFactory.getLogger(LLRReducer.class);

    /** Configuration key for the total n-gram count; {@code setup} fails when it is absent. */
    public static final String NGRAM_TOTAL = "ngramTotal";
    /** Configuration key for the minimum LLR score an n-gram needs in order to be emitted. */
    public static final String MIN_LLR = "minLLR";
    /** Minimum LLR score used when {@link #MIN_LLR} is not configured. */
    public static final float DEFAULT_MIN_LLR = 1.0f;

    /** Slot index used for the head gram in the per-key arrays. */
    private static final int HEAD_POS = 0;
    /** Slot index used for the tail gram (any value that is not a head) in the per-key arrays. */
    private static final int TAIL_POS = 1;
    /** Frequency marker meaning "no value seen yet for this slot". */
    private static final int UNSET = -1;

    private long ngramTotal;
    private float minLLRValue;
    private boolean emitUnigrams;
    private final LLCallback ll;

    /**
     * Scores a single n-gram and writes it out if it passes the minimum LLR threshold.
     *
     * <p>The context parameter is declared with the parameterized {@code Reducer<...>.Context}
     * type so that this method's signature matches the inherited {@code reduce} and is the one
     * the framework dispatches to.
     *
     * @param ngram   the n-gram being scored (or a unigram when unigrams are emitted)
     * @param values  the grams grouped under this key; one head and one tail are expected
     * @param context the task context used for output and counters
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    protected void reduce(Gram ngram, Iterable<Gram> values,
                          Reducer<Gram, Gram, Text, DoubleWritable>.Context context)
            throws IOException, InterruptedException {
        // Unigrams carry no head/tail pair; pass them through with their raw frequency.
        if (ngram.getType() == Gram.Type.UNIGRAM && emitUnigrams) {
            DoubleWritable frequency = new DoubleWritable(ngram.getFrequency());
            Text text = new Text(ngram.getString());
            context.write(text, frequency);
            return;
        }

        String[] gram = new String[2];
        int[] gramFreq = {UNSET, UNSET};
        for (Gram value : values) {
            boolean isHead = value.getType() == Gram.Type.HEAD;
            int pos = isHead ? HEAD_POS : TAIL_POS;
            if (gramFreq[pos] != UNSET) {
                // A second head (or tail) for the same key: give up on the whole key.
                log.warn("Extra {} for {}, skipping", value.getType(), ngram);
                context.getCounter(isHead ? Skipped.EXTRA_HEAD : Skipped.EXTRA_TAIL).increment(1L);
                return;
            }
            gram[pos] = value.getString();
            gramFreq[pos] = value.getFrequency();
        }

        if (gramFreq[HEAD_POS] == UNSET) {
            log.warn("Missing head for {}, skipping.", ngram);
            context.getCounter(Skipped.MISSING_HEAD).increment(1L);
            return;
        }
        if (gramFreq[TAIL_POS] == UNSET) {
            log.warn("Missing tail for {}, skipping", ngram);
            context.getCounter(Skipped.MISSING_TAIL).increment(1L);
            return;
        }

        int k11 = ngram.getFrequency();           // occurrences of the n-gram itself
        int k12 = gramFreq[HEAD_POS] - k11;       // head occurrences outside this n-gram
        int k21 = gramFreq[TAIL_POS] - k11;       // tail occurrences outside this n-gram
        // NOTE(review): ngramTotal is a long but LLCallback takes ints, so this narrowing cast
        // wraps silently when the remainder exceeds Integer.MAX_VALUE. Fixing that requires
        // widening the LLCallback signature; confirm whether such totals occur in practice.
        int k22 = (int) (ngramTotal - ((long) gramFreq[HEAD_POS] + gramFreq[TAIL_POS] - k11));

        try {
            double llr = ll.logLikelihoodRatio(k11, k12, k21, k22);
            if (llr < (double) minLLRValue) {
                context.getCounter(Skipped.LESS_THAN_MIN_LLR).increment(1L);
                return;
            }
            DoubleWritable score = new DoubleWritable(llr);
            Text text = new Text(ngram.getString());
            context.write(text, score);
        } catch (IllegalArgumentException ex) {
            // The callback rejected the counts; record it and keep the job running.
            context.getCounter(Skipped.LLR_CALCULATION_ERROR).increment(1L);
            // Pass the exception itself so the stack trace is not lost.
            log.error("Problem calculating LLR ratio: " + ex.getMessage(), ex);
            log.error("NGram: " + ngram);
            log.error("HEAD: " + gram[HEAD_POS] + ':' + gramFreq[HEAD_POS]);
            log.error("TAIL: " + gram[TAIL_POS] + ':' + gramFreq[TAIL_POS]);
            log.error("k11: " + k11 + " k12: " + k12 + " k21: " + k21 + " k22: " + k22);
        }
    }

    /**
     * Reads the n-gram total, the minimum LLR threshold and the emit-unigrams flag from the job
     * configuration.
     *
     * @param context the task context providing the job configuration
     * @throws IOException           if the superclass setup fails
     * @throws InterruptedException  if the superclass setup is interrupted
     * @throws IllegalStateException if {@link #NGRAM_TOTAL} is not present in the configuration
     */
    protected void setup(Reducer<Gram, Gram, Text, DoubleWritable>.Context context)
            throws IOException, InterruptedException {
        super.setup(context);
        Configuration conf = context.getConfiguration();
        ngramTotal = conf.getLong(NGRAM_TOTAL, -1L);
        minLLRValue = conf.getFloat(MIN_LLR, DEFAULT_MIN_LLR);
        emitUnigrams = conf.getBoolean("emit-unigrams", false);
        if (log.isInfoEnabled()) {
            log.info("NGram Total is {}", ngramTotal);
            log.info("Min LLR value is {}", minLLRValue);
            log.info("Emit Unitgrams is {}", emitUnigrams);
        }
        // -1 is the default supplied above, i.e. the key was never set by the driver.
        if (ngramTotal == -1L) {
            throw new IllegalStateException("No NGRAM_TOTAL available in job config");
        }
    }

    /** Creates a reducer that computes scores with {@link ConcreteLLCallback}. */
    public LLRReducer() {
        this.ll = new ConcreteLLCallback();
    }

    /**
     * Creates a reducer that computes scores with the supplied callback (package-private;
     * presumably intended for tests that substitute the LLR computation).
     *
     * @param ll the callback used to compute the log-likelihood ratio
     */
    LLRReducer(LLCallback ll) {
        this.ll = ll;
    }

    /** Default {@link LLCallback} that delegates to {@link LogLikelihood#logLikelihoodRatio}. */
    public static final class ConcreteLLCallback
    implements LLCallback {
        @Override
        public double logLikelihoodRatio(int k11, int k12, int k21, int k22) {
            return LogLikelihood.logLikelihoodRatio(k11, k12, k21, k22);
        }
    }

    /** Strategy for computing a log-likelihood ratio from the four counts built in {@code reduce}. */
    public static interface LLCallback {
        /**
         * Computes the log-likelihood ratio for the given counts.
         *
         * @param k11 frequency of the n-gram
         * @param k12 head frequency minus the n-gram frequency
         * @param k21 tail frequency minus the n-gram frequency
         * @param k22 n-gram total minus (head frequency + tail frequency - n-gram frequency)
         * @return the log-likelihood ratio score
         */
        public double logLikelihoodRatio(int k11, int k12, int k21, int k22);
    }

    /** Counters incremented for each key that {@code reduce} drops without emitting. */
    public static enum Skipped {
        /** More than one head gram was seen for the key. */
        EXTRA_HEAD,
        /** More than one tail gram was seen for the key. */
        EXTRA_TAIL,
        /** No head gram was seen for the key. */
        MISSING_HEAD,
        /** No tail gram was seen for the key. */
        MISSING_TAIL,
        /** The computed score was below the configured minimum. */
        LESS_THAN_MIN_LLR,
        /** The LLR callback threw {@link IllegalArgumentException}. */
        LLR_CALCULATION_ERROR;

    }
}

