/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.classifier.bayes.mapreduce.common;

import com.google.common.collect.Iterators;
import java.io.IOException;
import java.util.Iterator;
import java.util.regex.Pattern;
import org.apache.commons.lang.mutable.MutableDouble;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.mahout.classifier.bayes.common.BayesParameters;
import org.apache.mahout.common.StringTuple;
import org.apache.mahout.common.lucene.IteratorTokenStream;
import org.apache.mahout.math.function.ObjectIntProcedure;
import org.apache.mahout.math.map.OpenObjectIntHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class BayesFeatureMapper
extends MapReduceBase
implements Mapper<Text, Text, StringTuple, DoubleWritable> {
    private static final Logger log = LoggerFactory.getLogger(BayesFeatureMapper.class);
    private static final DoubleWritable ONE = new DoubleWritable(1.0);
    private static final Pattern SPACE_PATTERN = Pattern.compile("[ ]+");
    private int gramSize = 1;

    public void map(Text key, Text value, final OutputCollector<StringTuple, DoubleWritable> output, Reporter reporter) throws IOException {
        final String label = key.toString();
        Object[] tokens = SPACE_PATTERN.split(value.toString());
        OpenObjectIntHashMap wordList = new OpenObjectIntHashMap(tokens.length * this.gramSize);
        if (this.gramSize > 1) {
            ShingleFilter sf = new ShingleFilter((TokenStream)new IteratorTokenStream((Iterator<String>)Iterators.forArray((Object[])tokens)), this.gramSize);
            do {
                String term;
                if ((term = ((TermAttribute)sf.getAttribute(TermAttribute.class)).term()).length() <= 0) continue;
                if (wordList.containsKey((Object)term)) {
                    wordList.put((Object)term, 1 + wordList.get((Object)term));
                    continue;
                }
                wordList.put((Object)term, 1);
            } while (sf.incrementToken());
        } else {
            for (Object term : tokens) {
                if (wordList.containsKey(term)) {
                    wordList.put(term, 1 + wordList.get(term));
                    continue;
                }
                wordList.put(term, 1);
            }
        }
        final MutableDouble lengthNormalisationMut = new MutableDouble(0.0);
        wordList.forEachPair((ObjectIntProcedure)new ObjectIntProcedure<String>(){

            public boolean apply(String word, int dKJ) {
                lengthNormalisationMut.add((double)(dKJ * dKJ));
                return true;
            }
        });
        final double lengthNormalisation = Math.sqrt(lengthNormalisationMut.doubleValue());
        wordList.forEachPair((ObjectIntProcedure)new ObjectIntProcedure<String>(){

            public boolean apply(String token, int dKJ) {
                try {
                    StringTuple tuple = new StringTuple();
                    tuple.add("__WT");
                    tuple.add(label);
                    tuple.add(token);
                    DoubleWritable f = new DoubleWritable(Math.log(1.0 + (double)dKJ) / lengthNormalisation);
                    output.collect((Object)tuple, (Object)f);
                }
                catch (IOException e) {
                    throw new IllegalStateException(e);
                }
                return true;
            }
        });
        reporter.setStatus("Bayes Feature Mapper: Document Label: " + label);
        wordList.forEachPair((ObjectIntProcedure)new ObjectIntProcedure<String>(){

            public boolean apply(String token, int dKJ) {
                try {
                    StringTuple dfTuple = new StringTuple();
                    dfTuple.add("__DF");
                    dfTuple.add(label);
                    dfTuple.add(token);
                    output.collect((Object)dfTuple, (Object)ONE);
                    StringTuple tokenCountTuple = new StringTuple();
                    tokenCountTuple.add("__FC");
                    tokenCountTuple.add(token);
                    output.collect((Object)tokenCountTuple, (Object)ONE);
                    StringTuple tokenTfTuple = new StringTuple();
                    tokenTfTuple.add("__FF");
                    tokenTfTuple.add(token);
                    output.collect((Object)tokenTfTuple, (Object)new DoubleWritable((double)dKJ));
                }
                catch (IOException e) {
                    throw new IllegalStateException(e);
                }
                return true;
            }
        });
        StringTuple labelCountTuple = new StringTuple();
        labelCountTuple.add("__LC");
        labelCountTuple.add(label);
        output.collect((Object)labelCountTuple, (Object)ONE);
    }

    public void configure(JobConf job) {
        try {
            BayesParameters params = new BayesParameters(job.get("bayes.parameters", ""));
            log.info("Bayes Parameter {}", (Object)params.print());
            this.gramSize = params.getGramSize();
        }
        catch (IOException ex) {
            log.warn(ex.toString(), (Throwable)ex);
        }
    }
}

