/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.classifier.bayes.mapreduce.common;

import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.GenericsUtil;
import org.apache.mahout.classifier.bayes.common.BayesParameters;
import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
import org.apache.mahout.classifier.bayes.mapreduce.common.BayesJob;
import org.apache.mahout.classifier.bayes.mapreduce.common.BayesTfIdfMapper;
import org.apache.mahout.classifier.bayes.mapreduce.common.BayesTfIdfOutputFormat;
import org.apache.mahout.classifier.bayes.mapreduce.common.BayesTfIdfReducer;
import org.apache.mahout.classifier.bayes.mapreduce.common.BayesWeightSummerDriver;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.StringTuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Driver that configures and submits the Bayes TF-IDF map/reduce job.
 */
public class BayesTfIdfDriver implements BayesJob {
    private static final Logger log = LoggerFactory.getLogger(BayesTfIdfDriver.class);

    /**
     * Configures the TF-IDF job and submits it through {@link JobClient#runJob(JobConf)}.
     *
     * <p>The job reads the {@code trainer-termDocCount}, {@code trainer-wordFreq} and
     * {@code trainer-featureCount} directories under {@code output} as sequence files and
     * writes to {@code output/trainer-tfIdf}, which is deleted before the job is submitted.
     * The per-label document counts read from {@code output/trainer-docCount/part-*} are
     * passed to the tasks as a string under the {@code cnaivebayes.labelDocumentCounts}
     * configuration key, and {@code params} under {@code bayes.parameters}.
     *
     * @param input  only used to build the job name; the job's input paths are all
     *               derived from {@code output}
     * @param output base directory holding the earlier trainer outputs and receiving
     *               the {@code trainer-tfIdf} result
     * @param params Bayes parameters forwarded to the job configuration
     * @throws IOException if reading the document counts, serializing them, deleting the
     *                     old output or running the job fails
     */
    @Override
    public void runJob(Path input, Path output, BayesParameters params) throws IOException {
        JobConf conf = new JobConf(BayesTfIdfDriver.class);
        conf.setJobName("TfIdf Driver running over input: " + input);
        conf.setOutputKeyClass(StringTuple.class);
        conf.setOutputValueClass(DoubleWritable.class);

        FileInputFormat.addInputPath(conf, new Path(output, "trainer-termDocCount"));
        FileInputFormat.addInputPath(conf, new Path(output, "trainer-wordFreq"));
        FileInputFormat.addInputPath(conf, new Path(output, "trainer-featureCount"));
        Path outPath = new Path(output, "trainer-tfIdf");
        FileOutputFormat.setOutputPath(conf, outPath);

        conf.setJarByClass(BayesTfIdfDriver.class);
        conf.setMapperClass(BayesTfIdfMapper.class);
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setCombinerClass(BayesTfIdfReducer.class);
        conf.setReducerClass(BayesTfIdfReducer.class);
        conf.setOutputFormat(BayesTfIdfOutputFormat.class);
        // Must be set before the stringifier is created below: it looks up its
        // serializer for the counts map from this configuration.
        conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");

        // Remove output left over from a previous run.
        HadoopUtil.delete(conf, outPath);

        Path interimFile = new Path(output, "trainer-docCount/part-*");
        Map<String, Double> labelDocumentCounts = SequenceFileModelReader.readLabelDocumentCounts(interimFile, conf);
        conf.set("cnaivebayes.labelDocumentCounts", stringifyLabelDocumentCounts(conf, labelDocumentCounts));

        log.info(params.print());
        conf.set("bayes.parameters", params.toString());

        // runJob is static, so no JobClient instance is needed to submit the job.
        JobClient.runJob(conf);
    }

    /**
     * Serializes the label document counts to a string and logs the round-tripped map.
     *
     * @param conf                configuration the stringifier is created from
     * @param labelDocumentCounts document count per label
     * @return the string form produced by the stringifier
     * @throws IOException if serialization, deserialization or closing the stringifier fails
     */
    private static String stringifyLabelDocumentCounts(JobConf conf, Map<String, Double> labelDocumentCounts)
            throws IOException {
        DefaultStringifier<Map<String, Double>> mapStringifier =
            new DefaultStringifier<Map<String, Double>>(conf, GenericsUtil.getClass(labelDocumentCounts));
        try {
            String labelDocumentCountString = mapStringifier.toString(labelDocumentCounts);
            log.info("Counts of documents in Each Label");
            // Decode the string again so the log shows what a reader of the string gets back.
            Map<String, Double> decoded = mapStringifier.fromString(labelDocumentCountString);
            log.info("{}", decoded);
            return labelDocumentCountString;
        } finally {
            // The stringifier is Closeable; release its buffers and serializers.
            mapStringifier.close();
        }
    }
}

