/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.df.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.classifier.ClassifierResult;
import org.apache.mahout.classifier.ResultAnalyzer;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.df.DFUtils;
import org.apache.mahout.df.DecisionForest;
import org.apache.mahout.df.data.DataConverter;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.data.Instance;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Classifies input data with a decision forest using a map-only Hadoop job.
 *
 * <p>The job writes its raw results as sequence files under a {@code mappers} sub-directory of the
 * output path. Once the job has completed, those files are converted into one {@code .out} file per
 * input file directly under the output path, and the intermediate directory is deleted.
 *
 * <p>When constructed with {@code analyze == true}, every prediction is also fed to a
 * {@link ResultAnalyzer} that can be retrieved through {@link #getAnalyzer()}.
 */
public class Classifier {
    private static final Logger log = LoggerFactory.getLogger(Classifier.class);

    /** Location of the serialized decision forest. */
    private final Path forestPath;
    /** Location of the data to classify. */
    private final Path inputPath;
    /** Location of the dataset description. */
    private final Path datasetPath;
    /** Configuration used for the job; cache files are registered on it by {@link #run()}. */
    private final Configuration conf;
    /** Collects the predictions, or {@code null} when analysis was not requested. */
    private final ResultAnalyzer analyzer;
    /** Dataset used to translate label codes, or {@code null} when analysis was not requested. */
    private final Dataset dataset;
    /** Directory that receives the final {@code .out} files. */
    private final Path outputPath;
    /** Temporary directory ({@code <outputPath>/mappers}) that receives the raw job output. */
    private final Path mappersOutputPath;

    /**
     * Returns the analyzer fed by {@link #run()}.
     *
     * @return the result analyzer, or {@code null} if this classifier was built with
     *         {@code analyze == false}
     */
    public ResultAnalyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Creates a classifier.
     *
     * @param forestPath  path of the decision forest
     * @param inputPath   path of the data to classify
     * @param datasetPath path of the dataset description
     * @param outputPath  directory that will receive the predictions
     * @param conf        Hadoop configuration used to load the dataset and to run the job
     * @param analyze     if {@code true}, the dataset is loaded immediately and a
     *                    {@link ResultAnalyzer} is created from its labels
     * @throws IOException if {@code analyze} is set and the dataset cannot be loaded
     */
    public Classifier(Path forestPath, Path inputPath, Path datasetPath, Path outputPath, Configuration conf, boolean analyze) throws IOException {
        this.forestPath = forestPath;
        this.inputPath = inputPath;
        this.datasetPath = datasetPath;
        this.outputPath = outputPath;
        this.conf = conf;
        if (analyze) {
            this.dataset = Dataset.load(conf, datasetPath);
            this.analyzer = new ResultAnalyzer(Arrays.asList(this.dataset.labels()), "unknown");
        } else {
            this.dataset = null;
            this.analyzer = null;
        }
        this.mappersOutputPath = new Path(outputPath, "mappers");
    }

    /**
     * Configures {@code job} as a map-only job that reads the input as unsplittable text files and
     * writes {@code (LongWritable, Text)} pairs to sequence files under the mappers output path.
     *
     * @param job the job to configure
     * @throws IOException if the input or output paths cannot be set
     */
    private void configureJob(Job job) throws IOException {
        job.setJarByClass(Classifier.class);

        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, mappersOutputPath);

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(CMapper.class);
        job.setNumReduceTasks(0); // map-only job

        job.setInputFormatClass(CTextInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
    }

    /**
     * Runs the classification job and converts its output.
     *
     * <p>The dataset and the forest are added to the DistributedCache, in that order;
     * {@link CMapper#setup} relies on this order. If the job does not complete successfully an error
     * is logged and the method returns without parsing any output.
     *
     * @throws IOException            if the output path already exists or an I/O error occurs
     * @throws ClassNotFoundException propagated from the job execution
     * @throws InterruptedException   propagated from the job execution
     */
    public void run() throws IOException, ClassNotFoundException, InterruptedException {
        FileSystem fs = FileSystem.get(conf);

        // Refuse to overwrite existing results.
        if (fs.exists(outputPath)) {
            throw new IOException("Output path already exists : " + outputPath);
        }

        log.info("Adding the dataset to the DistributedCache");
        DistributedCache.addCacheFile(datasetPath.toUri(), conf);

        log.info("Adding the decision forest to the DistributedCache");
        DistributedCache.addCacheFile(forestPath.toUri(), conf);

        Job job = new Job(conf, "decision forest classifier");

        log.info("Configuring the job...");
        configureJob(job);

        log.info("Running the job...");
        if (!job.waitForCompletion(true)) {
            log.error("Job failed!");
            return;
        }

        parseOutput(job);

        // The raw mapper output is no longer needed once it has been converted.
        HadoopUtil.delete(conf, mappersOutputPath);
    }

    /**
     * Converts the raw mapper output into one {@code .out} file per mapper output file.
     *
     * <p>For each file, the value of the first record is used as the name of the output file
     * (suffixed with {@code .out}) created under the output path. Every following record holds a
     * prediction in its value and the label code emitted by the mapper in its key: the prediction is
     * written to the output file followed by a newline and, if an analyzer is present, reported to it.
     *
     * @param job the completed job, used to obtain the configuration
     * @throws IOException if the mapper output cannot be read or the result files cannot be written
     */
    private void parseOutput(JobContext job) throws IOException {
        Configuration jobConf = job.getConfiguration();
        FileSystem fs = mappersOutputPath.getFileSystem(jobConf);
        Path[] outfiles = DFUtils.listOutputFiles(fs, mappersOutputPath);

        for (Path path : outfiles) {
            FSDataOutputStream ofile = null;
            try {
                for (Pair<LongWritable, Text> record
                        : new SequenceFileIterable<LongWritable, Text>(path, true, jobConf)) {
                    int key = (int) record.getFirst().get();
                    String value = record.getSecond().toString();
                    if (ofile == null) {
                        // First record of the file: its value is the name of the input file.
                        ofile = fs.create(new Path(outputPath, value).suffix(".out"));
                    } else {
                        // Any other record: its value is a prediction.
                        ofile.writeChars(value);
                        ofile.writeChar('\n');
                        if (analyzer != null) {
                            analyzer.addInstance(dataset.getLabel(key),
                                    new ClassifierResult(dataset.getLabel(Integer.parseInt(value)), 1.0));
                        }
                    }
                }
            } finally {
                // ofile is still null when the file held no record or reading failed before the
                // first one; closing unconditionally would throw a NullPointerException and hide
                // the original failure.
                if (ofile != null) {
                    ofile.close();
                }
            }
        }
    }

    /**
     * Mapper that classifies each non-empty input line with the decision forest.
     *
     * <p>The first call to {@code map} additionally emits the name of the input file being
     * processed, so that the output parser can name the result file after it.
     */
    public static class CMapper
    extends Mapper<LongWritable, Text, LongWritable, Text> {
        /** Converts text lines into instances of the dataset. */
        private DataConverter converter;
        /** Forest used to classify the instances. */
        private DecisionForest forest;
        private final Random rng = RandomUtils.getRandom();
        /** {@code true} until the input file name has been emitted. */
        private boolean first = true;
        /** Reused output value. */
        private final Text lvalue = new Text();

        /**
         * Loads the dataset (first cache file) and the decision forest (second cache file) from the
         * DistributedCache.
         *
         * @throws IOException          if fewer than two cache files are available or loading fails
         * @throws InterruptedException if the forest could not be loaded
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);

            Configuration conf = context.getConfiguration();
            URI[] files = DistributedCache.getCacheFiles(conf);
            if (files == null || files.length < 2) {
                throw new IOException("not enough paths in the DistributedCache");
            }

            Dataset dataset = Dataset.load(conf, new Path(files[0].getPath()));
            converter = new DataConverter(dataset);

            forest = DecisionForest.load(conf, new Path(files[1].getPath()));
            if (forest == null) {
                throw new InterruptedException("DecisionForest not found!");
            }
        }

        /**
         * Emits the input file name on the first call, then, for a non-empty line, emits the
         * instance's label as key and the forest's prediction as value.
         *
         * @param key     input key; it is overwritten with the instance label before being emitted
         * @param value   one line of the input file
         * @param context context used to emit the output
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            if (first) {
                // Tell the output parser which input file these predictions belong to.
                FileSplit split = (FileSplit) context.getInputSplit();
                lvalue.set(split.getPath().getName());
                context.write(key, lvalue);
                first = false;
            }

            String line = value.toString();
            if (!line.isEmpty()) {
                Instance instance = converter.convert(0, line);
                int prediction = forest.classify(rng, instance);
                key.set((long) instance.getLabel());
                lvalue.set(Integer.toString(prediction));
                context.write(key, lvalue);
            }
        }
    }

    /**
     * Text input format that never splits its files, so that every input file is handled as a
     * single split.
     */
    public static class CTextInputFormat
    extends TextInputFormat {
        @Override
        protected boolean isSplitable(JobContext jobContext, Path path) {
            return false;
        }
    }
}

