/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.dirichlet;

import java.io.IOException;
import java.net.URI;
import java.util.List;
import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.dirichlet.DirichletCluster;
import org.apache.mahout.clustering.dirichlet.DirichletClusterMapper;
import org.apache.mahout.clustering.dirichlet.DirichletClusterer;
import org.apache.mahout.clustering.dirichlet.DirichletMapper;
import org.apache.mahout.clustering.dirichlet.DirichletReducer;
import org.apache.mahout.clustering.dirichlet.DirichletState;
import org.apache.mahout.clustering.dirichlet.models.DistributionDescription;
import org.apache.mahout.clustering.dirichlet.models.GaussianClusterDistribution;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line and programmatic driver for Dirichlet clustering.
 *
 * <p>The driver writes an initial set of models to {@code <output>/clusters-0}, runs
 * {@code maxIterations} iterations (either in-process or as Hadoop jobs), each of which reads
 * {@code clusters-(i-1)} and writes {@code clusters-i}, and optionally assigns the input points
 * to the final models under {@code <output>/clusteredPoints}.
 */
public class DirichletDriver
extends AbstractJob {
    /** Configuration key under which the path of the current cluster state is stored. */
    public static final String STATE_IN_KEY = "org.apache.mahout.clustering.dirichlet.stateIn";
    /** Configuration key under which the string form of the {@link DistributionDescription} is stored. */
    public static final String MODEL_DISTRIBUTION_KEY = "org.apache.mahout.clustering.dirichlet.modelFactory";
    /** Configuration key under which the number of clusters is stored. */
    public static final String NUM_CLUSTERS_KEY = "org.apache.mahout.clustering.dirichlet.numClusters";
    /** Configuration key under which the alpha_0 value is stored. */
    public static final String ALPHA_0_KEY = "org.apache.mahout.clustering.dirichlet.alpha_0";
    /** Configuration key under which the emit-most-likely flag is stored. */
    public static final String EMIT_MOST_LIKELY_KEY = "org.apache.mahout.clustering.dirichlet.emitMostLikely";
    /** Configuration key under which the clustering threshold is stored. */
    public static final String THRESHOLD_KEY = "org.apache.mahout.clustering.dirichlet.threshold";
    /** Long name of the command-line option naming the prototype vector class. */
    public static final String MODEL_PROTOTYPE_CLASS_OPTION = "modelPrototype";
    /** Long name of the command-line option naming the model distribution class. */
    public static final String MODEL_DISTRIBUTION_CLASS_OPTION = "modelDist";
    /** Long name of the command-line option carrying the alpha0 value. */
    public static final String ALPHA_OPTION = "alpha";
    private static final Logger log = LoggerFactory.getLogger(DirichletDriver.class);

    /**
     * Command-line entry point; delegates to {@link ToolRunner} with a fresh {@link Configuration}.
     *
     * @param args the command-line arguments
     * @throws Exception if the tool run fails
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new DirichletDriver(), args);
    }

    /**
     * Registers the command-line options, parses {@code args} and runs the clustering.
     *
     * @param args the command-line arguments
     * @return {@code -1} if argument parsing yields nothing to run, {@code 0} otherwise
     * @throws Exception if option parsing or any clustering step fails
     */
    @Override
    public int run(String[] args) throws Exception {
        this.addInputOption();
        this.addOutputOption();
        this.addOption(DefaultOptionCreator.maxIterationsOption().create());
        this.addOption(DefaultOptionCreator.numClustersOption().withRequired(true).create());
        this.addOption(DefaultOptionCreator.overwriteOption().create());
        this.addOption(DefaultOptionCreator.clusteringOption().create());
        this.addOption(ALPHA_OPTION, "a0", "The alpha0 value for the DirichletDistribution. Defaults to 1.0", "1.0");
        this.addOption(MODEL_DISTRIBUTION_CLASS_OPTION, "md", "The ModelDistribution class name. Defaults to GaussianClusterDistribution", GaussianClusterDistribution.class.getName());
        this.addOption(MODEL_PROTOTYPE_CLASS_OPTION, "mp", "The ModelDistribution prototype Vector class name. Defaults to RandomAccessSparseVector", RandomAccessSparseVector.class.getName());
        this.addOption(DefaultOptionCreator.distanceMeasureOption().withRequired(false).create());
        this.addOption(DefaultOptionCreator.emitMostLikelyOption().create());
        this.addOption(DefaultOptionCreator.thresholdOption().create());
        this.addOption(DefaultOptionCreator.methodOption().create());
        if (this.parseArguments(args) == null) {
            return -1;
        }

        Path input = this.getInputPath();
        Path output = this.getOutputPath();
        if (this.hasOption("overwrite")) {
            HadoopUtil.delete(this.getConf(), output);
        }

        String modelFactory = this.getOption(MODEL_DISTRIBUTION_CLASS_OPTION);
        String modelPrototype = this.getOption(MODEL_PROTOTYPE_CLASS_OPTION);
        String distanceMeasure = this.getOption("distanceMeasure");
        int numModels = Integer.parseInt(this.getOption("numClusters"));
        int maxIterations = Integer.parseInt(this.getOption("maxIter"));
        boolean emitMostLikely = Boolean.parseBoolean(this.getOption("emitMostLikely"));
        double threshold = Double.parseDouble(this.getOption("threshold"));
        double alpha0 = Double.parseDouble(this.getOption(ALPHA_OPTION));
        boolean runClustering = this.hasOption("clustering");
        // Literal first so that a missing option value selects the non-sequential path instead of throwing.
        boolean runSequential = "sequential".equalsIgnoreCase(this.getOption("method"));

        int prototypeSize = DirichletDriver.readPrototypeSize(input);
        DistributionDescription description = new DistributionDescription(modelFactory, modelPrototype, distanceMeasure, prototypeSize);
        DirichletDriver.run(this.getConf(), input, output, description, numModels, maxIterations, alpha0, runClustering, emitMostLikely, threshold, runSequential);
        return 0;
    }

    /**
     * Builds the clusters and, when requested, assigns the input points to them.
     *
     * @param conf the configuration to use; the map/reduce code paths set driver keys on it
     * @param input the directory of input vectors
     * @param output the directory under which {@code clusters-N} and {@code clusteredPoints} are written
     * @param description the model distribution description
     * @param numModels the number of models to create
     * @param maxIterations the number of iterations to run
     * @param alpha0 the alpha_0 value
     * @param runClustering if true, cluster the input points after the models are built
     * @param emitMostLikely passed through to the point-clustering step
     * @param threshold passed through to the point-clustering step
     * @param runSequential if true, run in-process instead of submitting Hadoop jobs
     * @throws IOException if reading or writing state fails
     * @throws ClassNotFoundException propagated from job submission
     * @throws InterruptedException if a job is interrupted or reports failure
     */
    public static void run(Configuration conf, Path input, Path output, DistributionDescription description, int numModels, int maxIterations, double alpha0, boolean runClustering, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
        Path clustersOut = DirichletDriver.buildClusters(conf, input, output, description, numModels, maxIterations, alpha0, runSequential);
        if (runClustering) {
            DirichletDriver.clusterData(conf, input, clustersOut, new Path(output, "clusteredPoints"), emitMostLikely, threshold, runSequential);
        }
    }

    /**
     * Convenience overload of
     * {@link #run(Configuration, Path, Path, DistributionDescription, int, int, double, boolean, boolean, double, boolean)}
     * that uses a newly created {@link Configuration}.
     *
     * @param input the directory of input vectors
     * @param output the directory under which results are written
     * @param description the model distribution description
     * @param numClusters the number of models to create
     * @param maxIterations the number of iterations to run
     * @param alpha0 the alpha_0 value
     * @param runClustering if true, cluster the input points after the models are built
     * @param emitMostLikely passed through to the point-clustering step
     * @param threshold passed through to the point-clustering step
     * @param runSequential if true, run in-process instead of submitting Hadoop jobs
     * @throws IOException if reading or writing state fails
     * @throws ClassNotFoundException propagated from job submission
     * @throws InterruptedException if a job is interrupted or reports failure
     */
    public static void run(Path input, Path output, DistributionDescription description, int numClusters, int maxIterations, double alpha0, boolean runClustering, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
        DirichletDriver.run(new Configuration(), input, output, description, numClusters, maxIterations, alpha0, runClustering, emitMostLikely, threshold, runSequential);
    }

    /**
     * Creates a new {@link DirichletState} from the given arguments.
     *
     * @param description the model distribution description
     * @param numModels the number of models
     * @param alpha0 the alpha_0 value
     * @return the newly constructed state
     */
    static DirichletState createState(DistributionDescription description, int numModels, double alpha0) {
        return new DirichletState(description, numModels, alpha0);
    }

    /**
     * Determines the prototype vector size from the first file listed under {@code input}.
     *
     * <p>Every value in that file is visited and the size of the last vector read is returned.
     * A newly created {@link Configuration} is used to resolve the file system.
     *
     * @param input the directory of input vectors
     * @return the size of the last vector in the first listed file, or {@code 0} if no file is
     *         listed or the file holds no values
     * @throws IOException if listing or reading the input fails
     */
    public static int readPrototypeSize(Path input) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(input.toUri(), conf);
        FileStatus[] status = fs.listStatus(input, PathFilters.logsCRCFilter());
        int protoSize = 0;
        if (status.length > 0) {
            Path first = status[0].getPath();
            // NOTE(review): the loop runs to the end of the file, so the last vector's size wins;
            // presumably all vectors share one size and only a single read is needed - confirm.
            for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(first, true, conf)) {
                protoSize = value.get().size();
            }
        }
        return protoSize;
    }

    /** Creates a fresh state and writes it to {@code stateOut} using the supplied configuration. */
    private static void writeInitialState(Configuration conf, Path output, Path stateOut, DistributionDescription description, int numModels, double alpha0) throws IOException {
        DirichletState state = DirichletDriver.createState(description, numModels, alpha0);
        DirichletDriver.writeState(conf, output, stateOut, numModels, state);
    }

    /**
     * Writes each of the first {@code numModels} clusters of {@code state} to its own
     * {@code part-<i>} sequence file under {@code stateOut}, keyed by the cluster index as text.
     */
    private static void writeState(Configuration conf, Path output, Path stateOut, int numModels, DirichletState state) throws IOException {
        FileSystem fs = FileSystem.get(output.toUri(), conf);
        List<DirichletCluster> clusters = state.getClusters();
        for (int i = 0; i < numModels; i++) {
            Path path = new Path(stateOut, "part-" + i);
            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, DirichletCluster.class);
            try {
                writer.append(new Text(Integer.toString(i)), clusters.get(i));
            } finally {
                // Close even when append fails so the underlying stream is not leaked.
                writer.close();
            }
        }
    }

    /**
     * Runs one map/reduce iteration that reads the state at {@code stateIn} and writes the
     * updated state to {@code stateOut}. The driver keys are set on {@code conf} before the
     * job is created.
     */
    private static void runIteration(Configuration conf, Path input, Path stateIn, Path stateOut, DistributionDescription description, int numClusters, double alpha0) throws IOException, InterruptedException, ClassNotFoundException {
        conf.set(STATE_IN_KEY, stateIn.toString());
        conf.set(MODEL_DISTRIBUTION_KEY, description.toString());
        conf.set(NUM_CLUSTERS_KEY, Integer.toString(numClusters));
        conf.set(ALPHA_0_KEY, Double.toString(alpha0));

        Job job = new Job(conf, "Dirichlet Driver running runIteration over stateIn: " + stateIn);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DirichletCluster.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(VectorWritable.class);
        job.setMapperClass(DirichletMapper.class);
        job.setReducerClass(DirichletReducer.class);
        job.setJarByClass(DirichletDriver.class);
        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, stateOut);

        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Dirichlet Iteration failed processing " + stateIn);
        }
    }

    /**
     * Writes the initial state to {@code <output>/clusters-0} and iterates over the input to
     * refine it, either in-process or via map/reduce.
     *
     * @param conf the configuration to use
     * @param input the directory of input vectors
     * @param output the directory under which the {@code clusters-N} directories are written
     * @param description the model distribution description
     * @param numClusters the number of models to create
     * @param maxIterations the number of iterations to run
     * @param alpha0 the alpha_0 value
     * @param runSequential if true, run in-process instead of submitting Hadoop jobs
     * @return the path of the last cluster state written
     * @throws IOException if reading or writing state fails
     * @throws ClassNotFoundException propagated from job submission
     * @throws InterruptedException if a job is interrupted or reports failure
     */
    public static Path buildClusters(Configuration conf, Path input, Path output, DistributionDescription description, int numClusters, int maxIterations, double alpha0, boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
        Path clustersIn = new Path(output, "clusters-0");
        DirichletDriver.writeInitialState(conf, output, clustersIn, description, numClusters, alpha0);
        if (runSequential) {
            return DirichletDriver.buildClustersSeq(conf, input, output, description, numClusters, maxIterations, alpha0, clustersIn);
        }
        return DirichletDriver.buildClustersMR(conf, input, output, description, numClusters, maxIterations, alpha0, clustersIn);
    }

    /**
     * In-process iteration: for each iteration loads the previous state, samples new models,
     * observes every input vector, updates the models and writes the state to
     * {@code clusters-<iteration>}.
     *
     * @return the path of the last state written, or {@code clustersIn} if no iteration ran
     */
    private static Path buildClustersSeq(Configuration conf, Path input, Path output, DistributionDescription description, int numClusters, int maxIterations, double alpha0, Path clustersIn) throws IOException {
        for (int iteration = 1; iteration <= maxIterations; iteration++) {
            log.info("Iteration {}", iteration);
            Path clustersOut = new Path(output, "clusters-" + iteration);
            DirichletState state = DirichletMapper.loadState(conf, clustersIn.toString(), description, alpha0, numClusters);

            for (DirichletCluster oldModel : state.getClusters()) {
                oldModel.getModel().configure(conf);
            }
            Cluster[] newModels = (Cluster[]) state.getModelFactory().sampleFromPosterior(state.getModels());
            for (Cluster newModel : newModels) {
                newModel.configure(conf);
            }

            DirichletClusterer clusterer = new DirichletClusterer(state);
            for (VectorWritable value : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
                clusterer.observe(newModels, value);
            }
            clusterer.updateModels(newModels);

            DirichletDriver.writeState(conf, output, clustersOut, numClusters, state);
            clustersIn = clustersOut;
        }
        return clustersIn;
    }

    /**
     * Map/reduce iteration: runs one job per iteration, feeding each job's output state to the next.
     *
     * @return the path of the last state written, or {@code clustersIn} if no iteration ran
     */
    private static Path buildClustersMR(Configuration conf, Path input, Path output, DistributionDescription description, int numClusters, int maxIterations, double alpha0, Path clustersIn) throws IOException, InterruptedException, ClassNotFoundException {
        for (int iteration = 1; iteration <= maxIterations; iteration++) {
            log.info("Iteration {}", iteration);
            Path clustersOut = new Path(output, "clusters-" + iteration);
            DirichletDriver.runIteration(conf, input, clustersIn, clustersOut, description, numClusters, alpha0);
            clustersIn = clustersOut;
        }
        return clustersIn;
    }

    /**
     * Assigns the input points to the clusters stored at {@code stateIn}, in-process or via a
     * map-only job.
     *
     * @param conf the configuration to use
     * @param input the directory of input vectors
     * @param stateIn the directory holding the cluster state to cluster against
     * @param output the directory to write the clustered points to
     * @param emitMostLikely passed to the clusterer / stored under {@link #EMIT_MOST_LIKELY_KEY}
     * @param threshold passed to the clusterer / stored under {@link #THRESHOLD_KEY}
     * @param runSequential if true, run in-process instead of submitting a Hadoop job
     * @throws IOException if reading or writing fails
     * @throws InterruptedException if the job is interrupted or reports failure
     * @throws ClassNotFoundException propagated from job submission
     */
    public static void clusterData(Configuration conf, Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        if (runSequential) {
            DirichletDriver.clusterDataSeq(conf, input, stateIn, output, emitMostLikely, threshold);
        } else {
            DirichletDriver.clusterDataMR(conf, input, stateIn, output, emitMostLikely, threshold);
        }
    }

    /**
     * In-process point clustering: for every file listed under {@code input}, writes a
     * {@code part-m-<n>} sequence file under {@code output} containing the points emitted by
     * the clusterer.
     */
    private static void clusterDataSeq(Configuration conf, Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold) throws IOException {
        List<DirichletCluster> clusters = DirichletClusterMapper.loadClusters(conf, stateIn);
        for (DirichletCluster cluster : clusters) {
            cluster.getModel().configure(conf);
        }
        DirichletClusterer clusterer = new DirichletClusterer(emitMostLikely, threshold);

        // NOTE(review): the file system is resolved from the input path and also used for the
        // output writers - presumably input and output live on the same file system; confirm.
        FileSystem fs = FileSystem.get(input.toUri(), conf);
        FileStatus[] status = fs.listStatus(input, PathFilters.logsCRCFilter());
        int part = 0;
        for (FileStatus s : status) {
            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(output, "part-m-" + part++), IntWritable.class, WeightedVectorWritable.class);
            try {
                for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(s.getPath(), conf)) {
                    clusterer.emitPointToClusters(value, clusters, writer);
                }
            } finally {
                writer.close();
            }
        }
    }

    /**
     * Map-only point clustering job. The driver keys are set on {@code conf} before the job is
     * created.
     */
    private static void clusterDataMR(Configuration conf, Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold) throws IOException, InterruptedException, ClassNotFoundException {
        conf.set(STATE_IN_KEY, stateIn.toString());
        conf.set(EMIT_MOST_LIKELY_KEY, Boolean.toString(emitMostLikely));
        conf.set(THRESHOLD_KEY, Double.toString(threshold));

        Job job = new Job(conf, "Dirichlet Driver running clusterData over input: " + input);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(WeightedVectorWritable.class);
        job.setMapperClass(DirichletClusterMapper.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setNumReduceTasks(0);
        job.setJarByClass(DirichletDriver.class);
        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, output);

        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Dirichlet Clustering failed processing " + stateIn);
        }
    }
}

