/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.meanshift;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopy;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopyClusterMapper;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopyClusterer;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopyCreatorMapper;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopyMapper;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopyReducer;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MeanShiftCanopyDriver
extends AbstractJob {
    /** Class-wide SLF4J logger. */
    private static final Logger log = LoggerFactory.getLogger(MeanShiftCanopyDriver.class);
    /** Name of the command-line flag registered by {@code run(String[])}; when present the input is treated as existing canopies. */
    public static final String INPUT_IS_CANOPIES_OPTION = "inputIsCanopies";
    /** Configuration key under which the map/reduce cluster-data job is given the path of the clusters to read. */
    public static final String STATE_IN_KEY = "org.apache.mahout.clustering.meanshift.stateInKey";
    /** Path, relative to the output directory, whose existence is tested after each map/reduce iteration to decide convergence. */
    private static final String CONTROL_CONVERGED = "control/converged";

    /**
     * Command-line entry point. Hands the arguments to {@link ToolRunner} together with a fresh
     * {@link Configuration} and a new driver instance; the value returned by the runner is ignored.
     *
     * @param args raw command-line arguments
     * @throws Exception whatever {@code ToolRunner.run} propagates
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Tool driver = new MeanShiftCanopyDriver();
        ToolRunner.run(configuration, driver, args);
    }

    /**
     * Registers the command-line options, parses {@code args}, and launches the mean shift job
     * through the static {@code run} overload.
     *
     * @param args command-line arguments to parse
     * @return {@code -1} when argument parsing yields {@code null}, otherwise {@code 0}
     * @throws Exception on number-parsing, class-loading, instantiation, or job failures
     */
    public int run(String[] args) throws Exception {
        // Option registration; order kept as-is.
        addInputOption();
        addOutputOption();
        addOption(DefaultOptionCreator.convergenceOption().create());
        addOption(DefaultOptionCreator.maxIterationsOption().create());
        addOption(DefaultOptionCreator.overwriteOption().create());
        addOption(INPUT_IS_CANOPIES_OPTION, INPUT_IS_CANOPIES_OPTION, "If present, the input directory already contains MeanShiftCanopies");
        addOption(DefaultOptionCreator.distanceMeasureOption().create());
        addOption(DefaultOptionCreator.t1Option().create());
        addOption(DefaultOptionCreator.t2Option().create());
        addOption(DefaultOptionCreator.clusteringOption().create());
        addOption(DefaultOptionCreator.methodOption().create());

        if (parseArguments(args) == null) {
            return -1;
        }

        Path inputPath = getInputPath();
        Path outputPath = getOutputPath();
        if (hasOption("overwrite")) {
            // Clear any previous output before running.
            HadoopUtil.delete(getConf(), outputPath);
        }

        // Values are read and parsed in a fixed order so the first failing option stays the same.
        String measureClassName = getOption("distanceMeasure");
        double t1Value = Double.parseDouble(getOption("t1"));
        double t2Value = Double.parseDouble(getOption("t2"));
        boolean clusterPoints = hasOption("clustering");
        double delta = Double.parseDouble(getOption("convergenceDelta"));
        int iterationLimit = Integer.parseInt(getOption("maxIter"));
        boolean canopiesSupplied = hasOption(INPUT_IS_CANOPIES_OPTION);
        boolean sequential = getOption("method").equalsIgnoreCase("sequential");

        // Instantiate the distance measure through the thread's context class loader.
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        Class<? extends DistanceMeasure> measureType = loader.loadClass(measureClassName).asSubclass(DistanceMeasure.class);
        DistanceMeasure distanceMeasure = measureType.newInstance();

        run(getConf(), inputPath, outputPath, distanceMeasure, t1Value, t2Value, delta, iterationLimit, canopiesSupplied, clusterPoints, sequential);
        return 0;
    }

    /**
     * Runs the full mean shift pipeline: optional canopy creation, cluster building, and
     * optional assignment of the points to the resulting clusters.
     *
     * @param conf             configuration passed to canopy creation and cluster building
     * @param input            input directory (vectors, or canopies when {@code inputIsCanopies})
     * @param output           output directory; canopies go to {@code clusters-0}, assignments to {@code clusteredPoints}
     * @param measure          distance measure
     * @param t1               T1 threshold
     * @param t2               T2 threshold
     * @param convergenceDelta convergence threshold
     * @param maxIterations    upper bound on the number of iterations
     * @param inputIsCanopies  when true, {@code input} is used directly as the initial canopies
     * @param runClustering    when true, the points are clustered after the clusters are built
     * @param runSequential    when true, the sequential implementations are used instead of map/reduce
     */
    public static void run(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, double convergenceDelta, int maxIterations, boolean inputIsCanopies, boolean runClustering, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        Path generatedCanopies = new Path(output, "clusters-0");
        if (!inputIsCanopies) {
            // Input holds plain vectors: wrap each one in an initial canopy first.
            createCanopyFromVectors(conf, input, generatedCanopies, measure, runSequential);
        }
        Path initialCanopies = inputIsCanopies ? input : generatedCanopies;
        Path finalClusters = buildClusters(conf, initialCanopies, output, measure, t1, t2, convergenceDelta, maxIterations, runSequential);
        if (!runClustering) {
            return;
        }
        clusterData(initialCanopies, finalClusters, new Path(output, "clusteredPoints"), runSequential);
    }

    /**
     * Converts input vectors into initial canopies, using either the sequential or the
     * map/reduce implementation.
     *
     * @param conf          configuration handed to the map/reduce variant (the sequential variant does not receive it)
     * @param input         directory of input vectors
     * @param output        directory that receives the canopies
     * @param measure       distance measure
     * @param runSequential selects the sequential implementation when true
     */
    public static void createCanopyFromVectors(Configuration conf, Path input, Path output, DistanceMeasure measure, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        if (!runSequential) {
            createCanopyFromVectorsMR(conf, input, output, measure);
            return;
        }
        createCanopyFromVectorsSeq(input, output, measure);
    }

    /**
     * Sequentially converts every vector found under {@code input} into an initial canopy.
     * <p>
     * For each file listed under {@code input} (after {@code PathFilters.logsCRCFilter()}), one
     * sequence file {@code part-m-N} is written under {@code output}, with an empty {@link Text}
     * key and a {@link MeanShiftCanopy} value per vector. Canopy ids are assigned from a single
     * counter that runs across all input files.
     *
     * @param input   directory of sequence files whose values are {@link VectorWritable}s
     * @param output  directory that receives the canopy sequence files
     * @param measure distance measure given to each initial canopy
     * @throws IOException if listing, reading, or writing fails
     */
    private static void createCanopyFromVectorsSeq(Path input, Path output, DistanceMeasure measure) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(input.toUri(), conf);
        FileStatus[] status = fs.listStatus(input, PathFilters.logsCRCFilter());
        int part = 0;
        int id = 0;
        for (FileStatus s : status) {
            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(output, "part-m-" + part++), Text.class, MeanShiftCanopy.class);
            try {
                // The explicit type argument is required: iterating the raw type yields Object.
                for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(s.getPath(), conf)) {
                    writer.append(new Text(), MeanShiftCanopy.initialCanopy(value.get(), id++, measure));
                }
            } finally {
                // Close the writer even if reading or appending fails part-way.
                writer.close();
            }
        }
    }

    /**
     * Runs a map-only job (zero reducers) that uses {@link MeanShiftCanopyCreatorMapper} to turn
     * the input sequence files into {@code Text}/{@code MeanShiftCanopy} sequence files.
     * The measure's class name is stored in {@code conf} before the job is created.
     *
     * @param conf    configuration for the job; modified by this call
     * @param input   job input path
     * @param output  job output path
     * @param measure distance measure whose class name is published to the job
     * @throws InterruptedException also thrown when the job reports failure
     */
    private static void createCanopyFromVectorsMR(Configuration conf, Path input, Path output, DistanceMeasure measure) throws IOException, InterruptedException, ClassNotFoundException {
        String measureClassName = measure.getClass().getName();
        conf.set("org.apache.mahout.clustering.kmeans.measure", measureClassName);

        Job canopyJob = new Job(conf);
        canopyJob.setJarByClass(MeanShiftCanopyDriver.class);

        // Input/output formats and locations.
        canopyJob.setInputFormatClass(SequenceFileInputFormat.class);
        canopyJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(canopyJob, new Path[]{input});
        FileOutputFormat.setOutputPath(canopyJob, output);

        // Map-only processing.
        canopyJob.setMapperClass(MeanShiftCanopyCreatorMapper.class);
        canopyJob.setNumReduceTasks(0);
        canopyJob.setOutputKeyClass(Text.class);
        canopyJob.setOutputValueClass(MeanShiftCanopy.class);

        boolean succeeded = canopyJob.waitForCompletion(true);
        if (!succeeded) {
            throw new InterruptedException("Mean Shift createCanopyFromVectorsMR failed on input " + input);
        }
    }

    /**
     * Iterates over the canopies to build clusters, using either the sequential or the
     * map/reduce implementation.
     *
     * @param conf             configuration handed to the map/reduce variant (the sequential variant does not receive it)
     * @param clustersIn       directory of the initial canopies
     * @param output           directory under which each iteration's clusters are written
     * @param measure          distance measure
     * @param t1               T1 threshold
     * @param t2               T2 threshold
     * @param convergenceDelta convergence threshold
     * @param maxIterations    upper bound on the number of iterations
     * @param runSequential    selects the sequential implementation when true
     * @return the path returned by the selected implementation
     */
    public static Path buildClusters(Configuration conf, Path clustersIn, Path output, DistanceMeasure measure, double t1, double t2, double convergenceDelta, int maxIterations, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        return runSequential
            ? buildClustersSeq(clustersIn, output, measure, t1, t2, convergenceDelta, maxIterations)
            : buildClustersMR(conf, clustersIn, output, measure, t1, t2, convergenceDelta, maxIterations);
    }

    /**
     * Sequential cluster building.
     * <p>
     * All canopies under {@code clustersIn} are first merged into an in-memory list via
     * {@code clusterer.mergeCanopy}. The clusterer is then iterated until it sets the converged
     * flag or {@code maxIterations} iterations have run. After every iteration the current
     * clusters are written to {@code output/clusters-N/part-r-00000}.
     *
     * @param clustersIn       directory of the initial canopies
     * @param output           directory under which each iteration's clusters are written
     * @param measure          distance measure
     * @param t1               T1 threshold
     * @param t2               T2 threshold
     * @param convergenceDelta convergence threshold
     * @param maxIterations    upper bound on the number of iterations
     * @return the directory written by the last iteration, or {@code clustersIn} if no iteration ran
     * @throws IOException if reading or writing fails
     */
    private static Path buildClustersSeq(Path clustersIn, Path output, DistanceMeasure measure, double t1, double t2, double convergenceDelta, int maxIterations) throws IOException {
        MeanShiftCanopyClusterer clusterer = new MeanShiftCanopyClusterer(measure, t1, t2, convergenceDelta);
        List<MeanShiftCanopy> clusters = new ArrayList<MeanShiftCanopy>();
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(clustersIn.toUri(), conf);
        // The explicit type argument is required: iterating the raw type yields Object.
        for (MeanShiftCanopy value : new SequenceFileDirValueIterable<MeanShiftCanopy>(clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
            clusterer.mergeCanopy(value, clusters);
        }
        // One-element array so that iterate() can report convergence back to this loop.
        boolean[] converged = new boolean[]{false};
        for (int iteration = 1; !converged[0] && iteration <= maxIterations; ++iteration) {
            log.info("Mean Shift Iteration: {}", iteration);
            clusters = clusterer.iterate(clusters, converged);
            Path clustersOut = new Path(output, "clusters-" + iteration);
            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(clustersOut, "part-r-00000"), Text.class, MeanShiftCanopy.class);
            try {
                for (MeanShiftCanopy cluster : clusters) {
                    log.debug("Writing Cluster:{} center:{} numPoints:{} radius:{} to: {}", new Object[]{cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null), cluster.getNumPoints(), AbstractCluster.formatVector(cluster.getRadius(), null), clustersOut.getName()});
                    writer.append(new Text(cluster.getIdentifier()), cluster);
                }
            } finally {
                // Close the writer even if appending fails part-way.
                writer.close();
            }
            // Track the most recently written directory; it becomes the return value.
            clustersIn = clustersOut;
        }
        return clustersIn;
    }

    /**
     * Map/reduce cluster building.
     * <p>
     * One job is run per iteration, each reading the previous iteration's output and writing
     * {@code output/clusters-N}. Iteration stops once the control path
     * {@code output/control/converged} exists after a job, or after {@code maxIterations} jobs.
     * <p>
     * The existence check uses the filesystem that owns the control path, resolved with the
     * caller's {@code conf}, so the marker is looked up where the job was told to put it even
     * when the output is not on the default filesystem.
     *
     * @param conf             configuration used for every job and for the convergence check
     * @param clustersIn       directory of the initial canopies
     * @param output           directory under which clusters and the control path are written
     * @param measure          distance measure; its class name is passed to each job
     * @param t1               T1 threshold
     * @param t2               T2 threshold
     * @param convergenceDelta convergence threshold
     * @param maxIterations    upper bound on the number of iterations
     * @return the directory written by the last iteration, or {@code clustersIn} if no iteration ran
     */
    private static Path buildClustersMR(Configuration conf, Path clustersIn, Path output, DistanceMeasure measure, double t1, double t2, double convergenceDelta, int maxIterations) throws IOException, InterruptedException, ClassNotFoundException {
        // The control path does not change between iterations.
        Path controlOut = new Path(output, CONTROL_CONVERGED);
        String measureClassName = measure.getClass().getName();
        boolean converged = false;
        for (int iteration = 1; !converged && iteration <= maxIterations; ++iteration) {
            log.info("Mean Shift Iteration {}", iteration);
            Path clustersOut = new Path(output, "clusters-" + iteration);
            runIterationMR(conf, clustersIn, clustersOut, controlOut, measureClassName, t1, t2, convergenceDelta);
            // Look on the control path's own filesystem, using the caller's configuration.
            converged = FileSystem.get(controlOut.toUri(), conf).exists(controlOut);
            clustersIn = clustersOut;
        }
        return clustersIn;
    }

    /**
     * Runs one mean shift iteration as a map/reduce job with a single reducer, using
     * {@link MeanShiftCanopyMapper} and {@link MeanShiftCanopyReducer}. The measure class name,
     * thresholds, convergence delta, and control path are stored in {@code conf} before the job
     * is created.
     *
     * @param conf             configuration for the job; modified by this call
     * @param input            job input path
     * @param output           job output path
     * @param control          control path published to the job
     * @param measureClassName class name of the distance measure
     * @param t1               T1 threshold
     * @param t2               T2 threshold
     * @param convergenceDelta convergence threshold
     * @throws InterruptedException also thrown when the job reports failure
     */
    private static void runIterationMR(Configuration conf, Path input, Path output, Path control, String measureClassName, double t1, double t2, double convergenceDelta) throws IOException, InterruptedException, ClassNotFoundException {
        // Publish the iteration parameters to the tasks.
        conf.set("org.apache.mahout.clustering.canopy.measure", measureClassName);
        conf.set("org.apache.mahout.clustering.canopy.convergence", Double.toString(convergenceDelta));
        conf.set("org.apache.mahout.clustering.canopy.t1", Double.toString(t1));
        conf.set("org.apache.mahout.clustering.canopy.t2", Double.toString(t2));
        conf.set("org.apache.mahout.clustering.control.path", control.toString());

        String jobName = "Mean Shift Driver running runIteration over input: " + input;
        Job iterationJob = new Job(conf, jobName);
        iterationJob.setJarByClass(MeanShiftCanopyDriver.class);

        // Input/output formats and locations.
        iterationJob.setInputFormatClass(SequenceFileInputFormat.class);
        iterationJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(iterationJob, new Path[]{input});
        FileOutputFormat.setOutputPath(iterationJob, output);

        // Processing classes; a single reducer handles all mapper output.
        iterationJob.setMapperClass(MeanShiftCanopyMapper.class);
        iterationJob.setReducerClass(MeanShiftCanopyReducer.class);
        iterationJob.setNumReduceTasks(1);
        iterationJob.setOutputKeyClass(Text.class);
        iterationJob.setOutputValueClass(MeanShiftCanopy.class);

        boolean succeeded = iterationJob.waitForCompletion(true);
        if (!succeeded) {
            throw new InterruptedException("Mean Shift Iteration failed on input " + input);
        }
    }

    /**
     * Assigns the input canopies to the given clusters, using either the sequential or the
     * map/reduce implementation.
     *
     * @param input         directory of the canopies to assign
     * @param clustersIn    directory of the clusters to assign them to
     * @param output        directory that receives the assignments
     * @param runSequential selects the sequential implementation when true
     */
    public static void clusterData(Path input, Path clustersIn, Path output, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        if (!runSequential) {
            clusterDataMR(input, clustersIn, output);
            return;
        }
        clusterDataSeq(input, clustersIn, output);
    }

    /**
     * Sequentially assigns every canopy under {@code input} to one of the clusters under
     * {@code clustersIn}.
     * <p>
     * All clusters are loaded into memory first. Then, for each file listed under {@code input}
     * (after {@code PathFilters.logsCRCFilter()}), one sequence file {@code part-m-N} is written
     * under {@code output}. Each record's key is the id of the cluster returned by
     * {@code MeanShiftCanopyClusterer.findCoveringCanopy}, and its value is the canopy's center
     * wrapped with weight 1.0.
     *
     * @param input      directory of sequence files whose values are {@link MeanShiftCanopy}s
     * @param clustersIn directory of the final clusters
     * @param output     directory that receives the assignment sequence files
     * @throws IOException if listing, reading, or writing fails
     */
    private static void clusterDataSeq(Path input, Path clustersIn, Path output) throws IOException {
        List<MeanShiftCanopy> clusters = new ArrayList<MeanShiftCanopy>();
        Configuration conf = new Configuration();
        // The explicit type argument is required: iterating the raw type yields Object.
        for (MeanShiftCanopy value : new SequenceFileDirValueIterable<MeanShiftCanopy>(clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
            clusters.add(value);
        }
        FileSystem fs = FileSystem.get(input.toUri(), conf);
        FileStatus[] status = fs.listStatus(input, PathFilters.logsCRCFilter());
        int part = 0;
        for (FileStatus s : status) {
            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(output, "part-m-" + part++), IntWritable.class, WeightedVectorWritable.class);
            try {
                // A typed Pair removes the need for an unchecked cast on the value.
                for (Pair<Writable, MeanShiftCanopy> record : new SequenceFileIterable<Writable, MeanShiftCanopy>(s.getPath(), conf)) {
                    MeanShiftCanopy canopy = record.getSecond();
                    // NOTE(review): if findCoveringCanopy can return null, the next line throws a
                    // NullPointerException. Confirm against MeanShiftCanopyClusterer.
                    MeanShiftCanopy closest = MeanShiftCanopyClusterer.findCoveringCanopy(canopy, clusters);
                    writer.append(new IntWritable(closest.getId()), new WeightedVectorWritable(1.0, canopy.getCenter()));
                }
            } finally {
                // Close the writer even if reading or appending fails part-way.
                writer.close();
            }
        }
    }

    /**
     * Runs a map-only job (zero reducers) that uses {@link MeanShiftCanopyClusterMapper} to
     * write {@code IntWritable}/{@code WeightedVectorWritable} sequence files. The job runs on a
     * newly created {@link Configuration} in which {@link #STATE_IN_KEY} is set to
     * {@code clustersIn}.
     *
     * @param input      job input path
     * @param clustersIn path of the clusters, published to the job under {@link #STATE_IN_KEY}
     * @param output     job output path
     * @throws InterruptedException also thrown when the job reports failure
     */
    private static void clusterDataMR(Path input, Path clustersIn, Path output) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration jobConf = new Configuration();
        jobConf.set(STATE_IN_KEY, clustersIn.toString());

        String jobName = "Mean Shift Driver running clusterData over input: " + input;
        Job clusterJob = new Job(jobConf, jobName);
        clusterJob.setJarByClass(MeanShiftCanopyDriver.class);

        // Input/output formats and locations.
        clusterJob.setInputFormatClass(SequenceFileInputFormat.class);
        clusterJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(clusterJob, new Path[]{input});
        FileOutputFormat.setOutputPath(clusterJob, output);

        // Map-only processing.
        clusterJob.setMapperClass(MeanShiftCanopyClusterMapper.class);
        clusterJob.setNumReduceTasks(0);
        clusterJob.setOutputKeyClass(IntWritable.class);
        clusterJob.setOutputValueClass(WeightedVectorWritable.class);

        boolean succeeded = clusterJob.waitForCompletion(true);
        if (!succeeded) {
            throw new InterruptedException("Mean Shift Clustering failed on clustersIn " + clustersIn);
        }
    }
}

