/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.kmeans;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.ClusterObservations;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.kmeans.Cluster;
import org.apache.mahout.clustering.kmeans.KMeansClusterMapper;
import org.apache.mahout.clustering.kmeans.KMeansClusterer;
import org.apache.mahout.clustering.kmeans.KMeansCombiner;
import org.apache.mahout.clustering.kmeans.KMeansMapper;
import org.apache.mahout.clustering.kmeans.KMeansReducer;
import org.apache.mahout.clustering.kmeans.KMeansUtil;
import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line and programmatic entry point for k-means clustering.
 *
 * <p>{@link #run(String[])} parses command-line options and delegates to the static
 * {@code run}, {@code buildClusters} and {@code clusterData} methods. Both the cluster-building
 * stage and the point-assignment stage exist in two variants, selected by a
 * {@code runSequential} flag: one that loops over the input inside this JVM and one that
 * submits Hadoop MapReduce jobs.
 */
public class KMeansDriver
extends AbstractJob {
    private static final Logger log = LoggerFactory.getLogger(KMeansDriver.class);

    /**
     * Launches the driver from the command line.
     *
     * <p>A fresh {@link Configuration} and a new driver instance are handed to
     * {@code ToolRunner.run}. The exit code returned by the tool run is not propagated.
     *
     * @param args command-line arguments, passed through unchanged
     * @throws Exception whatever the tool run throws
     */
    public static void main(String[] args) throws Exception {
        Configuration hadoopConf = new Configuration();
        Tool driver = new KMeansDriver();
        ToolRunner.run(hadoopConf, driver, args);
    }

    /**
     * Parses the command-line options and runs k-means with them.
     *
     * <p>Steps, in order: register the supported options; parse {@code args}; make sure a
     * {@link Configuration} is installed; read paths and numeric options; delete the output
     * path when {@code overwrite} is given; instantiate the distance measure through the
     * context class loader (falling back to {@link SquaredEuclideanDistanceMeasure} when no
     * measure is named); when {@code numClusters} is given, let {@code RandomSeedGenerator}
     * produce the initial clusters path; finally delegate to the static {@code run} method.
     *
     * @param args command-line arguments
     * @return {@code -1} when argument parsing yields nothing to run, {@code 0} otherwise
     * @throws Exception on option-value parse errors, distance-measure loading failures, or
     *         any failure of the clustering run itself
     */
    public int run(String[] args) throws Exception {
        this.addInputOption();
        this.addOutputOption();
        this.addOption((Option)DefaultOptionCreator.distanceMeasureOption().create());
        this.addOption((Option)DefaultOptionCreator.clustersInOption().withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  If k is also specified, then a random set of vectors will be selected and written out to this path first").create());
        this.addOption((Option)DefaultOptionCreator.numClustersOption().withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters input path.").create());
        this.addOption((Option)DefaultOptionCreator.convergenceOption().create());
        this.addOption((Option)DefaultOptionCreator.maxIterationsOption().create());
        this.addOption((Option)DefaultOptionCreator.overwriteOption().create());
        this.addOption((Option)DefaultOptionCreator.clusteringOption().create());
        this.addOption((Option)DefaultOptionCreator.methodOption().create());
        if (this.parseArguments(args) == null) {
            return -1;
        }
        // Install a default configuration BEFORE the first use below. Previously this guard
        // sat after the delete and seed-generation calls, which had already received the
        // (possibly null) configuration.
        if (this.getConf() == null) {
            this.setConf(new Configuration());
        }
        Configuration conf = this.getConf();
        Path input = this.getInputPath();
        Path clusters = new Path(this.getOption("clusters"));
        Path output = this.getOutputPath();
        String measureClass = this.getOption("distanceMeasure");
        if (measureClass == null) {
            measureClass = SquaredEuclideanDistanceMeasure.class.getName();
        }
        double convergenceDelta = Double.parseDouble(this.getOption("convergenceDelta"));
        int maxIterations = Integer.parseInt(this.getOption("maxIter"));
        if (this.hasOption("overwrite")) {
            HadoopUtil.delete(conf, output);
        }
        ClassLoader ccl = Thread.currentThread().getContextClassLoader();
        DistanceMeasure measure = ccl.loadClass(measureClass).asSubclass(DistanceMeasure.class).newInstance();
        if (this.hasOption("numClusters")) {
            // The seed generator returns the path it wrote the initial clusters to.
            clusters = RandomSeedGenerator.buildRandom(conf, input, clusters, Integer.parseInt(this.getOption("numClusters")), measure);
        }
        boolean runClustering = this.hasOption("clustering");
        // Literal-first comparison: an absent method value selects the MapReduce path
        // instead of raising a NullPointerException.
        boolean runSequential = "sequential".equalsIgnoreCase(this.getOption("method"));
        KMeansDriver.run(conf, input, clusters, output, measure, convergenceDelta, maxIterations, runClustering, runSequential);
        return 0;
    }

    /**
     * Builds clusters iteratively and, optionally, assigns the input points to them.
     *
     * <p>Cluster building is delegated to {@code buildClusters}; when {@code runClustering}
     * is set, the resulting clusters are then used by {@code clusterData}, which writes to
     * the {@code clusteredPoints} child of {@code output}.
     *
     * @param conf             Hadoop configuration to use
     * @param input            path of the input vectors
     * @param clustersIn       path of the initial clusters
     * @param output           base output path
     * @param measure          distance measure to cluster with
     * @param convergenceDelta convergence threshold; handed on in its string form
     * @param maxIterations    upper bound on the number of iterations
     * @param runClustering    whether to assign the input points after building clusters
     * @param runSequential    {@code true} for the sequential variant, {@code false} for MapReduce
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if a MapReduce job is interrupted or reports failure
     * @throws ClassNotFoundException if a MapReduce job cannot resolve a class
     */
    public static void run(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, boolean runClustering, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        String delta = Double.toString(convergenceDelta);
        if (log.isInfoEnabled()) {
            log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[]{input, clustersIn, output, measure.getClass().getName()});
            // One placeholder per argument. The former "num Reduce Tasks: {}" slot had no
            // matching argument, so the vector class name was printed under the wrong label.
            log.info("convergence: {} max Iterations: {} Input Vectors: {}", new Object[]{convergenceDelta, maxIterations, VectorWritable.class.getName()});
        }
        Path clustersOut = KMeansDriver.buildClusters(conf, input, clustersIn, output, measure, maxIterations, delta, runSequential);
        if (runClustering) {
            log.info("Clustering data");
            KMeansDriver.clusterData(conf, input, clustersOut, new Path(output, "clusteredPoints"), measure, delta, runSequential);
        }
    }

    /**
     * Convenience overload that runs k-means with a newly created {@link Configuration}.
     *
     * <p>All arguments are forwarded unchanged to the overload that takes a configuration.
     *
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if a MapReduce job is interrupted or reports failure
     * @throws ClassNotFoundException if a MapReduce job cannot resolve a class
     */
    public static void run(Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, boolean runClustering, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration freshConf = new Configuration();
        run(freshConf, input, clustersIn, output, measure, convergenceDelta, maxIterations, runClustering, runSequential);
    }

    /**
     * Iterates k-means cluster building, choosing the sequential or the MapReduce variant.
     *
     * @param conf          Hadoop configuration to use
     * @param input         path of the input vectors
     * @param clustersIn    path of the initial clusters
     * @param output        base output path
     * @param measure       distance measure to cluster with
     * @param maxIterations upper bound on the number of iterations
     * @param delta         convergence threshold as a string
     * @param runSequential {@code true} for the sequential variant, {@code false} for MapReduce
     * @return the clusters path returned by the selected variant
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if a MapReduce job is interrupted or reports failure
     * @throws ClassNotFoundException if a MapReduce job cannot resolve a class
     */
    public static Path buildClusters(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, int maxIterations, String delta, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        return runSequential
            ? buildClustersSeq(conf, input, clustersIn, output, measure, maxIterations, delta)
            : buildClustersMR(conf, input, clustersIn, output, measure, maxIterations, delta);
    }

    /**
     * Builds clusters without MapReduce by looping over the input in this JVM.
     *
     * <p>The initial clusters are loaded from {@code clustersIn}. Each iteration feeds every
     * input vector to the clusterer, asks it whether the clusters have converged, and writes
     * the current clusters to {@code <output>/clusters-<iteration>/part-r-00000}. Iteration
     * stops on convergence or after {@code maxIterations} passes.
     *
     * @param conf          Hadoop configuration to use
     * @param input         path of the input vectors
     * @param clustersIn    path of the initial clusters
     * @param output        base output path
     * @param measure       distance measure handed to the clusterer
     * @param maxIterations upper bound on the number of iterations
     * @param delta         convergence threshold as a string; parsed with {@link Double#parseDouble}
     * @return the directory written by the last iteration, or {@code clustersIn} itself when
     *         no iteration ran
     * @throws IOException           on I/O failure
     * @throws IllegalStateException if no initial clusters could be loaded
     */
    private static Path buildClustersSeq(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, int maxIterations, String delta) throws IOException {
        KMeansClusterer clusterer = new KMeansClusterer(measure);
        ArrayList<Cluster> clusters = new ArrayList<Cluster>();
        KMeansUtil.configureWithClusterInfo(conf, clustersIn, clusters);
        if (clusters.isEmpty()) {
            throw new IllegalStateException("Clusters is empty!");
        }
        boolean converged = false;
        for (int iteration = 1; !converged && iteration <= maxIterations; ++iteration) {
            log.info("K-Means Iteration: {}", (Object)iteration);
            // The explicit type argument lets the for-each yield VectorWritable elements.
            for (VectorWritable value : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
                clusterer.addPointToNearestCluster(value.get(), clusters);
            }
            converged = clusterer.testConvergence(clusters, Double.parseDouble(delta));
            Path clustersOut = new Path(output, "clusters-" + iteration);
            // Resolve the file system from the path being written, not from the input, so
            // that input and output may live on different file systems.
            FileSystem fs = FileSystem.get(clustersOut.toUri(), conf);
            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(clustersOut, "part-r-00000"), Text.class, Cluster.class);
            try {
                for (Cluster cluster : clusters) {
                    log.debug("Writing Cluster:{} center:{} numPoints:{} radius:{} to: {}", new Object[]{cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null), cluster.getNumPoints(), AbstractCluster.formatVector(cluster.getRadius(), null), clustersOut.getName()});
                    writer.append((Writable)new Text(cluster.getIdentifier()), (Writable)cluster);
                }
            }
            finally {
                writer.close();
            }
            // The directory just written is what the caller receives if this was the last pass.
            clustersIn = clustersOut;
        }
        return clustersIn;
    }

    /**
     * Builds clusters by running one MapReduce job per iteration.
     *
     * <p>Each pass reads the clusters produced by the previous one (initially
     * {@code clustersIn}) and writes to {@code <output>/clusters-<iteration>}. Passes continue
     * until one reports convergence or {@code maxIterations} passes have run.
     *
     * @param conf          Hadoop configuration to use
     * @param input         path of the input vectors
     * @param clustersIn    path of the initial clusters
     * @param output        base output path
     * @param measure       distance measure; only its class name is passed to the job
     * @param maxIterations upper bound on the number of iterations
     * @param delta         convergence threshold as a string
     * @return the directory written by the last pass, or {@code clustersIn} when no pass ran
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if a job is interrupted or reports failure
     * @throws ClassNotFoundException if a job cannot resolve a class
     */
    private static Path buildClustersMR(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, int maxIterations, String delta) throws IOException, InterruptedException, ClassNotFoundException {
        Path current = clustersIn;
        boolean done = false;
        int pass = 1;
        while (!done && pass <= maxIterations) {
            log.info("K-Means Iteration {}", (Object)pass);
            Path next = new Path(output, "clusters-" + pass);
            String measureName = measure.getClass().getName();
            done = runIteration(conf, input, current, next, measureName, delta);
            current = next;
            pass++;
        }
        return current;
    }

    /**
     * Runs a single k-means MapReduce iteration and reports whether the result has converged.
     *
     * <p>The clusters path, measure class name and convergence delta are stored in
     * {@code conf} before the job is created from it. Any existing {@code clustersOut} is
     * deleted before the job is submitted.
     *
     * @param conf             Hadoop configuration; three k-means keys are set on it
     * @param input            path of the input vectors
     * @param clustersIn       path of the clusters to start this iteration from
     * @param clustersOut      path this iteration writes its clusters to
     * @param measureClass     class name of the distance measure
     * @param convergenceDelta convergence threshold as a string
     * @return {@code true} if every cluster written to {@code clustersOut} reports converged
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if the job is interrupted or does not complete successfully
     * @throws ClassNotFoundException if the job cannot resolve a class
     */
    private static boolean runIteration(Configuration conf, Path input, Path clustersIn, Path clustersOut, String measureClass, String convergenceDelta) throws IOException, InterruptedException, ClassNotFoundException {
        // These settings must be in place before the Job is constructed from conf.
        conf.set("org.apache.mahout.clustering.kmeans.path", clustersIn.toString());
        conf.set("org.apache.mahout.clustering.kmeans.measure", measureClass);
        conf.set("org.apache.mahout.clustering.kmeans.convergence", convergenceDelta);

        Job iterationJob = new Job(conf, "KMeans Driver running runIteration over clustersIn: " + clustersIn);
        iterationJob.setJarByClass(KMeansDriver.class);

        // Processing stages.
        iterationJob.setMapperClass(KMeansMapper.class);
        iterationJob.setCombinerClass(KMeansCombiner.class);
        iterationJob.setReducerClass(KMeansReducer.class);

        // Record types.
        iterationJob.setMapOutputKeyClass(Text.class);
        iterationJob.setMapOutputValueClass(ClusterObservations.class);
        iterationJob.setOutputKeyClass(Text.class);
        iterationJob.setOutputValueClass(Cluster.class);

        // File formats and locations.
        iterationJob.setInputFormatClass(SequenceFileInputFormat.class);
        iterationJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.addInputPath(iterationJob, input);
        FileOutputFormat.setOutputPath(iterationJob, clustersOut);

        HadoopUtil.delete(conf, clustersOut);
        boolean succeeded = iterationJob.waitForCompletion(true);
        if (!succeeded) {
            throw new InterruptedException("K-Means Iteration failed processing " + clustersIn);
        }
        return isConverged(clustersOut, conf, FileSystem.get(clustersOut.toUri(), conf));
    }

    /**
     * Checks whether every cluster stored under {@code filePath} reports itself converged.
     *
     * <p>Only the entries accepted by {@code PathFilters.partFilter()} are read. Scanning
     * stops at the first cluster that has not converged; the iterator is closed explicitly on
     * that early exit only.
     * NOTE(review): presumably the iterator releases its reader once exhausted — confirm.
     *
     * @param filePath directory holding the cluster part files
     * @param conf     Hadoop configuration used to open the files
     * @param fs       file system used to list {@code filePath}
     * @return {@code false} as soon as an unconverged cluster is found, {@code true} otherwise
     * @throws IOException on I/O failure
     */
    private static boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
        FileStatus[] parts = fs.listStatus(filePath, PathFilters.partFilter());
        for (int i = 0; i < parts.length; i++) {
            SequenceFileValueIterator values = new SequenceFileValueIterator(parts[i].getPath(), true, conf);
            while (values.hasNext()) {
                Cluster cluster = (Cluster)values.next();
                if (!cluster.isConverged()) {
                    values.close();
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Assigns the input points to the given clusters, sequentially or through MapReduce.
     *
     * @param conf             Hadoop configuration to use
     * @param input            path of the input vectors
     * @param clustersIn       path of the clusters to assign points to
     * @param output           path the clustered points are written to
     * @param measure          distance measure to use
     * @param convergenceDelta convergence threshold as a string; used by the MapReduce variant only
     * @param runSequential    {@code true} for the sequential variant, {@code false} for MapReduce
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if the MapReduce job is interrupted or reports failure
     * @throws ClassNotFoundException if the MapReduce job cannot resolve a class
     */
    public static void clusterData(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, String convergenceDelta, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        if (log.isInfoEnabled()) {
            Object[] runDetails = new Object[]{input, clustersIn, output, measure};
            log.info("Running Clustering");
            log.info("Input: {} Clusters In: {} Out: {} Distance: {}", runDetails);
            log.info("convergence: {} Input Vectors: {}", (Object)convergenceDelta, (Object)VectorWritable.class.getName());
        }
        if (runSequential) {
            clusterDataSeq(conf, input, clustersIn, output, measure);
            return;
        }
        clusterDataMR(conf, input, clustersIn, output, measure, convergenceDelta);
    }

    /**
     * Assigns every input vector to its nearest cluster without MapReduce.
     *
     * <p>The clusters are loaded from {@code clustersIn}. For each entry listed under
     * {@code input} (after {@code PathFilters.logsCRCFilter()}), a separate sequence file
     * {@code <output>/part-m-<n>} is written, with {@code n} counting up from 0, so that the
     * results of different input files do not overwrite one another.
     *
     * @param conf       Hadoop configuration to use
     * @param input      directory of input vector files
     * @param clustersIn path of the clusters to assign points to
     * @param output     directory the clustered points are written to
     * @param measure    distance measure handed to the clusterer
     * @throws IOException           on I/O failure
     * @throws IllegalStateException if no clusters could be loaded
     */
    private static void clusterDataSeq(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure) throws IOException {
        KMeansClusterer clusterer = new KMeansClusterer(measure);
        ArrayList<Cluster> clusters = new ArrayList<Cluster>();
        KMeansUtil.configureWithClusterInfo(conf, clustersIn, clusters);
        if (clusters.isEmpty()) {
            throw new IllegalStateException("Clusters is empty!");
        }
        // Input is listed through its own file system; output is written through the file
        // system that owns the output path, so the two may differ.
        FileSystem inputFs = FileSystem.get(input.toUri(), conf);
        FileSystem outputFs = FileSystem.get(output.toUri(), conf);
        FileStatus[] status = inputFs.listStatus(input, PathFilters.logsCRCFilter());
        int part = 0;
        for (FileStatus s : status) {
            Path partFile = new Path(output, "part-m-" + part);
            // Advance the counter: without this every input file targeted part-m-0.
            part++;
            SequenceFile.Writer writer = new SequenceFile.Writer(outputFs, conf, partFile, IntWritable.class, WeightedVectorWritable.class);
            try {
                // The explicit type argument lets the for-each yield VectorWritable elements.
                for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(s.getPath(), conf)) {
                    clusterer.emitPointToNearestCluster(value.get(), clusters, writer);
                }
            }
            finally {
                writer.close();
            }
        }
    }

    /**
     * Assigns every input vector to a cluster with a map-only MapReduce job.
     *
     * <p>The clusters path, measure class name and convergence delta are stored in
     * {@code conf} before the job is created from it. Any existing {@code output} is deleted
     * before it is registered as the job's output path. The job runs with zero reduce tasks.
     *
     * @param conf             Hadoop configuration; three k-means keys are set on it
     * @param input            path of the input vectors
     * @param clustersIn       path of the clusters to assign points to
     * @param output           path the clustered points are written to
     * @param measure          distance measure; only its class name is passed to the job
     * @param convergenceDelta convergence threshold as a string
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if the job is interrupted or does not complete successfully
     * @throws ClassNotFoundException if the job cannot resolve a class
     */
    private static void clusterDataMR(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, String convergenceDelta) throws IOException, InterruptedException, ClassNotFoundException {
        // These settings must be in place before the Job is constructed from conf.
        String measureName = measure.getClass().getName();
        conf.set("org.apache.mahout.clustering.kmeans.path", clustersIn.toString());
        conf.set("org.apache.mahout.clustering.kmeans.measure", measureName);
        conf.set("org.apache.mahout.clustering.kmeans.convergence", convergenceDelta);

        Job assignJob = new Job(conf, "KMeans Driver running clusterData over input: " + input);

        // Formats and record types.
        assignJob.setInputFormatClass(SequenceFileInputFormat.class);
        assignJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        assignJob.setOutputKeyClass(IntWritable.class);
        assignJob.setOutputValueClass(WeightedVectorWritable.class);

        // Locations: clear stale output before registering the output path.
        FileInputFormat.setInputPaths(assignJob, new Path[]{input});
        HadoopUtil.delete(conf, output);
        FileOutputFormat.setOutputPath(assignJob, output);

        // Map-only job.
        assignJob.setMapperClass(KMeansClusterMapper.class);
        assignJob.setNumReduceTasks(0);
        assignJob.setJarByClass(KMeansDriver.class);

        boolean succeeded = assignJob.waitForCompletion(true);
        if (!succeeded) {
            throw new InterruptedException("K-Means Clustering failed processing " + clustersIn);
        }
    }
}

