/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.fuzzykmeans;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.ClusterObservations;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansClusterMapper;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansClusterer;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansCombiner;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansMapper;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansReducer;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansUtil;
import org.apache.mahout.clustering.fuzzykmeans.SoftCluster;
import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line and programmatic driver for fuzzy k-means clustering.
 *
 * <p>The driver works in two phases. {@code buildClusters} iterates until the clusters report
 * convergence or {@code maxIterations} is reached, writing each iteration's clusters to
 * {@code <output>/clusters-<iteration>}. If requested, {@code clusterData} then assigns the input
 * points to the final clusters and writes the result to {@code <output>/clusteredPoints}.
 * Each phase can run either sequentially in-process or as Hadoop MapReduce jobs.
 */
public class FuzzyKMeansDriver extends AbstractJob {
    /** Name of the command-line option carrying the fuzziness coefficient {@code m}. */
    public static final String M_OPTION = "m";
    private static final Logger log = LoggerFactory.getLogger(FuzzyKMeansDriver.class);

    /**
     * Command-line entry point; hands the arguments to {@link ToolRunner} with a fresh configuration.
     *
     * @param args command-line arguments
     * @throws Exception whatever {@link ToolRunner#run} propagates
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new FuzzyKMeansDriver(), args);
    }

    /**
     * Parses the command line and runs the clustering with the job's configuration.
     *
     * @param args command-line arguments
     * @return {@code -1} when argument parsing yields nothing to run, {@code 0} otherwise
     * @throws Exception on option parsing, class loading or job failures
     */
    @Override
    public int run(String[] args) throws Exception {
        this.addInputOption();
        this.addOutputOption();
        this.addOption(DefaultOptionCreator.distanceMeasureOption().create());
        this.addOption(DefaultOptionCreator.clustersInOption().withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  If k is also specified, then a random set of vectors will be selected and written out to this path first").create());
        this.addOption(DefaultOptionCreator.numClustersOption().withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters input path.").create());
        this.addOption(DefaultOptionCreator.convergenceOption().create());
        this.addOption(DefaultOptionCreator.maxIterationsOption().create());
        this.addOption(DefaultOptionCreator.overwriteOption().create());
        this.addOption(M_OPTION, M_OPTION, "coefficient normalization factor, must be greater than 1", true);
        this.addOption(DefaultOptionCreator.clusteringOption().create());
        this.addOption(DefaultOptionCreator.emitMostLikelyOption().create());
        this.addOption(DefaultOptionCreator.thresholdOption().create());
        this.addOption(DefaultOptionCreator.methodOption().create());
        if (this.parseArguments(args) == null) {
            return -1;
        }

        Path input = this.getInputPath();
        Path clusters = new Path(this.getOption("clusters"));
        Path output = this.getOutputPath();
        String measureClass = this.getOption("distanceMeasure");
        if (measureClass == null) {
            measureClass = SquaredEuclideanDistanceMeasure.class.getName();
        }
        double convergenceDelta = Double.parseDouble(this.getOption("convergenceDelta"));
        float fuzziness = Float.parseFloat(this.getOption(M_OPTION));
        int maxIterations = Integer.parseInt(this.getOption("maxIter"));
        if (this.hasOption("overwrite")) {
            HadoopUtil.delete(this.getConf(), output);
        }
        boolean emitMostLikely = Boolean.parseBoolean(this.getOption("emitMostLikely"));
        double threshold = Double.parseDouble(this.getOption("threshold"));

        ClassLoader ccl = Thread.currentThread().getContextClassLoader();
        DistanceMeasure measure = ccl.loadClass(measureClass).asSubclass(DistanceMeasure.class).newInstance();

        if (this.hasOption("numClusters")) {
            // Read k through the same accessor as every other option (and as the hasOption
            // guard above) instead of re-parsing the whole command line a second time.
            int numClusters = Integer.parseInt(this.getOption("numClusters"));
            clusters = RandomSeedGenerator.buildRandom(this.getConf(), input, clusters, numClusters, measure);
        }
        boolean runClustering = this.hasOption("clustering");
        // Constant-first comparison: does not throw if the option has no value.
        boolean runSequential = "sequential".equalsIgnoreCase(this.getOption("method"));
        FuzzyKMeansDriver.run(this.getConf(), input, clusters, output, measure, convergenceDelta, maxIterations, fuzziness, runClustering, emitMostLikely, threshold, runSequential);
        return 0;
    }

    /**
     * Runs the clustering with a freshly created {@link Configuration}.
     *
     * @param input directory of input vectors
     * @param clustersIn path holding the initial clusters
     * @param output directory that receives the per-iteration clusters and the clustered points
     * @param measure distance measure used by the clusterer
     * @param convergenceDelta convergence threshold handed to the clusterer
     * @param maxIterations upper bound on the number of iterations
     * @param m fuzziness coefficient
     * @param runClustering whether to assign the points to the final clusters afterwards
     * @param emitMostLikely passed to the MapReduce clustering step
     * @param threshold passed to the MapReduce clustering step
     * @param runSequential {@code true} to run in-process instead of via MapReduce
     * @throws IOException on file system failures
     * @throws ClassNotFoundException propagated from job execution
     * @throws InterruptedException if a job is interrupted or reports failure
     */
    public static void run(Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, float m, boolean runClustering, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
        FuzzyKMeansDriver.run(new Configuration(), input, clustersIn, output, measure, convergenceDelta, maxIterations, m, runClustering, emitMostLikely, threshold, runSequential);
    }

    /**
     * Runs one MapReduce iteration and reports whether every produced cluster has converged.
     *
     * <p>The iteration parameters are written into {@code conf} before the job is created, so the
     * passed configuration is modified by this call.
     *
     * @throws InterruptedException if the job is interrupted or does not complete successfully
     */
    private static boolean runIteration(Configuration conf, Path input, Path clustersIn, Path clustersOut, String measureClass, double convergenceDelta, float m) throws IOException, InterruptedException, ClassNotFoundException {
        conf.set("org.apache.mahout.clustering.kmeans.path", clustersIn.toString());
        conf.set("org.apache.mahout.clustering.kmeans.measure", measureClass);
        conf.set("org.apache.mahout.clustering.kmeans.convergence", String.valueOf(convergenceDelta));
        conf.set("org.apache.mahout.clustering.fuzzykmeans.m", String.valueOf(m));
        // These two keys are always fixed for the cluster-building iterations.
        conf.set("org.apache.mahout.clustering.fuzzykmeans.emitMostLikely", Boolean.toString(true));
        conf.set("org.apache.mahout.clustering.fuzzykmeans.threshold", Double.toString(0.0));

        Job job = new Job(conf, "FuzzyKMeans Driver running runIteration over clustersIn: " + clustersIn);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ClusterObservations.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(SoftCluster.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setMapperClass(FuzzyKMeansMapper.class);
        job.setCombinerClass(FuzzyKMeansCombiner.class);
        job.setReducerClass(FuzzyKMeansReducer.class);
        job.setJarByClass(FuzzyKMeansDriver.class);
        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, clustersOut);

        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Fuzzy K-Means Iteration failed processing " + clustersIn);
        }
        FileSystem fs = FileSystem.get(clustersOut.toUri(), conf);
        return FuzzyKMeansDriver.isConverged(clustersOut, conf, fs);
    }

    /**
     * Builds the clusters and, if {@code runClustering} is set, assigns the input points to them.
     * The supplied configuration is used for both phases.
     *
     * @param conf configuration used for file system access and job submission
     * @param input directory of input vectors
     * @param clustersIn path holding the initial clusters
     * @param output directory that receives the per-iteration clusters and the clustered points
     * @param measure distance measure used by the clusterer
     * @param convergenceDelta convergence threshold handed to the clusterer
     * @param maxIterations upper bound on the number of iterations
     * @param m fuzziness coefficient
     * @param runClustering whether to assign the points to the final clusters afterwards
     * @param emitMostLikely passed to the MapReduce clustering step
     * @param threshold passed to the MapReduce clustering step
     * @param runSequential {@code true} to run in-process instead of via MapReduce
     * @throws IOException on file system failures
     * @throws ClassNotFoundException propagated from job execution
     * @throws InterruptedException if a job is interrupted or reports failure
     */
    public static void run(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, float m, boolean runClustering, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
        Path clustersOut = FuzzyKMeansDriver.buildClusters(conf, input, clustersIn, output, measure, convergenceDelta, maxIterations, m, runSequential);
        if (runClustering) {
            log.info("Clustering");
            // Hand the caller's configuration on so the clustering step sees the same settings
            // as the cluster-building step.
            FuzzyKMeansDriver.clusterData(conf, input, clustersOut, new Path(output, "clusteredPoints"), measure, convergenceDelta, m, emitMostLikely, threshold, runSequential);
        }
    }

    /**
     * Iterates until convergence or {@code maxIterations}, sequentially or via MapReduce.
     *
     * @return the path of the last cluster directory written, or {@code clustersIn} if no
     *         iteration ran
     * @throws IOException on file system failures
     * @throws InterruptedException if a job is interrupted or reports failure
     * @throws ClassNotFoundException propagated from job execution
     */
    public static Path buildClusters(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, float m, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        if (runSequential) {
            return FuzzyKMeansDriver.buildClustersSeq(conf, input, clustersIn, output, measure, convergenceDelta, maxIterations, m);
        }
        return FuzzyKMeansDriver.buildClustersMR(conf, input, clustersIn, output, measure, convergenceDelta, maxIterations, m);
    }

    /**
     * In-process cluster building: each iteration feeds every input vector to the clusterer,
     * tests convergence and writes the clusters to {@code <output>/clusters-<iteration>/part-r-00000}.
     *
     * @throws IllegalStateException if no initial clusters could be loaded from {@code clustersIn}
     */
    private static Path buildClustersSeq(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, float m) throws IOException {
        FuzzyKMeansClusterer clusterer = new FuzzyKMeansClusterer(measure, convergenceDelta, m);
        ArrayList<SoftCluster> clusters = new ArrayList<SoftCluster>();
        FuzzyKMeansUtil.configureWithClusterInfo(clustersIn, clusters);
        if (clusters.isEmpty()) {
            throw new IllegalStateException("Clusters is empty!");
        }
        // Loop-invariant, so resolved once rather than on every iteration.
        FileSystem fs = FileSystem.get(input.toUri(), conf);
        boolean converged = false;
        for (int iteration = 1; !converged && iteration <= maxIterations; ++iteration) {
            log.info("Fuzzy k-Means Iteration: " + iteration);
            for (VectorWritable value : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
                clusterer.addPointToClusters(clusters, value.get());
            }
            converged = clusterer.testConvergence(clusters);
            Path clustersOut = new Path(output, "clusters-" + iteration);
            // NOTE(review): the writer uses the file system resolved from the input URI;
            // this assumes input and output live on the same file system.
            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(clustersOut, "part-r-00000"), Text.class, SoftCluster.class);
            try {
                for (SoftCluster cluster : clusters) {
                    log.debug("Writing Cluster:{} center:{} numPoints:{} radius:{} to: {}", new Object[]{cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null), cluster.getNumPoints(), AbstractCluster.formatVector(cluster.getRadius(), null), clustersOut.getName()});
                    writer.append(new Text(cluster.getIdentifier()), cluster);
                }
            }
            finally {
                writer.close();
            }
            clustersIn = clustersOut;
        }
        return clustersIn;
    }

    /**
     * MapReduce cluster building: runs one job per iteration, feeding each iteration's output
     * directory to the next as its cluster input.
     */
    private static Path buildClustersMR(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, float m) throws IOException, InterruptedException, ClassNotFoundException {
        String measureClass = measure.getClass().getName();
        boolean converged = false;
        for (int iteration = 1; !converged && iteration <= maxIterations; ++iteration) {
            log.info("Fuzzy K-Means Iteration {}", iteration);
            Path clustersOut = new Path(output, "clusters-" + iteration);
            converged = FuzzyKMeansDriver.runIteration(conf, input, clustersIn, clustersOut, measureClass, convergenceDelta, m);
            clustersIn = clustersOut;
        }
        return clustersIn;
    }

    /**
     * Assigns the input points to the given clusters using a freshly created
     * {@link Configuration}.
     *
     * @param input directory of input vectors
     * @param clustersIn path holding the clusters to assign to
     * @param output directory that receives the clustered points
     * @param measure distance measure used by the clusterer
     * @param convergenceDelta convergence threshold handed to the clusterer
     * @param m fuzziness coefficient
     * @param emitMostLikely used by the MapReduce implementation only
     * @param threshold used by the MapReduce implementation only
     * @param runSequential {@code true} to run in-process instead of via MapReduce
     * @throws IOException on file system failures
     * @throws ClassNotFoundException propagated from job execution
     * @throws InterruptedException if the job is interrupted or reports failure
     */
    public static void clusterData(Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, float m, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
        FuzzyKMeansDriver.clusterData(new Configuration(), input, clustersIn, output, measure, convergenceDelta, m, emitMostLikely, threshold, runSequential);
    }

    /**
     * Assigns the input points to the given clusters using the supplied configuration.
     *
     * @param conf configuration used for file system access and job submission; not modified
     *        by the MapReduce path, which works on a copy
     * @param input directory of input vectors
     * @param clustersIn path holding the clusters to assign to
     * @param output directory that receives the clustered points
     * @param measure distance measure used by the clusterer
     * @param convergenceDelta convergence threshold handed to the clusterer
     * @param m fuzziness coefficient
     * @param emitMostLikely used by the MapReduce implementation only
     * @param threshold used by the MapReduce implementation only
     * @param runSequential {@code true} to run in-process instead of via MapReduce
     * @throws IOException on file system failures
     * @throws ClassNotFoundException propagated from job execution
     * @throws InterruptedException if the job is interrupted or reports failure
     */
    public static void clusterData(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, float m, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
        if (runSequential) {
            FuzzyKMeansDriver.clusterDataSeq(conf, input, clustersIn, output, measure, convergenceDelta, m);
        } else {
            FuzzyKMeansDriver.clusterDataMR(conf, input, clustersIn, output, measure, convergenceDelta, m, emitMostLikely, threshold);
        }
    }

    /**
     * In-process clustering: for every input file, emits each vector's cluster assignments to
     * its own {@code part-m-<n>} file under {@code output}.
     *
     * @throws IllegalStateException if no clusters could be loaded from {@code clustersIn}
     */
    private static void clusterDataSeq(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, float m) throws IOException {
        FuzzyKMeansClusterer clusterer = new FuzzyKMeansClusterer(measure, convergenceDelta, m);
        ArrayList<SoftCluster> clusters = new ArrayList<SoftCluster>();
        FuzzyKMeansUtil.configureWithClusterInfo(clustersIn, clusters);
        if (clusters.isEmpty()) {
            throw new IllegalStateException("Clusters is empty!");
        }
        FileSystem fs = FileSystem.get(input.toUri(), conf);
        FileStatus[] status = fs.listStatus(input, PathFilters.logsCRCFilter());
        int part = 0;
        for (FileStatus s : status) {
            // One output file per input file. The counter must advance, otherwise every
            // writer targets "part-m-0" and replaces the previous file's results.
            Path partFile = new Path(output, "part-m-" + part);
            part++;
            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, partFile, IntWritable.class, WeightedVectorWritable.class);
            try {
                for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(s.getPath(), conf)) {
                    clusterer.emitPointToClusters(value, clusters, writer);
                }
            }
            finally {
                writer.close();
            }
        }
    }

    /**
     * MapReduce clustering: deletes any existing {@code output} and runs a map-only job over
     * {@code input}.
     *
     * @throws InterruptedException if the job is interrupted or does not complete successfully
     */
    private static void clusterDataMR(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, float m, boolean emitMostLikely, double threshold) throws IOException, InterruptedException, ClassNotFoundException {
        // Work on a copy so the job parameters set below do not leak into the caller's configuration.
        Configuration jobConf = new Configuration(conf);
        jobConf.set("org.apache.mahout.clustering.kmeans.path", clustersIn.toString());
        jobConf.set("org.apache.mahout.clustering.kmeans.measure", measure.getClass().getName());
        jobConf.set("org.apache.mahout.clustering.kmeans.convergence", String.valueOf(convergenceDelta));
        jobConf.set("org.apache.mahout.clustering.fuzzykmeans.m", String.valueOf(m));
        jobConf.set("org.apache.mahout.clustering.fuzzykmeans.emitMostLikely", Boolean.toString(emitMostLikely));
        jobConf.set("org.apache.mahout.clustering.fuzzykmeans.threshold", Double.toString(threshold));

        // Remove any previous output before the job is submitted.
        output.getFileSystem(jobConf).delete(output, true);

        Job job = new Job(jobConf, "FuzzyKMeans Driver running clusterData over input: " + input);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(WeightedVectorWritable.class);
        FileInputFormat.setInputPaths(job, new Path[]{input});
        FileOutputFormat.setOutputPath(job, output);
        job.setMapperClass(FuzzyKMeansClusterMapper.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setNumReduceTasks(0);
        job.setJarByClass(FuzzyKMeansDriver.class);

        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Fuzzy K-Means Clustering failed processing " + clustersIn);
        }
    }

    /**
     * Checks whether every cluster stored in the part files under {@code filePath} reports
     * itself as converged.
     *
     * @param filePath directory holding one iteration's cluster output
     * @param conf configuration used to read the sequence files
     * @param fs file system used to locate the part files
     * @return {@code true} if no non-converged cluster was found (including when there are no
     *         part files), {@code false} as soon as one is encountered
     * @throws IOException if the part files cannot be listed or read
     */
    private static boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
        Path clusterPath = new Path(filePath, "*");
        FileStatus[] matches = fs.listStatus(FileUtil.stat2Paths(fs.globStatus(clusterPath, PathFilters.partFilter())), PathFilters.partFilter());
        ArrayList<Path> partFiles = new ArrayList<Path>();
        for (FileStatus match : matches) {
            partFiles.add(fs.makeQualified(match.getPath()));
        }
        for (Path path : partFiles) {
            SequenceFileValueIterator<SoftCluster> iterator = new SequenceFileValueIterator<SoftCluster>(path, true, conf);
            try {
                while (iterator.hasNext()) {
                    if (!iterator.next().isConverged()) {
                        // One non-converged cluster settles the answer; skip the remaining files.
                        return false;
                    }
                }
            }
            finally {
                // Always release the reader, including when reading fails part-way.
                iterator.close();
            }
        }
        return true;
    }
}

