/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.canopy;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.canopy.Canopy;
import org.apache.mahout.clustering.canopy.CanopyClusterer;
import org.apache.mahout.clustering.canopy.CanopyMapper;
import org.apache.mahout.clustering.canopy.CanopyReducer;
import org.apache.mahout.clustering.canopy.ClusterMapper;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Driver for canopy clustering. It can be launched from the command line through
 * {@link #main(String[])} / {@link #run(String[])} or invoked programmatically through the
 * static {@code run}, {@code buildClusters} and {@code clusterData} methods.
 *
 * <p>Each operation has two execution paths selected by a {@code runSequential} flag: an
 * in-process path that reads and writes sequence files directly, and a path that submits a
 * Hadoop {@link Job}.
 */
public class CanopyDriver extends AbstractJob {
    /** Name of the sub-directory of the output path that receives the clustered points. */
    public static final String DEFAULT_CLUSTERED_POINTS_DIRECTORY = "clusteredPoints";
    private static final Logger log = LoggerFactory.getLogger(CanopyDriver.class);

    /**
     * Command-line entry point; delegates to {@link ToolRunner} with a fresh configuration.
     *
     * @param args command-line arguments
     * @throws Exception if the tool fails
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new CanopyDriver(), args);
    }

    /**
     * Parses the command-line options and runs the canopy job.
     *
     * <p>{@code t3} defaults to {@code t1} and {@code t4} defaults to {@code t2} when the
     * corresponding options are absent. When the {@code overwrite} option is present the
     * output path is deleted before anything is written.
     *
     * @param args command-line arguments
     * @return {@code -1} if argument parsing yields nothing to run, {@code 0} otherwise
     * @throws Exception if option values cannot be parsed, the distance measure cannot be
     *         loaded or instantiated, or the clustering itself fails
     */
    @Override
    public int run(String[] args) throws Exception {
        addInputOption();
        addOutputOption();
        addOption(DefaultOptionCreator.distanceMeasureOption().create());
        addOption(DefaultOptionCreator.t1Option().create());
        addOption(DefaultOptionCreator.t2Option().create());
        addOption(DefaultOptionCreator.t3Option().create());
        addOption(DefaultOptionCreator.t4Option().create());
        addOption(DefaultOptionCreator.overwriteOption().create());
        addOption(DefaultOptionCreator.clusteringOption().create());
        addOption(DefaultOptionCreator.methodOption().create());
        if (parseArguments(args) == null) {
            return -1;
        }

        Path input = getInputPath();
        Path output = getOutputPath();
        Configuration conf = getConf();
        if (hasOption("overwrite")) {
            HadoopUtil.delete(conf, output);
        }

        String measureClass = getOption("distanceMeasure");
        double t1 = Double.parseDouble(getOption("t1"));
        double t2 = Double.parseDouble(getOption("t2"));
        double t3 = hasOption("t3") ? Double.parseDouble(getOption("t3")) : t1;
        double t4 = hasOption("t4") ? Double.parseDouble(getOption("t4")) : t2;
        boolean runClustering = hasOption("clustering");
        // Literal-first comparison: a missing method value selects MapReduce instead of
        // failing with a NullPointerException.
        boolean runSequential = "sequential".equalsIgnoreCase(getOption("method"));

        ClassLoader ccl = Thread.currentThread().getContextClassLoader();
        DistanceMeasure measure = ccl.loadClass(measureClass).asSubclass(DistanceMeasure.class).newInstance();
        run(conf, input, output, measure, t1, t2, t3, t4, runClustering, runSequential);
        return 0;
    }

    /**
     * Builds the canopies and, optionally, assigns the input points to them.
     *
     * @param conf configuration used for file access and job submission
     * @param input path of the input vectors
     * @param output base output path
     * @param measure distance measure handed to the clusterer / jobs
     * @param t1 the T1 threshold
     * @param t2 the T2 threshold
     * @param t3 the T3 threshold; only forwarded to the MapReduce build job
     * @param t4 the T4 threshold; only forwarded to the MapReduce build job
     * @param runClustering if true, {@link #clusterData} is run after the canopies are built
     * @param runSequential if true, run in-process instead of submitting Hadoop jobs
     */
    public static void run(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, double t3, double t4, boolean runClustering, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
        Path clustersOut = buildClusters(conf, input, output, measure, t1, t2, t3, t4, runSequential);
        if (runClustering) {
            clusterData(conf, input, clustersOut, output, measure, t1, t2, runSequential);
        }
    }

    /**
     * Convenience overload that uses {@code t1} for T3 and {@code t2} for T4.
     */
    public static void run(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, boolean runClustering, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
        run(conf, input, output, measure, t1, t2, t1, t2, runClustering, runSequential);
    }

    /**
     * Convenience overload that uses a fresh {@link Configuration}, {@code t1} for T3 and
     * {@code t2} for T4.
     */
    public static void run(Path input, Path output, DistanceMeasure measure, double t1, double t2, boolean runClustering, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
        run(new Configuration(), input, output, measure, t1, t2, runClustering, runSequential);
    }

    /**
     * Convenience overload of {@code buildClusters} that uses {@code t1} for T3 and
     * {@code t2} for T4.
     *
     * @return the directory the canopies were written to
     */
    public static Path buildClusters(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        return buildClusters(conf, input, output, measure, t1, t2, t1, t2, runSequential);
    }

    /**
     * Builds the canopies from the input vectors, either in-process or as a Hadoop job.
     *
     * @param conf configuration used for file access and job submission; the sequential
     *        path falls back to a fresh {@link Configuration} when this is null
     * @param input path of the input vectors
     * @param output base output path; canopies go to its {@code clusters-0} sub-directory
     * @param measure distance measure handed to the clusterer / job
     * @param t1 the T1 threshold
     * @param t2 the T2 threshold
     * @param t3 the T3 threshold; ignored by the sequential path
     * @param t4 the T4 threshold; ignored by the sequential path
     * @param runSequential if true, run in-process instead of submitting a Hadoop job
     * @return the directory the canopies were written to
     */
    public static Path buildClusters(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, double t3, double t4, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        log.info("Build Clusters Input: {} Out: {} Measure: {} t1: {} t2: {}", new Object[]{input, output, measure, t1, t2});
        if (runSequential) {
            return buildClustersSeq(sequentialConf(conf), input, output, measure, t1, t2);
        }
        return buildClustersMR(conf, input, output, measure, t1, t2, t3, t4);
    }

    /**
     * Returns the configuration the sequential code paths should use: the caller's
     * configuration when one was supplied, otherwise a fresh one so that a null
     * configuration remains acceptable for in-process execution.
     */
    private static Configuration sequentialConf(Configuration conf) {
        return conf == null ? new Configuration() : conf;
    }

    /**
     * In-process canopy construction: feeds every input vector to a {@link CanopyClusterer}
     * and writes the resulting canopies, keyed by identifier, to
     * {@code <output>/clusters-0/part-r-00000}.
     *
     * @return the {@code clusters-0} directory under {@code output}
     */
    private static Path buildClustersSeq(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2) throws IOException {
        CanopyClusterer clusterer = new CanopyClusterer(measure, t1, t2);
        ArrayList<Canopy> canopies = new ArrayList<Canopy>();
        FileSystem fs = FileSystem.get(input.toUri(), conf);
        for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
            clusterer.addPointToCanopies(vw.get(), canopies);
        }

        Path canopyOutputDir = new Path(output, "clusters-0");
        Path path = new Path(canopyOutputDir, "part-r-00000");
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, Canopy.class);
        try {
            for (Canopy canopy : canopies) {
                canopy.computeParameters();
                log.debug("Writing Canopy:{} center:{} numPoints:{} radius:{}", new Object[]{canopy.getIdentifier(), AbstractCluster.formatVector(canopy.getCenter(), null), canopy.getNumPoints(), AbstractCluster.formatVector(canopy.getRadius(), null)});
                writer.append(new Text(canopy.getIdentifier()), canopy);
            }
        } finally {
            writer.close();
        }
        return canopyOutputDir;
    }

    /**
     * Canopy construction as a Hadoop job with {@link CanopyMapper}, {@link CanopyReducer}
     * and a single reduce task. The measure class name and the four thresholds are stored
     * in {@code conf} before the job is created (note that this mutates the caller's
     * configuration).
     *
     * @return the {@code clusters-0} directory under {@code output}
     * @throws InterruptedException if the job does not complete successfully
     */
    private static Path buildClustersMR(Configuration conf, Path input, Path output, DistanceMeasure measure, double t1, double t2, double t3, double t4) throws IOException, InterruptedException, ClassNotFoundException {
        conf.set("org.apache.mahout.clustering.canopy.measure", measure.getClass().getName());
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(t1));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(t2));
        conf.set("org.apache.mahout.clustering.canopy.t3", String.valueOf(t3));
        conf.set("org.apache.mahout.clustering.canopy.t4", String.valueOf(t4));

        Job job = new Job(conf, "Canopy Driver running buildClusters over input: " + input);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setMapperClass(CanopyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(VectorWritable.class);
        job.setReducerClass(CanopyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Canopy.class);
        job.setNumReduceTasks(1);
        job.setJarByClass(CanopyDriver.class);

        FileInputFormat.addInputPath(job, input);
        Path canopyOutputDir = new Path(output, "clusters-0");
        FileOutputFormat.setOutputPath(job, canopyOutputDir);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Canopy Job failed processing " + input);
        }
        return canopyOutputDir;
    }

    /**
     * Assigns the input points to previously built canopies, either in-process or as a
     * Hadoop job. Results are written below
     * {@code <output>/}{@link #DEFAULT_CLUSTERED_POINTS_DIRECTORY}.
     *
     * @param conf configuration used for file access and job submission; the sequential
     *        path falls back to a fresh {@link Configuration} when this is null
     * @param points path of the input vectors
     * @param canopies path of the canopies to assign points to
     * @param output base output path
     * @param measure distance measure handed to the clusterer / job
     * @param t1 the T1 threshold
     * @param t2 the T2 threshold
     * @param runSequential if true, run in-process instead of submitting a Hadoop job
     */
    public static void clusterData(Configuration conf, Path points, Path canopies, Path output, DistanceMeasure measure, double t1, double t2, boolean runSequential) throws InstantiationException, IllegalAccessException, IOException, InterruptedException, ClassNotFoundException {
        if (runSequential) {
            clusterDataSeq(sequentialConf(conf), points, canopies, output, measure, t1, t2);
        } else {
            clusterDataMR(conf, points, canopies, output, measure, t1, t2);
        }
    }

    /**
     * In-process point assignment: loads all canopies, then for each input file writes one
     * output file {@code part-m-<n>} containing, for every point, the id of the canopy
     * returned by {@link CanopyClusterer#findClosestCanopy} and the point wrapped with
     * weight 1.0.
     */
    private static void clusterDataSeq(Configuration conf, Path points, Path canopies, Path output, DistanceMeasure measure, double t1, double t2) throws InstantiationException, IllegalAccessException, IOException {
        CanopyClusterer clusterer = new CanopyClusterer(measure, t1, t2);
        ArrayList<Canopy> clusters = new ArrayList<Canopy>();
        for (Canopy value : new SequenceFileDirValueIterable<Canopy>(canopies, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
            clusters.add(value);
        }

        FileSystem fs = FileSystem.get(points.toUri(), conf);
        FileStatus[] status = fs.listStatus(points, PathFilters.logsCRCFilter());
        Path outPath = new Path(output, DEFAULT_CLUSTERED_POINTS_DIRECTORY);
        int part = 0;
        for (FileStatus s : status) {
            // One distinct output file per input file; without the increment every input
            // would be written to part-m-0 and overwrite the previous result.
            Path partPath = new Path(outPath, "part-m-" + part);
            part++;

            // Nested try/finally so the reader is released even if the writer cannot be
            // opened, and so a failing close on one does not skip closing the other.
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
            try {
                SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, partPath, IntWritable.class, WeightedVectorWritable.class);
                try {
                    Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
                    VectorWritable vw = reader.getValueClass().asSubclass(VectorWritable.class).newInstance();
                    while (reader.next(key, vw)) {
                        Canopy closest = clusterer.findClosestCanopy(vw.get(), clusters);
                        writer.append(new IntWritable(closest.getId()), new WeightedVectorWritable(1.0, vw.get()));
                        vw = reader.getValueClass().asSubclass(VectorWritable.class).newInstance();
                    }
                } finally {
                    writer.close();
                }
            } finally {
                reader.close();
            }
        }
    }

    /**
     * Point assignment as a map-only Hadoop job using {@link ClusterMapper}. The measure
     * class name, T1, T2 and the canopy path are stored in {@code conf} before the job is
     * created (note that this mutates the caller's configuration), and any existing
     * clustered-points directory is deleted before the job runs.
     *
     * @throws InterruptedException if the job does not complete successfully
     */
    private static void clusterDataMR(Configuration conf, Path points, Path canopies, Path output, DistanceMeasure measure, double t1, double t2) throws IOException, InterruptedException, ClassNotFoundException {
        conf.set("org.apache.mahout.clustering.canopy.measure", measure.getClass().getName());
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(t1));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(t2));
        conf.set("org.apache.mahout.clustering.canopy.path", canopies.toString());

        Job job = new Job(conf, "Canopy Driver running clusterData over input: " + points);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setMapperClass(ClusterMapper.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(WeightedVectorWritable.class);
        job.setNumReduceTasks(0);
        job.setJarByClass(CanopyDriver.class);

        FileInputFormat.addInputPath(job, points);
        Path outPath = new Path(output, DEFAULT_CLUSTERED_POINTS_DIRECTORY);
        FileOutputFormat.setOutputPath(job, outPath);
        HadoopUtil.delete(conf, outPath);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Canopy Clustering failed processing " + canopies);
        }
    }
}

