/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.spectral.kmeans;

import java.io.IOException;
import java.util.Map;
import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
import org.apache.mahout.clustering.spectral.common.AffinityMatrixInputJob;
import org.apache.mahout.clustering.spectral.common.MatrixDiagonalizeJob;
import org.apache.mahout.clustering.spectral.common.UnitVectorizerJob;
import org.apache.mahout.clustering.spectral.common.VectorMatrixMultiplicationJob;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorIterable;
import org.apache.mahout.math.decomposer.lanczos.LanczosState;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;
import org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver;
import org.apache.mahout.math.hadoop.decomposer.EigenVerificationJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Driver for spectral k-means clustering.
 *
 * <p>The class can be launched from the command line through {@link #main(String[])}
 * or called programmatically through the static
 * {@link #run(Configuration, Path, Path, int, int, DistanceMeasure, double, int)} method.
 * The static method chains the following steps, each delegated to another job or class:
 * <ol>
 *   <li>{@code AffinityMatrixInputJob} converts the input into sequence files;</li>
 *   <li>{@code MatrixDiagonalizeJob} and {@code VectorMatrixMultiplicationJob} derive a
 *       second matrix from them (written under a {@code laplacian-*} directory);</li>
 *   <li>{@code DistributedLanczosSolver} and {@code EigenVerificationJob} produce and
 *       verify eigenvectors of that matrix;</li>
 *   <li>the verified eigenvectors are transposed and passed through
 *       {@code UnitVectorizerJob};</li>
 *   <li>{@code RandomSeedGenerator} and {@code KMeansDriver} cluster the resulting rows.</li>
 * </ol>
 */
public class SpectralKMeansDriver extends AbstractJob {
    private static final Logger log = LoggerFactory.getLogger(SpectralKMeansDriver.class);

    /**
     * Factor applied to the requested number of clusters to obtain the rank that is
     * requested from the Lanczos solver.
     */
    public static final double OVERSHOOT_MULTIPLIER = 2.0;

    /**
     * Command-line entry point; hands a new driver instance to {@link ToolRunner}.
     * The value returned by {@code ToolRunner.run} is not inspected.
     *
     * @param args raw command-line arguments
     * @throws Exception whatever {@code ToolRunner.run} propagates
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new SpectralKMeansDriver(), args);
    }

    /**
     * Registers the command-line options, parses {@code arg0}, and starts the clustering
     * pipeline with the parsed values.
     *
     * <p>Registered options: input, output, {@code dimensions} ({@code d}),
     * {@code clusters} ({@code k}), distance measure, convergence delta, maximum
     * iterations and overwrite. When the overwrite option is present the output path is
     * deleted before the pipeline starts.
     *
     * @param arg0 raw command-line arguments
     * @return {@code 0} after the pipeline has completed, and also {@code 0} when
     *         {@code parseArguments} returns {@code null}
     * @throws IOException            propagated from the file-system and job calls
     * @throws ClassNotFoundException if the distance measure class cannot be loaded
     * @throws InstantiationException if the distance measure class cannot be instantiated
     * @throws IllegalAccessException if the distance measure constructor is not accessible
     * @throws InterruptedException   propagated from the pipeline
     * @throws NumberFormatException  if a numeric option value cannot be parsed
     */
    public int run(String[] arg0) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException {
        Configuration conf = getConf();

        addInputOption();
        addOutputOption();
        addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
        addOption("clusters", "k", "Number of clusters and top eigenvectors", true);
        addOption(DefaultOptionCreator.distanceMeasureOption().create());
        addOption(DefaultOptionCreator.convergenceOption().create());
        addOption(DefaultOptionCreator.maxIterationsOption().create());
        addOption(DefaultOptionCreator.overwriteOption().create());

        Map<String, String> parsedArgs = parseArguments(arg0);
        if (parsedArgs == null) {
            // Nothing to run; presumably help was printed or parsing failed -- TODO confirm.
            return 0;
        }

        Path input = getInputPath();
        Path output = getOutputPath();
        if (hasOption("overwrite")) {
            HadoopUtil.delete(conf, output);
        }

        int numDims = Integer.parseInt(parsedArgs.get("--dimensions"));
        int clusters = Integer.parseInt(parsedArgs.get("--clusters"));

        // The distance measure is named on the command line and loaded reflectively
        // through the thread's context class loader.
        String measureClass = getOption("distanceMeasure");
        ClassLoader ccl = Thread.currentThread().getContextClassLoader();
        DistanceMeasure measure = ccl.loadClass(measureClass).asSubclass(DistanceMeasure.class).newInstance();

        double convergenceDelta = Double.parseDouble(getOption("convergenceDelta"));
        int maxIterations = Integer.parseInt(getOption("maxIter"));

        run(conf, input, output, numDims, clusters, measure, convergenceDelta, maxIterations);
        return 0;
    }

    /**
     * Runs the complete spectral k-means pipeline.
     *
     * <p>Intermediate data is written below {@code output/calculations} and
     * {@code output/temporary}; the initial cluster seeds go to {@code output/clusters-0}
     * and k-means itself writes to {@code output}. After clustering, the keys found in
     * {@code output/clusteredPoints/part-m-00000} are logged at INFO level, one per record,
     * prefixed with a running record index.
     *
     * @param conf             configuration passed to the solver, verifier, seed generator
     *                         and k-means; a copy of it is set on the row matrices
     * @param input            path handed to {@code AffinityMatrixInputJob}
     * @param output           base directory for all intermediate and final output
     * @param numDims          dimension of the square affinity matrix; must be positive
     * @param clusters         number of clusters (also the number of eigenvectors requested
     *                         from the verifier); must be positive
     * @param measure          distance measure used for seeding and for k-means
     * @param convergenceDelta convergence threshold forwarded to k-means
     * @param maxIterations    iteration limit forwarded to k-means
     * @throws IllegalArgumentException if {@code numDims} or {@code clusters} is not positive
     * @throws IOException              propagated from the underlying jobs
     * @throws InterruptedException     propagated from the underlying jobs
     * @throws ClassNotFoundException   propagated from the underlying jobs
     */
    public static void run(Configuration conf, Path input, Path output, int numDims, int clusters, DistanceMeasure measure, double convergenceDelta, int maxIterations) throws IOException, InterruptedException, ClassNotFoundException {
        // Fail fast: none of the jobs below is meaningful for an empty matrix or for
        // zero clusters, and an error from deep inside a distributed job is hard to read.
        if (numDims <= 0) {
            throw new IllegalArgumentException("numDims must be positive but was " + numDims);
        }
        if (clusters <= 0) {
            throw new IllegalArgumentException("clusters must be positive but was " + clusters);
        }

        Path outputCalc = new Path(output, "calculations");
        Path outputTmp = new Path(output, "temporary");

        // Step 1: turn the raw input into sequence files holding the affinity matrix.
        Path affSeqFiles = suffixedPath(outputCalc, "seqfile-");
        AffinityMatrixInputJob.runJob(input, affSeqFiles, numDims, numDims);

        // NOTE(review): this matrix is not referenced again in this method. It is kept
        // because the effects of setConf are not visible from here -- confirm before removing.
        DistributedRowMatrix affinity = new DistributedRowMatrix(affSeqFiles, suffixedPath(outputTmp, "afftmp-"), numDims, numDims);
        Configuration depConf = new Configuration(conf);
        affinity.setConf(depConf);

        // Step 2: derive the diagonal vector from the affinity data and combine the two
        // into the matrix that is decomposed below (presumably the normalized Laplacian,
        // judging by the output directory name -- TODO confirm).
        Vector diagonal = MatrixDiagonalizeJob.runJob(affSeqFiles, numDims);
        DistributedRowMatrix laplacian = VectorMatrixMultiplicationJob.runJob(affSeqFiles, diagonal, suffixedPath(outputCalc, "laplacian-"));
        laplacian.setConf(depConf);

        // Step 3: ask the Lanczos solver for more eigenvectors than clusters are needed;
        // the verifier below is then limited to 'clusters' of them.
        int overshoot = (int) (clusters * OVERSHOOT_MULTIPLIER);
        DistributedLanczosSolver solver = new DistributedLanczosSolver();
        // NOTE(review): two adjacent int arguments (overshoot, numDims) -- confirm that
        // they match the parameter order of the LanczosState constructor in use.
        LanczosState state = new LanczosState(laplacian, overshoot, numDims, solver.getInitialVector(laplacian));
        Path lanczosSeqFiles = suffixedPath(outputCalc, "eigenvectors-");
        solver.runJob(conf, state, overshoot, true, lanczosSeqFiles.toString());

        // Step 4: verify the eigenvectors. The literals true, 1.0 and 0.0 are passed
        // through unchanged; their meaning is defined by EigenVerificationJob.runJob.
        EigenVerificationJob verifier = new EigenVerificationJob();
        Path verifiedEigensPath = new Path(outputCalc, "eigenverifier");
        verifier.runJob(conf, lanczosSeqFiles, laplacian.getRowPath(), verifiedEigensPath, true, 1.0, 0.0, clusters);
        Path cleanedEigens = verifier.getCleanedEigensPath();

        // Step 5: transpose the cleaned eigenvectors and run the rows of the transpose
        // through the unit-vectorizer job.
        DistributedRowMatrix eigenvectors = new DistributedRowMatrix(cleanedEigens, new Path(cleanedEigens, "tmp"), clusters, numDims);
        eigenvectors.setConf(depConf);
        DistributedRowMatrix transposed = eigenvectors.transpose();
        Path unitVectors = suffixedPath(outputCalc, "unitvectors-");
        UnitVectorizerJob.runJob(transposed.getRowPath(), unitVectors);
        DistributedRowMatrix unitRows = new DistributedRowMatrix(unitVectors, new Path(unitVectors, "tmp"), clusters, numDims);
        unitRows.setConf(depConf);

        // Step 6: seed and run k-means on the unit-vector rows.
        Path initialclusters = RandomSeedGenerator.buildRandom(conf, unitRows.getRowPath(), new Path(output, "clusters-0"), clusters, measure);
        KMeansDriver.run(conf, unitRows.getRowPath(), initialclusters, output, measure, convergenceDelta, maxIterations, true, false);

        // Step 7: log the key of every record in the first clustered-points part file.
        // Only part-m-00000 is read; any additional part files are not reported.
        Path clusteredPointsPath = new Path(output, "clusteredPoints");
        Path inputPath = new Path(clusteredPointsPath, "part-m-00000");
        int id = 0;
        for (Pair<?, ?> record : new SequenceFileIterable(inputPath, conf)) {
            log.info("{}: {}", id++, ((IntWritable) record.getFirst()).get());
        }
    }

    /**
     * Builds a child path whose name is {@code prefix} followed by the low eight bits of
     * {@link System#nanoTime()}, i.e. a number in the range 0..255.
     *
     * <p>The suffix only makes a clash with an earlier run less likely; it does not
     * guarantee a unique name.
     *
     * @param parent directory that will contain the new path
     * @param prefix leading part of the child name
     * @return {@code parent/prefix<0..255>}
     */
    private static Path suffixedPath(Path parent, String prefix) {
        return new Path(parent, prefix + (System.nanoTime() & 0xFFL));
    }
}

