/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.math.hadoop.decomposer;

import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorIterable;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.decomposer.lanczos.LanczosSolver;
import org.apache.mahout.math.decomposer.lanczos.LanczosState;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;
import org.apache.mahout.math.hadoop.decomposer.EigenVerificationJob;
import org.apache.mahout.math.hadoop.decomposer.HdfsBackedLanczosState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Hadoop {@link Tool} front end for {@link LanczosSolver} that operates on a
 * {@link DistributedRowMatrix} and persists the resulting vectors to a
 * {@link SequenceFile}.
 *
 * <p>Command-line use goes through {@link DistributedLanczosSolverJob} (see
 * {@link #main(String[])}), which parses the arguments and then calls
 * {@link #run(String[])}. The {@code run(Path, ...)} and {@code runJob(...)}
 * overloads can be called programmatically.
 */
public class DistributedLanczosSolver extends LanczosSolver implements Tool {
    /** Name of the child of the output directory that receives the raw (unverified) vectors. */
    public static final String RAW_EIGENVECTORS = "rawEigenvectors";
    private static final Logger log = LoggerFactory.getLogger(DistributedLanczosSolver.class);
    private Configuration conf;
    /** Parsed command-line options; populated only by {@link DistributedLanczosSolverJob#run(String[])}. */
    private Map<String, String> parsedArgs;

    /**
     * Builds the starting vector for the iteration: a dense vector with one
     * entry per corpus column, every entry equal to {@code 1 / sqrt(numCols)}
     * (so its Euclidean norm is 1 when {@code numCols > 0}).
     *
     * @param corpus the matrix whose column count determines the vector size
     * @return the uniformly filled start vector
     */
    public Vector getInitialVector(VectorIterable corpus) {
        int cardinality = corpus.numCols();
        DenseVector initialVector = new DenseVector(cardinality);
        initialVector.assign(1.0 / Math.sqrt(cardinality));
        return initialVector;
    }

    /**
     * Solves using a caller-supplied state and writes the result to
     * {@code outputEigenVectorPathString}.
     *
     * <p>The state's corpus is cast to {@link DistributedRowMatrix} and given a
     * copy of {@code originalConfig}; {@code originalConfig} itself becomes this
     * solver's configuration.
     *
     * @param originalConfig configuration to use for the corpus (copied) and for this solver
     * @param state solver state; its corpus must be a {@link DistributedRowMatrix}
     * @param desiredRank rank passed to {@code solve}
     * @param isSymmetric symmetry flag passed to {@code solve}
     * @param outputEigenVectorPathString path of the sequence file to write
     * @return the same {@code state} instance, after solving
     * @throws IOException if writing the output fails
     */
    public LanczosState runJob(Configuration originalConfig, LanczosState state, int desiredRank, boolean isSymmetric, String outputEigenVectorPathString) throws IOException {
        DistributedRowMatrix corpus = (DistributedRowMatrix) state.getCorpus();
        corpus.setConf(new Configuration(originalConfig));
        setConf(originalConfig);
        solve(state, desiredRank, isSymmetric);
        serializeOutput(state, new Path(outputEigenVectorPathString));
        return state;
    }

    /**
     * Convenience overload that builds the {@link DistributedRowMatrix} and a
     * fresh in-memory {@link LanczosState} from paths and dimensions, then
     * delegates to
     * {@link #runJob(Configuration, LanczosState, int, boolean, String)}.
     *
     * @param originalConfig configuration to use
     * @param inputPath location of the input matrix
     * @param outputTmpPath temporary directory handed to the matrix
     * @param numRows number of rows of the input matrix
     * @param numCols number of columns of the input matrix
     * @param isSymmetric symmetry flag passed to {@code solve}
     * @param desiredRank rank passed to {@code solve}
     * @param outputEigenVectorPathString path of the sequence file to write
     * @return the state after solving
     * @throws IOException if writing the output fails
     */
    public LanczosState runJob(Configuration originalConfig, Path inputPath, Path outputTmpPath, int numRows, int numCols, boolean isSymmetric, int desiredRank, String outputEigenVectorPathString) throws IOException {
        DistributedRowMatrix matrix = new DistributedRowMatrix(inputPath, outputTmpPath, numRows, numCols);
        matrix.setConf(new Configuration(originalConfig));
        LanczosState state = new LanczosState(matrix, numCols, desiredRank, getInitialVector(matrix));
        return runJob(originalConfig, state, desiredRank, isSymmetric, outputEigenVectorPathString);
    }

    /**
     * Solves using a caller-supplied state and writes the result to
     * {@code outputEigenVectorPathString}.
     *
     * <p>Unlike the {@link LanczosState}-returning overload, this variant does
     * not touch the configuration of the state's corpus; the caller is expected
     * to have configured it already.
     *
     * @param originalConfig configuration that becomes this solver's configuration
     * @param state solver state
     * @param numCols unused by this method; kept for signature compatibility
     * @param desiredRank rank passed to {@code solve}
     * @param isSymmetric symmetry flag passed to {@code solve}
     * @param outputEigenVectorPathString path of the sequence file to write
     * @throws IOException if writing the output fails
     */
    public void runJob(Configuration originalConfig, LanczosState state, int numCols, int desiredRank, boolean isSymmetric, String outputEigenVectorPathString) throws IOException {
        setConf(originalConfig);
        solve(state, desiredRank, isSymmetric);
        serializeOutput(state, new Path(outputEigenVectorPathString));
    }

    /**
     * Runs the solver from command-line options.
     *
     * <p>When the options have not been parsed yet (this method was invoked
     * directly rather than through {@link DistributedLanczosSolverJob}), the raw
     * arguments are handed to a new job instance, which parses them, stores the
     * result and calls back into this method.
     *
     * @param strings raw command-line arguments; only consulted when no parsed
     *        options are available yet
     * @return {@code 0} on success, {@code -1} if argument parsing failed, or the
     *         verification job's exit code when {@code --cleansvd} is set
     * @throws Exception if solving, persisting or verification fails
     */
    public int run(String[] strings) throws Exception {
        if (parsedArgs == null) {
            // The job returns -1 without calling back when parsing fails, so this cannot loop.
            return new DistributedLanczosSolverJob().run(strings);
        }
        Path inputPath = new Path(parsedArgs.get("--input"));
        Path outputPath = new Path(parsedArgs.get("--output"));
        Path outputTmpPath = new Path(parsedArgs.get("--tempDir"));
        String workingDir = parsedArgs.get("--workingDir");
        Path workingDirPath = workingDir != null ? new Path(workingDir) : null;
        int numRows = Integer.parseInt(parsedArgs.get("--numRows"));
        int numCols = Integer.parseInt(parsedArgs.get("--numCols"));
        boolean isSymmetric = Boolean.parseBoolean(parsedArgs.get("--symmetric"));
        int desiredRank = Integer.parseInt(parsedArgs.get("--rank"));
        boolean cleansvd = Boolean.parseBoolean(parsedArgs.get("--cleansvd"));
        if (cleansvd) {
            double maxError = Double.parseDouble(parsedArgs.get("--maxError"));
            double minEigenvalue = Double.parseDouble(parsedArgs.get("--minEigenvalue"));
            boolean inMemory = Boolean.parseBoolean(parsedArgs.get("--inMemory"));
            return run(inputPath, outputPath, outputTmpPath, workingDirPath, numRows, numCols, isSymmetric, desiredRank, maxError, minEigenvalue, inMemory);
        }
        return run(inputPath, outputPath, outputTmpPath, workingDirPath, numRows, numCols, isSymmetric, desiredRank);
    }

    /**
     * Runs the solver and, if it succeeds, an {@link EigenVerificationJob} over
     * the vectors written to {@code outputPath/}{@value #RAW_EIGENVECTORS}.
     *
     * @param inputPath location of the input matrix
     * @param outputPath output directory
     * @param outputTmpPath temporary directory
     * @param workingDirPath optional directory for an HDFS-backed state; may be {@code null}
     * @param numRows number of rows of the input matrix
     * @param numCols number of columns of the input matrix
     * @param isSymmetric symmetry flag passed to {@code solve}
     * @param desiredRank rank passed to {@code solve}
     * @param maxError passed through to the verification job
     * @param minEigenvalue passed through to the verification job
     * @param inMemory passed through to the verification job
     * @return the solver's non-zero result, otherwise the verification job's result
     * @throws Exception if solving, persisting or verification fails
     */
    public int run(Path inputPath, Path outputPath, Path outputTmpPath, Path workingDirPath, int numRows, int numCols, boolean isSymmetric, int desiredRank, double maxError, double minEigenvalue, boolean inMemory) throws Exception {
        int result = run(inputPath, outputPath, outputTmpPath, workingDirPath, numRows, numCols, isSymmetric, desiredRank);
        if (result != 0) {
            return result;
        }
        Path rawEigenVectorPath = new Path(outputPath, RAW_EIGENVECTORS);
        // The verification job gets its own copy so it cannot mutate this solver's configuration.
        Configuration verificationConf = new Configuration(confOrDefault());
        return new EigenVerificationJob().run(inputPath, rawEigenVectorPath, outputPath, outputTmpPath, maxError, minEigenvalue, inMemory, verificationConf);
    }

    /**
     * Runs the solver on the matrix at {@code inputPath} and writes the result
     * to {@code outputPath/}{@value #RAW_EIGENVECTORS}.
     *
     * <p>With a {@code null} {@code workingDirPath} a plain {@link LanczosState}
     * is used; otherwise an {@link HdfsBackedLanczosState} rooted at that
     * directory is used and shares the matrix's configuration.
     *
     * @param inputPath location of the input matrix
     * @param outputPath output directory
     * @param outputTmpPath temporary directory handed to the matrix
     * @param workingDirPath optional directory for an HDFS-backed state; may be {@code null}
     * @param numRows number of rows of the input matrix
     * @param numCols number of columns of the input matrix
     * @param isSymmetric symmetry flag passed to {@code solve}
     * @param desiredRank rank passed to {@code solve}
     * @return always {@code 0}; failures surface as exceptions
     * @throws Exception if solving or persisting fails
     */
    public int run(Path inputPath, Path outputPath, Path outputTmpPath, Path workingDirPath, int numRows, int numCols, boolean isSymmetric, int desiredRank) throws Exception {
        DistributedRowMatrix matrix = new DistributedRowMatrix(inputPath, outputTmpPath, numRows, numCols);
        matrix.setConf(new Configuration(confOrDefault()));
        LanczosState state;
        if (workingDirPath == null) {
            state = new LanczosState(matrix, numCols, desiredRank, getInitialVector(matrix));
        } else {
            HdfsBackedLanczosState hState = new HdfsBackedLanczosState(matrix, numCols, desiredRank, getInitialVector(matrix), workingDirPath);
            hState.setConf(matrix.getConf());
            state = hState;
        }
        solve(state, desiredRank, isSymmetric);
        serializeOutput(state, new Path(outputPath, RAW_EIGENVECTORS));
        return 0;
    }

    /**
     * Writes one {@link NamedVector} per completed iteration of {@code state} to
     * a sequence file keyed by {@link IntWritable} index.
     *
     * <p>Entry {@code i} holds the state's right singular vector
     * {@code numEigenVectors - 1 - i}, i.e. the state's vectors are written in
     * reverse index order, and is named
     * {@code "eigenVector<i>, eigenvalue = <singular value>"}.
     *
     * @param state the solved state to persist
     * @param outputPath sequence file to create
     * @throws IOException if the file cannot be created, written or closed
     */
    public void serializeOutput(LanczosState state, Path outputPath) throws IOException {
        int numEigenVectors = state.getIterationNumber();
        log.info("Persisting {} eigenVectors and eigenValues to: {}", numEigenVectors, outputPath);
        Configuration effectiveConf = confOrDefault();
        // Resolve the filesystem from the path itself so output on a non-default
        // filesystem (a different scheme/authority than fs.default) works too.
        FileSystem fs = outputPath.getFileSystem(effectiveConf);
        SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, effectiveConf, outputPath, IntWritable.class, VectorWritable.class);
        try {
            IntWritable iw = new IntWritable();
            for (int i = 0; i < numEigenVectors; ++i) {
                int sourceIndex = numEigenVectors - 1 - i;
                NamedVector v = new NamedVector(state.getRightSingularVector(sourceIndex), "eigenVector" + i + ", eigenvalue = " + state.getSingularValue(sourceIndex));
                iw.set(i);
                seqWriter.append(iw, new VectorWritable(v));
            }
        } finally {
            // Always release the writer, even when an append fails part-way through.
            seqWriter.close();
        }
    }

    /** Sets the Hadoop configuration used by this solver. */
    public void setConf(Configuration configuration) {
        this.conf = configuration;
    }

    /** Returns the Hadoop configuration last set on this solver, or {@code null} if none was set. */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Creates the command-line job bound to this solver instance.
     *
     * @return a new {@link DistributedLanczosSolverJob}
     */
    public DistributedLanczosSolverJob job() {
        return new DistributedLanczosSolverJob();
    }

    /**
     * Command-line entry point: runs a new solver's job through
     * {@link ToolRunner}. The tool's return code is not propagated to the
     * process exit status.
     *
     * @param args command-line arguments
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new DistributedLanczosSolver().job(), args);
    }

    /** Returns this solver's configuration, or a fresh default one when none has been set. */
    private Configuration confOrDefault() {
        Configuration current = getConf();
        return current != null ? current : new Configuration();
    }

    /**
     * {@link AbstractJob} adapter that declares the command-line options, parses
     * them into the enclosing solver and then invokes the solver. Its
     * configuration accessors delegate to the enclosing solver, which is why
     * this is an inner (non-static) class.
     */
    public class DistributedLanczosSolverJob extends AbstractJob {
        /** Stores the configuration on the enclosing solver. */
        public void setConf(Configuration conf) {
            DistributedLanczosSolver.this.setConf(conf);
        }

        /** Returns the enclosing solver's configuration. */
        public Configuration getConf() {
            return DistributedLanczosSolver.this.getConf();
        }

        /**
         * Declares the options, parses {@code args} and runs the enclosing solver.
         *
         * @param args raw command-line arguments
         * @return {@code -1} if parsing yields no arguments, otherwise the
         *         solver's result
         * @throws Exception if the solver fails
         */
        public int run(String[] args) throws Exception {
            addInputOption();
            addOutputOption();
            addOption("numRows", "nr", "Number of rows of the input matrix");
            addOption("numCols", "nc", "Number of columns of the input matrix");
            addOption("rank", "r", "Desired decomposition rank (note: only roughly 1/4 to 1/3 of these will have the top portion of the spectrum)");
            addOption("symmetric", "sym", "Is the input matrix square and symmetric?");
            addOption("workingDir", "wd", "Working directory path to store Lanczos basis vectors (to be used on restarts, and to avoid too much RAM usage)");
            addOption("cleansvd", "cl", "Run the EigenVerificationJob to clean the eigenvectors after SVD", false);
            addOption("maxError", "err", "Maximum acceptable error", "0.05");
            addOption("minEigenvalue", "mev", "Minimum eigenvalue to keep the vector for", "0.0");
            addOption("inMemory", "mem", "Buffer eigen matrix into memory (if you have enough!)", "false");
            Map<String, String> parsed = parseArguments(args);
            DistributedLanczosSolver.this.parsedArgs = parsed;
            if (parsed == null) {
                return -1;
            }
            return DistributedLanczosSolver.this.run(args);
        }
    }
}

