/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.math.hadoop.similarity;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.VarIntWritable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;
import org.apache.mahout.math.hadoop.similarity.Cooccurrence;
import org.apache.mahout.math.hadoop.similarity.SimilarityMatrixEntryKey;
import org.apache.mahout.math.hadoop.similarity.SimilarityType;
import org.apache.mahout.math.hadoop.similarity.WeightedOccurrence;
import org.apache.mahout.math.hadoop.similarity.WeightedOccurrenceArray;
import org.apache.mahout.math.hadoop.similarity.WeightedRowPair;
import org.apache.mahout.math.hadoop.similarity.vector.DistributedVectorSimilarity;

public class RowSimilarityJob
extends AbstractJob {
    /** Configuration key under which the similarity implementation class name is passed to the tasks. */
    public static final String DISTRIBUTED_SIMILARITY_CLASSNAME = RowSimilarityJob.class.getName() + ".distributedSimilarityClassname";
    /** Configuration key under which the number of columns of the input matrix is passed to the tasks. */
    public static final String NUMBER_OF_COLUMNS = RowSimilarityJob.class.getName() + ".numberOfColumns";
    /** Configuration key under which the maximum number of similarities kept per row is passed to the tasks. */
    public static final String MAX_SIMILARITIES_PER_ROW = RowSimilarityJob.class.getName() + ".maxSimilaritiesPerRow";
    /** Default for the maximum number of similarities per row (the value advertised by the command line option). */
    private static final int DEFAULT_MAX_SIMILARITIES_PER_ROW = 100;

    /**
     * Command line entry point: hands a fresh job instance and the raw
     * arguments to {@link ToolRunner}.
     *
     * @param args command line arguments, forwarded unchanged
     * @throws Exception whatever the tool runner or the job itself throws
     */
    public static void main(String[] args) throws Exception {
        RowSimilarityJob job = new RowSimilarityJob();
        ToolRunner.run(job, args);
    }

    /**
     * Parses the command line and runs up to three chained MapReduce phases:
     * <ol>
     *   <li>"weights": input matrix to per-column weighted occurrences, written to {@code tempDir/weights};</li>
     *   <li>"pairwiseSimilarity": weighted occurrences to similarity matrix entries, written to
     *       {@code tempDir/pairwiseSimilarity};</li>
     *   <li>conversion of the entries into row vectors, written to the output path.</li>
     * </ol>
     * Each phase is only started if {@code shouldRunNextPhase} allows it. A phase that
     * does not complete successfully aborts the run instead of feeding the next phase.
     *
     * @param args command line arguments
     * @return 0 on success, -1 if the arguments could not be parsed or one of the jobs failed
     * @throws IllegalArgumentException if {@code --numberOfColumns} or {@code --similarityClassname}
     *         is missing, or the number of columns is not an integer
     */
    public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        this.addInputOption();
        this.addOutputOption();
        this.addOption("numberOfColumns", "r", "Number of columns in the input matrix");
        this.addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use one of the predefined similarities (" + SimilarityType.listEnumNames() + ')');
        this.addOption("maxSimilaritiesPerRow", "m", "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')', String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
        Map<String, String> parsedArgs = this.parseArguments(args);
        if (parsedArgs == null) {
            return -1;
        }

        // Fail with a readable message instead of "NumberFormatException: null" / a bare NPE
        // when one of the options without a default is absent.
        String numberOfColumnsArg = parsedArgs.get("--numberOfColumns");
        if (numberOfColumnsArg == null) {
            throw new IllegalArgumentException("Missing required option --numberOfColumns");
        }
        int numberOfColumns = Integer.parseInt(numberOfColumnsArg);

        String similarityClassnameArg = parsedArgs.get("--similarityClassname");
        if (similarityClassnameArg == null) {
            throw new IllegalArgumentException("Missing required option --similarityClassname");
        }
        String distributedSimilarityClassname;
        try {
            distributedSimilarityClassname = SimilarityType.valueOf(similarityClassnameArg).getSimilarityImplementationClassName();
        } catch (IllegalArgumentException iae) {
            // Not one of the predefined similarity names: treat the argument as a class name.
            distributedSimilarityClassname = similarityClassnameArg;
        }
        int maxSimilaritiesPerRow = Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerRow"));

        Path inputPath = this.getInputPath();
        Path outputPath = this.getOutputPath();
        Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
        Path weightsPath = new Path(tempDirPath, "weights");
        Path pairwiseSimilarityPath = new Path(tempDirPath, "pairwiseSimilarity");
        AtomicInteger currentPhase = new AtomicInteger();

        if (RowSimilarityJob.shouldRunNextPhase(parsedArgs, currentPhase)) {
            Job weights = this.prepareJob(inputPath, weightsPath, SequenceFileInputFormat.class, RowWeightMapper.class, VarIntWritable.class, WeightedOccurrence.class, WeightedOccurrencesPerColumnReducer.class, VarIntWritable.class, WeightedOccurrenceArray.class, SequenceFileOutputFormat.class);
            weights.getConfiguration().set(DISTRIBUTED_SIMILARITY_CLASSNAME, distributedSimilarityClassname);
            if (!weights.waitForCompletion(true)) {
                return -1;
            }
        }

        if (RowSimilarityJob.shouldRunNextPhase(parsedArgs, currentPhase)) {
            Job pairwiseSimilarity = this.prepareJob(weightsPath, pairwiseSimilarityPath, SequenceFileInputFormat.class, CooccurrencesMapper.class, WeightedRowPair.class, Cooccurrence.class, SimilarityReducer.class, SimilarityMatrixEntryKey.class, DistributedRowMatrix.MatrixEntryWritable.class, SequenceFileOutputFormat.class);
            Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
            pairwiseConf.set(DISTRIBUTED_SIMILARITY_CLASSNAME, distributedSimilarityClassname);
            pairwiseConf.setInt(NUMBER_OF_COLUMNS, numberOfColumns);
            if (!pairwiseSimilarity.waitForCompletion(true)) {
                return -1;
            }
        }

        if (RowSimilarityJob.shouldRunNextPhase(parsedArgs, currentPhase)) {
            Job asMatrix = this.prepareJob(pairwiseSimilarityPath, outputPath, SequenceFileInputFormat.class, Mapper.class, SimilarityMatrixEntryKey.class, DistributedRowMatrix.MatrixEntryWritable.class, EntriesToVectorsReducer.class, IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
            asMatrix.setPartitionerClass(HashPartitioner.class);
            asMatrix.setGroupingComparatorClass(SimilarityMatrixEntryKey.SimilarityMatrixEntryKeyGroupingComparator.class);
            asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
            if (!asMatrix.waitForCompletion(true)) {
                return -1;
            }
        }
        return 0;
    }

    /**
     * Reflectively creates the similarity implementation with the given class name,
     * using its no-argument constructor.
     *
     * @param classname fully qualified name of a {@link DistributedVectorSimilarity} implementation
     * @return a new instance of that class
     * @throws IllegalStateException if no class name is given, or the class cannot be found,
     *         has no usable no-argument constructor, or its constructor fails
     */
    static DistributedVectorSimilarity instantiateSimilarity(String classname) {
        if (classname == null) {
            throw new IllegalStateException("Distributed similarity class name was not set!");
        }
        try {
            // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance(),
            // which lets checked exceptions thrown by the constructor escape undeclared.
            return Class.forName(classname)
                    .asSubclass(DistributedVectorSimilarity.class)
                    .getDeclaredConstructor()
                    .newInstance();
        } catch (ClassNotFoundException cnfe) {
            throw new IllegalStateException("Similarity class not found: " + classname, cnfe);
        } catch (NoSuchMethodException nsme) {
            throw new IllegalStateException("Similarity class has no no-argument constructor: " + classname, nsme);
        } catch (InstantiationException ie) {
            throw new IllegalStateException("Similarity class cannot be instantiated: " + classname, ie);
        } catch (IllegalAccessException iae) {
            throw new IllegalStateException("Similarity class constructor is not accessible: " + classname, iae);
        } catch (java.lang.reflect.InvocationTargetException ite) {
            throw new IllegalStateException("Constructor of similarity class failed: " + classname, ite);
        }
    }

    /**
     * Collects the similarity matrix entries of one reduce group into a single sparse
     * row vector, keeping at most {@code maxSimilaritiesPerRow} entries (the first ones
     * in iteration order), and emits it under the row index of the group's key.
     */
    public static class EntriesToVectorsReducer
            extends Reducer<SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable, IntWritable, VectorWritable> {
        private int maxSimilaritiesPerRow;

        /**
         * Reads the per-row limit from the job configuration.
         *
         * @throws IllegalStateException if the limit is missing or smaller than 1
         */
        protected void setup(
                Reducer<SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable, IntWritable, VectorWritable>.Context ctx)
                throws IOException, InterruptedException {
            super.setup(ctx);
            this.maxSimilaritiesPerRow = ctx.getConfiguration().getInt(MAX_SIMILARITIES_PER_ROW, -1);
            if (this.maxSimilaritiesPerRow < 1) {
                throw new IllegalStateException("Maximum number of similarities per row was not correctly set!");
            }
        }

        /**
         * Builds the row vector for {@code key.getRow()} from {@code entries}.
         * The context parameter uses the parameterized type (not the raw
         * {@code Reducer.Context}) so that the signature matches the superclass method.
         */
        protected void reduce(SimilarityMatrixEntryKey key,
                Iterable<DistributedRowMatrix.MatrixEntryWritable> entries,
                Reducer<SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable, IntWritable, VectorWritable>.Context ctx)
                throws IOException, InterruptedException {
            RandomAccessSparseVector temporaryVector = new RandomAccessSparseVector(Integer.MAX_VALUE, this.maxSimilaritiesPerRow);
            int similaritiesSet = 0;
            for (DistributedRowMatrix.MatrixEntryWritable entry : entries) {
                temporaryVector.setQuick(entry.getCol(), entry.getVal());
                // Stop consuming the group once the per-row limit has been reached.
                if (++similaritiesSet == this.maxSimilaritiesPerRow) {
                    break;
                }
            }
            SequentialAccessSparseVector vector = new SequentialAccessSparseVector((Vector) temporaryVector);
            ctx.write(new IntWritable(key.getRow()), new VectorWritable((Vector) vector));
        }
    }

    /**
     * Computes the similarity of one row pair from its cooccurrences and, unless the
     * result is NaN, emits it as a matrix entry for (rowA, rowB) and, for distinct
     * rows, also for (rowB, rowA).
     */
    public static class SimilarityReducer
            extends Reducer<WeightedRowPair, Cooccurrence, SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable> {
        private DistributedVectorSimilarity similarity;
        private int numberOfColumns;

        /**
         * Instantiates the configured similarity and reads the number of columns.
         *
         * @throws IllegalStateException if the number of columns is missing or smaller than 1
         */
        protected void setup(
                Reducer<WeightedRowPair, Cooccurrence, SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable>.Context ctx)
                throws IOException, InterruptedException {
            super.setup(ctx);
            this.similarity = RowSimilarityJob.instantiateSimilarity(ctx.getConfiguration().get(DISTRIBUTED_SIMILARITY_CLASSNAME));
            this.numberOfColumns = ctx.getConfiguration().getInt(NUMBER_OF_COLUMNS, -1);
            if (this.numberOfColumns < 1) {
                throw new IllegalStateException("Number of columns was not correctly set!");
            }
        }

        /**
         * Emits the similarity entries for {@code rowPair}.
         * The context parameter uses the parameterized type (not the raw
         * {@code Reducer.Context}) so that the signature matches the superclass method.
         */
        protected void reduce(WeightedRowPair rowPair,
                Iterable<Cooccurrence> cooccurrences,
                Reducer<WeightedRowPair, Cooccurrence, SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable>.Context ctx)
                throws IOException, InterruptedException {
            int rowA = rowPair.getRowA();
            int rowB = rowPair.getRowB();
            double similarityValue = this.similarity.similarity(rowA, rowB, cooccurrences, rowPair.getWeightA(), rowPair.getWeightB(), this.numberOfColumns);
            if (Double.isNaN(similarityValue)) {
                // NaN results are dropped: nothing is counted or written for this pair.
                return;
            }
            ctx.getCounter(Counter.SIMILAR_ROWS).increment(1L);

            SimilarityMatrixEntryKey key = new SimilarityMatrixEntryKey();
            DistributedRowMatrix.MatrixEntryWritable entry = new DistributedRowMatrix.MatrixEntryWritable();
            entry.setVal(similarityValue);

            entry.setRow(rowA);
            entry.setCol(rowB);
            key.set(rowA, similarityValue);
            ctx.write(key, entry);

            // Write the mirrored entry too, except on the diagonal where it would be a duplicate.
            if (rowA != rowB) {
                entry.setRow(rowB);
                entry.setCol(rowA);
                key.set(rowB, similarityValue);
                ctx.write(key, entry);
            }
        }
    }

    /**
     * For one column, emits a {@link Cooccurrence} for every pair of weighted
     * occurrences (each occurrence is also paired with itself), keyed by the row
     * pair with the smaller row index first.
     */
    public static class CooccurrencesMapper
            extends Mapper<VarIntWritable, WeightedOccurrenceArray, WeightedRowPair, Cooccurrence> {

        /**
         * Emits all row pairs of {@code column} and adds the number of emitted pairs
         * to the {@code COOCCURRENCES} counter.
         */
        protected void map(VarIntWritable column,
                WeightedOccurrenceArray weightedOccurrenceArray,
                Mapper<VarIntWritable, WeightedOccurrenceArray, WeightedRowPair, Cooccurrence>.Context ctx)
                throws IOException, InterruptedException {
            WeightedOccurrence[] weightedOccurrences = weightedOccurrenceArray.getWeightedOccurrences();
            // The same two output instances are refilled and written for every pair.
            WeightedRowPair rowPair = new WeightedRowPair();
            Cooccurrence cooccurrence = new Cooccurrence();
            // long, not int: a column with n occurrences yields n * (n + 1) / 2 pairs,
            // which exceeds Integer.MAX_VALUE for n above roughly 65,000.
            long numPairs = 0L;
            for (int n = 0; n < weightedOccurrences.length; ++n) {
                int rowA = weightedOccurrences[n].getRow();
                double weightA = weightedOccurrences[n].getWeight();
                double valueA = weightedOccurrences[n].getValue();
                for (int m = n; m < weightedOccurrences.length; ++m) {
                    int rowB = weightedOccurrences[m].getRow();
                    double weightB = weightedOccurrences[m].getWeight();
                    double valueB = weightedOccurrences[m].getValue();
                    // Normalize the key so that the smaller row index always comes first.
                    if (rowA <= rowB) {
                        rowPair.set(rowA, rowB, weightA, weightB);
                    } else {
                        rowPair.set(rowB, rowA, weightB, weightA);
                    }
                    cooccurrence.set(column.get(), valueA, valueB);
                    ctx.write(rowPair, cooccurrence);
                    ++numPairs;
                }
            }
            ctx.getCounter(Counter.COOCCURRENCES).increment(numPairs);
        }
    }

    /**
     * Gathers all weighted occurrences of one column into a single
     * {@link WeightedOccurrenceArray} and emits it under the column key.
     */
    public static class WeightedOccurrencesPerColumnReducer
            extends Reducer<VarIntWritable, WeightedOccurrence, VarIntWritable, WeightedOccurrenceArray> {

        /**
         * Emits the collected occurrences of {@code column}.
         * The context parameter uses the parameterized type (not the raw
         * {@code Reducer.Context}) so that the signature matches the superclass method.
         */
        protected void reduce(VarIntWritable column,
                Iterable<WeightedOccurrence> weightedOccurrences,
                Reducer<VarIntWritable, WeightedOccurrence, VarIntWritable, WeightedOccurrenceArray>.Context ctx)
                throws IOException, InterruptedException {
            ArrayList<WeightedOccurrence> collectedWeightedOccurrences = new ArrayList<WeightedOccurrence>();
            for (WeightedOccurrence weightedOccurrence : weightedOccurrences) {
                // Store a copy of each value; presumably needed because the framework
                // may reuse the value instance between iterations (confirm against Hadoop docs).
                collectedWeightedOccurrences.add(weightedOccurrence.clone());
            }
            WeightedOccurrence[] occurrences =
                    collectedWeightedOccurrences.toArray(new WeightedOccurrence[collectedWeightedOccurrences.size()]);
            ctx.write(column, new WeightedOccurrenceArray(occurrences));
        }
    }

    /**
     * Computes the weight of each input row with the configured similarity and emits
     * one {@link WeightedOccurrence} (row, value, weight) per non-zero element, keyed
     * by the element's column index.
     */
    public static class RowWeightMapper
            extends Mapper<IntWritable, VectorWritable, VarIntWritable, WeightedOccurrence> {
        private DistributedVectorSimilarity similarity;

        /** Instantiates the similarity implementation named in the job configuration. */
        protected void setup(
                Mapper<IntWritable, VectorWritable, VarIntWritable, WeightedOccurrence>.Context ctx)
                throws IOException, InterruptedException {
            super.setup(ctx);
            this.similarity = RowSimilarityJob.instantiateSimilarity(ctx.getConfiguration().get(DISTRIBUTED_SIMILARITY_CLASSNAME));
        }

        /** Emits the weighted occurrences of the row vector held by {@code vectorWritable}. */
        protected void map(IntWritable row,
                VectorWritable vectorWritable,
                Mapper<IntWritable, VectorWritable, VarIntWritable, WeightedOccurrence>.Context ctx)
                throws IOException, InterruptedException {
            Vector v = vectorWritable.get();
            // The weight depends only on the row, so it is computed once and shared by all its elements.
            double weight = this.similarity.weight(v);
            Iterator<Vector.Element> elementsIterator = v.iterateNonZero();
            while (elementsIterator.hasNext()) {
                Vector.Element element = elementsIterator.next();
                int column = element.index();
                double value = element.get();
                ctx.write(new VarIntWritable(column), new WeightedOccurrence(row.get(), value, weight));
            }
        }
    }

    /** Job counters incremented by the mapper and reducer classes of this job. */
    public static enum Counter {
        /** Number of cooccurrences emitted by the cooccurrences mapper. */
        COOCCURRENCES,
        /** Number of row pairs for which a (non-NaN) similarity was written. */
        SIMILAR_ROWS
    }
}

