/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.minhash;

import com.google.common.base.Charsets;
import com.google.common.io.Files;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

/**
 * Command-line utility that converts a tab-separated Last.fm data file into a Hadoop
 * sequence file of {@code Text} item keys and {@code VectorWritable} feature vectors.
 *
 * <p>Conversion happens in two steps: {@link #convertToItemFeatures(String, Lastfm)} reads
 * the input file into an in-memory item-to-feature-index map, and
 * {@link #writeToSequenceFile(Map, Path)} writes that map out as a sequence file.
 * Progress is reported on standard output.
 */
public final class LastfmDataConverter {
    private static final Pattern TAB_PATTERN = Pattern.compile("\t");
    private static final String CONVERT_MSG = "Converting data to internal vector format: ";
    private static final String WRITE_MSG = "Now writing vectorized data in sequence file format: ";

    private LastfmDataConverter() {
    }

    /**
     * Builds a short progress prefix showing the heap currently in use by this JVM
     * (total memory minus free memory), in whole megabytes.
     */
    private static String usedMemory() {
        Runtime runtime = Runtime.getRuntime();
        long usedBytes = runtime.totalMemory() - runtime.freeMemory();
        return "Used Memory: [" + usedBytes / (1024L * 1024L) + " MB] ";
    }

    /**
     * Picks the column used as the feature: column 0 for {@code USERS_360K},
     * column 2 for every other dataset.
     */
    private static String getFeature(String[] fields, Lastfm dataSet) {
        if (dataSet == Lastfm.USERS_360K) {
            return fields[0];
        }
        return fields[2];
    }

    /**
     * Picks the column used as the item: column 2 for {@code USERS_360K},
     * column 0 for every other dataset.
     */
    private static String getItem(String[] fields, Lastfm dataSet) {
        if (dataSet == Lastfm.USERS_360K) {
            return fields[2];
        }
        return fields[0];
    }

    /**
     * Reads a tab-separated input file and groups feature indices by item.
     *
     * <p>Each distinct feature string is assigned a 1-based index in order of first
     * appearance. For every input line, the index of the line's feature is appended to the
     * list belonging to the line's item, so a list may contain the same index more than once.
     *
     * @param inputFile path of the local tab-separated input file, read as UTF-8
     * @param dataSet   dataset layout; selects which columns are the item and the feature and
     *                  supplies the expected record count used for progress reporting
     * @return map from item to the feature indices seen for that item, in input order
     * @throws IOException if the file cannot be opened or read
     * @throws ArrayIndexOutOfBoundsException if a line splits into fewer than three fields
     */
    public static Map<String, List<Integer>> convertToItemFeatures(String inputFile, Lastfm dataSet) throws IOException {
        long totalRecords = dataSet.totalRecords;
        Map<String, Integer> featureIdxMap = new HashMap<String, Integer>();
        Map<String, List<Integer>> itemFeaturesMap = new HashMap<String, List<Integer>>();
        BufferedReader br = Files.newReader(new File(inputFile), Charsets.UTF_8);
        try {
            System.out.print(usedMemory() + CONVERT_MSG);
            int prevPercentDone = 1;
            double percentDone = 0.0;
            long parsedRecords = 0L;
            String line;
            while ((line = br.readLine()) != null) {
                String[] fields = TAB_PATTERN.split(line);
                String feature = getFeature(fields, dataSet);
                String item = getItem(fields, dataSet);

                // Assign the next 1-based index to a feature seen for the first time.
                Integer featureIdx = featureIdxMap.get(feature);
                if (featureIdx == null) {
                    featureIdx = featureIdxMap.size() + 1;
                    featureIdxMap.put(feature, featureIdx);
                }

                List<Integer> features = itemFeaturesMap.get(item);
                if (features == null) {
                    features = new ArrayList<Integer>();
                    itemFeaturesMap.put(item, features);
                }
                features.add(featureIdx);

                // Count each record exactly once; counting it twice made the reported
                // progress run at roughly double the real rate.
                ++parsedRecords;
                percentDone = parsedRecords * 100.0 / totalRecords;
                if (percentDone > prevPercentDone) {
                    // The memory figure is only needed when a progress line is actually printed.
                    System.out.print('\r' + usedMemory() + CONVERT_MSG + percentDone + '%');
                    ++prevPercentDone;
                }
            }
            System.out.print('\r' + usedMemory() + CONVERT_MSG + percentDone + "% Completed\n");
        } finally {
            br.close();
        }
        return itemFeaturesMap;
    }

    /**
     * Writes the item-to-features map as a sequence file of {@code Text} keys and
     * {@code VectorWritable} values.
     *
     * <p>For each item a {@code SequentialAccessSparseVector} is created whose cardinality is
     * the number of feature indices for that item; position {@code i} holds the i-th feature
     * index as a double. The parent directory of {@code outputPath} is created if the path
     * has one.
     *
     * @param itemFeaturesMap map from item to its feature indices
     * @param outputPath      destination sequence file
     * @return always {@code true}; failures are reported by exception
     * @throws IOException if the file system cannot be accessed or writing fails
     */
    public static boolean writeToSequenceFile(Map<String, List<Integer>> itemFeaturesMap, Path outputPath) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // A root path has no parent; there is nothing to create in that case.
        Path parent = outputPath.getParent();
        if (parent != null) {
            fs.mkdirs(parent);
        }
        long totalRecords = itemFeaturesMap.size();
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputPath, Text.class, VectorWritable.class);
        try {
            System.out.print(WRITE_MSG);
            // The two writables are reused for every record.
            Text itemWritable = new Text();
            VectorWritable featuresWritable = new VectorWritable();
            int doneRecords = 0;
            int prevPercentDone = 1;
            for (Map.Entry<String, List<Integer>> itemFeature : itemFeaturesMap.entrySet()) {
                List<Integer> features = itemFeature.getValue();
                itemWritable.set(itemFeature.getKey());
                SequentialAccessSparseVector featureVector = new SequentialAccessSparseVector(features.size());
                int i = 0;
                for (Integer feature : features) {
                    featureVector.setQuick(i++, feature.intValue());
                }
                featuresWritable.set(featureVector);
                writer.append(itemWritable, featuresWritable);

                ++doneRecords;
                double percentDone = doneRecords * 100.0 / totalRecords;
                if (percentDone > prevPercentDone) {
                    System.out.print('\r' + WRITE_MSG + percentDone + "% ");
                    ++prevPercentDone;
                }
            }
        } finally {
            writer.close();
        }
        // Reported here, after the writer has closed, rather than inside the loop: the
        // in-loop threshold check never fires for the final record once the counter has
        // already reached 100, so the completion line used to be lost on larger inputs.
        System.out.print('\r' + WRITE_MSG + "100.0% Completed\n");
        return true;
    }

    /**
     * Entry point: {@code LastfmDataConverter <input> <output> <dataset>}.
     *
     * @param args input file path, output path, and dataset name
     * @throws IllegalArgumentException if the dataset name is not recognised
     * @throws IllegalStateException    if the input file yields no records
     * @throws Exception                on any I/O failure during conversion or writing
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 3) {
            System.out.println("[Usage]: LastfmDataConverter <input> <output> <dataset>");
            System.out.println("   <input>: Absolute path to the local file [usersha1-artmbid-artname-plays.tsv] ");
            System.out.println("  <output>: Absolute path to the HDFS output file");
            System.out.println(" <dataset>: Either of the two Lastfm public datasets. Must be either 'Users360K' or 'Users1K'");
            System.out.println("Note:- Hadoop configuration pointing to HDFS namenode should be in classpath");
            return;
        }
        // Accept the names advertised in the usage text as well as the exact enum constants.
        Lastfm dataSet = Lastfm.fromName(args[2]);
        Map<String, List<Integer>> itemFeatures = convertToItemFeatures(args[0], dataSet);
        if (itemFeatures.isEmpty()) {
            throw new IllegalStateException("Error converting the data file: [" + args[0] + ']');
        }
        Path output = new Path(args[1]);
        boolean status = writeToSequenceFile(itemFeatures, output);
        if (status) {
            System.out.println("Data converted and written successfully to HDFS location: [" + output + ']');
        } else {
            System.err.println("Error writing the converted data to HDFS location: [" + output + ']');
        }
    }

    /**
     * The supported Last.fm datasets, each carrying the record count used as the
     * denominator for progress reporting.
     */
    enum Lastfm {
        USERS_360K(17559530),
        USERS_1K(19150868);

        private final int totalRecords;

        Lastfm(int totalRecords) {
            this.totalRecords = totalRecords;
        }

        /**
         * Resolves a dataset from user input, ignoring case, surrounding whitespace and
         * underscores, so that both {@code USERS_360K} and {@code Users360K} are accepted.
         *
         * @param name dataset name as typed by the user
         * @return the matching dataset
         * @throws IllegalArgumentException if {@code name} is null or matches no dataset
         */
        static Lastfm fromName(String name) {
            if (name != null) {
                String normalized = name.trim().replace("_", "");
                for (Lastfm candidate : values()) {
                    if (candidate.name().replace("_", "").equalsIgnoreCase(normalized)) {
                        return candidate;
                    }
                }
            }
            throw new IllegalArgumentException(
                "Unknown dataset: [" + name + "]. Must be either 'Users360K' or 'Users1K'");
        }
    }
}

