/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.fpm.pfpgrowth;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericsUtil;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.Parameters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
import org.apache.mahout.fpm.pfpgrowth.AggregatorMapper;
import org.apache.mahout.fpm.pfpgrowth.AggregatorReducer;
import org.apache.mahout.fpm.pfpgrowth.CountDescendingPairComparator;
import org.apache.mahout.fpm.pfpgrowth.ParallelCountingMapper;
import org.apache.mahout.fpm.pfpgrowth.ParallelCountingReducer;
import org.apache.mahout.fpm.pfpgrowth.ParallelFPGrowthCombiner;
import org.apache.mahout.fpm.pfpgrowth.ParallelFPGrowthMapper;
import org.apache.mahout.fpm.pfpgrowth.ParallelFPGrowthReducer;
import org.apache.mahout.fpm.pfpgrowth.TransactionSortingMapper;
import org.apache.mahout.fpm.pfpgrowth.TransactionSortingReducer;
import org.apache.mahout.fpm.pfpgrowth.TransactionTree;
import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static driver for the parallel FP-Growth pipeline.
 *
 * <p>The pipeline is a fixed sequence of stages (see {@link #runPFPGrowth(Parameters)}), each of which
 * reads from and writes to sub-directories of the directory named by the {@link #OUTPUT} parameter.
 * All stage configuration is carried in a {@link Parameters} object that is serialized into the Hadoop
 * {@link Configuration} under the {@link #PFP_PARAMETERS} key.
 *
 * <p>This class is a non-instantiable holder of static methods.
 */
public final class PFPGrowth {
    public static final String ENCODING = "encoding";
    public static final String F_LIST = "fList";
    public static final String G_LIST = "gList";
    public static final String NUM_GROUPS = "numGroups";
    public static final String OUTPUT = "output";
    public static final String MIN_SUPPORT = "minSupport";
    public static final String MAX_HEAPSIZE = "maxHeapSize";
    public static final String INPUT = "input";
    public static final String PFP_PARAMETERS = "pfp.parameters";
    public static final String FILE_PATTERN = "part-*";
    public static final String FPGROWTH = "fpgrowth";
    public static final String FREQUENT_PATTERNS = "frequentpatterns";
    public static final String PARALLEL_COUNTING = "parallelcounting";
    public static final String SORTED_OUTPUT = "sortedoutput";
    public static final String SPLIT_PATTERN = "splitPattern";
    public static final Pattern SPLITTER = Pattern.compile("[ ,\t]*[,|\t][ ,\t]*");
    private static final Logger log = LoggerFactory.getLogger(PFPGrowth.class);

    /** Configuration key naming the serialization implementations Hadoop should use. */
    private static final String IO_SERIALIZATIONS_KEY = "io.serializations";

    /** Serializations required so that plain Java collections can be stringified. */
    private static final String IO_SERIALIZATIONS_VALUE =
        "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization";

    private PFPGrowth() {
    }

    /**
     * Deserializes a list of (feature, count) pairs stored as a string in {@code params}.
     *
     * <p>If {@code key} is absent, the serialized form of an empty list is used as the default, so the
     * result is then an empty list.
     *
     * <p>Side effect: sets {@code io.serializations} on {@code conf}.
     *
     * <p>NOTE(review): this relies on Java-native serialization; the parameter string should only ever
     * come from a trusted source.
     *
     * @param params parameter store holding the serialized list
     * @param key    parameter name to read
     * @param conf   Hadoop configuration used to build the stringifier (mutated, see above)
     * @return the deserialized list
     * @throws IOException if the stringifier fails to serialize the default or deserialize the value
     */
    public static List<Pair<String, Long>> deserializeList(Parameters params, String key, Configuration conf) throws IOException {
        List<Pair<String, Long>> emptyList = new ArrayList<Pair<String, Long>>();
        conf.set(IO_SERIALIZATIONS_KEY, IO_SERIALIZATIONS_VALUE);
        DefaultStringifier<List<Pair<String, Long>>> listStringifier =
            new DefaultStringifier<List<Pair<String, Long>>>(conf, GenericsUtil.getClass(emptyList));
        String serializedString = params.get(key, listStringifier.toString(emptyList));
        return listStringifier.fromString(serializedString);
    }

    /**
     * Deserializes a feature-to-value map stored as a string in {@code params}.
     *
     * <p>If {@code key} is absent, the serialized form of an empty map is used as the default, so the
     * result is then an empty map.
     *
     * <p>Side effect: sets {@code io.serializations} on {@code conf}.
     *
     * <p>NOTE(review): this relies on Java-native serialization; the parameter string should only ever
     * come from a trusted source.
     *
     * @param params parameter store holding the serialized map
     * @param key    parameter name to read
     * @param conf   Hadoop configuration used to build the stringifier (mutated, see above)
     * @return the deserialized map
     * @throws IOException if the stringifier fails to serialize the default or deserialize the value
     */
    public static Map<String, Long> deserializeMap(Parameters params, String key, Configuration conf) throws IOException {
        Map<String, Long> emptyMap = new HashMap<String, Long>();
        conf.set(IO_SERIALIZATIONS_KEY, IO_SERIALIZATIONS_VALUE);
        DefaultStringifier<Map<String, Long>> mapStringifier =
            new DefaultStringifier<Map<String, Long>>(conf, GenericsUtil.getClass(emptyMap));
        String serializedString = params.get(key, mapStringifier.toString(emptyMap));
        return mapStringifier.fromString(serializedString);
    }

    /**
     * Reads the output of the parallel counting stage and builds the feature list.
     *
     * <p>Every record found in the {@code part-*} sequence files under
     * {@code <output>/parallelcounting} whose count is at least {@link #MIN_SUPPORT} (default 3) is
     * kept. Entries are returned in the order a priority queue built with
     * {@link CountDescendingPairComparator} yields them.
     *
     * @param params parameter store; reads {@link #MIN_SUPPORT} and {@link #OUTPUT}
     * @return the (feature, count) pairs that meet the minimum support
     * @throws NumberFormatException if the {@link #MIN_SUPPORT} parameter is not an integer
     */
    public static List<Pair<String, Long>> readFList(Parameters params) {
        int minSupport = Integer.parseInt(params.get(MIN_SUPPORT, "3"));
        Configuration conf = new Configuration();
        PriorityQueue<Pair<String, Long>> queue =
            new PriorityQueue<Pair<String, Long>>(11, new CountDescendingPairComparator<String, Long>());
        Path parallelCountingPath = new Path(params.get(OUTPUT), PARALLEL_COUNTING);
        Path filesPattern = new Path(parallelCountingPath, FILE_PATTERN);
        for (Pair<Writable, LongWritable> record
            : new SequenceFileDirIterable<Writable, LongWritable>(filesPattern, PathType.GLOB, null, null, true, conf)) {
            long value = record.getSecond().get();
            if (value >= minSupport) {
                queue.add(new Pair<String, Long>(record.getFirst().toString(), value));
            }
        }
        // Drain through poll() so the result follows the comparator's order; iterating a
        // PriorityQueue directly gives no ordering guarantee.
        List<Pair<String, Long>> fList = new ArrayList<Pair<String, Long>>(queue.size());
        while (!queue.isEmpty()) {
            fList.add(queue.poll());
        }
        return fList;
    }

    /**
     * Reads the final frequent patterns from the {@code part-*} files under
     * {@code <output>/frequentpatterns}.
     *
     * @param params parameter store; reads {@link #OUTPUT}
     * @return all patterns from all matching files, concatenated; empty if no file matches or the
     *         output directory does not exist
     * @throws IOException if the file system cannot be accessed or a file cannot be read
     */
    public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Parameters params) throws IOException {
        Configuration conf = new Configuration();
        Path frequentPatternsPath = new Path(params.get(OUTPUT), FREQUENT_PATTERNS);
        FileSystem fs = FileSystem.get(frequentPatternsPath.toUri(), conf);
        FileStatus[] outputFiles = fs.globStatus(new Path(frequentPatternsPath, FILE_PATTERN));
        List<Pair<String, TopKStringPatterns>> ret = new ArrayList<Pair<String, TopKStringPatterns>>();
        // globStatus may return null rather than an empty array when the path does not exist.
        if (outputFiles == null) {
            return ret;
        }
        for (FileStatus fileStatus : outputFiles) {
            ret.addAll(FPGrowth.readFrequentPattern(conf, fileStatus.getPath()));
        }
        return ret;
    }

    /**
     * Runs the complete pipeline: parallel counting, item grouping, transaction sorting,
     * parallel FP-Growth and aggregation, in that order.
     *
     * @param params parameter store; mutated by the individual stages ({@link #F_LIST} and
     *               {@link #G_LIST} are set by grouping and cleared again by aggregation)
     * @throws IOException            if a stage fails on I/O
     * @throws InterruptedException   if a job is interrupted while waiting for completion
     * @throws ClassNotFoundException if a job class cannot be found
     * @throws IllegalStateException  if any MapReduce job reports failure
     */
    public static void runPFPGrowth(Parameters params) throws IOException, InterruptedException, ClassNotFoundException {
        startParallelCounting(params);
        startGroupingItems(params);
        startTransactionSorting(params);
        startParallelFPGrowth(params);
        startAggregating(params);
    }

    /**
     * Runs the aggregation job, reading {@code <output>/fpgrowth} and writing
     * {@code <output>/frequentpatterns} (any existing output there is deleted first).
     *
     * <p>Side effect: {@link #F_LIST} and {@link #G_LIST} in {@code params} are set to the empty
     * string before the parameters are placed in the job configuration, and are not restored.
     *
     * @param params parameter store; reads {@link #OUTPUT}
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be found
     * @throws IllegalStateException  if the job reports failure
     */
    public static void startAggregating(Parameters params) throws IOException, InterruptedException, ClassNotFoundException {
        params.set(F_LIST, "");
        params.set(G_LIST, "");
        Configuration conf = newJobConfiguration(params);
        Path input = new Path(params.get(OUTPUT), FPGROWTH);
        Job job = new Job(conf, "PFP Aggregator Driver running over input: " + input);
        job.setJarByClass(PFPGrowth.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TopKStringPatterns.class);
        FileInputFormat.addInputPath(job, input);
        Path outPath = new Path(params.get(OUTPUT), FREQUENT_PATTERNS);
        FileOutputFormat.setOutputPath(job, outPath);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(AggregatorMapper.class);
        job.setCombinerClass(AggregatorReducer.class);
        job.setReducerClass(AggregatorReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        HadoopUtil.delete(conf, outPath);
        runJobOrFail(job);
    }

    /**
     * Builds the feature list and assigns every feature to a group, storing both (serialized) in
     * {@code params} under {@link #F_LIST} and {@link #G_LIST}.
     *
     * <p>Features are taken in the order returned by {@link #readFList(Parameters)} and split into
     * consecutive runs of {@code ceil(featureCount / numGroups)} features; the n-th run gets group
     * id n (starting at 0).
     *
     * @param params parameter store; reads {@link #NUM_GROUPS} (default 50) plus whatever
     *               {@link #readFList(Parameters)} reads
     * @throws IOException              if serializing the lists fails
     * @throws NumberFormatException    if {@link #NUM_GROUPS} is not an integer
     * @throws IllegalArgumentException if {@link #NUM_GROUPS} is not positive
     */
    public static void startGroupingItems(Parameters params) throws IOException {
        Configuration conf = new Configuration();
        List<Pair<String, Long>> fList = readFList(params);
        int numGroups = Integer.parseInt(params.get(NUM_GROUPS, "50"));
        if (numGroups <= 0) {
            throw new IllegalArgumentException(NUM_GROUPS + " must be positive but was " + numGroups);
        }
        // Ceiling division: the last group may be smaller than the others.
        long maxPerGroup = ((long) fList.size() + numGroups - 1) / numGroups;
        Map<String, Long> gList = new HashMap<String, Long>();
        long i = 0L;
        for (Pair<String, Long> featureFreq : fList) {
            // maxPerGroup >= 1 whenever this loop body runs, because fList is non-empty here.
            gList.put(featureFreq.getFirst(), i / maxPerGroup);
            ++i;
        }
        log.info("No of Features: {}", fList.size());
        params.set(G_LIST, serializeMap(gList, conf));
        params.set(F_LIST, serializeList(fList, conf));
    }

    /**
     * Runs the parallel counting job over the text input named by {@link #INPUT}, writing
     * sequence files to {@code <output>/parallelcounting} (any existing output there is deleted
     * first).
     *
     * @param params parameter store; reads {@link #INPUT} and {@link #OUTPUT}
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be found
     * @throws IllegalStateException  if the job reports failure
     */
    public static void startParallelCounting(Parameters params) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = newJobConfiguration(params);
        String input = params.get(INPUT);
        Job job = new Job(conf, "Parallel Counting Driver running over input: " + input);
        job.setJarByClass(PFPGrowth.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.addInputPath(job, new Path(input));
        Path outPath = new Path(params.get(OUTPUT), PARALLEL_COUNTING);
        FileOutputFormat.setOutputPath(job, outPath);
        HadoopUtil.delete(conf, outPath);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(ParallelCountingMapper.class);
        job.setCombinerClass(ParallelCountingReducer.class);
        job.setReducerClass(ParallelCountingReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        runJobOrFail(job);
    }

    /**
     * Runs the transaction sorting job over the text input named by {@link #INPUT}, writing
     * sequence files to {@code <output>/sortedoutput} (any existing output there is deleted first).
     *
     * <p>{@link #G_LIST} is blanked in {@code params} while the job configuration is built and the
     * job runs, and is put back afterwards - also when the job fails or throws.
     *
     * @param params parameter store; reads {@link #INPUT}, {@link #OUTPUT} and {@link #G_LIST}
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be found
     * @throws IllegalStateException  if the job reports failure
     */
    public static void startTransactionSorting(Parameters params) throws IOException, InterruptedException, ClassNotFoundException {
        String gList = params.get(G_LIST);
        params.set(G_LIST, "");
        try {
            Configuration conf = newJobConfiguration(params);
            String input = params.get(INPUT);
            Job job = new Job(conf, "PFP Transaction Sorting running over input" + input);
            job.setJarByClass(PFPGrowth.class);
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(TransactionTree.class);
            job.setOutputKeyClass(LongWritable.class);
            job.setOutputValueClass(TransactionTree.class);
            FileInputFormat.addInputPath(job, new Path(input));
            Path outPath = new Path(params.get(OUTPUT), SORTED_OUTPUT);
            FileOutputFormat.setOutputPath(job, outPath);
            HadoopUtil.delete(conf, outPath);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(TransactionSortingMapper.class);
            job.setReducerClass(TransactionSortingReducer.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            runJobOrFail(job);
        } finally {
            // Always hand the caller's parameters back intact, even if the job blew up.
            params.set(G_LIST, gList);
        }
    }

    /**
     * Runs the parallel FP-Growth job, reading {@code <output>/sortedoutput} and writing
     * {@code <output>/fpgrowth} (any existing output there is deleted first).
     *
     * @param params parameter store; reads {@link #OUTPUT}
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be found
     * @throws IllegalStateException  if the job reports failure
     */
    public static void startParallelFPGrowth(Parameters params) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = newJobConfiguration(params);
        Path input = new Path(params.get(OUTPUT), SORTED_OUTPUT);
        Job job = new Job(conf, "PFP Growth Driver running over input" + input);
        job.setJarByClass(PFPGrowth.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(TransactionTree.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TopKStringPatterns.class);
        FileInputFormat.addInputPath(job, input);
        Path outPath = new Path(params.get(OUTPUT), FPGROWTH);
        FileOutputFormat.setOutputPath(job, outPath);
        HadoopUtil.delete(conf, outPath);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(ParallelFPGrowthMapper.class);
        job.setCombinerClass(ParallelFPGrowthCombiner.class);
        job.setReducerClass(ParallelFPGrowthReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        runJobOrFail(job);
    }

    /** Creates a job configuration carrying {@code params} and the shared compression settings. */
    private static Configuration newJobConfiguration(Parameters params) {
        Configuration conf = new Configuration();
        conf.set(PFP_PARAMETERS, params.toString());
        conf.set("mapred.compress.map.output", "true");
        conf.set("mapred.output.compression.type", "BLOCK");
        return conf;
    }

    /** Submits {@code job}, waits for it, and throws {@link IllegalStateException} if it did not succeed. */
    private static void runJobOrFail(Job job) throws IOException, InterruptedException, ClassNotFoundException {
        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException("Job failed: " + job.getJobName());
        }
    }

    /** Serializes {@code list} to a string; sets {@code io.serializations} on {@code conf} as a side effect. */
    private static String serializeList(List<Pair<String, Long>> list, Configuration conf) throws IOException {
        conf.set(IO_SERIALIZATIONS_KEY, IO_SERIALIZATIONS_VALUE);
        DefaultStringifier<List<Pair<String, Long>>> listStringifier =
            new DefaultStringifier<List<Pair<String, Long>>>(conf, GenericsUtil.getClass(list));
        return listStringifier.toString(list);
    }

    /** Serializes {@code map} to a string; sets {@code io.serializations} on {@code conf} as a side effect. */
    private static String serializeMap(Map<String, Long> map, Configuration conf) throws IOException {
        conf.set(IO_SERIALIZATIONS_KEY, IO_SERIALIZATIONS_VALUE);
        DefaultStringifier<Map<String, Long>> mapStringifier =
            new DefaultStringifier<Map<String, Long>>(conf, GenericsUtil.getClass(map));
        return mapStringifier.toString(map);
    }
}

