/*
 * Decompiled with CFR 0.152.
 */
package eu.stratosphere.example.java.record.wordcount;

import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.common.Program;
import eu.stratosphere.api.common.ProgramDescription;
import eu.stratosphere.api.common.operators.FileDataSink;
import eu.stratosphere.api.common.operators.FileDataSource;
import eu.stratosphere.api.common.operators.GenericDataSink;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.java.record.functions.FunctionAnnotation;
import eu.stratosphere.api.java.record.functions.MapFunction;
import eu.stratosphere.api.java.record.functions.ReduceFunction;
import eu.stratosphere.api.java.record.io.CsvOutputFormat;
import eu.stratosphere.api.java.record.io.TextInputFormat;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.api.java.record.operators.ReduceOperator;
import eu.stratosphere.client.LocalExecutor;
import eu.stratosphere.types.IntValue;
import eu.stratosphere.types.Record;
import eu.stratosphere.types.StringValue;
import eu.stratosphere.types.Value;
import eu.stratosphere.util.AsciiUtils;
import eu.stratosphere.util.Collector;
import java.util.Iterator;

public class WordCountOptimized
implements Program,
ProgramDescription {
    public Plan getPlan(String ... args) {
        int numSubTasks = args.length > 0 ? Integer.parseInt(args[0]) : 1;
        String dataInput = args.length > 1 ? args[1] : "";
        String output = args.length > 2 ? args[2] : "";
        FileDataSource source = new FileDataSource(TextInputFormat.class, dataInput, "Input Lines");
        source.setParameter("textformat.charset", "ASCII");
        MapOperator mapper = MapOperator.builder(TokenizeLine.class).input(new Operator[]{source}).name("Tokenize Lines").build();
        ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, (int)0).input(new Operator[]{mapper}).name("Count Words").build();
        FileDataSink out = new FileDataSink(CsvOutputFormat.class, output, (Operator)reducer, "Word Counts");
        ((CsvOutputFormat.ConfigBuilder)((CsvOutputFormat.ConfigBuilder)((CsvOutputFormat.ConfigBuilder)CsvOutputFormat.configureRecordFormat((FileDataSink)out).recordDelimiter('\n')).fieldDelimiter(' ')).field(StringValue.class, 0)).field(IntValue.class, 1);
        Plan plan = new Plan((GenericDataSink)out, "WordCount Example");
        plan.setDefaultParallelism(numSubTasks);
        return plan;
    }

    public String getDescription() {
        return "Parameters: [numSubStasks] [input] [output]";
    }

    public static void main(String[] args) throws Exception {
        WordCountOptimized wc = new WordCountOptimized();
        Plan plan = wc.getPlan("1", "file:///path/to/input", "file:///path/to/output");
        LocalExecutor.execute((Plan)plan);
        System.exit(0);
    }

    @ReduceOperator.Combinable
    @FunctionAnnotation.ConstantFields(value={0})
    public static class CountWords
    extends ReduceFunction {
        private final IntValue cnt = new IntValue();

        public void reduce(Iterator<Record> records, Collector<Record> out) throws Exception {
            Record element = null;
            int sum = 0;
            while (records.hasNext()) {
                element = records.next();
                IntValue i = (IntValue)element.getField(1, IntValue.class);
                sum += i.getValue();
            }
            this.cnt.setValue(sum);
            element.setField(1, (Value)this.cnt);
            out.collect((Object)element);
        }

        public void combine(Iterator<Record> records, Collector<Record> out) throws Exception {
            this.reduce(records, out);
        }
    }

    public static class TokenizeLine
    extends MapFunction {
        private final Record outputRecord = new Record();
        private final StringValue word = new StringValue();
        private final IntValue one = new IntValue(1);
        private final AsciiUtils.WhitespaceTokenizer tokenizer = new AsciiUtils.WhitespaceTokenizer();

        public void map(Record record, Collector<Record> collector) {
            StringValue line = (StringValue)record.getField(0, StringValue.class);
            AsciiUtils.replaceNonWordChars((StringValue)line, (char)' ');
            AsciiUtils.toLowerCase((StringValue)line);
            this.tokenizer.setStringToTokenize(line);
            while (this.tokenizer.next(this.word)) {
                this.outputRecord.setField(0, (Value)this.word);
                this.outputRecord.setField(1, (Value)this.one);
                collector.collect((Object)this.outputRecord);
            }
        }
    }
}

