/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.examples.mllib;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.examples.mllib.LDAExample;
import org.apache.spark.examples.mllib.LDAExample$;
import org.apache.spark.examples.mllib.LDAExample$Params$;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.feature.RegexTokenizer;
import org.apache.spark.ml.feature.StopWordsRemover;
import org.apache.spark.mllib.clustering.DistributedLDAModel;
import org.apache.spark.mllib.clustering.EMLDAOptimizer;
import org.apache.spark.mllib.clustering.LDA;
import org.apache.spark.mllib.clustering.LDAModel;
import org.apache.spark.mllib.clustering.LDAOptimizer;
import org.apache.spark.mllib.clustering.OnlineLDAOptimizer;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Row$;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SQLContext$;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.Option;
import scala.Predef$;
import scala.Serializable;
import scala.Some;
import scala.StringContext;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.GenIterable;
import scala.collection.GenTraversableOnce;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.StringBuilder;
import scala.math.Numeric;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.Nothing$;
import scala.runtime.ScalaRunTime$;
import scala.sys.package$;
import scopt.OptionParser;
import scopt.Read$;

public final class LDAExample$ {
    public static final LDAExample$ MODULE$;

    // Singleton bootstrap: the instance is created only for its side effect;
    // the private constructor is what stores it into MODULE$.
    static {
        new LDAExample$();
    }

    /**
     * Command-line entry point (decompiled from the Scala object {@code LDAExample}).
     *
     * <p>Builds a scopt {@code OptionParser} seeded with the default {@code Params}, registers one
     * option per tunable field plus the unbounded, required {@code <input>...} argument, and parses
     * {@code args}. On a successful parse the resulting params are passed to the run method; otherwise
     * usage is shown as an error and the process exits with status 1.
     *
     * @param args raw command-line arguments
     */
    public void main(String[] args) {
        LDAExample.Params defaultParams = new LDAExample.Params(LDAExample$Params$.MODULE$.apply$default$1(), LDAExample$Params$.MODULE$.apply$default$2(), LDAExample$Params$.MODULE$.apply$default$3(), LDAExample$Params$.MODULE$.apply$default$4(), LDAExample$Params$.MODULE$.apply$default$5(), LDAExample$Params$.MODULE$.apply$default$6(), LDAExample$Params$.MODULE$.apply$default$7(), LDAExample$Params$.MODULE$.apply$default$8(), LDAExample$Params$.MODULE$.apply$default$9(), LDAExample$Params$.MODULE$.apply$default$10());
        // Anonymous parser subclass; its initializer block registers every option.
        // NOTE(review): defaultParams$1 is presumably the compiler-generated capture of defaultParams.
        OptionParser<LDAExample.Params> parser = new OptionParser<LDAExample.Params>(defaultParams){
            {
                this.head((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"LDAExample: an example LDA app for plain text data."}));
                // Each action below returns c.copy(...) with exactly one positional field replaced.
                // Positions are: input, k, maxIterations, docConcentration, topicConcentration,
                // vocabSize, stopwordFile, algorithm, checkpointDir, checkpointInterval.
                this.opt("k", Read$.MODULE$.intRead()).text(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"number of topics. default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)defaultParams$1.k())}))).action((Function2)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final LDAExample.Params apply(int x, LDAExample.Params c) {
                        int x$5 = x;
                        Seq<String> x$6 = c.copy$default$1();
                        int x$7 = c.copy$default$3();
                        double x$8 = c.copy$default$4();
                        double x$9 = c.copy$default$5();
                        int x$10 = c.copy$default$6();
                        String x$11 = c.copy$default$7();
                        String x$12 = c.copy$default$8();
                        Option<String> x$13 = c.copy$default$9();
                        int x$14 = c.copy$default$10();
                        return c.copy(x$6, x$5, x$7, x$8, x$9, x$10, x$11, x$12, x$13, x$14);
                    }
                });
                this.opt("maxIterations", Read$.MODULE$.intRead()).text(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"number of iterations of learning. default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)defaultParams$1.maxIterations())}))).action((Function2)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final LDAExample.Params apply(int x, LDAExample.Params c) {
                        int x$15 = x;
                        Seq<String> x$16 = c.copy$default$1();
                        int x$17 = c.copy$default$2();
                        double x$18 = c.copy$default$4();
                        double x$19 = c.copy$default$5();
                        int x$20 = c.copy$default$6();
                        String x$21 = c.copy$default$7();
                        String x$22 = c.copy$default$8();
                        Option<String> x$23 = c.copy$default$9();
                        int x$24 = c.copy$default$10();
                        return c.copy(x$16, x$17, x$15, x$18, x$19, x$20, x$21, x$22, x$23, x$24);
                    }
                });
                this.opt("docConcentration", Read$.MODULE$.doubleRead()).text(new StringBuilder().append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"amount of topic smoothing to use (> 1.0) (-1=auto)."})).s((Seq)Nil$.MODULE$)).append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"  default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToDouble((double)defaultParams$1.docConcentration())}))).toString()).action((Function2)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final LDAExample.Params apply(double x, LDAExample.Params c) {
                        double x$25 = x;
                        Seq<String> x$26 = c.copy$default$1();
                        int x$27 = c.copy$default$2();
                        int x$28 = c.copy$default$3();
                        double x$29 = c.copy$default$5();
                        int x$30 = c.copy$default$6();
                        String x$31 = c.copy$default$7();
                        String x$32 = c.copy$default$8();
                        Option<String> x$33 = c.copy$default$9();
                        int x$34 = c.copy$default$10();
                        return c.copy(x$26, x$27, x$28, x$25, x$29, x$30, x$31, x$32, x$33, x$34);
                    }
                });
                this.opt("topicConcentration", Read$.MODULE$.doubleRead()).text(new StringBuilder().append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"amount of term (word) smoothing to use (> 1.0) (-1=auto)."})).s((Seq)Nil$.MODULE$)).append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"  default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToDouble((double)defaultParams$1.topicConcentration())}))).toString()).action((Function2)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final LDAExample.Params apply(double x, LDAExample.Params c) {
                        double x$35 = x;
                        Seq<String> x$36 = c.copy$default$1();
                        int x$37 = c.copy$default$2();
                        int x$38 = c.copy$default$3();
                        double x$39 = c.copy$default$4();
                        int x$40 = c.copy$default$6();
                        String x$41 = c.copy$default$7();
                        String x$42 = c.copy$default$8();
                        Option<String> x$43 = c.copy$default$9();
                        int x$44 = c.copy$default$10();
                        return c.copy(x$36, x$37, x$38, x$39, x$35, x$40, x$41, x$42, x$43, x$44);
                    }
                });
                this.opt("vocabSize", Read$.MODULE$.intRead()).text(new StringBuilder().append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"number of distinct word types to use, chosen by frequency. (-1=all)"})).s((Seq)Nil$.MODULE$)).append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"  default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)defaultParams$1.vocabSize())}))).toString()).action((Function2)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final LDAExample.Params apply(int x, LDAExample.Params c) {
                        int x$45 = x;
                        Seq<String> x$46 = c.copy$default$1();
                        int x$47 = c.copy$default$2();
                        int x$48 = c.copy$default$3();
                        double x$49 = c.copy$default$4();
                        double x$50 = c.copy$default$5();
                        String x$51 = c.copy$default$7();
                        String x$52 = c.copy$default$8();
                        Option<String> x$53 = c.copy$default$9();
                        int x$54 = c.copy$default$10();
                        return c.copy(x$46, x$47, x$48, x$49, x$50, x$45, x$51, x$52, x$53, x$54);
                    }
                });
                this.opt("stopwordFile", Read$.MODULE$.stringRead()).text(new StringBuilder().append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"filepath for a list of stopwords. Note: This must fit on a single machine."})).s((Seq)Nil$.MODULE$)).append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"  default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{defaultParams$1.stopwordFile()}))).toString()).action((Function2)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final LDAExample.Params apply(String x, LDAExample.Params c) {
                        String x$55 = x;
                        Seq<String> x$56 = c.copy$default$1();
                        int x$57 = c.copy$default$2();
                        int x$58 = c.copy$default$3();
                        double x$59 = c.copy$default$4();
                        double x$60 = c.copy$default$5();
                        int x$61 = c.copy$default$6();
                        String x$62 = c.copy$default$8();
                        Option<String> x$63 = c.copy$default$9();
                        int x$64 = c.copy$default$10();
                        return c.copy(x$56, x$57, x$58, x$59, x$60, x$61, x$55, x$62, x$63, x$64);
                    }
                });
                this.opt("algorithm", Read$.MODULE$.stringRead()).text(new StringBuilder().append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"inference algorithm to use. em and online are supported."})).s((Seq)Nil$.MODULE$)).append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{" default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{defaultParams$1.algorithm()}))).toString()).action((Function2)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final LDAExample.Params apply(String x, LDAExample.Params c) {
                        String x$65 = x;
                        Seq<String> x$66 = c.copy$default$1();
                        int x$67 = c.copy$default$2();
                        int x$68 = c.copy$default$3();
                        double x$69 = c.copy$default$4();
                        double x$70 = c.copy$default$5();
                        int x$71 = c.copy$default$6();
                        String x$72 = c.copy$default$7();
                        Option<String> x$73 = c.copy$default$9();
                        int x$74 = c.copy$default$10();
                        return c.copy(x$66, x$67, x$68, x$69, x$70, x$71, x$72, x$65, x$73, x$74);
                    }
                });
                // checkpointDir is stored as an Option: the supplied string is wrapped in Some.
                this.opt("checkpointDir", Read$.MODULE$.stringRead()).text(new StringBuilder().append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Directory for checkpointing intermediate results."})).s((Seq)Nil$.MODULE$)).append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"  Checkpointing helps with recovery and eliminates temporary shuffle files on disk."})).s((Seq)Nil$.MODULE$)).append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"  default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{defaultParams$1.checkpointDir()}))).toString()).action((Function2)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final LDAExample.Params apply(String x, LDAExample.Params c) {
                        Some x$75 = new Some((Object)x);
                        Seq<String> x$76 = c.copy$default$1();
                        int x$77 = c.copy$default$2();
                        int x$78 = c.copy$default$3();
                        double x$79 = c.copy$default$4();
                        double x$80 = c.copy$default$5();
                        int x$81 = c.copy$default$6();
                        String x$82 = c.copy$default$7();
                        String x$83 = c.copy$default$8();
                        int x$84 = c.copy$default$10();
                        return c.copy(x$76, x$77, x$78, x$79, x$80, x$81, x$82, x$83, (Option<String>)x$75, x$84);
                    }
                });
                this.opt("checkpointInterval", Read$.MODULE$.intRead()).text(new StringBuilder().append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Iterations between each checkpoint.  Only used if checkpointDir is set."})).s((Seq)Nil$.MODULE$)).append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{" default: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)defaultParams$1.checkpointInterval())}))).toString()).action((Function2)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final LDAExample.Params apply(int x, LDAExample.Params c) {
                        int x$85 = x;
                        Seq<String> x$86 = c.copy$default$1();
                        int x$87 = c.copy$default$2();
                        int x$88 = c.copy$default$3();
                        double x$89 = c.copy$default$4();
                        double x$90 = c.copy$default$5();
                        int x$91 = c.copy$default$6();
                        String x$92 = c.copy$default$7();
                        String x$93 = c.copy$default$8();
                        Option<String> x$94 = c.copy$default$9();
                        return c.copy(x$86, x$87, x$88, x$89, x$90, x$91, x$92, x$93, x$94, x$85);
                    }
                });
                // Positional inputs accumulate: each occurrence is appended to the existing input Seq.
                this.arg("<input>...", Read$.MODULE$.stringRead()).text("input paths (directories) to plain text corpora.  Each text file line should hold 1 document.").unbounded().required().action((Function2)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final LDAExample.Params apply(String x, LDAExample.Params c) {
                        return c.copy((Seq<String>)((Seq)c.input().$colon$plus((Object)x, Seq$.MODULE$.canBuildFrom())), c.copy$default$2(), c.copy$default$3(), c.copy$default$4(), c.copy$default$5(), c.copy$default$6(), c.copy$default$7(), c.copy$default$8(), c.copy$default$9(), c.copy$default$10());
                    }
                });
            }
        };
        // Run on a successful parse; otherwise show usage as an error and exit with status 1.
        parser.parse((Seq)Predef$.MODULE$.wrapRefArray((Object[])args), (Object)defaultParams).map((Function1)new Serializable(){
            public static final long serialVersionUID = 0L;

            public final void apply(LDAExample.Params params) {
                LDAExample$.MODULE$.org$apache$spark$examples$mllib$LDAExample$$run(params);
            }
        }).getOrElse((Function0)new Serializable((OptionParser)parser){
            public static final long serialVersionUID = 0L;
            private final OptionParser parser$1;

            public final Nothing$ apply() {
                this.parser$1.showUsageAsError();
                return package$.MODULE$.exit(1);
            }
            {
                this.parser$1 = parser$1;
            }
        });
    }

    /**
     * Runs the LDA example end to end for the given parameters.
     *
     * <p>Steps, in order: create a {@code SparkContext} whose app name embeds {@code params}; set the
     * root log4j level to WARN; preprocess the input into a corpus, vocabulary and count; cache and
     * count the corpus and print a summary; choose the optimizer from the lower-cased algorithm name;
     * configure and run {@code LDA}; print timing (and, for a {@code DistributedLDAModel}, the log
     * likelihood divided by the document count); print the top 10 terms of each topic; stop the context.
     *
     * @param params parsed example parameters
     * @throws IllegalArgumentException if the algorithm name is neither "em" nor "online"
     * @throws MatchError if {@code preprocess} returns {@code null}
     */
    public void org$apache$spark$examples$mllib$LDAExample$$run(LDAExample.Params params) {
        Tuple3<RDD<Tuple2<Object, Vector>>, String[], Object> tuple3;
        // The labeled blocks are decompiler-generated control flow: breaking out of block5 reaches the
        // MatchError at the end of the method, breaking out of block8 reaches the IllegalArgumentException.
        block5: {
            block8: {
                EMLDAOptimizer eMLDAOptimizer;
                LDA lda;
                long actualCorpusSize;
                String[] vocabArray;
                RDD corpus;
                SparkContext sc;
                block7: {
                    String string;
                    block6: {
                        Tuple3 tuple32;
                        SparkConf conf = new SparkConf().setAppName(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"LDAExample with ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{params})));
                        sc = new SparkContext(conf);
                        Logger.getRootLogger().setLevel(Level.WARN);
                        // Preprocessing time is measured up to and including the corpus count below.
                        long preprocessStart = System.nanoTime();
                        tuple3 = this.preprocess(sc, params.input(), params.vocabSize(), params.stopwordFile());
                        // A null tuple falls through to the MatchError at the end of the method.
                        if (tuple3 == null) break block5;
                        RDD corpus2 = (RDD)tuple3._1();
                        String[] vocabArray2 = (String[])tuple3._2();
                        long actualNumTokens = BoxesRunTime.unboxToLong((Object)tuple3._3());
                        Tuple3 tuple33 = tuple32 = new Tuple3((Object)corpus2, (Object)vocabArray2, (Object)BoxesRunTime.boxToLong((long)actualNumTokens));
                        corpus = (RDD)tuple33._1();
                        vocabArray = (String[])tuple33._2();
                        long actualNumTokens2 = BoxesRunTime.unboxToLong((Object)tuple33._3());
                        corpus.cache();
                        actualCorpusSize = corpus.count();
                        int actualVocabSize = Predef$.MODULE$.refArrayOps((Object[])vocabArray).size();
                        // Nanoseconds to seconds.
                        double preprocessElapsed = (double)(System.nanoTime() - preprocessStart) / 1.0E9;
                        Predef$.MODULE$.println();
                        Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Corpus summary:"})).s((Seq)Nil$.MODULE$));
                        Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"\\t Training set size: ", " documents"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToLong((long)actualCorpusSize)})));
                        Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"\\t Vocabulary size: ", " terms"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)actualVocabSize)})));
                        Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"\\t Training set size: ", " tokens"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToLong((long)actualNumTokens2)})));
                        Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"\\t Preprocessing time: ", " sec"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToDouble((double)preprocessElapsed)})));
                        Predef$.MODULE$.println();
                        lda = new LDA();
                        // Optimizer selection is keyed on the lower-cased algorithm name.
                        String string2 = string = params.algorithm().toLowerCase();
                        // Decompiled equality test: leaves block6 unless the name equals "em".
                        if ("em" != null ? !"em".equals(string2) : string2 != null) break block6;
                        eMLDAOptimizer = new EMLDAOptimizer();
                        break block7;
                    }
                    String string3 = string;
                    // Leaves block8 (reaching the IllegalArgumentException) unless the name equals "online".
                    if ("online" != null ? !"online".equals(string3) : string3 != null) break block8;
                    // Mini-batch fraction is 0.05 plus the reciprocal of the document count.
                    // NOTE(review): the variable is typed EMLDAOptimizer but receives an OnlineLDAOptimizer
                    // chain here; this looks like a decompiler typing artifact.
                    eMLDAOptimizer = new OnlineLDAOptimizer().setMiniBatchFraction(0.05 + 1.0 / (double)actualCorpusSize);
                }
                EMLDAOptimizer optimizer = eMLDAOptimizer;
                lda.setOptimizer((LDAOptimizer)optimizer).setK(params.k()).setMaxIterations(params.maxIterations()).setDocConcentration(params.docConcentration()).setTopicConcentration(params.topicConcentration()).setCheckpointInterval(params.checkpointInterval());
                // The checkpoint directory is configured only when one was supplied.
                if (params.checkpointDir().nonEmpty()) {
                    sc.setCheckpointDir((String)params.checkpointDir().get());
                }
                long startTime = System.nanoTime();
                LDAModel ldaModel = lda.run(corpus);
                double elapsed = (double)(System.nanoTime() - startTime) / 1.0E9;
                Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Finished training LDA model.  Summary:"})).s((Seq)Nil$.MODULE$));
                Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"\\t Training time: ", " sec"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToDouble((double)elapsed)})));
                // The log likelihood is reported only when the trained model is a DistributedLDAModel,
                // and is averaged over the number of documents in the corpus.
                if (ldaModel instanceof DistributedLDAModel) {
                    DistributedLDAModel distLDAModel = (DistributedLDAModel)ldaModel;
                    double avgLogLikelihood = distLDAModel.logLikelihood() / (double)actualCorpusSize;
                    Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"\\t Training data average log likelihood: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToDouble((double)avgLogLikelihood)})));
                    Predef$.MODULE$.println();
                }
                // describeTopics(10) yields (int[] terms, double[] weights) pairs; the mapping below
                // replaces each term index with vocabArray[index], producing (term string, weight) pairs.
                Tuple2[] topicIndices = ldaModel.describeTopics(10);
                Tuple2[][] topics = (Tuple2[][])Predef$.MODULE$.refArrayOps((Object[])topicIndices).map((Function1)new Serializable(vocabArray){
                    public static final long serialVersionUID = 0L;
                    public final String[] vocabArray$1;

                    public final Tuple2<String, Object>[] apply(Tuple2<int[], double[]> x0$1) {
                        Tuple2<int[], double[]> tuple2 = x0$1;
                        if (tuple2 != null) {
                            int[] terms = (int[])tuple2._1();
                            double[] termWeights = (double[])tuple2._2();
                            Tuple2[] tuple2Array = (Tuple2[])Predef$.MODULE$.refArrayOps((Object[])Predef$.MODULE$.intArrayOps(terms).zip((GenIterable)Predef$.MODULE$.wrapDoubleArray(termWeights), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).map((Function1)new Serializable(this){
                                public static final long serialVersionUID = 0L;
                                private final /* synthetic */ anonfun.11 $outer;

                                public final Tuple2<String, Object> apply(Tuple2<Object, Object> x0$2) {
                                    Tuple2<Object, Object> tuple2 = x0$2;
                                    if (tuple2 != null) {
                                        int term = tuple2._1$mcI$sp();
                                        double weight = tuple2._2$mcD$sp();
                                        Tuple2 tuple22 = new Tuple2((Object)this.$outer.vocabArray$1[term], (Object)BoxesRunTime.boxToDouble((double)weight));
                                        return tuple22;
                                    }
                                    throw new MatchError(tuple2);
                                }
                                {
                                    if ($outer == null) {
                                        throw new NullPointerException();
                                    }
                                    this.$outer = $outer;
                                }
                            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
                            return tuple2Array;
                        }
                        throw new MatchError(tuple2);
                    }
                    {
                        this.vocabArray$1 = vocabArray$1;
                    }
                }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Tuple2.class))));
                Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", " topics:"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)params.k())})));
                // Print each topic as a "TOPIC <index>" header followed by one term/weight line per entry.
                Predef$.MODULE$.refArrayOps((Object[])Predef$.MODULE$.refArrayOps((Object[])topics).zipWithIndex(Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).foreach((Function1)new Serializable(){
                    public static final long serialVersionUID = 0L;

                    public final void apply(Tuple2<Tuple2<String, Object>[], Object> x0$3) {
                        Tuple2<Tuple2<String, Object>[], Object> tuple2 = x0$3;
                        if (tuple2 != null) {
                            Tuple2[] topic = (Tuple2[])tuple2._1();
                            int i = tuple2._2$mcI$sp();
                            Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"TOPIC ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{BoxesRunTime.boxToInteger((int)i)})));
                            Predef$.MODULE$.refArrayOps((Object[])topic).foreach((Function1)new Serializable(this){
                                public static final long serialVersionUID = 0L;

                                public final void apply(Tuple2<String, Object> x0$4) {
                                    Tuple2<String, Object> tuple2 = x0$4;
                                    if (tuple2 != null) {
                                        String term = (String)tuple2._1();
                                        double weight = tuple2._2$mcD$sp();
                                        Predef$.MODULE$.println((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "\\t", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{term, BoxesRunTime.boxToDouble((double)weight)})));
                                        BoxedUnit boxedUnit = BoxedUnit.UNIT;
                                        return;
                                    }
                                    throw new MatchError(tuple2);
                                }
                            });
                            Predef$.MODULE$.println();
                            BoxedUnit boxedUnit = BoxedUnit.UNIT;
                            return;
                        }
                        throw new MatchError(tuple2);
                    }
                });
                // NOTE(review): sc.stop() is reached only on this success path; the two throw
                // statements below exit without stopping the context.
                sc.stop();
                return;
            }
            throw new IllegalArgumentException(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Only em, online are supported but got ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{params.algorithm()})));
        }
        throw new MatchError(tuple3);
    }

    /**
     * Loads the text corpus and converts it into indexed term-count vectors.
     *
     * <p>The input paths are joined with "," and read as text into a single-column DataFrame named
     * "docs". A three-stage pipeline (RegexTokenizer, StopWordsRemover, CountVectorizer) is fitted on
     * that DataFrame and used to transform it; the "features" column is extracted, each vector is
     * paired with its {@code zipWithIndex} index, and the pair is swapped so the index comes first.
     *
     * @param sc           Spark context used to read the input and stopword files
     * @param paths        input paths, joined with "," before being passed to {@code textFile}
     * @param vocabSize    value passed to {@code CountVectorizer.setVocabSize}
     * @param stopwordFile path of an extra stopword file; an empty string means no extra stopwords
     * @return a tuple of (indexed documents, vocabulary of the fitted CountVectorizerModel,
     *         sum over all documents of {@code numActives()} as a boxed long)
     */
    private Tuple3<RDD<Tuple2<Object, Vector>>, String[], Object> preprocess(SparkContext sc, Seq<String> paths, int vocabSize, String stopwordFile) {
        String[] stringArray;
        SQLContext sqlContext = SQLContext$.MODULE$.getOrCreate(sc);
        DataFrame df = sqlContext.implicits().stringRddToDataFrameHolder(sc.textFile(paths.mkString(","), sc.textFile$default$2())).toDF((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"docs"}));
        if (stopwordFile.isEmpty()) {
            stringArray = (String[])Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class));
        } else {
            // The stopword file is collected into a local array, then every line is split on whitespace.
            String[] stopWordText = (String[])sc.textFile(stopwordFile, sc.textFile$default$2()).collect();
            stringArray = (String[])Predef$.MODULE$.refArrayOps((Object[])stopWordText).flatMap((Function1)new Serializable(){
                public static final long serialVersionUID = 0L;

                public final ArrayOps<String> apply(String x$2) {
                    return Predef$.MODULE$.refArrayOps((Object[])new StringOps(Predef$.MODULE$.augmentString(x$2)).stripMargin().split("\\s+"));
                }
            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        }
        String[] customizedStopWords = stringArray;
        RegexTokenizer tokenizer = (RegexTokenizer)new RegexTokenizer().setInputCol("docs").setOutputCol("rawTokens");
        StopWordsRemover stopWordsRemover = new StopWordsRemover().setInputCol("rawTokens").setOutputCol("tokens");
        // Custom stopwords are appended to the remover's existing stop word list, not substituted for it.
        stopWordsRemover.setStopWords((String[])Predef$.MODULE$.refArrayOps((Object[])stopWordsRemover.getStopWords()).$plus$plus((GenTraversableOnce)Predef$.MODULE$.refArrayOps((Object[])customizedStopWords), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class))));
        CountVectorizer countVectorizer = new CountVectorizer().setVocabSize(vocabSize).setInputCol("tokens").setOutputCol("features");
        Pipeline pipeline = new Pipeline().setStages((PipelineStage[])((Object[])new PipelineStage[]{tokenizer, stopWordsRemover, countVectorizer}));
        PipelineModel model = pipeline.fit(df);
        // Each selected row must hold exactly one value that is a Vector; anything else raises MatchError.
        RDD documents = model.transform(df).select("features", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).map((Function1)new Serializable(){
            public static final long serialVersionUID = 0L;

            public final Vector apply(Row x0$5) {
                Object features;
                Row row = x0$5;
                Some some = Row$.MODULE$.unapplySeq(row);
                if (!some.isEmpty() && some.get() != null && ((SeqLike)some.get()).lengthCompare(1) == 0 && (features = ((SeqLike)some.get()).apply(0)) instanceof Vector) {
                    Vector vector;
                    Vector vector2 = vector = (Vector)features;
                    return vector2;
                }
                throw new MatchError((Object)row);
            }
        }, ClassTag$.MODULE$.apply(Vector.class)).zipWithIndex().map((Function1)new Serializable(){
            public static final long serialVersionUID = 0L;

            public final Tuple2<Object, Vector> apply(Tuple2<Vector, Object> x$3) {
                return x$3.swap();
            }
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Stage index 2 is the CountVectorizer stage, matching the order passed to setStages above.
        // NOTE(review): the third element sums numActives() per document; the caller prints it as a
        // token count, but numActives presumably counts active vector entries rather than term
        // occurrences - TODO confirm.
        return new Tuple3((Object)documents, (Object)((CountVectorizerModel)model.stages()[2]).vocabulary(), (Object)BoxesRunTime.boxToLong((long)((long)RDD$.MODULE$.numericRDDToDoubleRDDFunctions(documents.map((Function1)new Serializable(){
            public static final long serialVersionUID = 0L;

            public final int apply(Tuple2<Object, Vector> x$4) {
                return ((Vector)x$4._2()).numActives();
            }
        }, ClassTag$.MODULE$.Int()), (Numeric)Numeric.IntIsIntegral$.MODULE$).sum())));
    }

    // Private so the only instance is the one created by the static initializer;
    // stores that instance into MODULE$.
    private LDAExample$() {
        MODULE$ = this;
    }
}

