/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import java.io.IOException;
import org.apache.spark.annotation.Experimental;
import org.apache.spark.ml.Estimator;
import org.apache.spark.ml.feature.CountVectorizer$;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.feature.CountVectorizerParams;
import org.apache.spark.ml.feature.CountVectorizerParams$class;
import org.apache.spark.ml.param.DoubleParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.shared.HasInputCol$class;
import org.apache.spark.ml.param.shared.HasOutputCol$class;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.DefaultParamsWritable$class;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable$class;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.util.collection.OpenHashMap;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.Predef$;
import scala.Serializable;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Iterable$;
import scala.collection.Seq;
import scala.math.Ordering;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

@Experimental
@ScalaSignature(bytes="\u0006\u0001\u0005\rc\u0001B\u0001\u0003\u00015\u0011qbQ8v]R4Vm\u0019;pe&TXM\u001d\u0006\u0003\u0007\u0011\tqAZ3biV\u0014XM\u0003\u0002\u0006\r\u0005\u0011Q\u000e\u001c\u0006\u0003\u000f!\tQa\u001d9be.T!!\u0003\u0006\u0002\r\u0005\u0004\u0018m\u00195f\u0015\u0005Y\u0011aA8sO\u000e\u00011\u0003\u0002\u0001\u000f-e\u00012a\u0004\t\u0013\u001b\u0005!\u0011BA\t\u0005\u0005%)5\u000f^5nCR|'\u000f\u0005\u0002\u0014)5\t!!\u0003\u0002\u0016\u0005\t!2i\\;oiZ+7\r^8sSj,'/T8eK2\u0004\"aE\f\n\u0005a\u0011!!F\"pk:$h+Z2u_JL'0\u001a:QCJ\fWn\u001d\t\u00035ui\u0011a\u0007\u0006\u00039\u0011\tA!\u001e;jY&\u0011ad\u0007\u0002\u0016\t\u00164\u0017-\u001e7u!\u0006\u0014\u0018-\\:Xe&$\u0018M\u00197f\u0011!\u0001\u0003A!b\u0001\n\u0003\n\u0013aA;jIV\t!\u0005\u0005\u0002$S9\u0011AeJ\u0007\u0002K)\ta%A\u0003tG\u0006d\u0017-\u0003\u0002)K\u00051\u0001K]3eK\u001aL!AK\u0016\u0003\rM#(/\u001b8h\u0015\tAS\u0005\u0003\u0005.\u0001\t\u0005\t\u0015!\u0003#\u0003\u0011)\u0018\u000e\u001a\u0011\t\u000b=\u0002A\u0011\u0001\u0019\u0002\rqJg.\u001b;?)\t\t$\u0007\u0005\u0002\u0014\u0001!)\u0001E\fa\u0001E!)q\u0006\u0001C\u0001iQ\t\u0011\u0007C\u00037\u0001\u0011\u0005q'A\u0006tKRLe\u000e];u\u0007>dGC\u0001\u001d:\u001b\u0005\u0001\u0001\"\u0002\u001e6\u0001\u0004\u0011\u0013!\u0002<bYV,\u0007\"\u0002\u001f\u0001\t\u0003i\u0014\u0001D:fi>+H\u000f];u\u0007>dGC\u0001\u001d?\u0011\u0015Q4\b1\u0001#\u0011\u0015\u0001\u0005\u0001\"\u0001B\u00031\u0019X\r\u001e,pG\u0006\u00147+\u001b>f)\tA$\tC\u0003;\u007f\u0001\u00071\t\u0005\u0002%\t&\u0011Q)\n\u0002\u0004\u0013:$\b\"B$\u0001\t\u0003A\u0015\u0001C:fi6Kg\u000e\u0012$\u0015\u0005aJ\u0005\"\u0002\u001eG\u0001\u0004Q\u0005C\u0001\u0013L\u0013\taUE\u0001\u0004E_V\u0014G.\u001a\u0005\u0006\u001d\u0002!\taT\u0001\tg\u0016$X*\u001b8U\rR\u0011\u0001\b\u0015\u0005\u0006u5\u0003\rA\u0013\u0005\u0006%\u0002!\teU\u0001\u0004M&$HC\u0001\nU\u0011\u0015)\u0016\u000b1\u0001W\u0003\u001d!\u0017\r^1tKR\u0004\"a\u0016.\u000e\u0003aS!!\u0017\u0004\u0002\u0007
M\fH.\u0003\u0002\\1\nIA)\u0019;b\rJ\fW.\u001a\u0005\u0006;\u0002!\tEX\u0001\u0010iJ\fgn\u001d4pe6\u001c6\r[3nCR\u0011q,\u001a\t\u0003A\u000el\u0011!\u0019\u0006\u0003Eb\u000bQ\u0001^=qKNL!\u0001Z1\u0003\u0015M#(/^2u)f\u0004X\rC\u0003g9\u0002\u0007q,\u0001\u0004tG\",W.\u0019\u0005\u0006Q\u0002!\t%[\u0001\u0005G>\u0004\u0018\u0010\u0006\u00022U\")1n\u001aa\u0001Y\u0006)Q\r\u001f;sCB\u0011Q\u000e]\u0007\u0002]*\u0011q\u000eB\u0001\u0006a\u0006\u0014\u0018-\\\u0005\u0003c:\u0014\u0001\u0002U1sC6l\u0015\r\u001d\u0015\u0003\u0001M\u0004\"\u0001^<\u000e\u0003UT!A\u001e\u0004\u0002\u0015\u0005tgn\u001c;bi&|g.\u0003\u0002yk\naQ\t\u001f9fe&lWM\u001c;bY\u001e)!P\u0001E\u0001w\u0006y1i\\;oiZ+7\r^8sSj,'\u000f\u0005\u0002\u0014y\u001a)\u0011A\u0001E\u0001{N1AP`A\u0002\u0003\u0013\u0001\"\u0001J@\n\u0007\u0005\u0005QE\u0001\u0004B]f\u0014VM\u001a\t\u00055\u0005\u0015\u0011'C\u0002\u0002\bm\u0011Q\u0003R3gCVdG\u000fU1sC6\u001c(+Z1eC\ndW\rE\u0002%\u0003\u0017I1!!\u0004&\u00051\u0019VM]5bY&T\u0018M\u00197f\u0011\u0019yC\u0010\"\u0001\u0002\u0012Q\t1\u0010C\u0004\u0002\u0016q$\t%a\u0006\u0002\t1|\u0017\r\u001a\u000b\u0004c\u0005e\u0001bBA\u000e\u0003'\u0001\rAI\u0001\u0005a\u0006$\b\u000e\u000b\u0004\u0002\u0014\u0005}\u0011Q\u0005\t\u0004i\u0006\u0005\u0012bAA\u0012k\n)1+\u001b8dK\u0006\u0012\u0011qE\u0001\u0006c92d\u0006\r\u0005\n\u0003Wa\u0018\u0011!C\u0005\u0003[\t1B]3bIJ+7o\u001c7wKR\u0011\u0011q\u0006\t\u0005\u0003c\tY$\u0004\u0002\u00024)!\u0011QGA\u001c\u0003\u0011a\u0017M\\4\u000b\u0005\u0005e\u0012\u0001\u00026bm\u0006LA!!\u0010\u00024\t1qJ\u00196fGRDS\u0001`A\u0010\u0003KAS!_A\u0010\u0003K\u0001")
/*
 * Estimator that learns a vocabulary from a column of token sequences and
 * returns a CountVectorizerModel built from that vocabulary.
 *
 * This file is decompiler output for a class compiled from Scala (see the
 * ScalaSignature annotation above). Constructs such as
 * `new Serializable(this){ ... }` are the decompiler's rendering of compiled
 * closures and `X$class.method(this)` calls are trait-implementation
 * forwarders; they are kept in that form here.
 *
 * Parameters visible in this class:
 *   - inputCol / outputCol: column names.
 *   - vocabSize: maximum number of terms kept (default set here: 262144).
 *   - minDF: minimum document frequency for a term to be kept (default set
 *     here: 1.0). A value >= 1.0 is used as an absolute document count; a
 *     value < 1.0 is multiplied by the number of input rows.
 *   - minTF: exposed through minTF()/setMinTF(); it is not read by fit().
 */
public class CountVectorizer
extends Estimator<CountVectorizerModel>
implements CountVectorizerParams,
DefaultParamsWritable {
    /** Identifier of this estimator instance; also passed to the model created by fit(). */
    private final String uid;
    // The parameter fields below are assigned through the `..._setter_$..._$eq`
    // methods further down (presumably invoked by the `$class.$init$` calls in
    // the constructor -- TODO confirm against the trait implementations).
    private final IntParam vocabSize;
    private final DoubleParam minDF;
    private final DoubleParam minTF;
    private final Param<String> outputCol;
    private final Param<String> inputCol;

    /**
     * Returns the reader provided by the companion object.
     *
     * @return the MLReader obtained from {@code CountVectorizer$.MODULE$.read()}
     */
    public static MLReader<CountVectorizer> read() {
        return CountVectorizer$.MODULE$.read();
    }

    /**
     * Loads a CountVectorizer by delegating to the companion object's {@code load}.
     *
     * @param string argument forwarded unchanged to the companion's {@code load}
     * @return the loaded estimator
     */
    public static CountVectorizer load(String string) {
        return CountVectorizer$.MODULE$.load(string);
    }

    /**
     * Returns a writer for this instance, as produced by
     * {@code DefaultParamsWritable$class.write}.
     */
    @Override
    public MLWriter write() {
        return DefaultParamsWritable$class.write(this);
    }

    /**
     * Saves this instance by delegating to {@code MLWritable$class.save}.
     *
     * @param path destination forwarded to the trait implementation
     * @throws IOException declared by the overridden method
     */
    @Override
    public void save(String path) throws IOException {
        MLWritable$class.save(this, path);
    }

    /** @return the vocabSize parameter object (not its value) */
    @Override
    public IntParam vocabSize() {
        return this.vocabSize;
    }

    /** @return the minDF parameter object (not its value) */
    @Override
    public DoubleParam minDF() {
        return this.minDF;
    }

    /** @return the minTF parameter object (not its value) */
    @Override
    public DoubleParam minTF() {
        return this.minTF;
    }

    /** Compiler-generated setter that stores the vocabSize parameter object. */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$vocabSize_$eq(IntParam x$1) {
        this.vocabSize = x$1;
    }

    /** Compiler-generated setter that stores the minDF parameter object. */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$minDF_$eq(DoubleParam x$1) {
        this.minDF = x$1;
    }

    /** Compiler-generated setter that stores the minTF parameter object. */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$minTF_$eq(DoubleParam x$1) {
        this.minTF = x$1;
    }

    /** @return the vocabSize value as resolved by {@code CountVectorizerParams$class.getVocabSize} */
    @Override
    public int getVocabSize() {
        return CountVectorizerParams$class.getVocabSize(this);
    }

    /** @return the minDF value as resolved by {@code CountVectorizerParams$class.getMinDF} */
    @Override
    public double getMinDF() {
        return CountVectorizerParams$class.getMinDF(this);
    }

    /**
     * Validates the input schema and derives the output schema by delegating to
     * {@code CountVectorizerParams$class.validateAndTransformSchema}.
     *
     * @param schema schema of the dataset to be processed
     * @return the schema returned by the trait implementation
     */
    @Override
    public StructType validateAndTransformSchema(StructType schema) {
        return CountVectorizerParams$class.validateAndTransformSchema(this, schema);
    }

    /** @return the minTF value as resolved by {@code CountVectorizerParams$class.getMinTF} */
    @Override
    public double getMinTF() {
        return CountVectorizerParams$class.getMinTF(this);
    }

    /** @return the outputCol parameter object (not its value) */
    @Override
    public final Param<String> outputCol() {
        return this.outputCol;
    }

    /** Compiler-generated setter that stores the outputCol parameter object. */
    @Override
    public final void org$apache$spark$ml$param$shared$HasOutputCol$_setter_$outputCol_$eq(Param x$1) {
        this.outputCol = x$1;
    }

    /** @return the output column name as resolved by {@code HasOutputCol$class.getOutputCol} */
    @Override
    public final String getOutputCol() {
        return HasOutputCol$class.getOutputCol(this);
    }

    /** @return the inputCol parameter object (not its value) */
    @Override
    public final Param<String> inputCol() {
        return this.inputCol;
    }

    /** Compiler-generated setter that stores the inputCol parameter object. */
    @Override
    public final void org$apache$spark$ml$param$shared$HasInputCol$_setter_$inputCol_$eq(Param x$1) {
        this.inputCol = x$1;
    }

    /** @return the input column name as resolved by {@code HasInputCol$class.getInputCol} */
    @Override
    public final String getInputCol() {
        return HasInputCol$class.getInputCol(this);
    }

    /** @return the identifier supplied to (or generated by) the constructor */
    @Override
    public String uid() {
        return this.uid;
    }

    /**
     * Sets the input column name.
     *
     * @param value name of the column to read token sequences from
     * @return the result of {@code set(...)} cast to CountVectorizer, allowing call chaining
     */
    public CountVectorizer setInputCol(String value) {
        return (CountVectorizer)this.set(this.inputCol(), value);
    }

    /**
     * Sets the output column name.
     *
     * @param value name of the output column
     * @return the result of {@code set(...)} cast to CountVectorizer, allowing call chaining
     */
    public CountVectorizer setOutputCol(String value) {
        return (CountVectorizer)this.set(this.outputCol(), value);
    }

    /**
     * Sets the maximum vocabulary size used by {@link #fit(DataFrame)}.
     *
     * @param value maximum number of terms to keep
     * @return the result of {@code set(...)} cast to CountVectorizer, allowing call chaining
     */
    public CountVectorizer setVocabSize(int value) {
        return (CountVectorizer)this.set(this.vocabSize(), BoxesRunTime.boxToInteger((int)value));
    }

    /**
     * Sets the minimum document frequency used by {@link #fit(DataFrame)}.
     *
     * @param value absolute document count when >= 1.0, otherwise a fraction of the input row count
     * @return the result of {@code set(...)} cast to CountVectorizer, allowing call chaining
     */
    public CountVectorizer setMinDF(double value) {
        return (CountVectorizer)this.set(this.minDF(), BoxesRunTime.boxToDouble((double)value));
    }

    /**
     * Sets the minTF parameter. It is not consulted by {@link #fit(DataFrame)} in this class.
     *
     * @param value new minTF value
     * @return the result of {@code set(...)} cast to CountVectorizer, allowing call chaining
     */
    public CountVectorizer setMinTF(double value) {
        return (CountVectorizer)this.set(this.minTF(), BoxesRunTime.boxToDouble((double)value));
    }

    /**
     * Learns a vocabulary from the input column and returns a model holding it.
     *
     * <p>Steps: validate the schema; read the input column as sequences of tokens;
     * compute, per term, the total occurrence count and the number of rows
     * containing it; drop terms whose row count is below the minDF threshold;
     * keep at most vocabSize terms ordered by descending total count.
     *
     * <p>The intermediate RDDs that this method caches are unpersisted before it
     * returns or throws, so repeated calls do not accumulate cached blocks.
     *
     * @param dataset data whose input column holds token sequences
     * @return a CountVectorizerModel created with this estimator's uid and the
     *         learned vocabulary, with this estimator set as parent and parameter
     *         values copied onto it
     * @throws RuntimeException raised by {@code Predef.require} when no term survives
     *         filtering (IllegalArgumentException in the standard Scala library)
     */
    @Override
    public CountVectorizerModel fit(DataFrame dataset) {
        this.transformSchema(dataset.schema(), true);
        int vocSize = BoxesRunTime.unboxToInt((Object)this.$(this.vocabSize()));
        // One Seq<String> of tokens per input row.
        RDD input = dataset.select(this.$(this.inputCol()), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Seq<String> apply(Row x$1) {
                return (Seq)x$1.getAs(0);
            }
        }, ClassTag$.MODULE$.apply(Seq.class));
        // Read the parameter once instead of once per use.
        double minDFParam = BoxesRunTime.unboxToDouble((Object)this.$(this.minDF()));
        boolean inputCached = false;
        RDD wordCounts = null;
        Tuple2[] tmpSortedWC;
        try {
            double minDf;
            if (minDFParam >= 1.0) {
                // Already an absolute number of documents.
                minDf = minDFParam;
            } else {
                // Fraction of documents: needs the row count. The input is cached
                // because it is traversed again below; flag it before the action so
                // it is released even if count() fails.
                inputCached = true;
                minDf = minDFParam * (double)input.cache().count();
            }
            wordCounts = RDD$.MODULE$.rddToPairRDDFunctions(input.flatMap((Function1)new Serializable(this){
                public static final long serialVersionUID = 0L;

                // Emits (word, (occurrences in this document, 1)) for each distinct word.
                public final Iterable<Tuple2<String, Tuple2<Object, Object>>> apply(Seq<String> x0$1) {
                    Seq<String> seq = x0$1;
                    OpenHashMap.mcJ.sp wc = new OpenHashMap.mcJ.sp(ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.Long());
                    seq.foreach((Function1)new Serializable(this, (OpenHashMap)wc){
                        public static final long serialVersionUID = 0L;
                        private final OpenHashMap wc$1;

                        public final long apply(String w2) {
                            // Default value 1 for a new word, otherwise increment by 1.
                            return this.wc$1.changeValue$mcJ$sp((Object)w2, (Function0)new Serializable(this){
                                public static final long serialVersionUID = 0L;

                                public final long apply() {
                                    return this.apply$mcJ$sp();
                                }

                                public long apply$mcJ$sp() {
                                    return 1L;
                                }
                            }, (Function1)new Serializable(this){
                                public static final long serialVersionUID = 0L;

                                public final long apply(long x$2) {
                                    return this.apply$mcJJ$sp(x$2);
                                }

                                public long apply$mcJJ$sp(long x$2) {
                                    return x$2 + 1L;
                                }
                            });
                        }
                        {
                            this.wc$1 = wc$1;
                        }
                    });
                    Iterable iterable = (Iterable)wc.map((Function1)new Serializable(this){
                        public static final long serialVersionUID = 0L;

                        public final Tuple2<String, Tuple2<Object, Object>> apply(Tuple2<String, Object> x0$2) {
                            Tuple2<String, Object> tuple2 = x0$2;
                            if (tuple2 != null) {
                                String word = (String)tuple2._1();
                                long count = tuple2._2$mcJ$sp();
                                Tuple2 tuple22 = new Tuple2((Object)word, (Object)new Tuple2.mcJI.sp(count, 1));
                                return tuple22;
                            }
                            throw new MatchError(tuple2);
                        }
                    }, Iterable$.MODULE$.canBuildFrom());
                    return iterable;
                }
            }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Tuple2.class), (Ordering)Ordering.String$.MODULE$).reduceByKey((Function2)new Serializable(this){
                public static final long serialVersionUID = 0L;

                // Sums total counts and document frequencies for the same word.
                public final Tuple2<Object, Object> apply(Tuple2<Object, Object> x0$3, Tuple2<Object, Object> x1$1) {
                    Tuple2 tuple2 = new Tuple2(x0$3, x1$1);
                    if (tuple2 != null) {
                        Tuple2 tuple22 = (Tuple2)tuple2._1();
                        Tuple2 tuple23 = (Tuple2)tuple2._2();
                        if (tuple22 != null) {
                            long wc1 = tuple22._1$mcJ$sp();
                            int df1 = tuple22._2$mcI$sp();
                            if (tuple23 != null) {
                                long wc2 = tuple23._1$mcJ$sp();
                                int df2 = tuple23._2$mcI$sp();
                                Tuple2.mcJI.sp sp2 = new Tuple2.mcJI.sp(wc1 + wc2, df1 + df2);
                                return sp2;
                            }
                        }
                    }
                    throw new MatchError((Object)tuple2);
                }
            }).filter((Function1)new Serializable(this, minDf){
                public static final long serialVersionUID = 0L;
                private final double minDf$1;

                // Keeps words whose document frequency reaches the threshold.
                public final boolean apply(Tuple2<String, Tuple2<Object, Object>> x0$4) {
                    Tuple2 tuple2;
                    Tuple2<String, Tuple2<Object, Object>> tuple22 = x0$4;
                    if (tuple22 != null && (tuple2 = (Tuple2)tuple22._2()) != null) {
                        int df = tuple2._2$mcI$sp();
                        boolean bl = (double)df >= this.minDf$1;
                        return bl;
                    }
                    throw new MatchError(tuple22);
                }
                {
                    this.minDf$1 = minDf$1;
                }
            }).map((Function1)new Serializable(this){
                public static final long serialVersionUID = 0L;

                // Drops the document frequency, leaving (word, total count).
                public final Tuple2<String, Object> apply(Tuple2<String, Tuple2<Object, Object>> x0$5) {
                    Tuple2<String, Tuple2<Object, Object>> tuple2 = x0$5;
                    if (tuple2 != null) {
                        String word = (String)tuple2._1();
                        Tuple2 tuple22 = (Tuple2)tuple2._2();
                        if (tuple22 != null) {
                            long count = tuple22._1$mcJ$sp();
                            Tuple2 tuple23 = new Tuple2((Object)word, (Object)BoxesRunTime.boxToLong((long)count));
                            return tuple23;
                        }
                    }
                    throw new MatchError(tuple2);
                }
            }, ClassTag$.MODULE$.apply(Tuple2.class)).cache();
            long fullVocabSize = wordCounts.count();
            // Few enough terms: collect everything and sort locally by descending count.
            // Otherwise: sort on the cluster in descending order and take the top vocSize.
            tmpSortedWC = fullVocabSize <= (long)vocSize ? (Tuple2[])Predef$.MODULE$.refArrayOps((Object[])wordCounts.collect()).sortBy((Function1)new Serializable(this){
                public static final long serialVersionUID = 0L;

                public final long apply(Tuple2<String, Object> x$3) {
                    return -x$3._2$mcJ$sp();
                }
            }, (Ordering)Ordering.Long$.MODULE$) : (Tuple2[])wordCounts.sortBy((Function1)new Serializable(this){
                public static final long serialVersionUID = 0L;

                public final long apply(Tuple2<String, Object> x$4) {
                    return x$4._2$mcJ$sp();
                }
            }, false, wordCounts.sortBy$default$3(), (Ordering)Ordering.Long$.MODULE$, ClassTag$.MODULE$.Long()).take(vocSize);
        } finally {
            // Both results above are materialized on the driver (collect/take), so the
            // cached RDDs are no longer needed. Non-blocking so cleanup does not delay
            // the caller.
            if (wordCounts != null) {
                wordCounts.unpersist(false);
            }
            if (inputCached) {
                input.unpersist(false);
            }
        }
        String[] vocab = (String[])Predef$.MODULE$.refArrayOps((Object[])tmpSortedWC).map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final String apply(Tuple2<String, Object> x$5) {
                return (String)x$5._1();
            }
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        Predef$.MODULE$.require(vocab.length > 0, (Function0)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final String apply() {
                return "The vocabulary size should be > 0. Lower minDF as necessary.";
            }
        });
        return this.copyValues(new CountVectorizerModel(this.uid(), vocab).setParent(this), this.copyValues$default$2());
    }

    /**
     * Derives the output schema for the given input schema.
     *
     * @param schema input schema
     * @return the result of {@link #validateAndTransformSchema(StructType)}
     */
    @Override
    public StructType transformSchema(StructType schema) {
        return this.validateAndTransformSchema(schema);
    }

    /**
     * Creates a copy of this estimator via {@code defaultCopy}.
     *
     * @param extra additional parameter values forwarded to {@code defaultCopy}
     * @return the copy, cast to CountVectorizer
     */
    @Override
    public CountVectorizer copy(ParamMap extra) {
        return (CountVectorizer)this.defaultCopy(extra);
    }

    /**
     * Creates an estimator with the given identifier, runs the trait initializers
     * and registers the defaults vocabSize = 262144 and minDF = 1.0.
     *
     * @param uid identifier for this instance
     */
    public CountVectorizer(String uid) {
        this.uid = uid;
        HasInputCol$class.$init$(this);
        HasOutputCol$class.$init$(this);
        CountVectorizerParams$class.$init$(this);
        MLWritable$class.$init$(this);
        DefaultParamsWritable$class.$init$(this);
        this.setDefault((Seq<ParamPair<?>>)Predef$.MODULE$.wrapRefArray((Object[])new ParamPair[]{this.vocabSize().$minus$greater(BoxesRunTime.boxToInteger((int)262144)), this.minDF().$minus$greater(BoxesRunTime.boxToDouble((double)1.0))}));
    }

    /**
     * Creates an estimator whose identifier comes from
     * {@code Identifiable$.MODULE$.randomUID("cntVec")}.
     */
    public CountVectorizer() {
        this(Identifiable$.MODULE$.randomUID("cntVec"));
    }
}

