/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.examples.ml;

import java.util.Arrays;
import java.util.List;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

/**
 * Example program that fits a {@link CountVectorizer} on a tiny in-memory corpus and
 * prints the transformed DataFrame.
 *
 * <p>The corpus consists of two rows, each holding a single {@code text} column that
 * contains an array of string tokens. The fitted model writes its result to a
 * {@code feature} column.
 */
public class JavaCountVectorizerExample {
    /**
     * Builds the sample corpus, fits a {@link CountVectorizerModel} on it, and shows the
     * transformed rows on the console.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("JavaCountVectorizerExample")
                .getOrCreate();
        try {
            // Each row carries exactly one value: the list of tokens for that document.
            List<Row> data = Arrays.asList(
                    RowFactory.create(Arrays.asList("a", "b", "c")),
                    RowFactory.create(Arrays.asList("a", "b", "b", "c", "a")));

            // Single non-nullable column "text" holding an array of (nullable) strings.
            StructType schema = new StructType(new StructField[]{
                    new StructField(
                            "text",
                            new ArrayType(DataTypes.StringType, true),
                            false,
                            Metadata.empty())
            });
            Dataset<Row> df = spark.createDataFrame(data, schema);

            // Fit a CountVectorizerModel from the corpus.
            CountVectorizerModel cvModel = new CountVectorizer()
                    .setInputCol("text")
                    .setOutputCol("feature")
                    .setVocabSize(3)
                    .setMinDF(2.0)
                    .fit(df);

            // Alternatively, a CountVectorizerModel can be defined directly from an
            // a-priori vocabulary. It is constructed here for illustration only and is
            // intentionally not used for the transform below.
            CountVectorizerModel aPrioriModel = new CountVectorizerModel(new String[]{"a", "b", "c"})
                    .setInputCol("text")
                    .setOutputCol("feature");

            cvModel.transform(df).show();
        } finally {
            // Always stop the session, even when fitting or transforming fails.
            spark.stop();
        }
    }
}

