/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.examples.ml;

import java.util.Arrays;
import java.util.List;
import org.apache.spark.ml.feature.RegexTokenizer;
import org.apache.spark.ml.feature.Tokenizer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

/**
 * Example program that splits a few sample sentences into words with Spark ML's
 * {@link Tokenizer} and {@link RegexTokenizer} and prints the resulting tokens
 * to standard output.
 */
public class JavaTokenizerExample {
    /** Upper bound on the number of rows pulled back to the driver for printing. */
    private static final int MAX_ROWS_TO_PRINT = 3;

    /**
     * Builds a three-row DataFrame of (label, sentence), tokenizes the
     * {@code sentence} column into a {@code words} column with each tokenizer,
     * and prints the tokens of every row on its own line.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("JavaTokenizerExample").getOrCreate();
        try {
            List<Row> data = Arrays.asList(
                RowFactory.create(0, "Hi I heard about Spark"),
                RowFactory.create(1, "I wish Java could use case classes"),
                RowFactory.create(2, "Logistic,regression,models,are,neat"));
            StructType schema = new StructType(new StructField[]{
                new StructField("label", DataTypes.IntegerType, false, Metadata.empty()),
                new StructField("sentence", DataTypes.StringType, false, Metadata.empty())});
            Dataset<Row> sentenceDataFrame = spark.createDataFrame(data, schema);

            Tokenizer tokenizer = new Tokenizer()
                .setInputCol("sentence")
                .setOutputCol("words");
            printWords(tokenizer.transform(sentenceDataFrame));

            // The pattern "\\W" is used as the delimiter, so the comma-separated
            // third sentence is broken into individual words as well.
            RegexTokenizer regexTokenizer = new RegexTokenizer()
                .setInputCol("sentence")
                .setOutputCol("words")
                .setPattern("\\W");
            printWords(regexTokenizer.transform(sentenceDataFrame));
        } finally {
            // Always release the session, even when a transformation fails.
            spark.stop();
        }
    }

    /**
     * Prints the contents of the {@code words} column, one row per line with
     * each word followed by a single space.
     *
     * @param tokenized a DataFrame containing {@code words} and {@code label} columns
     */
    private static void printWords(Dataset<Row> tokenized) {
        for (Row row : tokenized.select("words", "label").takeAsList(MAX_ROWS_TO_PRINT)) {
            List<String> words = row.getList(0);
            for (String word : words) {
                System.out.print(word + " ");
            }
            System.out.println();
        }
    }
}

