/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.examples.ml;

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.ml.feature.OneHotEncoder;
import org.apache.spark.ml.feature.StringIndexer;
import org.apache.spark.ml.feature.StringIndexerModel;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

/**
 * Example application that runs a {@link StringIndexer} followed by a
 * {@link OneHotEncoder} over a small in-memory DataFrame and prints the
 * {@code id} and {@code categoryVec} columns of the result.
 */
public class JavaOneHotEncoderExample {
    /**
     * Builds a six-row DataFrame with columns {@code id} and {@code category},
     * fits a {@link StringIndexer} on {@code category} to produce
     * {@code categoryIndex}, feeds that column through a {@link OneHotEncoder}
     * to produce {@code categoryVec}, and shows the result.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("JavaOneHotEncoderExample");
        JavaSparkContext jsc = new JavaSparkContext(conf);
        try {
            SQLContext sqlContext = new SQLContext(jsc);

            // Each row is (id, category); the id literals are boxed as Integer.
            JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
                RowFactory.create(0, "a"),
                RowFactory.create(1, "b"),
                RowFactory.create(2, "c"),
                RowFactory.create(3, "a"),
                RowFactory.create(4, "a"),
                RowFactory.create(5, "c")
            ));

            // The declared column type must match the Java type stored in the rows.
            // The ids above are Integer values, so "id" is IntegerType; declaring it
            // as DoubleType leaves the schema inconsistent with the data and risks a
            // ClassCastException once the column is actually read.
            StructType schema = new StructType(new StructField[]{
                new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
                new StructField("category", DataTypes.StringType, false, Metadata.empty())
            });
            DataFrame df = sqlContext.createDataFrame(jrdd, schema);

            // The indexer has to be fitted on the data before it can transform it.
            StringIndexerModel indexer = new StringIndexer()
                .setInputCol("category")
                .setOutputCol("categoryIndex")
                .fit(df);
            DataFrame indexed = indexer.transform(df);

            // The encoder is applied directly via transform(); it consumes the
            // column produced by the indexer.
            OneHotEncoder encoder = new OneHotEncoder()
                .setInputCol("categoryIndex")
                .setOutputCol("categoryVec");
            DataFrame encoded = encoder.transform(indexed);

            encoded.select("id", "categoryVec").show();
        } finally {
            // Stop the context even when one of the steps above throws.
            jsc.stop();
        }
    }
}

