/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.examples.ml;

import java.util.Arrays;
import java.util.List;
import org.apache.spark.ml.feature.BucketedRandomProjectionLSH;
import org.apache.spark.ml.feature.BucketedRandomProjectionLSHModel;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.VectorUDT;
import org.apache.spark.ml.linalg.Vectors;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

/**
 * Example program that exercises {@link BucketedRandomProjectionLSH} on two small
 * in-memory datasets of two-dimensional dense vectors.
 *
 * <p>The program fits an LSH model on the first dataset and then prints, via
 * {@code show()}, the results of: the feature transformation, approximate similarity
 * joins (on both raw and pre-transformed inputs), an approximate self-join, and
 * approximate nearest-neighbor searches for a fixed query vector.
 */
public class JavaBucketedRandomProjectionLSHExample {
    /**
     * Runs the example end to end and prints each result table to standard output.
     *
     * @param args command-line arguments; not read by this example
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("JavaBucketedRandomProjectionLSHExample")
            .getOrCreate();
        // Stop the session even when one of the steps below throws, so a failed run
        // does not leave the Spark session alive.
        try {
            // Rows are (id, vector) pairs; ids 0-3 go to dataset A, ids 4-7 to dataset B.
            List<Row> dataA = Arrays.asList(
                RowFactory.create(0, Vectors.dense(1.0, 1.0)),
                RowFactory.create(1, Vectors.dense(1.0, -1.0)),
                RowFactory.create(2, Vectors.dense(-1.0, -1.0)),
                RowFactory.create(3, Vectors.dense(-1.0, 1.0)));
            List<Row> dataB = Arrays.asList(
                RowFactory.create(4, Vectors.dense(1.0, 0.0)),
                RowFactory.create(5, Vectors.dense(-1.0, 0.0)),
                RowFactory.create(6, Vectors.dense(0.0, 1.0)),
                RowFactory.create(7, Vectors.dense(0.0, -1.0)));

            // Both datasets share one schema: a non-nullable int "id" column and a
            // non-nullable vector "keys" column.
            StructType schema = new StructType(new StructField[]{
                new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
                new StructField("keys", new VectorUDT(), false, Metadata.empty())
            });
            Dataset<Row> dfA = spark.createDataFrame(dataA, schema);
            Dataset<Row> dfB = spark.createDataFrame(dataB, schema);

            // Query vector used by the nearest-neighbor searches below.
            Vector key = Vectors.dense(1.0, 0.0);

            // The estimator reads vectors from "keys" and writes its output to "values".
            BucketedRandomProjectionLSH mh = new BucketedRandomProjectionLSH()
                .setBucketLength(2.0)
                .setNumHashTables(3)
                .setInputCol("keys")
                .setOutputCol("values");
            BucketedRandomProjectionLSHModel model = mh.fit(dfA);

            // Feature transformation: show dataset A with the added output column.
            model.transform(dfA).show();

            // Keep the transformed datasets cached; they are passed to the join and the
            // nearest-neighbor search below alongside the untransformed inputs.
            Dataset<Row> transformedA = model.transform(dfA).cache();
            Dataset<Row> transformedB = model.transform(dfB).cache();

            // Approximate similarity join of A and B with threshold 1.5, first on the
            // raw inputs and then on the pre-transformed inputs.
            model.approxSimilarityJoin(dfA, dfB, 1.5).show();
            model.approxSimilarityJoin(transformedA, transformedB, 1.5).show();

            // Self-join of A with threshold 2.5; the id filter keeps only rows where the
            // left id is strictly smaller than the right id, which drops self-pairs and
            // mirrored duplicates.
            model.approxSimilarityJoin(dfA, dfA, 2.5)
                .filter("datasetA.id < datasetB.id")
                .show();

            // Approximate search for the 2 nearest neighbors of the query vector, again
            // on both the raw and the pre-transformed dataset A.
            model.approxNearestNeighbors(dfA, key, 2).show();
            model.approxNearestNeighbors(transformedA, key, 2).show();
        } finally {
            spark.stop();
        }
    }
}

