/*
 * Decompiled with CFR 0.152.
 */
package net.sansa_stack.ml.spark.similarity.examples;

import java.io.Serializable;
import net.sansa_stack.ml.spark.similarity.examples.SimilarityStacking$;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.JaccardModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.MinHashModel;
import net.sansa_stack.ml.spark.utils.FeatureExtractorModel;
import org.apache.jena.riot.Lang;
import org.apache.jena.sys.JenaSystem;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import org.apache.spark.sql.functions$;
import scala.Function1;
import scala.Predef$;
import scala.collection.Seq;
import scala.reflect.api.JavaUniverse;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.TypeTags;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;

/**
 * Decompiled singleton holder for the {@code SimilarityStacking} example.
 *
 * <p>The single instance is created by the static initializer and published through
 * {@link #MODULE$} by the private constructor. {@link #main(String[])} is an instance
 * method and is therefore reached via {@code MODULE$}.
 *
 * <p>The example reads an N-Triples file into a DataFrame, extracts features per URI,
 * vectorizes them with a {@link CountVectorizer}, computes candidate pairs with a
 * {@link MinHashModel} similarity join and finally adds a column computed by a
 * {@link JaccardModel} on those pairs. Intermediate DataFrames are printed with
 * {@code show(...)}.
 */
public final class SimilarityStacking$ {
    /** The singleton instance; assigned once from the private constructor. */
    public static SimilarityStacking$ MODULE$;

    static {
        // Instantiating the class is enough: the constructor stores `this` in MODULE$.
        new SimilarityStacking$();
    }

    /**
     * Runs the example pipeline end to end and prints intermediate results.
     *
     * <p>The input path is hard-coded as a relative path, so the result depends on the
     * working directory the job is started from.
     *
     * @param args command-line arguments; not read anywhere in this method
     */
    public void main(String[] args) {
        // Session is configured with the Kryo serializer; an existing session is reused if present.
        SparkSession spark = SparkSession$.MODULE$.builder().appName("MinMal Semantic Similarity Estimation Calls").config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").getOrCreate();
        JenaSystem.init();
        String inputPath = "./sansa-ml/sansa-ml-spark/src/main/resources/movieData/movie.nt";
        // Read the file as N-Triples through the SANSA RDF reader and cache it, since it is both shown and transformed.
        Dataset triplesDf = ((Dataset)net.sansa_stack.rdf.spark.io.package$.MODULE$.RDFDataFrameReader(spark.read()).rdf(Lang.NTRIPLES).apply((Object)inputPath)).cache();
        triplesDf.show(false);
        // NOTE(review): the meaning of mode "an" is defined by FeatureExtractorModel, not visible here - confirm.
        FeatureExtractorModel featureExtractorModel = new FeatureExtractorModel().setMode("an");
        // Keep only rows whose "uri" value starts with "m" (see $anonfun$main$1).
        Dataset extractedFeaturesDataFrame = featureExtractorModel.transform(triplesDf).filter((Function1 & Serializable & scala.Serializable)t -> BoxesRunTime.boxToBoolean((boolean)SimilarityStacking$.$anonfun$main$1(t)));
        // $anonfun$main$2 has the same body as $anonfun$main$1, so this applies the same predicate a second time.
        Dataset filteredFeaturesDataFrame = extractedFeaturesDataFrame.filter((Function1 & Serializable & scala.Serializable)t -> BoxesRunTime.boxToBoolean((boolean)SimilarityStacking$.$anonfun$main$2(t))).cache();
        filteredFeaturesDataFrame.show(false);
        // Fit a CountVectorizer on "extractedFeatures" and write the result to "vectorizedFeatures".
        CountVectorizerModel cvModel = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("vectorizedFeatures").fit(filteredFeaturesDataFrame);
        Dataset tmpCvDf = cvModel.transform(filteredFeaturesDataFrame);
        // Reflection universe and mirror used below to build the TypeTag for the UDF's input type.
        JavaUniverse $u = package$.MODULE$.universe();
        JavaUniverse.JavaMirror $m = package$.MODULE$.universe().runtimeMirror(this.getClass().getClassLoader());
        // Compiler-generated TypeCreator that resolves the type org.apache.spark.ml.linalg.Vector
        // from whatever mirror it is handed.
        public final class Net_sansa_stack_ml_spark_similarity_examples_SimilarityStacking$$typecreator1$1
        extends TypeCreator {
            public <U extends Universe> Types.TypeApi apply(Mirror<U> $m$untyped) {
                Universe $u = $m$untyped.universe();
                Mirror<U> $m = $m$untyped;
                return $m.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }

            public Net_sansa_stack_ml_spark_similarity_examples_SimilarityStacking$$typecreator1$1() {
            }
        }
        // UDF Vector -> Boolean that is true when the vector has at least one non-zero entry (see $anonfun$main$3).
        UserDefinedFunction isNoneZeroVector = functions$.MODULE$.udf((Function1 & Serializable & scala.Serializable)v -> BoxesRunTime.boxToBoolean((boolean)SimilarityStacking$.$anonfun$main$3(v)), ((TypeTags)package$.MODULE$.universe()).TypeTag().Boolean(), ((TypeTags)$u).TypeTag().apply((Mirror)$m, (TypeCreator)new Net_sansa_stack_ml_spark_similarity_examples_SimilarityStacking$$typecreator1$1()));
        // Drop rows with all-zero feature vectors and keep only the "uri" and "vectorizedFeatures" columns.
        Dataset countVectorizedFeaturesDataFrame = tmpCvDf.filter(isNoneZeroVector.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("vectorizedFeatures")}))).select("uri", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"vectorizedFeatures"})).cache();
        countVectorizedFeaturesDataFrame.show(false);
        MinHashModel minHashModel = (MinHashModel)new MinHashModel().setInputCol("vectorizedFeatures");
        // Self-join of the vectorized DataFrame.
        // NOTE(review): 1.0 is presumably the distance threshold and "minHashDistance" the output column name - confirm against MinHashModel.similarityJoin.
        Dataset<Row> aps = minHashModel.similarityJoin((Dataset<Row>)countVectorizedFeaturesDataFrame, (Dataset<Row>)countVectorizedFeaturesDataFrame, 1.0, "minHashDistance");
        aps.show(false);
        // Re-attach the feature vectors of both sides of each pair by joining on "uriA" and "uriB",
        // renaming the vector column to "datasetA" / "datasetB" after each join.
        // NOTE(review): the join above was given "minHashDistance", yet the column dropped here is "distance" - verify this column exists in aps.
        Dataset tmp = aps.join(countVectorizedFeaturesDataFrame.withColumnRenamed("uri", "uriA"), "uriA").withColumnRenamed("vectorizedFeatures", "datasetA").join(countVectorizedFeaturesDataFrame.withColumnRenamed("uri", "uriB"), "uriB").withColumnRenamed("vectorizedFeatures", "datasetB").drop("distance");
        tmp.show();
        JaccardModel jaccardModel = (JaccardModel)new JaccardModel().setInputCol("vectorizedFeatures");
        // Add a "jaccard" column computed from the two vector columns and print uriA, uriB, jaccard.
        tmp.withColumn("jaccard", jaccardModel.similarityEstimation().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("datasetA"), functions$.MODULE$.col("datasetB")}))).select("uriA", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"uriB", "jaccard"})).show(false);
    }

    /**
     * Predicate of the first row filter in {@link #main(String[])}.
     *
     * @param t row that provides a "uri" field, which is cast to {@code String}
     * @return {@code true} if the "uri" value starts with "m"
     */
    public static final /* synthetic */ boolean $anonfun$main$1(Row t) {
        return ((String)t.getAs("uri")).startsWith("m");
    }

    /**
     * Predicate of the second row filter in {@link #main(String[])}; its body is identical
     * to {@link #$anonfun$main$1(Row)}.
     *
     * @param t row that provides a "uri" field, which is cast to {@code String}
     * @return {@code true} if the "uri" value starts with "m"
     */
    public static final /* synthetic */ boolean $anonfun$main$2(Row t) {
        return ((String)t.getAs("uri")).startsWith("m");
    }

    /**
     * Body of the {@code isNoneZeroVector} UDF in {@link #main(String[])}.
     *
     * @param v feature vector to inspect
     * @return {@code true} if {@code v} reports more than zero non-zero entries
     */
    public static final /* synthetic */ boolean $anonfun$main$3(Vector v) {
        return v.numNonzeros() > 0;
    }

    /** Private so the only instance is the one created by the static initializer; publishes it as {@link #MODULE$}. */
    private SimilarityStacking$() {
        MODULE$ = this;
    }
}

