/*
 * Decompiled with CFR 0.152.
 */
package net.sansa_stack.ml.spark.similarity.experiment;

import com.typesafe.config.Config;
import java.io.Serializable;
import java.util.Calendar;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.BatetModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.BraunBlanquetModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.DiceModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.JaccardModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.OchiaiModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.SimpsonModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.TverskyModel;
import net.sansa_stack.ml.spark.utils.ConfigResolver;
import net.sansa_stack.ml.spark.utils.FeatureExtractorModel;
import net.sansa_stack.ml.spark.utils.FileLister$;
import net.sansa_stack.rdf.spark.io.package$;
import org.apache.jena.riot.Lang;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.feature.MinHashLSH;
import org.apache.spark.ml.feature.MinHashLSHModel;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Row$;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.BooleanType$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.DoubleType$;
import org.apache.spark.sql.types.IntegerType$;
import org.apache.spark.sql.types.LongType$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.StructType$;
import scala.Function1;
import scala.Predef$;
import scala.Tuple13;
import scala.Tuple2;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.ListBuffer;
import scala.collection.mutable.ListBuffer$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.RichInt$;
import scala.runtime.java8.JFunction1;

public final class SimilarityPipelineExperiment$ {
    // Singleton holder in the shape scalac emits for an `object`. It is never assigned in the
    // code visible here. NOTE(review): presumably the constructor (outside this view) stores
    // `this` into it — confirm.
    public static SimilarityPipelineExperiment$ MODULE$;
    // log4j logger returned by Log(); its initialisation is not visible in this chunk.
    private final Logger Log;

    static {
        // Create the single instance when the class is loaded; the reference is not stored here.
        new SimilarityPipelineExperiment$();
    }

    /**
     * Accessor for this module's log4j logger.
     *
     * @return the logger held in the {@code Log} field
     */
    private Logger Log() {
        return Log;
    }

    /**
     * Entry point of the similarity-pipeline experiment.
     *
     * <p>Loads the configuration file named by {@code args[0]}, reads the parameter lists from it,
     * iterates over every combination of those parameters for each run and each input file, and
     * finally turns the collected result rows into a DataFrame that is shown and written as CSV.
     *
     * <p>This is decompiler output: identifiers ending in {@code $1} inside the nested lambdas are
     * CFR's rendering of captured locals, and {@code $anonfun$main$20} is a synthetic method that
     * is not defined in the visible part of the file.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the configuration file
     *             (it is read without a length check)
     */
    public void main(String[] args) {
        String configFilePath = args[0];
        Config config = new ConfigResolver(configFilePath).getConfig();
        // Only ERROR and above for the "org" logger hierarchy.
        Logger.getLogger((String)"org").setLevel(Level.ERROR);
        // Handed to the per-experiment lambda below; it is not applied to the SparkSession
        // builder used at the end of this method.
        String sparkMaster = config.getString("master");
        Predef$.MODULE$.println();
        String pathToFolder = config.getString("pathToFolder");
        Predef$.MODULE$.println((Object)new StringBuilder(39).append("For evaluation data we search in path: ").append(pathToFolder).toString());
        // Keep only the listed entries that end with ".nt".
        List inputAll = (List)FileLister$.MODULE$.getListOfFiles(pathToFolder).filter((Function1 & Serializable & scala.Serializable)x$1 -> BoxesRunTime.boxToBoolean((boolean)x$1.endsWith(".nt")));
        Predef$.MODULE$.println((Object)new StringBuilder(26).append("we found in provided path ").append(inputAll.size()).toString());
        Predef$.MODULE$.println((Object)"files are:");
        inputAll.foreach((Function1 & Serializable & scala.Serializable)x$2 -> {
            Predef$.MODULE$.println((Object)x$2);
            return BoxedUnit.UNIT;
        });
        Predef$.MODULE$.println();
        boolean showDataFrames = config.getBoolean("showDataFrames");
        int numberRuns = config.getInt("numberRuns");
        // Parameter lists: each config list is converted to a Scala List; the numeric ones are
        // additionally mapped from java.lang.Integer/Double to Scala's boxed primitives.
        List similarityEstimationModeAll = ((TraversableOnce)JavaConverters$.MODULE$.asScalaBufferConverter(config.getStringList("similarityEstimationModeAll")).asScala()).toList();
        List parametersFeatureExtractorModeAll = ((TraversableOnce)JavaConverters$.MODULE$.asScalaBufferConverter(config.getStringList("parametersFeatureExtractorModeAll")).asScala()).toList();
        List parameterCountVectorizerMinDfAll = (List)((TraversableOnce)JavaConverters$.MODULE$.asScalaBufferConverter(config.getIntList("parameterCountVectorizerMinDfAll")).asScala()).toList().map((Function1 & Serializable & scala.Serializable)x$3 -> BoxesRunTime.boxToInteger((int)Predef$.MODULE$.Integer2int(x$3)), List$.MODULE$.canBuildFrom());
        List parameterCountVectorizerMaxVocabSizeAll = (List)((TraversableOnce)JavaConverters$.MODULE$.asScalaBufferConverter(config.getIntList("parameterCountVectorizerMaxVocabSizeAll")).asScala()).toList().map((Function1 & Serializable & scala.Serializable)x$4 -> BoxesRunTime.boxToInteger((int)Predef$.MODULE$.Integer2int(x$4)), List$.MODULE$.canBuildFrom());
        List parameterSimilarityAlphaAll = (List)((TraversableOnce)JavaConverters$.MODULE$.asScalaBufferConverter(config.getDoubleList("parameterSimilarityAlphaAll")).asScala()).toList().map((Function1 & Serializable & scala.Serializable)x$5 -> BoxesRunTime.boxToDouble((double)Predef$.MODULE$.Double2double(x$5)), List$.MODULE$.canBuildFrom());
        List parameterSimilarityBetaAll = (List)((TraversableOnce)JavaConverters$.MODULE$.asScalaBufferConverter(config.getDoubleList("parameterSimilarityBetaAll")).asScala()).toList().map((Function1 & Serializable & scala.Serializable)x$6 -> BoxesRunTime.boxToDouble((double)Predef$.MODULE$.Double2double(x$6)), List$.MODULE$.canBuildFrom());
        List parameterNumHashTablesAll = (List)((TraversableOnce)JavaConverters$.MODULE$.asScalaBufferConverter(config.getIntList("parameterNumHashTablesAll")).asScala()).toList().map((Function1 & Serializable & scala.Serializable)x$7 -> BoxesRunTime.boxToInteger((int)Predef$.MODULE$.Integer2int(x$7)), List$.MODULE$.canBuildFrom());
        List parameterSimilarityAllPairThresholdAll = (List)((TraversableOnce)JavaConverters$.MODULE$.asScalaBufferConverter(config.getDoubleList("parameterSimilarityAllPairThresholdAll")).asScala()).toList().map((Function1 & Serializable & scala.Serializable)x$8 -> BoxesRunTime.boxToDouble((double)Predef$.MODULE$.Double2double(x$8)), List$.MODULE$.canBuildFrom());
        List parameterSimilarityNearestNeighborsKAll = (List)((TraversableOnce)JavaConverters$.MODULE$.asScalaBufferConverter(config.getIntList("parameterSimilarityNearestNeighborsKAll")).asScala()).toList().map((Function1 & Serializable & scala.Serializable)x$9 -> BoxesRunTime.boxToInteger((int)Predef$.MODULE$.Integer2int(x$9)), List$.MODULE$.canBuildFrom());
        boolean parameterOnlyMovieSimilarity = config.getBoolean("parameterOnlyMovieSimilarity");
        List pipelineComponents = ((TraversableOnce)JavaConverters$.MODULE$.asScalaBufferConverter(config.getStringList("pipelineComponents")).asScala()).toList();
        // Number of runs times the length of every list iterated below (int arithmetic).
        int totalNumberExperiments = numberRuns * inputAll.length() * similarityEstimationModeAll.length() * parametersFeatureExtractorModeAll.length() * parameterCountVectorizerMinDfAll.length() * parameterCountVectorizerMaxVocabSizeAll.length() * parameterSimilarityAlphaAll.length() * parameterSimilarityBetaAll.length() * parameterNumHashTablesAll.length() * parameterSimilarityAllPairThresholdAll.length() * parameterSimilarityNearestNeighborsKAll.length();
        // Start timestamp; with ':' and ' ' removed it becomes part of the output file name below.
        String evaluation_datetime = Calendar.getInstance().getTime().toString();
        Predef$.MODULE$.println();
        String outputFilePath = config.getString("outputFilePath");
        // Schema of one result row: 22 nullable columns (pipeline components, run, input info,
        // the parameter values, seven processing-time columns, onlyMovieSimilarity).
        StructType schema = StructType$.MODULE$.apply((Seq)List$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new StructField[]{new StructField("pipelineComponents", (DataType)StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("run", (DataType)IntegerType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("inputPath", (DataType)StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("inputFileName", (DataType)StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("inputFileSizeNumberTriples", (DataType)LongType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("similarityEstimationMode", (DataType)StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("parametersFeatureExtractorMode", (DataType)StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("parameterCountVectorizerMinDf", (DataType)IntegerType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("parameterCountVectorizerMaxVocabSize", (DataType)IntegerType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("parameterSimilarityAlpha", (DataType)DoubleType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("parameterSimilarityBeta", (DataType)DoubleType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("parameterNumHashTables", (DataType)IntegerType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("parameterSimilarityAllPairThreshold", (DataType)DoubleType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("parameterSimilarityNearestNeighborsK", (DataType)IntegerType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("processingTimeReadIn", (DataType)DoubleType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new 
StructField("processingTimeFeatureExtraction", (DataType)DoubleType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("processingTimeCountVectorizer", (DataType)DoubleType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("processingTimeSimilarityEstimatorSetup", (DataType)DoubleType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("processingTimeSimilarityEstimatorNearestNeighbors", (DataType)DoubleType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("processingTimeSimilarityEstimatorAllPairSimilarity", (DataType)DoubleType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("processingTimeTotal", (DataType)DoubleType$.MODULE$, true, StructField$.MODULE$.apply$default$4()), new StructField("onlyMovieSimilarity", (DataType)BooleanType$.MODULE$, true, StructField$.MODULE$.apply$default$4())})));
        // Starts empty and is parallelized into the result DataFrame after the loops.
        ListBuffer ex_results = (ListBuffer)ListBuffer$.MODULE$.apply((Seq)Nil$.MODULE$);
        IntRef counterCurrentRun = IntRef.create((int)0);
        // Nested foreach over run = 1..numberRuns (inclusive), every input and every value of each
        // parameter list. The innermost call goes to the synthetic $anonfun$main$20.
        // NOTE(review): that method is not visible here; since it receives counterCurrentRun,
        // totalNumberExperiments and ex_results it presumably reports progress, runs one
        // experiment and appends its Row to ex_results — confirm.
        RichInt$.MODULE$.to$extension0(Predef$.MODULE$.intWrapper(1), numberRuns).foreach$mVc$sp((Function1)(JFunction1.mcVI.sp & Serializable & scala.Serializable)run -> inputAll.foreach((Function1 & Serializable & scala.Serializable)input -> {
            similarityEstimationModeAll.foreach((Function1 & Serializable & scala.Serializable)similarityEstimationMode -> {
                parametersFeatureExtractorModeAll$1.foreach((Function1 & Serializable & scala.Serializable)parametersFeatureExtractorMode -> {
                    parameterCountVectorizerMinDfAll$1.foreach((Function1)(JFunction1.mcVI.sp & Serializable & scala.Serializable)parameterCountVectorizerMinDf -> parameterCountVectorizerMaxVocabSizeAll$1.foreach((Function1)(JFunction1.mcVI.sp & Serializable & scala.Serializable)parameterCountVectorizerMaxVocabSize -> parameterSimilarityAlphaAll$1.foreach((Function1)(JFunction1.mcVD.sp & Serializable & scala.Serializable)parameterSimilarityAlpha -> parameterSimilarityBetaAll$1.foreach((Function1)(JFunction1.mcVD.sp & Serializable & scala.Serializable)parameterSimilarityBeta -> parameterNumHashTablesAll$1.foreach((Function1)(JFunction1.mcVI.sp & Serializable & scala.Serializable)parameterNumHashTables -> parameterSimilarityAllPairThresholdAll$1.foreach((Function1)(JFunction1.mcVD.sp & Serializable & scala.Serializable)parameterSimilarityAllPairThreshold -> parameterSimilarityNearestNeighborsKAll$1.foreach((Function1 & Serializable & scala.Serializable)parameterSimilarityNearestNeighborsK -> SimilarityPipelineExperiment$.$anonfun$main$20(counterCurrentRun$1, totalNumberExperiments$1, sparkMaster$1, pipelineComponents$1, run$1, input$1, similarityEstimationMode$1, parametersFeatureExtractorMode, parameterCountVectorizerMinDf, parameterCountVectorizerMaxVocabSize, parameterSimilarityAlpha, parameterSimilarityBeta, parameterNumHashTables, parameterSimilarityAllPairThreshold, parameterOnlyMovieSimilarity$1, showDataFrames$1, ex_results$1, BoxesRunTime.unboxToInt((Object)parameterSimilarityNearestNeighborsK)))))))));
                    return BoxedUnit.UNIT;
                });
                return BoxedUnit.UNIT;
            });
            return BoxedUnit.UNIT;
        }));
        // Same appName and serializer setting as the builder call in runExperiment.
        SparkSession spark = SparkSession$.MODULE$.builder().appName("SimilarityPipelineExperiment").config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").getOrCreate();
        // Build the result DataFrame from the collected rows using the schema declared above.
        Dataset df = spark.createDataFrame(spark.sparkContext().parallelize((Seq)ex_results, spark.sparkContext().parallelize$default$2(), ClassTag$.MODULE$.apply(Row.class)), schema).cache();
        df.show();
        // Output location: outputFilePath + timestamp without ':' and ' ' + ".csv".
        String storageFilePath = new StringBuilder(4).append(outputFilePath).append(evaluation_datetime.replace(":", "").replace(" ", "")).append(".csv").toString();
        Predef$.MODULE$.println((Object)new StringBuilder(24).append("we store our file here: ").append(storageFilePath).toString());
        // Collapse to a single partition before writing the CSV with a header row.
        df.repartition(1).write().option("header", "true").format("csv").save(storageFilePath);
        spark.stop();
    }

    public Row runExperiment(String sparkMaster, List<String> pipelineComponents, int run, String inputPath, String similarityEstimationMode, String parametersFeatureExtractorMode, int parameterCountVectorizerMinDf, int parameterCountVectorizerMaxVocabSize, double parameterSimilarityAlpha, double parameterSimilarityBeta, int parameterNumHashTables, double parameterSimilarityAllPairThreshold, int parameterSimilarityNearestNeighborsK, boolean parameterOnlyMovieSimilarity, boolean showDataFrames) {
        Predef$.MODULE$.println((Object)"These are the parameters:");
        Predef$.MODULE$.println((Object)new Tuple13(pipelineComponents, (Object)BoxesRunTime.boxToInteger((int)run), (Object)inputPath, (Object)similarityEstimationMode, (Object)parametersFeatureExtractorMode, (Object)BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), (Object)BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), (Object)BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), (Object)BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), (Object)BoxesRunTime.boxToInteger((int)parameterNumHashTables), (Object)BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), (Object)BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), (Object)BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)));
        Predef$.MODULE$.println();
        SparkSession spark = SparkSession$.MODULE$.builder().appName("SimilarityPipelineExperiment").config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").getOrCreate();
        long experimentTime = System.nanoTime();
        long startTime = System.nanoTime();
        if (!pipelineComponents.contains((Object)"ri")) {
            return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, "no file", BoxesRunTime.boxToInteger((int)0), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
        }
        String inputFileName = (String)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])inputPath.split("/"))).last();
        Predef$.MODULE$.println((Object)"1: Read in data as Dataframe");
        Predef$.MODULE$.println((Object)new StringBuilder(27).append("\tthe used input string is: ").append(inputPath).toString());
        Lang lang = Lang.NTRIPLES;
        startTime = System.nanoTime();
        Dataset triplesDf = ((Dataset)package$.MODULE$.RDFDataFrameReader(spark.read()).rdf(lang).apply((Object)inputPath)).cache();
        long inputFileSizeNumberTriples = triplesDf.count();
        Predef$.MODULE$.println((Object)new StringBuilder(22).append("\tthe file has ").append(inputFileSizeNumberTriples).append(" triples").toString());
        double processingTimeReadIn = (double)(System.nanoTime() - startTime) / 1.0E9;
        Predef$.MODULE$.println((Object)new StringBuilder(27).append("\tthe read in needed ").append(processingTimeReadIn).append("seconds").toString());
        if (showDataFrames) {
            triplesDf.limit(10).show();
        }
        if (!pipelineComponents.contains((Object)"fe")) {
            return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
        }
        Predef$.MODULE$.println((Object)"2: Dataframe based feature extractor");
        if (parameterOnlyMovieSimilarity) {
            Predef$.MODULE$.println((Object)"\tFeature Dataframe consits only Movies");
        }
        Predef$.MODULE$.println((Object)new StringBuilder(29).append("\tfeature extraction mode is: ").append(parametersFeatureExtractorMode).toString());
        startTime = System.nanoTime();
        FeatureExtractorModel fe = new FeatureExtractorModel().setMode(parametersFeatureExtractorMode).setOutputCol("extractedFeatures");
        Dataset feFeatures = parameterOnlyMovieSimilarity ? fe.transform(triplesDf).filter((Function1 & Serializable & scala.Serializable)t -> BoxesRunTime.boxToBoolean((boolean)SimilarityPipelineExperiment$.$anonfun$runExperiment$1(t))).cache() : fe.transform(triplesDf).cache();
        Predef$.MODULE$.println((Object)new StringBuilder(53).append("\tour extracted dataframe contains of: ").append(feFeatures.count()).append(" different uris").toString());
        double processingTimeFeatureExtraction = (double)(System.nanoTime() - startTime) / 1.0E9;
        Predef$.MODULE$.println((Object)new StringBuilder(38).append("\tthe feature extraction needed ").append(processingTimeFeatureExtraction).append("seconds").toString());
        if (showDataFrames) {
            feFeatures.limit(10).show();
        }
        if (!pipelineComponents.contains((Object)"cv")) {
            return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
        }
        Predef$.MODULE$.println((Object)"3: Count Vectorizer from MLlib");
        Predef$.MODULE$.println((Object)new StringBuilder(19).append("\tmax vocabsize is: ").append(parameterCountVectorizerMaxVocabSize).toString());
        Predef$.MODULE$.println((Object)new StringBuilder(42).append("\tmin number documents it has to occur is: ").append(parameterCountVectorizerMinDf).toString());
        startTime = System.nanoTime();
        CountVectorizerModel cvModel = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("vectorizedFeatures").setVocabSize(parameterCountVectorizerMaxVocabSize).setMinDF((double)parameterCountVectorizerMinDf).fit(feFeatures);
        Dataset cvFeatures = cvModel.transform(feFeatures).cache();
        UserDefinedFunction isNoneZeroVector = functions$.MODULE$.udf((Function1 & Serializable & scala.Serializable)v -> BoxesRunTime.boxToBoolean((boolean)SimilarityPipelineExperiment$.$anonfun$runExperiment$2(v)), DataTypes.BooleanType);
        Dataset featuresDf = cvFeatures.filter(isNoneZeroVector.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("vectorizedFeatures")}))).select("uri", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"vectorizedFeatures"})).cache();
        featuresDf.count();
        double processingTimeCountVectorizer = (double)(System.nanoTime() - startTime) / 1.0E9;
        Predef$.MODULE$.println((Object)new StringBuilder(39).append("\tthe Count Vectorization needed ").append(processingTimeCountVectorizer).append("seconds").toString());
        if (showDataFrames) {
            featuresDf.limit(10).show();
        }
        double processingTimeSimilarityEstimatorSetup = -1.0;
        double processingTimeSimilarityEstimatorNearestNeighbors = -1.0;
        double processingTimeSimilarityEstimatorAllPairSimilarity = -1.0;
        Row tmpK = ((Row[])featuresDf.take(1))[0];
        Vector key = (Vector)tmpK.getAs("vectorizedFeatures");
        String keyUri = (String)tmpK.getAs("uri");
        Predef$.MODULE$.println();
        Predef$.MODULE$.println((Object)new Tuple2((Object)keyUri, (Object)key));
        if (!pipelineComponents.contains((Object)"nn")) {
            return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)processingTimeCountVectorizer), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
        }
        String string = similarityEstimationMode;
        String string2 = "MinHash";
        if (!(string != null ? !string.equals(string2) : string2 != null)) {
            Predef$.MODULE$.println((Object)"4. Similarity Estimation Process MinHash");
            Predef$.MODULE$.println((Object)new StringBuilder(31).append("\tthe number of hash tables is: ").append(parameterNumHashTables).toString());
            startTime = System.nanoTime();
            MinHashLSHModel similarityModel = (MinHashLSHModel)new MinHashLSH().setNumHashTables(parameterNumHashTables).setInputCol("vectorizedFeatures").setOutputCol("hashedFeatures").fit(featuresDf);
            processingTimeSimilarityEstimatorSetup = (double)(System.nanoTime() - startTime) / 1.0E9;
            Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tthe MinHash Setup needed ").append(processingTimeSimilarityEstimatorSetup).append("seconds").toString());
            Predef$.MODULE$.println((Object)"4.1 Calculate nearestneigbors for one key");
            startTime = System.nanoTime();
            Dataset nnSimilarityDf = similarityModel.approxNearestNeighbors(featuresDf, key, parameterSimilarityNearestNeighborsK, "distance").withColumn("key_column", functions$.MODULE$.lit((Object)keyUri)).select("key_column", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"uri", "distance"})).cache();
            long numberOfNn = nnSimilarityDf.count();
            Predef$.MODULE$.println((Object)new StringBuilder(20).append("\tWe have number NN: ").append(numberOfNn).toString());
            processingTimeSimilarityEstimatorNearestNeighbors = (double)(System.nanoTime() - startTime) / 1.0E9;
            Predef$.MODULE$.println((Object)new StringBuilder(32).append("\tNearestNeighbors needed ").append(processingTimeSimilarityEstimatorNearestNeighbors).append("seconds").toString());
            if (showDataFrames) {
                nnSimilarityDf.limit(10).show();
            }
            if (!pipelineComponents.contains((Object)"ap")) {
                return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)processingTimeCountVectorizer), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorSetup), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorNearestNeighbors), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
            }
            Predef$.MODULE$.println((Object)"4.2 Calculate all pair similarity");
            startTime = System.nanoTime();
            Dataset allPairSimilarityDf = similarityModel.approxSimilarityJoin(featuresDf, featuresDf, parameterSimilarityAllPairThreshold, "distance").cache();
            long lenJoinDf = allPairSimilarityDf.count();
            Predef$.MODULE$.println((Object)new StringBuilder(22).append("\tWe have number Join: ").append(lenJoinDf).toString());
            processingTimeSimilarityEstimatorAllPairSimilarity = (double)(System.nanoTime() - startTime) / 1.0E9;
            Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tAllPairSimilarity needed ").append(processingTimeSimilarityEstimatorAllPairSimilarity).append("seconds").toString());
            if (showDataFrames) {
                allPairSimilarityDf.limit(10).show();
            }
        } else {
            String string3 = similarityEstimationMode;
            String string4 = "Jaccard";
            if (!(string3 != null ? !string3.equals(string4) : string4 != null)) {
                Predef$.MODULE$.println((Object)"4. Similarity Estimation Process Jaccard");
                startTime = System.nanoTime();
                JaccardModel similarityModel = (JaccardModel)new JaccardModel().setInputCol("vectorizedFeatures");
                processingTimeSimilarityEstimatorSetup = (double)(System.nanoTime() - startTime) / 1.0E9;
                Predef$.MODULE$.println((Object)"4.1 Calculate nearestneigbors for one key");
                startTime = System.nanoTime();
                Dataset x$1 = cvFeatures;
                Vector x$2 = key;
                int x$3 = parameterSimilarityNearestNeighborsK;
                String x$4 = "theFirstUri";
                boolean x$5 = false;
                String x$6 = similarityModel.nearestNeighbors$default$5();
                Dataset nnSimilarityDf = similarityModel.nearestNeighbors((Dataset<Row>)x$1, x$2, x$3, x$4, x$6, x$5).cache();
                long numberOfNn = nnSimilarityDf.count();
                Predef$.MODULE$.println((Object)new StringBuilder(20).append("\tWe have number NN: ").append(numberOfNn).toString());
                processingTimeSimilarityEstimatorNearestNeighbors = (double)(System.nanoTime() - startTime) / 1.0E9;
                Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tNearestNeighbors needed ").append(processingTimeSimilarityEstimatorNearestNeighbors).append(" seconds").toString());
                if (showDataFrames) {
                    nnSimilarityDf.limit(10).show();
                }
                if (!pipelineComponents.contains((Object)"ap")) {
                    return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)processingTimeCountVectorizer), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorSetup), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorNearestNeighbors), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
                }
                Predef$.MODULE$.println((Object)"4.2 Calculate all pair similarity");
                startTime = System.nanoTime();
                Dataset allPairSimilarityDf = similarityModel.similarityJoin((Dataset<Row>)featuresDf, (Dataset<Row>)featuresDf, 1.0 - parameterSimilarityAllPairThreshold, similarityModel.similarityJoin$default$4()).cache();
                long lenJoinDf = allPairSimilarityDf.count();
                Predef$.MODULE$.println((Object)new StringBuilder(22).append("\tWe have number Join: ").append(lenJoinDf).toString());
                processingTimeSimilarityEstimatorAllPairSimilarity = (double)(System.nanoTime() - startTime) / 1.0E9;
                Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tAllPairSimilarity needed ").append(processingTimeSimilarityEstimatorAllPairSimilarity).append("seconds").toString());
                if (showDataFrames) {
                    allPairSimilarityDf.limit(10).show();
                }
            } else {
                String string5 = similarityEstimationMode;
                String string6 = "Tversky";
                if (!(string5 != null ? !string5.equals(string6) : string6 != null)) {
                    Predef$.MODULE$.println((Object)"4. Similarity Estimation Process Tversky");
                    startTime = System.nanoTime();
                    TverskyModel similarityModel = ((TverskyModel)new TverskyModel().setInputCol("vectorizedFeatures")).setAlpha(1.0).setBeta(1.0);
                    processingTimeSimilarityEstimatorSetup = (double)(System.nanoTime() - startTime) / 1.0E9;
                    Predef$.MODULE$.println((Object)"4.1 Calculate nearestneigbors for one key");
                    startTime = System.nanoTime();
                    Dataset x$7 = cvFeatures;
                    Vector x$8 = key;
                    int x$9 = parameterSimilarityNearestNeighborsK;
                    String x$10 = "theFirstUri";
                    boolean x$11 = false;
                    String x$12 = similarityModel.nearestNeighbors$default$5();
                    Dataset nnSimilarityDf = similarityModel.nearestNeighbors((Dataset<Row>)x$7, x$8, x$9, x$10, x$12, x$11).cache();
                    long numberOfNn = nnSimilarityDf.count();
                    Predef$.MODULE$.println((Object)new StringBuilder(20).append("\tWe have number NN: ").append(numberOfNn).toString());
                    processingTimeSimilarityEstimatorNearestNeighbors = (double)(System.nanoTime() - startTime) / 1.0E9;
                    Predef$.MODULE$.println((Object)new StringBuilder(32).append("\tNearestNeighbors needed ").append(processingTimeSimilarityEstimatorNearestNeighbors).append("seconds").toString());
                    if (showDataFrames) {
                        nnSimilarityDf.limit(10).show();
                    }
                    if (!pipelineComponents.contains((Object)"ap")) {
                        return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)processingTimeCountVectorizer), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorSetup), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorNearestNeighbors), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
                    }
                    Predef$.MODULE$.println((Object)"4.2 Calculate all pair similarity");
                    startTime = System.nanoTime();
                    Dataset allPairSimilarityDf = similarityModel.similarityJoin((Dataset<Row>)featuresDf, (Dataset<Row>)featuresDf, 1.0 - parameterSimilarityAllPairThreshold, similarityModel.similarityJoin$default$4()).cache();
                    long lenJoinDf = allPairSimilarityDf.count();
                    Predef$.MODULE$.println((Object)new StringBuilder(22).append("\tWe have number Join: ").append(lenJoinDf).toString());
                    processingTimeSimilarityEstimatorAllPairSimilarity = (double)(System.nanoTime() - startTime) / 1.0E9;
                    Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tAllPairSimilarity needed ").append(processingTimeSimilarityEstimatorAllPairSimilarity).append("seconds").toString());
                    if (showDataFrames) {
                        allPairSimilarityDf.limit(10).show();
                    }
                } else {
                    String string7 = similarityEstimationMode;
                    String string8 = "Batet";
                    if (!(string7 != null ? !string7.equals(string8) : string8 != null)) {
                        Predef$.MODULE$.println((Object)"4. Similarity Estimation Process Batet");
                        startTime = System.nanoTime();
                        BatetModel similarityModel = (BatetModel)new BatetModel().setInputCol("vectorizedFeatures");
                        processingTimeSimilarityEstimatorSetup = (double)(System.nanoTime() - startTime) / 1.0E9;
                        Predef$.MODULE$.println((Object)"4.1 Calculate nearestneigbors for one key");
                        startTime = System.nanoTime();
                        Dataset x$13 = cvFeatures;
                        Vector x$14 = key;
                        int x$15 = parameterSimilarityNearestNeighborsK;
                        String x$16 = "theFirstUri";
                        boolean x$17 = false;
                        String x$18 = similarityModel.nearestNeighbors$default$5();
                        Dataset nnSimilarityDf = similarityModel.nearestNeighbors((Dataset<Row>)x$13, x$14, x$15, x$16, x$18, x$17).cache();
                        long numberOfNn = nnSimilarityDf.count();
                        Predef$.MODULE$.println((Object)new StringBuilder(20).append("\tWe have number NN: ").append(numberOfNn).toString());
                        processingTimeSimilarityEstimatorNearestNeighbors = (double)(System.nanoTime() - startTime) / 1.0E9;
                        Predef$.MODULE$.println((Object)new StringBuilder(32).append("\tNearestNeighbors needed ").append(processingTimeSimilarityEstimatorNearestNeighbors).append("seconds").toString());
                        if (showDataFrames) {
                            nnSimilarityDf.limit(10).show();
                        }
                        if (!pipelineComponents.contains((Object)"ap")) {
                            return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)processingTimeCountVectorizer), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorSetup), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorNearestNeighbors), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
                        }
                        Predef$.MODULE$.println((Object)"4.2 Calculate all pair similarity");
                        startTime = System.nanoTime();
                        Dataset allPairSimilarityDf = similarityModel.similarityJoin((Dataset<Row>)featuresDf, (Dataset<Row>)featuresDf, parameterSimilarityAllPairThreshold, similarityModel.similarityJoin$default$4()).cache();
                        long lenJoinDf = allPairSimilarityDf.count();
                        Predef$.MODULE$.println((Object)new StringBuilder(22).append("\tWe have number Join: ").append(lenJoinDf).toString());
                        processingTimeSimilarityEstimatorAllPairSimilarity = (double)(System.nanoTime() - startTime) / 1.0E9;
                        Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tAllPairSimilarity needed ").append(processingTimeSimilarityEstimatorAllPairSimilarity).append("seconds").toString());
                        if (showDataFrames) {
                            allPairSimilarityDf.limit(10).show();
                        }
                    } else {
                        String string9 = similarityEstimationMode;
                        String string10 = "Braun Blanquet";
                        if (!(string9 != null ? !string9.equals(string10) : string10 != null)) {
                            Predef$.MODULE$.println((Object)"4. Similarity Estimation Process Braun Blanquet");
                            startTime = System.nanoTime();
                            BraunBlanquetModel similarityModel = (BraunBlanquetModel)new BraunBlanquetModel().setInputCol("vectorizedFeatures");
                            processingTimeSimilarityEstimatorSetup = (double)(System.nanoTime() - startTime) / 1.0E9;
                            Predef$.MODULE$.println((Object)"4.1 Calculate nearestneigbors for one key");
                            startTime = System.nanoTime();
                            Dataset x$19 = cvFeatures;
                            Vector x$20 = key;
                            int x$21 = parameterSimilarityNearestNeighborsK;
                            String x$22 = "theFirstUri";
                            boolean x$23 = false;
                            String x$24 = similarityModel.nearestNeighbors$default$5();
                            Dataset nnSimilarityDf = similarityModel.nearestNeighbors((Dataset<Row>)x$19, x$20, x$21, x$22, x$24, x$23).cache();
                            long numberOfNn = nnSimilarityDf.count();
                            Predef$.MODULE$.println((Object)new StringBuilder(20).append("\tWe have number NN: ").append(numberOfNn).toString());
                            processingTimeSimilarityEstimatorNearestNeighbors = (double)(System.nanoTime() - startTime) / 1.0E9;
                            Predef$.MODULE$.println((Object)new StringBuilder(32).append("\tNearestNeighbors needed ").append(processingTimeSimilarityEstimatorNearestNeighbors).append("seconds").toString());
                            if (showDataFrames) {
                                nnSimilarityDf.limit(10).show();
                            }
                            if (!pipelineComponents.contains((Object)"ap")) {
                                return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)processingTimeCountVectorizer), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorSetup), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorNearestNeighbors), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
                            }
                            Predef$.MODULE$.println((Object)"4.2 Calculate all pair similarity");
                            startTime = System.nanoTime();
                            Dataset allPairSimilarityDf = similarityModel.similarityJoin((Dataset<Row>)featuresDf, (Dataset<Row>)featuresDf, parameterSimilarityAllPairThreshold, similarityModel.similarityJoin$default$4()).cache();
                            long lenJoinDf = allPairSimilarityDf.count();
                            Predef$.MODULE$.println((Object)new StringBuilder(22).append("\tWe have number Join: ").append(lenJoinDf).toString());
                            processingTimeSimilarityEstimatorAllPairSimilarity = (double)(System.nanoTime() - startTime) / 1.0E9;
                            Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tAllPairSimilarity needed ").append(processingTimeSimilarityEstimatorAllPairSimilarity).append("seconds").toString());
                            if (showDataFrames) {
                                allPairSimilarityDf.limit(10).show();
                            }
                        } else {
                            String string11 = similarityEstimationMode;
                            String string12 = "Dice";
                            if (!(string11 != null ? !string11.equals(string12) : string12 != null)) {
                                Predef$.MODULE$.println((Object)"4. Similarity Estimation Process Dice");
                                startTime = System.nanoTime();
                                DiceModel similarityModel = (DiceModel)new DiceModel().setInputCol("vectorizedFeatures");
                                processingTimeSimilarityEstimatorSetup = (double)(System.nanoTime() - startTime) / 1.0E9;
                                Predef$.MODULE$.println((Object)"4.1 Calculate nearestneigbors for one key");
                                startTime = System.nanoTime();
                                Dataset x$25 = cvFeatures;
                                Vector x$26 = key;
                                int x$27 = parameterSimilarityNearestNeighborsK;
                                String x$28 = "theFirstUri";
                                boolean x$29 = false;
                                String x$30 = similarityModel.nearestNeighbors$default$5();
                                Dataset nnSimilarityDf = similarityModel.nearestNeighbors((Dataset<Row>)x$25, x$26, x$27, x$28, x$30, x$29).cache();
                                long numberOfNn = nnSimilarityDf.count();
                                Predef$.MODULE$.println((Object)new StringBuilder(20).append("\tWe have number NN: ").append(numberOfNn).toString());
                                processingTimeSimilarityEstimatorNearestNeighbors = (double)(System.nanoTime() - startTime) / 1.0E9;
                                Predef$.MODULE$.println((Object)new StringBuilder(32).append("\tNearestNeighbors needed ").append(processingTimeSimilarityEstimatorNearestNeighbors).append("seconds").toString());
                                if (showDataFrames) {
                                    nnSimilarityDf.limit(10).show();
                                }
                                if (!pipelineComponents.contains((Object)"ap")) {
                                    return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)processingTimeCountVectorizer), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorSetup), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorNearestNeighbors), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
                                }
                                Predef$.MODULE$.println((Object)"4.2 Calculate all pair similarity");
                                startTime = System.nanoTime();
                                Dataset allPairSimilarityDf = similarityModel.similarityJoin((Dataset<Row>)featuresDf, (Dataset<Row>)featuresDf, parameterSimilarityAllPairThreshold, similarityModel.similarityJoin$default$4()).cache();
                                long lenJoinDf = allPairSimilarityDf.count();
                                Predef$.MODULE$.println((Object)new StringBuilder(22).append("\tWe have number Join: ").append(lenJoinDf).toString());
                                processingTimeSimilarityEstimatorAllPairSimilarity = (double)(System.nanoTime() - startTime) / 1.0E9;
                                Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tAllPairSimilarity needed ").append(processingTimeSimilarityEstimatorAllPairSimilarity).append("seconds").toString());
                                if (showDataFrames) {
                                    allPairSimilarityDf.limit(10).show();
                                }
                            } else {
                                String string13 = similarityEstimationMode;
                                String string14 = "Ochiai";
                                if (!(string13 != null ? !string13.equals(string14) : string14 != null)) {
                                    Predef$.MODULE$.println((Object)"4. Similarity Estimation Process Ochiai");
                                    startTime = System.nanoTime();
                                    OchiaiModel similarityModel = (OchiaiModel)new OchiaiModel().setInputCol("vectorizedFeatures");
                                    processingTimeSimilarityEstimatorSetup = (double)(System.nanoTime() - startTime) / 1.0E9;
                                    Predef$.MODULE$.println((Object)"4.1 Calculate nearestneigbors for one key");
                                    startTime = System.nanoTime();
                                    Dataset x$31 = cvFeatures;
                                    Vector x$32 = key;
                                    int x$33 = parameterSimilarityNearestNeighborsK;
                                    String x$34 = "theFirstUri";
                                    boolean x$35 = false;
                                    String x$36 = similarityModel.nearestNeighbors$default$5();
                                    Dataset nnSimilarityDf = similarityModel.nearestNeighbors((Dataset<Row>)x$31, x$32, x$33, x$34, x$36, x$35).cache();
                                    long numberOfNn = nnSimilarityDf.count();
                                    Predef$.MODULE$.println((Object)new StringBuilder(20).append("\tWe have number NN: ").append(numberOfNn).toString());
                                    processingTimeSimilarityEstimatorNearestNeighbors = (double)(System.nanoTime() - startTime) / 1.0E9;
                                    Predef$.MODULE$.println((Object)new StringBuilder(32).append("\tNearestNeighbors needed ").append(processingTimeSimilarityEstimatorNearestNeighbors).append("seconds").toString());
                                    if (showDataFrames) {
                                        nnSimilarityDf.limit(10).show();
                                    }
                                    if (!pipelineComponents.contains((Object)"ap")) {
                                        return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)processingTimeCountVectorizer), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorSetup), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorNearestNeighbors), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
                                    }
                                    Predef$.MODULE$.println((Object)"4.2 Calculate all pair similarity");
                                    startTime = System.nanoTime();
                                    Dataset allPairSimilarityDf = similarityModel.similarityJoin((Dataset<Row>)featuresDf, (Dataset<Row>)featuresDf, parameterSimilarityAllPairThreshold, similarityModel.similarityJoin$default$4()).cache();
                                    long lenJoinDf = allPairSimilarityDf.count();
                                    Predef$.MODULE$.println((Object)new StringBuilder(22).append("\tWe have number Join: ").append(lenJoinDf).toString());
                                    processingTimeSimilarityEstimatorAllPairSimilarity = (double)(System.nanoTime() - startTime) / 1.0E9;
                                    Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tAllPairSimilarity needed ").append(processingTimeSimilarityEstimatorAllPairSimilarity).append("seconds").toString());
                                    if (showDataFrames) {
                                        allPairSimilarityDf.limit(10).show();
                                    }
                                } else {
                                    String string15 = similarityEstimationMode;
                                    String string16 = "Simpson";
                                    if (!(string15 != null ? !string15.equals(string16) : string16 != null)) {
                                        Predef$.MODULE$.println((Object)"4. Similarity Estimation Process Simpson");
                                        startTime = System.nanoTime();
                                        SimpsonModel similarityModel = (SimpsonModel)new SimpsonModel().setInputCol("vectorizedFeatures");
                                        processingTimeSimilarityEstimatorSetup = (double)(System.nanoTime() - startTime) / 1.0E9;
                                        Predef$.MODULE$.println((Object)"4.1 Calculate nearestneigbors for one key");
                                        startTime = System.nanoTime();
                                        Dataset x$37 = cvFeatures;
                                        Vector x$38 = key;
                                        int x$39 = parameterSimilarityNearestNeighborsK;
                                        String x$40 = "theFirstUri";
                                        boolean x$41 = false;
                                        String x$42 = similarityModel.nearestNeighbors$default$5();
                                        Dataset nnSimilarityDf = similarityModel.nearestNeighbors((Dataset<Row>)x$37, x$38, x$39, x$40, x$42, x$41).cache();
                                        long numberOfNn = nnSimilarityDf.count();
                                        Predef$.MODULE$.println((Object)new StringBuilder(20).append("\tWe have number NN: ").append(numberOfNn).toString());
                                        processingTimeSimilarityEstimatorNearestNeighbors = (double)(System.nanoTime() - startTime) / 1.0E9;
                                        Predef$.MODULE$.println((Object)new StringBuilder(32).append("\tNearestNeighbors needed ").append(processingTimeSimilarityEstimatorNearestNeighbors).append("seconds").toString());
                                        if (showDataFrames) {
                                            nnSimilarityDf.limit(10).show();
                                        }
                                        if (!pipelineComponents.contains((Object)"ap")) {
                                            return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)processingTimeCountVectorizer), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorSetup), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorNearestNeighbors), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
                                        }
                                        Predef$.MODULE$.println((Object)"4.2 Calculate all pair similarity");
                                        startTime = System.nanoTime();
                                        Dataset allPairSimilarityDf = similarityModel.similarityJoin((Dataset<Row>)featuresDf, (Dataset<Row>)featuresDf, parameterSimilarityAllPairThreshold, similarityModel.similarityJoin$default$4()).cache();
                                        long lenJoinDf = allPairSimilarityDf.count();
                                        Predef$.MODULE$.println((Object)new StringBuilder(22).append("\tWe have number Join: ").append(lenJoinDf).toString());
                                        processingTimeSimilarityEstimatorAllPairSimilarity = (double)(System.nanoTime() - startTime) / 1.0E9;
                                        Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tAllPairSimilarity needed ").append(processingTimeSimilarityEstimatorAllPairSimilarity).append("seconds").toString());
                                        if (showDataFrames) {
                                            allPairSimilarityDf.limit(10).show();
                                        }
                                    } else {
                                        String string17 = similarityEstimationMode;
                                        String string18 = "MinHashJaccardStacked";
                                        if (!(string17 != null ? !string17.equals(string18) : string18 != null)) {
                                            Dataset nnSimilarityDf;
                                            Predef$.MODULE$.println((Object)"4. Similarity Estimation Process MinHashJaccardStacked");
                                            Predef$.MODULE$.println((Object)new StringBuilder(31).append("\tthe number of hash tables is: ").append(parameterNumHashTables).toString());
                                            startTime = System.nanoTime();
                                            MinHashLSHModel similarityModelMinHash = (MinHashLSHModel)new MinHashLSH().setNumHashTables(parameterNumHashTables).setInputCol("vectorizedFeatures").setOutputCol("hashedFeatures").fit(featuresDf);
                                            JaccardModel similarityModelJaccard = (JaccardModel)new JaccardModel().setInputCol("vectorizedFeatures");
                                            processingTimeSimilarityEstimatorSetup = (double)(System.nanoTime() - startTime) / 1.0E9;
                                            Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tthe MinHash Setup needed ").append(processingTimeSimilarityEstimatorSetup).append("seconds").toString());
                                            Predef$.MODULE$.println((Object)"4.1 Calculate nearestneigbors for one key");
                                            startTime = System.nanoTime();
                                            Dataset shortendedNnDf = nnSimilarityDf = similarityModelMinHash.approxNearestNeighbors(featuresDf, key, parameterSimilarityNearestNeighborsK, "distance").cache();
                                            long numberOfNn = shortendedNnDf.count();
                                            Predef$.MODULE$.println((Object)new StringBuilder(20).append("\tWe have number NN: ").append(numberOfNn).toString());
                                            processingTimeSimilarityEstimatorNearestNeighbors = (double)(System.nanoTime() - startTime) / 1.0E9;
                                            Predef$.MODULE$.println((Object)new StringBuilder(32).append("\tNearestNeighbors needed ").append(processingTimeSimilarityEstimatorNearestNeighbors).append("seconds").toString());
                                            if (showDataFrames) {
                                                shortendedNnDf.limit(10).show();
                                            }
                                            if (!pipelineComponents.contains((Object)"ap")) {
                                                return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)processingTimeCountVectorizer), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorSetup), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorNearestNeighbors), BoxesRunTime.boxToDouble((double)0.0), BoxesRunTime.boxToDouble((double)((double)(System.nanoTime() - experimentTime) / 1.0E9)), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
                                            }
                                            Predef$.MODULE$.println((Object)"4.2 Calculate all pair similarity");
                                            startTime = System.nanoTime();
                                            Dataset allPairSimilarityDf = similarityModelMinHash.approxSimilarityJoin(featuresDf, featuresDf, 1.0 - parameterSimilarityAllPairThreshold, "distance").cache();
                                            long lenJoinDf = allPairSimilarityDf.count();
                                            Predef$.MODULE$.println((Object)new StringBuilder(22).append("\tWe have number Join: ").append(lenJoinDf).toString());
                                            Dataset minHashedSimilarities = allPairSimilarityDf.withColumn("uriA", functions$.MODULE$.col("datasetA").getField("uri")).withColumn("uriB", functions$.MODULE$.col("datasetB").getField("uri")).select("uriA", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"uriB", "distance"})).cache();
                                            if (showDataFrames) {
                                                minHashedSimilarities.limit(10).show(false);
                                            }
                                            processingTimeSimilarityEstimatorAllPairSimilarity = (double)(System.nanoTime() - startTime) / 1.0E9;
                                            Predef$.MODULE$.println((Object)new StringBuilder(33).append("\tAllPairSimilarity needed ").append(processingTimeSimilarityEstimatorAllPairSimilarity).append("seconds").toString());
                                        } else {
                                            throw new Error("you haven't specified a working Similarity Estimation");
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        double processingTimeTotal = (double)(System.nanoTime() - experimentTime) / 1.0E9;
        Predef$.MODULE$.println((Object)new StringBuilder(37).append("the complete experiment took ").append(processingTimeTotal).append(" seconds").toString());
        spark.stop();
        return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{pipelineComponents.toString(), BoxesRunTime.boxToInteger((int)run), inputPath, inputFileName, BoxesRunTime.boxToLong((long)inputFileSizeNumberTriples), similarityEstimationMode, parametersFeatureExtractorMode, BoxesRunTime.boxToInteger((int)parameterCountVectorizerMinDf), BoxesRunTime.boxToInteger((int)parameterCountVectorizerMaxVocabSize), BoxesRunTime.boxToDouble((double)parameterSimilarityAlpha), BoxesRunTime.boxToDouble((double)parameterSimilarityBeta), BoxesRunTime.boxToInteger((int)parameterNumHashTables), BoxesRunTime.boxToDouble((double)parameterSimilarityAllPairThreshold), BoxesRunTime.boxToInteger((int)parameterSimilarityNearestNeighborsK), BoxesRunTime.boxToDouble((double)processingTimeReadIn), BoxesRunTime.boxToDouble((double)processingTimeFeatureExtraction), BoxesRunTime.boxToDouble((double)processingTimeCountVectorizer), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorSetup), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorNearestNeighbors), BoxesRunTime.boxToDouble((double)processingTimeSimilarityEstimatorAllPairSimilarity), BoxesRunTime.boxToDouble((double)processingTimeTotal), BoxesRunTime.boxToBoolean((boolean)parameterOnlyMovieSimilarity)}));
    }

    /**
     * Supplies the fallback value for the 15th argument of {@code runExperiment}.
     *
     * <p>NOTE(review): the {@code $default$15} suffix looks like a compiler-generated
     * default-argument accessor. Judging by argument position in the visible call to
     * {@code runExperiment}, the 15th argument is {@code showDataFrames} -- confirm
     * against the original Scala source.
     *
     * @return always {@code false}
     */
    public boolean runExperiment$default$15() {
        final boolean fallbackValue = false;
        return fallbackValue;
    }

    /**
     * Executes a single experiment configuration and records its outcome.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>increments the shared run counter and prints an
     *       {@code "Experiment (current/total)"} progress line;</li>
     *   <li>delegates to {@code runExperiment} with the supplied configuration;</li>
     *   <li>prints the resulting row followed by an empty line;</li>
     *   <li>appends the row to {@code ex_results$1}.</li>
     * </ol>
     *
     * <p>Note that {@code parameterSimilarityNearestNeighborsK} is the last parameter of
     * this method but is forwarded as the 13th argument of {@code runExperiment}, ahead of
     * {@code parameterOnlyMovieSimilarity$1} and {@code showDataFrames$1}.
     *
     * @param counterCurrentRun$1 mutable counter; its {@code elem} is incremented by one on entry
     * @param totalNumberExperiments$1 value printed after the slash in the progress line
     * @param ex_results$1 buffer that receives the row returned by {@code runExperiment}
     * @param parameterSimilarityNearestNeighborsK forwarded to {@code runExperiment}
     * @return the result of {@code ex_results$1.$plus$eq(row)}
     *
     * <p>All remaining parameters are passed through to {@code runExperiment} unchanged.
     */
    public static final /* synthetic */ ListBuffer $anonfun$main$20(IntRef counterCurrentRun$1, int totalNumberExperiments$1, String sparkMaster$1, List pipelineComponents$1, int run$1, String input$1, String similarityEstimationMode$1, String parametersFeatureExtractorMode$1, int parameterCountVectorizerMinDf$1, int parameterCountVectorizerMaxVocabSize$1, double parameterSimilarityAlpha$1, double parameterSimilarityBeta$1, int parameterNumHashTables$1, double parameterSimilarityAllPairThreshold$1, boolean parameterOnlyMovieSimilarity$1, boolean showDataFrames$1, ListBuffer ex_results$1, int parameterSimilarityNearestNeighborsK) {
        // Advance the shared counter before reporting progress so the first run prints as 1.
        counterCurrentRun$1.elem += 1;
        String progressLine = "Experiment (" + counterCurrentRun$1.elem + "/" + totalNumberExperiments$1 + ")";
        Predef$.MODULE$.println((Object)progressLine);

        Row experimentRow = MODULE$.runExperiment(
                sparkMaster$1,
                (List<String>)pipelineComponents$1,
                run$1,
                input$1,
                similarityEstimationMode$1,
                parametersFeatureExtractorMode$1,
                parameterCountVectorizerMinDf$1,
                parameterCountVectorizerMaxVocabSize$1,
                parameterSimilarityAlpha$1,
                parameterSimilarityBeta$1,
                parameterNumHashTables$1,
                parameterSimilarityAllPairThreshold$1,
                parameterSimilarityNearestNeighborsK,
                parameterOnlyMovieSimilarity$1,
                showDataFrames$1);

        Predef$.MODULE$.println((Object)"Resulting row for DataFrame:");
        Predef$.MODULE$.println((Object)experimentRow);
        Predef$.MODULE$.println();

        return ex_results$1.$plus$eq((Object)experimentRow);
    }

    /**
     * Row predicate: reads the row's {@code "uri"} value as a string, splits it on
     * {@code "/"}, and reports whether the last resulting segment starts with {@code "m"}.
     *
     * @param t row providing a {@code "uri"} field
     * @return {@code true} if the final segment of the URI begins with {@code "m"}
     */
    public static final /* synthetic */ boolean $anonfun$runExperiment$1(Row t) {
        String uri = (String)t.getAs("uri");
        Object[] pathSegments = (Object[])uri.split("/");
        // Keep the Scala ArrayOps accessor so that an empty segment array fails the same way.
        String lastSegment = (String)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(pathSegments)).last();
        return lastSegment.startsWith("m");
    }

    /**
     * Vector predicate: reports whether the vector has at least one non-zero entry.
     *
     * @param v vector to inspect
     * @return {@code true} when {@code v.numNonzeros()} is greater than zero
     */
    public static final /* synthetic */ boolean $anonfun$runExperiment$2(Vector v) {
        final int nonZeroEntries = v.numNonzeros();
        return nonZeroEntries > 0;
    }

    private SimilarityPipelineExperiment$() {
        MODULE$ = this;
        this.Log = Logger.getLogger((String)this.getClass().getCanonicalName());
    }
}

