/*
 * Decompiled with CFR 0.152.
 */
package net.sansa_stack.ml.spark.anomalydetection;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.Serializable;
import net.sansa_stack.ml.spark.anomalydetection.DistADConfig;
import net.sansa_stack.ml.spark.anomalydetection.DistADUtil$;
import net.sansa_stack.ml.spark.utils.FeatureExtractorModel;
import net.sansa_stack.rdf.common.io.riot.error.ErrorParseMode$;
import net.sansa_stack.rdf.common.io.riot.error.WarningParseMode$;
import net.sansa_stack.rdf.spark.io.NTripleReader$;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.Triple;
import org.apache.jena.riot.Lang;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.HashPartitioner;
import org.apache.spark.Partitioner;
import org.apache.spark.ml.clustering.BisectingKMeans;
import org.apache.spark.ml.clustering.BisectingKMeansModel;
import org.apache.spark.ml.evaluation.ClusteringEvaluator;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.feature.MinHashLSH;
import org.apache.spark.ml.feature.MinHashLSHModel;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import org.apache.spark.sql.functions$;
import org.apache.spark.storage.StorageLevel$;
import scala.Double$;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.Predef$;
import scala.Tuple2;
import scala.Tuple3;
import scala.Tuple4;
import scala.collection.Map;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.HashSet;
import scala.collection.mutable.HashSet$;
import scala.collection.mutable.Set;
import scala.collection.mutable.Set$;
import scala.collection.mutable.SetLike;
import scala.collection.mutable.StringBuilder;
import scala.math.Ordering;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.reflect.api.JavaUniverse;
import scala.reflect.api.Mirror;
import scala.reflect.api.Symbols;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.TypeTags;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BooleanRef;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.DoubleRef;
import scala.runtime.IntRef;
import scala.runtime.LazyRef;
import scala.runtime.NonLocalReturnControl;
import scala.runtime.Null$;
import scala.runtime.RichChar$;
import scala.runtime.RichInt$;
import scala.runtime.java8.JFunction1;

public final class DistADUtil$ {
    /** Singleton instance of this Scala object; presumably assigned by the constructor, which is not visible in this view. */
    public static DistADUtil$ MODULE$;
    /** Logger returned by {@link #LOG()}. */
    private final Logger LOG;
    /** String list returned by {@link #objList()}; it is passed to {@code searchEdge} as the list of accepted suffixes. */
    private final List<String> objList;
    /** User-defined function returned by {@link #convertStringToDouble()}; applied to column {@code "o"} in {@code createDFWithConversion}. */
    private final UserDefinedFunction convertStringToDouble;

    // Instantiates the singleton as soon as the class is initialised.
    static {
        new DistADUtil$();
    }

    /**
     * Accessor for the logger owned by this singleton.
     *
     * @return the value of the {@code LOG} field
     */
    public Logger LOG() {
        return LOG;
    }

    /**
     * Accessor for the string list held by this singleton.
     *
     * @return the value of the {@code objList} field
     */
    public List<String> objList() {
        return objList;
    }

    /**
     * Sets the log4j level of the {@code "org"} logger to {@code ERROR} and returns the
     * session produced by the {@code spark$1} helper (defined outside this view) from a
     * fresh {@link LazyRef}.
     *
     * @return the Spark session supplied by {@code spark$1}
     */
    public SparkSession createSpark() {
        LazyRef sessionHolder = new LazyRef();
        Logger orgLogger = Logger.getLogger((String)"org");
        orgLogger.setLevel(Level.ERROR);
        return DistADUtil$.spark$1(sessionHolder);
    }

    /*
     * WARNING - void declaration
     */
    public RDD<Triple> readData(SparkSession spark, String input) {
        void var3_3;
        RDD originalDataRDD = null;
        if (input.endsWith("nt")) {
            originalDataRDD = NTripleReader$.MODULE$.load(spark, input, ErrorParseMode$.MODULE$.SKIP(), WarningParseMode$.MODULE$.IGNORE(), NTripleReader$.MODULE$.load$default$5(), NTripleReader$.MODULE$.load$default$6());
        } else {
            Lang lang = Lang.TURTLE;
            originalDataRDD = (RDD)net.sansa_stack.rdf.spark.io.package$.MODULE$.RDFReader(spark).rdf(lang).apply((Object)input);
        }
        return var3_3;
    }

    /**
     * Tells whether the part of {@code x} in front of its first {@code '^'} consists only of
     * digits according to {@link #isAllDigits(String)}.
     *
     * <p>The examined slice is {@code x.substring(1, caret - 1)}, i.e. the first character and
     * the character directly before the caret are dropped. Strings without a caret, and any
     * input that triggers an exception, yield {@code false}.
     *
     * @param x text to examine
     * @return {@code true} only when the slice passes {@code isAllDigits}
     */
    public boolean isNumeric(String x) {
        try {
            if (!x.contains("^")) {
                return false;
            }
            int caretPos = x.indexOf('^');
            String lexicalForm = x.substring(1, caretPos - 1);
            return this.isAllDigits(lexicalForm);
        }
        catch (Exception e) {
            // Any failure (null input, bad indices, ...) means "not numeric".
            return false;
        }
    }

    /**
     * Runs the synthetic per-character check over every character of {@code x} and reports
     * the resulting flag.
     *
     * <p>The flag starts as {@code true}, so an empty string returns {@code true}. The check
     * itself lives in {@code $anonfun$isAllDigits$1}, which is not visible in this view;
     * presumably it clears the flag when a character is not a digit (to be confirmed).
     *
     * @param x text to scan
     * @return the value of the flag after all characters were visited
     */
    public boolean isAllDigits(String x) {
        // Mutable holder so the lambda below can update the result.
        BooleanRef found = BooleanRef.create((boolean)true);
        new StringOps(Predef$.MODULE$.augmentString(x)).foreach((Function1 & Serializable & scala.Serializable)ch -> {
            DistADUtil$.$anonfun$isAllDigits$1(found, BoxesRunTime.unboxToChar((Object)ch));
            return BoxedUnit.UNIT;
        });
        return found.elem;
    }

    /**
     * Checks whether the text following the first {@code '^'} in {@code x} is contained in
     * {@code y}.
     *
     * <p>The looked-up text starts two characters after the first caret. Strings without a
     * caret yield {@code false}.
     *
     * @param x text to inspect
     * @param y list of accepted suffixes
     * @return {@code true} when the suffix is an element of {@code y}
     */
    public boolean searchEdge(String x, List<String> y) {
        int caretPos = x.indexOf('^');
        if (caretPos < 0) {
            return false;
        }
        String suffix = x.substring(caretPos + 2);
        return y.contains((Object)suffix);
    }

    /**
     * Parses the numeric value in front of the first {@code '^'} of {@code a}.
     *
     * <p>Leading and trailing double quotes are stripped from the text before the caret and
     * the remainder is parsed as a double. {@code NaN} is returned when {@code a} is
     * {@code null}, has no caret, or the text is not a valid number.
     *
     * <p>Unlike a blanket {@code catch (Throwable)}, this version lets JVM errors such as
     * {@link OutOfMemoryError} propagate instead of silently turning them into {@code NaN}.
     *
     * @param a text to parse
     * @return the parsed value, or {@code Double.NaN} when it cannot be parsed
     */
    public double getNumber(String a) {
        if (a == null) {
            return Double.NaN;
        }
        int caretPos = a.indexOf('^');
        if (caretPos < 0) {
            return Double.NaN;
        }
        String lexicalForm = a.substring(0, caretPos);
        lexicalForm = StringUtils.stripStart((String)lexicalForm, (String)"\"");
        lexicalForm = StringUtils.stripEnd((String)lexicalForm, (String)"\"");
        try {
            return new StringOps(Predef$.MODULE$.augmentString(lexicalForm)).toDouble();
        }
        catch (NumberFormatException e) {
            return Double.NaN;
        }
    }

    /**
     * Keeps the triples accepted by the synthetic predicate
     * {@code $anonfun$getOnlyLiteralObjects$1}.
     *
     * <p>The predicate is defined outside this view; judging by the method name it presumably
     * accepts triples whose object is a literal (to be confirmed).
     *
     * @param nTriplesRDD triples to filter
     * @return the triples for which the predicate returned {@code true}
     */
    public RDD<Triple> getOnlyLiteralObjects(RDD<Triple> nTriplesRDD) {
        return nTriplesRDD.filter((Function1 & Serializable & scala.Serializable)f -> BoxesRunTime.boxToBoolean((boolean)DistADUtil$.$anonfun$getOnlyLiteralObjects$1(f)));
    }

    /**
     * Keeps the triples whose object, rendered with {@code toString()}, passes
     * {@link #isNumeric(String)}.
     *
     * @param objLit triples to filter
     * @return the triples whose object string is reported as numeric
     */
    public RDD<Triple> triplesWithNumericLit(RDD<Triple> objLit) {
        // The lambda goes through MODULE$ rather than `this`, so it does not capture this instance.
        return objLit.filter((Function1 & Serializable & scala.Serializable)f -> BoxesRunTime.boxToBoolean((boolean)DistADUtil$.MODULE$.isNumeric(f.getObject().toString())));
    }

    /*
     * WARNING - void declaration
     */
    public RDD<Triple> triplesWithNumericLitWithTypeIgnoreEndingWithID(RDD<Triple> data) {
        void var2_2;
        RDD onlyLiteralDataRDD = this.triplesWithNumericLit(data);
        onlyLiteralDataRDD = onlyLiteralDataRDD.filter((Function1 & Serializable & scala.Serializable)f -> BoxesRunTime.boxToBoolean((boolean)DistADUtil$.MODULE$.searchEdge(f.getObject().toString(), DistADUtil$.MODULE$.objList())));
        onlyLiteralDataRDD = onlyLiteralDataRDD.filter((Function1 & Serializable & scala.Serializable)f -> BoxesRunTime.boxToBoolean((boolean)DistADUtil$.$anonfun$triplesWithNumericLitWithTypeIgnoreEndingWithID$2(f)));
        return var2_2;
    }

    /**
     * Accessor for the user-defined function held by this singleton.
     *
     * @return the value of the {@code convertStringToDouble} field
     */
    public UserDefinedFunction convertStringToDouble() {
        return convertStringToDouble;
    }

    /**
     * Returns the triples of {@code originalDataRDD} whose subject also occurs as a subject in
     * {@code onlyLiteralDataRDD}.
     *
     * <p>Both inputs are keyed by {@code getSubject().toString()} and joined; for every joined
     * pair the triple coming from {@code originalDataRDD} is emitted, so a triple appears once
     * per matching entry of {@code onlyLiteralDataRDD}.
     *
     * @param originalDataRDD full set of triples
     * @param onlyLiteralDataRDD triples that select the subjects to keep
     * @return the matching triples from {@code originalDataRDD}
     */
    public RDD<Triple> filterAllTriplesWhichAtLeastHaveOneNumericLiterals(RDD<Triple> originalDataRDD, RDD<Triple> onlyLiteralDataRDD) {
        RDD literalsBySubject = onlyLiteralDataRDD.map((Function1 & Serializable & scala.Serializable)f -> new Tuple2((Object)f.getSubject().toString(), f), ClassTag$.MODULE$.apply(Tuple2.class));
        RDD originalsBySubject = originalDataRDD.map((Function1 & Serializable & scala.Serializable)f -> new Tuple2((Object)f.getSubject().toString(), f), ClassTag$.MODULE$.apply(Tuple2.class));
        RDD joinedBySubject = RDD$.MODULE$.rddToPairRDDFunctions(originalsBySubject, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Triple.class), (Ordering)Ordering.String$.MODULE$).join(literalsBySubject);
        // Each joined value is (original triple, literal triple); keep the original one.
        return joinedBySubject.map((Function1 & Serializable & scala.Serializable)f -> (Triple)((Tuple2)f._2())._1(), ClassTag$.MODULE$.apply(Triple.class));
    }

    /**
     * Combines a list of maps into one map from each key to the list of values found for it.
     *
     * <p>All entries of all maps are flattened into one collection, grouped by key, and each
     * group is reduced to the list of its values.
     *
     * @param input maps to combine
     * @param <A> key type
     * @param <B> value type
     * @return a map from every key to the values associated with it across {@code input}
     */
    public <A, B> scala.collection.immutable.Map<A, List<B>> merge(List<scala.collection.immutable.Map<A, B>> input) {
        TraversableLike allEntries = (TraversableLike)input.flatten((Function1)Predef$.MODULE$.$conforms());
        return allEntries.groupBy((Function1 & Serializable & scala.Serializable)entry -> {
            Tuple2 pair = entry;
            if (pair == null) {
                throw new MatchError((Object)pair);
            }
            return pair._1();
        }).mapValues((Function1 & Serializable & scala.Serializable)group -> (List)group.map((Function1 & Serializable & scala.Serializable)entry -> {
            Tuple2 pair = entry;
            if (pair == null) {
                throw new MatchError((Object)pair);
            }
            return pair._2();
        }, List$.MODULE$.canBuildFrom()));
    }

    /**
     * Writes every element of {@code data} as one line to {@code path}.
     *
     * <p>When {@code path} contains {@code "hdfs://"} the target is deleted recursively and
     * rewritten through the Hadoop {@link FileSystem} of the path; otherwise a local file is
     * written with a {@link BufferedWriter}.
     *
     * <p>Both streams are closed with try-with-resources so the data is flushed and nothing
     * leaks when a write fails. The {@code FileSystem} itself is intentionally not closed:
     * {@code Path.getFileSystem} normally hands out a cached instance shared with the rest
     * of the application, and closing it would break other users of that instance.
     *
     * @param data lines to write
     * @param path destination, either an HDFS URI or a local path
     */
    public void writeAnomaliesToFile(List<String> data, String path) {
        if (path.contains("hdfs://")) {
            StringBuilder stringBuilder = new StringBuilder();
            data.foreach((Function1 & Serializable & scala.Serializable)line -> stringBuilder.append(line).append("\n"));
            Path fsPath = new Path(path);
            FileSystem fs = fsPath.getFileSystem(this.createSpark().sparkContext().hadoopConfiguration());
            fs.delete(fsPath, true);
            try (FSDataOutputStream os = fs.create(fsPath)) {
                os.write(stringBuilder.toString().getBytes());
            }
        } else {
            File file = new File(path);
            try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
                data.foreach((Function1 & Serializable & scala.Serializable)line -> {
                    bw.write(new java.lang.StringBuilder(1).append(line).append("\n").toString());
                    return BoxedUnit.UNIT;
                });
            }
        }
    }

    /**
     * Writes {@code data} as comma-separated text to {@code path}.
     *
     * <p>When {@code path} contains {@code "hdfs://"} all rows are collected to the driver,
     * joined with {@code ","} (one row per line, no header) and written to a single file that
     * replaces the target. Otherwise Spark's CSV writer is used with a header, overwrite mode
     * and a single partition.
     *
     * <p>The HDFS output stream is closed with try-with-resources so the data is flushed and
     * the stream does not leak when the write fails. The {@code FileSystem} itself is
     * intentionally not closed: {@code Path.getFileSystem} normally hands out a cached
     * instance shared with the rest of the application.
     *
     * @param path destination, either an HDFS URI or a local path
     * @param data rows to write
     */
    public void writeToFile(String path, Dataset<Row> data) {
        if (path.contains("hdfs://")) {
            StringBuilder stringBuilder = new StringBuilder();
            new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])data.coalesce(1).collect())).foreach((Function1 & Serializable & scala.Serializable)row -> stringBuilder.append(row.mkString(",")).append("\n"));
            Path fsPath = new Path(path);
            FileSystem fs = fsPath.getFileSystem(this.createSpark().sparkContext().hadoopConfiguration());
            fs.delete(fsPath, true);
            try (FSDataOutputStream os = fs.create(fsPath)) {
                os.write(stringBuilder.toString().getBytes());
            }
        } else {
            scala.collection.immutable.Map csvOptions = (scala.collection.immutable.Map)Predef$.MODULE$.Map().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Tuple2[]{new Tuple2((Object)"delimiter", (Object)","), new Tuple2((Object)"header", (Object)"true")}));
            data.coalesce(1).write().mode(SaveMode.Overwrite).options((Map)csvOptions).csv(path);
        }
    }

    /**
     * Picks a cluster count between 2 and 10 for bisecting k-means on {@code data}.
     *
     * <p>The data (or a sample of it when {@code percentage != 1.0}) is run through the
     * feature extractor in {@code "or"} mode and a {@link CountVectorizer}. For each
     * {@code k} from 2 to 10 a model is fitted and evaluated:
     * <ul>
     *   <li>if the model yields fewer distinct predictions than {@code k}, the search stops
     *       and that number of distinct predictions is returned, or {@code k} itself when only
     *       one distinct prediction was produced;</li>
     *   <li>otherwise the {@link ClusteringEvaluator} score is logged and the best-scoring
     *       {@code k} is remembered.</li>
     * </ul>
     *
     * <p>The decompiled form of this method expressed the early exit through
     * {@code NonLocalReturnControl} and always rethrew it; a plain loop with a direct
     * {@code return} restores the intended behaviour.
     *
     * @param data input rows to cluster
     * @param percentage sampling fraction; {@code 1.0} uses all of {@code data}
     * @return the selected cluster count, or {@code -1} when no score exceeded the initial minimum
     */
    public int detectNumberOfClusters(Dataset<Row> data, double percentage) {
        Dataset sampledData = percentage != 1.0 ? data.sample(percentage) : data;
        FeatureExtractorModel featureExtractorModel = new FeatureExtractorModel().setMode("or");
        Dataset<Row> extractedFeaturesDataFrame = featureExtractorModel.transform(sampledData);
        CountVectorizerModel assembler = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("features").fit(extractedFeaturesDataFrame);
        Dataset transformedData = assembler.transform(extractedFeaturesDataFrame);
        ClusteringEvaluator evaluator = new ClusteringEvaluator();
        int bestIndex = -1;
        double maxQuality = Double$.MODULE$.MinValue();
        for (int i = 2; i <= 10; ++i) {
            BisectingKMeans bkm = new BisectingKMeans().setK(i).setSeed(1L).setFeaturesCol("features");
            BisectingKMeansModel model = bkm.fit(transformedData);
            Dataset predictions = model.transform(transformedData);
            long count = predictions.select("prediction", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).distinct().count();
            if (count < (long)i) {
                // The model could not produce i clusters: stop searching.
                return count != 1L ? (int)count : i;
            }
            double silhouette = evaluator.evaluate(predictions);
            if (silhouette > maxQuality) {
                maxQuality = silhouette;
                bestIndex = i;
            }
            this.LOG().info((Object)new java.lang.StringBuilder(40).append("Silhouette Coefficient for ").append(i).append(" clusters is ").append(silhouette).toString());
        }
        return bestIndex;
    }

    /**
     * Flags rows whose {@code o} value lies outside the interquartile fences of its
     * {@code (prediction, p)} group.
     *
     * <p>Per group the row count and the approximate 25th/75th percentiles of {@code o} are
     * computed; the fences are {@code q1 - 1.5 * iqr} and {@code q3 + 1.5 * iqr}. Only groups
     * with more than {@code anomalyListSize} rows are considered.
     *
     * @param data rows with columns {@code s}, {@code p}, {@code o} and {@code prediction}
     * @param verbose when {@code true}, intermediate tables and the anomaly count are shown/logged
     * @param anomalyListSize groups must have more rows than this to be inspected
     * @return the {@code s}, {@code p}, {@code o} columns of the rows outside the fences
     */
    public Dataset<Row> iqr(Dataset<Row> data, boolean verbose, int anomalyListSize) {
        // Aggregates and derived columns of the per-group statistics table.
        Column sampleCount = functions$.MODULE$.count("o").as("count");
        Column firstQuartile = functions$.MODULE$.expr("approx_percentile(o,0.25)").as("q1");
        Column thirdQuartile = functions$.MODULE$.expr("approx_percentile(o,0.75)").as("q3");
        Column quartileSpread = functions$.MODULE$.col("q3").$minus((Object)functions$.MODULE$.col("q1"));
        Column upperFence = functions$.MODULE$.col("q3").$plus((Object)functions$.MODULE$.lit((Object)BoxesRunTime.boxToDouble((double)1.5)).$times((Object)functions$.MODULE$.col("iqr")));
        Column lowerFence = functions$.MODULE$.col("q1").$minus((Object)functions$.MODULE$.lit((Object)BoxesRunTime.boxToDouble((double)1.5)).$times((Object)functions$.MODULE$.col("iqr")));

        Dataset groupStats = data.groupBy("prediction", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"p"}))
            .agg(sampleCount, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{firstQuartile, thirdQuartile}))
            .withColumn("iqr", quartileSpread)
            .withColumn("upper", upperFence)
            .withColumn("lower", lowerFence)
            .cache();
        if (verbose) {
            this.LOG().info((Object)"result of aggregation:");
            groupStats.show(false);
        }

        Dataset rowsWithStats = data.join(groupStats, (Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"prediction", "p"})));
        if (verbose) {
            this.LOG().info((Object)"result of aggregation join:");
            rowsWithStats.show(false);
        }

        Column groupLargeEnough = functions$.MODULE$.col("count").$greater((Object)BoxesRunTime.boxToInteger((int)anomalyListSize));
        Column outsideFences = functions$.MODULE$.col("o").$less((Object)functions$.MODULE$.col("lower")).$bar$bar((Object)functions$.MODULE$.col("o").$greater((Object)functions$.MODULE$.col("upper")));
        Dataset finalResult = rowsWithStats.filter(groupLargeEnough).filter(outsideFences).select("s", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"p", "o"}));
        if (verbose) {
            this.LOG().info((Object)("total number of anomalies " + finalResult.count()));
            finalResult.show(false);
        }
        return finalResult;
    }

    /**
     * Flags rows whose {@code o} value is more than {@code 2.5 * MAD} away from the median
     * of its {@code (prediction, p)} group.
     *
     * <p>Per group the row count and the approximate median of {@code o} are computed. The
     * MAD is the approximate median of the <em>absolute</em> deviations
     * {@code |o - median|}; using the signed deviation (as the previous version did) yields
     * a value near zero and does not measure spread. Only groups with more than
     * {@code anomalyListSize} rows are considered.
     *
     * @param data rows with columns {@code s}, {@code p}, {@code o} and {@code prediction}
     * @param verbose when {@code true}, intermediate tables and the anomaly count are shown/logged
     * @param anomalyListSize groups must have more rows than this to be inspected
     * @return the {@code s}, {@code p}, {@code o} columns of the rows outside the bounds
     */
    public Dataset<Row> mad(Dataset<Row> data, boolean verbose, int anomalyListSize) {
        Dataset dataPivoted = data.groupBy("prediction", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"p"})).agg(functions$.MODULE$.count("o").as("count"), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.expr("approx_percentile(o,0.5)").as("median")}));
        if (verbose) {
            this.LOG().info((Object)"result of aggregation:");
            dataPivoted.show(false);
        }
        // Absolute deviation from the group median; the MAD is the median of these values.
        Column absoluteDeviation = functions$.MODULE$.abs(functions$.MODULE$.col("o").$minus((Object)functions$.MODULE$.col("median")));
        Dataset a = data.join(dataPivoted, (Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"prediction", "p"}))).withColumn("difference", absoluteDeviation);
        if (verbose) {
            this.LOG().info((Object)"result of aggregation join:");
            a.show(false);
        }
        Dataset b = a.groupBy("prediction", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"p"})).agg(functions$.MODULE$.expr("approx_percentile(difference,0.5)").as("MAD"), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[0]));
        if (verbose) {
            b.show(false);
        }
        Dataset c = a.join(b, (Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"prediction", "p"}))).withColumn("upper", functions$.MODULE$.col("median").$plus((Object)functions$.MODULE$.lit((Object)BoxesRunTime.boxToDouble((double)2.5)).$times((Object)functions$.MODULE$.col("MAD")))).withColumn("lower", functions$.MODULE$.col("median").$minus((Object)functions$.MODULE$.lit((Object)BoxesRunTime.boxToDouble((double)2.5)).$times((Object)functions$.MODULE$.col("MAD"))));
        if (verbose) {
            c.show(false);
        }
        Dataset finalResult = c.filter(functions$.MODULE$.col("count").$greater((Object)BoxesRunTime.boxToInteger((int)anomalyListSize))).filter(functions$.MODULE$.col("o").$less((Object)functions$.MODULE$.col("lower")).$bar$bar((Object)functions$.MODULE$.col("o").$greater((Object)functions$.MODULE$.col("upper")))).select("s", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"p", "o"}));
        if (verbose) {
            this.LOG().info((Object)new java.lang.StringBuilder(26).append("total number of anomalies ").append(finalResult.count()).toString());
            finalResult.show(false);
        }
        return finalResult;
    }

    /**
     * Flags rows whose {@code o} value has an absolute z-score above 2 within its
     * {@code (prediction, p)} group.
     *
     * <p>Per group the row count, mean and standard deviation of {@code o} are computed and
     * the z-score is {@code (o - mean) / std}. The previous version evaluated
     * {@code o - (mean / std)} because of operator precedence, which is not a z-score. Only
     * groups with more than {@code anomalyListSize} rows are considered.
     *
     * @param data rows with columns {@code s}, {@code p}, {@code o} and {@code prediction}
     * @param verbose when {@code true}, intermediate tables and the anomaly count are shown/logged
     * @param anomalyListSize groups must have more rows than this to be inspected
     * @return the {@code s}, {@code p}, {@code o} columns of the rows above the threshold
     */
    public Dataset<Row> zscore(Dataset<Row> data, boolean verbose, int anomalyListSize) {
        Dataset dataPivoted = data.groupBy("prediction", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"p"})).agg(functions$.MODULE$.count("o").as("count"), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.mean("o").as("mean"), functions$.MODULE$.stddev("o").as("std")}));
        if (verbose) {
            this.LOG().info((Object)"result of aggregation:");
            dataPivoted.show(false);
        }
        Dataset a = data.join(dataPivoted, (Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"prediction", "p"})));
        if (verbose) {
            this.LOG().info((Object)"result of aggregation join:");
            a.show(false);
        }
        // Subtract the mean first, then divide by the standard deviation.
        Column standardScore = functions$.MODULE$.col("o").$minus((Object)functions$.MODULE$.col("mean")).$div((Object)functions$.MODULE$.col("std"));
        a = a.withColumn("zscore", standardScore);
        int threshold = 2;
        Dataset finalResult = a.filter(functions$.MODULE$.col("count").$greater((Object)BoxesRunTime.boxToInteger((int)anomalyListSize))).filter(functions$.MODULE$.abs(functions$.MODULE$.col("zscore")).$greater((Object)BoxesRunTime.boxToInteger((int)threshold))).select("s", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"p", "o"}));
        if (verbose) {
            this.LOG().info((Object)new java.lang.StringBuilder(26).append("total number of anomalies ").append(finalResult.count()).toString());
            finalResult.show(false);
        }
        return finalResult;
    }

    /*
     * WARNING - void declaration
     */
    public Dataset<Row> createDF(RDD<Triple> data) {
        void var2_2;
        Dataset df = net.sansa_stack.rdf.spark.model.package$.MODULE$.TripleOperations(data).toDF();
        return var2_2;
    }

    /**
     * Converts an RDD of triples into a data frame and replaces column {@code "o"} with the
     * result of applying {@link #convertStringToDouble()} to it.
     *
     * @param data triples to convert
     * @return the data frame with the converted {@code "o"} column
     */
    public Dataset<Row> createDFWithConversion(RDD<Triple> data) {
        Dataset tripleFrame = net.sansa_stack.rdf.spark.model.package$.MODULE$.TripleOperations(data).toDF();
        Column objectColumn = functions$.MODULE$.col("o");
        Column convertedObject = this.convertStringToDouble().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{objectColumn}));
        return tripleFrame.withColumn("o", convertedObject);
    }

    /**
     * Clusters the entities described by an RDD of triples with bisecting k-means.
     *
     * <p>Features are extracted in {@code "or"} mode, vectorised with a
     * {@link CountVectorizer} (column {@code extractedFeatures} to {@code features}) and
     * clustered with seed {@code 1}. In the result the {@code uri} column is renamed to
     * {@code s}.
     *
     * @param data triples to cluster
     * @param numberOfClusters value passed to {@code BisectingKMeans.setK}
     * @return the model's output with {@code uri} renamed to {@code s}
     */
    public Dataset<Row> calculateBiSectingKmeanClustering(RDD<Triple> data, int numberOfClusters) {
        FeatureExtractorModel extractor = new FeatureExtractorModel().setMode("or");
        Dataset<Row> extracted = extractor.transform(data);

        CountVectorizer vectorizer = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("features");
        CountVectorizerModel vectorizerModel = vectorizer.fit(extracted);
        Dataset vectorised = vectorizerModel.transform(extracted);

        BisectingKMeans kMeans = new BisectingKMeans().setK(numberOfClusters).setSeed(1L).setFeaturesCol("features");
        BisectingKMeansModel kMeansModel = kMeans.fit(vectorised);
        return kMeansModel.transform(vectorised).withColumnRenamed("uri", "s");
    }

    /**
     * Clusters the entities described by a data frame with bisecting k-means.
     *
     * <p>Features are extracted in {@code "or"} mode, vectorised with a
     * {@link CountVectorizer} (column {@code extractedFeatures} to {@code features}) and
     * clustered with seed {@code 1}. In the result the {@code uri} column is renamed to
     * {@code s}.
     *
     * @param data rows to cluster
     * @param numberOfClusters value passed to {@code BisectingKMeans.setK}
     * @return the model's output with {@code uri} renamed to {@code s}
     */
    public Dataset<Row> calculateBiSectingKmeanClustering(Dataset<Row> data, int numberOfClusters) {
        FeatureExtractorModel extractor = new FeatureExtractorModel().setMode("or");
        Dataset<Row> extracted = extractor.transform(data);

        CountVectorizer vectorizer = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("features");
        CountVectorizerModel vectorizerModel = vectorizer.fit(extracted);
        Dataset vectorised = vectorizerModel.transform(extracted);

        BisectingKMeans kMeans = new BisectingKMeans().setK(numberOfClusters).setSeed(1L).setFeaturesCol("features");
        BisectingKMeansModel kMeansModel = kMeans.fit(vectorised);
        return kMeansModel.transform(vectorised).withColumnRenamed("uri", "s");
    }

    /*
     * WARNING - void declaration
     */
    public String getLocalName(Node x) {
        void var3_3;
        int a = x.toString().lastIndexOf("/");
        String b = x.toString().substring(a + 1);
        return var3_3;
    }

    /**
     * Tells whether {@code a} occurs in {@code b}.
     *
     * <p>Elements are compared by numeric equality, so {@code NaN} is never found and
     * {@code 0.0} matches {@code -0.0}.
     *
     * @param a value to look for
     * @param b array to scan
     * @return {@code true} when some element of {@code b} equals {@code a}
     */
    public boolean search(double a, double[] b) {
        for (double candidate : b) {
            if (candidate == a) {
                return true;
            }
        }
        return false;
    }

    /**
     * Groups triples into clusters derived from pairwise subject similarity and returns them
     * as a data frame with columns {@code s}, {@code p}, {@code o} and {@code prediction}.
     *
     * <p>Outline, as far as this view shows:
     * <ol>
     *   <li>{@code propClustering} (not visible here) maps each subject of
     *       {@code partialDataRDD} to a set of 3-tuples.</li>
     *   <li>Depending on {@code config.clusteringType()}, {@code calculatePairwiseSimilarity}
     *       (not visible here) is run either on the triples of {@code originalData} whose
     *       subject occurs in {@code partialDataRDD} ({@code PARTIAL}) or on all of
     *       {@code originalData} ({@code FULL}); any other value raises a {@link MatchError}.</li>
     *   <li>Similar subject pairs are aggregated into per-subject neighbour sets, joined with
     *       the per-subject tuples, regrouped by neighbour set and then split per distinct
     *       second tuple component (presumably the predicate; to be confirmed).</li>
     *   <li>Each resulting set is numbered with {@code zipWithIndex}; that index becomes the
     *       {@code prediction} column and the third tuple component is parsed as a double
     *       after removing double quotes.</li>
     * </ol>
     *
     * @param partialDataRDD triples that define the subjects and tuples to cluster
     * @param originalData full set of triples used for the similarity computation
     * @param config supplies the clustering type and the pairwise distance threshold
     * @return data frame with columns {@code s}, {@code p}, {@code o}, {@code prediction}
     */
    public Dataset<Row> calculateMinHashLSHClustering(RDD<Triple> partialDataRDD, RDD<Triple> originalData, DistADConfig config) {
        Dataset<?> dataset;
        RDD<Tuple2<String, Set<Tuple3<String, String, Object>>>> mapSubWithTriples = this.propClustering(partialDataRDD);
        // Subjects of the partial data are collected to the driver and used as a filter set.
        scala.collection.immutable.Set validEntitySet = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])partialDataRDD.map((Function1 & Serializable & scala.Serializable)f -> f.getSubject(), ClassTag$.MODULE$.apply(Node.class)).cache().collect())).toSet();
        RDD nTriplesRDDWithValidEntities = originalData.filter((Function1 & Serializable & scala.Serializable)f -> BoxesRunTime.boxToBoolean((boolean)validEntitySet.contains((Object)f.getSubject())));
        // Decompiled form of a match on config.clusteringType(): PARTIAL vs FULL.
        String string = config.clusteringType();
        String string2 = config.PARTIAL();
        String string3 = string;
        if (!(string2 != null ? !string2.equals(string3) : string3 != null)) {
            dataset = this.calculatePairwiseSimilarity((RDD<Triple>)nTriplesRDDWithValidEntities, config.pairWiseDistanceThreshold());
        } else {
            String string4 = config.FULL();
            String string5 = string;
            if (!(string4 != null ? !string4.equals(string5) : string5 != null)) {
                dataset = this.calculatePairwiseSimilarity(originalData, config.pairWiseDistanceThreshold());
            } else {
                throw new MatchError((Object)string);
            }
        }
        Dataset<?> pairwiseSim = dataset;
        // Keep pairs with two non-null, different URIs and expose them as (id1, id2).
        Dataset opiu = pairwiseSim.filter(functions$.MODULE$.col("datasetA.uri").isNotNull()).filter(functions$.MODULE$.col("datasetB.uri").isNotNull()).filter(functions$.MODULE$.col("datasetA.uri").$eq$bang$eq((Object)functions$.MODULE$.col("datasetB.uri"))).select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("datasetA.uri").alias("id1"), functions$.MODULE$.col("datasetB.uri").alias("id2")}));
        Dataset x1 = opiu.persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());
        RDD x1Map = x1.rdd().map((Function1 & Serializable & scala.Serializable)row -> {
            String id = row.getString(0);
            String value = row.getString(1);
            return new Tuple2((Object)id, (Object)value);
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Collect, per id1, the set of all id2 values paired with it.
        Set initialSet3 = Set$.MODULE$.empty();
        Function2 & Serializable & scala.Serializable addToSet3 = (Function2 & Serializable & scala.Serializable)(s, v) -> (Set)s.$plus$eq(v);
        Function2 & Serializable & scala.Serializable mergePartitionSets3 = (Function2 & Serializable & scala.Serializable)(p1, p2) -> (Set)p1.$plus$plus$eq((TraversableOnce)p2);
        RDD uniqueByKey3 = RDD$.MODULE$.rddToPairRDDFunctions(x1Map, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(String.class), (Ordering)Ordering.String$.MODULE$).aggregateByKey((Object)initialSet3, (Function2)addToSet3, (Function2)mergePartitionSets3, ClassTag$.MODULE$.apply(Set.class));
        // Add the key itself to its own neighbour set and freeze the set.
        RDD k = uniqueByKey3.map((Function1 & Serializable & scala.Serializable)f -> new Tuple2(f._1(), (Object)((SetLike)f._2()).$plus$eq(f._1()).toSet()), ClassTag$.MODULE$.apply(Tuple2.class));
        // Both sides of the join use the same 500-partition hash partitioner.
        HashPartitioner partitioner = new HashPartitioner(500);
        RDD mapSubWithTriplesPart = RDD$.MODULE$.rddToPairRDDFunctions(mapSubWithTriples, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Set.class), (Ordering)Ordering.String$.MODULE$).partitionBy((Partitioner)partitioner).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());
        RDD ys = RDD$.MODULE$.rddToPairRDDFunctions(k, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(scala.collection.immutable.Set.class), (Ordering)Ordering.String$.MODULE$).partitionBy((Partitioner)partitioner).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());
        RDD joinSimSubTriples2 = RDD$.MODULE$.rddToPairRDDFunctions(ys, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(scala.collection.immutable.Set.class), (Ordering)Ordering.String$.MODULE$).join(mapSubWithTriplesPart);
        // Drop the subject key: keep (neighbour set, tuples of that subject).
        RDD clusterOfSubjects = joinSimSubTriples2.map((Function1 & Serializable & scala.Serializable)x0$1 -> {
            Tuple2 tuple2;
            Tuple2 tuple22 = x0$1;
            if (tuple22 == null || (tuple2 = (Tuple2)tuple22._2()) == null) {
                throw new MatchError((Object)tuple22);
            }
            scala.collection.immutable.Set iter = (scala.collection.immutable.Set)tuple2._1();
            Set iter1 = (Set)tuple2._2();
            Tuple2 tuple23 = new Tuple2((Object)iter.toSet(), (Object)iter1);
            return tuple23;
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Gather, per neighbour set, all tuple sets that share it.
        HashSet initialSet = HashSet$.MODULE$.empty();
        Function2 & Serializable & scala.Serializable addToSet = (Function2 & Serializable & scala.Serializable)(s, v) -> s.$plus$eq(v);
        Function2 & Serializable & scala.Serializable mergePartitionSets = (Function2 & Serializable & scala.Serializable)(p1, p2) -> (HashSet)p1.$plus$plus$eq((TraversableOnce)p2);
        RDD x$1 = clusterOfSubjects;
        ClassTag x$2 = ClassTag$.MODULE$.apply(scala.collection.immutable.Set.class);
        ClassTag x$3 = ClassTag$.MODULE$.apply(Set.class);
        Null$ x$4 = RDD$.MODULE$.rddToPairRDDFunctions$default$4(x$1);
        RDD uniqueByKey = RDD$.MODULE$.rddToPairRDDFunctions(x$1, x$2, x$3, null).aggregateByKey((Object)initialSet, (Function2)addToSet, (Function2)mergePartitionSets, ClassTag$.MODULE$.apply(HashSet.class));
        // Replace the key by the set of second tuple components found in the group.
        RDD propCluster = uniqueByKey.map((Function1 & Serializable & scala.Serializable)x0$2 -> {
            Tuple2 tuple2 = x0$2;
            if (tuple2 == null) {
                throw new MatchError((Object)tuple2);
            }
            HashSet iter = (HashSet)tuple2._2();
            Tuple2 tuple22 = new Tuple2((Object)((TraversableOnce)iter.flatMap((Function1 & Serializable & scala.Serializable)f2 -> (Set)f2.map((Function1 & Serializable & scala.Serializable)f -> (String)f._2(), Set$.MODULE$.canBuildFrom()), HashSet$.MODULE$.canBuildFrom())).toSet(), (Object)iter);
            return tuple22;
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Emit one (component, all tuples of the group) pair per distinct component.
        RDD propDistinct = propCluster.flatMap((Function1 & Serializable & scala.Serializable)x0$3 -> {
            Tuple2 tuple2 = x0$3;
            if (tuple2 == null) {
                throw new MatchError((Object)tuple2);
            }
            scala.collection.immutable.Set a = (scala.collection.immutable.Set)tuple2._1();
            HashSet iter = (HashSet)tuple2._2();
            scala.collection.immutable.Set set = (scala.collection.immutable.Set)a.map((Function1 & Serializable & scala.Serializable)f2 -> new Tuple2(f2, (Object)((TraversableOnce)iter.flatMap((Function1 & Serializable & scala.Serializable)f -> f, HashSet$.MODULE$.canBuildFrom())).toSet()), scala.collection.immutable.Set$.MODULE$.canBuildFrom());
            return set;
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Filter each group's tuples with the synthetic predicate $17 (not visible here);
        // presumably it keeps the tuples whose second component equals the key — to be confirmed.
        RDD clusterOfProp = propDistinct.map((Function1 & Serializable & scala.Serializable)x0$4 -> {
            Tuple2 tuple2 = x0$4;
            if (tuple2 == null) {
                throw new MatchError((Object)tuple2);
            }
            String a = (String)tuple2._1();
            scala.collection.immutable.Set iter1 = (scala.collection.immutable.Set)tuple2._2();
            scala.collection.immutable.Set set = (scala.collection.immutable.Set)iter1.filter((Function1 & Serializable & scala.Serializable)f -> BoxesRunTime.boxToBoolean((boolean)DistADUtil$.$anonfun$calculateMinHashLSHClustering$17(a, f)));
            return set;
        }, ClassTag$.MODULE$.apply(scala.collection.immutable.Set.class));
        mapSubWithTriplesPart.unpersist(mapSubWithTriplesPart.unpersist$default$1());
        ys.unpersist(ys.unpersist$default$1());
        // Spread the clusters over 1000 partitions before numbering them.
        int x$5 = 1000;
        Ordering x$6 = clusterOfProp.repartition$default$2(x$5);
        RDD setData = clusterOfProp.repartition(x$5, x$6).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());
        RDD setDataStore = setData.map((Function1 & Serializable & scala.Serializable)f -> f.toSeq(), ClassTag$.MODULE$.apply(Seq.class));
        SparkSession spark = this.createSpark();
        // The zipWithIndex position of each cluster becomes its prediction id.
        RDD b = setDataStore.zipWithIndex();
        RDD a = b.map((Function1 & Serializable & scala.Serializable)f -> (Seq)((TraversableLike)f._1()).map((Function1 & Serializable & scala.Serializable)r -> new Tuple4(r._1(), r._2(), (Object)BoxesRunTime.boxToDouble((double)new StringOps(Predef$.MODULE$.augmentString(r._3().toString().replace("\"", ""))).toDouble()), (Object)BoxesRunTime.boxToLong((long)f._2$mcJ$sp())), Seq$.MODULE$.canBuildFrom()), ClassTag$.MODULE$.apply(Seq.class));
        RDD c = a.flatMap((Function1 & Serializable & scala.Serializable)x -> (Seq)Predef$.MODULE$.identity(x), ClassTag$.MODULE$.apply(Tuple4.class));
        // Compiler-generated type tag for Tuple4[String, String, Double, Long], needed by the encoder below.
        JavaUniverse $u = package$.MODULE$.universe();
        JavaUniverse.JavaMirror $m = package$.MODULE$.universe().runtimeMirror(this.getClass().getClassLoader());
        public final class Net_sansa_stack_ml_spark_anomalydetection_DistADUtil$$typecreator5$1
        extends TypeCreator {
            public <U extends Universe> Types.TypeApi apply(Mirror<U> $m$untyped) {
                Universe $u = $m$untyped.universe();
                Mirror<U> $m = $m$untyped;
                return $u.internal().reificationSupport().TypeRef($u.internal().reificationSupport().ThisType($m.staticPackage("scala").asModule().moduleClass()), (Symbols.SymbolApi)$m.staticClass("scala.Tuple4"), (List)new .colon.colon((Object)$u.internal().reificationSupport().TypeRef($u.internal().reificationSupport().SingleType($m.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), (Symbols.SymbolApi)$m.staticModule("scala.Predef")), (Symbols.SymbolApi)$u.internal().reificationSupport().selectType($m.staticModule("scala.Predef").asModule().moduleClass(), "String"), (List)Nil$.MODULE$), (List)new .colon.colon((Object)$u.internal().reificationSupport().TypeRef($u.internal().reificationSupport().SingleType($m.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), (Symbols.SymbolApi)$m.staticModule("scala.Predef")), (Symbols.SymbolApi)$u.internal().reificationSupport().selectType($m.staticModule("scala.Predef").asModule().moduleClass(), "String"), (List)Nil$.MODULE$), (List)new .colon.colon((Object)$m.staticClass("scala.Double").asType().toTypeConstructor(), (List)new .colon.colon((Object)$m.staticClass("scala.Long").asType().toTypeConstructor(), (List)Nil$.MODULE$)))));
            }

            public Net_sansa_stack_ml_spark_anomalydetection_DistADUtil$$typecreator5$1() {
            }
        }
        return spark.implicits().rddToDatasetHolder(c, spark.implicits().newProductEncoder(((TypeTags)$u).TypeTag().apply((Mirror)$m, (TypeCreator)new Net_sansa_stack_ml_spark_anomalydetection_DistADUtil$$typecreator5$1()))).toDF((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"s", "p", "o", "prediction"}));
    }

    /**
     * Computes an approximate pairwise similarity join over the features extracted from the given triples.
     *
     * <p>Steps, in order: features are extracted with a {@code FeatureExtractorModel} in "or" mode,
     * turned into count vectors ("vectorizedFeatures"), rows whose vector has no non-zero entry are
     * dropped, and a {@code MinHashLSH} model with 3 hash tables is fitted on the remaining
     * ("uri", "vectorizedFeatures") rows. The same rows are then joined with themselves through
     * {@code approxSimilarityJoin}.
     *
     * @param triplesRDD the triples from which features are extracted
     * @param pairWiseDistanceThreshold threshold handed to {@code approxSimilarityJoin}
     * @return the result of the self-join; the distance is stored in the column named "distance"
     */
    private Dataset<?> calculatePairwiseSimilarity(RDD<Triple> triplesRDD, double pairWiseDistanceThreshold) {
        FeatureExtractorModel featureExtractorModel = new FeatureExtractorModel().setMode("or");
        Dataset<Row> extractedFeaturesDataFrame = featureExtractorModel.transform(triplesRDD);
        CountVectorizerModel cvModel = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("vectorizedFeatures").fit(extractedFeaturesDataFrame);
        Dataset tmpCvDf = cvModel.transform(extractedFeaturesDataFrame);
        // Compiler-generated reflection plumbing: provides the TypeTag of the UDF argument
        // (org.apache.spark.ml.linalg.Vector) required by functions.udf below.
        JavaUniverse $u = package$.MODULE$.universe();
        JavaUniverse.JavaMirror $m = package$.MODULE$.universe().runtimeMirror(this.getClass().getClassLoader());
        public final class Net_sansa_stack_ml_spark_anomalydetection_DistADUtil$$typecreator1$2
        extends TypeCreator {
            public <U extends Universe> Types.TypeApi apply(Mirror<U> $m$untyped) {
                Universe $u = $m$untyped.universe();
                Mirror<U> $m = $m$untyped;
                return $m.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }

            public Net_sansa_stack_ml_spark_anomalydetection_DistADUtil$$typecreator1$2() {
            }
        }
        // UDF that is true only for vectors with at least one non-zero entry.
        UserDefinedFunction isNoneZeroVector = functions$.MODULE$.udf((Function1 & Serializable & scala.Serializable)v -> BoxesRunTime.boxToBoolean((boolean)DistADUtil$.$anonfun$calculatePairwiseSimilarity$1(v)), ((TypeTags)package$.MODULE$.universe()).TypeTag().Boolean(), ((TypeTags)$u).TypeTag().apply((Mirror)$m, (TypeCreator)new Net_sansa_stack_ml_spark_anomalydetection_DistADUtil$$typecreator1$2()));
        Dataset countVectorizedFeaturesDataFrame = tmpCvDf.filter(isNoneZeroVector.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("vectorizedFeatures")}))).select("uri", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"vectorizedFeatures"}));
        Dataset countVectorizedFeaturesDataFramePartitioned = countVectorizedFeaturesDataFrame.cache();
        // Force evaluation so the cache is populated before fit() and the self-join read it.
        // count() does this on the executors; collect() would additionally ship every row to the
        // driver only to discard it, which risks exhausting driver memory on large inputs.
        countVectorizedFeaturesDataFramePartitioned.count();
        MinHashLSHModel minHashModel = (MinHashLSHModel)new MinHashLSH().setNumHashTables(3).setInputCol("vectorizedFeatures").setOutputCol("hashValues").fit(countVectorizedFeaturesDataFramePartitioned);
        Dataset pairwiseSim = minHashModel.approxSimilarityJoin(countVectorizedFeaturesDataFramePartitioned, countVectorizedFeaturesDataFramePartitioned, pairWiseDistanceThreshold, "distance");
        return pairwiseSim;
    }

    public RDD<Tuple2<String, Set<Tuple3<String, String, Object>>>> propClustering(RDD<Triple> triplesWithNumericLiteral) {
        RDD subMap = triplesWithNumericLiteral.map((Function1 & Serializable & scala.Serializable)f -> new Tuple2((Object)f.getSubject().toString(), (Object)new Tuple3((Object)f.getSubject().toString(), (Object)f.getPredicate().toString(), (Object)BoxesRunTime.boxToDouble((double)MODULE$.getNumber(f.getObject().toString())))), ClassTag$.MODULE$.apply(Tuple2.class));
        Set initialSet = Set$.MODULE$.empty();
        Function2 & Serializable & scala.Serializable addToSet = (Function2 & Serializable & scala.Serializable)(s, v) -> (Set)s.$plus$eq(v);
        Function2 & Serializable & scala.Serializable mergePartitionSets = (Function2 & Serializable & scala.Serializable)(p1, p2) -> (Set)p1.$plus$plus$eq((TraversableOnce)p2);
        RDD uniqueByKey = RDD$.MODULE$.rddToPairRDDFunctions(subMap, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Tuple3.class), (Ordering)Ordering.String$.MODULE$).aggregateByKey((Object)initialSet, (Function2)addToSet, (Function2)mergePartitionSets, ClassTag$.MODULE$.apply(Set.class));
        return uniqueByKey;
    }

    /**
     * Maps every triple to the pair (local name of its subject, local name of its predicate).
     *
     * @param a the triples to convert
     * @return an RDD holding one (subject, predicate) local-name pair per input triple
     */
    public RDD<Tuple2<String, String>> propWithSubject(RDD<Triple> a) {
        RDD localNamePairs = a.map((Function1 & Serializable & scala.Serializable)triple -> {
            Object subjectName = (Object)MODULE$.getLocalName(triple.getSubject());
            Object predicateName = (Object)MODULE$.getLocalName(triple.getPredicate());
            return new Tuple2(subjectName, predicateName);
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        return localNamePairs;
    }

    /**
     * Slow path of the lazily created session: while holding the monitor of the given
     * {@code LazyRef}, returns its value if it is already initialized, otherwise builds the
     * "Anomaly Detection" session via {@code getOrCreate()} and stores it in the reference.
     *
     * @param spark$lzy$1 holder of the lazily created session
     * @return the session held by the reference after this call
     */
    private static final /* synthetic */ SparkSession spark$lzycompute$1(LazyRef spark$lzy$1) {
        synchronized (spark$lzy$1) {
            if (spark$lzy$1.initialized()) {
                return (SparkSession)spark$lzy$1.value();
            }
            String kryoRegistrators = String.join((CharSequence)", ", "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator", "net.sansa_stack.query.spark.sparqlify.KryoRegistratorSparqlify");
            SparkSession created = SparkSession$.MODULE$.builder()
                .appName("Anomaly Detection")
                .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                .config("spark.kryo.registrator", kryoRegistrators)
                .config("spark.sql.crossJoin.enabled", true)
                .config("spark.sql.shuffle.partitions", 2000L)
                .config("spark.kryoserializer.buffer.max", 2047L)
                .config("spark.sql.pivotMaxValues", 100000L)
                .getOrCreate();
            return (SparkSession)spark$lzy$1.initialize((Object)created);
        }
    }

    /**
     * Returns the session held by the given {@code LazyRef}, delegating to
     * {@code spark$lzycompute$1} when the reference has not been initialized yet.
     *
     * @param spark$lzy$1 holder of the lazily created session
     * @return the session
     */
    private static final SparkSession spark$1(LazyRef spark$lzy$1) {
        if (spark$lzy$1.initialized()) {
            return (SparkSession)spark$lzy$1.value();
        }
        return DistADUtil$.spark$lzycompute$1(spark$lzy$1);
    }

    /**
     * Per-character step of the all-digits check: sets the flag to {@code false} when the
     * character is not a digit and leaves it untouched otherwise.
     *
     * @param found$1 mutable flag shared across the characters being inspected
     * @param ch the character to inspect
     */
    public static final /* synthetic */ void $anonfun$isAllDigits$1(BooleanRef found$1, char ch) {
        boolean digit = RichChar$.MODULE$.isDigit$extension(Predef$.MODULE$.charWrapper(ch));
        if (!digit) {
            found$1.elem = false;
        }
    }

    /**
     * Tells whether the object of the given triple is a literal.
     *
     * @param f the triple to inspect
     * @return {@code true} if the triple's object node is a literal
     */
    public static final /* synthetic */ boolean $anonfun$getOnlyLiteralObjects$1(Triple f) {
        Node objectNode = f.getObject();
        return objectNode.isLiteral();
    }

    /**
     * Tells whether the predicate of the given triple does NOT end with "id", ignoring case.
     *
     * @param f the triple to inspect
     * @return {@code false} when the lower-cased predicate string ends with "id", {@code true} otherwise
     */
    public static final /* synthetic */ boolean $anonfun$triplesWithNumericLitWithTypeIgnoreEndingWithID$2(Triple f) {
        // Lower-case with Locale.ROOT so the result does not depend on the JVM default locale:
        // under a Turkish locale "ID" lower-cases to a dotless "ı" + "d" and would not match "id".
        return !f.getPredicate().toString().toLowerCase(java.util.Locale.ROOT).endsWith("id");
    }

    /**
     * Converts a string to a double: the value returned by {@code getNumber} is boxed, rendered
     * as text, and that text is parsed as a double.
     *
     * @param v the string to convert
     * @return the resulting double
     */
    public static final /* synthetic */ double $anonfun$convertStringToDouble$1(String v) {
        Object boxedNumber = (Object)BoxesRunTime.boxToDouble((double)MODULE$.getNumber(v));
        String numberText = boxedNumber.toString();
        StringOps numberOps = new StringOps(Predef$.MODULE$.augmentString(numberText));
        return numberOps.toDouble();
    }

    /**
     * Tells whether the second element of the given tuple equals the given string.
     *
     * @param a$1 the string to compare against
     * @param f the tuple whose second element is compared
     * @return {@code true} if the tuple's second element equals {@code a$1}
     */
    public static final /* synthetic */ boolean $anonfun$calculateMinHashLSHClustering$17(String a$1, Tuple3 f) {
        String second = (String)f._2();
        return second.equals(a$1);
    }

    /**
     * Tells whether the given vector has at least one non-zero entry.
     *
     * @param v the vector to inspect
     * @return {@code true} if {@code numNonzeros()} is greater than zero
     */
    public static final /* synthetic */ boolean $anonfun$calculatePairwiseSimilarity$1(Vector v) {
        boolean hasNonZeroEntry = v.numNonzeros() > 0;
        return hasNonZeroEntry;
    }

    /**
     * Private constructor of the singleton: publishes the instance through {@code MODULE$} and
     * initializes the {@code LOG}, {@code objList} and {@code convertStringToDouble} fields.
     */
    private DistADUtil$() {
        // Publish the singleton first; the UDF lambda below reaches the instance via DistADUtil$.
        MODULE$ = this;
        this.LOG = Logger.getLogger(this.getClass());
        // IRIs of XML Schema numeric datatypes (decimal, integer, double, float, int, long and the
        // signed/unsigned integer variants).
        this.objList = List$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"http://www.w3.org/2001/XMLSchema#decimal", "http://www.w3.org/2001/XMLSchema#integer", "http://www.w3.org/2001/XMLSchema#double", "http://www.w3.org/2001/XMLSchema#float", "http://www.w3.org/2001/XMLSchema#int", "http://www.w3.org/2001/XMLSchema#long", "http://www.w3.org/2001/XMLSchema#unsignedInt", "http://www.w3.org/2001/XMLSchema#unsignedLong", "http://www.w3.org/2001/XMLSchema#positiveInteger", "http://www.w3.org/2001/XMLSchema#nonNegativeInteger", "http://www.w3.org/2001/XMLSchema#negativeInteger", "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"}));
        // Compiler-generated reflection plumbing: universe and mirror used to build the TypeTag
        // passed as the last argument of functions.udf below.
        JavaUniverse $u = package$.MODULE$.universe();
        JavaUniverse.JavaMirror $m = package$.MODULE$.universe().runtimeMirror(this.getClass().getClassLoader());
        // Compiler-generated TypeCreator that reifies the type scala.Predef.String.
        public final class Net_sansa_stack_ml_spark_anomalydetection_DistADUtil$$typecreator1$1
        extends TypeCreator {
            public <U extends Universe> Types.TypeApi apply(Mirror<U> $m$untyped) {
                Universe $u = $m$untyped.universe();
                Mirror<U> $m = $m$untyped;
                return $u.internal().reificationSupport().TypeRef($u.internal().reificationSupport().SingleType($m.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), (Symbols.SymbolApi)$m.staticModule("scala.Predef")), (Symbols.SymbolApi)$u.internal().reificationSupport().selectType($m.staticModule("scala.Predef").asModule().moduleClass(), "String"), (List)Nil$.MODULE$);
            }

            public Net_sansa_stack_ml_spark_anomalydetection_DistADUtil$$typecreator1$1() {
            }
        }
        // UDF wrapping $anonfun$convertStringToDouble$1; registered with a Double type tag and the
        // String type tag built above.
        this.convertStringToDouble = functions$.MODULE$.udf((Function1 & Serializable & scala.Serializable)v -> BoxesRunTime.boxToDouble((double)DistADUtil$.$anonfun$convertStringToDouble$1(v)), ((TypeTags)package$.MODULE$.universe()).TypeTag().Double(), ((TypeTags)$u).TypeTag().apply((Mirror)$m, (TypeCreator)new Net_sansa_stack_ml_spark_anomalydetection_DistADUtil$$typecreator1$1()));
    }
}

