/*
 * Decompiled with CFR 0.152.
 */
package net.sansa_stack.ml.spark.anomalydetection;

import com.linkedin.relevance.isolationforest.IsolationForest;
import com.linkedin.relevance.isolationforest.IsolationForestModel;
import java.io.Serializable;
import net.sansa_stack.ml.spark.anomalydetection.DistADConfig;
import net.sansa_stack.ml.spark.anomalydetection.DistADLogger$;
import net.sansa_stack.ml.spark.anomalydetection.DistADUtil$;
import net.sansa_stack.ml.spark.featureExtraction.FeatureExtractingSparqlGenerator$;
import net.sansa_stack.ml.spark.featureExtraction.SparqlFrame;
import net.sansa_stack.query.spark.package;
import net.sansa_stack.rdf.spark.model.package$;
import org.apache.jena.graph.Triple;
import org.apache.spark.ml.feature.VectorAssembler;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DoubleType$;
import scala.Array$;
import scala.Function1;
import scala.MatchError;
import scala.Predef$;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.List;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import scala.runtime.RichInt$;
import scala.runtime.java8.JFunction1;

@ScalaSignature(bytes="\u0006\u0001\r4A\u0001C\u0005\u0001)!AA\u0002\u0001B\u0001B\u0003%1\u0004\u0003\u0005'\u0001\t\u0005\t\u0015!\u0003(\u0011!)\u0004A!A!\u0002\u00131\u0004\"\u0002\u001e\u0001\t\u0003Y\u0004\"\u0002!\u0001\t\u0003\t\u0005\"\u0002+\u0001\t\u0003)\u0006\"\u00020\u0001\t\u0003y&\u0001H'vYRLg)Z1ukJ,\u0017I\\8nC2LH)\u001a;fGRLwN\u001c\u0006\u0003\u0015-\t\u0001#\u00198p[\u0006d\u0017\u0010Z3uK\u000e$\u0018n\u001c8\u000b\u00051i\u0011!B:qCJ\\'B\u0001\b\u0010\u0003\tiGN\u0003\u0002\u0011#\u0005Y1/\u00198tC~\u001bH/Y2l\u0015\u0005\u0011\u0012a\u00018fi\u000e\u00011C\u0001\u0001\u0016!\t1\u0012$D\u0001\u0018\u0015\u0005A\u0012!B:dC2\f\u0017B\u0001\u000e\u0018\u0005\u0019\te.\u001f*fMB\u0011A\u0004J\u0007\u0002;)\u0011adH\u0001\u0004gFd'B\u0001\u0007!\u0015\t\t#%\u0001\u0004ba\u0006\u001c\u0007.\u001a\u0006\u0002G\u0005\u0019qN]4\n\u0005\u0015j\"\u0001D*qCJ\\7+Z:tS>t\u0017aD8sS\u001eLg.\u00197ECR\f'\u000b\u0012#\u0011\u0007!ZS&D\u0001*\u0015\tQs$A\u0002sI\u0012L!\u0001L\u0015\u0003\u0007I#E\t\u0005\u0002/g5\tqF\u0003\u00021c\u0005)qM]1qQ*\u0011!\u0007I\u0001\u0005U\u0016t\u0017-\u0003\u00025_\t1AK]5qY\u0016\faaY8oM&<\u0007CA\u001c9\u001b\u0005I\u0011BA\u001d\n\u00051!\u0015n\u001d;B\t\u000e{gNZ5h\u0003\u0019a\u0014N\\5u}Q!A(\u0010 
@!\t9\u0004\u0001C\u0003\r\t\u0001\u00071\u0004C\u0003'\t\u0001\u0007q\u0005C\u00036\t\u0001\u0007a'A\u0002sk:$\u0012A\u0011\t\u0003\u0007Fs!\u0001R(\u000f\u0005\u0015seB\u0001$N\u001d\t9EJ\u0004\u0002I\u00176\t\u0011J\u0003\u0002K'\u00051AH]8pizJ\u0011aI\u0005\u0003C\tJ!\u0001\u0004\u0011\n\u0005yy\u0012B\u0001)\u001e\u0003\u001d\u0001\u0018mY6bO\u0016L!AU*\u0003\u0013\u0011\u000bG/\u0019$sC6,'B\u0001)\u001e\u0003\u0011\u001a\u0017\r\\2vY\u0006$X-\u00118p[\u0006d\u0017.Z:G_JlU\u000f\u001c;ja2,g)Z1ukJ,GC\u0001\"W\u0011\u00159f\u00011\u0001Y\u0003\u0011!\u0017\r^1\u0011\u0007qI6,\u0003\u0002[;\t9A)\u0019;bg\u0016$\bC\u0001\u000f]\u0013\tiVDA\u0002S_^\f!#\u00193e\u00072,8\u000f^3s\u0013\u0012$v\u000eR1uCR\u0019!\tY1\t\u000b];\u0001\u0019\u0001\"\t\u000b\t<\u0001\u0019\u0001\"\u0002\u0017A\u0014X\rZ5di&|gn\u001d")
public class MultiFeatureAnomalyDetection {
    // Spark session; in this class it is only used by run() to build the empty result DataFrame.
    private final SparkSession spark;
    // Input triples; run() derives the filtered literal RDD from them and also passes them
    // unfiltered to the FULL bisecting k-means and MinHash-LSH clustering calls.
    private final RDD<Triple> originalDataRDD;
    // Pipeline settings read throughout the class; run() also writes numberOfClusters
    // back into it when the silhouette method is enabled.
    private final DistADConfig config;

    /**
     * Runs the complete multi-feature anomaly detection pipeline on {@code originalDataRDD}.
     *
     * <p>Steps, in order: filter the triples with
     * {@code DistADUtil.triplesWithNumericLitWithTypeIgnoreEndingWithID}, build the feature table
     * with the configured feature extractor, cluster the data with the configured clustering
     * method, attach the cluster id via {@link #addClusterIdToData}, score every cluster via
     * {@link #calculateAnomaliesForMultipleFeature}, and finally keep the flagged rows.
     *
     * @return the rows for which at least one {@code predictedLabel*} column equals 1, projected
     *         onto the columns whose name does not start with {@code predictedLabel}; an empty
     *         DataFrame when scoring produced no {@code predictedLabel*} column
     * @throws MatchError if the configured feature extractor, clustering method or clustering
     *         type matches none of the constants exposed by the config
     */
    public Dataset<Row> run() {
        long startTime = System.currentTimeMillis();
        RDD<Triple> onlyLiteralDataRDD = DistADUtil$.MODULE$.triplesWithNumericLitWithTypeIgnoreEndingWithID(this.originalDataRDD);
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info((Object)"Original Data RDD Only with numeric Literals:");
            for (Object triple : (Object[])onlyLiteralDataRDD.take(10)) {
                DistADLogger$.MODULE$.LOG().info(triple);
            }
        }
        // The typed Dataset is only consumed by the Literal2Feature extractor, so it is built
        // only when that extractor is configured.
        Dataset<Triple> onlyLiteralDataDataSet = null;
        if (this.config.featureExtractor().equals(this.config.LITERAL2FEATURE())) {
            onlyLiteralDataDataSet = package$.MODULE$.TripleOperations(onlyLiteralDataRDD).toDS();
        }
        Dataset<Row> onlyLiteralDataDataFrame = DistADUtil$.MODULE$.createDFWithConversion(onlyLiteralDataRDD).cache();
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info((Object)"Original Data DataFrame Only with numeric Literals:");
            onlyLiteralDataDataFrame.show(false);
        }

        Dataset<Row> features = this.extractFeatures(onlyLiteralDataDataSet, onlyLiteralDataDataFrame);
        if (this.config.verbose()) {
            features.show(false);
        }

        Dataset<Row> predictions = this.clusterData(onlyLiteralDataRDD, features);
        Dataset<Row> featuresWithClusterId = this.addClusterIdToData(features, predictions);
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info((Object)"Add clustering result to data:");
            featuresWithClusterId.show(false);
        }

        Dataset<Row> scored = this.calculateAnomaliesForMultipleFeature(featuresWithClusterId);
        return this.selectAnomalies(scored, startTime);
    }

    /** Null-safe string equality, equivalent to Scala's {@code ==} on two references. */
    private static boolean sameString(String expected, String actual) {
        return expected != null ? expected.equals(actual) : actual == null;
    }

    /** Builds the per-subject feature table with the extractor named by {@code config.featureExtractor()}. */
    private Dataset<Row> extractFeatures(Dataset<Triple> literalTriples, Dataset<Row> literalFrame) {
        String extractor = this.config.featureExtractor();
        if (MultiFeatureAnomalyDetection.sameString(this.config.PIVOT(), extractor)) {
            // One row per "s", one column per distinct "p", holding the first "o" of the group.
            Dataset<Row> pivoted = literalFrame.groupBy("s", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).pivot("p").agg(functions$.MODULE$.first("o"), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[0]));
            if (this.config.verbose()) {
                DistADLogger$.MODULE$.LOG().info((Object)"Original Data DataFrame Only with numeric Literals and Pivoted:");
                pivoted.show(false);
            }
            // Replace '.' with '_' in every column name.
            String[] pivotedNames = pivoted.columns();
            String[] renamed = new String[pivotedNames.length];
            for (int i = 0; i < pivotedNames.length; ++i) {
                renamed[i] = pivotedNames[i].replace(".", "_");
            }
            pivoted = pivoted.toDF((Seq)Predef$.MODULE$.wrapRefArray((Object[])renamed));
            if (this.config.verbose()) {
                DistADLogger$.MODULE$.LOG().info((Object)"Original Data DataFrame Only with numeric Literals and Pivoted-Columns renamed:");
                pivoted.show(false);
            }
            return pivoted;
        }
        if (MultiFeatureAnomalyDetection.sameString(this.config.LITERAL2FEATURE(), extractor)) {
            DistADLogger$.MODULE$.LOG().info((Object)"Starting Literal2Feature. May take time....");
            String seedVarName = "?s";
            String whereClauseForSeed = "?s ?p ?o";
            int maxUp = 0;
            int maxDown = this.config.l2fDepth();
            int seedNumber = this.config.l2fSeedNumber();
            double seedNumberAsRatio = 1.0;
            Tuple2<String, List<String>> sparqlAndFeatures = FeatureExtractingSparqlGenerator$.MODULE$.createSparql(literalTriples, seedVarName, whereClauseForSeed, maxUp, maxDown, seedNumber, seedNumberAsRatio, FeatureExtractingSparqlGenerator$.MODULE$.createSparql$default$8(), FeatureExtractingSparqlGenerator$.MODULE$.createSparql$default$9(), FeatureExtractingSparqlGenerator$.MODULE$.createSparql$default$10());
            SparqlFrame sparqlFrame = new SparqlFrame().setSparqlQuery((String)sparqlAndFeatures._1()).setQueryExcecutionEngine(package.SPARQLEngine$.MODULE$.Sparqlify()).setCollapsByKey(false);
            Dataset<Row> extracted = sparqlFrame.transform(literalTriples);
            // Cast every column except the subject column "s" to double.
            for (String column : extracted.columns()) {
                if (!column.equals("s")) {
                    extracted = extracted.withColumn(column, functions$.MODULE$.col(column).cast((DataType)DoubleType$.MODULE$));
                }
            }
            return extracted;
        }
        throw new MatchError((Object)extractor);
    }

    /** Runs the clustering named by {@code config.clusteringMethod()} and returns its prediction frame. */
    private Dataset<Row> clusterData(RDD<Triple> literalTriples, Dataset<Row> features) {
        String method = this.config.clusteringMethod();
        if (MultiFeatureAnomalyDetection.sameString(this.config.BISECTINGKMEANS(), method)) {
            if (this.config.silhouetteMethod()) {
                // Overwrites the configured cluster count with the detected one.
                this.config.numberOfClusters_$eq(DistADUtil$.MODULE$.detectNumberOfClusters(features, this.config.silhouetteMethodSamplingRate()));
                DistADLogger$.MODULE$.LOG().info((Object)("Number of optimal cluster for the dataset is " + this.config.numberOfClusters()));
            }
            String clusteringType = this.config.clusteringType();
            Dataset<Row> predictions;
            if (MultiFeatureAnomalyDetection.sameString(this.config.PARTIAL(), clusteringType)) {
                // PARTIAL clusters on the filtered literal triples only.
                predictions = DistADUtil$.MODULE$.calculateBiSectingKmeanClustering(literalTriples, this.config.numberOfClusters());
            } else if (MultiFeatureAnomalyDetection.sameString(this.config.FULL(), clusteringType)) {
                // FULL clusters on every input triple.
                predictions = DistADUtil$.MODULE$.calculateBiSectingKmeanClustering(this.originalDataRDD, this.config.numberOfClusters());
            } else {
                throw new MatchError((Object)clusteringType);
            }
            if (this.config.verbose()) {
                DistADLogger$.MODULE$.LOG().info((Object)("Result of clustering with " + this.config.numberOfClusters() + " clusters:"));
                predictions.show(false);
            }
            return predictions;
        }
        if (MultiFeatureAnomalyDetection.sameString(this.config.MINHASHLSH(), method)) {
            Dataset<Row> predictions = DistADUtil$.MODULE$.calculateMinHashLSHClustering(literalTriples, this.originalDataRDD, this.config);
            if (this.config.verbose()) {
                DistADLogger$.MODULE$.LOG().info((Object)"Result of clustering:");
                predictions.show(false);
            }
            return predictions;
        }
        throw new MatchError((Object)method);
    }

    /** Keeps the rows flagged by any {@code predictedLabel*} column and logs duration and anomaly count. */
    private Dataset<Row> selectAnomalies(Dataset<Row> scored, long startTime) {
        String[] columns = scored.columns();
        // Build "<label>==1 or <label>==1 ..." over every predictedLabel* column.
        StringBuilder anomalyFilter = new StringBuilder();
        for (String column : columns) {
            if (column.startsWith("predictedLabel")) {
                if (anomalyFilter.length() > 0) {
                    anomalyFilter.append(" or ");
                }
                anomalyFilter.append(column).append("==1");
            }
        }
        if (anomalyFilter.length() == 0) {
            DistADLogger$.MODULE$.LOG().info((Object)("Operation took: " + (System.currentTimeMillis() - startTime)));
            DistADLogger$.MODULE$.LOG().info((Object)"Total number of anomalies 0");
            return this.spark.emptyDataFrame();
        }
        // The result is projected onto every column that is not a predictedLabel* column.
        Column[] buffer = new Column[columns.length];
        int keptCount = 0;
        for (String column : columns) {
            if (!column.startsWith("predictedLabel")) {
                buffer[keptCount++] = functions$.MODULE$.col(column);
            }
        }
        Column[] cols = new Column[keptCount];
        System.arraycopy(buffer, 0, cols, 0, keptCount);
        Dataset<Row> result = scored.filter(anomalyFilter.toString()).select((Seq)Predef$.MODULE$.wrapRefArray((Object[])cols));
        if (this.config.verbose()) {
            result.show(false);
        }
        DistADLogger$.MODULE$.LOG().info((Object)("Operation took: " + (System.currentTimeMillis() - startTime)));
        DistADLogger$.MODULE$.LOG().info((Object)("Total number of anomalies " + result.count()));
        return result;
    }

    /**
     * Fits one isolation forest per cluster and joins its labels onto the input frame.
     *
     * <p>For every cluster id in {@code [0, config.numberOfClusters())} the rows whose
     * {@code prediction} column equals that id are selected. Columns other than
     * {@code prediction} and {@code s} are used as features; a feature column whose only value
     * in the cluster is null is dropped, and remaining nulls are replaced with
     * {@code Double.MAX_VALUE}. The forest's {@code predictedLabel} column is renamed to
     * {@code predictedLabel_<clusterId>} and left-outer-joined onto the result by {@code s}.
     * Clusters with no rows are skipped.
     *
     * @param data feature rows; the code reads the columns {@code s} and {@code prediction}
     * @return {@code data} with one additional {@code predictedLabel_<clusterId>} column for
     *         each cluster that was scored successfully
     */
    public Dataset<Row> calculateAnomaliesForMultipleFeature(Dataset<Row> data) {
        if (this.config.verbose()) {
            data.show(false);
        }
        double contamination = 0.1;
        IsolationForest isolationForest = (IsolationForest)new IsolationForest().setNumEstimators(this.config.numEstimatorsForIF()).setBootstrap(false).setMaxSamples((double)this.config.maxSampleForIF()).setFeaturesCol("features").setPredictionCol("predictedLabel").setScoreCol("outlierScore").setContamination(contamination).setContaminationError(0.01 * contamination).setRandomSeed(1L);
        Dataset<Row> finalResult = data;
        int clusterCount = this.config.numberOfClusters();
        for (int clusterId = 0; clusterId < clusterCount; ++clusterId) {
            Dataset<Row> clusterRows = data.filter(functions$.MODULE$.col("prediction").$eq$eq$eq((Object)Integer.valueOf(clusterId)));

            // Collect the feature columns. All-null columns are dropped from the per-cluster
            // frame only; the returned frame keeps every input column.
            String[] clusterColumns = clusterRows.columns();
            String[] buffer = new String[clusterColumns.length];
            int featureCount = 0;
            for (String column : clusterColumns) {
                if (column.equals("prediction") || column.equals("s")) {
                    continue;
                }
                if (MultiFeatureAnomalyDetection.isAllNullColumn(clusterRows, column)) {
                    clusterRows = clusterRows.drop(column);
                } else {
                    buffer[featureCount++] = column;
                }
            }
            String[] featureCols = new String[featureCount];
            System.arraycopy(buffer, 0, featureCols, 0, featureCount);

            Dataset<Row> filled = clusterRows.na().fill(Double.MAX_VALUE);
            if (this.config.verbose()) {
                filled.show(false);
            }
            VectorAssembler vectorAssembler = new VectorAssembler().setInputCols(featureCols).setOutputCol("features");
            Dataset<Row> assembled = vectorAssembler.transform(filled);
            if (this.config.verbose()) {
                assembled.show(false);
            }
            if (assembled.isEmpty()) {
                continue;
            }
            try {
                IsolationForestModel isolationForestModel = isolationForest.fit(assembled);
                Dataset<Row> dataWithScores = isolationForestModel.transform(assembled);
                if (this.config.verbose()) {
                    dataWithScores.show(false);
                }
                String newColName = "predictedLabel_" + clusterId;
                finalResult = finalResult.join(dataWithScores.withColumnRenamed("predictedLabel", newColName).select("s", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{newColName})), (Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"s"})), "leftouter");
                if (this.config.verbose()) {
                    finalResult.show(false);
                }
            }
            catch (Exception e) {
                // Best effort: a failing cluster is skipped. The actual exception is appended
                // because the fixed text is only a guess at the cause.
                DistADLogger$.MODULE$.LOG().warn((Object)("Number of selected setMaxSamples for IF is too much: " + e));
            }
        }
        return finalResult;
    }

    /** Returns true when {@code column} has exactly one distinct value in {@code rows} and the first row's value is null. */
    private static boolean isAllNullColumn(Dataset<Row> rows, String column) {
        long distinctValues = rows.select(column, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).distinct().count();
        if (distinctValues != 1L) {
            return false;
        }
        return ((Row)rows.select(column, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).first()).isNullAt(0);
    }

    /*
     * WARNING - void declaration
     */
    public Dataset<Row> addClusterIdToData(Dataset<Row> data, Dataset<Row> predictions) {
        void var3_3;
        Dataset dataJoined = data.join(predictions, "s").cache();
        dataJoined = dataJoined.drop("extractedFeatures");
        dataJoined = dataJoined.drop("features");
        dataJoined = dataJoined.drop("p");
        dataJoined = dataJoined.drop("o");
        return var3_3;
    }

    /**
     * Synthetic lambda body: writes one element to the DistAD logger at info level.
     *
     * @param x$1 the element to log
     */
    public static final /* synthetic */ void $anonfun$run$1(Object x$1) {
        final Object message = x$1;
        DistADLogger$.MODULE$.LOG().info(message);
    }

    /**
     * Synthetic lambda body: casts column {@code c} of the referenced Dataset to double,
     * leaving the column named {@code s} untouched.
     *
     * @param b$1 mutable holder of the Dataset being rewritten
     * @param c name of the column to cast
     */
    public static final /* synthetic */ void $anonfun$run$3(ObjectRef b$1, String c) {
        if (c.equals("s")) {
            return;
        }
        Dataset current = (Dataset)b$1.elem;
        Column asDouble = functions$.MODULE$.col(c).cast((DataType)DoubleType$.MODULE$);
        b$1.elem = current.withColumn(c, asDouble);
    }

    /**
     * Synthetic lambda body: column filter that rejects the {@code predictedLabel*} columns.
     *
     * @param p column name
     * @return {@code false} when {@code p} starts with {@code predictedLabel}, {@code true} otherwise
     */
    public static final /* synthetic */ boolean $anonfun$run$6(String p) {
        if (p.startsWith("predictedLabel")) {
            return false;
        }
        return true;
    }

    /**
     * Synthetic lambda body: column filter that rejects the column named {@code prediction}.
     *
     * @param x$2 column name
     * @return {@code true} unless {@code x$2} equals {@code prediction}
     */
    public static final /* synthetic */ boolean $anonfun$calculateAnomaliesForMultipleFeature$2(String x$2) {
        if (x$2.equals("prediction")) {
            return false;
        }
        return true;
    }

    /**
     * Synthetic lambda body: column filter that rejects the column named {@code s}.
     *
     * @param x$3 column name
     * @return {@code true} unless {@code x$3} equals {@code s}
     */
    public static final /* synthetic */ boolean $anonfun$calculateAnomaliesForMultipleFeature$3(String x$3) {
        if (x$3.equals("s")) {
            return false;
        }
        return true;
    }

    /**
     * Synthetic lambda body: column filter that rejects one specific column name.
     *
     * @param col$1 the name to remove
     * @param p candidate column name
     * @return {@code true} unless {@code p} equals {@code col$1}
     */
    public static final /* synthetic */ boolean $anonfun$calculateAnomaliesForMultipleFeature$5(String col$1, String p) {
        if (p.equals(col$1)) {
            return false;
        }
        return true;
    }

    /**
     * Synthetic lambda body: removes column {@code col} when its only value is null.
     *
     * <p>The column is removed only if it has exactly one distinct value in the referenced
     * cluster frame and the first row's value is null. In that case it is dropped from both
     * referenced Datasets and filtered out of the referenced column-name array.
     *
     * @param data3$1 holder of the per-cluster Dataset that is inspected and trimmed
     * @param originalData$1 holder of a second Dataset the column is also dropped from
     * @param cols$1 holder of the {@code String[]} of feature column names
     * @param col name of the column to inspect
     */
    public static final /* synthetic */ void $anonfun$calculateAnomaliesForMultipleFeature$4(ObjectRef data3$1, ObjectRef originalData$1, ObjectRef cols$1, String col) {
        long distinctValues = ((Dataset)data3$1.elem).select(col, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).distinct().count();
        if (distinctValues != 1L) {
            return;
        }
        // The first row is fetched only after the distinct count is known to be 1.
        Row firstRow = (Row)((Dataset)data3$1.elem).select(col, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).first();
        if (!firstRow.isNullAt(0)) {
            return;
        }
        data3$1.elem = ((Dataset)data3$1.elem).drop(col);
        originalData$1.elem = ((Dataset)originalData$1.elem).drop(col);
        String[] currentCols = (String[])cols$1.elem;
        cols$1.elem = (String[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])currentCols)).filter((Function1 & Serializable & scala.Serializable)p -> BoxesRunTime.boxToBoolean((boolean)MultiFeatureAnomalyDetection.$anonfun$calculateAnomaliesForMultipleFeature$5(col, p)));
    }

    /**
     * Stores the collaborators used by the pipeline; no work is performed here.
     *
     * @param spark Spark session
     * @param originalDataRDD input triples to analyse
     * @param config pipeline settings
     */
    public MultiFeatureAnomalyDetection(SparkSession spark, RDD<Triple> originalDataRDD, DistADConfig config) {
        this.config = config;
        this.originalDataRDD = originalDataRDD;
        this.spark = spark;
    }
}

