/*
 * Decompiled with CFR 0.152.
 */
package net.sansa_stack.ml.spark.featureExtraction;

import java.io.Serializable;
import net.sansa_stack.ml.spark.featureExtraction.SparqlFrame;
import net.sansa_stack.rdf.common.io.riot.error.ErrorParseMode$;
import net.sansa_stack.rdf.common.io.riot.error.WarningParseMode$;
import net.sansa_stack.rdf.spark.io.NTripleReader$;
import net.sansa_stack.rdf.spark.model.package$;
import org.apache.jena.sys.JenaSystem;
import org.apache.spark.ml.feature.StopWordsRemover;
import org.apache.spark.ml.feature.StringIndexer;
import org.apache.spark.ml.feature.Tokenizer;
import org.apache.spark.ml.feature.VectorAssembler;
import org.apache.spark.ml.feature.Word2Vec;
import org.apache.spark.ml.feature.Word2VecModel;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.RelationalGroupedDataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DoubleType$;
import scala.Array$;
import scala.Function1;
import scala.Predef;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.GenSeq;
import scala.collection.Seq;
import scala.collection.SetLike;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.Map;
import scala.collection.mutable.Map$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;

public final class FeatureTypeIdentifier$ {
    /** Singleton instance of this module; assigned by the private constructor. */
    public static FeatureTypeIdentifier$ MODULE$;

    static {
        // Instantiating the class runs the private constructor, which stores the instance in MODULE$.
        new FeatureTypeIdentifier$();
    }

    /**
     * Runs the sample feature-extraction pipeline end to end and prints every intermediate result.
     *
     * <p>Steps, in order: build a Spark session, load the N-Triples file, run a SPARQL query through
     * {@code SparqlFrame}, collapse the result to one row per {@code movie} key while recording
     * per-feature characteristics, digitize each feature column, reduce list-valued columns to
     * fixed-length statistics, and assemble everything into a {@code features} vector column.
     *
     * @param args {@code args[0]} is the path of the N-Triples input file; {@code args[1]} is an
     *             optional SPARQL query. When the query is absent or empty the built-in query is used.
     * @throws IllegalArgumentException if no input file path is supplied
     */
    public void main(String[] args) {
        // Fail fast with a readable message instead of an index error after Spark has started.
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException("usage: <inputFilePath> [sparqlQuery]");
        }
        Predef$.MODULE$.println((Object)"\nSETUP SPARK SESSION");
        SparkSession spark = SparkSession$.MODULE$.builder().appName("SampleFeatureExtractionPipeline").config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").config("spark.kryo.registrator", String.join((CharSequence)", ", "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator", "net.sansa_stack.query.spark.sparqlify.KryoRegistratorSparqlify")).getOrCreate();
        spark.sparkContext().setLogLevel("ERROR");
        JenaSystem.init();
        Predef$.MODULE$.println((Object)"\nREAD IN DATA");
        String inputFilePath = args[0];
        Dataset dataset = package$.MODULE$.TripleOperations(NTripleReader$.MODULE$.load(spark, inputFilePath, ErrorParseMode$.MODULE$.SKIP(), WarningParseMode$.MODULE$.IGNORE(), NTripleReader$.MODULE$.load$default$5(), NTripleReader$.MODULE$.load$default$6())).toDS().persist();
        Predef$.MODULE$.println((Object)"\nCREATE FEATURE EXTRACTING SPARQL");
        String manualQueryString = new StringOps(Predef$.MODULE$.augmentString("\n        | SELECT\n        | ?movie\n        | ?movie__down_date\n        | ?movie__down_title\n        | ?movie__down_runtime\n        | ?movie__down_actor__down_actor_name\n        | ?movie__down_genre__down_film_genre_name\n        | ?movie__down_country__down_country_name\n        | ?movie__down_country__down_country_languages\n        | ?movie__down_country__down_country_areaInSqKm\n        |\n        |WHERE {\n        |\t# this fixes the entities, in this sample to be a movie\n        | ?movie <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://data.linkedmdb.org/movie/film> .\n        |\n        | # this is a optional block to gain only a smaller subset of entities, like Superhero-movies\n        | # ?movie <http://data.linkedmdb.org/movie/genre> ?movie__down_genre .\n        | # ?movie__down_genre <http://data.linkedmdb.org/movie/film_genre_name> \"Superhero\"\n        |\n        | # From here on we collect our Features\n        |\tOPTIONAL {\n        |\t\t?movie <http://purl.org/dc/terms/date> ?movie__down_date .\n        |\t}\n        |\n        |\tOPTIONAL {\n        |\t\t?movie <http://purl.org/dc/terms/title> ?movie__down_title .\n        |\t}\n        |\n        |\tOPTIONAL {\n        |\t\t?movie <http://data.linkedmdb.org/movie/runtime> ?movie__down_runtime .\n        |\t}\n        |\n        | OPTIONAL {\n        |\t\t?movie <http://data.linkedmdb.org/movie/actor> ?movie__down_actor .\n        |\t\t?movie__down_actor <http://data.linkedmdb.org/movie/actor_name> ?movie__down_actor__down_actor_name .\n        | }\n        |\n        | OPTIONAL {\n        |\t\t?movie <http://data.linkedmdb.org/movie/genre> ?movie__down_genre .\n        |\t\t?movie__down_genre <http://data.linkedmdb.org/movie/film_genre_name> ?movie__down_genre__down_film_genre_name .\n        |\t}\n        |\n        | OPTIONAL {\n        |\t\t?movie <http://data.linkedmdb.org/movie/country> 
?movie__down_country .\n        |\t\t?movie__down_country <http://data.linkedmdb.org/movie/country_name> ?movie__down_country__down_country_name .\n        |\t}\n        |\n        | OPTIONAL {\n        |\t\t?movie <http://data.linkedmdb.org/movie/country> ?movie__down_country .\n        |\t\t?movie__down_country <http://data.linkedmdb.org/movie/country_languages> ?movie__down_country__down_country_languages .\n        |\t}\n        |\n        | OPTIONAL {\n        |\t\t?movie <http://data.linkedmdb.org/movie/country> ?movie__down_country .\n        |\t\t?movie__down_country <http://data.linkedmdb.org/movie/country_areaInSqKm> ?movie__down_country__down_country_areaInSqKm .\n        |\t}\n        |}\n    ")).stripMargin();
        // The query argument is optional: fall back to the built-in query when it is missing or empty.
        String queryString = args.length > 1 && args[1] != null && !args[1].isEmpty() ? args[1] : manualQueryString;
        Predef$.MODULE$.println((Object)queryString);
        Predef$.MODULE$.println((Object)"\nFEATURE EXTRACTION OVER SPARQL");
        SparqlFrame sparqlFrame = new SparqlFrame().setSparqlQuery(queryString);
        Dataset queryResultDf = sparqlFrame.transform(dataset).persist();
        queryResultDf.show(false);
        Predef$.MODULE$.println((Object)"\nCOLLAPS COLUMNS & IDENTIFY FEATURE CHARACTERISTICS");
        String keyColumnNameString = "movie";
        // Every column except the key column is treated as a feature column.
        scala.collection.immutable.Seq featureColumns = ((scala.collection.immutable.Seq)List$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])queryResultDf.columns())).filter((Function1 & Serializable & scala.Serializable)x$1 -> BoxesRunTime.boxToBoolean((boolean)FeatureTypeIdentifier$.$anonfun$main$1(keyColumnNameString, x$1)))).toSeq();
        // Start from the distinct keys; each feature column is joined onto this frame in the loop below.
        ObjectRef collapsedDataframe = ObjectRef.create((Object)queryResultDf.select(keyColumnNameString, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).dropDuplicates().persist());
        long numberRows = ((Dataset)collapsedDataframe.elem).count();
        Predef$.MODULE$.println((Object)new StringBuilder(24).append("Number distinct ids is: ").append(numberRows).toString());
        ObjectRef featureDescriptions = ObjectRef.create((Object)((Map)Map$.MODULE$.apply((Seq)Nil$.MODULE$)));
        featureColumns.foreach((Function1 & Serializable & scala.Serializable)currentFeatureColumnNameString -> {
            FeatureTypeIdentifier$.$anonfun$main$2(queryResultDf, keyColumnNameString, numberRows, featureDescriptions, collapsedDataframe, currentFeatureColumnNameString);
            return BoxedUnit.UNIT;
        });
        // Release the cached query result only after the loop above, which runs several actions on it.
        queryResultDf.unpersist();
        Predef$.MODULE$.println((Object)"\nCOLLAPSED DATAFRAME");
        ((Dataset)collapsedDataframe.elem).show(false);
        Predef$.MODULE$.println((Object)"\nFEATURE CHARACTERISTICS");
        ((Map)featureDescriptions.elem).foreach((Function1 & Serializable & scala.Serializable)x$2 -> {
            Predef$.MODULE$.println((Object)x$2);
            return BoxedUnit.UNIT;
        });
        Predef$.MODULE$.println((Object)"\nDIGITIZE FEATURES");
        scala.collection.immutable.Seq collectedFeatureColumns = ((scala.collection.immutable.Seq)List$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])((Dataset)collapsedDataframe.elem).columns())).filter((Function1 & Serializable & scala.Serializable)x$3 -> BoxesRunTime.boxToBoolean((boolean)FeatureTypeIdentifier$.$anonfun$main$4(keyColumnNameString, x$3)))).toSeq();
        ObjectRef fullDigitizedDf = ObjectRef.create((Object)((Dataset)collapsedDataframe.elem).select(keyColumnNameString, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).persist());
        ((Dataset)collapsedDataframe.elem).unpersist();
        collectedFeatureColumns.foreach((Function1 & Serializable & scala.Serializable)featureColumn -> {
            FeatureTypeIdentifier$.$anonfun$main$5(collapsedDataframe, keyColumnNameString, fullDigitizedDf, featureColumn);
            return BoxedUnit.UNIT;
        });
        // Columns whose name still carries the "(notDigitizedYet)" marker are dropped before assembling.
        String[] allColumns = ((Dataset)fullDigitizedDf.elem).columns();
        String[] nonDigitizedCoulumns = (String[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])allColumns)).filter((Function1 & Serializable & scala.Serializable)x$4 -> BoxesRunTime.boxToBoolean((boolean)x$4.contains("(notDigitizedYet)")));
        String[] digitzedColumns = (String[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])allColumns)).diff((GenSeq)Predef$.MODULE$.wrapRefArray((Object[])nonDigitizedCoulumns));
        if (new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])nonDigitizedCoulumns)).size() > 0) {
            Predef$.MODULE$.println((Object)new StringBuilder(41).append("we drop following non digitized columns:\n").append(new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])nonDigitizedCoulumns)).mkString("\n")).toString());
        }
        Dataset onlyDigitizedDf = ((Dataset)fullDigitizedDf.elem).select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])digitzedColumns)).map((Function1 & Serializable & scala.Serializable)x$5 -> functions$.MODULE$.col(x$5), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)))));
        ((Dataset)fullDigitizedDf.elem).unpersist();
        onlyDigitizedDf.show();
        Predef$.MODULE$.println((Object)"FIX FEATURE LENGTH");
        // Columns whose name contains "ListOf" hold a variable number of values per key.
        String[] columnsNameWithVariableFeatureColumnLength = (String[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])onlyDigitizedDf.columns())).filter((Function1 & Serializable & scala.Serializable)x$6 -> BoxesRunTime.boxToBoolean((boolean)x$6.contains("ListOf")));
        ObjectRef fixedLengthFeatureDf = ObjectRef.create((Object)onlyDigitizedDf.select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])onlyDigitizedDf.columns())).diff((GenSeq)Predef$.MODULE$.wrapRefArray((Object[])columnsNameWithVariableFeatureColumnLength)))).map((Function1 & Serializable & scala.Serializable)x$7 -> functions$.MODULE$.col(x$7), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class))))).persist());
        new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])columnsNameWithVariableFeatureColumnLength)).foreach((Function1 & Serializable & scala.Serializable)columnName -> {
            FeatureTypeIdentifier$.$anonfun$main$10(onlyDigitizedDf, keyColumnNameString, fixedLengthFeatureDf, columnName);
            return BoxedUnit.UNIT;
        });
        ((Dataset)fixedLengthFeatureDf.elem).show(false);
        Predef$.MODULE$.println((Object)"ASSEMBLE VECTOR");
        String[] columnsToAssemble = (String[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])((Dataset)fixedLengthFeatureDf.elem).columns())).filterNot((Function1 & Serializable & scala.Serializable)x$8 -> BoxesRunTime.boxToBoolean((boolean)FeatureTypeIdentifier$.$anonfun$main$11(keyColumnNameString, x$8)));
        Predef$.MODULE$.println((Object)new StringBuilder(21).append("columns to assemble:\n").append(new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])columnsToAssemble)).mkString(", ")).toString());
        VectorAssembler assembler = new VectorAssembler().setInputCols(columnsToAssemble).setOutputCol("features");
        Dataset output = assembler.transform((Dataset)fixedLengthFeatureDf.elem).persist();
        ((Dataset)fixedLengthFeatureDf.elem).unpersist();
        output.select(keyColumnNameString, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"features"})).show(false);
        long outputSize = output.count();
        Predef$.MODULE$.println((Object)new StringBuilder(40).append("assembled vector has number of samples: ").append(outputSize).toString());
    }

    /**
     * Predicate used to pick the feature columns of the query result: every column that is not the key column.
     *
     * @param keyColumnNameString$1 name of the key column
     * @param x$1 candidate column name
     * @return {@code true} when {@code x$1} differs from the key column name (null-safe comparison)
     */
    public static final /* synthetic */ boolean $anonfun$main$1(String keyColumnNameString$1, String x$1) {
        // A direct null-safe comparison replaces building a one-element Scala Set for every column.
        return !java.util.Objects.equals(keyColumnNameString$1, x$1);
    }

    /**
     * Characterizes one feature column and joins it onto the collapsed (one row per key) dataframe.
     *
     * <p>The column's values are collected into a list per key; from the list sizes and the number of
     * distinct values a feature type label of the form
     * {@code (ListOf_|Single_)(Categorical_|NonCategorical_)<datatype>} is derived. A summary map is
     * stored in {@code featureDescriptions$1} under the column name, and the column is joined onto
     * {@code collapsedDataframe$1} renamed to {@code <name>(<featureType>)}.
     *
     * @param queryResultDf$1 query result holding the key column and all feature columns
     * @param keyColumnNameString$1 name of the key column
     * @param numberRows$1 number of distinct keys; denominator for the availability and categorical ratios
     * @param featureDescriptions$1 holder of the mutable map that receives the feature summary
     * @param collapsedDataframe$1 holder of the accumulated dataframe; replaced by the join result
     * @param currentFeatureColumnNameString name of the feature column to process
     */
    public static final /* synthetic */ void $anonfun$main$2(Dataset queryResultDf$1, String keyColumnNameString$1, long numberRows$1, ObjectRef featureDescriptions$1, ObjectRef collapsedDataframe$1, String currentFeatureColumnNameString) {
        Predef$.MODULE$.println((Object)currentFeatureColumnNameString);
        Dataset twoColumnDf = queryResultDf$1.select(keyColumnNameString$1, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{currentFeatureColumnNameString})).dropDuplicates();
        Dataset collapsedTwoColumnDfwithSize = twoColumnDf.groupBy(keyColumnNameString$1, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).agg(functions$.MODULE$.collect_list(currentFeatureColumnNameString).as(currentFeatureColumnNameString), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[0])).withColumn("size", functions$.MODULE$.size(functions$.MODULE$.col(currentFeatureColumnNameString)));
        // Fetch min and max list size in a single aggregation instead of launching one Spark job for each.
        Row sizeBounds = (Row)collapsedTwoColumnDfwithSize.select("size", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).agg(functions$.MODULE$.min("size"), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.max("size")})).head();
        int minNumberOfElements = sizeBounds.getInt(0);
        int maxNumberOfElements = sizeBounds.getInt(1);
        // A key with an empty value list means the feature is missing for that key.
        boolean nullable = minNumberOfElements == 0;
        DataType datatype = twoColumnDf.select(currentFeatureColumnNameString, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).schema().apply(0).dataType();
        int numberDistinctValues = (int)twoColumnDf.select(currentFeatureColumnNameString, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).distinct().count();
        boolean isListOfEntries = maxNumberOfElements > 1;
        // Share of keys that have at least one value for this feature.
        double availability = (double)collapsedTwoColumnDfwithSize.select("size", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).filter(functions$.MODULE$.col("size").$greater((Object)BoxesRunTime.boxToInteger((int)0))).count() / (double)numberRows$1;
        // Heuristic: fewer distinct values than 10% of the keys marks the feature as categorical.
        boolean isCategorical = (double)numberDistinctValues / (double)numberRows$1 < 0.1;
        // The datatype suffix is the type's string form up to "Type", e.g. "StringType" -> "String".
        String featureType = (isListOfEntries ? "ListOf_" : "Single_") + (isCategorical ? "Categorical_" : "NonCategorical_") + datatype.toString().split("Type")[0];
        scala.collection.immutable.Map featureSummary = (scala.collection.immutable.Map)Predef$.MODULE$.Map().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Tuple2[]{Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"featureType"), (Object)featureType), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"name"), (Object)currentFeatureColumnNameString), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"nullable"), (Object)BoxesRunTime.boxToBoolean((boolean)nullable)), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"datatype"), (Object)datatype), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"numberDistinctValues"), (Object)BoxesRunTime.boxToInteger((int)numberDistinctValues)), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"isListOfEntries"), (Object)BoxesRunTime.boxToBoolean((boolean)isListOfEntries)), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"avalability"), (Object)BoxesRunTime.boxToDouble((double)availability))}));
        ((Map)featureDescriptions$1.elem).update((Object)currentFeatureColumnNameString, (Object)featureSummary);
        // List-valued features are joined in collapsed (array) form, single-valued ones as plain values.
        Dataset joinableDf = isListOfEntries ? collapsedTwoColumnDfwithSize.select(keyColumnNameString$1, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{currentFeatureColumnNameString})) : twoColumnDf.select(keyColumnNameString$1, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{currentFeatureColumnNameString}));
        // The feature type is encoded into the column name so later stages can parse it back out.
        String typedColumnName = currentFeatureColumnNameString + "(" + featureType + ")";
        collapsedDataframe$1.elem = ((Dataset)collapsedDataframe$1.elem).join(joinableDf.withColumnRenamed(currentFeatureColumnNameString, typedColumnName), keyColumnNameString$1);
    }

    /**
     * Predicate used to pick the feature columns of the collapsed dataframe: every column that is not the key column.
     *
     * @param keyColumnNameString$1 name of the key column
     * @param x$3 candidate column name
     * @return {@code true} when {@code x$3} differs from the key column name (null-safe comparison)
     */
    public static final /* synthetic */ boolean $anonfun$main$4(String keyColumnNameString$1, String x$3) {
        // A direct null-safe comparison replaces building a one-element Scala Set for every column.
        return !java.util.Objects.equals(keyColumnNameString$1, x$3);
    }

    /**
     * Digitizes one feature column of the collapsed dataframe and joins the result onto the
     * accumulated digitized dataframe.
     *
     * <p>The column name is expected to have the form {@code <name>(<featureType>)}. The strategy is
     * chosen from the feature type:
     * <ul>
     *   <li>non-categorical strings (single or list): tokenized, stop words removed, Word2Vec-embedded;</li>
     *   <li>categorical strings (single or list): indexed with {@code StringIndexer};</li>
     *   <li>types ending in {@code Double}: kept, scalar nulls filled with -1;</li>
     *   <li>types ending in {@code Integer} or {@code Boolean}: cast to double (element-wise for lists);</li>
     *   <li>anything else: passed through and tagged {@code (notDigitizedYet)}.</li>
     * </ul>
     *
     * @param collapsedDataframe$1 holder of the dataframe with one row per key and typed feature columns
     * @param keyColumnNameString$1 name of the key column
     * @param fullDigitizedDf$1 holder of the accumulated digitized dataframe; replaced by the join result
     * @param featureColumn column name in {@code <name>(<featureType>)} form
     */
    public static final /* synthetic */ void $anonfun$main$5(ObjectRef collapsedDataframe$1, String keyColumnNameString$1, ObjectRef fullDigitizedDf$1, String featureColumn) {
        String featureType = featureColumn.split("\\(")[1].split("\\)")[0];
        String featureName = featureColumn.split("\\(")[0];
        Predef$.MODULE$.println((Object)featureName);
        Predef$.MODULE$.println((Object)featureType);
        Dataset dfCollapsedTwoColumns = ((Dataset)collapsedDataframe$1.elem).select(keyColumnNameString$1, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{featureColumn}));
        Dataset digitizedDf;
        String newFeatureColumnName;
        switch (featureType) {
            case "Single_NonCategorical_String": {
                digitizedDf = FeatureTypeIdentifier$.embedWithWord2Vec(dfCollapsedTwoColumns.na().fill(""), featureColumn);
                newFeatureColumnName = featureName + "(Word2Vec)";
                break;
            }
            case "ListOf_NonCategorical_String": {
                // Concatenate the list entries into one text so the list can be embedded like a single string.
                Dataset sentencesDf = dfCollapsedTwoColumns.withColumn("sentences", functions$.MODULE$.concat_ws(". ", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col(featureColumn)}))).na().fill("");
                digitizedDf = FeatureTypeIdentifier$.embedWithWord2Vec(sentencesDf, "sentences");
                newFeatureColumnName = featureName + "(Word2Vec)";
                break;
            }
            case "Single_Categorical_String": {
                Dataset inputDf = dfCollapsedTwoColumns.na().fill("");
                StringIndexer indexer = new StringIndexer().setInputCol(featureColumn).setOutputCol("output");
                digitizedDf = indexer.fit(inputDf).transform(inputDf);
                newFeatureColumnName = featureName + "(IndexedString)";
                break;
            }
            case "ListOf_Categorical_String": {
                // Index each list element separately (the exploded column is referenced as "col"), then regroup per key.
                Dataset inputDf = dfCollapsedTwoColumns.select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col(keyColumnNameString$1), functions$.MODULE$.explode_outer(functions$.MODULE$.col(featureColumn))})).na().fill("");
                StringIndexer indexer = new StringIndexer().setInputCol("col").setOutputCol("outputTmp");
                digitizedDf = indexer.fit(inputDf).transform(inputDf).groupBy(keyColumnNameString$1, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).agg(functions$.MODULE$.collect_set("outputTmp").as("output"), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[0])).select(keyColumnNameString$1, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"output"}));
                newFeatureColumnName = featureName + "(ListOfIndexedString)";
                break;
            }
            default: {
                if (featureType.endsWith("Double")) {
                    digitizedDf = dfCollapsedTwoColumns.withColumnRenamed(featureColumn, "output").na().fill(-1.0);
                    newFeatureColumnName = featureName + "(" + featureType + ")";
                } else if (featureType.endsWith("Integer") || featureType.endsWith("Boolean")) {
                    Column source = functions$.MODULE$.col(featureColumn);
                    // List-valued columns are arrays; an array cannot be cast to a scalar double,
                    // so cast element-wise to array<double> instead.
                    Column asDouble = featureType.startsWith("ListOf_") ? source.cast("array<double>") : source.cast((DataType)DoubleType$.MODULE$);
                    digitizedDf = dfCollapsedTwoColumns.withColumn("output", asDouble).na().fill(-1.0);
                    newFeatureColumnName = featureName + "(" + featureType + ")";
                } else {
                    Predef$.MODULE$.println((Object)"transformation not possible yet");
                    digitizedDf = dfCollapsedTwoColumns.withColumnRenamed(featureColumn, "output");
                    newFeatureColumnName = featureName + "(notDigitizedYet)";
                }
                break;
            }
        }
        Dataset joinableDf = digitizedDf.withColumnRenamed("output", newFeatureColumnName).select(keyColumnNameString$1, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{newFeatureColumnName}));
        fullDigitizedDf$1.elem = ((Dataset)fullDigitizedDf$1.elem).join(joinableDf, keyColumnNameString$1);
    }

    /** Tokenizes {@code textColumn}, removes stop words and adds a 2-dimensional Word2Vec embedding as column "output". */
    private static Dataset embedWithWord2Vec(Dataset textDf, String textColumn) {
        Tokenizer tokenizer = (Tokenizer)new Tokenizer().setInputCol(textColumn).setOutputCol("words");
        Dataset tokenizedDf = tokenizer.transform(textDf);
        StopWordsRemover remover = new StopWordsRemover().setInputCol("words").setOutputCol("filtered");
        Dataset inputDf = remover.transform(tokenizedDf);
        Word2Vec word2vec = new Word2Vec().setInputCol("filtered").setOutputCol("output").setVectorSize(2);
        Word2VecModel model = word2vec.fit(inputDf);
        return model.transform(inputDf);
    }

    /**
     * Replaces one variable-length (list) column by four per-key statistics and joins them onto the
     * fixed-length dataframe.
     *
     * <p>The list column is exploded to one row per element, grouped by key, and aggregated into
     * {@code <name>_mean}, {@code <name>_min}, {@code <name>_max} and {@code <name>_stddev}, where
     * {@code <name>} is the column name up to its first {@code "("}. Missing values in the result are
     * then replaced through {@code na().fill(-1L)}.
     *
     * @param onlyDigitizedDf$1 dataframe holding the key column and the list column
     * @param keyColumnNameString$1 name of the key column
     * @param fixedLengthFeatureDf$1 holder of the accumulated fixed-length dataframe; replaced by the join result
     * @param columnName name of the list column to reduce
     */
    public static final /* synthetic */ void $anonfun$main$10(Dataset onlyDigitizedDf$1, String keyColumnNameString$1, ObjectRef fixedLengthFeatureDf$1, String columnName) {
        Predef$.MODULE$.println((Object)("Fix number of features in column: " + columnName));
        String statPrefix = columnName.split("\\(")[0];

        // Narrow to key + list column, then explode so every list element gets its own row.
        Dataset keyAndList = onlyDigitizedDf$1.select(keyColumnNameString$1, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{columnName}));
        Column keyCol = functions$.MODULE$.col(keyColumnNameString$1);
        Column explodedValues = functions$.MODULE$.explode_outer(functions$.MODULE$.col(columnName));
        Dataset oneRowPerElement = keyAndList.select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{keyCol, explodedValues}));

        // The exploded values are referenced below under the column name "col".
        Column meanStat = functions$.MODULE$.mean("col").alias(statPrefix + "_mean");
        Column[] furtherStats = new Column[]{
            functions$.MODULE$.min("col").alias(statPrefix + "_min"),
            functions$.MODULE$.max("col").alias(statPrefix + "_max"),
            functions$.MODULE$.stddev("col").alias(statPrefix + "_stddev")
        };
        Dataset perKeyStats = oneRowPerElement.groupBy(keyColumnNameString$1, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).agg(meanStat, (Seq)Predef$.MODULE$.wrapRefArray((Object[])furtherStats)).na().fill(-1L);

        Dataset accumulated = (Dataset)fixedLengthFeatureDf$1.elem;
        fixedLengthFeatureDf$1.elem = accumulated.join(perKeyStats, keyColumnNameString$1);
    }

    /**
     * Predicate matching the key column; {@code main} passes it to {@code filterNot} so the key column
     * is left out of the columns that get assembled.
     *
     * @param keyColumnNameString$1 name of the key column
     * @param x$8 candidate column name
     * @return {@code true} when {@code x$8} equals the key column name (null-safe comparison)
     */
    public static final /* synthetic */ boolean $anonfun$main$11(String keyColumnNameString$1, String x$8) {
        return java.util.Objects.equals(x$8, keyColumnNameString$1);
    }

    /** Private constructor: publishes the newly created instance through the static {@code MODULE$} field. */
    private FeatureTypeIdentifier$() {
        MODULE$ = this;
    }
}

