package io.stoys.spark.dq;

import io.stoys.spark.SToysException;
import io.stoys.spark.SToysException$;
import io.stoys.spark.dq.DqFramework;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.expressions.Stack;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.MatchError;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.generic.GenericTraversableTemplate;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.Set$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;

/* compiled from: DqFramework.scala */
/* loaded from: input_file:io/stoys/spark/dq/DqFramework$.class */
/**
 * Java rendering of the Scala singleton object {@code DqFramework} (compiled from
 * {@code DqFramework.scala}).
 *
 * <p>The single instance is exposed through {@link #MODULE$}. The helper function objects named
 * {@code DqFramework$$anonfun$N} are compiled closures that live in separate class files; their
 * behaviour is not visible from this file, so the comments below describe them only as far as the
 * call sites allow.
 */
public final class DqFramework$ {
    /**
     * The one and only instance of this module.
     *
     * <p>Initialised directly in the declaration: a {@code final} static field cannot legally be
     * reassigned from the constructor, which is what the raw decompiler output attempted.
     */
    public static final DqFramework$ MODULE$ = new DqFramework$();

    private final String MISSING_TOKEN;
    private final String NULL_TOKEN;

    /** Returns the token constant {@code "__MISSING__"}. */
    public String MISSING_TOKEN() {
        return this.MISSING_TOKEN;
    }

    /** Returns the token constant {@code "__NULL__"}. */
    public String NULL_TOKEN() {
        return this.NULL_TOKEN;
    }

    /**
     * Builds a {@code DqFramework.ColumnNamesInfo} for the names in {@code seq2}, resolved against
     * the names in {@code seq}.
     *
     * <p>{@code seq} is zipped with its indices and turned into a map (via {@code anonfun$1}); each
     * element of {@code seq2} is then mapped by {@code anonfun$2} (which receives {@code seq}, that
     * map and an initially empty mutable set) into a triple. The triples are unzipped into three
     * sequences, each of which is flattened into one of the constructor arguments.
     *
     * @param seq  names to resolve against (the only visible caller passes the wide DataFrame's
     *             column names)
     * @param seq2 names to look up
     * @return the assembled column-name info
     * @throws MatchError if the unzipped triple is {@code null}
     */
    public DqFramework.ColumnNamesInfo io$stoys$spark$dq$DqFramework$$getColumnNamesInfo(Seq<String> seq, Seq<String> seq2) {
        Tuple3 unzip3 = ((GenericTraversableTemplate) seq2.map(new DqFramework$$anonfun$2(seq, ((TraversableOnce) ((TraversableLike) seq.zipWithIndex(Seq$.MODULE$.canBuildFrom())).map(new DqFramework$$anonfun$1(), Seq$.MODULE$.canBuildFrom())).toMap(Predef$.MODULE$.$conforms()), Set$.MODULE$.empty()), Seq$.MODULE$.canBuildFrom())).unzip3(Predef$.MODULE$.$conforms());
        if (unzip3 == null) {
            throw new MatchError(unzip3);
        }
        Tuple3 tuple3 = new Tuple3((Seq) unzip3._1(), (Seq) unzip3._2(), (Seq) unzip3._3());
        Seq seq3 = (Seq) tuple3._1();
        Seq seq4 = (Seq) tuple3._2();
        // Constructor arguments, in order: (second ++ first), second, third, first - each flattened.
        // NOTE(review): which argument is "existing" / "missing" etc. is defined by ColumnNamesInfo,
        // which is not visible here - confirm against its declaration.
        return new DqFramework.ColumnNamesInfo(((Seq) seq4.$plus$plus(seq3, Seq$.MODULE$.canBuildFrom())).flatten(new DqFramework$$anonfun$io$stoys$spark$dq$DqFramework$$getColumnNamesInfo$1()), seq4.flatten(new DqFramework$$anonfun$io$stoys$spark$dq$DqFramework$$getColumnNamesInfo$2()), ((Seq) tuple3._3()).flatten(new DqFramework$$anonfun$io$stoys$spark$dq$DqFramework$$getColumnNamesInfo$3()), seq3.flatten(new DqFramework$$anonfun$io$stoys$spark$dq$DqFramework$$getColumnNamesInfo$4()));
    }

    /**
     * Converts every rule in {@code seq2} into a {@code DqFramework.RuleInfo}.
     *
     * @param sparkSession session handed to the per-rule conversion closure
     * @param seq          column names handed to the per-rule conversion closure
     * @param seq2         rules to convert
     * @return one {@code RuleInfo} per rule, in the same order
     */
    public Seq<DqFramework.RuleInfo> getRuleInfo(SparkSession sparkSession, Seq<String> seq, Seq<DqRule> seq2) {
        return (Seq) seq2.map(new DqFramework$$anonfun$getRuleInfo$1(sparkSession, seq), Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Validates the schema of a wide dq DataFrame.
     *
     * <p>Two checks are performed, each raising an {@link SToysException} on failure:
     * <ol>
     *   <li>the last {@code i} fields are filtered by {@code anonfun$5}; any field that survives the
     *       filter is reported as a rule that does not return a boolean value;</li>
     *   <li>all fields are mapped to strings, grouped and filtered (by {@code anonfun$7} to
     *       {@code anonfun$9}); any group that survives is reported as a non-unique name.</li>
     * </ol>
     *
     * @param structType schema to validate
     * @param i          number of trailing fields that hold rule results
     * @return always {@code true} when no exception is thrown
     * @throws SToysException if either check fails
     */
    public boolean checkWideDqColumnsSanity(StructType structType, int i) {
        StructField[] structFieldArr = (StructField[]) Predef$.MODULE$.refArrayOps((StructField[]) Predef$.MODULE$.refArrayOps(structType.fields()).takeRight(i)).filter(new DqFramework$$anonfun$5());
        if (Predef$.MODULE$.refArrayOps(structFieldArr).nonEmpty()) {
            throw new SToysException(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Dq rules have to return boolean values! Not true for: ", "."})).s(Predef$.MODULE$.genericWrapArray(new Object[]{Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(structFieldArr).map(new DqFramework$$anonfun$6(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)))).mkString(", ")})), SToysException$.MODULE$.$lessinit$greater$default$2());
        }
        Map map = (Map) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(structType.fields()).map(new DqFramework$$anonfun$7(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)))).groupBy(new DqFramework$$anonfun$8()).filter(new DqFramework$$anonfun$9());
        if (!map.nonEmpty()) {
            return true;
        }
        // Offending names are sorted so the error message is deterministic.
        throw new SToysException(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Dq rules and fields have to have unique names! Not true for: ", "."})).s(Predef$.MODULE$.genericWrapArray(new Object[]{((TraversableOnce) ((SeqLike) map.toSeq().map(new DqFramework$$anonfun$10(), Seq$.MODULE$.canBuildFrom())).sorted(Ordering$String$.MODULE$)).mkString(", ")})), SToysException$.MODULE$.$lessinit$greater$default$2());
    }

    /**
     * Assembles a {@code DqFramework.WideDqDfInfo} for {@code dataset}.
     *
     * <p>The last {@code seq.size()} columns of {@code dataset} are excluded from the column-name
     * list. Rule info is computed for both rule sequences; only the rules in {@code seq2} are
     * appended as new columns (when there are any). The resulting schema is sanity-checked before
     * the info object is returned.
     *
     * @param dataset input dataset
     * @param seq     rules whose result columns are treated as already being the trailing columns
     *                of {@code dataset}
     * @param seq2    rules whose result columns still have to be computed
     * @return the wide DataFrame together with its column names and the combined rule info
     * @throws SToysException if the resulting schema fails {@link #checkWideDqColumnsSanity}
     */
    public <T> DqFramework.WideDqDfInfo computeWideDqDfInfo(Dataset<T> dataset, Seq<DqRule> seq, Seq<DqRule> seq2) {
        Seq<String> seq3 = (Seq) Predef$.MODULE$.refArrayOps(dataset.columns()).toSeq().dropRight(seq.size());
        Seq<DqFramework.RuleInfo> ruleInfo = getRuleInfo(dataset.sparkSession(), seq3, seq);
        Seq<DqFramework.RuleInfo> ruleInfo2 = getRuleInfo(dataset.sparkSession(), seq3, seq2);
        Seq seq4 = (Seq) ruleInfo.$plus$plus(ruleInfo2, Seq$.MODULE$.canBuildFrom());
        // Nothing to add: skip the extra select and just expose the dataset as a DataFrame.
        Dataset<Row> df = ruleInfo2.isEmpty() ? dataset.toDF() : computeWideDqDf(dataset, ruleInfo2);
        checkWideDqColumnsSanity(df.schema(), seq4.size());
        return new DqFramework.WideDqDfInfo(df, seq3, seq4);
    }

    /**
     * Selects all columns of {@code dataset} ({@code "*"}) followed by one column per rule info,
     * as produced by {@code anonfun$11}.
     */
    private <T> Dataset<Row> computeWideDqDf(Dataset<T> dataset, Seq<DqFramework.RuleInfo> seq) {
        return dataset.select((Seq) ((Seq) seq.map(new DqFramework$$anonfun$11(), Seq$.MODULE$.canBuildFrom())).$plus$colon(functions$.MODULE$.col("*"), Seq$.MODULE$.canBuildFrom()));
    }

    /**
     * Aggregates a wide dq DataFrame into a {@code DqResult} dataset.
     *
     * <p>Each row is first projected to three columns - {@code rowSample} (an array built from
     * {@code seq}), {@code rowId} (a monotonically increasing id) and {@code ruleHashes} (an array
     * built from {@code seq2}) - and typed as {@code DqAggregator.DqAggInputRow}. The rows are then
     * folded by a {@code DqAggregator} and the aggregate is mapped to {@code DqResult}.
     *
     * @param dataset  wide dq DataFrame
     * @param seq      column names; their count is also passed to the aggregator
     * @param seq2     rule info for the rule columns
     * @param dqConfig configuration passed to the aggregator
     * @param map      passed through to the closure that builds the final {@code DqResult}
     * @return dataset holding the aggregated result
     */
    public Dataset<DqResult> computeDqResult(Dataset<Row> dataset, Seq<String> seq, Seq<DqFramework.RuleInfo> seq2, DqConfig dqConfig, Map<String, String> map) {
        Dataset select = dataset.select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.array((Seq) seq.map(new DqFramework$$anonfun$14(), Seq$.MODULE$.canBuildFrom())).as("rowSample"), functions$.MODULE$.monotonically_increasing_id().as("rowId"), functions$.MODULE$.array((Seq) seq2.map(new DqFramework$$anonfun$12(), Seq$.MODULE$.canBuildFrom())).as("ruleHashes")}));
        DqAggregator dqAggregator = new DqAggregator(seq.size(), (Seq) seq2.map(new DqFramework$$anonfun$15(), Seq$.MODULE$.canBuildFrom()), dqConfig);
        // The anonymous TypeCreators below are the compiler-generated TypeTags for the encoders.
        return select.as(dataset.sparkSession().implicits().newProductEncoder(package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: io.stoys.spark.dq.DqFramework$$typecreator4$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("io.stoys.spark.dq").asModule().moduleClass()), mirror.staticModule("io.stoys.spark.dq.DqAggregator")), mirror.staticClass("io.stoys.spark.dq.DqAggregator.DqAggInputRow"), Nil$.MODULE$);
            }
        }))).select(dqAggregator.toColumn()).map(new DqFramework$$anonfun$computeDqResult$1(seq, seq2, map), dataset.sparkSession().implicits().newProductEncoder(package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: io.stoys.spark.dq.DqFramework$$typecreator8$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("io.stoys.spark.dq.DqResult").asType().toTypeConstructor();
            }
        })));
    }

    /**
     * Builds a single array column from the existing column names (mapped by {@code anonfun$20})
     * followed by the missing column names (mapped by {@code anonfun$21}).
     *
     * <p>NOTE(review): presumably the closures substitute {@link #NULL_TOKEN()} /
     * {@link #MISSING_TOKEN()} for absent values - confirm against the closure classes.
     *
     * @param columnNamesInfo source of the existing and missing column names
     * @return the combined array column
     */
    public Column io$stoys$spark$dq$DqFramework$$createSafeValuesArrayExpr(DqFramework.ColumnNamesInfo columnNamesInfo) {
        return functions$.MODULE$.array((Seq) ((Seq) columnNamesInfo.existing().map(new DqFramework$$anonfun$20(), Seq$.MODULE$.canBuildFrom())).$plus$plus((Seq) columnNamesInfo.missing().map(new DqFramework$$anonfun$21(), Seq$.MODULE$.canBuildFrom()), Seq$.MODULE$.canBuildFrom()));
    }

    /**
     * Produces one {@code DqViolationPerRow} record per (row, failed rule) pair.
     *
     * <p>Every input row is expanded with a {@code stack} generator into one output row per rule,
     * carrying {@code result}, {@code column_names}, {@code values}, {@code rule_name} and
     * {@code rule_expression}. Rows whose {@code result} is true are discarded and the
     * {@code result} column is dropped afterwards.
     *
     * @param wideDqDfInfo wide DataFrame plus its column names and rule info
     * @param seq          names used to build the {@code primary_key} array column
     * @return dataset of violations
     */
    public Dataset<DqViolationPerRow> computeDqViolationPerRow(DqFramework.WideDqDfInfo wideDqDfInfo, Seq<String> seq) {
        DqFramework.ColumnNamesInfo primaryKeyNamesInfo = io$stoys$spark$dq$DqFramework$$getColumnNamesInfo(wideDqDfInfo.columnNames(), seq);
        Column primaryKeyExpr = io$stoys$spark$dq$DqFramework$$createSafeValuesArrayExpr(primaryKeyNamesInfo);
        // One inner sequence of columns per rule; flattened below into the stack() argument list.
        Seq perRuleColumns = (Seq) wideDqDfInfo.ruleInfo().map(new DqFramework$$anonfun$22(), Seq$.MODULE$.canBuildFrom());
        // stack(n, ...) needs the number of output rows as its first argument, hence lit(size) +: columns.
        return wideDqDfInfo.wideDqDf().select(Predef$.MODULE$.wrapRefArray(new Column[]{primaryKeyExpr.as("primary_key"), stack((Seq) perRuleColumns.flatten(Predef$.MODULE$.$conforms()).$plus$colon(functions$.MODULE$.lit(BoxesRunTime.boxToInteger(perRuleColumns.size())), Seq$.MODULE$.canBuildFrom())).as(Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"result", "column_names", "values", "rule_name", "rule_expression"})))})).where(functions$.MODULE$.not(functions$.MODULE$.col("result"))).drop("result").as(wideDqDfInfo.wideDqDf().sparkSession().implicits().newProductEncoder(package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: io.stoys.spark.dq.DqFramework$$typecreator12$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("io.stoys.spark.dq.DqViolationPerRow").asType().toTypeConstructor();
            }
        })));
    }

    /**
     * Keeps only the rows for which the combined value of the last {@code i} columns is false, and
     * projects them to the columns that precede those {@code i} columns.
     *
     * <p>The trailing columns are mapped to {@code Column}s and combined pairwise with
     * {@code anonfun$selectFailingRows$2} (NOTE(review): presumably a logical AND - confirm).
     *
     * @param dataset wide dq DataFrame whose last {@code i} columns are rule results
     * @param i       number of trailing rule-result columns; when {@code <= 0} an empty dataset
     *                ({@code limit(0)}) is returned
     * @return failing rows restricted to the leading columns
     * @throws MatchError if splitting the column array yields {@code null}
     */
    public Dataset<Row> selectFailingRows(Dataset<Row> dataset, int i) {
        if (i <= 0) {
            return dataset.limit(0);
        }
        String[] columns = dataset.columns();
        Tuple2 splitAt = Predef$.MODULE$.refArrayOps(columns).splitAt(columns.length - i);
        if (splitAt == null) {
            throw new MatchError(splitAt);
        }
        Tuple2 tuple2 = new Tuple2((String[]) splitAt._1(), (String[]) splitAt._2());
        String[] strArr = (String[]) tuple2._1();
        return dataset.where(functions$.MODULE$.not((Column) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps((String[]) tuple2._2()).map(new DqFramework$$anonfun$selectFailingRows$1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)))).reduce(new DqFramework$$anonfun$selectFailingRows$2()))).select(Predef$.MODULE$.wrapRefArray((Object[]) Predef$.MODULE$.refArrayOps(strArr).map(new DqFramework$$anonfun$selectFailingRows$3(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)))));
    }

    /**
     * Wraps the given columns (each converted by {@code anonfun$stack$1}) in a Catalyst
     * {@code Stack} expression and exposes it as a {@code Column}.
     */
    private Column stack(Seq<Column> seq) {
        return new Column(new Stack((Seq) seq.map(new DqFramework$$anonfun$stack$1(), Seq$.MODULE$.canBuildFrom())));
    }

    /** Private: the only instance is {@link #MODULE$}. Initialises the two token constants. */
    private DqFramework$() {
        this.MISSING_TOKEN = "__MISSING__";
        this.NULL_TOKEN = "__NULL__";
    }
}
