package ai.tripl.arc.extract;

import ai.tripl.arc.api.API;
import ai.tripl.arc.util.CloudUtils$;
import ai.tripl.arc.util.ExtractUtils$;
import ai.tripl.arc.util.MetadataUtils$;
import ai.tripl.arc.util.log.logger.Logger;
import java.io.FileNotFoundException;
import java.util.HashMap;
import org.apache.avro.Schema;
import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.avro.package$;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.storage.StorageLevel$;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Some;
import scala.StringContext;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.util.Left;
import scala.util.Right;

/* compiled from: AvroExtract.scala */
/* loaded from: input_file:ai/tripl/arc/extract/AvroExtract$.class */
/**
 * Singleton behind the Scala object {@code AvroExtract}: runs one Avro extract stage.
 *
 * <p>The stage reads Avro either from a path or from an existing view, registers the result as
 * a temporary view and logs {@code enter}/{@code exit} events carrying a map of stage details.
 */
public final class AvroExtract$ {
    /** The single instance, created when the class is initialised. */
    public static final AvroExtract$ MODULE$ = new AvroExtract$();

    private AvroExtract$() {
    }

    /**
     * Executes the Avro extract described by {@code avroExtract}.
     *
     * <p>Steps, in order: build and log the stage-detail map, resolve the optional schema from
     * {@code avroExtract.cols()}, apply the Hadoop authentication configuration, read the input,
     * substitute an empty dataset built from the schema when the read produced no columns, add
     * internal columns and schema metadata, repartition, register {@code outputView}, and (for
     * non-streaming datasets) record file/column/partition counts and optionally persist.
     *
     * <p>Failure handling mirrors the nesting below: an {@link Exception} raised while reading
     * is wrapped in {@code AvroExtract$$anon$2}, one raised during post-processing in
     * {@code AvroExtract$$anon$3}, and any {@link Exception} leaving the outer block (including
     * those two wrappers, if they are {@code Exception}s; their classes are defined outside this
     * file) is wrapped in {@code AvroExtract$$anon$1}. Non-{@code Exception} throwables from the
     * read are rethrown untouched.
     *
     * @param avroExtract  the stage definition
     * @param sparkSession session used for reading, view lookup and empty-dataset creation
     * @param logger       receives the {@code enter} and {@code exit} events
     * @return the final dataset wrapped with {@code Option.apply}
     */
    public Option<Dataset<Row>> extract(API.AvroExtract avroExtract, SparkSession sparkSession, Logger logger) {
        long startTime = System.currentTimeMillis();
        HashMap<String, Object> stageDetail = describeStage(avroExtract);
        logger.info().field("event", "enter").map("stage", stageDetail).log();
        try {
            Option<?> schema = ExtractUtils$.MODULE$.getSchema(avroExtract.cols(), sparkSession, logger);
            CloudUtils$.MODULE$.setHadoopConfiguration(avroExtract.authentication(), sparkSession, logger);

            Dataset<Row> source;
            try {
                source = readSource(avroExtract, sparkSession);
            } catch (Throwable th) {
                if (isMissingInput(th)) {
                    // A missing path is not an error here: continue with an empty dataset.
                    source = sparkSession.emptyDataFrame();
                } else if (th instanceof Exception) {
                    throw new AvroExtract$$anon$2(stageDetail, (Exception) th);
                } else {
                    throw th;
                }
            }

            try {
                Dataset<Row> populated = substituteEmptyFrame(source, schema, sparkSession, stageDetail);
                Dataset<Row> enriched =
                        ExtractUtils$.MODULE$.addInternalColumns(populated, avroExtract.contiguousIndex());
                Dataset<Row> described = attachMetadata(enriched, schema);
                Dataset<Row> result = repartition(source, described, avroExtract);

                result.createOrReplaceTempView(avroExtract.outputView());
                if (!result.isStreaming()) {
                    stageDetail.put("inputFiles", Integer.valueOf(result.inputFiles().length));
                    stageDetail.put("outputColumns", Integer.valueOf(result.schema().length()));
                    stageDetail.put("numPartitions", Integer.valueOf(result.rdd().partitions().length));
                    if (avroExtract.persist()) {
                        result.persist(StorageLevel$.MODULE$.MEMORY_AND_DISK_SER());
                        // count() runs after persist(); its value is logged as the record count.
                        stageDetail.put("records", Long.valueOf(result.count()));
                    }
                }

                logger.info()
                        .field("event", "exit")
                        .field("duration", BoxesRunTime.boxToLong(System.currentTimeMillis() - startTime))
                        .map("stage", stageDetail)
                        .log();
                return Option$.MODULE$.apply(result);
            } catch (Exception e) {
                throw new AvroExtract$$anon$3(stageDetail, e);
            }
        } catch (Exception e2) {
            throw new AvroExtract$$anon$1(stageDetail, e2);
        }
    }

    /** Builds the stage-detail map that is attached to the log events and wrapper exceptions. */
    private static HashMap<String, Object> describeStage(API.AvroExtract avroExtract) {
        HashMap<String, Object> stageDetail = new HashMap<>();
        stageDetail.put("type", avroExtract.getType());
        stageDetail.put("name", avroExtract.name());
        // The function class is defined elsewhere; it receives the map and the description, if any.
        avroExtract.description().foreach(new AvroExtract$$anonfun$extract$1(stageDetail));
        stageDetail.put("input", avroExtract.input());
        stageDetail.put("outputView", avroExtract.outputView());
        stageDetail.put("persist", Boolean.valueOf(avroExtract.persist()));
        stageDetail.put("contiguousIndex", Boolean.valueOf(avroExtract.contiguousIndex()));
        return stageDetail;
    }

    /**
     * Reads the stage input: a {@code Right} is loaded as Avro from a path (with an optional
     * {@code basePath} option), a {@code Left} names a view whose binary column is decoded with
     * {@code from_avro} using the mandatory {@code avroSchema}.
     */
    private static Dataset<Row> readSource(API.AvroExtract avroExtract, SparkSession sparkSession)
            throws Exception {
        scala.util.Either<?, ?> input = avroExtract.input();

        if (input instanceof Right) {
            String path = (String) ((Right<?, ?>) input).b();
            Option<?> basePath = avroExtract.basePath();
            if (basePath instanceof Some) {
                String base = (String) ((Some<?>) basePath).x();
                return sparkSession.read().format("avro").option("basePath", base).load(path);
            }
            if (None$.MODULE$.equals(basePath)) {
                return sparkSession.read().format("avro").load(path);
            }
            throw new MatchError(basePath);
        }

        if (!(input instanceof Left)) {
            throw new MatchError(input);
        }
        Dataset<Row> view = sparkSession.table((String) ((Left<?, ?>) input).a());

        Option<?> avroSchema = avroExtract.avroSchema();
        if (!(avroSchema instanceof Some)) {
            if (None$.MODULE$.equals(avroSchema)) {
                throw new Exception(
                        "AvroExtract requires the 'avroSchema' to be provided when reading from an 'inputView'.");
            }
            throw new MatchError(avroSchema);
        }
        Schema decodeSchema = (Schema) ((Some<?>) avroSchema).x();

        // Decode the configured column in place, defaulting to "value" when none is configured.
        Option<?> inputField = avroExtract.inputField();
        String column;
        if (inputField instanceof Some) {
            column = (String) ((Some<?>) inputField).x();
        } else if (None$.MODULE$.equals(inputField)) {
            column = "value";
        } else {
            throw new MatchError(inputField);
        }
        return view.withColumn(
                column, package$.MODULE$.from_avro(functions$.MODULE$.col(column), decodeSchema.toString()));
    }

    /**
     * Tells whether a read failure means "input not found": a {@link FileNotFoundException}, or
     * an {@link AnalysisException} whose message contains {@code "Path does not exist"}.
     */
    private static boolean isMissingInput(Throwable failure) {
        if (failure instanceof FileNotFoundException) {
            return true;
        }
        return failure instanceof AnalysisException && failure.getMessage().contains("Path does not exist");
    }

    /**
     * Returns {@code source} unchanged when it has columns; otherwise records zero records and
     * builds an empty dataset from {@code schema}, failing when no schema was supplied.
     */
    private static Dataset<Row> substituteEmptyFrame(
            Dataset<Row> source, Option<?> schema, SparkSession sparkSession, HashMap<String, Object> stageDetail)
            throws Exception {
        if (source.schema().length() != 0) {
            return source;
        }
        // NOTE(review): stored as Integer here while the persisted count is stored as Long.
        stageDetail.put("records", Integer.valueOf(0));
        if (!(schema instanceof Some)) {
            if (None$.MODULE$.equals(schema)) {
                throw new Exception(
                        "AvroExtract has produced 0 columns and no schema has been provided to create an empty dataframe.");
            }
            throw new MatchError(schema);
        }
        return sparkSession.createDataFrame(
                sparkSession.sparkContext().emptyRDD(ClassTag$.MODULE$.apply(Row.class)),
                (StructType) ((Some<?>) schema).x());
    }

    /** Applies the schema metadata via {@code MetadataUtils} when a schema is present. */
    private static Dataset<Row> attachMetadata(Dataset<Row> enriched, Option<?> schema) {
        if (schema instanceof Some) {
            return MetadataUtils$.MODULE$.setMetadata(enriched, (StructType) ((Some<?>) schema).x());
        }
        if (None$.MODULE$.equals(schema)) {
            return enriched;
        }
        throw new MatchError(schema);
    }

    /**
     * Repartitions {@code enriched} according to {@code partitionBy} and {@code numPartitions};
     * with neither configured the dataset is returned as is.
     *
     * @param source   the dataset as read, handed to the partition-column function
     * @param enriched the dataset to repartition
     */
    private static Dataset<Row> repartition(
            Dataset<Row> source, Dataset<Row> enriched, API.AvroExtract avroExtract) {
        List<String> partitionBy = avroExtract.partitionBy();

        if (Nil$.MODULE$.equals(partitionBy)) {
            Option<?> numPartitions = avroExtract.numPartitions();
            if (numPartitions instanceof Some) {
                return enriched.repartition(BoxesRunTime.unboxToInt(((Some<?>) numPartitions).x()));
            }
            if (None$.MODULE$.equals(numPartitions)) {
                return enriched;
            }
            throw new MatchError(numPartitions);
        }

        // NOTE(review): the column function (defined elsewhere) is given the dataset as read, not
        // the enriched one; confirm that is intended, notably when the read produced no columns.
        // Raw List on purpose: the element type of the mapped list is not visible from this file.
        List partitionColumns =
                (List) partitionBy.map(new AvroExtract$$anonfun$1(source), List$.MODULE$.canBuildFrom());
        Option<?> numPartitions = avroExtract.numPartitions();
        if (numPartitions instanceof Some) {
            return enriched.repartition(
                    BoxesRunTime.unboxToInt(((Some<?>) numPartitions).x()), partitionColumns);
        }
        if (None$.MODULE$.equals(numPartitions)) {
            return enriched.repartition(partitionColumns);
        }
        throw new MatchError(numPartitions);
    }
}
