package datahub.spark;

import com.google.common.collect.ImmutableSet;
import com.linkedin.common.FabricType;
import datahub.shaded.org.slf4j.Logger;
import datahub.shaded.org.slf4j.LoggerFactory;
import datahub.spark.model.LineageUtils;
import datahub.spark.model.dataset.CatalogTableDataset;
import datahub.spark.model.dataset.HdfsPathDataset;
import datahub.spark.model.dataset.JdbcDataset;
import datahub.spark.model.dataset.SparkDataset;
import datahub.spark2.shaded.typesafe.config.Config;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.Generated;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.catalyst.catalog.CatalogTable;
import org.apache.spark.sql.catalyst.catalog.HiveTableRelation;
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
import org.apache.spark.sql.execution.FileSourceScanExec;
import org.apache.spark.sql.execution.RowDataSourceScanExec;
import org.apache.spark.sql.execution.SparkPlan;
import org.apache.spark.sql.execution.columnar.InMemoryRelation;
import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec;
import org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand;
import org.apache.spark.sql.execution.datasources.HadoopFsRelation;
import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand;
import org.apache.spark.sql.execution.datasources.LogicalRelation;
import org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand;
import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions;
import org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation;
import org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand;
import org.apache.spark.sql.hive.execution.HiveTableScanExec;
import org.apache.spark.sql.hive.execution.InsertIntoHiveTable;
import org.apache.spark.sql.sources.BaseRelation;
import scala.Option;
import scala.collection.JavaConversions;
import scala.runtime.AbstractFunction1;

/* loaded from: input_file:datahub/spark/DatasetExtractor.class */
/**
 * Static registry that turns Spark plan nodes and relations into {@link SparkDataset}s.
 *
 * <p>Three lookup tables, keyed by the <em>exact</em> runtime class of the node, are populated
 * once in the static initializer and only read afterwards:
 *
 * <ul>
 *   <li>{@code PLAN_TO_DATASET} for {@link LogicalPlan} nodes,
 *   <li>{@code SPARKPLAN_TO_DATASET} for physical {@link SparkPlan} nodes,
 *   <li>{@code REL_TO_DATASET} for {@link BaseRelation}s.
 * </ul>
 *
 * <p>Nodes whose class is not registered yield {@link Optional#empty()}.
 */
public class DatasetExtractor {

    @Generated
    private static final Logger log = LoggerFactory.getLogger(DatasetExtractor.class);

    /** Extractors for logical plan nodes, keyed by the node's exact class. */
    private static final Map<Class<? extends LogicalPlan>, PlanToDataset> PLAN_TO_DATASET = new HashMap<>();

    /** Extractors for physical plan nodes, keyed by the node's exact class. */
    private static final Map<Class<? extends SparkPlan>, SparkPlanToDataset> SPARKPLAN_TO_DATASET = new HashMap<>();

    /** Extractors for relations, keyed by the relation's exact class. */
    private static final Map<Class<? extends BaseRelation>, RelationToDataset> REL_TO_DATASET = new HashMap<>();

    /** Plan classes that {@link #asDataset} only resolves when its boolean argument is {@code true}. */
    private static final Set<Class<? extends LogicalPlan>> OUTPUT_CMD = ImmutableSet.of(InsertIntoHadoopFsRelationCommand.class, SaveIntoDataSourceCommand.class, CreateDataSourceTableAsSelectCommand.class, CreateHiveTableAsSelectCommand.class, InsertIntoHiveTable.class);

    private static final String DATASET_ENV_KEY = "metadata.dataset.env";
    private static final String DATASET_PLATFORM_INSTANCE_KEY = "metadata.dataset.platformInstance";
    private static final String TABLE_HIVE_PLATFORM_ALIAS = "metadata.table.hive_platform_alias";
    private static final String INCLUDE_SCHEME_KEY = "metadata.include_scheme";

    /** Extracts datasets from a logical plan node. */
    public interface PlanToDataset {
        Optional<? extends Collection<SparkDataset>> fromPlanNode(LogicalPlan logicalPlan, SparkContext sparkContext, Config config);
    }

    /** Extracts datasets from a data source relation. */
    public interface RelationToDataset {
        Optional<? extends Collection<SparkDataset>> fromRelation(BaseRelation baseRelation, SparkContext sparkContext, Config config);
    }

    /** Extracts datasets from a physical plan node. */
    public interface SparkPlanToDataset {
        Optional<? extends Collection<SparkDataset>> fromSparkPlanNode(SparkPlan sparkPlan, SparkContext sparkContext, Config config);
    }

    /** Not instantiable: the class only exposes static members. */
    private DatasetExtractor() {
    }

    /**
     * Resolves the datasets referenced by a logical plan node.
     *
     * @param logicalPlan  the node to inspect
     * @param sparkContext context handed through to the registered extractor
     * @param z            when {@code false}, nodes whose class is listed in {@code OUTPUT_CMD}
     *                     are skipped and yield an empty result
     * @return the extracted datasets, or empty when the node is skipped, its class has no
     *         registered extractor (an error is logged in that case), or the extractor itself
     *         finds nothing
     */
    public static Optional<? extends Collection<SparkDataset>> asDataset(LogicalPlan logicalPlan, SparkContext sparkContext, boolean z) {
        Class<? extends LogicalPlan> planClass = logicalPlan.getClass();
        if (!z && OUTPUT_CMD.contains(planClass)) {
            return Optional.empty();
        }
        PlanToDataset extractor = PLAN_TO_DATASET.get(planClass);
        if (extractor == null) {
            log.error("{} is not supported yet. Please contact datahub team for further support. ", planClass);
            return Optional.empty();
        }
        return extractor.fromPlanNode(logicalPlan, sparkContext, LineageUtils.parseSparkConfig());
    }

    /**
     * Maps a path to a directory: a path that the file system reports as a file is replaced by
     * its parent; anything else is returned unchanged.
     *
     * @param path          the path to normalize
     * @param configuration Hadoop configuration used to obtain the path's file system
     * @return the parent of {@code path} if it is a file, otherwise {@code path}; also
     *         {@code path} when the file system lookup fails with an {@link IOException}
     */
    public static Path getDirectoryPath(Path path, Configuration configuration) {
        try {
            return path.getFileSystem(configuration).getFileStatus(path).isFile() ? path.getParent() : path;
        } catch (IOException e) {
            // Best effort: keep the path as given, but leave a trace instead of failing silently.
            log.debug("Unable to determine whether {} is a file; using the path as-is", path, e);
            return path;
        }
    }

    /**
     * Reads the fabric type from {@code metadata.dataset.env}, case-insensitively.
     *
     * @return the configured fabric type, or {@code PROD} when the key is absent or its value is
     *         not a {@link FabricType} constant (a warning is logged for invalid values)
     */
    private static FabricType getCommonFabricType(Config config) {
        // Locale.ROOT keeps the case mapping independent of the JVM default locale
        // (e.g. under a Turkish locale "ei" would otherwise upper-case to "Eİ").
        String envName = config.hasPath(DATASET_ENV_KEY) ? config.getString(DATASET_ENV_KEY).toUpperCase(Locale.ROOT) : "PROD";
        try {
            return FabricType.valueOf(envName);
        } catch (IllegalArgumentException e) {
            log.warn("Invalid env ({}). Setting env to default PROD", envName);
            return FabricType.PROD;
        }
    }

    /** Returns the value of {@code metadata.dataset.platformInstance}, or {@code null} when unset. */
    private static String getCommonPlatformInstance(Config config) {
        return config.hasPath(DATASET_PLATFORM_INSTANCE_KEY) ? config.getString(DATASET_PLATFORM_INSTANCE_KEY) : null;
    }

    /** Returns the value of {@code metadata.table.hive_platform_alias}, or {@code "hive"} when unset. */
    private static String getTableHivePlatformAlias(Config config) {
        return config.hasPath(TABLE_HIVE_PLATFORM_ALIAS) ? config.getString(TABLE_HIVE_PLATFORM_ALIAS) : "hive";
    }

    /** Returns the value of {@code metadata.include_scheme}, or {@code true} when unset. */
    private static boolean getIncludeScheme(Config config) {
        return !config.hasPath(INCLUDE_SCHEME_KEY) || config.getBoolean(INCLUDE_SCHEME_KEY);
    }

    /** Wraps one dataset in the single-element result shape shared by all extractors. */
    private static Optional<? extends Collection<SparkDataset>> single(SparkDataset dataset) {
        return Optional.of(Collections.singletonList(dataset));
    }

    /** Builds the single-dataset result for a catalog table using the common config values. */
    private static Optional<? extends Collection<SparkDataset>> catalogTableDataset(CatalogTable table, Config config) {
        return single(new CatalogTableDataset(table, getCommonPlatformInstance(config), getTableHivePlatformAlias(config), getCommonFabricType(config)));
    }

    /** Builds the single-dataset result for a file system path using the common config values. */
    private static Optional<? extends Collection<SparkDataset>> hdfsPathDataset(Path path, Config config) {
        return single(new HdfsPathDataset(path, getCommonPlatformInstance(config), getIncludeScheme(config), getCommonFabricType(config)));
    }

    /** Builds the single-dataset result for a JDBC table using the common config values. */
    private static Optional<? extends Collection<SparkDataset>> jdbcDataset(String url, String table, Config config) {
        return single(new JdbcDataset(url, table, getCommonPlatformInstance(config), getCommonFabricType(config)));
    }

    /** Delegates to the extractor registered for the relation's class; empty if there is none. */
    private static Optional<? extends Collection<SparkDataset>> extractFromRelation(BaseRelation relation, SparkContext sparkContext, Config config) {
        RelationToDataset extractor = REL_TO_DATASET.get(relation.getClass());
        if (extractor == null) {
            return Optional.empty();
        }
        return extractor.fromRelation(relation, sparkContext, config);
    }

    /** Delegates to the extractor registered for the plan's class; empty if there is none. */
    private static Optional<? extends Collection<SparkDataset>> extractFromPlan(LogicalPlan plan, SparkContext sparkContext, Config config) {
        PlanToDataset extractor = PLAN_TO_DATASET.get(plan.getClass());
        if (extractor == null) {
            return Optional.empty();
        }
        return extractor.fromPlanNode(plan, sparkContext, config);
    }

    static {
        // Physical scan nodes: unwrap the underlying relation / logical plan and delegate.
        SPARKPLAN_TO_DATASET.put(FileSourceScanExec.class, (plan, ctx, config) ->
                extractFromRelation(((FileSourceScanExec) plan).relation(), ctx, config));
        SPARKPLAN_TO_DATASET.put(HiveTableScanExec.class, (plan, ctx, config) ->
                extractFromPlan(((HiveTableScanExec) plan).relation(), ctx, config));
        SPARKPLAN_TO_DATASET.put(RowDataSourceScanExec.class, (plan, ctx, config) ->
                extractFromRelation(((RowDataSourceScanExec) plan).relation(), ctx, config));
        SPARKPLAN_TO_DATASET.put(InMemoryTableScanExec.class, (plan, ctx, config) ->
                extractFromPlan(((InMemoryTableScanExec) plan).relation(), ctx, config));

        // Logical plan nodes.
        PLAN_TO_DATASET.put(InsertIntoHadoopFsRelationCommand.class, (plan, ctx, config) -> {
            InsertIntoHadoopFsRelationCommand cmd = (InsertIntoHadoopFsRelationCommand) plan;
            // Prefer the catalog table when the command carries one; otherwise use the output path.
            if (cmd.catalogTable().isDefined()) {
                return catalogTableDataset((CatalogTable) cmd.catalogTable().get(), config);
            }
            return hdfsPathDataset(cmd.outputPath(), config);
        });
        PLAN_TO_DATASET.put(LogicalRelation.class, (plan, ctx, config) ->
                extractFromRelation(((LogicalRelation) plan).relation(), ctx, config));
        PLAN_TO_DATASET.put(SaveIntoDataSourceCommand.class, (plan, ctx, config) -> {
            Map<String, String> options = JavaConversions.mapAsJavaMap(((SaveIntoDataSourceCommand) plan).options());
            String url = options.getOrDefault("url", "");
            if (url.contains("jdbc")) {
                return jdbcDataset(url, options.get("dbtable"), config);
            }
            if (options.containsKey("path")) {
                return hdfsPathDataset(new Path(options.get("path")), config);
            }
            return Optional.empty();
        });
        PLAN_TO_DATASET.put(CreateDataSourceTableAsSelectCommand.class, (plan, ctx, config) ->
                catalogTableDataset(((CreateDataSourceTableAsSelectCommand) plan).table(), config));
        PLAN_TO_DATASET.put(CreateHiveTableAsSelectCommand.class, (plan, ctx, config) ->
                catalogTableDataset(((CreateHiveTableAsSelectCommand) plan).tableDesc(), config));
        PLAN_TO_DATASET.put(InsertIntoHiveTable.class, (plan, ctx, config) ->
                catalogTableDataset(((InsertIntoHiveTable) plan).table(), config));
        PLAN_TO_DATASET.put(HiveTableRelation.class, (plan, ctx, config) ->
                catalogTableDataset(((HiveTableRelation) plan).tableMeta(), config));

        // Relations.
        REL_TO_DATASET.put(HadoopFsRelation.class, (relation, ctx, config) -> {
            Collection<Path> rootPaths = JavaConversions.asJavaCollection(((HadoopFsRelation) relation).location().rootPaths());
            if (rootPaths.isEmpty()) {
                // Nothing to report; previously this case failed with IndexOutOfBoundsException.
                return Optional.empty();
            }
            // Only the first root path is reported, so only that one needs to be resolved.
            Path firstRoot = getDirectoryPath(rootPaths.iterator().next(), ctx.hadoopConfiguration());
            return hdfsPathDataset(firstRoot, config);
        });
        REL_TO_DATASET.put(JDBCRelation.class, (relation, ctx, config) -> {
            JDBCRelation jdbcRelation = (JDBCRelation) relation;
            Option<String> tableName = jdbcRelation.jdbcOptions().parameters().get(JDBCOptions.JDBC_TABLE_NAME());
            if (tableName.isEmpty()) {
                return Optional.empty();
            }
            return jdbcDataset(jdbcRelation.jdbcOptions().url(), tableName.get(), config);
        });

        // Cached relation: gather the datasets of every supported leaf of the cached physical plan.
        PLAN_TO_DATASET.put(InMemoryRelation.class, (plan, ctx, config) -> {
            SparkPlan cachedPlan = ((InMemoryRelation) plan).cachedPlan();
            List<SparkDataset> datasets = new ArrayList<>();
            for (SparkPlan leaf : JavaConversions.asJavaCollection(cachedPlan.collectLeaves())) {
                SparkPlanToDataset extractor = SPARKPLAN_TO_DATASET.get(leaf.getClass());
                if (extractor == null) {
                    log.error("{} is not yet supported. Please contact datahub team for further support.", leaf.getClass());
                    continue;
                }
                extractor.fromSparkPlanNode(leaf, ctx, config).ifPresent(found -> datasets.addAll(found));
            }
            if (datasets.isEmpty()) {
                return Optional.empty();
            }
            return Optional.of(datasets);
        });
    }
}
