package io.openlineage.spark.agent.lifecycle;

import datahub.spark2.shaded.org.slf4j.Logger;
import datahub.spark2.shaded.org.slf4j.LoggerFactory;
import io.openlineage.client.OpenLineage;
import io.openlineage.spark.agent.lifecycle.DatasetParser;
import io.openlineage.spark.api.AbstractInputDatasetBuilder;
import io.openlineage.spark.api.OpenLineageContext;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import lombok.NonNull;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.spark.rdd.HadoopRDD;
import org.apache.spark.rdd.NewHadoopRDD;
import org.apache.spark.rdd.RDD;

/* loaded from: input_file:io/openlineage/spark/agent/lifecycle/HadoopRDDInputDatasetBuilder.class */
/**
 * Builds OpenLineage input datasets from Hadoop-backed RDDs.
 *
 * <p>Supports {@link HadoopRDD} (old {@code mapred} API) and {@link NewHadoopRDD} (new
 * {@code mapreduce} API). The input paths configured on the RDD's job configuration are read,
 * converted to URIs and each one is turned into an {@link OpenLineage.InputDataset}.
 */
public class HadoopRDDInputDatasetBuilder extends AbstractInputDatasetBuilder<RDD<?>> {
    private static final Logger log = LoggerFactory.getLogger(HadoopRDDInputDatasetBuilder.class);

    /**
     * Creates a builder bound to the given context.
     *
     * @param openLineageContext the OpenLineage context handed to the superclass; must not be null
     * @throws NullPointerException if {@code openLineageContext} is null
     */
    public HadoopRDDInputDatasetBuilder(@NonNull OpenLineageContext openLineageContext) {
        // Validate inside the super(...) argument so the check runs BEFORE the superclass
        // constructor sees the value; exception type and message are unchanged.
        super(Objects.requireNonNull(openLineageContext, "context is marked non-null but is null"));
    }

    /**
     * Tells whether this builder can handle the given RDD.
     *
     * @param rdd the RDD to test
     * @return {@code true} if {@code rdd} is a {@link HadoopRDD} or a {@link NewHadoopRDD}
     */
    @Override // io.openlineage.spark.api.AbstractInputDatasetBuilder, io.openlineage.spark.api.AbstractGenericArgPartialFunction, io.openlineage.spark.api.AbstractPartial
    public boolean isDefinedAt(RDD<?> rdd) {
        return (rdd instanceof HadoopRDD) || (rdd instanceof NewHadoopRDD);
    }

    /**
     * Produces one input dataset per input URI found on the RDD.
     *
     * @param rdd the RDD whose inputs are extracted
     * @return the input datasets, in the order returned by {@link #findInputs(RDD)}; empty if
     *     no input paths were found
     */
    public Collection<OpenLineage.InputDataset> apply(RDD<?> rdd) {
        return findInputs(rdd).stream()
                .map(this::buildInputDataset)
                .collect(Collectors.toList());
    }

    /**
     * Converts a dataset URI into an input dataset, using {@link DatasetParser} to derive the
     * dataset name and namespace.
     *
     * @param uri the dataset location
     * @return an input dataset carrying the parsed name and namespace
     */
    protected OpenLineage.InputDataset buildInputDataset(URI uri) {
        DatasetParser.DatasetParseResult parsed = DatasetParser.parse(uri);
        return new OpenLineage.InputDatasetBuilder()
                .name(parsed.getName())
                .namespace(parsed.getNamespace())
                .build();
    }

    /**
     * Collects the dataset URIs of all input paths configured on the RDD.
     *
     * @param rdd the RDD to inspect
     * @return a mutable list of URIs, each passed through {@link #getDatasetUri(URI)}; empty
     *     when {@link #getInputPaths(RDD)} yields {@code null} or no paths
     */
    protected List<URI> findInputs(RDD<?> rdd) {
        List<URI> uris = new ArrayList<>();
        Path[] inputPaths = getInputPaths(rdd);
        // getInputPaths returns null for unsupported RDD types or when reading the paths failed.
        if (inputPaths != null) {
            for (Path path : inputPaths) {
                uris.add(getDatasetUri(path.toUri()));
            }
        }
        return uris;
    }

    /**
     * Reads the input paths from the RDD's Hadoop job configuration.
     *
     * @param rdd the RDD to inspect
     * @return the configured input paths, or {@code null} if {@code rdd} is neither a
     *     {@link HadoopRDD} nor a {@link NewHadoopRDD}, or if an {@link IOException} occurred
     *     (the error is logged, not rethrown)
     */
    protected Path[] getInputPaths(RDD<?> rdd) {
        Path[] inputPaths = null;
        if (rdd instanceof HadoopRDD) {
            inputPaths = FileInputFormat.getInputPaths(((HadoopRDD<?, ?>) rdd).getJobConf());
        } else if (rdd instanceof NewHadoopRDD) {
            try {
                // Job.getInstance replaces the deprecated Job(Configuration) constructor.
                Job job = Job.getInstance(((NewHadoopRDD<?, ?>) rdd).getConf());
                inputPaths = org.apache.hadoop.mapreduce.lib.input.FileInputFormat.getInputPaths(job);
            } catch (IOException e) {
                // Best effort: lineage extraction must not fail the job, so log and return null.
                log.error("Openlineage spark agent could not get input paths", e);
            }
        }
        return inputPaths;
    }

    /**
     * Maps an input path URI to the URI used to identify the dataset. This implementation
     * returns the URI unchanged; the method is protected so subclasses can override it.
     *
     * @param uri the URI of an input path
     * @return the dataset URI
     */
    protected URI getDatasetUri(URI uri) {
        return uri;
    }
}
