package org.seqdoop.hadoop_bam;

import htsjdk.samtools.Chunk;
import htsjdk.samtools.DiskBasedBAMFileIndex;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SamFiles;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Locatable;
import htsjdk.variant.vcf.VCFConstants;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.seqdoop.hadoop_bam.util.SAMHeaderReader;
import org.seqdoop.hadoop_bam.util.WrapSeekable;

/**
 * Hadoop input format for BAM files, producing {@link SAMRecordWritable} values keyed by
 * {@link LongWritable}.
 *
 * <p>The byte-range splits computed by {@link FileInputFormat} are converted into
 * {@link FileVirtualSplit}s. For each file a splitting index (see {@link #getIdxPath(Path)})
 * is tried first; when it cannot be read or lacks the needed offsets, record boundaries are
 * guessed with a {@link BAMSplitGuesser} instead. If {@link #INTERVALS_PROPERTY} is set, the
 * resulting splits are additionally trimmed to the regions a BAM index reports for those
 * intervals.
 */
public class BAMInputFormat extends FileInputFormat<LongWritable, SAMRecordWritable> {
    public static final boolean DEBUG_BAM_SPLITTER = false;

    /**
     * Configuration key. It is not read by this class; presumably it is consumed by the split
     * guesser or record reader (verify against those classes).
     */
    public static final String KEEP_PAIRED_READS_TOGETHER_PROPERTY = "hadoopbam.bam.keep-paired-reads-together";

    /**
     * Configuration key holding the intervals to keep, as a comma-separated list of
     * {@code contig:start-end} entries. Prefer {@link #setIntervals(Configuration, List)} over
     * writing the string by hand.
     */
    public static final String INTERVALS_PROPERTY = "hadoopbam.bam.intervals";

    /**
     * Separator between entries of {@link #INTERVALS_PROPERTY}. Same value as htsjdk's
     * {@code VCFConstants.INFO_FIELD_ARRAY_SEPARATOR}, which this code used to borrow; kept
     * local so that BAM interval handling does not hinge on a VCF constant.
     */
    private static final String INTERVAL_SEPARATOR = ",";

    /**
     * Low 16 bits of a virtual offset, all set (the maximum unsigned 16-bit value). OR-ing it
     * into an end offset makes the split extend as far as possible within its last block.
     */
    private static final long MAX_WITHIN_BLOCK_OFFSET = 0xFFFFL;

    /**
     * Stores the given intervals in the configuration under {@link #INTERVALS_PROPERTY}.
     *
     * <p>Numbers are appended directly rather than through {@code String.format}, so the stored
     * text does not depend on the JVM's default locale.
     *
     * @param configuration the configuration to update
     * @param intervals the intervals to keep; an empty list stores an empty string
     * @param <T> the concrete interval type
     */
    public static <T extends Locatable> void setIntervals(Configuration configuration, List<T> intervals) {
        StringBuilder encoded = new StringBuilder();
        for (Locatable interval : intervals) {
            if (encoded.length() > 0) {
                encoded.append(INTERVAL_SEPARATOR);
            }
            encoded.append(interval.getContig())
                    .append(':')
                    .append(interval.getStart())
                    .append('-')
                    .append(interval.getEnd());
        }
        configuration.set(INTERVALS_PROPERTY, encoded.toString());
    }

    /**
     * Reads the intervals stored under {@link #INTERVALS_PROPERTY}.
     *
     * @param configuration the configuration to read
     * @return the parsed intervals; {@code null} when the property is not set, and an empty list
     *     when it is set to the empty string (which is what {@link #setIntervals} writes for an
     *     empty list)
     * @throws IllegalArgumentException if an entry is not of the form {@code contig:start-end}
     *     (this includes {@link NumberFormatException} for non-numeric coordinates)
     */
    public static List<Interval> getIntervals(Configuration configuration) {
        String property = configuration.get(INTERVALS_PROPERTY);
        if (property == null) {
            return null;
        }
        List<Interval> intervals = new ArrayList<>();
        if (property.isEmpty()) {
            return intervals;
        }
        for (String entry : property.split(INTERVAL_SEPARATOR)) {
            intervals.add(parseInterval(entry));
        }
        return intervals;
    }

    /**
     * Parses one {@code contig:start-end} entry. The coordinates are located from the right
     * (last ':' and the first '-' after it) so that contig names which themselves contain ':'
     * or '-' are kept intact.
     */
    private static Interval parseInterval(String entry) {
        int colon = entry.lastIndexOf(':');
        int dash = colon < 0 ? -1 : entry.indexOf('-', colon + 1);
        if (colon <= 0 || dash < 0) {
            throw new IllegalArgumentException(
                    "Invalid interval '" + entry + "' in " + INTERVALS_PROPERTY + ": expected contig:start-end");
        }
        String contig = entry.substring(0, colon);
        int start = Integer.parseInt(entry.substring(colon + 1, dash));
        int end = Integer.parseInt(entry.substring(dash + 1));
        return new Interval(contig, start, end);
    }

    /**
     * Returns the location of the splitting index for a BAM file: the BAM path with
     * {@link SplittingBAMIndexer#OUTPUT_FILE_EXTENSION} appended.
     */
    public static Path getIdxPath(Path path) {
        return path.suffix(SplittingBAMIndexer.OUTPUT_FILE_EXTENSION);
    }

    /** Returns a {@link BAMRecordReader} that has already been initialized with the arguments. */
    @Override
    public RecordReader<LongWritable, SAMRecordWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws InterruptedException, IOException {
        BAMRecordReader reader = new BAMRecordReader();
        reader.initialize(inputSplit, taskAttemptContext);
        return reader;
    }

    /** Computes the default file splits and converts them with {@link #getSplits(List, Configuration)}. */
    @Override
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
        return getSplits(super.getSplits(jobContext), jobContext.getConfiguration());
    }

    /**
     * Converts byte-range {@link FileSplit}s into {@link FileVirtualSplit}s and applies the
     * interval filter, if one is configured.
     *
     * @param splits file splits to convert; every element must be a {@link FileSplit}. The list
     *     is sorted in place by path.
     * @param configuration the job configuration
     * @return the converted (and possibly filtered) splits
     * @throws IOException if a file cannot be read while guessing record boundaries or filtering
     */
    public List<InputSplit> getSplits(List<InputSplit> splits, Configuration configuration) throws IOException {
        // The per-file helpers consume runs of consecutive splits with the same path,
        // so all splits of one file have to be adjacent.
        Collections.sort(splits, new Comparator<InputSplit>() {
            @Override
            public int compare(InputSplit left, InputSplit right) {
                return ((FileSplit) left).getPath().compareTo(((FileSplit) right).getPath());
            }
        });

        List<InputSplit> newSplits = new ArrayList<>(splits.size());
        int i = 0;
        while (i < splits.size()) {
            int sizeBeforeAttempt = newSplits.size();
            try {
                i = addIndexedSplits(splits, i, newSplits, configuration);
            } catch (IOException e) {
                // Deliberate fallback: the splitting index could not be used, so guess instead.
                // Drop anything the failed attempt may already have appended, otherwise the
                // retry below would produce duplicate splits.
                newSplits.subList(sizeBeforeAttempt, newSplits.size()).clear();
                i = addProbabilisticSplits(splits, i, newSplits, configuration);
            }
        }
        return filterByInterval(newSplits, configuration);
    }

    /**
     * Converts all splits that share the path of {@code splits.get(i)} using the file's
     * splitting index, appending the results to {@code newSplits}. Nothing is appended by the
     * indexed path unless every split of the file could be converted; if the index lacks a
     * needed offset, the work is delegated to {@link #addProbabilisticSplits}.
     *
     * @return the index of the first split belonging to the next file
     * @throws IOException if the splitting index cannot be opened or read
     */
    private int addIndexedSplits(List<InputSplit> splits, int i, List<InputSplit> newSplits, Configuration configuration) throws IOException {
        final Path file = ((FileSplit) splits.get(i)).getPath();

        // End (exclusive) of the run of splits that belong to this file.
        int splitsEnd = i;
        while (splitsEnd < splits.size() && file.equals(((FileSplit) splits.get(splitsEnd)).getPath())) {
            splitsEnd++;
        }

        // The stream stays open for as long as the index is queried and is closed on every exit path.
        try (InputStream indexStream = file.getFileSystem(configuration).open(getIdxPath(file))) {
            SplittingBAMIndex index = new SplittingBAMIndex(indexStream);
            List<InputSplit> indexedSplits = new ArrayList<>(splitsEnd - i);

            for (int j = i; j < splitsEnd; j++) {
                FileSplit fileSplit = (FileSplit) splits.get(j);
                long start = fileSplit.getStart();
                long end = start + fileSplit.getLength();

                Long alignedStart = index.nextAlignment(start);
                Long alignedEnd;
                if (j == splitsEnd - 1) {
                    // Last split of the file: start from the preceding indexed offset and set
                    // the low 16 bits so the split reaches as far as possible in that block.
                    // The null check must come before unboxing.
                    Long lastAlignment = index.prevAlignment(end);
                    if (lastAlignment == null) {
                        alignedEnd = null;
                    } else {
                        alignedEnd = Long.valueOf(lastAlignment.longValue() | MAX_WITHIN_BLOCK_OFFSET);
                    }
                } else {
                    alignedEnd = index.nextAlignment(end);
                }

                if (alignedStart == null || alignedEnd == null) {
                    System.err.println("Warning: index for " + file.toString() + " was not good. Generating probabilistic splits.");
                    return addProbabilisticSplits(splits, i, newSplits, configuration);
                }
                indexedSplits.add(new FileVirtualSplit(file, alignedStart.longValue(), alignedEnd.longValue(), fileSplit.getLocations()));
            }

            newSplits.addAll(indexedSplits);
            return splitsEnd;
        }
    }

    /**
     * Converts all splits that share the path of {@code splits.get(i)} by asking a
     * {@link BAMSplitGuesser} for the first record start inside each split. A split in which no
     * record start is found is merged into the previous split of the same file.
     *
     * @return the index of the first split belonging to the next file
     * @throws IOException if the file cannot be read, or if no record start is found in the
     *     file's first split
     */
    private int addProbabilisticSplits(List<InputSplit> splits, int i, List<InputSplit> newSplits, Configuration configuration) throws IOException {
        final Path path = ((FileSplit) splits.get(i)).getPath();

        // try-with-resources: the stream is also released when guessing fails part-way.
        try (WrapSeekable<FSDataInputStream> in = WrapSeekable.openPath(path.getFileSystem(configuration), path)) {
            BAMSplitGuesser guesser = new BAMSplitGuesser(in, configuration);
            FileVirtualSplit previousSplit = null;

            for (; i < splits.size(); i++) {
                FileSplit fileSplit = (FileSplit) splits.get(i);
                if (!fileSplit.getPath().equals(path)) {
                    break;
                }
                long start = fileSplit.getStart();
                long end = start + fileSplit.getLength();

                long alignedStart = guesser.guessNextBAMRecordStart(start, end);
                // File offset in the high bits, low 16 bits all set.
                long alignedEnd = (end << 16) | MAX_WITHIN_BLOCK_OFFSET;

                if (alignedStart != end) {
                    previousSplit = new FileVirtualSplit(path, alignedStart, alignedEnd, fileSplit.getLocations());
                    newSplits.add(previousSplit);
                } else {
                    // The guesser returned the split end, i.e. found nothing in this split:
                    // extend the previous split over it instead of emitting a new one.
                    if (previousSplit == null) {
                        throw new IOException("'" + path + "': no reads in first split: bad BAM file or tiny split size?");
                    }
                    previousSplit.setEndVirtualOffset(alignedEnd);
                }
            }
        }
        return i;
    }

    /**
     * Restricts the splits to the regions that overlap the configured intervals.
     *
     * <p>Each split is compared only against the chunks of its own file, because virtual
     * offsets from one file say nothing about another. Splits that overlap no chunk are dropped;
     * the others are narrowed to the span covered by their overlapping chunks.
     *
     * @param splits the splits to filter; every element must be a {@link FileVirtualSplit}
     * @return {@code splits} itself when no intervals are configured or when some file has no
     *     BAM index; otherwise a new list with the filtered splits
     * @throws IOException if a header or index cannot be read
     */
    private List<InputSplit> filterByInterval(List<InputSplit> splits, Configuration configuration) throws IOException {
        List<Interval> intervals = getIntervals(configuration);
        if (intervals == null) {
            return splits;
        }

        // Resolve every file's chunks up front, in first-seen order, so a missing index is
        // detected before any split is filtered.
        Map<Path, List<Chunk>> chunksByFile = new HashMap<>();
        for (InputSplit split : splits) {
            Path bamFile = ((FileVirtualSplit) split).getPath();
            if (chunksByFile.containsKey(bamFile)) {
                continue;
            }
            List<Chunk> chunks = findOverlappingChunks(bamFile, intervals, configuration);
            if (chunks == null) {
                System.err.println("WARNING: no BAM index file found, splits will not be filtered, which may be very inefficient: " + bamFile);
                return splits;
            }
            chunksByFile.put(bamFile, chunks);
        }

        List<InputSplit> filteredSplits = new ArrayList<>();
        for (InputSplit split : splits) {
            FileVirtualSplit virtualSplit = (FileVirtualSplit) split;
            long splitStart = virtualSplit.getStartVirtualOffset();
            long splitEnd = virtualSplit.getEndVirtualOffset();

            long newStart = Long.MAX_VALUE;
            long newEnd = Long.MIN_VALUE;
            boolean foundOverlap = false;
            for (Chunk chunk : chunksByFile.get(virtualSplit.getPath())) {
                long chunkStart = chunk.getChunkStart();
                long chunkEnd = chunk.getChunkEnd();
                if (overlaps(splitStart, splitEnd, chunkStart, chunkEnd)) {
                    // Clip the chunk to the split, then grow the kept range to include it.
                    newStart = Math.min(newStart, Math.max(splitStart, chunkStart));
                    newEnd = Math.max(newEnd, Math.min(splitEnd, chunkEnd));
                    foundOverlap = true;
                }
            }
            if (foundOverlap) {
                filteredSplits.add(new FileVirtualSplit(virtualSplit.getPath(), newStart, newEnd, virtualSplit.getLocations()));
            }
        }
        return filteredSplits;
    }

    /**
     * Collects the index chunks of one BAM file that overlap any of the intervals.
     *
     * @return the chunks, or {@code null} if no index file was found for {@code bamFile}
     * @throws IOException if the header or the index cannot be read
     */
    private static List<Chunk> findOverlappingChunks(Path bamFile, List<Interval> intervals, Configuration configuration) throws IOException {
        FileSystem fileSystem = bamFile.getFileSystem(configuration);
        // NOTE(review): Paths.get(URI) needs a java.nio file-system provider for the URI's
        // scheme, so this lookup may fail for non-local Hadoop file systems - confirm.
        java.nio.file.Path indexFile = SamFiles.findIndex(Paths.get(fileSystem.makeQualified(bamFile).toUri()));
        if (indexFile == null) {
            return null;
        }

        SAMSequenceDictionary dictionary;
        try (InputStream headerStream = fileSystem.open(bamFile)) {
            dictionary = SAMHeaderReader.readSAMHeaderFrom(headerStream, configuration).getSequenceDictionary();
        }

        List<Chunk> chunks = new ArrayList<>();
        try (WrapSeekable<FSDataInputStream> indexStream = WrapSeekable.openPath(configuration, new Path(indexFile.toUri()))) {
            DiskBasedBAMFileIndex index = new DiskBasedBAMFileIndex(indexStream, dictionary);
            for (Interval interval : intervals) {
                int referenceIndex = dictionary.getSequenceIndex(interval.getContig());
                // NOTE(review): if getSpanOverlapping can return null for an interval without
                // indexed data (check the htsjdk Javadoc), this call needs a null guard.
                chunks.addAll(index.getSpanOverlapping(referenceIndex, interval.getStart(), interval.getEnd()).getChunks());
            }
        }
        return chunks;
    }

    /**
     * Tells whether two ranges with inclusive bounds share at least one point.
     *
     * @param start first range, lower bound
     * @param end first range, upper bound
     * @param start2 second range, lower bound
     * @param end2 second range, upper bound
     * @return {@code true} if either bound of the second range lies within the first range, or
     *     the first range lies entirely within the second
     */
    public static boolean overlaps(long start, long end, long start2, long end2) {
        return (start2 >= start && start2 <= end)
                || (end2 >= start && end2 <= end)
                || (start >= start2 && end <= end2);
    }

    /** Always {@code true}: every input file is treated as splittable. */
    @Override
    public boolean isSplitable(JobContext jobContext, Path path) {
        return true;
    }
}
