package org.broadinstitute.hellbender.tools.walkers.realignmentfilter;

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.collect.Multisets;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.util.Locatable;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.argparser.ExperimentalFeature;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.AssemblyRegion;
import org.broadinstitute.hellbender.engine.MultiVariantWalkerGroupedOnStart;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.filters.CountingVariantFilter;
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
import org.broadinstitute.hellbender.engine.filters.VariantFilterLibrary;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerUtils;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReadLikelihoodCalculationEngine;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.hellbender.tools.walkers.mutect.M2ArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2Engine;
import org.broadinstitute.hellbender.utils.BaseUtils;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment;
import org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod;
import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList;
import org.broadinstitute.hellbender.utils.genotyper.SampleList;
import org.broadinstitute.hellbender.utils.haplotype.Haplotype;
import org.broadinstitute.hellbender.utils.haplotype.HaplotypeBAMWriter;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.hellbender.utils.pileup.ReadPileup;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import picard.cmdline.programgroups.VariantFilteringProgramGroup;

/**
 * Walks grouped variants of a VCF and, for each one, reassembles the reads that support it,
 * builds consensus "unitigs" from the pileup of the reassembled region, realigns those unitigs
 * with a {@link RealignmentEngine} and annotates the variant with the outcome.
 *
 * <p>A variant receives the {@link GATKVCFConstants#ALIGNMENT_ARTIFACT_FILTER_NAME} filter when
 * the best-scoring joint alignment of its unitigs lands on a different contig than the variant,
 * or when the best joint alignment is not sufficiently better than the second best (both the
 * per-base aligner-score difference and the per-base mismatch difference fall below the
 * thresholds held in {@link RealignmentArgumentCollection}).
 */
@CommandLineProgramProperties(summary = "Filter alignment artifacts from a vcf callset.", oneLineSummary = "Filter alignment artifacts from a vcf callset.", programGroup = VariantFilteringProgramGroup.class)
@DocumentedFeature
@ExperimentalFeature
public class FilterAlignmentArtifacts extends MultiVariantWalkerGroupedOnStart {
    public static final int DEFAULT_DISTANCE_TO_GROUP_VARIANTS = 1000;
    public static final int DEFAULT_REF_PADDING = 100;
    public static final int DEFAULT_MAX_GROUPED_SPAN = 10000;

    /** Unitigs must be strictly longer than this many bases to be realigned. */
    private static final int MIN_UNITIG_LENGTH = 30;

    /** Number of bases added on each side of the supporting reads' span when building the assembly region. */
    private static final int ASSEMBLY_PADDING = 50;

    private static final SmithWatermanAligner ALIGNER = SmithWatermanAligner.getAligner(SmithWatermanAligner.Implementation.FASTEST_AVAILABLE);

    public static final int DEFAULT_INDEL_START_TOLERANCE = 5;
    public static final String INDEL_START_TOLERANCE_LONG_NAME = "indel-start-tolerance";
    public static final int DEFAULT_KMER_SIZE = 21;
    public static final String KMER_SIZE_LONG_NAME = "kmer-size";
    public static final String DONT_SKIP_ALREADY_FILTERED_VARIANTS_LONG_NAME = "dont-skip-filtered-variants";

    // Everything below is initialised in onTraversalStart(); it is null until then.
    private VariantContextWriter vcfWriter;
    private RealignmentEngine realignmentEngine;
    private SAMFileHeader bamHeader;
    private SampleList samplesList;
    private CachingIndexedFastaSequenceFile referenceReader;
    private ReadThreadingAssembler assemblyEngine;
    private ReadLikelihoodCalculationEngine likelihoodCalculationEngine;
    private Optional<HaplotypeBAMWriter> haplotypeBAMWriter;

    // Declared final with a null initialiser; presumably populated reflectively by the argument parser.
    @Argument(fullName = "output", shortName = "O", doc = "The output filtered VCF file", optional = false)
    private final String outputVcf = null;

    @Argument(fullName = INDEL_START_TOLERANCE_LONG_NAME, doc = "Max distance between indel start of aligned read in the bam and the variant in the vcf", optional = true)
    private int indelStartTolerance = DEFAULT_INDEL_START_TOLERANCE;

    // NOTE(review): this value is not read anywhere in this class -- confirm whether it should configure the assembler.
    @Argument(fullName = KMER_SIZE_LONG_NAME, doc = "Kmer size for reassembly", optional = true)
    private int kmerSize = DEFAULT_KMER_SIZE;

    @Argument(fullName = DONT_SKIP_ALREADY_FILTERED_VARIANTS_LONG_NAME, doc = "Try to realign all variants, even ones that have already been filtered.", optional = true)
    private boolean dontSkipFilteredVariants = false;

    @Argument(fullName = AssemblyBasedCallerArgumentCollection.BAM_OUTPUT_LONG_NAME, shortName = AssemblyBasedCallerArgumentCollection.BAM_OUTPUT_SHORT_NAME, doc = "File to which assembled haplotypes should be written", optional = true)
    public String bamOutputPath = null;

    @ArgumentCollection
    protected RealignmentArgumentCollection realignmentArgumentCollection = new RealignmentArgumentCollection();

    /** Default-constructed Mutect2 arguments used to configure assembly and likelihood calculation. */
    private final M2ArgumentCollection MTAC = new M2ArgumentCollection();

    /**
     * @return the read filters produced by {@link Mutect2Engine#makeStandardMutect2ReadFilters()}
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public List<ReadFilter> getDefaultReadFilters() {
        return Mutect2Engine.makeStandardMutect2ReadFilters();
    }

    /**
     * @return a counting filter that lets every variant through when
     *         {@code --dont-skip-filtered-variants} is set, and otherwise only variants matching
     *         {@link VariantFilterLibrary#PASSES_FILTERS}
     */
    @Override // org.broadinstitute.hellbender.engine.VariantWalkerBase
    public CountingVariantFilter makeVariantFilter() {
        return new CountingVariantFilter(this.dontSkipFilteredVariants ? VariantFilterLibrary.ALLOW_ALL_VARIANTS : VariantFilterLibrary.PASSES_FILTERS);
    }

    /**
     * @return always {@code true}; reads are needed to reassemble each variant
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public boolean requiresReads() {
        return true;
    }

    /**
     * @return {@link #DEFAULT_DISTANCE_TO_GROUP_VARIANTS}
     */
    @Override // org.broadinstitute.hellbender.engine.MultiVariantWalkerGroupedOnStart
    protected int defaultDistanceToGroupVariants() {
        return DEFAULT_DISTANCE_TO_GROUP_VARIANTS;
    }

    /**
     * @return {@link #DEFAULT_REF_PADDING}
     */
    @Override // org.broadinstitute.hellbender.engine.MultiVariantWalkerGroupedOnStart
    protected int defaultReferenceWindowPadding() {
        return DEFAULT_REF_PADDING;
    }

    /**
     * @return {@link #DEFAULT_MAX_GROUPED_SPAN}
     */
    @Override // org.broadinstitute.hellbender.engine.MultiVariantWalkerGroupedOnStart
    protected int defaultMaxGroupedSpan() {
        return DEFAULT_MAX_GROUPED_SPAN;
    }

    /**
     * Creates the realignment engine, writes the output VCF header (input header lines plus the
     * filter/info lines this tool emits and the default tool header lines), and sets up the
     * assembly, likelihood and optional haplotype-BAM machinery.
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public void onTraversalStart() {
        this.realignmentEngine = new RealignmentEngine(this.realignmentArgumentCollection);
        this.vcfWriter = createVCFWriter(new File(this.outputVcf));

        final VCFHeader inputHeader = getHeaderForVariants();
        final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
        headerLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.UNITIG_SIZES_KEY));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ALIGNMENT_SCORE_DIFFERENCE_KEY));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.JOINT_ALIGNMENT_COUNT_KEY));
        headerLines.addAll(getDefaultToolVCFHeaderLines());
        this.vcfWriter.writeHeader(new VCFHeader(headerLines, inputHeader.getGenotypeSamples()));

        this.bamHeader = getHeaderForReads();
        this.samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSamplesFromHeader(this.bamHeader)));
        this.referenceReader = AssemblyBasedCallerUtils.createReferenceReader(Utils.nonNull(this.referenceArguments.getReferenceFileName()));
        this.assemblyEngine = this.MTAC.createReadThreadingAssembler();
        this.likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(this.MTAC.likelihoodArgs);

        // The haplotype BAM is only produced when the user supplied an output path for it.
        if (this.bamOutputPath == null) {
            this.haplotypeBAMWriter = Optional.empty();
        } else {
            this.haplotypeBAMWriter = Optional.of(new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, IOUtils.getPath(this.bamOutputPath), true, false, getHeaderForSAMWriter()));
        }
    }

    /**
     * @return the literal string {@code "SUCCESS"}
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public Object onTraversalSuccess() {
        return "SUCCESS";
    }

    /**
     * Processes one group of variants: each variant is reassembled from its supporting reads,
     * annotated with unitig sizes, the number of joint alignments and (when at least two joint
     * alignments exist and the best one is on the variant's contig) the score difference between
     * the two best, filtered if it looks like an alignment artifact, and written to the output VCF.
     *
     * @param list             the variants of the current group
     * @param referenceContext reference context of the group (not used by this implementation)
     * @param list2            read contexts from which the supporting reads are drawn
     */
    @Override // org.broadinstitute.hellbender.engine.MultiVariantWalkerGroupedOnStart
    public void apply(List<VariantContext> list, ReferenceContext referenceContext, List<ReadsContext> list2) {
        for (final VariantContext variant : list) {
            // Local reassembly restricted to the reads (and their mates) that support this variant.
            final AssemblyRegion variantRegion = makeAssemblyRegionFromVariantReads(list2, variant);
            final AssemblyResultSet assemblyResult = AssemblyBasedCallerUtils.assembleReads(variantRegion, Collections.emptyList(), this.MTAC, this.bamHeader, this.samplesList, this.logger, this.referenceReader, this.assemblyEngine, ALIGNER, false);
            final AssemblyRegion regionForGenotyping = assemblyResult.getRegionForGenotyping();

            final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods = this.likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult, this.samplesList, AssemblyBasedCallerUtils.splitReadsBySample(this.samplesList, this.bamHeader, regionForGenotyping.getReads()));
            readLikelihoods.switchToNaturalLog();
            readLikelihoods.changeEvidence(AssemblyBasedCallerUtils.realignReadsToTheirBestHaplotype(readLikelihoods, assemblyResult.getReferenceHaplotype(), assemblyResult.getPaddedReferenceLoc(), ALIGNER));
            writeBamOutput(assemblyResult, readLikelihoods, new HashSet<>(readLikelihoods.alleles()), regionForGenotyping.getSpan());

            final List<byte[]> unitigs = getUnitigs(new LocusIteratorByState(regionForGenotyping.getReads().iterator(), DownsamplingMethod.NONE, false, this.samplesList.asListOfSamples(), this.bamHeader));
            final int[] unitigSizes = unitigs.stream().mapToInt(unitig -> unitig.length).toArray();
            final VariantContextBuilder builder = new VariantContextBuilder(variant).attribute(GATKVCFConstants.UNITIG_SIZES_KEY, unitigSizes);

            final List<List<BwaMemAlignment>> unitigAlignments = unitigs.stream()
                    .map(this.realignmentEngine::realign)
                    .collect(Collectors.toList());
            final List<List<BwaMemAlignment>> jointAlignments = RealignmentEngine.findJointAlignments(unitigAlignments, this.realignmentArgumentCollection.maxReasonableFragmentLength);
            builder.attribute(GATKVCFConstants.JOINT_ALIGNMENT_COUNT_KEY, jointAlignments.size());

            // Highest total aligner score first, so index 0 is the best joint alignment.
            jointAlignments.sort(Comparator.comparingInt(FilterAlignmentArtifacts::jointAlignmentScore).reversed());

            final boolean bestMapsToOtherContig = !jointAlignments.isEmpty()
                    && jointAlignments.get(0).get(0).getRefId() != getReferenceDictionary().getSequenceIndex(variant.getContig());
            if (bestMapsToOtherContig) {
                builder.filter(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME);
            } else if (jointAlignments.size() > 1) {
                final int totalBases = unitigs.stream().mapToInt(unitig -> unitig.length).sum();
                final int scoreDifference = jointAlignmentScore(jointAlignments.get(0)) - jointAlignmentScore(jointAlignments.get(1));
                final int mismatchDifference = totalMismatches(jointAlignments.get(1)) - totalMismatches(jointAlignments.get(0));
                builder.attribute(GATKVCFConstants.ALIGNMENT_SCORE_DIFFERENCE_KEY, scoreDifference);

                // The runner-up is nearly as good as the best alignment on both measures: treat as multi-mapping.
                final boolean scoreTooClose = ((double) scoreDifference) / ((double) totalBases) < this.realignmentArgumentCollection.minAlignerScoreDifferencePerBase;
                final boolean mismatchesTooClose = ((double) mismatchDifference) / ((double) totalBases) < this.realignmentArgumentCollection.minMismatchDifferencePerBase;
                if (scoreTooClose && mismatchesTooClose) {
                    builder.filter(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME);
                }
            }
            this.vcfWriter.add(builder.make());
        }
    }

    /**
     * Builds an assembly region holding every read whose name matches a read that supports the
     * variant (so reads sharing a name with a supporting read are included as well), sorted by
     * start position. The region spans from the smallest read start to the largest read end,
     * padded by {@link #ASSEMBLY_PADDING} on both sides and clamped to position 1 on the left;
     * when there are no such reads the variant's own start/end are used instead.
     *
     * @param list           read contexts to search
     * @param variantContext the variant being examined
     * @return the populated assembly region
     */
    private AssemblyRegion makeAssemblyRegionFromVariantReads(List<ReadsContext> list, VariantContext variantContext) {
        final Set<String> supportingReadNames = list.stream()
                .flatMap(context -> Utils.stream(context))
                .filter(read -> RealignmentEngine.supportsVariant(read, variantContext, this.indelStartTolerance))
                .map(GATKRead::getName)
                .collect(Collectors.toSet());

        final List<GATKRead> variantReads = list.stream()
                .flatMap(context -> Utils.stream(context))
                .filter(read -> supportingReadNames.contains(read.getName()))
                .sorted(Comparator.comparingInt(GATKRead::getStart))
                .collect(Collectors.toList());

        final int firstReadStart = variantReads.stream().mapToInt(GATKRead::getStart).min().orElse(variantContext.getStart());
        final int lastReadEnd = variantReads.stream().mapToInt(GATKRead::getEnd).max().orElse(variantContext.getEnd());
        final SimpleInterval assemblyWindow = new SimpleInterval(variantContext.getContig(), Math.max(firstReadStart - ASSEMBLY_PADDING, 1), lastReadEnd + ASSEMBLY_PADDING);

        final AssemblyRegion assemblyRegion = new AssemblyRegion(assemblyWindow, 0, this.bamHeader);
        assemblyRegion.addAll(variantReads);
        return assemblyRegion;
    }

    /**
     * Writes the reads aligned to their haplotypes to the haplotype BAM, if one was requested.
     * The assembly's full haplotype list is passed both as the haplotype set and as the
     * "best haplotypes" argument of the writer.
     */
    private void writeBamOutput(AssemblyResultSet assemblyResultSet, AlleleLikelihoods<GATKRead, Haplotype> alleleLikelihoods, Set<Haplotype> set, Locatable locatable) {
        this.haplotypeBAMWriter.ifPresent(writer -> writer.writeReadsAlignedToHaplotypes(
                assemblyResultSet.getHaplotypeList(),
                assemblyResultSet.getPaddedReferenceLoc(),
                assemblyResultSet.getHaplotypeList(),
                set,
                alleleLikelihoods,
                locatable));
    }

    /**
     * Derives consensus sequences ("unitigs") from consecutive non-empty pileups. A new unitig is
     * started whenever a pileup's start position is not exactly one past the previous non-empty
     * pileup. At each locus where deletions are fewer than half of the pileup, the most frequent
     * base is appended, followed by the most common immediately-following insertion when more than
     * half of the pileup elements carry an insertion.
     *
     * @param locusIteratorByState iterator over the loci of the reassembled region
     * @return the unitigs strictly longer than {@link #MIN_UNITIG_LENGTH} bases, in locus order
     */
    private List<byte[]> getUnitigs(LocusIteratorByState locusIteratorByState) {
        final List<StringBuilder> unitigBuilders = new ArrayList<>();
        int lastCoveredLocus = Integer.MIN_VALUE;

        while (locusIteratorByState.hasNext()) {
            final ReadPileup pileup = locusIteratorByState.next().getBasePileup();
            if (pileup.isEmpty()) {
                continue;
            }

            // A gap in coverage ends the current unitig and starts a fresh one.
            final int locus = pileup.getLocation().getStart();
            if (locus != lastCoveredLocus + 1) {
                unitigBuilders.add(new StringBuilder());
            }
            lastCoveredLocus = locus;

            final StringBuilder currentUnitig = unitigBuilders.get(unitigBuilders.size() - 1);
            final int[] baseCounts = pileup.getBaseCounts();
            final int deletionCount = (int) Utils.stream(pileup).filter(element -> element.isDeletion()).count();
            if (deletionCount < pileup.size() / 2) {
                currentUnitig.append((char) BaseUtils.baseIndexToSimpleBase(MathUtils.maxElementIndex(baseCounts)));

                final Multiset<String> insertedBases = Utils.stream(pileup)
                        .map(element -> element.getBasesOfImmediatelyFollowingInsertion())
                        .filter(bases -> bases != null)
                        .collect(Collectors.toCollection(HashMultiset::create));
                if (insertedBases.size() > pileup.size() / 2) {
                    currentUnitig.append(Multisets.copyHighestCountFirst(insertedBases).entrySet().iterator().next().getElement());
                }
            }
        }

        // US_ASCII yields exactly one byte per appended character regardless of the platform default charset.
        return unitigBuilders.stream()
                .map(builder -> builder.toString().getBytes(StandardCharsets.US_ASCII))
                .filter(unitig -> unitig.length > MIN_UNITIG_LENGTH)
                .collect(Collectors.toList());
    }

    /**
     * @return the sum of the aligner scores of all alignments in {@code list}
     */
    private static int jointAlignmentScore(List<BwaMemAlignment> list) {
        return list.stream().mapToInt(BwaMemAlignment::getAlignerScore).sum();
    }

    /**
     * @return the sum of the mismatch counts of all alignments in {@code list}
     */
    private static int totalMismatches(List<BwaMemAlignment> list) {
        return list.stream().mapToInt(BwaMemAlignment::getNMismatches).sum();
    }

    /**
     * Closes the VCF writer and, if one was opened, the haplotype BAM writer. Both fields may
     * still be null when start-up failed before they were assigned, hence the guards; the BAM
     * writer is closed in a {@code finally} so a failure closing the VCF does not leak it.
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public void closeTool() {
        try {
            if (this.vcfWriter != null) {
                this.vcfWriter.close();
            }
        } finally {
            if (this.haplotypeBAMWriter != null) {
                this.haplotypeBAMWriter.ifPresent(HaplotypeBAMWriter::close);
            }
        }
    }
}
