package org.broadinstitute.hellbender.tools.spark.pathseq;

import java.util.Collection;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.cmdline.programgroups.MetagenomicsProgramGroup;
import org.broadinstitute.hellbender.engine.spark.datasources.ReferenceFileSparkSource;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVKmerShort;

/**
 * Command-line tool that extracts k-mers from a host reference and writes them to a file,
 * either as a k-mer hash set or, when a positive false positive probability is requested,
 * as a Bloom filter.
 *
 * <p>NOTE(review): the {@code kmer-size} argument is documented as "must be odd", but this
 * class itself only bounds it to the range 1..31 through the argument annotation; confirm
 * whether the downstream k-mer utilities enforce oddness.
 */
@DocumentedFeature
@CommandLineProgramProperties(
        summary = "Produce a set of k-mers from the given host reference. The output file from this tool is required to run the PathSeq pipeline.",
        oneLineSummary = "Builds set of host reference k-mers",
        programGroup = MetagenomicsProgramGroup.class)
public final class PathSeqBuildKmers extends CommandLineProgram {

    // Argument names, exposed as constants so callers can build command lines programmatically.
    public static final String REFERENCE_LONG_NAME = "reference";
    public static final String REFERENCE_SHORT_NAME = "R";
    public static final String BLOOM_FILTER_FALSE_POSITIVE_P_LONG_NAME = "bloom-false-positive-probability";
    public static final String BLOOM_FILTER_FALSE_POSITIVE_P_SHORT_NAME = "P";
    public static final String KMER_SIZE_LONG_NAME = "kmer-size";
    public static final String KMER_SIZE_SHORT_NAME = "SZ";
    public static final String KMER_MASK_LONG_NAME = "kmer-mask";
    public static final String KMER_MASK_SHORT_NAME = "M";
    public static final String KMER_SPACING_LONG_NAME = "kmer-spacing";
    public static final String KMER_SPACING_SHORT_NAME = "SP";

    /** Destination path for the generated k-mer set. */
    @Argument(
            doc = "File for k-mer set output. Extension will be automatically added if not present (.hss for hash set or .bfi for Bloom filter)",
            shortName = "O",
            fullName = "output")
    public String outputFile;

    /** Path of the host reference FASTA the k-mers are read from. */
    @Argument(
            doc = "Reference FASTA file path on local disk",
            fullName = REFERENCE_LONG_NAME,
            shortName = REFERENCE_SHORT_NAME)
    public String reference;

    /** Requested Bloom filter false positive probability; the default of 0 selects the hash set output. */
    @Argument(
            doc = "If non-zero, creates a Bloom filter with this false positive probability",
            fullName = BLOOM_FILTER_FALSE_POSITIVE_P_LONG_NAME,
            shortName = BLOOM_FILTER_FALSE_POSITIVE_P_SHORT_NAME,
            minValue = 0.0,
            maxValue = 1.0,
            maxRecommendedValue = 0.001,
            optional = true)
    public double bloomFpp = 0.0;

    /** Length of each k-mer. */
    @Argument(
            doc = "K-mer size, must be odd and less than 32",
            fullName = KMER_SIZE_LONG_NAME,
            shortName = KMER_SIZE_SHORT_NAME,
            minValue = 1.0,
            maxValue = 31.0,
            optional = true)
    public int kmerSize = 31;

    /** Textual list of masked base positions; empty by default. */
    @Argument(
            doc = "Comma-delimited list of base indices (starting with 0) to mask in each k-mer",
            fullName = KMER_MASK_LONG_NAME,
            shortName = KMER_MASK_SHORT_NAME,
            optional = true)
    public String kmerMaskString = "";

    /** Spacing between successive k-mers taken from the reference. */
    @Argument(
            doc = "Spacing between successive k-mers",
            fullName = KMER_SPACING_LONG_NAME,
            shortName = KMER_SPACING_SHORT_NAME,
            minValue = 1.0,
            optional = true)
    public int kmerSpacing = 1;

    /**
     * Loads the masked k-mers of the reference and writes them to {@link #outputFile}, as a
     * hash set when {@link #bloomFpp} is not positive and as a Bloom filter otherwise.
     *
     * @return always {@code null}
     */
    @Override
    protected Object doWork() {
        final ReferenceFileSparkSource hostReference = new ReferenceFileSparkSource(reference);
        final SVKmerShort kmerMask = SVKmerShort.getMask(PSUtils.parseMask(kmerMaskString, kmerSize), kmerSize);

        logger.info("Loading reference kmers...");
        final Collection<long[]> maskedKmers =
                PSKmerUtils.getMaskedKmersFromLocalReference(hostReference, kmerSize, kmerSpacing, kmerMask);
        final long totalLongs = PSKmerUtils.longArrayCollectionSize(maskedKmers);

        if (bloomFpp <= 0.0) {
            writeHashSet(maskedKmers, totalLongs, kmerMask);
        } else {
            writeBloomFilter(maskedKmers, totalLongs, kmerMask);
        }
        return null;
    }

    /** Converts the loaded k-mers into a {@link PSKmerSet} and writes it to the output file. */
    private void writeHashSet(final Collection<long[]> maskedKmers, final long totalLongs, final SVKmerShort kmerMask) {
        logger.info("Building kmer hash set...");
        final PSKmerSet kmerSet =
                new PSKmerSet(PSKmerUtils.longArrayCollectionToSet(maskedKmers, totalLongs), kmerSize, kmerMask);
        PSKmerUtils.writeKmerSet(outputFile, kmerSet);
    }

    /** Converts the loaded k-mers into a {@link PSKmerBloomFilter}, logs its reported false positive probability and writes it. */
    private void writeBloomFilter(final Collection<long[]> maskedKmers, final long totalLongs, final SVKmerShort kmerMask) {
        logger.info("Building Bloom filter with false positive probability " + bloomFpp + "...");
        final PSKmerBloomFilter bloomFilter = new PSKmerBloomFilter(
                PSKmerUtils.longArrayCollectionToBloomFilter(maskedKmers, totalLongs, bloomFpp),
                kmerSize,
                kmerMask,
                totalLongs);
        logger.info("Theoretical Bloom filter false positive probability: " + bloomFilter.getFalsePositiveProbability());
        PSKmerUtils.writeKmerBloomFilter(outputFile, bloomFilter);
    }
}
