package org.opencb.cellbase.lib.builders.clinical.variant;

import com.fasterxml.jackson.core.JsonProcessingException;
import java.io.BufferedReader;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.formats.variant.clinvar.rcv.ClinvarParser;
import org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb.CitationType;
import org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb.MeasureSetType;
import org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb.MeasureTraitType;
import org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb.MeasureType;
import org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb.ObservationSet;
import org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb.PublicSetType;
import org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb.ReferenceAssertionType;
import org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb.ReleaseType;
import org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb.SetElementSetType;
import org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb.TraitType;
import org.opencb.biodata.models.variant.avro.AlleleOrigin;
import org.opencb.biodata.models.variant.avro.Confidence;
import org.opencb.biodata.models.variant.avro.ConsistencyStatus;
import org.opencb.biodata.models.variant.avro.EthnicCategory;
import org.opencb.biodata.models.variant.avro.EvidenceEntry;
import org.opencb.biodata.models.variant.avro.EvidenceImpact;
import org.opencb.biodata.models.variant.avro.EvidenceSource;
import org.opencb.biodata.models.variant.avro.EvidenceSubmission;
import org.opencb.biodata.models.variant.avro.GenomicFeature;
import org.opencb.biodata.models.variant.avro.HeritableTrait;
import org.opencb.biodata.models.variant.avro.ModeOfInheritance;
import org.opencb.biodata.models.variant.avro.Penetrance;
import org.opencb.biodata.models.variant.avro.Property;
import org.opencb.biodata.models.variant.avro.SomaticInformation;
import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.biodata.models.variant.avro.VariantClassification;
import org.opencb.cellbase.lib.EtlCommons;
import org.opencb.cellbase.lib.builders.clinical.variant.ClinicalIndexer;
import org.opencb.cellbase.lib.variant.VariantAnnotationUtils;
import org.opencb.commons.ProgressLogger;
import org.opencb.commons.utils.FileUtils;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

/* loaded from: input_file:org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.class */
public class ClinVarIndexer extends ClinicalIndexer {
    // Name of the package holding the ClinVar RCV (v64) JAXB bindings imported by this file.
    // NOTE(review): presumably used as the JAXB context path when unmarshalling - usage not visible here.
    private static final String CLINVAR_CONTEXT = "org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb";
    // ClinVar source name. NOTE(review): not referenced in the visible part of this file.
    private static final String CLINVAR_NAME = "clinvar";

    // Positions used to index the String[] of variant summary fields (see the String[]-based addNewEntries).
    private static final int VARIANT_SUMMARY_CHR_COLUMN = 18;
    private static final int VARIANT_SUMMARY_START_COLUMN = 19;
    private static final int VARIANT_SUMMARY_END_COLUMN = 20;
    private static final int VARIANT_SUMMARY_REFERENCE_COLUMN = 32;
    private static final int VARIANT_SUMMARY_ALTERNATE_COLUMN = 33;
    private static final int VARIANT_SUMMARY_CLINSIG_COLUMN = 6;
    private static final int VARIANT_SUMMARY_GENE_COLUMN = 4;
    private static final int VARIANT_SUMMARY_REVIEW_COLUMN = 24;
    private static final int VARIANT_SUMMARY_ORIGIN_COLUMN = 14;
    private static final int VARIANT_SUMMARY_PHENOTYPE_COLUMN = 13;
    private static final int VARIANT_SUMMARY_ASSEMBLY_COLUMN = 16;

    // Column positions named after the variation/allele mapping file.
    // NOTE(review): presumably index the fields of one line of clinvarVariationAlleleFile - confirm.
    private static final int VARIATION_ALLELE_VARIATION_COLUMN = 0;
    private static final int VARIATION_ALLELE_TYPE_COLUMN = 1;
    private static final int VARIATION_ALLELE_ALLELE_COLUMN = 2;

    private static final String SOMATIC = "somatic";

    // Names of the additional properties attached to each evidence entry.
    private static final String CLINICAL_SIGNIFICANCE_IN_SOURCE_FILE = "ClinicalSignificance_in_source_file";
    private static final String REVIEW_STATUS_IN_SOURCE_FILE = "ReviewStatus_in_source_file";
    // Keys of the per-trait maps serialised as JSON by getHeritableTrait / getInheritanceModel.
    private static final String TRAIT = "trait";
    private static final String MODE_OF_INHERITANCE = "modeOfInheritance";
    // Property name under which the mate variant string is stored.
    private static final String GENOTYPESET = "GenotypeSet";

    // NOTE(review): look like ClinVar variation type labels - usage not visible in this part of the file.
    private static final String COMPOUND_HETEROZYGOTE = "CompoundHeterozygote";
    private static final String DIPLOTYPE = "Diplotype";
    private static final String VARIANT = "Variant";

    private static final char CLINICAL_SIGNIFICANCE_SEPARATOR = '/';

    // Input locations, assigned once in the constructor.
    // clinvarXMLFiles is the directory that index() scans for *.xml / *.xml.gz files.
    private final Path clinvarXMLFiles;
    private final Path clinvarSummaryFile;
    private final Path clinvarVariationAlleleFile;
    private final Path clinvarEFOFile;
    // Assembly name, reported in the progress log written by index().
    private final String assembly;

    // Counters reported by printSummary().
    private int numberSomaticRecords;
    private int numberGermlineRecords;
    private int numberNoDiseaseTrait;
    private int numberMultipleInheritanceModels;
    // Inheritance modes treated as "dominant" by solveModeOfInheritanceConflict().
    private static final Set<ModeOfInheritance> DOMINANT_TERM_SET = new HashSet<>(Arrays.asList(
            ModeOfInheritance.monoallelic,
            ModeOfInheritance.monoallelic_maternally_imprinted,
            ModeOfInheritance.monoallelic_not_imprinted,
            ModeOfInheritance.monoallelic_paternally_imprinted));
    // Inheritance modes treated as "recessive" by solveModeOfInheritanceConflict().
    private static final Set<ModeOfInheritance> RECESSIVE_TERM_SET = new HashSet<>(Arrays.asList(
            ModeOfInheritance.biallelic));

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer$AlleleLocationData.class */
    /**
     * Mutable pair of an allele identifier and the sequence location associated with it.
     */
    public class AlleleLocationData {
        private String alleleId;
        private ClinicalIndexer.SequenceLocation sequenceLocation;

        /**
         * @param alleleId allele identifier
         * @param location sequence location associated with the allele
         */
        AlleleLocationData(String alleleId, ClinicalIndexer.SequenceLocation location) {
            this.alleleId = alleleId;
            this.sequenceLocation = location;
        }

        /** @return the allele identifier */
        String getAlleleId() {
            return alleleId;
        }

        /** @param alleleId new allele identifier */
        void setAlleleId(String alleleId) {
            this.alleleId = alleleId;
        }

        /** @return the sequence location associated with the allele */
        ClinicalIndexer.SequenceLocation getSequenceLocation() {
            return sequenceLocation;
        }

        /** @param location new sequence location */
        void setSequenceLocation(ClinicalIndexer.SequenceLocation location) {
            this.sequenceLocation = location;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer$EFO.class */
    /**
     * Immutable holder of the three attributes kept for an EFO term: identifier, name and URL.
     */
    public class EFO {
        private final String id;
        private final String name;
        private final String url;

        /**
         * @param id   term identifier
         * @param name term name
         * @param url  term URL
         */
        EFO(String id, String name, String url) {
            this.id = id;
            this.name = name;
            this.url = url;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer$VariationData.class */
    /**
     * Mutable pair of a variation identifier and its type label.
     */
    public class VariationData {
        private String id;
        private String type;

        /**
         * @param id   variation identifier
         * @param type variation type label
         */
        VariationData(String id, String type) {
            this.id = id;
            this.type = type;
        }

        /** @return the variation identifier */
        String getId() {
            return id;
        }

        /** @param id new variation identifier */
        void setId(String id) {
            this.id = id;
        }

        /** @return the variation type label */
        String getType() {
            return type;
        }

        /** @param type new variation type label */
        void setType(String type) {
            this.type = type;
        }
    }

    /**
     * Creates an indexer bound to the given ClinVar input files and RocksDB instance.
     *
     * @param clinvarXMLFiles            directory scanned by {@link #index()} for {@code .xml} / {@code .xml.gz} files
     * @param clinvarSummaryFile         variant summary file
     * @param clinvarVariationAlleleFile variation/allele mapping file
     * @param clinvarEFOFile             EFO terms file
     * @param normalize                  value stored in the inherited {@code normalize} flag
     * @param genomeSequenceFilePath     genome sequence file, also handed to the superclass constructor
     * @param assembly                   assembly name
     * @param rdb                        RocksDB instance the entries are written to
     * @throws IOException declared by the superclass constructor
     */
    public ClinVarIndexer(Path clinvarXMLFiles, Path clinvarSummaryFile, Path clinvarVariationAlleleFile,
                          Path clinvarEFOFile, boolean normalize, Path genomeSequenceFilePath, String assembly,
                          RocksDB rdb) throws IOException {
        super(genomeSequenceFilePath);

        // Inputs and collaborators.
        this.rdb = rdb;
        this.clinvarXMLFiles = clinvarXMLFiles;
        this.clinvarSummaryFile = clinvarSummaryFile;
        this.clinvarVariationAlleleFile = clinvarVariationAlleleFile;
        this.clinvarEFOFile = clinvarEFOFile;
        this.normalize = normalize;
        this.genomeSequenceFilePath = genomeSequenceFilePath;
        this.assembly = assembly;

        // Summary counters all start from zero.
        this.numberSomaticRecords = 0;
        this.numberGermlineRecords = 0;
        this.numberNoDiseaseTrait = 0;
        this.numberMultipleInheritanceModels = 0;
    }

    /**
     * Indexes every ClinVar XML file found in {@code clinvarXMLFiles} into RocksDB.
     *
     * <p>EFO terms and the variant summary are loaded first. Then each {@code .xml} / {@code .xml.gz} file
     * (processed in sorted order) is unmarshalled, and every ClinVar set whose reference accession has allele
     * location data in the variant summary is written through {@code updateRocksDB}. A summary of the counters
     * is logged at the end.
     *
     * <p>An {@link IOException} (including failure to list the XML directory) or a {@link JAXBException} is
     * logged together with its stack trace and then swallowed, so the method returns normally in those cases.
     *
     * @throws RocksDBException if reading from or writing to the RocksDB index fails; logged and rethrown
     */
    public void index() throws RocksDBException {
        try {
            Map<String, EFO> efoTerms = loadEFOTerms();
            Map<String, List<AlleleLocationData>> alleleLocationsByAccession = parseVariantSummary(efoTerms);

            File[] xmlFiles = this.clinvarXMLFiles.toFile()
                    .listFiles((dir, name) -> name.endsWith(".xml") || name.endsWith(".xml.gz"));
            // File.listFiles returns null when the path is not a directory or an I/O error occurs.
            if (xmlFiles == null) {
                throw new IOException("Unable to list ClinVar XML files in " + this.clinvarXMLFiles);
            }
            Arrays.sort(xmlFiles);

            ProgressLogger progressLogger = new ProgressLogger("Parsed XML records:", xmlFiles.length * 10000, 200)
                    .setBatchSize(10000);
            for (File xmlFile : xmlFiles) {
                logger.info("Unmarshalling clinvar file " + xmlFile + " ...");
                JAXBElement<ReleaseType> release = unmarshalXML(xmlFile.toPath());
                logger.info("Done");
                logger.info("Serializing clinvar records that have Sequence Location for Assembly " + this.assembly + " ...");
                for (PublicSetType publicSet : release.getValue().getClinVarSet()) {
                    List<AlleleLocationData> alleleLocations = alleleLocationsByAccession
                            .get(publicSet.getReferenceClinVarAssertion().getClinVarAccession().getAcc());
                    if (alleleLocations != null) {
                        boolean indexed = false;
                        for (int i = 0; i < alleleLocations.size(); i++) {
                            // updateRocksDB is the left operand on purpose: it must run for every allele,
                            // even once 'indexed' is already true.
                            indexed = updateRocksDB(alleleLocations.get(i), publicSet,
                                    getMateVariantStringByAlleleLocationData(i, alleleLocations), efoTerms) || indexed;
                        }
                        if (indexed) {
                            this.numberIndexedRecords++;
                        }
                    }
                    progressLogger.increment(1L);
                    this.totalNumberRecords++;
                }
            }
            logger.info("Done");
            printSummary();
        } catch (RocksDBException e) {
            logger.error("Error reading/writing from/to the RocksDB index while indexing ClinVar");
            throw e;
        } catch (IOException e) {
            logger.error("Error indexing clinvar Xml file: " + e.getMessage(), e);
        } catch (JAXBException e) {
            logger.error("Error unmarshalling clinvar Xml file: " + e.getMessage(), e);
        }
    }

    /**
     * Builds a comma-separated string with the normalised variant strings of every allele in {@code list}
     * except the one at position {@code i}.
     *
     * @param i    position of the allele to leave out
     * @param list allele location data of all alleles
     * @return the joined variant strings, or {@code null} when nothing was appended
     */
    private String getMateVariantStringByAlleleLocationData(int i, List<AlleleLocationData> list) {
        StringBuilder mates = new StringBuilder();
        int position = -1;
        for (AlleleLocationData candidate : list) {
            position++;
            if (position == i) {
                continue;
            }
            ClinicalIndexer.SequenceLocation location = candidate.getSequenceLocation();
            List<String> variantStrings = getNormalisedVariantString(location.getChromosome(), location.getStart(),
                    location.getReference(), location.getAlternate());
            if (variantStrings == null) {
                continue;
            }
            for (String variantString : variantStrings) {
                if (mates.length() > 0) {
                    mates.append(",");
                }
                mates.append(variantString);
            }
        }
        return mates.length() == 0 ? null : mates.toString();
    }

    /**
     * Logs, at info level, the counters accumulated while indexing.
     */
    private void printSummary() {
        logger.info("Total number of parsed ClinVar records: {}", totalNumberRecords);
        logger.info("Number of indexed Clinvar records: {}", numberIndexedRecords);
        logger.info("Number of new variants in ClinVar not previously indexed in RocksDB: {}", numberNewVariants);
        logger.info("Number of updated variants during ClinVar indexing: {}", numberVariantUpdates);
        logger.info("Number of ClinVar germline variants: {}", numberGermlineRecords);
        logger.info("Number of ClinVar somatic variants: {}", numberSomaticRecords);
        logger.info("Number of ClinVar records without a \"disease\" trait: {}", numberNoDiseaseTrait);
        logger.info("Number of ClinVar records with multiple inheritance models: {}", numberMultipleInheritanceModels);
    }

    /**
     * Adds a variant-summary-based ClinVar evidence entry to every normalised variant derived from
     * {@code sequenceLocation} and writes the updated annotations back to RocksDB.
     *
     * @param sequenceLocation   location (chromosome, start, reference, alternate) of the variant
     * @param variationId        identifier forwarded to {@code addNewEntries}
     * @param lineFields         fields of the variant summary line, indexed with the VARIANT_SUMMARY_* constants
     * @param mateVariantString  mate variant string, may be {@code null}
     * @param traitsToEfoTermsMap EFO terms, forwarded to {@code addNewEntries}
     * @return {@code false} when the location could not be normalised, {@code true} otherwise
     * @throws RocksDBException if the RocksDB write fails
     * @throws IOException      if the annotation cannot be read or serialised
     */
    private boolean updateRocksDB(ClinicalIndexer.SequenceLocation sequenceLocation, String variationId,
                                  String[] lineFields, String mateVariantString,
                                  Map<String, EFO> traitsToEfoTermsMap) throws RocksDBException, IOException {
        List<String> normalisedVariantStrings = getNormalisedVariantString(sequenceLocation.getChromosome(),
                sequenceLocation.getStart(), sequenceLocation.getReference(), sequenceLocation.getAlternate());
        if (normalisedVariantStrings == null) {
            return false;
        }

        // A haplotype string is recorded only when normalisation produced more than one variant.
        // It is the same for every variant, so it is built once outside the loop.
        String haplotype = normalisedVariantStrings.size() > 1
                ? StringUtils.join(normalisedVariantStrings, ',')
                : null;

        for (String variantString : normalisedVariantStrings) {
            byte[] key = variantString.getBytes();
            VariantAnnotation variantAnnotation = getVariantAnnotation(key);
            addNewEntries(variantAnnotation, variationId, lineFields, mateVariantString, haplotype,
                    traitsToEfoTermsMap);
            this.rdb.put(key, jsonObjectWriter.writeValueAsBytes(variantAnnotation));
        }
        return true;
    }

    /**
     * Adds XML-based ClinVar evidence entries to every normalised variant derived from the location in
     * {@code alleleLocationData} and writes the updated annotations back to RocksDB. One entry is added for
     * the reference assertion and one for each submitted assertion of {@code publicSetType}.
     *
     * @param alleleLocationData  allele id and sequence location of the variant
     * @param publicSetType       ClinVar set being indexed
     * @param mateVariantString   mate variant string, may be {@code null}
     * @param traitsToEfoTermsMap EFO terms, forwarded to {@code addNewEntries}
     * @return {@code false} when the location could not be normalised, {@code true} otherwise
     * @throws RocksDBException if the RocksDB write fails
     * @throws IOException      if the annotation cannot be read or serialised
     */
    private boolean updateRocksDB(AlleleLocationData alleleLocationData, PublicSetType publicSetType,
                                  String mateVariantString, Map<String, EFO> traitsToEfoTermsMap)
            throws RocksDBException, IOException {
        ClinicalIndexer.SequenceLocation location = alleleLocationData.getSequenceLocation();
        List<String> normalisedVariantStrings = getNormalisedVariantString(location.getChromosome(),
                location.getStart(), location.getReference(), location.getAlternate());
        if (normalisedVariantStrings == null) {
            return false;
        }

        // A haplotype string is recorded only when normalisation produced more than one variant.
        // It is the same for every variant, so it is built once outside the loop.
        String haplotype = normalisedVariantStrings.size() > 1
                ? StringUtils.join(normalisedVariantStrings, ',')
                : null;

        for (String variantString : normalisedVariantStrings) {
            byte[] key = variantString.getBytes();
            VariantAnnotation variantAnnotation = getVariantAnnotation(key);

            // Entry for the reference (aggregated) assertion.
            ReferenceAssertionType reference = publicSetType.getReferenceClinVarAssertion();
            addNewEntries(variantAnnotation, publicSetType, alleleLocationData.getAlleleId(), mateVariantString,
                    haplotype, traitsToEfoTermsMap,
                    reference.getClinVarAccession().getAcc(),
                    reference.getClinicalSignificance().getDescription(),
                    reference.getClinicalSignificance().getReviewStatus().name(),
                    reference.getObservedIn());

            // One entry per submitted assertion.
            for (MeasureTraitType assertion : publicSetType.getClinVarAssertion()) {
                addNewEntries(variantAnnotation, publicSetType, alleleLocationData.getAlleleId(),
                        mateVariantString, haplotype, traitsToEfoTermsMap,
                        assertion.getClinVarAccession().getAcc(),
                        StringUtils.join(assertion.getClinicalSignificance().getDescription(), '/'),
                        getReviewStatusIfPresent(assertion),
                        assertion.getObservedIn());
            }
            this.rdb.put(key, jsonObjectWriter.writeValueAsBytes(variantAnnotation));
        }
        return true;
    }

    /**
     * Returns the name of the review status of a submitted assertion.
     *
     * @param measureTraitType submitted ClinVar assertion
     * @return the review status name, or {@code null} when the assertion has no clinical significance or
     *         no review status
     */
    private String getReviewStatusIfPresent(MeasureTraitType measureTraitType) {
        // getSubmissionList treats the clinical significance as optional; apply the same guard here.
        if (measureTraitType.getClinicalSignificance() == null
                || measureTraitType.getClinicalSignificance().getReviewStatus() == null) {
            return null;
        }
        return measureTraitType.getClinicalSignificance().getReviewStatus().name();
    }

    /**
     * Appends to {@code variantAnnotation} one ClinVar evidence entry built from the fields of a variant
     * summary line.
     *
     * @param variantAnnotation   annotation whose trait association list receives the new entry
     * @param variationId         identifier used as entry id and appended to the ClinVar variation URL
     * @param lineFields          fields of the variant summary line, indexed with the VARIANT_SUMMARY_* constants
     * @param mateVariantString   stored as the "GenotypeSet" property when not {@code null}
     * @param haplotype           stored as the "Haplotype" property when not {@code null}
     * @param traitsToEfoTermsMap EFO terms; not used by this overload
     */
    private void addNewEntries(VariantAnnotation variantAnnotation, String variationId, String[] lineFields,
                               String mateVariantString, String haplotype,
                               Map<String, EFO> traitsToEfoTermsMap) {
        EvidenceSource evidenceSource = new EvidenceSource("clinvar", "2022.02", "2022-02");

        // Allele origins: ';'-separated, lower-cased and de-duplicated.
        List<AlleleOrigin> alleleOrigin = null;
        if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_ORIGIN_COLUMN])) {
            Set<String> originSet = new HashSet<>(
                    Arrays.asList(lineFields[VARIANT_SUMMARY_ORIGIN_COLUMN].toLowerCase().split(";")));
            alleleOrigin = getAlleleOriginList(new ArrayList<>(originSet));
        }

        // Phenotypes: ';'-separated, lower-cased and de-duplicated; no inheritance mode is available here.
        List<HeritableTrait> heritableTraits = Collections.emptyList();
        if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_PHENOTYPE_COLUMN])) {
            Set<String> phenotypeSet = new HashSet<>(
                    Arrays.asList(lineFields[VARIANT_SUMMARY_PHENOTYPE_COLUMN].toLowerCase().split(";")));
            heritableTraits = phenotypeSet.stream()
                    .map(phenotype -> new HeritableTrait(phenotype, (ModeOfInheritance) null))
                    .collect(Collectors.toList());
        }

        // Genes: ','-separated.
        List<GenomicFeature> genomicFeatures = Collections.emptyList();
        if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_GENE_COLUMN])) {
            String[] geneNames = lineFields[VARIANT_SUMMARY_GENE_COLUMN].split(",");
            genomicFeatures = new ArrayList<>(geneNames.length);
            for (String geneName : geneNames) {
                genomicFeatures.add(createGeneGenomicFeature(geneName));
            }
        }

        List<Property> additionalProperties = new ArrayList<>(3);

        VariantClassification variantClassification = null;
        if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_CLINSIG_COLUMN])) {
            variantClassification = getVariantClassification(
                    Arrays.asList(lineFields[VARIANT_SUMMARY_CLINSIG_COLUMN].split("[,/;]")));
            additionalProperties.add(new Property((String) null, CLINICAL_SIGNIFICANCE_IN_SOURCE_FILE,
                    lineFields[VARIANT_SUMMARY_CLINSIG_COLUMN]));
        }

        ConsistencyStatus consistencyStatus = null;
        if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_REVIEW_COLUMN])) {
            consistencyStatus = getConsistencyStatus(lineFields[VARIANT_SUMMARY_REVIEW_COLUMN]);
            additionalProperties.add(new Property((String) null, REVIEW_STATUS_IN_SOURCE_FILE,
                    lineFields[VARIANT_SUMMARY_REVIEW_COLUMN]));
        }

        if (mateVariantString != null) {
            additionalProperties.add(new Property((String) null, GENOTYPESET, mateVariantString));
        }
        if (haplotype != null) {
            additionalProperties.add(new Property("HAPLOTYPE", "Haplotype", haplotype));
        }

        // An empty origin list is stored as null rather than as an empty list.
        List<AlleleOrigin> alleleOriginOrNull = (alleleOrigin == null || alleleOrigin.isEmpty()) ? null : alleleOrigin;

        variantAnnotation.getTraitAssociation().add(new EvidenceEntry(evidenceSource, Collections.emptyList(),
                (SomaticInformation) null, "https://www.ncbi.nlm.nih.gov/clinvar/variation/" + variationId,
                variationId, (String) null, alleleOriginOrNull, heritableTraits, genomicFeatures,
                variantClassification, (EvidenceImpact) null, (Confidence) null, consistencyStatus,
                EthnicCategory.Z, (Penetrance) null, (Boolean) null, (String) null, additionalProperties,
                Collections.emptyList()));
    }

    /**
     * Maps a ClinVar review status string to a {@link ConsistencyStatus}.
     *
     * <p>The string is split on {@code ','}, {@code '/'} and {@code ';'}; each token is lower-cased and
     * trimmed, and the first token present in
     * {@code VariantAnnotationUtils.CLINVAR_REVIEW_TO_CONSISTENCY_STATUS} decides the result.
     *
     * @param reviewStatus review status text, may be {@code null} or blank
     * @return the mapped status, or {@code null} when the text is blank or no token is known
     */
    private ConsistencyStatus getConsistencyStatus(String reviewStatus) {
        if (StringUtils.isBlank(reviewStatus)) {
            return null;
        }
        for (String token : reviewStatus.split("[,/;]")) {
            String key = token.toLowerCase().trim();
            if (VariantAnnotationUtils.CLINVAR_REVIEW_TO_CONSISTENCY_STATUS.containsKey(key)) {
                return VariantAnnotationUtils.CLINVAR_REVIEW_TO_CONSISTENCY_STATUS.get(key);
            }
        }
        return null;
    }

    /**
     * Appends to {@code variantAnnotation} one ClinVar evidence entry built from a ClinVar XML record.
     *
     * @param variantAnnotation    annotation whose trait association list receives the new entry
     * @param publicSetType        ClinVar set providing genes, submissions, traits and the VCV accession
     * @param alleleId             allele id used to select the genes when the record is a GenotypeSet
     * @param mateVariantString    stored as the "GenotypeSet" property when not {@code null}
     * @param haplotype            stored as the "Haplotype" property when not {@code null}
     * @param traitsToEfoTermsMap  EFO terms, forwarded to {@code getHeritableTrait}
     * @param accession            accession used as entry id and appended to the ClinVar URL
     * @param clinicalSignificance clinical significance text; must not be {@code null}
     * @param reviewStatus         review status text, may be {@code null}
     * @param observedIn           observations providing allele origins and PubMed citations
     * @throws JsonProcessingException if the trait properties cannot be serialised
     */
    private void addNewEntries(VariantAnnotation variantAnnotation, PublicSetType publicSetType, String alleleId,
                               String mateVariantString, String haplotype, Map<String, EFO> traitsToEfoTermsMap,
                               String accession, String clinicalSignificance, String reviewStatus,
                               List<ObservationSet> observedIn) throws JsonProcessingException {
        List<Property> additionalProperties = new ArrayList<>(3);
        EvidenceSource evidenceSource = new EvidenceSource("clinvar", "2022.02", "2022-02");

        VariantClassification variantClassification =
                getVariantClassification(Arrays.asList(clinicalSignificance.split("[,/;]")));
        additionalProperties.add(new Property((String) null, CLINICAL_SIGNIFICANCE_IN_SOURCE_FILE,
                clinicalSignificance));

        ConsistencyStatus consistencyStatus = getConsistencyStatus(reviewStatus);
        additionalProperties.add(new Property((String) null, REVIEW_STATUS_IN_SOURCE_FILE, reviewStatus));

        if (mateVariantString != null) {
            additionalProperties.add(new Property((String) null, GENOTYPESET, mateVariantString));
        }

        // The VCV accession is read from the MeasureSet. Records that carry a GenotypeSet instead have a
        // null MeasureSet (see getGenomicFeature), so the property is skipped rather than failing with an NPE.
        if (publicSetType.getReferenceClinVarAssertion().getMeasureSet() != null) {
            String vcvId = publicSetType.getReferenceClinVarAssertion().getMeasureSet().getAcc();
            if (StringUtils.isNotEmpty(vcvId)) {
                additionalProperties.add(new Property("VCV_ID", "VCV ID", vcvId));
            }
        }

        if (haplotype != null) {
            additionalProperties.add(new Property("HAPLOTYPE", "Haplotype", haplotype));
        }

        List<GenomicFeature> genomicFeatures = getGenomicFeature(publicSetType, alleleId);
        List<EvidenceSubmission> submissions = getSubmissionList(publicSetType);

        // Collect the distinct sample origins and the PubMed citations of every observation.
        List<String> bibliography = new ArrayList<>();
        Set<String> originSet = new HashSet<>(observedIn.size());
        for (ObservationSet observationSet : observedIn) {
            originSet.add(observationSet.getSample().getOrigin());
            bibliography = addBibliographyFromObservationSet(bibliography, observationSet);
        }
        List<AlleleOrigin> alleleOrigin = getAlleleOriginList(new ArrayList<>(originSet));
        // An empty origin list is stored as null rather than as an empty list.
        List<AlleleOrigin> alleleOriginOrNull = (alleleOrigin == null || alleleOrigin.isEmpty()) ? null : alleleOrigin;

        // getHeritableTrait may append a property to additionalProperties before the entry is created.
        List<HeritableTrait> heritableTraits =
                getHeritableTrait(publicSetType, traitsToEfoTermsMap, additionalProperties);

        variantAnnotation.getTraitAssociation().add(new EvidenceEntry(evidenceSource, submissions,
                (SomaticInformation) null, "https://www.ncbi.nlm.nih.gov/clinvar/" + accession, accession,
                (String) null, alleleOriginOrNull, heritableTraits, genomicFeatures, variantClassification,
                (EvidenceImpact) null, (Confidence) null, consistencyStatus, EthnicCategory.Z,
                (Penetrance) null, (Boolean) null, (String) null, additionalProperties, bibliography));
    }

    /**
     * Builds one {@link EvidenceSubmission} per submitted assertion of the given ClinVar set.
     *
     * <p>The submission date is the date the clinical significance was last evaluated when available,
     * otherwise the date the accession was updated, formatted as {@code yyyyMMdd}.
     *
     * @param publicSetType ClinVar set whose assertions are converted
     * @return the submissions, in assertion order
     */
    private List<EvidenceSubmission> getSubmissionList(PublicSetType publicSetType) {
        List<EvidenceSubmission> submissions = new ArrayList<>(publicSetType.getClinVarAssertion().size());
        for (MeasureTraitType assertion : publicSetType.getClinVarAssertion()) {
            String submitter = assertion.getClinVarSubmissionID().getSubmitter();
            String date;
            if (assertion.getClinicalSignificance() != null
                    && assertion.getClinicalSignificance().getDateLastEvaluated() != null) {
                date = formatSubmissionDate(
                        assertion.getClinicalSignificance().getDateLastEvaluated().getYear(),
                        assertion.getClinicalSignificance().getDateLastEvaluated().getMonth(),
                        assertion.getClinicalSignificance().getDateLastEvaluated().getDay());
            } else {
                date = formatSubmissionDate(
                        assertion.getClinVarAccession().getDateUpdated().getYear(),
                        assertion.getClinVarAccession().getDateUpdated().getMonth(),
                        assertion.getClinVarAccession().getDateUpdated().getDay());
            }
            submissions.add(new EvidenceSubmission(submitter, date, (String) null));
        }
        return submissions;
    }

    /** Formats a date as zero-padded {@code yyyyMMdd}. */
    private static String formatSubmissionDate(int year, int month, int day) {
        return String.format("%04d%02d%02d", year, month, day);
    }

    /**
     * Derives the mode of inheritance from the attributes of a reference assertion and records the raw
     * value(s) in {@code map} under the {@code "modeOfInheritance"} key.
     *
     * <p>Only attributes whose type equals "modeofinheritance" (ignoring case) are considered; their values
     * are lower-cased and de-duplicated. A single value is stored as is; several values are stored as a
     * JSON array, counted, and resolved through {@code solveModeOfInheritanceConflict}.
     *
     * @param list attribute sets of the reference assertion, may be {@code null}
     * @param map  map updated with the raw inheritance value(s); left untouched when none is found
     * @return the resolved mode of inheritance, or {@code null} when none is found or it cannot be resolved
     * @throws JsonProcessingException if the list of values cannot be serialised
     */
    private ModeOfInheritance getInheritanceModel(List<ReferenceAssertionType.AttributeSet> list,
                                                  Map<String, String> map) throws JsonProcessingException {
        Set<String> inheritanceModes = new HashSet<>();
        if (list != null) {
            for (ReferenceAssertionType.AttributeSet attributeSet : list) {
                String type = attributeSet.getAttribute().getType();
                if (type != null && type.equalsIgnoreCase("modeofinheritance")) {
                    inheritanceModes.add(attributeSet.getAttribute().getValue().toLowerCase());
                }
            }
        }

        if (inheritanceModes.isEmpty()) {
            return null;
        }
        if (inheritanceModes.size() == 1) {
            String inheritanceMode = inheritanceModes.iterator().next();
            map.put(MODE_OF_INHERITANCE, inheritanceMode);
            return getModeOfInheritance(inheritanceMode);
        }

        // Several distinct values: keep them all as a JSON array and try to reconcile them.
        map.put(MODE_OF_INHERITANCE, jsonObjectWriter.writeValueAsString(new ArrayList<>(inheritanceModes)));
        this.numberMultipleInheritanceModels++;
        return solveModeOfInheritanceConflict(inheritanceModes);
    }

    /**
     * Reconciles several raw inheritance mode strings into a single {@link ModeOfInheritance}.
     *
     * <p>Each string is mapped with {@code getModeOfInheritance}; unknown strings map to {@code null}.
     * <ul>
     *   <li>If at most one distinct non-null mode results, that mode (or {@code null}) is returned.</li>
     *   <li>Otherwise the dominant terms are discarded; if nothing is left, {@code null} is returned.</li>
     *   <li>Then the recessive terms are discarded; if nothing is left the result is
     *       {@code monoallelic_and_biallelic}, otherwise the models conflict and {@code null} is returned.</li>
     * </ul>
     *
     * @param set raw inheritance mode strings
     * @return the selected mode of inheritance, or {@code null} when none can be selected
     */
    private ModeOfInheritance solveModeOfInheritanceConflict(Set<String> set) {
        logger.warn("Multiple inheritance models found for a variant: {}", String.join(",", set));

        Set<ModeOfInheritance> candidates = set.stream()
                .map(this::getModeOfInheritance)
                .collect(Collectors.toSet());

        // Must be evaluated before the null entry is dropped.
        boolean atMostOneKnownModel = candidates.size() == 1
                || (candidates.size() == 2 && candidates.contains(null));
        candidates.remove(null);

        if (atMostOneKnownModel) {
            if (candidates.isEmpty()) {
                logger.warn("No inheritance model selected");
                return null;
            }
            ModeOfInheritance selected = candidates.iterator().next();
            logger.warn("Selected inheritance model: {}", selected);
            return selected;
        }

        candidates.removeAll(DOMINANT_TERM_SET);
        if (candidates.isEmpty()) {
            return null;
        }

        candidates.removeAll(RECESSIVE_TERM_SET);
        if (!candidates.isEmpty()) {
            logger.warn("No inheritance model selected, conflicting inheritance models found");
            return null;
        }

        logger.warn("Dominant and recessive models found, {} selected", ModeOfInheritance.monoallelic_and_biallelic.name());
        return ModeOfInheritance.monoallelic_and_biallelic;
    }

    /**
     * Looks up a raw inheritance mode string in {@code VariantAnnotationUtils.MODEOFINHERITANCE_MAP}.
     *
     * @param inheritanceMode raw inheritance mode string
     * @return the mapped value, or {@code null} when the string is not a key of the map
     */
    private ModeOfInheritance getModeOfInheritance(String inheritanceMode) {
        // Map.get already yields null for an absent key, so no separate containsKey check is needed.
        return VariantAnnotationUtils.MODEOFINHERITANCE_MAP.get(inheritanceMode);
    }

    /**
     * Returns the gene features of a ClinVar set.
     *
     * <p>When the reference assertion has a MeasureSet, its genes are returned. Otherwise the MeasureSets of
     * the GenotypeSet are searched for a measure whose ID equals {@code alleleId}, and the genes of the first
     * matching MeasureSet are returned.
     *
     * @param publicSetType ClinVar set
     * @param alleleId      allele id, parsed as an integer when measure IDs are compared
     * @return the gene features of the selected MeasureSet
     * @throws RuntimeException if neither a MeasureSet nor a matching GenotypeSet measure is present
     */
    private List<GenomicFeature> getGenomicFeature(PublicSetType publicSetType, String alleleId) {
        ReferenceAssertionType reference = publicSetType.getReferenceClinVarAssertion();

        if (reference.getMeasureSet() != null) {
            return getGenomicFeature(reference.getMeasureSet());
        }

        if (reference.getGenotypeSet() != null) {
            for (MeasureSetType measureSet : reference.getGenotypeSet().getMeasureSet()) {
                if (measureSet.getMeasure() == null) {
                    continue;
                }
                for (MeasureType measure : measureSet.getMeasure()) {
                    if (measure.getID() != null && new BigInteger(alleleId).equals(measure.getID())) {
                        return getGenomicFeature(measureSet);
                    }
                }
            }
        }

        throw new RuntimeException("One of either MeasureSet or GenotypeSet attributes are required within publicSet.getReferenceClinVarAssertion(). Also, if GenotypeSet is present, at least one MeasureSet corresponding to each alleleId is required.Please check " + reference.getClinVarAccession().getAcc());
    }

    /**
     * Collects the distinct gene features referenced by the measure relationships of a MeasureSet.
     *
     * <p>Every non-null symbol value of every measure relationship is converted with
     * {@code createGeneGenomicFeature}; duplicates are removed through a set.
     *
     * @param measureSetType MeasureSet to inspect
     * @return the distinct gene features, in no particular order
     */
    private List<GenomicFeature> getGenomicFeature(MeasureSetType measureSetType) {
        Set<GenomicFeature> genomicFeatures = new HashSet<>();
        for (MeasureType measure : measureSetType.getMeasure()) {
            if (measure.getMeasureRelationship() == null) {
                continue;
            }
            for (MeasureType.MeasureRelationship relationship : measure.getMeasureRelationship()) {
                if (relationship.getSymbol() == null) {
                    continue;
                }
                for (SetElementSetType symbol : relationship.getSymbol()) {
                    boolean hasValue = symbol.getElementValue() != null
                            && symbol.getElementValue().getValue() != null;
                    if (hasValue) {
                        genomicFeatures.add(createGeneGenomicFeature(symbol.getElementValue().getValue()));
                    }
                }
            }
        }
        return new ArrayList<>(genomicFeatures);
    }

    /**
     * Builds the heritable traits of a ClinVar set, one per trait of its reference assertion.
     *
     * <p>All traits share the inheritance mode derived from the attributes of the reference assertion. When at
     * least one trait exists, a property named {@code "modeOfInheritance"} is appended to {@code list}; its
     * value is a JSON array with one map per trait (trait name plus raw inheritance value, if any).
     *
     * @param publicSetType ClinVar set
     * @param map           EFO terms; not used by this method
     * @param list          additional properties of the evidence entry, possibly extended by this call
     * @return the heritable traits, in trait order
     * @throws JsonProcessingException  if the per-trait maps cannot be serialised
     * @throws IllegalArgumentException if a trait has a blank name
     * @throws RuntimeException         if a trait carries its own inheritance mode attribute
     */
    private List<HeritableTrait> getHeritableTrait(PublicSetType publicSetType, Map<String, EFO> map,
                                                   List<Property> list) throws JsonProcessingException {
        ReferenceAssertionType reference = publicSetType.getReferenceClinVarAssertion();
        List<TraitType> traits = reference.getTraitSet().getTrait();

        List<HeritableTrait> heritableTraits = new ArrayList<>(traits.size());
        List<Map<String, String>> traitProperties = new ArrayList<>(traits.size());

        // Raw inheritance value(s), shared by every trait of the record.
        Map<String, String> sharedProperties = new HashMap<>();
        ModeOfInheritance modeOfInheritance = getInheritanceModel(reference.getAttributeSet(), sharedProperties);

        for (TraitType trait : traits) {
            String traitName = getTraitName(trait, publicSetType);
            if (StringUtils.isBlank(traitName)) {
                throw new IllegalArgumentException("ClinVar record found " + publicSetType.getReferenceClinVarAssertion().getClinVarAccession().getAcc() + " with no trait provided");
            }

            Map<String, String> properties = new HashMap<>(sharedProperties);
            properties.put(TRAIT, traitName);
            heritableTraits.add(new HeritableTrait(traitName, modeOfInheritance));

            if (traitInheritanceModesPresent(trait.getAttributeSet())) {
                throw new RuntimeException("ClinVar record found providing inheritance mode withint the trait. After ClinVar schema v53 inheritance mode is expected to be provided at the root of the ReferenceClinvarAssertion field.");
            }
            traitProperties.add(properties);
        }

        if (heritableTraits.isEmpty()) {
            logger.warn("Entry {}. No \"disease\" entry found among the traits", publicSetType.getReferenceClinVarAssertion().getClinVarAccession().getAcc());
            this.numberNoDiseaseTrait++;
        } else {
            list.add(new Property((String) null, MODE_OF_INHERITANCE, jsonObjectWriter.writeValueAsString(traitProperties)));
        }
        return heritableTraits;
    }

    /**
     * Selects the name of a trait: the first name whose type is "preferred" (ignoring case), or the first
     * name of all, with a warning, when none is marked as preferred.
     *
     * @param traitType     trait whose name is wanted
     * @param publicSetType ClinVar set the trait belongs to; only used for messages
     * @return the selected trait name
     * @throws IllegalArgumentException if the trait has no names at all
     */
    private String getTraitName(TraitType traitType, PublicSetType publicSetType) {
        for (SetElementSetType name : traitType.getName()) {
            if (name.getElementValue().getType().equalsIgnoreCase("preferred")) {
                return name.getElementValue().getValue();
            }
        }

        if (traitType.getName().isEmpty()) {
            throw new IllegalArgumentException("ClinVar record found " + publicSetType.getReferenceClinVarAssertion().getClinVarAccession().getAcc() + " with no trait provided");
        }

        // No preferred name: fall back to the first one.
        logger.warn("ClinVar record found " + publicSetType.getReferenceClinVarAssertion().getClinVarAccession().getAcc() + " with no preferred trait provided. Arbitrarily selecting first one: {}", traitType.getName().get(0).getElementValue().getValue());
        return traitType.getName().get(0).getElementValue().getValue();
    }

    /**
     * Tells whether any attribute of a trait has the type "modeofinheritance" (ignoring case).
     *
     * @param list attribute sets of a trait, may be {@code null}
     * @return {@code true} when such an attribute exists, {@code false} otherwise or when {@code list} is null
     */
    private boolean traitInheritanceModesPresent(List<TraitType.AttributeSet> list) {
        return list != null && list.stream().anyMatch(attributeSet ->
                attributeSet.getAttribute().getType() != null
                        && attributeSet.getAttribute().getType().equalsIgnoreCase("modeofinheritance"));
    }

    /**
     * Appends one "PMID:&lt;value&gt;" entry for every citation ID of the observation set whose source is
     * "pubmed" (case-insensitive).
     *
     * @param list bibliography collected so far; may be {@code null}, in which case a new list is created
     *             as soon as the first PubMed ID is found
     * @param observationSet observation set whose observed data citations are scanned
     * @return {@code list} with the new entries appended, a newly created list if {@code list} was
     *         {@code null} and at least one PubMed ID was found, or {@code null} if {@code list} was
     *         {@code null} and none was found
     */
    private List<String> addBibliographyFromObservationSet(List<String> list, ObservationSet observationSet) {
        List<String> bibliography = list;
        for (ObservationSet.ObservedData observedData : observationSet.getObservedData()) {
            for (CitationType citation : observedData.getCitation()) {
                for (CitationType.ID citationId : citation.getID()) {
                    if (!citationId.getSource().equalsIgnoreCase("pubmed")) {
                        continue;
                    }
                    // Created lazily so that callers still get null back when nothing was found.
                    if (bibliography == null) {
                        bibliography = new ArrayList<>();
                    }
                    bibliography.add("PMID:" + citationId.getValue());
                }
            }
        }
        return bibliography;
    }

    /**
     * Parses the ClinVar variant summary file, indexing its records through {@code updateRocksDB} and collecting
     * the sequence location of every processed allele.
     *
     * <p>The first line of the file is skipped as a header. A record is processed only if its assembly column
     * matches {@code this.assembly} (case-insensitive), its chromosome, start and end columns are present, and its
     * reference and alternate alleles are both present and different from each other. For each such record:
     * <ul>
     *   <li>its location is registered under every distinct ';'-separated identifier found in column 11;</li>
     *   <li>for each variation linked to its allele ID, the record is indexed straight away when the variation
     *       type equals {@code VARIANT}; otherwise it is buffered under the variation ID and indexed only after
     *       the whole file has been read, together with the variant strings of the other records buffered under
     *       the same variation ID.</li>
     * </ul>
     *
     * @param map EFO terms, handed over untouched to {@code updateRocksDB}
     * @return allele locations grouped by the identifiers found in column 11
     * @throws IOException if reading the input files fails
     * @throws RocksDBException propagated from {@code updateRocksDB}
     * @throws RuntimeException if a processed record has no allele ID
     */
    private Map<String, List<AlleleLocationData>> parseVariantSummary(Map<String, EFO> map) throws IOException, RocksDBException {
        logger.info("Loading AlleleID -> variation ID map...");
        Map<String, List<VariationData>> alleleIdToVariationData = loadAlleleIdToVariationData();
        logger.info("Parsing {}...", this.clinvarSummaryFile);

        Map<String, List<AlleleLocationData>> alleleLocations = new HashMap<>();
        Map<String, List<String[]>> compoundVariationRecords = new HashMap<>();

        // try-with-resources: the reader is released even when a line fails to parse or to be indexed.
        try (BufferedReader reader = FileUtils.newBufferedReader(this.clinvarSummaryFile)) {
            ProgressLogger progressLogger = new ProgressLogger("Parsed variant summary lines:", () -> {
                return EtlCommons.countFileLines(this.clinvarSummaryFile);
            }, 200).setBatchSize(10000);

            // The first line is a header and carries no record.
            reader.readLine();
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                String[] fields = line.split("\t");
                if (fields[VARIANT_SUMMARY_ASSEMBLY_COLUMN].equalsIgnoreCase(this.assembly)
                        && !EtlCommons.isMissing(fields[VARIANT_SUMMARY_CHR_COLUMN])
                        && !EtlCommons.isMissing(fields[VARIANT_SUMMARY_START_COLUMN])
                        && !EtlCommons.isMissing(fields[20])
                        && !missingAllele(fields[VARIANT_SUMMARY_REFERENCE_COLUMN])
                        && !missingAllele(fields[VARIANT_SUMMARY_ALTERNATE_COLUMN])
                        && !fields[VARIANT_SUMMARY_REFERENCE_COLUMN].equals(fields[VARIANT_SUMMARY_ALTERNATE_COLUMN])) {
                    ClinicalIndexer.SequenceLocation location = parseSequenceLocation(fields);

                    // De-duplicate the identifiers so a location is registered once per identifier and record.
                    Set<String> identifiers = new HashSet<>(Arrays.asList(fields[11].split(";")));
                    // The first column holds the allele ID (see the error message below).
                    String alleleId = fields[0];
                    boolean alleleIdMissing = EtlCommons.isMissing(alleleId);

                    for (String identifier : identifiers) {
                        List<AlleleLocationData> locations =
                                alleleLocations.computeIfAbsent(identifier, key -> new ArrayList<>());
                        if (alleleIdMissing) {
                            throw new RuntimeException("Allele id missing from variant_summary.txt. Aborting parsing. Line: " + line);
                        }
                        locations.add(new AlleleLocationData(alleleId, location));
                    }

                    if (!alleleIdMissing) {
                        List<VariationData> variations = alleleIdToVariationData.get(alleleId);
                        if (variations == null) {
                            logger.warn("No variation data found for allele ID {}. variant_summary line {}. This is probably due to lack of synchronisation between variant_summary and variation_allele files. ", alleleId, line);
                        } else {
                            for (VariationData variation : variations) {
                                if (VARIANT.equals(variation.getType())) {
                                    if (updateRocksDB(location, variation.getId(), fields, null, map)) {
                                        this.numberIndexedRecords++;
                                    }
                                } else {
                                    // Deferred: the mate variants are only known once the whole file has been read.
                                    compoundVariationRecords
                                            .computeIfAbsent(variation.getId(), key -> new ArrayList<>(2))
                                            .add(fields);
                                }
                            }
                        }
                    }
                    this.totalNumberRecords++;
                }
                progressLogger.increment(1L);
            }
        }

        logger.info("{} compound variation records found.", compoundVariationRecords.size());
        logger.info("Indexing compound variation records");
        for (Map.Entry<String, List<String[]>> compound : compoundVariationRecords.entrySet()) {
            List<String[]> records = compound.getValue();
            for (int recordIndex = 0; recordIndex < records.size(); recordIndex++) {
                String[] record = records.get(recordIndex);
                String mateVariantString = getMateVariantStringByVariantSummaryRecord(recordIndex, records);
                if (updateRocksDB(parseSequenceLocation(record), compound.getKey(), record, mateVariantString, map)) {
                    this.numberIndexedRecords++;
                }
            }
        }
        return alleleLocations;
    }

    /**
     * Builds the sequence location described by one split line of the variant summary file.
     *
     * <p>The coordinates handed to the location are chosen as follows:
     * <ul>
     *   <li>empty reference, non-empty alternate and {@code end == start + 1}: the parsed end is passed first and
     *       the parsed start second, i.e. the two are swapped (presumably the insertion encoding - confirm
     *       against {@code SequenceLocation});</li>
     *   <li>single-character alternate that is a prefix of a longer reference: {@code start - 1} and end;</li>
     *   <li>anything else: start and end as parsed.</li>
     * </ul>
     *
     * @param strArr tab-split fields of a variant summary line
     * @return location built from the chromosome, coordinates, reference and alternate columns
     * @throws NumberFormatException if the start or end column is not an integer
     */
    private ClinicalIndexer.SequenceLocation parseSequenceLocation(String[] strArr) {
        String chromosome = strArr[VARIANT_SUMMARY_CHR_COLUMN];
        String reference = strArr[VARIANT_SUMMARY_REFERENCE_COLUMN];
        String alternate = strArr[VARIANT_SUMMARY_ALTERNATE_COLUMN];
        int start = Integer.parseInt(strArr[VARIANT_SUMMARY_START_COLUMN]);
        int end = Integer.parseInt(strArr[20]);

        int locationStart = start;
        int locationEnd = end;
        if (emptySequence(reference) && !emptySequence(alternate) && end == start + 1) {
            locationStart = end;
            locationEnd = start;
        } else if (alternate.length() == 1 && reference.length() > 1 && reference.startsWith(alternate)) {
            locationStart = start - 1;
        }
        return new ClinicalIndexer.SequenceLocation(this, chromosome, locationStart, locationEnd, reference, alternate);
    }

    /**
     * Tells whether an allele string denotes an empty sequence, i.e. is either the empty string or a single "-".
     *
     * @param str allele string; may be {@code null}
     * @return {@code true} for "" and "-"; {@code false} for anything else, including {@code null}
     */
    private boolean emptySequence(String str) {
        if (str == null) {
            return false;
        }
        return str.isEmpty() || str.equals("-");
    }

    /**
     * Tells whether an allele string carries no allele at all.
     *
     * <p>Every occurrence of the tokens "not specified", "NS", "NA", "na", "NULL", "null" and "." is removed, one
     * token after the other in that order; the allele is missing when nothing is left.
     *
     * @param str allele string; may be {@code null}
     * @return {@code true} if {@code str} is {@code null} or consists solely of the tokens above
     */
    private boolean missingAllele(String str) {
        if (str == null) {
            return true;
        }
        // Order matters: each removal runs on the result of the previous one.
        String[] missingTokens = {"not specified", "NS", "NA", "na", "NULL", "null", "."};
        String remainder = str;
        for (String token : missingTokens) {
            remainder = remainder.replace(token, "");
        }
        return remainder.isEmpty();
    }

    /**
     * Builds the comma-separated list of normalised variant strings of every record in {@code list} except the
     * one at position {@code i}.
     *
     * @param i index, within {@code list}, of the record whose mates are wanted
     * @param list tab-split variant summary records to take the mates from
     * @return the mates' normalised variant strings joined by ",", or {@code null} when the result would be empty
     * @throws NumberFormatException if the start column of a mate record is not an integer
     */
    private String getMateVariantStringByVariantSummaryRecord(int i, List<String[]> list) {
        StringBuilder mates = new StringBuilder();
        for (int recordIndex = 0; recordIndex < list.size(); recordIndex++) {
            if (recordIndex == i) {
                // A record is not its own mate.
                continue;
            }
            String[] mateRecord = list.get(recordIndex);
            String chromosome = mateRecord[VARIANT_SUMMARY_CHR_COLUMN];
            int start = Integer.parseInt(mateRecord[VARIANT_SUMMARY_START_COLUMN]);
            String reference = mateRecord[VARIANT_SUMMARY_REFERENCE_COLUMN];
            String alternate = mateRecord[VARIANT_SUMMARY_ALTERNATE_COLUMN];

            List<String> normalisedVariants = getNormalisedVariantString(chromosome, start, reference, alternate);
            if (normalisedVariants == null) {
                continue;
            }
            for (String variantString : normalisedVariants) {
                if (mates.length() > 0) {
                    mates.append(",");
                }
                mates.append(variantString);
            }
        }
        return mates.length() == 0 ? null : mates.toString();
    }

    /**
     * Loads the variation allele file into a map from allele ID to the variations that allele takes part in.
     *
     * <p>Leading lines starting with "#" are skipped as header. Every following line is split on tabs and yields one
     * {@code VariationData} built from its variation and type columns, stored under the value of its allele
     * column. Once the first non-"#" line has been reached, later lines are processed regardless of how they start.
     *
     * @return variation data grouped by allele ID, in file order within each group; empty if the file holds no
     *         data lines
     * @throws IOException if the file cannot be opened or read
     */
    private Map<String, List<VariationData>> loadAlleleIdToVariationData() throws IOException {
        Map<String, List<VariationData>> alleleIdToVariationData = new HashMap<>();
        // try-with-resources: the reader is released even when a malformed line aborts the load.
        try (BufferedReader reader = FileUtils.newBufferedReader(this.clinvarVariationAlleleFile)) {
            String line = reader.readLine();
            while (line != null && line.startsWith("#")) {
                line = reader.readLine();
            }
            for (; line != null; line = reader.readLine()) {
                String[] fields = line.split("\t");
                alleleIdToVariationData
                        .computeIfAbsent(fields[VARIATION_ALLELE_ALLELE_COLUMN], key -> new ArrayList<>())
                        .add(new VariationData(fields[VARIATION_ALLELE_VARIATION_COLUMN], fields[VARIATION_ALLELE_TYPE_COLUMN]));
            }
        }
        return alleleIdToVariationData;
    }

    /**
     * Loads the EFO terms file, if one was configured, feeding each of its lines to {@code addEfoTermToMap}.
     *
     * @return the loaded terms, or {@code null} when no EFO file is configured or the file cannot be read; in both
     *         cases a message is logged stating that EFO terms won't be added
     */
    private Map<String, EFO> loadEFOTerms() {
        if (this.clinvarEFOFile == null) {
            logger.warn("No EFO terms file present: EFO terms won't be added");
            return null;
        }
        logger.info("Loading EFO terms ...");
        Map<String, EFO> efoTerms = new HashMap<>();
        // Files.lines keeps the file open until the stream is closed, hence the try-with-resources.
        try (Stream<String> lines = Files.lines(this.clinvarEFOFile)) {
            lines.forEach(line -> addEfoTermToMap(line, efoTerms));
            logger.info("Done");
            return efoTerms;
        } catch (IOException e) {
            logger.error("Error loading EFO file: " + e.getMessage());
            logger.error("EFO terms won't be added");
            return null;
        }
    }

    /**
     * Parses one tab-separated line of the EFO terms file and stores the resulting term in {@code map}.
     *
     * <p>The first column is used as the map key; the third, fourth and second columns are passed, in that order,
     * to the {@code EFO} constructor. An existing entry with the same key is replaced.
     *
     * @param str line of the EFO terms file; must hold at least four tab-separated columns
     * @param map destination of the parsed term
     */
    private void addEfoTermToMap(String str, Map<String, EFO> map) {
        String[] columns = str.split("\t");
        String key = columns[0];
        EFO term = new EFO(columns[2], columns[3], columns[1]);
        map.put(key, term);
    }

    /**
     * Unmarshals the XML file at {@code path} through {@code ClinvarParser.loadXMLInfo}, using
     * {@code CLINVAR_CONTEXT}.
     *
     * @param path location of the XML file
     * @return the unmarshalled root element
     * @throws JAXBException declared for the underlying parser call
     * @throws IOException declared for the underlying parser call
     */
    private JAXBElement<ReleaseType> unmarshalXML(Path path) throws JAXBException, IOException {
        String xmlLocation = path.toString();
        // The parser result is not statically typed as a ReleaseType element; the cast cannot be checked at runtime.
        @SuppressWarnings("unchecked")
        JAXBElement<ReleaseType> release =
                (JAXBElement<ReleaseType>) ClinvarParser.loadXMLInfo(xmlLocation, CLINVAR_CONTEXT);
        return release;
    }
}
