package org.opencb.cellbase.app.transform.clinical.variant;

import com.fasterxml.jackson.core.JsonProcessingException;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import org.opencb.biodata.formats.variant.clinvar.ClinvarParser;
import org.opencb.biodata.formats.variant.clinvar.v24jaxb.CitationType;
import org.opencb.biodata.formats.variant.clinvar.v24jaxb.MeasureSetType;
import org.opencb.biodata.formats.variant.clinvar.v24jaxb.MeasureTraitType;
import org.opencb.biodata.formats.variant.clinvar.v24jaxb.ObservationSet;
import org.opencb.biodata.formats.variant.clinvar.v24jaxb.PublicSetType;
import org.opencb.biodata.formats.variant.clinvar.v24jaxb.ReleaseType;
import org.opencb.biodata.formats.variant.clinvar.v24jaxb.SetElementSetType;
import org.opencb.biodata.formats.variant.clinvar.v24jaxb.TraitType;
import org.opencb.biodata.models.variant.avro.AlleleOrigin;
import org.opencb.biodata.models.variant.avro.ClinicalSignificance;
import org.opencb.biodata.models.variant.avro.Confidence;
import org.opencb.biodata.models.variant.avro.ConsistencyStatus;
import org.opencb.biodata.models.variant.avro.DrugResponseClassification;
import org.opencb.biodata.models.variant.avro.EthnicCategory;
import org.opencb.biodata.models.variant.avro.EvidenceEntry;
import org.opencb.biodata.models.variant.avro.EvidenceImpact;
import org.opencb.biodata.models.variant.avro.EvidenceSource;
import org.opencb.biodata.models.variant.avro.EvidenceSubmission;
import org.opencb.biodata.models.variant.avro.GenomicFeature;
import org.opencb.biodata.models.variant.avro.HeritableTrait;
import org.opencb.biodata.models.variant.avro.ModeOfInheritance;
import org.opencb.biodata.models.variant.avro.Penetrance;
import org.opencb.biodata.models.variant.avro.Property;
import org.opencb.biodata.models.variant.avro.SomaticInformation;
import org.opencb.biodata.models.variant.avro.TraitAssociation;
import org.opencb.biodata.models.variant.avro.VariantClassification;
import org.opencb.cellbase.app.cli.EtlCommons;
import org.opencb.cellbase.app.transform.clinical.variant.ClinicalIndexer;
import org.opencb.cellbase.core.variant.annotation.VariantAnnotationUtils;
import org.opencb.commons.ProgressLogger;
import org.opencb.commons.utils.FileUtils;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

/* loaded from: input_file:org/opencb/cellbase/app/transform/clinical/variant/ClinVarIndexer.class */
public class ClinVarIndexer extends ClinicalIndexer {
    // Source name. Not referenced by name in this file as shown; the same literal "clinvar" is
    // passed directly when the EvidenceSource objects are built.
    private static final String CLINVAR_NAME = "clinvar";

    // Zero-based indices into a tab-split line of the variant summary file, as read by the
    // summary-line overload of addNewEntries.
    private static final int VARIANT_SUMMARY_CLINSIG_COLUMN = 6;
    private static final int VARIANT_SUMMARY_GENE_COLUMN = 4;
    private static final int VARIANT_SUMMARY_REVIEW_COLUMN = 24;
    private static final int VARIANT_SUMMARY_ORIGIN_COLUMN = 14;
    private static final int VARIANT_SUMMARY_PHENOTYPE_COLUMN = 13;

    // Zero-based indices into a tab-split line of the variation/allele file.
    // loadAlleleIdToVariationId uses column 2 as map key and column 0 as map value.
    private static final int VARIATION_ALLELE_ALLELE_COLUMN = 2;
    private static final int VARIATION_ALLELE_VARIATION_COLUMN = 0;

    // Not referenced anywhere in this file as shown.
    private static final String SOMATIC = "somatic";

    // Names of the additional Property entries attached to each EvidenceEntry.
    private static final String CLINICAL_SIGNIFICANCE_IN_SOURCE_FILE = "ClinicalSignificance_in_source_file";
    private static final String REVIEW_STATUS_IN_SOURCE_FILE = "ReviewStatus_in_source_file";

    // Keys of the per-trait maps that getHeritableTrait serializes to JSON; MODE_OF_INHERITANCE is
    // also the name of the Property holding that JSON.
    private static final String TRAIT = "trait";
    private static final String MODE_OF_INHERITANCE = "modeOfInheritance";

    // Input files and target assembly, all set once by the constructor.
    private final Path clinvarXMLFile;
    private final Path clinvarSummaryFile;
    private final Path clinvarVariationAlleleFile;
    // May be null, in which case loadEFOTerms skips loading and returns null.
    private final Path clinvarEFOFile;
    // Compared against column 16 of each variant summary line to select the lines to index.
    private final String assembly;

    // Counters reported by printSummary. The somatic and germline counters are never incremented
    // in this file as shown, so they are reported as 0 unless updated elsewhere.
    private int numberSomaticRecords = 0;
    private int numberGermlineRecords = 0;
    private int numberNoDiseaseTrait = 0;
    private int numberMultipleInheritanceModels = 0;

    // Term groups used by solveModeOfInheritanceConflict when a trait carries several
    // inheritance models.
    private static final Set<ModeOfInheritance> DOMINANT_TERM_SET = new HashSet(Arrays.asList(ModeOfInheritance.monoallelic, ModeOfInheritance.monoallelic_maternally_imprinted, ModeOfInheritance.monoallelic_not_imprinted, ModeOfInheritance.monoallelic_paternally_imprinted));
    private static final Set<ModeOfInheritance> RECESSIVE_TERM_SET = new HashSet(Arrays.asList(ModeOfInheritance.biallelic));

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/opencb/cellbase/app/transform/clinical/variant/ClinVarIndexer$EFO.class */
    /**
     * Holder for one term loaded from the EFO terms file: an identifier, a display name and a URL.
     * All three values are fixed at construction time.
     */
    public class EFO {
        private final String id;
        private final String name;
        private final String url;

        /**
         * @param id   term identifier
         * @param name term name
         * @param url  term URL
         */
        EFO(String id, String name, String url) {
            this.url = url;
            this.name = name;
            this.id = id;
        }
    }

    /**
     * Creates an indexer over the given ClinVar input files.
     *
     * @param clinvarXMLFile             ClinVar XML release file, unmarshalled by {@link #index()}
     * @param clinvarSummaryFile         tab-separated variant summary file
     * @param clinvarVariationAlleleFile tab-separated variation/allele file
     * @param clinvarEFOFile             tab-separated EFO terms file; may be null, in which case no EFO terms are loaded
     * @param assembly                   assembly name matched against the variant summary lines
     * @param rdb                        RocksDB instance the evidence entries are read from and written to
     */
    public ClinVarIndexer(Path clinvarXMLFile, Path clinvarSummaryFile, Path clinvarVariationAlleleFile,
                          Path clinvarEFOFile, String assembly, RocksDB rdb) {
        this.clinvarXMLFile = clinvarXMLFile;
        this.clinvarSummaryFile = clinvarSummaryFile;
        this.clinvarVariationAlleleFile = clinvarVariationAlleleFile;
        this.clinvarEFOFile = clinvarEFOFile;
        this.assembly = assembly;
        this.rdb = rdb;
    }

    /**
     * Runs the whole ClinVar indexing: loads the EFO terms, indexes the variant summary file,
     * then unmarshals the XML release and indexes every record whose accession was given a
     * sequence location by the variant summary file. A summary of the counters is logged at the end.
     *
     * <p>{@link IOException} and {@link JAXBException} are logged (with their stack trace) and
     * not propagated; in that case the method returns without printing the summary.
     *
     * @throws RocksDBException if reading from or writing to the RocksDB index fails
     */
    public void index() throws RocksDBException {
        try {
            Map<String, EFO> traitsToEfoTerms = loadEFOTerms();
            Map<String, ClinicalIndexer.SequenceLocation> accessionToLocation = parseVariantSummary(traitsToEfoTerms);

            logger.info("Unmarshalling clinvar file " + this.clinvarXMLFile + " ...");
            JAXBElement<ReleaseType> clinvarRelease = unmarshalXML(this.clinvarXMLFile);
            logger.info("Done");

            logger.info("Serializing clinvar records that have Sequence Location for Assembly " + this.assembly + " ...");
            List<PublicSetType> clinVarSets = clinvarRelease.getValue().getClinVarSet();
            ProgressLogger progressLogger
                    = new ProgressLogger("Parsed XML records:", clinVarSets.size(), 200).setBatchSize(10000);
            for (PublicSetType publicSet : clinVarSets) {
                String accession = publicSet.getReferenceClinVarAssertion().getClinVarAccession().getAcc();
                ClinicalIndexer.SequenceLocation sequenceLocation = accessionToLocation.get(accession);
                // Records without a location for the selected assembly are counted but not indexed.
                if (sequenceLocation != null) {
                    updateRocksDB(sequenceLocation, publicSet, traitsToEfoTerms);
                    this.numberIndexedRecords++;
                }
                progressLogger.increment(1L);
                this.totalNumberRecords++;
            }
            logger.info("Done");
            printSummary();
        } catch (IOException e) {
            // Pass the exception itself so the stack trace is not lost.
            logger.error("Error indexing clinvar Xml file: " + e.getMessage(), e);
        } catch (JAXBException e) {
            logger.error("Error unmarshalling clinvar Xml file " + this.clinvarXMLFile + ": " + e.getMessage(), e);
        } catch (RocksDBException e) {
            logger.error("Error reading/writing from/to the RocksDB index while indexing ClinVar");
            throw e;
        }
    }

    /** Logs, at info level, every counter accumulated while indexing ClinVar. */
    private void printSummary() {
        logger.info("Total number of parsed ClinVar records: {}", this.totalNumberRecords);
        logger.info("Number of indexed Clinvar records: {}", this.numberIndexedRecords);
        logger.info("Number of new variants in ClinVar not previously indexed in RocksDB: {}", this.numberNewVariants);
        logger.info("Number of updated variants during ClinVar indexing: {}", this.numberVariantUpdates);
        logger.info("Number of ClinVar germline variants: {}", this.numberGermlineRecords);
        logger.info("Number of ClinVar somatic variants: {}", this.numberSomaticRecords);
        logger.info("Number of ClinVar records without a \"disease\" trait: {}", this.numberNoDiseaseTrait);
        logger.info("Number of ClinVar records with multiple inheritance models: {}", this.numberMultipleInheritanceModels);
    }

    /**
     * Appends the evidence derived from one variant summary line to the entries stored in RocksDB
     * for the variant at the given location, and writes the list back.
     *
     * @param sequenceLocation   location used to build the variant id that serves as RocksDB key
     * @param variationId        variation id the new evidence entry is labelled with
     * @param lineFields         tab-split fields of the variant summary line
     * @param traitsToEfoTermsMap EFO terms, passed through to addNewEntries
     * @throws RocksDBException if the RocksDB write fails
     * @throws IOException      if the entry list cannot be serialized
     */
    private void updateRocksDB(ClinicalIndexer.SequenceLocation sequenceLocation, String variationId,
                               String[] lineFields, Map<String, EFO> traitsToEfoTermsMap)
            throws RocksDBException, IOException {
        String variantId = VariantAnnotationUtils.buildVariantId(
                sequenceLocation.getChromosome(),
                sequenceLocation.getStart(),
                sequenceLocation.getReference(),
                sequenceLocation.getAlternate());
        byte[] key = variantId.getBytes();

        List<EvidenceEntry> entries = getEvidenceEntryList(key);
        addNewEntries(entries, variationId, lineFields, traitsToEfoTermsMap);

        byte[] serializedEntries = jsonObjectWriter.writeValueAsBytes(entries);
        this.rdb.put(key, serializedEntries);
    }

    /**
     * Appends the evidence derived from one ClinVar XML record to the entries stored in RocksDB
     * for the variant at the given location, and writes the list back.
     *
     * @param sequenceLocation    location used to build the variant id that serves as RocksDB key
     * @param publicSet           ClinVar XML record to convert into an evidence entry
     * @param traitsToEfoTermsMap EFO terms, passed through to addNewEntries
     * @throws RocksDBException if the RocksDB write fails
     * @throws IOException      if the entry list cannot be serialized
     */
    private void updateRocksDB(ClinicalIndexer.SequenceLocation sequenceLocation, PublicSetType publicSet,
                               Map<String, EFO> traitsToEfoTermsMap) throws RocksDBException, IOException {
        String variantId = VariantAnnotationUtils.buildVariantId(
                sequenceLocation.getChromosome(),
                sequenceLocation.getStart(),
                sequenceLocation.getReference(),
                sequenceLocation.getAlternate());
        byte[] key = variantId.getBytes();

        List<EvidenceEntry> entries = getEvidenceEntryList(key);
        addNewEntries(entries, publicSet, traitsToEfoTermsMap);

        byte[] serializedEntries = jsonObjectWriter.writeValueAsBytes(entries);
        this.rdb.put(key, serializedEntries);
    }

    /**
     * Builds one {@link EvidenceEntry} from a variant summary line and appends it to the list.
     *
     * <p>Allele origins and phenotypes are lower-cased, split on ';' and de-duplicated; genes are
     * split on ','. Clinical significance and review status are mapped to their model values and
     * also kept verbatim as additional properties. Columns flagged as missing by
     * {@code EtlCommons.isMissing} are left out.
     *
     * @param evidenceEntryList   list the new entry is appended to
     * @param variationId         variation id, used as entry id and in the entry URL
     * @param lineFields          tab-split fields of the variant summary line
     * @param traitsToEfoTermsMap EFO terms; not used by this overload
     */
    private void addNewEntries(List<EvidenceEntry> evidenceEntryList, String variationId, String[] lineFields,
                               Map<String, EFO> traitsToEfoTermsMap) {
        EvidenceSource evidenceSource = new EvidenceSource("clinvar", null, null);

        // Locale.ROOT keeps the lower-casing independent of the JVM default locale
        // (e.g. the Turkish dotless i).
        List<AlleleOrigin> alleleOrigin = null;
        if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_ORIGIN_COLUMN])) {
            String[] origins = lineFields[VARIANT_SUMMARY_ORIGIN_COLUMN].toLowerCase(Locale.ROOT).split(";");
            alleleOrigin = getAlleleOriginList(new ArrayList<>(new HashSet<>(Arrays.asList(origins))));
        }

        List<HeritableTrait> heritableTraits = Collections.emptyList();
        if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_PHENOTYPE_COLUMN])) {
            String[] phenotypes = lineFields[VARIANT_SUMMARY_PHENOTYPE_COLUMN].toLowerCase(Locale.ROOT).split(";");
            heritableTraits = new HashSet<>(Arrays.asList(phenotypes)).stream()
                    .map(phenotype -> new HeritableTrait(phenotype, null))
                    .collect(Collectors.toList());
        }

        List<GenomicFeature> genomicFeatures = Collections.emptyList();
        if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_GENE_COLUMN])) {
            String[] geneSymbols = lineFields[VARIANT_SUMMARY_GENE_COLUMN].split(",");
            genomicFeatures = new ArrayList<>(geneSymbols.length);
            for (String geneSymbol : geneSymbols) {
                genomicFeatures.add(createGeneGenomicFeature(geneSymbol));
            }
        }

        List<Property> additionalProperties = new ArrayList<>(2);
        VariantClassification variantClassification = null;
        if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_CLINSIG_COLUMN])) {
            variantClassification = getVariantClassification(lineFields[VARIANT_SUMMARY_CLINSIG_COLUMN]);
            additionalProperties.add(new Property(null, CLINICAL_SIGNIFICANCE_IN_SOURCE_FILE,
                    lineFields[VARIANT_SUMMARY_CLINSIG_COLUMN]));
        }

        ConsistencyStatus consistencyStatus = null;
        if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_REVIEW_COLUMN])) {
            consistencyStatus = getConsistencyStatus(lineFields[VARIANT_SUMMARY_REVIEW_COLUMN]);
            additionalProperties.add(new Property(null, REVIEW_STATUS_IN_SOURCE_FILE,
                    lineFields[VARIANT_SUMMARY_REVIEW_COLUMN]));
        }

        // An empty origin list is stored as null.
        List<AlleleOrigin> alleleOriginOrNull = (alleleOrigin == null || alleleOrigin.isEmpty()) ? null : alleleOrigin;

        evidenceEntryList.add(new EvidenceEntry(evidenceSource, Collections.emptyList(), null,
                "https://www.ncbi.nlm.nih.gov/clinvar/variation/" + variationId, variationId, null,
                alleleOriginOrNull, heritableTraits, genomicFeatures, variantClassification, null, null,
                consistencyStatus, EthnicCategory.Z, null, null, null, additionalProperties,
                Collections.emptyList()));
    }

    /**
     * Maps a ClinVar review status string to a {@link ConsistencyStatus}.
     *
     * <p>The string is split on ',', '/' or ';'; each piece is lower-cased and trimmed, and the
     * first piece that is a key of {@code VariantAnnotationUtils.CLINVAR_REVIEW_TO_CONSISTENCY_STATUS}
     * decides the result.
     *
     * @param str review status as found in the source file
     * @return the mapped status, or null if no piece has a mapping
     */
    private ConsistencyStatus getConsistencyStatus(String str) {
        for (String token : str.split("[,/;]")) {
            // Locale.ROOT: callers may pass upper-case enum names, and a default-locale
            // lower-casing of 'I' would break the lookup under e.g. a Turkish locale.
            String key = token.toLowerCase(Locale.ROOT).trim();
            if (VariantAnnotationUtils.CLINVAR_REVIEW_TO_CONSISTENCY_STATUS.containsKey(key)) {
                return (ConsistencyStatus) VariantAnnotationUtils.CLINVAR_REVIEW_TO_CONSISTENCY_STATUS.get(key);
            }
        }
        return null;
    }

    /**
     * Builds a {@link VariantClassification} from a ClinVar clinical significance string.
     *
     * <p>The string is split on ',', '/' or ';'; each piece is lower-cased and trimmed and looked
     * up, in this order, in the ACMG, trait association and drug response maps of
     * {@code VariantAnnotationUtils}. The first ACMG term found is kept; if a later ACMG term
     * contradicts it (benign vs. pathogenic, in either order) the clinical significance is set to
     * {@code uncertain_significance}. For trait association and drug response the last mapped
     * term wins. Unmapped terms are only logged at debug level.
     *
     * @param str clinical significance as found in the source file
     * @return a classification object; never null, but possibly with no field set
     */
    private VariantClassification getVariantClassification(String str) {
        VariantClassification variantClassification = new VariantClassification();
        for (String token : str.split("[,/;]")) {
            // Locale.ROOT keeps the map lookup independent of the JVM default locale.
            String term = token.toLowerCase(Locale.ROOT).trim();
            if (VariantAnnotationUtils.CLINVAR_CLINSIG_TO_ACMG.containsKey(term)) {
                ClinicalSignificance found
                        = (ClinicalSignificance) VariantAnnotationUtils.CLINVAR_CLINSIG_TO_ACMG.get(term);
                ClinicalSignificance current = variantClassification.getClinicalSignificance();
                if (current == null) {
                    variantClassification.setClinicalSignificance(found);
                } else if ((isBenign(found) && isPathogenic(current))
                        || (isPathogenic(found) && isBenign(current))) {
                    // The conflict must be detected whichever term comes first: previously
                    // "Benign;Pathogenic" silently stayed benign while "Pathogenic;Benign" did not.
                    logger.warn("Benign and Pathogenic clinical significances found for the same record");
                    logger.warn("Will set uncertain_significance instead");
                    variantClassification.setClinicalSignificance(ClinicalSignificance.uncertain_significance);
                }
            } else if (VariantAnnotationUtils.CLINVAR_CLINSIG_TO_TRAIT_ASSOCIATION.containsKey(term)) {
                variantClassification.setTraitAssociation(
                        (TraitAssociation) VariantAnnotationUtils.CLINVAR_CLINSIG_TO_TRAIT_ASSOCIATION.get(term));
            } else if (VariantAnnotationUtils.CLINVAR_CLINSIG_TO_DRUG_RESPONSE.containsKey(term)) {
                variantClassification.setDrugResponseClassification(
                        (DrugResponseClassification) VariantAnnotationUtils.CLINVAR_CLINSIG_TO_DRUG_RESPONSE.get(term));
            } else {
                logger.debug("No mapping found for referenceClinVarAssertion.clinicalSignificance {}", term);
                logger.debug("No value will be set at EvidenceEntry.variantClassification for this term");
            }
        }
        return variantClassification;
    }

    /**
     * @param clinicalSignificance value to test; may be null
     * @return true if the value is {@code pathogenic} or {@code likely_pathogenic}
     */
    private boolean isPathogenic(ClinicalSignificance clinicalSignificance) {
        if (clinicalSignificance == ClinicalSignificance.pathogenic) {
            return true;
        }
        return clinicalSignificance == ClinicalSignificance.likely_pathogenic;
    }

    /**
     * @param clinicalSignificance value to test; may be null
     * @return true if the value is {@code benign} or {@code likely_benign}
     */
    private boolean isBenign(ClinicalSignificance clinicalSignificance) {
        if (clinicalSignificance == ClinicalSignificance.benign) {
            return true;
        }
        return clinicalSignificance == ClinicalSignificance.likely_benign;
    }

    /**
     * Builds one {@link EvidenceEntry} from a ClinVar XML record and appends it to the list.
     *
     * <p>The entry carries the record accession, its submissions, allele origins and PubMed
     * citations gathered from the observation sets, gene features, heritable traits, and the
     * mapped clinical significance and review status. The verbatim significance and review status
     * are kept as additional properties; getHeritableTrait may append a third property.
     *
     * @param list          list the new entry is appended to
     * @param publicSetType ClinVar XML record
     * @param map           EFO terms, passed through to getHeritableTrait
     * @throws JsonProcessingException if the inheritance information cannot be serialized
     */
    private void addNewEntries(List<EvidenceEntry> list, PublicSetType publicSetType, Map<String, EFO> map) throws JsonProcessingException {
        List<Property> additionalProperties = new ArrayList<>(3);
        EvidenceSource evidenceSource = new EvidenceSource("clinvar", null, null);
        String accession = publicSetType.getReferenceClinVarAssertion().getClinVarAccession().getAcc();

        String significanceInSource
                = publicSetType.getReferenceClinVarAssertion().getClinicalSignificance().getDescription();
        VariantClassification variantClassification = getVariantClassification(significanceInSource);
        additionalProperties.add(new Property(null, CLINICAL_SIGNIFICANCE_IN_SOURCE_FILE, significanceInSource));

        String reviewStatusInSource
                = publicSetType.getReferenceClinVarAssertion().getClinicalSignificance().getReviewStatus().name();
        ConsistencyStatus consistencyStatus = getConsistencyStatus(reviewStatusInSource);
        additionalProperties.add(new Property(null, REVIEW_STATUS_IN_SOURCE_FILE, reviewStatusInSource));

        List<GenomicFeature> genomicFeatures = getGenomicFeature(publicSetType);
        List<EvidenceSubmission> submissions = getSubmissionList(publicSetType);

        // One pass over the observation sets collects both the distinct origins and the citations.
        List<String> bibliography = new ArrayList<>();
        Set<String> originSet = new HashSet<>(publicSetType.getReferenceClinVarAssertion().getObservedIn().size());
        for (ObservationSet observationSet : publicSetType.getReferenceClinVarAssertion().getObservedIn()) {
            originSet.add(observationSet.getSample().getOrigin());
            bibliography = addBibliographyFromObservationSet(bibliography, observationSet);
        }

        List<AlleleOrigin> alleleOrigin = getAlleleOriginList(new ArrayList<>(originSet));
        if (alleleOrigin != null && alleleOrigin.isEmpty()) {
            // An empty origin list is stored as null.
            alleleOrigin = null;
        }

        // May append the mode-of-inheritance property to additionalProperties.
        List<HeritableTrait> heritableTraits = getHeritableTrait(publicSetType, map, additionalProperties);

        EvidenceEntry evidenceEntry = new EvidenceEntry(evidenceSource, submissions, null,
                "https://www.ncbi.nlm.nih.gov/clinvar/" + accession, accession, null, alleleOrigin,
                heritableTraits, genomicFeatures, variantClassification, null, null, consistencyStatus,
                EthnicCategory.Z, null, null, null, additionalProperties, bibliography);
        list.add(evidenceEntry);
    }

    /**
     * Creates one {@link EvidenceSubmission} per ClinVar assertion of the record.
     *
     * <p>The submission date is the date the clinical significance was last evaluated when that is
     * available, otherwise the date the accession was updated, rendered as an 8-digit
     * year-month-day string.
     *
     * @param publicSetType ClinVar XML record
     * @return the submissions, in the order of the record's assertions
     */
    private List<EvidenceSubmission> getSubmissionList(PublicSetType publicSetType) {
        List<EvidenceSubmission> submissions = new ArrayList<>(publicSetType.getClinVarAssertion().size());
        for (MeasureTraitType assertion : publicSetType.getClinVarAssertion()) {
            String submitter = assertion.getClinVarSubmissionID().getSubmitter();
            String date;
            if (assertion.getClinicalSignificance() != null
                    && assertion.getClinicalSignificance().getDateLastEvaluated() != null) {
                date = formatSubmissionDate(
                        assertion.getClinicalSignificance().getDateLastEvaluated().getYear(),
                        assertion.getClinicalSignificance().getDateLastEvaluated().getMonth(),
                        assertion.getClinicalSignificance().getDateLastEvaluated().getDay());
            } else {
                date = formatSubmissionDate(
                        assertion.getClinVarAccession().getDateUpdated().getYear(),
                        assertion.getClinVarAccession().getDateUpdated().getMonth(),
                        assertion.getClinVarAccession().getDateUpdated().getDay());
            }
            submissions.add(new EvidenceSubmission(submitter, date, null));
        }
        return submissions;
    }

    /** Renders a date as zero-padded year (4 digits), month (2) and day (2), with no separators. */
    private static String formatSubmissionDate(int year, int month, int day) {
        return String.format("%04d%02d%02d", year, month, day);
    }

    /**
     * Returns the name of the first trait name flagged "preferred" in the record, replaced by the
     * matching EFO term name when the EFO map has an entry for it.
     *
     * <p>When no preferred name exists a warning is logged and the no-disease-trait counter is
     * incremented.
     *
     * @param publicSetType ClinVar XML record
     * @param map           EFO terms keyed by trait name; may be null (loadEFOTerms returns null
     *                      when the EFO file is absent or unreadable), in which case the ClinVar
     *                      name is returned unchanged
     * @return the preferred trait name, or null if the record has none
     */
    private String getPreferredTraitName(PublicSetType publicSetType, Map<String, EFO> map) {
        for (TraitType trait : publicSetType.getReferenceClinVarAssertion().getTraitSet().getTrait()) {
            for (SetElementSetType traitName : trait.getName()) {
                if (traitName.getElementValue().getType().equalsIgnoreCase("preferred")) {
                    String preferredName = traitName.getElementValue().getValue();
                    // Guard against a missing EFO map instead of failing with a NullPointerException.
                    EFO efoTerm = (map == null) ? null : map.get(preferredName);
                    return efoTerm != null ? efoTerm.name : preferredName;
                }
            }
        }
        logger.warn("Entry {}. No \"disease\" entry found among the traits", publicSetType.getReferenceClinVarAssertion().getClinVarAccession().getAcc());
        this.numberNoDiseaseTrait++;
        return null;
    }

    /**
     * Determines the mode of inheritance of a trait from its "modeofinheritance" attributes.
     *
     * <p>The distinct, lower-cased attribute values are also recorded in {@code map} under the
     * mode-of-inheritance key: as a plain string when there is one value, as a JSON array when
     * there are several. Several values are resolved with solveModeOfInheritanceConflict and
     * counted in the multiple-inheritance-models counter.
     *
     * @param traitType trait whose attribute set is inspected
     * @param map       per-trait map that receives the raw inheritance value(s); untouched when
     *                  the trait has no inheritance attribute
     * @return the mapped mode of inheritance, or null if none is given or it cannot be resolved
     * @throws JsonProcessingException if the list of values cannot be serialized
     */
    private ModeOfInheritance getInheritanceModel(TraitType traitType, Map<String, String> map) throws JsonProcessingException {
        Set<String> inheritanceTerms = new HashSet<>();
        if (traitType.getAttributeSet() != null) {
            for (TraitType.AttributeSet attributeSet : traitType.getAttributeSet()) {
                String attributeType = attributeSet.getAttribute().getType();
                if (attributeType != null && attributeType.equalsIgnoreCase("modeofinheritance")) {
                    // Locale.ROOT keeps the later map lookup independent of the JVM default locale.
                    inheritanceTerms.add(attributeSet.getAttribute().getValue().toLowerCase(Locale.ROOT));
                }
            }
        }

        if (inheritanceTerms.isEmpty()) {
            return null;
        }

        if (inheritanceTerms.size() == 1) {
            String inheritanceTerm = inheritanceTerms.iterator().next();
            map.put(MODE_OF_INHERITANCE, inheritanceTerm);
            return getModeOfInheritance(inheritanceTerm);
        }

        map.put(MODE_OF_INHERITANCE, jsonObjectWriter.writeValueAsString(new ArrayList<>(inheritanceTerms)));
        this.numberMultipleInheritanceModels++;
        return solveModeOfInheritanceConflict(inheritanceTerms);
    }

    /**
     * Chooses a single mode of inheritance when a trait declares several inheritance terms.
     *
     * <ul>
     *   <li>Terms without a mapping are ignored; if none of the terms maps, null is returned.</li>
     *   <li>If exactly one distinct mapped model remains, it is returned.</li>
     *   <li>If all remaining models belong to the dominant term set, null is returned.</li>
     *   <li>If, besides dominant terms, only recessive terms remain, {@code monoallelic_and_biallelic}
     *       is returned.</li>
     *   <li>Any other combination is a conflict and yields null.</li>
     * </ul>
     *
     * @param set inheritance terms found for the trait
     * @return the selected model, or null if none can be selected
     */
    private ModeOfInheritance solveModeOfInheritanceConflict(Set<String> set) {
        logger.warn("Multiple inheritance models found for a variant: {}", String.join(",", set));

        // Collect into an explicitly mutable set: it is modified below.
        Set<ModeOfInheritance> models = new HashSet<>();
        for (String term : set) {
            models.add(getModeOfInheritance(term));
        }
        models.remove(null);

        // None of the terms could be mapped. Previously this case failed with a
        // NoSuchElementException when asking the emptied set for its first element.
        if (models.isEmpty()) {
            logger.warn("No inheritance model selected, none of the terms could be mapped");
            return null;
        }

        if (models.size() == 1) {
            ModeOfInheritance selected = models.iterator().next();
            logger.warn("Selected inheritance model: {}", selected);
            return selected;
        }

        models.removeAll(DOMINANT_TERM_SET);
        if (models.isEmpty()) {
            // Only different dominant terms were present.
            return null;
        }

        models.removeAll(RECESSIVE_TERM_SET);
        if (!models.isEmpty()) {
            logger.warn("No inheritance model selected, conflicting inheritance models found");
            return null;
        }

        logger.warn("Dominant and recessive models found, {} selected", ModeOfInheritance.monoallelic_and_biallelic.name());
        return ModeOfInheritance.monoallelic_and_biallelic;
    }

    /**
     * Looks a term up in {@code VariantAnnotationUtils.MODEOFINHERITANCE_MAP}.
     *
     * @param str inheritance term
     * @return the mapped mode of inheritance, or null if the term has no mapping
     */
    private ModeOfInheritance getModeOfInheritance(String str) {
        // Map.get already yields null for an absent key, so no separate containsKey check is needed.
        return (ModeOfInheritance) VariantAnnotationUtils.MODEOFINHERITANCE_MAP.get(str);
    }

    /**
     * Collects the distinct gene features referenced by the measures of the record.
     *
     * <p>Every non-null symbol value found in the measure relationships is turned into a gene
     * feature with createGeneGenomicFeature; duplicates are removed through a hash set, so the
     * order of the returned list is that set's iteration order.
     *
     * @param publicSetType ClinVar XML record
     * @return the distinct gene features; empty if the record has none
     */
    private List<GenomicFeature> getGenomicFeature(PublicSetType publicSetType) {
        Set<GenomicFeature> distinctFeatures = new HashSet<>();
        for (MeasureSetType.Measure measure : publicSetType.getReferenceClinVarAssertion().getMeasureSet().getMeasure()) {
            if (measure.getMeasureRelationship() == null) {
                continue;
            }
            for (MeasureSetType.Measure.MeasureRelationship relationship : measure.getMeasureRelationship()) {
                if (relationship.getSymbol() == null) {
                    continue;
                }
                for (SetElementSetType symbol : relationship.getSymbol()) {
                    if (symbol.getElementValue() == null || symbol.getElementValue().getValue() == null) {
                        continue;
                    }
                    distinctFeatures.add(createGeneGenomicFeature(symbol.getElementValue().getValue()));
                }
            }
        }
        return new ArrayList<>(distinctFeatures);
    }

    /**
     * Creates one {@link HeritableTrait} per trait of the record, named after the trait's first
     * "preferred" name and carrying the inheritance model resolved by getInheritanceModel.
     *
     * <p>When at least one trait exists, a property holding the JSON list of per-trait maps (trait
     * name plus raw inheritance value, if any) is appended to {@code list}. When the record has no
     * trait a warning is logged and the no-disease-trait counter is incremented.
     *
     * @param publicSetType ClinVar XML record
     * @param map           EFO terms; not used by this method
     * @param list          additional properties of the evidence entry being built
     * @return the heritable traits, in the order of the record's traits
     * @throws JsonProcessingException  if the per-trait maps cannot be serialized
     * @throws IllegalArgumentException if a trait has no name flagged "preferred"
     */
    private List<HeritableTrait> getHeritableTrait(PublicSetType publicSetType, Map<String, EFO> map, List<Property> list) throws JsonProcessingException {
        List<TraitType> traits = publicSetType.getReferenceClinVarAssertion().getTraitSet().getTrait();
        List<HeritableTrait> heritableTraits = new ArrayList<>(traits.size());
        List<Map<String, String>> inheritanceDetails = new ArrayList<>(traits.size());

        for (TraitType trait : traits) {
            // Find the first name flagged as preferred.
            SetElementSetType preferredName = null;
            for (SetElementSetType candidate : trait.getName()) {
                if (candidate.getElementValue().getType().equalsIgnoreCase("preferred")) {
                    preferredName = candidate;
                    break;
                }
            }
            if (preferredName == null) {
                throw new IllegalArgumentException("ClinVar record found " + publicSetType.getReferenceClinVarAssertion().getClinVarAccession().getAcc() + " with no preferred trait provided");
            }

            String traitName = preferredName.getElementValue().getValue();
            Map<String, String> traitDetails = new HashMap<>();
            traitDetails.put(TRAIT, traitName);
            // getInheritanceModel also records the raw inheritance value(s) in traitDetails.
            heritableTraits.add(new HeritableTrait(traitName, getInheritanceModel(trait, traitDetails)));
            inheritanceDetails.add(traitDetails);
        }

        if (heritableTraits.isEmpty()) {
            logger.warn("Entry {}. No \"disease\" entry found among the traits", publicSetType.getReferenceClinVarAssertion().getClinVarAccession().getAcc());
            this.numberNoDiseaseTrait++;
        } else {
            list.add(new Property(null, MODE_OF_INHERITANCE, jsonObjectWriter.writeValueAsString(inheritanceDetails)));
        }
        return heritableTraits;
    }

    /**
     * Appends a "PMID:&lt;value&gt;" entry for every citation id of the observation set whose
     * source is "pubmed" (case-insensitive).
     *
     * @param list           bibliography collected so far; may be null, in which case a list is
     *                       created on the first PubMed id found
     * @param observationSet observation set whose observed data citations are scanned
     * @return the bibliography list; null only if {@code list} was null and no PubMed id was found
     */
    private List<String> addBibliographyFromObservationSet(List<String> list, ObservationSet observationSet) {
        List<String> bibliography = list;
        for (ObservationSet.ObservedData observedData : observationSet.getObservedData()) {
            for (CitationType citation : observedData.getCitation()) {
                for (CitationType.ID citationId : citation.getID()) {
                    if (!citationId.getSource().equalsIgnoreCase("pubmed")) {
                        continue;
                    }
                    if (bibliography == null) {
                        bibliography = new ArrayList<>();
                    }
                    bibliography.add("PMID:" + citationId.getValue());
                }
            }
        }
        return bibliography;
    }

    /**
     * Reads the variant summary file, indexes its lines in RocksDB and returns the sequence
     * location of every accession listed for the configured assembly.
     *
     * <p>The first line is skipped as a header. Only lines whose column 16 equals the configured
     * assembly are processed. For those, each ';'-separated value of column 11 is mapped to the
     * line's location, and, when column 0 is present and known to the allele-to-variation map,
     * an evidence entry is written to RocksDB.
     *
     * @param map EFO terms, passed through to updateRocksDB
     * @return map from the values of column 11 to the sequence location of their line
     * @throws IOException      if one of the input files cannot be read
     * @throws RocksDBException if writing to RocksDB fails
     */
    private Map<String, ClinicalIndexer.SequenceLocation> parseVariantSummary(Map<String, EFO> map) throws IOException, RocksDBException {
        logger.info("Loading AlleleID -> variation ID map...");
        Map<String, String> alleleIdToVariationId = loadAlleleIdToVariationId();

        logger.info("Parsing {}...", this.clinvarSummaryFile);
        Map<String, ClinicalIndexer.SequenceLocation> accessionToLocation = new HashMap<>();
        // try-with-resources: the reader is closed even when a line fails to parse or the
        // RocksDB write throws.
        try (BufferedReader reader = FileUtils.newBufferedReader(this.clinvarSummaryFile)) {
            ProgressLogger progressLogger = new ProgressLogger("Parsed variant summary lines:",
                    () -> EtlCommons.countFileLines(this.clinvarSummaryFile), 200).setBatchSize(10000);

            // Skip the header line.
            reader.readLine();

            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                if (fields[16].equals(this.assembly)) {
                    // NOTE(review): the leading `this` argument looks like an enclosing-instance
                    // reference exposed by decompilation - confirm against the constructors of
                    // ClinicalIndexer.SequenceLocation.
                    ClinicalIndexer.SequenceLocation sequenceLocation = new ClinicalIndexer.SequenceLocation(this,
                            fields[18], Integer.parseInt(fields[19]), Integer.parseInt(fields[20]),
                            fields[21], fields[22]);

                    // These keys are what index() later looks up by record accession.
                    for (String accession : fields[11].split(";")) {
                        accessionToLocation.put(accession, sequenceLocation);
                    }

                    if (!EtlCommons.isMissing(fields[0]) && alleleIdToVariationId.containsKey(fields[0])) {
                        updateRocksDB(sequenceLocation, alleleIdToVariationId.get(fields[0]), fields, map);
                        this.numberIndexedRecords++;
                    }
                    this.totalNumberRecords++;
                }
                progressLogger.increment(1L);
            }
        }
        return accessionToLocation;
    }

    /**
     * Loads the variation/allele file into a map from allele column value to variation column value.
     *
     * <p>Leading lines starting with '#' are skipped; every following line is split on tabs.
     * When the same allele value occurs more than once, the last line wins.
     *
     * @return map keyed by the allele column, valued by the variation column
     * @throws IOException if the file cannot be read
     */
    private Map<String, String> loadAlleleIdToVariationId() throws IOException {
        Map<String, String> alleleIdToVariationId = new HashMap<>();
        // try-with-resources: the reader is closed even when reading or parsing a line fails.
        try (BufferedReader reader = FileUtils.newBufferedReader(this.clinvarVariationAlleleFile)) {
            String line = reader.readLine();
            // Skip the leading comment/header lines.
            while (line != null && line.startsWith("#")) {
                line = reader.readLine();
            }
            while (line != null) {
                String[] fields = line.split("\t");
                alleleIdToVariationId.put(fields[VARIATION_ALLELE_ALLELE_COLUMN],
                        fields[VARIATION_ALLELE_VARIATION_COLUMN]);
                line = reader.readLine();
            }
        }
        return alleleIdToVariationId;
    }

    /**
     * Loads the EFO terms file into a map, one entry per line (see addEfoTermToMap).
     *
     * <p>Loading is best effort: when no EFO file was configured, or the file cannot be read,
     * the problem is logged and null is returned so that indexing can continue without EFO terms.
     *
     * @return the loaded terms, or null if no EFO file is configured or it could not be read
     */
    private Map<String, EFO> loadEFOTerms() {
        if (this.clinvarEFOFile == null) {
            logger.warn("No EFO terms file present: EFO terms won't be added");
            return null;
        }

        logger.info("Loading EFO terms ...");
        Map<String, EFO> efoTerms = new HashMap<>();
        try (Stream<String> lines = Files.lines(this.clinvarEFOFile)) {
            lines.forEach(line -> addEfoTermToMap(line, efoTerms));
            logger.info("Done");
            return efoTerms;
        } catch (IOException | UncheckedIOException e) {
            // Files.lines reports read errors that happen while the stream is consumed as
            // UncheckedIOException, so both types must be handled for the fallback to apply.
            logger.error("Error loading EFO file: " + e.getMessage(), e);
            logger.error("EFO terms won't be added");
            return null;
        }
    }

    /**
     * Parses one tab-separated line of the EFO terms file and stores it in the map.
     *
     * <p>Column 0 becomes the map key; columns 2, 3 and 1 become the id, name and URL of the
     * {@link EFO} value, in that order. Lines with fewer than four columns are skipped with a
     * warning instead of aborting the whole load.
     *
     * @param str one line of the EFO terms file
     * @param map map the parsed term is added to
     */
    private void addEfoTermToMap(String str, Map<String, EFO> map) {
        String[] fields = str.split("\t");
        if (fields.length < 4) {
            logger.warn("Skipping EFO line with fewer than 4 tab-separated columns: {}", str);
            return;
        }
        map.put(fields[0], new EFO(fields[2], fields[3], fields[1]));
    }

    /**
     * Unmarshals a ClinVar XML file with {@code ClinvarParser.loadXMLInfo}, using the v24 JAXB
     * model package as context.
     *
     * @param path ClinVar XML file
     * @return the unmarshalled release element
     * @throws JAXBException if the XML cannot be unmarshalled
     * @throws IOException   if the file cannot be read
     */
    private JAXBElement<ReleaseType> unmarshalXML(Path path) throws JAXBException, IOException {
        String xmlFileName = path.toString();
        Object unmarshalled = ClinvarParser.loadXMLInfo(xmlFileName, "org.opencb.biodata.formats.variant.clinvar.v24jaxb");
        return (JAXBElement) unmarshalled;
    }
}
