package org.opencb.cellbase.app.transform.clinical.variant;

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Path;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.opencb.biodata.models.variant.avro.Confidence;
import org.opencb.biodata.models.variant.avro.ConsistencyStatus;
import org.opencb.biodata.models.variant.avro.EthnicCategory;
import org.opencb.biodata.models.variant.avro.EvidenceEntry;
import org.opencb.biodata.models.variant.avro.EvidenceImpact;
import org.opencb.biodata.models.variant.avro.EvidenceSource;
import org.opencb.biodata.models.variant.avro.GenomicFeature;
import org.opencb.biodata.models.variant.avro.Penetrance;
import org.opencb.biodata.models.variant.avro.Property;
import org.opencb.biodata.models.variant.avro.SomaticInformation;
import org.opencb.biodata.models.variant.avro.VariantClassification;
import org.opencb.cellbase.app.cli.EtlCommons;
import org.opencb.cellbase.app.transform.clinical.variant.ClinicalIndexer;
import org.opencb.cellbase.app.transform.variation.VariationFile;
import org.opencb.cellbase.core.variant.annotation.VariantAnnotationUtils;
import org.opencb.commons.ProgressLogger;
import org.opencb.commons.utils.FileUtils;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

/* loaded from: input_file:org/opencb/cellbase/app/transform/clinical/variant/CosmicIndexer.class */
public class CosmicIndexer extends ClinicalIndexer {
    // Label identifying COSMIC as an evidence source.
    private static final String COSMIC_NAME = "cosmic";
    // Zero-based indexes of columns in a tab-separated COSMIC line.
    private static final int PRIMARY_SITE_COLUMN = 7;
    private static final int SITE_SUBTYPE_COLUMN = 8;
    private static final int PRIMARY_HISTOLOGY_COLUMN = 11;
    private static final int HISTOLOGY_SUBTYPE_COLUMN = 12;
    private static final int ID_COLUMN = 16;
    // Name of the additional property that carries the mutation somatic status exactly as read from the file.
    private static final String MUTATION_SOMATIC_STATUS_IN_SOURCE_FILE = "mutationSomaticStatus_in_source_file";
    private static final int GENE_NAMES_COLUMN = 0;
    private static final int HGNC_COLUMN = 3;
    // COSMIC file parsed by index().
    private final Path cosmicFile;
    // Zero-based column indexes that are assigned in the constructor.
    private final int mutationSomaticStatusColumn;
    private final int pubmedPMIDColumn;
    private final int sampleSourceColumn;
    private final int tumourOriginColumn;
    // Regular expressions; both are compiled by compileRegularExpressionPatterns().
    private Pattern mutationGRCh37GenomePositionPattern;
    private Pattern snvPattern;
    // Names of the named capturing groups used by the two patterns above.
    private static final String CHROMOSOME = "CHR";
    private static final String START = "START";
    private static final String END = "END";
    private static final String REF = "REF";
    private static final String ALT = "ALT";
    // Regex an inserted/deleted allele string must fully match to be accepted.
    private static final String VARIANT_STRING_PATTERN = "[ACGT]*";
    // Counters of rejected lines, broken down by reason; reported by printSummary().
    private int invalidPositionLines = 0;
    private int invalidSubstitutionLines = 0;
    private int invalidDeletionLines = 0;
    private int invalidInsertionLines = 0;
    private int invalidDuplicationLines = 0;
    private int invalidMutationCDSOtherReason = 0;
    private int ignoredCosmicLines = 0;

    /**
     * Creates an indexer for a COSMIC file.
     *
     * @param path    COSMIC file to parse
     * @param str     assembly name (e.g. "grch37"); every assembly currently shares the same column
     *                layout, so this value does not influence the column indexes
     * @param rocksDB RocksDB instance the evidence entries are written to
     */
    public CosmicIndexer(Path path, String str, RocksDB rocksDB) {
        this.rdb = rocksDB;
        this.cosmicFile = path;
        compileRegularExpressionPatterns();
        // The former "grch37" branch and its fall-through assigned exactly the same values, so the
        // duplicated branch has been collapsed. Re-introduce a switch on the assembly here if the
        // column layout ever diverges between assemblies.
        this.mutationSomaticStatusColumn = 29;
        this.pubmedPMIDColumn = 30;
        this.sampleSourceColumn = 32;
        this.tumourOriginColumn = 33;
    }

    /**
     * Compiles the two regular expressions used while parsing: one for genome positions of the
     * form {@code chromosome:start-end} and one for CDS substitutions of the form
     * {@code c.<pos>[_<pos>]<ref>><alt>}.
     */
    private void compileRegularExpressionPatterns() {
        final String genomePositionRegex = "(?<CHR>\\S+):(?<START>\\d+)-(?<END>\\d+)";
        final String substitutionRegex = "c\\.\\d+(_\\d+)?(?<REF>(A|C|T|G)+)>(?<ALT>(A|C|T|G)+)";
        this.snvPattern = Pattern.compile(substitutionRegex);
        this.mutationGRCh37GenomePositionPattern = Pattern.compile(genomePositionRegex);
    }

    /**
     * Parses the COSMIC file line by line (skipping the first, header, line) and stores one
     * evidence entry per line whose position and variant can be parsed. Lines that cannot be
     * parsed are counted as ignored. A summary is always logged at the end, whether parsing
     * finished normally or failed.
     *
     * @throws RocksDBException if reading from or writing to the RocksDB index fails
     */
    public void index() throws RocksDBException {
        logger.info("Parsing cosmic file ...");
        // try-with-resources guarantees the reader is closed on every exit path.
        try (BufferedReader reader = FileUtils.newBufferedReader(this.cosmicFile)) {
            ProgressLogger progressLogger = new ProgressLogger("Parsed COSMIC lines:",
                    () -> EtlCommons.countFileLines(this.cosmicFile), 200).setBatchSize(10000);
            // The first line is discarded without being parsed.
            reader.readLine();
            String line;
            while ((line = reader.readLine()) != null) {
                logger.debug(line);
                EvidenceEntry evidenceEntry = buildCosmic(line);
                ClinicalIndexer.SequenceLocation sequenceLocation = new ClinicalIndexer.SequenceLocation();
                if (parsePosition(sequenceLocation, line) && parseVariant(sequenceLocation, line)) {
                    updateRocksDB(sequenceLocation, evidenceEntry);
                    this.numberIndexedRecords++;
                } else {
                    this.ignoredCosmicLines++;
                }
                this.totalNumberRecords++;
                progressLogger.increment(1L);
            }
        } catch (RocksDBException e) {
            logger.error("Error reading/writing from/to the RocksDB index while indexing Cosmic");
            throw e;
        } catch (IOException e) {
            // I/O failures are still not propagated (the signature does not allow it), but they are
            // now reported through the logger together with the stack trace.
            logger.error("Error reading the Cosmic file " + this.cosmicFile, e);
        } finally {
            logger.info("Done");
            printSummary();
        }
    }

    /**
     * Logs the record counters followed by the number of ignored lines and, for every rejection
     * reason that occurred at least once, the number of lines rejected for that reason.
     */
    private void printSummary() {
        logger.info("Total number of parsed Cosmic records: {}", this.totalNumberRecords);
        logger.info("Number of indexed Cosmic records: {}", this.numberIndexedRecords);
        logger.info("Number of new variants in Cosmic not previously indexed in RocksDB: {}", this.numberNewVariants);
        logger.info("Number of updated variants during Cosmic indexing: {}", this.numberVariantUpdates);

        NumberFormat formatter = NumberFormat.getInstance();
        logger.info(formatter.format(this.ignoredCosmicLines) + " cosmic lines ignored: ");
        logIgnoredLineCount(formatter, this.invalidPositionLines, " lines by invalid position");
        logIgnoredLineCount(formatter, this.invalidSubstitutionLines, " lines by invalid substitution CDS");
        logIgnoredLineCount(formatter, this.invalidInsertionLines, " lines by invalid insertion CDS");
        logIgnoredLineCount(formatter, this.invalidDeletionLines, " lines by invalid deletion CDS");
        logIgnoredLineCount(formatter, this.invalidDuplicationLines, " lines because mutation CDS is a duplication");
        logIgnoredLineCount(formatter, this.invalidMutationCDSOtherReason,
                " lines because mutation CDS is invalid for other reasons");
    }

    /** Logs one indented breakdown line, but only when {@code count} is greater than zero. */
    private void logIgnoredLineCount(NumberFormat formatter, int count, String reason) {
        if (count > 0) {
            logger.info("\t-" + formatter.format(count) + reason);
        }
    }

    /**
     * Adds the evidence entry to the list stored under the variant's key and writes the
     * resulting list back to RocksDB as JSON.
     *
     * @param sequenceLocation location and alleles from which the variant id (the key) is built
     * @param evidenceEntry    entry to add to, or merge into, the stored list
     * @throws RocksDBException if the RocksDB write fails
     * @throws IOException      declared for the list lookup / JSON serialisation
     */
    private void updateRocksDB(ClinicalIndexer.SequenceLocation sequenceLocation, EvidenceEntry evidenceEntry)
            throws RocksDBException, IOException {
        String variantId = VariantAnnotationUtils.buildVariantId(
                sequenceLocation.getChromosome(),
                sequenceLocation.getStart(),
                sequenceLocation.getReference(),
                sequenceLocation.getAlternate());
        byte[] key = variantId.getBytes();

        List<EvidenceEntry> storedEntries = getEvidenceEntryList(key);
        addNewEntry(storedEntries, evidenceEntry);

        byte[] serialisedEntries = jsonObjectWriter.writeValueAsBytes(storedEntries);
        this.rdb.put(key, serialisedEntries);
    }

    /**
     * Adds {@code evidenceEntry} to {@code list} unless the list already holds an entry that
     * {@link #sameSomaticDocument} considers the same document. In that case only the
     * bibliography of the first matching entry is updated: it is set when previously null,
     * otherwise it becomes the duplicate-free union of both bibliographies.
     *
     * @param list          entries already stored for the variant; modified in place
     * @param evidenceEntry entry to add or merge
     */
    private void addNewEntry(List<EvidenceEntry> list, EvidenceEntry evidenceEntry) {
        for (EvidenceEntry existing : list) {
            if (!sameSomaticDocument(existing, evidenceEntry)) {
                continue;
            }
            List<String> existingBibliography = existing.getBibliography();
            List<String> newBibliography = evidenceEntry.getBibliography();
            if (existingBibliography == null) {
                existing.setBibliography(newBibliography);
            } else if (newBibliography != null) {
                // A set removes references present in both lists.
                HashSet<String> merged = new HashSet<>(existingBibliography);
                merged.addAll(newBibliography);
                existing.setBibliography(new ArrayList<>(merged));
            }
            // Only the first matching entry is merged.
            return;
        }
        list.add(evidenceEntry);
    }

    /**
     * Tells whether two evidence entries describe the same document. Source, somatic
     * information, id, allele origin, genomic features and additional properties are compared;
     * the bibliography is deliberately not part of the comparison.
     *
     * @param evidenceEntry  first entry; may be null
     * @param evidenceEntry2 second entry; may be null
     * @return true if both references are the same object (including both null) or all compared
     *         fields are equal; false otherwise, including when exactly one argument is null
     */
    public boolean sameSomaticDocument(EvidenceEntry evidenceEntry, EvidenceEntry evidenceEntry2) {
        if (evidenceEntry == evidenceEntry2) {
            return true;
        }
        // Guarding the first argument too avoids a NullPointerException on getClass().
        if (evidenceEntry == null || evidenceEntry2 == null
                || evidenceEntry.getClass() != evidenceEntry2.getClass()) {
            return false;
        }
        return Objects.equals(evidenceEntry.getSource(), evidenceEntry2.getSource())
                && Objects.equals(evidenceEntry.getSomaticInformation(), evidenceEntry2.getSomaticInformation())
                && Objects.equals(evidenceEntry.getId(), evidenceEntry2.getId())
                && Objects.equals(evidenceEntry.getAlleleOrigin(), evidenceEntry2.getAlleleOrigin())
                && Objects.equals(evidenceEntry.getGenomicFeatures(), evidenceEntry2.getGenomicFeatures())
                && Objects.equals(evidenceEntry.getAdditionalProperties(), evidenceEntry2.getAdditionalProperties());
    }

    /**
     * Reads the mutation CDS string (column 17 of the tab-separated line) and dispatches it to
     * the matching parser: substitution (contains "&gt;"), deletion ("del"), insertion ("ins")
     * or duplication ("dup"), checked in that order. The counter for the corresponding rejection
     * reason is incremented whenever parsing fails.
     *
     * @param sequenceLocation location whose reference/alternate alleles are filled in on success
     * @param str              full tab-separated COSMIC line
     * @return true if the alleles could be parsed
     */
    private boolean parseVariant(ClinicalIndexer.SequenceLocation sequenceLocation, String str) {
        String mutationCds = str.split("\t", -1)[17];

        if (mutationCds.contains(">")) {
            boolean parsed = parseSnv(mutationCds, sequenceLocation);
            if (!parsed) {
                this.invalidSubstitutionLines++;
            }
            return parsed;
        }
        if (mutationCds.contains("del")) {
            boolean parsed = parseDeletion(mutationCds, sequenceLocation);
            if (!parsed) {
                this.invalidDeletionLines++;
            }
            return parsed;
        }
        if (mutationCds.contains("ins")) {
            boolean parsed = parseInsertion(mutationCds, sequenceLocation);
            if (!parsed) {
                this.invalidInsertionLines++;
            }
            return parsed;
        }
        if (mutationCds.contains("dup")) {
            boolean parsed = parseDuplication(mutationCds);
            if (!parsed) {
                this.invalidDuplicationLines++;
            }
            return parsed;
        }

        this.invalidMutationCDSOtherReason++;
        return false;
    }

    /**
     * Duplications are not parsed: this always returns false, so every duplication line is
     * rejected by the caller.
     */
    private boolean parseDuplication(String str) {
        return false;
    }

    /**
     * Parses an insertion CDS string, taking the text that follows "ins" as the inserted
     * sequence. On success the reference allele is set to the empty string and the alternate
     * allele to the inserted sequence expressed on the positive strand.
     *
     * @param str              mutation CDS string containing "ins"
     * @param sequenceLocation location to fill in; its strand must already be set
     * @return false if nothing follows "ins" or if what follows is not made only of A/C/G/T
     *         (for example a bare length such as "ins10"); true otherwise
     */
    private boolean parseInsertion(String str, ClinicalIndexer.SequenceLocation sequenceLocation) {
        String[] parts = str.split("ins");
        // Nothing after "ins" (e.g. "c.10_11ins"): reject instead of failing with an
        // ArrayIndexOutOfBoundsException, mirroring the guard in parseDeletion.
        if (parts.length < 2) {
            return false;
        }
        String insertedSequence = parts[1];
        // A purely numeric value can never match [ACGT]*, so this single check also rejects
        // insertions given only as a length.
        if (!insertedSequence.matches(VARIANT_STRING_PATTERN)) {
            return false;
        }
        sequenceLocation.setReference("");
        sequenceLocation.setAlternate(getPositiveStrandString(insertedSequence, sequenceLocation.getStrand()));
        return true;
    }

    /**
     * Parses a deletion CDS string, taking the text that follows "del" as the deleted sequence.
     * On success the reference allele is set to the deleted sequence expressed on the positive
     * strand and the alternate allele to the empty string.
     *
     * @param str              mutation CDS string containing "del"
     * @param sequenceLocation location to fill in; its strand must already be set
     * @return false if nothing follows "del", or if what follows is numeric or not made only of
     *         A/C/G/T; true otherwise
     */
    private boolean parseDeletion(String str, ClinicalIndexer.SequenceLocation sequenceLocation) {
        String[] parts = str.split("del");
        if (parts.length < 2) {
            return false;
        }
        String deletedSequence = parts[1];
        if (deletedSequence.matches("\\d+") || !deletedSequence.matches(VARIANT_STRING_PATTERN)) {
            return false;
        }
        String strand = sequenceLocation.getStrand();
        sequenceLocation.setReference(getPositiveStrandString(deletedSequence, strand));
        sequenceLocation.setAlternate("");
        return true;
    }

    /**
     * Parses a substitution CDS string with the substitution pattern and, on a full match, sets
     * the reference and alternate alleles expressed on the positive strand.
     *
     * @param str              mutation CDS string containing "&gt;"
     * @param sequenceLocation location to fill in; its strand must already be set
     * @return true if the whole string matches the substitution pattern
     */
    private boolean parseSnv(String str, ClinicalIndexer.SequenceLocation sequenceLocation) {
        Matcher matcher = this.snvPattern.matcher(str);
        if (!matcher.matches()) {
            return false;
        }
        // The REF and ALT groups of the pattern only admit A, C, G and T, so a matched allele can
        // never be "N"; the former check for "N" alleles was unreachable and has been removed.
        String strand = sequenceLocation.getStrand();
        sequenceLocation.setReference(getPositiveStrandString(matcher.group(REF), strand));
        sequenceLocation.setAlternate(getPositiveStrandString(matcher.group(ALT), strand));
        return true;
    }

    /**
     * Returns the sequence as read on the positive strand.
     *
     * @param sequence nucleotide sequence
     * @param strand   strand the sequence was given on
     * @return the reverse complement of {@code sequence} when {@code strand} is "-", otherwise
     *         {@code sequence} unchanged
     */
    private String getPositiveStrandString(String sequence, String strand) {
        if (strand.equals("-")) {
            return reverseComplementary(sequence);
        }
        return sequence;
    }

    /**
     * Builds the reverse complement of a sequence: characters are visited from last to first and
     * each one is replaced by its entry in {@code VariantAnnotationUtils.COMPLEMENTARY_NT}.
     *
     * @param sequence nucleotide sequence; every character must have an entry in the complement map
     * @return the reverse-complemented sequence
     */
    private String reverseComplementary(String sequence) {
        StringBuilder complemented = new StringBuilder(sequence.length());
        for (int i = sequence.length() - 1; i >= 0; i--) {
            Character nucleotide = Character.valueOf(sequence.charAt(i));
            complemented.append(((Character) VariantAnnotationUtils.COMPLEMENTARY_NT.get(nucleotide)).charValue());
        }
        return complemented.toString();
    }

    /**
     * Builds the evidence entry for one tab-separated COSMIC line. Source name, somatic
     * information, id, allele origin, genomic features, bibliography and one additional property
     * holding the raw mutation somatic status are populated; the remaining constructor arguments
     * are null or empty, apart from the fixed {@code EthnicCategory.Z}.
     *
     * @param str full tab-separated COSMIC line
     * @return the evidence entry describing the line
     */
    private EvidenceEntry buildCosmic(String str) {
        String[] columns = str.split("\t", -1);

        EvidenceSource source = new EvidenceSource("cosmic", (String) null, (String) null);
        SomaticInformation somaticInformation = getSomaticInformation(columns);
        String id = columns[ID_COLUMN];
        String somaticStatus = columns[this.mutationSomaticStatusColumn];

        return new EvidenceEntry(
                source,
                Collections.emptyList(),
                somaticInformation,
                (String) null,
                id,
                (String) null,
                getAlleleOriginList(Collections.singletonList(somaticStatus)),
                Collections.emptyList(),
                getGenomicFeature(columns),
                (VariantClassification) null,
                (EvidenceImpact) null,
                (Confidence) null,
                (ConsistencyStatus) null,
                EthnicCategory.Z,
                (Penetrance) null,
                (Boolean) null,
                (String) null,
                // Keeps the status exactly as it appears in the source file.
                Collections.singletonList(
                        new Property((String) null, MUTATION_SOMATIC_STATUS_IN_SOURCE_FILE, somaticStatus)),
                getBibliography(columns[this.pubmedPMIDColumn]));
    }

    /**
     * Builds the somatic information from the primary site, site subtype, primary histology,
     * histology subtype, tumour origin and sample source columns (in that constructor order).
     * Each value has its underscores replaced by spaces; values reported as missing by
     * {@code EtlCommons.isMissing} become null.
     *
     * @param strArr columns of one COSMIC line
     * @return the somatic information for the line
     */
    private SomaticInformation getSomaticInformation(String[] strArr) {
        String primarySite = underscoresToSpacesOrNull(strArr[PRIMARY_SITE_COLUMN]);
        String siteSubtype = underscoresToSpacesOrNull(strArr[SITE_SUBTYPE_COLUMN]);
        String primaryHistology = underscoresToSpacesOrNull(strArr[PRIMARY_HISTOLOGY_COLUMN]);
        String histologySubtype = underscoresToSpacesOrNull(strArr[HISTOLOGY_SUBTYPE_COLUMN]);
        String tumourOrigin = underscoresToSpacesOrNull(strArr[this.tumourOriginColumn]);
        String sampleSource = underscoresToSpacesOrNull(strArr[this.sampleSourceColumn]);
        return new SomaticInformation(primarySite, siteSubtype, primaryHistology, histologySubtype,
                tumourOrigin, sampleSource);
    }

    /** Returns null for a missing value, otherwise the value with every "_" replaced by " ". */
    private static String underscoresToSpacesOrNull(String value) {
        if (EtlCommons.isMissing(value)) {
            return null;
        }
        return value.replace("_", " ");
    }

    /**
     * Turns a PubMed id into a bibliography list.
     *
     * @param pubmedId value of the PubMed id column
     * @return a single-element list holding the id prefixed with "PMID:", or an empty list when
     *         {@code EtlCommons.isMissing} reports the value as missing
     */
    private List<String> getBibliography(String pubmedId) {
        if (EtlCommons.isMissing(pubmedId)) {
            return Collections.emptyList();
        }
        return Collections.singletonList("PMID:" + pubmedId);
    }

    /**
     * Builds the gene features for a COSMIC line: always one for the gene-name column, plus one
     * for the HGNC column when that value is present and differs (ignoring case) from the gene
     * name.
     *
     * @param strArr columns of one COSMIC line
     * @return a list with one or two gene features
     */
    private List<GenomicFeature> getGenomicFeature(String[] strArr) {
        String geneName = strArr[GENE_NAMES_COLUMN];
        String hgncName = strArr[HGNC_COLUMN];

        List<GenomicFeature> genomicFeatures = new ArrayList<>(2);
        genomicFeatures.add(createGeneGenomicFeature(geneName));
        if (!hgncName.equalsIgnoreCase(geneName) && !EtlCommons.isMissing(hgncName)) {
            genomicFeatures.add(createGeneGenomicFeature(hgncName));
        }
        return genomicFeatures;
    }

    /**
     * Fills in strand, chromosome, start and end of the location from a COSMIC line. The strand
     * (column 24) is always set; chromosome, start and end are only set when the genome position
     * (column 23) fully matches the {@code chromosome:start-end} pattern. When it does not, the
     * invalid-position counter is incremented.
     *
     * @param sequenceLocation location to fill in
     * @param str              full tab-separated COSMIC line
     * @return true if the genome position was present and well formed
     */
    public boolean parsePosition(ClinicalIndexer.SequenceLocation sequenceLocation, String str) {
        String[] columns = str.split("\t", -1);
        String genomePosition = columns[23];
        sequenceLocation.setStrand(columns[24]);

        if (genomePosition != null && !genomePosition.isEmpty()) {
            Matcher positionMatcher = this.mutationGRCh37GenomePositionPattern.matcher(genomePosition);
            if (positionMatcher.matches()) {
                setCosmicChromosome(positionMatcher.group(CHROMOSOME), sequenceLocation);
                int rawStart = Integer.parseInt(positionMatcher.group(START));
                int start = getStart(rawStart, columns[27]);
                sequenceLocation.setStart(start);
                sequenceLocation.setEnd(Integer.parseInt(positionMatcher.group(END)));
                return true;
            }
        }

        this.invalidPositionLines++;
        return false;
    }

    /**
     * Returns the start to store for a variant: the given start plus one when the description
     * (taken from column 27 by the caller) contains "ins", otherwise the given start unchanged.
     */
    private Integer getStart(Integer num, String str) {
        if (str.contains("ins")) {
            return num + 1;
        }
        return num;
    }

    /**
     * Sets the chromosome of the location, translating the numeric codes found in the file:
     * "23" becomes "X", "24" becomes "Y" and "25" becomes "MT". Any other value is stored as is.
     *
     * @param str              chromosome as captured from the genome position
     * @param sequenceLocation location whose chromosome is set
     */
    private void setCosmicChromosome(String str, ClinicalIndexer.SequenceLocation sequenceLocation) {
        // Plain string switch; replaces decompiler output (hashCode switch over a "boolean"
        // holding -1/2) that is not valid Java.
        switch (str) {
            case "23":
                sequenceLocation.setChromosome("X");
                break;
            case "24":
                sequenceLocation.setChromosome("Y");
                break;
            case "25":
                sequenceLocation.setChromosome("MT");
                break;
            default:
                sequenceLocation.setChromosome(str);
                break;
        }
    }
}
