package org.opencb.cellbase.app.transform;

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import org.apache.commons.collections.map.HashedMap;
import org.opencb.biodata.formats.feature.gff.Gff2;
import org.opencb.biodata.formats.feature.gtf.Gtf;
import org.opencb.biodata.formats.feature.gtf.io.GtfReader;
import org.opencb.biodata.formats.io.FileFormatException;
import org.opencb.biodata.formats.sequence.fasta.Fasta;
import org.opencb.biodata.formats.sequence.fasta.io.FastaReader;
import org.opencb.biodata.models.core.Exon;
import org.opencb.biodata.models.core.Gene;
import org.opencb.biodata.models.core.GeneAnnotation;
import org.opencb.biodata.models.core.MiRNAGene;
import org.opencb.biodata.models.core.Transcript;
import org.opencb.biodata.models.core.TranscriptTfbs;
import org.opencb.biodata.models.core.Xref;
import org.opencb.biodata.models.variant.avro.Expression;
import org.opencb.biodata.models.variant.avro.GeneDrugInteraction;
import org.opencb.biodata.models.variant.avro.GeneTraitAssociation;
import org.opencb.biodata.tools.sequence.FastaIndexManager;
import org.opencb.cellbase.app.cli.BuildCommandExecutor;
import org.opencb.cellbase.app.cli.EtlCommons;
import org.opencb.cellbase.core.config.Species;
import org.opencb.cellbase.core.serializer.CellBaseSerializer;
import org.opencb.commons.utils.FileUtils;
import org.rocksdb.RocksDBException;

/* loaded from: input_file:org/opencb/cellbase/app/transform/GeneParser.class */
public class GeneParser extends CellBaseParser {
    // Transcript id -> index of that transcript inside its gene's transcript list (filled by parse()).
    private Map<String, Integer> transcriptDict;
    // Exons seen so far, keyed by "<transcriptId>_<exonNumber>" (filled by parse()).
    private Map<String, Exon> exonDict;

    // Input files. gtfFile is passed to the 14-argument constructor; proteinFastaFile and cDnaFastaFile are
    // not assigned by any constructor body visible here.
    // NOTE(review): presumably set by the get*FromGeneDirectoryPath helpers - confirm.
    private Path gtfFile;
    private Path proteinFastaFile;
    private Path cDnaFastaFile;
    private Path geneDescriptionFile;
    private Path xrefsFile;
    private Path uniprotIdMappingFile;
    private Path tfbsFile;
    private Path mirnaFile;
    private Path geneExpressionFile;
    private Path geneDrugFile;
    private Path hpoFile;
    private Path disgenetFile;
    private Path genomeSequenceFilePath;

    // When true, parse() first loads the whole GTF into memory (loadGTFMap) and replays it in a fixed order.
    private boolean flexibleGTFParsing;
    private Species species;

    // State used only by the deprecated SQLite-based genome sequence index.
    private Connection sqlConn;
    private PreparedStatement sqlQuery;
    private int CHUNK_SIZE;
    private String chunkIdSuffix;
    private Set<String> indexedSequences;

    // Initialised by the constructor; not read by any method visible in this part of the file.
    private int featureCounter;
    private String[] featureTypes;
    private String currentFeature;
    private Map<String, Object> currentTranscriptMap;

    // Cursor over the in-memory GTF map used by flexible parsing (see initializePointers / getGTFEntry).
    private int geneCounter;
    private ArrayList<String> geneList;
    private String geneName;
    private int transcriptCounter;
    private ArrayList<String> transcriptList;
    private String transcriptName;
    private int exonCounter;
    // Feature type of nextGtfToReturn: "exon", "cds", "start_codon" or "stop_codon"; null once input is exhausted.
    private String feature;
    // Next entry getGTFEntry will hand out in flexible mode; null means no more entries.
    private Gtf nextGtfToReturn;

    /**
     * Creates a parser with flexible GTF parsing disabled.
     *
     * <p>Delegates to the five-argument constructor, passing {@code false} for the flexible-parsing flag.
     *
     * @param path gene data directory; the delegate resolves the individual input files against it
     * @param path2 genome sequence file, stored as {@code genomeSequenceFilePath}
     * @param species species being built
     * @param cellBaseSerializer serializer handed to the superclass
     */
    public GeneParser(Path path, Path path2, Species species, CellBaseSerializer cellBaseSerializer) {
        this(path, path2, species, false, cellBaseSerializer);
    }

    /**
     * Creates a parser from a gene data directory using fixed file names inside it.
     *
     * <p>The description, xrefs, UniProt id-mapping, TFBS, miRNA, gene-drug, HPO and DisGeNET files are resolved
     * directly under {@code path}; the expression file is resolved under {@code common/expression} two levels
     * above {@code path}. The GTF file is passed as {@code null} to the delegate constructor and the three
     * helper calls below run afterwards.
     * NOTE(review): those helpers are not visible in this part of the file; presumably they locate the GTF,
     * protein FASTA and cDNA FASTA files inside {@code path} and store them in the matching fields - confirm.
     *
     * @param path gene data directory; must have a grandparent directory, otherwise
     *             {@code getParent().getParent()} fails
     * @param path2 genome sequence file, stored as {@code genomeSequenceFilePath}
     * @param species species being built
     * @param z whether to enable flexible GTF parsing
     * @param cellBaseSerializer serializer handed to the superclass
     */
    public GeneParser(Path path, Path path2, Species species, boolean z, CellBaseSerializer cellBaseSerializer) {
        this(null, path.resolve("description.txt"), path.resolve("xrefs.txt"), path.resolve("idmapping_selected.tab.gz"), path.resolve("MotifFeatures.gff.gz"), path.resolve("mirna.txt"), path.getParent().getParent().resolve("common/expression/allgenes_updown_in_organism_part.tab.gz"), path.resolve("geneDrug/dgidb.tsv"), path.resolve(BuildCommandExecutor.HPO_INPUT_FILE_NAME), path.resolve(BuildCommandExecutor.DISGENET_INPUT_FILE_NAME), path2, species, z, cellBaseSerializer);
        getGtfFileFromGeneDirectoryPath(path);
        getProteinFastaFileFromGeneDirectoryPath(path);
        getCDnaFastaFileFromGeneDirectoryPath(path);
    }

    /**
     * Creates a parser from explicitly supplied input files.
     *
     * <p>Only stores its arguments and sets up empty bookkeeping structures; no file is opened here. The protein
     * and cDNA FASTA paths are not parameters of this constructor and are left untouched.
     *
     * @param path gene GTF file
     * @param path2 gene description file
     * @param path3 xrefs file
     * @param path4 UniProt id-mapping file
     * @param path5 TFBS file
     * @param path6 miRNA file
     * @param path7 gene expression file
     * @param path8 gene-drug interaction file
     * @param path9 HPO file
     * @param path10 DisGeNET file
     * @param path11 genome sequence file
     * @param species species being built
     * @param z whether to enable flexible GTF parsing
     * @param cellBaseSerializer serializer handed to the superclass
     */
    public GeneParser(Path path, Path path2, Path path3, Path path4, Path path5, Path path6, Path path7, Path path8, Path path9, Path path10, Path path11, Species species, boolean z, CellBaseSerializer cellBaseSerializer) {
        super(cellBaseSerializer);

        // Input locations.
        this.gtfFile = path;
        this.geneDescriptionFile = path2;
        this.xrefsFile = path3;
        this.uniprotIdMappingFile = path4;
        this.tfbsFile = path5;
        this.mirnaFile = path6;
        this.geneExpressionFile = path7;
        this.geneDrugFile = path8;
        this.hpoFile = path9;
        this.disgenetFile = path10;
        this.genomeSequenceFilePath = path11;

        // Build options.
        this.species = species;
        this.flexibleGTFParsing = z;

        // Defaults for the chunked sequence index and feature bookkeeping.
        this.CHUNK_SIZE = 2000;
        this.chunkIdSuffix = (this.CHUNK_SIZE / 1000) + "k";
        this.featureCounter = -1;
        this.featureTypes = new String[]{"exon", "cds", "start_codon", "stop_codon"};
        this.currentFeature = "";

        // Pre-sized lookup tables used by parse().
        this.transcriptDict = new HashMap<>(250000);
        this.exonDict = new HashMap<>(8000000);
    }

    /**
     * Builds {@link Gene} objects from the gene GTF and serializes each gene once all of its entries were read.
     *
     * <p>Auxiliary annotation (descriptions, xrefs, protein/cDNA sequences, TFBS, miRNA, expression, drug and
     * disease data) is loaded first. GTF entries are then consumed one by one, either straight from the reader
     * or, when flexible parsing is enabled, from the in-memory map built by {@code loadGTFMap}. Entries of type
     * gene, transcript, UTR and Selenocysteine are ignored. A change of {@code gene_id} closes the current gene.
     *
     * <p>Any exception raised while indexing the genome or walking the GTF is logged and not rethrown. The GTF
     * reader, the serializer and the FASTA index are closed whether or not parsing succeeded, and nothing is
     * serialized when the GTF yields no usable entry.
     *
     * @throws IOException if one of the auxiliary annotation files cannot be read
     * @throws FileFormatException if one of the auxiliary FASTA files is malformed
     */
    @Override // org.opencb.cellbase.app.transform.CellBaseParser
    public void parse() throws IOException, SecurityException, NoSuchMethodException, FileFormatException, InterruptedException {
        Transcript transcript;
        Gene gene = null;
        Exon exon = null;
        // cdna: 1-based cDNA position of the first base of the current exon.
        // cds: 1-based CDS position of the next coding base of the current transcript.
        int cdna = 1;
        int cds = 1;
        Map<String, String> geneDescriptionMap = getGeneDescriptionMap();
        Map<String, ArrayList<Xref>> xrefMap = GeneParserUtils.getXrefMap(this.xrefsFile, this.uniprotIdMappingFile);
        Map<String, Fasta> proteinSequencesMap = getProteinSequencesMap();
        Map<String, Fasta> cDnaSequencesMap = getCDnaSequencesMap();
        Map<String, SortedSet<Gff2>> tfbsMap = GeneParserUtils.getTfbsMap(this.tfbsFile);
        Map<String, MiRNAGene> mirnaGeneMap = GeneParserUtils.getmiRNAGeneMap(this.mirnaFile);
        Map<String, List<Expression>> geneExpressionMap = GeneParserUtils.getGeneExpressionMap(this.species.getScientificName(), this.geneExpressionFile);
        Map<String, List<GeneDrugInteraction>> geneDrugMap = GeneParserUtils.getGeneDrugMap(this.geneDrugFile);
        Map<String, List<GeneTraitAssociation>> geneDiseaseAssociationMap = GeneParserUtils.getGeneDiseaseAssociationMap(this.hpoFile, this.disgenetFile);

        FastaIndexManager fastaIndexManager = null;
        GtfReader gtfReader = null;
        try {
            fastaIndexManager = new FastaIndexManager(this.genomeSequenceFilePath, true);
            if (!fastaIndexManager.isConnected()) {
                fastaIndexManager.index();
            }
            this.transcriptDict.clear();
            this.exonDict.clear();
            this.logger.info("Parsing gtf...");
            gtfReader = new GtfReader(this.gtfFile);
            Map<String, Map<String, Map<String, Object>>> gtfMap = null;
            if (this.flexibleGTFParsing) {
                gtfMap = loadGTFMap(gtfReader);
                initializePointers(gtfMap);
            }

            Gtf gtf;
            while ((gtf = getGTFEntry(gtfReader, gtfMap)) != null) {
                String featureType = gtf.getFeature();
                if (featureType.equals(EtlCommons.GENE_DATA) || featureType.equals("transcript")
                        || featureType.equals("UTR") || featureType.equals("Selenocysteine")) {
                    continue;
                }

                String geneId = (String) gtf.getAttributes().get("gene_id");
                String transcriptId = (String) gtf.getAttributes().get("transcript_id");

                // A different gene_id means the previous gene is complete.
                if (newGene(gene, geneId)) {
                    if (gene != null) {
                        this.serializer.serialize(gene);
                    }
                    gene = new Gene(geneId, (String) gtf.getAttributes().get("gene_name"),
                            (String) gtf.getAttributes().get("gene_biotype"), "KNOWN",
                            gtf.getSequenceName().replaceFirst("chr", ""), Integer.valueOf(gtf.getStart()),
                            Integer.valueOf(gtf.getEnd()), gtf.getStrand(), "Ensembl", geneDescriptionMap.get(geneId),
                            new ArrayList(), mirnaGeneMap.get(geneId),
                            new GeneAnnotation(geneExpressionMap.get(geneId),
                                    geneDiseaseAssociationMap.get(gtf.getAttributes().get("gene_name")),
                                    geneDrugMap.get(gtf.getAttributes().get("gene_name"))));
                }

                if (this.transcriptDict.containsKey(transcriptId)) {
                    transcript = (Transcript) gene.getTranscripts().get(this.transcriptDict.get(transcriptId).intValue());
                } else {
                    String chromosome = gtf.getSequenceName().replaceFirst("chr", "");
                    ArrayList<TranscriptTfbs> transcriptTfbses = getTranscriptTfbses(gtf, chromosome, tfbsMap);
                    Map attributes = gtf.getAttributes();
                    // Fall back to the GTF source column when no transcript_biotype attribute is present.
                    String biotype = attributes.get("transcript_biotype") != null
                            ? (String) attributes.get("transcript_biotype") : gtf.getSource();
                    transcript = new Transcript(transcriptId, (String) attributes.get("transcript_name"), biotype,
                            "KNOWN", chromosome, Integer.valueOf(gtf.getStart()), Integer.valueOf(gtf.getEnd()),
                            gtf.getStrand(), 0, 0, 0, 0, 0, "", "", xrefMap.get(transcriptId), new ArrayList(),
                            transcriptTfbses);
                    String tags = (String) gtf.getAttributes().get("tag");
                    if (tags != null) {
                        transcript.setAnnotationFlags(new HashSet(Arrays.asList(tags.split(","))));
                    }
                    Fasta proteinFasta = proteinSequencesMap.get(transcriptId);
                    if (proteinFasta != null) {
                        transcript.setProteinSequence(proteinFasta.getSeq());
                    }
                    Fasta cDnaFasta = cDnaSequencesMap.get(transcriptId);
                    if (cDnaFasta != null) {
                        transcript.setcDnaSequence(cDnaFasta.getSeq());
                    }
                    gene.getTranscripts().add(transcript);
                    this.transcriptDict.put(transcriptId, Integer.valueOf(gene.getTranscripts().size() - 1));
                }

                updateTranscriptAndGeneCoords(transcript, gene, gtf);

                if (featureType.equalsIgnoreCase("exon")) {
                    String exonSequence = null;
                    try {
                        exonSequence = fastaIndexManager.query(gtf.getSequenceName(), gtf.getStart(), gtf.getEnd());
                    } catch (RocksDBException e) {
                        // The exon is still emitted, just without its genomic sequence.
                        this.logger.warn("Could not read genomic sequence for exon "
                                + gtf.getAttributes().get("exon_id"), e);
                    }
                    exon = new Exon((String) gtf.getAttributes().get("exon_id"),
                            gtf.getSequenceName().replaceFirst("chr", ""), Integer.valueOf(gtf.getStart()),
                            Integer.valueOf(gtf.getEnd()), gtf.getStrand(), 0, 0, 0, 0, 0, 0, -1,
                            Integer.valueOf(Integer.parseInt((String) gtf.getAttributes().get("exon_number"))),
                            exonSequence);
                    transcript.getExons().add(exon);
                    this.exonDict.put(transcript.getId() + "_" + exon.getExonNumber(), exon);
                    if (((String) gtf.getAttributes().get("exon_number")).equals("1")) {
                        // First exon of a transcript: restart both running offsets.
                        cdna = 1;
                        cds = 1;
                    } else {
                        // Advance the cDNA offset by the length of the previous exon of this transcript.
                        Exon previousExon = this.exonDict.get(transcript.getId() + "_" + (exon.getExonNumber() - 1));
                        cdna += (previousExon.getEnd() - previousExon.getStart()) + 1;
                    }
                } else {
                    // Non-exon features are attached to the exon carrying the number of the last exon read.
                    // NOTE(review): this dereferences the previous exon, so a CDS/codon line that precedes every
                    // exon line fails here and ends parsing through the catch block below - confirm input order.
                    exon = this.exonDict.get(transcript.getId() + "_" + exon.getExonNumber());

                    if (featureType.equalsIgnoreCase("CDS")) {
                        boolean forward = gtf.getStrand().equals("+") || gtf.getStrand().equals("1");
                        // cDNA coordinates are measured from the exon start on the forward strand and from the
                        // exon end on the reverse strand.
                        int cdnaCodingStart = forward
                                ? (gtf.getStart() - exon.getStart()) + cdna
                                : (exon.getEnd() - gtf.getEnd()) + cdna;
                        int cdnaCodingEnd = forward
                                ? (gtf.getEnd() - exon.getStart()) + cdna
                                : (exon.getEnd() - gtf.getStart()) + cdna;

                        exon.setGenomicCodingStart(gtf.getStart());
                        exon.setGenomicCodingEnd(gtf.getEnd());
                        exon.setCdnaCodingStart(cdnaCodingStart);
                        exon.setCdnaCodingEnd(cdnaCodingEnd);
                        transcript.setCdnaCodingEnd(cdnaCodingEnd);
                        exon.setCdsStart(cds);
                        exon.setCdsEnd((gtf.getEnd() - gtf.getStart()) + cds);
                        cds += (gtf.getEnd() - gtf.getStart()) + 1;
                        transcript.setCdsLength(cds - 1);
                        exon.setPhase(Integer.valueOf(gtf.getFrame()).intValue());
                        // 0 is the "not set yet" value given to the Transcript constructor above.
                        if (transcript.getGenomicCodingStart() == 0 || transcript.getGenomicCodingStart() > gtf.getStart()) {
                            transcript.setGenomicCodingStart(gtf.getStart());
                        }
                        if (transcript.getGenomicCodingEnd() == 0 || transcript.getGenomicCodingEnd() < gtf.getEnd()) {
                            transcript.setGenomicCodingEnd(gtf.getEnd());
                        }
                        if (transcript.getCdnaCodingStart() == 0) {
                            transcript.setCdnaCodingStart(cdnaCodingStart);
                        }
                        transcript.setProteinID((String) gtf.getAttributes().get("protein_id"));
                    }

                    if (featureType.equalsIgnoreCase("start_codon")) {
                        // Placeholder kept from the original code; routed to the logger instead of stdout.
                        this.logger.debug("Empty block, this should be redesigned");
                    }

                    if (featureType.equalsIgnoreCase("stop_codon")) {
                        // NOTE(review): unlike the CDS branch, only "+" (not "1") counts as forward here.
                        if (exon.getStrand().equals("+")) {
                            exon.setGenomicCodingEnd(gtf.getEnd());
                            exon.setCdnaCodingEnd((gtf.getEnd() - exon.getStart()) + cdna);
                            exon.setCdsEnd((gtf.getEnd() - gtf.getStart()) + cds);
                            cds += gtf.getEnd() - gtf.getStart();
                            transcript.setGenomicCodingEnd(gtf.getEnd());
                            transcript.setCdnaCodingEnd((gtf.getEnd() - exon.getStart()) + cdna);
                            transcript.setCdsLength(cds - 1);
                        } else {
                            exon.setGenomicCodingStart(gtf.getStart());
                            exon.setCdnaCodingEnd((exon.getEnd() - gtf.getStart()) + cdna);
                            exon.setCdsEnd((gtf.getEnd() - gtf.getStart()) + cds);
                            cds += gtf.getEnd() - gtf.getStart();
                            transcript.setGenomicCodingStart(gtf.getStart());
                            transcript.setCdnaCodingEnd((exon.getEnd() - gtf.getStart()) + cdna);
                            transcript.setCdsLength(cds - 1);
                        }
                    }
                }
            }

            // Flush the last gene; there is none when the GTF had no usable entry.
            if (gene != null) {
                this.serializer.serialize(gene);
            }
        } catch (Exception e) {
            // Callers have never received these exceptions from parse(), so they are logged rather than rethrown.
            this.logger.error("Error while parsing gene GTF " + this.gtfFile, e);
        } finally {
            if (gtfReader != null) {
                try {
                    gtfReader.close();
                } catch (Exception e) {
                    this.logger.error("Error closing GTF reader", e);
                }
            }
            try {
                this.serializer.close();
            } catch (Exception e) {
                this.logger.error("Error closing serializer", e);
            }
            if (fastaIndexManager != null) {
                try {
                    fastaIndexManager.close();
                } catch (Exception e) {
                    this.logger.error("Error closing genome sequence index", e);
                }
            }
        }
    }

    /**
     * Positions the flexible-parsing cursor on the first exon of the first transcript of the first gene.
     *
     * <p>The gene and transcript orders are whatever the key sets of {@code map} yield.
     *
     * @param map GTF entries grouped as gene id -> transcript id -> feature type
     */
    private void initializePointers(Map<String, Map<String, Map<String, Object>>> map) {
        // Gene level.
        this.geneCounter = 0;
        this.geneList = new ArrayList<>(map.keySet());
        this.geneName = this.geneList.get(this.geneCounter);
        Map<String, Map<String, Object>> transcriptsOfGene = map.get(this.geneName);

        // Transcript level.
        this.transcriptCounter = 0;
        this.transcriptList = new ArrayList<>(transcriptsOfGene.keySet());
        this.transcriptName = this.transcriptList.get(this.transcriptCounter);

        // Exon level: the first entry handed out is always an exon.
        this.exonCounter = 0;
        this.feature = "exon";
        List<?> exons = (List<?>) transcriptsOfGene.get(this.transcriptName).get("exon");
        this.nextGtfToReturn = (Gtf) exons.get(this.exonCounter);
    }

    /**
     * Returns the next GTF entry to process, or {@code null} when the input is exhausted.
     *
     * <p>Without a map the entry is read straight from {@code gtfReader}. With a map (flexible parsing) the
     * entry stored in {@code nextGtfToReturn} is returned and the cursor is advanced so that, per transcript,
     * every exon is followed by its overlapping CDS (if any), and the start and stop codons come last.
     *
     * <p>{@code nextGtfToReturn} is only ever set to {@code null} by {@code getFeatureFollowsExon} when no gene
     * is left. An exon with no overlapping CDS, or a transcript with a start codon but no stop codon, therefore
     * advances the cursor instead of ending the stream.
     *
     * @param gtfReader reader used when {@code map} is {@code null}
     * @param map GTF entries grouped as gene id -> transcript id -> feature type, or {@code null}
     * @return the next entry, or {@code null} at end of input
     * @throws FileFormatException if the reader fails or the cursor holds an unknown feature type
     */
    private Gtf getGTFEntry(GtfReader gtfReader, Map<String, Map<String, Map<String, Object>>> map) throws FileFormatException {
        if (map == null) {
            return gtfReader.read();
        }
        if (this.nextGtfToReturn == null) {
            return null;
        }

        Gtf current = this.nextGtfToReturn;
        Map<String, Object> transcriptFeatures = map.get(this.geneName).get(this.transcriptName);

        if (this.feature.equals("exon")) {
            if (transcriptFeatures.containsKey("cds")) {
                Gtf exonGtf = (Gtf) ((List) transcriptFeatures.get("exon")).get(this.exonCounter);
                // Keep the lookup result in a local: writing a null into nextGtfToReturn would be read as
                // "end of input" by getFeatureFollowsExon.
                Gtf overlappingCds = getExonCDSLine(Integer.valueOf(exonGtf.getStart()),
                        Integer.valueOf(exonGtf.getEnd()), (List) transcriptFeatures.get("cds"));
                if (overlappingCds != null) {
                    this.feature = "cds";
                    this.nextGtfToReturn = overlappingCds;
                    return current;
                }
            }
            getFeatureFollowsExon(map);
            return current;
        }

        if (this.feature.equals("cds") || this.feature.equals("stop_codon")) {
            getFeatureFollowsExon(map);
            return current;
        }

        if (!this.feature.equals("start_codon")) {
            throw new FileFormatException("Execution cannot reach this point");
        }

        // A start codon is followed by the stop codon of the same transcript when there is one.
        this.feature = "stop_codon";
        Gtf stopCodon = (Gtf) transcriptFeatures.get("stop_codon");
        if (stopCodon != null) {
            this.nextGtfToReturn = stopCodon;
        } else {
            // No stop codon for this transcript: with feature already "stop_codon" this moves the cursor to
            // the next transcript.
            getFeatureFollowsExon(map);
        }
        return current;
    }

    /**
     * Finds the first CDS entry whose range overlaps the given exon range.
     *
     * @param num exon start
     * @param num2 exon end
     * @param list CDS entries ({@link Gtf}) of the transcript, scanned in list order
     * @return the first overlapping CDS entry, or {@code null} if none overlaps
     */
    private Gtf getExonCDSLine(Integer num, Integer num2, List list) {
        Iterator candidates = list.iterator();
        while (candidates.hasNext()) {
            Gtf cds = (Gtf) candidates.next();
            // Two closed intervals overlap when each one starts no later than the other ends.
            boolean startsBeforeExonEnds = cds.getStart() <= num2.intValue();
            if (startsBeforeExonEnds && cds.getEnd() >= num.intValue()) {
                return cds;
            }
        }
        return null;
    }

    /**
     * Advances the flexible-parsing cursor past the entry that was just handed out.
     *
     * <p>Order of preference: the next exon of the current transcript; otherwise its start codon (unless the
     * last entry was a stop codon); otherwise the first exon of the next transcript, moving on to the next gene
     * when the current one has no transcript left. When no gene is left, {@code nextGtfToReturn} and
     * {@code feature} are set to {@code null}.
     *
     * <p>End of input is decided only by the gene counter, never by the previous value of
     * {@code nextGtfToReturn}, so a stale {@code null} left there by a caller cannot end the stream early.
     *
     * @param map GTF entries grouped as gene id -> transcript id -> feature type
     */
    private void getFeatureFollowsExon(Map<String, Map<String, Map<String, Object>>> map) {
        Map<String, Object> transcriptFeatures = map.get(this.geneName).get(this.transcriptName);
        boolean afterStopCodon = this.feature.equals("stop_codon");

        this.exonCounter++;
        if (!afterStopCodon) {
            List exons = (List) transcriptFeatures.get("exon");
            if (this.exonCounter != exons.size()) {
                this.feature = "exon";
                this.nextGtfToReturn = (Gtf) exons.get(this.exonCounter);
                return;
            }
            if (transcriptFeatures.containsKey("start_codon")) {
                this.feature = "start_codon";
                this.nextGtfToReturn = (Gtf) transcriptFeatures.get("start_codon");
                return;
            }
        }

        // The current transcript is finished: move to the next transcript, possibly in the next gene.
        this.transcriptCounter++;
        if (this.transcriptCounter == map.get(this.geneName).size()) {
            this.geneCounter++;
            if (this.geneCounter == map.size()) {
                // No gene left: signal end of input.
                this.nextGtfToReturn = null;
                this.feature = null;
                return;
            }
            this.geneName = this.geneList.get(this.geneCounter);
            this.transcriptCounter = 0;
            this.transcriptList = new ArrayList<>(map.get(this.geneName).keySet());
        }

        this.transcriptName = this.transcriptList.get(this.transcriptCounter);
        this.exonCounter = 0;
        this.feature = "exon";
        this.nextGtfToReturn = (Gtf) ((List) map.get(this.geneName).get(this.transcriptName).get("exon")).get(this.exonCounter);
    }

    /**
     * Reads the whole GTF into memory, grouped as gene id -> transcript id -> feature type.
     *
     * <p>Entries of type gene, transcript, UTR and Selenocysteine are dropped. If the sampled exon carries no
     * {@code exon_number} attribute (see {@code exonNumberPresent}), exon numbers are generated for every
     * transcript.
     *
     * @param gtfReader reader positioned at the start of the GTF; read until it returns {@code null}
     * @return the grouped GTF entries
     * @throws FileFormatException if the reader fails
     */
    private Map<String, Map<String, Map<String, Object>>> loadGTFMap(GtfReader gtfReader) throws FileFormatException {
        Map<String, Map<String, Map<String, Object>>> gtfMap = new HashedMap();

        for (Gtf line = gtfReader.read(); line != null; line = gtfReader.read()) {
            String featureType = line.getFeature();
            boolean ignored = featureType.equals(EtlCommons.GENE_DATA) || featureType.equals("transcript")
                    || featureType.equals("UTR") || featureType.equals("Selenocysteine");
            if (ignored) {
                continue;
            }

            // Find or create the per-gene map.
            String geneId = (String) line.getAttributes().get("gene_id");
            Map<String, Map<String, Object>> transcriptsOfGene;
            if (gtfMap.containsKey(geneId)) {
                transcriptsOfGene = gtfMap.get(geneId);
            } else {
                transcriptsOfGene = new HashedMap();
                gtfMap.put(geneId, transcriptsOfGene);
            }

            // Find or create the per-transcript map.
            String transcriptId = (String) line.getAttributes().get("transcript_id");
            Map<String, Object> featuresOfTranscript;
            if (transcriptsOfGene.containsKey(transcriptId)) {
                featuresOfTranscript = transcriptsOfGene.get(transcriptId);
            } else {
                featuresOfTranscript = new HashedMap();
                transcriptsOfGene.put(transcriptId, featuresOfTranscript);
            }

            addGTFLineToGTFMap(featuresOfTranscript, line);
        }

        if (!exonNumberPresent(gtfMap)) {
            setExonNumber(gtfMap);
        }
        return gtfMap;
    }

    /**
     * Tells whether the GTF carries {@code exon_number} attributes.
     *
     * <p>Only one exon is inspected: the first exon of the first transcript of the first gene, in key-set
     * iteration order.
     *
     * @param map GTF entries grouped as gene id -> transcript id -> feature type; must not be empty
     * @return {@code true} if the sampled exon has an {@code exon_number} attribute
     */
    private boolean exonNumberPresent(Map<String, Map<String, Map<String, Object>>> map) {
        String firstGeneId = map.keySet().iterator().next();
        Map<String, Map<String, Object>> transcriptsOfGene = map.get(firstGeneId);

        String firstTranscriptId = transcriptsOfGene.keySet().iterator().next();
        List<?> exons = (List<?>) transcriptsOfGene.get(firstTranscriptId).get("exon");

        Gtf sampleExon = (Gtf) exons.get(0);
        return sampleExon.getAttributes().containsKey("exon_number");
    }

    /**
     * Generates an {@code exon_number} attribute for every exon of every transcript.
     *
     * <p>The exon list of each transcript is sorted in place by ascending start. When the first exon's strand
     * is {@code "+"} the exons are numbered 1..n in that order; otherwise they are numbered n..1.
     *
     * @param map GTF entries grouped as gene id -> transcript id -> feature type
     */
    @SuppressWarnings("unchecked") // the "exon" slot always holds the list of Gtf built by addGTFLineToGTFMap
    private void setExonNumber(Map<String, Map<String, Map<String, Object>>> map) {
        for (Map<String, Map<String, Object>> transcriptsOfGene : map.values()) {
            for (Map<String, Object> featuresOfTranscript : transcriptsOfGene.values()) {
                List<Gtf> exons = (List<Gtf>) featuresOfTranscript.get("exon");
                Collections.sort(exons, (left, right) -> Integer.compare(left.getStart(), right.getStart()));

                boolean forward = exons.get(0).getStrand().equals("+");
                int exonCount = exons.size();
                for (int position = 0; position < exonCount; position++) {
                    // Forward strand counts up from the lowest start, any other strand counts down.
                    int exonNumber = forward ? position + 1 : exonCount - position;
                    exons.get(position).getAttributes().put("exon_number", String.valueOf(exonNumber));
                }
            }
        }
    }

    /**
     * Files one GTF entry under its lower-cased feature type in a transcript's feature map.
     *
     * <p>{@code exon} and {@code cds} entries are appended to a list stored under that key; {@code start_codon}
     * and {@code stop_codon} entries are stored directly, replacing any earlier one. Other types are ignored.
     *
     * @param map feature map of one transcript
     * @param gtf entry to file
     */
    @SuppressWarnings("unchecked") // the "exon"/"cds" slots only ever hold lists created below
    private void addGTFLineToGTFMap(Map<String, Object> map, Gtf gtf) {
        String featureType = gtf.getFeature().toLowerCase();
        switch (featureType) {
            case "exon":
            case "cds": {
                List<Object> entries;
                if (map.containsKey(featureType)) {
                    entries = (List<Object>) map.get(featureType);
                } else {
                    entries = new ArrayList<>();
                    map.put(featureType, entries);
                }
                entries.add(gtf);
                break;
            }
            case "start_codon":
            case "stop_codon":
                map.put(featureType, gtf);
                break;
            default:
                // Any other feature type is not kept.
                break;
        }
    }

    /**
     * Collects the TFBS entries of a chromosome that fall in the window around a transcript's start.
     *
     * <p>For strand {@code "+"} the scan stops at the first site starting after {@code gtf start + 500} and
     * keeps sites ending after {@code gtf start - 2500}. For any other strand it stops at the first site
     * starting after {@code gtf end + 2500} and keeps sites starting after {@code gtf end - 500}. Sites are
     * visited in the iteration order of the sorted set.
     *
     * @param gtf transcript entry
     * @param str chromosome name used as key into {@code map}
     * @param map TFBS entries per chromosome
     * @return the matching sites, or {@code null} if the chromosome is unknown or nothing matched
     */
    private ArrayList<TranscriptTfbs> getTranscriptTfbses(Gtf gtf, String str, Map<String, SortedSet<Gff2>> map) {
        if (!map.containsKey(str)) {
            return null;
        }

        ArrayList<TranscriptTfbs> matches = null;
        for (Gff2 site : map.get(str)) {
            boolean forward = gtf.getStrand().equals("+");

            // Sites beyond this coordinate can no longer match, so the scan ends.
            int scanLimit = forward ? gtf.getStart() + 500 : gtf.getEnd() + 2500;
            if (site.getStart() > scanLimit) {
                break;
            }

            boolean insideWindow = forward
                    ? site.getEnd() > gtf.getStart() - 2500
                    : site.getStart() > gtf.getEnd() - 500;
            if (insideWindow) {
                matches = addTranscriptTfbstoList(site, gtf, str, matches);
            }
        }
        return matches;
    }

    /**
     * Converts a TFBS GFF entry into a {@link TranscriptTfbs} and appends it to the list.
     *
     * <p>The first two constructor arguments come from the GFF attribute: the text after the first {@code =}
     * is split on {@code :} and its first two fields are used.
     *
     * @param gff2 TFBS entry
     * @param gtf transcript entry the relative coordinates refer to
     * @param str chromosome name
     * @param arrayList list to append to, or {@code null} to start a new one
     * @return the list that received the new element
     */
    private ArrayList<TranscriptTfbs> addTranscriptTfbstoList(Gff2 gff2, Gtf gtf, String str, ArrayList<TranscriptTfbs> arrayList) {
        ArrayList<TranscriptTfbs> target = arrayList != null ? arrayList : new ArrayList<>();

        String attributeValue = gff2.getAttribute().split("=")[1];
        String[] fields = attributeValue.split(":");

        Integer siteStart = Integer.valueOf(gff2.getStart());
        Integer siteEnd = Integer.valueOf(gff2.getEnd());
        Integer relativeStart = getRelativeTranscriptTfbsStart(gff2, gtf);
        Integer relativeEnd = getRelativeTranscriptTfbsEnd(gff2, gtf);
        Float score = Float.valueOf(Float.parseFloat(gff2.getScore()));

        target.add(new TranscriptTfbs(fields[0], fields[1], str, siteStart, siteEnd, gff2.getStrand(), relativeStart, relativeEnd, score));
        return target;
    }

    /**
     * Computes the TFBS start relative to the transcript, with no position 0.
     *
     * <p>For strand {@code "+"} the offset is {@code tfbs start - transcript start}; for any other strand it is
     * {@code transcript end - tfbs end}. Offsets of sites lying before the reference are returned as is;
     * all others are shifted by +1.
     *
     * @param gff2 TFBS entry
     * @param gtf transcript entry
     * @return the relative start
     */
    private Integer getRelativeTranscriptTfbsStart(Gff2 gff2, Gtf gtf) {
        int offset;
        boolean beforeReference;
        if (gtf.getStrand().equals("+")) {
            beforeReference = gff2.getStart() < gtf.getStart();
            offset = gff2.getStart() - gtf.getStart();
        } else {
            beforeReference = gff2.getEnd() > gtf.getEnd();
            offset = gtf.getEnd() - gff2.getEnd();
        }
        if (beforeReference) {
            return Integer.valueOf(offset);
        }
        return Integer.valueOf(offset + 1);
    }

    /**
     * Computes the TFBS end relative to the transcript, with no position 0.
     *
     * <p>For strand {@code "+"} the offset is {@code tfbs end - transcript start}; for any other strand it is
     * {@code transcript end - tfbs start}. Offsets of sites lying before the reference are returned as is;
     * all others are shifted by +1.
     *
     * @param gff2 TFBS entry
     * @param gtf transcript entry
     * @return the relative end
     */
    private Integer getRelativeTranscriptTfbsEnd(Gff2 gff2, Gtf gtf) {
        int offset;
        boolean beforeReference;
        if (gtf.getStrand().equals("+")) {
            beforeReference = gff2.getEnd() < gtf.getStart();
            offset = gff2.getEnd() - gtf.getStart();
        } else {
            beforeReference = gff2.getStart() > gtf.getEnd();
            offset = gtf.getEnd() - gff2.getStart();
        }
        if (beforeReference) {
            return Integer.valueOf(offset);
        }
        return Integer.valueOf(offset + 1);
    }

    /**
     * Loads the cDNA FASTA file into a map keyed by FASTA id.
     *
     * <p>If the file is unset, missing, a directory or empty, two warnings are logged and an empty map is
     * returned. The reader is closed even when reading fails.
     *
     * @return cDNA sequences by FASTA id; never {@code null}
     * @throws IOException if the file size cannot be determined or the reader fails
     * @throws FileFormatException if the FASTA file is malformed
     */
    private Map<String, Fasta> getCDnaSequencesMap() throws IOException, FileFormatException {
        this.logger.info("Loading ENSEMBL's cDNA sequences...");
        Map<String, Fasta> sequences = new HashMap<>();
        if (this.cDnaFastaFile == null || !Files.exists(this.cDnaFastaFile, new LinkOption[0]) || Files.isDirectory(this.cDnaFastaFile, new LinkOption[0]) || Files.size(this.cDnaFastaFile) <= 0) {
            this.logger.warn("cDNA fasta file " + this.cDnaFastaFile + " not found");
            this.logger.warn("ENSEMBL's cDNA sequences not loaded");
            return sequences;
        }

        FastaReader fastaReader = new FastaReader(this.cDnaFastaFile);
        List<Fasta> entries;
        try {
            entries = fastaReader.readAll();
        } finally {
            // Release the file handle even if the FASTA turns out to be unreadable.
            fastaReader.close();
        }
        for (Fasta entry : entries) {
            sequences.put(entry.getId(), entry);
        }
        return sequences;
    }

    /**
     * Loads the protein FASTA file into a map keyed by transcript id.
     *
     * <p>The key is the text that follows {@code transcript:} in the FASTA description, up to the next
     * whitespace. If the file is unset, missing, a directory or empty, two warnings are logged and an empty
     * map is returned. The reader is closed even when reading fails.
     *
     * @return protein sequences by transcript id; never {@code null}
     * @throws IOException if the file size cannot be determined or the reader fails
     * @throws FileFormatException if the FASTA file is malformed
     */
    private Map<String, Fasta> getProteinSequencesMap() throws IOException, FileFormatException {
        this.logger.info("Loading ENSEMBL's protein sequences...");
        Map<String, Fasta> sequences = new HashMap<>();
        if (this.proteinFastaFile == null || !Files.exists(this.proteinFastaFile, new LinkOption[0]) || Files.isDirectory(this.proteinFastaFile, new LinkOption[0]) || Files.size(this.proteinFastaFile) <= 0) {
            this.logger.warn("Protein fasta file " + this.proteinFastaFile + " not found");
            this.logger.warn("ENSEMBL's protein sequences not loaded");
            return sequences;
        }

        FastaReader fastaReader = new FastaReader(this.proteinFastaFile);
        List<Fasta> entries;
        try {
            entries = fastaReader.readAll();
        } finally {
            // Release the file handle even if the FASTA turns out to be unreadable.
            fastaReader.close();
        }
        for (Fasta entry : entries) {
            String transcriptId = entry.getDescription().split("transcript:")[1].split("\\s")[0];
            sequences.put(transcriptId, entry);
        }
        return sequences;
    }

    /**
     * Loads the gene description file into a map from its first column to its second column.
     *
     * <p>The file is read as ISO-8859-1, one tab-separated record per line. Lines with fewer than two columns
     * (for instance blank lines) are skipped instead of aborting the load. If the file is unset, missing or
     * empty, two warnings are logged and an empty map is returned.
     *
     * @return descriptions keyed by the first column of the file; never {@code null}
     * @throws IOException if the file cannot be read
     */
    private Map<String, String> getGeneDescriptionMap() throws IOException {
        this.logger.info("Loading gene description data...");
        Map<String, String> descriptions = new HashMap<>();
        if (this.geneDescriptionFile == null || !Files.exists(this.geneDescriptionFile, new LinkOption[0]) || Files.size(this.geneDescriptionFile) <= 0) {
            this.logger.warn("Gene description file " + this.geneDescriptionFile + " not found");
            this.logger.warn("Gene description data not loaded");
            return descriptions;
        }

        for (String line : Files.readAllLines(this.geneDescriptionFile, Charset.forName("ISO-8859-1"))) {
            String[] fields = line.split("\t", -1);
            if (fields.length < 2) {
                // No tab on this line, so there is no description column to store.
                continue;
            }
            descriptions.put(fields[0], fields[1]);
        }
        return descriptions;
    }

    /**
     * Tells whether a GTF entry belongs to a gene other than the one currently being built.
     *
     * @param gene gene being built, or {@code null} if none has been started
     * @param str gene id of the current GTF entry
     * @return {@code true} if no gene has been started or its id differs from {@code str}
     */
    private boolean newGene(Gene gene, String str) {
        if (gene == null) {
            return true;
        }
        boolean sameGene = str.equals(gene.getId());
        return !sameGene;
    }

    /**
     * Opens the SQLite genome sequence database next to the given FASTA file, creating and filling it if the
     * database file is missing or empty, and prepares the chunk lookup query.
     *
     * @param path reference genome FASTA file; the database is {@code reference_genome.db} in its directory
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException if the database cannot be opened, created or queried
     * @throws IOException if the database file size or the FASTA file cannot be read
     */
    @Deprecated
    private void connect(Path path) throws ClassNotFoundException, SQLException, IOException {
        this.logger.info("Connecting to reference genome sequence database ...");
        Class.forName("org.sqlite.JDBC");
        this.sqlConn = DriverManager.getConnection("jdbc:sqlite:" + path.getParent().toString() + "/reference_genome.db");

        Path databaseFile = Paths.get(path.getParent().toString(), "reference_genome.db");
        if (!Files.exists(databaseFile, new LinkOption[0]) || Files.size(databaseFile) == 0) {
            this.logger.info("Genome sequence database doesn't exists and will be created");
            // Close the DDL statement as soon as it has run instead of leaving it open on the connection.
            try (java.sql.Statement createTable = this.sqlConn.createStatement()) {
                createTable.executeUpdate("CREATE TABLE if not exists  genome_sequence (sequenceName VARCHAR(50), chunkId VARCHAR(30), start INT, end INT, sequence VARCHAR(2000))");
            }
            indexReferenceGenomeFasta(path);
        }

        this.indexedSequences = getIndexedSequences();
        this.sqlQuery = this.sqlConn.prepareStatement("SELECT sequence from genome_sequence WHERE chunkId = ? ");
        this.logger.info("Genome sequence database connected");
    }

    /**
     * Lists the distinct sequence names stored in the genome sequence database.
     *
     * <p>The statement and its result set are closed before returning, also when the query fails.
     *
     * @return the distinct values of {@code sequenceName}; never {@code null}
     * @throws SQLException if the query fails
     */
    @Deprecated
    private Set<String> getIndexedSequences() throws SQLException {
        Set<String> sequenceNames = new HashSet<>();
        try (PreparedStatement query = this.sqlConn.prepareStatement("SELECT DISTINCT sequenceName from genome_sequence");
             ResultSet rows = query.executeQuery()) {
            while (rows.next()) {
                sequenceNames.add(rows.getString(1));
            }
        }
        return sequenceNames;
    }

    /**
     * Closes the prepared chunk query and the SQLite connection, if they were opened.
     *
     * <p>Does nothing when {@code connect} was never called. The connection is closed even if closing the
     * prepared statement fails.
     *
     * @throws SQLException if closing either resource fails
     */
    @Deprecated
    private void disconnectSqlite() throws SQLException {
        try {
            if (this.sqlQuery != null) {
                this.sqlQuery.close();
            }
        } finally {
            if (this.sqlConn != null) {
                this.sqlConn.close();
            }
        }
    }

    /**
     * Reads {@code path} line by line and stores every sequence it contains in the
     * {@code genome_sequence} table, then creates an index on the {@code chunkId} column.
     *
     * <p>A line starting with {@code >} begins a new sequence: its name is the header with every
     * {@code >} removed, cut at the first space, and a flag records whether the header ends with
     * {@code HAP}. All other lines are appended to the current sequence. Each completed sequence is
     * handed to {@code insertGenomeSequence}.
     *
     * @param path the file to read
     * @throws IOException            if the file cannot be opened or read
     * @throws ClassNotFoundException declared for callers; not thrown by this body
     * @throws SQLException           if preparing or running any statement fails
     */
    @Deprecated
    private void indexReferenceGenomeFasta(Path path) throws IOException, ClassNotFoundException, SQLException {
        String sequenceName = "";
        boolean headerEndsWithHap = false;
        StringBuilder sequence = new StringBuilder();
        // Reader and insert statement are closed even when reading or inserting fails.
        try (BufferedReader reader = FileUtils.newBufferedReader(path);
                PreparedStatement insert = this.sqlConn.prepareStatement("INSERT INTO genome_sequence (chunkID, start, end, sequence, sequenceName) values (?, ?, ?, ?, ?)")) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(">")) {
                    // A new header closes the previous sequence, if any bases were collected.
                    if (sequence.length() > 0) {
                        insertGenomeSequence(sequenceName, headerEndsWithHap, insert, sequence);
                    }
                    sequenceName = line.replace(">", "").split(" ")[0];
                    headerEndsWithHap = line.endsWith("HAP");
                    sequence.setLength(0);
                } else {
                    sequence.append(line);
                }
            }
            // Flush the last sequence; skipped when nothing was collected, as in the header branch.
            if (sequence.length() > 0) {
                insertGenomeSequence(sequenceName, headerEndsWithHap, insert, sequence);
            }
        }
        try (java.sql.Statement createIndex = this.sqlConn.createStatement()) {
            createIndex.executeUpdate("CREATE INDEX chunkId_idx on genome_sequence(chunkId)");
        }
    }

    /**
     * Splits one sequence into chunks and inserts them through {@code preparedStatement}.
     *
     * <p>Chunk {@code c} holds the 1-based positions {@code max(1, c * CHUNK_SIZE)} to
     * {@code (c + 1) * CHUNK_SIZE - 1}, capped at the sequence length. This is the layout
     * {@code getExonSequence} relies on when it addresses a position with {@code getChunk} and
     * {@code getOffset}. The chunk id is {@code <name>_<chunk>_<chunkIdSuffix>}.
     *
     * <p>Compared with the previous implementation this version also stores sequences shorter than
     * {@code CHUNK_SIZE} (their parameters used to be bound but the statement was never executed),
     * and it places the final base in its own chunk when the sequence length is an exact multiple
     * of {@code CHUNK_SIZE} instead of dropping it or appending it to the preceding chunk.
     *
     * @param str               name of the sequence; sequences whose name contains {@code PATCH}
     *                          are skipped
     * @param z                 when {@code true} the sequence is skipped
     * @param preparedStatement insert statement with parameters (chunkId, start, end, sequence,
     *                          sequenceName), in that order
     * @param sb                the full sequence; nothing is inserted when it is empty
     * @throws SQLException if binding, batching or committing fails
     */
    @Deprecated
    private void insertGenomeSequence(String str, boolean z, PreparedStatement preparedStatement, StringBuilder sb) throws SQLException {
        if (z || str.contains("PATCH")) {
            return;
        }
        final int sequenceLength = sb.length();
        if (sequenceLength == 0) {
            // Nothing to store; avoids inserting an empty row for a header without bases.
            return;
        }
        this.logger.info("Indexing genome sequence {} ...", str);

        final int chunkSize = this.CHUNK_SIZE;
        final int chunksPerCommit = 10000;
        preparedStatement.setString(5, str);
        this.sqlConn.setAutoCommit(false);
        int chunk = 0;
        int start = 1;
        while (start <= sequenceLength) {
            // Flush periodically so the pending batch does not grow without bound.
            if (chunk % chunksPerCommit == 0 && chunk != 0) {
                preparedStatement.executeBatch();
                this.sqlConn.commit();
            }
            // Last position of this chunk; long arithmetic guards the multiplication against overflow.
            int end = (int) Math.min((long) sequenceLength, (long) (chunk + 1) * chunkSize - 1);
            preparedStatement.setString(1, str + "_" + chunk + "_" + this.chunkIdSuffix);
            preparedStatement.setInt(2, start);
            preparedStatement.setInt(3, end);
            // Positions are 1-based, StringBuilder indices are 0-based with an exclusive end.
            preparedStatement.setString(4, sb.substring(start - 1, end));
            preparedStatement.addBatch();
            start = end + 1;
            chunk++;
        }
        preparedStatement.executeBatch();
        this.sqlConn.commit();
        this.sqlConn.setAutoCommit(true);
    }

    /**
     * Fetches the bases between positions {@code i} and {@code i2} (both inclusive) of sequence
     * {@code str} from the chunked {@code genome_sequence} table.
     *
     * <p>All chunks from {@code getChunk(i)} to {@code getChunk(i2)} are read and concatenated, and the
     * requested range is cut out of the result. Reading stops at the first chunk that has no row or a
     * {@code NULL} sequence, so a missing chunk can no longer contribute text to the result.
     *
     * @param str name of the sequence; must be contained in {@code indexedSequences}
     * @param i   first position of the range
     * @param i2  last position of the range
     * @return the requested bases, or an empty string when the sequence is not indexed, the stored
     *         chunks do not cover the range, or a {@link SQLException} occurs (which is logged)
     */
    @Deprecated
    private String getExonSequence(String str, int i, int i2) {
        if (!this.indexedSequences.contains(str)) {
            return "";
        }
        String exonSequence = "";
        try {
            StringBuilder chunks = new StringBuilder();
            int firstChunk = getChunk(i);
            int lastChunk = getChunk(i2);
            for (int chunk = firstChunk; chunk <= lastChunk; chunk++) {
                this.sqlQuery.setString(1, str + "_" + chunk + "_" + this.chunkIdSuffix);
                // Advance the cursor before reading and always release the result set.
                try (ResultSet rows = this.sqlQuery.executeQuery()) {
                    String chunkSequence = rows.next() ? rows.getString(1) : null;
                    if (chunkSequence == null) {
                        break;
                    }
                    chunks.append(chunkSequence);
                }
            }
            int from = getOffset(i);
            int to = from + (i2 - i) + 1;
            if (!(firstChunk > 0)) {
                // The first chunk starts at position 1, not 0, so its indices are shifted by one.
                from--;
                to--;
            }
            if (chunks.length() > 0 && chunks.length() >= to) {
                exonSequence = chunks.substring(from, to);
            }
        } catch (SQLException e) {
            // The trailing throwable lets the logging framework report the cause as well.
            this.logger.error("Error obtaining exon sequence ({}:{}-{})", new Object[]{str, Integer.valueOf(i), Integer.valueOf(i2), e});
        }
        return exonSequence;
    }

    /**
     * Computes the chunk number of a position.
     *
     * @param i the position
     * @return {@code i} divided by {@code CHUNK_SIZE} using integer division
     */
    private int getChunk(int i) {
        final int chunkSize = this.CHUNK_SIZE;
        return i / chunkSize;
    }

    /**
     * Computes the offset of a position inside its chunk.
     *
     * @param i the position
     * @return the remainder of {@code i} divided by {@code CHUNK_SIZE}
     */
    private int getOffset(int i) {
        final int chunkSize = this.CHUNK_SIZE;
        return i % chunkSize;
    }

    /**
     * Widens the coordinates of {@code transcript} and {@code gene} so that both span the
     * {@code gtf} feature.
     *
     * <p>A start is replaced only when the feature starts before it, and an end only when the
     * feature ends after it; otherwise the existing value is left untouched.
     *
     * @param transcript the transcript whose start/end may be extended
     * @param gene       the gene whose start/end may be extended
     * @param gtf        the feature providing the candidate start and end
     */
    private void updateTranscriptAndGeneCoords(Transcript transcript, Gene gene, Gtf gtf) {
        // Start coordinates: move left when the feature begins earlier.
        if (gtf.getStart() < transcript.getStart()) {
            transcript.setStart(gtf.getStart());
        }
        if (gtf.getStart() < gene.getStart()) {
            gene.setStart(gtf.getStart());
        }

        // End coordinates: move right when the feature finishes later.
        if (gtf.getEnd() > transcript.getEnd()) {
            transcript.setEnd(gtf.getEnd());
        }
        if (gtf.getEnd() > gene.getEnd()) {
            gene.setEnd(gtf.getEnd());
        }
    }

    /**
     * Looks in {@code path} for a file whose name ends with {@code .gtf} or {@code .gtf.gz} and stores
     * the first one found in {@code gtfFile}.
     *
     * <p>The field is left unchanged when no entry matches or when the directory cannot be listed.
     *
     * @param path the directory to search
     */
    private void getGtfFileFromGeneDirectoryPath(Path path) {
        String[] fileNames = path.toFile().list();
        if (fileNames == null) {
            // File.list() yields null when path is not a directory or cannot be read.
            return;
        }
        for (String fileName : fileNames) {
            if (fileName.endsWith(".gtf") || fileName.endsWith(".gtf.gz")) {
                this.gtfFile = path.resolve(fileName);
                return;
            }
        }
    }

    /**
     * Looks in {@code path} for a file whose name ends with {@code .pep.all.fa} or
     * {@code .pep.all.fa.gz} and stores the first one found in {@code proteinFastaFile}.
     *
     * <p>The field is left unchanged when no entry matches or when the directory cannot be listed.
     *
     * @param path the directory to search
     */
    private void getProteinFastaFileFromGeneDirectoryPath(Path path) {
        String[] fileNames = path.toFile().list();
        if (fileNames == null) {
            // File.list() yields null when path is not a directory or cannot be read.
            return;
        }
        for (String fileName : fileNames) {
            if (fileName.endsWith(".pep.all.fa") || fileName.endsWith(".pep.all.fa.gz")) {
                this.proteinFastaFile = path.resolve(fileName);
                return;
            }
        }
    }

    /**
     * Looks in {@code path} for a file whose name ends with {@code .cdna.all.fa} or
     * {@code .cdna.all.fa.gz} and stores the first one found in {@code cDnaFastaFile}.
     *
     * <p>The field is left unchanged when no entry matches or when the directory cannot be listed.
     *
     * @param path the directory to search
     */
    private void getCDnaFastaFileFromGeneDirectoryPath(Path path) {
        String[] fileNames = path.toFile().list();
        if (fileNames == null) {
            // File.list() yields null when path is not a directory or cannot be read.
            return;
        }
        for (String fileName : fileNames) {
            if (fileName.endsWith(".cdna.all.fa") || fileName.endsWith(".cdna.all.fa.gz")) {
                this.cDnaFastaFile = path.resolve(fileName);
                return;
            }
        }
    }
}
