package org.opencb.cellbase.app.transform;

import com.google.protobuf.MessageOrBuilder;
import com.google.protobuf.util.JsonFormat;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.zip.GZIPInputStream;
import org.opencb.biodata.formats.feature.gff.Gff2;
import org.opencb.biodata.formats.feature.gff.io.Gff2Reader;
import org.opencb.biodata.formats.feature.gtf.Gtf;
import org.opencb.biodata.formats.feature.gtf.io.GtfReader;
import org.opencb.biodata.formats.io.FileFormatException;
import org.opencb.biodata.formats.sequence.fasta.Fasta;
import org.opencb.biodata.formats.sequence.fasta.io.FastaReader;
import org.opencb.biodata.models.core.protobuf.GeneModel;
import org.opencb.biodata.models.core.protobuf.TranscriptModel;
import org.opencb.biodata.models.variant.avro.Expression;
import org.opencb.biodata.models.variant.avro.ExpressionCall;
import org.opencb.biodata.models.variant.avro.GeneDrugInteraction;
import org.opencb.cellbase.app.cli.EtlCommons;
import org.opencb.cellbase.core.config.Species;
import org.opencb.cellbase.core.serializer.CellBaseSerializer;
import org.opencb.commons.utils.FileUtils;

/* loaded from: input_file:org/opencb/cellbase/app/transform/GeneParserProto.class */
public class GeneParserProto extends CellBaseParser {
    /** Maps a transcript id to its position in the transcript list of the gene being parsed. */
    private Map<String, Integer> transcriptDict;
    /** Exon builders keyed by {@code <transcriptId>_<exonNumber>}. */
    private Map<String, TranscriptModel.Exon.Builder> exonDict;
    /** GTF file read by {@code parse()}. */
    private Path gtfFile;
    /** Protein FASTA file; not assigned in the constructors shown here (presumably set by a directory-lookup helper). */
    private Path proteinFastaFile;
    /** cDNA FASTA file; not assigned in the constructors shown here (presumably set by a directory-lookup helper). */
    private Path cDnaFastaFile;
    /** Tab-separated file whose first two columns are loaded as gene id and description. */
    private Path geneDescriptionFile;
    /** Tab-separated xref file; rows need at least four columns to be loaded. */
    private Path xrefsFile;
    /** UniProt id-mapping file used to add UniProtKB xrefs to transcripts. */
    private Path uniprotIdMappingFile;
    /** GFF2 file with TFBS motif features. */
    private Path tfbsFile;
    /** miRNA file handed to {@code getmiRNAGeneMap}. */
    private Path mirnaFile;
    /** Gene expression file (optionally gzipped). */
    private Path geneExpressionFile;
    /** Gene-drug interaction file (optionally gzipped). */
    private Path geneDrugFile;
    /** Reference genome FASTA; the SQLite index is created in its parent directory. */
    private Path genomeSequenceFilePath;
    /** Species being processed; its scientific name filters the expression data. */
    private Species species;
    /** JDBC connection to the {@code reference_genome.db} SQLite database. */
    private Connection sqlConn;
    /** Prepared query selecting a sequence chunk by chunk id. */
    private PreparedStatement sqlQuery;
    /** Chunk length used when indexing the genome; set to 2000 in the constructor. */
    private int CHUNK_SIZE;
    /** Suffix appended to chunk ids, derived from the chunk size ("2k" for 2000). */
    private String chunkIdSuffix;
    /** Names of the sequences present in the genome_sequence table. */
    private Set<String> indexedSequences;

    /**
     * Creates a parser whose input files are resolved relative to a gene data directory.
     *
     * @param path gene data directory; auxiliary files are resolved against it
     * @param path2 reference genome FASTA file
     * @param species species being processed
     * @param cellBaseSerializer serializer that receives the parsed objects
     */
    public GeneParserProto(Path path, Path path2, Species species, CellBaseSerializer cellBaseSerializer) {
        this(
                null, // GTF file is not known yet; see the helper calls below
                path.resolve("description.txt"), // gene descriptions
                path.resolve("xrefs.txt"), // xrefs
                path.resolve("idmapping_selected.tab.gz"), // UniProt id mapping
                path.resolve("MotifFeatures.gff"), // TFBS motifs
                path.resolve("mirna.txt"), // miRNA
                path.getParent().getParent().resolve("common/expression/allgenes_updown_in_organism_part.tab.gz"), // expression
                path.resolve("geneDrug/dgidb.tsv"), // gene-drug interactions
                path2,
                species,
                cellBaseSerializer);
        // Presumably these locate the GTF and FASTA files inside the gene directory;
        // they are defined elsewhere in this class.
        getGtfFileFromGeneDirectoryPath(path);
        getProteinFastaFileFromGeneDirectoryPath(path);
        getCDnaFastaFileFromGeneDirectoryPath(path);
    }

    /**
     * Creates a parser from explicit input file locations.
     *
     * @param path GTF file
     * @param path2 gene description file
     * @param path3 xrefs file
     * @param path4 UniProt id-mapping file
     * @param path5 TFBS GFF2 file
     * @param path6 miRNA file
     * @param path7 gene expression file
     * @param path8 gene-drug interaction file
     * @param path9 reference genome FASTA file
     * @param species species being processed
     * @param cellBaseSerializer serializer that receives the parsed objects
     */
    public GeneParserProto(Path path, Path path2, Path path3, Path path4, Path path5, Path path6, Path path7, Path path8, Path path9, Species species, CellBaseSerializer cellBaseSerializer) {
        super(cellBaseSerializer);

        // Genome chunking configuration: 2000-base chunks, ids suffixed with "2k".
        this.CHUNK_SIZE = 2000;
        this.chunkIdSuffix = (this.CHUNK_SIZE / 1000) + "k";

        // Lookup tables, pre-sized for large annotation sets.
        this.transcriptDict = new HashMap<>(250000);
        this.exonDict = new HashMap<>(8000000);

        // Input files.
        this.gtfFile = path;
        this.geneDescriptionFile = path2;
        this.xrefsFile = path3;
        this.uniprotIdMappingFile = path4;
        this.tfbsFile = path5;
        this.mirnaFile = path6;
        this.geneExpressionFile = path7;
        this.geneDrugFile = path8;
        this.genomeSequenceFilePath = path9;

        this.species = species;
    }

    /**
     * Reads the GTF file record by record and assembles gene, transcript and exon
     * protobuf builders from it.
     *
     * <p>Auxiliary data (gene descriptions, protein/cDNA sequences, TFBS motifs, miRNA genes)
     * is loaded first, then the SQLite genome index is opened so exon sequences can be fetched.
     * Records whose feature is gene, transcript, UTR or Selenocysteine are ignored.
     *
     * <p>{@code ClassNotFoundException} and {@code SQLException} are caught and only printed.
     *
     * @throws IOException if an input file cannot be read
     * @throws FileFormatException if an input file is malformed
     */
    @Override // org.opencb.cellbase.app.transform.CellBaseParser
    public void parse() throws IOException, SecurityException, NoSuchMethodException, FileFormatException, InterruptedException {
        // Last gene message built; this is the only object handed to the serializer (at end of input).
        MessageOrBuilder messageOrBuilder = null;
        // i: cDNA coordinate of the first base of the current exon; i2: running CDS coordinate.
        int i = 1;
        int i2 = 1;
        Map<String, String> geneDescriptionMap = getGeneDescriptionMap();
        // NOTE(review): the xref map is loaded but its result is discarded.
        getXrefMap();
        Map<String, Fasta> proteinSequencesMap = getProteinSequencesMap();
        Map<String, Fasta> cDnaSequencesMap = getCDnaSequencesMap();
        Map<String, SortedSet<Gff2>> tfbsMap = getTfbsMap();
        Map<String, GeneModel.MiRNAGene> map = getmiRNAGeneMap(this.mirnaFile);
        // NOTE(review): the gene-drug map is loaded but its result is discarded.
        getGeneDrugMap();
        try {
            connect(this.genomeSequenceFilePath);
            this.transcriptDict.clear();
            this.exonDict.clear();
            // arrayList: transcripts of the current gene; arrayList2: exon list of the current transcript.
            ArrayList arrayList = null;
            ArrayList arrayList2 = null;
            GeneModel.Gene.Builder builder = null;
            TranscriptModel.Transcript.Builder builder2 = null;
            TranscriptModel.Exon.Builder builder3 = null;
            // Id of the gene currently being assembled.
            String str = null;
            this.logger.info("Parsing gtf...");
            GtfReader gtfReader = new GtfReader(this.gtfFile);
            // NOTE(review): this writer is opened and closed but never written to.
            PrintWriter printWriter = new PrintWriter(new File("/tmp/aaa.json"));
            while (true) {
                Gtf read = gtfReader.read();
                if (read == null) {
                    // End of input: serialize and release resources.
                    // NOTE(review): only the message built at the last gene change is serialized here;
                    // the gene still being assembled when input ends is never built.
                    printWriter.close();
                    this.serializer.serialize(messageOrBuilder);
                    gtfReader.close();
                    this.serializer.close();
                    try {
                        disconnectSqlite();
                        return;
                    } catch (SQLException e) {
                        e.printStackTrace();
                        return;
                    }
                }
                if (!read.getFeature().equals(EtlCommons.GENE_DATA) && !read.getFeature().equals("transcript") && !read.getFeature().equals("UTR") && !read.getFeature().equals("Selenocysteine")) {
                    String str2 = (String) read.getAttributes().get("gene_id");
                    String str3 = (String) read.getAttributes().get("transcript_id");
                    // Gene change: finish the previous gene and start a new builder.
                    if (str == null || !str.equals(str2)) {
                        str = str2;
                        if (builder != null) {
                            builder.addAllTranscripts(arrayList);
                            messageOrBuilder = builder.build();
                            // The finished gene is printed to stdout as JSON; it is not serialized here.
                            System.out.println(JsonFormat.printer().print(messageOrBuilder));
                            this.logger.info("Serialized {}", builder.getId());
                        }
                        arrayList = new ArrayList();
                        this.logger.info("New gene '{}' found", str2);
                        builder = GeneModel.Gene.newBuilder().setId(str2).setName((String) read.getAttributes().get("gene_name")).setBiotype((String) read.getAttributes().get("gene_biotype")).setStatus("KNOWN").setChromosome(read.getSequenceName().replaceFirst("chr", "")).setStart(read.getStart()).setEnd(read.getEnd()).setStrand(read.getStrand()).setSource("Ensembl").setDescription(geneDescriptionMap.get(str2) != null ? geneDescriptionMap.get(str2) : "");
                        if (map.get(str2) != null) {
                            builder.setMirna(map.get(str2));
                        }
                    }
                    if (this.transcriptDict.containsKey(str3)) {
                        // Known transcript: builder2 keeps whatever transcript builder was created last.
                    } else {
                        this.logger.info("New transcript '{}' found", str3);
                        String replaceFirst = read.getSequenceName().replaceFirst("chr", "");
                        List<TranscriptModel.TranscriptTfbs> transcriptTfbses = getTranscriptTfbses(read, replaceFirst, tfbsMap);
                        Map attributes = read.getAttributes();
                        arrayList2 = new ArrayList();
                        // Biotype falls back to the GTF source column when transcript_biotype is absent.
                        builder2 = TranscriptModel.Transcript.newBuilder().setId(str3).setName((String) attributes.get("transcript_name")).setBiotype(attributes.get("transcript_biotype") != null ? (String) attributes.get("transcript_biotype") : read.getSource()).setStatus("KNOWN").setChromosome(replaceFirst).setStart(read.getStart()).setEnd(read.getEnd()).setStrand(read.getStrand()).setGenomicCodingStart(0).setGenomicCodingEnd(0).setCdnaCodingStart(0).setCdnaCodingEnd(0).setCdsLength(0).setProteinId("").setDescription("").addAllTfbs(transcriptTfbses);
                        // The "tag" attribute is read but nothing is done with it.
                        if (((String) read.getAttributes().get("tag")) != null) {
                        }
                        Fasta fasta = proteinSequencesMap.get(str3);
                        if (fasta != null) {
                            builder2.setProteinSequence(fasta.getSeq());
                        }
                        Fasta fasta2 = cDnaSequencesMap.get(str3);
                        if (fasta2 != null) {
                            builder2.setCdnaSequence(fasta2.getSeq());
                        }
                        this.transcriptDict.put(str3, Integer.valueOf(arrayList.size()));
                        // NOTE(review): the transcript is built and stored here, before any exon/CDS
                        // updates are applied to builder2 below — confirm those later updates are
                        // meant to be absent from the stored message.
                        arrayList.add(builder2.build());
                    }
                    updateTranscriptAndGeneCoords(builder2, builder, read);
                    if (read.getFeature().equalsIgnoreCase("exon")) {
                        builder3 = TranscriptModel.Exon.newBuilder().setId((String) read.getAttributes().get("exon_id")).setChromosome(read.getSequenceName().replaceFirst("chr", "")).setStart(read.getStart()).setEnd(read.getEnd()).setStrand(read.getStrand()).setGenomicCodingStart(0).setGenomicCodingEnd(0).setCdnaCodingStart(0).setCdnaCodingEnd(0).setCdsStart(0).setCdsEnd(0).setPhase(-1).setExonNumber(Integer.parseInt((String) read.getAttributes().get("exon_number"))).setSequence(getExonSequence(read.getSequenceName(), read.getStart(), read.getEnd()));
                        // NOTE(review): a null placeholder is added and arrayList2 is never read afterwards.
                        arrayList2.add(null);
                        this.exonDict.put(builder2.getId() + "_" + builder3.getExonNumber(), builder3);
                        if (((String) read.getAttributes().get("exon_number")).equals("1")) {
                            // First exon of a transcript: restart both coordinate counters.
                            i = 1;
                            i2 = 1;
                        } else {
                            // Advance the cDNA offset by the length of the previous exon.
                            i += (this.exonDict.get(builder2.getId() + "_" + (builder3.getExonNumber() - 1)).getEnd() - this.exonDict.get(builder2.getId() + "_" + (builder3.getExonNumber() - 1)).getStart()) + 1;
                        }
                    } else {
                        // Non-exon feature: look up the most recent exon of the current transcript.
                        // NOTE(review): builder3 is dereferenced here, so a non-exon record seen before
                        // any exon record would throw a NullPointerException.
                        builder3 = this.exonDict.get(builder2.getId() + "_" + builder3.getExonNumber());
                        if (read.getFeature().equalsIgnoreCase("CDS")) {
                            if (read.getStrand().equals("+") || read.getStrand().equals("1")) {
                                // Forward strand: cDNA offsets are measured from the exon start.
                                builder3.setGenomicCodingStart(read.getStart());
                                builder3.setGenomicCodingEnd(read.getEnd());
                                builder3.setCdnaCodingStart((read.getStart() - builder3.getStart()) + i);
                                builder3.setCdnaCodingEnd((read.getEnd() - builder3.getStart()) + i);
                                builder2.setCdnaCodingEnd((read.getEnd() - builder3.getStart()) + i);
                                builder3.setCdsStart(i2);
                                builder3.setCdsEnd((read.getEnd() - read.getStart()) + i2);
                                i2 += (read.getEnd() - read.getStart()) + 1;
                                builder2.setCdsLength(i2 - 1);
                                builder3.setPhase(Integer.valueOf(read.getFrame()).intValue());
                                // Widen the transcript's genomic coding range; 0 means "not set yet".
                                if (builder2.getGenomicCodingStart() == 0 || builder2.getGenomicCodingStart() > read.getStart()) {
                                    builder2.setGenomicCodingStart(read.getStart());
                                }
                                if (builder2.getGenomicCodingEnd() == 0 || builder2.getGenomicCodingEnd() < read.getEnd()) {
                                    builder2.setGenomicCodingEnd(read.getEnd());
                                }
                                if (builder2.getCdnaCodingStart() == 0) {
                                    builder2.setCdnaCodingStart((read.getStart() - builder3.getStart()) + i);
                                }
                            } else {
                                // Reverse strand: cDNA offsets are measured from the exon end.
                                builder3.setGenomicCodingStart(read.getStart());
                                builder3.setGenomicCodingEnd(read.getEnd());
                                builder3.setCdnaCodingStart((builder3.getEnd() - read.getEnd()) + i);
                                builder3.setCdnaCodingEnd((builder3.getEnd() - read.getStart()) + i);
                                builder2.setCdnaCodingEnd((builder3.getEnd() - read.getStart()) + i);
                                builder3.setCdsStart(i2);
                                builder3.setCdsEnd((read.getEnd() - read.getStart()) + i2);
                                i2 += (read.getEnd() - read.getStart()) + 1;
                                builder2.setCdsLength(i2 - 1);
                                builder3.setPhase(Integer.valueOf(read.getFrame()).intValue());
                                if (builder2.getGenomicCodingStart() == 0 || builder2.getGenomicCodingStart() > read.getStart()) {
                                    builder2.setGenomicCodingStart(read.getStart());
                                }
                                if (builder2.getGenomicCodingEnd() == 0 || builder2.getGenomicCodingEnd() < read.getEnd()) {
                                    builder2.setGenomicCodingEnd(read.getEnd());
                                }
                                if (builder2.getCdnaCodingStart() == 0) {
                                    builder2.setCdnaCodingStart((builder3.getEnd() - read.getEnd()) + i);
                                }
                            }
                            builder2.setProteinId((String) read.getAttributes().get("protein_id"));
                        }
                        // start_codon records need no handling.
                        if (read.getFeature().equalsIgnoreCase("start_codon")) {
                        }
                        if (read.getFeature().equalsIgnoreCase("stop_codon")) {
                            // Extend the coding region of the exon and transcript over the stop codon.
                            if (builder3.getStrand().equals("+")) {
                                builder3.setGenomicCodingEnd(read.getEnd());
                                builder3.setCdnaCodingEnd((read.getEnd() - builder3.getStart()) + i);
                                builder3.setCdsEnd((read.getEnd() - read.getStart()) + i2);
                                i2 += read.getEnd() - read.getStart();
                                builder2.setGenomicCodingEnd(read.getEnd());
                                builder2.setCdnaCodingEnd((read.getEnd() - builder3.getStart()) + i);
                                builder2.setCdsLength(i2 - 1);
                            } else {
                                builder3.setGenomicCodingStart(read.getStart());
                                builder3.setCdnaCodingEnd((builder3.getEnd() - read.getStart()) + i);
                                builder3.setCdsEnd((read.getEnd() - read.getStart()) + i2);
                                i2 += read.getEnd() - read.getStart();
                                builder2.setGenomicCodingStart(read.getStart());
                                builder2.setCdnaCodingEnd((builder3.getEnd() - read.getStart()) + i);
                                builder2.setCdsLength(i2 - 1);
                            }
                        }
                    }
                }
            }
        } catch (ClassNotFoundException | SQLException e2) {
            // Database problems abort parsing; the error is only printed, not rethrown.
            e2.printStackTrace();
        }
    }

    /**
     * Loads gene-drug interactions from the gene-drug file, which may be gzipped.
     *
     * <p>The first line is treated as a header and skipped. Each remaining tab-separated line
     * is keyed by its first column; columns 0, 4, 2 and 3 are passed to the interaction
     * together with the fixed source name "dgidb".
     *
     * @return interactions grouped by the value of the first column
     * @throws IOException if the file cannot be opened or read
     */
    private Map<String, List<GeneDrugInteraction>> getGeneDrugMap() throws IOException {
        Map<String, List<GeneDrugInteraction>> geneDrugMap = new HashMap<>();
        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader reader = this.geneDrugFile.toFile().getName().endsWith(".gz")
                ? new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(this.geneDrugFile.toFile()))))
                : Files.newBufferedReader(this.geneDrugFile, Charset.defaultCharset())) {
            this.logger.info("Loading gene-drug data form {}", this.geneDrugFile);
            reader.readLine(); // header
            int lineCounter = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                lineCounter++;
                String[] fields = line.split("\t");
                // Columns 0..4 are read below; skip short lines instead of aborting the whole load.
                if (fields.length < 5) {
                    this.logger.warn("Gene-drug line {} has fewer than 5 columns. Entry omitted. ", Integer.valueOf(lineCounter));
                    continue;
                }
                addValueToMapElement(geneDrugMap, fields[0], new GeneDrugInteraction(fields[0], fields[4], "dgidb", fields[2], fields[3]));
            }
        }
        return geneDrugMap;
    }

    /**
     * Loads UP/DOWN expression calls for the configured species from the expression file,
     * which may be gzipped.
     *
     * <p>Leading lines starting with '#' are skipped, and so is the first line after them
     * (it is consumed by the header loop). Only rows whose third column equals the species'
     * scientific name are kept; rows whose call is neither UP nor DOWN are logged and dropped.
     *
     * @return expression entries grouped by the value of the second column
     * @throws IOException if the file cannot be opened or read
     */
    private Map<String, List<Expression>> getGeneExpressionMap() throws IOException {
        Map<String, List<Expression>> expressionMap = new HashMap<>();
        // try-with-resources closes the reader even when a row fails to parse.
        try (BufferedReader reader = this.geneExpressionFile.toFile().getName().endsWith(".gz")
                ? new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(this.geneExpressionFile.toFile()))))
                : Files.newBufferedReader(this.geneExpressionFile, Charset.defaultCharset())) {
            this.logger.info("Loading gene expression data form {}", this.geneExpressionFile);
            int lineCounter = 0;
            String line;
            // The line that ends this loop (first one not starting with '#') is discarded too.
            while ((line = reader.readLine()) != null && line.startsWith("#")) {
                lineCounter++;
            }
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                if (this.species.getScientificName().equals(fields[2])) {
                    ExpressionCall call = null;
                    if (fields[7].equals("UP")) {
                        call = ExpressionCall.UP;
                    } else if (fields[7].equals("DOWN")) {
                        call = ExpressionCall.DOWN;
                    }
                    if (call != null) {
                        addValueToMapElement(expressionMap, fields[1], new Expression(fields[1], (String) null, fields[3], fields[4], fields[5], fields[6], call, Float.valueOf(fields[8])));
                    } else {
                        this.logger.warn("Expression tags found different from UP/DOWN at line {}. Entry omitted. ", Integer.valueOf(lineCounter));
                    }
                }
                lineCounter++;
            }
        }
        return expressionMap;
    }

    /**
     * Appends a value to the list stored under a key, creating the list on first use.
     *
     * @param map multimap-style map to update
     * @param str key whose list receives the value
     * @param t value to append
     */
    private <T> void addValueToMapElement(Map<String, List<T>> map, String str, T t) {
        if (!map.containsKey(str)) {
            // First value for this key: start a new list.
            List<T> values = new ArrayList<>();
            values.add(t);
            map.put(str, values);
        } else {
            map.get(str).add(t);
        }
    }

    /**
     * Collects the TFBS motifs on a chromosome that fall in a window around a transcript's start.
     *
     * <p>On the "+" strand a motif is kept when its end is past {@code start - 2500} and its
     * start is not past {@code start + 500}. On any other strand it is kept when its start lies
     * past {@code end - 500} and not past {@code end + 2500}. Iteration stops at the first motif
     * beyond the upper bound, relying on the set's ordering by start.
     *
     * @param gtf GTF record of the transcript
     * @param str chromosome name used as key in the motif map
     * @param map motifs per chromosome
     * @return motifs in the window, possibly empty
     */
    private List<TranscriptModel.TranscriptTfbs> getTranscriptTfbses(Gtf gtf, String str, Map<String, SortedSet<Gff2>> map) {
        List<TranscriptModel.TranscriptTfbs> selected = new ArrayList<>();
        if (!map.containsKey(str)) {
            return selected;
        }
        for (Gff2 motif : map.get(str)) {
            boolean forward = gtf.getStrand().equals("+");
            int upperBound = forward ? gtf.getStart() + 500 : gtf.getEnd() + 2500;
            if (motif.getStart() > upperBound) {
                // Motifs are visited in start order, so nothing further can match.
                break;
            }
            boolean insideWindow = forward
                    ? motif.getEnd() > gtf.getStart() - 2500
                    : motif.getStart() > gtf.getEnd() - 500;
            if (insideWindow) {
                selected.add(addTranscriptTfbstoList(motif, gtf, str));
            }
        }
        return selected;
    }

    /**
     * Converts a GFF2 motif into a transcript TFBS message.
     *
     * <p>The motif attribute is expected to look like {@code key=tfName:pwm}: the text after
     * the first '=' is split on ':' into the factor name and the PWM id.
     *
     * @param gff2 motif record
     * @param gtf GTF record of the transcript, used for the relative coordinates
     * @param str chromosome name stored in the message
     * @return the built TFBS message
     */
    private TranscriptModel.TranscriptTfbs addTranscriptTfbstoList(Gff2 gff2, Gtf gtf, String str) {
        String attributeValue = gff2.getAttribute().split("=")[1];
        String[] nameAndPwm = attributeValue.split(":");

        TranscriptModel.TranscriptTfbs.Builder tfbs = TranscriptModel.TranscriptTfbs.newBuilder();
        tfbs.setTfName(nameAndPwm[0]);
        tfbs.setPwm(nameAndPwm[1]);
        tfbs.setChromosome(str);
        tfbs.setStart(gff2.getStart());
        tfbs.setEnd(gff2.getEnd());
        tfbs.setStrand(gff2.getStrand());
        tfbs.setRelativeStart(getRelativeTranscriptTfbsStart(gff2, gtf).intValue());
        tfbs.setRelativeEnd(getRelativeTranscriptTfbsEnd(gff2, gtf).intValue());
        tfbs.setScore(Float.parseFloat(gff2.getScore()));
        return tfbs.build();
    }

    /**
     * Computes the motif start relative to the transcript start, with no position 0.
     *
     * <p>On the "+" strand the reference is the GTF start and the motif start is used; on any
     * other strand the reference is the GTF end and the motif end is used. Positions before
     * the reference are negative; positions at or after it are shifted by one.
     *
     * @param gff2 motif record
     * @param gtf GTF record of the transcript
     * @return the relative start
     */
    private Integer getRelativeTranscriptTfbsStart(Gff2 gff2, Gtf gtf) {
        if (gtf.getStrand().equals("+")) {
            if (gff2.getStart() < gtf.getStart()) {
                return Integer.valueOf(gff2.getStart() - gtf.getStart());
            }
            return Integer.valueOf((gff2.getStart() - gtf.getStart()) + 1);
        }
        if (gff2.getEnd() > gtf.getEnd()) {
            return Integer.valueOf(gtf.getEnd() - gff2.getEnd());
        }
        return Integer.valueOf((gtf.getEnd() - gff2.getEnd()) + 1);
    }

    /**
     * Computes the motif end relative to the transcript start, with no position 0.
     *
     * <p>On the "+" strand the reference is the GTF start and the motif end is used; on any
     * other strand the reference is the GTF end and the motif start is used. Positions before
     * the reference are negative; positions at or after it are shifted by one.
     *
     * @param gff2 motif record
     * @param gtf GTF record of the transcript
     * @return the relative end
     */
    private Integer getRelativeTranscriptTfbsEnd(Gff2 gff2, Gtf gtf) {
        if (gtf.getStrand().equals("+")) {
            if (gff2.getEnd() < gtf.getStart()) {
                return Integer.valueOf(gff2.getEnd() - gtf.getStart());
            }
            return Integer.valueOf((gff2.getEnd() - gtf.getStart()) + 1);
        }
        if (gff2.getStart() > gtf.getEnd()) {
            return Integer.valueOf(gtf.getEnd() - gff2.getStart());
        }
        return Integer.valueOf((gtf.getEnd() - gff2.getStart()) + 1);
    }

    /**
     * Loads the TFBS motif file into per-chromosome sorted sets.
     *
     * <p>A missing file, or a path that is a directory, yields an empty map. Any read or
     * format error is logged and also yields an empty map, so parsing can continue without
     * TFBS data.
     *
     * @return motifs per chromosome, possibly empty
     */
    private Map<String, SortedSet<Gff2>> getTfbsMap() {
        Map<String, SortedSet<Gff2>> tfbsMap = new HashMap<>();
        try {
            if (this.tfbsFile != null && Files.exists(this.tfbsFile, new LinkOption[0]) && !Files.isDirectory(this.tfbsFile, new LinkOption[0])) {
                Gff2Reader gff2Reader = new Gff2Reader(this.tfbsFile);
                try {
                    Gff2 motif;
                    while ((motif = gff2Reader.read()) != null) {
                        addTfbsMotifToMap(tfbsMap, motif);
                    }
                } finally {
                    // Release the file handle even when a record fails to parse.
                    gff2Reader.close();
                }
            }
        } catch (IOException | NoSuchMethodException | FileFormatException e) {
            this.logger.error("Error loading TFBS file: " + e.getMessage());
            this.logger.error("transcript TFBS objects will not be serialized");
            // Drop partial data so callers never see a half-loaded map.
            tfbsMap.clear();
        }
        return tfbsMap;
    }

    /**
     * Adds a motif to the sorted set of its chromosome, creating the set on first use.
     *
     * <p>The chromosome key is the sequence name with the first "chr" removed. Sets are
     * ordered by motif start and then by attribute string, so two motifs with the same start
     * and attribute count as duplicates.
     *
     * @param map motifs per chromosome
     * @param gff2 motif to add
     */
    private void addTfbsMotifToMap(Map<String, SortedSet<Gff2>> map, Gff2 gff2) {
        String chromosome = gff2.getSequenceName().replaceFirst("chr", "");
        SortedSet<Gff2> motifs = map.get(chromosome);
        if (motifs == null) {
            motifs = new TreeSet<>(new Comparator<Gff2>() {
                @Override
                public int compare(Gff2 first, Gff2 second) {
                    // Integer.compare avoids the overflow that subtracting the starts could cause.
                    int byStart = Integer.compare(first.getStart(), second.getStart());
                    return byStart != 0 ? byStart : first.getAttribute().compareTo(second.getAttribute());
                }
            });
            map.put(chromosome, motifs);
        }
        motifs.add(gff2);
    }

    /**
     * Loads the cDNA FASTA file into a map keyed by FASTA id.
     *
     * <p>If the file is unset, missing or a directory, a warning is logged and an empty map
     * is returned.
     *
     * @return cDNA entries by FASTA id, possibly empty
     * @throws IOException if the file cannot be read
     * @throws FileFormatException if the file is not valid FASTA
     */
    private Map<String, Fasta> getCDnaSequencesMap() throws IOException, FileFormatException {
        this.logger.info("Loading ENSEMBL's cDNA sequences...");
        Map<String, Fasta> cDnaSequences = new HashMap<>();
        if (this.cDnaFastaFile == null || !Files.exists(this.cDnaFastaFile, new LinkOption[0]) || Files.isDirectory(this.cDnaFastaFile, new LinkOption[0])) {
            this.logger.warn("cDNA fasta file " + this.cDnaFastaFile + " not found");
            this.logger.warn("ENSEMBL's cDNA sequences not loaded");
            return cDnaSequences;
        }
        FastaReader fastaReader = new FastaReader(this.cDnaFastaFile);
        List<Fasta> entries;
        try {
            entries = fastaReader.readAll();
        } finally {
            // Close the reader even when reading fails.
            fastaReader.close();
        }
        for (Fasta entry : entries) {
            cDnaSequences.put(entry.getId(), entry);
        }
        return cDnaSequences;
    }

    /**
     * Loads the protein FASTA file into a map keyed by transcript id.
     *
     * <p>The transcript id is taken from each entry's description: the text after
     * "transcript:" up to the next whitespace. If the file is unset, missing or a directory,
     * a warning is logged and an empty map is returned.
     *
     * @return protein entries by transcript id, possibly empty
     * @throws IOException if the file cannot be read
     * @throws FileFormatException if the file is not valid FASTA
     */
    private Map<String, Fasta> getProteinSequencesMap() throws IOException, FileFormatException {
        this.logger.info("Loading ENSEMBL's protein sequences...");
        Map<String, Fasta> proteinSequences = new HashMap<>();
        if (this.proteinFastaFile == null || !Files.exists(this.proteinFastaFile, new LinkOption[0]) || Files.isDirectory(this.proteinFastaFile, new LinkOption[0])) {
            this.logger.warn("Protein fasta file " + this.proteinFastaFile + " not found");
            this.logger.warn("ENSEMBL's protein sequences not loaded");
            return proteinSequences;
        }
        FastaReader fastaReader = new FastaReader(this.proteinFastaFile);
        List<Fasta> entries;
        try {
            entries = fastaReader.readAll();
        } finally {
            // Close the reader even when reading fails.
            fastaReader.close();
        }
        for (Fasta entry : entries) {
            String transcriptId = entry.getDescription().split("transcript:")[1].split("\\s")[0];
            proteinSequences.put(transcriptId, entry);
        }
        return proteinSequences;
    }

    /**
     * Loads transcript cross-references from the xrefs file and the UniProt id-mapping file.
     *
     * <p>Xref rows need at least four tab-separated columns: key, id, database name and
     * display name. For the UniProt file, column 19 is split on "; " into transcript ids when
     * it starts with "ENST", and two UniProtKB xrefs are added per transcript. Missing files
     * are logged and skipped.
     *
     * @return xrefs grouped by the key column / transcript id, possibly empty
     * @throws IOException if a file cannot be read
     */
    private Map<String, ArrayList<TranscriptModel.Xref>> getXrefMap() throws IOException {
        this.logger.info("Loading xref data...");
        Map<String, ArrayList<TranscriptModel.Xref>> xrefMap = new HashMap<>();
        if (this.xrefsFile == null || !Files.exists(this.xrefsFile, new LinkOption[0])) {
            this.logger.warn("Xrefs file " + this.xrefsFile + " not found");
            this.logger.warn("Xref data not loaded");
        } else {
            for (String line : Files.readAllLines(this.xrefsFile, Charset.defaultCharset())) {
                String[] fields = line.split("\t", -1);
                if (fields.length >= 4) {
                    ArrayList<TranscriptModel.Xref> xrefs = xrefMap.get(fields[0]);
                    if (xrefs == null) {
                        xrefs = new ArrayList<>();
                        xrefMap.put(fields[0], xrefs);
                    }
                    xrefs.add(TranscriptModel.Xref.newBuilder().setId(fields[1]).setDbName(fields[2]).setDbDisplayName(fields[3]).build());
                }
            }
        }
        this.logger.info("Loading protein mapping into xref data...");
        if (this.uniprotIdMappingFile == null || !Files.exists(this.uniprotIdMappingFile, new LinkOption[0])) {
            this.logger.warn("Uniprot if mapping file " + this.uniprotIdMappingFile + " not found");
            this.logger.warn("Protein mapping into xref data not loaded");
        } else {
            // try-with-resources closes the reader even when a row fails to parse.
            try (BufferedReader reader = FileUtils.newBufferedReader(this.uniprotIdMappingFile)) {
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] fields = line.split("\t", -1);
                    // Index 19 is read, so at least 20 columns are required.
                    if (fields.length > 19 && fields[19].startsWith("ENST")) {
                        for (String transcriptId : fields[19].split("; ")) {
                            ArrayList<TranscriptModel.Xref> xrefs = xrefMap.get(transcriptId);
                            if (xrefs == null) {
                                xrefs = new ArrayList<>();
                                xrefMap.put(transcriptId, xrefs);
                            }
                            xrefs.add(TranscriptModel.Xref.newBuilder().setId(fields[0]).setDbName("uniprotkb_acc").setDbDisplayName("UniProtKB ACC").build());
                            // NOTE(review): the "uniprotkb_id" xref also uses column 0 (the accession).
                            // The UniProt id-mapping layout puts the entry name in column 1 — confirm
                            // whether fields[1] was intended.
                            xrefs.add(TranscriptModel.Xref.newBuilder().setId(fields[0]).setDbName("uniprotkb_id").setDbDisplayName("UniProtKB ID").build());
                        }
                    }
                }
            }
        }
        return xrefMap;
    }

    /**
     * Loads gene descriptions from a tab-separated file: first column is the key, second
     * column is the description.
     *
     * <p>If the file is unset or missing, a warning is logged and an empty map is returned.
     * Lines with fewer than two columns are skipped.
     *
     * @return descriptions by first-column key, possibly empty
     * @throws IOException if the file cannot be read
     */
    private Map<String, String> getGeneDescriptionMap() throws IOException {
        this.logger.info("Loading gene description data...");
        Map<String, String> descriptions = new HashMap<>();
        if (this.geneDescriptionFile == null || !Files.exists(this.geneDescriptionFile, new LinkOption[0])) {
            this.logger.warn("Gene description file " + this.geneDescriptionFile + " not found");
            this.logger.warn("Gene description data not loaded");
            return descriptions;
        }
        for (String line : Files.readAllLines(this.geneDescriptionFile, Charset.defaultCharset())) {
            String[] fields = line.split("\t", -1);
            // A blank or tab-less line has a single field; skip it rather than fail on fields[1].
            if (fields.length >= 2) {
                descriptions.put(fields[0], fields[1]);
            }
        }
        return descriptions;
    }

    /**
     * Tells whether a gene id starts a new gene.
     *
     * @param gene gene currently held, or null if there is none
     * @param str gene id to compare
     * @return true if there is no current gene or its id differs from {@code str}
     */
    private boolean newGene(GeneModel.Gene gene, String str) {
        if (gene == null) {
            return true;
        }
        return !str.equals(gene.getId());
    }

    /**
     * Opens the SQLite genome sequence database located next to the genome FASTA file.
     *
     * <p>If {@code reference_genome.db} does not exist or is empty once the connection is
     * open, the table is created and the FASTA file is indexed into it. The set of indexed
     * sequence names and the chunk lookup query are then prepared.
     *
     * @param path reference genome FASTA file; the database lives in its parent directory
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException if the database cannot be opened, created or queried
     * @throws IOException if the database file size or the FASTA file cannot be read
     */
    private void connect(Path path) throws ClassNotFoundException, SQLException, IOException {
        this.logger.info("Connecting to reference genome sequence database ...");
        Class.forName("org.sqlite.JDBC");
        this.sqlConn = DriverManager.getConnection("jdbc:sqlite:" + path.getParent().toString() + "/reference_genome.db");
        boolean databaseMissing = !Files.exists(Paths.get(path.getParent().toString(), "reference_genome.db"), new LinkOption[0]);
        if (databaseMissing || Files.size(path.getParent().resolve("reference_genome.db")) == 0) {
            this.logger.info("Genome sequence database doesn't exists and will be created");
            // Close the DDL statement once it has run instead of leaving it to the connection.
            try (java.sql.Statement createTable = this.sqlConn.createStatement()) {
                createTable.executeUpdate("CREATE TABLE if not exists genome_sequence (sequenceName VARCHAR(50), chunkId VARCHAR(30), start INT, end INT, sequence VARCHAR(2000))");
            }
            indexReferenceGenomeFasta(path);
        }
        this.indexedSequences = getIndexedSequences();
        this.sqlQuery = this.sqlConn.prepareStatement("SELECT sequence from genome_sequence WHERE chunkId = ? ");
        this.logger.info("Genome sequence database connected");
    }

    /**
     * Reads the distinct sequence names stored in the genome_sequence table.
     *
     * @return names of all indexed sequences, possibly empty
     * @throws SQLException if the query fails
     */
    private Set<String> getIndexedSequences() throws SQLException {
        Set<String> sequenceNames = new HashSet<>();
        // Both the statement and the result set are closed on exit, including on failure.
        try (PreparedStatement query = this.sqlConn.prepareStatement("SELECT DISTINCT sequenceName from genome_sequence");
             ResultSet rows = query.executeQuery()) {
            while (rows.next()) {
                sequenceNames.add(rows.getString(1));
            }
        }
        return sequenceNames;
    }

    /**
     * Closes the chunk lookup statement and the database connection, if they were opened.
     *
     * @throws SQLException if closing either resource fails
     */
    private void disconnectSqlite() throws SQLException {
        try {
            if (this.sqlQuery != null) {
                this.sqlQuery.close();
            }
        } finally {
            // Always attempt to close the connection, even if closing the statement failed.
            if (this.sqlConn != null) {
                this.sqlConn.close();
            }
        }
    }

    /**
     * Reads the genome FASTA file and stores every sequence in the genome_sequence table,
     * then creates an index on the chunk id column.
     *
     * <p>The sequence name is the header text after '>' up to the first space. A header
     * ending in "HAP" marks the sequence as a haplotype, which the insert step skips.
     *
     * @param path genome FASTA file
     * @throws IOException if the FASTA file cannot be read
     * @throws ClassNotFoundException declared for compatibility; not thrown by this code
     * @throws SQLException if inserting or indexing fails
     */
    private void indexReferenceGenomeFasta(Path path) throws IOException, ClassNotFoundException, SQLException {
        // The reader and the insert statement are both closed on exit, including on failure.
        try (BufferedReader reader = FileUtils.newBufferedReader(path);
             PreparedStatement insert = this.sqlConn.prepareStatement("INSERT INTO genome_sequence (chunkID, start, end, sequence, sequenceName) values (?, ?, ?, ?, ?)")) {
            String sequenceName = "";
            boolean haplotype = false;
            StringBuilder sequence = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(">")) {
                    // A new header ends the previous sequence: store it before resetting the buffer.
                    if (sequence.length() > 0) {
                        insertGenomeSequence(sequenceName, haplotype, insert, sequence);
                    }
                    sequenceName = line.replace(">", "").split(" ")[0];
                    haplotype = line.endsWith("HAP");
                    sequence.setLength(0);
                } else {
                    sequence.append(line);
                }
            }
            // Store the last sequence in the file.
            insertGenomeSequence(sequenceName, haplotype, insert, sequence);
        }
        try (java.sql.Statement createIndex = this.sqlConn.createStatement()) {
            createIndex.executeUpdate("CREATE INDEX chunkId_idx on genome_sequence(chunkId)");
        }
    }

    /**
     * Stores one genome sequence in the database, split into chunks.
     *
     * <p>Haplotype sequences and sequences whose name contains "PATCH" are skipped, as is an
     * empty buffer. A sequence shorter than the chunk size is stored as a single row with
     * chunk id {@code <name>_0_<suffix>}. Longer sequences are stored as consecutive rows
     * {@code <name>_<n>_<suffix>}; the first chunk holds {@code CHUNK_SIZE - 1} bases and the
     * last chunk takes whatever remains. Rows are batched and committed every 10000 chunks.
     *
     * @param str sequence name
     * @param z true if the sequence is a haplotype and must be skipped
     * @param preparedStatement insert statement with parameters
     *        (chunkId, start, end, sequence, sequenceName)
     * @param sb sequence bases
     * @throws SQLException if an insert or commit fails
     */
    private void insertGenomeSequence(String str, boolean z, PreparedStatement preparedStatement, StringBuilder sb) throws SQLException {
        if (z || str.contains("PATCH")) {
            return;
        }
        // Nothing to store (e.g. an empty file or a header without bases).
        if (sb.length() == 0) {
            return;
        }
        this.logger.info("Indexing genome sequence {} ...", str);
        preparedStatement.setString(5, str);
        int length = sb.length();
        if (length < this.CHUNK_SIZE) {
            preparedStatement.setString(1, str + "_0_" + this.chunkIdSuffix);
            preparedStatement.setInt(2, 1);
            // NOTE(review): end is length - 1 here, while the chunked path stores length for the
            // last chunk — confirm which convention readers of this column expect.
            preparedStatement.setInt(3, length - 1);
            preparedStatement.setString(4, sb.toString());
            // Binding the parameters alone inserts nothing; the statement has to be executed.
            preparedStatement.executeUpdate();
            return;
        }
        // NOTE(review): when length == CHUNK_SIZE exactly, the loop below stores positions
        // 1..CHUNK_SIZE-1 and then stops, so the final base is not stored.
        int chunk = 0;
        int start = 1;
        int end = this.CHUNK_SIZE - 1;
        this.sqlConn.setAutoCommit(false);
        while (start < length) {
            // Flush periodically so the batch does not grow without bound.
            if (chunk % 10000 == 0 && chunk != 0) {
                preparedStatement.executeBatch();
                this.sqlConn.commit();
            }
            preparedStatement.setString(1, str + "_" + chunk + "_" + this.chunkIdSuffix);
            preparedStatement.setInt(2, start);
            if (start == 1) {
                // First chunk: positions 1..CHUNK_SIZE-1.
                preparedStatement.setInt(3, end);
                preparedStatement.setString(4, sb.substring(0, this.CHUNK_SIZE - 1));
                start += this.CHUNK_SIZE - 1;
            } else if (start + this.CHUNK_SIZE < length) {
                // Full middle chunk.
                preparedStatement.setInt(3, end);
                preparedStatement.setString(4, sb.substring(start - 1, (start + this.CHUNK_SIZE) - 1));
                start += this.CHUNK_SIZE;
            } else {
                // Last chunk: everything that remains.
                preparedStatement.setInt(3, length);
                preparedStatement.setString(4, sb.substring(start - 1, length));
                start = length;
            }
            preparedStatement.addBatch();
            end = (start + this.CHUNK_SIZE) - 1;
            chunk++;
        }
        preparedStatement.executeBatch();
        this.sqlConn.commit();
        this.sqlConn.setAutoCommit(true);
    }

    /**
     * Reads the bases between two positions of an indexed sequence from the chunk store.
     *
     * @param str name of the sequence
     * @param i   first position of the region (inclusive)
     * @param i2  last position of the region (inclusive)
     * @return the bases of the region, or an empty string when the sequence is not indexed, a chunk
     *         is missing, the stored data does not cover the region, or the query fails
     */
    private String getExonSequence(String str, int i, int i2) {
        if (!this.indexedSequences.contains(str)) {
            return "";
        }
        try {
            StringBuilder sb = new StringBuilder();
            int chunk = getChunk(i);
            int chunk2 = getChunk(i2);
            for (int i3 = chunk; i3 <= chunk2; i3++) {
                String chunkId = str + "_" + i3 + "_" + this.chunkIdSuffix;
                this.sqlQuery.setString(1, chunkId);
                // Close every result set and move the cursor onto the first row before reading it.
                try (ResultSet resultSet = this.sqlQuery.executeQuery()) {
                    if (!resultSet.next()) {
                        this.logger.error("Genome sequence chunk {} not found", chunkId);
                        return "";
                    }
                    sb.append(resultSet.getString(1));
                }
            }
            String sequence = sb.toString();
            // Chunk 0 starts at position 1 rather than 0, so its string indices are shifted by one.
            int shift = chunk > 0 ? 0 : 1;
            int begin = getOffset(i) - shift;
            int end = getOffset(i) + (i2 - i) + 1 - shift;
            if (sequence.length() > 0 && begin >= 0 && begin <= end && end <= sequence.length()) {
                return sequence.substring(begin, end);
            }
        } catch (SQLException e) {
            // The trailing throwable is logged as the cause by the logging framework.
            this.logger.error("Error obtaining exon sequence ({}:{}-{})", new Object[]{str, Integer.valueOf(i), Integer.valueOf(i2), e});
        }
        return "";
    }

    /**
     * Returns the index of the chunk a position falls into.
     *
     * @param i position to locate
     * @return {@code i} divided by {@code CHUNK_SIZE} using integer division
     */
    private int getChunk(int i) {
        final int chunkIndex = i / this.CHUNK_SIZE;
        return chunkIndex;
    }

    /**
     * Returns the offset of a position relative to the start of its chunk.
     *
     * @param i position to locate
     * @return the remainder of {@code i} divided by {@code CHUNK_SIZE}
     */
    private int getOffset(int i) {
        final int offsetInChunk = i % this.CHUNK_SIZE;
        return offsetInChunk;
    }

    /**
     * Widens the coordinates of a transcript and of a gene so that both span the given GTF feature.
     *
     * <p>A start is only overwritten when the feature starts earlier, an end only when the feature
     * ends later; coordinates that already cover the feature are left untouched.
     *
     * @param builder  transcript builder to widen
     * @param builder2 gene builder to widen
     * @param gtf      feature that must be covered
     */
    private void updateTranscriptAndGeneCoords(TranscriptModel.Transcript.Builder builder, GeneModel.Gene.Builder builder2, Gtf gtf) {
        // Starts: move left when the feature begins before the current start.
        if (gtf.getStart() < builder.getStart()) {
            builder.setStart(gtf.getStart());
        }
        if (gtf.getStart() < builder2.getStart()) {
            builder2.setStart(gtf.getStart());
        }

        // Ends: move right when the feature finishes after the current end.
        if (gtf.getEnd() > builder.getEnd()) {
            builder.setEnd(gtf.getEnd());
        }
        if (gtf.getEnd() > builder2.getEnd()) {
            builder2.setEnd(gtf.getEnd());
        }
    }

    /**
     * Loads miRNA genes from a tab-separated file.
     *
     * <p>Columns read from every line: 0 = map key, 1 = miRBase accession, 2 = miRBase id,
     * 3 = status, 4 = sequence, 5 = comma-separated aliases, 6 = comma-separated mature miRNAs,
     * each written as {@code accession|id|sequence}. The cDNA start of a mature miRNA is the
     * 1-based index of the first occurrence of its sequence inside the gene sequence (0 when it
     * does not occur).
     *
     * <p>NOTE(review): the existence check uses the {@code mirnaFile} field while the data is read
     * from {@code path}; presumably callers pass the same file, confirm against the caller.
     *
     * @param path file to read
     * @return miRNA genes by key; empty when the miRNA file is missing or is a directory
     * @throws IOException if the file cannot be read
     */
    private Map<String, GeneModel.MiRNAGene> getmiRNAGeneMap(Path path) throws IOException {
        this.logger.info("Loading miRNA data ...");
        Map<String, GeneModel.MiRNAGene> mirnaGenes = new HashMap<>();
        if (this.mirnaFile == null || !Files.exists(this.mirnaFile) || Files.isDirectory(this.mirnaFile)) {
            this.logger.warn("Mirna file " + this.mirnaFile + " not found");
            this.logger.warn("Mirna data not loaded");
            return mirnaGenes;
        }
        // try-with-resources closes the reader even when a line cannot be parsed.
        try (BufferedReader reader = Files.newBufferedReader(path, Charset.defaultCharset())) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");

                List<String> aliases = new ArrayList<>();
                for (String alias : fields[5].split(",")) {
                    aliases.add(alias);
                }

                List<GeneModel.MiRNAGene.MiRNAMature> matures = new ArrayList<>();
                for (String mature : fields[6].split(",")) {
                    String[] matureFields = mature.split("\\|");
                    // 1-based position of the mature sequence inside the gene sequence.
                    int cdnaStart = fields[4].indexOf(matureFields[2]) + 1;
                    int cdnaEnd = (cdnaStart + matureFields[2].length()) - 1;
                    matures.add(GeneModel.MiRNAGene.MiRNAMature.newBuilder()
                            .setMiRBaseAccession(matureFields[0])
                            .setMiRBaseId(matureFields[1])
                            .setSequence(matureFields[2])
                            .setCdnaStart(cdnaStart)
                            .setCdnaEnd(cdnaEnd)
                            .build());
                }

                mirnaGenes.put(fields[0], GeneModel.MiRNAGene.newBuilder()
                        .setMiRBaseAccession(fields[1])
                        .setMiRBaseId(fields[2])
                        .setStatus(fields[3])
                        .setSequence(fields[4])
                        .addAllAlias(aliases)
                        .addAllMatures(matures)
                        .build());
            }
        }
        return mirnaGenes;
    }

    /**
     * Sets {@code gtfFile} to the first entry of the directory whose name ends with
     * {@code .gtf} or {@code .gtf.gz}; the field is left untouched when there is no such entry.
     *
     * @param path directory to scan
     */
    private void getGtfFileFromGeneDirectoryPath(Path path) {
        // File.list() returns null when the path is not a directory or cannot be read.
        String[] fileNames = path.toFile().list();
        if (fileNames == null) {
            this.logger.warn("Cannot list files in {}", path);
            return;
        }
        for (String fileName : fileNames) {
            if (fileName.endsWith(".gtf") || fileName.endsWith(".gtf.gz")) {
                this.gtfFile = path.resolve(fileName);
                return;
            }
        }
    }

    /**
     * Sets {@code proteinFastaFile} to the first entry of the directory whose name ends with
     * {@code .pep.all.fa} or {@code .pep.all.fa.gz}; the field is left untouched when there is
     * no such entry.
     *
     * @param path directory to scan
     */
    private void getProteinFastaFileFromGeneDirectoryPath(Path path) {
        // File.list() returns null when the path is not a directory or cannot be read.
        String[] fileNames = path.toFile().list();
        if (fileNames == null) {
            this.logger.warn("Cannot list files in {}", path);
            return;
        }
        for (String fileName : fileNames) {
            if (fileName.endsWith(".pep.all.fa") || fileName.endsWith(".pep.all.fa.gz")) {
                this.proteinFastaFile = path.resolve(fileName);
                return;
            }
        }
    }

    /**
     * Sets {@code cDnaFastaFile} to the first entry of the directory whose name ends with
     * {@code .cdna.all.fa} or {@code .cdna.all.fa.gz}; the field is left untouched when there is
     * no such entry.
     *
     * @param path directory to scan
     */
    private void getCDnaFastaFileFromGeneDirectoryPath(Path path) {
        // File.list() returns null when the path is not a directory or cannot be read.
        String[] fileNames = path.toFile().list();
        if (fileNames == null) {
            this.logger.warn("Cannot list files in {}", path);
            return;
        }
        for (String fileName : fileNames) {
            if (fileName.endsWith(".cdna.all.fa") || fileName.endsWith(".cdna.all.fa.gz")) {
                this.cDnaFastaFile = path.resolve(fileName);
                return;
            }
        }
    }
}
