package org.opencb.cellbase.lib.download;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.FileAttribute;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.StringUtils;
import org.opencb.cellbase.core.config.CellBaseConfiguration;
import org.opencb.cellbase.core.config.SpeciesConfiguration;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.lib.EtlCommons;
import org.opencb.cellbase.lib.variant.VariantAnnotationUtils;
import org.opencb.commons.utils.FileUtils;

@Deprecated
/* loaded from: input_file:org/opencb/cellbase/lib/download/CoreDownloadManager.class */
public class CoreDownloadManager extends DownloadManager {
    private static final String ENSEMBL_NAME = "ENSEMBL";
    private static final String UNIPROT_NAME = "UniProt";
    private static final String INTACT_NAME = "IntAct";
    private static final String INTERPRO_NAME = "InterPro";
    private static final String GERP_NAME = "GERP++";
    private static final String PHASTCONS_NAME = "PhastCons";
    private static final String PHYLOP_NAME = "PhyloP";
    private static final String GENE_EXPRESSION_ATLAS_NAME = "Gene Expression Atlas";
    private static final String HPO_NAME = "HPO";
    private static final String DISGENET_NAME = "DisGeNET";
    private static final String GO_ANNOTATION_NAME = "EBI Gene Ontology Annotation";
    private static final String DGIDB_NAME = "DGIdb";
    private static final String GNOMAD_NAME = "gnomAD";
    private static final HashMap GENE_UNIPROT_XREF_FILES = new HashMap() { // from class: org.opencb.cellbase.lib.download.CoreDownloadManager.1
        {
            put("Homo sapiens", "HUMAN_9606_idmapping_selected.tab.gz");
            put("Mus musculus", "MOUSE_10090_idmapping_selected.tab.gz");
            put("Rattus norvegicus", "RAT_10116_idmapping_selected.tab.gz");
            put("Danio rerio", "DANRE_7955_idmapping_selected.tab.gz");
            put("Drosophila melanogaster", "DROME_7227_idmapping_selected.tab.gz");
            put("Saccharomyces cerevisiae", "YEAST_559292_idmapping_selected.tab.gz");
        }
    };

    /**
     * Creates a download manager by forwarding every argument, unchanged, to the matching
     * {@link DownloadManager} constructor.
     *
     * @param str first string forwarded to the superclass (presumably the species; TODO confirm against DownloadManager)
     * @param str2 second string forwarded to the superclass (presumably the assembly; TODO confirm against DownloadManager)
     * @param path path forwarded to the superclass (presumably the output directory; TODO confirm)
     * @param cellBaseConfiguration CellBase configuration forwarded to the superclass
     * @throws IOException if the superclass constructor throws it
     * @throws CellBaseException if the superclass constructor throws it
     */
    public CoreDownloadManager(String str, String str2, Path path, CellBaseConfiguration cellBaseConfiguration) throws IOException, CellBaseException {
        super(str, str2, path, cellBaseConfiguration);
    }

    /**
     * Creates a download manager by forwarding every argument, unchanged, to the matching
     * {@link DownloadManager} constructor.
     *
     * @param cellBaseConfiguration CellBase configuration forwarded to the superclass
     * @param path path forwarded to the superclass (presumably the output directory; TODO confirm)
     * @param speciesConfiguration species configuration forwarded to the superclass
     * @param assembly assembly configuration forwarded to the superclass
     * @throws IOException if the superclass constructor throws it
     * @throws CellBaseException if the superclass constructor throws it
     */
    public CoreDownloadManager(CellBaseConfiguration cellBaseConfiguration, Path path, SpeciesConfiguration speciesConfiguration, SpeciesConfiguration.Assembly assembly) throws IOException, CellBaseException {
        super(cellBaseConfiguration, path, speciesConfiguration, assembly);
    }

    /**
     * Downloads the reference genome FASTA from the Ensembl host into the {@code genome}
     * sub-folder of the download folder and records the source URL in
     * {@code genomeVersion.json} under the build folder.
     *
     * <p>For "Homo sapiens" the {@code primary_assembly} FASTA is requested; for every other
     * species the {@code toplevel} FASTA is requested. Species not listed among the configured
     * vertebrates get the value of {@code getPhylo(...)} appended to the base URL, and species
     * listed among the configured bacteria additionally get the Ensembl collection of their
     * first assembly inserted into the path.
     *
     * @throws IOException if the folder cannot be created or a called helper throws it
     * @throws InterruptedException if a called helper throws it
     */
    public void downloadReferenceGenome() throws IOException, InterruptedException {
        this.logger.info("Downloading genome information ...");
        Path genomeFolder = this.downloadFolder.resolve("genome");
        Files.createDirectories(genomeFolder);

        String baseUrl = this.ensemblHostUrl + "/" + this.ensemblRelease;
        String url;
        if (this.speciesConfiguration.getScientificName().equals("Homo sapiens")) {
            url = baseUrl + "/fasta/" + this.speciesShortName + "/dna/*.dna.primary_assembly.fa.gz";
        } else {
            if (!this.configuration.getSpecies().getVertebrates().contains(this.speciesConfiguration)) {
                baseUrl = this.ensemblHostUrl + "/" + this.ensemblRelease + "/" + getPhylo(this.speciesConfiguration);
            }
            String fastaUrl = baseUrl + "/fasta/";
            if (this.configuration.getSpecies().getBacteria().contains(this.speciesConfiguration)) {
                fastaUrl = fastaUrl + ((SpeciesConfiguration.Assembly) this.speciesConfiguration.getAssemblies().get(0)).getEnsemblCollection() + "/";
            }
            url = fastaUrl + this.speciesShortName + "/dna/*.dna.toplevel.fa.gz";
        }

        String fastaFileName = StringUtils.capitalize(this.speciesShortName) + "." + this.assemblyConfiguration.getName() + ".fa.gz";
        downloadFile(url, genomeFolder.resolve(fastaFileName).toString());

        // The version file is written under the build folder, so log that path
        // (the message previously reported a path under the genome download folder).
        Path versionFile = this.buildFolder.resolve("genomeVersion.json");
        this.logger.info("Saving reference genome version data at {}", versionFile);
        saveVersionData("genome", ENSEMBL_NAME, this.ensemblVersion, getTimeStamp(), Collections.singletonList(url), versionFile);
    }

    /**
     * Downloads all gene-related resources into the gene sub-folder of the download folder.
     * The individual downloads run in a fixed order: Ensembl data, drug data, UniProt
     * cross-references, gene expression atlas, gene-disease annotation, gnomAD constraints
     * and GO annotation.
     *
     * @throws IOException if the folder cannot be created or any download step throws it
     * @throws InterruptedException if any download step throws it
     */
    public void downloadEnsemblGene() throws IOException, InterruptedException {
        this.logger.info("Downloading gene information ...");

        Path geneFolder = this.downloadFolder.resolve(EtlCommons.GENE_DATA);
        Files.createDirectories(geneFolder);

        downloadEnsemblData(geneFolder);
        downloadDrugData(geneFolder);
        downloadGeneUniprotXref(geneFolder);
        downloadGeneExpressionAtlas(geneFolder);
        downloadGeneDiseaseAnnotation(geneFolder);
        downloadGnomadConstraints(geneFolder);
        downloadGO(geneFolder);
    }

    /**
     * Downloads the GO annotation file as {@code goa_human.gaf.gz} into the given folder and
     * writes {@code goAnnotationVersion.json} under the build folder. Does nothing unless the
     * species is "Homo sapiens".
     *
     * @param path folder that receives the downloaded file
     * @throws IOException if a called helper throws it
     * @throws InterruptedException if a called helper throws it
     */
    private void downloadGO(Path path) throws IOException, InterruptedException {
        boolean human = this.speciesConfiguration.getScientificName().equals("Homo sapiens");
        if (!human) {
            return;
        }

        this.logger.info("Downloading go annotation...");
        String goAnnotationUrl = this.configuration.getDownload().getGoAnnotation().getHost();
        String target = path.resolve("goa_human.gaf.gz").toString();
        downloadFile(goAnnotationUrl, target);
        saveVersionData(EtlCommons.GENE_DATA, GO_ANNOTATION_NAME, null, getTimeStamp(),
                Collections.singletonList(goAnnotationUrl), this.buildFolder.resolve("goAnnotationVersion.json"));
    }

    /**
     * Downloads the HPO and GO OBO files, in that order, into the {@code obo} sub-folder of
     * the download folder. No version file is written by this method.
     *
     * @throws IOException if the folder cannot be created or a download throws it
     * @throws InterruptedException if a download throws it
     */
    public void downloadObo() throws IOException, InterruptedException {
        this.logger.info("Downloading obo files ...");

        Path oboFolder = this.downloadFolder.resolve("obo");
        Files.createDirectories(oboFolder);

        // HPO ontology first ...
        downloadFile(
                this.configuration.getDownload().getHpoObo().getHost(),
                oboFolder.resolve(EtlCommons.HPO_FILE).toString());

        // ... then the GO ontology.
        downloadFile(
                this.configuration.getDownload().getGoObo().getHost(),
                oboFolder.resolve(EtlCommons.GO_FILE).toString());
    }

    /**
     * Downloads the gnomAD constraints file as
     * {@code gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz} into the given folder and writes
     * {@code gnomadVersion.json} under the build folder, using the version taken from the
     * configuration. Does nothing unless the species is "Homo sapiens".
     *
     * @param path folder that receives the downloaded file
     * @throws IOException if a called helper throws it
     * @throws InterruptedException if a called helper throws it
     */
    private void downloadGnomadConstraints(Path path) throws IOException, InterruptedException {
        boolean human = this.speciesConfiguration.getScientificName().equals("Homo sapiens");
        if (!human) {
            return;
        }

        this.logger.info("Downloading gnomAD constraints data...");
        String constraintsUrl = this.configuration.getDownload().getGnomadConstraints().getHost();
        String target = path.resolve("gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz").toString();
        downloadFile(constraintsUrl, target);
        saveVersionData(EtlCommons.GENE_DATA, GNOMAD_NAME,
                this.configuration.getDownload().getGnomadConstraints().getVersion(), getTimeStamp(),
                Collections.singletonList(constraintsUrl), this.buildFolder.resolve("gnomadVersion.json"));
    }

    /**
     * Downloads the DGIdb drug-gene file as {@code dgidb.tsv} into the given folder and writes
     * {@code dgidbVersion.json} under the build folder. Does nothing unless the species is
     * "Homo sapiens".
     *
     * @param path folder that receives the downloaded file
     * @throws IOException if a called helper throws it
     * @throws InterruptedException if a called helper throws it
     */
    private void downloadDrugData(Path path) throws IOException, InterruptedException {
        boolean human = this.speciesConfiguration.getScientificName().equals("Homo sapiens");
        if (!human) {
            return;
        }

        this.logger.info("Downloading drug-gene data...");
        String dgidbUrl = this.configuration.getDownload().getDgidb().getHost();
        String target = path.resolve("dgidb.tsv").toString();
        downloadFile(dgidbUrl, target);
        saveVersionData(EtlCommons.GENE_DATA, DGIDB_NAME, null, getTimeStamp(),
                Collections.singletonList(dgidbUrl), this.buildFolder.resolve("dgidbVersion.json"));
    }

    /**
     * Downloads the Ensembl GTF, peptide FASTA and cDNA FASTA files for the current species
     * into the given folder and writes {@code ensemblCoreVersion.json}, listing the three
     * source URLs, under the build folder.
     *
     * <p>Species not listed among the configured vertebrates get the value of
     * {@code getPhylo(...)} appended to the base URL; species listed among the configured
     * bacteria get the Ensembl collection of their first assembly inserted into each path.
     * The GTF pattern uses the part of {@code ensemblRelease} that follows the first "-".
     *
     * @param path folder that receives the downloaded files
     * @throws IOException if a called helper throws it
     * @throws InterruptedException if a called helper throws it
     */
    private void downloadEnsemblData(Path path) throws IOException, InterruptedException {
        this.logger.info("Downloading gene Ensembl data (gtf, pep, cdna, motifs) ...");
        ArrayList<String> sourceUrls = new ArrayList<>(4);

        String baseUrl = this.ensemblHostUrl + "/" + this.ensemblRelease;
        if (!this.configuration.getSpecies().getVertebrates().contains(this.speciesConfiguration)) {
            baseUrl = this.ensemblHostUrl + "/" + this.ensemblRelease + "/" + getPhylo(this.speciesConfiguration);
        }

        String collectionPrefix;
        if (this.configuration.getSpecies().getBacteria().contains(this.speciesConfiguration)) {
            collectionPrefix = ((SpeciesConfiguration.Assembly) this.speciesConfiguration.getAssemblies().get(0)).getEnsemblCollection() + "/";
        } else {
            collectionPrefix = "";
        }

        // Gene annotation (GTF)
        String gtfUrl = baseUrl + "/gtf/" + collectionPrefix + this.speciesShortName + "/*" + this.ensemblRelease.split("-")[1] + ".gtf.gz";
        downloadFile(gtfUrl, path.resolve(this.speciesShortName + ".gtf.gz").toString());
        sourceUrls.add(gtfUrl);

        // Protein sequences
        String pepUrl = baseUrl + "/fasta/" + collectionPrefix + this.speciesShortName + "/pep/*.pep.all.fa.gz";
        downloadFile(pepUrl, path.resolve(this.speciesShortName + ".pep.all.fa.gz").toString());
        sourceUrls.add(pepUrl);

        // cDNA sequences
        String cdnaUrl = baseUrl + "/fasta/" + collectionPrefix + this.speciesShortName + "/cdna/*.cdna.all.fa.gz";
        downloadFile(cdnaUrl, path.resolve(this.speciesShortName + ".cdna.all.fa.gz").toString());
        sourceUrls.add(cdnaUrl);

        saveVersionData(EtlCommons.GENE_DATA, ENSEMBL_NAME, this.ensemblVersion, getTimeStamp(), sourceUrls,
                this.buildFolder.resolve("ensemblCoreVersion.json"));
    }

    /**
     * Downloads the UniProt id-mapping file for the current species as
     * {@code idmapping_selected.tab.gz}, plus the UniProt release notes as
     * {@code uniprotRelnotes.txt}, into the given folder, and writes
     * {@code uniprotXrefVersion.json} under the build folder. Nothing is downloaded for species
     * that have no entry in {@code GENE_UNIPROT_XREF_FILES}.
     *
     * @param path folder that receives the downloaded files
     * @throws IOException if a called helper throws it
     * @throws InterruptedException if a called helper throws it
     */
    private void downloadGeneUniprotXref(Path path) throws IOException, InterruptedException {
        this.logger.info("Downloading UniProt ID mapping ...");

        if (!GENE_UNIPROT_XREF_FILES.containsKey(this.speciesConfiguration.getScientificName())) {
            return;
        }

        String mappingUrl = this.configuration.getDownload().getGeneUniprotXref().getHost()
                + "/" + GENE_UNIPROT_XREF_FILES.get(this.speciesConfiguration.getScientificName());
        downloadFile(mappingUrl, path.resolve("idmapping_selected.tab.gz").toString());

        // The release notes are fetched only to extract the UniProt release for the version file.
        downloadFile(getUniProtReleaseNotesUrl(), path.resolve("uniprotRelnotes.txt").toString());

        saveVersionData(EtlCommons.GENE_DATA, UNIPROT_NAME,
                getUniProtRelease(path.resolve("uniprotRelnotes.txt").toString()), getTimeStamp(),
                Collections.singletonList(mappingUrl), this.buildFolder.resolve("uniprotXrefVersion.json"));
    }

    /**
     * Extracts the UniProt release from a release-notes file: the third space-separated token
     * of the file's first line.
     *
     * <p>The reader is always closed, including when parsing fails. An unreadable file, an
     * empty file or a first line with fewer than three tokens is logged and yields
     * {@code null} rather than an exception.
     *
     * @param str path of the release-notes file
     * @return the release token, or {@code null} if it cannot be determined
     */
    private String getUniProtRelease(String str) {
        Path relNotesPath = Paths.get(str);
        try (BufferedReader reader = Files.newBufferedReader(relNotesPath, Charset.defaultCharset())) {
            String firstLine = reader.readLine();
            if (firstLine == null) {
                // readLine() returns null at end of stream, i.e. the file is empty.
                this.logger.error("UniProt release notes file " + relNotesPath + " is empty; release cannot be determined");
                return null;
            }
            String[] tokens = firstLine.split(" ");
            if (tokens.length < 3) {
                this.logger.error("Unexpected first line in UniProt release notes file " + relNotesPath + ": '" + firstLine + "'");
                return null;
            }
            return tokens[2];
        } catch (IOException e) {
            this.logger.error("Unable to read UniProt release notes file " + relNotesPath, e);
            return null;
        }
    }

    /**
     * Builds the URL of the UniProt release notes by resolving {@code ../../../} against the
     * configured UniProt xref host URL and appending {@code /relnotes.txt}.
     *
     * @return the release-notes URL as a string
     */
    private String getUniProtReleaseNotesUrl() {
        String xrefHost = this.configuration.getDownload().getGeneUniprotXref().getHost();
        URI ancestor = URI.create(xrefHost).resolve("../../../");
        return ancestor.toString() + "/relnotes.txt";
    }

    /**
     * Downloads the gene expression atlas file as
     * {@code allgenes_updown_in_organism_part.tab.gz} into the given folder and writes
     * {@code geneExpressionAtlasVersion.json} under the build folder.
     *
     * @param path folder that receives the downloaded file
     * @throws IOException if a called helper throws it
     * @throws InterruptedException if a called helper throws it
     */
    private void downloadGeneExpressionAtlas(Path path) throws IOException, InterruptedException {
        this.logger.info("Downloading gene expression atlas ...");

        String atlasUrl = this.configuration.getDownload().getGeneExpressionAtlas().getHost();
        String target = path.resolve("allgenes_updown_in_organism_part.tab.gz").toString();
        downloadFile(atlasUrl, target);

        saveVersionData(EtlCommons.GENE_DATA, GENE_EXPRESSION_ATLAS_NAME, getGeneExpressionAtlasVersion(),
                getTimeStamp(), Collections.singletonList(atlasUrl),
                this.buildFolder.resolve("geneExpressionAtlasVersion.json"));
    }

    /**
     * Derives the gene expression atlas version from the configured URL: takes the file's base
     * name, splits it on "_", and returns the sixth token with any ".tab" removed.
     *
     * @return the version token extracted from the file name
     */
    private String getGeneExpressionAtlasVersion() {
        String atlasUrl = this.configuration.getDownload().getGeneExpressionAtlas().getHost();
        String baseName = FilenameUtils.getBaseName(atlasUrl);
        String[] tokens = baseName.split("_");
        return tokens[5].replace(".tab", "");
    }

    /**
     * Downloads the gene-disease annotation sources into the given folder.
     *
     * <p>The HPO file keeps the name that follows the last "/" of its URL. Each configured
     * DisGeNET file keeps its own name, except a file named "readme.txt" (case-insensitive),
     * which is stored as {@code disgenetReadme.txt}; that readme is then used to obtain the
     * DisGeNET version. A version file is written under the build folder for each source.
     *
     * @param path folder that receives the downloaded files
     * @throws IOException if a called helper throws it
     * @throws InterruptedException if a called helper throws it
     */
    private void downloadGeneDiseaseAnnotation(Path path) throws IOException, InterruptedException {
        this.logger.info("Downloading gene disease annotation ...");

        // HPO
        String hpoUrl = this.configuration.getDownload().getHpo().getHost();
        String hpoFileName = StringUtils.substringAfterLast(hpoUrl, "/");
        downloadFile(hpoUrl, path.resolve(hpoFileName).toString());
        saveVersionData(EtlCommons.GENE_DATA, HPO_NAME, null, getTimeStamp(), Collections.singletonList(hpoUrl),
                this.buildFolder.resolve(EtlCommons.HPO_VERSION_FILE));

        // DisGeNET
        String disgenetHost = this.configuration.getDownload().getDisgenet().getHost();
        for (String fileName : this.configuration.getDownload().getDisgenet().getFiles()) {
            String localName;
            if (fileName.equalsIgnoreCase("readme.txt")) {
                localName = "disgenetReadme.txt";
            } else {
                localName = fileName;
            }
            downloadFile(disgenetHost + "/" + fileName, path.resolve(localName).toString());
        }
        saveVersionData(EtlCommons.GENE_DISEASE_ASSOCIATION_DATA, DISGENET_NAME,
                getVersionFromVersionLine(path.resolve("disgenetReadme.txt"), "(version"), getTimeStamp(),
                Collections.singletonList(disgenetHost), this.buildFolder.resolve("disgenetVersion.json"));
    }

    /**
     * Runs the {@code gene_extra_info.pl} script from {@code <basedir>/bin/ensembl-scripts/}
     * (where {@code basedir} is a system property) for the current species and assembly,
     * sending its log to {@code gene_extra_info.log} in the given folder.
     *
     * <p>{@code --phylo no-vertebrate} is added for species that are neither among the
     * configured vertebrates nor "Drosophila melanogaster". A failed run is logged as an
     * error; no exception is raised for it here.
     *
     * @param path output folder passed to the script via {@code --outdir}
     * @throws IOException if the command-line helper throws it
     * @throws InterruptedException if the command-line helper throws it
     */
    private void runGeneExtraInfo(Path path) throws IOException, InterruptedException {
        this.logger.info("Downloading gene extra info ...");
        String logFile = path.resolve("gene_extra_info.log").toString();

        ArrayList<String> scriptArgs = new ArrayList<>();
        scriptArgs.addAll(Arrays.asList(
                "--species", this.speciesConfiguration.getScientificName(),
                "--assembly", this.assemblyConfiguration.getName(),
                "--outdir", path.toAbsolutePath().toString(),
                "--ensembl-libs", this.configuration.getDownload().getEnsembl().getLibs()));

        boolean vertebrate = this.configuration.getSpecies().getVertebrates().contains(this.speciesConfiguration);
        if (!vertebrate && !this.speciesConfiguration.getScientificName().equals("Drosophila melanogaster")) {
            scriptArgs.add("--phylo");
            scriptArgs.add("no-vertebrate");
        }

        File scriptFolder = new File(System.getProperty("basedir") + "/bin/ensembl-scripts/");
        boolean succeeded = EtlCommons.runCommandLineProcess(scriptFolder, "./gene_extra_info.pl", scriptArgs, logFile);
        if (!succeeded) {
            this.logger.error("Gene extra info for " + this.speciesConfiguration.getScientificName() + " cannot be downloaded");
            return;
        }
        this.logger.info("Gene extra files created OK");
    }

    /**
     * Downloads conservation scores into the conservation sub-folder of the download folder,
     * provided {@code speciesHasInfoToDownload} reports conservation data for the species.
     *
     * <ul>
     *   <li>"Homo sapiens": the folders are always created; GERP++, phastCons and PhyloP
     *       (hg38, 100way) are downloaded only when the assembly name is GRCh38
     *       (case-insensitive).</li>
     *   <li>"Mus musculus": phastCons and PhyloP (mm10, 60way) are downloaded.</li>
     * </ul>
     *
     * <p>Version files are written under the build folder: {@code gerpVersion.json},
     * {@code phastConsVersion.json} and {@code phyloPVersion.json}. The mouse PhyloP data used
     * to be written to {@code phastConsVersion.json}, overwriting the PhastCons version file;
     * it now goes to {@code phyloPVersion.json}, as for human.
     *
     * @throws IOException if a folder cannot be created or a called helper throws it
     * @throws InterruptedException if a called helper throws it
     */
    public void downloadConservation() throws IOException, InterruptedException {
        if (!speciesHasInfoToDownload(this.speciesConfiguration, EtlCommons.CONSERVATION_DATA)) {
            return;
        }
        this.logger.info("Downloading conservation information ...");
        Path conservationFolder = this.downloadFolder.resolve(EtlCommons.CONSERVATION_DATA);

        if (this.speciesConfiguration.getScientificName().equals("Homo sapiens")) {
            Files.createDirectories(conservationFolder);
            Files.createDirectories(conservationFolder.resolve("phastCons"));
            Files.createDirectories(conservationFolder.resolve("phylop"));
            Files.createDirectories(conservationFolder.resolve(EtlCommons.GERP_SUBDIRECTORY));

            // NOTE(review): UNKNOWN_AMINOACID is used here as a chromosome name; presumably its
            // value is "X" and the reference is a constant-folding artifact -- confirm.
            String[] humanChromosomes = {"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", VariantAnnotationUtils.UNKNOWN_AMINOACID, "Y", "M"};

            if (this.assemblyConfiguration.getName().equalsIgnoreCase("GRCh38")) {
                this.logger.info("Downloading GERP++ ...");
                String gerpUrl = this.configuration.getDownload().getGerp().getHost();
                downloadFile(gerpUrl, conservationFolder.resolve("gerp/gerp_conservation_scores.homo_sapiens.GRCh38.bw").toAbsolutePath().toString());
                saveVersionData(EtlCommons.CONSERVATION_DATA, GERP_NAME, null, getTimeStamp(), Collections.singletonList(gerpUrl), this.buildFolder.resolve("gerpVersion.json"));

                this.logger.info("Downloading phastCons and PhyloP ...");
                downloadPhastConsAndPhyloP(conservationFolder, "hg38", "100way", humanChromosomes);
            }
        }

        if (this.speciesConfiguration.getScientificName().equals("Mus musculus")) {
            Files.createDirectories(conservationFolder);
            Files.createDirectories(conservationFolder.resolve("phastCons"));
            Files.createDirectories(conservationFolder.resolve("phylop"));

            String[] mouseChromosomes = {"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", VariantAnnotationUtils.UNKNOWN_AMINOACID, "Y", "M"};
            downloadPhastConsAndPhyloP(conservationFolder, "mm10", "60way", mouseChromosomes);
        }
    }

    /**
     * Downloads the per-chromosome phastCons and PhyloP wigFix files for one genome build into
     * the {@code phastCons} and {@code phylop} sub-folders, then writes
     * {@code phastConsVersion.json} and {@code phyloPVersion.json} under the build folder.
     *
     * @param conservationFolder folder containing the {@code phastCons} and {@code phylop} sub-folders
     * @param genomeBuild build name used in the URL, e.g. "hg38" or "mm10"
     * @param alignment alignment label used in the URL and file names, e.g. "100way" or "60way"
     * @param chromosomes chromosome names to download, without the "chr" prefix
     */
    private void downloadPhastConsAndPhyloP(Path conservationFolder, String genomeBuild, String alignment, String[] chromosomes) throws IOException, InterruptedException {
        String baseUrl = this.configuration.getDownload().getConservation().getHost() + "/" + genomeBuild;
        ArrayList<String> phastConsUrls = new ArrayList<>(chromosomes.length);
        ArrayList<String> phyloPUrls = new ArrayList<>(chromosomes.length);

        for (String chromosome : chromosomes) {
            String phastConsFile = "chr" + chromosome + ".phastCons" + alignment + ".wigFix.gz";
            String phastConsUrl = baseUrl + "/phastCons" + alignment + "/" + genomeBuild + "." + alignment + ".phastCons/" + phastConsFile;
            downloadFile(phastConsUrl, conservationFolder.resolve("phastCons").resolve(phastConsFile).toString());
            phastConsUrls.add(phastConsUrl);

            String phyloPFile = "chr" + chromosome + ".phyloP" + alignment + ".wigFix.gz";
            String phyloPUrl = baseUrl + "/phyloP" + alignment + "/" + genomeBuild + "." + alignment + ".phyloP" + alignment + "/" + phyloPFile;
            downloadFile(phyloPUrl, conservationFolder.resolve("phylop").resolve(phyloPFile).toString());
            phyloPUrls.add(phyloPUrl);
        }

        saveVersionData(EtlCommons.CONSERVATION_DATA, PHASTCONS_NAME, null, getTimeStamp(), phastConsUrls, this.buildFolder.resolve("phastConsVersion.json"));
        saveVersionData(EtlCommons.CONSERVATION_DATA, PHYLOP_NAME, null, getTimeStamp(), phyloPUrls, this.buildFolder.resolve("phyloPVersion.json"));
    }

    /**
     * Downloads the UniProt data ({@code uniprot_sprot.xml.gz}) and its release notes
     * ({@code uniprotRelnotes.txt}) into the protein sub-folder of the download folder, splits
     * the XML into chunk files under {@code uniprot_chunks}, and writes
     * {@code uniprotVersion.json} under the build folder. Does nothing when
     * {@code speciesHasInfoToDownload} reports no protein data for the species.
     *
     * @throws IOException if a folder cannot be created or a called helper throws it
     * @throws InterruptedException if a called helper throws it
     */
    public void downloadProtein() throws IOException, InterruptedException {
        if (!speciesHasInfoToDownload(this.speciesConfiguration, EtlCommons.PROTEIN_DATA)) {
            return;
        }

        this.logger.info("Downloading protein information ...");
        Path proteinFolder = this.downloadFolder.resolve(EtlCommons.PROTEIN_DATA);
        Files.createDirectories(proteinFolder);

        String uniprotUrl = this.configuration.getDownload().getUniprot().getHost();
        downloadFile(uniprotUrl, proteinFolder.resolve("uniprot_sprot.xml.gz").toString());
        downloadFile(this.configuration.getDownload().getUniprotRelNotes().getHost(),
                proteinFolder.resolve("uniprotRelnotes.txt").toString());

        Files.createDirectories(proteinFolder.resolve("uniprot_chunks"));
        splitUniprot(proteinFolder.resolve("uniprot_sprot.xml.gz"), proteinFolder.resolve("uniprot_chunks"));

        saveVersionData(EtlCommons.PROTEIN_DATA, UNIPROT_NAME,
                getLine(proteinFolder.resolve("uniprotRelnotes.txt"), 1), getTimeStamp(),
                Collections.singletonList(uniprotUrl), this.buildFolder.resolve("uniprotVersion.json"));
    }

    /**
     * Splits a UniProt XML file into chunk files {@code chunk_0.xml}, {@code chunk_1.xml}, ...
     * of at most 10000 entries each.
     *
     * <p>Every chunk starts with the header (all lines before the first {@code <entry } line,
     * trimmed), followed by the lines of its entries, and ends with {@code </uniprot>}. Lines
     * outside entries that come after the header are not copied.
     *
     * <p>The reader and any open chunk writer are closed even if an exception is thrown.
     * Input without any {@code <entry } line produces no chunk files rather than failing.
     *
     * @param path UniProt XML file to split
     * @param path2 folder that receives the chunk files
     * @throws IOException if the input cannot be read or a chunk file cannot be created
     */
    private void splitUniprot(Path path, Path path2) throws IOException {
        final int entriesPerChunk = 10000;
        // Non-null only while a chunk file is open.
        PrintWriter chunkWriter = null;
        try (BufferedReader reader = FileUtils.newBufferedReader(path)) {
            StringBuilder header = new StringBuilder();
            boolean inHeader = true;
            boolean inEntry = false;
            int entryCount = 0;
            int chunkIndex = 0;

            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();

                if (trimmed.startsWith("<entry ")) {
                    inEntry = true;
                    inHeader = false;
                    if (entryCount % entriesPerChunk == 0) {
                        if (chunkWriter != null) {
                            // Only reachable on malformed input (previous entry never closed);
                            // release the old file handle before starting the next chunk.
                            chunkWriter.close();
                        }
                        chunkWriter = new PrintWriter(new FileOutputStream(path2.resolve("chunk_" + chunkIndex + ".xml").toFile()));
                        chunkWriter.println(header.toString().trim());
                    }
                    entryCount++;
                }

                if (inHeader) {
                    header.append(line).append("\n");
                }
                if (inEntry) {
                    // inEntry is only true after an <entry line, so a chunk is open here.
                    chunkWriter.println(line);
                }

                if (trimmed.startsWith("</entry>")) {
                    inEntry = false;
                    if (chunkWriter != null && entryCount % entriesPerChunk == 0) {
                        // Chunk is full: terminate the document and move to the next file.
                        chunkWriter.print("</uniprot>");
                        chunkWriter.close();
                        chunkWriter = null;
                        chunkIndex++;
                    }
                }
            }

            // Terminate the last, partially filled chunk (if any).
            if (chunkWriter != null) {
                chunkWriter.print("</uniprot>");
            }
        } finally {
            if (chunkWriter != null) {
                chunkWriter.close();
            }
        }
    }
}
