package org.opencb.cellbase.lib.download;

import java.io.BufferedReader;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.FileAttribute;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.StringUtils;
import org.opencb.cellbase.core.config.CellBaseConfiguration;
import org.opencb.cellbase.core.config.SpeciesConfiguration;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.lib.EtlCommons;
import org.opencb.commons.utils.DockerUtils;
import org.opencb.commons.utils.FileUtils;

/* loaded from: input_file:org/opencb/cellbase/lib/download/GeneDownloadManager.class */
/**
 * Download manager for gene-related data sources: Ensembl (GTF, protein and cDNA FASTA), RefSeq,
 * MANE Select, LRG, DGIdb, UniProt ID mapping, Gene Expression Atlas, HPO, DisGeNET, gnomAD
 * constraints and GO annotation.
 *
 * <p>For most sources a version JSON file is written next to the downloaded data through
 * {@code saveVersionData}. Several sources are only downloaded when the configured species is
 * "Homo sapiens"; for any other species those steps are skipped.
 */
public class GeneDownloadManager extends AbstractDownloadManager {
    private static final String HOMO_SAPIENS_NAME = "Homo sapiens";
    private static final String ENSEMBL_NAME = "ENSEMBL";
    private static final String UNIPROT_NAME = "UniProt";
    private static final String GENE_EXPRESSION_ATLAS_NAME = "Gene Expression Atlas";
    private static final String HPO_NAME = "HPO";
    private static final String DISGENET_NAME = "DisGeNET";
    private static final String GO_ANNOTATION_NAME = "EBI Gene Ontology Annotation";
    private static final String DGIDB_NAME = "DGIdb";
    private static final String GNOMAD_NAME = "gnomAD";

    /** Scientific species name to UniProt "idmapping_selected" file name; read-only after class init. */
    private static final Map<String, String> GENE_UNIPROT_XREF_FILES = buildGeneUniprotXrefFiles();

    /** Docker image used by {@link #runGeneExtraInfo(Path)}; derived from the configured API version. */
    private final String dockerImage;

    /**
     * Creates the manager. The first three arguments are forwarded unchanged to the
     * {@code AbstractDownloadManager} constructor.
     *
     * @param str first argument forwarded to the superclass constructor
     * @param str2 second argument forwarded to the superclass constructor
     * @param path third argument forwarded to the superclass constructor
     * @param cellBaseConfiguration configuration; its API version is used as the Docker image tag
     * @throws IOException propagated from the superclass constructor
     * @throws CellBaseException propagated from the superclass constructor
     */
    public GeneDownloadManager(String str, String str2, Path path, CellBaseConfiguration cellBaseConfiguration) throws IOException, CellBaseException {
        super(str, str2, path, cellBaseConfiguration);
        // Kept per instance: a static field would be overwritten by every new manager.
        this.dockerImage = "opencb/cellbase-builder:" + cellBaseConfiguration.getApiVersion();
    }

    /**
     * Downloads every gene-related source into the gene and RefSeq sub-folders of the download
     * folder, creating those folders if needed.
     *
     * @return the download results of all sources that were actually downloaded; sources skipped
     *         for the current species contribute no entry (the list never contains {@code null})
     * @throws IOException if a folder cannot be created or a download/version step fails
     * @throws InterruptedException if a download is interrupted
     */
    @Override
    public List<DownloadFile> download() throws IOException, InterruptedException {
        logger.info("Downloading gene information ...");
        Path geneFolder = downloadFolder.resolve(EtlCommons.GENE_DATA);
        Files.createDirectories(geneFolder);
        Path refSeqFolder = downloadFolder.resolve(EtlCommons.REFSEQ_DATA);
        Files.createDirectories(refSeqFolder);

        List<DownloadFile> downloadFiles = new ArrayList<>();
        downloadFiles.addAll(downloadEnsemblData(geneFolder));
        downloadFiles.addAll(downloadRefSeq(refSeqFolder));
        // Human-only steps return null when skipped; do not leak those nulls to callers.
        addIfPresent(downloadFiles, downloadMane(geneFolder));
        addIfPresent(downloadFiles, downloadLrg(geneFolder));
        addIfPresent(downloadFiles, downloadDrugData(geneFolder));
        downloadFiles.addAll(downloadGeneUniprotXref(geneFolder));
        addIfPresent(downloadFiles, downloadGeneExpressionAtlas(geneFolder));
        downloadFiles.addAll(downloadGeneDiseaseAnnotation(geneFolder));
        addIfPresent(downloadFiles, downloadGnomadConstraints(geneFolder));
        addIfPresent(downloadFiles, downloadGO(geneFolder));
        return downloadFiles;
    }

    /** Appends {@code downloadFile} to {@code downloadFiles} unless it is {@code null}. */
    private static void addIfPresent(List<DownloadFile> downloadFiles, DownloadFile downloadFile) {
        if (downloadFile != null) {
            downloadFiles.add(downloadFile);
        }
    }

    /** Returns true when the configured species' scientific name is "Homo sapiens" (null-safe). */
    private boolean isHomoSapiens() {
        return HOMO_SAPIENS_NAME.equals(speciesConfiguration.getScientificName());
    }

    /**
     * Downloads the Ensembl GTF, peptide FASTA and cDNA FASTA files for the current species and
     * records the three source URLs in "ensemblCoreVersion.json".
     *
     * @param path folder receiving the downloaded files and the version file
     * @return one download result per file, in GTF, pep, cdna order
     */
    private List<DownloadFile> downloadEnsemblData(Path path) throws IOException, InterruptedException {
        logger.info("Downloading gene Ensembl data (gtf, pep, cdna, motifs) ...");
        List<String> downloadedUrls = new ArrayList<>(4);
        List<DownloadFile> downloadFiles = new ArrayList<>();

        String ensemblUrl = ensemblHostUrl + "/" + ensemblRelease;
        if (!configuration.getSpecies().getVertebrates().contains(speciesConfiguration)) {
            // Species not listed under vertebrates get an extra path segment from getPhylo.
            ensemblUrl = ensemblUrl + "/" + getPhylo(speciesConfiguration);
        }

        // Bacteria are additionally nested under the Ensembl collection of their first assembly.
        String bacteriaCollectionPath = "";
        if (configuration.getSpecies().getBacteria().contains(speciesConfiguration)) {
            bacteriaCollectionPath = ((SpeciesConfiguration.Assembly) speciesConfiguration.getAssemblies().get(0)).getEnsemblCollection() + "/";
        }

        // The GTF pattern ends with the token following the first "-" of the Ensembl release string.
        String gtfUrl = ensemblUrl + "/gtf/" + bacteriaCollectionPath + speciesShortName + "/*" + ensemblRelease.split("-")[1] + ".gtf.gz";
        downloadFiles.add(downloadFile(gtfUrl, path.resolve(speciesShortName + ".gtf.gz").toString()));
        downloadedUrls.add(gtfUrl);

        String pepUrl = ensemblUrl + "/fasta/" + bacteriaCollectionPath + speciesShortName + "/pep/*.pep.all.fa.gz";
        downloadFiles.add(downloadFile(pepUrl, path.resolve(speciesShortName + ".pep.all.fa.gz").toString()));
        downloadedUrls.add(pepUrl);

        String cdnaUrl = ensemblUrl + "/fasta/" + bacteriaCollectionPath + speciesShortName + "/cdna/*.cdna.all.fa.gz";
        downloadFiles.add(downloadFile(cdnaUrl, path.resolve(speciesShortName + ".cdna.all.fa.gz").toString()));
        downloadedUrls.add(cdnaUrl);

        saveVersionData(EtlCommons.GENE_DATA, ENSEMBL_NAME, ensemblVersion, getTimeStamp(), downloadedUrls, path.resolve("ensemblCoreVersion.json"));
        return downloadFiles;
    }

    /**
     * Downloads the RefSeq GTF, genomic FASTA (which is then gunzipped), protein FASTA and cDNA
     * FASTA, writing one version file per source. Only runs for "Homo sapiens".
     *
     * @param path folder receiving the downloaded files and the version files
     * @return the download results, or an empty list when the species is not "Homo sapiens"
     */
    private List<DownloadFile> downloadRefSeq(Path path) throws IOException, InterruptedException {
        if (!isHomoSapiens()) {
            // An empty list (not null) keeps download()'s addAll call safe.
            return Collections.emptyList();
        }
        logger.info("Downloading RefSeq...");
        List<DownloadFile> downloadFiles = new ArrayList<>();
        String filenamePrefix = "refSeq_" + StringUtils.capitalize(speciesShortName) + "." + assemblyConfiguration.getName();

        String gtfUrl = configuration.getDownload().getRefSeq().getHost();
        saveVersionData(EtlCommons.REFSEQ_DATA, "RefSeq", null, getTimeStamp(), Collections.singletonList(gtfUrl), path.resolve("refSeqVersion.json"));
        logger.info("downloading " + gtfUrl);
        downloadFiles.add(downloadFile(gtfUrl, path.resolve(filenamePrefix + ".gtf.gz").toString()));

        String genomicFastaUrl = configuration.getDownload().getRefSeqFasta().getHost();
        String genomicFastaFilename = filenamePrefix + "_genomic.fna.gz";
        logger.info("downloading " + genomicFastaUrl);
        Path genomicFastaPath = path.resolve(genomicFastaFilename);
        saveVersionData(EtlCommons.REFSEQ_DATA, "RefSeq", null, getTimeStamp(), Collections.singletonList(genomicFastaUrl), path.resolve("refSeqFastaVersion.json"));
        downloadFiles.add(downloadFile(genomicFastaUrl, genomicFastaPath.toString()));
        logger.info("Unzipping file: " + genomicFastaFilename);
        EtlCommons.runCommandLineProcess(null, "gunzip", Collections.singletonList(genomicFastaPath.toString()), null);

        String proteinFastaUrl = configuration.getDownload().getRefSeqProteinFasta().getHost();
        Path proteinFastaPath = path.resolve(filenamePrefix + "_protein.faa.gz");
        saveVersionData(EtlCommons.REFSEQ_DATA, "RefSeq", null, getTimeStamp(), Collections.singletonList(proteinFastaUrl), path.resolve("refSeqProteinFastaVersion.json"));
        downloadFiles.add(downloadFile(proteinFastaUrl, proteinFastaPath.toString()));

        String cdnaUrl = configuration.getDownload().getRefSeqCdna().getHost();
        Path cdnaPath = path.resolve(filenamePrefix + "_cdna.fna.gz");
        saveVersionData(EtlCommons.REFSEQ_DATA, "RefSeq", null, getTimeStamp(), Collections.singletonList(cdnaUrl), path.resolve("refSeqCdnaFastaVersion.json"));
        downloadFiles.add(downloadFile(cdnaUrl, cdnaPath.toString()));
        return downloadFiles;
    }

    /**
     * Downloads the MANE Select file, named after the last path segment of its URL, and writes
     * "maneSelectVersion.json".
     *
     * @param path target folder
     * @return the download result, or {@code null} when the species is not "Homo sapiens"
     */
    private DownloadFile downloadMane(Path path) throws IOException, InterruptedException {
        if (!isHomoSapiens()) {
            return null;
        }
        logger.info("Downloading MANE Select ...");
        String url = configuration.getDownload().getManeSelect().getHost();
        saveVersionData(EtlCommons.GENE_DATA, "MANE Select", configuration.getDownload().getManeSelect().getVersion(), getTimeStamp(), Collections.singletonList(url), path.resolve("maneSelectVersion.json"));
        String[] urlSegments = url.split("/");
        return downloadFile(url, path.resolve(urlSegments[urlSegments.length - 1]).toString());
    }

    /**
     * Downloads the LRG file, named after the last path segment of its URL, and writes
     * "lrgVersion.json".
     *
     * @param path target folder
     * @return the download result, or {@code null} when the species is not "Homo sapiens"
     */
    private DownloadFile downloadLrg(Path path) throws IOException, InterruptedException {
        if (!isHomoSapiens()) {
            return null;
        }
        logger.info("Downloading LRG ...");
        String url = configuration.getDownload().getLrg().getHost();
        saveVersionData(EtlCommons.GENE_DATA, "LRG", configuration.getDownload().getLrg().getVersion(), getTimeStamp(), Collections.singletonList(url), path.resolve("lrgVersion.json"));
        String[] urlSegments = url.split("/");
        return downloadFile(url, path.resolve(urlSegments[urlSegments.length - 1]).toString());
    }

    /**
     * Downloads the GO annotation file as "goa_human.gaf.gz" and writes "goAnnotationVersion.json".
     *
     * @param path target folder
     * @return the download result, or {@code null} when the species is not "Homo sapiens"
     */
    private DownloadFile downloadGO(Path path) throws IOException, InterruptedException {
        if (!isHomoSapiens()) {
            return null;
        }
        logger.info("Downloading go annotation...");
        String url = configuration.getDownload().getGoAnnotation().getHost();
        saveVersionData(EtlCommons.GENE_DATA, GO_ANNOTATION_NAME, null, getTimeStamp(), Collections.singletonList(url), path.resolve("goAnnotationVersion.json"));
        return downloadFile(url, path.resolve("goa_human.gaf.gz").toString());
    }

    /**
     * Downloads the gnomAD constraints file and writes "gnomadVersion.json".
     *
     * @param path target folder
     * @return the download result, or {@code null} when the species is not "Homo sapiens"
     */
    private DownloadFile downloadGnomadConstraints(Path path) throws IOException, InterruptedException {
        if (!isHomoSapiens()) {
            return null;
        }
        logger.info("Downloading gnomAD constraints data...");
        String url = configuration.getDownload().getGnomadConstraints().getHost();
        saveVersionData(EtlCommons.GENE_DATA, GNOMAD_NAME, configuration.getDownload().getGnomadConstraints().getVersion(), getTimeStamp(), Collections.singletonList(url), path.resolve("gnomadVersion.json"));
        // NOTE(review): the local file name hard-codes "v2.1.1" regardless of the configured version.
        return downloadFile(url, path.resolve("gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz").toString());
    }

    /**
     * Downloads the DGIdb drug-gene file as "dgidb.tsv" and writes "dgidbVersion.json".
     *
     * @param path target folder
     * @return the download result, or {@code null} when the species is not "Homo sapiens"
     */
    private DownloadFile downloadDrugData(Path path) throws IOException, InterruptedException {
        if (!isHomoSapiens()) {
            return null;
        }
        logger.info("Downloading drug-gene data...");
        String url = configuration.getDownload().getDgidb().getHost();
        saveVersionData(EtlCommons.GENE_DATA, DGIDB_NAME, null, getTimeStamp(), Collections.singletonList(url), path.resolve("dgidbVersion.json"));
        return downloadFile(url, path.resolve("dgidb.tsv").toString());
    }

    /**
     * Builds the URL of the UniProt release notes by resolving "../../../" against the configured
     * xref host and appending "/relnotes.txt".
     */
    private String getUniProtReleaseNotesUrl() {
        // NOTE(review): if resolve() yields a trailing "/", the result contains "//relnotes.txt";
        // left unchanged so the requested URL stays exactly as before.
        return URI.create(configuration.getDownload().getGeneUniprotXref().getHost()).resolve("../../../").toString() + "/relnotes.txt";
    }

    /**
     * Reads the release identifier from a release notes file: the third space-separated token of
     * its first line.
     *
     * @param str path of the release notes file
     * @return the third token of the first line
     * @throws IOException if the file fails the existence check, cannot be read, is empty, or its
     *         first line has fewer than three space-separated tokens
     */
    private String getUniProtRelease(String str) throws IOException {
        Path relnotesPath = Paths.get(str);
        FileUtils.checkFile(relnotesPath);
        // try-with-resources closes the reader even when parsing fails.
        try (BufferedReader reader = Files.newBufferedReader(relnotesPath, Charset.defaultCharset())) {
            String firstLine = reader.readLine();
            if (firstLine == null) {
                throw new IOException("UniProt release notes file is empty: " + relnotesPath);
            }
            String[] tokens = firstLine.split(" ");
            if (tokens.length < 3) {
                throw new IOException("Cannot parse UniProt release from first line of " + relnotesPath + ": '" + firstLine + "'");
            }
            return tokens[2];
        }
    }

    /**
     * Downloads the UniProt ID mapping file and the release notes when the species has an entry in
     * {@link #GENE_UNIPROT_XREF_FILES}, then writes "uniprotXrefVersion.json" using the release
     * parsed from the downloaded notes.
     *
     * @param path target folder
     * @return the download results; empty when the species has no mapping file
     */
    private List<DownloadFile> downloadGeneUniprotXref(Path path) throws IOException, InterruptedException {
        logger.info("Downloading UniProt ID mapping ...");
        List<DownloadFile> downloadFiles = new ArrayList<>();
        String xrefFilename = GENE_UNIPROT_XREF_FILES.get(speciesConfiguration.getScientificName());
        if (xrefFilename != null) {
            String xrefUrl = configuration.getDownload().getGeneUniprotXref().getHost() + "/" + xrefFilename;
            downloadFiles.add(downloadFile(xrefUrl, path.resolve("idmapping_selected.tab.gz").toString()));

            Path relnotesPath = path.resolve("uniprotRelnotes.txt");
            downloadFiles.add(downloadFile(getUniProtReleaseNotesUrl(), relnotesPath.toString()));
            saveVersionData(EtlCommons.GENE_DATA, UNIPROT_NAME, getUniProtRelease(relnotesPath.toString()), getTimeStamp(), Collections.singletonList(xrefUrl), path.resolve("uniprotXrefVersion.json"));
        }
        return downloadFiles;
    }

    /**
     * Downloads the Gene Expression Atlas file as "allgenes_updown_in_organism_part.tab.gz" and
     * writes "geneExpressionAtlasVersion.json". Runs for every species.
     *
     * @param path target folder
     * @return the download result
     */
    private DownloadFile downloadGeneExpressionAtlas(Path path) throws IOException, InterruptedException {
        logger.info("Downloading gene expression atlas ...");
        String url = configuration.getDownload().getGeneExpressionAtlas().getHost();
        saveVersionData(EtlCommons.GENE_DATA, GENE_EXPRESSION_ATLAS_NAME, getGeneExpressionAtlasVersion(), getTimeStamp(), Collections.singletonList(url), path.resolve("geneExpressionAtlasVersion.json"));
        return downloadFile(url, path.resolve("allgenes_updown_in_organism_part.tab.gz").toString());
    }

    /**
     * Derives the Gene Expression Atlas version from the configured URL: the sixth "_"-separated
     * token of the file base name, with ".tab" removed.
     */
    private String getGeneExpressionAtlasVersion() {
        // NOTE(review): throws ArrayIndexOutOfBoundsException if the base name has fewer than six
        // "_"-separated tokens; the expected URL layout is not visible here.
        String baseName = FilenameUtils.getBaseName(configuration.getDownload().getGeneExpressionAtlas().getHost());
        return baseName.split("_")[5].replace(".tab", "");
    }

    /**
     * Downloads the HPO file and every configured DisGeNET file, writing a version file for each
     * source. A DisGeNET file named "readme.txt" (case-insensitive) is stored as
     * "disgenetReadme.txt", which is then read to obtain the DisGeNET version.
     *
     * @param path target folder
     * @return the download results, HPO first followed by the DisGeNET files
     */
    private List<DownloadFile> downloadGeneDiseaseAnnotation(Path path) throws IOException, InterruptedException {
        logger.info("Downloading gene disease annotation ...");
        List<DownloadFile> downloadFiles = new ArrayList<>();

        String hpoUrl = configuration.getDownload().getHpo().getHost();
        downloadFiles.add(downloadFile(hpoUrl, path.resolve(StringUtils.substringAfterLast(hpoUrl, "/")).toString()));
        saveVersionData(EtlCommons.GENE_DATA, HPO_NAME, null, getTimeStamp(), Collections.singletonList(hpoUrl), path.resolve(EtlCommons.HPO_VERSION_FILE));

        String disgenetHost = configuration.getDownload().getDisgenet().getHost();
        for (String disgenetFile : configuration.getDownload().getDisgenet().getFiles()) {
            String localFilename = disgenetFile.equalsIgnoreCase("readme.txt") ? "disgenetReadme.txt" : disgenetFile;
            downloadFiles.add(downloadFile(disgenetHost + "/" + disgenetFile, path.resolve(localFilename).toString()));
        }
        saveVersionData(EtlCommons.GENE_DISEASE_ASSOCIATION_DATA, DISGENET_NAME, getVersionFromVersionLine(path.resolve("disgenetReadme.txt"), "(version"), getTimeStamp(), Collections.singletonList(disgenetHost), path.resolve("disgenetVersion.json"));
        return downloadFiles;
    }

    /**
     * Runs the "gene_extra_info.pl" script with {@code path} as output directory: directly when
     * the environment variable CELLBASE_BUILD_DOCKER equals "true", otherwise through the
     * {@link #dockerImage} container with {@code path} bound to "/ensembl-data". Not invoked by
     * {@link #download()} in this class.
     *
     * @param path output directory for the script
     */
    private void runGeneExtraInfo(Path path) throws IOException, InterruptedException {
        logger.info("Downloading gene extra info ...");
        String outdir = path.toAbsolutePath().toString();
        if ("true".equals(System.getenv("CELLBASE_BUILD_DOCKER"))) {
            EtlCommons.runCommandLineProcess(null, "/opt/cellbase/gene_extra_info.pl", Arrays.asList("--outdir", outdir), downloadLogFolder + "/gene_extra_info.log");
        } else {
            // Raw casts kept verbatim: DockerUtils.run's parameter types are not visible here.
            DockerUtils.run(dockerImage, (List) null, new AbstractMap.SimpleEntry(outdir, "/ensembl-data"), "/opt/cellbase/gene_extra_info.pl --outdir /ensembl-data", (Map) null);
        }
    }

    /** Builds the read-only species-name to UniProt ID mapping file-name table. */
    private static Map<String, String> buildGeneUniprotXrefFiles() {
        Map<String, String> xrefFiles = new HashMap<>();
        xrefFiles.put("Homo sapiens", "HUMAN_9606_idmapping_selected.tab.gz");
        xrefFiles.put("Mus musculus", "MOUSE_10090_idmapping_selected.tab.gz");
        xrefFiles.put("Rattus norvegicus", "RAT_10116_idmapping_selected.tab.gz");
        xrefFiles.put("Danio rerio", "DANRE_7955_idmapping_selected.tab.gz");
        xrefFiles.put("Drosophila melanogaster", "DROME_7227_idmapping_selected.tab.gz");
        xrefFiles.put("Saccharomyces cerevisiae", "YEAST_559292_idmapping_selected.tab.gz");
        return Collections.unmodifiableMap(xrefFiles);
    }
}
