package org.opencb.cellbase.app.cli;

import com.beust.jcommander.ParameterException;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.WebTarget;
import org.apache.commons.collections.map.HashedMap;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.opencb.cellbase.app.cli.CliOptionsParser;
import org.opencb.cellbase.core.config.Species;
import org.opencb.commons.utils.FileUtils;

/* loaded from: input_file:org/opencb/cellbase/app/cli/DownloadCommandExecutor.class */
public class DownloadCommandExecutor extends CommandExecutor {
    private CliOptionsParser.DownloadCommandOptions downloadCommandOptions;
    private Path output;
    private Path common;
    private File ensemblScriptsFolder;
    private String ensemblVersion;
    private String ensemblRelease;
    private Species species;
    private static final String[] VARIATION_FILES = {"variation.txt.gz", "variation_feature.txt.gz", "transcript_variation.txt.gz", "variation_synonym.txt.gz", "seq_region.txt.gz", "source.txt.gz", "attrib.txt.gz", "attrib_type.txt.gz", "seq_region.txt.gz", "structural_variation_feature.txt.gz", "study.txt.gz", "phenotype.txt.gz", "phenotype_feature.txt.gz", "phenotype_feature_attrib.txt.gz", "motif_feature_variation.txt.gz", "genotype_code.txt.gz", "allele_code.txt.gz", "population_genotype.txt.gz", "population.txt.gz", "allele.txt.gz"};

    @Deprecated
    private static final String[] DEPRECATED_REGULATION_FILES = {"AnnotatedFeatures.gff.gz", "MotifFeatures.gff.gz", "RegulatoryFeatures_MultiCell.gff.gz"};
    private static final Map<String, String> GENE_UNIPROT_XREF_FILES = new HashMap() { // from class: org.opencb.cellbase.app.cli.DownloadCommandExecutor.1
        {
            put("Homo sapiens", "HUMAN_9606_idmapping_selected.tab.gz");
            put("Mus musculus", "MOUSE_10090_idmapping_selected.tab.gz");
            put("Rattus norvegicus", "RAT_10116_idmapping_selected.tab.gz");
            put("Danio rerio", "DANRE_7955_idmapping_selected.tab.gz");
            put("Drosophila melanogaster", "DROME_7227_idmapping_selected.tab.gz");
            put("Saccharomyces cerevisiae", "YEAST_559292_idmapping_selected.tab.gz");
        }
    };
    private static final String ENSEMBL_NAME = "ENSEMBL";
    private static final String GENE_EXPRESSION_ATLAS_NAME = "Gene Expression Atlas";
    private static final String HPO_NAME = "HPO";
    private static final String DISGENET_NAME = "DisGeNET";
    private static final String DGIDB_NAME = "DGIdb";
    private static final String UNIPROT_NAME = "UniProt";
    private static final String CADD_NAME = "CADD";
    private static final String MIRBASE_NAME = "miRBase";
    private static final String MIRTARBASE_NAME = "miRTarBase";
    private static final String TARGETSCAN_NAME = "TargetScan";
    private static final String INTACT_NAME = "IntAct";
    private static final String INTERPRO_NAME = "InterPro";
    private static final String GERP_NAME = "GERP++";
    private static final String PHASTCONS_NAME = "PhastCons";
    private static final String PHYLOP_NAME = "PhyloP";
    private static final String CLINVAR_NAME = "ClinVar";
    private static final String IARCTP53_NAME = "IARC TP53 Database";
    private static final String DGV_NAME = "DGV";
    private static final String GWAS_NAME = "Gwas Catalog";
    private static final String DBSNP_NAME = "dbSNP";
    private static final String REACTOME_NAME = "Reactome";
    private static final String TRF_NAME = "Tandem repeats finder";
    private static final String GSD_NAME = "Genomic super duplications";
    private static final String WM_NAME = "WindowMasker";

    /**
     * Builds the executor from the parsed 'download' command options.
     *
     * <p>The output folder is taken from the options when present. The common folder is taken from the
     * options when present, otherwise it is the "common" sub-folder of the output folder.
     *
     * @param downloadCommandOptions parsed options of the 'download' command
     * @throws ParameterException if neither an output folder nor a common folder has been supplied, since
     *                            the common folder could not be derived (previously a bare NullPointerException)
     */
    public DownloadCommandExecutor(CliOptionsParser.DownloadCommandOptions downloadCommandOptions) {
        super(downloadCommandOptions.commonOptions.logLevel, downloadCommandOptions.commonOptions.verbose, downloadCommandOptions.commonOptions.conf);
        this.downloadCommandOptions = downloadCommandOptions;

        this.output = downloadCommandOptions.output != null ? Paths.get(downloadCommandOptions.output) : null;

        if (downloadCommandOptions.common != null) {
            this.common = Paths.get(downloadCommandOptions.common);
        } else if (this.output != null) {
            this.common = this.output.resolve("common");
        } else {
            // Without an output folder there is nothing to resolve "common" against.
            throw new ParameterException("Either an output folder or a common folder must be provided for the 'download' command");
        }

        this.ensemblScriptsFolder = new File(System.getProperty("basedir") + "/bin/ensembl-scripts/");
    }

    /**
     * Resolves the species requested on the command line against the configured species (matching the
     * scientific name, common name or id, case-insensitively) and processes it.
     *
     * <p>Failures are logged rather than propagated. When the download is interrupted, the thread's
     * interrupt flag is restored so that callers can still observe the interruption.
     */
    @Override // org.opencb.cellbase.app.cli.CommandExecutor
    public void execute() {
        try {
            String requestedSpecies = this.downloadCommandOptions.species;
            if (requestedSpecies == null || requestedSpecies.isEmpty()) {
                this.logger.error("--species parameter '{}' not valid", requestedSpecies);
                return;
            }

            for (Species candidate : this.configuration.getAllSpecies()) {
                if (requestedSpecies.equalsIgnoreCase(candidate.getScientificName())
                        || requestedSpecies.equalsIgnoreCase(candidate.getCommonName())
                        || requestedSpecies.equalsIgnoreCase(candidate.getId())) {
                    this.species = candidate;
                    break;
                }
            }

            if (this.species == null) {
                this.logger.error("Species '{}' not valid", requestedSpecies);
                return;
            }
            processSpecies(this.species);
        } catch (ParameterException e) {
            this.logger.error("Error in 'download' command line: " + e.getMessage(), e);
        } catch (IOException e) {
            this.logger.error("Error downloading '" + this.downloadCommandOptions.species + "' files: " + e.getMessage(), e);
        } catch (InterruptedException e) {
            // Restore the interrupt status: swallowing it would hide the interruption from the caller.
            Thread.currentThread().interrupt();
            this.logger.error("Error downloading '" + this.downloadCommandOptions.species + "' files: " + e.getMessage(), e);
        }
    }

    /* JADX WARN: Removed duplicated region for block: B:102:0x03a1 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:109:0x03b9 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:116:0x03d1 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:123:0x03e9 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:126:0x030c A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:63:0x031d A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:67:0x032e A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:74:0x0345 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:81:0x035d A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:88:0x0378 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:95:0x0389 A[SYNTHETIC] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Processes the species selected by {@code execute()}.
     *
     * NOTE(review): the decompiler could not reconstruct this method (see the JADX warnings above), so the
     * body below is only a placeholder that unconditionally throws {@link UnsupportedOperationException}.
     * The real logic (1036 bytecode instructions according to the dump note) must be restored from the
     * original sources or the bytecode before this class can be used; presumably it dispatches to the
     * download* methods of this class — TODO confirm.
     *
     * @param r8 species to process (decompiler-assigned register name)
     * @throws java.io.IOException            declared by the original method signature
     * @throws java.lang.InterruptedException declared by the original method signature
     */
    private void processSpecies(org.opencb.cellbase.core.config.Species r8) throws java.io.IOException, java.lang.InterruptedException {
        /*
            Method dump skipped, instructions count: 1036
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: org.opencb.cellbase.app.cli.DownloadCommandExecutor.processSpecies(org.opencb.cellbase.core.config.Species):void");
    }

    /**
     * Downloads the DGV variants file into the "structuralVariants" sub-folder and stores its version data.
     * Only "Homo sapiens" is handled; any other species is a no-op.
     *
     * @param species species being processed
     * @param str     assembly name; "grch37" (any case) selects the GRCh37/hg19 file, anything else GRCh38/hg38
     * @param path    folder under which the "structuralVariants" folder is created
     */
    private void downloadStructuralVariants(Species species, String str, Path path) throws IOException, InterruptedException {
        if (!species.getScientificName().equals("Homo sapiens")) {
            return;
        }

        this.logger.info("Downloading DGV data ...");
        Path svFolder = path.resolve("structuralVariants");
        makeDir(svFolder);

        String assemblyPrefix;
        if (str.equalsIgnoreCase("grch37")) {
            assemblyPrefix = "GRCh37_hg19";
        } else {
            assemblyPrefix = "GRCh38_hg38";
        }
        String dgvFileName = assemblyPrefix + "_variants_2016-05-15.txt";
        String dgvUrl = this.configuration.getDownload().getDgv().getHost() + "/" + dgvFileName;

        downloadFile(dgvUrl, svFolder.resolve(EtlCommons.DGV_FILE).toString());

        String dgvVersion = getDGVVersion(dgvFileName);
        saveVersionData(EtlCommons.STRUCTURAL_VARIANTS_DATA, DGV_NAME, dgvVersion, getTimeStamp(),
                Collections.singletonList(dgvUrl), svFolder.resolve(EtlCommons.DGV_VERSION_FILE));
    }

    /**
     * Extracts the version token from a DGV file name: the text before the first '.' is split on '_'
     * and the fourth piece is returned (e.g. "2016-05-15" for "GRCh37_hg19_variants_2016-05-15.txt").
     *
     * @param str DGV file name
     * @return the fourth underscore-separated token of the name without its extension
     */
    private String getDGVVersion(String str) {
        String nameWithoutExtension = str.split("\\.")[0];
        String[] tokens = nameWithoutExtension.split("_");
        return tokens[3];
    }

    /**
     * Tells whether the species' data list contains the given data type, logging a warning when it does not.
     *
     * @param species species to inspect
     * @param str     data type name looked up in {@code species.getData()}
     * @return true when the data list is non-null and contains {@code str}; false otherwise
     */
    private boolean speciesHasInfoToDownload(Species species, String str) {
        if (species.getData() != null && species.getData().contains(str)) {
            return true;
        }
        this.logger.warn("Species '{}' has no '{}' information available to download", species.getScientificName(), str);
        return false;
    }

    /**
     * Returns the name of the configured species group the given species belongs to. The groups are
     * checked in a fixed order: vertebrates, metazoa, fungi, protists, plants, virus, bacteria.
     *
     * @param species species to classify
     * @return the group name of the first configured group that contains the species
     * @throws ParameterException if no configured group contains the species
     */
    private String getPhylo(Species species) {
        String phylo;
        if (this.configuration.getSpecies().getVertebrates().contains(species)) {
            phylo = "vertebrates";
        } else if (this.configuration.getSpecies().getMetazoa().contains(species)) {
            phylo = "metazoa";
        } else if (this.configuration.getSpecies().getFungi().contains(species)) {
            phylo = "fungi";
        } else if (this.configuration.getSpecies().getProtist().contains(species)) {
            phylo = "protists";
        } else if (this.configuration.getSpecies().getPlants().contains(species)) {
            phylo = "plants";
        } else if (this.configuration.getSpecies().getVirus().contains(species)) {
            phylo = "virus";
        } else if (this.configuration.getSpecies().getBacteria().contains(species)) {
            phylo = "bacteria";
        } else {
            throw new ParameterException("Species " + species.getScientificName() + " not associated to any phylo in the configuration file");
        }
        return phylo;
    }

    /**
     * Downloads the reference genome FASTA from Ensembl and stores its version data.
     *
     * <p>Human uses the primary assembly file; every other species uses the toplevel file. Non-vertebrates
     * get their group name (see getPhylo) appended to the release URL, and bacteria additionally get the
     * Ensembl collection of their first assembly inserted after "/fasta/".
     *
     * @param species species being processed
     * @param str     species name as used in the Ensembl URL path and (capitalised) in the local file name
     * @param str2    assembly name used in the local file name
     * @param path    folder under which the genome folder is created
     * @param str3    base URL that the Ensembl release is appended to
     */
    private void downloadReferenceGenome(Species species, String str, String str2, Path path, String str3) throws IOException, InterruptedException {
        this.logger.info("Downloading genome information ...");
        Path genomeFolder = path.resolve(EtlCommons.GENOME_DATA);
        makeDir(genomeFolder);

        String releaseUrl = str3 + "/" + this.ensemblRelease;
        String fastaUrl;
        if (!species.getScientificName().equals("Homo sapiens")) {
            if (!this.configuration.getSpecies().getVertebrates().contains(species)) {
                releaseUrl = str3 + "/" + this.ensemblRelease + "/" + getPhylo(species);
            }
            String fastaBase = releaseUrl + "/fasta/";
            if (this.configuration.getSpecies().getBacteria().contains(species)) {
                fastaBase = fastaBase + ((Species.Assembly) species.getAssemblies().get(0)).getEnsemblCollection() + "/";
            }
            fastaUrl = fastaBase + str + "/dna/*.dna.toplevel.fa.gz";
        } else {
            fastaUrl = releaseUrl + "/fasta/" + str + "/dna/*.dna.primary_assembly.fa.gz";
        }

        String localFileName = StringUtils.capitalize(str) + "." + str2 + ".fa.gz";
        downloadFile(fastaUrl, genomeFolder.resolve(localFileName).toString());

        Path versionFile = genomeFolder.resolve("genomeVersion.json");
        this.logger.info("Saving reference genome version data at {}", versionFile);
        saveVersionData(EtlCommons.GENOME_DATA, ENSEMBL_NAME, this.ensemblVersion, getTimeStamp(),
                Collections.singletonList(fastaUrl), versionFile);
    }

    /**
     * Formats the current time as "yyyyMMdd_HHmmss".
     *
     * @return the timestamp string recorded as download date in the version files
     */
    private String getTimeStamp() {
        SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMdd_HHmmss");
        Calendar now = Calendar.getInstance();
        return timestampFormat.format(now.getTime());
    }

    /**
     * Collects the version metadata of a downloaded source into a map and writes it to the given file.
     *
     * @param str  value stored under "data"
     * @param str2 value stored under "source"
     * @param str3 value stored under "version" (callers pass null when unknown)
     * @param str4 value stored under "downloadDate"
     * @param list URLs stored under "uRL"
     * @param path file the metadata is written to
     */
    private void saveVersionData(String str, String str2, String str3, String str4, List<String> list, Path path) {
        // Entries are inserted in the original order: insertion order can influence hash-map iteration order.
        HashedMap versionData = new HashedMap();
        versionData.put("data", str);
        versionData.put("source", str2);
        versionData.put("version", str3);
        versionData.put("downloadDate", str4);
        versionData.put("uRL", list);

        writeVersionDataFile(versionData, path);
    }

    /**
     * Serialises the map as one line of JSON and writes it (followed by a newline) to the given file.
     *
     * <p>The JSON is produced before the file is opened, so a serialisation failure no longer leaves a
     * truncated file behind. The writer is closed even when writing fails, and the text is encoded as
     * UTF-8 rather than the platform default charset. I/O errors are reported on stderr and not
     * propagated, as before.
     *
     * @param map  version metadata to serialise
     * @param path destination file (created or truncated)
     */
    private void writeVersionDataFile(Map map, Path path) {
        try {
            String json = new ObjectMapper().writer().writeValueAsString(map);
            try (BufferedWriter writer = Files.newBufferedWriter(path, Charset.forName("UTF-8"))) {
                writer.write(json + "\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs every gene-related download step in sequence: Ensembl core files, drug-gene data, UniProt
     * cross references, the gene expression atlas, gene-disease annotation and the gene extra info script.
     *
     * @param species species being processed
     * @param str     species name as used in Ensembl URL paths
     * @param str2    assembly name handed to the gene extra info script
     * @param path    folder under which the gene folder is created
     * @param str3    base URL that the Ensembl release is appended to
     */
    private void downloadEnsemblGene(Species species, String str, String str2, Path path, String str3) throws IOException, InterruptedException {
        this.logger.info("Downloading gene information ...");

        Path geneFolder = path.resolve(EtlCommons.GENE_DATA);
        makeDir(geneFolder);

        // Ensembl core data goes into the gene folder; drug data is resolved against the parent folder.
        downloadEnsemblData(species, str, geneFolder, str3);
        downloadDrugData(species, path);

        // Remaining annotation sources.
        downloadGeneUniprotXref(species, geneFolder);
        downloadGeneExpressionAtlas();
        downloadGeneDiseaseAnnotation(geneFolder);

        // Finally run the Ensembl helper script.
        runGeneExtraInfo(species, str2, geneFolder);
    }

    /**
     * Downloads the DGIdb drug-gene file into "gene/geneDrug" and stores its version data.
     * Only "Homo sapiens" is handled; any other species is a no-op.
     *
     * @param species species being processed
     * @param path    folder against which "gene/geneDrug" is resolved
     */
    private void downloadDrugData(Species species, Path path) throws IOException, InterruptedException {
        if (!species.getScientificName().equals("Homo sapiens")) {
            return;
        }

        this.logger.info("Downloading drug-gene data...");
        Path geneDrugFolder = path.resolve("gene/geneDrug");
        makeDir(geneDrugFolder);

        String dgidbUrl = this.configuration.getDownload().getDgidb().getHost();
        downloadFile(dgidbUrl, geneDrugFolder.resolve("dgidb.tsv").toString());

        // No version is available for this source, hence the null version field.
        saveVersionData(EtlCommons.GENE_DATA, DGIDB_NAME, null, getTimeStamp(),
                Collections.singletonList(dgidbUrl), geneDrugFolder.resolve("dgidbVersion.json"));
    }

    /**
     * Downloads the Ensembl gene files (GTF, peptide FASTA, cDNA FASTA and motif features) into the given
     * folder and stores their version data in "ensemblCoreVersion.json".
     *
     * <p>Non-vertebrates get their group name appended to the release URL; for bacteria the Ensembl
     * collection of the first assembly is inserted into the GTF and FASTA paths.
     *
     * @param species species being processed
     * @param str     species name as used in Ensembl URL paths and local file names
     * @param path    destination folder
     * @param str2    base URL that the Ensembl release is appended to
     */
    private void downloadEnsemblData(Species species, String str, Path path, String str2) throws IOException, InterruptedException {
        this.logger.info("Downloading gene Ensembl data (gtf, pep, cdna, motifs) ...");
        List<String> downloadedUrls = new ArrayList<>(4);

        String releaseUrl = str2 + "/" + this.ensemblRelease;
        if (!this.configuration.getSpecies().getVertebrates().contains(species)) {
            releaseUrl = str2 + "/" + this.ensemblRelease + "/" + getPhylo(species);
        }

        String collectionSegment;
        if (this.configuration.getSpecies().getBacteria().contains(species)) {
            collectionSegment = ((Species.Assembly) species.getAssemblies().get(0)).getEnsemblCollection() + "/";
        } else {
            collectionSegment = "";
        }

        // GTF: the file name carries the numeric part of the release (text after the '-').
        String gtfUrl = releaseUrl + "/gtf/" + collectionSegment + str + "/*" + this.ensemblRelease.split("-")[1] + ".gtf.gz";
        downloadFile(gtfUrl, path.resolve(str + ".gtf.gz").toString());
        downloadedUrls.add(gtfUrl);

        // Peptide sequences.
        String pepUrl = releaseUrl + "/fasta/" + collectionSegment + str + "/pep/*.pep.all.fa.gz";
        downloadFile(pepUrl, path.resolve(str + ".pep.all.fa.gz").toString());
        downloadedUrls.add(pepUrl);

        // cDNA sequences.
        String cdnaUrl = releaseUrl + "/fasta/" + collectionSegment + str + "/cdna/*.cdna.all.fa.gz";
        downloadFile(cdnaUrl, path.resolve(str + ".cdna.all.fa.gz").toString());
        downloadedUrls.add(cdnaUrl);

        // Motif features from the regulation folder.
        String motifUrl = releaseUrl + "/regulation/" + str + "/MotifFeatures.gff.gz";
        downloadFile(motifUrl, path.resolve("MotifFeatures.gff.gz").toString());
        downloadedUrls.add(motifUrl);

        saveVersionData(EtlCommons.GENE_DATA, ENSEMBL_NAME, this.ensemblVersion, getTimeStamp(),
                downloadedUrls, path.resolve("ensemblCoreVersion.json"));
    }

    /**
     * Downloads the UniProt id-mapping file for the species (when one is registered in
     * GENE_UNIPROT_XREF_FILES) plus the UniProt release notes, and stores the version data.
     *
     * @param species species being processed
     * @param path    destination folder
     */
    private void downloadGeneUniprotXref(Species species, Path path) throws IOException, InterruptedException {
        this.logger.info("Downloading UniProt ID mapping ...");

        if (!GENE_UNIPROT_XREF_FILES.containsKey(species.getScientificName())) {
            return;
        }

        String mappingUrl = this.configuration.getDownload().getGeneUniprotXref().getHost()
                + "/" + GENE_UNIPROT_XREF_FILES.get(species.getScientificName());
        downloadFile(mappingUrl, path.resolve("idmapping_selected.tab.gz").toString());
        downloadFile(getUniProtReleaseNotesUrl(), path.resolve("uniprotRelnotes.txt").toString());

        // The release is read back from the release notes that were just downloaded.
        String uniprotRelease = getUniProtRelease(path.resolve("uniprotRelnotes.txt").toString());
        saveVersionData(EtlCommons.GENE_DATA, UNIPROT_NAME, uniprotRelease, getTimeStamp(),
                Collections.singletonList(mappingUrl), path.resolve("uniprotXrefVersion.json"));
    }

    /**
     * Reads the UniProt release from the first line of a release-notes file: the line is split on single
     * spaces and the third token is returned (e.g. "2016_05" for "UniProt Release 2016_05").
     *
     * <p>The reader is always closed. An empty file or a first line with fewer than three tokens yields
     * null instead of an unchecked exception. I/O errors are reported on stderr and also yield null.
     *
     * @param str path of the release-notes file
     * @return the third space-separated token of the first line, or null if it cannot be obtained
     */
    private String getUniProtRelease(String str) {
        try (BufferedReader reader = Files.newBufferedReader(Paths.get(str), Charset.defaultCharset())) {
            String firstLine = reader.readLine();
            if (firstLine == null) {
                return null;
            }
            String[] tokens = firstLine.split(" ");
            return tokens.length > 2 ? tokens[2] : null;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Builds the release-notes URL by resolving "../../../" against the configured gene/UniProt xref host
     * and appending "/relnotes.txt".
     *
     * @return URL of the UniProt release notes
     */
    private String getUniProtReleaseNotesUrl() {
        URI xrefHost = URI.create(this.configuration.getDownload().getGeneUniprotXref().getHost());
        URI releaseRoot = xrefHost.resolve("../../../");
        return releaseRoot.toString() + "/relnotes.txt";
    }

    /**
     * Downloads the gene expression atlas file into the "expression" folder under the common folder and
     * stores its version data. Nothing is downloaded when that folder already exists.
     */
    private void downloadGeneExpressionAtlas() throws IOException, InterruptedException {
        this.logger.info("Downloading gene expression atlas ...");

        Path expressionFolder = this.common.resolve("expression");
        if (!Files.exists(expressionFolder)) {
            makeDir(expressionFolder);

            String atlasUrl = this.configuration.getDownload().getGeneExpressionAtlas().getHost();
            downloadFile(atlasUrl, expressionFolder.resolve("allgenes_updown_in_organism_part.tab.gz").toString());

            saveVersionData(EtlCommons.GENE_DATA, GENE_EXPRESSION_ATLAS_NAME, getGeneExpressionAtlasVersion(), getTimeStamp(),
                    Collections.singletonList(atlasUrl), expressionFolder.resolve("geneExpressionAtlasVersion.json"));
        }
    }

    /**
     * Derives the gene expression atlas version from the configured host URL: the base name of the URL is
     * split on '_' and the sixth token is returned with any ".tab" removed.
     *
     * @return the version token taken from the configured file name
     */
    private String getGeneExpressionAtlasVersion() {
        String atlasUrl = this.configuration.getDownload().getGeneExpressionAtlas().getHost();
        String baseName = FilenameUtils.getBaseName(atlasUrl);
        String versionToken = baseName.split("_")[5];
        return versionToken.replace(".tab", "");
    }

    /**
     * Downloads the HPO and DisGeNET gene-disease files (plus the DisGeNET readme) into the given folder
     * and stores the version data of both sources. The DisGeNET version is read from the readme line that
     * contains "(version".
     *
     * @param path destination folder
     */
    private void downloadGeneDiseaseAnnotation(Path path) throws IOException, InterruptedException {
        this.logger.info("Downloading gene disease annotation ...");

        // HPO: the local file keeps the name found after the last '/' of the URL; no version is recorded.
        String hpoUrl = this.configuration.getDownload().getHpo().getHost();
        String hpoFileName = StringUtils.substringAfterLast(hpoUrl, "/");
        downloadFile(hpoUrl, path.resolve(hpoFileName).toString());
        saveVersionData(EtlCommons.GENE_DATA, HPO_NAME, null, getTimeStamp(),
                Collections.singletonList(hpoUrl), path.resolve("hpoVersion.json"));

        // DisGeNET: data file plus readme.
        String disgenetUrl = this.configuration.getDownload().getDisgenet().getHost();
        String disgenetReadmeUrl = this.configuration.getDownload().getDisgenetReadme().getHost();
        String disgenetFileName = StringUtils.substringAfterLast(disgenetUrl, "/");
        downloadFile(disgenetUrl, path.resolve(disgenetFileName).toString());
        downloadFile(disgenetReadmeUrl, path.resolve("disgenetReadme.txt").toString());

        String disgenetVersion = getVersionFromVersionLine(path.resolve("disgenetReadme.txt"), "(version");
        saveVersionData(EtlCommons.GENE_DISEASE_ASSOCIATION_DATA, DISGENET_NAME, disgenetVersion, getTimeStamp(),
                Collections.singletonList(disgenetUrl), path.resolve("disgenetVersion.json"));
    }

    /**
     * Scans a text file for the first line containing the given tag and returns the text between the
     * first '(' and the following ')' of that line (e.g. "version 4.0" for "Data (version 4.0) ...").
     *
     * <p>The reader is closed on every path, including when no line matches or an error occurs.
     * I/O errors are reported on stderr and yield null.
     *
     * @param path file to scan
     * @param str  tag that identifies the version line
     * @return the parenthesised text of the first matching line, or null if no line matches or on I/O error
     */
    private String getVersionFromVersionLine(Path path, String str) {
        try (BufferedReader reader = Files.newBufferedReader(path, Charset.defaultCharset())) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.contains(str)) {
                    return line.split("\\(")[1].split("\\)")[0];
                }
            }
            return null;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Runs the "gene_extra_info.pl" script from the Ensembl scripts folder, writing its log to
     * "gene_extra_info.log" in the given folder, and logs whether it succeeded.
     *
     * <p>"--phylo no-vertebrate" is added when the species held in {@code this.species} is neither a
     * configured vertebrate nor "Drosophila melanogaster".
     *
     * @param species species whose scientific name is passed to the script
     * @param str     assembly name passed to the script
     * @param path    output folder passed to the script (as an absolute path)
     */
    private void runGeneExtraInfo(Species species, String str, Path path) throws IOException, InterruptedException {
        this.logger.info("Downloading gene extra info ...");
        String logFile = path.resolve("gene_extra_info.log").toString();

        ArrayList scriptArgs = new ArrayList(Arrays.asList(
                "--species", species.getScientificName(),
                "--assembly", str,
                "--outdir", path.toAbsolutePath().toString(),
                "--ensembl-libs", this.configuration.getDownload().getEnsembl().getLibs()));

        boolean vertebrate = this.configuration.getSpecies().getVertebrates().contains(this.species);
        if (!(vertebrate || this.species.getScientificName().equals("Drosophila melanogaster"))) {
            scriptArgs.add("--phylo");
            scriptArgs.add("no-vertebrate");
        }

        boolean succeeded = EtlCommons.runCommandLineProcess(this.ensemblScriptsFolder, "./gene_extra_info.pl", scriptArgs, logFile);
        if (!succeeded) {
            this.logger.error("Gene extra info for " + species.getScientificName() + " cannot be downloaded");
        } else {
            this.logger.info("Gene extra files created OK");
        }
    }

    /**
     * Downloads every file listed in VARIATION_FILES from the species' Ensembl variation MySQL dump
     * folder and stores the version data in "ensemblVariationVersion.json".
     *
     * @param species species being processed
     * @param str     species name as used in the name of the variation database folder
     * @param path    folder under which the variation folder is created
     * @param str2    base URL that the Ensembl release is appended to
     */
    private void downloadVariation(Species species, String str, Path path, String str2) throws IOException, InterruptedException {
        this.logger.info("Downloading variation information ...");
        Path variationFolder = path.resolve(EtlCommons.VARIATION_DATA);
        makeDir(variationFolder);

        String releaseUrl = str2 + "/" + this.ensemblRelease;
        if (!this.configuration.getSpecies().getVertebrates().contains(species)) {
            // Non-vertebrates live under their group folder.
            releaseUrl = str2 + "/" + this.ensemblRelease + "/" + getPhylo(species);
        }
        String variationUrl = releaseUrl + "/mysql/" + str + "_variation_" + this.ensemblVersion;

        List<String> downloadedUrls = new ArrayList<>(VARIATION_FILES.length);
        for (int idx = 0; idx < VARIATION_FILES.length; idx++) {
            String fileName = VARIATION_FILES[idx];
            String fileUrl = variationUrl + "/" + fileName;
            downloadFile(fileUrl, variationFolder.resolve(fileName).toString());
            downloadedUrls.add(fileUrl);
        }

        saveVersionData(EtlCommons.VARIATION_DATA, ENSEMBL_NAME, this.ensemblVersion, getTimeStamp(),
                downloadedUrls, variationFolder.resolve("ensemblVariationVersion.json"));
    }

    /**
     * Downloads the regulation data and stores the version data of every source:
     * <ul>
     *   <li>Ensembl regulation files (the deprecated files, the regulatory build and the motif features);</li>
     *   <li>miRBase files, into the "mirbase" folder under the common folder, unless that folder exists;</li>
     *   <li>TargetScan and miRTarBase files for human GRCh37 (hg19) and for mouse (mm9).</li>
     * </ul>
     *
     * <p>The miRBase version record now lists both downloaded miRBase URLs; previously only the aliases
     * URL was recorded although both had been collected.
     *
     * @param species species being processed
     * @param str     species name as used in the Ensembl regulation URL path
     * @param str2    assembly name; TargetScan/miRTarBase for human is only fetched for "GRCh37" (any case)
     * @param path    folder under which the regulation folder is created
     * @param str3    base URL that the Ensembl release is appended to
     */
    private void downloadRegulation(Species species, String str, String str2, Path path, String str3) throws IOException, InterruptedException {
        this.logger.info("Downloading regulation information ...");
        Path regulationFolder = path.resolve(EtlCommons.REGULATION_DATA);
        makeDir(regulationFolder);

        // --- Ensembl regulation files ---
        String releaseUrl = str3 + "/" + this.ensemblRelease;
        if (!this.configuration.getSpecies().getVertebrates().contains(species)) {
            releaseUrl = str3 + "/" + this.ensemblRelease + "/" + getPhylo(species);
        }
        String regulationUrl = releaseUrl + "/regulation/" + str;

        List<String> ensemblUrls = new ArrayList<>(DEPRECATED_REGULATION_FILES.length + 2);
        for (String fileName : DEPRECATED_REGULATION_FILES) {
            String fileUrl = regulationUrl + "/" + fileName;
            downloadFile(fileUrl, regulationFolder.resolve(fileName).toString());
            ensemblUrls.add(fileUrl);
        }
        String regulatoryBuildUrl = regulationUrl + "/*Regulatory_Build.regulatory_features*.gff.gz";
        downloadFile(regulatoryBuildUrl, regulationFolder.resolve(EtlCommons.REGULATORY_FEATURES_FILE).toString());
        ensemblUrls.add(regulatoryBuildUrl);

        String motifFeaturesUrl = regulationUrl + "/*motiffeatures*.gff.gz";
        downloadFile(motifFeaturesUrl, regulationFolder.resolve(EtlCommons.MOTIF_FEATURES_FILE).toString());
        ensemblUrls.add(motifFeaturesUrl);

        saveVersionData(EtlCommons.REGULATION_DATA, ENSEMBL_NAME, this.ensemblVersion, getTimeStamp(),
                ensemblUrls, regulationFolder.resolve("ensemblRegulationVersion.json"));

        // --- miRBase (shared between species, downloaded only once) ---
        Path mirbaseFolder = this.common.resolve("mirbase");
        if (!Files.exists(mirbaseFolder)) {
            makeDir(mirbaseFolder);
            List<String> mirbaseUrls = new ArrayList<>(2);

            String mirnaUrl = this.configuration.getDownload().getMirbase().getHost() + "/miRNA.xls.gz";
            downloadFile(mirnaUrl, mirbaseFolder.resolve("miRNA.xls.gz").toString());
            mirbaseUrls.add(mirnaUrl);

            String aliasesUrl = this.configuration.getDownload().getMirbase().getHost() + "/aliases.txt.gz";
            downloadFile(aliasesUrl, mirbaseFolder.resolve("aliases.txt.gz").toString());
            mirbaseUrls.add(aliasesUrl);

            // The version is the first line of the readme.
            downloadFile(this.configuration.getDownload().getMirbaseReadme().getHost(), mirbaseFolder.resolve("mirbaseReadme.txt").toString());
            saveVersionData(EtlCommons.REGULATION_DATA, MIRBASE_NAME, getLine(mirbaseFolder.resolve("mirbaseReadme.txt"), 1), getTimeStamp(),
                    mirbaseUrls, mirbaseFolder.resolve("mirbaseVersion.json"));
        }

        // --- TargetScan and miRTarBase: human GRCh37 ---
        if (species.getScientificName().equals("Homo sapiens") && str2.equalsIgnoreCase("GRCh37")) {
            String targetScanUrl = this.configuration.getDownload().getTargetScan().getHost() + "/hg19/database/targetScanS.txt.gz";
            downloadFile(targetScanUrl, regulationFolder.resolve("targetScanS.txt.gz").toString());
            saveVersionData(EtlCommons.REGULATION_DATA, TARGETSCAN_NAME, null, getTimeStamp(),
                    Collections.singletonList(targetScanUrl), regulationFolder.resolve("targetScanVersion.json"));

            String mirTarBaseUrl = this.configuration.getDownload().getMiRTarBase().getHost() + "/hsa_MTI.xls";
            downloadFile(mirTarBaseUrl, regulationFolder.resolve("hsa_MTI.xls").toString());
            // The version is taken from the sixth '/'-separated segment of the URL.
            saveVersionData(EtlCommons.REGULATION_DATA, MIRTARBASE_NAME, mirTarBaseUrl.split("/")[5], getTimeStamp(),
                    Collections.singletonList(mirTarBaseUrl), regulationFolder.resolve("miRTarBaseVersion.json"));
        }

        // --- TargetScan and miRTarBase: mouse ---
        if (species.getScientificName().equals("Mus musculus")) {
            String targetScanUrl = this.configuration.getDownload().getTargetScan().getHost() + "/mm9/database/targetScanS.txt.gz";
            downloadFile(targetScanUrl, regulationFolder.resolve("targetScanS.txt.gz").toString());
            downloadFile(this.configuration.getDownload().getTargetScan().getHost() + "/mm9/database/README.txt",
                    regulationFolder.resolve("targetScanReadme.txt").toString());
            saveVersionData(EtlCommons.REGULATION_DATA, TARGETSCAN_NAME, null, getTimeStamp(),
                    Collections.singletonList(targetScanUrl), regulationFolder.resolve("targetScanVersion.json"));

            String mirTarBaseUrl = this.configuration.getDownload().getMiRTarBase().getHost() + "/mmu_MTI.xls";
            downloadFile(mirTarBaseUrl, regulationFolder.resolve("mmu_MTI.xls").toString());
            saveVersionData(EtlCommons.REGULATION_DATA, MIRTARBASE_NAME, mirTarBaseUrl.split("/")[5], getTimeStamp(),
                    Collections.singletonList(mirTarBaseUrl), regulationFolder.resolve("miRTarBaseVersion.json"));
        }
    }

    /**
     * Returns the i-th line (1-based) of a text file.
     *
     * <p>The reader is closed on every path. I/O errors are reported on stderr and yield null.
     *
     * @param path file to read
     * @param i    1-based number of the wanted line
     * @return the requested line, or null when {@code i} is less than 1, the file has fewer than
     *         {@code i} lines, or the file cannot be read
     */
    private String getLine(Path path, int i) {
        try (BufferedReader reader = Files.newBufferedReader(path, Charset.defaultCharset())) {
            String line = null;
            for (int read = 0; read < i; read++) {
                line = reader.readLine();
                if (line == null) {
                    // End of file reached before line i: further reads would only return null again.
                    break;
                }
            }
            return line;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Downloads the protein data (UniProt, IntAct and InterPro) into the protein folder under the common
     * folder, splits the UniProt XML into chunks and stores the version data of each source.
     * The whole step is skipped when the protein folder already exists.
     */
    private void downloadProtein() throws IOException, InterruptedException {
        this.logger.info("Downloading protein information ...");
        Path proteinFolder = this.common.resolve(EtlCommons.PROTEIN_DATA);

        if (!Files.exists(proteinFolder)) {
            makeDir(proteinFolder);

            // UniProt: data file plus release notes; the version is the first line of the notes.
            String uniprotUrl = this.configuration.getDownload().getUniprot().getHost();
            downloadFile(uniprotUrl, proteinFolder.resolve("uniprot_sprot.xml.gz").toString());
            downloadFile(this.configuration.getDownload().getUniprotRelNotes().getHost(), proteinFolder.resolve("uniprotRelnotes.txt").toString());
            String uniprotVersion = getLine(proteinFolder.resolve("uniprotRelnotes.txt"), 1);
            saveVersionData(EtlCommons.PROTEIN_DATA, UNIPROT_NAME, uniprotVersion, getTimeStamp(),
                    Collections.singletonList(uniprotUrl), proteinFolder.resolve("uniprotVersion.json"));

            // Split the UniProt XML into smaller chunk files.
            makeDir(proteinFolder.resolve("uniprot_chunks"));
            splitUniprot(proteinFolder.resolve("uniprot_sprot.xml.gz"), proteinFolder.resolve("uniprot_chunks"));

            // IntAct: no version is recorded.
            String intactUrl = this.configuration.getDownload().getIntact().getHost();
            downloadFile(intactUrl, proteinFolder.resolve("intact.txt").toString());
            saveVersionData(EtlCommons.PROTEIN_DATA, INTACT_NAME, null, getTimeStamp(),
                    Collections.singletonList(intactUrl), proteinFolder.resolve("intactVersion.json"));

            // InterPro: the version is the fifth line of the release notes.
            String interproUrl = this.configuration.getDownload().getInterpro().getHost();
            downloadFile(interproUrl, proteinFolder.resolve("protein2ipr.dat.gz").toString());
            downloadFile(this.configuration.getDownload().getInterproRelNotes().getHost(), proteinFolder.resolve("interproRelnotes.txt").toString());
            String interproVersion = getLine(proteinFolder.resolve("interproRelnotes.txt"), 5);
            saveVersionData(EtlCommons.PROTEIN_DATA, INTERPRO_NAME, interproVersion, getTimeStamp(),
                    Collections.singletonList(interproUrl), proteinFolder.resolve("interproVersion.json"));
        } else {
            this.logger.info("Protein: skipping this since it is already downloaded. Delete 'protein' folder to force download");
        }
    }

    /**
     * Splits a UniProt XML file into chunk files ("chunk_0.xml", "chunk_1.xml", ...) of up to 10000
     * entries each. Every chunk starts with the lines that precede the first "&lt;entry " of the input
     * and ends with "&lt;/uniprot&gt;".
     *
     * <p>Compared with the previous version: the reader and any open chunk writer are closed even when an
     * error occurs, an input without any entry no longer fails with a NullPointerException at end of
     * file, and the closing tag is only written to a chunk that is still open.
     *
     * @param path  UniProt XML file to split
     * @param path2 folder that receives the chunk files
     * @throws IOException if the input cannot be read or a chunk file cannot be created
     */
    private void splitUniprot(Path path, Path path2) throws IOException {
        final int entriesPerChunk = 10000;

        PrintWriter chunkWriter = null;
        boolean chunkOpen = false;
        StringBuilder header = new StringBuilder();
        boolean inHeader = true;
        boolean inEntry = false;
        int entryCount = 0;
        int chunkIndex = 0;

        try (BufferedReader reader = FileUtils.newBufferedReader(path)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();

                if (trimmed.startsWith("<entry ")) {
                    inEntry = true;
                    inHeader = false;
                    if (entryCount % entriesPerChunk == 0) {
                        // First entry of a new chunk: open the file and replay the header.
                        chunkWriter = new PrintWriter(new FileOutputStream(path2.resolve("chunk_" + chunkIndex + ".xml").toFile()));
                        chunkOpen = true;
                        chunkWriter.println(header.toString().trim());
                    }
                    entryCount++;
                }

                if (inHeader) {
                    header.append(line).append("\n");
                }
                if (inEntry) {
                    chunkWriter.println(line);
                }

                if (trimmed.startsWith("</entry>")) {
                    inEntry = false;
                    if (chunkOpen && entryCount % entriesPerChunk == 0) {
                        // Chunk is full: terminate the document and move on to the next file.
                        chunkWriter.print("</uniprot>");
                        chunkWriter.close();
                        chunkOpen = false;
                        chunkIndex++;
                    }
                }
            }

            // Terminate the last, partially filled chunk (if any).
            if (chunkOpen) {
                chunkWriter.print("</uniprot>");
            }
        } finally {
            if (chunkOpen) {
                chunkWriter.close();
            }
        }
    }

    /**
     * Downloads conservation score files (GERP++, phastCons and phyloP) for the given species.
     *
     * <p>Only "Homo sapiens" (assemblies GRCh37 and GRCh38) and "Mus musculus" are handled; any
     * other species or human assembly results in no download. For every source that is downloaded
     * a version JSON file is written into the conservation folder.
     *
     * @param species species whose scientific name selects the set of files to download
     * @param str     assembly name; compared case-insensitively against "GRCh37" and "GRCh38"
     *                (only consulted for Homo sapiens)
     * @param path    parent folder under which the {@code EtlCommons.CONSERVATION_DATA} folder is created
     * @throws IOException          propagated from the directory, download or version-file helpers
     * @throws InterruptedException propagated from the download helper
     */
    private void downloadConservation(Species species, String str, Path path) throws IOException, InterruptedException {
        this.logger.info("Downloading conservation information ...");
        Path conservationFolder = path.resolve(EtlCommons.CONSERVATION_DATA);
        if (species.getScientificName().equals("Homo sapiens")) {
            makeDir(conservationFolder);
            makeDir(conservationFolder.resolve("phastCons"));
            makeDir(conservationFolder.resolve("phylop"));
            makeDir(conservationFolder.resolve(EtlCommons.GERP_SUBDIRECTORY));
            String[] chromosomes = {"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "M"};
            if (str.equalsIgnoreCase("GRCh37")) {
                // GERP++ scores are only downloaded for GRCh37.
                this.logger.debug("Downloading GERP++ ...");
                String gerpUrl = this.configuration.getDownload().getGerp().getHost();
                downloadFile(gerpUrl, conservationFolder.resolve("gerp/hg19.GERP_scores.tar.gz").toAbsolutePath().toString());
                saveVersionData(EtlCommons.CONSERVATION_DATA, GERP_NAME, null, getTimeStamp(), Collections.singletonList(gerpUrl), conservationFolder.resolve("gerpVersion.json"));
                String hg19Url = this.configuration.getDownload().getConservation().getHost() + "/hg19";
                downloadPhastConsAndPhylop(chromosomes, hg19Url + "/phastCons46way/primates/", ".phastCons46way.primates.wigFix.gz",
                        hg19Url + "/phyloP46way/primates/", ".phyloP46way.primate.wigFix.gz", conservationFolder);
            }
            if (str.equalsIgnoreCase("GRCh38")) {
                String hg38Url = this.configuration.getDownload().getConservation().getHost() + "/hg38";
                downloadPhastConsAndPhylop(chromosomes, hg38Url + "/phastCons100way/hg38.100way.phastCons/", ".phastCons100way.wigFix.gz",
                        hg38Url + "/phyloP100way/hg38.100way.phyloP100way/", ".phyloP100way.wigFix.gz", conservationFolder);
            }
        }
        if (species.getScientificName().equals("Mus musculus")) {
            makeDir(conservationFolder);
            makeDir(conservationFolder.resolve("phastCons"));
            makeDir(conservationFolder.resolve("phylop"));
            String mm10Url = this.configuration.getDownload().getConservation().getHost() + "/mm10";
            String[] chromosomes = {"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y", "M"};
            // The phyloP version data used to be written to "phastConsVersion.json" in this branch,
            // overwriting the phastCons version file; the shared helper writes "phyloPVersion.json".
            downloadPhastConsAndPhylop(chromosomes, mm10Url + "/phastCons60way/mm10.60way.phastCons/", ".phastCons60way.wigFix.gz",
                    mm10Url + "/phyloP60way/mm10.60way.phyloP60way/", ".phyloP60way.wigFix.gz", conservationFolder);
        }
    }

    /**
     * Downloads one phastCons and one phyloP file per chromosome and then writes both version files.
     *
     * <p>Each file is named {@code "chr" + chromosome + suffix}; its URL is the matching URL prefix
     * followed by that file name. Files are stored in the "phastCons" and "phylop" sub-folders of
     * {@code conservationFolder}.
     *
     * @param chromosomes        chromosome names, without the "chr" prefix
     * @param phastConsUrlPrefix URL prefix (ending in "/") of the phastCons files
     * @param phastConsSuffix    file name suffix of the phastCons files
     * @param phylopUrlPrefix    URL prefix (ending in "/") of the phyloP files
     * @param phylopSuffix       file name suffix of the phyloP files
     * @param conservationFolder folder holding the "phastCons" and "phylop" sub-folders and the version files
     */
    private void downloadPhastConsAndPhylop(String[] chromosomes, String phastConsUrlPrefix, String phastConsSuffix,
                                            String phylopUrlPrefix, String phylopSuffix, Path conservationFolder)
            throws IOException, InterruptedException {
        List<String> phastConsUrls = new ArrayList<>(chromosomes.length);
        List<String> phylopUrls = new ArrayList<>(chromosomes.length);
        for (String chromosome : chromosomes) {
            String phastConsFileName = "chr" + chromosome + phastConsSuffix;
            String phastConsUrl = phastConsUrlPrefix + phastConsFileName;
            downloadFile(phastConsUrl, conservationFolder.resolve("phastCons").resolve(phastConsFileName).toString());
            phastConsUrls.add(phastConsUrl);

            String phylopFileName = "chr" + chromosome + phylopSuffix;
            String phylopUrl = phylopUrlPrefix + phylopFileName;
            downloadFile(phylopUrl, conservationFolder.resolve("phylop").resolve(phylopFileName).toString());
            phylopUrls.add(phylopUrl);
        }
        saveVersionData(EtlCommons.CONSERVATION_DATA, PHASTCONS_NAME, null, getTimeStamp(), phastConsUrls, conservationFolder.resolve("phastConsVersion.json"));
        saveVersionData(EtlCommons.CONSERVATION_DATA, PHYLOP_NAME, null, getTimeStamp(), phylopUrls, conservationFolder.resolve("phyloPVersion.json"));
    }

    /**
     * Downloads clinical variant data (ClinVar files and, for GRCh37, the IARC TP53 archive).
     *
     * <p>Nothing is done unless the species' scientific name is "Homo sapiens". The IARC TP53
     * archive is downloaded with a POST payload, unpacked into the clinical variants folder and
     * its version is read from the extracted "Disclaimer.txt".
     *
     * @param species species to download data for
     * @param str     assembly name; the IARC TP53 data is only downloaded when it equals "grch37"
     *                (case-insensitive)
     * @param path    parent folder under which the {@code EtlCommons.CLINICAL_VARIANTS_FOLDER} folder is created
     * @throws ParameterException   if no assembly was provided in the command options
     * @throws IOException          on download/extraction failures, or if the archive contains an
     *                              entry that would be written outside the target folder
     * @throws InterruptedException propagated from the download helper
     */
    private void downloadClinical(Species species, String str, Path path) throws IOException, InterruptedException {
        if (species.getScientificName().equals("Homo sapiens")) {
            if (this.downloadCommandOptions.assembly == null) {
                throw new ParameterException("Assembly must be provided for downloading clinical variants data. Please, specify either --assembly GRCh37 or --assembly GRCh38");
            }
            this.logger.info("Downloading clinical information ...");
            Path clinicalFolder = path.resolve(EtlCommons.CLINICAL_VARIANTS_FOLDER);
            makeDir(clinicalFolder);
            List<String> clinvarUrls = new ArrayList<>(4);
            String clinvarUrl = this.configuration.getDownload().getClinvar().getHost();
            downloadFile(clinvarUrl, clinicalFolder.resolve(EtlCommons.CLINVAR_XML_FILE).toString());
            clinvarUrls.add(clinvarUrl);
            String efoTermsUrl = this.configuration.getDownload().getClinvarEfoTerms().getHost();
            downloadFile(efoTermsUrl, clinicalFolder.resolve(EtlCommons.CLINVAR_EFO_FILE).toString());
            clinvarUrls.add(efoTermsUrl);
            String summaryUrl = this.configuration.getDownload().getClinvarSummary().getHost();
            downloadFile(summaryUrl, clinicalFolder.resolve(EtlCommons.CLINVAR_SUMMARY_FILE).toString());
            clinvarUrls.add(summaryUrl);
            String variationAlleleUrl = this.configuration.getDownload().getClinvarVariationAllele().getHost();
            downloadFile(variationAlleleUrl, clinicalFolder.resolve(EtlCommons.CLINVAR_VARIATION_ALLELE_FILE).toString());
            clinvarUrls.add(variationAlleleUrl);
            saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, CLINVAR_NAME, getClinVarVersion(), getTimeStamp(), clinvarUrls, clinicalFolder.resolve("clinvarVersion.json"));
            if (str.equalsIgnoreCase("grch37")) {
                String iarctp53Url = this.configuration.getDownload().getIarctp53().getHost();
                downloadFile(iarctp53Url, clinicalFolder.resolve(EtlCommons.IARCTP53_FILE).toString(), Collections.singletonList("--post-data=dataset-somaticMutationData=somaticMutationData&dataset-germlineMutationData=germlineMutationData&dataset-somaticMutationReference=somaticMutationReference&dataset-germlineMutationReference=germlineMutationReference"));
                extractZipArchive(clinicalFolder.resolve(EtlCommons.IARCTP53_FILE), clinicalFolder);
                saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, IARCTP53_NAME, getVersionFromVersionLine(clinicalFolder.resolve("Disclaimer.txt"), "The version of the database should be identified"), getTimeStamp(), Collections.singletonList(iarctp53Url), clinicalFolder.resolve("iarctp53Version.json"));
            }
        }
    }

    /**
     * Extracts every entry of a zip archive into the given folder, overwriting existing files.
     *
     * <p>The archive and all per-entry streams are closed even when extraction fails.
     *
     * @param zipPath      archive to extract
     * @param targetFolder folder that receives the extracted entries
     * @throws IOException if the archive cannot be read, a file cannot be written, or an entry
     *                     name resolves to a location outside {@code targetFolder}
     */
    private void extractZipArchive(Path zipPath, Path targetFolder) throws IOException {
        Path normalizedTarget = targetFolder.toAbsolutePath().normalize();
        try (ZipFile zipFile = new ZipFile(zipPath.toFile())) {
            Enumeration<? extends ZipEntry> entries = zipFile.entries();
            while (entries.hasMoreElements()) {
                ZipEntry entry = entries.nextElement();
                Path entryPath = normalizedTarget.resolve(entry.getName()).normalize();
                // The archive comes from a remote server: refuse entry names such as "../../x"
                // that would escape the target folder.
                if (!entryPath.startsWith(normalizedTarget)) {
                    throw new IOException("Zip entry '" + entry.getName() + "' resolves outside of " + normalizedTarget);
                }
                if (entry.isDirectory()) {
                    Files.createDirectories(entryPath);
                } else {
                    Files.createDirectories(entryPath.getParent());
                    try (InputStream entryStream = zipFile.getInputStream(entry);
                         FileOutputStream fileStream = new FileOutputStream(entryPath.toFile())) {
                        IOUtils.copy(entryStream, fileStream);
                    }
                }
            }
        }
    }

    /**
     * Looks up the DOCM version in the given file by delegating to
     * {@code getVersionFromVersionLine} with the marker of the HTML version selector.
     *
     * @param path file to search for the version line
     * @return whatever {@code getVersionFromVersionLine} returns for that marker
     */
    private String getDocmVersion(Path path) {
        final String versionLineMarker = "<select name=\"version\" id=\"version\"";
        return getVersionFromVersionLine(path, versionLineMarker);
    }

    /**
     * Queries the DOCM REST API for each given HGVS string and writes one response per line.
     *
     * <p>The request for each variant goes to {@code <docm host>v1/variants/<hgvs>.json}. The
     * method sleeps 300 ms after every request. The output writer is closed even if a request,
     * a write or the sleep fails.
     *
     * @param list HGVS identifiers of the variants to fetch
     * @param path output file that receives the raw responses
     * @throws IOException          if the output file cannot be opened or written
     * @throws InterruptedException if the thread is interrupted while pausing between requests
     */
    private void downloadDocm(List<String> list, Path path) throws IOException, InterruptedException {
        WebTarget target = ClientBuilder.newClient().target(URI.create(this.configuration.getDownload().getDocm().getHost() + "v1/variants"));
        this.logger.info("Querying DOCM REST API to get detailed data for all their variants");
        int savedVariants = 0;
        try (BufferedWriter writer = FileUtils.newBufferedWriter(path)) {
            for (String hgvs : list) {
                writer.write(target.path(hgvs + ".json").request().get(String.class) + "\n");
                // Count before reporting so the progress message matches what was actually written.
                savedVariants++;
                if (savedVariants % 10 == 0) {
                    this.logger.info("{} DOCM variants saved", Integer.valueOf(savedVariants));
                }
                Thread.sleep(300L);
            }
        }
        this.logger.info("Finished. {} DOCM variants saved at {}", Integer.valueOf(savedVariants), path.toString());
    }

    /**
     * Fetches the DOCM variant list and returns the "hgvs" value of every variant whose
     * "reference_version" matches the assembly given in the command options (case-insensitive).
     *
     * @return HGVS values of the matching variants, in the order they appear in the response
     * @throws IOException if the target URI cannot be converted to a URL or the response cannot be parsed
     */
    private List<String> getDocmHgvsList() throws IOException {
        URI variantsUri = URI.create(this.configuration.getDownload().getDocm().getHost() + "v1/variants.json");
        WebTarget variantsTarget = ClientBuilder.newClient().target(variantsUri);
        this.logger.info("Getting full list of DOCM hgvs from: {}", variantsTarget.getUri().toURL());
        String response = variantsTarget.request().get(String.class);
        List<Map<String, String>> variants = parseResult(response);
        List<String> hgvsList = new ArrayList<>(variants.size());
        for (Map<String, String> variant : variants) {
            if (!variant.containsKey("reference_version")) {
                continue;
            }
            if (variant.get("reference_version").equalsIgnoreCase(this.downloadCommandOptions.assembly)) {
                hgvsList.add(variant.get("hgvs"));
            }
        }
        this.logger.info("{} hgvs found", Integer.valueOf(hgvsList.size()));
        return hgvsList;
    }

    /**
     * Parses a JSON string into a list of maps, with unknown properties not treated as errors.
     *
     * @param str JSON text to parse
     * @return the parsed list
     * @throws IOException if the text cannot be parsed
     */
    private List<Map<String, String>> parseResult(String str) throws IOException {
        ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
        // Target type is List<Map>; element maps are not further parameterised.
        return (List) mapper
                .readerFor(mapper.getTypeFactory().constructCollectionType(List.class, Map.class))
                .readValue(str);
    }

    /**
     * Returns the third "_"-separated token of the configured dbSNP host string.
     *
     * @return the token at index 2 after splitting the dbSNP host on "_"
     */
    private String getDbsnpVersion() {
        String dbsnpHost = this.configuration.getDownload().getDbsnp().getHost();
        String[] hostTokens = dbsnpHost.split("_");
        return hostTokens[2];
    }

    /**
     * Builds a version string from the configured GWAS catalog host by splitting it on "/" and
     * joining the tokens at indices 9, 8 and 7 (in that order) with "/".
     *
     * @return the three tokens joined as {@code token9/token8/token7}
     */
    private String getGwasVersion() {
        String gwasHost = this.configuration.getDownload().getGwasCatalog().getHost();
        String[] urlParts = gwasHost.split("/");
        StringBuilder version = new StringBuilder();
        version.append(urlParts[9]).append("/");
        version.append(urlParts[8]).append("/");
        version.append(urlParts[7]);
        return version.toString();
    }

    /**
     * Extracts a version from the configured ClinVar host string: takes the second
     * "_"-separated token and returns the part of it before the first ".".
     *
     * @return the extracted version token
     */
    private String getClinVarVersion() {
        String clinvarHost = this.configuration.getDownload().getClinvar().getHost();
        String secondToken = clinvarHost.split("_")[1];
        String[] dotSeparated = secondToken.split("\\.");
        return dotSeparated[0];
    }

    /**
     * Downloads the CADD scores file and writes its version file.
     *
     * <p>Does nothing unless the species is "Homo sapiens" and the assembly equals "GRCh37"
     * (case-insensitive). The version is the token at index 5 of the CADD host split on "/".
     *
     * @param species species to download data for
     * @param str     assembly name
     * @param path    parent folder under which the functional score folder is created
     * @throws IOException          propagated from the directory, download or version-file helpers
     * @throws InterruptedException propagated from the download helper
     */
    private void downloadCaddScores(Species species, String str, Path path) throws IOException, InterruptedException {
        boolean isHuman = species.getScientificName().equals("Homo sapiens");
        if (!isHuman || !str.equalsIgnoreCase("GRCh37")) {
            return;
        }
        this.logger.info("Downloading CADD scores information ...");
        Path caddFolder = path.resolve(EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA);
        makeDir(caddFolder);
        String caddUrl = this.configuration.getDownload().getCadd().getHost();
        Path caddFile = caddFolder.resolve(BuildCommandExecutor.CADD_INPUT_FILE_NAME);
        downloadFile(caddUrl, caddFile.toString());
        String caddVersion = caddUrl.split("/")[5];
        saveVersionData(EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, CADD_NAME, caddVersion, getTimeStamp(),
                Collections.singletonList(caddUrl), caddFolder.resolve("caddVersion.json"));
    }

    /**
     * Downloads the Reactome archive as "biopax.zip" into the protein folder under the common
     * folder and writes "reactomeVersion.json" next to it.
     *
     * @throws IOException          propagated from the download or version-file helpers
     * @throws InterruptedException propagated from the download helper
     */
    private void downloadReactomeData() throws IOException, InterruptedException {
        Path proteinFolder = this.common.resolve(EtlCommons.PROTEIN_DATA);
        String reactomeUrl = this.configuration.getDownload().getReactome().getHost();
        Path biopaxFile = proteinFolder.resolve("biopax.zip");
        downloadFile(reactomeUrl, biopaxFile.toString());
        saveVersionData(EtlCommons.PROTEIN_DATA, REACTOME_NAME, null, getTimeStamp(),
                Collections.singletonList(reactomeUrl), proteinFolder.resolve("reactomeVersion.json"));
    }

    /**
     * Downloads repeat annotation files (simple repeats, genomic super duplications and, except
     * for hg19, window masker) for Homo sapiens and writes a version file for each.
     *
     * <p>Does nothing for other species. The assembly is mapped to the database name used in the
     * download URLs: "grch37" to "hg19" and "grch38" to "hg38" (case-insensitive).
     *
     * @param species species to download data for
     * @param str     assembly name
     * @param path    parent folder under which the "repeats" folder is created
     * @throws ParameterException   if the assembly is neither GRCh37 nor GRCh38
     * @throws IOException          propagated from the directory, download or version-file helpers
     * @throws InterruptedException propagated from the download helper
     */
    private void downloadRepeats(Species species, String str, Path path) throws IOException, InterruptedException {
        if (!species.getScientificName().equals("Homo sapiens")) {
            return;
        }
        this.logger.info("Downloading repeats data ...");
        Path repeatsFolder = path.resolve("repeats");
        makeDir(repeatsFolder);

        final String ucscAssembly;
        if (str.equalsIgnoreCase("grch37")) {
            ucscAssembly = "hg19";
        } else if (str.equalsIgnoreCase("grch38")) {
            ucscAssembly = "hg38";
        } else {
            this.logger.error("Please provide a valid human assembly {GRCh37, GRCh38)");
            throw new ParameterException("Assembly '" + str + "' is not valid. Please provide a valid human assembly {GRCh37, GRCh38)");
        }

        String simpleRepeatsUrl = this.configuration.getDownload().getSimpleRepeats().getHost()
                + "/" + ucscAssembly + "/database/simpleRepeat.txt.gz";
        downloadFile(simpleRepeatsUrl, repeatsFolder.resolve(EtlCommons.TRF_FILE).toString());
        saveVersionData("repeats", TRF_NAME, null, getTimeStamp(), Collections.singletonList(simpleRepeatsUrl),
                repeatsFolder.resolve(EtlCommons.TRF_VERSION_FILE));

        String superDupsUrl = this.configuration.getDownload().getGenomicSuperDups().getHost()
                + "/" + ucscAssembly + "/database/genomicSuperDups.txt.gz";
        downloadFile(superDupsUrl, repeatsFolder.resolve(EtlCommons.GSD_FILE).toString());
        saveVersionData("repeats", GSD_NAME, null, getTimeStamp(), Collections.singletonList(superDupsUrl),
                repeatsFolder.resolve(EtlCommons.GSD_VERSION_FILE));

        // The window masker file is not downloaded for hg19.
        if (!ucscAssembly.equalsIgnoreCase("hg19")) {
            String windowMaskerUrl = this.configuration.getDownload().getWindowMasker().getHost()
                    + "/" + ucscAssembly + "/database/windowmaskerSdust.txt.gz";
            downloadFile(windowMaskerUrl, repeatsFolder.resolve(EtlCommons.WM_FILE).toString());
            saveVersionData("repeats", WM_NAME, null, getTimeStamp(), Collections.singletonList(windowMaskerUrl),
                    repeatsFolder.resolve(EtlCommons.WM_VERSION_FILE));
        }
    }

    /**
     * Downloads a URL to a local file without any additional wget options.
     *
     * @param str  URL to download
     * @param str2 path of the output file
     * @throws IOException          propagated from the three-argument overload
     * @throws InterruptedException propagated from the three-argument overload
     */
    private void downloadFile(String str, String str2) throws IOException, InterruptedException {
        List<String> noExtraOptions = null;
        downloadFile(str, str2, noExtraOptions);
    }

    /**
     * Downloads a URL to a local file by running "wget" through
     * {@code EtlCommons.runCommandLineProcess}.
     *
     * <p>wget is invoked with {@code --tries=10}, the URL, {@code -O <str2>} and
     * {@code -o <str2>.log}, followed by any extra options. When the process helper returns
     * {@code false} only a warning is logged; no exception is raised by this method for it.
     *
     * @param str  URL to download
     * @param str2 path of the output file; the wget log is written to this path plus ".log"
     * @param list extra wget arguments appended after the standard ones; may be null or empty
     * @throws IOException          propagated from the process helper
     * @throws InterruptedException propagated from the process helper
     */
    private void downloadFile(String str, String str2, List<String> list) throws IOException, InterruptedException {
        List<String> wgetArgs = new ArrayList<>();
        wgetArgs.add("--tries=10");
        wgetArgs.add(str);
        wgetArgs.add("-O");
        wgetArgs.add(str2);
        wgetArgs.add("-o");
        wgetArgs.add(str2 + ".log");
        boolean hasExtraOptions = !(list == null || list.isEmpty());
        if (hasExtraOptions) {
            wgetArgs.addAll(list);
        }
        boolean downloaded = EtlCommons.runCommandLineProcess(null, "wget", wgetArgs, null);
        if (!downloaded) {
            this.logger.warn(str + " cannot be downloaded");
            return;
        }
        this.logger.info(str2 + " created OK");
    }
}
