package org.opencb.cellbase.app.cli;

import com.beust.jcommander.ParameterException;
import java.io.File;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.FileAttribute;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.opencb.cellbase.app.cli.CliOptionsParser;
import org.opencb.cellbase.app.transform.CaddScoreParser;
import org.opencb.cellbase.app.transform.CellBaseParser;
import org.opencb.cellbase.app.transform.ConservationParser;
import org.opencb.cellbase.app.transform.DgvParser;
import org.opencb.cellbase.app.transform.GeneParser;
import org.opencb.cellbase.app.transform.GenomeSequenceFastaParser;
import org.opencb.cellbase.app.transform.InteractionParser;
import org.opencb.cellbase.app.transform.ProteinParser;
import org.opencb.cellbase.app.transform.RegulatoryRegionParser;
import org.opencb.cellbase.app.transform.RepeatsParser;
import org.opencb.cellbase.app.transform.clinical.variant.ClinVarParser;
import org.opencb.cellbase.app.transform.clinical.variant.ClinicalVariantParser;
import org.opencb.cellbase.app.transform.clinical.variant.CosmicParser;
import org.opencb.cellbase.app.transform.clinical.variant.GwasParser;
import org.opencb.cellbase.app.transform.variation.VariationFeatureFile;
import org.opencb.cellbase.app.transform.variation.VariationFile;
import org.opencb.cellbase.app.transform.variation.VariationParser;
import org.opencb.cellbase.core.config.Species;
import org.opencb.cellbase.core.serializer.CellBaseJsonFileSerializer;
import org.opencb.commons.utils.FileUtils;

/* loaded from: input_file:org/opencb/cellbase/app/cli/BuildCommandExecutor.class */
public class BuildCommandExecutor extends CommandExecutor {
    // File names of some of the raw inputs. Within this class only CADD_INPUT_FILE_NAME is
    // referenced by name; buildGwas() resolves the same GWAS/dbSNP names as string literals.
    public static final String GWAS_INPUT_FILE_NAME = "gwas_catalog.tsv";
    public static final String CADD_INPUT_FILE_NAME = "whole_genome_SNVs.tsv.gz";
    // NOTE(review): the DisGeNET and HPO names are not used in this class - presumably read elsewhere.
    public static final String DISGENET_INPUT_FILE_NAME = "all_gene_disease_associations.txt.gz";
    public static final String HPO_INPUT_FILE_NAME = "ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt";
    public static final String DBSNP_INPUT_FILE_NAME = "All.vcf.gz";
    // Parsed options of the 'build' command (input/output/common folders, species, data, assembly).
    private CliOptionsParser.BuildCommandOptions buildCommandOptions;
    // Folder the raw data is read from; stays null when no input option was given.
    private Path input;
    // Folder the built files and copied version files are written to.
    private Path output;
    // Folder with data shared between species (protein, mirbase, expression version files).
    private Path common;
    // <basedir>/bin/ensembl-scripts/ - passed to EtlCommons.runCommandLineProcess for the Perl scripts.
    private File ensemblScriptsFolder;
    // <basedir>/bin/protein/ - assigned in the constructor but not read anywhere in this class.
    private File proteinScriptsFolder;
    // Forwarded unchanged to GeneParser.
    private boolean flexibleGTFParsing;
    // Species selected by execute() from the configuration; null until execute() has resolved it.
    private Species species;

    public BuildCommandExecutor(CliOptionsParser.BuildCommandOptions buildCommandOptions) {
        super(buildCommandOptions.commonOptions.logLevel, buildCommandOptions.commonOptions.verbose, buildCommandOptions.commonOptions.conf);
        this.input = null;
        this.output = null;
        this.common = null;
        this.buildCommandOptions = buildCommandOptions;
        if (buildCommandOptions.input != null) {
            this.input = Paths.get(buildCommandOptions.input, new String[0]);
        }
        if (buildCommandOptions.output != null) {
            this.output = Paths.get(buildCommandOptions.output, new String[0]);
        }
        if (buildCommandOptions.common != null) {
            this.common = Paths.get(buildCommandOptions.common, new String[0]);
        } else {
            this.common = this.input.getParent().getParent().resolve("common");
        }
        this.ensemblScriptsFolder = new File(System.getProperty("basedir") + "/bin/ensembl-scripts/");
        this.proteinScriptsFolder = new File(System.getProperty("basedir") + "/bin/protein/");
        this.flexibleGTFParsing = buildCommandOptions.flexibleGTFParsing;
    }

    /**
     * Runs the 'build' command.
     *
     * <p>Validates the folders, creates the output folder if needed, resolves the requested species
     * against the configuration and then builds every requested data type in turn. The {@code data}
     * option is either {@code all} (every data type listed for the species) or a comma separated
     * list of data type names. Failures while parsing one data type are logged and do not stop the
     * remaining ones; I/O and parameter errors raised while preparing a parser abort the command
     * and are logged.</p>
     */
    @Override
    public void execute() {
        try {
            checkParameters();
            if (!Files.exists(this.output)) {
                Files.createDirectories(this.output);
            }
            for (Species candidate : this.configuration.getAllSpecies()) {
                if (this.buildCommandOptions.species.equalsIgnoreCase(candidate.getScientificName())
                        || this.buildCommandOptions.species.equalsIgnoreCase(candidate.getCommonName())
                        || this.buildCommandOptions.species.equalsIgnoreCase(candidate.getId())) {
                    this.species = candidate;
                    break;
                }
            }
            if (this.species == null) {
                this.logger.error("Species '{}' not valid", this.buildCommandOptions.species);
                // Nothing can be built without a species; continuing would only fail with an NPE.
                return;
            }
            if (this.buildCommandOptions.data == null) {
                return;
            }
            String[] dataNames = this.buildCommandOptions.data.equals("all")
                    ? (String[]) this.species.getData().toArray(new String[0])
                    : this.buildCommandOptions.data.split(",");
            for (String dataName : dataNames) {
                this.logger.info("Building '{}' data", dataName);
                CellBaseParser parser = createParser(dataName);
                if (parser == null) {
                    continue;
                }
                try {
                    parser.parse();
                } catch (Exception e) {
                    this.logger.error("Error executing 'build' command " + dataName + ": " + e.getMessage(), e);
                } finally {
                    parser.disconnect();
                }
            }
        } catch (IOException e) {
            this.logger.error(e.getMessage());
        } catch (ParameterException e) {
            this.logger.error("Error parsing build command line parameters: " + e.getMessage(), e);
        }
    }

    /**
     * Prepares the parser for a single data type.
     *
     * @param dataName name of the data type to build
     * @return the parser to run, or {@code null} when the data type needs no parser (genome info is
     *         produced directly) or the name is not recognised
     * @throws IOException if a mandatory input file of the data type is missing
     */
    private CellBaseParser createParser(String dataName) throws IOException {
        switch (dataName) {
            case EtlCommons.GENOME_INFO_DATA:
                // Genome info is generated by an external script, there is nothing to parse.
                buildGenomeInfo();
                return null;
            case EtlCommons.GENOME_DATA:
                return buildGenomeSequence();
            case EtlCommons.GENE_DATA:
                return buildGene();
            case EtlCommons.VARIATION_DATA:
                return buildVariation();
            case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA:
                return buildCadd();
            case EtlCommons.REGULATION_DATA:
                return buildRegulation();
            case EtlCommons.PROTEIN_DATA:
                return buildProtein();
            case EtlCommons.PPI_DATA:
                return getInteractionParser();
            case EtlCommons.CONSERVATION_DATA:
                return buildConservation();
            case EtlCommons.DRUG_DATA:
                return buildDrugParser();
            case EtlCommons.CLINICAL_VARIANTS_DATA:
                return buildClinicalVariants();
            case EtlCommons.CLINVAR_DATA:
                return buildClinvar();
            case EtlCommons.COSMIC_DATA:
                return buildCosmic();
            case EtlCommons.GWAS_DATA:
                return buildGwas();
            case EtlCommons.STRUCTURAL_VARIANTS_DATA:
                return buildStructuralVariants();
            case "repeats":
                return buildRepeats();
            default:
                this.logger.error("Build option '" + dataName + "' is not valid");
                return null;
        }
    }

    /**
     * Prepares the parser for structural variants (DGV) and copies the DGV version file to the
     * output folder.
     *
     * @return parser reading the DGV file from the {@code structuralVariants} input folder
     */
    private CellBaseParser buildStructuralVariants() {
        Path structuralVariantsFolder = this.input.resolve("structuralVariants");
        Path dgvVersionFile = structuralVariantsFolder.resolve(EtlCommons.DGV_VERSION_FILE);
        copyVersionFiles(Collections.singletonList(dgvVersionFile));

        Path dgvFile = structuralVariantsFolder.resolve(EtlCommons.DGV_FILE);
        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, "structuralVariants", true);
        return new DgvParser(dgvFile, serializer);
    }

    /**
     * Prepares the parser for genomic repeats and copies the three repeat-source version files
     * (TRF, GSD, WM) to the output folder.
     *
     * @return parser reading from the {@code repeats} input folder
     */
    private CellBaseParser buildRepeats() {
        Path repeatsFolder = this.input.resolve("repeats");
        // Copied in the same order as before; a missing file only produces a warning.
        copyVersionFiles(Arrays.asList(
                repeatsFolder.resolve(EtlCommons.TRF_VERSION_FILE),
                repeatsFolder.resolve(EtlCommons.GSD_VERSION_FILE),
                repeatsFolder.resolve(EtlCommons.WM_VERSION_FILE)));
        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, "repeats");
        return new RepeatsParser(repeatsFolder, serializer);
    }

    /**
     * Copies the given version files into the output folder, replacing existing copies.
     *
     * <p>This is best effort: a missing file or a failed copy is logged as a warning and the
     * remaining files are still processed. A missing file and a failed copy are reported
     * separately so that, for instance, a permission problem is not mistaken for an absent file.</p>
     *
     * @param list version files to copy; each is copied under its own file name
     */
    private void copyVersionFiles(List<Path> list) {
        for (Path versionFile : list) {
            if (!Files.exists(versionFile)) {
                this.logger.warn("Version file {} not found - skipping", versionFile.toString());
                continue;
            }
            try {
                Files.copy(versionFile, this.output.resolve(versionFile.getFileName()), StandardCopyOption.REPLACE_EXISTING);
            } catch (IOException e) {
                // Keep the cause: the file exists, so the failure is something other than "not found".
                this.logger.warn("Version file " + versionFile + " could not be copied to " + this.output
                        + " - skipping: " + e.getMessage(), e);
            }
        }
    }

    /**
     * Validates the folders required by the 'build' command.
     *
     * <p>The constructor leaves a folder {@code null} when its option is absent, so each folder is
     * checked for presence before it is touched.</p>
     *
     * @throws IOException if the input or common folder does not exist or is not a directory
     * @throws ParameterException if the input, common or output folder was not provided
     */
    private void checkParameters() throws IOException {
        if (this.input == null) {
            throw new ParameterException("'input' option is mandatory for the 'build' command");
        }
        // Files.isDirectory already returns false for a path that does not exist.
        if (!Files.isDirectory(this.input)) {
            throw new IOException("Input parameter '" + this.input.toString() + "' does not exist or is not a directory");
        }
        if (this.common == null) {
            throw new ParameterException("'common' folder could not be derived from the input folder, please provide the 'common' option");
        }
        if (!Files.isDirectory(this.common)) {
            throw new IOException("Common parameter '" + this.common.toString() + "' does not exist or is not a directory");
        }
        if (this.output == null) {
            throw new ParameterException("'output' option is mandatory for the 'build' command");
        }
    }

    /**
     * Generates {@code genome_info.json} in the output folder by running the
     * {@code genome_info.pl} script from the Ensembl scripts folder.
     *
     * <p>The assembly option is used when given, otherwise the default human assembly from the
     * configuration. Species that are neither configured vertebrates nor Drosophila melanogaster
     * are flagged to the script with {@code --phylo no-vertebrate}. Script failures are logged,
     * not thrown.</p>
     */
    private void buildGenomeInfo() {
        String outputFile = this.output.resolve("genome_info.json").toAbsolutePath().toString();
        String scientificName = this.species.getScientificName();
        String assembly = this.buildCommandOptions.assembly == null
                ? getDefaultHumanAssembly()
                : this.buildCommandOptions.assembly;

        List<String> args = new ArrayList<>(Arrays.asList(
                "--species", scientificName,
                "--assembly", assembly,
                "-o", outputFile,
                "--ensembl-libs", this.configuration.getDownload().getEnsembl().getLibs()));
        if (!this.configuration.getSpecies().getVertebrates().contains(this.species)
                && !scientificName.equals("Drosophila melanogaster")) {
            args.add("--phylo");
            args.add("no-vertebrate");
        }
        String logFile = this.output.resolve("genome_info.log").toAbsolutePath().toString();

        try {
            if (EtlCommons.runCommandLineProcess(this.ensemblScriptsFolder, "./genome_info.pl", args, logFile)) {
                this.logger.info(outputFile + " created OK");
            } else {
                this.logger.error("Genome info for " + scientificName + " cannot be downloaded");
            }
        } catch (IOException e) {
            this.logger.error("Error running genome_info.pl for " + scientificName + ": " + e.getMessage(), e);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still react to it.
            Thread.currentThread().interrupt();
            this.logger.error("Interrupted while running genome_info.pl for " + scientificName, e);
        }
    }

    /**
     * Prepares the parser for the genome sequence and copies the genome version file to the
     * output folder.
     *
     * @return parser reading the reference genome FASTA file
     */
    private CellBaseParser buildGenomeSequence() {
        Path genomeVersionFile = this.input.resolve("genome/genomeVersion.json");
        copyVersionFiles(Collections.singletonList(genomeVersionFile));

        Path fastaFile = getFastaReferenceGenome();
        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, "genome_sequence");
        return new GenomeSequenceFastaParser(fastaFile, serializer);
    }

    /**
     * Prepares the gene parser and copies the version files of its data sources to the output
     * folder.
     *
     * <p>The gene expression atlas version file lives under the common folder and is resolved
     * against it directly. Resolving it through the gene folder only produced the right path when
     * the common folder was absolute; with a relative common folder it pointed inside the gene
     * folder.</p>
     *
     * @return parser reading from the gene input folder and the reference genome FASTA file
     */
    private CellBaseParser buildGene() {
        Path geneFolder = this.input.resolve(EtlCommons.GENE_DATA);
        copyVersionFiles(Arrays.asList(
                geneFolder.resolve("geneDrug/dgidbVersion.json"),
                geneFolder.resolve("ensemblCoreVersion.json"),
                geneFolder.resolve("uniprotXrefVersion.json"),
                this.common.resolve("expression/geneExpressionAtlasVersion.json"),
                geneFolder.resolve("hpoVersion.json"),
                geneFolder.resolve("disgenetVersion.json")));
        return new GeneParser(geneFolder, getFastaReferenceGenome(), this.species, this.flexibleGTFParsing,
                new CellBaseJsonFileSerializer(this.output, EtlCommons.GENE_DATA));
    }

    /**
     * Prepares the variation parser and copies the Ensembl variation version file to the output
     * folder.
     *
     * @return parser reading from the variation input folder
     */
    private CellBaseParser buildVariation() {
        Path variationFolder = this.input.resolve(EtlCommons.VARIATION_DATA);
        Path versionFile = variationFolder.resolve("ensemblVariationVersion.json");
        copyVersionFiles(Collections.singletonList(versionFile));

        // No file name is given to the serializer here, hence the explicit null.
        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, (String) null, true, true, true);
        return new VariationParser(variationFolder, serializer);
    }

    /**
     * Prepares the CADD score parser and copies the CADD version file to the output folder.
     *
     * @return parser reading {@link #CADD_INPUT_FILE_NAME} from the functional score input folder
     */
    private CellBaseParser buildCadd() {
        Path functionalScoreFolder = this.input.resolve(EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA);
        Path versionFile = functionalScoreFolder.resolve("caddVersion.json");
        copyVersionFiles(Collections.singletonList(versionFile));

        Path caddFile = functionalScoreFolder.resolve(CADD_INPUT_FILE_NAME);
        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, EtlCommons.CADD_DATA);
        return new CaddScoreParser(caddFile, serializer);
    }

    /**
     * Prepares the regulatory region parser and copies the version files of its data sources to
     * the output folder. The miRBase version file is taken from the common folder, the others from
     * the regulation input folder.
     *
     * @return parser reading from the regulation input folder
     */
    private CellBaseParser buildRegulation() {
        Path regulationFolder = this.input.resolve(EtlCommons.REGULATION_DATA);

        Path ensemblVersion = regulationFolder.resolve("ensemblRegulationVersion.json");
        Path mirbaseVersion = this.common.resolve("mirbase/mirbaseVersion.json");
        Path targetScanVersion = regulationFolder.resolve("targetScanVersion.json");
        Path mirTarBaseVersion = regulationFolder.resolve("miRTarBaseVersion.json");
        copyVersionFiles(Arrays.asList(ensemblVersion, mirbaseVersion, targetScanVersion, mirTarBaseVersion));

        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, "regulatory_region");
        return new RegulatoryRegionParser(regulationFolder, serializer);
    }

    /**
     * Prepares the protein parser and copies the UniProt and InterPro version files to the output
     * folder. All protein inputs are read from the protein folder under the common folder.
     *
     * @return parser reading the UniProt chunks and the InterPro file for the selected species
     */
    private CellBaseParser buildProtein() {
        Path proteinFolder = this.common.resolve(EtlCommons.PROTEIN_DATA);
        Path uniprotVersion = proteinFolder.resolve("uniprotVersion.json");
        Path interproVersion = proteinFolder.resolve("interproVersion.json");
        copyVersionFiles(Arrays.asList(uniprotVersion, interproVersion));

        Path uniprotChunks = proteinFolder.resolve("uniprot_chunks");
        Path interproFile = proteinFolder.resolve("protein2ipr.dat.gz");
        String scientificName = this.species.getScientificName();
        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, EtlCommons.PROTEIN_DATA);
        return new ProteinParser(uniprotChunks, interproFile, scientificName, serializer);
    }

    /**
     * Runs the {@code protein_function_prediction_matrices.pl} script from the Ensembl scripts
     * folder for the given species and logs whether it succeeded.
     *
     * @param species species whose scientific name is passed to the script
     * @param path folder passed as {@code --outdir}; the script log file is written there too
     * @throws IOException propagated from the command line process
     * @throws InterruptedException propagated from the command line process
     */
    private void getProteinFunctionPredictionMatrices(Species species, Path path) throws IOException, InterruptedException {
        this.logger.info("Downloading protein function prediction matrices ...");

        List<String> scriptArgs = Arrays.asList(
                "--species", species.getScientificName(),
                "--outdir", path.toString(),
                "--ensembl-libs", this.configuration.getDownload().getEnsembl().getLibs());
        String logFile = path.resolve("protein_function_prediction_matrices.log").toString();

        boolean succeeded = EtlCommons.runCommandLineProcess(this.ensemblScriptsFolder,
                "./protein_function_prediction_matrices.pl", scriptArgs, logFile);
        if (!succeeded) {
            this.logger.error("Protein function prediction matrices for " + species.getScientificName() + " cannot be downloaded");
            return;
        }
        this.logger.info("Protein function prediction matrices created OK");
    }

    /**
     * Prepares the protein-protein interaction parser and copies the IntAct version file to the
     * output folder.
     *
     * @return parser reading {@code intact.txt} from the protein folder under the common folder
     */
    private CellBaseParser getInteractionParser() {
        Path proteinFolder = this.common.resolve(EtlCommons.PROTEIN_DATA);
        Path intactFile = proteinFolder.resolve("intact.txt");
        Path intactVersionFile = proteinFolder.resolve("intactVersion.json");
        copyVersionFiles(Collections.singletonList(intactVersionFile));

        String scientificName = this.species.getScientificName();
        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, "protein_protein_interaction");
        return new InteractionParser(intactFile, scientificName, serializer);
    }

    /**
     * Placeholder for the drug data builder.
     *
     * @return never returns normally
     * @throws ParameterException always, because this builder does not exist yet
     */
    private CellBaseParser buildDrugParser() {
        final String notImplemented = "'drug' builder is not implemented yet";
        throw new ParameterException(notImplemented);
    }

    /**
     * Prepares the conservation score parser and copies the GERP, phastCons and phyloP version
     * files to the output folder.
     *
     * @return parser reading from the conservation input folder
     */
    private CellBaseParser buildConservation() {
        Path conservationFolder = this.input.resolve(EtlCommons.CONSERVATION_DATA);

        Path gerpVersion = conservationFolder.resolve("gerpVersion.json");
        Path phastConsVersion = conservationFolder.resolve("phastConsVersion.json");
        Path phyloPVersion = conservationFolder.resolve("phyloPVersion.json");
        copyVersionFiles(Arrays.asList(gerpVersion, phastConsVersion, phyloPVersion));

        // NOTE(review): 2000 is presumably the chunk size used by ConservationParser - confirm.
        int secondArgument = 2000;
        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output);
        return new ConservationParser(conservationFolder, secondArgument, serializer);
    }

    /**
     * Prepares the clinical variants parser and copies the ClinVar and GWAS version files to the
     * output folder. The assembly option is used when given, otherwise the default human assembly
     * from the configuration.
     *
     * @return parser reading from the clinical variants input folder
     */
    private CellBaseParser buildClinicalVariants() {
        Path clinicalFolder = this.input.resolve(EtlCommons.CLINICAL_VARIANTS_FOLDER);
        copyVersionFiles(Arrays.asList(
                clinicalFolder.resolve("clinvarVersion.json"),
                clinicalFolder.resolve("gwasVersion.json")));

        Path fastaFile = getFastaReferenceGenome();
        String assembly;
        if (this.buildCommandOptions.assembly == null) {
            assembly = getDefaultHumanAssembly();
        } else {
            assembly = this.buildCommandOptions.assembly;
        }
        // The serializer takes the file name without its ".json.gz" suffix.
        String outputName = EtlCommons.CLINICAL_VARIANTS_JSON_FILE.replace(".json.gz", "");
        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, outputName, true);
        return new ClinicalVariantParser(clinicalFolder, fastaFile, assembly, serializer);
    }

    /**
     * Looks up the name of the first assembly configured for {@code hsapiens} among the
     * vertebrates of the configuration.
     *
     * @return name of the first hsapiens assembly
     * @throws ParameterException if the configuration has no hsapiens entry among the vertebrates
     */
    private String getDefaultHumanAssembly() {
        Species human = null;
        for (Species candidate : this.configuration.getSpecies().getVertebrates()) {
            if (candidate.getId().equals("hsapiens")) {
                human = candidate;
                break;
            }
        }
        if (human == null) {
            throw new ParameterException("Clinical data can only be built if an hsapiens entry is defined within the configuration file. No hsapiens data found within the configuration.json file");
        }
        Species.Assembly firstAssembly = (Species.Assembly) human.getAssemblies().get(0);
        return firstAssembly.getName();
    }

    /**
     * Prepares the ClinVar parser and copies the ClinVar version file to the output folder.
     * The EFO file is optional and is passed as {@code null} when it is not present.
     *
     * @return parser reading the ClinVar XML, summary and (optional) EFO files
     * @throws ParameterException if the assembly option is missing or is not one of the two
     *         assemblies accepted by {@code ClinVarParser}
     * @deprecated logs a deprecation warning on every call; scheduled for removal
     */
    @Deprecated
    private CellBaseParser buildClinvar() {
        this.logger.warn("This method is deprecated, should no longer be used and will soon be removed");

        Path clinicalFolder = this.input.resolve("clinical");
        copyVersionFiles(Collections.singletonList(clinicalFolder.resolve("clinvarVersion.json")));

        Path clinvarXmlFile = clinicalFolder.resolve("ClinVar.xml.gz");
        Path clinvarSummaryFile = clinicalFolder.resolve(EtlCommons.CLINVAR_SUMMARY_FILE);
        Path efoCandidate = clinicalFolder.resolve(EtlCommons.CLINVAR_EFO_FILE);
        Path clinvarEfoFile = efoCandidate.toFile().exists() ? efoCandidate : null;

        String assembly = this.buildCommandOptions.assembly;
        checkMandatoryOption("assembly", assembly);
        boolean supportedAssembly = assembly.equals(ClinVarParser.GRCH37_ASSEMBLY)
                || assembly.equals(ClinVarParser.GRCH38_ASSEMBLY);
        if (!supportedAssembly) {
            throw new ParameterException("Assembly '" + assembly + "' is not valid. Possible values: " + ClinVarParser.GRCH37_ASSEMBLY + ", " + ClinVarParser.GRCH38_ASSEMBLY);
        }

        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, EtlCommons.CLINVAR_DATA, true);
        return new ClinVarParser(clinvarXmlFile, clinvarSummaryFile, clinvarEfoFile, assembly, serializer);
    }

    /**
     * Prepares the COSMIC parser for {@code CosmicMutantExport.tsv} in the input folder.
     *
     * @return parser configured with the assembly option as given on the command line
     * @deprecated logs a deprecation warning on every call; scheduled for removal
     */
    @Deprecated
    private CellBaseParser buildCosmic() {
        this.logger.warn("This method is deprecated, should no longer be used and will soon be removed");

        Path cosmicFile = this.input.resolve("CosmicMutantExport.tsv");
        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, EtlCommons.COSMIC_DATA, true);
        String assembly = this.buildCommandOptions.assembly;
        return new CosmicParser(cosmicFile, serializer, assembly);
    }

    /**
     * Prepares the GWAS parser and copies the GWAS version file to the output folder. Both the
     * GWAS catalog file and the dbSNP VCF file are checked with {@code FileUtils.checkPath} before
     * the parser is created.
     *
     * @return parser reading the GWAS catalog and dbSNP files from the {@code clinical} folder
     * @throws IOException propagated from {@code FileUtils.checkPath}
     * @deprecated logs a deprecation warning on every call; scheduled for removal
     */
    @Deprecated
    private CellBaseParser buildGwas() throws IOException {
        this.logger.warn("This method is deprecated, should no longer be used and will soon be removed");

        Path clinicalFolder = getInputDirFromCommandLine().resolve("clinical");
        copyVersionFiles(Collections.singletonList(clinicalFolder.resolve("gwasVersion.json")));

        Path gwasFile = clinicalFolder.resolve(GWAS_INPUT_FILE_NAME);
        FileUtils.checkPath(gwasFile);
        Path dbSnpFile = clinicalFolder.resolve(DBSNP_INPUT_FILE_NAME);
        FileUtils.checkPath(dbSnpFile);

        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.output, EtlCommons.GWAS_DATA);
        return new GwasParser(gwasFile, dbSnpFile, serializer);
    }

    /**
     * Returns the input folder after verifying that it exists and is a directory.
     *
     * @return the input folder
     * @throws ParameterException if the input path does not exist or is not a directory
     */
    private Path getInputDirFromCommandLine() {
        File inputAsFile = new File(this.input.toString());
        if (inputAsFile.exists()) {
            if (!inputAsFile.isDirectory()) {
                throw new ParameterException("'" + this.input + "' is not a directory");
            }
            return this.input;
        }
        throw new ParameterException("Folder '" + this.input + "' doesn't exist");
    }

    /**
     * Finds the reference genome FASTA file in the genome folder under the input folder.
     *
     * <p>Every entry whose path ends with {@code .fa.gz} matches; when several match, the last one
     * returned by the directory listing wins. The directory stream is closed once the listing has
     * been read.</p>
     *
     * @return the FASTA file, or {@code null} when none is found or the folder cannot be read
     *         (the failure is logged)
     */
    private Path getFastaReferenceGenome() {
        Path genomeFolder = this.input.resolve(EtlCommons.GENOME_DATA);
        DirectoryStream.Filter<Path> isFasta = entry -> entry.toString().endsWith(".fa.gz");
        Path fastaFile = null;
        try (DirectoryStream<Path> entries = Files.newDirectoryStream(genomeFolder, isFasta)) {
            for (Path entry : entries) {
                fastaFile = entry;
            }
        } catch (IOException e) {
            this.logger.error("Error looking for the reference genome FASTA file in '" + genomeFolder + "': " + e.getMessage(), e);
        }
        return fastaFile;
    }

    /**
     * Fails when a mandatory option has no value.
     *
     * @param str name of the option, used in the error message
     * @param str2 value of the option
     * @throws ParameterException if {@code str2} is {@code null}
     */
    private void checkMandatoryOption(String str, String str2) {
        if (str2 != null) {
            return;
        }
        String builderName = this.buildCommandOptions.data;
        throw new ParameterException("'" + str + "' option is mandatory for '" + builderName + "' builder");
    }
}
