package org.opencb.cellbase.lib.download;

import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.attribute.FileAttribute;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.formats.feature.gff.Gff2;
import org.opencb.biodata.formats.feature.gff.io.Gff2Reader;
import org.opencb.biodata.formats.io.FileFormatException;
import org.opencb.biodata.models.core.RegulatoryPfm;
import org.opencb.cellbase.core.config.CellBaseConfiguration;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.serializer.CellBaseJsonFileSerializer;
import org.opencb.cellbase.lib.EtlCommons;

/* loaded from: input_file:org/opencb/cellbase/lib/download/RegulationDownloadManager.class */
/**
 * Download manager for the "regulation" data set.
 *
 * <p>When the species has regulation data configured, {@link #download()} fetches the Ensembl
 * regulatory-build and motif-feature GFF files, builds a JSON file of position frequency
 * matrices (PFMs) looked up through the Ensembl REST service, and downloads the miRBase and
 * miRTarBase files. All downloaded files are placed in a sub-folder of the download folder
 * named after {@link EtlCommons#REGULATION_DATA}.
 */
public class RegulationDownloadManager extends AbstractDownloadManager {
    private static final String ENSEMBL_NAME = "ENSEMBL";
    private static final String MIRBASE_NAME = "miRBase";
    private static final String MIRTARBASE_NAME = "miRTarBase";

    /** Pattern used to find a binding-matrix identifier inside a GFF attribute column. */
    private static final Pattern PFM_ID_PATTERN = Pattern.compile("ENSPFM(\\d+)");

    /**
     * Pause between two consecutive REST look-ups.
     * NOTE(review): presumably chosen to stay within the Ensembl REST rate limits - confirm.
     */
    private static final long REST_LOOKUP_DELAY_MS = 250L;

    /** Folder that receives every regulation download; set by {@link #download()}. */
    private Path regulationFolder;

    /**
     * Creates the manager; all arguments are forwarded unchanged to the parent constructor.
     *
     * @param str first string argument of the parent constructor
     * @param str2 second string argument of the parent constructor
     * @param path path argument of the parent constructor
     * @param cellBaseConfiguration CellBase configuration passed to the parent constructor
     * @throws IOException if the parent constructor fails with an I/O error
     * @throws CellBaseException if the parent constructor rejects the arguments
     */
    public RegulationDownloadManager(String str, String str2, Path path, CellBaseConfiguration cellBaseConfiguration) throws IOException, CellBaseException {
        super(str, str2, path, cellBaseConfiguration);
    }

    /**
     * Downloads every regulation source for the configured species.
     *
     * @return the descriptors of the downloaded files (regulatory features, motif features and
     *     its index, miRBase, miRTarBase), or {@code null} when the species has no regulation
     *     data to download
     * @throws IOException if a folder cannot be created or a download/read fails
     * @throws InterruptedException if the thread is interrupted while downloading
     * @throws NoSuchMethodException propagated from reading the motif-features GFF file
     * @throws FileFormatException propagated from reading the motif-features GFF file
     */
    @Override // org.opencb.cellbase.lib.download.AbstractDownloadManager
    public List<DownloadFile> download() throws IOException, InterruptedException, NoSuchMethodException, FileFormatException {
        if (!speciesHasInfoToDownload(this.speciesConfiguration, EtlCommons.REGULATION_DATA)) {
            // Kept as null (not an empty list) so existing callers observe the same result.
            return null;
        }
        this.regulationFolder = this.downloadFolder.resolve(EtlCommons.REGULATION_DATA);
        Files.createDirectories(this.regulationFolder);
        this.logger.info("Downloading regulation information ...");

        List<DownloadFile> downloadFiles = new ArrayList<>();
        downloadFiles.addAll(downloadRegulatoryaAndMotifFeatures());
        downloadFiles.add(downloadMirna());
        downloadFiles.add(downloadMiRTarBase());
        return downloadFiles;
    }

    /**
     * Downloads the regulatory-build GFF, the motif-features GFF and the motif-features index,
     * then builds the PFM file from the motif features.
     *
     * @return the three download descriptors, in download order
     */
    private List<DownloadFile> downloadRegulatoryaAndMotifFeatures() throws IOException, InterruptedException, NoSuchMethodException, FileFormatException {
        String releaseUrl = this.ensemblHostUrl + "/" + this.ensemblRelease;
        if (!this.configuration.getSpecies().getVertebrates().contains(this.speciesConfiguration)) {
            // Species not listed under vertebrates get an extra phylo path segment.
            releaseUrl = releaseUrl + "/" + getPhylo(this.speciesConfiguration);
        }
        String regulationUrl = releaseUrl + "/regulation/" + this.speciesShortName;
        String motifFeaturesUrl = regulationUrl + "/MotifFeatures/*" + this.assemblyConfiguration.getName() + ".motif_features.gff.gz";

        List<DownloadFile> downloadFiles = new ArrayList<>();
        downloadFiles.add(downloadFile(regulationUrl + "/*Regulatory_Build.regulatory_features*.gff.gz",
                this.regulationFolder.resolve(EtlCommons.REGULATORY_FEATURES_FILE).toString()));
        downloadFiles.add(downloadFile(motifFeaturesUrl,
                this.regulationFolder.resolve(EtlCommons.MOTIF_FEATURES_FILE).toString()));
        downloadFiles.add(downloadFile(motifFeaturesUrl + ".tbi",
                this.regulationFolder.resolve("motif_features.gff.gz.tbi").toString()));
        loadPfmMatrices();
        return downloadFiles;
    }

    /**
     * Builds the PFM JSON file in the build folder unless {@code regulatory_pfm.json.gz} is
     * already there: collects the distinct matrix ids found in the motif-features GFF file and
     * serializes the REST response for each of them.
     *
     * <p>The GFF reader and the JSON serializer are closed even when a read, a look-up or the
     * pause between look-ups fails.
     */
    private void loadPfmMatrices() throws IOException, NoSuchMethodException, FileFormatException, InterruptedException {
        this.logger.info("Downloading and building pfm matrices...");
        if (Files.exists(this.buildFolder.resolve("regulatory_pfm.json.gz"))) {
            this.logger.info("regulatory_pfm.json.gz is already built");
            return;
        }

        // Pass 1: gather the distinct matrix ids referenced by the motif features.
        Set<String> matrixIds = new HashSet<>();
        Gff2Reader motifReader = new Gff2Reader(this.regulationFolder.resolve(EtlCommons.MOTIF_FEATURES_FILE));
        try {
            Gff2 motifFeature;
            while ((motifFeature = motifReader.read()) != null) {
                String matrixId = getMatrixId(PFM_ID_PATTERN, motifFeature);
                if (StringUtils.isNotEmpty(matrixId)) {
                    matrixIds.add(matrixId);
                }
            }
        } finally {
            motifReader.close();
        }

        // Pass 2: look every matrix up and append it to the output file.
        ObjectMapper objectMapper = new ObjectMapper();
        CellBaseJsonFileSerializer serializer = new CellBaseJsonFileSerializer(this.buildFolder, EtlCommons.PFM_DATA, true);
        try {
            this.logger.info("Looking up " + matrixIds.size() + " pfms");
            for (String matrixId : matrixIds) {
                // NOTE(review): the species segment is fixed to homo_sapiens regardless of the
                // species being downloaded - confirm this is intended.
                URL matrixUrl = new URL("https://rest.ensembl.org/species/homo_sapiens/binding_matrix/" + matrixId + "?unit=frequencies;content-type=application/json");
                RegulatoryPfm pfm = objectMapper.readValue(matrixUrl, RegulatoryPfm.class);
                serializer.serialize(pfm);
                TimeUnit.MILLISECONDS.sleep(REST_LOOKUP_DELAY_MS);
            }
        } finally {
            // NOTE(review): a failure part-way may leave a partial output file behind, which
            // the "already built" check above could then accept on the next run - verify.
            serializer.close();
        }
    }

    /**
     * Extracts the first match of {@code pattern} from the attribute column of a GFF record.
     *
     * @param pattern pattern describing the matrix id
     * @param gff2 GFF record to inspect
     * @return the matched text, or {@code null} when the record has no attribute or no match
     */
    private String getMatrixId(Pattern pattern, Gff2 gff2) {
        CharSequence attribute = gff2.getAttribute();
        if (attribute == null) {
            // Pattern.matcher(null) would throw; a record without attributes has no id.
            return null;
        }
        Matcher matcher = pattern.matcher(attribute);
        return matcher.find() ? matcher.group(0) : null;
    }

    /**
     * Downloads the miRBase readme and data file, records the version metadata (version text is
     * taken from the readme via {@code getLine(readme, 1)}) and gunzips the data file.
     *
     * @return the descriptor of the miRBase data download
     */
    private DownloadFile downloadMirna() throws IOException, InterruptedException {
        String mirbaseUrl = this.configuration.getDownload().getMirbase().getHost();

        Path readmePath = this.regulationFolder.resolve("mirbaseReadme.txt");
        downloadFile(this.configuration.getDownload().getMirbaseReadme().getHost(), readmePath.toString());
        saveVersionData(EtlCommons.REGULATION_DATA, MIRBASE_NAME, getLine(readmePath, 1), getTimeStamp(),
                Collections.singletonList(mirbaseUrl), this.regulationFolder.resolve("mirbaseVersion.json"));

        Path mirnaPath = this.regulationFolder.resolve("miRNA.xls.gz");
        DownloadFile mirnaDownload = downloadFile(mirbaseUrl, mirnaPath.toString());
        EtlCommons.runCommandLineProcess(null, "gunzip", Collections.singletonList(mirnaPath.toString()), null);
        return mirnaDownload;
    }

    /**
     * Records the miRTarBase version metadata (no version string is available, so {@code null}
     * is stored) and downloads the miRTarBase spreadsheet.
     *
     * @return the descriptor of the miRTarBase download
     */
    private DownloadFile downloadMiRTarBase() throws IOException, InterruptedException {
        String mirTarBaseUrl = this.configuration.getDownload().getMiRTarBase().getHost();
        saveVersionData(EtlCommons.REGULATION_DATA, MIRTARBASE_NAME, null, getTimeStamp(),
                Collections.singletonList(mirTarBaseUrl), this.regulationFolder.resolve("miRTarBaseVersion.json"));
        return downloadFile(mirTarBaseUrl, this.regulationFolder.resolve("hsa_MTI.xlsx").toString());
    }
}
