package org.opencb.cellbase.app.transform;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.opencb.biodata.models.core.GenomicScoreRegion;
import org.opencb.cellbase.app.cli.EtlCommons;
import org.opencb.cellbase.core.serializer.CellBaseFileSerializer;
import org.opencb.commons.utils.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/opencb/cellbase/app/transform/ConservationParser.class */
/**
 * Parses conservation score files found under a base directory and serializes them as
 * {@link GenomicScoreRegion} chunks, one output file per chromosome
 * ({@code conservation_<chromosome>}).
 *
 * <p>Three sources are read, each from its own sub-directory of the base directory:
 * <ul>
 *   <li>the GERP sub-directory ({@link EtlCommons#GERP_SUBDIRECTORY}), optional: an archive that is
 *       extracted with {@code tar} into {@code *.rates} files;</li>
 *   <li>{@code phastCons}: {@code *.wigFix.gz} files;</li>
 *   <li>{@code phylop}: {@code *.wigFix.gz} files.</li>
 * </ul>
 */
public class ConservationParser extends CellBaseParser {
    private static final int CHUNK_SIZE = 2000;
    private static final String PHASTCONS = "phastCons";
    private static final String PHYLOP = "phylop";
    private static final String WIG_FIX_GLOB = "*.wigFix.gz";

    private final Logger logger;
    private final Path conservedRegionPath;
    private final int chunkSize;
    private final CellBaseFileSerializer fileSerializer;
    /** Cache of chromosome name to output file name. */
    private final Map<String, String> outputFileNames;

    /**
     * Creates a parser that uses the default chunk size ({@value #CHUNK_SIZE}).
     *
     * @param conservedRegionPath base directory holding the conservation sub-directories
     * @param serializer serializer that receives every generated region
     */
    public ConservationParser(Path conservedRegionPath, CellBaseFileSerializer serializer) {
        this(conservedRegionPath, CHUNK_SIZE, serializer);
    }

    /**
     * Creates a parser with an explicit chunk size.
     *
     * @param conservedRegionPath base directory holding the conservation sub-directories
     * @param chunkSize number of positions per serialized GERP region
     * @param serializer serializer that receives every generated region
     */
    public ConservationParser(Path conservedRegionPath, int chunkSize, CellBaseFileSerializer serializer) {
        super(serializer);
        this.fileSerializer = serializer;
        this.conservedRegionPath = conservedRegionPath;
        this.chunkSize = chunkSize;
        this.logger = LoggerFactory.getLogger(ConservationParser.class);
        this.outputFileNames = new HashMap<>();
    }

    /**
     * Parses GERP (when its sub-directory exists), phastCons and phylop data and serializes the scores.
     *
     * @throws IOException if the base directory is null, missing or not a directory, or if any input
     *         cannot be read
     * @throws InterruptedException if the external {@code tar} process is interrupted
     */
    @Override // org.opencb.cellbase.app.transform.CellBaseParser
    public void parse() throws IOException, InterruptedException {
        // Validate before touching the path: the original dereferenced it first and hit an NPE on null.
        if (conservedRegionPath == null || !Files.exists(conservedRegionPath) || !Files.isDirectory(conservedRegionPath)) {
            throw new IOException("Conservation directory whether does not exist, is not a directory or cannot be read");
        }
        logger.debug("conservedRegionPath = {}", conservedRegionPath);

        Path gerpFolderPath = conservedRegionPath.resolve(EtlCommons.GERP_SUBDIRECTORY);
        if (gerpFolderPath.toFile().exists()) {
            logger.debug("Parsing GERP data ...");
            gerpParser(gerpFolderPath);
        }

        Map<String, Path> phastConsFiles = findWigFixFiles(PHASTCONS);
        Map<String, Path> phylopFiles = findWigFixFiles(PHYLOP);
        Set<String> chromosomes = new HashSet<>(phastConsFiles.keySet());
        chromosomes.addAll(phylopFiles.keySet());
        logger.debug("Chromosomes found '{}'", chromosomes);

        for (String chromosome : chromosomes) {
            processChromosomeFile(chromosome, phastConsFiles.get(chromosome), PHASTCONS);
            processChromosomeFile(chromosome, phylopFiles.get(chromosome), PHYLOP);
        }
    }

    /**
     * Lists the {@code *.wigFix.gz} files of a sub-directory, keyed by chromosome name. The chromosome
     * is the file name up to the first dot with any {@code chr} occurrence removed.
     */
    private Map<String, Path> findWigFixFiles(String subdirectory) throws IOException {
        Map<String, Path> filesByChromosome = new HashMap<>();
        try (DirectoryStream<Path> wigFixFiles
                     = Files.newDirectoryStream(conservedRegionPath.resolve(subdirectory), WIG_FIX_GLOB)) {
            for (Path wigFixFile : wigFixFiles) {
                String chromosome = wigFixFile.getFileName().toString().split("\\.")[0].replace("chr", "");
                filesByChromosome.put(chromosome, wigFixFile);
            }
        }
        return filesByChromosome;
    }

    /** Processes one chromosome file of the given source, or logs a warning when the file is missing. */
    private void processChromosomeFile(String chromosome, Path wigFixFile, String source) throws IOException {
        if (wigFixFile == null) {
            // A chromosome may be present for only one of the two sources.
            logger.warn("No {} file found for chromosome '{}', skipping", source, chromosome);
            return;
        }
        logger.debug("Processing chromosome '{}', file '{}'", chromosome, wigFixFile);
        processWigFixFile(wigFixFile, source);
    }

    /**
     * Extracts the GERP archive with {@code tar} and serializes every resulting {@code *.rates} file.
     * A warning is logged when no {@code *.rates} file is found after extraction.
     *
     * @param path GERP sub-directory containing the archive; also the extraction target
     */
    private void gerpParser(Path path) throws IOException, InterruptedException {
        Path gerpArchive = path.resolve(EtlCommons.GERP_FILE);
        logger.info("Uncompressing {}", gerpArchive);
        EtlCommons.runCommandLineProcess(null, "tar",
                Arrays.asList("-xvzf", gerpArchive.toString(), "--overwrite", "-C", path.toString()), null);

        boolean ratesFileFound = false;
        try (DirectoryStream<Path> ratesFiles = Files.newDirectoryStream(path, "*.rates")) {
            for (Path ratesFile : ratesFiles) {
                ratesFileFound = true;
                processGerpRatesFile(ratesFile);
            }
        }
        if (!ratesFileFound) {
            logger.warn("No GERP++ files were found. Please check that the original file {} is there, that it was properly decompressed and that the *.rates files are present", gerpArchive);
        }
    }

    /**
     * Reads one {@code *.rates} file (tab-separated, score in the second column, one line per position
     * starting at position 1) and serializes it in consecutive regions.
     */
    private void processGerpRatesFile(Path ratesFile) throws IOException {
        String fileName = ratesFile.getFileName().toString();
        logger.info("Processing file '{}'", fileName);
        String chromosome = fileName.replaceFirst("chr", "").split("\\.")[0];

        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(ratesFile.toFile()), StandardCharsets.UTF_8))) {
            int start = 1;
            // Starts at 1 so the first region holds chunkSize - 1 values (positions 1..chunkSize-1)
            // and every later region starts on a multiple of chunkSize.
            int positionsInChunk = 1;
            List<Float> values = new ArrayList<>(chunkSize);
            String line;
            while ((line = reader.readLine()) != null) {
                values.add(Float.valueOf(line.split("\t")[1]));
                positionsInChunk++;
                if (positionsInChunk == chunkSize) {
                    serializeRegion(chromosome, start, values, EtlCommons.GERP_SUBDIRECTORY);
                    start += values.size();
                    positionsInChunk = 0;
                    values.clear();
                }
            }
            // Remaining values of the last, partially filled region.
            serializeRegion(chromosome, start, values, EtlCommons.GERP_SUBDIRECTORY);
        }
    }

    /**
     * Reads a wig file made of {@code fixedStep} groups (a header line carrying {@code chrom=} and
     * {@code start=} attributes followed by one score per line) and serializes the scores, splitting
     * regions at multiples of {@value #CHUNK_SIZE}.
     *
     * @param path file to read, opened through {@code FileUtils.newBufferedReader}
     * @param str name of the conservation source stored in every region (e.g. {@code phastCons})
     * @throws IOException if the file cannot be read or a header lacks {@code chrom} or {@code start}
     */
    private void processWigFixFile(Path path, String str) throws IOException {
        try (BufferedReader reader = FileUtils.newBufferedReader(path)) {
            String chromosome = "";
            int start = 0;
            List<Float> values = new ArrayList<>();
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("fixedStep")) {
                    // New group: flush whatever the previous group still holds. The original only
                    // flushed after a chunk boundary had been crossed, losing short leading groups.
                    serializeRegion(chromosome, start, values, str);

                    Map<String, String> attributes = parseFixedStepHeader(line);
                    String chrom = attributes.get("chrom");
                    String startValue = attributes.get("start");
                    if (chrom == null || startValue == null) {
                        throw new IOException("Malformed fixedStep header in " + path + ": '" + line + "'");
                    }
                    chromosome = chrom.replace("chr", "");
                    start = Integer.parseInt(startValue);
                    values = new ArrayList<>(CHUNK_SIZE);
                } else {
                    // NOTE(review): boundaries use the CHUNK_SIZE constant, not the chunkSize field
                    // used for GERP - kept as in the original; confirm whether that is intended.
                    if (start / CHUNK_SIZE != (start + values.size()) / CHUNK_SIZE) {
                        serializeRegion(chromosome, start, values, str);
                        start += values.size();
                        values.clear();
                    }
                    values.add(Float.parseFloat(line.trim()));
                }
            }
            serializeRegion(chromosome, start, values, str);
        }
    }

    /**
     * Extracts the {@code key=value} attributes of a {@code fixedStep} header line. Keys are
     * lower-cased; tokens without a key/value pair (including the {@code fixedStep} keyword) are
     * ignored.
     */
    private Map<String, String> parseFixedStepHeader(String headerLine) {
        Map<String, String> attributes = new HashMap<>();
        for (String token : headerLine.trim().split("\\s+")) {
            String[] keyValue = token.split("=");
            if (keyValue.length >= 2) {
                attributes.put(keyValue[0].toLowerCase(Locale.ROOT), keyValue[1]);
            }
        }
        return attributes;
    }

    /**
     * Serializes {@code values} as a region covering {@code [start, start + values.size() - 1]} into
     * the output file of {@code chromosome}. Does nothing when there are no values, so that no empty
     * region (with an end before its start) is ever written.
     */
    private void serializeRegion(String chromosome, int start, List<Float> values, String source)
            throws IOException {
        if (values.isEmpty()) {
            return;
        }
        int end = start + values.size() - 1;
        // Raw construction kept from the original: the type parameters of GenomicScoreRegion are
        // not visible from this file.
        fileSerializer.serialize(new GenomicScoreRegion(chromosome, start, end, source, values),
                getOutputFileName(chromosome));
    }

    /** Returns (and caches) the output file name for a chromosome: {@code conservation_<chromosome>}. */
    private String getOutputFileName(String str) {
        String outputFileName = outputFileNames.get(str);
        if (outputFileName == null) {
            outputFileName = "conservation_" + str;
            outputFileNames.put(str, outputFileName);
        }
        return outputFileName;
    }
}
