package umcg.genetica.io.trityper;

import gnu.trove.map.hash.THashMap;
import gnu.trove.set.hash.THashSet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.tools.ant.DirectoryScanner;
import umcg.genetica.containers.Pair;
import umcg.genetica.io.Gpio;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.math.matrix.DoubleMatrixDataset;
import umcg.genetica.math.stats.Log2Transform;
import umcg.genetica.math.stats.QuantileNormalization;

/* loaded from: input_file:umcg/genetica/io/trityper/TriTyperGeneticalGenomicsDataset.class */
/**
 * Combines a TriTyper genotype dataset with an expression dataset and keeps
 * track of which genotype sample is coupled to which expression sample.
 *
 * <p>The coupling is stored both as a name map (genotype sample name to
 * expression sample name) and as an index array
 * ({@code expressionToGenotypeIdArray[expressionSampleId] = genotypeSampleId}).
 * Optionally a covariate matrix (rows are covariates, columns are samples) is
 * loaded and the expression data is pruned to the samples that have covariates.
 *
 * <p>Sample indices are stored as {@code short}; the sample counters and ids
 * are narrowed without a range check.
 */
public final class TriTyperGeneticalGenomicsDataset implements Comparable<TriTyperGeneticalGenomicsDataset> {
    private TriTyperGenotypeData genotypeData;
    private TriTyperExpressionData expressionData;
    /** Genotype sample name to coupled expression sample name. */
    private THashMap<String, String> genotypeToExpressionCouplings;
    private TriTyperGeneticalGenomicsDatasetSettings settings;
    /** Indexed by expression sample id; holds the coupled genotype sample id. */
    private short[] expressionToGenotypeIdArray;
    /** Number of genotype samples that are coupled to a loaded expression sample. */
    private short totalGGSamples;
    private boolean expressionDataLoadedCorrectly;
    /** Inverse of {@link #expressionToGenotypeIdArray}; only filled by {@link #permuteSampleLables(Random)}. */
    private short[] genotypeToExpressionIdArray;
    /** Covariate matrix (rows: covariates, columns: samples), or {@code null} when none was loaded. */
    DoubleMatrixDataset<String, String> covariates;

    /**
     * Loads genotype data, the genotype-to-expression couplings, expression
     * data and (when configured and present) covariates.
     *
     * <p>The supplied settings object is modified: its genotype location is
     * normalized via {@code Gpio.formatAsDirectory} and a missing expression
     * location is defaulted to {@code ExpressionData.txt} in the genotype
     * directory. Several unrecoverable conditions print a message and
     * terminate the JVM with exit status -1.
     *
     * @param triTyperGeneticalGenomicsDatasetSettings dataset settings (locations, filters, transformations)
     * @param pair pathway definitions handed to the expression data; may be {@code null}
     * @throws IOException if the coupling file is configured but does not exist, or reading fails
     * @throws Exception if one of the underlying loaders fails
     */
    public TriTyperGeneticalGenomicsDataset(TriTyperGeneticalGenomicsDatasetSettings triTyperGeneticalGenomicsDatasetSettings, Pair<List<String>, List<List<String>>> pair) throws IOException, Exception {
        this.expressionDataLoadedCorrectly = true;
        this.covariates = null;
        this.settings = triTyperGeneticalGenomicsDatasetSettings;
        final TriTyperGeneticalGenomicsDatasetSettings cfg = triTyperGeneticalGenomicsDatasetSettings;

        cfg.genotypeLocation = Gpio.formatAsDirectory(cfg.genotypeLocation);
        if (cfg.expressionLocation == null) {
            cfg.expressionLocation = cfg.genotypeLocation + "ExpressionData.txt";
        }

        this.genotypeData = new TriTyperGenotypeData();
        this.genotypeData.load(cfg.genotypeLocation);

        // Collect the expression samples that are coupled to an included genotype sample.
        THashSet<String> expressionSamplesToLoad = new THashSet<>();
        Boolean[] isIncluded = this.genotypeData.getIsIncluded();
        loadCouplings();
        for (Map.Entry<String, String> coupling : this.genotypeToExpressionCouplings.entrySet()) {
            Integer genotypeId = this.genotypeData.getIndividualId(coupling.getKey());
            // -9 is the "unknown individual" marker used throughout this class.
            if (genotypeId.intValue() == -9) {
                continue;
            }
            Boolean included = isIncluded[genotypeId.intValue()];
            if (included != null && included.booleanValue()) {
                expressionSamplesToLoad.add(coupling.getValue());
            }
        }
        if (expressionSamplesToLoad.isEmpty()) {
            System.err.println("ERROR: none of the expression samples will be included with your current settings.\nPlease check the links between genotype and gene expression samples and/or your PhenotypeInformation.txt");
            System.exit(-1);
        }

        this.expressionData = new TriTyperExpressionData();
        this.expressionData.confineToProbes(cfg.tsProbesConfine);
        this.expressionData.setConfineToProbesThatMapToAnyChromosome(cfg.confineProbesToProbesMappingToAnyChromosome);
        this.expressionData.setConfineToProbesThatMapToChromosome(cfg.confineProbesToProbesThatMapToChromosome);
        this.expressionData.setIncludeIndividuals(expressionSamplesToLoad);
        this.expressionData.setPathwayDefinitions(pair);
        this.expressionDataLoadedCorrectly = this.expressionData.load(cfg.expressionLocation, cfg.probeannotation, cfg.expressionplatform, cfg.cisAnalysis && cfg.transAnalysis);
        pruneGenotypeToExpressionCouplings();

        if (cfg.quantilenormalize) {
            QuantileNormalization.quantilenormalize(this.expressionData.getMatrix());
        }
        if (cfg.logtransform) {
            Log2Transform.log2transform(this.expressionData.getMatrix());
        }

        if (cfg.covariateFile != null && Gpio.exists(cfg.covariateFile)) {
            loadCovariates(cfg.covariateFile);
        }
    }

    /**
     * Convenience constructor without pathway definitions.
     *
     * @param triTyperGeneticalGenomicsDatasetSettings dataset settings
     * @throws IOException see the two-argument constructor
     * @throws Exception see the two-argument constructor
     */
    public TriTyperGeneticalGenomicsDataset(TriTyperGeneticalGenomicsDatasetSettings triTyperGeneticalGenomicsDatasetSettings) throws IOException, Exception {
        this(triTyperGeneticalGenomicsDatasetSettings, null);
    }

    /**
     * Loads the covariate matrix, restricts the expression data to the samples
     * with complete covariates, and rebuilds the couplings accordingly.
     *
     * <p>The file is first read with samples expected on the columns; if no
     * sample matches, it is re-read with samples on the rows and transposed.
     * Terminates the JVM (status -1) when no sample can be matched or when no
     * sample remains after dropping columns that contain NaN values.
     */
    private void loadCovariates(String covariateFile) throws Exception {
        System.out.println("Loading covariates: " + covariateFile);
        HashSet<String> expressionSamples = new HashSet<>(Arrays.asList(this.expressionData.getIndividuals()));

        this.covariates = new DoubleMatrixDataset<>(covariateFile, (Set<String>) null, expressionSamples);
        if (this.covariates.colObjects.isEmpty()) {
            System.out.println("Could not find matching sample identifiers between covariate file and expression file.\nTransposing your covariate file.");
            this.covariates = new DoubleMatrixDataset<>(covariateFile, expressionSamples);
            if (this.covariates.rowObjects.isEmpty()) {
                System.err.println("Could not find matching samples between expression data and covariate data.");
                System.exit(-1);
            } else {
                this.covariates.transposeDataset();
                this.covariates.recalculateHashMaps();
            }
        }

        this.covariates.removeColumnsWithNaNs();
        this.covariates.recalculateHashMaps();
        if (this.covariates.colObjects.isEmpty()) {
            System.err.println("ERROR: after removing samples with NaN values, no covariates remain");
            System.exit(-1);
        }
        System.out.println(this.covariates.rowObjects.size() + " covariates loaded for " + this.covariates.colObjects.size() + " samples");

        // The expression data changed, so the couplings have to be rebuilt from scratch.
        this.expressionData.pruneAndReorderSamples(this.covariates.colObjects);
        loadCouplings();
        pruneGenotypeToExpressionCouplings();
    }

    /** @return the genotype data of this dataset */
    public TriTyperGenotypeData getGenotypeData() {
        return this.genotypeData;
    }

    /** @param triTyperGenotypeData the genotype data to use; couplings are not recomputed */
    public void setGenotypeData(TriTyperGenotypeData triTyperGenotypeData) {
        this.genotypeData = triTyperGenotypeData;
    }

    /** @return the expression data of this dataset */
    public TriTyperExpressionData getExpressionData() {
        return this.expressionData;
    }

    /** @param triTyperExpressionData the expression data to use; couplings are not recomputed */
    public void setExpressionData(TriTyperExpressionData triTyperExpressionData) {
        this.expressionData = triTyperExpressionData;
    }

    /** @return the live map from genotype sample name to expression sample name (not a copy) */
    public THashMap<String, String> getGenotypeToExpressionCouplings() {
        return this.genotypeToExpressionCouplings;
    }

    /** @param tHashMap map from genotype sample name to expression sample name; index arrays are not recomputed */
    public void setGenotypeToExpressionCouplings(THashMap<String, String> tHashMap) {
        this.genotypeToExpressionCouplings = tHashMap;
    }

    /** @return the settings this dataset was created with */
    public TriTyperGeneticalGenomicsDatasetSettings getSettings() {
        return this.settings;
    }

    /** @param triTyperGeneticalGenomicsDatasetSettings the settings to store; nothing is reloaded */
    public void setSettings(TriTyperGeneticalGenomicsDatasetSettings triTyperGeneticalGenomicsDatasetSettings) {
        this.settings = triTyperGeneticalGenomicsDatasetSettings;
    }

    /** @return the number of genotype samples coupled to a loaded expression sample */
    public int getTotalGGSamples() {
        return this.totalGGSamples;
    }

    /**
     * (Re)builds {@code genotypeToExpressionCouplings}.
     *
     * <p>Without a configured coupling file every included genotype sample is
     * coupled to the expression sample with the same name. Otherwise the
     * tab-separated coupling file is read; the first column is the genotype
     * sample, the second the expression sample, and lines with fewer than two
     * columns are skipped. Duplicate genotype samples are fatal: a message is
     * printed and the JVM is terminated with a non-zero exit status.
     *
     * @throws IOException if the configured coupling file does not exist or cannot be read
     */
    private void loadCouplings() throws IOException {
        this.genotypeToExpressionCouplings = new THashMap<>();
        String couplingFile = this.settings.genotypeToExpressionCoupling;

        if (couplingFile == null || couplingFile.trim().isEmpty()) {
            Boolean[] isIncluded = this.genotypeData.getIsIncluded();
            String[] individuals = this.genotypeData.getIndividuals();
            for (int i = 0; i < individuals.length; i++) {
                if (isIncluded[i] == null || !isIncluded[i].booleanValue()) {
                    continue;
                }
                String individual = individuals[i];
                if (this.genotypeToExpressionCouplings.get(individual) != null) {
                    System.out.println("ERROR: your genotype data contains duplicate individuals: " + individual);
                    // Fatal error: report failure to the calling process.
                    System.exit(-1);
                } else {
                    this.genotypeToExpressionCouplings.put(individual, individual);
                }
            }
            return;
        }

        if (!Gpio.exists(couplingFile)) {
            throw new IOException("Error: genotype to expression coupling file: " + couplingFile + DirectoryScanner.DOES_NOT_EXIST_POSTFIX);
        }

        TextFile textFile = new TextFile(couplingFile, false);
        try {
            String[] elems = textFile.readLineElemsReturnReference(TextFile.tab);
            while (elems != null) {
                if (elems.length > 1) {
                    // Copy the ids into fresh strings; decode with the same charset used for
                    // encoding so non-ASCII ids survive on platforms whose default is not UTF-8.
                    String genotypeSample = new String(elems[0].getBytes("UTF-8"), "UTF-8");
                    String expressionSample = new String(elems[1].getBytes("UTF-8"), "UTF-8");
                    if (this.genotypeToExpressionCouplings.get(genotypeSample) != null) {
                        System.out.println("ERROR: your genotype to expression coupling file contains duplicate entries for individual: " + genotypeSample);
                        // Fatal error: report failure to the calling process.
                        System.exit(-1);
                    } else {
                        this.genotypeToExpressionCouplings.put(genotypeSample, expressionSample);
                    }
                }
                elems = textFile.readLineElemsReturnReference(TextFile.tab);
            }
        } finally {
            // Release the file handle even when reading fails half-way.
            textFile.close();
        }
    }

    /**
     * @return a widened copy of the expression-to-genotype index array
     *         (index: expression sample id, value: genotype sample id)
     */
    public int[] getExpressionToGenotypeIdArray() {
        int[] widened = new int[this.expressionToGenotypeIdArray.length];
        for (int i = 0; i < widened.length; i++) {
            widened[i] = this.expressionToGenotypeIdArray[i];
        }
        return widened;
    }

    /** @return the internal expression-to-genotype index array itself (not a copy) */
    public short[] getExpressionToGenotypeIdArrayShort() {
        return this.expressionToGenotypeIdArray;
    }

    /**
     * Orders datasets by their number of genotype individuals, ascending.
     *
     * @param triTyperGeneticalGenomicsDataset the dataset to compare with
     * @return negative, zero or positive when this dataset has fewer, equally many or more individuals
     */
    @Override // java.lang.Comparable
    public int compareTo(TriTyperGeneticalGenomicsDataset triTyperGeneticalGenomicsDataset) {
        return Integer.compare(
                this.genotypeData.getIndividuals().length,
                triTyperGeneticalGenomicsDataset.getGenotypeData().getIndividuals().length);
    }

    /**
     * Size-based equality consistent with {@link #compareTo}.
     *
     * <p>This is an overload, not an override of {@link Object#equals(Object)};
     * hash-based collections keep using identity semantics.
     *
     * @param triTyperGeneticalGenomicsDataset the dataset to compare with
     * @return {@code true} when both datasets have the same number of genotype individuals
     */
    public boolean equals(TriTyperGeneticalGenomicsDataset triTyperGeneticalGenomicsDataset) {
        return this.genotypeData.getIndividuals().length == triTyperGeneticalGenomicsDataset.getGenotypeData().getIndividuals().length;
    }

    /**
     * Randomly reassigns the coupled genotype ids over the expression samples.
     *
     * <p>Entries equal to -1 stay -1. For every other expression sample one of
     * the remaining genotype ids is drawn without replacement, consuming one
     * {@code nextDouble()} per assigned sample. The inverse array
     * {@code genotypeToExpressionIdArray} is rebuilt for the assigned ids.
     *
     * @param random source of randomness
     */
    public void permuteSampleLables(Random random) {
        final int nrExpressionSamples = this.expressionToGenotypeIdArray.length;

        List<Short> availableGenotypeIds = new ArrayList<>();
        for (int i = 0; i < nrExpressionSamples; i++) {
            if (this.expressionToGenotypeIdArray[i] != -1) {
                availableGenotypeIds.add(Short.valueOf(this.expressionToGenotypeIdArray[i]));
            }
        }

        short[] permuted = new short[nrExpressionSamples];
        this.genotypeToExpressionIdArray = new short[this.genotypeData.getIndividuals().length];
        for (int expressionId = 0; expressionId < nrExpressionSamples; expressionId++) {
            if (this.expressionToGenotypeIdArray[expressionId] == -1) {
                permuted[expressionId] = -1;
                continue;
            }
            int pick = (int) (random.nextDouble() * availableGenotypeIds.size());
            short genotypeId = availableGenotypeIds.remove(pick).shortValue();
            permuted[expressionId] = genotypeId;
            this.genotypeToExpressionIdArray[genotypeId] = (short) expressionId;
        }
        this.expressionToGenotypeIdArray = permuted;
    }

    /**
     * Shuffles the values of every covariate (matrix row) across all samples
     * (matrix columns), in place. Does nothing when no covariates are loaded.
     *
     * @param random source of randomness passed to {@link Collections#shuffle(List, Random)}
     */
    public void permuteCovariates(Random random) {
        if (this.covariates == null) {
            return;
        }
        System.out.println("Randomizing covariates");
        for (int row = 0; row < this.covariates.nrRows; row++) {
            double[] rowValues = this.covariates.rawData[row];
            // Iterate over the columns of this row; bounding the loop by the number of rows
            // would skip samples or run past the end whenever the matrix is not square.
            List<Double> shuffled = new ArrayList<>(rowValues.length);
            for (int col = 0; col < rowValues.length; col++) {
                shuffled.add(Double.valueOf(rowValues[col]));
            }
            Collections.shuffle(shuffled, random);
            for (int col = 0; col < rowValues.length; col++) {
                rowValues[col] = shuffled.get(col).doubleValue();
            }
        }
    }

    /**
     * Reloads the couplings from the settings; index arrays are not recomputed.
     *
     * @throws IOException if the coupling file cannot be read
     */
    public void resetGenotypeToExpressionCouplings() throws IOException {
        loadCouplings();
    }

    /**
     * Restricts the couplings to genotype samples whose coupled expression
     * sample is actually present in the expression data, updates the genotype
     * inclusion flags and the coupled-sample count, and rebuilds
     * {@code expressionToGenotypeIdArray}.
     */
    public void pruneGenotypeToExpressionCouplings() {
        String[] individuals = this.genotypeData.getIndividuals();
        Boolean[] included = new Boolean[individuals.length];
        THashMap<String, String> prunedCouplings = new THashMap<>();
        this.totalGGSamples = (short) 0;

        for (int i = 0; i < included.length; i++) {
            String genotypeSample = individuals[i];
            if (!this.genotypeToExpressionCouplings.containsKey(genotypeSample)) {
                included[i] = false;
                continue;
            }
            String expressionSample = this.genotypeToExpressionCouplings.get(genotypeSample);
            if (expressionSample == null) {
                // Coupled to nothing: the flag is intentionally left unset (null).
                continue;
            }
            if (this.expressionData.getIndividualId(expressionSample).intValue() == -9) {
                included[i] = false;
            } else {
                included[i] = true;
                prunedCouplings.put(genotypeSample, expressionSample);
                this.totalGGSamples = (short) (this.totalGGSamples + 1);
            }
        }
        this.genotypeData.setIsIncluded(included);
        this.genotypeToExpressionCouplings = prunedCouplings;

        this.expressionToGenotypeIdArray = new short[this.totalGGSamples];
        Set<Integer> seenExpressionIds = new HashSet<>();
        for (Map.Entry<String, String> coupling : prunedCouplings.entrySet()) {
            Integer expressionId = this.expressionData.getIndividualId(coupling.getValue());
            Integer genotypeId = this.genotypeData.getIndividualId(coupling.getKey());
            if (expressionId.intValue() == -9 || genotypeId.intValue() == -9) {
                continue;
            }
            // add() returns false when two genotype samples point at the same expression
            // sample; the first coupling encountered wins.
            if (seenExpressionIds.add(expressionId)) {
                this.expressionToGenotypeIdArray[expressionId.intValue()] = genotypeId.shortValue();
            } else {
                System.out.println("ERROR: your dataset contains duplicate samples!");
            }
        }
    }

    /** @return a new map from genotype sample id to expression sample id */
    public HashMap<Integer, Integer> getGenotypeToExpressionIdHash() {
        HashMap<Integer, Integer> genotypeToExpression = new HashMap<>();
        for (int expressionId = 0; expressionId < this.expressionToGenotypeIdArray.length; expressionId++) {
            genotypeToExpression.put(Integer.valueOf(this.expressionToGenotypeIdArray[expressionId]), Integer.valueOf(expressionId));
        }
        return genotypeToExpression;
    }

    /** @return a new map from expression sample id to genotype sample id */
    public HashMap<Integer, Integer> getExpressionToGenotypeIdHash() {
        HashMap<Integer, Integer> expressionToGenotype = new HashMap<>();
        for (int expressionId = 0; expressionId < this.expressionToGenotypeIdArray.length; expressionId++) {
            expressionToGenotype.put(Integer.valueOf(expressionId), Integer.valueOf(this.expressionToGenotypeIdArray[expressionId]));
        }
        return expressionToGenotype;
    }

    /** @return the value returned by the expression data loader (initially {@code true}) */
    public boolean isExpressionDataLoadedCorrectly() {
        return this.expressionDataLoadedCorrectly;
    }

    /** @param z new value of the "expression data loaded correctly" flag */
    public void setExpressionDataLoadedCorrectly(boolean z) {
        this.expressionDataLoadedCorrectly = z;
    }

    /** @return the covariate matrix, or {@code null} when no covariates were loaded */
    public DoubleMatrixDataset<String, String> getCovariateData() {
        return this.covariates;
    }
}
