package eqtlmappingpipeline.normalization;

import JSci.maths.ArrayMath;
import Jama.EigenvalueDecomposition;
import cern.jet.stat.Probability;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import org.apache.commons.math3.stat.ranking.NaNStrategy;
import org.apache.commons.math3.stat.ranking.NaturalRanking;
import org.apache.commons.math3.stat.ranking.TiesStrategy;
import umcg.genetica.console.ProgressBar;
import umcg.genetica.containers.Pair;
import umcg.genetica.io.Gpio;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.math.PCA;
import umcg.genetica.math.matrix.DoubleMatrixDataset;
import umcg.genetica.math.matrix.MatrixHandling;
import umcg.genetica.math.matrix.MatrixTools;
import umcg.genetica.math.stats.Descriptives;
import umcg.genetica.math.stats.Log2Transform;
import umcg.genetica.math.stats.QuantileNormalization;
import umcg.genetica.math.stats.Regression;
import umcg.genetica.math.stats.concurrent.ConcurrentCorrelation;
import umcg.genetica.math.stats.concurrent.ConcurrentCovariation;
import umcg.genetica.methylation.ConvertBetaAndMvalues;

/* loaded from: input_file:eqtlmappingpipeline/normalization/Normalizer.class */
public class Normalizer {
    /**
     * Ranking helper used by {@link #forceNormal(double[])}. It is configured with
     * {@code TiesStrategy.AVERAGE} for tied values and {@code NaNStrategy.FAILED} for NaN input.
     */
    NaturalRanking ranking = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE);

    /**
     * Runs the normalization pipeline on a matrix file and saves the result of every step.
     *
     * <p>Steps run in this fixed order, each one only when its flag is set: zero-variance probe
     * removal (only when the matrix has more than three columns), quantile normalization, log2
     * transform, M-value transform, centering and scaling, covariate adjustment, PCA (optionally
     * followed by PC removal) and forcing a normal distribution. Every step appends its own
     * suffix to the output file prefix.
     *
     * @param str   path of the input matrix; its file name (without {@code .txt} /
     *              {@code .txt.gz}) is the base of every output file name
     * @param str2  file listing the rows to keep, or {@code null} for no row filter
     * @param str3  file listing the columns to keep, or {@code null} for no column filter
     * @param i     number of principal components handed to {@link #correctDataForPCs}
     * @param i2    step size handed to {@link #correctDataForPCs}
     * @param str4  covariate file; covariates are only adjusted when this is non-null and
     *              {@code z7} is set
     * @param z     forwarded unchanged to {@link #adjustCovariates}
     * @param str5  output directory (created if needed), or {@code null} to write next to the input
     * @param z2    run quantile normalization
     * @param z3    run the log2 transform
     * @param z4    run the M-value transform
     * @param z5    run centering and scaling
     * @param z6    run PCA; PCs are removed afterwards unless both {@code i} and {@code i2} are 0
     * @param z7    run covariate adjustment (requires {@code str4})
     * @param z8    forwarded to {@link #quantileNormalize} as its first flag
     * @param z9    forwarded to {@link #quantileNormalize} as its second flag
     * @param z10   forwarded to {@link #quantileNormalize} as its third flag
     * @param z11   forwarded to {@link #quantileNormalize} as its fourth flag
     * @param z12   run {@link #forceNormalDistribution} as the last step
     * @throws IOException if reading the input or writing any output fails
     */
    public void normalize(String str, String str2, String str3, int i, int i2, String str4, boolean z, String str5, boolean z2, boolean z3, boolean z4, boolean z5, boolean z6, boolean z7, boolean z8, boolean z9, boolean z10, boolean z11, boolean z12) throws IOException {
        DoubleMatrixDataset<String, String> doubleMatrixDataset;
        System.out.println("Running normalization.");

        // Resolve the directory that receives all output files.
        String outputDir;
        if (str5 != null) {
            outputDir = Gpio.formatAsDirectory(str5);
            Gpio.createDir(outputDir);
        } else {
            String inputParent = Gpio.getParentDir(str);
            outputDir = inputParent == null ? "" : inputParent + Gpio.getFileSeparator();
        }

        // Strip the extension literally: String.replaceAll would treat each '.' as a regex wildcard
        // and could eat unrelated characters from the file name.
        String fileName = Gpio.getFileName(str);
        String str7 = outputDir + (fileName.contains(".txt.gz") ? fileName.replace(".txt.gz", "") : fileName.replace(".txt", ""));

        HashSet<String> sampleFilter = null;
        if (str3 != null) {
            sampleFilter = new HashSet<String>(new TextFile(str3, false).readAsArrayList());
        }
        HashSet<String> probeFilter = null;
        if (str2 != null) {
            probeFilter = new HashSet<String>(new TextFile(str2, false).readAsArrayList());
        }

        if (sampleFilter == null && probeFilter == null) {
            doubleMatrixDataset = new DoubleMatrixDataset<>(str);
        } else {
            doubleMatrixDataset = new DoubleMatrixDataset<>(str, probeFilter, sampleFilter);
            if (sampleFilter != null) {
                str7 = str7 + ".SampleSelection";
                reportSelectionMismatch(doubleMatrixDataset.colObjects, sampleFilter, "columns");
            }
            if (probeFilter != null) {
                str7 = str7 + ".ProbeSelection";
                reportSelectionMismatch(doubleMatrixDataset.rowObjects, probeFilter, "rows");
            }
            doubleMatrixDataset.save(str7 + ".txt.gz");
        }

        if (doubleMatrixDataset.nrCols > 3) {
            str7 = removeProbesWithZeroVariance(doubleMatrixDataset, str7);
        }
        if (z2) {
            str7 = quantileNormalize(doubleMatrixDataset, str7, z8, z9, z10, z11);
        }
        if (z3) {
            str7 = log2transform(doubleMatrixDataset, str7);
        }
        if (z4) {
            str7 = mValueTransform(doubleMatrixDataset, str7);
        }
        if (z5) {
            str7 = centerAndScale(doubleMatrixDataset, str7);
        }
        if (z7 && str4 != null) {
            str7 = adjustCovariates(doubleMatrixDataset, str7, str4, z, 1.0E-10d);
        }
        if (z6) {
            double[][] sampleCorrelations = new ConcurrentCorrelation(2).pairwiseCorrelation(doubleMatrixDataset.getRawDataTransposed());
            Pair<DoubleMatrixDataset<String, String>, DoubleMatrixDataset<String, String>> calculatePCA = calculatePCA(doubleMatrixDataset, sampleCorrelations, str7, null);
            if (i != 0 || i2 != 0) {
                correctDataForPCs(doubleMatrixDataset, str7, i, i2, (DoubleMatrixDataset) calculatePCA.getLeft(), (DoubleMatrixDataset) calculatePCA.getRight());
            }
        }
        if (z12) {
            forceNormalDistribution(doubleMatrixDataset, str7);
        }
    }

    /**
     * Compares the identifiers present in the loaded matrix with the requested filter list and
     * reports, on stderr, either the requested identifiers that are missing from the matrix or
     * (only when none are missing) the matrix identifiers that were not requested.
     *
     * @param present   identifiers found on one axis of the loaded matrix
     * @param requested identifiers listed in the filter file
     * @param axis      axis name used in the messages ("columns" or "rows")
     */
    private static void reportSelectionMismatch(Iterable<String> present, HashSet<String> requested, String axis) {
        // Hash lookup of the matrix identifiers avoids a linear List.contains per identifier.
        HashSet<String> presentLookup = new HashSet<String>();
        HashSet<String> union = new HashSet<String>();
        for (String id : present) {
            presentLookup.add(id);
            union.add(id);
        }
        union.addAll(requested);

        HashSet<String> missing = new HashSet<String>();
        HashSet<String> unwanted = new HashSet<String>();
        for (String id : union) {
            if (!requested.contains(id)) {
                unwanted.add(id);
            }
            if (!presentLookup.contains(id)) {
                missing.add(id);
            }
        }

        if (!missing.isEmpty()) {
            System.err.println("\nMatrix does not contains desired " + axis + ", please check filtering list.");
            System.err.println(missing.toString() + "\n");
        } else if (!unwanted.isEmpty()) {
            System.err.println("\nMatrix contains unwanted " + axis + ", please check filtering list.");
            System.err.println(unwanted.toString() + "\n");
        }
    }

    /**
     * Overwrites each element of the given array with the value
     * {@code Probability.normalInverse((rank - 0.5) / n)}, where {@code rank} comes from the
     * {@code ranking} field and {@code n} is the number of ranks.
     *
     * @param dArr values to transform; modified in place
     * @return the same array instance that was passed in
     */
    public double[] forceNormal(double[] dArr) {
        final double[] ranks = this.ranking.rank(dArr);
        final int rankCount = ranks.length;
        int pos = 0;
        while (pos < dArr.length) {
            // Same arithmetic order as before so the resulting quantile is bit-identical.
            final double quantile = ((0.5d + ranks[pos]) - 1.0d) / rankCount;
            dArr[pos] = Probability.normalInverse(quantile);
            pos++;
        }
        return dArr;
    }

    /**
     * Applies {@link #forceNormal(double[])} to every row of the dataset and saves the result
     * as {@code <str>.ForcedNormal.txt.gz}.
     *
     * @param doubleMatrixDataset dataset whose rows are transformed
     * @param str                 output file prefix
     * @return the prefix with {@code .ForcedNormal} appended
     * @throws IOException if saving fails
     */
    public String forceNormalDistribution(DoubleMatrixDataset<String, String> doubleMatrixDataset, String str) throws IOException {
        final double[][] matrix = doubleMatrixDataset.getRawData();
        final int rowCount = doubleMatrixDataset.rowObjects.size();
        int row = 0;
        while (row < rowCount) {
            matrix[row] = forceNormal(matrix[row]);
            row++;
        }
        DoubleMatrixDataset transformed = new DoubleMatrixDataset(matrix, doubleMatrixDataset.rowObjects, doubleMatrixDataset.colObjects);
        final String outputPrefix = str + ".ForcedNormal";
        transformed.save(outputPrefix + ".txt.gz");
        return outputPrefix;
    }

    /**
     * Quantile-normalizes the dataset and saves it as {@code <str>.QuantileNormalized.txt.gz}.
     *
     * <p>When the matrix contains no NaNs the plain normalization is used. Otherwise the flags
     * select which variant of {@code QuantileNormAdressingNaValuesAfterInitialQN} runs, checked
     * in the order {@code z2}, {@code z3}, {@code z}; when none is set a warning is printed and
     * the JVM exits.
     *
     * @param doubleMatrixDataset dataset to normalize
     * @param str                 output file prefix
     * @param z                   selects the variant called with {@code (true, false, z4)}
     * @param z2                  selects the variant called with {@code (false, false, false)}
     * @param z3                  selects the variant called with {@code (false, true, false)}
     * @param z4                  zeros are replaced by nulls before, and nulls by zeros after,
     *                            the normalization
     * @return the prefix with {@code .QuantileNormalized} appended
     * @throws IOException if saving fails
     */
    public String quantileNormalize(DoubleMatrixDataset<String, String> doubleMatrixDataset, String str, boolean z, boolean z2, boolean z3, boolean z4) throws IOException {
        final double[][] matrix = doubleMatrixDataset.getRawData();
        boolean hasMissing = MatrixTools.containsNaNs(matrix);
        if (z4) {
            if (hasMissing) {
                System.out.println("Warning: Data already contains nulls before treating zeros as nulls.\n Later on it will not be possible to distinguish between those two!");
            }
            MatrixHandling.ReplaceZerosToNull(matrix);
            hasMissing = MatrixTools.containsNaNs(matrix);
        }

        if (!hasMissing) {
            QuantileNormalization.quantilenormalize(matrix);
        } else if (z2) {
            QuantileNormalization.QuantileNormAdressingNaValuesAfterInitialQN(doubleMatrixDataset, false, false, false);
        } else if (z3) {
            QuantileNormalization.QuantileNormAdressingNaValuesAfterInitialQN(doubleMatrixDataset, false, true, false);
        } else if (z) {
            // The last argument simply mirrors z4.
            QuantileNormalization.QuantileNormAdressingNaValuesAfterInitialQN(doubleMatrixDataset, true, false, z4);
        } else {
            System.out.println("Warning: Your data contains missing values and missing value treatment is not selected.\nIf desired please supply additional flag: --forceMissingValues or --forceReplacementOfMissingValues");
            System.exit(0);
        }

        if (z4) {
            MatrixHandling.ReplaceNullToZero(matrix);
        }
        final String outputPrefix = str + ".QuantileNormalized";
        DoubleMatrixDataset normalized = new DoubleMatrixDataset(matrix, doubleMatrixDataset.rowObjects, doubleMatrixDataset.colObjects);
        normalized.save(outputPrefix + ".txt.gz");
        return outputPrefix;
    }

    /**
     * Passes the dataset's raw matrix to {@code Log2Transform.log2transform} and saves the
     * result as {@code <str>.Log2Transformed.txt.gz}.
     *
     * @param doubleMatrixDataset dataset to transform
     * @param str                 output file prefix
     * @return the prefix with {@code .Log2Transformed} appended
     * @throws IOException if saving fails
     */
    public String log2transform(DoubleMatrixDataset<String, String> doubleMatrixDataset, String str) throws IOException {
        final double[][] matrix = doubleMatrixDataset.getRawData();
        Log2Transform.log2transform(matrix);
        DoubleMatrixDataset transformed = new DoubleMatrixDataset(matrix, doubleMatrixDataset.rowObjects, doubleMatrixDataset.colObjects);
        final String outputPrefix = str + ".Log2Transformed";
        final String outputFile = outputPrefix + ".txt.gz";
        transformed.save(outputFile);
        return outputPrefix;
    }

    /**
     * Passes the dataset's raw matrix to {@code ConvertBetaAndMvalues.transformToMvalue} and
     * saves the result as {@code <str>.MvalueTransformed.txt.gz}.
     *
     * @param doubleMatrixDataset dataset to transform
     * @param str                 output file prefix
     * @return the prefix with {@code .MvalueTransformed} appended
     * @throws IOException if saving fails
     */
    public String mValueTransform(DoubleMatrixDataset<String, String> doubleMatrixDataset, String str) throws IOException {
        final double[][] matrix = doubleMatrixDataset.getRawData();
        ConvertBetaAndMvalues.transformToMvalue(matrix);
        DoubleMatrixDataset transformed = new DoubleMatrixDataset(matrix, doubleMatrixDataset.rowObjects, doubleMatrixDataset.colObjects);
        final String outputPrefix = str + ".MvalueTransformed";
        final String outputFile = outputPrefix + ".txt.gz";
        transformed.save(outputFile);
        return outputPrefix;
    }

    /**
     * Centers every row on its mean, saves that intermediate result as
     * {@code <str>.ProbesCentered.txt.gz}, then centers every column and divides it by the
     * square root of {@code Descriptives.variance}, saving the final matrix as
     * {@code <str>.ProbesCentered.SamplesZTransformed.txt.gz}.
     *
     * @param doubleMatrixDataset dataset to standardize; its matrix is modified in place
     * @param str                 output file prefix
     * @return the prefix with {@code .ProbesCentered.SamplesZTransformed} appended
     * @throws IOException if saving fails
     */
    public String centerAndScale(DoubleMatrixDataset<String, String> doubleMatrixDataset, String str) throws IOException {
        final double[][] matrix = doubleMatrixDataset.getRawData();

        System.out.println("Standardizing probe mean");
        int row = 0;
        while (row < doubleMatrixDataset.rowObjects.size()) {
            final double probeMean = Descriptives.mean(matrix[row]);
            final int columns = doubleMatrixDataset.colObjects.size();
            for (int col = 0; col < columns; col++) {
                matrix[row][col] -= probeMean;
            }
            row++;
        }
        doubleMatrixDataset.setRawData(matrix);
        final String centeredPrefix = str + ".ProbesCentered";
        doubleMatrixDataset.save(centeredPrefix + ".txt.gz");

        System.out.println("- Standardizing sample mean and standard deviation");
        for (int sample = 0; sample < doubleMatrixDataset.colObjects.size(); sample++) {
            final int probeCount = doubleMatrixDataset.rowObjects.size();

            // Copy the column out of the matrix.
            final double[] column = new double[probeCount];
            for (int probe = 0; probe < probeCount; probe++) {
                column[probe] = doubleMatrixDataset.getRawData()[probe][sample];
            }

            final double sampleMean = Descriptives.mean(column);
            for (int probe = 0; probe < probeCount; probe++) {
                column[probe] -= sampleMean;
            }

            // NOTE(review): the column has already been centred at this point, yet the
            // pre-centring mean is what gets passed to Descriptives.variance - confirm that
            // helper's contract before relying on the resulting standard deviation.
            final double standardDeviation = Math.sqrt(Descriptives.variance(column, sampleMean));
            for (int probe = 0; probe < probeCount; probe++) {
                doubleMatrixDataset.getRawData()[probe][sample] = column[probe] / standardDeviation;
            }
        }

        DoubleMatrixDataset standardized = new DoubleMatrixDataset(matrix, doubleMatrixDataset.rowObjects, doubleMatrixDataset.colObjects);
        final String finalPrefix = centeredPrefix + ".SamplesZTransformed";
        standardized.save(finalPrefix + ".txt.gz");
        return finalPrefix;
    }

    /**
     * Regresses covariates out of the dataset and saves the result as
     * {@code <str>.CovariatesRemoved.txt.gz}.
     *
     * <p>The covariates are loaded with {@code loadCovariateValues}, standardized per row,
     * run through {@link #calculatePCA}, and each resulting component whose value read back from
     * the eigenvalue file exceeds {@code d} is regressed out with {@code correctForCovariate}.
     *
     * @param doubleMatrixDataset dataset to adjust; its data, row and column identifiers are
     *                            replaced by the second dataset returned by {@code loadCovariateValues}
     * @param str                 output file prefix
     * @param str2                covariate file; also used as the output prefix of the PCA files
     * @param z                   not referenced anywhere in this method
     * @param d                   threshold a component's stored value must exceed to be regressed out
     * @return the prefix with {@code .CovariatesRemoved} appended
     * @throws IOException if reading or writing any file fails
     */
    public String adjustCovariates(DoubleMatrixDataset<String, String> doubleMatrixDataset, String str, String str2, boolean z, double d) throws IOException {
        Pair<DoubleMatrixDataset<String, String>, DoubleMatrixDataset<String, String>> loadCovariateValues = loadCovariateValues(str2, doubleMatrixDataset);
        DoubleMatrixDataset<String, String> doubleMatrixDataset2 = (DoubleMatrixDataset) loadCovariateValues.getLeft();
        DoubleMatrixDataset doubleMatrixDataset3 = (DoubleMatrixDataset) loadCovariateValues.getRight();
        // Replace the caller's dataset contents with the second dataset of the returned pair.
        doubleMatrixDataset.rawData = doubleMatrixDataset3.rawData;
        doubleMatrixDataset.colObjects = doubleMatrixDataset3.colObjects;
        doubleMatrixDataset.rowObjects = doubleMatrixDataset3.rowObjects;
        doubleMatrixDataset.recalculateHashMaps();
        System.out.println("Covariate data has " + doubleMatrixDataset2.nrRows + " rows and " + doubleMatrixDataset2.nrCols + " columns.");
        // Standardize every covariate row: subtract its mean, divide by the square root of its variance.
        for (int i = 0; i < doubleMatrixDataset2.rowObjects.size(); i++) {
            double mean = Descriptives.mean(doubleMatrixDataset2.getRawData()[i]);
            double sqrt = Math.sqrt(Descriptives.variance(doubleMatrixDataset2.getRawData()[i], mean));
            for (int i2 = 0; i2 < doubleMatrixDataset2.colObjects.size(); i2++) {
                double[] dArr = doubleMatrixDataset2.getRawData()[i];
                int i3 = i2;
                dArr[i3] = dArr[i3] - mean;
                double[] dArr2 = doubleMatrixDataset2.getRawData()[i];
                int i4 = i2;
                dArr2[i4] = dArr2[i4] / sqrt;
            }
        }
        // The covariation matrix is computed before the covariate dataset is transposed for the PCA.
        double[][] pairwiseCovariation = new ConcurrentCovariation(2).pairwiseCovariation(doubleMatrixDataset2.getRawData());
        doubleMatrixDataset2.transposeDataset();
        // str2 is the output prefix here, so the PCA files are written next to the covariate file.
        DoubleMatrixDataset doubleMatrixDataset4 = (DoubleMatrixDataset) calculatePCA(doubleMatrixDataset2, pairwiseCovariation, str2, null).getLeft();
        doubleMatrixDataset4.transposeDataset();
        double[][] rawData = doubleMatrixDataset4.getRawData();
        System.out.println(doubleMatrixDataset4.nrRows + " covariates finally loaded.");
        double[] dArr3 = new double[rawData.length];
        System.out.println("Loading eigenvalues from: " + str2 + ".PCAOverSamplesEigenvalues.txt.gz");
        // Read back the eigenvalue file that calculatePCA wrote just above.
        TextFile textFile = new TextFile(str2 + ".PCAOverSamplesEigenvalues.txt.gz", false);
        // Skip the header line.
        textFile.readLine();
        String[] readLineElems = textFile.readLineElems(TextFile.tab);
        while (true) {
            String[] strArr = readLineElems;
            if (strArr == null) {
                break;
            }
            if (strArr.length > 2) {
                // First field: 1-based component number; second field: the value stored for it.
                int parseInt = Integer.parseInt(strArr[0]);
                double parseDouble = Double.parseDouble(strArr[1]);
                dArr3[parseInt - 1] = parseDouble;
                System.out.println(parseInt + "\t" + parseDouble);
            }
            readLineElems = textFile.readLineElems(TextFile.tab);
        }
        textFile.close();
        double[][] rawData2 = doubleMatrixDataset.getRawData();
        for (int i5 = 0; i5 < rawData.length; i5++) {
            // dArr3 is always allocated above, so the null check never succeeds; only the threshold decides.
            if (dArr3 == null || dArr3[i5] > d) {
                correctForCovariate(rawData2, rawData, i5);
            } else {
                System.out.println("Not regressing covariate: " + i5 + " because explained variance < " + d + ": " + dArr3[i5]);
            }
        }
        doubleMatrixDataset.rawData = rawData2;
        String str3 = str + ".CovariatesRemoved";
        doubleMatrixDataset.save(str3 + ".txt.gz");
        return str3;
    }

    /**
     * Builds a symmetric column-by-column matrix in which each entry is the sum over all rows of
     * the product of the two columns' values, divided by (number of rows - 1). No mean is
     * subtracted here.
     *
     * @param doubleMatrixDataset dataset whose columns are compared
     * @return square matrix with one row and one column per dataset column
     */
    private double[][] correlateSamples(DoubleMatrixDataset<String, String> doubleMatrixDataset) {
        final int sampleCount = doubleMatrixDataset.colObjects.size();
        final int probeCount = doubleMatrixDataset.rowObjects.size();
        final double[][] result = new double[sampleCount][sampleCount];
        final double divisor = probeCount - 1;
        final double[][] values = doubleMatrixDataset.getRawData();
        final ProgressBar progress = new ProgressBar(sampleCount, "- Calculating correlations: " + sampleCount + " x " + sampleCount);
        for (int first = 0; first < sampleCount; first++) {
            // Only the upper triangle is computed; the value is mirrored below the diagonal.
            int second = first;
            while (second < sampleCount) {
                double productSum = 0.0d;
                for (int probe = 0; probe < probeCount; probe++) {
                    productSum += values[probe][first] * values[probe][second];
                }
                final double entry = productSum / divisor;
                result[first][second] = entry;
                result[second][first] = entry;
                second++;
            }
            progress.iterate();
        }
        progress.close();
        return result;
    }

    /**
     * Builds a symmetric row-by-row (probe-by-probe) matrix in which each entry is the sum over
     * all columns of the product of the two rows' values, divided by (number of columns - 1).
     * No mean is subtracted here, so the result is only a correlation when the rows were
     * standardized beforehand.
     *
     * <p>The previous implementation indexed the matrix as {@code [k][probe]} with {@code k}
     * running over the row count, i.e. it used probe indices as column indices. That failed with
     * an {@code ArrayIndexOutOfBoundsException} whenever the matrix had fewer columns than rows
     * and otherwise produced sample-by-sample values.
     *
     * @param doubleMatrixDataset dataset whose rows are compared
     * @return square matrix with one row and one column per dataset row
     */
    public double[][] correlateProbes(DoubleMatrixDataset<String, String> doubleMatrixDataset) {
        final int probeCount = doubleMatrixDataset.rowObjects.size();
        final int sampleCount = doubleMatrixDataset.colObjects.size();
        final double[][] values = doubleMatrixDataset.getRawData();
        double[][] dArr = new double[probeCount][probeCount];
        // Each probe pair is summed over the samples, so the divisor is based on the sample count.
        final double sampleCountMinusOne = sampleCount - 1;
        ProgressBar progressBar = new ProgressBar(probeCount, "- Calculating correlations: " + probeCount + " x " + probeCount);
        for (int i = 0; i < probeCount; i++) {
            final double[] first = values[i];
            // Only the upper triangle is computed; the value is mirrored below the diagonal.
            for (int i2 = i; i2 < probeCount; i2++) {
                final double[] second = values[i2];
                double d = 0.0d;
                for (int i3 = 0; i3 < sampleCount; i3++) {
                    d += first[i3] * second[i3];
                }
                double d2 = d / sampleCountMinusOne;
                dArr[i][i2] = d2;
                dArr[i2][i] = d2;
                System.out.println(i + "\t" + i2 + "\t" + d2);
            }
            progressBar.iterate();
        }
        progressBar.close();
        return dArr;
    }

    /**
     * Decomposes the given square matrix with {@code PCA.eigenValueDecomposition}, writes the
     * eigenvalues, eigenvectors and per-row component scores to files derived from {@code str},
     * and returns the scores together with the eigenvectors.
     *
     * <p>Files written: {@code .PCAOverSamplesEigenvalues.txt.gz},
     * {@code .PCAOverSamplesEigenvectors.txt.gz},
     * {@code .PCAOverSamplesEigenvectorsTransposed.txt.gz} and
     * {@code .PCAOverSamplesPrincipalComponents.txt.gz}.
     *
     * @param doubleMatrixDataset dataset whose rows are projected onto the eigenvectors
     * @param dArr                square matrix to decompose
     * @param str                 output file prefix
     * @param num                 number of components to compute; {@code null} or a value larger
     *                            than the column count means one component per column
     * @return pair of (scores: dataset rows x components, eigenvectors: dataset columns x components)
     * @throws IllegalArgumentException if {@code num} is smaller than 1
     * @throws IOException              if writing any output file fails
     */
    public Pair<DoubleMatrixDataset<String, String>, DoubleMatrixDataset<String, String>> calculatePCA(DoubleMatrixDataset<String, String> doubleMatrixDataset, double[][] dArr, String str, Integer num) throws IOException {
        System.out.println("Calculating PCA over file: " + str);
        System.out.println("- Performing PCA over correlation matrix of size: " + dArr.length + "x" + dArr.length);
        EigenvalueDecomposition eigenValueDecomposition = PCA.eigenValueDecomposition(dArr);
        // Default to (and cap at) one component per dataset column.
        if (num == null || num.intValue() > doubleMatrixDataset.colObjects.size()) {
            num = Integer.valueOf(doubleMatrixDataset.colObjects.size());
        } else if (num.intValue() < 1) {
            throw new IllegalArgumentException("Number of PCs to calculate should be at least 1");
        }
        // Eigenvector matrix: one row per dataset column, one column per component.
        DoubleMatrixDataset doubleMatrixDataset2 = new DoubleMatrixDataset(doubleMatrixDataset.colObjects.size(), num.intValue());
        doubleMatrixDataset2.rowObjects = doubleMatrixDataset.colObjects;
        double[] realEigenvalues = eigenValueDecomposition.getRealEigenvalues();
        System.out.println("Eigenvalue results:");
        System.out.println("PCA\tPCANr\tEigenValue\tExplainedVariance\tTotalExplainedVariance");
        TextFile textFile = new TextFile(str + ".PCAOverSamplesEigenvalues.txt.gz", true);
        // Running total of the per-component explained variance.
        double d = 0.0d;
        // NOTE(review): this header names five columns, while every data row written in the loop
        // below has four tab-separated fields (number, eigenvalue, explained, cumulative).
        textFile.writeln("PCA\tPCANr\tEigenValue\tExplainedVariance\tTotalExplainedVariance");
        for (int i = 0; i < num.intValue(); i++) {
            double eigenValueVar = PCA.getEigenValueVar(realEigenvalues, i);
            double[] eigenVector = PCA.getEigenVector(eigenValueDecomposition, realEigenvalues, i);
            for (int i2 = 0; i2 < doubleMatrixDataset.colObjects.size(); i2++) {
                doubleMatrixDataset2.getRawData()[i2][i] = eigenVector[i2];
            }
            int i3 = i + 1;
            d += eigenValueVar;
            // The eigenvalue array is read from its end - presumably it is sorted ascending; verify against PCA.
            textFile.write(i3 + "\t" + realEigenvalues[(realEigenvalues.length - 1) - i] + "\t" + eigenValueVar + "\t" + d + "\n");
            doubleMatrixDataset2.colObjects.set(i, "Comp" + String.valueOf(i3));
            System.out.println("PCA:\t" + i3 + "\t" + realEigenvalues[(realEigenvalues.length - 1) - i] + "\t" + eigenValueVar + "\t" + d);
        }
        textFile.close();
        doubleMatrixDataset2.save(str + ".PCAOverSamplesEigenvectors.txt.gz");
        // Transpose only to save the transposed layout, then transpose back for the computation below.
        doubleMatrixDataset2.transposeDataset();
        doubleMatrixDataset2.save(str + ".PCAOverSamplesEigenvectorsTransposed.txt.gz");
        doubleMatrixDataset2.transposeDataset();
        System.out.println("Calculating PCs");
        System.out.println("Initializing PCA matrix");
        // Score matrix: one row per dataset row, one column per component.
        DoubleMatrixDataset doubleMatrixDataset3 = new DoubleMatrixDataset(doubleMatrixDataset.rowObjects.size(), num.intValue());
        doubleMatrixDataset3.rowObjects = doubleMatrixDataset.rowObjects;
        for (int i4 = 0; i4 < num.intValue(); i4++) {
            doubleMatrixDataset3.colObjects.set(i4, "Comp" + String.valueOf(i4 + 1));
        }
        // Explicitly zero the score matrix before accumulating into it.
        for (int i5 = 0; i5 < doubleMatrixDataset.rowObjects.size(); i5++) {
            for (int i6 = 0; i6 < num.intValue(); i6++) {
                doubleMatrixDataset3.getRawData()[i5][i6] = 0.0d;
            }
        }
        ProgressBar progressBar = new ProgressBar(doubleMatrixDataset.rowObjects.size(), "Calculating the PCA scores per probe: ");
        // score[row][component] = sum over columns of eigenvector[column][component] * data[row][column]
        for (int i7 = 0; i7 < doubleMatrixDataset.rowObjects.size(); i7++) {
            for (int i8 = 0; i8 < num.intValue(); i8++) {
                for (int i9 = 0; i9 < doubleMatrixDataset.colObjects.size(); i9++) {
                    double d2 = doubleMatrixDataset2.getRawData()[i9][i8];
                    double[] dArr2 = doubleMatrixDataset3.getRawData()[i7];
                    int i10 = i8;
                    dArr2[i10] = dArr2[i10] + (d2 * doubleMatrixDataset.getRawData()[i7][i9]);
                }
            }
            progressBar.iterate();
        }
        progressBar.close();
        String str2 = str + ".PCAOverSamplesPrincipalComponents.txt.gz";
        System.out.println("Saving PCA scores: " + str2);
        doubleMatrixDataset3.save(str2);
        return new Pair<>(doubleMatrixDataset3, doubleMatrixDataset2);
    }

    /**
     * Subtracts the first {@code i} principal components from the dataset in place and saves
     * the residual matrix.
     *
     * <p>For every removed component, {@code score[row][pc] * eigenvector[column][pc]} is
     * subtracted from each cell. When {@code i2} is positive an intermediate file
     * {@code <str>.<n>PCAsOverSamplesRemoved.txt.gz} is saved each time the number of removed
     * components {@code n} is a multiple of {@code i2}; a final file for {@code i} components is
     * always saved.
     *
     * @param doubleMatrixDataset  dataset to correct; modified in place
     * @param str                  output file prefix
     * @param i                    number of components to remove; when larger than the number of
     *                             columns it is reduced to the largest multiple of {@code i2} that
     *                             fits, or to the column count when {@code i2} is 0
     * @param i2                   step size for intermediate saves; 0 disables them
     * @param doubleMatrixDataset2 component scores (dataset rows x components)
     * @param doubleMatrixDataset3 eigenvectors (dataset columns x components)
     * @throws IOException if saving fails
     */
    public void correctDataForPCs(DoubleMatrixDataset<String, String> doubleMatrixDataset, String str, int i, int i2, DoubleMatrixDataset<String, String> doubleMatrixDataset2, DoubleMatrixDataset<String, String> doubleMatrixDataset3) throws IOException {
        System.out.println("\nInitializing residual gene expression matrix");
        final int sampleCount = doubleMatrixDataset.colObjects.size();
        if (sampleCount < i) {
            // "% 0" throws ArithmeticException, and a step size of 0 is a legal input here.
            i = (i2 != 0) ? sampleCount - (sampleCount % i2) : sampleCount;
        }
        for (int i3 = 0; i3 < i; i3++) {
            for (int i4 = 0; i4 < doubleMatrixDataset.rowObjects.size(); i4++) {
                for (int i5 = 0; i5 < doubleMatrixDataset.colObjects.size(); i5++) {
                    double[] dArr = doubleMatrixDataset.getRawData()[i4];
                    dArr[i5] = dArr[i5] - (doubleMatrixDataset2.getRawData()[i4][i3] * doubleMatrixDataset3.getRawData()[i5][i3]);
                }
            }
            int i7 = i3 + 1;
            if (i2 > 0 && i7 % i2 == 0) {
                doubleMatrixDataset.save(str + "." + i7 + "PCAsOverSamplesRemoved.txt.gz");
                System.out.println("Removed\t" + i7 + "\tPCs. File:\t" + str + "." + i7 + "PCAsOverSamplesRemoved.txt.gz");
            }
        }
        doubleMatrixDataset.save(str + "." + i + "PCAsOverSamplesRemoved.txt.gz");
    }

    /**
     * Removes principal components from a previously processed matrix again, this time leaving
     * the components listed in {@code hashSet} untouched.
     *
     * <p>The eigenvector and principal-component files are looked up by name in directory
     * {@code str}; the matrix to correct is the file named like {@code str2} with any
     * {@code ...PCAsOverSamplesRemoved} name part stripped. Output files are named
     * {@code <name>.<n>PCAsOverSamplesRemoved-GeneticVectorsNotRemoved.txt.gz}.
     *
     * @param hashSet 1-based numbers of the components that must not be removed
     * @param str     directory holding the input files; also receives the output
     * @param str2    path whose file name identifies the matrix to correct
     * @param i       number of components to process; when larger than the number of columns it
     *                is reduced to the largest multiple of {@code i2} that fits, or to the column
     *                count when {@code i2} is 0
     * @param i2      step size for intermediate saves; 0 disables them
     * @throws IOException if reading or writing any file fails
     */
    public void repeatPCAOmitCertainPCAs(HashSet<Integer> hashSet, String str, String str2, int i, int i2) throws IOException {
        System.out.println("Will write output to: " + str);
        String[] listOfFiles = Gpio.getListOfFiles(str);
        String str3 = str + Gpio.getFileSeparator();
        String name = new File(str2).getName();

        // Drop the "<n>PCAsOverSamplesRemoved" part to get back to the name of the source matrix.
        if (name.contains("PCAsOverSamplesRemoved")) {
            String[] split = name.split("\\.");
            StringBuilder sb = new StringBuilder();
            sb.append(split[0]);
            for (int i3 = 1; i3 < split.length; i3++) {
                if (!split[i3].contains("PCAsOverSamplesRemoved")) {
                    sb.append(".").append(split[i3]);
                }
            }
            name = sb.toString();
        }

        // Locate the eigenvector and principal-component files; the last match of each kind wins.
        String str4 = null;
        String str5 = null;
        if (listOfFiles != null) {
            for (String str6 : listOfFiles) {
                String lowerCaseName = str6.toLowerCase();
                if (lowerCaseName.contains("pcaoversampleseigenvectors.")) {
                    str4 = str3 + str6;
                } else if (lowerCaseName.contains("pcaoversamplesprincipalcomponents")) {
                    str5 = str3 + str6;
                }
            }
        }

        boolean z = true;
        if (str4 == null) {
            System.err.println("Could not find file containing 'PCAOverSamplesEigenvectors' in directory: " + str3);
            z = false;
        }
        // This check used to test the eigenvector path a second time, so a missing
        // principal-components file was never reported.
        if (str5 == null) {
            System.err.println("Could not find file containing 'PCAOverSamplesPrincipalComponents' in directory: " + str3);
            z = false;
        }
        if (!z) {
            System.exit(0);
        }

        System.out.println("Detected core file name to be: " + name);
        DoubleMatrixDataset<String, String> doubleMatrixDataset = new DoubleMatrixDataset<>(str3 + name);
        DoubleMatrixDataset<String, String> doubleMatrixDataset2 = new DoubleMatrixDataset<>(str5);
        DoubleMatrixDataset<String, String> doubleMatrixDataset3 = new DoubleMatrixDataset<>(str4);

        final int sampleCount = doubleMatrixDataset.colObjects.size();
        if (sampleCount < i) {
            // "% 0" throws ArithmeticException, so a step size of 0 falls back to the column count.
            i = (i2 != 0) ? sampleCount - (sampleCount % i2) : sampleCount;
        }
        if (name.endsWith(".txt")) {
            name = name.substring(0, name.length() - 4);
        } else if (name.endsWith(".txt.gz")) {
            name = name.substring(0, name.length() - 7);
        }

        for (int i4 = 0; i4 < i; i4++) {
            int i8 = i4 + 1;
            if (hashSet.contains(Integer.valueOf(i8))) {
                System.out.println("Omitting PCA: " + i8 + " since this component is under genetic control");
            } else {
                for (int i5 = 0; i5 < doubleMatrixDataset.rowObjects.size(); i5++) {
                    for (int i6 = 0; i6 < doubleMatrixDataset.colObjects.size(); i6++) {
                        double[] dArr = doubleMatrixDataset.getRawData()[i5];
                        dArr[i6] = dArr[i6] - (doubleMatrixDataset2.getRawData()[i5][i4] * doubleMatrixDataset3.getRawData()[i6][i4]);
                    }
                }
            }
            if (i2 > 0 && i8 % i2 == 0) {
                doubleMatrixDataset.save(str3 + name + "." + i8 + "PCAsOverSamplesRemoved-GeneticVectorsNotRemoved.txt.gz");
                System.out.println("Removed\t" + i8 + "\tPCs. File:\t" + name + "." + i8 + "PCAsOverSamplesRemoved-GeneticVectorsNotRemoved.txt.gz");
            }
        }
        doubleMatrixDataset.save(str3 + name + "." + i + "PCAsOverSamplesRemoved-GeneticVectorsNotRemoved.txt.gz");
        System.out.println("Done\n");
    }

    /**
     * Regresses one covariate out of every row of the data matrix, in place.
     *
     * <p>For each row the covariate multiplied by the first regression coefficient is
     * subtracted; the residuals are then centred, divided by
     * {@code sd(residuals) / sqrt(variance(original row))} and shifted back to the original
     * row mean before being copied over the row.
     *
     * @param dArr  data matrix; each row is overwritten with its corrected values
     * @param dArr2 covariate matrix, one covariate per row
     * @param i     index of the covariate row to regress out
     */
    private void correctForCovariate(double[][] dArr, double[][] dArr2, int i) {
        int row = 0;
        while (row < dArr.length) {
            final double[] observed = dArr[row];
            final double originalMean = ArrayMath.mean(observed);
            final double originalVariance = ArrayMath.variance(observed);
            final double[] covariate = dArr2[i];
            final double[] coefficients = Regression.getLinearRegressionCoefficients(covariate, observed);
            // The correlation is still evaluated, but nothing consumes its value.
            ArrayMath.correlation(covariate, observed);

            final int length = covariate.length;
            final double[] residuals = new double[length];
            for (int k = 0; k < length; k++) {
                residuals[k] = observed[k] - (covariate[k] * coefficients[0]);
            }

            final double residualMean = ArrayMath.mean(residuals);
            final double scale = ArrayMath.standardDeviation(residuals) / Math.sqrt(originalVariance);
            for (int k = 0; k < length; k++) {
                residuals[k] = ((residuals[k] - residualMean) / scale) + originalMean;
            }
            System.arraycopy(residuals, 0, dArr[row], 0, length);
            row++;
        }
    }

    private Pair<DoubleMatrixDataset<String, String>, DoubleMatrixDataset<String, String>> loadCovariateValues(String str, DoubleMatrixDataset<String, String> doubleMatrixDataset) throws IOException {
        System.out.println("- Removing covariates as defined in: " + str);
        TextFile textFile = new TextFile(str, false);
        int countLines = textFile.countLines() - 1;
        int countCols = textFile.countCols(TextFile.tab) - 1;
        if (countLines == 0 || countCols == 0) {
            System.err.println("Covariate file is empty, but no covariates found in file! Is your file format correct?");
            System.err.println("The program is expecting the following: tab separated, one covariate per row, one sample per column, with sample identifiers identical to your --in file.");
            System.exit(0);
        } else {
            System.out.println("Covariate file has " + countLines + " rows and " + countCols + " columns");
        }
        HashMap hashMap = new HashMap();
        String[] strArr = (String[]) doubleMatrixDataset.colObjects.toArray(new String[0]);
        for (int i = 0; i < strArr.length; i++) {
            hashMap.put(strArr[i], Integer.valueOf(i));
        }
        String[] readLineElemsReturnReference = textFile.readLineElemsReturnReference(TextFile.tab);
        int i2 = 0;
        boolean[] zArr = new boolean[doubleMatrixDataset.colObjects.size()];
        ArrayList arrayList = new ArrayList();
        for (int i3 = 1; i3 < readLineElemsReturnReference.length; i3++) {
            Integer num = (Integer) hashMap.get(readLineElemsReturnReference[i3]);
            arrayList.add(readLineElemsReturnReference[i3]);
            if (num != null) {
                zArr[num.intValue()] = true;
                i2++;
            }
        }
        ArrayList arrayList2 = new ArrayList();
        String[] readLineElemsReturnReference2 = textFile.readLineElemsReturnReference(TextFile.tab);
        while (true) {
            String[] strArr2 = readLineElemsReturnReference2;
            if (strArr2 == null) {
                break;
            }
            arrayList2.add(strArr2[0]);
            readLineElemsReturnReference2 = textFile.readLineElemsReturnReference(TextFile.tab);
        }
        textFile.close();
        boolean z = false;
        if (i2 == 0) {
            System.err.println("No matching samples detected between covariate file and dataset. Maybe your covariate file needs to be transposed? Will test that for you now:");
            Iterator it = arrayList2.iterator();
            while (it.hasNext()) {
                Integer num2 = (Integer) hashMap.get((String) it.next());
                if (num2 != null) {
                    zArr[num2.intValue()] = true;
                    i2++;
                }
            }
            if (i2 == 0) {
                System.err.println("Transposing the data does not seem to resolve the issue. Please check your sample identifiers.");
                System.exit(0);
            } else {
                System.out.println("Transposing the covariate file reveals: " + i2 + " samples present.");
                z = true;
            }
        }
        if (i2 < doubleMatrixDataset.colObjects.size()) {
            System.err.println("Covariates loaded from: " + str + ", but not all samples present in covariates file! " + i2 + " present in covariates file, out of " + doubleMatrixDataset.colObjects.size() + " in dataset...");
            System.out.println("Your dataset will be adjusted accordingly.");
        }
        int i4 = countLines;
        if (z) {
            i4 = countCols;
        }
        double[][] dArr = new double[i4][doubleMatrixDataset.colObjects.size()];
        for (int i5 = 0; i5 < dArr.length; i5++) {
            for (int i6 = 0; i6 < dArr[i5].length; i6++) {
                dArr[i5][i6] = Double.NaN;
            }
        }
        int i7 = 0;
        textFile.open();
        String[] readLineElemsReturnReference3 = textFile.readLineElemsReturnReference(TextFile.tab);
        String[] readLineElemsReturnReference4 = textFile.readLineElemsReturnReference(TextFile.tab);
        while (readLineElemsReturnReference4 != null) {
            if (z) {
                Integer num3 = (Integer) hashMap.get(readLineElemsReturnReference4[0]);
                if (num3 != null) {
                    for (int i8 = 1; i8 < readLineElemsReturnReference4.length; i8++) {
                        try {
                            dArr[i8 - 1][num3.intValue()] = Double.parseDouble(readLineElemsReturnReference4[i8]);
                        } catch (NumberFormatException e) {
                        }
                    }
                }
            } else {
                for (int i9 = 1; i9 < readLineElemsReturnReference4.length; i9++) {
                    Integer num4 = (Integer) hashMap.get(readLineElemsReturnReference3[i9]);
                    if (num4 != null) {
                        try {
                            dArr[i7][num4.intValue()] = Double.parseDouble(readLineElemsReturnReference4[i9]);
                        } catch (NumberFormatException e2) {
                        }
                    }
                }
            }
            readLineElemsReturnReference4 = textFile.readLineElemsReturnReference(TextFile.tab);
            i7++;
        }
        textFile.close();
        int i10 = 0;
        boolean[] zArr2 = new boolean[dArr.length];
        for (int i11 = 0; i11 < dArr.length; i11++) {
            int i12 = 0;
            for (int i13 = 0; i13 < dArr[i11].length; i13++) {
                if (!Double.isNaN(dArr[i11][i13])) {
                    i12++;
                }
            }
            if (i12 == 0) {
                zArr2[i11] = false;
            } else {
                zArr2[i11] = true;
                i10++;
            }
        }
        if (i10 == 0) {
            System.err.println("ERROR: none of your covariates seem to have valid numerical values.. Please check your covariate file.");
            System.exit(0);
        } else {
            System.out.println("After removing covariates without data, your dataset will have " + i10 + " covariates (out of: " + dArr.length + ") .");
        }
        ArrayList arrayList3 = z ? arrayList : arrayList2;
        if (i10 != dArr.length) {
            System.out.println("Removing covariates that have no data at all.");
            double[][] dArr2 = new double[i10][doubleMatrixDataset.colObjects.size()];
            ArrayList arrayList4 = new ArrayList();
            int i14 = 0;
            for (int i15 = 0; i15 < dArr.length; i15++) {
                if (zArr2[i15]) {
                    arrayList4.add(arrayList3.get(i15));
                    for (int i16 = 0; i16 < dArr[i15].length; i16++) {
                        dArr2[i14][i16] = dArr[i15][i16];
                        if (Double.isNaN(dArr[i15][i16])) {
                            zArr[i16] = false;
                        }
                    }
                    i14++;
                } else {
                    System.out.println(((String) arrayList3.get(i15)) + " removed.");
                }
            }
            i4 = i14;
            dArr = dArr2;
            arrayList3 = arrayList4;
        }
        System.out.println("");
        System.out.println("Remaining covariates: ");
        Iterator it2 = arrayList3.iterator();
        while (it2.hasNext()) {
            System.out.println((String) it2.next());
        }
        System.out.println("");
        for (int i17 = 0; i17 < dArr.length; i17++) {
            for (int i18 = 0; i18 < dArr[i17].length; i18++) {
                if (Double.isNaN(dArr[i17][i18])) {
                    zArr[i18] = false;
                }
            }
        }
        int i19 = 0;
        for (boolean z2 : zArr) {
            if (z2) {
                i19++;
            }
        }
        System.out.println("Your covariate corrected dataset will have " + i19 + " samples, after removing samples with missing covariate values.");
        double[][] rawData = doubleMatrixDataset.getRawData();
        double[][] dArr3 = new double[rawData.length][i19];
        double[][] dArr4 = new double[i4][i19];
        ArrayList arrayList5 = new ArrayList();
        for (int i20 = 0; i20 < doubleMatrixDataset.colObjects.size(); i20++) {
            if (zArr[i20]) {
                arrayList5.add(doubleMatrixDataset.colObjects.get(i20));
            }
        }
        for (int i21 = 0; i21 < rawData.length; i21++) {
            int i22 = 0;
            for (int i23 = 0; i23 < doubleMatrixDataset.colObjects.size(); i23++) {
                if (zArr[i23]) {
                    dArr3[i21][i22] = rawData[i21][i23];
                    i22++;
                }
            }
        }
        for (int i24 = 0; i24 < dArr.length; i24++) {
            int i25 = 0;
            for (int i26 = 0; i26 < doubleMatrixDataset.colObjects.size(); i26++) {
                if (zArr[i26]) {
                    dArr4[i24][i25] = dArr[i24][i26];
                    i25++;
                }
            }
        }
        DoubleMatrixDataset doubleMatrixDataset2 = new DoubleMatrixDataset(dArr4, arrayList3, arrayList5);
        doubleMatrixDataset2.save(str + "-asLoadedByNormalizer.txt");
        DoubleMatrixDataset doubleMatrixDataset3 = new DoubleMatrixDataset(dArr3, doubleMatrixDataset.rowObjects, arrayList5);
        doubleMatrixDataset3.save(doubleMatrixDataset.fileName + "-SampleSizeCorrectedForCovariates.txt");
        return new Pair<>(doubleMatrixDataset2, doubleMatrixDataset3);
    }

    /**
     * Strips every probe (row) whose values have a variance of exactly {@code 0.0}
     * from the given dataset and, when at least one probe was stripped, writes the
     * reduced dataset to disk.
     *
     * <p>The dataset is modified in place: its {@code rawData} and {@code rowObjects}
     * are replaced by the filtered versions and its hash maps are recalculated.
     * Each removed probe is reported on standard output together with its 1-based
     * row position. If no probe is left after filtering, an error is printed and
     * the JVM is terminated with exit status {@code -1}.
     *
     * @param doubleMatrixDataset dataset to filter (rows are treated as probes)
     * @param str                 output name prefix; {@code ".ProbesWithZeroVarianceRemoved"}
     *                            is appended to it when probes are removed
     * @return {@code str} unchanged when nothing was removed; otherwise {@code str}
     *         with the {@code ".ProbesWithZeroVarianceRemoved"} suffix (the file
     *         itself is saved under that name plus {@code ".txt.gz"})
     * @throws IOException declared by this method; presumably raised when saving the
     *                     filtered dataset fails
     */
    private String removeProbesWithZeroVariance(DoubleMatrixDataset<String, String> doubleMatrixDataset, String str) throws IOException {
        final int totalProbes = doubleMatrixDataset.nrRows;

        // First pass: decide per row whether it survives, reporting every dropped probe.
        final boolean[] keepProbe = new boolean[totalProbes];
        int keptCount = 0;
        for (int row = 0; row < totalProbes; row++) {
            // Exact comparison on purpose: only a variance of precisely zero counts.
            if (ArrayMath.variance(doubleMatrixDataset.rawData[row]) == 0.0d) {
                String probeName = (String) doubleMatrixDataset.rowObjects.get(row);
                System.out.println("Removing probe with zero variance: " + probeName + " on line " + (row + 1));
            } else {
                keepProbe[row] = true;
                keptCount++;
            }
        }

        // Nothing was dropped: leave the dataset and the output name untouched.
        if (keptCount == totalProbes) {
            return str;
        }

        // Everything was dropped: there is no data left to continue with.
        if (keptCount == 0) {
            System.err.println("ERROR: all probes have zero variance!");
            System.exit(-1);
        }

        // Second pass: collect the surviving rows (by reference) and their names.
        double[][] retainedData = new double[keptCount][doubleMatrixDataset.nrCols];
        ArrayList<String> retainedNames = new ArrayList<>();
        int target = 0;
        for (int row = 0; row < totalProbes; row++) {
            if (!keepProbe[row]) {
                continue;
            }
            retainedData[target] = doubleMatrixDataset.rawData[row];
            retainedNames.add(doubleMatrixDataset.rowObjects.get(row));
            target++;
        }

        // Swap the filtered content into the dataset and refresh its lookup tables.
        doubleMatrixDataset.rawData = retainedData;
        doubleMatrixDataset.rowObjects = retainedNames;
        doubleMatrixDataset.recalculateHashMaps();

        String filteredName = str + ".ProbesWithZeroVarianceRemoved";
        doubleMatrixDataset.save(filteredName + ".txt.gz");
        return filteredName;
    }
}
