package eqtlmappingpipeline.interactionanalysis;

import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;
import eqtlmappingpipeline.ase.AseConfiguration;
import gnu.trove.list.array.TDoubleArrayList;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.collections4.BidiMap;
import org.apache.commons.collections4.bidimap.DualHashBidiMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.math3.random.Well19937c;
import org.apache.commons.math3.stat.correlation.SpearmansCorrelation;
import org.apache.commons.math3.stat.ranking.NaNStrategy;
import org.apache.commons.math3.stat.ranking.NaturalRanking;
import org.apache.commons.math3.stat.ranking.RankingAlgorithm;
import org.apache.log4j.Logger;
import org.molgenis.genotype.Allele;
import org.molgenis.genotype.Alleles;
import org.molgenis.genotype.GenotypeDataException;
import org.molgenis.genotype.GenotypeInfo;
import org.molgenis.genotype.RandomAccessGenotypeData;
import org.molgenis.genotype.RandomAccessGenotypeDataReaderFormats;
import org.molgenis.genotype.multipart.IncompatibleMultiPartGenotypeDataException;
import org.molgenis.genotype.sampleFilter.SampleFilter;
import org.molgenis.genotype.tabix.TabixFileNotFoundException;
import org.molgenis.genotype.variant.GeneticVariant;
import org.molgenis.genotype.variantFilter.VariantFilter;
import umcg.genetica.math.matrix2.DoubleMatrixDataset;

/* loaded from: input_file:eqtlmappingpipeline/interactionanalysis/InteractionAnalysisDetermineDirection.class */
/**
 * Command line tool and helper class that, for a variant / gene / covariate
 * combination, computes the Spearman correlation between genotype dosage and
 * expression separately in the samples with the lowest and with the highest
 * covariate values.
 *
 * <p>Comparing the two correlations ({@code rhoLow} and {@code rhoHigh}) shows
 * in which direction the covariate changes the genotype-expression relation.
 */
public class InteractionAnalysisDetermineDirection {

    /** Number of tab separated columns that every query line must have. */
    private static final int QUERY_COLUMN_COUNT = 4;
    /** A correlation cannot be calculated on fewer than two observations. */
    private static final int MIN_SAMPLES_PER_GROUP = 2;

    /**
     * Ranks the covariate values. NaN values make the ranker fail and ties are
     * broken using a random generator with a fixed seed.
     */
    private static final RankingAlgorithm COV_RANKER = new NaturalRanking(NaNStrategy.FAILED, new Well19937c(1));
    private static final SpearmansCorrelation spearmanCalculator = new SpearmansCorrelation();
    private static final Logger LOGGER = Logger.getLogger(InteractionAnalysisDetermineDirection.class);
    private static final Options OPTIONS = new Options();

    private final RandomAccessGenotypeData genotypeData;
    private final DoubleMatrixDataset<String, String> expressionData;
    private final DoubleMatrixDataset<String, String> covariatesData;
    /** Genotype sample name (key) to expression / covariate sample name (value). */
    private final BidiMap<String, String> gte;
    private final HashMap<String, GeneticVariant> variantIdMap;

    /**
     * Immutable pair of correlations: one for the samples with the lowest
     * covariate values and one for the samples with the highest. Both values
     * are {@code NaN} when the correlations could not be determined.
     */
    public static class EffectDiffResult {
        private final double rhoLow;
        private final double rhoHigh;

        /**
         * @param rhoLow correlation in the low covariate group
         * @param rhoHigh correlation in the high covariate group
         */
        public EffectDiffResult(double rhoLow, double rhoHigh) {
            this.rhoLow = rhoLow;
            this.rhoHigh = rhoHigh;
        }

        /** @return correlation in the low covariate group */
        public double getRhoLow() {
            return this.rhoLow;
        }

        /** @return correlation in the high covariate group */
        public double getRhoHigh() {
            return this.rhoHigh;
        }
    }

    /**
     * Command line entry point. Parses the options, loads the genotype,
     * expression, covariate and genotype-to-expression data, and writes one
     * output row per query. Exits with status 1 on invalid arguments or when
     * the input cannot be read.
     *
     * @param args command line arguments, see the options registered in the
     * static initializer of this class
     * @throws IOException declared for backward compatibility
     */
    public static void main(String[] args) throws IOException {
        CommandLine commandLine;
        try {
            commandLine = new PosixParser().parse(OPTIONS, args, false);
        } catch (ParseException e) {
            System.err.println("Invalid command line arguments: " + e.getMessage());
            System.err.println();
            new HelpFormatter().printHelp(" ", OPTIONS);
            System.exit(1);
            return;
        }

        String[] genotypePaths = commandLine.getOptionValues("g");

        RandomAccessGenotypeDataReaderFormats genotypeFormat;
        if (commandLine.hasOption("G")) {
            // Only the enum lookup is guarded, so that unrelated
            // IllegalArgumentExceptions are never reported as a --G problem.
            try {
                genotypeFormat = RandomAccessGenotypeDataReaderFormats.valueOf(commandLine.getOptionValue("G").toUpperCase());
            } catch (IllegalArgumentException e) {
                System.err.println("Error parsing --G \"" + commandLine.getOptionValue("G") + "\" is not a valid input data format");
                System.exit(1);
                return;
            }
        } else if (genotypePaths[0].endsWith(".vcf")) {
            System.err.println("Only vcf.gz is supported. Please see manual on how to do create a vcf.gz file.");
            System.exit(1);
            return;
        } else {
            try {
                genotypeFormat = RandomAccessGenotypeDataReaderFormats.matchFormatToPath(new String[]{genotypePaths[0]});
            } catch (GenotypeDataException e) {
                System.err.println("Unable to determine input 1 type based on specified path. Please specify --G");
                System.exit(1);
                return;
            }
        }

        String expressionPath = commandLine.getOptionValue("e");
        String covariatePath = commandLine.getOptionValue("c");
        String gtePath = commandLine.getOptionValue("gte");
        String queryPath = commandLine.getOptionValue("q");
        String outputPath = commandLine.getOptionValue("o");

        double fraction;
        try {
            fraction = Double.parseDouble(commandLine.getOptionValue("f"));
        } catch (NumberFormatException e) {
            System.err.println("Error parsing --fraction \"" + commandLine.getOptionValue("f") + "\" is not a valid number");
            System.exit(1);
            return;
        }
        // Validate before the (potentially slow) data loading. Written as a
        // negated range check so that NaN is rejected as well.
        if (!(fraction > 0.0d && fraction < 1.0d)) {
            System.err.println("Error parsing --fraction \"" + commandLine.getOptionValue("f") + "\" must be between 0 and 1");
            System.exit(1);
            return;
        }

        System.out.println("Genotype data: " + StringUtils.join(genotypePaths, ", "));
        System.out.println("Genotype data format: " + genotypeFormat);
        System.out.println("Expression data: " + expressionPath);
        System.out.println("Covariate data: " + covariatePath);
        System.out.println("Gte data: " + gtePath);
        System.out.println("Query: " + queryPath);
        System.out.println("Output: " + outputPath);
        System.out.println("Outer fractions to use: " + fraction);

        try {
            RandomAccessGenotypeData genotypes = genotypeFormat.createFilteredGenotypeData(genotypePaths, 100, (VariantFilter) null, (SampleFilter) null, (String) null, 0.8d);
            System.out.println("Genotype data loaded for " + genotypes.getSampleNames().length + " individuals");

            DoubleMatrixDataset<String, String> expression = DoubleMatrixDataset.loadDoubleTextData(expressionPath, "\t");
            System.out.println("Loaded expression data for: " + expression.rows() + " genes and " + expression.columns() + " individuals");

            DoubleMatrixDataset<String, String> covariates = DoubleMatrixDataset.loadDoubleTextData(covariatePath, "\t");
            System.out.println("Loaded covariate data for: " + covariates.rows() + " covariates and " + covariates.columns() + " individuals");

            InteractionAnalysisDetermineDirection analysis = new InteractionAnalysisDetermineDirection(genotypes, expression, covariates, loadGte(gtePath));
            processQueries(analysis, queryPath, outputPath, fraction);
            System.out.println("Done");
        } catch (TabixFileNotFoundException e) {
            LOGGER.fatal("Tabix file not found for input data at: " + e.getPath() + "\nPlease see README on how to create a tabix file");
            System.exit(1);
        } catch (IOException e) {
            LOGGER.fatal("Error reading input data: " + e.getMessage(), e);
            System.exit(1);
        } catch (IncompatibleMultiPartGenotypeDataException e) {
            LOGGER.fatal("Error combining the impute genotype data files: " + e.getMessage(), e);
            System.exit(1);
        } catch (GenotypeDataException e) {
            LOGGER.fatal("Error reading input data: " + e.getMessage(), e);
            System.exit(1);
        }
    }

    /**
     * Reads the query file and writes one result row per query to the output
     * file. Both files are closed when this method returns, also on failure.
     *
     * @param analysis the analysis used to calculate the correlations
     * @param queryPath tab separated file with the columns: variant, gene,
     * covariate, assessed allele
     * @param outputPath file to write the tab separated results to
     * @param fraction fraction of samples to use for each covariate tail
     * @throws IOException when reading or writing fails or when a query line
     * has too few columns
     */
    private static void processQueries(InteractionAnalysisDetermineDirection analysis, String queryPath, String outputPath, double fraction) throws IOException {
        CSVReader queryReader = null;
        CSVWriter outputWriter = null;
        try {
            // NOTE(review): the reader skips the first line of the query file
            // while the description of the -q option states "No header".
            // Confirm which of the two is intended.
            queryReader = new CSVReader(new FileReader(queryPath), '\t', (char) 0, 1);
            outputWriter = new CSVWriter(new FileWriter(outputPath), '\t', (char) 0);

            outputWriter.writeNext(new String[]{"variant", "gene", "covariate", "assessedAllele", "rhoLow", "rhoHigh"});

            String[] query;
            while ((query = queryReader.readNext()) != null) {
                if (query.length == 1 && query[0].length() == 0) {
                    // Tolerate empty lines, for instance at the end of the file.
                    continue;
                }
                if (query.length < QUERY_COLUMN_COUNT) {
                    throw new IOException("Error in query file line, expected " + QUERY_COLUMN_COUNT + " tab separated columns but found " + query.length + ": " + StringUtils.join(query, '\t'));
                }

                String variantId = query[0];
                String gene = query[1];
                String covariate = query[2];
                Allele assessedAllele = Allele.create(query[3]);

                EffectDiffResult result = analysis.calculateEffectDifference(variantId, gene, covariate, assessedAllele, fraction);

                outputWriter.writeNext(new String[]{
                    variantId,
                    gene,
                    covariate,
                    assessedAllele.getAlleleAsString(),
                    String.valueOf(result.getRhoLow()),
                    String.valueOf(result.getRhoHigh())
                });
            }
        } finally {
            // Nested so that the reader is closed even if closing (and thereby
            // flushing) the writer fails.
            try {
                if (outputWriter != null) {
                    outputWriter.close();
                }
            } finally {
                if (queryReader != null) {
                    queryReader.close();
                }
            }
        }
    }

    /**
     * Loads the genotype-to-expression sample coupling from a file with two
     * tab separated columns per line: genotype sample and expression sample.
     *
     * @param gtePath path of the coupling file
     * @return map with the first column as key and the second column as value
     * @throws IOException when the file cannot be read
     * @throws RuntimeException when a line does not have exactly two columns
     */
    private static BidiMap<String, String> loadGte(String gtePath) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(gtePath), AseConfiguration.ENCODING));
        try {
            BidiMap<String, String> coupling = new DualHashBidiMap<String, String>();
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = StringUtils.split(line, '\t');
                if (columns.length != 2) {
                    throw new RuntimeException("Error in GTE file line: " + line);
                }
                coupling.put(columns[0], columns[1]);
            }
            return coupling;
        } finally {
            reader.close();
        }
    }

    /**
     * Creates the analysis. Note that the supplied coupling map is modified:
     * every entry for which the sample is not present in the genotype data,
     * the expression data and the covariate data is removed from it.
     *
     * @param genotypeData genotype data, samples are matched on sample name
     * @param expressionData expression data with genes on the rows and samples
     * on the columns
     * @param covariatesData covariate data with covariates on the rows and
     * samples on the columns
     * @param gte genotype sample name to expression / covariate sample name
     */
    public InteractionAnalysisDetermineDirection(RandomAccessGenotypeData genotypeData, DoubleMatrixDataset<String, String> expressionData, DoubleMatrixDataset<String, String> covariatesData, BidiMap<String, String> gte) {
        this.genotypeData = genotypeData;
        this.expressionData = expressionData;
        this.covariatesData = covariatesData;
        this.gte = gte;
        this.variantIdMap = genotypeData.getVariantIdMap();

        HashSet<String> genotypeSamples = new HashSet<String>();
        Collections.addAll(genotypeSamples, genotypeData.getSampleNames());

        Iterator<Map.Entry<String, String>> couplingIterator = gte.entrySet().iterator();
        while (couplingIterator.hasNext()) {
            Map.Entry<String, String> coupling = couplingIterator.next();
            // One combined check: Iterator.remove() may be called at most once
            // per call to next().
            boolean complete = genotypeSamples.contains(coupling.getKey())
                    && expressionData.containsCol(coupling.getValue())
                    && covariatesData.containsCol(coupling.getValue());
            if (!complete) {
                couplingIterator.remove();
            }
        }
        System.out.println("Samples with: genotypes, expression & covariate data: " + gte.size());
    }

    /**
     * Calculates the Spearman correlation between genotype dosage and
     * expression in the samples with the lowest covariate values and in the
     * samples with the highest covariate values.
     *
     * <p>Only samples with a non-negative dosage that are present in the
     * coupling map are used. The sign of the dosages is inverted when the
     * assessed allele is not the first allele of the variant.
     *
     * @param variantId id of the variant
     * @param gene row name in the expression data
     * @param covariate row name in the covariate data
     * @param assessedAllele allele to which the direction of the correlations
     * is relative
     * @param fraction fraction of the included samples to use for each of the
     * two groups, must be larger than 0 and smaller than 1
     * @return the two correlations, or a result with two {@code NaN} values
     * when the variant, gene or covariate is unknown, the variant is not
     * biallelic, the variant does not contain the assessed allele or the groups
     * would contain fewer than two samples
     * @throws IllegalArgumentException when the fraction is not between 0 and 1
     */
    public EffectDiffResult calculateEffectDifference(String variantId, String gene, String covariate, Allele assessedAllele, double fraction) {
        // Negated range check so that NaN is rejected as well.
        if (!(fraction > 0.0d && fraction < 1.0d)) {
            throw new IllegalArgumentException("Fraction must be between 0 and 1");
        }
        if (!this.variantIdMap.containsKey(variantId) || !this.expressionData.containsRow(gene) || !this.covariatesData.containsRow(covariate)) {
            return notAvailable();
        }

        GeneticVariant variant = this.variantIdMap.get(variantId);
        Alleles variantAlleles = variant.getVariantAlleles();
        if (!variantAlleles.contains(assessedAllele) || variantAlleles.getAlleleCount() != 2) {
            return notAvailable();
        }

        // Identity comparison is kept from the original code.
        // NOTE(review): this presumes Allele instances are shared per allele - confirm.
        boolean invertDosage = assessedAllele != variantAlleles.get(0);

        float[] dosages = variant.getSampleDosages();
        String[] sampleNames = this.genotypeData.getSampleNames();

        // The three lists are filled together so that position i always refers
        // to the same sample in each of them.
        TDoubleArrayList genotypeValues = new TDoubleArrayList(dosages.length);
        TDoubleArrayList expressionValues = new TDoubleArrayList(dosages.length);
        TDoubleArrayList covariateValues = new TDoubleArrayList(dosages.length);
        for (int s = 0; s < dosages.length; s++) {
            if (dosages[s] >= 0.0f && this.gte.containsKey(sampleNames[s])) {
                String expressionSample = this.gte.get(sampleNames[s]);
                genotypeValues.add(invertDosage ? dosages[s] * (-1.0d) : dosages[s]);
                expressionValues.add(this.expressionData.getElement(gene, expressionSample));
                covariateValues.add(this.covariatesData.getElement(covariate, expressionSample));
            }
        }
        System.out.println("Included samples: " + genotypeValues.size());

        double[] genotypes = genotypeValues.toArray();
        double[] expression = expressionValues.toArray();

        double[] covariateRanks = COV_RANKER.rank(covariateValues.toArray());
        int sampleCount = covariateRanks.length;
        int samplesPerGroup = (int) Math.floor(sampleCount * fraction);
        System.out.println("Samples per group: " + samplesPerGroup);

        if (samplesPerGroup < MIN_SAMPLES_PER_GROUP) {
            return notAvailable();
        }

        // The ranker returns the rank of each sample, not the sample that has
        // each rank, so the ranks are first converted to an ordering.
        int[] samplesByCovariate = orderByRank(covariateRanks);

        double[] lowGenotypes = new double[samplesPerGroup];
        double[] lowExpression = new double[samplesPerGroup];
        double[] highGenotypes = new double[samplesPerGroup];
        double[] highExpression = new double[samplesPerGroup];
        for (int i = 0; i < samplesPerGroup; i++) {
            int lowSample = samplesByCovariate[i];
            int highSample = samplesByCovariate[(sampleCount - 1) - i];
            lowGenotypes[i] = genotypes[lowSample];
            lowExpression[i] = expression[lowSample];
            highGenotypes[i] = genotypes[highSample];
            highExpression[i] = expression[highSample];
        }

        double rhoLow = spearmanCalculator.correlation(lowGenotypes, lowExpression);
        double rhoHigh = spearmanCalculator.correlation(highGenotypes, highExpression);
        System.out.println("rho low:" + rhoLow);
        System.out.println("rho high:" + rhoHigh);
        return new EffectDiffResult(rhoLow, rhoHigh);
    }

    /** Creates the result that is returned when no correlations can be calculated. */
    private static EffectDiffResult notAvailable() {
        return new EffectDiffResult(Double.NaN, Double.NaN);
    }

    /**
     * Converts ranks to an ordering using a stable bucket sort on the integer
     * part of each rank. Equal ranks keep their original relative order.
     *
     * @param ranks rank of each element, every rank is expected to be in the
     * range 1 to {@code ranks.length}
     * @return the element indices ordered from the lowest to the highest rank
     */
    private static int[] orderByRank(double[] ranks) {
        int count = ranks.length;
        // After the prefix sum, bucketStart[r] is the number of elements with
        // a rank lower than r, i.e. the first output position for rank r.
        int[] bucketStart = new int[count + 2];
        for (int i = 0; i < count; i++) {
            bucketStart[(int) ranks[i] + 1]++;
        }
        for (int r = 1; r < bucketStart.length; r++) {
            bucketStart[r] += bucketStart[r - 1];
        }
        int[] order = new int[count];
        for (int i = 0; i < count; i++) {
            order[bucketStart[(int) ranks[i]]++] = i;
        }
        return order;
    }

    // Registers the command line options. OptionBuilder collects the
    // properties in static state until create(...) is called.
    static {
        OptionBuilder.withArgName("basePath");
        OptionBuilder.hasArgs();
        OptionBuilder.withDescription("The genotype");
        OptionBuilder.withLongOpt("genotypes");
        OptionBuilder.isRequired();
        OPTIONS.addOption(OptionBuilder.create("g"));
        OptionBuilder.withArgName("format");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("The genotype data format. If not defined will attempt to automatically select the first matching dataset on the specified path\n* PED_MAP - plink PED MAP files.\n* PLINK_BED - plink BED BIM FAM files.\n* VCF - bgziped vcf with tabix index file\n* VCFFOLDER - matches all bgziped vcf files + tabix index in a folder\n* SHAPEIT2 - shapeit2 phased haplotypes .haps & .sample\n* GEN - Oxford .gen & .sample\n* TRITYPER - TriTyper format folder");
        OptionBuilder.withLongOpt("genotypesFormat");
        OPTIONS.addOption(OptionBuilder.create("G"));
        OptionBuilder.withArgName("path");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("Expression data");
        OptionBuilder.withLongOpt("expression");
        OptionBuilder.isRequired();
        OPTIONS.addOption(OptionBuilder.create("e"));
        OptionBuilder.withArgName("path");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("Covariate data");
        OptionBuilder.withLongOpt("covariates");
        OptionBuilder.isRequired();
        OPTIONS.addOption(OptionBuilder.create("c"));
        OptionBuilder.withArgName("path");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("Genotype to expression coupling");
        OptionBuilder.withLongOpt("gte");
        OptionBuilder.isRequired();
        OPTIONS.addOption(OptionBuilder.create("gte"));
        OptionBuilder.withArgName("path");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("Query variant <tab> gene <tab> covariate <tab> assessedAllele. No header");
        OptionBuilder.withLongOpt("query");
        OptionBuilder.isRequired();
        OPTIONS.addOption(OptionBuilder.create("q"));
        OptionBuilder.withArgName("path");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("Output file");
        OptionBuilder.withLongOpt("output");
        OptionBuilder.isRequired();
        OPTIONS.addOption(OptionBuilder.create("o"));
        OptionBuilder.withArgName("double");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("Fraction of tail of either end of covarate to use.");
        OptionBuilder.withLongOpt("fraction");
        OptionBuilder.isRequired();
        OPTIONS.addOption(OptionBuilder.create("f"));
    }
}
