package nl.umcg.deelenp.genotypeharmonizer;

import JSci.maths.ArrayMath;
import com.google.common.collect.Lists;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.TreeMap;
import nl.umcg.deelenp.genotypeharmonizer.SnpLogWriter;
import org.apache.log4j.Logger;
import org.molgenis.genotype.RandomAccessGenotypeData;
import org.molgenis.genotype.modifiable.ModifiableGeneticVariant;
import org.molgenis.genotype.modifiable.ModifiableGenotypeData;
import org.molgenis.genotype.modifiable.ModifiableGenotypeDataInMemory;
import org.molgenis.genotype.util.Ld;
import org.molgenis.genotype.util.LdCalculator;
import org.molgenis.genotype.util.LdCalculatorException;
import org.molgenis.genotype.variant.GeneticVariant;

/* loaded from: input_file:nl/umcg/deelenp/genotypeharmonizer/Aligner.class */
/**
 * Aligns the variants of a study dataset to a reference dataset.
 *
 * <p>The alignment runs in three passes over the study variants:
 * <ol>
 *   <li>Iteration 1 pairs every usable study variant with a reference variant at the same
 *       position and swaps non A/T, non G/C SNPs whose alleles differ from the reference.</li>
 *   <li>Iteration 2 (optional) excludes non A/T, non G/C SNPs whose LD pattern with neighbouring
 *       variants disagrees with the reference.</li>
 *   <li>Iteration 3 decides for every A/T or G/C SNP whether it must be swapped, based on the LD
 *       pattern or, when allowed, on the minor allele.</li>
 * </ol>
 */
public class Aligner {

    // NOTE(review): the logger is registered under GenotypeHarmonizer rather than Aligner. Kept
    // as-is so existing logging configuration keeps matching; confirm whether this is intended.
    private static final Logger LOGGER = Logger.getLogger(GenotypeHarmonizer.class);

    /** A progress line is printed to standard out every this many variants. */
    private static final int PROGRESS_INTERVAL = 10000;

    /**
     * Number of neighbouring variants whose haplotype frequencies correlate positively or
     * negatively between the study data and the reference data.
     */
    public static class CorrelationResults {
        private final int posCor;
        private final int negCor;

        /**
         * @param posCor number of neighbours with a positive correlation
         * @param negCor number of neighbours with a negative correlation
         */
        public CorrelationResults(int posCor, int negCor) {
            this.posCor = posCor;
            this.negCor = negCor;
        }

        /** @return number of neighbours with a positive correlation */
        public int getPosCor() {
            return this.posCor;
        }

        /** @return number of neighbours with a negative correlation */
        public int getNegCor() {
            return this.negCor;
        }

        /** @return sum of the positive and negative correlation counts */
        public int getTotalCor() {
            return getPosCor() + getNegCor();
        }
    }

    /**
     * Aligns the study data to the reference data and returns the modifiable, aligned study data.
     * Excluded and swapped variants are reported through a {@link SnpLogWriter}.
     *
     * @param randomAccessGenotypeData  study data; wrapped in a {@link ModifiableGenotypeDataInMemory}
     * @param randomAccessGenotypeData2 reference data, queried by sequence name and position
     * @param d   minimum r2 that both the study and the reference pair must reach before a
     *            neighbouring variant is used in the haplotype comparison
     * @param d2  minimum number of correlated neighbours (positive plus negative) needed to trust
     *            the LD pattern
     * @param i   number of flanking variants considered around each variant
     * @param z   when true, non A/T and non G/C SNPs are LD checked (iteration 2) and swapped
     *            A/T or G/C SNPs are re-checked after swapping
     * @param z2  when true, study IDs are replaced by reference IDs and every change is written
     *            to {@code file}
     * @param z3  when true, study variants without a reference counterpart are not excluded
     *            (they are still not aligned)
     * @param file  tab separated output for ID updates; only used when {@code z2} is true
     * @param d3  maximum minor allele frequency (study and reference) for which alignment on the
     *            minor allele is allowed
     * @param file2 destination handed to the {@link SnpLogWriter}
     * @return the aligned study data
     * @throws LdCalculatorException declared for callers; LD errors on individual pairs are
     *                               logged and skipped
     * @throws IOException if one of the output files cannot be written
     * @throws GenotypeAlignmentException if the study data holds no variants or none of them is
     *                                    found in the reference
     */
    public ModifiableGenotypeData alignToRef(RandomAccessGenotypeData randomAccessGenotypeData, RandomAccessGenotypeData randomAccessGenotypeData2, double d, double d2, int i, boolean z, boolean z2, boolean z3, File file, double d3, File file2) throws LdCalculatorException, IOException, GenotypeAlignmentException {
        ModifiableGenotypeDataInMemory alignedStudyData = new ModifiableGenotypeDataInMemory(randomAccessGenotypeData);
        // The two lists are index-paired: element n of studyVariants belongs to element n of refVariants.
        ArrayList<ModifiableGeneticVariant> studyVariants = new ArrayList<>();
        ArrayList<GeneticVariant> refVariants = new ArrayList<>();

        BufferedWriter idUpdateWriter = null;
        SnpLogWriter snpLogWriter = null;
        try {
            if (z2) {
                idUpdateWriter = new BufferedWriter(new FileWriter(file));
                idUpdateWriter.append("chr\tpos\toriginalId\tnewId\n");
            }
            snpLogWriter = new SnpLogWriter(file2);

            // ---- Iteration 1: pair study variants with reference variants ----
            int variantCount = 0;
            int nonAmbiguousFound = 0;
            int nonAmbiguousSwapped = 0;
            for (ModifiableGeneticVariant studyVariant : alignedStudyData.getModifiableGeneticVariants()) {
                variantCount++;
                if (variantCount % PROGRESS_INTERVAL == 0) {
                    System.out.println("Iteration 1 - " + formatCount(variantCount) + " variants processed");
                }

                String exclusionReason = basicExclusionReason(studyVariant);
                if (exclusionReason != null) {
                    exclude(studyVariant, snpLogWriter, exclusionReason);
                    continue;
                }

                GeneticVariant refVariant = findReferenceVariant(studyVariant, randomAccessGenotypeData2, z3, snpLogWriter);
                if (refVariant == null) {
                    // Either excluded (and logged) or kept unaligned because z3 is set.
                    continue;
                }

                if (z2) {
                    updatePrimaryIdIfDifferent(studyVariant, refVariant, idUpdateWriter);
                }

                // A/T and G/C SNPs look identical on both strands, so they are resolved in iteration 3.
                if (!studyVariant.isAtOrGcSnp()) {
                    nonAmbiguousFound++;
                    if (!studyVariant.getVariantAlleles().sameAlleles(refVariant.getVariantAlleles())) {
                        nonAmbiguousSwapped++;
                        studyVariant.swap();
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.SWAPPED, "");
                    }
                }
                studyVariants.add(studyVariant);
                refVariants.add(refVariant);
            }
            if (idUpdateWriter != null) {
                // All ID updates happen in iteration 1; flush and release the file early.
                idUpdateWriter.close();
            }
            if (variantCount == 0) {
                throw new GenotypeAlignmentException("No variants where found in the input genotype data. Please check your variant filter options");
            }
            LOGGER.info("Iteration 1 - Completed, non A/T and non G/C SNPs are aligned " + formatCount(nonAmbiguousFound) + " found and " + formatCount(nonAmbiguousSwapped) + " swapped");
            System.out.println("Iteration 1 - Completed, non A/T and non G/C SNPs are aligned " + formatCount(nonAmbiguousFound) + " found and " + formatCount(nonAmbiguousSwapped) + " swapped");
            if (studyVariants.isEmpty()) {
                throw new GenotypeAlignmentException("Zero of the input variants found in reference set. Are both datasets the same genome build? Perhapse you need use --forceChr.");
            }

            // NOTE(review): both lists are sorted independently; the index pairing relies on the
            // study and reference variants sorting into the same order - confirm for ties.
            Collections.sort(studyVariants);
            Collections.sort(refVariants);
            LOGGER.debug("Sorting of variant lists completed");

            // ---- Iteration 2: LD check of non A/T, non G/C SNPs ----
            if (z) {
                int excludedOnLd = 0;
                for (int index = 0; index < studyVariants.size(); index++) {
                    if ((index + 1) % PROGRESS_INTERVAL == 0) {
                        System.out.println("Iteration 2 - " + formatCount(index + 1) + " variants processed");
                    }
                    ModifiableGeneticVariant studyVariant = studyVariants.get(index);
                    if (studyVariant.isAtOrGcSnp()) {
                        continue;
                    }
                    CorrelationResults ldPattern = correlateHaplotypes(d, i, studyVariants, refVariants, index, studyVariant, refVariants.get(index));
                    if (ldPattern.getTotalCor() < d2) {
                        exclude(studyVariant, snpLogWriter, "Not enough non A/T or G/C in LD to check LD pattern");
                    } else if (ldPattern.getPosCor() < ldPattern.getNegCor()) {
                        excludedOnLd++;
                        exclude(studyVariant, snpLogWriter, "Non A/T or G/C SNP with inconsistency in LD pattern");
                    }
                }
                LOGGER.info("Iteration 2 - Completed, non A/T and non G/C SNPs are LD checked");
                System.out.println("Iteration 2 - Completed, non A/T and non G/C SNPs are LD checked ");
                LOGGER.info("Excluded " + formatCount(excludedOnLd) + " non A/T and non G/C SNPs based on inconsistencies in LD pattern");
            } else {
                System.out.println("Iteration 2 - Skipped, non A/T and non G/C SNPs are not LD checked ");
                LOGGER.info("Iteration 2 - Skipped, non A/T and non G/C SNPs are not LD checked ");
            }

            // ---- Iteration 3: resolve the strand of A/T and G/C SNPs ----
            int ambiguousChecked = 0;
            int ambiguousSwapped = 0;
            int ambiguousExcludedAfterSwap = 0;
            for (int index = 0; index < studyVariants.size(); index++) {
                if ((index + 1) % PROGRESS_INTERVAL == 0) {
                    System.out.println("Iteration 3 - " + formatCount(index + 1) + " variants processed (" + formatCount(ambiguousChecked) + " G/C or A/T SNPs checked)");
                }
                ModifiableGeneticVariant studyVariant = studyVariants.get(index);
                GeneticVariant refVariant = refVariants.get(index);
                if (!studyVariant.isAtOrGcSnp()) {
                    continue;
                }
                ambiguousChecked++;
                CorrelationResults ldPattern = correlateHaplotypes(d, i, studyVariants, refVariants, index, studyVariant, refVariant);
                boolean tooFewInLd = ldPattern.getTotalCor() < d2;
                boolean ldUndecided = ldPattern.getPosCor() == ldPattern.getNegCor();

                if ((tooFewInLd || ldUndecided) && !z && studyVariant.getMinorAlleleFrequency() <= d3 && refVariant.getMinorAlleleFrequency() <= d3) {
                    // LD cannot decide and the LD check is off: fall back to the minor allele.
                    // NOTE(review): identity comparison of alleles, as in the original code -
                    // presumably allele instances are shared; verify before switching to equals().
                    if (studyVariant.getMinorAllele() != refVariant.getMinorAllele()) {
                        studyVariant.swap();
                        ambiguousSwapped++;
                        // Message is built after the swap, so it reports the post-swap minor allele.
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.SWAPPED, describeMinorAlleles(studyVariant, refVariant));
                    } else if (LOGGER.isDebugEnabled()) {
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.MAINTAINED, describeMinorAlleles(studyVariant, refVariant));
                    }
                } else if (tooFewInLd) {
                    exclude(studyVariant, snpLogWriter, "Not enough non A/T or non G/C in LD to assess strand based on LD. " + describeLdAndMaf(ldPattern, studyVariant, refVariant));
                } else if (ldUndecided) {
                    exclude(studyVariant, snpLogWriter, "Equal number of positive and negative correlations. " + describeLdAndMaf(ldPattern, studyVariant, refVariant));
                } else if (ldPattern.getPosCor() < ldPattern.getNegCor()) {
                    studyVariant.swap();
                    ambiguousSwapped++;
                    if (LOGGER.isDebugEnabled()) {
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.SWAPPED, "Based on LD. " + describeLdAndMaf(ldPattern, studyVariant, refVariant));
                    } else {
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.SWAPPED, "Based on LD");
                    }
                    if (z) {
                        // Re-evaluate after swapping; a swap that does not fix the pattern is suspicious.
                        CorrelationResults afterSwap = correlateHaplotypes(d, i, studyVariants, refVariants, index, studyVariant, refVariant);
                        if (afterSwap.getPosCor() < afterSwap.getNegCor()) {
                            ambiguousExcludedAfterSwap++;
                            exclude(studyVariant, snpLogWriter, "G/C or A/T SNP with inconsistency in LD pattern that is not solved by swapping");
                        }
                    }
                } else if (LOGGER.isDebugEnabled()) {
                    snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.MAINTAINED, "Based on LD. " + describeLdAndMaf(ldPattern, studyVariant, refVariant));
                }
            }

            if (z) {
                LOGGER.info("Iteration 3 - Completed, non A/T and non G/C SNPs are aligned and LD check afterwards");
                System.out.println("Iteration 3 - Completed, non A/T and non G/C SNPs are aligned and LD check afterwards");
                LOGGER.info("Excluded " + formatCount(ambiguousExcludedAfterSwap) + " A/T or G/C variants based on LD patterns");
                System.out.println("Excluded " + formatCount(ambiguousExcludedAfterSwap) + " A/T or G/C variants based on LD patterns");
            } else {
                LOGGER.info("Iteration 3 - Completed, non A/T and non G/C SNPs are aligned. Extra LD check skipped");
                System.out.println("Iteration 3 - Completed, non A/T and non G/C SNPs are aligned. Extra LD check skipped");
            }
            LOGGER.info("Swapped " + formatCount(ambiguousSwapped) + " out of " + formatCount(ambiguousChecked) + " A/T or G/C variants based on LD patterns");
            System.out.println("Swapped " + formatCount(ambiguousSwapped) + " A/T or G/C variants based on LD patterns");
            return alignedStudyData;
        } finally {
            // Release both outputs on every path, including the exceptional ones.
            try {
                if (idUpdateWriter != null) {
                    idUpdateWriter.close(); // no effect if already closed after iteration 1
                }
            } finally {
                if (snpLogWriter != null) {
                    snpLogWriter.close();
                }
            }
        }
    }

    /** Formats a counter with the application-wide number formatter. */
    private static String formatCount(int count) {
        return GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(count);
    }

    /** Logs the exclusion with the given reason and then excludes the variant. */
    private static void exclude(ModifiableGeneticVariant variant, SnpLogWriter snpLogWriter, String reason) throws IOException {
        snpLogWriter.addToLog(variant, SnpLogWriter.Actions.EXCLUDED, reason);
        variant.exclude();
    }

    /** Returns why a variant cannot be aligned at all, or null when it passes the basic checks. */
    private static String basicExclusionReason(ModifiableGeneticVariant variant) {
        if (!variant.isMapped() || variant.getStartPos() == 0) {
            return "No mapping";
        }
        if (!variant.isSnp()) {
            return "Not a SNP";
        }
        if (!variant.isBiallelic()) {
            return "Not biallelic";
        }
        return null;
    }

    /** True when the candidate has the same alleles as the study variant or as its complement. */
    private static boolean hasComparableAlleles(GeneticVariant candidate, ModifiableGeneticVariant studyVariant) {
        return candidate.getVariantAlleles().sameAlleles(studyVariant.getVariantAlleles())
                || candidate.getVariantAlleles().sameAlleles(studyVariant.getVariantAlleles().getComplement());
    }

    /**
     * Finds the reference variant that corresponds to a study variant: first by ID, then by
     * alleles. Returns null when there is no usable counterpart; in that case the study variant
     * has been excluded and logged, unless it is merely absent from the reference and
     * {@code keepVariantsNotInRef} is set.
     */
    private GeneticVariant findReferenceVariant(ModifiableGeneticVariant studyVariant, RandomAccessGenotypeData refData, boolean keepVariantsNotInRef, SnpLogWriter snpLogWriter) throws IOException {
        ArrayList<GeneticVariant> candidates = Lists.newArrayList(refData.getVariantsByPos(studyVariant.getSequenceName(), studyVariant.getStartPos()));
        if (candidates.isEmpty()) {
            if (!keepVariantsNotInRef) {
                exclude(studyVariant, snpLogWriter, "No variants at this position in reference");
            }
            return null;
        }

        // Pass 1: match on ID. The last candidate with the same ID wins.
        GeneticVariant match = null;
        for (GeneticVariant candidate : candidates) {
            if (candidate.getVariantId().isSameId(studyVariant.getVariantId())) {
                if (!hasComparableAlleles(candidate, studyVariant)) {
                    exclude(studyVariant, snpLogWriter, "Found variant with same ID but alleles are not comparable");
                    return null;
                }
                match = candidate;
            }
        }
        if (match != null) {
            return match;
        }

        // Pass 2: no ID match, so accept a candidate with comparable alleles if it is unique.
        for (GeneticVariant candidate : candidates) {
            if (hasComparableAlleles(candidate, studyVariant)) {
                if (match != null) {
                    exclude(studyVariant, snpLogWriter, "Position maps to multiple variants with same alleles. Neither of these variants have same ID as this variant. No way to know what the corresponding variant is");
                    return null;
                }
                match = candidate;
            }
        }
        if (match == null && !keepVariantsNotInRef) {
            exclude(studyVariant, snpLogWriter, "No variant in the reference at this position with same ID or same alleles");
        }
        return match;
    }

    /**
     * Replaces the primary ID of the study variant by the reference ID when the reference has
     * one and it differs, and records the change as one row in the ID update file.
     */
    private static void updatePrimaryIdIfDifferent(ModifiableGeneticVariant studyVariant, GeneticVariant refVariant, BufferedWriter idUpdateWriter) throws IOException {
        String refId = refVariant.getPrimaryVariantId();
        if (refId == null) {
            return;
        }
        String studyId = studyVariant.getPrimaryVariantId();
        if (studyId != null && studyId.equals(refId)) {
            return;
        }
        idUpdateWriter.append(studyVariant.getSequenceName());
        idUpdateWriter.append('\t');
        idUpdateWriter.append(String.valueOf(studyVariant.getStartPos()));
        idUpdateWriter.append('\t');
        idUpdateWriter.append(studyId); // a missing study ID is written as "null"
        idUpdateWriter.append('\t');
        idUpdateWriter.append(refId);
        idUpdateWriter.append('\n');
        LOGGER.debug("Updating primary variant ID of " + studyId + " to: " + refId);
        studyVariant.updatePrimaryId(refId);
    }

    /** Builds the log text used when an A/T or G/C SNP is aligned on its minor allele. */
    private static String describeMinorAlleles(GeneticVariant studyVariant, GeneticVariant refVariant) {
        return "Based on minor allele, study MAF: " + studyVariant.getMinorAlleleFrequency() + "(" + studyVariant.getMinorAllele() + ") reference MAF: " + refVariant.getMinorAlleleFrequency() + "(" + refVariant.getMinorAllele() + ")";
    }

    /** Builds the correlation-count and MAF detail appended to LD based log messages. */
    private static String describeLdAndMaf(CorrelationResults ldPattern, GeneticVariant studyVariant, GeneticVariant refVariant) {
        return "Pos cor " + ldPattern.getPosCor() + " neg cor " + ldPattern.getNegCor() + " MAF study: " + studyVariant.getMinorAlleleFrequency() + "(" + studyVariant.getMinorAllele() + ") MAF reference: " + refVariant.getMinorAlleleFrequency() + "(" + refVariant.getMinorAllele() + ")";
    }

    /**
     * Compares the haplotype structure around one variant between study and reference.
     *
     * <p>For every neighbouring non A/T, non G/C variant on the same sequence whose r2 with the
     * variant reaches {@code minR2} in both datasets, the haplotype frequencies of the study pair
     * and of the reference pair are correlated. The sign of that correlation is counted.
     *
     * @param minR2         minimum r2 required in both study and reference
     * @param flank         number of flanking variants to consider
     * @param studyVariants sorted study variants
     * @param refVariants   reference variants, index-paired with {@code studyVariants}
     * @param variantIndex  index of the variant under test in both lists
     * @param studyVariant  the study variant under test
     * @param refVariant    the reference counterpart of {@code studyVariant}
     * @return the number of positively and negatively correlated neighbours
     */
    private CorrelationResults correlateHaplotypes(double minR2, int flank, ArrayList<ModifiableGeneticVariant> studyVariants, ArrayList<GeneticVariant> refVariants, int variantIndex, GeneticVariant studyVariant, GeneticVariant refVariant) {
        int posCor = 0;
        int negCor = 0;
        // NOTE(review): the window is [variantIndex - flank, variantIndex + flank), i.e. one
        // variant fewer downstream than upstream. Kept as in the original.
        for (int other = Math.max(0, variantIndex - flank); other < variantIndex + flank && other < studyVariants.size(); other++) {
            if (other == variantIndex) {
                continue;
            }
            GeneticVariant otherStudyVariant = studyVariants.get(other);
            if (!studyVariant.getSequenceName().equals(otherStudyVariant.getSequenceName()) || otherStudyVariant.isAtOrGcSnp()) {
                continue;
            }
            GeneticVariant otherRefVariant = refVariants.get(other);
            try {
                Ld studyLd = LdCalculator.calculateLd(studyVariant, otherStudyVariant);
                Ld refLd = LdCalculator.calculateLd(refVariant, otherRefVariant);
                if (!Double.isNaN(studyLd.getR2()) && !Double.isNaN(refLd.getR2()) && studyLd.getR2() >= minR2 && refLd.getR2() >= minR2) {
                    double[] studyFreqs = sortedHaplotypeFrequencies(studyLd);
                    double[] refFreqs = sortedHaplotypeFrequencies(refLd);
                    double denominator = Math.sqrt(ArrayMath.variance(studyFreqs) * ArrayMath.variance(refFreqs));
                    if (denominator != 0.0d) {
                        double correlation = ArrayMath.covariance(studyFreqs, refFreqs) / denominator;
                        if (correlation < 0.0d) {
                            negCor++;
                        } else if (correlation > 0.0d) {
                            posCor++;
                        }
                    }
                }
            } catch (LdCalculatorException e) {
                // Best effort: a pair for which LD cannot be calculated is simply not counted.
                LOGGER.debug("Error in LD calculation, skipping this comparison when comparing haplotype structure. Following error occurred: " + e.getMessage(), e);
            }
        }
        return new CorrelationResults(posCor, negCor);
    }

    /**
     * Returns the haplotype frequencies ordered by haplotype key, so that the arrays of two LD
     * results line up element by element.
     */
    private double[] sortedHaplotypeFrequencies(Ld ld) {
        return createDoubleArrayFromCollection(new TreeMap<String, Double>(ld.getHaplotypesFreq()).values());
    }

    /** Copies the values of a collection, in iteration order, into a primitive array. */
    private double[] createDoubleArrayFromCollection(Collection<Double> collection) {
        double[] values = new double[collection.size()];
        int next = 0;
        for (Double value : collection) {
            values[next++] = value.doubleValue();
        }
        return values;
    }
}
