package org.forester.application;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.math.RoundingMode;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import org.forester.io.parsers.FastaParser;
import org.forester.io.parsers.GeneralMsaParser;
import org.forester.msa.DeleteableMsa;
import org.forester.msa.Msa;
import org.forester.msa.MsaInferrer;
import org.forester.msa.MsaMethods;
import org.forester.msa_compactor.Chart;
import org.forester.msa_compactor.MsaCompactor;
import org.forester.surfacing.DomainArchitectureBasedGenomeSimilarityCalculator;
import org.forester.util.CommandLineArguments;
import org.forester.util.DescriptiveStatistics;
import org.forester.util.ForesterUtil;
import psidev.psi.mi.jami.json.MIJsonUtils;
import psidev.psi.mi.jami.model.Range;

/* loaded from: input_file:WEB-INF/lib/forester-1.038.jar:org/forester/application/msa_compactor.class */
/**
 * Command-line front end for {@link MsaCompactor}.
 *
 * <p>Reads a multiple sequence alignment (FASTA if the input looks like FASTA, otherwise the
 * general MSA parser is used), validates the command-line options against the alignment, prints
 * a summary of the chosen settings and then either
 * <ul>
 *   <li>only prints basic information about the alignment ({@code -i}),</li>
 *   <li>removes sequences below a minimal effective length ({@code -ml}), or</li>
 *   <li>removes sequences by one of the targets {@code -r}, {@code -g}, {@code -l} (or, if none
 *       of them is given, runs in "chart only" mode) and displays the resulting chart.</li>
 * </ul>
 */
public class msa_compactor {
    private static final NumberFormat NF_1 = new DecimalFormat("0.#");
    private static final NumberFormat NF_4 = new DecimalFormat("0.####");
    private static final String HELP_OPTION_1 = "help";
    private static final String HELP_OPTION_2 = "h";
    private static final String REMOVE_WORST_OFFENDERS_OPTION = "r";
    private static final String AV_GAPINESS_OPTION = "g";
    private static final String STEP_OPTION = "s";
    private static final String LENGTH_OPTION = "l";
    private static final String REALIGN_OPTION = "a";
    private static final String INFO_ONLY_OPTION = "i";
    private static final String STEP_FOR_DIAGNOSTICS_OPTION = "sd";
    private static final String MIN_LENGTH_OPTION = "ml";
    private static final String GAP_RATIO_LENGTH_OPTION = "gr";
    private static final String REPORT_ENTROPY = "e";
    private static final String OUTPUT_FORMAT_OPTION = "f";
    private static final String OUTPUT_REMOVED_SEQS_OPTION = "ro";
    private static final String MAFFT_OPTIONS = "mo";
    private static final String PERFORM_PHYLOGENETIC_INFERENCE = "t";
    private static final String PATH_TO_MAFFT_OPTION = "mafft";
    private static final String DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION = "nn";
    // Values accepted by the -f option (see the help text: f for fasta, p for phylip, n for nexus).
    private static final String FORMAT_VALUE_FASTA = "f";
    private static final String FORMAT_VALUE_PHYLIP = "p";
    private static final String FORMAT_VALUE_NEXUS = "n";
    private static final String PRG_NAME = "msa_compactor";
    private static final String PRG_DESC = "multiple sequence aligment compactor";
    private static final String PRG_VERSION = "0.3";
    private static final String PRG_DATE = "140508";
    private static final String E_MAIL = "czmasek@sanfordburham.org";
    private static final String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";

    /**
     * Program entry point.
     *
     * @param strArr the raw command-line arguments: options, the MSA input file and, optionally,
     *               the output file base
     */
    public static void main(String[] strArr) {
        try {
            final CommandLineArguments cla = new CommandLineArguments(strArr);
            if (cla.isOptionSet(HELP_OPTION_1)
                    || cla.isOptionSet(HELP_OPTION_2)
                    || cla.getNumberOfNames() < 1
                    || cla.getNumberOfNames() > 2) {
                printHelp();
                System.exit(0);
            }
            final File inFile = cla.getFile(0);
            final File outFile = cla.getNumberOfNames() > 1 ? cla.getFile(1) : null;

            // Defaults; -1 (or null) means "option not given".
            int worstRemove = -1;
            double targetGapRatio = -1.0;
            int targetLength = -1;
            int step = 1;
            boolean realign = false;
            boolean normalizeWithEffectiveLength = true;
            String pathToMafft = null;
            int stepForDiagnostics = 1;
            int minLength = -1;
            double maxGapRatioPerColumn = -1.0;
            boolean reportEntropy = false;
            Msa.MSA_FORMAT outputFormat = Msa.MSA_FORMAT.FASTA;
            File removedSeqsOutBase = null;
            String mafftOptions = "--auto";
            boolean performPhylogeneticInference = false;

            final ArrayList<String> allowedOptions = new ArrayList<>();
            allowedOptions.add(REMOVE_WORST_OFFENDERS_OPTION);
            allowedOptions.add(AV_GAPINESS_OPTION);
            allowedOptions.add(LENGTH_OPTION);
            allowedOptions.add(REALIGN_OPTION);
            allowedOptions.add(DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION);
            allowedOptions.add(STEP_OPTION);
            allowedOptions.add(PATH_TO_MAFFT_OPTION);
            allowedOptions.add(STEP_FOR_DIAGNOSTICS_OPTION);
            allowedOptions.add(MIN_LENGTH_OPTION);
            allowedOptions.add(GAP_RATIO_LENGTH_OPTION);
            allowedOptions.add(REPORT_ENTROPY);
            allowedOptions.add(OUTPUT_FORMAT_OPTION);
            allowedOptions.add(OUTPUT_REMOVED_SEQS_OPTION);
            allowedOptions.add(MAFFT_OPTIONS);
            allowedOptions.add(PERFORM_PHYLOGENETIC_INFERENCE);
            allowedOptions.add(INFO_ONLY_OPTION);
            final String disallowed = cla.validateAllowedOptionsAsString(allowedOptions);
            if (disallowed.length() > 0) {
                ForesterUtil.fatalError(PRG_NAME, "unknown option(s): " + disallowed);
            }

            // Close the input stream as soon as the alignment has been parsed.
            final DeleteableMsa msa;
            try (FileInputStream in = new FileInputStream(inFile)) {
                msa = FastaParser.isLikelyFasta(inFile)
                        ? DeleteableMsa.createInstance(FastaParser.parseMsa(in))
                        : DeleteableMsa.createInstance(GeneralMsaParser.parse(in));
            }
            final DescriptiveStatistics initialStats = MsaMethods.calculateEffectiveLengthStatistics(msa);

            if (cla.isOptionSet(INFO_ONLY_OPTION)) {
                printInfo(inFile, msa, initialStats);
                System.exit(0);
            }

            // Without any of the removal targets only a chart is produced, no output files.
            final boolean chartOnly = !(cla.isOptionSet(LENGTH_OPTION)
                    || cla.isOptionSet(REMOVE_WORST_OFFENDERS_OPTION)
                    || cla.isOptionSet(AV_GAPINESS_OPTION)
                    || cla.isOptionSet(MIN_LENGTH_OPTION));
            if (!chartOnly && outFile == null) {
                ForesterUtil.fatalError(PRG_NAME, "outfile file missing");
            }

            if (cla.isOptionSet(REMOVE_WORST_OFFENDERS_OPTION)) {
                worstRemove = cla.getOptionValueAsInt(REMOVE_WORST_OFFENDERS_OPTION);
                if (worstRemove < 1 || worstRemove >= msa.getNumberOfSequences() - 1) {
                    ForesterUtil.fatalError(PRG_NAME, "number of worst offender sequences to remove is out of range: " + worstRemove);
                }
            }
            if (cla.isOptionSet(AV_GAPINESS_OPTION)) {
                // -g and -r are mutually exclusive.
                if (cla.isOptionSet(REMOVE_WORST_OFFENDERS_OPTION)) {
                    printHelp();
                    System.exit(0);
                }
                targetGapRatio = cla.getOptionValueAsDouble(AV_GAPINESS_OPTION);
                if (targetGapRatio < 0.0 || targetGapRatio >= 1.0) {
                    ForesterUtil.fatalError(PRG_NAME, "target gap-ratio is out of range: " + targetGapRatio);
                }
            }
            if (cla.isOptionSet(LENGTH_OPTION)) {
                // -l excludes both -r and -g.
                if (cla.isOptionSet(REMOVE_WORST_OFFENDERS_OPTION) || cla.isOptionSet(AV_GAPINESS_OPTION)) {
                    printHelp();
                    System.exit(0);
                }
                targetLength = cla.getOptionValueAsInt(LENGTH_OPTION);
                if (targetLength >= msa.getLength()) {
                    ForesterUtil.fatalError(PRG_NAME, "target length is out of range [longer than MSA (" + msa.getLength() + ")]: " + targetLength);
                } else if (targetLength < initialStats.getMin()) {
                    ForesterUtil.fatalError(PRG_NAME, "target length is out of range [shorter than the shortest sequence (" + initialStats.getMin() + ") ]: " + targetLength);
                }
            }
            if (cla.isOptionSet(MIN_LENGTH_OPTION)) {
                // -ml is a stand-alone mode and cannot be combined with the options below.
                if (cla.isOptionSet(LENGTH_OPTION)
                        || cla.isOptionSet(REMOVE_WORST_OFFENDERS_OPTION)
                        || cla.isOptionSet(AV_GAPINESS_OPTION)
                        || cla.isOptionSet(STEP_OPTION)
                        || cla.isOptionSet(REALIGN_OPTION)
                        || cla.isOptionSet(PATH_TO_MAFFT_OPTION)
                        || cla.isOptionSet(STEP_FOR_DIAGNOSTICS_OPTION)
                        || cla.isOptionSet(REPORT_ENTROPY)
                        || cla.isOptionSet(OUTPUT_REMOVED_SEQS_OPTION)
                        || cla.isOptionSet(PERFORM_PHYLOGENETIC_INFERENCE)) {
                    printHelp();
                    System.exit(0);
                }
                minLength = cla.getOptionValueAsInt(MIN_LENGTH_OPTION);
                if (minLength < 2 || minLength > initialStats.getMax()) {
                    ForesterUtil.fatalError(PRG_NAME, "value for minimal sequence length is out of range: " + minLength);
                }
            }
            if (cla.isOptionSet(STEP_OPTION)) {
                step = cla.getOptionValueAsInt(STEP_OPTION);
                if (step < 1 || step > msa.getNumberOfSequences() || (worstRemove > 0 && step > worstRemove)) {
                    ForesterUtil.fatalError(PRG_NAME, "value for step is out of range: " + step);
                }
            }
            if (cla.isOptionSet(REALIGN_OPTION)) {
                realign = true;
            }
            if (cla.isOptionSet(PATH_TO_MAFFT_OPTION)) {
                if (!realign) {
                    ForesterUtil.fatalError(PRG_NAME, "no need to indicate path to MAFFT without realigning");
                }
                pathToMafft = cla.getOptionValueAsCleanString(PATH_TO_MAFFT_OPTION);
            }
            if (cla.isOptionSet(DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION)) {
                normalizeWithEffectiveLength = false;
            }
            if (cla.isOptionSet(STEP_FOR_DIAGNOSTICS_OPTION)) {
                stepForDiagnostics = cla.getOptionValueAsInt(STEP_FOR_DIAGNOSTICS_OPTION);
                if (stepForDiagnostics < 1
                        || stepForDiagnostics > msa.getNumberOfSequences()
                        || (worstRemove > 0 && stepForDiagnostics > worstRemove)) {
                    ForesterUtil.fatalError(PRG_NAME, "value for diagnostic step is out of range: " + stepForDiagnostics);
                }
            }
            if (cla.isOptionSet(GAP_RATIO_LENGTH_OPTION)) {
                // NOTE(review): this value is validated and echoed below, but it is never handed
                // to MsaCompactor anywhere in this class - confirm whether that is intended.
                maxGapRatioPerColumn = cla.getOptionValueAsDouble(GAP_RATIO_LENGTH_OPTION);
                if (maxGapRatioPerColumn < 0.0 || maxGapRatioPerColumn > 1.0) {
                    ForesterUtil.fatalError(PRG_NAME, "gap ratio is out of range: " + maxGapRatioPerColumn);
                }
            }
            if (cla.isOptionSet(REPORT_ENTROPY)) {
                reportEntropy = true;
            }
            if (cla.isOptionSet(OUTPUT_FORMAT_OPTION)) {
                final String formatValue = cla.getOptionValueAsCleanString(OUTPUT_FORMAT_OPTION);
                if (formatValue.equalsIgnoreCase(FORMAT_VALUE_PHYLIP)) {
                    outputFormat = Msa.MSA_FORMAT.PHYLIP;
                } else if (formatValue.equalsIgnoreCase(FORMAT_VALUE_FASTA)) {
                    outputFormat = Msa.MSA_FORMAT.FASTA;
                } else if (formatValue.equalsIgnoreCase(FORMAT_VALUE_NEXUS)) {
                    outputFormat = Msa.MSA_FORMAT.NEXUS;
                } else {
                    ForesterUtil.fatalError(PRG_NAME, "illegal or empty output format option: " + formatValue);
                }
            }
            if (cla.isOptionSet(OUTPUT_REMOVED_SEQS_OPTION)) {
                removedSeqsOutBase = new File(cla.getOptionValueAsCleanString(OUTPUT_REMOVED_SEQS_OPTION));
            }
            if (realign) {
                // Fall back to an automatically guessed MAFFT location if none was given.
                if (ForesterUtil.isEmpty(pathToMafft)) {
                    pathToMafft = MsaCompactor.guessPathToMafft();
                }
                checkPathToMafft(pathToMafft);
                if (cla.isOptionSet(MAFFT_OPTIONS)) {
                    mafftOptions = cla.getOptionValueAsCleanString(MAFFT_OPTIONS);
                    if (ForesterUtil.isEmpty(mafftOptions) || mafftOptions.length() < 3) {
                        ForesterUtil.fatalError(PRG_NAME, "illegal or empty MAFFT options: " + mafftOptions);
                    }
                }
            } else if (cla.isOptionSet(MAFFT_OPTIONS)) {
                ForesterUtil.fatalError(PRG_NAME, "no need to indicate MAFFT options without realigning");
            }
            if (cla.isOptionSet(PERFORM_PHYLOGENETIC_INFERENCE)) {
                performPhylogeneticInference = true;
            }
            if (chartOnly) {
                if (outFile != null || removedSeqsOutBase != null) {
                    ForesterUtil.fatalError(PRG_NAME, "chart only, no outfile(s) produced, thus no need to indicate output file(s)");
                }
                if (!realign && cla.isOptionSet(STEP_OPTION)) {
                    ForesterUtil.fatalError(PRG_NAME, "chart only, no re-aligning, thus no need to use step for output and re-aligning; use -sd instead");
                }
            }
            if (performPhylogeneticInference && stepForDiagnostics != 1) {
                ForesterUtil.fatalError(PRG_NAME, "step for diagnostics reports needs to be set to 1 for tree calculation");
            }

            // Echo the effective settings.
            printInfo(inFile, msa, initialStats);
            if (!chartOnly) {
                System.out.println("Output                               : " + outFile);
            }
            if (removedSeqsOutBase != null) {
                System.out.println("Write removed sequences to           : " + removedSeqsOutBase);
            }
            if (worstRemove > 0) {
                System.out.println("Number of worst offenders to remove  : " + worstRemove);
            }
            if (targetGapRatio > 0.0) {
                System.out.println("Target gap-ratio                     : " + targetGapRatio);
            }
            if (targetLength > 0) {
                System.out.println("Target MSA length                    : " + targetLength);
            }
            if (minLength > 1) {
                System.out.println("Minimal effective sequence length    : " + minLength);
            }
            if (maxGapRatioPerColumn > -1.0) {
                System.out.println("Maximum allowed gap ratio per column : " + maxGapRatioPerColumn);
            }
            if (outFile != null || removedSeqsOutBase != null) {
                System.out.print("Output format                        : ");
                if (outputFormat == Msa.MSA_FORMAT.FASTA) {
                    System.out.println("fasta");
                } else if (outputFormat == Msa.MSA_FORMAT.PHYLIP) {
                    System.out.println("phylip");
                } else if (outputFormat == Msa.MSA_FORMAT.NEXUS) {
                    System.out.println("nexus");
                }
            }
            if (minLength == -1) {
                if (chartOnly && !realign) {
                    System.out.println("Step for output and re-aligning      : n/a");
                } else if (chartOnly) {
                    System.out.println("Step for re-aligning                 : " + step);
                } else {
                    System.out.println("Step for output and re-aligning      : " + step);
                }
                System.out.println("Step for diagnostics reports         : " + stepForDiagnostics);
                System.out.println("Calculate normalized Shannon Entropy : " + reportEntropy);
                if (normalizeWithEffectiveLength) {
                    System.out.println("Normalize                            : with individual, effective sequence lenghts");
                } else {
                    System.out.println("Normalize                            : with MSA length");
                }
                System.out.println("Realign with MAFFT                   : " + realign);
                if (realign) {
                    System.out.println("MAFFT options                        : " + mafftOptions);
                }
                System.out.println("Simple tree (Kimura distances, NJ)   : " + performPhylogeneticInference);
            }
            System.out.println();

            // Remember the sequence count before any sequences are removed; the chart needs it.
            final int initialNumberOfSeqs = msa.getNumberOfSequences();
            final MsaCompactor compactor = new MsaCompactor(msa);
            compactor.setInfileName(inFile.getName());
            if (worstRemove > 0 || targetGapRatio > 0.0 || targetLength > 0 || minLength != -1) {
                compactor.setOutputFormat(outputFormat);
                compactor.setOutFileBase(outFile);
            }
            if (minLength != -1) {
                compactor.removeSequencesByMinimalLength(minLength);
            } else {
                compactor.setPeformPhylogenticInference(performPhylogeneticInference);
                if (removedSeqsOutBase != null) {
                    compactor.setRemovedSeqsOutBase(removedSeqsOutBase);
                }
                compactor.setNorm(normalizeWithEffectiveLength);
                compactor.setRealign(realign);
                if (realign) {
                    compactor.setPathToMafft(pathToMafft);
                    compactor.setMafftOptions(mafftOptions);
                }
                compactor.setStep(step);
                compactor.setStepForDiagnostics(stepForDiagnostics);
                compactor.setCalculateNormalizedShannonEntropy(reportEntropy);
                // Exactly one mode is run: -r, then -g, then -l take precedence; otherwise chart only.
                Chart.display(
                        worstRemove > 0
                                ? compactor.removeWorstOffenders(worstRemove)
                                : targetGapRatio > 0.0
                                        ? compactor.removeViaGapAverage(targetGapRatio)
                                        : targetLength > 0
                                                ? compactor.removeViaLength(targetLength)
                                                : compactor.chart(step, realign, normalizeWithEffectiveLength),
                        initialNumberOfSeqs,
                        reportEntropy,
                        inFile.getName());
                System.out.println();
                System.out.println("Final MSA properties");
                printMsaInfo(msa, MsaMethods.calculateEffectiveLengthStatistics(msa));
            }
        } catch (IOException | IllegalArgumentException e) {
            ForesterUtil.fatalError(PRG_NAME, e.getMessage());
        } catch (Exception e) {
            ForesterUtil.unexpectedFatalError(PRG_NAME, e);
        }
    }

    /**
     * Prints the program banner, the input file and the basic properties of the alignment.
     *
     * @param file                  the MSA input file (only printed)
     * @param deleteableMsa         the alignment to describe
     * @param descriptiveStatistics effective sequence length statistics of the alignment
     */
    private static void printInfo(File file, DeleteableMsa deleteableMsa, DescriptiveStatistics descriptiveStatistics) {
        ForesterUtil.printProgramInformation(PRG_NAME, PRG_DESC, PRG_VERSION, PRG_DATE, E_MAIL, WWW, ForesterUtil.getForesterLibraryInformation());
        System.out.println("Input MSA                            : " + file);
        printMsaInfo(deleteableMsa, descriptiveStatistics);
    }

    /**
     * Prints length, sequence count, sequence length statistics, gap statistics and the
     * normalized Shannon entropies (for 7 and 21 states) of the alignment to standard output.
     *
     * @param deleteableMsa         the alignment to describe
     * @param descriptiveStatistics effective sequence length statistics of the alignment
     */
    private static void printMsaInfo(DeleteableMsa deleteableMsa, DescriptiveStatistics descriptiveStatistics) {
        System.out.println("MSA length                           : " + deleteableMsa.getLength());
        System.out.println("Number of sequences                  : " + deleteableMsa.getNumberOfSequences());
        System.out.println("Median sequence length               : " + NF_1.format(descriptiveStatistics.median()));
        System.out.println("Mean sequence length                 : " + NF_1.format(descriptiveStatistics.arithmeticMean()));
        System.out.println("Max sequence length                  : " + ((int) descriptiveStatistics.getMax()));
        System.out.println("Min sequence length                  : " + ((int) descriptiveStatistics.getMin()));
        System.out.println("Gap ratio                            : " + NF_4.format(MsaMethods.calcGapRatio(deleteableMsa)));
        System.out.println("Mean gap count per sequence          : " + NF_1.format(MsaMethods.calcNumberOfGapsStats(deleteableMsa).arithmeticMean()));
        System.out.println("Normalized Shannon Entropy (entn7)   : " + NF_4.format(MsaMethods.calcNormalizedShannonsEntropy(7, deleteableMsa)));
        System.out.println("Normalized Shannon Entropy (entn21)  : " + NF_4.format(MsaMethods.calcNormalizedShannonsEntropy(21, deleteableMsa)));
    }

    /**
     * Reports a fatal error if no MAFFT path is known or if MAFFT is not installed at the
     * given path.
     *
     * @param str the path to the MAFFT executable; may be null or empty
     */
    private static void checkPathToMafft(String str) {
        if (ForesterUtil.isEmpty(str)) {
            ForesterUtil.fatalError(PRG_NAME, "no MAFFT executable found, use -\"mafft=<path to MAFFT>\" option");
        } else if (!MsaInferrer.isInstalled(str)) {
            ForesterUtil.fatalError(PRG_NAME, "no MAFFT executable at \"" + str + "\"");
        }
    }

    /**
     * Prints the program banner and the usage text, including the guessed MAFFT location
     * (or a hint on how to supply one) next to the {@code -a} option.
     */
    private static void printHelp() {
        ForesterUtil.printProgramInformation(PRG_NAME, PRG_DESC, PRG_VERSION, PRG_DATE, E_MAIL, WWW, ForesterUtil.getForesterLibraryInformation());
        final String guessedPath = MsaCompactor.guessPathToMafft();
        final String mafftHint;
        if (!ForesterUtil.isEmpty(guessedPath)) {
            mafftHint = " (using " + guessedPath + ")";
        } else {
            mafftHint = " (no path to MAFFT found, use -\"mafft=<path to MAFFT>\" option)";
        }
        System.out.println("Usage:");
        System.out.println();
        System.out.println("msa_compactor [options] <msa input file> [output file base]");
        System.out.println();
        System.out.println(" options: ");
        System.out.println();
        System.out.println("   -i             to only display same basic information about the MSA");
        System.out.println("   -r=<integer>   number of worst offender sequences to remove");
        System.out.println("   -l=<integer>   target MSA length");
        System.out.println("   -g=<decimal>   target gap-ratio (0.0-1.0)");
        System.out.println("   -a             to realign using MAFFT" + mafftHint);
        System.out.println("   -mo=<string>   options for MAFFT (default: --auto)");
        System.out.println("   -s=<integer>   step for output and re-aligning (default: 1)");
        System.out.println("   -sd=<integer>  step for diagnostics reports (default: 1)");
        System.out.println("   -e             to calculate normalized Shannon Entropy (not recommended for very large alignments)");
        System.out.println("   -f=<f|p|n>     format for output alignments: f for fasta (default), p for phylip, or n for nexus");
        System.out.println("   -ro=<file>     to output the removed sequences");
        System.out.println("   -ml=<integer>  minimal effecive sequence length (for deleting of shorter sequences)");
        System.out.println("   -gr=<decimal>  maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)");
        System.out.println("   -t             to calculate a simple phylogenetic tree (Kimura distances, NJ)");
        System.out.println("   -nn            to normalize gap-contributions with MSA length, instead of individual effective sequence lenghts");
        System.out.println();
        System.out.println();
        System.out.println();
    }

    // Both number formats round half up when printing statistics.
    static {
        NF_1.setRoundingMode(RoundingMode.HALF_UP);
        NF_4.setRoundingMode(RoundingMode.HALF_UP);
    }
}
