package org.forester.io.parsers;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import org.forester.protein.BasicDomain;
import org.forester.protein.BasicProtein;
import org.forester.protein.Domain;
import org.forester.protein.Protein;
import org.forester.surfacing.DomainArchitectureBasedGenomeSimilarityCalculator;
import org.forester.util.ForesterUtil;

/* loaded from: input_file:WEB-INF/lib/forester-1.038.jar:org/forester/io/parsers/HmmscanPerDomainTableParser.class */
public final class HmmscanPerDomainTableParser {
    private static final String RETRO = "RETRO";
    private static final String PHAGE = "PHAGE";
    private static final String VIR = "VIR";
    private static final String TRANSPOS = "TRANSPOS";
    private static final String RV = "RV";
    private static final String GAG = "GAG_";
    private static final String HCV = "HCV_";
    private static final String HERPES = "HERPES_";
    private static final String BACULO = "BACULO_";
    private static final int E_VALUE_MAXIMUM_DEFAULT = -1;
    private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
    private static final boolean IGNORE_DUFS_DEFAULT = false;
    private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
    private static final boolean IGNORE_REPLACED_RRMS = false;
    private static final boolean IGNORE_hGDE_amylase = true;
    private final Set<String> _filter;
    private final FilterType _filter_type;
    private final File _input_file;
    private final String _species;
    private double _fs_e_value_maximum;
    private double _i_e_value_maximum;
    private Map<String, Double> _individual_score_cutoffs;
    private boolean _ignore_dufs;
    private boolean _ignore_virus_like_ids;
    private int _max_allowed_overlap;
    private boolean _ignore_engulfed_domains;
    private ReturnType _return_type;
    private int _proteins_encountered;
    private int _proteins_ignored_due_to_filter;
    private int _proteins_stored;
    private int _domains_encountered;
    private int _domains_ignored_due_to_duf;
    private int _domains_ignored_due_to_overlap;
    private int _domains_ignored_due_to_fs_e_value;
    private int _domains_ignored_due_to_i_e_value;
    private int _domains_ignored_due_to_individual_score_cutoff;
    private int _domains_stored;
    private SortedSet<String> _domains_stored_set;
    private long _time;
    private int _domains_ignored_due_to_negative_domain_filter;
    private Map<String, Integer> _domains_ignored_due_to_negative_domain_filter_counts_map;
    private int _domains_ignored_due_to_virus_like_id;
    private Map<String, Integer> _domains_ignored_due_to_virus_like_id_counts_map;
    private final INDIVIDUAL_SCORE_CUTOFF _ind_cutoff;
    private final boolean _allow_proteins_with_same_name;

    /* loaded from: input_file:WEB-INF/lib/forester-1.038.jar:org/forester/io/parsers/HmmscanPerDomainTableParser$FilterType.class */
    public enum FilterType {
        NONE,
        POSITIVE_PROTEIN,
        NEGATIVE_PROTEIN,
        NEGATIVE_DOMAIN
    }

    /* loaded from: input_file:WEB-INF/lib/forester-1.038.jar:org/forester/io/parsers/HmmscanPerDomainTableParser$INDIVIDUAL_SCORE_CUTOFF.class */
    public enum INDIVIDUAL_SCORE_CUTOFF {
        FULL_SEQUENCE,
        DOMAIN,
        NONE
    }

    /* loaded from: input_file:WEB-INF/lib/forester-1.038.jar:org/forester/io/parsers/HmmscanPerDomainTableParser$ReturnType.class */
    public enum ReturnType {
        UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN
    }

    public HmmscanPerDomainTableParser(File file, String str, INDIVIDUAL_SCORE_CUTOFF individual_score_cutoff) {
        this._input_file = file;
        this._species = str;
        this._filter = null;
        this._filter_type = FilterType.NONE;
        this._ind_cutoff = individual_score_cutoff;
        this._allow_proteins_with_same_name = false;
        init();
    }

    public HmmscanPerDomainTableParser(File file, String str, INDIVIDUAL_SCORE_CUTOFF individual_score_cutoff, boolean z) {
        this._input_file = file;
        this._species = str;
        this._filter = null;
        this._filter_type = FilterType.NONE;
        this._ind_cutoff = individual_score_cutoff;
        this._allow_proteins_with_same_name = z;
        init();
    }

    public HmmscanPerDomainTableParser(File file, String str, Set<String> set, FilterType filterType, INDIVIDUAL_SCORE_CUTOFF individual_score_cutoff) {
        this._input_file = file;
        this._species = str;
        this._filter = set;
        this._filter_type = filterType;
        this._ind_cutoff = individual_score_cutoff;
        this._allow_proteins_with_same_name = false;
        init();
    }

    public HmmscanPerDomainTableParser(File file, String str, Set<String> set, FilterType filterType, INDIVIDUAL_SCORE_CUTOFF individual_score_cutoff, boolean z) {
        this._input_file = file;
        this._species = str;
        this._filter = set;
        this._filter_type = filterType;
        this._ind_cutoff = individual_score_cutoff;
        this._allow_proteins_with_same_name = z;
        init();
    }

    public boolean isAllowProteinsWithSameName() {
        return this._allow_proteins_with_same_name;
    }

    private void actuallyAddProtein(List<Protein> list, Protein protein) {
        Iterator<Domain> it2 = protein.getProteinDomains().iterator();
        while (it2.hasNext()) {
            getDomainsStoredSet().add(it2.next().getDomainId());
        }
        list.add(protein);
        this._proteins_stored++;
    }

    private void addProtein(List<Protein> list, Protein protein) {
        if (getMaxAllowedOverlap() != -1 || isIgnoreEngulfedDomains()) {
            int numberOfProteinDomains = protein.getNumberOfProteinDomains();
            protein = ForesterUtil.removeOverlappingDomains(getMaxAllowedOverlap(), isIgnoreEngulfedDomains(), protein);
            int numberOfProteinDomains2 = numberOfProteinDomains - protein.getNumberOfProteinDomains();
            this._domains_stored -= numberOfProteinDomains2;
            this._domains_ignored_due_to_overlap += numberOfProteinDomains2;
        }
        if (getFilterType() != FilterType.POSITIVE_PROTEIN && getFilterType() != FilterType.NEGATIVE_PROTEIN) {
            actuallyAddProtein(list, protein);
            return;
        }
        HashSet hashSet = new HashSet();
        Iterator<Domain> it2 = protein.getProteinDomains().iterator();
        while (it2.hasNext()) {
            hashSet.add(it2.next().getDomainId());
        }
        hashSet.retainAll(getFilter());
        if (getFilterType() == FilterType.POSITIVE_PROTEIN) {
            if (hashSet.size() > 0) {
                actuallyAddProtein(list, protein);
                return;
            } else {
                this._proteins_ignored_due_to_filter++;
                return;
            }
        }
        if (hashSet.size() < 1) {
            actuallyAddProtein(list, protein);
        } else {
            this._proteins_ignored_due_to_filter++;
        }
    }

    public int getDomainsEncountered() {
        return this._domains_encountered;
    }

    public int getDomainsIgnoredDueToDuf() {
        return this._domains_ignored_due_to_duf;
    }

    public int getDomainsIgnoredDueToIEval() {
        return this._domains_ignored_due_to_i_e_value;
    }

    public int getDomainsIgnoredDueToFsEval() {
        return this._domains_ignored_due_to_fs_e_value;
    }

    public int getDomainsIgnoredDueToIndividualScoreCutoff() {
        return this._domains_ignored_due_to_individual_score_cutoff;
    }

    public int getDomainsIgnoredDueToNegativeDomainFilter() {
        return this._domains_ignored_due_to_negative_domain_filter;
    }

    public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
        return this._domains_ignored_due_to_negative_domain_filter_counts_map;
    }

    public int getDomainsIgnoredDueToOverlap() {
        return this._domains_ignored_due_to_overlap;
    }

    public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
        return this._domains_ignored_due_to_virus_like_id_counts_map;
    }

    public int getDomainsIgnoredDueToVirusLikeIds() {
        return this._domains_ignored_due_to_virus_like_id;
    }

    public int getDomainsStored() {
        return this._domains_stored;
    }

    public SortedSet<String> getDomainsStoredSet() {
        return this._domains_stored_set;
    }

    private double getFsEValueMaximum() {
        return this._fs_e_value_maximum;
    }

    private double getIEValueMaximum() {
        return this._i_e_value_maximum;
    }

    private Set<String> getFilter() {
        return this._filter;
    }

    private FilterType getFilterType() {
        return this._filter_type;
    }

    public INDIVIDUAL_SCORE_CUTOFF getIndividualCutoffAppliesTo() {
        return this._ind_cutoff;
    }

    private Map<String, Double> getIndividualScoreCutoffs() {
        return this._individual_score_cutoffs;
    }

    private File getInputFile() {
        return this._input_file;
    }

    private int getMaxAllowedOverlap() {
        return this._max_allowed_overlap;
    }

    public int getProteinsEncountered() {
        return this._proteins_encountered;
    }

    public int getProteinsIgnoredDueToFilter() {
        return this._proteins_ignored_due_to_filter;
    }

    public int getProteinsStored() {
        return this._proteins_stored;
    }

    private ReturnType getReturnType() {
        return this._return_type;
    }

    private String getSpecies() {
        return this._species;
    }

    public long getTime() {
        return this._time;
    }

    private void init() {
        this._fs_e_value_maximum = -1.0d;
        this._i_e_value_maximum = -1.0d;
        setIgnoreDufs(false);
        setReturnType(RETURN_TYPE_DEFAULT);
        this._max_allowed_overlap = -1;
        setIndividualScoreCutoffs(null);
        setIgnoreEngulfedDomains(false);
        setIgnoreVirusLikeIds(false);
        intitCounts();
    }

    private void intitCounts() {
        setDomainsStoredSet(new TreeSet());
        setDomainsEncountered(0);
        setProteinsEncountered(0);
        setProteinsIgnoredDueToFilter(0);
        setDomainsIgnoredDueToNegativeFilter(0);
        setDomainsIgnoredDueToDuf(0);
        setDomainsIgnoredDueToFsEval(0);
        setDomainsIgnoredDueToIEval(0);
        setDomainsIgnoredDueToIndividualScoreCutoff(0);
        setDomainsIgnoredDueToVirusLikeId(0);
        setDomainsIgnoredDueToOverlap(0);
        setDomainsStored(0);
        setProteinsStored(0);
        setTime(0L);
        setDomainsIgnoredDueToVirusLikeIdCountsMap(new TreeMap());
        setDomainsIgnoredDueToNegativeDomainFilterCountsMap(new TreeMap());
    }

    private boolean isIgnoreDufs() {
        return this._ignore_dufs;
    }

    private boolean isIgnoreEngulfedDomains() {
        return this._ignore_engulfed_domains;
    }

    private boolean isIgnoreVirusLikeIds() {
        return this._ignore_virus_like_ids;
    }

    public List<Protein> parse() throws IOException {
        if (getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE && (getIndividualScoreCutoffs() == null || getIndividualScoreCutoffs().size() < 1)) {
            throw new RuntimeException("attempt to use individual cuttoffs with having set them");
        }
        intitCounts();
        HashSet hashSet = new HashSet();
        String isReadableFile = ForesterUtil.isReadableFile(getInputFile());
        if (!ForesterUtil.isEmpty(isReadableFile)) {
            throw new IOException(isReadableFile);
        }
        BufferedReader bufferedReader = new BufferedReader(new FileReader(getInputFile()));
        ArrayList arrayList = new ArrayList();
        BasicProtein basicProtein = null;
        int i = 0;
        long time = new Date().getTime();
        Object obj = "";
        int i2 = -1;
        while (true) {
            String readLine = bufferedReader.readLine();
            if (readLine == null) {
                if (basicProtein != null && basicProtein.getProteinDomains().size() > 0) {
                    addProtein(arrayList, basicProtein);
                }
                setProteinsEncountered(hashSet.size());
                setTime(new Date().getTime() - time);
                return arrayList;
            }
            i++;
            if (!ForesterUtil.isEmpty(readLine) && !readLine.startsWith("#")) {
                String[] split = readLine.split("\\s+");
                String str = split[0];
                String str2 = split[1];
                parseInt(split[2], i, "tlen");
                String str3 = split[3];
                String str4 = split[4];
                int parseInt = parseInt(split[5], i, "qlen");
                double parseDouble = parseDouble(split[6], i, "E-value");
                double parseDouble2 = parseDouble(split[7], i, "score");
                int parseInt2 = parseInt(split[9], i, "count");
                int parseInt3 = parseInt(split[10], i, "total");
                parseDouble(split[11], i, "c-Evalue");
                double parseDouble3 = parseDouble(split[12], i, "i-Evalue");
                double parseDouble4 = parseDouble(split[13], i, "score");
                parseInt(split[15], i, "hmm from");
                parseInt(split[16], i, "hmm to");
                int parseInt4 = parseInt(split[17], i, "ali from");
                int parseInt5 = parseInt(split[18], i, "ali to");
                parseInt(split[19], i, "env from");
                parseInt(split[20], i, "env to");
                this._domains_encountered++;
                if (!str3.equals(obj) || parseInt != i2) {
                    if (!isAllowProteinsWithSameName()) {
                        if (str3.equals(obj)) {
                            throw new IOException("more than one protein named [" + str3 + "] lengths: " + parseInt + ", " + i2);
                        }
                        if (hashSet.contains(str3)) {
                            throw new IOException("more than one protein named [" + str3 + "]");
                        }
                    }
                    obj = str3;
                    i2 = parseInt;
                    hashSet.add(str3);
                    if (basicProtein != null && basicProtein.getProteinDomains().size() > 0) {
                        addProtein(arrayList, basicProtein);
                    }
                    if (getReturnType() != ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN) {
                        throw new IllegalArgumentException("unknown return type");
                    }
                    basicProtein = new BasicProtein(str3, getSpecies(), parseInt);
                }
                boolean z = false;
                if (getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE) {
                    if (!getIndividualScoreCutoffs().containsKey(str)) {
                        throw new IOException("could not find a score cutoff value for domain id \"" + str + "\" [line " + i + "] in [" + getInputFile().getCanonicalPath() + "]");
                    }
                    double doubleValue = getIndividualScoreCutoffs().get(str).doubleValue();
                    if (getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE) {
                        if (parseDouble2 < doubleValue) {
                            z = true;
                        }
                    } else if (getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.DOMAIN && parseDouble4 < doubleValue) {
                        z = true;
                    }
                }
                String upperCase = str.toUpperCase();
                if (z) {
                    this._domains_ignored_due_to_individual_score_cutoff++;
                } else if (parseInt4 == parseInt5) {
                    continue;
                } else if (getFsEValueMaximum() != -1.0d && parseDouble > getFsEValueMaximum()) {
                    this._domains_ignored_due_to_fs_e_value++;
                } else if (getIEValueMaximum() != -1.0d && parseDouble3 > getIEValueMaximum()) {
                    this._domains_ignored_due_to_i_e_value++;
                } else if (isIgnoreDufs() && upperCase.startsWith("DUF")) {
                    this._domains_ignored_due_to_duf++;
                } else if (upperCase.equals("hGDE_amylase")) {
                    continue;
                } else if (isIgnoreVirusLikeIds() && (upperCase.contains(VIR) || upperCase.contains(PHAGE) || upperCase.contains(RETRO) || upperCase.contains(TRANSPOS) || upperCase.startsWith(RV) || upperCase.startsWith(GAG) || upperCase.startsWith(HCV) || upperCase.startsWith(HERPES) || upperCase.startsWith(BACULO))) {
                    ForesterUtil.increaseCountingMap(getDomainsIgnoredDueToVirusLikeIdCountsMap(), str);
                    this._domains_ignored_due_to_virus_like_id++;
                } else if (getFilterType() == FilterType.NEGATIVE_DOMAIN && getFilter().contains(str)) {
                    this._domains_ignored_due_to_negative_domain_filter++;
                    ForesterUtil.increaseCountingMap(getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), str);
                } else {
                    try {
                        basicProtein.addProteinDomain(new BasicDomain(str, parseInt4, parseInt5, (short) parseInt2, (short) parseInt3, parseDouble3, parseDouble4));
                        this._domains_stored++;
                    } catch (IllegalArgumentException e) {
                        throw new IOException("problem with domain parsing at line " + i + "[" + readLine + "]: " + e.getMessage());
                    }
                }
            }
        }
    }

    private double parseDouble(String str, int i, String str2) throws IOException {
        try {
            return Double.valueOf(str).doubleValue();
        } catch (NumberFormatException e) {
            throw new IOException("could not parse \" +label + \" from \"" + str + "\" [line " + i + "] in [" + getInputFile().getCanonicalPath() + "]");
        }
    }

    private int parseInt(String str, int i, String str2) throws IOException {
        try {
            return Integer.valueOf(str).intValue();
        } catch (NumberFormatException e) {
            throw new IOException("could not parse \"" + str2 + "\" from \"" + str + "\" [line " + i + "] in [" + getInputFile().getCanonicalPath() + "]");
        }
    }

    private void setDomainsEncountered(int i) {
        this._domains_encountered = i;
    }

    private void setDomainsIgnoredDueToDuf(int i) {
        this._domains_ignored_due_to_duf = i;
    }

    private void setDomainsIgnoredDueToFsEval(int i) {
        this._domains_ignored_due_to_fs_e_value = i;
    }

    private void setDomainsIgnoredDueToIEval(int i) {
        this._domains_ignored_due_to_i_e_value = i;
    }

    private void setDomainsIgnoredDueToIndividualScoreCutoff(int i) {
        this._domains_ignored_due_to_individual_score_cutoff = i;
    }

    private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap(Map<String, Integer> map) {
        this._domains_ignored_due_to_negative_domain_filter_counts_map = map;
    }

    private void setDomainsIgnoredDueToNegativeFilter(int i) {
        this._domains_ignored_due_to_negative_domain_filter = i;
    }

    private void setDomainsIgnoredDueToOverlap(int i) {
        this._domains_ignored_due_to_overlap = i;
    }

    private void setDomainsIgnoredDueToVirusLikeId(int i) {
        this._domains_ignored_due_to_virus_like_id = i;
    }

    private void setDomainsIgnoredDueToVirusLikeIdCountsMap(Map<String, Integer> map) {
        this._domains_ignored_due_to_virus_like_id_counts_map = map;
    }

    private void setDomainsStored(int i) {
        this._domains_stored = i;
    }

    private void setDomainsStoredSet(SortedSet<String> sortedSet) {
        this._domains_stored_set = sortedSet;
    }

    public void setFsEValueMaximum(double d) {
        if (d < DomainArchitectureBasedGenomeSimilarityCalculator.MIN_SIMILARITY_SCORE) {
            throw new IllegalArgumentException("attempt to set the maximum E-value to a negative value");
        }
        this._fs_e_value_maximum = d;
    }

    public void setIEValueMaximum(double d) {
        if (d < DomainArchitectureBasedGenomeSimilarityCalculator.MIN_SIMILARITY_SCORE) {
            throw new IllegalArgumentException("attempt to set the maximum E-value to a negative value");
        }
        this._i_e_value_maximum = d;
    }

    public void setIgnoreDufs(boolean z) {
        this._ignore_dufs = z;
    }

    public void setIgnoreEngulfedDomains(boolean z) {
        this._ignore_engulfed_domains = z;
    }

    public void setIgnoreVirusLikeIds(boolean z) {
        this._ignore_virus_like_ids = z;
    }

    public void setIndividualScoreCutoffs(Map<String, Double> map) {
        this._individual_score_cutoffs = map;
    }

    public void setMaxAllowedOverlap(int i) {
        if (i < 0) {
            throw new IllegalArgumentException("Attempt to set max allowed overlap to less than zero.");
        }
        this._max_allowed_overlap = i;
    }

    private void setProteinsEncountered(int i) {
        this._proteins_encountered = i;
    }

    private void setProteinsIgnoredDueToFilter(int i) {
        this._proteins_ignored_due_to_filter = i;
    }

    private void setProteinsStored(int i) {
        this._proteins_stored = i;
    }

    public void setReturnType(ReturnType returnType) {
        this._return_type = returnType;
    }

    private void setTime(long j) {
        this._time = j;
    }
}
