package org.forester.util;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Sequence;
import psidev.psi.mi.jami.model.Xref;

/* loaded from: input_file:WEB-INF/lib/forester-1.038.jar:org/forester/util/SequenceAccessionTools.class */
public final class SequenceAccessionTools {
    public static final String UNIPROT_KB_BASE_PATTERN_STR = "((?:[OPQ][0-9][A-Z0-9]{3}[0-9])|(?:[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}))";
    public static final Pattern GENBANK_NUC_PATTERN_1 = Pattern.compile("(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d{5}(?:\\.\\d+)?)(?:[^a-zA-Z0-9]|\\Z)");
    public static final Pattern GENBANK_NUC_PATTERN_2 = Pattern.compile("(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}\\d{6}(?:\\.\\d+)?)(?:[^a-zA-Z0-9]|\\Z)");
    public static final Pattern GENBANK_PROT_PATTERN = Pattern.compile("(?:\\A|.*[^a-zA-Z0-9])([A-Z]{3}\\d{5}(?:\\.\\d+)?)(?:[^a-zA-Z0-9]|\\Z)");
    public static final Pattern GI_PATTERN = Pattern.compile("(?:\\b|_)(?:GI|gi)[|_=:](\\d+)(?:\\b|_)");
    public static final Pattern UNIPROT_KB_PATTERN_0 = Pattern.compile("(?:\\b|_)((?:[OPQ][0-9][A-Z0-9]{3}[0-9])|(?:[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}))(?:\\b|_)");
    public static final Pattern UNIPROT_KB_PATTERN_1 = Pattern.compile("(?:\\b|_)(?:sp|tr)[\\.|\\-_=/\\\\]((?:[OPQ][0-9][A-Z0-9]{3}[0-9])|(?:[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}))(?:\\b|_)");
    public static final Pattern UNIPROT_KB_PATTERN_2 = Pattern.compile("(?:\\b|_)(?:[A-Z0-9]{2,5}|((?:[OPQ][0-9][A-Z0-9]{3}[0-9])|(?:[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})))_(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA)(?:\\b|_)");
    public static final Pattern ENSEMBL_PATTERN = Pattern.compile("(?:\\b|_)(ENS[A-Z]*[0-9]+)(?:\\b|_)");
    private static final Pattern REFSEQ_PATTERN = Pattern.compile("(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}_\\d{6,})(?:[^a-zA-Z0-9]|\\Z)");

    private SequenceAccessionTools() {
    }

    public static final boolean isProteinDbQuery(String str) {
        String parseRefSeqAccessorFromString = parseRefSeqAccessorFromString(str);
        if ((ForesterUtil.isEmpty(parseRefSeqAccessorFromString) || parseRefSeqAccessorFromString.charAt(1) != 'P') && ForesterUtil.isEmpty(parseUniProtAccessorFromString(str))) {
            return GENBANK_PROT_PATTERN.matcher(str).lookingAt();
        }
        return true;
    }

    public static final Accession obtainAccessorFromDataFields(PhylogenyNode phylogenyNode) {
        String obtainUniProtAccessorFromDataFields = obtainUniProtAccessorFromDataFields(phylogenyNode);
        if (!ForesterUtil.isEmpty(obtainUniProtAccessorFromDataFields)) {
            return new Accession(obtainUniProtAccessorFromDataFields, Accession.Source.UNIPROT);
        }
        String obtainGenbankAccessorFromDataFields = obtainGenbankAccessorFromDataFields(phylogenyNode);
        if (!ForesterUtil.isEmpty(obtainGenbankAccessorFromDataFields)) {
            return new Accession(obtainGenbankAccessorFromDataFields, Accession.Source.NCBI);
        }
        String obtainRefSeqAccessorFromDataFields = obtainRefSeqAccessorFromDataFields(phylogenyNode);
        if (!ForesterUtil.isEmpty(obtainRefSeqAccessorFromDataFields)) {
            return new Accession(obtainRefSeqAccessorFromDataFields, Accession.Source.REFSEQ);
        }
        String obtainGiNumberFromDataFields = obtainGiNumberFromDataFields(phylogenyNode);
        if (ForesterUtil.isEmpty(obtainGiNumberFromDataFields)) {
            return null;
        }
        return new Accession(obtainGiNumberFromDataFields, Accession.Source.GI);
    }

    public static final Accession obtainFromSeqAccession(PhylogenyNode phylogenyNode) {
        if (!phylogenyNode.getNodeData().isHasSequence() || phylogenyNode.getNodeData().getSequence().getAccession() == null || ForesterUtil.isEmpty(phylogenyNode.getNodeData().getSequence().getAccession().getSource()) || ForesterUtil.isEmpty(phylogenyNode.getNodeData().getSequence().getAccession().getValue())) {
            return null;
        }
        String lowerCase = phylogenyNode.getNodeData().getSequence().getAccession().getSource().toLowerCase();
        String value = phylogenyNode.getNodeData().getSequence().getAccession().getValue();
        if (lowerCase.startsWith(PhyloXmlUtil.UNIPROT_TAX_PROVIDER) || lowerCase.equals("swissprot") || lowerCase.equals(Xref.UNIPROTKB_TREMBL) || lowerCase.equals("sp")) {
            return new Accession(value, Accession.Source.UNIPROT);
        }
        if (lowerCase.equals("embl") || lowerCase.equals("ebi")) {
            return new Accession(value, Accession.Source.EMBL);
        }
        if (lowerCase.equals("ncbi") || lowerCase.equals("genbank")) {
            return new Accession(value, Accession.Source.NCBI);
        }
        if (lowerCase.equals(Xref.REFSEQ)) {
            return new Accession(value, Accession.Source.REFSEQ);
        }
        if (lowerCase.equals("gi")) {
            return new Accession(value, Accession.Source.GI);
        }
        return null;
    }

    public static final String obtainGenbankAccessorFromDataFields(PhylogenyNode phylogenyNode) {
        String str = null;
        if (phylogenyNode.getNodeData().isHasSequence()) {
            Sequence sequence = phylogenyNode.getNodeData().getSequence();
            if (!ForesterUtil.isEmpty(sequence.getSymbol())) {
                str = parseGenbankAccessorFromString(sequence.getSymbol());
            }
            if (!ForesterUtil.isEmpty(sequence.getGeneName())) {
                str = parseGenbankAccessorFromString(sequence.getGeneName());
            }
            if (ForesterUtil.isEmpty(str) && !ForesterUtil.isEmpty(sequence.getName())) {
                str = parseGenbankAccessorFromString(sequence.getName());
            }
            if (ForesterUtil.isEmpty(str) && phylogenyNode.getNodeData().getSequence().getAccession() != null && !ForesterUtil.isEmpty(sequence.getAccession().getValue())) {
                str = parseGenbankAccessorFromString(sequence.getAccession().getValue());
            }
        }
        if (ForesterUtil.isEmpty(str) && !ForesterUtil.isEmpty(phylogenyNode.getName())) {
            str = parseGenbankAccessorFromString(phylogenyNode.getName());
        }
        return str;
    }

    public static final String obtainGiNumberFromDataFields(PhylogenyNode phylogenyNode) {
        String str = null;
        if (phylogenyNode.getNodeData().isHasSequence()) {
            Sequence sequence = phylogenyNode.getNodeData().getSequence();
            if (ForesterUtil.isEmpty((String) null) && !ForesterUtil.isEmpty(sequence.getName())) {
                str = parseGInumberFromString(sequence.getName());
            }
            if (ForesterUtil.isEmpty(str) && !ForesterUtil.isEmpty(sequence.getGeneName())) {
                str = parseGInumberFromString(sequence.getGeneName());
            }
            if (ForesterUtil.isEmpty(str) && phylogenyNode.getNodeData().getSequence().getAccession() != null && !ForesterUtil.isEmpty(sequence.getAccession().getValue())) {
                str = parseGInumberFromString(sequence.getAccession().getValue());
            }
        }
        if (ForesterUtil.isEmpty(str) && !ForesterUtil.isEmpty(phylogenyNode.getName())) {
            str = parseGInumberFromString(phylogenyNode.getName());
        }
        return str;
    }

    public static final String obtainRefSeqAccessorFromDataFields(PhylogenyNode phylogenyNode) {
        String str = null;
        if (phylogenyNode.getNodeData().isHasSequence()) {
            Sequence sequence = phylogenyNode.getNodeData().getSequence();
            if (!ForesterUtil.isEmpty(sequence.getSymbol())) {
                str = parseRefSeqAccessorFromString(sequence.getSymbol());
            }
            if (!ForesterUtil.isEmpty(sequence.getGeneName())) {
                str = parseRefSeqAccessorFromString(sequence.getGeneName());
            }
            if (ForesterUtil.isEmpty(str) && !ForesterUtil.isEmpty(sequence.getName())) {
                str = parseRefSeqAccessorFromString(sequence.getName());
            }
            if (ForesterUtil.isEmpty(str) && phylogenyNode.getNodeData().getSequence().getAccession() != null && !ForesterUtil.isEmpty(sequence.getAccession().getValue())) {
                str = parseRefSeqAccessorFromString(sequence.getAccession().getValue());
            }
        }
        if (ForesterUtil.isEmpty(str) && !ForesterUtil.isEmpty(phylogenyNode.getName())) {
            str = parseRefSeqAccessorFromString(phylogenyNode.getName());
        }
        return str;
    }

    public static final String obtainUniProtAccessorFromDataFields(PhylogenyNode phylogenyNode) {
        String str = null;
        if (phylogenyNode.getNodeData().isHasSequence()) {
            Sequence sequence = phylogenyNode.getNodeData().getSequence();
            if (!ForesterUtil.isEmpty(sequence.getSymbol())) {
                str = parseUniProtAccessorFromString(sequence.getSymbol());
            }
            if (ForesterUtil.isEmpty(str) && !ForesterUtil.isEmpty(sequence.getName())) {
                str = parseUniProtAccessorFromString(sequence.getName());
            }
            if (ForesterUtil.isEmpty(str) && !ForesterUtil.isEmpty(sequence.getGeneName())) {
                str = parseUniProtAccessorFromString(sequence.getGeneName());
            }
            if (ForesterUtil.isEmpty(str) && phylogenyNode.getNodeData().getSequence().getAccession() != null && !ForesterUtil.isEmpty(sequence.getAccession().getValue())) {
                str = parseUniProtAccessorFromString(sequence.getAccession().getValue());
            }
        }
        if (ForesterUtil.isEmpty(str) && !ForesterUtil.isEmpty(phylogenyNode.getName())) {
            str = parseUniProtAccessorFromString(phylogenyNode.getName());
        }
        return str;
    }

    public static final Accession parseAccessorFromString(String str) {
        if (ForesterUtil.isEmpty(str)) {
            return null;
        }
        String parseUniProtAccessorFromString = parseUniProtAccessorFromString(str);
        if (!ForesterUtil.isEmpty(parseUniProtAccessorFromString)) {
            return new Accession(parseUniProtAccessorFromString, Accession.Source.UNIPROT);
        }
        String parseGenbankAccessorFromString = parseGenbankAccessorFromString(str);
        if (!ForesterUtil.isEmpty(parseGenbankAccessorFromString)) {
            return new Accession(parseGenbankAccessorFromString, Accession.Source.NCBI);
        }
        String parseRefSeqAccessorFromString = parseRefSeqAccessorFromString(str);
        if (!ForesterUtil.isEmpty(parseRefSeqAccessorFromString)) {
            return new Accession(parseRefSeqAccessorFromString, Accession.Source.REFSEQ);
        }
        String parseGInumberFromString = parseGInumberFromString(str);
        if (!ForesterUtil.isEmpty(parseGInumberFromString)) {
            return new Accession(parseGInumberFromString, Accession.Source.GI);
        }
        String parseEnsemlAccessorFromString = parseEnsemlAccessorFromString(str);
        if (ForesterUtil.isEmpty(parseEnsemlAccessorFromString)) {
            return null;
        }
        return new Accession(parseEnsemlAccessorFromString, Accession.Source.ENSEMBL);
    }

    public static final String parseGenbankAccessorFromString(String str) {
        Matcher matcher = GENBANK_NUC_PATTERN_1.matcher(str);
        if (matcher.lookingAt()) {
            return matcher.group(1);
        }
        Matcher matcher2 = GENBANK_NUC_PATTERN_2.matcher(str);
        if (matcher2.lookingAt()) {
            return matcher2.group(1);
        }
        Matcher matcher3 = GENBANK_PROT_PATTERN.matcher(str);
        if (matcher3.lookingAt()) {
            return matcher3.group(1);
        }
        return null;
    }

    public static final String parseGenbankProteinAccessorFromString(String str) {
        Matcher matcher = GENBANK_PROT_PATTERN.matcher(str);
        if (matcher.lookingAt()) {
            return matcher.group(1);
        }
        return null;
    }

    public static final String parseGInumberFromString(String str) {
        Matcher matcher = GI_PATTERN.matcher(str);
        if (matcher.find()) {
            return matcher.group(1);
        }
        return null;
    }

    public static final String parseEnsemlAccessorFromString(String str) {
        Matcher matcher = ENSEMBL_PATTERN.matcher(str);
        if (matcher.find()) {
            return matcher.group(1);
        }
        return null;
    }

    public static final String parseRefSeqAccessorFromString(String str) {
        Matcher matcher = REFSEQ_PATTERN.matcher(str);
        if (matcher.lookingAt()) {
            return matcher.group(1);
        }
        return null;
    }

    public static final String parseUniProtAccessorFromString(String str) {
        Matcher matcher = UNIPROT_KB_PATTERN_1.matcher(str);
        if (matcher.find()) {
            return matcher.group(1);
        }
        Matcher matcher2 = UNIPROT_KB_PATTERN_2.matcher(str);
        if (matcher2.find()) {
            return matcher2.group();
        }
        Matcher matcher3 = UNIPROT_KB_PATTERN_0.matcher(str);
        if (matcher3.find()) {
            return matcher3.group(1);
        }
        return null;
    }
}
