package org.apache.ctakes.dictionary.cased.ae;

import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.Pair;
import org.apache.ctakes.core.util.StringUtil;
import org.apache.ctakes.core.util.annotation.SemanticGroup;
import org.apache.ctakes.core.util.annotation.SemanticTui;
import org.apache.ctakes.dictionary.cased.annotation.AlikeSubsumingAnnotationCreator;
import org.apache.ctakes.dictionary.cased.annotation.AnnotationCreator;
import org.apache.ctakes.dictionary.cased.annotation.NonSubsumingAnnotationCreator;
import org.apache.ctakes.dictionary.cased.annotation.SemanticSubsumingAnnotationCreator;
import org.apache.ctakes.dictionary.cased.dictionary.BsvDictionary;
import org.apache.ctakes.dictionary.cased.dictionary.BsvListDictionary;
import org.apache.ctakes.dictionary.cased.dictionary.CasedDictionary;
import org.apache.ctakes.dictionary.cased.dictionary.DictionaryStore;
import org.apache.ctakes.dictionary.cased.dictionary.JdbcDictionary;
import org.apache.ctakes.dictionary.cased.encoder.BsvEncoder;
import org.apache.ctakes.dictionary.cased.encoder.BsvListEncoder;
import org.apache.ctakes.dictionary.cased.encoder.EncoderStore;
import org.apache.ctakes.dictionary.cased.encoder.JdbcEncoder;
import org.apache.ctakes.dictionary.cased.encoder.TermEncoder;
import org.apache.ctakes.dictionary.cased.encoder.TermEncoding;
import org.apache.ctakes.dictionary.cased.lookup.DiscoveredTerm;
import org.apache.ctakes.dictionary.cased.lookup.LookupEngine;
import org.apache.ctakes.dictionary.cased.lookup.LookupToken;
import org.apache.ctakes.dictionary.lookup2.ae.JCasTermAnnotator;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.utils.env.EnvironmentVariable;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

/**
 * UIMA annotator that performs dictionary lookup over the {@link BaseToken}s of each
 * {@link Sentence}, gathers the {@link TermEncoding}s of every discovered term from the
 * registered {@link TermEncoder}s, and delegates annotation creation to an
 * {@link AnnotationCreator} chosen from the subsumption parameters.
 *
 * <p>Dictionaries and encoders are created during {@link #initialize(UimaContext)} and
 * registered with {@link DictionaryStore} and {@link EncoderStore}.
 */
@PipeBitInfo(name = "CasedAnnotationFinder", description = "Finds all-uppercase or normal terms in text.", role = PipeBitInfo.Role.ANNOTATOR, dependencies = {PipeBitInfo.TypeProduct.BASE_TOKEN, PipeBitInfo.TypeProduct.SENTENCE}, products = {PipeBitInfo.TypeProduct.IDENTIFIED_ANNOTATION})
/* loaded from: input_file:org/apache/ctakes/dictionary/cased/ae/CasedAnnotationFinder.class */
public final class CasedAnnotationFinder extends JCasAnnotator_ImplBase {
    /** Suffix appended to a dictionary name to form the parameter holding its type. */
    public static final String DICTIONARY_TYPE = "_type";
    /** Suffix appended to an encoder name to form the parameter holding its type. */
    public static final String ENCODER_TYPE = "_type";

    @ConfigurationParameter(name = "dictionaries", mandatory = true, description = "Dictionaries to use for lookup.")
    private String[] _dictionaries;
    // Not referenced anywhere in this class.
    private static final String snomed_rxnorm_2020aa_type = "Jdbc";
    private static final String CONS_SKIP_PRP_KEY = "consecutiveSkips";
    private static final String TOTAL_SKIP_PRP_KEY = "totalSkips";

    @ConfigurationParameter(name = "encoders", mandatory = true, description = "Term Encoders with schemas and schema codes.")
    private String[] _encoders;
    // Not referenced anywhere in this class.
    private boolean _allowSkipping;
    private AnnotationCreator _annotationCreator;
    private static final Logger LOGGER = Logger.getLogger("CasedAnnotationFinder");
    private static final String[] VERB_POS = {"VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "VV", "VVD", "VVG", "VVN", "VVP", "VVZ"};
    private static final String[] NOUN_POS = {"NN", "NNS", "NP", "NPS", "NNP", "NNPS"};
    private static final String[] ADJECTIVE_POS = {"JJ", "JJR", "JJS"};
    private static final String[] ADVERB_POS = {"RB", "RBR", "RBS"};
    /** Accepts every token except newline tokens. */
    private static final Predicate<BaseToken> isWantedToken = baseToken -> !(baseToken instanceof NewlineToken);

    @ConfigurationParameter(name = "lookupVerbs", mandatory = false, description = "Use Verb parts of speech for lookup.")
    private String _lookupVerbs = "yes";

    @ConfigurationParameter(name = "lookupNouns", mandatory = false, description = "Use Noun parts of speech for lookup.")
    private String _lookupNouns = "yes";

    @ConfigurationParameter(name = "lookupAdjectives", mandatory = false, description = "Use Adjective parts of speech for lookup.")
    private String _lookupAdjectives = "yes";

    @ConfigurationParameter(name = "lookupAdverbs", mandatory = false, description = "Use Adverb parts of speech for lookup.")
    private String _lookupAdverbs = "yes";

    @ConfigurationParameter(name = "otherLookups", mandatory = false, description = "List of other parts of speech for lookup.")
    private String[] _otherLookups = new String[0];

    @ConfigurationParameter(name = JCasTermAnnotator.PARAM_MIN_SPAN_KEY, mandatory = false, description = "Minimum number of characters for a term.")
    protected int _minLookupSpan = 3;

    @ConfigurationParameter(name = "allowWordSkips", mandatory = false, description = "Terms may include words that do not match.  So-called loose matching.")
    protected String _allowSkips = "no";

    @ConfigurationParameter(name = CONS_SKIP_PRP_KEY, mandatory = false, description = "Number of consecutive non-comma tokens that can be skipped.")
    private int _consecutiveSkipMax = 2;

    @ConfigurationParameter(name = TOTAL_SKIP_PRP_KEY, mandatory = false, description = "Number of total tokens that can be skipped.")
    private int _totalSkipMax = 4;

    @ConfigurationParameter(name = "subsume", mandatory = false, description = "Subsume contained terms of the same semantic group.", defaultValue = {"yes"})
    private String _subsume = "yes";

    @ConfigurationParameter(name = "subsumeSemantics", mandatory = false, description = "Subsume contained terms of the same and certain other semantic groups.", defaultValue = {"yes"})
    private String _subsumeSemantics = "yes";

    @ConfigurationParameter(name = "reassignSemantics", mandatory = false, description = "Reassign Semantic Types (TUIs) to non-default Semantic Groups.")
    private String[] _reassignSemanticList = new String[0];
    /** Part-of-speech tags that make a word token eligible for lookup; filled by {@link #setupPos()}. */
    private final Collection<String> _lookupPos = new HashSet<>();
    /** TUI to semantic group overrides; filled by {@link #setupReassignSemantics()}. */
    private final Map<SemanticTui, SemanticGroup> _semanticReassignment = new HashMap<>();
    /** Wraps a base token in a lookup token, flagged with the result of {@link #isValidLookup(BaseToken)}. */
    private final Function<BaseToken, LookupToken> toLookupToken =
            baseToken -> new LookupToken(baseToken, isValidLookup(baseToken));

    /**
     * Selects the annotation creator from the subsumption parameters, then sets up
     * dictionaries, encoders, lookup parts of speech and semantic reassignments.
     *
     * @param uimaContext context supplied by the UIMA framework
     * @throws ResourceInitializationException if the superclass fails to initialize, a dictionary
     *         or encoder cannot be created, or no part of speech is selected for lookup
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        LOGGER.info("Initializing Dictionary Lookup ...");
        super.initialize(uimaContext);
        // Semantic subsumption takes precedence over same-group subsumption.
        if (isParameterTrue(this._subsumeSemantics)) {
            this._annotationCreator = new SemanticSubsumingAnnotationCreator();
        } else if (isParameterTrue(this._subsume)) {
            this._annotationCreator = new AlikeSubsumingAnnotationCreator();
        } else {
            this._annotationCreator = new NonSubsumingAnnotationCreator();
        }
        setupDictionaries(uimaContext);
        setupEncoders(uimaContext);
        setupPos();
        setupReassignSemantics();
    }

    /**
     * @param str parameter value, may be null
     * @return true if the value is "yes" or "true", ignoring case; false otherwise, including for null
     */
    private static boolean isParameterTrue(String str) {
        // Literal-first comparison keeps a null value from raising a NullPointerException.
        return "yes".equalsIgnoreCase(str) || "true".equalsIgnoreCase(str);
    }

    /**
     * Creates every configured dictionary and registers it with the {@link DictionaryStore}.
     *
     * @param uimaContext context used to resolve dictionary settings
     * @throws ResourceInitializationException if no dictionary is configured or one cannot be created
     */
    private void setupDictionaries(UimaContext uimaContext) throws ResourceInitializationException {
        if (this._dictionaries == null || this._dictionaries.length == 0) {
            final String message = "Dictionary List is empty.  Consider using the default cTAKES Dictionary.  If you are using a piper file, add the line \"load sno_rx_16ab_settings\"";
            LOGGER.error(message);
            // Attach the reason as the cause so it is not only visible in the log.
            throw new ResourceInitializationException(new IllegalArgumentException(message));
        }
        for (String name : this._dictionaries) {
            final CasedDictionary dictionary = createDictionary(name, uimaContext);
            if (dictionary == null) {
                final String message = "Could not create Dictionary for " + name;
                LOGGER.error(message);
                throw new ResourceInitializationException(new IllegalStateException(message));
            }
            DictionaryStore.getInstance().addDictionary(dictionary);
        }
    }

    /**
     * Creates the dictionary whose type is given by the parameter {@code <name>_type}.
     * Recognized types, ignoring case, are JDBC, BSV and BSV_LIST.
     *
     * @param str         dictionary name
     * @param uimaContext context used to resolve the type parameter and passed to the dictionary
     * @return the dictionary, or null if the type is absent or unknown or an SQLException was raised
     */
    private CasedDictionary createDictionary(String str, UimaContext uimaContext) {
        final String type = EnvironmentVariable.getEnv(str + DICTIONARY_TYPE, uimaContext);
        if (type == null || type.equals(EnvironmentVariable.NOT_PRESENT)) {
            LOGGER.error("No Dictionary Type specified for " + str + ".  Please set parameter " + str + "_type");
            return null;
        }
        try {
            // Locale.ROOT keeps the comparison independent of the default locale's casing rules.
            switch (type.toUpperCase(Locale.ROOT)) {
                case "JDBC":
                    return new JdbcDictionary(str, uimaContext);
                case "BSV":
                    return new BsvDictionary(str, uimaContext);
                case "BSV_LIST":
                    return new BsvListDictionary(str, uimaContext);
                default:
                    LOGGER.error("Unknown Dictionary type " + type + " specified for " + str);
                    return null;
            }
        } catch (SQLException e) {
            // Pass the throwable so the stack trace is logged, not just the message.
            LOGGER.error(e.getMessage(), e);
            return null;
        }
    }

    /**
     * Creates every configured term encoder and registers it with the {@link EncoderStore}.
     *
     * @param uimaContext context used to resolve encoder settings
     * @throws ResourceInitializationException if no encoder is configured or one cannot be created
     */
    private void setupEncoders(UimaContext uimaContext) throws ResourceInitializationException {
        if (this._encoders == null || this._encoders.length == 0) {
            final String message = "Term Encoder List is empty.  Consider using the default cTAKES Term Encoder.  If you are using a piper file, add the line \"load sno_rx_2020aa_settings\"";
            LOGGER.error(message);
            throw new ResourceInitializationException(new IllegalArgumentException(message));
        }
        for (String name : this._encoders) {
            final TermEncoder encoder = createEncoder(name, uimaContext);
            if (encoder == null) {
                final String message = "Could not create Term Encoder for " + name;
                LOGGER.error(message);
                throw new ResourceInitializationException(new IllegalStateException(message));
            }
            EncoderStore.getInstance().addEncoder(encoder);
        }
    }

    /**
     * Creates the term encoder whose type is given by the parameter {@code <name>_type}.
     * Recognized types, ignoring case, are JDBC, BSV and BSV_LIST.
     *
     * @param str         encoder name
     * @param uimaContext context used to resolve the type parameter and passed to the encoder
     * @return the encoder, or null if the type is absent or unknown or an SQLException was raised
     */
    private TermEncoder createEncoder(String str, UimaContext uimaContext) {
        final String type = EnvironmentVariable.getEnv(str + ENCODER_TYPE, uimaContext);
        if (type == null || type.equals(EnvironmentVariable.NOT_PRESENT)) {
            LOGGER.error("No Term Encoder Type specified for " + str + ".  Please set parameter " + str + "_type");
            return null;
        }
        try {
            // Locale.ROOT keeps the comparison independent of the default locale's casing rules.
            switch (type.toUpperCase(Locale.ROOT)) {
                case "JDBC":
                    return new JdbcEncoder(str, uimaContext);
                case "BSV":
                    return new BsvEncoder(str, uimaContext);
                case "BSV_LIST":
                    return new BsvListEncoder(str, uimaContext);
                default:
                    LOGGER.error("Unknown Term Encoder type " + type + " specified for " + str);
                    return null;
            }
        } catch (SQLException e) {
            // Pass the throwable so the stack trace is logged, not just the message.
            LOGGER.error(e.getMessage(), e);
            return null;
        }
    }

    /**
     * Collects the part-of-speech tags enabled by the lookup parameters into {@code _lookupPos}.
     *
     * @throws ResourceInitializationException if no part of speech ends up selected
     */
    private void setupPos() throws ResourceInitializationException {
        if (isTrue(this._lookupVerbs)) {
            this._lookupPos.addAll(Arrays.asList(VERB_POS));
        }
        if (isTrue(this._lookupNouns)) {
            this._lookupPos.addAll(Arrays.asList(NOUN_POS));
        }
        if (isTrue(this._lookupAdjectives)) {
            this._lookupPos.addAll(Arrays.asList(ADJECTIVE_POS));
        }
        if (isTrue(this._lookupAdverbs)) {
            this._lookupPos.addAll(Arrays.asList(ADVERB_POS));
        }
        if (this._otherLookups.length != 0) {
            this._lookupPos.addAll(Arrays.asList(this._otherLookups));
        }
        if (this._lookupPos.isEmpty()) {
            LOGGER.error("No Parts of Speech indicated for Lookup.  At least one Part of Speech must be used.");
            throw new ResourceInitializationException();
        }
        LOGGER.info("Using Parts of Speech " + String.join(", ", this._lookupPos));
    }

    /**
     * Parses each {@code key:value} entry of the reassignment parameter into a
     * TUI to semantic group override. Entries that do not split into exactly two
     * parts are skipped with a warning.
     */
    private void setupReassignSemantics() {
        // Guard the configurable array; the target map is final and always initialised.
        if (this._reassignSemanticList == null || this._reassignSemanticList.length == 0) {
            return;
        }
        for (String reassignment : this._reassignSemanticList) {
            final String[] keyValue = StringUtil.fastSplit(reassignment, ':');
            if (keyValue.length != 2) {
                LOGGER.warn("Improper Key : Value pair for Semantic Reassignment " + reassignment);
                continue;
            }
            this._semanticReassignment.put(SemanticTui.getTui(keyValue[0].trim()), SemanticGroup.getGroup(keyValue[1].trim()));
        }
        final String summary = this._semanticReassignment.entrySet().stream()
                .map(entry -> entry.getKey().getSemanticType() + " : " + entry.getValue().getLongName())
                .collect(Collectors.joining(", "));
        LOGGER.info("Reassigned Semantics: " + summary);
    }

    /**
     * Discovers terms sentence by sentence, resolves the encodings of each
     * discovered term, and hands both to the annotation creator.
     *
     * @param jCas the CAS to read sentences and tokens from and to annotate
     * @throws AnalysisEngineProcessException declared by the overridden method
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        LOGGER.info("Finding Named Entities ...");
        final Map<Sentence, ? extends Collection<BaseToken>> sentenceTokens =
                JCasUtil.indexCovered(jCas, Sentence.class, BaseToken.class);
        final Map<Pair<Integer>, Collection<DiscoveredTerm>> discoveredTerms = new HashMap<>();
        try {
            for (Collection<BaseToken> tokens : sentenceTokens.values()) {
                discoveredTerms.putAll(getDiscoveredTerms(tokens));
            }
        } catch (ArrayIndexOutOfBoundsException e) {
            // Best effort: keep the terms gathered so far and continue.
            LOGGER.warn(e.getMessage(), e);
        }
        // flatMap consumes inner streams sequentially, so a plain stream() suffices.
        // The merge function keeps the first encodings if equal terms occur under several
        // spans; without it Collectors.toMap throws IllegalStateException on a duplicate key.
        final Map<DiscoveredTerm, Collection<TermEncoding>> termEncodings = discoveredTerms.values().stream()
                .flatMap(Collection::stream)
                .collect(Collectors.toMap(Function.identity(), this::getEncodings, (first, second) -> first));
        createAnnotations(jCas, discoveredTerms, termEncodings);
    }

    /**
     * Delegates annotation creation to the configured {@link AnnotationCreator},
     * passing along the semantic reassignment map.
     *
     * @param jCas the CAS to annotate
     * @param map  discovered terms keyed by span
     * @param map2 encodings keyed by discovered term
     */
    private void createAnnotations(JCas jCas, Map<Pair<Integer>, Collection<DiscoveredTerm>> map, Map<DiscoveredTerm, Collection<TermEncoding>> map2) {
        this._annotationCreator.createAnnotations(jCas, map, map2, this._semanticReassignment);
    }

    /**
     * @param discoveredTerm term to encode
     * @return the set of encodings returned for the term by all registered encoders;
     *         null results from individual encoders are ignored
     */
    private Collection<TermEncoding> getEncodings(DiscoveredTerm discoveredTerm) {
        return EncoderStore.getInstance().getEncoders().stream()
                .map(encoder -> encoder.getEncodings(discoveredTerm))
                .filter(Objects::nonNull)
                .flatMap(Collection::stream)
                .collect(Collectors.toSet());
    }

    /**
     * Flattens the per-dictionary results of {@link #findTerms(Collection)} into a single map.
     *
     * @param collection tokens of one lookup window (a sentence, as called from {@code process})
     * @return discovered terms keyed by span; terms reported by several dictionaries for
     *         equal span keys are combined into one collection
     */
    public Map<Pair<Integer>, Collection<DiscoveredTerm>> getDiscoveredTerms(Collection<BaseToken> collection) {
        return findTerms(collection).values().stream()
                .map(Map::entrySet)
                .flatMap(Collection::stream)
                // Without a merge function toMap throws IllegalStateException when two
                // dictionaries yield equal span keys; combine their terms instead.
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (first, second) -> {
                    final Collection<DiscoveredTerm> merged = new ArrayList<>(first);
                    merged.addAll(second);
                    return merged;
                }));
    }

    /**
     * Prepares the lookup tokens for the given base tokens (newline tokens removed,
     * ordered by begin offset) and is meant to collect the terms of each registered dictionary.
     *
     * @param collection tokens of one lookup window
     * @return map from dictionary to its discovered terms keyed by span
     */
    private Map<CasedDictionary, Map<Pair<Integer>, Collection<DiscoveredTerm>>> findTerms(Collection<BaseToken> collection) {
        final Collection<CasedDictionary> dictionaries = DictionaryStore.getInstance().getDictionaries();
        final Map<CasedDictionary, Map<Pair<Integer>, Collection<DiscoveredTerm>>> termsByDictionary =
                new HashMap<>(dictionaries.size());
        final List<LookupToken> lookupTokens = collection.stream()
                .filter(isWantedToken)
                .sorted(Comparator.comparingInt(BaseToken::getBegin))
                .map(this.toLookupToken)
                .collect(Collectors.toList());
        final LookupEngine lookupEngine = getLookupEngine();
        // NOTE(review): the body of this lambda is empty, so nothing is ever put into the
        // result map and lookupTokens / lookupEngine go unused. The body was presumably lost
        // in decompilation; it likely stored the engine's result for each dictionary.
        // TODO restore it from the original source; the LookupEngine API is not visible here.
        dictionaries.forEach(casedDictionary -> {
        });
        return termsByDictionary;
    }

    /**
     * @param baseToken token to test
     * @return true if the token is a {@link WordToken} at least {@code _minLookupSpan}
     *         characters long whose part of speech is either null or one of the lookup tags
     */
    private boolean isValidLookup(BaseToken baseToken) {
        if (!(baseToken instanceof WordToken)) {
            return false;
        }
        final int span = baseToken.getEnd() - baseToken.getBegin();
        if (span < this._minLookupSpan) {
            return false;
        }
        final String partOfSpeech = baseToken.getPartOfSpeech();
        return partOfSpeech == null || this._lookupPos.contains(partOfSpeech);
    }

    /**
     * @return a new {@link LookupEngine}
     */
    private LookupEngine getLookupEngine() {
        return new LookupEngine();
    }

    /**
     * Converts a parameter value to an int.
     *
     * @param obj value to convert; an Integer is unboxed and a String is parsed
     * @param str name of the parameter, used only in the warning message
     * @param i   value returned when conversion is not possible
     * @return the converted value, or {@code i} after logging a warning
     */
    protected static int parseInt(Object obj, String str, int i) {
        if (obj instanceof Integer) {
            return (Integer) obj;
        }
        if (obj instanceof String) {
            try {
                return Integer.parseInt((String) obj);
            } catch (NumberFormatException ignored) {
                // Fall through to the shared warning and default below.
            }
        }
        LOGGER.warn("Could not parse " + str + " " + obj + " as an integer");
        return i;
    }

    /**
     * @param str parameter value, may be null
     * @return true if the value is "yes" or "true", ignoring case; false otherwise, including for null
     */
    private static boolean isTrue(String str) {
        return isParameterTrue(str);
    }
}
