package org.apache.ctakes.dictionary.cased.util.tokenize;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.ctakes.dictionary.cased.lookup.CandidateTerm;
import org.apache.log4j.Logger;

/* loaded from: input_file:org/apache/ctakes/dictionary/cased/util/tokenize/TokenizedTermMapper.class */
public final class TokenizedTermMapper {
    private static final Logger LOGGER = Logger.getLogger("TokenizedTermMapper");
    private static final Collection<String> BAD_POS_TERMS = new HashSet(Arrays.asList("zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "and", "or", "but", "for", "nor", "so", "yet", "both", "this", "that", "these", "those", "the", "all", "an", "another", "any", "each", "either", "many", "much", "neither", "no", "some", "such", "that", "the", "them", "these", "this", "those", "there", "among", "upon", "in", "into", "below", "atop", "until", "over", "under", "towards", "to", "whether", "despite", "if", "can", "should", "will", "may", "might", "must", "could", "would", "need", "ought", "shall", "cannot", "shouldn", "some", "any", "all", "both", "half", "none", "twice", "at", "before", "after", "behind", "beneath", "beside", "between", "into", "through", "across", "of", "concerning", "like", "except", "with", "without", "toward", "to", "past", "against", "during", "until", "throughout", "below", "besides", "beyond", "from", "inside", "near", "outside", "since", "upon", "my", "our", "i", "you", "he", "she", "it", "him", "himself", "we", "mine", "yours", "his", "hers", "its", "our", "ours", "theirs", "about", "off", "up", "along", "away", "back", "by", "down", "forward", "in", "on", "out", "over", "around", "under", "to", "what", "whatever", "which", "whichever", "that", "who", "whom", "which", "that", "whoever", "whomever", "whose", "how", "where", "when", "however", "wherever", "whenever", "wherein", "why"));
    private static final Collection<String> BAD_UPPER_POS_TERMS = (Collection) BAD_POS_TERMS.stream().map((v0) -> {
        return v0.toUpperCase();
    }).collect(Collectors.toSet());

    private TokenizedTermMapper() {
    }

    public static void createTermMap(Collection<TokenizedTerm> collection, Map<String, Collection<CandidateTerm>> map, Map<String, Collection<CandidateTerm>> map2, Map<String, Collection<CandidateTerm>> map3) {
        Map<String, Long> createTokenCountMap = createTokenCountMap(collection);
        for (TokenizedTerm tokenizedTerm : collection) {
            String[] tokens = tokenizedTerm.getTokens();
            int rareWordIndex = getRareWordIndex(tokens, createTokenCountMap);
            if (rareWordIndex < 0) {
                LOGGER.warn("Bad Rare Word Index for " + String.join(" ", tokens));
            } else if (tokenizedTerm.isAllUpperCase()) {
                map.computeIfAbsent(tokens[rareWordIndex], str -> {
                    return new ArrayList();
                }).add(new CandidateTerm(tokenizedTerm, rareWordIndex));
            } else if (tokenizedTerm.isAllLowerCase()) {
                map3.computeIfAbsent(tokens[rareWordIndex], str2 -> {
                    return new ArrayList();
                }).add(new CandidateTerm(tokenizedTerm, rareWordIndex));
            } else {
                map2.computeIfAbsent(tokens[rareWordIndex], str3 -> {
                    return new ArrayList();
                }).add(new CandidateTerm(tokenizedTerm, rareWordIndex));
            }
        }
    }

    private static Map<String, Long> createTokenCountMap(Collection<TokenizedTerm> collection) {
        return (Map) collection.stream().map((v0) -> {
            return v0.getTokens();
        }).flatMap((v0) -> {
            return Arrays.stream(v0);
        }).filter(TokenizedTermMapper::isRarableToken).collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
    }

    private static int getRareWordIndex(String[] strArr, Map<String, Long> map) {
        Long l;
        if (strArr.length == 1) {
            return 0;
        }
        int i = 0;
        long j = 2147483647L;
        for (int i2 = 0; i2 < strArr.length; i2++) {
            if (isRarableToken(strArr[i2]) && (l = map.get(strArr[i2])) != null && l.longValue() < j) {
                i = i2;
                j = l.longValue();
            }
        }
        return i;
    }

    private static boolean isRarableToken(String str) {
        if (str.length() <= 1) {
            return false;
        }
        boolean z = false;
        int i = 0;
        while (true) {
            if (i >= str.length()) {
                break;
            }
            if (Character.isLetter(str.charAt(i))) {
                z = true;
                break;
            }
            i++;
        }
        return (!z || BAD_POS_TERMS.contains(str) || BAD_UPPER_POS_TERMS.contains(str)) ? false : true;
    }
}
