package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Tag;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.WordTag;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import py4j.Protocol;

/* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ChineseUnknownWordModel.class */
/**
 * Unknown-word model that scores a (word, tag) pair for Chinese text.
 *
 * <p>Words that look like dates, numbers, ordinals or transliterated proper names
 * (detected with the regular expressions below) are forced onto a single tag:
 * the matching tag scores {@code 0.0f} and every other tag scores
 * {@link Float#NEGATIVE_INFINITY}. All other words are scored either from
 * per-tag counts keyed on the word's first character ({@code useFirst}) or by
 * the inherited {@code scoreGT} ({@code useGT}).
 *
 * <p>NOTE(review): the 0 / negative-infinity values look like log-probabilities;
 * confirm against {@code BaseUnknownWordModel}.
 */
public class ChineseUnknownWordModel extends BaseUnknownWordModel {
    private static final String encoding = "GB18030";
    private final boolean useUnicodeType;
    private static final String numberMatch = ".*[0-9０-９一二三四五六七八九十百千万亿零〇○◯].*";
    private static final String dateMatch = ".*[0-9０-９一二三四五六七八九十百千万亿零〇○◯].*[年月日号]";
    private static final String ordinalMatch = "第.*";
    private static final String properNameMatch = ".*[··•․‧∙⋅・].*";

    // Precompiled forms of the expressions above. String.matches(regex) recompiles the
    // regex on every call; score() is invoked per (word, tag) pair, so compile once.
    private static final Pattern NUMBER_PATTERN = Pattern.compile(numberMatch);
    private static final Pattern DATE_PATTERN = Pattern.compile(dateMatch);
    private static final Pattern ORDINAL_PATTERN = Pattern.compile(ordinalMatch);
    private static final Pattern PROPER_NAME_PATTERN = Pattern.compile(properNameMatch);

    /** Fallback key used in the per-tag counters when the first-character key is unseen. */
    private static final String UNKNOWN_KEY = "UNK";

    /** First-character keys (or Unicode-type codes) considered "seen"; supplied by the caller. */
    private final Set<String> seenFirst;
    private static final long serialVersionUID = 221L;

    /**
     * Creates a model from already-collected statistics.
     *
     * @param options parser options; {@code options.lexOptions.useUnicodeType} is read here
     * @param lexicon passed through to the superclass
     * @param index passed through to the superclass (first index argument)
     * @param index2 passed through to the superclass (second index argument)
     * @param classicCounter passed through to the superclass
     * @param map passed through to the superclass
     * @param map2 passed through to the superclass
     * @param z when {@code true} sets {@code useGT} and clears {@code useFirst};
     *          when {@code false} the reverse
     * @param set the set of seen first-character keys consulted by {@link #score}
     */
    public ChineseUnknownWordModel(Options options, Lexicon lexicon, Index<String> index, Index<String> index2, ClassicCounter<IntTaggedWord> classicCounter, Map<Label, ClassicCounter<String>> map, Map<String, Float> map2, boolean z, Set<String> set) {
        super(options, lexicon, index, index2, classicCounter, map, map2, null);
        this.useFirst = !z;
        this.useGT = z;
        this.useUnicodeType = options.lexOptions.useUnicodeType;
        this.seenFirst = set;
    }

    /**
     * Creates a model with empty statistics, {@code useFirst} enabled and {@code useGT} disabled.
     *
     * @param options parser options
     * @param lexicon passed through to the superclass
     * @param index passed through to the superclass (first index argument)
     * @param index2 passed through to the superclass (second index argument)
     */
    public ChineseUnknownWordModel(Options options, Lexicon lexicon, Index<String> index, Index<String> index2) {
        this(options, lexicon, index, index2,
                new ClassicCounter<IntTaggedWord>(),
                Generics.<Label, ClassicCounter<String>>newHashMap(),
                Generics.<String, Float>newHashMap(),
                false,
                Generics.<String>newHashSet());
    }

    /**
     * Scores the given word under the tag carried by {@code intTaggedWord}.
     *
     * <p>Checks are applied in this order; the first that matches decides the result:
     * <ol>
     *   <li>date pattern: only tag {@code NT} scores 0;</li>
     *   <li>number pattern: {@code OD} scores 0 if the word also matches the ordinal
     *       pattern, otherwise {@code CD} scores 0;</li>
     *   <li>proper-name pattern: only tag {@code NR} scores 0;</li>
     *   <li>{@code useFirst}: look up the first-character key in the counter for the tag;</li>
     *   <li>otherwise {@code scoreGT} if {@code useGT}, else negative infinity.</li>
     * </ol>
     *
     * @param intTaggedWord supplies the tag via {@code tagString(tagIndex)}
     * @param str the word to score; an empty string that reaches the first-character
     *            branch throws {@link StringIndexOutOfBoundsException}
     * @return the score for the (word, tag) pair
     */
    @Override // edu.stanford.nlp.parser.lexparser.BaseUnknownWordModel
    public float score(IntTaggedWord intTaggedWord, String str) {
        String tagString = intTaggedWord.tagString(this.tagIndex);

        if (DATE_PATTERN.matcher(str).matches()) {
            return zeroIf(tagString.equals("NT"));
        }
        if (NUMBER_PATTERN.matcher(str).matches()) {
            // Ordinal-looking numbers must be OD; every other number must be CD.
            boolean ordinal = ORDINAL_PATTERN.matcher(str).matches();
            return zeroIf(tagString.equals(ordinal ? "OD" : "CD"));
        }
        if (PROPER_NAME_PATTERN.matcher(str).matches()) {
            return zeroIf(tagString.equals("NR"));
        }

        if (this.useFirst) {
            String key = str.substring(0, 1);
            if (this.useUnicodeType) {
                int type = Character.getType(str.charAt(0));
                // Characters that are not OTHER_LETTER are keyed by their Unicode general category.
                if (type != Character.OTHER_LETTER) {
                    key = Integer.toString(type);
                }
            }
            if (!this.seenFirst.contains(key)) {
                if (this.useGT) {
                    // Return right away: previously this value was computed and then
                    // overwritten by the counter lookup below, so it never took effect.
                    return scoreGT(tagString);
                }
                key = UNKNOWN_KEY;
            }
            ClassicCounter<String> counts = this.tagHash.get(new Tag(tagString));
            if (counts == null) {
                // No counter recorded for this tag.
                return Float.NEGATIVE_INFINITY;
            }
            String lookup = counts.containsKey(key) ? key : UNKNOWN_KEY;
            return (float) counts.getCount(lookup);
        }

        return this.useGT ? scoreGT(tagString) : Float.NEGATIVE_INFINITY;
    }

    /** Returns {@code 0.0f} when {@code allowed} is true, otherwise negative infinity. */
    private static float zeroIf(boolean allowed) {
        return allowed ? 0.0f : Float.NEGATIVE_INFINITY;
    }

    /**
     * Manual smoke test: prints whether sample strings match the patterns, then prints
     * equality results for {@code TaggedWord} and {@code WordTag} instances.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        System.out.println("Testing unknown matching");
        report(PROPER_NAME_PATTERN.matcher("刘·革命").matches(), "names");
        report(NUMBER_PATTERN.matcher("３０００").matches(), "numbers");
        report(NUMBER_PATTERN.matcher("百分之四十三点二").matches(), "numbers");
        report(NUMBER_PATTERN.matcher("百分之三十八点六").matches(), "numbers");
        report(DATE_PATTERN.matcher("三月").matches(), "dates");

        System.out.println("Testing tagged word");
        ClassicCounter<TaggedWord> counter = new ClassicCounter<TaggedWord>();
        TaggedWord first = new TaggedWord(OfficeOpenXMLExtended.WORD_PROCESSING_PREFIX, Protocol.ENTRY_POINT_OBJECT_ID);
        counter.incrementCount(first);
        TaggedWord second = new TaggedWord(OfficeOpenXMLExtended.WORD_PROCESSING_PREFIX, "t2");
        System.out.println(counter.containsKey(second));
        System.out.println(first.equals(second));

        WordTag firstAsWordTag = toWordTag(first);
        WordTag secondAsWordTag = toWordTag(second);
        WordTag direct = new WordTag(OfficeOpenXMLExtended.WORD_PROCESSING_PREFIX, "t2");
        System.out.println(firstAsWordTag.equals(secondAsWordTag));
        System.out.println(secondAsWordTag.equals(direct));
    }

    /** Prints the success or failure line used by {@link #main} for one pattern check. */
    private static void report(boolean matched, String what) {
        System.out.println((matched ? "hooray " : "Uh-oh ") + what + "!");
    }

    /** Builds a {@code WordTag} from the word and tag of the given {@code TaggedWord}. */
    private static WordTag toWordTag(TaggedWord taggedWord) {
        return new WordTag(taggedWord.word(), taggedWord.tag());
    }

    /**
     * Not supported by this model.
     *
     * @throws UnsupportedOperationException always
     */
    @Override // edu.stanford.nlp.parser.lexparser.BaseUnknownWordModel, edu.stanford.nlp.parser.lexparser.UnknownWordModel
    public String getSignature(String str, int i) {
        throw new UnsupportedOperationException("ChineseUnknownWordModel does not compute word signatures");
    }
}
