package org.moara.yido.ner;

import com.seomse.commons.data.BeginEnd;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.moara.yido.ner.entity.NamedEntity;

/* loaded from: input_file:org/moara/yido/ner/PersonNamedEntityRecognizer.class */
/**
 * Recognizes named entities that appear directly in front of a configured target word.
 *
 * <p>The input is first normalized by {@link #textPreprocessing(String)}: every character
 * that is neither a Hangul syllable ({@code 가-힣}) nor one of the configured "multiple"
 * symbols becomes a space, and every multiple symbol becomes the marker {@code "M"}.
 * For each occurrence of a target word, the run of characters between the preceding space
 * and the target word is then split on the marker, and each piece whose length lies within
 * the configured range and which contains no exception word is reported as an entity.
 */
class PersonNamedEntityRecognizer implements NamedEntityRecognizer {
    /** Marker that replaces every multiple symbol in the preprocessed text. */
    private static final String SYMBOL_MARKER = "M";

    protected final String[] targetWords;
    protected final String[] exceptionWords;
    protected final String[] multipleSymbols;
    /** The multiple symbols, each prefixed with a backslash, for use inside a regex character class. */
    protected final String multipleSymbolRegx;
    protected final String entityType;
    private final int minEntityLength;
    private final int maxEntityLength;

    /**
     * Creates a recognizer.
     *
     * @param targetWords     words whose preceding token is treated as an entity candidate
     * @param exceptionWords  a candidate containing any of these words is rejected
     * @param multipleSymbols symbols that separate several candidates inside one token
     * @param entityType      type label passed to every created {@link NamedEntity}
     * @param lengthRange     inclusive minimum ({@code getBegin()}) and maximum
     *                        ({@code getEnd()}) candidate length
     */
    public PersonNamedEntityRecognizer(String[] targetWords, String[] exceptionWords, String[] multipleSymbols, String entityType, BeginEnd lengthRange) {
        this.targetWords = targetWords;
        this.exceptionWords = exceptionWords;
        this.multipleSymbols = multipleSymbols;
        this.entityType = entityType;
        this.minEntityLength = lengthRange.getBegin();
        this.maxEntityLength = lengthRange.getEnd();
        StringBuilder escapedSymbols = new StringBuilder();
        for (String symbol : multipleSymbols) {
            escapedSymbols.append("\\").append(symbol);
        }
        this.multipleSymbolRegx = escapedSymbols.toString();
    }

    /**
     * Finds all entities in the given text.
     *
     * @param str text to analyze
     * @return the distinct recognized entities, sorted by ascending {@code getBegin()}
     */
    @Override // org.moara.yido.ner.NamedEntityRecognizer
    public NamedEntity[] recognize(String str) {
        return recognizeEntities(textPreprocessing(str)).stream()
                .sorted(Comparator.comparingInt(NamedEntity::getBegin))
                .toArray(NamedEntity[]::new);
    }

    /**
     * Normalizes the text for entity lookup.
     *
     * <p>The text is padded with one space on each side. A multiple symbol that directly
     * follows a target word (spaces in the target word ignored) is blanked, every other
     * non-Hangul, non-symbol character is blanked, and each remaining multiple symbol is
     * replaced by {@code "M"}. All replacements keep the number of UTF-16 chars unchanged,
     * so an index in the result equals the index in the original text plus one.
     *
     * @param str raw input text
     * @return the normalized, space-padded text
     */
    protected String textPreprocessing(String str) {
        String text = " " + str + " ";
        // An empty character class "[]" is not a valid pattern, so skip symbol handling
        // entirely when no multiple symbols are configured.
        boolean hasSymbols = !this.multipleSymbolRegx.isEmpty();
        if (hasSymbols) {
            String symbolClass = "[" + this.multipleSymbolRegx + "]";
            for (String targetWord : this.targetWords) {
                String compactTarget = targetWord.replace(" ", "");
                // Quote the target word so that regex metacharacters in it are taken literally;
                // keep the target word itself and blank only the symbol that follows it.
                text = overwriteMatches(text, Pattern.quote(compactTarget) + symbolClass, compactTarget.length(), ' ');
            }
        }
        text = overwriteMatches(text, "[^가-힣" + this.multipleSymbolRegx + "]", 0, ' ');
        if (hasSymbols) {
            text = overwriteMatches(text, "[" + this.multipleSymbolRegx + "]", 0, SYMBOL_MARKER.charAt(0));
        }
        return text;
    }

    /**
     * Collects the entities found for every target word.
     *
     * @param str text already normalized by {@link #textPreprocessing(String)}
     * @return the distinct entities found
     */
    protected Set<NamedEntity> recognizeEntities(String str) {
        Set<NamedEntity> entities = new HashSet<>();
        for (String targetWord : this.targetWords) {
            entities.addAll(getEntities(str, targetWord));
        }
        return entities;
    }

    /**
     * Overwrites every match of {@code regex} with {@code filler}, char for char, leaving the
     * first {@code keptPrefixLength} chars of each match untouched. Unlike
     * {@link String#replaceAll(String, String)} with a one-char replacement, this never changes
     * the length of the text, even when a match is a surrogate pair.
     */
    private static String overwriteMatches(String text, String regex, int keptPrefixLength, char filler) {
        Matcher matcher = Pattern.compile(regex).matcher(text);
        if (!matcher.find()) {
            return text;
        }
        char[] chars = text.toCharArray();
        do {
            for (int i = matcher.start() + keptPrefixLength; i < matcher.end(); i++) {
                chars[i] = filler;
            }
        } while (matcher.find());
        return new String(chars);
    }

    /** Extracts the entities that directly precede each occurrence of one target word. */
    private List<NamedEntity> getEntities(String text, String targetWord) {
        List<NamedEntity> entities = new ArrayList<>();
        int searchFrom = 0;
        while (searchFrom < text.length()) {
            int targetIndex = text.indexOf(targetWord, searchFrom);
            if (targetIndex == -1) {
                break;
            }
            searchFrom = targetIndex + 1;
            // Start of the token that ends at the target word: one past the last space before it.
            int tokenBegin = text.lastIndexOf(' ', targetIndex - 1) + 1;
            // No space before the target word, or nothing between the space and the target word.
            if (tokenBegin == 0 || tokenBegin == targetIndex) {
                continue;
            }
            int candidateBegin = tokenBegin;
            for (String candidate : text.substring(tokenBegin, targetIndex).split(SYMBOL_MARKER)) {
                int candidateEnd = candidateBegin + candidate.length();
                if (isAcceptable(candidate)) {
                    // Subtract 1 to undo the leading pad space added during preprocessing.
                    entities.add(new NamedEntity(candidate, this.entityType, candidateBegin - 1, candidateEnd - 1));
                }
                // Skip the marker that separated this candidate from the next one.
                candidateBegin = candidateEnd + 1;
            }
        }
        return entities;
    }

    /** Returns whether the candidate has an allowed length and contains no exception word. */
    private boolean isAcceptable(String candidate) {
        int length = candidate.length();
        if (length < this.minEntityLength || length > this.maxEntityLength) {
            return false;
        }
        for (String exceptionWord : this.exceptionWords) {
            if (candidate.contains(exceptionWord)) {
                return false;
            }
        }
        return true;
    }
}
