package org.lionsoul.jcseg.tokenizer;

import java.io.IOException;
import java.io.Reader;
import org.eclipse.jetty.util.BlockingArrayQueue;
import org.eclipse.jetty.util.URIUtil;
import org.lionsoul.jcseg.tokenizer.core.ADictionary;
import org.lionsoul.jcseg.tokenizer.core.Entity;
import org.lionsoul.jcseg.tokenizer.core.IChunk;
import org.lionsoul.jcseg.tokenizer.core.IWord;
import org.lionsoul.jcseg.tokenizer.core.JcsegTaskConfig;
import org.lionsoul.jcseg.util.IStringBuffer;
import org.lionsoul.jcseg.util.NumericUtil;

/* loaded from: input_file:org/lionsoul/jcseg/tokenizer/NLPSeg.class */
/**
 * Segmenter variant layered on {@link ComplexSeg} that emits entity-tagged
 * words for Chinese numerics, fractions and Chinese names.
 *
 * <p>NOTE(review): this source was recovered by a decompiler. Local names in
 * {@link #getNextCJKWord(int, int)} were chosen from how each value is used;
 * the meaning of the numeric lexicon indexes passed to {@code dic.match} /
 * {@code dic.get} and of the {@code ctrlMask} bits is inferred from usage and
 * should be confirmed against the dictionary/segment base classes.
 */
public class NLPSeg extends ComplexSeg {
    /**
     * Value forced into {@code MAX_LATIN_LENGTH} by the constructor. The
     * decompiler had substituted an unrelated Jetty constant
     * ({@code BlockingArrayQueue.DEFAULT_CAPACITY}) of the same value.
     */
    private static final int NLP_MAX_LATIN_LENGTH = 128;

    /**
     * Separator placed between the two parts of a converted fraction. The
     * decompiler had substituted Jetty's {@code URIUtil.SLASH} for this literal.
     */
    private static final String FRACTION_SEPARATOR = "/";

    /**
     * Creates the segmenter and overrides three settings on the supplied
     * configuration: pinyin appending and synonym appending are switched off
     * and the maximum Latin length is set to {@value #NLP_MAX_LATIN_LENGTH}.
     *
     * <p>The passed-in configuration object itself is mutated.
     *
     * @param reader          input to segment (the two-argument constructor passes {@code null})
     * @param jcsegTaskConfig task configuration; modified by this constructor
     * @param aDictionary     dictionary handed to the superclass
     * @throws IOException propagated from the superclass constructor
     */
    public NLPSeg(Reader reader, JcsegTaskConfig jcsegTaskConfig, ADictionary aDictionary) throws IOException {
        super(reader, jcsegTaskConfig, aDictionary);
        jcsegTaskConfig.APPEND_CJK_PINYIN = false;
        jcsegTaskConfig.APPEND_CJK_SYN = false;
        jcsegTaskConfig.MAX_LATIN_LENGTH = NLP_MAX_LATIN_LENGTH;
    }

    /**
     * Creates the segmenter without an input reader; delegates to
     * {@link #NLPSeg(Reader, JcsegTaskConfig, ADictionary)} with a {@code null} reader.
     *
     * @param jcsegTaskConfig task configuration; modified by the delegated constructor
     * @param aDictionary     dictionary handed to the superclass
     * @throws IOException propagated from the delegated constructor
     */
    public NLPSeg(JcsegTaskConfig jcsegTaskConfig, ADictionary aDictionary) throws IOException {
        this(null, jcsegTaskConfig, aDictionary);
    }

    /**
     * Segments the next CJK sentence completely, queues every produced word in
     * {@code wordPool} and returns the first queued word.
     *
     * <p>For each position in the sentence one of two paths is taken:
     * <ul>
     *   <li>The character is a Chinese numeric ({@code NumericUtil.isCNNumeric > -1}):
     *       the numeric run is read with {@code nextCNNumeric} and emitted as a
     *       numeric/fraction word, optionally followed by a separately queued
     *       word matched right after it in lexicon {@code 1}.</li>
     *   <li>Otherwise: the first word of the best chunk is taken, optionally
     *       merged into a Chinese name or nickname, dropped if it is a stop
     *       word, or replaced by a mixed word.</li>
     * </ul>
     *
     * @param i  passed through to {@code nextCJKSentence}
     * @param i2 base position; each word's position is {@code i2} plus its
     *           offset inside the sentence
     * @return the head of {@code wordPool}, or {@code null} when the pool is empty
     * @throws IOException propagated from the sentence/word readers
     */
    @Override // org.lionsoul.jcseg.tokenizer.ASegment
    protected IWord getNextCJKWord(int i, int i2) throws IOException {
        char[] chars = nextCJKSentence(i);
        int offset = 0;
        while (offset < chars.length) {
            int numericIdx = NumericUtil.isCNNumeric(chars[offset]);
            if (numericIdx > -1) {
                // Declared as IWord (not Word): one branch below assigns a
                // clone of a dictionary entry, which is only known as IWord.
                IWord word;
                // Word matched in lexicon 1 directly after the numeric
                // (presumably a unit; bounded by MAX_UNIT_LENGTH).
                IWord unitWord = null;
                // Length of a dictionary word that starts with the numeric; -1 when none.
                int dictLength = -1;
                String cnNumeric = nextCNNumeric(chars, offset);
                if ((this.ctrlMask & 2) == 0) {
                    // Bit 2 clear: handled as a plain numeric (the set case is
                    // split on "分之" below, i.e. treated as a fraction).
                    IStringBuffer buffer = new IStringBuffer();
                    if (numericIdx <= 10) {
                        // Scan the characters after the numeric and keep the
                        // last (longest) match found in lexicon 1.
                        int ahead = cnNumeric.length();
                        for (int k = 0; offset + ahead < chars.length && k < this.config.MAX_UNIT_LENGTH; k++) {
                            buffer.append(chars[offset + ahead]);
                            String candidate = buffer.toString();
                            if (this.dic.match(1, candidate)) {
                                unitWord = this.dic.get(1, candidate);
                            }
                            ahead++;
                        }
                    }
                    if (unitWord == null) {
                        // Nothing matched after the numeric: try to extend the
                        // numeric itself into a lexicon-0 word, keeping the
                        // last (longest) match.
                        IWord dictWord = null;
                        buffer.clear().append(cnNumeric);
                        for (int len = cnNumeric.length(); offset + len < chars.length && len < this.config.MAX_LENGTH; len++) {
                            buffer.append(chars[offset + len]);
                            String candidate = buffer.toString();
                            if (this.dic.match(0, candidate)) {
                                dictWord = this.dic.get(0, candidate);
                            }
                        }
                        if (dictWord != null) {
                            word = dictWord.m203clone();
                            dictLength = word.getLength();
                        } else if (this.config.CNNUM_TO_ARABIC) {
                            word = new Word(NumericUtil.cnNumericToArabic(cnNumeric, true) + "", 9, Entity.E_NUMERIC_ARABIC);
                            word.setPartSpeech(IWord.NUMERIC_POSPEECH);
                        } else {
                            word = new Word(cnNumeric, 9, Entity.E_NUMERIC_CN);
                            word.setPartSpeech(IWord.NUMERIC_POSPEECH);
                        }
                    } else if (this.config.CNNUM_TO_ARABIC) {
                        // Entity string is kept byte-for-byte (including its
                        // spelling) because other code may match on it.
                        word = new Word(NumericUtil.cnNumericToArabic(cnNumeric, true) + "", 9, "numeric.interger#" + unitWord.getEntity());
                        word.setPartSpeech(IWord.NUMERIC_POSPEECH);
                    } else {
                        word = new Word(cnNumeric, 1, "numeric.cn#" + unitWord.getEntity());
                        word.setPartSpeech(IWord.NUMERIC_POSPEECH);
                    }
                } else if (this.config.CNFRA_TO_ARABIC) {
                    // The text is "<first>分之<second>"; the converted form is
                    // written as "<second>/<first>".
                    String[] parts = cnNumeric.split("分之");
                    word = new Word(NumericUtil.cnNumericToArabic(parts[1], true) + FRACTION_SEPARATOR + NumericUtil.cnNumericToArabic(parts[0], true), 9, Entity.E_NUMERIC_FRACTION);
                    word.setPartSpeech(IWord.NUMERIC_POSPEECH);
                } else {
                    word = new Word(cnNumeric, 9, Entity.E_NUMERIC_CN_FRACTION);
                    word.setPartSpeech(IWord.NUMERIC_POSPEECH);
                }
                this.wordPool.add(word);
                word.setPosition(i2 + offset);
                // Advance by the dictionary word length when one was used,
                // otherwise by the length of the raw numeric text.
                offset += dictLength > 0 ? dictLength : cnNumeric.length();
                if (unitWord != null) {
                    // Queue the word that followed the numeric as its own token.
                    IWord unitClone = unitWord.m203clone();
                    unitClone.setPosition(i2 + offset);
                    this.wordPool.add(unitClone);
                    offset += unitClone.getLength();
                }
            } else {
                IChunk chunk = getBestCJKChunk(chars, offset);
                IWord current = chunk.getWords()[0];
                // Word type of a recognised name (3 or 4); -1 when no name was built.
                int nameType = -1;
                if (this.config.I_CN_NAME && current.getLength() <= 2 && chunk.getWords().length > 1) {
                    StringBuilder name = new StringBuilder();
                    name.append(current.getValue());
                    String nameTail;
                    if (this.dic.match(2, current.getValue()) && (nameTail = findCHName(chars, 0, chunk)) != null) {
                        nameType = 3;
                        name.append(nameTail);
                    } else if (this.dic.match(6, current.getValue()) && chunk.getWords()[1].getLength() <= 2 && this.dic.match(2, chunk.getWords()[1].getValue())) {
                        nameType = 4;
                        name.append(chunk.getWords()[1].getValue());
                    }
                    if (nameType != -1) {
                        current = new Word(name.toString(), nameType);
                        current.setEntity(nameType == 4 ? Entity.E_NAME_NICKNAME : Entity.E_NAME_CN);
                        current.setPartSpeech(IWord.NAME_POSPEECH);
                    }
                }
                if (this.config.CLEAR_STOPWORD && this.dic.match(7, current.getValue())) {
                    // Stop word: skip it without queuing anything.
                    offset += current.getLength();
                } else {
                    IWord mixedWord = null;
                    if ((this.ctrlMask & 1) != 0 && chars.length - offset <= this.dic.mixPrefixLength) {
                        mixedWord = getNextMixedWord(chars, offset);
                    }
                    if (mixedWord != null) {
                        current = mixedWord.m203clone();
                    } else if (nameType == -1) {
                        // Clone before mutating: in this case "current" is
                        // still the chunk's own word, not a freshly built name.
                        current = current.m203clone();
                    }
                    current.setPosition(i2 + offset);
                    this.wordPool.add(current);
                    offset += current.getLength();
                    if (nameType == -1) {
                        appendWordFeatures(current);
                    }
                }
            }
        }
        if (this.wordPool.size() == 0) {
            return null;
        }
        return this.wordPool.remove();
    }

    /*
     * WARNING: the body of nextLatinWord could not be recovered by the
     * decompiler. The method below is a placeholder that always throws, so
     * any call into it fails at runtime until the real implementation is
     * restored from the original sources. The decompiler notes are kept
     * verbatim for whoever performs that restoration.
     */
    /* JADX WARN: Code restructure failed: missing block: B:33:0x00cd, code lost:
    
        pushBack(r0);
        pushBack(r0);
        pushBack(r13);
     */
    /* JADX WARN: Removed duplicated region for block: B:222:0x02f9 A[LOOP:6: B:186:0x0297->B:222:0x02f9, LOOP_END] */
    /* JADX WARN: Removed duplicated region for block: B:223:0x02e5 A[SYNTHETIC] */
    @Override // org.lionsoul.jcseg.tokenizer.ASegment
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    protected org.lionsoul.jcseg.tokenizer.core.IWord nextLatinWord(int r7, int r8) throws java.io.IOException {
        /*
            Method dump skipped, instructions count: 1832
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: org.lionsoul.jcseg.tokenizer.NLPSeg.nextLatinWord(int, int):org.lionsoul.jcseg.tokenizer.core.IWord");
    }
}
