package org.dbpedia.spotlight.spot;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.util.Span;
import opennlp.tools.util.model.BaseModel;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dbpedia.spotlight.exceptions.ConfigurationException;
import org.dbpedia.spotlight.model.SpotlightConfiguration;
import org.dbpedia.spotlight.model.SurfaceForm;
import org.dbpedia.spotlight.model.SurfaceFormOccurrence;
import org.dbpedia.spotlight.model.Text;
import org.dbpedia.spotlight.spot.OpenNLPUtil;

/* loaded from: input_file:org/dbpedia/spotlight/spot/OpenNLPNGramSpotter.class */
/**
 * Spotter that proposes surface form candidates by enumerating the token
 * n-grams contained in every noun-phrase ("NP") chunk that the OpenNLP chunker
 * finds in a text.
 *
 * <p>The four OpenNLP models are held in static fields and are therefore shared
 * by all instances; each one is loaded by the first constructor call that finds
 * its field still {@code null}. That check-then-load is not synchronized.
 */
public class OpenNLPNGramSpotter implements Spotter {
    /** N-grams made of more tokens than this are never emitted. */
    private static final int MAX_NGRAM_TOKENS = 3;

    protected static BaseModel sentenceModel;
    protected static BaseModel chunkModel;
    protected static BaseModel tokenModel;
    protected static BaseModel posModel;

    private final Log LOG = LogFactory.getLog(getClass());

    /** Words that may not be the first or last token of an emitted n-gram. */
    protected Set<String> stopWords = SpotlightConfiguration.DEFAULT_STOPWORDS;
    String directoryPath = null;
    String name = "OpenNLPNGramSpotter";

    /**
     * Creates a spotter, loading any of the shared OpenNLP models that has not
     * been loaded yet.
     *
     * @param str  first argument handed to {@code OpenNLPUtil.loadModel}
     *             (presumably the model directory; confirm against OpenNLPUtil)
     * @param str2 prefix prepended to each model's file name
     * @throws ConfigurationException declared for failures while loading a model
     */
    public OpenNLPNGramSpotter(String str, String str2) throws ConfigurationException {
        if (sentenceModel == null) {
            sentenceModel = OpenNLPUtil.loadModel(str, str2 + OpenNLPUtil.OpenNlpModels.SentenceModel.filename(), OpenNLPUtil.OpenNlpModels.SentenceModel.toString());
        }
        if (chunkModel == null) {
            chunkModel = OpenNLPUtil.loadModel(str, str2 + OpenNLPUtil.OpenNlpModels.ChunkModel.filename(), OpenNLPUtil.OpenNlpModels.ChunkModel.toString());
        }
        if (posModel == null) {
            posModel = OpenNLPUtil.loadModel(str, str2 + OpenNLPUtil.OpenNlpModels.POSModel.filename(), OpenNLPUtil.OpenNlpModels.POSModel.toString());
        }
        if (tokenModel == null) {
            tokenModel = OpenNLPUtil.loadModel(str, str2 + OpenNLPUtil.OpenNlpModels.TokenizerModel.filename(), OpenNLPUtil.OpenNlpModels.TokenizerModel.toString());
        }
    }

    /**
     * Spots surface form occurrences in the given text.
     *
     * <p>Spotting runs on a cleaned copy of the text produced by
     * {@code OpenNLPUtil.cleanText}; afterwards every occurrence offset is
     * passed through {@code OpenNLPUtil.computeOffset} together with the
     * original text and the removed character positions.
     *
     * @param text the text to spot in
     * @return the spotted occurrences; an empty list (never {@code null}) when
     *         nothing was found
     */
    @Override // org.dbpedia.spotlight.spot.Spotter
    public List<SurfaceFormOccurrence> extract(Text text) {
        String original = text.text();
        List<Integer> removedChars = OpenNLPUtil.chars2remove(original);
        Text cleaned = new Text(OpenNLPUtil.cleanText(original, removedChars));
        List<SurfaceFormOccurrence> spots = extractNPNGrams(cleaned);
        // extractNPNGrams is protected, so an overriding subclass may return null.
        if (spots == null || spots.isEmpty()) {
            return new ArrayList<SurfaceFormOccurrence>();
        }
        for (SurfaceFormOccurrence spot : spots) {
            spot.setTextOffset(OpenNLPUtil.computeOffset(original, spot.textOffset(), removedChars));
        }
        return spots;
    }

    /** @return the name of this spotter */
    @Override // org.dbpedia.spotlight.spot.Spotter
    public String getName() {
        return this.name;
    }

    /** @param str the new name of this spotter */
    @Override // org.dbpedia.spotlight.spot.Spotter
    public void setName(String str) {
        this.name = str;
    }

    /**
     * Splits the text into sentences, tokenizes, POS-tags and chunks each
     * sentence, and collects the n-grams of every chunk typed "NP".
     *
     * @param text the text to process
     * @return the occurrences collected over all sentences, in discovery order
     */
    protected List<SurfaceFormOccurrence> extractNPNGrams(Text text) {
        String content = text.text();
        List<SurfaceFormOccurrence> spots = new ArrayList<SurfaceFormOccurrence>();
        // NOTE(review): the models are declared as BaseModel; confirm that the
        // OpenNLP constructors in use accept them without a downcast.
        SentenceDetectorME sentenceDetector = new SentenceDetectorME(sentenceModel);
        TokenizerME tokenizer = new TokenizerME(tokenModel);
        POSTaggerME posTagger = new POSTaggerME(posModel);
        ChunkerME chunker = new ChunkerME(chunkModel);
        for (Span sentence : sentenceDetector.sentPosDetect(content)) {
            String sentenceText = sentence.getCoveredText(content).toString();
            // Token spans are relative to the sentence; this offset makes them
            // relative to the whole text again.
            int sentenceOffset = sentence.getStart();
            Span[] tokenSpans = tokenizer.tokenizePos(sentenceText);
            String[] tokens = new String[tokenSpans.length];
            for (int t = 0; t < tokens.length; t++) {
                tokens[t] = tokenSpans[t].getCoveredText(sentenceText).toString();
            }
            for (Span chunk : chunker.chunkAsSpans(tokens, posTagger.tag(tokens))) {
                if ("NP".equals(chunk.getType())) {
                    // extractNGramPos takes an inclusive last index, hence the - 1.
                    List<Map<String, Integer>> positions = extractNGramPos(chunk.getStart(), chunk.getEnd() - 1);
                    extractNGrams(positions, sentenceOffset, text, tokenSpans, spots);
                }
            }
        }
        return spots;
    }

    /**
     * Turns token index ranges into surface form occurrences and appends the
     * acceptable ones to {@code list2}.
     *
     * <p>A range is skipped when its text is blank, when it covers more than
     * {@value #MAX_NGRAM_TOKENS} tokens, or when its first or last token is a
     * stop word. Occurrences already contained in {@code list2} are not added
     * again.
     *
     * @param list    ranges as maps holding inclusive token indices under the
     *                keys "start" and "end"
     * @param i       character offset added to every token span position
     * @param text    the text the token spans (shifted by {@code i}) index into
     * @param spanArr character spans of the tokens
     * @param list2   output list that accepted occurrences are appended to
     */
    public void extractNGrams(List<Map<String, Integer>> list, int i, Text text, Span[] spanArr, List<SurfaceFormOccurrence> list2) {
        String content = text.text();
        for (Map<String, Integer> position : list) {
            int firstToken = position.get("start").intValue();
            int lastToken = position.get("end").intValue();
            int start = i + spanArr[firstToken].getStart();
            int end = i + spanArr[lastToken].getEnd();
            String candidate = content.substring(start, end);
            if (candidate.trim().length() == 0) {
                continue;
            }
            // Indices are inclusive, so the range covers lastToken - firstToken + 1 tokens.
            if (lastToken - firstToken >= MAX_NGRAM_TOKENS) {
                continue;
            }
            String firstWord = content.substring(start, i + spanArr[firstToken].getEnd());
            String lastWord = content.substring(i + spanArr[lastToken].getStart(), end);
            if (isStopWord(firstWord) || isStopWord(lastWord)) {
                continue;
            }
            NGram nGram = new NGram(candidate, start, end);
            SurfaceForm surfaceForm = new SurfaceForm(nGram.getTextform());
            assert !nGram.getTextform().isEmpty();
            SurfaceFormOccurrence occurrence = new SurfaceFormOccurrence(surfaceForm, text, nGram.getStart());
            if (surfaceForm.name().trim().length() > 0 && !list2.contains(occurrence)) {
                list2.add(occurrence);
            }
        }
    }

    /**
     * Enumerates every contiguous index range inside {@code [i, i2]}.
     *
     * @param i  first index, inclusive
     * @param i2 last index, inclusive
     * @return one map per range with the inclusive bounds stored under "start"
     *         and "end", ordered by start and then by end; empty when
     *         {@code i > i2}
     */
    public List<Map<String, Integer>> extractNGramPos(int i, int i2) {
        List<Map<String, Integer>> positions = new ArrayList<Map<String, Integer>>();
        for (int first = i; first <= i2; first++) {
            for (int last = first; last <= i2; last++) {
                Map<String, Integer> position = new HashMap<String, Integer>();
                position.put("start", Integer.valueOf(first));
                position.put("end", Integer.valueOf(last));
                positions.add(position);
            }
        }
        return positions;
    }

    /**
     * Tells whether the lower-cased word is contained in {@link #stopWords}.
     * Lower-casing uses {@link Locale#ROOT} so the result does not depend on
     * the JVM's default locale.
     */
    private boolean isStopWord(String str) {
        return this.stopWords.contains(str.toLowerCase(Locale.ROOT));
    }
}
