package org.dbpedia.spotlight.spot.cooccurrence.weka;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import org.dbpedia.spotlight.exceptions.ItemNotFoundException;
import org.dbpedia.spotlight.model.SurfaceFormOccurrence;
import org.dbpedia.spotlight.model.TaggedText;
import org.dbpedia.spotlight.spot.cooccurrence.features.CandidateFeatures;
import org.dbpedia.spotlight.spot.cooccurrence.features.data.CandidateData;
import org.dbpedia.spotlight.spot.cooccurrence.features.data.CoOccurrenceData;
import org.dbpedia.spotlight.spot.cooccurrence.features.data.OccurrenceDataProvider;
import org.dbpedia.spotlight.tagging.TaggedToken;
import weka.core.Attribute;
import weka.core.Instance;

/* loaded from: input_file:org/dbpedia/spotlight/spot/cooccurrence/weka/InstanceBuilderNGram.class */
/**
 * Instance builder for surface form occurrences that are described through
 * n-gram features: verb content, letter case, size, the tokens directly
 * left and right of the candidate, and web bigram/trigram statistics obtained
 * from the {@code dataProvider} of the superclass.
 *
 * <p>Every feature is written on a best-effort basis. A feature whose
 * attribute index cannot be resolved against {@link #buildAttributeList()}
 * makes {@code Instance.setValue} throw an
 * {@link ArrayIndexOutOfBoundsException}; that exception is swallowed on
 * purpose so the remaining features are still set. Missing corpus data
 * ({@link ItemNotFoundException}) likewise just leaves the feature unset.
 */
public abstract class InstanceBuilderNGram extends InstanceBuilder {
    /** Numeric attribute {@code count_corpus}; not written by this class. */
    public static Attribute count_corpus = new Attribute("count_corpus");
    /** Numeric attribute: web unit count of a two-token candidate. Not part of {@link #buildAttributeList()} here. */
    public static Attribute count_web = new Attribute("count_web");
    /** Nominal attribute; value indices follow the order of the listed labels. */
    public static Attribute contains_verb = new Attribute("contains_verb", Arrays.asList("no", "vb", "vbd", "vbg", "vbn", "be", "multiple"));
    /** Nominal attribute describing the letter case of the candidate. */
    public static Attribute term_case = new Attribute("case", Arrays.asList("all_lowercase", "mixed", "titlecase", "all_uppercase", "first_uppercase"));
    /** Nominal attribute, set (index 0, "yes") only when {@code CandidateFeatures.quoted} returns 1. */
    public static Attribute quoted = new Attribute("quoted", Arrays.asList("yes"));
    /** Numeric attribute: number of tagged tokens in the candidate. */
    public static Attribute candidate_size = new Attribute("candidate_size");
    /** Nominal attribute describing the token to the left of the candidate. */
    public static Attribute pre_pos = new Attribute("token_left", Arrays.asList("to", "verb", "a"));
    /** Nominal attribute describing the token to the right of the candidate. */
    public static Attribute next_pos = new Attribute("token_right", Arrays.asList("of", "to", "be", "verb"));
    /** Nominal attribute, set (index 0, "prep") when the last candidate token is tagged {@code "in"}. */
    public static Attribute ends_with = new Attribute("ends_with", Arrays.asList("prep"));
    // NOTE(review): the attribute name is misspelled ("signifance"). It is a runtime string,
    // presumably referenced by existing models/data sets - confirm before renaming.
    public static Attribute bigram_left_significance_web = new Attribute("bigram_left_signifance_web");
    public static Attribute bigram_right_significance_web = new Attribute("bigram_right_significance_web");
    public static Attribute trigram_left = new Attribute("trigram_left");
    public static Attribute trigram_right = new Attribute("trigram_right");

    /** Left bigram web significance must be strictly greater than this value to be written. */
    protected long bigramLeftWebMin;
    /** Right bigram web significance must be strictly greater than this value to be written. */
    protected long bigramRightWebMin;
    /** Left trigram web count must be strictly greater than this value to be written. */
    protected long trigramLeftWebMin;
    /** Not read by this class. */
    protected long trigramMiddleWebMin;
    /** Right trigram web count must be strictly greater than this value to be written. */
    protected long trigramRightWebMin;

    /**
     * Creates the builder with all n-gram thresholds set to zero.
     *
     * @param occurrenceDataProvider provider handed to the superclass
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public InstanceBuilderNGram(OccurrenceDataProvider occurrenceDataProvider) {
        super(occurrenceDataProvider);
        this.bigramLeftWebMin = 0L;
        this.bigramRightWebMin = 0L;
        this.trigramLeftWebMin = 0L;
        this.trigramMiddleWebMin = 0L;
        this.trigramRightWebMin = 0L;
    }

    /**
     * Returns the attributes used by this builder, in instance order.
     *
     * @return a new, mutable list on every call
     */
    @Override // org.dbpedia.spotlight.spot.cooccurrence.weka.InstanceBuilder
    public ArrayList<Attribute> buildAttributeList() {
        ArrayList<Attribute> arrayList = new ArrayList<>();
        arrayList.addAll(Arrays.asList(contains_verb, term_case, candidate_size, pre_pos, next_pos, ends_with, quoted, bigram_left_significance_web, bigram_right_significance_web, trigram_left, trigram_right, candidate_class));
        return arrayList;
    }

    /**
     * Fills {@code instance} with the n-gram features of the given occurrence.
     *
     * @param surfaceFormOccurrence occurrence whose context is a {@link TaggedText}
     * @param instance              instance to populate; features that cannot be
     *                              computed are left untouched
     * @return the same {@code instance}
     */
    @Override // org.dbpedia.spotlight.spot.cooccurrence.weka.InstanceBuilder
    public Instance buildInstance(SurfaceFormOccurrence surfaceFormOccurrence, Instance instance) {
        TaggedText taggedText = (TaggedText) surfaceFormOccurrence.context();
        List<TaggedToken> candidateTokens = taggedText.taggedTokenProvider().getTaggedTokens(surfaceFormOccurrence);
        int tokenCount = candidateTokens.size();

        // Corpus data for the first/second and last/second-to-last candidate tokens.
        CandidateData firstToken = lookupCandidateOrNull(candidateTokens.get(0).getToken());
        CandidateData secondToken = tokenCount > 1
                ? lookupCandidateOrNull(candidateTokens.get(1).getToken())
                : null;
        CandidateData lastToken = lookupCandidateOrNull(candidateTokens.get(tokenCount - 1).getToken());
        CandidateData secondToLastToken = tokenCount > 1
                ? lookupCandidateOrNull(candidateTokens.get(tokenCount - 2).getToken())
                : null;

        // Left context; stays null when the provider reports there is none.
        List<TaggedToken> leftContext = null;
        try {
            leftContext = taggedText.taggedTokenProvider().getLeftContext(surfaceFormOccurrence, 2);
        } catch (ItemNotFoundException ignored) {
            // no left context available
        }
        CandidateData leftNeighbour = null;
        if (leftContext != null && leftContext.size() > 0) {
            String leftToken = leftContext.get(0).getToken();
            if (leftContext.size() == 1) {
                // A single-token left context is looked up in lower case.
                leftToken = leftToken.toLowerCase();
            }
            leftNeighbour = lookupCandidateOrNull(leftToken);
        }

        // Right context; stays null when the provider reports there is none.
        List<TaggedToken> rightContext = null;
        try {
            rightContext = taggedText.taggedTokenProvider().getRightContext(surfaceFormOccurrence, 2);
        } catch (ItemNotFoundException ignored) {
            // no right context available
        }
        CandidateData rightNeighbour = null;
        if (rightContext != null && rightContext.size() > 0) {
            rightNeighbour = lookupCandidateOrNull(rightContext.get(0).getToken());
        }

        // Resolved once per call instead of once per feature; the (virtual) call
        // returns a fresh list each time, so repeated calls only cost allocations.
        ArrayList<Attribute> attributes = buildAttributeList();

        // count_web: only for two-token candidates with data for both tokens.
        if (tokenCount == 2 && firstToken != null && secondToken != null) {
            try {
                instance.setValue(i(count_web, attributes), this.dataProvider.getBigramData(firstToken, secondToken).getUnitCountWeb());
            } catch (ArrayIndexOutOfBoundsException ignored) {
                // attribute not part of the attribute list
            } catch (ItemNotFoundException ignored) {
                // no bigram data
            }
        }

        // Collect verb tags and count tokens starting with an upper-case letter.
        String surfaceFormName = surfaceFormOccurrence.surfaceForm().name();
        boolean allLowercase = surfaceFormName.toLowerCase().equals(surfaceFormName);
        boolean allUppercase = surfaceFormName.toUpperCase().equals(surfaceFormName);
        List<String> verbTags = new ArrayList<>();
        int uppercaseInitials = 0;
        for (TaggedToken token : candidateTokens) {
            String posTag = token.getPOSTag();
            if (posTag.startsWith("v") || posTag.equals("be")) {
                verbTags.add(posTag);
            }
            if (Character.isUpperCase(token.getToken().charAt(0))) {
                uppercaseInitials++;
            }
        }

        // contains_verb
        if (verbTags.size() > 1) {
            // NOTE(review): index 5 is "be"; the "multiple" label is index 6. Left unchanged
            // because altering the encoding would change the features existing models saw.
            setFeatureIfPresent(instance, attributes, contains_verb, 5.0d);
        } else if (verbTags.isEmpty()) {
            setFeatureIfPresent(instance, attributes, contains_verb, 0.0d);
        } else {
            switch (verbTags.get(0)) {
                case "vb":
                    setFeatureIfPresent(instance, attributes, contains_verb, 1.0d);
                    break;
                case "vbd":
                    setFeatureIfPresent(instance, attributes, contains_verb, 2.0d);
                    break;
                case "vbg":
                    setFeatureIfPresent(instance, attributes, contains_verb, 3.0d);
                    break;
                case "vbn":
                    setFeatureIfPresent(instance, attributes, contains_verb, 4.0d);
                    break;
                case "be":
                    setFeatureIfPresent(instance, attributes, contains_verb, 5.0d);
                    break;
                default:
                    // any other tag starting with "v" leaves the attribute unset
                    break;
            }
        }

        // term_case
        double caseCategory;
        if (allLowercase) {
            caseCategory = 0.0d;
        } else if (allUppercase) {
            caseCategory = 3.0d;
        } else if (uppercaseInitials == tokenCount) {
            caseCategory = 2.0d;
        } else if (uppercaseInitials == 1 && Character.isUpperCase(candidateTokens.get(0).getToken().charAt(0))) {
            caseCategory = 4.0d;
        } else {
            caseCategory = 1.0d;
        }
        setFeatureIfPresent(instance, attributes, term_case, caseCategory);

        // candidate_size
        setFeatureIfPresent(instance, attributes, candidate_size, tokenCount);

        // pre_pos, first from the left neighbour token ...
        try {
            applyLeftTokenFeature(instance, attributes, taggedText.taggedTokenProvider().getLeftNeighbourToken(surfaceFormOccurrence));
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // attribute not part of the attribute list (or thrown by the provider)
        } catch (ItemNotFoundException ignored) {
            // no left neighbour
        }
        // ... then from the first element of the left context, which may overwrite it.
        try {
            // Null guard: the context is null when getLeftContext found nothing.
            if (leftContext != null && leftContext.size() > 0) {
                applyLeftTokenFeature(instance, attributes, leftContext.get(0));
            }
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // attribute not part of the attribute list
        }

        // quoted
        try {
            if (CandidateFeatures.quoted(surfaceFormOccurrence) == 1) {
                instance.setValue(i(quoted, attributes), 0.0d);
            }
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // attribute not part of the attribute list
        }

        // next_pos
        try {
            // Null guard: the context is null when getRightContext found nothing.
            if (rightContext != null && rightContext.size() > 0) {
                TaggedToken rightToken = rightContext.get(0);
                if (rightToken.getToken().equals("of")) {
                    instance.setValue(i(next_pos, attributes), 0.0d);
                } else if (rightToken.getToken().equals("to")) {
                    instance.setValue(i(next_pos, attributes), 1.0d);
                } else if (rightToken.getPOSTag().startsWith("be")) {
                    instance.setValue(i(next_pos, attributes), 2.0d);
                } else if (rightToken.getPOSTag().startsWith("v")) {
                    instance.setValue(i(next_pos, attributes), 3.0d);
                }
            }
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // attribute not part of the attribute list
        }

        // ends_with
        if (candidateTokens.get(tokenCount - 1).getPOSTag().equals("in")) {
            setFeatureIfPresent(instance, attributes, ends_with, 0.0d);
        }

        // Left bigram significance. leftNeighbour != null implies a non-empty left context.
        if (leftNeighbour != null && firstToken != null
                && !leftContext.get(0).getPOSTag().matches(InstanceBuilder.FUNCTION_WORD_PATTERN)
                && !leftContext.get(0).getPOSTag().equals("in")) {
            CoOccurrenceData leftBigram = null;
            try {
                leftBigram = this.dataProvider.getBigramData(leftNeighbour, firstToken);
            } catch (ItemNotFoundException ignored) {
                // no bigram data
            }
            if (leftBigram != null && leftBigram.getUnitSignificanceWeb() > ((float) this.bigramLeftWebMin)) {
                setFeatureIfPresent(instance, attributes, bigram_left_significance_web, leftBigram.getUnitSignificanceWeb());
            }
        }

        // Left trigram count: left neighbour + first two candidate tokens.
        if (firstToken != null && secondToken != null && leftNeighbour != null) {
            CoOccurrenceData leftTrigram = null;
            try {
                leftTrigram = this.dataProvider.getTrigramData(leftNeighbour, firstToken, secondToken);
            } catch (ItemNotFoundException ignored) {
                // no trigram data
            }
            if (leftTrigram != null && leftTrigram.getUnitCountWeb() > this.trigramLeftWebMin) {
                setFeatureIfPresent(instance, attributes, trigram_left, leftTrigram.getUnitCountWeb());
            }
        }

        // Right trigram count: last two candidate tokens + right neighbour.
        if (lastToken != null && secondToLastToken != null && rightNeighbour != null) {
            CoOccurrenceData rightTrigram = null;
            try {
                rightTrigram = this.dataProvider.getTrigramData(secondToLastToken, lastToken, rightNeighbour);
            } catch (NullPointerException ignored) {
                // kept from the original: a failing lookup must not abort the instance
            } catch (ItemNotFoundException ignored) {
                // no trigram data
            }
            if (rightTrigram != null && rightTrigram.getUnitCountWeb() > this.trigramRightWebMin) {
                setFeatureIfPresent(instance, attributes, trigram_right, rightTrigram.getUnitCountWeb());
            }
        }

        // Right bigram significance. rightNeighbour != null implies a non-empty right context.
        if (lastToken != null && rightNeighbour != null
                && !rightContext.get(0).getPOSTag().matches(InstanceBuilder.FUNCTION_WORD_PATTERN)
                && !rightContext.get(0).getPOSTag().equals("in")) {
            CoOccurrenceData rightBigram = null;
            try {
                rightBigram = this.dataProvider.getBigramData(lastToken, rightNeighbour);
            } catch (ItemNotFoundException ignored) {
                // no bigram data
            }
            if (rightBigram != null && rightBigram.getUnitSignificanceWeb() > ((float) this.bigramRightWebMin)) {
                setFeatureIfPresent(instance, attributes, bigram_right_significance_web, rightBigram.getUnitSignificanceWeb());
            }
        }

        if (this.verboseMode) {
            explain(surfaceFormOccurrence, instance);
        }
        return instance;
    }

    /** Returns the candidate data for {@code token}, or {@code null} when the provider has none. */
    private CandidateData lookupCandidateOrNull(String token) {
        try {
            return this.dataProvider.getCandidateData(token);
        } catch (ItemNotFoundException ignored) {
            return null;
        }
    }

    /** Sets {@code attribute} to {@code value}; silently skips attributes whose index cannot be resolved. */
    private void setFeatureIfPresent(Instance instance, ArrayList<Attribute> attributes, Attribute attribute, double value) {
        try {
            instance.setValue(i(attribute, attributes), value);
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // attribute not part of the attribute list: feature is intentionally skipped
        }
    }

    /**
     * Writes {@code pre_pos} for a token left of the candidate: index 0 for POS tag
     * {@code "to"}, index 1 for POS tags starting with m, n or f, index 2 for the
     * tokens "a"/"an" (any case). Does not catch; callers handle unresolved attributes.
     */
    private void applyLeftTokenFeature(Instance instance, ArrayList<Attribute> attributes, TaggedToken leftToken) {
        if (leftToken.getPOSTag().equals("to")) {
            instance.setValue(i(pre_pos, attributes), 0.0d);
        } else if (leftToken.getPOSTag().matches("[mnf].*")) {
            // NOTE(review): index 1 is labelled "verb" although the pattern matches m/n/f tags - confirm intent.
            instance.setValue(i(pre_pos, attributes), 1.0d);
        } else if (leftToken.getToken().matches("[aA][nN]?")) {
            instance.setValue(i(pre_pos, attributes), 2.0d);
        }
    }
}
