package org.dbpedia.spotlight.spot.cooccurrence.weka;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.dbpedia.spotlight.exceptions.ItemNotFoundException;
import org.dbpedia.spotlight.model.SurfaceFormOccurrence;
import org.dbpedia.spotlight.model.TaggedText;
import org.dbpedia.spotlight.spot.cooccurrence.features.CandidateFeatures;
import org.dbpedia.spotlight.spot.cooccurrence.features.data.CandidateData;
import org.dbpedia.spotlight.spot.cooccurrence.features.data.CoOccurrenceData;
import org.dbpedia.spotlight.spot.cooccurrence.features.data.OccurrenceDataProvider;
import org.dbpedia.spotlight.tagging.TaggedToken;
import weka.core.Attribute;
import weka.core.Instance;

/* loaded from: input_file:org/dbpedia/spotlight/spot/cooccurrence/weka/InstanceBuilderUnigram.class */
/**
 * Instance builder for unigram candidates.
 *
 * <p>Fills a Weka {@link Instance} with co-occurrence based values (unigram, bigram and trigram
 * counts/significances obtained from the {@code dataProvider}) and with surface features computed by
 * {@link CandidateFeatures} (case, quotation, neighbouring part-of-speech, enumeration, possessive).
 *
 * <p>Every feature is set on a best-effort basis: when the data for a feature is unavailable the
 * corresponding value is simply left untouched on the instance.
 */
public abstract class InstanceBuilderUnigram extends InstanceBuilder {
    // Numeric attributes. Only a subset is registered by buildAttributeList() in this class;
    // the remaining ones are declared here but not added to the list by this class.
    public static Attribute unigram_count_corpus = new Attribute("count_corpus");
    public static Attribute unigram_count_web = new Attribute("count_web");
    public static Attribute bigram_left_significance_corpus = new Attribute("left_significance_corpus");
    public static Attribute bigram_left_count_corpus = new Attribute("left_count_corpus");
    public static Attribute bigram_left_significance_web = new Attribute("left_significance_web");
    public static Attribute bigram_left_count_web = new Attribute("left_count_web");
    public static Attribute bigram_right_significance_corpus = new Attribute("right_significance_corpus");
    public static Attribute bigram_right_count_corpus = new Attribute("right_count_corpus");
    public static Attribute bigram_right_significance_web = new Attribute("right_significance_web");
    public static Attribute bigram_right_count_web = new Attribute("right_count_web");
    public static Attribute trigram_left_count_web = new Attribute("left_trigram_count_web");
    public static Attribute trigram_right_count_web = new Attribute("right_trigram_count_web");
    public static Attribute trigram_middle_count_web = new Attribute("middle_trigram_count_web");

    // Nominal attributes together with their admissible values.
    public static Attribute next_to_uppercase = new Attribute("next_to_uppercase", Arrays.asList("not_next_to_uppercase", "next_to_uppercase"));
    public static Attribute candidateCase = new Attribute("case", Arrays.asList("lowercase", "starts_with_uppercase", "all_uppercase", "sentence_initial_uppercase"));
    public static Attribute quoted = new Attribute("quoted", Arrays.asList("quoted", "not_quoted"));
    public static Attribute in_enumeration = new Attribute("in_enumeration", Arrays.asList("yes"));
    public static Attribute pre_pos = new Attribute("token_left", Arrays.asList("pp$", "prep", "of", "a", "the", "jj"));
    public static Attribute next_pos = new Attribute("token_right", Arrays.asList("verb", "of", "for"));
    public static Attribute possesive = new Attribute("possesive", Arrays.asList("yes"));

    /** Exclusive upper bound: the corpus count is only set when it is below this value. */
    protected long unigramCorpusMax;
    /** Exclusive lower bound: the web count is only set when it is above this value. */
    protected long unigramWebMin;
    /** Not read by this class. */
    protected long bigramLeftWebMin;
    /** Not read by this class. */
    protected long bigramRightWebMin;
    /** Inclusive lower bound on the web count of the trigram ending in the candidate. */
    protected long trigramLeftWebMin;
    /** Inclusive lower bound on the web count of the trigram starting with the candidate. */
    protected long trigramRightWebMin;
    /** Inclusive lower bound on the web count of the trigram centred on the candidate. */
    protected long trigramMiddleWebMin;

    /**
     * Creates the builder and initialises all thresholds to their defaults
     * ({@code unigramCorpusMax = 40000}, every minimum {@code 0}).
     *
     * <p>Decompiler note: the access modifier was changed from {@code protected}.
     *
     * @param occurrenceDataProvider provider handed on to the superclass constructor
     */
    public InstanceBuilderUnigram(OccurrenceDataProvider occurrenceDataProvider) {
        super(occurrenceDataProvider);
        this.unigramCorpusMax = 40000L;
        this.unigramWebMin = 0L;
        this.bigramLeftWebMin = 0L;
        this.bigramRightWebMin = 0L;
        this.trigramLeftWebMin = 0L;
        this.trigramRightWebMin = 0L;
        this.trigramMiddleWebMin = 0L;
    }

    /**
     * Builds the ordered list of attributes used by this builder and assigns the attribute weights.
     *
     * <p>Note that the weights are set on the shared static {@link Attribute} objects.
     *
     * @return a new list holding the attributes in the order they are registered
     */
    @Override
    public ArrayList<Attribute> buildAttributeList() {
        ArrayList<Attribute> arrayList = new ArrayList<>();
        unigram_count_corpus.setWeight(0.5d);
        arrayList.add(unigram_count_corpus);
        unigram_count_web.setWeight(0.5d);
        arrayList.add(unigram_count_web);
        arrayList.add(bigram_left_significance_web);
        arrayList.add(bigram_right_significance_web);
        trigram_left_count_web.setWeight(10.0d);
        arrayList.add(trigram_left_count_web);
        trigram_right_count_web.setWeight(10.0d);
        arrayList.add(trigram_right_count_web);
        trigram_middle_count_web.setWeight(10.0d);
        arrayList.add(trigram_middle_count_web);
        arrayList.add(quoted);
        arrayList.add(possesive);
        in_enumeration.setWeight(50.0d);
        arrayList.add(in_enumeration);
        arrayList.add(candidateCase);
        arrayList.add(next_to_uppercase);
        arrayList.add(pre_pos);
        arrayList.add(next_pos);
        arrayList.add(candidate_class);
        return arrayList;
    }

    /**
     * Fills {@code instance} with the feature values of the given surface form occurrence.
     *
     * <p>Co-occurrence features are only set when candidate data exists for the surface form;
     * surface features are always attempted. In verbose mode the result is passed to
     * {@code explain}.
     *
     * @param surfaceFormOccurrence the occurrence to describe
     * @param instance the instance to fill; it is modified in place
     * @return the same {@code instance} that was passed in
     */
    @Override
    public Instance buildInstance(SurfaceFormOccurrence surfaceFormOccurrence, Instance instance) {
        // Built once and reused for every index lookup instead of rebuilding the list per feature.
        ArrayList<Attribute> attributes = buildAttributeList();

        CandidateData candidateData = null;
        try {
            candidateData = this.dataProvider.getCandidateData(surfaceFormOccurrence.surfaceForm().name());
        } catch (ItemNotFoundException e) {
            this.LOG.debug("No occurrence data for " + surfaceFormOccurrence.surfaceForm());
        }

        if (candidateData != null) {
            setOccurrenceFeatures(surfaceFormOccurrence, instance, candidateData, attributes);
        }
        setSurfaceFeatures(surfaceFormOccurrence, instance, attributes);

        if (this.verboseMode) {
            explain(surfaceFormOccurrence, instance);
        }
        return instance;
    }

    /** Returns the candidate data of the context token at {@code position}, or {@code null} if there is none. */
    private CandidateData lookupContextTokenData(List<TaggedToken> context, int position) {
        if (context.size() <= position) {
            return null;
        }
        try {
            return this.dataProvider.getCandidateData(context.get(position).getToken());
        } catch (ItemNotFoundException ignored) {
            // No data for this neighbour: features depending on it are skipped.
            return null;
        }
    }

    /** Sets the unigram, bigram and trigram based values derived from the occurrence data provider. */
    private void setOccurrenceFeatures(SurfaceFormOccurrence surfaceFormOccurrence, Instance instance,
            CandidateData candidateData, ArrayList<Attribute> attributes) {
        // An unavailable context is treated as an empty one so that the size checks below stay safe.
        List<TaggedToken> leftContext = new ArrayList<>();
        try {
            List<TaggedToken> tokens = ((TaggedText) surfaceFormOccurrence.context()).taggedTokenProvider().getLeftContext(surfaceFormOccurrence, 2);
            if (tokens != null) {
                leftContext = tokens;
            }
        } catch (ItemNotFoundException ignored) {
            // Left context unavailable: continue without left-hand features.
        }
        CandidateData left1 = lookupContextTokenData(leftContext, 0);
        CandidateData left2 = lookupContextTokenData(leftContext, 1);

        List<TaggedToken> rightContext = new ArrayList<>();
        try {
            List<TaggedToken> tokens = ((TaggedText) surfaceFormOccurrence.context()).taggedTokenProvider().getRightContext(surfaceFormOccurrence, 2);
            if (tokens != null) {
                rightContext = tokens;
            }
        } catch (ItemNotFoundException ignored) {
            // Right context unavailable: continue without right-hand features.
        }
        CandidateData right1 = lookupContextTokenData(rightContext, 0);
        CandidateData right2 = lookupContextTokenData(rightContext, 1);

        // The ArrayIndexOutOfBoundsException handlers below presumably cover attributes that are
        // not part of the attribute list in use — TODO confirm against InstanceBuilder.i(...).

        // Unigram count in the corpus.
        // NOTE(review): unlike all other features this one is addressed through the Attribute
        // itself rather than through i(...); kept as found — confirm this is intended.
        try {
            if (candidateData.getCountWikipedia() != null && candidateData.getCountWikipedia().longValue() < this.unigramCorpusMax) {
                instance.setValue(unigram_count_corpus, candidateData.getCountWikipedia().longValue());
            }
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // Best effort: value is left unset.
        }

        // Unigram count on the web.
        try {
            if (candidateData.getCountWeb() != null && candidateData.getCountWeb().longValue() > this.unigramWebMin) {
                instance.setValue(i(unigram_count_web, attributes), candidateData.getCountWeb().longValue());
            }
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // Best effort: value is left unset.
        }

        // Left bigram (left neighbour + candidate); skipped for function words, tags containing "$" and "in".
        if (left1 != null
                && !leftContext.get(0).getPOSTag().matches(InstanceBuilder.FUNCTION_WORD_PATTERN)
                && !leftContext.get(0).getPOSTag().contains("$")
                && !leftContext.get(0).getPOSTag().equals("in")) {
            try {
                CoOccurrenceData leftBigram = this.dataProvider.getBigramData(left1, candidateData);
                if (leftBigram != null) {
                    try {
                        instance.setValue(i(bigram_left_significance_web, attributes), leftBigram.getUnitSignificanceWeb());
                    } catch (ArrayIndexOutOfBoundsException ignored) {
                        // Best effort: value is left unset.
                    }
                }
            } catch (ItemNotFoundException ignored) {
                // No bigram data: value is left unset.
            }
        }

        // Left trigram (two left neighbours + candidate).
        if (left1 != null && left2 != null) {
            try {
                CoOccurrenceData leftTrigram = this.dataProvider.getTrigramData(left2, left1, candidateData);
                if (leftTrigram != null
                        && !leftContext.get(0).getPOSTag().equals(",")
                        && !leftContext.get(1).getPOSTag().equals(",")
                        && (!leftContext.get(0).getPOSTag().equals("in") || !leftContext.get(1).getPOSTag().equals("at"))
                        && leftTrigram.getUnitCountWeb() >= this.trigramLeftWebMin) {
                    instance.setValue(i(trigram_left_count_web, attributes), leftTrigram.getUnitCountWeb());
                }
            } catch (ArrayIndexOutOfBoundsException ignored) {
                // Best effort: value is left unset.
            } catch (ItemNotFoundException ignored) {
                // No trigram data: value is left unset.
            }
        }

        // Right trigram (candidate + two right neighbours).
        if (right1 != null && right2 != null) {
            try {
                CoOccurrenceData rightTrigram = this.dataProvider.getTrigramData(candidateData, right1, right2);
                if (rightTrigram != null
                        && !rightContext.get(0).getPOSTag().equals(",")
                        && !rightContext.get(1).getPOSTag().equals(",")
                        && rightTrigram.getUnitCountWeb() >= this.trigramRightWebMin) {
                    instance.setValue(i(trigram_right_count_web, attributes), rightTrigram.getUnitCountWeb());
                }
            } catch (ArrayIndexOutOfBoundsException ignored) {
                // Best effort: value is left unset.
            } catch (ItemNotFoundException ignored) {
                // No trigram data: value is left unset.
            }
        }

        // Middle trigram (left neighbour + candidate + right neighbour).
        if (left1 != null && right1 != null) {
            try {
                CoOccurrenceData middleTrigram = this.dataProvider.getTrigramData(left1, candidateData, right1);
                if (middleTrigram != null
                        && !leftContext.get(0).getPOSTag().equals(",")
                        && !rightContext.get(0).getPOSTag().equals(",")
                        && !leftContext.get(0).getPOSTag().equals("in")
                        && !rightContext.get(0).getPOSTag().equals("cc")
                        && middleTrigram.getUnitCountWeb() >= this.trigramMiddleWebMin) {
                    instance.setValue(i(trigram_middle_count_web, attributes), middleTrigram.getUnitCountWeb());
                }
            } catch (ArrayIndexOutOfBoundsException ignored) {
                // Best effort: value is left unset.
            } catch (ItemNotFoundException ignored) {
                // No trigram data: value is left unset.
            }
        }

        // Right bigram (candidate + right neighbour); skipped for function words.
        if (right1 != null && !rightContext.get(0).getPOSTag().matches(InstanceBuilder.FUNCTION_WORD_PATTERN)) {
            CoOccurrenceData rightBigram = null;
            try {
                rightBigram = this.dataProvider.getBigramData(candidateData, right1);
            } catch (ItemNotFoundException ignored) {
                // No bigram data: value is left unset.
            }
            if (rightBigram != null) {
                try {
                    instance.setValue(i(bigram_right_significance_web, attributes), rightBigram.getUnitSignificanceWeb());
                } catch (ArrayIndexOutOfBoundsException ignored) {
                    // Best effort: value is left unset.
                }
            }
        }
    }

    /** Sets the values computed by {@link CandidateFeatures} from the occurrence itself. */
    private void setSurfaceFeatures(SurfaceFormOccurrence surfaceFormOccurrence, Instance instance,
            ArrayList<Attribute> attributes) {
        try {
            instance.setValue(i(candidateCase, attributes), CandidateFeatures.nonSentenceInitialUppercase(surfaceFormOccurrence));
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // Best effort: value is left unset.
        }
        try {
            instance.setValue(i(quoted, attributes), CandidateFeatures.quoted(surfaceFormOccurrence));
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // Best effort: value is left unset.
        }
        try {
            instance.setValue(i(next_to_uppercase, attributes), CandidateFeatures.nextToUppercase(surfaceFormOccurrence));
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // Best effort: value is left unset.
        }
        try {
            Integer prePosValue = CandidateFeatures.prePOS(surfaceFormOccurrence);
            if (prePosValue != null) {
                instance.setValue(i(pre_pos, attributes), prePosValue.intValue());
            }
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // Best effort: value is left unset.
        }
        try {
            Integer nextPosValue = CandidateFeatures.nextPOS(surfaceFormOccurrence);
            if (nextPosValue != null) {
                instance.setValue(i(next_pos, attributes), nextPosValue.intValue());
            }
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // Best effort: value is left unset.
        }
        try {
            if (CandidateFeatures.isInEnumeration(surfaceFormOccurrence)) {
                // 0 is the position of "yes", the only value declared for this attribute.
                instance.setValue(i(in_enumeration, attributes), 0.0d);
            }
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // Best effort: value is left unset.
        }
        try {
            if (CandidateFeatures.isPossessive(surfaceFormOccurrence)) {
                // 0 is the position of "yes", the only value declared for this attribute.
                instance.setValue(i(possesive, attributes), 0.0d);
            }
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // Best effort: value is left unset.
        }
    }
}
