package org.dbpedia.spotlight.db;

import org.dbpedia.spotlight.db.model.ResourceStore;
import org.dbpedia.spotlight.db.model.SurfaceFormStore;
import org.dbpedia.spotlight.db.model.TextTokenizer;
import org.dbpedia.spotlight.db.model.TokenTypeStore;
import org.dbpedia.spotlight.db.similarity.ContextSimilarity;
import org.dbpedia.spotlight.disambiguate.ParagraphDisambiguator;
import org.dbpedia.spotlight.disambiguate.mixtures.Mixture;
import org.dbpedia.spotlight.exceptions.InputException;
import org.dbpedia.spotlight.log.SpotlightLog$;
import org.dbpedia.spotlight.log.SpotlightLog$StringSpotlightLog$;
import org.dbpedia.spotlight.model.DBpediaResourceOccurrence;
import org.dbpedia.spotlight.model.Feature;
import org.dbpedia.spotlight.model.Paragraph;
import org.dbpedia.spotlight.model.SurfaceFormOccurrence;
import org.dbpedia.spotlight.model.Token;
import org.dbpedia.spotlight.model.TokenType;
import scala.Predef$;
import scala.collection.JavaConversions$;
import scala.collection.Seq;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Iterable$;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer$;
import scala.math.Ordering$Int$;
import scala.reflect.ScalaSignature;
import scala.runtime.ObjectRef;

/* compiled from: DBTwoStepDisambiguator.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005}d\u0001B\u0001\u0003\u0001-\u0011a\u0003\u0012\"Uo>\u001cF/\u001a9ESN\fWNY5hk\u0006$xN\u001d\u0006\u0003\u0007\u0011\t!\u0001\u001a2\u000b\u0005\u00151\u0011!C:q_Rd\u0017n\u001a5u\u0015\t9\u0001\"A\u0004eEB,G-[1\u000b\u0003%\t1a\u001c:h\u0007\u0001\u00192\u0001\u0001\u0007\u0013!\ti\u0001#D\u0001\u000f\u0015\u0005y\u0011!B:dC2\f\u0017BA\t\u000f\u0005\u0019\te.\u001f*fMB\u00111CF\u0007\u0002))\u0011Q\u0003B\u0001\rI&\u001c\u0018-\u001c2jOV\fG/Z\u0005\u0003/Q\u0011a\u0003U1sC\u001e\u0014\u0018\r\u001d5ESN\fWNY5hk\u0006$xN\u001d\u0005\t3\u0001\u0011\t\u0011)A\u00055\u0005QAo\\6f]N#xN]3\u0011\u0005mqR\"\u0001\u000f\u000b\u0005u\u0011\u0011!B7pI\u0016d\u0017BA\u0010\u001d\u00059!vn[3o)f\u0004Xm\u0015;pe\u0016D\u0001\"\t\u0001\u0003\u0002\u0003\u0006IAI\u0001\u0011gV\u0014h-Y2f\r>\u0014Xn\u0015;pe\u0016\u0004\"aG\u0012\n\u0005\u0011b\"\u0001E*ve\u001a\f7-\u001a$pe6\u001cFo\u001c:f\u0011!1\u0003A!A!\u0002\u00139\u0013!\u0004:fg>,(oY3Ti>\u0014X\r\u0005\u0002\u001cQ%\u0011\u0011\u0006\b\u0002\u000e%\u0016\u001cx.\u001e:dKN#xN]3\t\u0011-\u0002!Q1A\u0005\u00021\n\u0011cY1oI&$\u0017\r^3TK\u0006\u00148\r[3s+\u0005i\u0003C\u0001\u00180\u001b\u0005\u0011\u0011B\u0001\u0019\u0003\u0005M!%iQ1oI&$\u0017\r^3TK\u0006\u00148\r[3s\u0011!\u0011\u0004A!A!\u0002\u0013i\u0013AE2b]\u0012LG-\u0019;f'\u0016\f'o\u00195fe\u0002B\u0001\u0002\u000e\u0001\u0003\u0002\u0003\u0006I!N\u0001\b[&DH/\u001e:f!\t1\u0014(D\u00018\u0015\tAD#\u0001\u0005nSb$XO]3t\u0013\tQtGA\u0004NSb$XO]3\t\u0011q\u0002!\u0011!Q\u0001\nu\n\u0011cY8oi\u0016DHoU5nS2\f'/\u001b;z!\tq\u0014)D\u0001@\u0015\t\u0001%!\u0001\u0006tS6LG.\u0019:jifL!AQ 
\u0003#\r{g\u000e^3yiNKW.\u001b7be&$\u0018\u0010C\u0003E\u0001\u0011\u0005Q)\u0001\u0004=S:LGO\u0010\u000b\b\r\u001eC\u0015JS&M!\tq\u0003\u0001C\u0003\u001a\u0007\u0002\u0007!\u0004C\u0003\"\u0007\u0002\u0007!\u0005C\u0003'\u0007\u0002\u0007q\u0005C\u0003,\u0007\u0002\u0007Q\u0006C\u00035\u0007\u0002\u0007Q\u0007C\u0003=\u0007\u0002\u0007Q\bC\u0004O\u0001\u0001\u0007I\u0011A(\u0002\u0013Q|7.\u001a8ju\u0016\u0014X#\u0001)\u0011\u0005m\t\u0016B\u0001*\u001d\u00055!V\r\u001f;U_.,g.\u001b>fe\"9A\u000b\u0001a\u0001\n\u0003)\u0016!\u0004;pW\u0016t\u0017N_3s?\u0012*\u0017\u000f\u0006\u0002W3B\u0011QbV\u0005\u00031:\u0011A!\u00168ji\"9!lUA\u0001\u0002\u0004\u0001\u0016a\u0001=%c!1A\f\u0001Q!\nA\u000b!\u0002^8lK:L'0\u001a:!\u0011\u001dq\u0006A1A\u0005\u0002}\u000ba\"T!Y?\u000e\u000be\nR%E\u0003R+5+F\u0001a!\ti\u0011-\u0003\u0002c\u001d\t\u0019\u0011J\u001c;\t\r\u0011\u0004\u0001\u0015!\u0003a\u0003=i\u0015\tW0D\u0003:#\u0015\nR!U\u000bN\u0003\u0003b\u00024\u0001\u0005\u0004%\taX\u0001\f\u001b\u0006CvlQ(O)\u0016CF\u000b\u0003\u0004i\u0001\u0001\u0006I\u0001Y\u0001\r\u001b\u0006CvlQ(O)\u0016CF\u000b\t\u0005\u0006U\u0002!\ta[\u0001\u0006E\u0016\u001cHo\u0013\u000b\u0006Y\u0006=\u0011\u0011\u0004\t\u0005[VDXP\u0004\u0002og:\u0011qN]\u0007\u0002a*\u0011\u0011OC\u0001\u0007yI|w\u000e\u001e 
\n\u0003=I!\u0001\u001e\b\u0002\rA\u0013X\rZ3g\u0013\t1xOA\u0002NCBT!\u0001\u001e\b\u0011\u0005e\\X\"\u0001>\u000b\u0005u!\u0011B\u0001?{\u0005U\u0019VO\u001d4bG\u00164uN]7PG\u000e,(O]3oG\u0016\u0004RA`A\u0002\u0003\u0013q!A\\@\n\u0007\u0005\u0005a\"A\u0004qC\u000e\\\u0017mZ3\n\t\u0005\u0015\u0011q\u0001\u0002\u0005\u0019&\u001cHOC\u0002\u0002\u00029\u00012!_A\u0006\u0013\r\tiA\u001f\u0002\u001a\t\n\u0003X\rZ5b%\u0016\u001cx.\u001e:dK>\u001b7-\u001e:sK:\u001cW\rC\u0004\u0002\u0012%\u0004\r!a\u0005\u0002\u0013A\f'/Y4sCBD\u0007cA=\u0002\u0016%\u0019\u0011q\u0003>\u0003\u0013A\u000b'/Y4sCBD\u0007BBA\u000eS\u0002\u0007\u0001-A\u0001l\u0011\u001d\ty\u0002\u0001C\u0001\u0003C\taAY3ti.{F#\u00037\u0002$\u0005\u0015\u00121FA\u001e\u0011!\t\t\"!\bA\u0002\u0005M\u0001\u0002CA\u0014\u0003;\u0001\r!!\u000b\u0002\u0017=\u001c7-\u001e:sK:\u001cWm\u001d\t\u0005}\u0006\r\u0001\u0010\u0003\u0005\u0002.\u0005u\u0001\u0019AA\u0018\u0003\u0019!xn[3ogB)a0!\r\u00026%!\u00111GA\u0004\u0005\r\u0019V-\u001d\t\u0004s\u0006]\u0012bAA\u001du\nIAk\\6f]RK\b/\u001a\u0005\b\u00037\ti\u00021\u0001a\u0011\u0019)\u0002\u0001\"\u0001\u0002@Q\u0019Q0!\u0011\t\u0011\u0005E\u0011Q\ba\u0001\u0003'Ac!!\u0010\u0002F\u0005\r\u0004#B\u0007\u0002H\u0005-\u0013bAA%\u001d\t1A\u000f\u001b:poN\u0004B!!\u0014\u0002P1\u0001AaBA)\u0001\t\u0007\u00111\u000b\u0002\u0002)F!\u0011QKA.!\ri\u0011qK\u0005\u0004\u00033r!a\u0002(pi\"Lgn\u001a\t\u0005\u0003;\nyF\u0004\u0002\u000e\u007f&!\u0011\u0011MA\u0004\u0005%!\u0006N]8xC\ndWm\t\u0002\u0002fA!\u0011qMA7\u001b\t\tIGC\u0002\u0002l\u0011\t!\"\u001a=dKB$\u0018n\u001c8t\u0013\u0011\ty'!\u001b\u0003\u001d%s\u0007/\u001e;Fq\u000e,\u0007\u000f^5p]\"9\u00111\u000f\u0001\u0005\u0002\u0005U\u0014\u0001\u00028b[\u0016,\"!a\u001e\u0011\t\u0005e\u00141\u0010\b\u0003\u001bML1!! x\u0005\u0019\u0019FO]5oO\u0002")
/* loaded from: input_file:org/dbpedia/spotlight/db/DBTwoStepDisambiguator.class */
/**
 * Paragraph disambiguator decompiled from {@code DBTwoStepDisambiguator.scala}.
 *
 * <p>The {@code org$dbpedia$...$$} fields keep their compiler-mangled names and public
 * visibility because the generated {@code DBTwoStepDisambiguator$$anonfun$...} classes
 * receive {@code this} and are defined outside this file.
 * NOTE(review): the logic inside those function classes is not visible here, so the
 * comments below only describe the wiring that this class itself performs.
 */
public class DBTwoStepDisambiguator implements ParagraphDisambiguator {
    public final SurfaceFormStore org$dbpedia$spotlight$db$DBTwoStepDisambiguator$$surfaceFormStore;
    private final DBCandidateSearcher candidateSearcher;
    public final Mixture org$dbpedia$spotlight$db$DBTwoStepDisambiguator$$mixture;
    public final ContextSimilarity org$dbpedia$spotlight$db$DBTwoStepDisambiguator$$contextSimilarity;
    /** Optional tokenizer; while it is {@code null}, {@link #bestK} does not tokenize the text itself. */
    private TextTokenizer tokenizer = null;
    /** The {@code k} that {@link #disambiguate} passes to {@link #bestK}. */
    private final int MAX_CANDIDATES = 10;
    /** Upper bound on the size of the sentence list that {@link #bestK} hands to {@link #bestK_} in one call. */
    private final int MAX_CONTEXT = 200;

    /**
     * Stores the collaborators in the corresponding fields.
     *
     * @param tokenTypeStore    accepted for signature compatibility; not stored by this constructor
     * @param surfaceFormStore  kept in the mangled public {@code surfaceFormStore} field
     * @param resourceStore     accepted for signature compatibility; not stored by this constructor
     * @param dBCandidateSearcher exposed through {@link #candidateSearcher()}
     * @param mixture           kept in the mangled public {@code mixture} field
     * @param contextSimilarity kept in the mangled public {@code contextSimilarity} field
     */
    public DBTwoStepDisambiguator(TokenTypeStore tokenTypeStore, SurfaceFormStore surfaceFormStore, ResourceStore resourceStore, DBCandidateSearcher dBCandidateSearcher, Mixture mixture, ContextSimilarity contextSimilarity) {
        this.org$dbpedia$spotlight$db$DBTwoStepDisambiguator$$surfaceFormStore = surfaceFormStore;
        this.candidateSearcher = dBCandidateSearcher;
        this.org$dbpedia$spotlight$db$DBTwoStepDisambiguator$$mixture = mixture;
        this.org$dbpedia$spotlight$db$DBTwoStepDisambiguator$$contextSimilarity = contextSimilarity;
    }

    /** @return the candidate searcher supplied to the constructor */
    public DBCandidateSearcher candidateSearcher() {
        return candidateSearcher;
    }

    /** @return the currently configured tokenizer, or {@code null} if none has been set */
    public TextTokenizer tokenizer() {
        return tokenizer;
    }

    /**
     * Scala-style setter for the tokenizer.
     *
     * @param textTokenizer the tokenizer to use from now on; may be {@code null}
     */
    public void tokenizer_$eq(TextTokenizer textTokenizer) {
        tokenizer = textTokenizer;
    }

    /** @return the value of {@code MAX_CANDIDATES} (10) */
    public int MAX_CANDIDATES() {
        return MAX_CANDIDATES;
    }

    /** @return the value of {@code MAX_CONTEXT} (200) */
    public int MAX_CONTEXT() {
        return MAX_CONTEXT;
    }

    /**
     * Produces a map from the paragraph's surface form occurrences to lists of resource occurrences.
     *
     * <p>Returns an empty map when the paragraph has no occurrences. Otherwise, if a tokenizer
     * is configured, the text is tokenized and the result stored as its {@code "tokens"} feature.
     * The {@code "tokens"} feature is then read unconditionally, so the {@code Option.get()} call
     * fails if the feature is absent.
     *
     * @param paragraph the paragraph to process
     * @param i         forwarded unchanged to {@link #bestK_} or to the generated function class
     * @return the resulting map
     */
    @Override // org.dbpedia.spotlight.disambiguate.ParagraphDisambiguator
    public Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> bestK(Paragraph paragraph, int i) {
        SpotlightLog$.MODULE$.debug(getClass(), "Running bestK for paragraph %s.", Predef$.MODULE$.genericWrapArray(new Object[]{paragraph.id()}), SpotlightLog$StringSpotlightLog$.MODULE$);

        if (paragraph.occurrences().size() == 0) {
            return Predef$.MODULE$.Map().apply(Nil$.MODULE$);
        }

        if (tokenizer() != null) {
            SpotlightLog$.MODULE$.info(getClass(), "Tokenizing input text...", Predef$.MODULE$.genericWrapArray(new Object[0]), SpotlightLog$StringSpotlightLog$.MODULE$);
            paragraph.text().setFeature(new Feature("tokens", tokenizer().tokenize(paragraph.text())));
        }

        List<List<Token>> sentences = DBSpotter$.MODULE$.tokensToSentences((List) paragraph.text().featureValue("tokens").get());

        if (sentences.size() > MAX_CONTEXT()) {
            // Too many sentences for a single pass: delegate to the generated function class and
            // merge whatever it yields with the generated reducer.
            // NOTE(review): presumably this handles the text piecewise; confirm against the Scala source.
            List mappedResults = (List) sentences.flatMap(new DBTwoStepDisambiguator$$anonfun$1(this, paragraph, i, sentences, JavaConversions$.MODULE$.asScalaBuffer(paragraph.getOccurrences()).toBuffer(), ArrayBuffer$.MODULE$.apply(Nil$.MODULE$)), List$.MODULE$.canBuildFrom());
            return (Map) mappedResults.toList().reduce(new DBTwoStepDisambiguator$$anonfun$4(this));
        }

        // Single pass: all occurrences plus everything flat-mapped out of the sentence list.
        List<SurfaceFormOccurrence> allOccurrences = JavaConversions$.MODULE$.asScalaBuffer(paragraph.getOccurrences()).toList();
        Seq<TokenType> flattenedTokens = (Seq) sentences.flatMap(new DBTwoStepDisambiguator$$anonfun$bestK$1(this), List$.MODULE$.canBuildFrom());
        return bestK_(paragraph, allOccurrences, flattenedTokens, i);
    }

    /**
     * Worker behind {@link #bestK} operating on an explicit occurrence list and token-type sequence.
     *
     * <p>Returns an empty map when {@code list} is empty. Otherwise it folds the occurrences into
     * an intermediate map (the fold function also receives a mutable reference that starts out as
     * an empty set), de-duplicates and sorts the token types, asks the context similarity for a
     * score over those token types and the referenced set, and folds the intermediate map's keys
     * into the final result.
     *
     * @param paragraph the paragraph the occurrences belong to
     * @param list      the surface form occurrences to process
     * @param seq       the token types used as context
     * @param i         forwarded unchanged to the generated function class
     * @return the resulting map
     */
    public Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> bestK_(Paragraph paragraph, List<SurfaceFormOccurrence> list, Seq<TokenType> seq, int i) {
        if (list.size() == 0) {
            return Predef$.MODULE$.Map().apply(Nil$.MODULE$);
        }

        // Shared with the fold function below and read back afterwards via setRef.elem.
        ObjectRef setRef = new ObjectRef(Predef$.MODULE$.Set().apply(Nil$.MODULE$));
        Map foldedOccurrences = (Map) list.foldLeft(Predef$.MODULE$.Map().apply(Nil$.MODULE$), new DBTwoStepDisambiguator$$anonfun$5(this, setRef));

        SeqLike distinctTokens = (SeqLike) seq.distinct();
        Seq<TokenType> sortedTokens = (Seq) distinctTokens.sortBy(new DBTwoStepDisambiguator$$anonfun$7(this), Ordering$Int$.MODULE$);

        return (Map) foldedOccurrences.keys().foldLeft(Predef$.MODULE$.Map().apply(Nil$.MODULE$), new DBTwoStepDisambiguator$$anonfun$bestK_$1(this, paragraph, i, foldedOccurrences, sortedTokens, this.org$dbpedia$spotlight$db$DBTwoStepDisambiguator$$contextSimilarity.score(sortedTokens, (Set) setRef.elem)));
    }

    /**
     * Runs {@link #bestK} with {@code MAX_CANDIDATES()} and post-processes the result: the map is
     * filtered, mapped and finally sorted by an {@code int} key, each step using a generated
     * function class.
     *
     * @param paragraph the paragraph to disambiguate
     * @return the sorted list of resource occurrences
     * @throws InputException declared by the original signature
     */
    @Override // org.dbpedia.spotlight.disambiguate.ParagraphDisambiguator
    public List<DBpediaResourceOccurrence> disambiguate(Paragraph paragraph) throws InputException {
        Map<SurfaceFormOccurrence, List<DBpediaResourceOccurrence>> candidates = bestK(paragraph, MAX_CANDIDATES());
        TraversableLike filtered = (TraversableLike) candidates.filter(new DBTwoStepDisambiguator$$anonfun$disambiguate$1(this));
        TraversableOnce mapped = (TraversableOnce) filtered.map(new DBTwoStepDisambiguator$$anonfun$disambiguate$2(this), Iterable$.MODULE$.canBuildFrom());
        return (List) mapped.toList().sortBy(new DBTwoStepDisambiguator$$anonfun$disambiguate$3(this), Ordering$Int$.MODULE$);
    }

    /**
     * @return a description built from the simple class name of the context similarity and the
     *         string form of the mixture
     */
    @Override // org.dbpedia.spotlight.disambiguate.ParagraphDisambiguator
    public String name() {
        StringOps template = new StringOps(Predef$.MODULE$.augmentString("Database-backed 2 Step disambiguator (%s, %s)"));
        Object[] parts = new Object[]{
            this.org$dbpedia$spotlight$db$DBTwoStepDisambiguator$$contextSimilarity.getClass().getSimpleName(),
            this.org$dbpedia$spotlight$db$DBTwoStepDisambiguator$$mixture.toString()
        };
        return template.format(Predef$.MODULE$.genericWrapArray(parts));
    }
}
