package org.wikibrain.sr.wikify;

import com.typesafe.config.Config;
import gnu.trove.TCollections;
import gnu.trove.map.TIntDoubleMap;
import gnu.trove.map.hash.TIntDoubleHashMap;
import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.TIntHashSet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikibrain.conf.Configuration;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.cmd.EnvBuilder;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.DaoFilter;
import org.wikibrain.core.dao.LocalLinkDao;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.dao.RawPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.model.LocalLink;
import org.wikibrain.core.model.NameSpace;
import org.wikibrain.core.model.RawPage;
import org.wikibrain.core.nlp.NGramCreator;
import org.wikibrain.core.nlp.StringTokenizer;
import org.wikibrain.core.nlp.Token;
import org.wikibrain.phrases.AnchorTextPhraseAnalyzer;
import org.wikibrain.phrases.LinkProbabilityDao;
import org.wikibrain.phrases.PhraseAnalyzer;
import org.wikibrain.phrases.PhraseAnalyzerDao;
import org.wikibrain.phrases.PrunedCounts;
import org.wikibrain.sr.SRMetric;

/* loaded from: input_file:org/wikibrain/sr/wikify/MilneWittenWikifier.class */
public class MilneWittenWikifier implements Wikifier {
    private static final Logger LOG = LoggerFactory.getLogger(MilneWittenWikifier.class);
    // Page lookups by title and by id (used by testWikify).
    private final LocalPageDao lpd;
    // Link access: in-link counts for getGenerality and a page's existing links for identifyKnownCandidates.
    private final LocalLinkDao lld;
    // Source of raw pages whose plain text is wikified by wikify(int).
    private final RawPageDao rpd;
    // Relatedness metric; supplies the cosimilarity matrix in getRelatedness and the language below.
    private final SRMetric metric;
    // Phrase -> destination-page counts, obtained from the AnchorTextPhraseAnalyzer passed to the constructor.
    private final PhraseAnalyzerDao phraseDao;
    // Looks up the link probability of a phrase.
    private final LinkProbabilityDao linkProbDao;
    // Language of the metric; every DAO call in this class is made for this language.
    private final Language language;
    // NOTE(review): not referenced anywhere in the visible code.
    private int numTestingDocs = 100;
    // N-grams whose link probability is below this value are not considered link candidates (see makeLinkInfo).
    private double minLinkProbability = 0.03d;
    // Passed to NGramCreator.getNGramTokens together with 1; presumably the min/max n-gram length - confirm.
    private int maxNGram = 3;
    private StringTokenizer tokenizer = new StringTokenizer();
    private NGramCreator nGramCreator = new NGramCreator();
    // Memo of values computed by getGenerality, wrapped with TCollections.synchronizedMap.
    private final TIntDoubleMap generality = TCollections.synchronizedMap(new TIntDoubleHashMap());
    // NOTE(review): not referenced by name in the visible code; getGenerality uses the literal 1000.
    private final int MAX_INLINKS = 1000;

    /* loaded from: input_file:org/wikibrain/sr/wikify/MilneWittenWikifier$Provider.class */
    /**
     * Configuration provider that builds a {@link MilneWittenWikifier} for
     * configuration entries under "sr.wikifier" whose "type" is "milnewitten".
     */
    public static class Provider extends org.wikibrain.conf.Provider<Wikifier> {
        public Provider(Configurator configurator, Configuration configuration) throws ConfigurationException {
            super(configurator, configuration);
        }

        public Class<Wikifier> getType() {
            return Wikifier.class;
        }

        public String getPath() {
            return "sr.wikifier";
        }

        /**
         * Builds the wikifier described by {@code config}.
         *
         * @param str    component name (not used by this provider)
         * @param config configuration subtree; the keys read are "type", "sr",
         *               "phraseAnalyzer", "localLinkDao" and "useLinkProbabilityCache"
         * @param map    runtime parameters; must contain "language"
         * @return a new wikifier, or {@code null} when "type" is not "milnewitten"
         * @throws IllegalArgumentException if the "language" runtime parameter is missing
         *         (checked before the type, so it is thrown for any type)
         */
        public Wikifier get(String str, Config config, Map<String, String> map) throws ConfigurationException {
            boolean languageGiven = map != null && map.containsKey("language");
            if (!languageGiven) {
                throw new IllegalArgumentException("Wikifier requires 'language' runtime parameter.");
            }
            boolean isMilneWitten = config.getString("type").equals("milnewitten");
            if (!isMilneWitten) {
                return null;
            }

            Language lang = Language.getByLangCode(map.get("language"));
            Configurator conf = getConfigurator();
            String srName = config.getString("sr");
            String phraseAnalyzerName = config.getString("phraseAnalyzer");
            String linkDaoName = config.getString("localLinkDao");

            LinkProbabilityDao linkProbs = (LinkProbabilityDao) Env.getComponent(conf, LinkProbabilityDao.class, lang);
            if (config.getBoolean("useLinkProbabilityCache")) {
                linkProbs.useCache(true);
            }

            // Components are resolved in the same order the constructor takes them.
            SRMetric srMetric = (SRMetric) conf.get(SRMetric.class, srName, "language", lang.getLangCode());
            AnchorTextPhraseAnalyzer analyzer = (AnchorTextPhraseAnalyzer) conf.get(PhraseAnalyzer.class, phraseAnalyzerName);
            LocalPageDao pageDao = (LocalPageDao) conf.get(LocalPageDao.class);
            RawPageDao rawDao = (RawPageDao) conf.get(RawPageDao.class);
            LocalLinkDao linkDao = (LocalLinkDao) conf.get(LocalLinkDao.class, linkDaoName);
            return new MilneWittenWikifier(srMetric, analyzer, pageDao, rawDao, linkDao, linkProbs);
        }

        /* renamed from: get, reason: collision with other method in class */
        public /* bridge */ /* synthetic */ Object m74get(String str, Config config, Map map) throws ConfigurationException {
            // Decompiler rendering of the erased bridge method; simply delegates to the typed get().
            return get(str, config, (Map<String, String>) map);
        }
    }

    /**
     * Creates a wikifier from its collaborators.
     *
     * The phrase DAO is taken from {@code anchorTextPhraseAnalyzer.getDao()} and the
     * working language from {@code sRMetric.getLanguage()}.
     */
    public MilneWittenWikifier(SRMetric sRMetric, AnchorTextPhraseAnalyzer anchorTextPhraseAnalyzer, LocalPageDao localPageDao, RawPageDao rawPageDao, LocalLinkDao localLinkDao, LinkProbabilityDao linkProbabilityDao) {
        // Derived collaborators first, then the ones stored as given.
        this.phraseDao = anchorTextPhraseAnalyzer.getDao();
        this.language = sRMetric.getLanguage();
        this.metric = sRMetric;
        this.lpd = localPageDao;
        this.rpd = rawPageDao;
        this.lld = localLinkDao;
        this.linkProbDao = linkProbabilityDao;
    }

    /**
     * Manual smoke test: wikifies the page titled "Barack Obama" once and prints
     * every detected link together with the title of its destination page.
     *
     * @throws DaoException if any DAO lookup fails
     */
    public void testWikify() throws DaoException {
        int pageId = this.lpd.getIdByTitle("Barack Obama", this.language, NameSpace.ARTICLE);
        RawPage page = this.rpd.getById(this.language, pageId);
        // Single pass; the pass number is still printed in the header line.
        final int pass = 0;
        List<LocalLink> links = wikify(page.getLocalId());
        System.out.println("Links detected for " + page.getTitle() + " (" + pass + ")");
        for (LocalLink link : links) {
            System.out.println("\t" + link + " page " + this.lpd.getById(this.language, link.getDestId()).getTitle());
        }
    }

    /**
     * Splits {@code str} into sentence tokens, splits each sentence into word tokens,
     * and collects the n-gram tokens the NGramCreator produces for each sentence
     * (called with 1 and {@code maxNGram}).
     */
    private List<Token> getNGramTokens(String str) {
        List<Token> ngrams = new ArrayList<Token>();
        for (Object sentence : tokenizer.getSentenceTokens(language, str)) {
            ngrams.addAll(nGramCreator.getNGramTokens(tokenizer.getWordTokens(language, (Token) sentence), 1, maxNGram));
        }
        return ngrams;
    }

    /**
     * Looks up the link probability of a phrase via the link-probability DAO.
     *
     * @throws DaoException if the lookup fails
     */
    private double getLinkProbability(String str) throws DaoException {
        double probability = linkProbDao.getLinkProbability(str);
        return probability;
    }

    /**
     * Detects links in {@code str}, treating it as the text of page {@code i}:
     * candidates whose anchor text matches one of the page's existing links get
     * that link's destination as their known destination before scoring.
     *
     * @param i   id of the page the text belongs to; used as the source id of every returned link
     * @param str text to wikify
     * @return detected links in the order produced by detectLinks
     * @throws DaoException if a DAO lookup fails
     */
    @Override // org.wikibrain.sr.wikify.Wikifier
    public List<LocalLink> wikify(int i, String str) throws DaoException {
        List<LinkInfo> found = getCandidates(str);
        identifyKnownCandidates(i, found);
        List<LocalLink> links = new ArrayList<LocalLink>();
        for (LinkInfo info : detectLinks(found)) {
            LocalLink link = new LocalLink(
                    language,
                    info.getAnchortext(),
                    i,
                    info.getDest().intValue(),
                    true,
                    info.getStartChar(),
                    true,
                    (LocalLink.LocationType) null);
            links.add(link);
        }
        return links;
    }

    /**
     * Wikifies the plain text of the page with the given id.
     *
     * @param i local page id
     * @return detected links, or an empty list when the raw page DAO returns no page for {@code i}
     * @throws DaoException if a DAO lookup fails
     */
    @Override // org.wikibrain.sr.wikify.Wikifier
    public List<LocalLink> wikify(int i) throws DaoException {
        RawPage page = rpd.getById(language, i);
        if (page == null) {
            return new ArrayList<LocalLink>();
        }
        return wikify(i, page.getPlainText(false));
    }

    /**
     * Detects links in free text that is not associated with any page.
     *
     * Every returned link has source id -1, and the result is sorted by
     * ascending link location.
     *
     * @param str text to wikify
     * @return detected links ordered by location
     * @throws DaoException if a DAO lookup fails
     */
    @Override // org.wikibrain.sr.wikify.Wikifier
    public List<LocalLink> wikify(String str) throws DaoException {
        List<LinkInfo> detected = detectLinks(getCandidates(str));
        List<LocalLink> links = new ArrayList<LocalLink>(detected.size());
        for (LinkInfo linkInfo : detected) {
            links.add(new LocalLink(this.language, linkInfo.getAnchortext(), -1, linkInfo.getDest().intValue(), true, linkInfo.getStartChar(), true, (LocalLink.LocationType) null));
        }
        Collections.sort(links, new Comparator<LocalLink>() {
            @Override // java.util.Comparator
            public int compare(LocalLink localLink, LocalLink localLink2) {
                // Integer.compare instead of subtraction: cannot overflow for any pair of ints.
                return Integer.compare(localLink.getLocation(), localLink2.getLocation());
            }
        });
        return links;
    }

    /**
     * Scores every candidate, sorts the candidate list in place using LinkInfo's
     * natural ordering, and selects candidates greedily.
     *
     * Selection walks the sorted list, stops at the first candidate whose score is
     * below 0.01, and skips any candidate for which {@code intersects} reports an
     * overlap with the positions already marked by accepted candidates.
     * NOTE(review): the early stop only makes sense if the natural ordering is by
     * descending score - confirm against LinkInfo.compareTo.
     *
     * @param list candidates; reordered by this call
     * @return the accepted candidates, in sorted order
     * @throws DaoException if scoring needs a DAO lookup that fails
     */
    private List<LinkInfo> detectLinks(List<LinkInfo> list) throws DaoException {
        TIntDoubleMap relatedness = getRelatedness(list);
        // Lets scoreLinkInfo reuse the result of an earlier candidate with the same anchor text.
        Map<String, LinkInfo> scoredByAnchor = new HashMap<String, LinkInfo>();
        for (LinkInfo candidate : list) {
            scoreLinkInfo(candidate, scoredByAnchor, relatedness);
        }

        Collections.sort(list);

        TIntSet taken = new TIntHashSet();
        List<LinkInfo> accepted = new ArrayList<LinkInfo>();
        for (LinkInfo candidate : list) {
            if (candidate.getScore().doubleValue() < 0.01d) {
                break;
            }
            if (candidate.intersects(taken)) {
                continue;
            }
            accepted.add(candidate);
            candidate.markAsUsed(taken);
        }
        return accepted;
    }

    /**
     * Computes, for every destination of an ambiguous candidate, its mean
     * similarity to the "context" pages.
     *
     * Context pages are the known destinations plus the top prior destination of
     * candidates that report a single possibility. All prior destinations of the
     * remaining candidates form the set that is scored.
     *
     * NOTE(review): the row sum is divided by the number of context pages; when
     * there are none this is a division by 0.0 (NaN if the row sum is 0) - confirm
     * the intended behaviour for text without any unambiguous candidate.
     *
     * @return map from ambiguous destination id to its mean similarity with the context;
     *         context-only ids get no entry
     * @throws DaoException if the metric fails
     */
    private TIntDoubleMap getRelatedness(List<LinkInfo> list) throws DaoException {
        TIntHashSet context = new TIntHashSet();
        TIntHashSet ambiguous = new TIntHashSet();
        for (LinkInfo info : list) {
            Integer known = info.getKnownDest();
            if (known != null) {
                context.add(known.intValue());
                continue;
            }
            if (info.hasOnePossibility()) {
                context.add(info.getTopPriorDestination());
                continue;
            }
            for (Object destId : info.getPrior().keySet()) {
                ambiguous.add(((Integer) destId).intValue());
            }
        }

        int[] contextIds = context.toArray();
        int[] ambiguousIds = ambiguous.toArray();
        // Rows follow ambiguousIds; each row is summed and averaged over the context size.
        double[][] similarity = this.metric.cosimilarity(ambiguousIds, contextIds);

        TIntDoubleHashMap meanByDest = new TIntDoubleHashMap();
        int row = 0;
        while (row < ambiguousIds.length) {
            double total = 0.0d;
            for (double value : similarity[row]) {
                total += value;
            }
            meanByDest.put(ambiguousIds[row], total / contextIds.length);
            row++;
        }
        return meanByDest;
    }

    /**
     * Chooses a destination and a score for one candidate.
     *
     * <ul>
     *   <li>A candidate with a known destination keeps it and gets the fixed score 1000000.</li>
     *   <li>A candidate whose anchor text was already scored (present in {@code map})
     *       copies that destination and score.</li>
     *   <li>Otherwise each prior destination is scored as
     *       prior count * relatedness * link probability * generality; the top-ranked
     *       destination wins, and its score is multiplied by 3 when it is the only
     *       entry, or by min(3, top / runner-up) otherwise. The candidate is then
     *       remembered in {@code map} under its anchor text.</li>
     * </ul>
     * If the score board ends up empty the candidate is left without destination or score.
     *
     * @throws DaoException if a generality lookup fails
     */
    private void scoreLinkInfo(LinkInfo linkInfo, Map<String, LinkInfo> map, TIntDoubleMap tIntDoubleMap) throws DaoException {
        Integer known = linkInfo.getKnownDest();
        if (known != null) {
            linkInfo.setDest(known);
            linkInfo.setScore(Double.valueOf(1000000.0d));
            return;
        }

        String anchor = linkInfo.getAnchortext();
        if (map.containsKey(anchor)) {
            LinkInfo earlier = map.get(anchor);
            linkInfo.setDest(earlier.getDest());
            linkInfo.setScore(earlier.getScore());
            return;
        }

        for (Object key : linkInfo.getPrior().keySet()) {
            Integer destId = (Integer) key;
            int id = destId.intValue();
            // Factors are multiplied strictly left to right.
            linkInfo.addScore(id, ((Integer) linkInfo.getPrior().get(destId)).intValue() * tIntDoubleMap.get(id) * linkInfo.getLinkProbability() * getGenerality(id));
        }

        int ranked = linkInfo.getScores().size();
        if (ranked == 0) {
            return;
        }

        linkInfo.setDest((Integer) linkInfo.getScores().getElement(0));
        linkInfo.setScore(Double.valueOf(linkInfo.getScores().getScore(0)));

        // Boost the winner by its margin over the runner-up, capped at 3x.
        double top = linkInfo.getScore().doubleValue();
        double boost;
        if (ranked == 1) {
            boost = 3.0d;
        } else {
            boost = Math.min(3.0d, top / linkInfo.getScores().getScore(1));
        }
        linkInfo.setScore(Double.valueOf(top * boost));

        map.put(anchor, linkInfo);
    }

    /**
     * Returns a weight for page {@code i} derived from the number of links pointing to it:
     * {@code 0.5 + log(1 + min(1000, inlinks)) / log(1001)}, i.e. a value between
     * 0.5 (no in-links) and 1.5 (1000 or more in-links).
     *
     * Results are memoised per page id in the {@code generality} map.
     *
     * Previously the computed weight was discarded: the raw in-link count was
     * returned and stored under the count itself, so later lookups by page id
     * either missed or returned the entry of an unrelated page.
     *
     * @param i destination page id
     * @return the weight for the page
     * @throws DaoException if the in-link count cannot be retrieved
     */
    private double getGenerality(int i) throws DaoException {
        if (this.generality.containsKey(i)) {
            return this.generality.get(i);
        }
        int count = this.lld.getCount(new DaoFilter().setLanguages(this.language).setDestIds(i));
        // In-link count is capped at 1000 so the log ratio tops out at 1.0.
        double weight = 0.5d + (Math.log(1 + Math.min(1000, count)) / Math.log(1001.0d));
        // Key by page id (not by count) so the containsKey(i) check above can hit.
        this.generality.put(i, weight);
        return weight;
    }

    /**
     * Marks candidates whose anchor text matches an existing link of page {@code i}.
     *
     * For each distinct anchor text among the page's links (links with a negative
     * destination id or a null anchor text are ignored), the first candidate with
     * that anchor text and no known destination receives the link's destination.
     * Matching candidates that already have a known destination are logged as
     * conflicts and the search continues.
     *
     * @param i    id of the page whose existing links are consulted
     * @param list candidates to annotate in place
     * @throws DaoException if the page's links cannot be read
     */
    private void identifyKnownCandidates(int i, List<LinkInfo> list) throws DaoException {
        HashSet<String> handledAnchors = new HashSet<String>();
        for (LocalLink link : this.lld.getLinks(this.language, i, true)) {
            if (link.getDestId() < 0 || link.getAnchorText() == null || handledAnchors.contains(link.getAnchorText())) {
                continue;
            }
            String anchor = link.getAnchorText();
            for (LinkInfo candidate : list) {
                if (!anchor.equals(candidate.getAnchortext())) {
                    continue;
                }
                if (candidate.getKnownDest() == null) {
                    candidate.setKnownDest(Integer.valueOf(link.getDestId()));
                    break;
                }
                LOG.info("conflict for link info " + candidate.getAnchortext() + " between " + candidate.getKnownDest() + " and " + link.getDestId());
            }
            // Only the first link with a given anchor text is ever considered.
            handledAnchors.add(anchor);
        }
    }

    /**
     * Returns the unscored link candidates found in {@code str}; a public view of
     * getCandidates.
     *
     * @throws DaoException if a DAO lookup fails
     */
    public List<LinkInfo> getTextContext(String str) throws DaoException {
        List<LinkInfo> candidates = getCandidates(str);
        return candidates;
    }

    /**
     * Builds a link candidate for every n-gram of {@code str} that makeLinkInfo accepts.
     *
     * A per-call map keyed by anchor text is shared across n-grams so that repeated
     * phrases reuse the prior counts of their first occurrence.
     *
     * @throws DaoException if a DAO lookup fails
     */
    private List<LinkInfo> getCandidates(String str) throws DaoException {
        Map<String, LinkInfo> firstByAnchor = new HashMap<String, LinkInfo>();
        List<LinkInfo> found = new ArrayList<LinkInfo>();
        for (Token ngram : getNGramTokens(str)) {
            LinkInfo info = makeLinkInfo(ngram, firstByAnchor);
            if (info == null) {
                continue;
            }
            found.add(info);
        }
        return found;
    }

    /**
     * Turns an n-gram token into a link candidate.
     *
     * @param token n-gram to examine
     * @param map   candidates already created in this pass, keyed by anchor text; the
     *              first candidate for a phrase is stored here and later occurrences
     *              reuse its prior counts instead of querying the phrase DAO again
     * @return a new candidate, or {@code null} when the phrase's link probability is
     *         below {@code minLinkProbability} or the phrase DAO has no counts for it
     * @throws DaoException if a DAO lookup fails
     */
    private LinkInfo makeLinkInfo(Token token, Map<String, LinkInfo> map) throws DaoException {
        double probability = getLinkProbability(token.getToken());
        if (probability < this.minLinkProbability) {
            return null;
        }

        if (map.containsKey(token.getToken())) {
            LinkInfo first = map.get(token.getToken());
            LinkInfo repeat = newLinkInfo(token, probability);
            repeat.setPrior(first.getPrior());
            return repeat;
        }

        // First occurrence of this phrase: fetch up to 30 destination counts.
        PrunedCounts<Integer> counts = this.phraseDao.getPhraseCounts(this.language, token.getToken(), 30);
        if (counts == null || counts.isEmpty()) {
            return null;
        }
        LinkInfo created = newLinkInfo(token, probability);
        created.setPrior(counts);
        map.put(token.getToken(), created);
        return created;
    }

    /** Creates a candidate carrying the token's text, character span and link probability (no prior yet). */
    private LinkInfo newLinkInfo(Token token, double linkProbability) {
        LinkInfo info = new LinkInfo();
        info.setLinkProbability(linkProbability);
        info.setAnchortext(token.getToken());
        info.setStartChar(token.getBegin());
        info.setEndChar(token.getEnd());
        return info;
    }

    /**
     * Command-line entry point: builds an environment from the arguments, obtains the
     * "default" wikifier for language "simple" from the configurator and runs testWikify.
     */
    public static void main(String[] strArr) throws ConfigurationException, DaoException, IOException {
        Env env = EnvBuilder.envFromArgs(strArr);
        Configurator configurator = env.getConfigurator();
        MilneWittenWikifier wikifier = (MilneWittenWikifier) configurator.get(MilneWittenWikifier.class, "default", "language", "simple");
        wikifier.testWikify();
    }
}
