package org.wikibrain.sr.wikify;

import com.typesafe.config.Config;
import gnu.trove.list.array.TDoubleArrayList;
import gnu.trove.map.TIntDoubleMap;
import gnu.trove.map.hash.TIntDoubleHashMap;
import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.TIntHashSet;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import org.wikibrain.conf.Configuration;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.DaoFilter;
import org.wikibrain.core.dao.LocalLinkDao;
import org.wikibrain.core.dao.RawPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.model.LocalLink;
import org.wikibrain.core.model.RawPage;
import org.wikibrain.core.nlp.StringTokenizer;
import org.wikibrain.core.nlp.Token;
import org.wikibrain.phrases.AnchorTextPhraseAnalyzer;
import org.wikibrain.phrases.LinkProbabilityDao;
import org.wikibrain.phrases.PhraseAnalyzer;
import org.wikibrain.phrases.PhraseAnalyzerDao;
import org.wikibrain.phrases.PhraseTokenizer;
import org.wikibrain.sr.SRMetric;
import org.wikibrain.utils.Scoreboard;

/* loaded from: input_file:org/wikibrain/sr/wikify/WebSailWikifier.class */
/**
 * A {@link Wikifier} that proposes links for the text of a page in three steps:
 * <ol>
 *   <li>find candidate anchor phrases whose link probability exceeds a learned threshold,</li>
 *   <li>score each candidate's possible destinations by combining the phrase prior with the
 *       average SR similarity to the pages the article already links to, and</li>
 *   <li>add the best non-overlapping candidates to the links produced by the identity
 *       wikifier.</li>
 * </ol>
 */
public class WebSailWikifier implements Wikifier {
    private static final Logger LOG = Logger.getLogger(WebSailWikifier.class.getName());

    /** Maximum number of links fetched when learning the link-probability threshold. */
    private static final int NUM_TRAINING_LINKS = 10000;

    /** Fraction of the sampled links whose link probability must be at or above the learned threshold. */
    private static final double DESIRED_LINK_RECALL = 0.98d;

    /** Third argument handed to {@code getPhraseCounts}; presumably the maximum number of destinations returned (confirm against PhraseAnalyzerDao). */
    private static final int PRIOR_LOOKUP_LIMIT = 5;

    /** Weight applied to a destination's average relatedness when scoring. */
    private static final double RELATEDNESS_WEIGHT = 0.4d;

    /** Weight applied to a destination's share of the phrase prior when scoring. */
    private static final double PRIOR_WEIGHT = 0.6d;

    /** Flat bonus for destinations that are already in the page's link set (or are the page itself). */
    private static final double EXISTING_LINK_BONUS = 0.2d;

    /** Stand-in for the "top minus runner-up" margin when a candidate has exactly one scored destination. */
    private static final double SINGLE_CANDIDATE_MARGIN = 0.2d;

    /** Candidates must score strictly above this value to be emitted as links. */
    private static final double MIN_LINK_SCORE = 0.001d;

    private final Wikifier identityWikifier;
    private final SRMetric metric;
    private final LinkProbabilityDao linkProbDao;
    private final Language language;
    private final PhraseTokenizer phraseTokenizer;
    private final LocalLinkDao linkDao;
    private final PhraseAnalyzerDao phraseDao;
    private final RawPageDao rawPageDao;

    /** Threshold a phrase's link probability must exceed; the default is replaced by {@link #learnMinLinkProbability()}. */
    private double minLinkProbability = 0.01d;

    /**
     * Configuration provider that builds a {@link WebSailWikifier} for entries under
     * {@code sr.wikifier} whose {@code type} is {@code "websail"}.
     */
    /* loaded from: input_file:org/wikibrain/sr/wikify/WebSailWikifier$Provider.class */
    public static class Provider extends org.wikibrain.conf.Provider<Wikifier> {
        public Provider(Configurator configurator, Configuration configuration) throws ConfigurationException {
            super(configurator, configuration);
        }

        @Override
        public Class<Wikifier> getType() {
            return Wikifier.class;
        }

        @Override
        public String getPath() {
            return "sr.wikifier";
        }

        /**
         * Builds the wikifier described by {@code config}.
         *
         * @param str    name of the configuration entry (not used here)
         * @param config configuration section; reads {@code type}, {@code sr},
         *               {@code phraseAnalyzer}, {@code identityWikifier}, {@code localLinkDao}
         *               and {@code useLinkProbabilityCache}
         * @param map    runtime parameters; must contain {@code "language"}
         * @return the wikifier, or {@code null} when {@code type} is not {@code "websail"}
         * @throws IllegalArgumentException if the {@code "language"} runtime parameter is missing
         * @throws ConfigurationException   if a component cannot be configured or the wikifier
         *                                  fails with a {@link DaoException} during construction
         */
        @Override
        public Wikifier get(String str, Config config, Map<String, String> map) throws ConfigurationException {
            if (map == null || !map.containsKey("language")) {
                throw new IllegalArgumentException("Wikifier requires 'language' runtime parameter.");
            }
            if (!config.getString("type").equals("websail")) {
                return null;
            }
            Language lang = Language.getByLangCode(map.get("language"));
            Configurator configurator = getConfigurator();

            // Read every setting up front so that a missing key fails before any component is built.
            String srName = config.getString("sr");
            String phraseAnalyzerName = config.getString("phraseAnalyzer");
            String identityWikifierName = config.getString("identityWikifier");
            String localLinkDaoName = config.getString("localLinkDao");

            LinkProbabilityDao linkProbabilityDao = (LinkProbabilityDao) configurator.get(LinkProbabilityDao.class);
            if (config.getBoolean("useLinkProbabilityCache")) {
                linkProbabilityDao.useCache(true);
            }

            Wikifier identity = (Wikifier) configurator.get(Wikifier.class, identityWikifierName, "language", lang.getLangCode());
            RawPageDao pageDao = (RawPageDao) configurator.get(RawPageDao.class);
            LocalLinkDao localLinkDao = (LocalLinkDao) configurator.get(LocalLinkDao.class, localLinkDaoName);
            PhraseAnalyzerDao phraseAnalyzerDao =
                    ((AnchorTextPhraseAnalyzer) configurator.get(PhraseAnalyzer.class, phraseAnalyzerName)).getDao();
            SRMetric srMetric = (SRMetric) configurator.get(SRMetric.class, srName, "language", lang.getLangCode());
            try {
                return new WebSailWikifier(identity, pageDao, localLinkDao, linkProbabilityDao, phraseAnalyzerDao, srMetric);
            } catch (DaoException e) {
                throw new ConfigurationException(e);
            }
        }

        // Decompiler rendering of the compiler-generated bridge for get(); kept so the class surface is unchanged.
        /* renamed from: get, reason: collision with other method in class */
        public /* bridge */ /* synthetic */ Object m61get(String str, Config config, Map map) throws ConfigurationException {
            return get(str, config, (Map<String, String>) map);
        }
    }

    /**
     * Creates the wikifier and immediately learns the minimum link probability from existing links.
     *
     * @param wikifier           wikifier whose links are always kept and never overlapped
     * @param rawPageDao         source of page text for {@link #wikify(int)}
     * @param localLinkDao       source of existing links (training sample and per-page links)
     * @param linkProbabilityDao source of link probabilities for phrases
     * @param phraseAnalyzerDao  source of phrase-to-destination counts
     * @param sRMetric           relatedness metric; also determines the language
     * @throws DaoException if learning the threshold fails
     */
    public WebSailWikifier(Wikifier wikifier, RawPageDao rawPageDao, LocalLinkDao localLinkDao, LinkProbabilityDao linkProbabilityDao, PhraseAnalyzerDao phraseAnalyzerDao, SRMetric sRMetric) throws DaoException {
        this.identityWikifier = wikifier;
        this.metric = sRMetric;
        this.language = sRMetric.getLanguage();
        this.linkDao = localLinkDao;
        this.linkProbDao = linkProbabilityDao;
        this.rawPageDao = rawPageDao;
        this.phraseDao = phraseAnalyzerDao;
        this.phraseTokenizer = new PhraseTokenizer(linkProbabilityDao);
        learnMinLinkProbability();
    }

    /**
     * Sets {@link #minLinkProbability} so that roughly {@link #DESIRED_LINK_RECALL} of a sample of
     * existing links have an anchor text whose link probability is at or above it.
     *
     * <p>If the sample contains no links with a non-negative destination, the default threshold
     * is kept instead of failing.
     */
    private void learnMinLinkProbability() throws DaoException {
        LOG.info("Learning minimum link probability");
        TDoubleArrayList probabilities = new TDoubleArrayList();
        DaoFilter filter = new DaoFilter().setLanguages(this.language).setLimit(NUM_TRAINING_LINKS);
        for (LocalLink link : this.linkDao.get(filter)) {
            if (link.getDestId() >= 0) {
                probabilities.add(this.linkProbDao.getLinkProbability(this.language, link.getAnchorText()));
            }
        }
        if (probabilities.isEmpty()) {
            // Nothing to learn from: indexing into the empty list would throw.
            LOG.warning("No training links found for " + this.language
                    + "; keeping default minimum link probability " + this.minLinkProbability);
            return;
        }
        // Descending order: the element at fraction r is the lowest probability among the top r of links.
        probabilities.sort();
        probabilities.reverse();
        int cutoff = (int) (DESIRED_LINK_RECALL * probabilities.size());
        cutoff = Math.min(cutoff, probabilities.size() - 1); // guards a recall of 1.0
        this.minLinkProbability = probabilities.get(cutoff);
        LOG.info("Set minimum link probability to " + this.minLinkProbability + " to achieve " + DESIRED_LINK_RECALL + " recall");
    }

    /**
     * Collects every phrase token of {@code text} whose link probability is strictly above the
     * learned threshold.
     *
     * @param pageId id of the page being wikified (currently unused)
     * @param text   text to scan
     * @return candidates with their link probability set, in tokenization order
     */
    private List<LinkInfo> getCandidates(int pageId, String text) throws DaoException {
        List<LinkInfo> candidates = new ArrayList<LinkInfo>();
        for (Object sentence : new StringTokenizer().getSentenceTokens(this.language, text)) {
            for (Token phrase : this.phraseTokenizer.makePhraseTokens(this.language, (Token) sentence)) {
                double probability = this.linkProbDao.getLinkProbability(this.language, phrase.getToken());
                if (probability > this.minLinkProbability) {
                    LinkInfo candidate = new LinkInfo(phrase);
                    candidate.setLinkProbability(probability);
                    candidates.add(candidate);
                }
            }
        }
        return candidates;
    }

    /**
     * Wikifies {@code str} in the context of page {@code i}.
     *
     * @param i   id of the page the text belongs to
     * @param str text to wikify
     * @return identity-wikifier links plus the detected links, ordered by location
     */
    @Override // org.wikibrain.sr.wikify.Wikifier
    public List<LocalLink> wikify(int i, String str) throws DaoException {
        List<LinkInfo> candidates = getCandidates(i, str);
        disambiguate(candidates, i);
        return link(i, str, candidates);
    }

    /**
     * Wikifies the plain text of page {@code i}.
     *
     * @param i page id looked up through the raw page DAO
     * @return the links, or an empty list when the page does not exist
     */
    @Override // org.wikibrain.sr.wikify.Wikifier
    public List<LocalLink> wikify(int i) throws DaoException {
        RawPage page = this.rawPageDao.getById(this.language, i);
        if (page == null) {
            return new ArrayList<LocalLink>();
        }
        return wikify(i, page.getPlainText(false));
    }

    /**
     * Wikification of free text without a page id is not implemented.
     *
     * @param str ignored
     * @return always an empty, mutable list (never {@code null})
     */
    @Override // org.wikibrain.sr.wikify.Wikifier
    public List<LocalLink> wikify(String str) throws DaoException {
        return new ArrayList<LocalLink>();
    }

    /**
     * Attaches phrase priors to every candidate and then scores each candidate's destinations.
     *
     * @param candidates candidates to update in place
     * @param pageId     id of the page being wikified
     */
    private void disambiguate(List<LinkInfo> candidates, int pageId) throws DaoException {
        TIntSet actualLinks = getActualLinks(pageId);
        for (LinkInfo candidate : candidates) {
            candidate.setPrior(this.phraseDao.getPhraseCounts(this.language, candidate.getAnchortext(), PRIOR_LOOKUP_LIMIT));
        }
        // Relatedness needs the priors of all candidates, so it is computed after the loop above.
        TIntDoubleMap relatedness = calculateConceptRelatedness(pageId, actualLinks, candidates);
        for (LinkInfo candidate : candidates) {
            scoreInfo(pageId, actualLinks, candidate, relatedness);
        }
    }

    /**
     * Scores every destination in the candidate's prior and records the first scoreboard entry as
     * the candidate's destination. Candidates without a prior are left untouched.
     *
     * <p>Destination score:
     * {@code (RELATEDNESS_WEIGHT * relatedness + PRIOR_WEIGHT * count / total) * linkProbability},
     * plus {@code EXISTING_LINK_BONUS} when the destination is in {@code actualLinks}. The
     * candidate's final score is the top score multiplied by its margin over the second entry.
     *
     * @param pageId      id of the page being wikified (unused)
     * @param actualLinks destinations the page already links to, plus the page itself
     * @param info        candidate to score in place
     * @param relatedness average relatedness per destination id
     */
    private void scoreInfo(int pageId, TIntSet actualLinks, LinkInfo info, TIntDoubleMap relatedness) {
        if (info.getPrior() == null || info.getPrior().isEmpty()) {
            return;
        }
        Scoreboard<Integer> scores = info.getScores();
        for (Object key : info.getPrior().keySet()) {
            int destId = ((Integer) key).intValue();
            int count = ((Integer) info.getPrior().get(Integer.valueOf(destId))).intValue();
            double priorShare = (PRIOR_WEIGHT * count) / info.getPrior().getTotal();
            double score = ((RELATEDNESS_WEIGHT * relatedness.get(destId)) + priorShare) * info.getLinkProbability();
            if (actualLinks.contains(destId)) {
                score += EXISTING_LINK_BONUS;
            }
            scores.add(Integer.valueOf(destId), score);
        }
        // Index 0 is presumably the highest-scoring entry (confirm against Scoreboard).
        info.setDest((Integer) scores.getElement(0));
        double top = scores.getScore(0);
        double margin = scores.size() == 1 ? SINGLE_CANDIDATE_MARGIN : top - scores.getScore(1);
        info.setScore(Double.valueOf(top * margin));
    }

    /**
     * Returns the non-negative destination ids of the links fetched for {@code pageId}, plus
     * {@code pageId} itself.
     */
    private TIntSet getActualLinks(int pageId) throws DaoException {
        TIntHashSet ids = new TIntHashSet();
        for (LocalLink link : this.linkDao.getLinks(this.language, pageId, true)) {
            if (link.getDestId() >= 0) {
                ids.add(link.getDestId());
            }
        }
        ids.add(pageId);
        return ids;
    }

    /**
     * Computes, for every destination that appears in any candidate's prior, its average
     * cosimilarity to the page's actual links. Infinite and NaN similarities contribute zero to
     * the sum but still count in the denominator.
     *
     * @param pageId      id of the page being wikified (unused)
     * @param actualLinks ids to compare against
     * @param candidates  candidates whose priors supply the destination ids
     * @return map from destination id to average similarity; empty when either side is empty
     */
    private TIntDoubleMap calculateConceptRelatedness(int pageId, TIntSet actualLinks, List<LinkInfo> candidates) throws DaoException {
        TIntHashSet destinations = new TIntHashSet();
        for (LinkInfo candidate : candidates) {
            if (candidate.getPrior() != null) {
                for (Object key : candidate.getPrior().keySet()) {
                    destinations.add(((Integer) key).intValue());
                }
            }
        }
        int[] contextIds = actualLinks.toArray();
        int[] destIds = destinations.toArray();
        TIntDoubleHashMap relatedness = new TIntDoubleHashMap();
        if (contextIds.length == 0 || destIds.length == 0) {
            return relatedness;
        }
        double[][] similarities = this.metric.cosimilarity(destIds, contextIds);
        for (int row = 0; row < destIds.length; row++) {
            double sum = 0.0d;
            for (double similarity : similarities[row]) {
                if (!Double.isInfinite(similarity) && !Double.isNaN(similarity)) {
                    sum += similarity;
                }
            }
            relatedness.put(destIds[row], sum / contextIds.length);
        }
        return relatedness;
    }

    /**
     * Merges the identity wikifier's links with the scored candidates. Candidates are visited in
     * their natural order (which sorts {@code candidates} in place) and accepted only when they
     * have a destination, score above {@link #MIN_LINK_SCORE} and do not overlap any character
     * range that is already linked.
     *
     * @param pageId     id of the page being wikified
     * @param text       text being wikified
     * @param candidates scored candidates
     * @return a new list of links ordered by location
     */
    private List<LocalLink> link(int pageId, String text, List<LinkInfo> candidates) throws DaoException {
        BitSet linkedChars = new BitSet(text.length());
        List<LocalLink> existing = this.identityWikifier.wikify(pageId, text);
        // Copy so that a null or unmodifiable delegate result cannot break the additions below.
        List<LocalLink> links = existing == null ? new ArrayList<LocalLink>() : new ArrayList<LocalLink>(existing);
        for (LocalLink link : links) {
            int start = link.getLocation();
            linkedChars.set(start, start + link.getAnchorText().length());
        }
        Collections.sort(candidates);
        for (LinkInfo candidate : candidates) {
            if (candidate.getDest() != null
                    && candidate.getScore().doubleValue() > MIN_LINK_SCORE
                    && linkedChars.get(candidate.getStartChar(), candidate.getEndChar()).isEmpty()) {
                links.add(candidate.toLocalLink(this.language, pageId));
                linkedChars.set(candidate.getStartChar(), candidate.getEndChar());
            }
        }
        Collections.sort(links, new Comparator<LocalLink>() {
            @Override // java.util.Comparator
            public int compare(LocalLink left, LocalLink right) {
                // Integer.compare avoids the overflow that int subtraction can produce.
                return Integer.compare(left.getLocation(), right.getLocation());
            }
        });
        return links;
    }
}
