package org.wikibrain.sr.vector;

import com.typesafe.config.Config;
import gnu.trove.map.TIntFloatMap;
import gnu.trove.map.hash.TIntFloatHashMap;
import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.TIntHashSet;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.wikibrain.conf.Configuration;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.model.LocalPage;
import org.wikibrain.lucene.LuceneSearcher;
import org.wikibrain.lucene.QueryBuilder;
import org.wikibrain.lucene.WikiBrainScoreDoc;
import org.wikibrain.lucene.WpIdFilter;
import org.wikibrain.sr.Explanation;
import org.wikibrain.sr.SRResult;
import org.wikibrain.sr.SRResultList;
import org.wikibrain.sr.utils.Leaderboard;
import org.wikibrain.sr.utils.SimUtils;

/* loaded from: input_file:org/wikibrain/sr/vector/ESAGenerator.class */
public class ESAGenerator implements VectorGenerator {
    /** Logger named after this class. */
    private static final Logger LOG = Logger.getLogger(ESAGenerator.class.getName());
    /** Searcher used to map local ids to document ids and to obtain query builders. */
    private final LuceneSearcher searcher;
    /** Language passed to every searcher and page-DAO call made by this generator. */
    private final Language language;
    /** DAO used to look up pages by id when building explanations. */
    private final LocalPageDao pageDao;
    /** Filter added to every query builder when set; {@code null} means no concept filter is applied. */
    private WpIdFilter conceptFilter = null;
    /** Integer ids read from the blacklist file; consulted when concepts are installed. */
    private TIntSet blackListSet;
    /** Path of the blacklist file; {@code null} or empty means no blacklist is loaded. */
    private final String blackListFilePath;

    /* loaded from: input_file:org/wikibrain/sr/vector/ESAGenerator$Provider.class */
    /**
     * Configuration provider that builds {@link ESAGenerator} instances for
     * configuration entries whose {@code type} is {@code "esa"}.
     */
    public static class Provider extends org.wikibrain.conf.Provider<VectorGenerator> {
        public Provider(Configurator configurator, Configuration configuration) throws ConfigurationException {
            super(configurator, configuration);
        }

        /** Returns {@code VectorGenerator.class}, the type of component this provider produces. */
        public Class getType() {
            return VectorGenerator.class;
        }

        /** Returns the configuration path string {@code "sr.metric.generator"}. */
        public String getPath() {
            return "sr.metric.generator";
        }

        /**
         * Builds an {@link ESAGenerator} from the given configuration entry.
         *
         * @param str    name of the configuration entry (not used by this method)
         * @param config configuration entry; must contain {@code type} and {@code luceneSearcher},
         *               and may contain {@code concepts} (a directory holding {@code <langCode>.txt})
         * @param map    runtime parameters; must contain a {@code language} entry
         * @return the generator, or {@code null} when {@code type} is not {@code "esa"}
         * @throws IllegalArgumentException if {@code map} is {@code null} or has no {@code language} entry
         * @throws ConfigurationException   if reading the concepts file fails with an {@link IOException}
         */
        public VectorGenerator get(String str, Config config, Map<String, String> map) throws ConfigurationException {
            if (!config.getString("type").equals("esa")) {
                return null;
            }
            // A null parameter map is reported the same way as a missing language,
            // instead of surfacing as a bare NullPointerException.
            if (map == null || !map.containsKey("language")) {
                throw new IllegalArgumentException("Monolingual SR Metric requires 'language' runtime parameter");
            }
            Language byLangCode = Language.getByLangCode(map.get("language"));
            LocalPageDao localPageDao = (LocalPageDao) getConfigurator().get(LocalPageDao.class);
            LuceneSearcher luceneSearcher = (LuceneSearcher) getConfigurator().get(LuceneSearcher.class, config.getString("luceneSearcher"));
            String blackListPath = getConfig().get().getString("sr.blacklist.path");
            ESAGenerator eSAGenerator = new ESAGenerator(byLangCode, localPageDao, luceneSearcher, blackListPath);
            if (config.hasPath("concepts")) {
                try {
                    // Concept ids live in <concepts dir>/<langCode>.txt
                    eSAGenerator.setConcepts(FileUtils.getFile(new String[]{config.getString("concepts"), byLangCode.getLangCode() + ".txt"}));
                } catch (IOException e) {
                    throw new ConfigurationException(e);
                }
            }
            return eSAGenerator;
        }

        /* renamed from: get, reason: collision with other method in class */
        public /* bridge */ /* synthetic */ Object m44get(String str, Config config, Map map) throws ConfigurationException {
            return get(str, config, (Map<String, String>) map);
        }
    }

    /**
     * Creates a generator for one language and attempts to load the blacklist.
     *
     * <p>Blacklist loading is best-effort: a failure is logged together with its
     * cause and construction continues with whatever ids were read before the failure.
     *
     * @param language       language used for all searcher and DAO calls
     * @param localPageDao   DAO used to look up pages for explanations
     * @param luceneSearcher searcher used to build and run queries
     * @param str            path of the blacklist file; {@code null} or empty to skip loading
     */
    public ESAGenerator(Language language, LocalPageDao localPageDao, LuceneSearcher luceneSearcher, String str) {
        this.language = language;
        this.pageDao = localPageDao;
        this.searcher = luceneSearcher;
        this.blackListFilePath = str;
        try {
            createBlackListSet();
        } catch (Exception e) {
            // Keep the generator usable, but record why the blacklist is incomplete.
            LOG.log(Level.WARNING, "Could not create Blacklist Set from " + str, e);
        }
    }

    /**
     * Resets the blacklist to an empty set and, when a blacklist path is configured,
     * fills it with the integer tokens read from that file.
     *
     * @throws FileNotFoundException if the configured blacklist file cannot be opened
     */
    private void createBlackListSet() throws FileNotFoundException {
        // Assigned first so the set is never null, even if loading fails below.
        this.blackListSet = new TIntHashSet();
        if (this.blackListFilePath == null || this.blackListFilePath.equals("")) {
            LOG.info("Skipping blacklist creation; no blacklist file specified.");
            return;
        }
        Scanner scanner = new Scanner(new File(this.blackListFilePath));
        try {
            while (scanner.hasNext()) {
                // nextInt() throws on a non-integer token; the finally block still closes the file.
                this.blackListSet.add(scanner.nextInt());
            }
        } finally {
            scanner.close();
        }
    }

    /**
     * Builds the normalized score vector for the page with the given local id.
     *
     * @param i local page id
     * @return the normalized vector, or an empty map when the searcher reports a negative document id
     * @throws DaoException declared by the interface contract
     */
    @Override // org.wikibrain.sr.vector.VectorGenerator
    public TIntFloatMap getVector(int i) throws DaoException {
        final int luceneDocId = this.searcher.getDocIdFromLocalId(i, this.language);
        if (luceneDocId < 0) {
            LOG.warning("Unindexed document " + i + " in " + this.language.getEnLangName());
            return new TIntFloatHashMap();
        }
        // Query for similar documents, trim the tail, then turn the hits into a vector.
        final WikiBrainScoreDoc[] hits = getQueryBuilder().setMoreLikeThisQuery(luceneDocId).search();
        final WikiBrainScoreDoc[] pruned = pruneSimilar(hits);
        return SimUtils.normalizeVector(expandScores(pruned));
    }

    /**
     * Builds the normalized score vector for a phrase.
     *
     * @param str phrase to query for
     * @return the normalized vector, or {@code null} when the query builder reports
     *         that no query could be built from the phrase
     */
    @Override // org.wikibrain.sr.vector.VectorGenerator
    public TIntFloatMap getVector(String str) {
        final QueryBuilder builder = getQueryBuilder().setPhraseQuery(str);
        if (!builder.hasQuery()) {
            LOG.log(Level.WARNING, "Phrase cannot be parsed to get a query. " + str);
            return null;
        }
        final WikiBrainScoreDoc[] pruned = SimUtils.pruneSimilar(builder.search());
        return SimUtils.normalizeVector(expandScores(pruned));
    }

    /**
     * Installs a concept filter from a file containing one integer id per line.
     *
     * <p>The current filter is cleared first. If {@code file} is not a regular file,
     * a warning is logged and no filter is installed. Surrounding whitespace is
     * ignored, blank lines are skipped, and blacklisted ids are left out.
     *
     * @param file file listing the concept ids
     * @throws IOException if the file cannot be read or contains a line that is not an integer
     */
    public void setConcepts(File file) throws IOException {
        this.conceptFilter = null;
        if (!file.isFile()) {
            LOG.warning("concept path " + file + " not a file; defaulting to all concepts");
            return;
        }
        TIntHashSet conceptIds = new TIntHashSet();
        for (String rawLine : FileUtils.readLines(file)) {
            String line = rawLine.trim();
            if (line.isEmpty()) {
                // Tolerate blank lines rather than failing the whole load.
                continue;
            }
            int conceptId;
            try {
                conceptId = Integer.parseInt(line);
            } catch (NumberFormatException e) {
                // Report malformed content through the declared exception, with context.
                throw new IOException("Invalid concept id '" + line + "' in " + file, e);
            }
            if (!isBlacklisted(conceptId)) {
                conceptIds.add(conceptId);
            }
        }
        this.conceptFilter = new WpIdFilter(conceptIds.toArray());
        LOG.warning("installed " + conceptIds.size() + " concepts for " + this.language);
    }

    /** Returns whether the given id is present in the blacklist set. */
    private boolean isBlacklisted(int i) {
        final TIntSet blacklist = this.blackListSet;
        return blacklist.contains(i);
    }

    /**
     * Explains a similarity result by listing pages whose ids appear in both vectors.
     *
     * <p>Each shared id is scored by the product of its two vector values and the
     * five highest-scoring ids are kept. Ids that the page DAO cannot resolve are skipped.
     *
     * @param localPage     first page
     * @param localPage2    second page
     * @param tIntFloatMap  vector for the first page; {@code null} is treated as having no entries
     * @param tIntFloatMap2 vector for the second page; {@code null} is treated as having no entries
     * @param sRResult      similarity result (not used by this method)
     * @return one explanation per resolved shared id, or a single explanation stating
     *         that nothing is shared
     * @throws DaoException if a page lookup fails
     */
    @Override // org.wikibrain.sr.vector.VectorGenerator
    public List<Explanation> getExplanations(LocalPage localPage, LocalPage localPage2, TIntFloatMap tIntFloatMap, TIntFloatMap tIntFloatMap2, SRResult sRResult) throws DaoException {
        Leaderboard leaderboard = new Leaderboard(5);
        // getVector(String) can return null, so guard before dereferencing either vector.
        if (tIntFloatMap != null && tIntFloatMap2 != null) {
            for (int id : tIntFloatMap.keys()) {
                if (tIntFloatMap2.containsKey(id)) {
                    leaderboard.tallyScore(id, tIntFloatMap.get(id) * tIntFloatMap2.get(id));
                }
            }
        }
        SRResultList top = leaderboard.getTop();
        if (top.numDocs() == 0) {
            // This generator compares text, not links, so the message says so.
            return Arrays.asList(new Explanation("? and ? share no similar text", localPage, localPage2));
        }
        top.sortDescending();
        List<Explanation> explanations = new ArrayList<Explanation>();
        for (int rank = 0; rank < top.numDocs(); rank++) {
            // NOTE(review): expandScores keys vectors by WikiBrainScoreDoc.luceneId, while this
            // lookup passes the id to pageDao.getById -- confirm both use the same id space.
            LocalPage shared = this.pageDao.getById(this.language, top.getId(rank));
            if (shared != null) {
                explanations.add(new Explanation("Both ? and ? have similar text to ?", localPage, localPage2, shared));
            }
        }
        return explanations;
    }

    /**
     * Obtains a query builder for this generator's language with Wikipedia-id
     * resolution switched off and, when one is installed, the concept filter added.
     */
    private QueryBuilder getQueryBuilder() {
        final QueryBuilder builder = this.searcher.getQueryBuilderByLanguage(this.language);
        builder.setResolveWikipediaIds(false);
        final WpIdFilter filter = this.conceptFilter;
        if (filter == null) {
            return builder;
        }
        builder.addFilter(filter);
        return builder;
    }

    /**
     * Copies search hits into a map from each hit's {@code luceneId} to its {@code score}.
     * When the same id occurs more than once, the later hit's score overwrites the earlier one.
     */
    private TIntFloatMap expandScores(WikiBrainScoreDoc[] wikiBrainScoreDocArr) {
        final TIntFloatHashMap scoreById = new TIntFloatHashMap();
        for (int idx = 0; idx < wikiBrainScoreDocArr.length; idx++) {
            final WikiBrainScoreDoc hit = wikiBrainScoreDocArr[idx];
            scoreById.put(hit.luceneId, hit.score);
        }
        return scoreById;
    }

    /**
     * Truncates the hit array at the first position (from index 100 onward) whose score
     * is within 0.5% of the first hit's score of the hit 100 positions earlier.
     *
     * <p>Arrays of 100 or fewer hits, and arrays where no such position exists,
     * are returned unchanged.
     *
     * @param wikiBrainScoreDocArr search hits; presumably ordered by descending score -- TODO confirm
     * @return the same array, or a prefix copy of it
     */
    private WikiBrainScoreDoc[] pruneSimilar(WikiBrainScoreDoc[] wikiBrainScoreDocArr) {
        final int total = wikiBrainScoreDocArr.length;
        if (total == 0) {
            return wikiBrainScoreDocArr;
        }
        final double minDrop = 0.005d * wikiBrainScoreDocArr[0].score;
        int cutoff = total;
        // Slide a window of 100 positions: "lead" trails "trail" by exactly 100.
        for (int lead = 0, trail = 100; trail < total; lead++, trail++) {
            if (wikiBrainScoreDocArr[lead].score - wikiBrainScoreDocArr[trail].score < minDrop) {
                cutoff = trail;
                break;
            }
        }
        if (cutoff >= total) {
            return wikiBrainScoreDocArr;
        }
        return (WikiBrainScoreDoc[]) ArrayUtils.subarray(wikiBrainScoreDocArr, 0, cutoff);
    }
}
