package org.wikibrain.sr.esa;

import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.TIntHashSet;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.conf.DefaultOptionBuilder;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.cmd.EnvBuilder;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.DaoFilter;
import org.wikibrain.core.dao.LocalLinkDao;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.model.LocalPage;
import org.wikibrain.core.model.NameSpace;
import org.wikibrain.sr.SRResultList;
import org.wikibrain.sr.utils.Leaderboard;

/* loaded from: input_file:org/wikibrain/sr/esa/SRConceptSpaceGenerator.class */
/**
 * Selects the set of article ids ("concepts") to use for one language and can
 * write that set to a text file, one id per line.
 *
 * <p>Candidate pages are non-redirect, non-disambiguation pages in the article
 * namespace whose titles are neither blacklisted nor list-like. Each candidate
 * is scored by the number of links the link DAO reports for it as a destination,
 * and the highest-scoring candidates are kept, minus a small number of leading
 * "stop concepts" that are discarded.
 *
 * <p>Can be run from the command line via {@link #main(String[])}.
 */
public class SRConceptSpaceGenerator {
    private static final Logger LOG = LoggerFactory.getLogger(SRConceptSpaceGenerator.class);

    /**
     * Titles matching any of these patterns (full match) are never concepts:
     * titles starting with a four-digit number and a space, titles starting with
     * an English month name, and purely numeric titles.
     *
     * <p>NOTE(review): the month pattern is a bare prefix match, so it also
     * rejects titles such as "Mayonnaise" or "Augustus" - confirm this is intended.
     */
    private static final Pattern[] TITLE_BLACKLIST = {Pattern.compile("^[0-9]{4} .*"), Pattern.compile("^(January|February|March|April|May|June|July|August|September|October|November|December).*"), Pattern.compile("^[0-9]+$")};

    /** Exponent applied to the article count in both size heuristics (approximately a cube root). */
    private static final double SIZE_EXPONENT = 0.33333d;

    /** Multiplier for the number of stop concepts to discard. */
    private static final double STOP_CONCEPT_FACTOR = 2.0d;

    /** Multiplier for the default maximum number of concepts. */
    private static final double DEFAULT_MAX_CONCEPT_FACTOR = 1000.0d;

    private final Language lang;
    private final LocalLinkDao linkDao;
    private final LocalPageDao pageDao;

    /** Number of pages matching {@link #getFilter()}, captured at construction time. */
    private final int numArticles;

    /** Explicit cap on the number of concepts; a negative value means "use the default heuristic". */
    private int maxConcepts = -1;

    /**
     * Creates a generator for one language and counts the pages matching
     * {@link #getFilter()} so the size heuristics can be computed.
     *
     * @param language     language whose concepts are generated
     * @param localLinkDao DAO used to count links pointing at each candidate page
     * @param localPageDao DAO used to enumerate and count candidate pages
     * @throws DaoException if the page count cannot be retrieved
     */
    public SRConceptSpaceGenerator(Language language, LocalLinkDao localLinkDao, LocalPageDao localPageDao) throws DaoException {
        this.lang = language;
        this.linkDao = localLinkDao;
        this.pageDao = localPageDao;
        this.numArticles = localPageDao.getCount(getFilter());
    }

    /**
     * Builds the page filter used for both counting and enumerating candidates.
     *
     * @return a new filter restricted to this generator's language, the article
     *         namespace, non-redirects and non-disambiguation pages
     */
    public DaoFilter getFilter() {
        return new DaoFilter().setNameSpaces(NameSpace.ARTICLE).setLanguages(this.lang).setRedirect(false).setDisambig(false);
    }

    /**
     * Computes the concept set for this generator's language.
     *
     * <p>Every eligible page is tallied on a leaderboard sized
     * {@code getMaxConcepts() + getNumStopConcepts()}; the first
     * {@code getNumStopConcepts()} entries of the leaderboard's result list are
     * skipped and the remaining ids are returned.
     *
     * @return the local ids of the selected concept pages
     * @throws DaoException if pages or link counts cannot be read
     */
    public TIntSet getConcepts() throws DaoException {
        int numStopConcepts = getNumStopConcepts();
        Leaderboard leaderboard = new Leaderboard(getMaxConcepts() + numStopConcepts);
        for (LocalPage page : this.pageDao.get(getFilter())) {
            if (isCandidate(page)) {
                leaderboard.tallyScore(page.getLocalId(), this.linkDao.getCount(new DaoFilter().setLanguages(this.lang).setDestIds(page.getLocalId())));
            }
        }
        SRResultList top = leaderboard.getTop();
        TIntHashSet concepts = new TIntHashSet();
        // Skip the leading entries (presumably the highest-scoring pages); they are treated as stop concepts.
        for (int i = numStopConcepts; i < top.numDocs(); i++) {
            concepts.add(top.getId(i));
        }
        return concepts;
    }

    /**
     * Writes the concept ids to a file, one id per line.
     *
     * <p>The concept set is computed before the file is opened, so a DAO failure
     * does not leave behind an empty or truncated file, and the writer is always
     * closed even when a write fails.
     *
     * @param file destination file; overwritten if it exists
     * @throws DaoException if the concept set cannot be computed
     * @throws IOException  if the file cannot be written
     */
    public void writeConcepts(File file) throws DaoException, IOException {
        int[] conceptIds = getConcepts().toArray();
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            for (int id : conceptIds) {
                writer.write(id + "\n");
            }
        }
    }

    /** Returns true if the page passes every eligibility check for being a concept. */
    private boolean isCandidate(LocalPage page) {
        return page != null
                && page.getNameSpace() == NameSpace.ARTICLE
                && !page.isDisambig()
                && !page.isRedirect()
                && !isBlacklisted(page)
                && !isList(page);
    }

    /** Returns true if the page's canonical title fully matches any blacklist pattern. */
    private boolean isBlacklisted(LocalPage localPage) {
        String canonicalTitle = localPage.getTitle().getCanonicalTitle();
        for (Pattern pattern : TITLE_BLACKLIST) {
            if (pattern.matcher(canonicalTitle).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns true if the page's canonical title starts with "list", ignoring case.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so the result does not depend on
     * the JVM's default locale (e.g. Turkish dotless-i casing).
     *
     * <p>NOTE(review): this is a bare prefix test, so titles such as "Listeria"
     * are also treated as lists - confirm this is intended.
     */
    private boolean isList(LocalPage localPage) {
        return localPage.getTitle().getCanonicalTitle().toLowerCase(Locale.ROOT).startsWith("list");
    }

    /**
     * Returns how many leading leaderboard entries are discarded as stop concepts.
     *
     * @return {@code numArticles ^ 0.33333 * 2}, truncated to an int
     */
    public int getNumStopConcepts() {
        return (int) (Math.pow(this.numArticles, SIZE_EXPONENT) * STOP_CONCEPT_FACTOR);
    }

    /**
     * Returns the maximum number of concepts to keep.
     *
     * @return the value given to {@link #setMaxConcepts(int)} if it is
     *         non-negative, otherwise {@code numArticles ^ 0.33333 * 1000}
     *         truncated to an int
     */
    public int getMaxConcepts() {
        if (this.maxConcepts >= 0) {
            return this.maxConcepts;
        }
        return (int) (Math.pow(this.numArticles, SIZE_EXPONENT) * DEFAULT_MAX_CONCEPT_FACTOR);
    }

    /**
     * Sets an explicit cap on the number of concepts.
     *
     * @param i the cap; a negative value restores the default heuristic
     */
    public void setMaxConcepts(int i) {
        this.maxConcepts = i;
    }

    /**
     * Command-line entry point: writes one {@code <langCode>.txt} concept file
     * per configured language into the output directory.
     *
     * <p>Options: {@code -d/--output-dir} overrides the configured
     * {@code sr.concepts.path}; {@code -x/--max-concepts} caps the number of
     * concepts per language. Standard environment options are also accepted.
     * If the output path exists but is not a directory it is deleted and
     * recreated as a directory.
     *
     * @param strArr command-line arguments
     * @throws ConfigurationException if the environment cannot be configured
     * @throws DaoException           if page or link data cannot be read
     * @throws IOException            if the output directory or a concept file cannot be written
     * @throws NumberFormatException  if the {@code -x} value is not an integer
     */
    public static void main(String[] strArr) throws ConfigurationException, DaoException, IOException {
        Options options = new Options();
        options.addOption(new DefaultOptionBuilder().hasArg().withLongOpt("output-dir").withDescription("directory to output concept mapping to").create("d"));
        options.addOption(new DefaultOptionBuilder().hasArg().withLongOpt("max-concepts").withDescription("maximum number of concepts").create("x"));
        EnvBuilder.addStandardOptions(options);
        try {
            CommandLine cmd = new PosixParser().parse(options, strArr);

            // Parse once, up front, so a malformed value fails before any work is done.
            boolean hasMaxConcepts = cmd.hasOption("x");
            int maxConceptsOverride = hasMaxConcepts ? Integer.parseInt(cmd.getOptionValue("x")) : -1;

            Env env = new EnvBuilder(cmd).build();
            Configurator configurator = env.getConfigurator();
            LocalLinkDao localLinkDao = (LocalLinkDao) configurator.get(LocalLinkDao.class);
            LocalPageDao localPageDao = (LocalPageDao) configurator.get(LocalPageDao.class);

            File outputDir = cmd.hasOption("d")
                    ? new File(cmd.getOptionValue("d"))
                    : new File(env.getConfiguration().get().getString("sr.concepts.path"));
            if (!outputDir.isDirectory()) {
                FileUtils.deleteQuietly(outputDir);
                // mkdirs() reports failure only through its return value; surface it explicitly.
                if (!outputDir.mkdirs() && !outputDir.isDirectory()) {
                    throw new IOException("Could not create output directory: " + outputDir.getAbsolutePath());
                }
            }

            for (Language language : env.getLanguages()) {
                SRConceptSpaceGenerator generator = new SRConceptSpaceGenerator(language, localLinkDao, localPageDao);
                if (hasMaxConcepts) {
                    generator.setMaxConcepts(maxConceptsOverride);
                }
                generator.writeConcepts(new File(outputDir, language.getLangCode() + ".txt"));
            }
        } catch (ParseException e) {
            System.err.println("Invalid option usage: " + e.getMessage());
            new HelpFormatter().printHelp("SRConceptSpaceGenerator", options);
        }
    }
}
