package org.wikibrain.sr.wikify;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.DefaultOptionBuilder;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.cmd.EnvBuilder;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.phrases.LinkProbabilityDao;
import org.wikibrain.utils.WpIOUtils;

/* loaded from: input_file:org/wikibrain/sr/wikify/PlainTextCorpusCreator.class */
/**
 * Corpus creator whose input is a plain text file; every line of the file
 * becomes one {@link IdAndText} entry of the corpus.
 *
 * <p>The number of lines read can be capped with {@link #setMaxPages(int)}
 * (command line option {@code -x}); by default the whole file is read.
 */
public class PlainTextCorpusCreator extends BaseCorpusCreator {
    private static final Logger LOG = LoggerFactory.getLogger(PlainTextCorpusCreator.class);

    /** Plain text file the corpus is read from. */
    private final File file;

    /** Maximum number of lines handed out by {@link #getCorpus()}; {@code Integer.MAX_VALUE} means no cap. */
    private int maxPages = Integer.MAX_VALUE;

    /**
     * Adapts a {@link LineIterator} to an iterator of {@link IdAndText},
     * closing the underlying line iterator as soon as it is exhausted or
     * the optional line limit has been reached.
     *
     * <p>Every line is emitted with the id {@code -1}.
     */
    public static class ClosingLineIterator implements Iterator<IdAndText> {
        /** Underlying line source; set to {@code null} once it has been closed. */
        private LineIterator iter;

        /** Lines that may still be returned; {@code Integer.MAX_VALUE} means unlimited. */
        private int remaining;

        /**
         * Creates an iterator without a line limit.
         *
         * @param lineIterator line source to wrap
         */
        public ClosingLineIterator(LineIterator lineIterator) {
            this(lineIterator, Integer.MAX_VALUE);
        }

        /**
         * Creates an iterator that returns at most {@code maxLines} lines.
         *
         * @param lineIterator line source to wrap
         * @param maxLines     maximum number of lines to return;
         *                     {@code Integer.MAX_VALUE} disables the limit and
         *                     values {@code <= 0} yield an empty iterator
         */
        public ClosingLineIterator(LineIterator lineIterator, int maxLines) {
            this.iter = lineIterator;
            this.remaining = maxLines;
        }

        /**
         * @return {@code true} while another line is available and the limit
         *         has not been reached; the first {@code false} result also
         *         closes the underlying line iterator
         */
        @Override // java.util.Iterator
        public boolean hasNext() {
            if (this.iter == null) {
                return false;
            }
            if (this.remaining > 0 && this.iter.hasNext()) {
                return true;
            }
            // Exhausted or capped: release the reader right away and remember that we did.
            this.iter.close();
            this.iter = null;
            return false;
        }

        /**
         * @return the next line wrapped in an {@link IdAndText} with id {@code -1}
         * @throws NoSuchElementException if no further line is available
         */
        @Override // java.util.Iterator
        public IdAndText next() {
            // Guard against calls past the end; without it the nulled-out
            // iterator would surface as a NullPointerException.
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            // Integer.MAX_VALUE is the "no limit" marker and must never be counted down.
            if (this.remaining != Integer.MAX_VALUE) {
                this.remaining--;
            }
            return new IdAndText(-1, this.iter.next());
        }

        /**
         * Removal is not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override // java.util.Iterator
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * @param language           language handed to the base corpus creator
     * @param wikifier           wikifier handed to the base corpus creator
     * @param localPageDao       page dao handed to the base corpus creator
     * @param linkProbabilityDao link probability dao handed to the base corpus creator
     * @param file               plain text input file
     * @throws IllegalArgumentException if {@code file} is not an existing regular file
     */
    public PlainTextCorpusCreator(Language language, Wikifier wikifier, LocalPageDao localPageDao, LinkProbabilityDao linkProbabilityDao, File file) {
        super(language, localPageDao, wikifier, linkProbabilityDao);
        this.file = file;
        if (!this.file.isFile()) {
            throw new IllegalArgumentException("Plaintext corpus " + this.file + " does not exist");
        }
    }

    /**
     * Caps the number of lines returned by {@link #getCorpus()}.
     *
     * @param i maximum number of lines; {@code Integer.MAX_VALUE} means no cap
     */
    public void setMaxPages(int i) {
        this.maxPages = i;
    }

    /**
     * Opens the input file and returns its lines as corpus entries, limited
     * to the configured maximum number of pages.
     *
     * @return iterator over the lines of the input file; it closes the file
     *         once it reports that no further element is available
     * @throws DaoException if the file cannot be opened
     */
    @Override // org.wikibrain.sr.wikify.BaseCorpusCreator
    public Iterator<IdAndText> getCorpus() throws DaoException {
        try {
            LineIterator lines = IOUtils.lineIterator(WpIOUtils.openReader(this.file));
            return new ClosingLineIterator(lines, this.maxPages);
        } catch (IOException e) {
            throw new DaoException(e);
        }
    }

    /**
     * Command line entry point: reads the plain text file given with
     * {@code -i} and writes the corpus to the location given with {@code -o}.
     * {@code -x} optionally caps the number of input lines processed.
     *
     * @param strArr command line arguments
     * @throws NumberFormatException if the {@code -x} value is not an integer
     */
    public static void main(String[] strArr) throws ConfigurationException, IOException, DaoException {
        Options options = new Options();
        options.addOption(new DefaultOptionBuilder().hasArg().isRequired().withLongOpt("input").withDescription("plain text input file").create("i"));
        options.addOption(new DefaultOptionBuilder().hasArg().isRequired().withLongOpt("output").withDescription("corpus output directory (existing data will be lost)").create("o"));
        options.addOption(new DefaultOptionBuilder().hasArg().withLongOpt("max-articles").withDescription("Maximum number of articles to process").create("x"));
        EnvBuilder.addStandardOptions(options);

        CommandLine cmd;
        try {
            cmd = new PosixParser().parse(options, strArr);
        } catch (ParseException e) {
            System.err.println("Invalid option usage: " + e.getMessage());
            new HelpFormatter().printHelp("PlainTextCorpusCreator", options);
            return;
        }

        Env env = new EnvBuilder(cmd).build();
        LocalPageDao pageDao = (LocalPageDao) env.getConfigurator().get(LocalPageDao.class);
        Language lang = env.getLanguages().getDefaultLanguage();
        Wikifier wikifier = (Wikifier) env.getComponent(Wikifier.class, lang);
        LinkProbabilityDao linkProbDao = (LinkProbabilityDao) env.getComponent(LinkProbabilityDao.class, lang);

        PlainTextCorpusCreator creator = new PlainTextCorpusCreator(lang, wikifier, pageDao, linkProbDao, new File(cmd.getOptionValue("i")));
        if (cmd.hasOption("x")) {
            creator.setMaxPages(Integer.parseInt(cmd.getOptionValue("x")));
        }
        creator.write(new File(cmd.getOptionValue("o")));
    }
}
