package org.opencb.cellbase.lib.builders;

import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.List;
import org.opencb.biodata.formats.pubmed.PubMedParser;
import org.opencb.biodata.formats.pubmed.v233jaxb.PubmedArticle;
import org.opencb.biodata.formats.pubmed.v233jaxb.PubmedArticleSet;
import org.opencb.cellbase.core.serializer.CellBaseFileSerializer;
import org.opencb.commons.utils.FileUtils;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/opencb/cellbase/lib/builders/PubMedBuilder.class */
/**
 * Builder that loads the PubMed XML files found directly inside a directory and passes every
 * article to a {@link CellBaseFileSerializer}.
 *
 * <p>Only regular files whose name ends with {@code gz} or {@code xml} are processed. For each
 * such file, the part of the file name before the first dot is passed as the second argument of
 * {@code serialize}.
 */
public class PubMedBuilder extends CellBaseBuilder {
    /** Number of serialized articles between two progress log messages. */
    private static final int PROGRESS_INTERVAL = 2000;

    private final Path pubmedDir;
    private final CellBaseFileSerializer fileSerializer;

    /**
     * Creates a builder for the given PubMed directory.
     *
     * @param path directory whose files are scanned by {@link #parse()}
     * @param cellBaseFileSerializer serializer that receives every parsed article; it is also
     *     handed to the superclass constructor
     */
    public PubMedBuilder(Path path, CellBaseFileSerializer cellBaseFileSerializer) {
        super(cellBaseFileSerializer);
        this.fileSerializer = cellBaseFileSerializer;
        this.pubmedDir = path;
        this.logger = LoggerFactory.getLogger(PubMedBuilder.class);
    }

    /**
     * Parses every matching file in the PubMed directory and serializes its articles.
     *
     * @throws IOException if the directory content cannot be listed
     * @throws Exception if path validation, XML loading, serialization or closing fails
     */
    @Override // org.opencb.cellbase.lib.builders.CellBaseBuilder
    public void parse() throws Exception {
        // NOTE(review): presumably validates that the path exists - confirm against FileUtils.
        FileUtils.checkPath(this.pubmedDir);
        this.logger.info("Parsing PubMed files...");

        // File.listFiles() returns null (not an empty array) when the path is not a directory
        // or an I/O error occurs; fail with a clear message instead of a NullPointerException.
        File[] files = this.pubmedDir.toFile().listFiles();
        if (files == null) {
            throw new IOException("Unable to list files in PubMed directory '" + this.pubmedDir
                    + "': it is not a directory or an I/O error occurred");
        }

        for (File file : files) {
            if (isPubMedFile(file)) {
                parseFile(file);
            }
        }
        this.logger.info("Parsing PubMed files finished.");
    }

    /** Returns true for regular files whose name ends with "gz" or "xml". */
    private static boolean isPubMedFile(File file) {
        if (!file.isFile()) {
            return false;
        }
        String fileName = file.getName();
        return fileName.endsWith("gz") || fileName.endsWith("xml");
    }

    /** Loads one PubMed file, serializes each of its articles and then closes the serializer. */
    private void parseFile(File file) throws Exception {
        // Everything before the first dot of the file name, e.g. "a.xml.gz" -> "a".
        String outputName = file.getName().split("\\.")[0];

        PubmedArticleSet articleSet = (PubmedArticleSet) PubMedParser.loadXMLInfo(file.getAbsolutePath());
        List<?> entries = articleSet.getPubmedArticleOrPubmedBookArticle();
        this.logger.info("Parsing PubMed file {} of {} articles ...", file.getName(), entries.size());

        int serialized = 0;
        for (Object entry : entries) {
            // NOTE(review): the getter name suggests the list may also hold book articles, for
            // which this cast would throw ClassCastException - kept as in the original; confirm.
            this.fileSerializer.serialize((PubmedArticle) entry, outputName);
            serialized++;
            if (serialized % PROGRESS_INTERVAL == 0) {
                this.logger.info("\t\t{} articles", serialized);
            }
        }

        // The serializer is closed after every input file, exactly as before.
        this.fileSerializer.close();
        this.logger.info("\t\tDone: {} articles.", serialized);
    }
}
