/*
 * Decompiled with CFR 0.152.
 */
package io.earcam.utilitarian.site.search.offline;

import io.earcam.utilitarian.site.search.offline.AbstractHtmlProcessor;
import io.earcam.utilitarian.site.search.offline.Document;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Processor that parses a document's backing file as UTF-8 HTML with jsoup and
 * stores the {@code title}, {@code description} and {@code raw} fields on it.
 */
public class HtmlContentProcessor
extends AbstractHtmlProcessor {
    private static final Logger LOG = LoggerFactory.getLogger(HtmlContentProcessor.class);

    /** Tags whose text is concatenated, in this order, into the {@code raw} field. */
    private static final String[] RAW_TEXT_TAGS = {"h1", "h2", "h3", "h4", "h5", "h6", "p"};

    /**
     * Parses the document's file and assigns the extracted fields.
     *
     * <p>Nothing happens when {@code isHtml(document)} is false or when
     * {@code document.hasRaw()} is true. An {@link IOException} raised while
     * opening, reading or closing the file is logged (message at WARN, stack
     * trace at DEBUG) and not propagated, so the caller is never interrupted by
     * a single unreadable file.
     *
     * @param document the document to populate
     */
    @Override
    public void process(Document document) {
        if (!this.isHtml(document) || document.hasRaw()) {
            return;
        }
        // try-with-resources guarantees the stream is closed even when parsing fails;
        // previously the stream was never closed by this method.
        try (FileInputStream input = new FileInputStream(document.file().toFile())) {
            org.jsoup.nodes.Document html = Jsoup.parse(input, StandardCharsets.UTF_8.name(), "");
            this.assignFields(document, html);
        }
        catch (IOException e) {
            LOG.warn("Failed to process HTML {} due to: {}", document.file(), e.getMessage());
            LOG.debug("Failed to process HTML", e);
        }
    }

    /**
     * Copies the searchable parts of the parsed HTML onto the document.
     *
     * <ul>
     * <li>{@code title}: combined text of all {@code <title>} elements</li>
     * <li>{@code description}: {@code content} attribute of the first
     * {@code <meta name="description">} element (empty when absent)</li>
     * <li>{@code raw}: text of all h1..h6 and p elements, grouped by tag in that
     * order and separated by single spaces</li>
     * </ul>
     *
     * @param document the document receiving the fields
     * @param html the parsed HTML to read from
     */
    private void assignFields(Document document, org.jsoup.nodes.Document html) {
        document.field("title", html.getElementsByTag("title").text());
        document.field("description", html.getElementsByTag("meta").select("[name=description]").attr("content"));

        String[] texts = new String[RAW_TEXT_TAGS.length];
        for (int i = 0; i < RAW_TEXT_TAGS.length; i++) {
            texts[i] = html.getElementsByTag(RAW_TEXT_TAGS[i]).text();
        }
        // A separator is emitted between every tag group, including empty ones,
        // so the result matches plain concatenation with ' ' between the groups.
        document.field("raw", String.join(" ", texts));
    }
}

