package org.apache.lucene.ant;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.w3c.tidy.Tidy;

/* loaded from: input_file:org/apache/lucene/ant/HtmlDocument.class */
/**
 * Parses an HTML source with JTidy and exposes its title and body text, plus
 * static helpers that wrap those values in a Lucene {@link Document}.
 */
public class HtmlDocument {
    /**
     * Root element of the Tidy-produced DOM. The accessors below tolerate
     * {@code null} here and return {@code null} themselves in that case.
     */
    private final Element rawDoc;

    /**
     * Parses the given HTML file.
     *
     * <p>The stream opened on the file is closed before the constructor
     * returns, whether or not parsing succeeds.
     *
     * @param file the HTML file to parse
     * @throws IOException if the file cannot be opened or closed
     */
    public HtmlDocument(File file) throws IOException {
        InputStream in = new FileInputStream(file);
        try {
            this.rawDoc = parse(in);
        } finally {
            in.close();
        }
    }

    /**
     * Parses HTML read from the given stream. The stream is not closed here;
     * that stays the caller's responsibility.
     *
     * @param inputStream source of the HTML to parse
     */
    public HtmlDocument(InputStream inputStream) {
        this.rawDoc = parse(inputStream);
    }

    /** Runs a quiet, warning-free Tidy parse and returns the DOM root element. */
    private static Element parse(InputStream in) {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        // No output stream: only the DOM is wanted, not a tidied copy of the markup.
        return tidy.parseDOM(in, (OutputStream) null).getDocumentElement();
    }

    /**
     * Builds a Lucene document with stored, analyzed {@code title} and
     * {@code contents} fields from HTML read from the given stream.
     *
     * @param inputStream source of the HTML to parse
     * @return the populated Lucene document
     */
    public static Document getDocument(InputStream inputStream) {
        return buildDocument(new HtmlDocument(inputStream));
    }

    /**
     * Builds a Lucene document from an HTML file. In addition to the stored,
     * analyzed {@code title} and {@code contents} fields, a stored but
     * unindexed {@code rawcontents} field holds the file's lines concatenated
     * without their line terminators.
     *
     * @param file the HTML file to parse
     * @return the populated Lucene document
     * @throws IOException if the file cannot be read
     */
    public static Document Document(File file) throws IOException {
        Document document = buildDocument(new HtmlDocument(file));
        document.add(new Field("rawcontents", readLinesJoined(file), Field.Store.YES, Field.Index.NO));
        return document;
    }

    /** Creates a Lucene document carrying the parsed title and body text. */
    private static Document buildDocument(HtmlDocument html) {
        Document document = new Document();
        document.add(new Field("title", html.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
        document.add(new Field("contents", html.getBody(), Field.Store.YES, Field.Index.ANALYZED));
        return document;
    }

    /**
     * Reads the file line by line and joins the lines with nothing in between.
     * The reader is closed even when reading fails part-way through.
     */
    private static String readLinesJoined(File file) throws IOException {
        // NOTE(review): FileReader decodes with the platform default charset, and
        // readLine() drops line terminators. Both are kept as-is so the stored
        // value does not change for existing indexes.
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            StringBuffer raw = new StringBuffer();
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                raw.append(line);
            }
            return raw.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Command-line check: parses the file named by the first argument and
     * prints its title and body text to standard output.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is the HTML file path
     * @throws Exception if the file cannot be opened or closed
     */
    public static void main(String[] strArr) throws Exception {
        InputStream in = new FileInputStream(new File(strArr[0]));
        try {
            HtmlDocument htmlDocument = new HtmlDocument(in);
            System.out.println("Title = " + htmlDocument.getTitle());
            System.out.println("Body  = " + htmlDocument.getBody());
        } finally {
            in.close();
        }
    }

    /**
     * Returns the text of the first {@code title} element.
     *
     * @return the title text; the empty string when there is no {@code title}
     *         element or its first child is not a text node; {@code null} when
     *         the document has no root element
     */
    public String getTitle() {
        if (this.rawDoc == null) {
            return null;
        }
        NodeList titles = this.rawDoc.getElementsByTagName("title");
        if (titles.getLength() == 0) {
            return "";
        }
        Node firstChild = titles.item(0).getFirstChild();
        // Check the node type rather than casting blindly: a non-text first
        // child would otherwise fail with a ClassCastException.
        if (firstChild instanceof Text) {
            return ((Text) firstChild).getData();
        }
        return "";
    }

    /**
     * Returns the concatenated text found under the first {@code body} element.
     *
     * @return the body text; the empty string when there is no {@code body}
     *         element; {@code null} when the document has no root element
     */
    public String getBody() {
        if (this.rawDoc == null) {
            return null;
        }
        NodeList bodies = this.rawDoc.getElementsByTagName("body");
        return bodies.getLength() > 0 ? getBodyText(bodies.item(0)) : "";
    }

    /** Collects the text beneath {@code node} into a single string. */
    private String getBodyText(Node node) {
        StringBuffer text = new StringBuffer();
        appendBodyText(node, text);
        return text.toString();
    }

    /**
     * Appends the text of every descendant of {@code node} to {@code out} in
     * document order, writing one space after each child element's text.
     * Nodes that are neither elements nor text are skipped.
     */
    private void appendBodyText(Node node, StringBuffer out) {
        NodeList children = node.getChildNodes();
        int count = children.getLength();
        for (int i = 0; i < count; i++) {
            Node child = children.item(i);
            switch (child.getNodeType()) {
                case Node.ELEMENT_NODE:
                    appendBodyText(child, out);
                    // Separator so text from adjacent elements does not run together.
                    out.append(" ");
                    break;
                case Node.TEXT_NODE:
                    out.append(((Text) child).getData());
                    break;
                default:
                    break;
            }
        }
    }
}
