package org.codehaus.xsite.extractors;

import com.opensymphony.module.sitemesh.html.BasicRule;
import com.opensymphony.module.sitemesh.html.HTMLProcessor;
import com.opensymphony.module.sitemesh.html.Tag;
import com.opensymphony.module.sitemesh.html.TagRule;
import com.opensymphony.module.sitemesh.html.TextFilter;
import com.opensymphony.module.sitemesh.html.rules.BodyTagRule;
import com.opensymphony.module.sitemesh.html.rules.HeadExtractingRule;
import com.opensymphony.module.sitemesh.html.rules.MetaTagRule;
import com.opensymphony.module.sitemesh.html.rules.PageBuilder;
import com.opensymphony.module.sitemesh.html.rules.TitleExtractingRule;
import com.opensymphony.module.sitemesh.html.util.CharArray;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Properties;
import org.apache.commons.lang.StringEscapeUtils;
import org.codehaus.xsite.FileSystem;
import org.codehaus.xsite.PageExtractor;
import org.codehaus.xsite.io.CommonsFileSystem;
import org.codehaus.xsite.model.Link;
import org.codehaus.xsite.model.Page;

/* loaded from: input_file:org/codehaus/xsite/extractors/SiteMeshPageExtractor.class */
/**
 * {@link PageExtractor} that runs an HTML document through SiteMesh's
 * {@link HTMLProcessor} and collects its head, body, links and properties
 * into a {@link Page}.
 *
 * <p>Extraction results are kept in mutable instance fields that are
 * reassigned on every {@code extractPage} call without any synchronization,
 * so a single instance must not be used by several threads at once.
 */
public class SiteMeshPageExtractor implements PageExtractor {
    private static final String LA = "<";
    private static final String RA = ">";
    /** Entity that {@code StringEscapeUtils.escapeHtml} emits for {@code <}. */
    private static final String LT_ENTITY = "&lt;";
    /** Entity that {@code StringEscapeUtils.escapeHtml} emits for {@code >}. */
    private static final String GT_ENTITY = "&gt;";

    private Properties properties;
    private String filename;
    private String head;
    private String body;
    private Collection<Link> links;
    private final TagRule[] rules;
    private final TextFilter[] filter;
    private final FileSystem fileSystem;
    private final boolean escapeHTML;

    /** Unchecked wrapper for an {@link IOException} raised while parsing a page. */
    public static class CannotParsePageException extends RuntimeException {
        public CannotParsePageException(Throwable th) {
            super(th);
        }
    }

    /**
     * Tag rule that records every {@code <a>} tag carrying an {@code href}
     * attribute as a {@link Link} (title, href) in the enclosing extractor,
     * then writes the tag unchanged to the current buffer.
     */
    public class LinkExtractingRule extends BasicRule {
        private LinkExtractingRule() {
        }

        public boolean shouldProcess(String str) {
            return str.equalsIgnoreCase("a");
        }

        public void process(Tag tag) {
            if (tag.hasAttribute("href", false)) {
                String title = tag.getAttributeValue("title", false);
                String href = tag.getAttributeValue("href", false);
                SiteMeshPageExtractor.this.links.add(new Link(title, href));
            }
            tag.writeTo(currentBuffer());
        }
    }

    /**
     * Creates an extractor with no extra rules or filters, a
     * {@link CommonsFileSystem}, and HTML escaping enabled.
     */
    public SiteMeshPageExtractor() {
        this(new TagRule[0], new TextFilter[0], new CommonsFileSystem());
    }

    /**
     * Creates an extractor with HTML escaping enabled.
     *
     * @param tagRuleArr    additional tag rules registered after the built-in ones
     * @param textFilterArr text filters registered on the processor
     * @param fileSystem    used to read files passed to {@link #extractPage(File)}
     */
    public SiteMeshPageExtractor(TagRule[] tagRuleArr, TextFilter[] textFilterArr, FileSystem fileSystem) {
        this(tagRuleArr, textFilterArr, fileSystem, true);
    }

    /**
     * @param tagRuleArr    additional tag rules registered after the built-in ones
     * @param textFilterArr text filters registered on the processor
     * @param fileSystem    used to read files passed to {@link #extractPage(File)}
     * @param z             whether head, body and property values are passed
     *                      through {@link #escape(String)}'s HTML escaping
     */
    public SiteMeshPageExtractor(TagRule[] tagRuleArr, TextFilter[] textFilterArr, FileSystem fileSystem, boolean z) {
        this.rules = tagRuleArr;
        this.filter = textFilterArr;
        this.fileSystem = fileSystem;
        this.escapeHTML = z;
    }

    /**
     * Reads {@code file} through the configured {@link FileSystem} and
     * extracts a page named after the file.
     *
     * @throws CannotParsePageException if an {@link IOException} occurs
     */
    @Override // org.codehaus.xsite.PageExtractor
    public Page extractPage(File file) {
        try {
            return buildPage(file.getName(), this.fileSystem.readFile(file).toCharArray());
        } catch (IOException e) {
            throw new CannotParsePageException(e);
        }
    }

    /**
     * Extracts a page named {@code str} from the HTML text {@code str2}.
     *
     * @throws CannotParsePageException if an {@link IOException} occurs
     */
    @Override // org.codehaus.xsite.PageExtractor
    public Page extractPage(String str, String str2) {
        try {
            return buildPage(str, str2.toCharArray());
        } catch (IOException e) {
            throw new CannotParsePageException(e);
        }
    }

    /** Resets the per-page state, parses {@code html} and assembles the resulting page. */
    private Page buildPage(String name, char[] html) throws IOException {
        this.links = new HashSet<Link>();
        this.filename = name;
        extractContentFromHTML(html);
        return new Page(this.filename, this.head, this.body, this.links, this.properties);
    }

    /**
     * Runs the HTML through an {@link HTMLProcessor} configured with the
     * built-in rules followed by the user-supplied rules and filters, and
     * stores the (optionally escaped) head, body and properties in this
     * instance.
     */
    private void extractContentFromHTML(char[] cArr) throws IOException {
        this.properties = new Properties();
        PageBuilder pageBuilder = new PageBuilder() {
            public void addProperty(String str, String str2) {
                // Properties (a Hashtable) rejects null values, so a property
                // reported without a value is skipped rather than aborting the
                // whole page. NOTE(review): presumably this can happen for tags
                // lacking the expected attribute - confirm against SiteMesh rules.
                if (str2 == null) {
                    return;
                }
                SiteMeshPageExtractor.this.properties.setProperty(str, SiteMeshPageExtractor.this.escape(str2));
            }
        };
        CharArray headBuffer = new CharArray(64);
        CharArray bodyBuffer = new CharArray(4096);
        // The body buffer is both the processor's output buffer and the buffer
        // handed to BodyTagRule; the head buffer is handed to HeadExtractingRule.
        HTMLProcessor processor = new HTMLProcessor(cArr, bodyBuffer);
        processor.addRule(new BodyTagRule(pageBuilder, bodyBuffer));
        processor.addRule(new HeadExtractingRule(headBuffer));
        processor.addRule(new TitleExtractingRule(pageBuilder));
        processor.addRule(new MetaTagRule(pageBuilder));
        processor.addRule(new LinkExtractingRule());
        for (TagRule rule : this.rules) {
            processor.addRule(rule);
        }
        for (TextFilter textFilter : this.filter) {
            processor.addTextFilter(textFilter);
        }
        processor.process();
        this.head = escape(headBuffer.toString());
        this.body = escape(bodyBuffer.toString());
    }

    /**
     * HTML-escapes {@code str} while leaving {@code <} and {@code >} intact,
     * so markup structure survives but other special characters become
     * entities. Returns {@code str} unchanged when escaping is disabled or
     * {@code str} is {@code null}.
     *
     * <p>The text is escaped first and only the {@code &lt;}/{@code &gt;}
     * entities are turned back afterwards. Because escaping rewrites every
     * {@code &} of the input to {@code &amp;}, any {@code &lt;} or
     * {@code &gt;} in the escaped text can only stem from a literal angle
     * bracket, so no in-band placeholder (which could collide with the
     * content itself) is needed.
     *
     * @param str text to escape, may be {@code null}
     * @return the escaped text, or {@code str} itself if nothing was done
     */
    public String escape(String str) {
        if (!this.escapeHTML || str == null) {
            return str;
        }
        return StringEscapeUtils.escapeHtml(str).replace(LT_ENTITY, LA).replace(GT_ENTITY, RA);
    }
}
