package org.archive.wayback.resourceindex;

import it.unimi.dsi.lang.MutableString;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.xpath.compiler.PsuedoNames;
import org.archive.url.UsableURIFactory;
import org.archive.wayback.ResourceIndex;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.CaptureSearchResults;
import org.archive.wayback.core.SearchResults;
import org.archive.wayback.core.WaybackRequest;
import org.archive.wayback.exception.AccessControlException;
import org.archive.wayback.exception.BadQueryException;
import org.archive.wayback.exception.ConfigurationException;
import org.archive.wayback.exception.ResourceIndexNotAvailableException;
import org.archive.wayback.exception.ResourceNotInArchiveException;
import org.archive.wayback.util.Timestamp;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/archive/wayback/resourceindex/NutchResourceIndex.class */
public class NutchResourceIndex implements ResourceIndex {
    private static final Logger LOGGER = Logger.getLogger(NutchResourceIndex.class.getName());
    private static final int MAX_RECORDS = 1000;
    private static final String NUTCH_NS = "http://www.nutch.org/opensearchrss/1.0/";
    private String searchUrlBase;
    private DocumentBuilder builder;
    private static final String NUTCH_FILENAME = "filename";
    private static final String NUTCH_FILEOFFSET = "fileoffset";
    private static final String NUTCH_ARCDATE = "date";
    private static final String NUTCH_DIGEST = "digest";
    private static final String NUTCH_MIME_TYPE = "type";
    private static final String NUTCH_CAPTURE_URL = "link";
    private static final String NUTCH_SEARCH_RESULT_TAG = "item";
    private static final String NUTCH_SEARCH_RESULTS_TAG = "channel";
    private static final String NUTCH_FIRST_RESULT = "opensearch:startIndex";
    private static final String NUTCH_NUM_RESULTS = "opensearch:totalResults";
    private static final String NUTCH_NUM_RETURNED = "opensearch:itemsPerPage";
    private static final String NUTCH_DEFAULT_HTTP_CODE = "200";
    private static final String NUTCH_DEFAULT_REDIRECT_URL = "-";
    private int maxRecords = 1000;
    private DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    public void init() throws ConfigurationException {
        LOGGER.info("initializing NutchResourceIndex...");
        LOGGER.info("Using base search url " + this.searchUrlBase);
        this.factory.setNamespaceAware(true);
        try {
            this.builder = this.factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
            throw new ConfigurationException(e.getMessage());
        }
    }

    @Override // org.archive.wayback.ResourceIndex
    public SearchResults query(WaybackRequest waybackRequest) throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException {
        String requestUrl = getRequestUrl(waybackRequest);
        try {
            LOGGER.info("Requesting OpenSearch: " + requestUrl);
            Document httpDocument = getHttpDocument(requestUrl);
            if (!waybackRequest.isReplayRequest() && !waybackRequest.isCaptureQueryRequest()) {
                throw new BadQueryException("Unable to perform path prefix requests with this index type");
            }
            CaptureSearchResults captureSearchResults = new CaptureSearchResults();
            NodeList searchChannel = getSearchChannel(httpDocument);
            NodeList searchItems = getSearchItems(httpDocument);
            if (searchChannel == null || searchChannel.getLength() != 1) {
                throw new ResourceNotInArchiveException("No results for " + requestUrl);
            }
            if (searchItems == null) {
                throw new ResourceNotInArchiveException("No results for " + requestUrl);
            }
            for (int i = 0; i < searchItems.getLength(); i++) {
                List<CaptureSearchResult> itemToSearchResults = itemToSearchResults((Element) searchItems.item(i));
                if (itemToSearchResults != null) {
                    Iterator<CaptureSearchResult> it2 = itemToSearchResults.iterator();
                    while (it2.hasNext()) {
                        captureSearchResults.addSearchResult(it2.next());
                    }
                }
            }
            Element element = (Element) searchChannel.item(0);
            captureSearchResults.putFilter(SearchResults.RESULTS_FIRST_RETURNED, getNodeContent(element, NUTCH_FIRST_RESULT));
            captureSearchResults.putFilter(SearchResults.RESULTS_NUM_RESULTS, getNodeContent(element, NUTCH_NUM_RESULTS));
            captureSearchResults.putFilter(SearchResults.RESULTS_NUM_RETURNED, getNodeContent(element, NUTCH_NUM_RETURNED));
            captureSearchResults.putFilter(SearchResults.RESULTS_REQUESTED, String.valueOf(waybackRequest.getResultsPerPage()));
            captureSearchResults.putFilter(WaybackRequest.REQUEST_START_DATE, Timestamp.earliestTimestamp().getDateStr());
            captureSearchResults.putFilter(WaybackRequest.REQUEST_END_DATE, Timestamp.latestTimestamp().getDateStr());
            return captureSearchResults;
        } catch (IOException e) {
            e.printStackTrace();
            throw new ResourceIndexNotAvailableException(e.getMessage());
        } catch (SAXException e2) {
            e2.printStackTrace();
            throw new ResourceIndexNotAvailableException("Unexpected SAX: " + e2.getMessage());
        }
    }

    private List<CaptureSearchResult> itemToSearchResults(Element element) throws ResourceIndexNotAvailableException {
        String nodeNutchContent = getNodeNutchContent(element, "filename");
        String nodeNutchContent2 = getNodeNutchContent(element, "digest");
        String nodeNutchContent3 = getNodeNutchContent(element, "type");
        String nodeNutchContent4 = getNodeNutchContent(element, NUTCH_FILEOFFSET);
        long j = 0;
        if (nodeNutchContent4 != null && nodeNutchContent4.length() > 0) {
            j = Long.parseLong(nodeNutchContent4);
        }
        String nodeContent = getNodeContent(element, "link");
        NodeList elementsByTagNameNS = element.getElementsByTagNameNS(NUTCH_NS, "date");
        int length = elementsByTagNameNS.getLength();
        ArrayList arrayList = null;
        if (length > 0) {
            arrayList = new ArrayList();
            for (int i = 0; i < length; i++) {
                String nodeTextValue = getNodeTextValue(elementsByTagNameNS.item(i));
                CaptureSearchResult captureSearchResult = new CaptureSearchResult();
                captureSearchResult.setFile(nodeNutchContent);
                captureSearchResult.setCaptureTimestamp(nodeTextValue);
                captureSearchResult.setHttpCode(NUTCH_DEFAULT_HTTP_CODE);
                captureSearchResult.setDigest(nodeNutchContent2);
                captureSearchResult.setMimeType(nodeNutchContent3);
                captureSearchResult.setOffset(j);
                captureSearchResult.setRedirectUrl("-");
                captureSearchResult.setOriginalUrl(nodeContent);
                captureSearchResult.setUrlKey(nodeContent);
                arrayList.add(captureSearchResult);
            }
        }
        return arrayList;
    }

    protected NodeList getSearchChannel(Document document) {
        if (document == null) {
            return null;
        }
        NodeList elementsByTagName = document.getElementsByTagName(NUTCH_SEARCH_RESULTS_TAG);
        if (elementsByTagName.getLength() <= 0) {
            return null;
        }
        return elementsByTagName;
    }

    protected NodeList getSearchItems(Document document) {
        if (document == null) {
            return null;
        }
        NodeList elementsByTagName = document.getElementsByTagName(NUTCH_SEARCH_RESULT_TAG);
        if (elementsByTagName.getLength() <= 0) {
            return null;
        }
        return elementsByTagName;
    }

    protected String getRequestUrl(WaybackRequest waybackRequest) throws BadQueryException {
        String requestUrl = waybackRequest.getRequestUrl();
        String replayTimestamp = waybackRequest.getReplayTimestamp();
        if (replayTimestamp == null || replayTimestamp.length() == 0) {
        }
        String endTimestamp = waybackRequest.getEndTimestamp();
        if (endTimestamp == null || endTimestamp.length() == 0) {
            endTimestamp = Timestamp.latestTimestamp().getDateStr();
        }
        String startTimestamp = waybackRequest.getStartTimestamp();
        if (startTimestamp == null || startTimestamp.length() == 0) {
            startTimestamp = Timestamp.earliestTimestamp().getDateStr();
        }
        int resultsPerPage = waybackRequest.getResultsPerPage();
        if (resultsPerPage < 1) {
            throw new BadQueryException("Hits per page must be positive");
        }
        if (resultsPerPage > this.maxRecords) {
            throw new BadQueryException("Hits per page must be less than " + this.maxRecords);
        }
        int pageNum = (waybackRequest.getPageNum() - 1) * resultsPerPage;
        if (requestUrl == null || requestUrl.length() <= 0) {
            throw new BadQueryException("Url is empty.");
        }
        MutableString append = new MutableString(this.searchUrlBase).append("?query=");
        append.append("date%3A").append(startTimestamp).append('-').append(endTimestamp);
        append.append('+');
        if (waybackRequest.isUrlQueryRequest()) {
            append.append("url%3A");
        } else {
            append.append("exacturl%3A");
        }
        try {
            append.append(URLEncoder.encode(UsableURIFactory.QUOT + requestUrl + UsableURIFactory.QUOT, "UTF-8"));
            append.append("&hitsPerPage=").append(resultsPerPage);
            append.append("&start=").append(pageNum);
            append.append("&dedupField=site");
            append.append("&hitsPerDup=").append(resultsPerPage);
            append.append("&hitsPerSite=").append(resultsPerPage);
            return append.toString();
        } catch (UnsupportedEncodingException e) {
            throw new BadQueryException(e.toString());
        }
    }

    protected String getNodeNutchContent(Element element, String str) {
        NodeList elementsByTagNameNS = element.getElementsByTagNameNS(NUTCH_NS, str);
        String str2 = null;
        if (elementsByTagNameNS != null && elementsByTagNameNS.getLength() > 0) {
            str2 = getNodeTextValue(elementsByTagNameNS.item(0));
        }
        if (str2 == null || str2.length() == 0) {
            return null;
        }
        return str2;
    }

    protected String getNodeContent(Element element, String str) {
        NodeList elementsByTagName = element.getElementsByTagName(str);
        String str2 = null;
        if (elementsByTagName != null && elementsByTagName.getLength() > 0) {
            str2 = getNodeTextValue(elementsByTagName.item(0));
        }
        if (str2 == null || str2.length() == 0) {
            return null;
        }
        return str2;
    }

    private String getNodeTextValue(Node node) {
        return (node.hasChildNodes() && node.getFirstChild().getNodeName().equals(PsuedoNames.PSEUDONAME_TEXT)) ? node.getFirstChild().getNodeValue() : "";
    }

    protected synchronized Document getHttpDocument(String str) throws IOException, SAXException {
        return this.builder.parse(str);
    }

    public String getSearchUrlBase() {
        return this.searchUrlBase;
    }

    public void setSearchUrlBase(String str) {
        this.searchUrlBase = str;
    }

    public int getMaxRecords() {
        return this.maxRecords;
    }

    public void setMaxRecords(int i) {
        this.maxRecords = i;
    }

    @Override // org.archive.wayback.ResourceIndex
    public void shutdown() throws IOException {
    }
}
