package org.jweaver.crawler.internal.runner;

import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.random.RandomGenerator;
import java.util.stream.Collectors;
import org.jweaver.crawler.internal.parse.DocumentParser;
import org.jweaver.crawler.internal.result.Connection;
import org.jweaver.crawler.internal.result.ErrorResultPage;
import org.jweaver.crawler.internal.result.NodeError;
import org.jweaver.crawler.internal.result.PageLink;
import org.jweaver.crawler.internal.result.ResponseData;
import org.jweaver.crawler.internal.result.ResultPage;
import org.jweaver.crawler.internal.result.SuccessResultPage;
import org.jweaver.crawler.internal.util.Constants;
import org.jweaver.crawler.internal.util.URIHelper;
import org.jweaver.crawler.internal.write.ExportConfig;
import org.jweaver.crawler.internal.write.JWeaverWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:org/jweaver/crawler/internal/runner/JWeaverTask.class */
/**
 * Crawls a single site starting from a base URI.
 *
 * <p>{@link #start()} fetches the base URI, queues the links found on it and then keeps polling
 * the queue until it is empty. Every fetched page is turned into either a
 * {@link SuccessResultPage} or an {@link ErrorResultPage}; page-to-page links are recorded as
 * {@link Connection}s and failures as {@link NodeError}s, both of which are handed to the
 * {@link JWeaverWriter} once the queue has drained.
 *
 * <p>NOTE(review): {@code visitedUris}, {@code connections}, {@code resultPages} and
 * {@code nodeErrorList} are plain, unsynchronized collections; this class appears to assume that
 * a single thread drives {@link #start()} — confirm against the caller.
 */
public final class JWeaverTask {
    private static final Logger log = LoggerFactory.getLogger(JWeaverTask.class);

    private final HttpClient httpClient;
    private final DocumentParser parser;
    private final JWeaverWriter writer;
    private final String baseUri;
    private final Duration politenessDelay;
    private final Integer maxDepth;
    private final ExportConfig exportConfiguration;

    /** Identifier of this task; currently only used to name the writer threads. */
    private final Long id = generateExecutionId();

    /** Every (source page -> linked page) edge discovered so far. */
    private final List<Connection> connections = new ArrayList<>();

    /** URLs that have already been requested, successfully or not. */
    private final Set<String> visitedUris = new HashSet<>();

    // NOTE(review): this list is only ever appended to inside this class and never read, so it
    // keeps every crawled page reachable for the lifetime of the task. Kept as-is; consider
    // removing it if nothing relies on it.
    private final List<ResultPage> resultPages = new ArrayList<>();

    /** Links waiting to be crawled, in discovery order. */
    private final Queue<PageLink> pageLinkQueue = new ConcurrentLinkedQueue<>();

    /** Pages (other than the root) whose crawl produced an {@link ErrorResultPage}. */
    private final List<NodeError> nodeErrorList = new ArrayList<>();

    /**
     * Creates a crawl task for one base URI.
     *
     * @param baseUri the root URL the crawl starts from
     * @param httpClient client used for every page request
     * @param politenessDelay pause applied before each request
     * @param exportConfiguration export settings forwarded unchanged to the writer
     * @param maxDepth greatest link depth that is still crawled (the root has depth 0)
     * @param parser extracts title, body and links from a fetched document
     * @param writer receives successful pages, the connection list and the error list
     */
    public JWeaverTask(
            String baseUri,
            HttpClient httpClient,
            Duration politenessDelay,
            ExportConfig exportConfiguration,
            Integer maxDepth,
            DocumentParser parser,
            JWeaverWriter writer) {
        this.baseUri = baseUri;
        this.politenessDelay = politenessDelay;
        this.parser = parser;
        this.maxDepth = maxDepth;
        this.writer = writer;
        this.httpClient = httpClient;
        this.exportConfiguration = exportConfiguration;
    }

    /**
     * Picks a pseudo-random identifier for this task.
     *
     * @return a value in the range {@code [0, 100)}; the bounded {@code nextLong} never returns a
     *     negative number, so no sign correction is needed
     */
    long generateExecutionId() {
        // NOTE(review): with only 100 possible values, two tasks can easily share an id.
        return RandomGenerator.getDefault().nextLong(100L);
    }

    /**
     * Returns the identifier generated when this task was constructed.
     *
     * @return the task id
     */
    public Long getId() {
        return this.id;
    }

    /**
     * Runs the crawl: fetches the base URI, follows the queued links and finally hands the
     * collected connections and errors to the writer.
     *
     * @throws IllegalStateException if the base URI cannot be fetched or yields an error page
     */
    public void start() {
        ResultPage rootPage = crawl(new PageLink(this.baseUri, 0));
        this.visitedUris.add(this.baseUri);
        if (rootPage instanceof SuccessResultPage successPage) {
            // NOTE(review): unlike processSuccessPage, the root page is not passed to
            // writeOutput here — confirm whether that is intentional.
            this.resultPages.add(successPage);
            this.pageLinkQueue.addAll(successPage.linkSet());
            successPage
                    .linkSet()
                    .forEach(link -> this.connections.add(new Connection(this.baseUri, link.url(), 0)));
            travelLinks();
        } else if (rootPage instanceof ErrorResultPage errorPage) {
            log.error("Base URL [{}] responds with {}", errorPage.uri(), errorPage.content());
            throw new IllegalStateException("Unable to fetch ResultPage for root URL");
        }
        this.writer.processConnectionMap(this.baseUri, this.connections, this.exportConfiguration);
        this.writer.processErrors(this.baseUri, this.nodeErrorList, this.exportConfiguration);
    }

    /**
     * Drains the link queue. A link is crawled only if its URL has not been requested before and
     * its depth does not exceed {@code maxDepth}; all other links are dropped when dequeued.
     */
    void travelLinks() {
        while (!this.pageLinkQueue.isEmpty()) {
            PageLink next = this.pageLinkQueue.poll();
            boolean alreadyVisited = this.visitedUris.contains(next.url());
            if (alreadyVisited || next.depth() > this.maxDepth.intValue()) {
                continue;
            }
            ResultPage page = crawl(next);
            this.visitedUris.add(next.url());
            if (page instanceof SuccessResultPage successPage) {
                processSuccessPage(successPage);
            } else if (page instanceof ErrorResultPage errorPage) {
                processFailurePage(errorPage);
            }
        }
    }

    /**
     * Records a successfully crawled page: queues its links, stores one connection per link and
     * schedules the page for export.
     *
     * @param successResultPage the crawled page
     */
    void processSuccessPage(SuccessResultPage successResultPage) {
        this.resultPages.add(successResultPage);
        this.pageLinkQueue.addAll(successResultPage.linkSet());
        successResultPage
                .linkSet()
                .forEach(
                        link ->
                                this.connections.add(
                                        new Connection(
                                                successResultPage.uri(),
                                                link.url(),
                                                successResultPage.depth())));
        writeOutput(successResultPage);
    }

    /**
     * Records a failed page so it is included in the error export.
     *
     * @param errorResultPage the page whose crawl failed
     */
    void processFailurePage(ErrorResultPage errorResultPage) {
        this.nodeErrorList.add(
                new NodeError(
                        errorResultPage.uri(), errorResultPage.depth(), errorResultPage.content()));
    }

    /**
     * Hands a page to the writer on a freshly started virtual thread.
     *
     * @param successResultPage the page to export
     */
    void writeOutput(SuccessResultPage successResultPage) {
        // NOTE(review): the started thread is neither tracked nor joined, so start() can return
        // before the page has been written — confirm the caller keeps the JVM alive long enough.
        Thread.ofVirtual()
                .name("jweaver-writer-" + getId())
                .start(() -> this.writer.processSuccess(successResultPage, this.exportConfiguration));
    }

    /**
     * Decides whether a discovered URL must be ignored.
     *
     * @param str the base URI the candidate is compared against
     * @param str2 the candidate URL
     * @return {@code false} only when the candidate is an allowed URL, a valid URI and not
     *     external to the base URI; {@code true} otherwise
     */
    boolean skipUrl(String str, String str2) {
        return !(URIHelper.isAllowedUrl(str2)
                && URIHelper.isValidUri(str2)
                && !URIHelper.isExternalUri(str, str2));
    }

    /**
     * Converts the raw URLs found on a page into links one level deeper than that page, leaving
     * out every URL rejected by {@link #skipUrl(String, String)}.
     *
     * @param set URLs extracted from the page
     * @param pageLink the page the URLs were found on
     * @return an unmodifiable set of child links with depth {@code pageLink.depth() + 1}
     */
    Set<PageLink> addChildLinks(Set<String> set, PageLink pageLink) {
        return set.stream()
                .filter(candidate -> !skipUrl(this.baseUri, candidate))
                .map(candidate -> new PageLink(candidate, pageLink.depth() + 1))
                .collect(Collectors.toUnmodifiableSet());
    }

    /**
     * Fetches and parses one page, converting every failure into an {@link ErrorResultPage}.
     *
     * @param pageLink the link to crawl
     * @return a success page for a successful response, otherwise an error page carrying the
     *     response body or the exception message
     * @throws IllegalStateException if a non-interrupt exception occurs for the root link
     *     (depth 0)
     */
    ResultPage crawl(PageLink pageLink) {
        try {
            ResponseData<String> response = get(pageLink);
            if (response.isSuccess()) {
                return createFromHtmlBody(response.body(), pageLink);
            }
            return ErrorResultPage.create(pageLink, response.body());
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can react to it.
            Thread.currentThread().interrupt();
            return ErrorResultPage.create(pageLink, e.getLocalizedMessage());
        } catch (Exception e) {
            maybeThrow(pageLink, e);
            return ErrorResultPage.create(pageLink, e.getLocalizedMessage());
        }
    }

    /**
     * Aborts the crawl when the failing link is the root; does nothing for deeper links.
     *
     * @param pageLink the link whose request failed
     * @param exc the failure, logged and attached as the cause of the thrown exception
     * @throws IllegalStateException if {@code pageLink} has depth 0
     */
    void maybeThrow(PageLink pageLink, Exception exc) {
        if (pageLink.depth() == 0) {
            String message = String.format("Root URL request failed for %s", pageLink.url());
            log.error(message, exc);
            // Keep the original failure as the cause so its stack trace is not lost.
            throw new IllegalStateException(message, exc);
        }
    }

    /**
     * Waits for the politeness delay and then issues a GET request for the link.
     *
     * <p>The request timeout is the client's connect timeout, or one second if the client has
     * none configured.
     *
     * @param pageLink the link to request
     * @return the status code and body of the response
     * @throws IOException if sending or receiving fails
     * @throws InterruptedException if the thread is interrupted while sleeping or sending
     * @throws IllegalArgumentException if the response declares a content type that is not allowed
     */
    ResponseData<String> get(PageLink pageLink) throws IOException, InterruptedException {
        Thread.sleep(this.politenessDelay);
        log.trace("Crawling {} with depth {}", pageLink.url(), pageLink.depth());
        Duration timeout = this.httpClient.connectTimeout().orElse(Duration.ofSeconds(1L));
        HttpRequest request =
                HttpRequest.newBuilder(URI.create(pageLink.url())).GET().timeout(timeout).build();
        HttpResponse<String> response =
                this.httpClient.send(request, HttpResponse.BodyHandlers.ofString());
        if (!allowedContentType(response.headers().map())) {
            throw new IllegalArgumentException("Content-Type not allowed");
        }
        return new ResponseData<>(response.statusCode(), response.body());
    }

    /**
     * Checks the content-type header of a response.
     *
     * @param map response headers; the content-type header name is matched case-insensitively
     * @return {@code true} if no content-type header is present or at least one of its values is
     *     accepted by {@code URIHelper.isAllowedContentType}; {@code false} otherwise
     */
    boolean allowedContentType(Map<String, List<String>> map) {
        for (Map.Entry<String, List<String>> header : map.entrySet()) {
            if (header.getKey().equalsIgnoreCase(Constants.CONTENT_TYPE_STR)) {
                // Only the first matching header name is considered.
                return header.getValue().stream().anyMatch(URIHelper::isAllowedContentType);
            }
        }
        return true;
    }

    /**
     * Builds a success page from a response body by parsing its title, body and links.
     *
     * @param str the raw response body
     * @param pageLink the link the body was fetched from
     * @return the parsed page, including its filtered child links
     */
    SuccessResultPage createFromHtmlBody(String str, PageLink pageLink) {
        String url = pageLink.url();
        return SuccessResultPage.create(
                pageLink,
                this.parser.parseTitle(str, url),
                this.parser.parseBody(str, url),
                addChildLinks(this.parser.parseLinks(str, url), pageLink));
    }
}
