package org.archive.wayback.accesscontrol.robotstxt;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.Resource;
import org.archive.wayback.exception.LiveDocumentNotAvailableException;
import org.archive.wayback.exception.LiveWebCacheUnavailableException;
import org.archive.wayback.exception.LiveWebTimeoutException;
import org.archive.wayback.liveweb.LiveWebCache;
import org.archive.wayback.resourceindex.filters.ExclusionFilter;
import org.archive.wayback.util.url.UrlOperations;
import org.archive.wayback.webapp.PerfStats;

/* loaded from: input_file:org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.class */
/**
 * Exclusion filter that decides, for each {@link CaptureSearchResult}, whether
 * the capture's URL path is blocked by robots.txt rules obtained through a
 * {@link LiveWebCache}.
 *
 * <p>Parsed rules are cached per robots.txt URL in {@code rulesCache}, and the
 * verdict for each URL key is cached in {@code pathsCache}. This class performs
 * no synchronization: the caches are plain {@link HashMap}s and URL strings are
 * built in the shared {@link #sb} buffer.
 */
public class RobotExclusionFilter extends ExclusionFilter {
    /** Path appended to scheme + host to form a robots.txt URL. */
    protected static final String ROBOT_SUFFIX = "/robots.txt";

    private static final Logger LOGGER = Logger.getLogger(RobotExclusionFilter.class.getName());

    /**
     * Regex matching a leading "wwwN." host label. Left non-final to keep the
     * existing member signature; note that {@link #WWWN_PATTERN} is compiled
     * once at class initialization, so reassigning this field has no effect
     * on matching.
     */
    protected static String WWWN_REGEX = "^www[0-9]+\\.";
    protected static final Pattern WWWN_PATTERN = Pattern.compile(WWWN_REGEX);

    /** Shared rule set used when none of the candidate robots.txt URLs produced rules. */
    private static final RobotRules EMPTY_RULES = new RobotRules();

    /** Returned by {@link #filterObject} when the capture is not blocked. */
    private static final int RESULT_ALLOWED = 0;
    /** Returned by {@link #filterObject} when the capture is blocked (also the default verdict). */
    private static final int RESULT_BLOCKED = 1;
    /**
     * Returned by {@link #filterObject} when no rules could be obtained and there is
     * no filter group, or the filter group reports a robots timeout or live-web outage.
     * NOTE(review): 0/1/2 presumably mirror the ObjectFilter result codes - confirm.
     */
    private static final int RESULT_UNAVAILABLE = 2;

    private final LiveWebCache webCache;
    private final HashMap<String, RobotRules> rulesCache;
    private final long maxCacheMS;
    private final String userAgent;

    /** Scratch buffer reused by {@link #hostToRobotUrlString(String, String)}. */
    protected StringBuilder sb;
    private boolean notifiedSeen = false;
    private boolean notifiedPassed = false;
    /** Verdicts already computed, keyed by capture URL key; created lazily. */
    protected HashMap<String, Integer> pathsCache = null;

    /**
     * Timers reported to {@link PerfStats} by this filter.
     * NOTE(review): the decompiler reported this enum was originally
     * package-private; it is kept public to match the current signature.
     */
    public enum PerfStat {
        RobotsFetchTotal,
        RobotsTotal
    }

    /**
     * @param webCache   cache used to fetch robots.txt documents
     * @param userAgent  user agent passed to the rules when checking a path
     * @param maxCacheMS value passed through to {@link LiveWebCache#getCachedResource}
     */
    public RobotExclusionFilter(LiveWebCache webCache, String userAgent, long maxCacheMS) {
        this.rulesCache = new HashMap<>();
        this.webCache = webCache;
        this.userAgent = userAgent;
        this.maxCacheMS = maxCacheMS;
        this.sb = new StringBuilder(100);
    }

    /**
     * Builds {@code scheme + host + "/robots.txt"}, dropping one trailing
     * {@code '.'} from the host if present.
     *
     * @param host   host name
     * @param scheme prefix placed before the host (appended verbatim)
     * @return the robots.txt URL string
     */
    protected String hostToRobotUrlString(String host, String scheme) {
        String trimmedHost = host.endsWith(".") ? host.substring(0, host.length() - 1) : host;
        this.sb.setLength(0);
        this.sb.append(scheme).append(trimmedHost).append(ROBOT_SUFFIX);
        String robotUrl = this.sb.toString();
        LOGGER.fine("Adding robot URL:" + robotUrl);
        return robotUrl;
    }

    /**
     * Lists the candidate robots.txt URLs for a host, in the order they should be tried:
     * <ul>
     *   <li>host not starting with "www": the host, then "www." + host;</li>
     *   <li>host starting with "www.": the host, then the host without "www.";</li>
     *   <li>otherwise (e.g. "www2."): if {@link #WWWN_PATTERN} matches, "www." + rest
     *       and rest, followed in all cases by the host itself.</li>
     * </ul>
     *
     * @param host   host name of the capture
     * @param scheme prefix placed before each host
     * @return a non-empty list of robots.txt URL strings
     */
    protected List<String> searchResultToRobotUrlStrings(String host, String scheme) {
        List<String> robotUrls = new ArrayList<>();
        if (!host.startsWith("www")) {
            robotUrls.add(hostToRobotUrlString(host, scheme));
            robotUrls.add(hostToRobotUrlString("www." + host, scheme));
        } else if (host.startsWith("www.")) {
            robotUrls.add(hostToRobotUrlString(host, scheme));
            robotUrls.add(hostToRobotUrlString(host.substring(4), scheme));
        } else {
            Matcher matcher = WWWN_PATTERN.matcher(host);
            if (matcher.find()) {
                String rest = host.substring(matcher.end());
                robotUrls.add(hostToRobotUrlString("www." + rest, scheme));
                robotUrls.add(hostToRobotUrlString(rest, scheme));
            }
            robotUrls.add(hostToRobotUrlString(host, scheme));
        }
        return robotUrls;
    }

    /**
     * Finds the rules that apply to a capture, trying each candidate robots.txt
     * URL in order until one yields rules (from the cache or by download).
     * Whatever is found is cached under the FIRST candidate URL; if no candidate
     * yields rules, the shared empty rule set is cached and returned.
     *
     * @return the rules, or {@code null} if the candidate URLs could not be derived
     *         or a fetch failed with anything other than "document not available"
     */
    private RobotRules getRules(CaptureSearchResult captureSearchResult) {
        List<String> robotUrls;
        try {
            robotUrls = searchResultToRobotUrlStrings(
                    captureSearchResult.getOriginalHost(),
                    UrlOperations.urlToScheme(captureSearchResult.getOriginalUrl()));
        } catch (Exception e) {
            LOGGER.warning("ROBOT: Failed to get host from(" + captureSearchResult.getOriginalUrl() + ")");
            return null;
        }

        RobotRules rules = null;
        String firstUrl = null;
        for (String robotUrl : robotUrls) {
            if (firstUrl == null) {
                firstUrl = robotUrl;
            }
            if (this.rulesCache.containsKey(robotUrl)) {
                LOGGER.fine("ROBOT: Cached(" + robotUrl + ")");
                rules = this.rulesCache.get(robotUrl);
                if (!robotUrl.equals(firstUrl)) {
                    LOGGER.fine("Adding extra url(" + firstUrl + ") for prev cached rules(" + robotUrl + ")");
                    this.rulesCache.put(firstUrl, rules);
                }
            } else {
                try {
                    rules = downloadRules(robotUrl);
                    this.rulesCache.put(firstUrl, rules);
                } catch (LiveDocumentNotAvailableException e) {
                    // Not fatal: fall through and try the next candidate URL.
                    LOGGER.info("ROBOT: LiveDocumentNotAvailableException(" + robotUrl + ")");
                } catch (LiveWebTimeoutException e) {
                    LOGGER.severe("ROBOT: LiveDocumentTimedOutException(" + robotUrl + ")");
                    if (this.filterGroup != null) {
                        this.filterGroup.setRobotTimedOut();
                    }
                    return null;
                } catch (LiveWebCacheUnavailableException e) {
                    LOGGER.severe("ROBOT: LiveWebCacheUnavailableException(" + robotUrl + ")");
                    if (this.filterGroup != null) {
                        this.filterGroup.setLiveWebGone();
                    }
                    return null;
                } catch (MalformedURLException e) {
                    LOGGER.warning("ROBOT: MalformedURLException(" + robotUrl + ")");
                    return null;
                } catch (IOException e) {
                    LOGGER.warning("ROBOT: IOException(" + robotUrl + "):" + e.getLocalizedMessage());
                    return null;
                } catch (RuntimeException e) {
                    // Unexpected failures still yield "no rules" rather than propagating,
                    // but are reported as what they are instead of as a host-parsing error.
                    LOGGER.log(Level.WARNING, "ROBOT: Unexpected failure(" + robotUrl + ")", e);
                    return null;
                }
            }
            if (rules != null) {
                break;
            }
        }

        if (rules == null) {
            this.rulesCache.put(firstUrl, EMPTY_RULES);
            rules = EMPTY_RULES;
            LOGGER.fine("No rules available, using emptyRules for:" + firstUrl);
        }
        return rules;
    }

    /**
     * Fetches one robots.txt URL through the live web cache and parses it.
     * The fetched resource is closed and the {@code RobotsFetchTotal} timer is
     * ended exactly once, whether the fetch succeeds or fails.
     *
     * @throws LiveDocumentNotAvailableException if the cache reports it, or the
     *         response status code is not 200
     */
    private RobotRules downloadRules(String robotUrl)
            throws LiveDocumentNotAvailableException, LiveWebCacheUnavailableException,
                   LiveWebTimeoutException, IOException {
        Resource resource = null;
        try {
            PerfStats.timeStart(PerfStat.RobotsFetchTotal);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("ROBOT: NotCached - Downloading(" + robotUrl + ")");
            }
            RobotRules downloaded = new RobotRules();
            resource = this.webCache.getCachedResource(new URL(robotUrl), this.maxCacheMS, true);
            if (resource.getStatusCode() != 200) {
                LOGGER.info("ROBOT: NotAvailable(" + robotUrl + ")");
                throw new LiveDocumentNotAvailableException(robotUrl);
            }
            downloaded.parse(resource);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("ROBOT: Downloaded(" + robotUrl + ")");
            }
            return downloaded;
        } finally {
            closeQuietly(resource);
            PerfStats.timeEnd(PerfStat.RobotsFetchTotal);
        }
    }

    /** Closes the resource if non-null; a failure to close is logged and otherwise ignored. */
    private static void closeQuietly(Resource resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Best effort: a close failure must not change the filtering outcome.
            LOGGER.log(Level.FINE, "ROBOT: failed to close resource", ignored);
        }
    }

    /** Tells the filter group, once per filter instance, that a capture passed the robots check. */
    private void notePassedRobots() {
        if (!this.notifiedPassed) {
            if (this.filterGroup != null) {
                this.filterGroup.setPassedRobots();
            }
            this.notifiedPassed = true;
        }
    }

    /**
     * Checks one capture against robots.txt rules (overrides
     * org.archive.wayback.util.ObjectFilter).
     *
     * <p>Captures whose path is exactly {@code /robots.txt}, or which are flagged
     * robot-ignore, are always allowed. Other verdicts are computed once per URL
     * key and then served from {@code pathsCache}. The {@code RobotsTotal} timer
     * is ended on every exit path, including exceptions.
     *
     * @return 0 if allowed, 1 if blocked, 2 if rules were unavailable and there is
     *         no filter group or it reports a robots timeout / live-web outage
     */
    @Override // org.archive.wayback.util.ObjectFilter
    public int filterObject(CaptureSearchResult captureSearchResult) {
        try {
            PerfStats.timeStart(PerfStat.RobotsTotal);
            if (!this.notifiedSeen) {
                if (this.filterGroup != null) {
                    this.filterGroup.setSawRobots();
                }
                this.notifiedSeen = true;
            }

            String originalUrl = captureSearchResult.getOriginalUrl();
            String urlPath = UrlOperations.getURLPath(originalUrl);
            if (urlPath.equals(ROBOT_SUFFIX) || captureSearchResult.isRobotIgnore()) {
                notePassedRobots();
                return RESULT_ALLOWED;
            }

            if (this.pathsCache == null) {
                this.pathsCache = new HashMap<>();
            }
            String urlKey = captureSearchResult.getUrlKey();
            Integer cachedVerdict = this.pathsCache.get(urlKey);
            if (cachedVerdict != null) {
                return cachedVerdict.intValue();
            }

            int verdict = RESULT_BLOCKED;
            RobotRules rules = getRules(captureSearchResult);
            if (rules == null) {
                if (this.filterGroup == null
                        || this.filterGroup.getRobotTimedOut()
                        || this.filterGroup.getLiveWebGone()) {
                    verdict = RESULT_UNAVAILABLE;
                }
            } else if (rules.blocksPathForUA(urlPath, this.userAgent)) {
                LOGGER.fine("ROBOT: BLOCKED(" + originalUrl + ")");
            } else {
                notePassedRobots();
                verdict = RESULT_ALLOWED;
                LOGGER.finer("ROBOT: ALLOWED(" + originalUrl + ")");
            }
            this.pathsCache.put(urlKey, Integer.valueOf(verdict));
            return verdict;
        } finally {
            PerfStats.timeEnd(PerfStat.RobotsTotal, false);
        }
    }

    /** @return the live web cache this filter fetches robots.txt documents through */
    public LiveWebCache getWebCache() {
        return this.webCache;
    }
}
