package org.codelibs.fess.ds.s3;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.time.Instant;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import org.apache.commons.io.output.DeferredFileOutputStream;
import org.apache.tika.io.FilenameUtils;
import org.codelibs.core.io.CopyUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fess.app.service.FailureUrlService;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException;
import org.codelibs.fess.crawler.extractor.Extractor;
import org.codelibs.fess.crawler.filter.UrlFilter;
import org.codelibs.fess.crawler.helper.MimeTypeHelper;
import org.codelibs.fess.ds.AbstractDataStore;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.exception.DataStoreCrawlingException;
import org.codelibs.fess.util.ComponentUtil;
import org.lastaflute.di.core.exception.ComponentNotFoundException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.services.s3.model.Bucket;
import software.amazon.awssdk.services.s3.model.GetObjectResponse;
import software.amazon.awssdk.services.s3.model.Owner;
import software.amazon.awssdk.services.s3.model.S3Object;

/* loaded from: input_file:org/codelibs/fess/ds/s3/AmazonS3DataStore.class */
public class AmazonS3DataStore extends AbstractDataStore {
    /** Class-wide SLF4J logger; also used by the nested {@code Config} class. */
    private static final Logger logger = LoggerFactory.getLogger(AmazonS3DataStore.class);
    /** Value used for {@code max_keys} when the parameter is blank or not a valid integer. */
    protected static final int DEFAULT_MAX_KEYS = 1000;
    /** Value used for {@code max_size} when the parameter is blank or not a valid long; compared with the S3 object size in storeObject. */
    protected static final long DEFAULT_MAX_SIZE = 10000000;
    // ---- Keys looked up in the data store parameter map ----
    /** Parameter key: value handed to {@code AmazonS3Client.getObjects} as its max-keys argument. */
    protected static final String MAX_KEYS = "max_keys";
    /** Parameter key: objects whose size is larger than this value are rejected in storeObject. */
    protected static final String MAX_SIZE = "max_size";
    /** Parameter key: when "true" (the default), text-extraction failures are logged and yield empty contents instead of an exception. */
    protected static final String IGNORE_ERROR = "ignore_error";
    /** Parameter key: comma-separated regular expressions matched against the response content type (default {@code .*}). */
    protected static final String SUPPORTED_MIMETYPES = "supported_mimetypes";
    /** Parameter key: pattern registered with {@code UrlFilter.addInclude}. */
    protected static final String INCLUDE_PATTERN = "include_pattern";
    /** Parameter key: pattern registered with {@code UrlFilter.addExclude}. */
    protected static final String EXCLUDE_PATTERN = "exclude_pattern";
    /** Parameter key: size of the crawler thread pool (default "1"). */
    protected static final String NUMBER_OF_THREADS = "number_of_threads";
    // ---- Keys of the values exposed for each crawled object ----
    /** Key under which the per-object map is added to the map passed to {@code convertValue} in storeObject. */
    protected static final String OBJECT = "object";
    // The OBJECT_* constants below are the keys of the map built by getObjectMap.
    protected static final String OBJECT_URL = "url";
    /** Mime type detected from the downloaded content and file name (falls back to the response content type). */
    protected static final String OBJECT_MIMETYPE = "mimetype";
    /** File type resolved from the mime type through the file type helper. */
    protected static final String OBJECT_FILETYPE = "filetype";
    /** Text extracted from the object body. */
    protected static final String OBJECT_CONTENTS = "contents";
    protected static final String OBJECT_FILENAME = "filename";
    /** URL built by getManagementUrl (AWS console link for the object). */
    protected static final String OBJECT_MANAGEMENT_URL = "management_url";
    protected static final String OBJECT_BUCKET_NAME = "bucket_name";
    /** Creation date of the bucket (not of the object). */
    protected static final String OBJECT_BUCKET_CREATION_DATE = "creation_date";
    // Values taken from the listed S3Object.
    protected static final String OBJECT_KEY = "key";
    protected static final String OBJECT_E_TAG = "e_tag";
    protected static final String OBJECT_LAST_MODIFIED = "last_modified";
    protected static final String OBJECT_OWNER_ID = "owner_id";
    protected static final String OBJECT_OWNER_DISPLAY_NAME = "owner_display_name";
    protected static final String OBJECT_SIZE = "size";
    protected static final String OBJECT_STORAGE_CLASS = "storage_class";
    // Values taken from the GetObjectResponse.
    protected static final String OBJECT_ACCEPT_RANGES = "accept_ranges";
    protected static final String OBJECT_CACHE_CONTROL = "cache_control";
    protected static final String OBJECT_CONTENT_DISPOSITION = "content_disposition";
    protected static final String OBJECT_CONTENT_ENCODING = "content_encoding";
    protected static final String OBJECT_CONTENT_LANGUAGE = "content_language";
    protected static final String OBJECT_CONTENT_LENGTH = "content_length";
    protected static final String OBJECT_CONTENT_RANGE = "content_range";
    /** Note: getObjectMap stores the detected mime type under this key, the same value as {@link #OBJECT_MIMETYPE}. */
    protected static final String OBJECT_CONTENT_TYPE = "content_type";
    protected static final String OBJECT_DELETE_MARKER = "delete_marker";
    protected static final String OBJECT_EXPIRATION = "expiration";
    protected static final String OBJECT_EXPIRES = "expires";
    protected static final String OBJECT_MISSING_META = "missing_meta";
    protected static final String OBJECT_OBJECT_LOCK_LEGAL_HOLD_STATUS = "object_lock_legal_hold_status";
    protected static final String OBJECT_OBJECT_LOCK_MODE = "object_lock_mode";
    protected static final String OBJECT_OBJECT_LOCK_RETAIN_UNTIL_DATE = "object_lock_retain_until_date";
    protected static final String OBJECT_PARTS_COUNT = "parts_count";
    protected static final String OBJECT_REPLICATION_STATUS = "replication_status";
    protected static final String OBJECT_REQUEST_CHARGED = "request_charged";
    protected static final String OBJECT_RESTORE = "restore";
    protected static final String OBJECT_SERVER_SIDE_ENCRYPTION = "server_side_encryption";
    protected static final String OBJECT_SSE_CUSTOMER_ALGORITHM = "sse_customer_algorithm";
    protected static final String OBJECT_SSE_CUSTOMER_KEY_MD5 = "sse_customer_key_md5";
    protected static final String OBJECT_SSEKMS_KEY_ID = "ssekms_key_id";
    protected static final String OBJECT_TAG_COUNT = "tag_count";
    protected static final String OBJECT_VERSION_ID = "version_id";
    protected static final String OBJECT_WEBSITE_REDIRECT_LOCATION = "website_redirect_location";
    /** Name of the extractor component used by getObjectContents when the extractor factory has none for the mime type. */
    protected String extractorName = "tikaExtractor";

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:org/codelibs/fess/ds/s3/AmazonS3DataStore$Config.class */
    /**
     * Immutable view of the crawl settings parsed from the data store parameter map.
     *
     * <p>All values are resolved once in the constructor; invalid or missing
     * numeric parameters fall back to the defaults declared on the enclosing class.</p>
     */
    public static class Config {
        /** Max-keys argument passed to the object listing call. */
        final int maxKeys;
        /** Largest object size that is still crawled. */
        final long maxSize;
        /** Whether text-extraction failures are tolerated. */
        final boolean ignoreError;
        /** Regular expressions a response content type must match to be indexed. */
        final String[] supportedMimeTypes;
        /** Include/exclude filter for object URLs, or {@code null} if no filter component is available. */
        final UrlFilter urlFilter;

        /**
         * Parses the settings from the given parameter map.
         *
         * @param map data store parameters
         */
        Config(Map<String, String> map) {
            this.maxKeys = getMaxKeys(map);
            this.maxSize = getMaxSize(map);
            this.ignoreError = isIgnoreError(map);
            this.supportedMimeTypes = getSupportedMimeTypes(map);
            this.urlFilter = getUrlFilter(map);
        }

        /** Reads {@code max_keys}; blank or unparsable values yield the default. */
        private int getMaxKeys(Map<String, String> map) {
            final String value = map.get(MAX_KEYS);
            if (StringUtil.isNotBlank(value)) {
                try {
                    return Integer.parseInt(value);
                } catch (NumberFormatException e) {
                    // Keep crawling with the default, but leave a trace of the misconfiguration.
                    logger.warn("Invalid " + MAX_KEYS + ": " + value + ". Using " + DEFAULT_MAX_KEYS + ".", e);
                }
            }
            return DEFAULT_MAX_KEYS;
        }

        /** Reads {@code max_size}; blank or unparsable values yield the default. */
        private long getMaxSize(Map<String, String> map) {
            final String value = map.get(MAX_SIZE);
            if (StringUtil.isNotBlank(value)) {
                try {
                    return Long.parseLong(value);
                } catch (NumberFormatException e) {
                    // Keep crawling with the default, but leave a trace of the misconfiguration.
                    logger.warn("Invalid " + MAX_SIZE + ": " + value + ". Using " + DEFAULT_MAX_SIZE + ".", e);
                }
            }
            return DEFAULT_MAX_SIZE;
        }

        /** Reads {@code ignore_error}; defaults to {@code true} when the key is absent. */
        private boolean isIgnoreError(Map<String, String> map) {
            // Constant-first comparison so a key mapped to null yields false instead of an NPE.
            return "true".equalsIgnoreCase(map.getOrDefault(IGNORE_ERROR, "true"));
        }

        /** Splits {@code supported_mimetypes} on commas and trims each pattern; defaults to {@code .*}. */
        private String[] getSupportedMimeTypes(Map<String, String> map) {
            return StreamUtil.split(map.getOrDefault(SUPPORTED_MIMETYPES, ".*"), ",")
                    .get(stream -> stream.map(String::trim).toArray(String[]::new));
        }

        /**
         * Builds the URL filter from {@code include_pattern} / {@code exclude_pattern}.
         *
         * @return the initialised filter, or {@code null} when no {@code UrlFilter} component is registered
         */
        private UrlFilter getUrlFilter(Map<String, String> map) {
            final UrlFilter filter;
            try {
                filter = ComponentUtil.getComponent(UrlFilter.class);
            } catch (ComponentNotFoundException ignored) {
                // No filter component registered: URL filtering is simply disabled.
                return null;
            }
            final String include = map.get(INCLUDE_PATTERN);
            if (StringUtil.isNotBlank(include)) {
                filter.addInclude(include);
            }
            final String exclude = map.get(EXCLUDE_PATTERN);
            if (StringUtil.isNotBlank(exclude)) {
                filter.addExclude(exclude);
            }
            filter.init(map.get("crawlingInfoId"));
            if (logger.isDebugEnabled()) {
                logger.debug("urlFilter: {}", filter);
            }
            return filter;
        }

        /** Renders the settings for debug logging, each label paired with its own field. */
        @Override
        public String toString() {
            return "{maxSize=" + maxSize + ",ignoreError=" + ignoreError + ",supportedMimeTypes=" + Arrays.toString(supportedMimeTypes)
                    + ",urlFilter=" + urlFilter + "}";
        }
    }

    /**
     * Returns the name of this data store.
     *
     * @return the simple name of the runtime class
     */
    protected String getName() {
        final Class<?> runtimeType = getClass();
        return runtimeType.getSimpleName();
    }

    /**
     * Crawls every bucket reachable with the configured client and feeds the
     * objects to the index callback using a bounded thread pool.
     *
     * <p>After all objects are submitted the pool is shut down and given up to
     * 60 seconds to drain; the client is closed afterwards and any tasks still
     * running are cancelled.</p>
     *
     * @param dataConfig          data store configuration, used when recording failures
     * @param indexUpdateCallback callback receiving the documents to index
     * @param map                 data store parameters
     * @param map2                field name to script/template mapping
     * @param map3                default values copied into every document
     */
    protected void storeData(DataConfig dataConfig, IndexUpdateCallback indexUpdateCallback, Map<String, String> map, Map<String, String> map2, Map<String, Object> map3) {
        final Config config = new Config(map);
        if (logger.isDebugEnabled()) {
            logger.debug("config: {}", config);
        }
        final ExecutorService executorService = newFixedThreadPool(Integer.parseInt(map.getOrDefault(NUMBER_OF_THREADS, "1")));
        try (AmazonS3Client client = createClient(map)) {
            crawlBuckets(dataConfig, indexUpdateCallback, map, map2, map3, config, executorService, client);
            if (logger.isDebugEnabled()) {
                logger.debug("Shutting down thread executor.");
            }
            executorService.shutdown();
            if (!executorService.awaitTermination(60L, TimeUnit.SECONDS)) {
                // The client is closed right after this block, so unfinished tasks cannot complete.
                logger.warn("Crawling tasks did not finish within 60 seconds; remaining tasks will be cancelled.");
            }
        } catch (InterruptedException e) {
            if (logger.isDebugEnabled()) {
                logger.debug("Interrupted.", e);
            }
            // Restore the interrupt flag so the caller can observe the interruption.
            Thread.currentThread().interrupt();
        } finally {
            executorService.shutdownNow();
        }
    }

    /**
     * Walks all buckets of the client and schedules one indexing task per listed object.
     *
     * @param dataConfig          data store configuration
     * @param indexUpdateCallback callback receiving the documents to index
     * @param map                 data store parameters
     * @param map2                field name to script/template mapping
     * @param map3                default values copied into every document
     * @param config              parsed crawl settings
     * @param executorService     executor on which {@code storeObject} is run
     * @param amazonS3Client      client used to list buckets and objects
     */
    protected void crawlBuckets(DataConfig dataConfig, IndexUpdateCallback indexUpdateCallback, Map<String, String> map, Map<String, String> map2, Map<String, Object> map3, Config config, ExecutorService executorService, AmazonS3Client amazonS3Client) {
        if (logger.isDebugEnabled()) {
            logger.debug("Crawling buckets.");
        }
        amazonS3Client.getBuckets(bucket -> {
            final String bucketName = bucket.name();
            if (logger.isDebugEnabled()) {
                logger.debug("Crawling bucket objects: {}", bucketName);
            }
            amazonS3Client.getObjects(bucketName, config.maxKeys, s3Object -> {
                // One task per object; the executor decides where it actually runs.
                final Runnable task =
                        () -> storeObject(dataConfig, indexUpdateCallback, map, map2, map3, config, amazonS3Client, bucket, s3Object);
                executorService.execute(task);
            });
        });
    }

    /**
     * Downloads a single S3 object, converts it to a document and hands it to
     * the index callback.
     *
     * <p>The object is skipped when its URL is rejected by the URL filter or when
     * the response content type matches none of the supported mime type patterns.
     * Objects larger than {@code config.maxSize} and any other failure are logged
     * and recorded through the {@code FailureUrlService}; no exception escapes.</p>
     *
     * @param dataConfig          data store configuration, used when recording failures
     * @param indexUpdateCallback callback receiving the document
     * @param map                 data store parameters
     * @param map2                field name to script/template mapping
     * @param map3                default values copied into the document
     * @param config              parsed crawl settings
     * @param amazonS3Client      client used to fetch the object
     * @param bucket              bucket containing the object
     * @param s3Object            listing entry of the object to crawl
     */
    protected void storeObject(DataConfig dataConfig, IndexUpdateCallback indexUpdateCallback, Map<String, String> map, Map<String, String> map2, Map<String, Object> map3, Config config, AmazonS3Client amazonS3Client, Bucket bucket, S3Object s3Object) {
        final Map<String, Object> dataMap = new HashMap<>(map3);
        // Declared outside the try block so failures can be recorded against the object's URL.
        String url = "";
        try {
            url = getUrl(amazonS3Client.getEndpoint(), amazonS3Client.getRegion().id(), bucket.name(), s3Object.key());
            final UrlFilter urlFilter = config.urlFilter;
            if (urlFilter != null && !urlFilter.match(url)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Not matched: {}", url);
                }
                return;
            }
            // Close the response stream on every path, including the early returns below.
            try (ResponseInputStream<GetObjectResponse> stream = amazonS3Client.getObject(bucket.name(), s3Object.key())) {
                final GetObjectResponse response = stream.response();
                final String contentType = response.contentType();
                if (Stream.of(config.supportedMimeTypes).noneMatch(contentType::matches)) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("{} is not an indexing target.", contentType);
                    }
                    return;
                }
                if (config.maxSize < s3Object.size().longValue()) {
                    throw new MaxLengthExceededException(
                            "The content length (" + s3Object.size() + " byte) is over " + config.maxSize + " byte. The url is " + url);
                }
                logger.info("Crawling URL: {}", url);
                final Map<String, Object> resultMap = new LinkedHashMap<>(map);
                final Map<String, Object> objectMap =
                        getObjectMap(amazonS3Client.getRegion().id(), bucket, s3Object, url, stream, config.ignoreError);
                resultMap.put(OBJECT, objectMap);
                if (logger.isDebugEnabled()) {
                    logger.debug("objectMap: {}", objectMap);
                }
                for (final Map.Entry<String, String> entry : map2.entrySet()) {
                    final Object convertValue = convertValue(entry.getValue(), resultMap);
                    if (convertValue != null) {
                        dataMap.put(entry.getKey(), convertValue);
                    }
                }
                if (logger.isDebugEnabled()) {
                    logger.debug("dataMap: {}", dataMap);
                }
                indexUpdateCallback.store(map, dataMap);
            }
        } catch (CrawlingAccessException e) {
            logger.warn("Crawling Access Exception at : " + dataMap, e);
            Throwable target = e;
            if (target instanceof MultipleCrawlingAccessException) {
                // Report the most recent of the collected causes.
                final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses();
                if (causes.length > 0) {
                    target = causes[causes.length - 1];
                }
            }
            final Throwable cause = target.getCause();
            final String errorName = cause != null ? cause.getClass().getCanonicalName() : target.getClass().getCanonicalName();
            recordFailure(dataConfig, errorName, url, target);
        } catch (Throwable t) {
            logger.warn("Crawling Access Exception at : " + dataMap, t);
            recordFailure(dataConfig, t.getClass().getCanonicalName(), url, t);
        }
    }

    /** Records a crawl failure for the given URL through the {@code FailureUrlService} component. */
    private void recordFailure(DataConfig dataConfig, String errorName, String url, Throwable failure) {
        final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
        failureUrlService.store(dataConfig, errorName, url, failure);
    }

    /**
     * Builds the map of values exposed for one S3 object: listing metadata,
     * response metadata, detected mime/file type and the extracted text.
     *
     * <p>The object body is copied into a {@code DeferredFileOutputStream}
     * constructed with a threshold of 1,000,000 bytes so that it can be read
     * twice (mime type detection, then text extraction). A temporary file
     * created by that stream is deleted before returning, whether or not
     * processing succeeded. If an {@code IOException} occurs the failure is
     * logged and the map is returned without contents, using the response
     * content type as mime type.</p>
     *
     * @param str                 region id, used for the management URL
     * @param bucket              bucket containing the object
     * @param s3Object            listing entry of the object
     * @param str2                URL of the object
     * @param responseInputStream open response for the object; not closed by this method
     * @param z                   whether text-extraction failures are tolerated
     * @return a mutable map keyed by the {@code OBJECT_*} constants
     * @throws URISyntaxException if the management URL cannot be built
     */
    protected Map<String, Object> getObjectMap(String str, Bucket bucket, S3Object s3Object, String str2, ResponseInputStream<GetObjectResponse> responseInputStream, boolean z) throws URISyntaxException {
        final Map<String, Object> objectMap = new HashMap<>();
        final GetObjectResponse response = responseInputStream.response();

        objectMap.put(OBJECT_URL, str2);
        final String filename = FilenameUtils.getName(s3Object.key());
        objectMap.put(OBJECT_FILENAME, filename);
        objectMap.put(OBJECT_MANAGEMENT_URL, getManagementUrl(str, bucket.name(), s3Object.key()));

        // Bucket and listing metadata.
        objectMap.put(OBJECT_BUCKET_NAME, bucket.name());
        objectMap.put(OBJECT_BUCKET_CREATION_DATE, toDate(bucket.creationDate()));
        objectMap.put(OBJECT_KEY, s3Object.key());
        objectMap.put(OBJECT_E_TAG, s3Object.eTag());
        objectMap.put(OBJECT_LAST_MODIFIED, toDate(s3Object.lastModified()));
        final Owner owner = s3Object.owner();
        objectMap.put(OBJECT_OWNER_ID, owner != null ? owner.id() : null);
        objectMap.put(OBJECT_OWNER_DISPLAY_NAME, owner != null ? owner.displayName() : null);
        objectMap.put(OBJECT_SIZE, s3Object.size());
        objectMap.put(OBJECT_STORAGE_CLASS, s3Object.storageClassAsString());

        // GetObject response metadata.
        objectMap.put(OBJECT_ACCEPT_RANGES, response.acceptRanges());
        objectMap.put(OBJECT_CACHE_CONTROL, response.cacheControl());
        objectMap.put(OBJECT_CONTENT_DISPOSITION, response.contentDisposition());
        objectMap.put(OBJECT_CONTENT_ENCODING, response.contentEncoding());
        objectMap.put(OBJECT_CONTENT_LANGUAGE, response.contentLanguage());
        objectMap.put(OBJECT_CONTENT_LENGTH, response.contentLength());
        objectMap.put(OBJECT_CONTENT_RANGE, response.contentRange());
        objectMap.put(OBJECT_DELETE_MARKER, response.deleteMarker());
        objectMap.put(OBJECT_EXPIRATION, response.expiration());
        objectMap.put(OBJECT_EXPIRES, toDate(response.expires()));
        objectMap.put(OBJECT_MISSING_META, response.missingMeta());
        objectMap.put(OBJECT_OBJECT_LOCK_LEGAL_HOLD_STATUS, response.objectLockLegalHoldStatusAsString());
        objectMap.put(OBJECT_OBJECT_LOCK_MODE, response.objectLockModeAsString());
        objectMap.put(OBJECT_OBJECT_LOCK_RETAIN_UNTIL_DATE, toDate(response.objectLockRetainUntilDate()));
        objectMap.put(OBJECT_PARTS_COUNT, response.partsCount());
        objectMap.put(OBJECT_REPLICATION_STATUS, response.replicationStatusAsString());
        objectMap.put(OBJECT_REQUEST_CHARGED, response.requestChargedAsString());
        objectMap.put(OBJECT_RESTORE, response.restore());
        objectMap.put(OBJECT_SERVER_SIDE_ENCRYPTION, response.serverSideEncryptionAsString());
        objectMap.put(OBJECT_SSE_CUSTOMER_ALGORITHM, response.sseCustomerAlgorithm());
        objectMap.put(OBJECT_SSE_CUSTOMER_KEY_MD5, response.sseCustomerKeyMD5());
        objectMap.put(OBJECT_SSEKMS_KEY_ID, response.ssekmsKeyId());
        objectMap.put(OBJECT_TAG_COUNT, response.tagCount());
        objectMap.put(OBJECT_VERSION_ID, response.versionId());
        objectMap.put(OBJECT_WEBSITE_REDIRECT_LOCATION, response.websiteRedirectLocation());

        // Start from the declared content type; replaced by the detected one when the body is readable.
        String mimeType = response.contentType();
        // Tracked separately from the try-with-resources variable so the temp file can be removed in finally.
        DeferredFileOutputStream buffered = null;
        try (DeferredFileOutputStream out = new DeferredFileOutputStream(1000000, "fess-ds-s3-", ".out", (File) null)) {
            buffered = out;
            CopyUtil.copy(responseInputStream, out);
            out.flush();
            mimeType = getMimeType(filename, out);
            try (InputStream contentInputStream = getContentInputStream(out)) {
                objectMap.put(OBJECT_CONTENTS, getObjectContents(contentInputStream, mimeType, s3Object.key(), str2, z));
            }
        } catch (IOException e) {
            logger.warn("Failed to process " + str2, e);
        } finally {
            if (buffered != null && !buffered.isInMemory()) {
                final File file = buffered.getFile();
                if (!file.delete()) {
                    logger.warn("Failed to delete {}.", file.getAbsolutePath());
                }
            }
        }

        objectMap.put(OBJECT_FILETYPE, ComponentUtil.getFileTypeHelper().get(mimeType));
        objectMap.put(OBJECT_MIMETYPE, mimeType);
        objectMap.put(OBJECT_CONTENT_TYPE, mimeType);
        return objectMap;
    }

    /**
     * Detects the mime type of the buffered object body.
     *
     * @param str                      file name passed to the mime type helper together with the content
     * @param deferredFileOutputStream buffer holding the object body
     * @return the content type reported by the {@code MimeTypeHelper} component
     * @throws IOException if the buffered content cannot be opened or closed
     */
    protected String getMimeType(String str, DeferredFileOutputStream deferredFileOutputStream) throws IOException {
        final MimeTypeHelper helper = ComponentUtil.getComponent(MimeTypeHelper.class);
        try (InputStream content = getContentInputStream(deferredFileOutputStream)) {
            return helper.getContentType(content, str);
        }
    }

    /**
     * Opens a fresh stream over the content buffered so far.
     *
     * @param deferredFileOutputStream buffer holding the object body
     * @return a stream over the in-memory bytes, or over the backing file when the buffer is not in memory
     * @throws IOException if the backing file cannot be opened
     */
    protected InputStream getContentInputStream(DeferredFileOutputStream deferredFileOutputStream) throws IOException {
        if (deferredFileOutputStream.isInMemory()) {
            final byte[] bytes = deferredFileOutputStream.getData();
            return new ByteArrayInputStream(bytes);
        }
        final File backingFile = deferredFileOutputStream.getFile();
        return new FileInputStream(backingFile);
    }

    /**
     * Extracts the text of an object body.
     *
     * <p>The extractor is looked up by mime type; when none is found the
     * component named by {@code extractorName} is used instead.</p>
     *
     * @param inputStream object body
     * @param str         mime type used to select the extractor
     * @param str2        object key, used in error messages
     * @param str3        object URL, attached to the thrown exception
     * @param z           when {@code true}, failures are logged and an empty string is returned
     * @return the extracted text, or {@code ""} on a tolerated failure
     * @throws DataStoreCrawlingException if extraction fails and {@code z} is {@code false}
     */
    protected String getObjectContents(InputStream inputStream, String str, String str2, String str3, boolean z) {
        try {
            Extractor selected = ComponentUtil.getExtractorFactory().getExtractor(str);
            if (selected == null) {
                if (logger.isDebugEnabled()) {
                    logger.debug("use a default extractor as {} by {}", this.extractorName, str);
                }
                selected = ComponentUtil.getComponent(this.extractorName);
            }
            return selected.getText(inputStream, null).getContent();
        } catch (Exception e) {
            final String message = "Failed to get contents: " + str2;
            if (z) {
                logger.warn(message, e);
                return "";
            }
            throw new DataStoreCrawlingException(str3, message, e);
        }
    }

    /**
     * Builds the URL of an object.
     *
     * <p>Without a custom endpoint the host is
     * {@code <bucket>.s3-<region>.amazonaws.com} over https; with an endpoint,
     * the bucket name is prefixed to the endpoint's authority and the
     * endpoint's scheme is kept.</p>
     *
     * @param str  custom endpoint, or {@code null} for the default AWS host
     * @param str2 region id
     * @param str3 bucket name
     * @param str4 object key
     * @return the URL as an ASCII string
     * @throws URISyntaxException if the parts do not form a valid URI
     */
    protected String getUrl(String str, String str2, String str3, String str4) throws URISyntaxException {
        final String path = "/" + str4;
        final URI objectUri;
        if (str == null) {
            final String host = str3 + ".s3-" + str2 + ".amazonaws.com";
            objectUri = new URI("https", host, path, null);
        } else {
            final URI endpoint = URI.create(str);
            final String authority = str3 + "." + endpoint.getAuthority();
            objectUri = new URI(endpoint.getScheme(), authority, path, null, null);
        }
        return objectUri.toASCIIString();
    }

    /**
     * Builds the AWS console URL for an object.
     *
     * @param str  region id, emitted as the {@code region} query parameter
     * @param str2 bucket name
     * @param str3 object key
     * @return the console URL as an ASCII string
     * @throws URISyntaxException if the parts do not form a valid URI
     */
    protected String getManagementUrl(String str, String str2, String str3) throws URISyntaxException {
        final String path = "/s3/object/" + str2 + "/" + str3;
        final String query = "region=" + str;
        final URI consoleUri = new URI("https", "s3.console.aws.amazon.com", path, query, null);
        return consoleUri.toASCIIString();
    }

    /**
     * Converts an {@code Instant} to a {@code Date}.
     *
     * @param instant the instant to convert, may be {@code null}
     * @return the corresponding date, or {@code null} when {@code instant} is {@code null}
     */
    protected Date toDate(Instant instant) {
        return instant == null ? null : Date.from(instant);
    }

    /**
     * Creates the executor used to process objects.
     *
     * <p>The pool has {@code i} core and maximum threads and a work queue
     * bounded to {@code i} entries; when the queue is full the submitting
     * thread runs the task itself ({@code CallerRunsPolicy}).</p>
     *
     * @param i number of threads and queue capacity
     * @return a new thread pool executor
     */
    protected ExecutorService newFixedThreadPool(int i) {
        if (logger.isDebugEnabled()) {
            logger.debug("Executor Thread Pool: {}", i);
        }
        final LinkedBlockingQueue<Runnable> workQueue = new LinkedBlockingQueue<>(i);
        final ThreadPoolExecutor.CallerRunsPolicy whenSaturated = new ThreadPoolExecutor.CallerRunsPolicy();
        return new ThreadPoolExecutor(i, i, 0L, TimeUnit.MILLISECONDS, workQueue, whenSaturated);
    }

    /**
     * Creates the S3 client for a crawl.
     *
     * @param map data store parameters handed to the client constructor
     * @return a new client; {@code storeData} closes it when the crawl ends
     */
    protected AmazonS3Client createClient(Map<String, String> map) {
        final AmazonS3Client client = new AmazonS3Client(map);
        return client;
    }
}
