package org.dromara.pdf.pdfbox.core.ext.extractor;

import java.awt.Rectangle;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripperByArea;
import org.dromara.pdf.pdfbox.core.base.Document;

/* loaded from: input_file:org/dromara/pdf/pdfbox/core/ext/extractor/AbstractTextExtractor.class */
/**
 * Base class for text extractors. It provides the shared helpers that pull text out of a
 * document by regular expression, by named rectangular region, and as table rows split into
 * cells, plus a page-iteration driver ({@link #extractText}).
 */
public abstract class AbstractTextExtractor extends AbstractExtractor {
    /** Flags applied to every pattern compiled here: case-insensitive, Unicode-aware matching. */
    private static final int REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    /**
     * Matches one table row: a run of at least two characters that starts with a non-whitespace
     * character and contains no line break.
     */
    protected static final Pattern TABLE_PATTERN = Pattern.compile("(\\S[^\\n\\r]+)", REGEX_FLAGS);

    /**
     * Per-page extraction strategy used by {@link #extractText}.
     *
     * @param <R> the per-page result type
     */
    @FunctionalInterface
    protected interface Function<R> {
        /**
         * Extracts a result from a single page.
         *
         * @param map     named regions to extract from
         * @param str     word separator
         * @param pDPage  the page to process
         * @return the result for that page
         */
        R apply(Map<String, Rectangle> map, String str, PDPage pDPage);
    }

    /**
     * Creates an extractor bound to the given document.
     *
     * @param document the document to extract from
     */
    public AbstractTextExtractor(Document document) {
        super(document);
    }

    public abstract Map<Integer, List<String>> extractByRegex(String str, int... iArr);

    public abstract Map<Integer, Map<String, String>> extractByRegionArea(String str, Map<String, Rectangle> map, int... iArr);

    public abstract Map<Integer, Map<String, List<List<String>>>> extractByTable(String str, Map<String, Rectangle> map, int... iArr);

    /**
     * Extracts text with the given stripper and collects every match of a regular expression.
     *
     * @param str              the regular expression; when {@code null} or blank the whole
     *                         extracted text is returned as the single list element
     * @param pDFTextStripper  the stripper used to obtain the text
     * @return a mutable list of matches in order of occurrence (or the full text, see above)
     * @throws UncheckedIOException if the stripper fails to read the document
     */
    public List<String> processTextByRegex(String str, PDFTextStripper pDFTextStripper) {
        final String text;
        try {
            text = pDFTextStripper.getText(getDocument());
        } catch (IOException e) {
            throw new UncheckedIOException("failed to extract text from document", e);
        }
        List<String> matches = new ArrayList<>(32);
        if (str == null || str.trim().isEmpty()) {
            matches.add(text);
            return matches;
        }
        Matcher matcher = Pattern.compile(str, REGEX_FLAGS).matcher(text);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }

    /**
     * Extracts the text of each named region on a page.
     *
     * @param map     region name to rectangle; {@code null} or empty yields an empty result
     * @param str     word separator handed to the area stripper
     * @param pDPage  the page to extract from
     * @return a mutable map from region name to the text found in that region
     * @throws UncheckedIOException if the stripper fails to process the page
     */
    public Map<String, String> processTextByRegionArea(Map<String, Rectangle> map, String str, PDPage pDPage) {
        // Nothing to extract: skip creating a stripper altogether.
        if (map == null || map.isEmpty()) {
            return new HashMap<>(0);
        }
        try {
            PDFTextStripperByArea stripper = new PDFTextStripperByArea();
            stripper.setWordSeparator(str);
            map.forEach(stripper::addRegion);
            stripper.extractRegions(pDPage);
            Map<String, String> textByRegion = new HashMap<>(map.size());
            for (String regionName : map.keySet()) {
                textByRegion.put(regionName, stripper.getTextForRegion(regionName));
            }
            return textByRegion;
        } catch (IOException e) {
            throw new UncheckedIOException("failed to extract text from page regions", e);
        }
    }

    /**
     * Extracts each named region on a page as a table: the region text is broken into rows with
     * {@link #TABLE_PATTERN} and every row is split into cells on the word separator.
     *
     * @param map     region name to rectangle
     * @param str     word separator; used both by the stripper and, as a literal, to split cells
     * @param pDPage  the page to extract from
     * @return a mutable map from region name to its rows, each row being a list of cell strings
     * @throws UncheckedIOException if the stripper fails to process the page
     */
    public Map<String, List<List<String>>> processTextByTable(Map<String, Rectangle> map, String str, PDPage pDPage) {
        Map<String, String> textByRegion = processTextByRegionArea(map, str, pDPage);
        if (textByRegion.isEmpty()) {
            return new HashMap<>(0);
        }
        // The stripper inserts the separator literally, so split on it literally too;
        // otherwise separators such as "|" or "." would be interpreted as regex syntax.
        Pattern separator = Pattern.compile(Pattern.quote(str));
        Map<String, List<List<String>>> tables = new HashMap<>(textByRegion.size());
        for (Map.Entry<String, String> region : textByRegion.entrySet()) {
            List<List<String>> rows = new ArrayList<>(16);
            Matcher rowMatcher = TABLE_PATTERN.matcher(region.getValue());
            while (rowMatcher.find()) {
                rows.add(new ArrayList<>(Arrays.asList(separator.split(rowMatcher.group()))));
            }
            tables.put(region.getKey(), rows);
        }
        return tables;
    }

    /**
     * Applies an extraction function to pages of the document and collects the results by page
     * index.
     *
     * @param function  the per-page extraction to run
     * @param str       word separator forwarded to {@code function}
     * @param map       named regions forwarded to {@code function}
     * @param iArr      zero-based page indexes to process; when {@code null} or empty every page
     *                  is processed and keyed by its position starting at 0
     * @param <R>       the per-page result type
     * @return a mutable map from page index to the result for that page; indexes whose
     *         processing fails are omitted
     */
    public <R> Map<Integer, R> extractText(Function<R> function, String str, Map<String, Rectangle> map, int... iArr) {
        Map<Integer, R> results = new HashMap<>(32);
        PDPageTree pages = getDocument().getPages();
        if (iArr == null || iArr.length == 0) {
            int pageIndex = 0;
            for (PDPage page : pages) {
                results.put(pageIndex, function.apply(map, str, page));
                pageIndex++;
            }
            return results;
        }
        for (int pageIndex : iArr) {
            try {
                results.put(pageIndex, function.apply(map, str, pages.get(pageIndex)));
            } catch (Exception e) {
                // Best effort: a page that cannot be fetched or processed is skipped, not fatal.
                this.log.warn("the index['" + pageIndex + "'] is invalid, will be ignored");
            }
        }
        return results;
    }
}
