package io.annot8.components.documents.processors;

import com.drew.imaging.ImageMetadataReader;
import com.drew.imaging.ImageProcessingException;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.components.annotations.ComponentTags;
import io.annot8.api.components.annotations.SettingsClass;
import io.annot8.api.context.Context;
import io.annot8.common.data.content.FileContent;
import io.annot8.common.data.content.InputStreamContent;
import io.annot8.components.documents.data.ExtractionWithProperties;
import java.awt.image.BufferedImage;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.imageio.ImageIO;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.slf4j.Logger;

/**
 * Component descriptor for a processor that extracts text and embedded images from legacy
 * Word Document (*.doc) files using Apache POI's HWPF API.
 *
 * <p>Metadata extraction is reported as unsupported by the processor, so only text and
 * images are produced.
 */
@ComponentDescription("Extracts image and text from Word Document (*.doc) files")
@ComponentTags({"documents", "word", "doc", "extractor", "text", "images"})
@ComponentName("Word Document (DOC) Extractor")
@SettingsClass(DocumentExtractorSettings.class)
public class DocExtractor extends AbstractDocumentExtractorDescriptor<Processor> {

    /**
     * Processor that loads a DOC file into an {@link HWPFDocument} and extracts its text and
     * pictures.
     */
    public static class Processor extends AbstractDocumentExtractorProcessor<HWPFDocument> {
        private final Logger logger;

        /**
         * Creates the processor and captures the logger supplied by the parent class.
         *
         * @param context the component context, passed through to the parent
         * @param documentExtractorSettings the extraction settings, passed through to the parent
         */
        public Processor(Context context, DocumentExtractorSettings documentExtractorSettings) {
            super(context, documentExtractorSettings);
            this.logger = getLogger();
        }

        /**
         * @return {@code false}; {@link #extractMetadata(HWPFDocument)} always yields an empty map
         */
        @Override
        public boolean isMetadataSupported() {
            return false;
        }

        /** @return {@code true}; text is extracted via {@link WordExtractor} */
        @Override
        public boolean isTextSupported() {
            return true;
        }

        /** @return {@code true}; images are extracted from the document's pictures table */
        @Override
        public boolean isImagesSupported() {
            return true;
        }

        /**
         * Accepts files whose name ends in {@code .doc} (case-insensitive).
         *
         * @param fileContent the file content to test
         * @return {@code true} if the file name has a {@code .doc} extension
         */
        @Override
        public boolean acceptFile(FileContent fileContent) {
            File file = (File) fileContent.getData();
            return file.getName().toLowerCase().endsWith(".doc");
        }

        /**
         * Accepts streams whose leading bytes identify them as an OLE2 container.
         *
         * <p>NOTE(review): the sniffed stream is closed once inspected. This assumes every
         * {@code getData()} call yields a fresh stream, which the later, independent
         * {@code getData()} call in {@link #extractDocument(InputStreamContent)} already relies
         * on — confirm against the InputStreamContent implementation.
         *
         * @param inputStreamContent the stream content to test
         * @return {@code true} if the stream is detected as OLE2; {@code false} if it is another
         *     format or its header cannot be read
         */
        @Override
        public boolean acceptInputStream(InputStreamContent inputStreamContent) {
            // Buffered so that the header bytes can be peeked at via mark/reset.
            try (InputStream in = new BufferedInputStream((InputStream) inputStreamContent.getData())) {
                return FileMagic.OLE2 == FileMagic.valueOf(in);
            } catch (IOException e) {
                // Best effort: an unreadable stream is simply not accepted.
                this.logger.debug("Unable to inspect stream header, rejecting content", e);
                return false;
            }
        }

        /**
         * Loads the document from a file.
         *
         * <p>The file stream is closed before returning, including when the document cannot be
         * parsed. NOTE(review): relies on HWPFDocument reading the whole stream during
         * construction — see the POI documentation.
         *
         * @param fileContent the file content holding the DOC file
         * @return the parsed document
         * @throws IOException if the file cannot be opened or read
         */
        @Override
        public HWPFDocument extractDocument(FileContent fileContent) throws IOException {
            try (InputStream in = new FileInputStream((File) fileContent.getData())) {
                return new HWPFDocument(in);
            }
        }

        /**
         * Loads the document from a stream.
         *
         * <p>The stream obtained from the content is closed before returning, including when the
         * document cannot be parsed.
         *
         * @param inputStreamContent the content supplying the DOC data
         * @return the parsed document
         * @throws IOException if the stream cannot be read
         */
        @Override
        public HWPFDocument extractDocument(InputStreamContent inputStreamContent) throws IOException {
            try (InputStream in = (InputStream) inputStreamContent.getData()) {
                return new HWPFDocument(in);
            }
        }

        /**
         * Metadata is not supported for DOC files.
         *
         * @param hWPFDocument the document (unused)
         * @return an empty map
         */
        @Override
        public Map<String, Object> extractMetadata(HWPFDocument hWPFDocument) {
            return Collections.emptyMap();
        }

        /**
         * Extracts the document text as a single extraction.
         *
         * @param hWPFDocument the document to read
         * @return a one-element collection containing the text returned by {@link WordExtractor}
         */
        @Override
        public Collection<ExtractionWithProperties<String>> extractText(HWPFDocument hWPFDocument) {
            // The extractor is intentionally not closed here: closing it may also close the
            // underlying document, which is still needed by extractImages.
            String text = new WordExtractor(hWPFDocument).getText();
            return List.of(new ExtractionWithProperties<>(text));
        }

        /**
         * Extracts every picture from the document's pictures table that can be decoded into a
         * {@link BufferedImage}.
         *
         * <p>Pictures that cannot be decoded are logged and skipped. Each extraction carries the
         * properties built by {@link #imageProperties(Picture, byte[], int)}.
         *
         * @param hWPFDocument the document to read
         * @return the decoded images with their properties, in pictures-table order
         */
        @Override
        public Collection<ExtractionWithProperties<BufferedImage>> extractImages(HWPFDocument hWPFDocument) {
            List<ExtractionWithProperties<BufferedImage>> images = new ArrayList<>();
            int index = 0;
            for (Picture picture : hWPFDocument.getPicturesTable().getAllPictures()) {
                // 1-based position in the pictures table; counts undecodable pictures too.
                index++;

                // Fetched once and reused for both decoding and metadata reading.
                byte[] content = picture.getContent();

                BufferedImage image;
                try {
                    image = ImageIO.read(new ByteArrayInputStream(content));
                } catch (IOException e) {
                    this.logger.warn("Unable to extract image {} from document", index, e);
                    continue;
                }

                if (image == null) {
                    // ImageIO.read returns null when no registered reader understands the data.
                    this.logger.warn("Null image {} extracted from document", index);
                    continue;
                }

                images.add(new ExtractionWithProperties<>(image, imageProperties(picture, content, index)));
            }
            return images;
        }

        /**
         * Builds the property map for one picture: any image metadata that can be read from the
         * raw bytes, followed by the fixed keys {@code index}, {@code name}, {@code description}
         * and {@code mimeType}. The fixed keys are written last, so they overwrite metadata
         * entries with the same key.
         *
         * @param picture the picture being described
         * @param content the raw picture bytes
         * @param index the 1-based position of the picture in the document
         * @return a mutable map of properties; values may be {@code null} if the picture returns
         *     {@code null} for them
         */
        private Map<String, Object> imageProperties(Picture picture, byte[] content, int index) {
            Map<String, Object> properties = new HashMap<>();
            try {
                properties.putAll(toMap(ImageMetadataReader.readMetadata(new ByteArrayInputStream(content))));
            } catch (ImageProcessingException | IOException e) {
                // Metadata is optional; keep the image and carry on without it.
                this.logger.warn("Unable to extract metadata from image {}", index, e);
            }
            properties.put("index", index);
            properties.put("name", picture.suggestFullFileName());
            properties.put("description", picture.getDescription());
            properties.put("mimeType", picture.getMimeType());
            return properties;
        }
    }

    /**
     * Creates the processor for this descriptor.
     *
     * <p>NOTE(review): presumably implements the component factory hook of the parent
     * descriptor; the parent is not visible here, so no {@code @Override} is asserted — confirm.
     *
     * @param context the component context
     * @param documentExtractorSettings the extraction settings
     * @return a new {@link Processor}
     */
    public Processor createComponent(Context context, DocumentExtractorSettings documentExtractorSettings) {
        return new Processor(context, documentExtractorSettings);
    }
}
