package eu.eudml.enhancement.pdf2textviaocr;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
import org.castor.xml.JavaNaming;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/eudml-processing-1.3.2-SNAPSHOT.jar:eu/eudml/enhancement/pdf2textviaocr/PdfImageExtractor.class */
/**
 * Extracts the images embedded in a PDF document into temporary files using PDFBox.
 *
 * <p>For every page the XObjects of the page resources are inspected. Images found in the
 * page resources or in a form XObject's resources are written to {@link #getTempDir()},
 * except images without filters and images using the {@code JBIG2Decode} or
 * {@code JPXDecode} filter, which are skipped. When a resource dictionary holds more than
 * {@value #MAX_IMAGES_PER_RESOURCE} images, the page index is recorded instead and later
 * handed to {@code PdfPageToImage.convert} (only if {@link #setPdfInBytes(byte[])} was
 * called with a non-null array).
 */
public class PdfImageExtractor implements ImageExtractor {
    private static final Logger log = LoggerFactory.getLogger(PdfImageExtractor.class);

    /** File-name prefix used for temporary image files when the caller passes {@code null}. */
    private static final String DEFAULT_IMAGE_PREFIX = "pdfToTextViaOcrImage";

    /**
     * A resource dictionary with more images than this is not extracted image by image;
     * the index of the page is passed to {@code PdfPageToImage.convert} instead.
     */
    private static final int MAX_IMAGES_PER_RESOURCE = 4;

    private byte[] pdfInBytes;
    private File tempDir = new File(System.getProperty("java.io.tmpdir"));
    private String password = null;

    /**
     * @return the raw PDF bytes handed to {@code PdfPageToImage.convert}, or {@code null}
     *         if none were set (the internal array is returned, not a copy)
     */
    public byte[] getPdfInBytes() {
        return this.pdfInBytes;
    }

    /**
     * @param bArr raw PDF bytes; when non-null, pages recorded during extraction are
     *             additionally passed to {@code PdfPageToImage.convert}
     */
    public void setPdfInBytes(byte[] bArr) {
        this.pdfInBytes = bArr;
    }

    /**
     * Extracts images from the given PDF stream, decrypting with the password configured
     * through {@link #setPassword(String)} (an empty password when none was set).
     *
     * @param inputStream PDF content; must not be {@code null}; not closed by this method
     * @param str         prefix for the temporary file names, {@code null} for the default
     * @return the files written
     * @throws PdfExctractorException if the document cannot be read
     */
    @Override // eu.eudml.enhancement.pdf2textviaocr.ImageExtractor
    public List<File> extractImages(InputStream inputStream, String str) throws PdfExctractorException {
        // The configured password used to be ignored here (null was always passed).
        return extractImagesUsingPdfParser(inputStream, str, this.password);
    }

    /**
     * Opens {@code file}, extracts its images and closes the stream again.
     *
     * @param file PDF file; must not be {@code null}
     * @param str  prefix for the temporary file names, {@code null} for the default
     * @return the files written
     * @throws NullPointerException   if {@code file} is {@code null}
     * @throws PdfExctractorException if the file does not exist or cannot be parsed
     */
    @Override // eu.eudml.enhancement.pdf2textviaocr.ImageExtractor
    public List<File> extractImages(File file, String str) throws PdfExctractorException {
        if (file == null) {
            throw new NullPointerException("inputFile");
        }
        final FileInputStream in;
        try {
            in = new FileInputStream(file);
        } catch (FileNotFoundException e) {
            log.error("File {} not found: ", file, e);
            throw new PdfExctractorException("Input file " + file.getPath() + " not found", e);
        }
        try {
            return extractImages(in, str);
        } finally {
            try {
                in.close();
            } catch (IOException e) {
                // Extraction has already finished (or failed); a close failure must not mask that.
                log.warn("Unable to close input file {}", file, e);
            }
        }
    }

    /** Same as {@link #extractImages(File, String)} with the default file-name prefix. */
    @Override // eu.eudml.enhancement.pdf2textviaocr.ImageExtractor
    public List<File> extractImages(File file) throws PdfExctractorException {
        return extractImages(file, (String) null);
    }

    /** Same as {@link #extractImages(InputStream, String)} with the default file-name prefix. */
    @Override // eu.eudml.enhancement.pdf2textviaocr.ImageExtractor
    public List<File> extractImages(InputStream inputStream) throws PdfExctractorException {
        return extractImages(inputStream, (String) null);
    }

    /**
     * Parses the PDF with PDFBox and writes its embedded images to temporary files.
     *
     * <p>An {@link IOException} while parsing deletes everything written so far and is
     * rethrown as {@link PdfExctractorException}. Any other exception is logged, the files
     * written so far are deleted, and processing continues with the page conversion step
     * (best effort).
     *
     * @param inputStream PDF content; must not be {@code null}; not closed by this method
     * @param str         prefix for the temporary file names, {@code null} for the default
     * @param str2        password used if the document is encrypted, {@code null} for empty
     * @return the extracted image files, followed by whatever
     *         {@code PdfPageToImage.convert} returns for the recorded pages
     * @throws NullPointerException   if {@code inputStream} is {@code null}
     * @throws PdfExctractorException if the document cannot be parsed
     */
    public List<File> extractImagesUsingPdfParser(InputStream inputStream, String str, String str2) throws PdfExctractorException {
        if (inputStream == null) {
            throw new NullPointerException("is");
        }
        final String userPassword = (str2 == null) ? "" : str2;
        final String prefix = (str == null) ? DEFAULT_IMAGE_PREFIX : str;

        // Declared with the concrete type because the signatures of the project helpers
        // these lists are passed to are not visible from this class.
        final ArrayList<File> extracted = new ArrayList<File>();
        final ArrayList<Integer> pagesToConvert = new ArrayList<Integer>();

        PDDocument document = null;
        try {
            trace(prefix, "PDDocument.load(is)");
            document = PDDocument.load(inputStream);
            decryptIfEncrypted(document, userPassword, prefix);

            trace(prefix, "doc.getDocumentCatalog().getAllPages()");
            final List<?> pages = document.getDocumentCatalog().getAllPages();
            final int pageCount = pages.size();
            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                log.trace("Processing page {} from {} pages.", Integer.valueOf(pageIndex), Integer.valueOf(pageCount));
                trace(prefix, "(PDPage) pages.get(pageNumber)");
                final PDPage page = (PDPage) pages.get(pageIndex);
                if (extractPageImages(page, pageIndex, prefix, extracted)) {
                    pagesToConvert.add(Integer.valueOf(pageIndex));
                }
            }
        } catch (IOException e) {
            Tools.deleteFilesFromList(extracted);
            extracted.clear();
            throw new PdfExctractorException("Unable to parse PDF document", e);
        } catch (Exception e) {
            // Deliberately best effort: report the failure, drop partial results and fall
            // through to the page conversion step below.
            log.warn("Image extraction from PDF failed; discarding partially extracted images", e);
            Tools.deleteFilesFromList(extracted);
            extracted.clear();
        } finally {
            closeQuietly(document);
        }

        if (this.pdfInBytes != null) {
            extracted.addAll(PdfPageToImage.convert(this.pdfInBytes, pagesToConvert, prefix));
        }
        return extracted;
    }

    /** Decrypts an encrypted document; decryption failures are logged and otherwise ignored. */
    private static void decryptIfEncrypted(PDDocument document, String password, String prefix) throws IOException {
        trace(prefix, "doc.isEncrypted()");
        if (!document.isEncrypted()) {
            return;
        }
        try {
            trace(prefix, "doc.decrypt(password)");
            document.decrypt(password);
        } catch (CryptographyException e) {
            log.warn("problem do decrypt the PDF", e);
        } catch (InvalidPasswordException e) {
            log.warn("Wrong password given for decrypting PDF");
        }
    }

    /**
     * Extracts the images reachable from one page's XObjects into {@code sink}.
     *
     * @return {@code true} if a resource dictionary with more than
     *         {@link #MAX_IMAGES_PER_RESOURCE} images was found, in which case processing
     *         of the page stops and the caller records the page index
     */
    private boolean extractPageImages(PDPage page, int pageIndex, String prefix, List<File> sink) throws IOException {
        trace(prefix, "page.getResources()");
        final PDResources resources = page.getResources();
        if (resources == null) {
            log.debug("Page {} has no resources => skipping", Integer.valueOf(pageIndex));
            return false;
        }
        trace(prefix, "resources.getXObjects()");
        final Map<?, ?> xObjects = resources.getXObjects();
        if (xObjects == null) {
            return false;
        }
        trace(prefix, "xobjs.keySet().iterator()");
        log.debug("Processing external objects: {} on page {}", xObjects, Integer.valueOf(pageIndex));

        // The page-level image map is the same for every non-form XObject, so it is
        // processed only once per page to avoid writing duplicate files.
        boolean pageImagesHandled = false;
        for (Object xObject : xObjects.values()) {
            trace(prefix, "xobjs.keySet().iterator()");
            trace(prefix, "xobj instanceof PDXObjectForm");
            final Map<?, ?> images;
            if (xObject instanceof PDXObjectForm) {
                trace(prefix, "(PDXObjectForm) xobj");
                trace(prefix, "xform.getResources().getImages()");
                final PDResources formResources = ((PDXObjectForm) xObject).getResources();
                images = (formResources == null) ? null : formResources.getImages();
            } else {
                if (pageImagesHandled) {
                    continue;
                }
                pageImagesHandled = true;
                trace(prefix, "resources.getImages()");
                images = resources.getImages();
            }
            if (images == null) {
                continue;
            }
            if (images.size() > MAX_IMAGES_PER_RESOURCE) {
                return true;
            }
            writeImages(images, prefix, sink);
        }
        return false;
    }

    /** Writes every supported image of {@code images} to a temp file and adds the file to {@code sink}. */
    private void writeImages(Map<?, ?> images, String prefix, List<File> sink) throws IOException {
        trace(prefix, "images.keySet().iterator()");
        trace(prefix, "imageIter.hasNext()");
        for (Object value : images.values()) {
            trace(prefix, "(String) imageIter.next()");
            trace(prefix, "(PDXObjectImage) images.get(imKey)");
            final PDXObjectImage image = (PDXObjectImage) value;
            trace(prefix, "new PDStream(image.getCOSStream())");
            final PDStream stream = new PDStream(image.getCOSStream());
            trace(prefix, "pdStr.getFilters()");
            final List<?> filters = stream.getFilters();
            if (filters == null) {
                log.info("Image without encoding filter specified => skipping");
                continue;
            }
            trace(prefix, "filters.contains(JBIG2Decode)");
            if (filters.contains("JBIG2Decode")) {
                log.info("Allready compressed according to JBIG2 standard => skipping");
                continue;
            }
            trace(prefix, "filters.contains(JPXDecode)");
            if (filters.contains("JPXDecode")) {
                log.info("Unsupported filter JPXDecode => skipping");
                continue;
            }
            final File target = File.createTempFile(prefix, "." + image.getSuffix(), this.tempDir);
            // Register the file before writing so it is cleaned up if the write fails.
            sink.add(target);
            log.info("Writing image:{}", target.getPath());
            trace(prefix, "image.write2file(imageName)");
            image.write2file(target);
        }
    }

    /** Closes the document if it was opened; a failure to close is logged, not propagated. */
    private static void closeQuietly(PDDocument document) {
        if (document == null) {
            return;
        }
        try {
            document.close();
        } catch (IOException e) {
            log.error("Unable to close opened stream", e);
        }
    }

    /** Emits one trace-level diagnostic line tagged with the file-name prefix in use. */
    private static void trace(String prefix, String step) {
        log.trace("INVESTIGATION:{}:{}", prefix, step);
    }

    /** @return the password used by the two-argument {@code extractImages} methods, may be {@code null} */
    public String getPassword() {
        return this.password;
    }

    /** @param str password for encrypted documents; {@code null} is treated as an empty password */
    public void setPassword(String str) {
        this.password = str;
    }

    /** @return the directory in which temporary image files are created */
    public File getTempDir() {
        return this.tempDir;
    }

    /** @param file directory in which temporary image files are created */
    public void setTempDir(File file) {
        this.tempDir = file;
    }
}
