package eu.eudml.enhancement.pdf2mml.node;

import eu.eudml.enhancement.bibref.EnhancementUtils;
import eu.eudml.enhancement.pdf.PdfExtractorAbstractNode;
import eu.eudml.enhancement.pdf2mml.Pdf2MmlOutputTransformer;
import eu.eudml.enhancement.tools.Pdf2Mml;
import eu.eudml.processing.message.EnhancerProcessMessage;
import eu.eudml.service.storage.ContentFileHandle;
import eu.eudml.service.storage.ContentPart;
import eu.eudml.tex2mml.mml.TransformMmlElementsInDocument;
import java.io.File;
import java.util.Date;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Required;
import pl.edu.icm.yadda.process.ctx.ProcessContext;

/* loaded from: input_file:eu/eudml/enhancement/pdf2mml/node/Pdf2MmlExtractorNode.class */
/**
 * Processing node that runs the configured {@link Pdf2Mml} processor on a record's raw PDF
 * content and, when the result is usable, attaches it to the message as an
 * {@code enhanced/text} plain-text content part.
 *
 * <p>The conversion runs on a shared cached thread pool and is abandoned (and cancelled)
 * after {@link #TIMEOUT} seconds. Conversion failures and timeouts are logged at trace level
 * and the message is returned without an added content part.
 */
public class Pdf2MmlExtractorNode extends PdfExtractorAbstractNode {
    /** Maximum time, in seconds, a single PDF conversion is allowed to run. */
    private static final int TIMEOUT = 120;
    private Pdf2Mml processor = null;
    private static final Logger log = LoggerFactory.getLogger(Pdf2MmlExtractorNode.class);
    /** Pool on which the (potentially long-running) conversions are executed. */
    private static final ExecutorService THREAD_POOL = Executors.newCachedThreadPool();

    /**
     * Extracts text (with formulas) from the PDF attached to the given message.
     *
     * @param enhancerProcessMessage message describing the record to enhance; its source
     *                               record must not be {@code null}
     * @param processContext         process context (not used by this node)
     * @return the same message instance, with an extra content part when extraction produced
     *         usable text
     * @throws NullPointerException if the message has no source record
     * @throws Exception            on interruption while waiting for the conversion, or on
     *                              any failure outside the conversion itself
     */
    @Override // eu.eudml.enhancement.pdf.PdfExtractorAbstractNode
    public EnhancerProcessMessage process(EnhancerProcessMessage enhancerProcessMessage, ProcessContext processContext) throws Exception {
        if (enhancerProcessMessage.getSourceRecord() == null) {
            throw new NullPointerException("source record of the processed message is null");
        }
        String id = enhancerProcessMessage.getId();
        ContentFileHandle handle = EnhancementUtils.getHandle(this.storage, enhancerProcessMessage, "content/raw_content/pdf");
        if (handle == null) {
            // No PDF content available for this record: nothing to extract.
            return enhancerProcessMessage;
        }
        final File file = handle.getFile();
        try {
            long start = System.currentTimeMillis();
            String text = timedCall(new Callable<String>() {
                @Override
                public String call() throws Exception {
                    return Pdf2MmlExtractorNode.this.processor.run(file);
                }
            }, TIMEOUT, TimeUnit.SECONDS);
            log.trace("STATS#{};{}", id, System.currentTimeMillis() - start);
            if (StringUtils.isNotEmpty(text)) {
                text = sanitizeText(Pdf2MmlOutputTransformer.stripXmlTagsExceptFormulas(TransformMmlElementsInDocument.transform(text)));
            }
            // A null result is treated like garbage so that we never dereference it below.
            if (text == null || isGarbage(text)) {
                log.debug("Extracted text for doc with id: {} was considered as garbage.", id);
            } else {
                byte[] bytes = text.getBytes("UTF-8");
                ContentPart contentPart = new ContentPart(id, "enhanced/text", ContentPart.ContentPartType.PLAINTEXT_INDEX, bytes.length, "text/plain", id + ".txt", new Date());
                log.debug("Adding plain text of {}", id);
                enhancerProcessMessage.addContentPart(contentPart, bytes);
            }
            return enhancerProcessMessage;
        } catch (ExecutionException e) {
            // The conversion itself failed; record why and pass the message on unchanged.
            Throwable cause = e.getCause();
            String reason = cause != null ? cause.getMessage() : e.getMessage();
            log.trace("STATS#{};EXCEPTION:{}", id, reason);
            return enhancerProcessMessage;
        } catch (TimeoutException e) {
            log.trace("STATS#{};TIMEOUT:{}", id, TIMEOUT);
            return enhancerProcessMessage;
        }
    }

    /**
     * Injects the PDF-to-MathML processor used by {@link #process}.
     *
     * @param pdf2Mml processor to run on each PDF file
     */
    @Required
    public void setProcessor(Pdf2Mml pdf2Mml) {
        this.processor = pdf2Mml;
    }

    /**
     * Runs {@code callable} on the shared pool and waits at most the given time for its result.
     *
     * <p>If the wait ends without a result (timeout or interruption of the caller), the task
     * is cancelled with interruption so it does not keep occupying a pool thread.
     *
     * @param callable task to execute
     * @param j        maximum time to wait
     * @param timeUnit unit of {@code j}
     * @return the value computed by {@code callable}
     * @throws InterruptedException if the calling thread is interrupted while waiting
     * @throws ExecutionException   if {@code callable} threw an exception
     * @throws TimeoutException     if the result was not available in time
     */
    private static <T> T timedCall(Callable<T> callable, long j, TimeUnit timeUnit) throws InterruptedException, ExecutionException, TimeoutException {
        FutureTask<T> futureTask = new FutureTask<T>(callable);
        THREAD_POOL.execute(futureTask);
        boolean finished = false;
        try {
            T result = futureTask.get(j, timeUnit);
            finished = true;
            return result;
        } finally {
            if (!finished) {
                // No-op when the task already completed exceptionally; otherwise stops the worker.
                futureTask.cancel(true);
            }
        }
    }
}
