package pl.edu.icm.cermine.service;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import javax.annotation.PostConstruct;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import pl.edu.icm.cermine.PdfNLMContentExtractor;
import pl.edu.icm.cermine.content.transformers.NLMElementToHTMLWriter;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.service.ExtractionTask;

/**
 * Spring-managed implementation of {@link CermineExtractorService} that runs PDF
 * content extraction on background thread pools.
 *
 * <p>Two executors are created in {@link #init()}:
 * <ul>
 *   <li>{@code processingExecutor} runs asynchronous tasks registered through
 *       {@link #initExtractionTask(byte[], String)};</li>
 *   <li>{@code batchProcessingExecutor} runs the synchronous
 *       {@link #extractNLM(InputStream)} requests and has a bounded queue, so
 *       submissions are rejected once the queue is full.</li>
 * </ul>
 *
 * <p>Both executors share one pool of {@link PdfNLMContentExtractor} instances
 * ({@code threadPoolSize} of them). A worker borrows an extractor with
 * {@link #obtainExtractor()} and hands it back with
 * {@link #returnExtractor(PdfNLMContentExtractor)}; the pool list itself is the
 * monitor used for waiting and notification.
 */
@Component
public class CermineExtractorServiceImpl implements CermineExtractorService {

    /** Capacity of the batch queue when {@code maxQueueForBatch} is not a positive number. */
    private static final int DEFAULT_BATCH_QUEUE_CAPACITY = 100000;

    /** Number of worker threads per executor and number of pooled extractors. */
    int threadPoolSize = 4;

    /** Batch queue capacity; values {@code <= 0} select the built-in default capacity. */
    int maxQueueForBatch = 0;

    Logger log = LoggerFactory.getLogger(CermineExtractorServiceImpl.class);

    /** Pool of idle extractors; also the monitor guarding borrow/return. */
    List<PdfNLMContentExtractor> extractors;

    /** Executor for asynchronous tasks started by {@link #initExtractionTask(byte[], String)}. */
    ExecutorService processingExecutor;

    /** Executor (bounded queue) for synchronous {@link #extractNLM(InputStream)} calls. */
    ExecutorService batchProcessingExecutor;

    @Autowired
    TaskManager taskManager;

    /**
     * Background job that processes one registered {@link ExtractionTask}: it runs the
     * extraction on the task's PDF bytes, stores the result on the task and moves the
     * task to {@code FINISHED} or {@code FAILED}.
     */
    private class ExtractingTaskExecution implements Runnable {
        private final ExtractionTask task;

        public ExtractingTaskExecution(ExtractionTask extractionTask) {
            this.task = extractionTask;
        }

        @Override
        public void run() {
            CermineExtractorServiceImpl.this.log.debug("Starting processing task: {}", this.task.getId());
            this.task.setStatus(ExtractionTask.TaskStatus.PROCESSING);
            ExtractionResult outcome = new ExtractionResult();
            outcome.setProcessingStart(new Date());
            outcome.setSubmit(this.task.getCreationDate());
            CermineExtractorServiceImpl.this.log.debug("Running extraction: {}", this.task.getId());
            boolean completed = false;
            try {
                CermineExtractorServiceImpl.this.performExtraction(outcome, new ByteArrayInputStream(this.task.getPdf()));
                completed = true;
            } finally {
                // performExtraction only lets non-Exception throwables (e.g. Errors) escape.
                // Even then the task must reach a terminal state and drop its PDF bytes,
                // otherwise it would stay in PROCESSING and keep the document in memory.
                this.task.setResult(outcome);
                CermineExtractorServiceImpl.this.log.debug("Processing finished: {}", this.task.getId());
                if (completed && outcome.isSucceeded()) {
                    this.task.setStatus(ExtractionTask.TaskStatus.FINISHED);
                } else {
                    this.task.setStatus(ExtractionTask.TaskStatus.FAILED);
                }
                // The raw PDF is no longer needed once processing is over.
                this.task.setPdf(null);
            }
            CermineExtractorServiceImpl.this.log.debug("finishing task: {}", this.task.getId());
        }
    }

    /**
     * Callable wrapper that runs {@link #performExtraction(ExtractionResult, InputStream)}
     * for a synchronous request and returns the filled-in result.
     */
    private class SimpleExtractionCallable implements Callable<ExtractionResult> {
        private final InputStream input;
        private final ExtractionResult result;

        public SimpleExtractionCallable(InputStream inputStream, ExtractionResult extractionResult) {
            this.input = inputStream;
            this.result = extractionResult;
        }

        @Override
        public ExtractionResult call() {
            return CermineExtractorServiceImpl.this.performExtraction(this.result, this.input);
        }
    }

    /**
     * Creates both executors and fills the extractor pool with {@code threadPoolSize}
     * extractors. Invoked by the container after dependency injection.
     *
     * @throws RuntimeException wrapping any exception raised during initialisation
     */
    @PostConstruct
    public void init() {
        try {
            this.processingExecutor = Executors.newFixedThreadPool(this.threadPoolSize);
            int batchQueueCapacity = this.maxQueueForBatch > 0 ? this.maxQueueForBatch : DEFAULT_BATCH_QUEUE_CAPACITY;
            this.batchProcessingExecutor = new ThreadPoolExecutor(
                    this.threadPoolSize,
                    this.threadPoolSize,
                    1L,
                    TimeUnit.DAYS,
                    new ArrayBlockingQueue<Runnable>(batchQueueCapacity));
            this.extractors = new ArrayList<PdfNLMContentExtractor>();
            for (int i = 0; i < this.threadPoolSize; i++) {
                this.extractors.add(new PdfNLMContentExtractor());
            }
        } catch (Exception e) {
            this.log.error("Failed to init content extractor", e);
            throw new RuntimeException(e);
        }
    }

    /** @return the configured number of worker threads / pooled extractors */
    public int getThreadPoolSize() {
        return this.threadPoolSize;
    }

    /**
     * Sets the number of worker threads / pooled extractors. The value is read by
     * {@link #init()}; changing it afterwards does not resize the existing executors.
     *
     * @param i new pool size
     */
    public void setThreadPoolSize(int i) {
        this.threadPoolSize = i;
    }

    /** @return the configured batch queue capacity ({@code <= 0} means the default is used) */
    public int getMaxQueueForBatch() {
        return this.maxQueueForBatch;
    }

    /**
     * Sets the batch queue capacity. The value is read by {@link #init()}; changing it
     * afterwards does not affect the already created queue.
     *
     * @param i new capacity; values {@code <= 0} select the default capacity
     */
    public void setMaxQueueForBatch(int i) {
        this.maxQueueForBatch = i;
    }

    /**
     * Runs an extraction on the batch executor and blocks until it has finished.
     *
     * @param inputStream PDF content to analyse
     * @return the extraction result; failures of the analysis itself are reported
     *         inside the result (see {@link #performExtraction})
     * @throws ServiceException if the batch executor rejects the submission
     * @throws RuntimeException if waiting for the result fails or is interrupted; on
     *         interruption the calling thread's interrupt flag is restored first
     */
    @Override
    public ExtractionResult extractNLM(InputStream inputStream) throws AnalysisException, ServiceException {
        this.log.debug("Starting extractNLM task...");
        ExtractionResult pending = new ExtractionResult();
        pending.setSubmit(new Date());
        this.log.debug("submitting extractNLM task...");
        try {
            Future<ExtractionResult> future =
                    this.batchProcessingExecutor.submit(new SimpleExtractionCallable(inputStream, pending));
            this.log.debug("waiting for extractNLM task...");
            ExtractionResult finished = future.get();
            this.log.debug("finished extractNLM task...");
            return finished;
        } catch (RejectedExecutionException e) {
            throw new ServiceException("Queue size exceeded.", e);
        } catch (InterruptedException e) {
            // Keep the interruption visible to the caller's thread instead of swallowing it.
            Thread.currentThread().interrupt();
            this.log.error("Exception while executing extraction task...", e);
            throw new RuntimeException(e);
        } catch (Exception e) {
            this.log.error("Exception while executing extraction task...", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Registers a new asynchronous extraction task and queues it for processing.
     *
     * @param bArr PDF content
     * @param str  file name stored on the task
     * @return the value returned by {@code TaskManager.registerTask} for the new task
     */
    @Override
    public long initExtractionTask(byte[] bArr, String str) {
        ExtractionTask newTask = new ExtractionTask();
        newTask.setPdf(bArr);
        newTask.setFileName(str);
        newTask.setCreationDate(new Date());
        newTask.setStatus(ExtractionTask.TaskStatus.CREATED);
        long taskId = this.taskManager.registerTask(newTask);
        newTask.setStatus(ExtractionTask.TaskStatus.QUEUED);
        this.processingExecutor.submit(new ExtractingTaskExecution(newTask));
        return taskId;
    }

    /**
     * Borrows an extractor from the pool, blocking while the pool is empty.
     *
     * @return an extractor that must later be handed back via
     *         {@link #returnExtractor(PdfNLMContentExtractor)}
     * @throws RuntimeException if the waiting thread is interrupted; the interrupt
     *         flag is restored before the exception is thrown
     */
    protected PdfNLMContentExtractor obtainExtractor() {
        this.log.debug("Obtaining extractor from the pool");
        try {
            synchronized (this.extractors) {
                // Loop guards against spurious wake-ups and against another thread
                // taking the extractor first.
                while (this.extractors.isEmpty()) {
                    this.log.debug("Extractor pool is empty, going to sleep...");
                    this.extractors.wait();
                }
                return this.extractors.remove(0);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            this.log.error("Unexpected exception while waiting for extractor...", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Puts an extractor back into the pool and wakes up one waiting thread.
     *
     * @param pdfNLMContentExtractor extractor previously obtained from the pool
     */
    protected void returnExtractor(PdfNLMContentExtractor pdfNLMContentExtractor) {
        this.log.debug("Returning extractor to the pool...");
        synchronized (this.extractors) {
            this.extractors.add(pdfNLMContentExtractor);
            this.extractors.notify();
        }
    }

    /**
     * Runs the extraction with a pooled extractor and fills the given result with the
     * NLM XML, its HTML rendering and the article metadata.
     *
     * <p>Any {@link Exception} is recorded on the result (error set, succeeded =
     * {@code false}) rather than thrown. The extractor is always returned to the pool
     * and the processing end time is always set, also when a non-{@code Exception}
     * throwable propagates. Used by the inner task classes of this service.
     *
     * @param extractionResult result object to fill in
     * @param inputStream      PDF content to analyse
     * @return the same {@code extractionResult} instance
     */
    public ExtractionResult performExtraction(ExtractionResult extractionResult, InputStream inputStream) {
        PdfNLMContentExtractor extractor = null;
        try {
            extractor = obtainExtractor();
            extractionResult.processingStart = new Date();
            this.log.debug("Starting extraction on the input stream...");
            Element content = extractor.extractContent(inputStream);
            this.log.debug("Extraction ok..");
            XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat());
            Document nlmDocument = new Document(content);
            extractionResult.setNlm(outputter.outputString(nlmDocument));
            extractionResult.setHtml(new NLMElementToHTMLWriter().write(content, new Object[0]));
            this.log.debug("Article meta extraction start:");
            extractionResult.setMeta(ArticleMeta.extractNLM(nlmDocument));
            this.log.debug("Article meta extraction succeeded");
            extractionResult.setSucceeded(true);
        } catch (Exception e) {
            this.log.debug("Exception from analysis: ", e);
            extractionResult.setError(e);
            extractionResult.setSucceeded(false);
        } finally {
            // extractor stays null when obtainExtractor() itself failed.
            if (extractor != null) {
                returnExtractor(extractor);
            }
            extractionResult.setProcessingEnd(new Date());
        }
        return extractionResult;
    }
}
