package eu.eudml.enhancement.pdf;

import eu.eudml.EudmlConstants;
import eu.eudml.processing.message.EnhancerProcessMessage;
import eu.eudml.service.EudmlServiceException;
import eu.eudml.service.storage.ContentFileHandle;
import eu.eudml.service.storage.EudmlStorage;
import java.io.IOException;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Required;
import pl.edu.icm.yadda.common.YaddaException;
import pl.edu.icm.yadda.process.ctx.ProcessContext;
import pl.edu.icm.yadda.process.node.IProcessingNode;
import pl.edu.icm.yadda.tools.textcat.LanguageIdentifierBean;

/* loaded from: input_file:WEB-INF/lib/eudml-processing-1.3.2-SNAPSHOT.jar:eu/eudml/enhancement/pdf/PdfExtractorAbstractNode.class */
/**
 * Base class for processing nodes that work on text extracted from PDF content.
 *
 * <p>Provides helpers for judging whether a piece of text is usable
 * ({@link #isGarbage(String)}), for cleaning it ({@link #sanitizeText(String)}) and for
 * obtaining a content file handle for the record carried by a message
 * ({@link #getHandle(EnhancerProcessMessage, String)}). Subclasses supply the actual
 * {@link #process(EnhancerProcessMessage, ProcessContext)} step.
 */
public abstract class PdfExtractorAbstractNode implements IProcessingNode<EnhancerProcessMessage, EnhancerProcessMessage> {
    private static final Logger log = LoggerFactory.getLogger(PdfExtractorAbstractNode.class);

    /** Uncertainty threshold handed to the language identifier in {@link #isGarbage(String)}. */
    private static final double UNCERTAINTY_THRESHOLD = 0.15d;

    /**
     * Classification result that makes {@link #isGarbage(String)} report garbage.
     * NOTE(review): presumably the classifier's "no language identified" marker - confirm
     * against LanguageIdentifierBean.
     */
    private static final String GARBAGE_CLASSIFICATION = "**";

    /**
     * Matches every character in the Unicode general category "Other" (\p{C}).
     * Compiled once instead of on every {@link #sanitizeText(String)} call.
     */
    private static final Pattern OTHER_CATEGORY_CHARS = Pattern.compile("\\p{C}");

    /** Key under which {@link #getHandle(EnhancerProcessMessage, String)} looks for an already fetched part. */
    private static final String FETCHED_PART_LOOKUP_KEY = "extracted/content/tex";

    /** Storage used to fetch content parts; injected through {@link #setStorage(EudmlStorage)}. */
    protected EudmlStorage storage;

    /**
     * Processes a single message; implemented by concrete extractor nodes.
     *
     * @param enhancerProcessMessage the message to process
     * @param processContext the context of the running process
     * @return the resulting message
     * @throws Exception if processing fails
     */
    @Override // pl.edu.icm.yadda.process.node.IProcessingNode
    public abstract EnhancerProcessMessage process(EnhancerProcessMessage enhancerProcessMessage, ProcessContext processContext) throws Exception;

    /**
     * Tells whether the given text should be treated as garbage.
     *
     * <p>{@code null} text is always garbage. Otherwise the text is classified with a fresh
     * {@link LanguageIdentifierBean} and is garbage exactly when the classification equals
     * {@code "**"}. A {@code null} classification is reported as "not garbage" rather than
     * failing with a {@link NullPointerException}.
     *
     * @param str the text to inspect, may be {@code null}
     * @return {@code true} if the text is {@code null} or classified as {@code "**"}
     * @throws IOException declared for the language identifier calls
     * @throws YaddaException declared for the language identifier calls
     */
    public boolean isGarbage(String str) throws IOException, YaddaException {
        if (str == null) {
            return true;
        }
        LanguageIdentifierBean identifier = new LanguageIdentifierBean();
        identifier.setUncertaintyThreshold(UNCERTAINTY_THRESHOLD);
        // Constant-first comparison so a null classification cannot throw.
        return GARBAGE_CLASSIFICATION.equals(identifier.classify(str));
    }

    /**
     * Replaces every character of the Unicode category "Other" (\p{C}) with a single space.
     *
     * @param str the text to clean, may be {@code null}
     * @return the cleaned text, or {@code null} if {@code str} was {@code null}
     */
    public String sanitizeText(String str) {
        if (str == null) {
            return null;
        }
        return OTHER_CATEGORY_CHARS.matcher(str).replaceAll(" ");
    }

    /**
     * Returns a content file handle for the source record of the given message.
     *
     * <p>A part already fetched on the message under {@code "extracted/content/tex"} is
     * returned as is. Otherwise the part {@link EudmlConstants#SOURCE_EUDML_CONTENT_TXT_PART}
     * is read from {@link #storage} for the record id and, when present, remembered on the
     * message under the key {@code str}.
     *
     * <p>NOTE(review): the lookup key is fixed while the store key is {@code str}, so a part
     * stored here is only found again when {@code str} equals the lookup key. Behaviour is
     * kept as is because the intended key cannot be determined from this class - confirm
     * with the callers.
     *
     * @param enhancerProcessMessage the message whose source record is used
     * @param str the key under which a freshly fetched part is stored on the message
     * @return the handle, or {@code null} if the storage has no such part
     * @throws EudmlServiceException if the storage access fails
     * @deprecated marked deprecated in the original code; no replacement is visible in this class.
     */
    @Deprecated
    protected ContentFileHandle getHandle(EnhancerProcessMessage enhancerProcessMessage, String str) throws EudmlServiceException {
        String id = enhancerProcessMessage.getSourceRecord().getId();
        ContentFileHandle handle = enhancerProcessMessage.getFetchedPartOne(FETCHED_PART_LOOKUP_KEY);
        if (handle != null) {
            return handle;
        }
        handle = this.storage.contentPartAsFile(id, EudmlConstants.SOURCE_EUDML_CONTENT_TXT_PART);
        if (handle != null) {
            // Remember the fetched part on the message (see the review note in the Javadoc).
            enhancerProcessMessage.putFetchedPart(str, handle);
        }
        return handle;
    }

    /**
     * Injects the storage used to fetch content parts.
     *
     * @param eudmlStorage the storage to use
     */
    @Required
    public void setStorage(EudmlStorage eudmlStorage) {
        this.storage = eudmlStorage;
    }
}
