package eu.eudml.enhancement.pdf2mml.node;

import eu.eudml.enhancement.pdf.PdfExtractorAbstractNode;
import eu.eudml.processing.message.AuxPropsNames;
import eu.eudml.processing.message.EnhancerProcessMessage;
import eu.eudml.service.process.StoredContentPart;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.yadda.process.ctx.ProcessContext;

/* loaded from: input_file:WEB-INF/lib/eudml-processing-1.3.2-SNAPSHOT.jar:eu/eudml/enhancement/pdf2mml/node/MmlStripElementsNode.class */
public class MmlStripElementsNode extends PdfExtractorAbstractNode {
    private static String xslResource = "eu/eudml/enhancement/mml/xsl/strip-elements.xsl";
    private static final Logger log = LoggerFactory.getLogger(MmlStripElementsNode.class);

    /* JADX WARN: Can't rename method to resolve collision */
    /* JADX WARN: Type inference failed for: r0v15, types: [byte[], byte[][]] */
    @Override // eu.eudml.enhancement.pdf.PdfExtractorAbstractNode, pl.edu.icm.yadda.process.node.IProcessingNode
    public EnhancerProcessMessage process(EnhancerProcessMessage enhancerProcessMessage, ProcessContext processContext) throws Exception {
        if (enhancerProcessMessage.getSourceRecord() == null) {
            throw new NullPointerException();
        }
        String id = enhancerProcessMessage.getId();
        String[] strArr = (String[]) enhancerProcessMessage.getAuxProps().get(AuxPropsNames.TRALICS_XML_OUTPUT);
        if (strArr == null) {
            log.warn("{} - no xml file was provided for", id);
            return enhancerProcessMessage;
        }
        String[] strArr2 = new String[strArr.length];
        ?? r0 = new byte[strArr.length];
        for (int i = 0; i < strArr.length; i++) {
            if (strArr[i] != null) {
                log.info("Striping elements of xml file created by tralics {}", id);
                ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(strArr[i].getBytes("UTF-8"));
                try {
                    Transformer newTransformer = TransformerFactory.newInstance().newTransformer(new StreamSource(getClass().getClassLoader().getResourceAsStream(xslResource)));
                    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
                    newTransformer.transform(new StreamSource(byteArrayInputStream), new StreamResult(byteArrayOutputStream));
                    String sanitizeText = sanitizeText(byteArrayOutputStream.toString("UTF-8"));
                    if (isGarbage(sanitizeText)) {
                        log.debug("Extracted text for doc with id: {} was considered as garbage.", id);
                    } else {
                        strArr2[i] = id + i + ".txt";
                        r0[i] = sanitizeText.getBytes("UTF-8");
                        log.debug("Adding MathML with stripped elements as text of {}#{}", id, Integer.valueOf(i));
                    }
                } catch (TransformerException e) {
                    log.error("During stripping elements of MathML documents were encountered errors for {}", id, e);
                }
            }
        }
        enhancerProcessMessage.addContentPart(StoredContentPart.merge(id, "extracted/content/tex", "text/plain", new Date(), strArr2, (byte[][]) r0));
        return enhancerProcessMessage;
    }
}
