package eu.eudml.processing.node;

import eu.eudml.processing.message.EnhancerProcessMessage;
import eu.eudml.service.process.StoredContentPart;
import eu.eudml.util.nlm.FulltextURL;
import eu.eudml.util.nlm.NlmConstants;
import eu.eudml.util.nlm.NlmProcessorHelper;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
import org.apache.commons.httpclient.util.URIUtil;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.yadda.process.ctx.ProcessContext;
import pl.edu.icm.yadda.process.node.IInitializableFinalizableNode;
import pl.edu.icm.yadda.process.node.IProcessingNode;

/* loaded from: input_file:eu/eudml/processing/node/FullTextDownloaderNode.class */
public class FullTextDownloaderNode implements IProcessingNode<EnhancerProcessMessage, EnhancerProcessMessage>, IInitializableFinalizableNode {
    private final Logger log = LoggerFactory.getLogger(FullTextDownloaderNode.class);
    protected int connectionTimeoutMillis = 120000;
    protected int socketTimeoutMillis = 120000;
    private HttpClient client;

    public void setConnectionTimeoutMillis(int i) {
        this.connectionTimeoutMillis = i;
    }

    public void setSocketTimeoutMillis(int i) {
        this.socketTimeoutMillis = i;
    }

    public EnhancerProcessMessage process(EnhancerProcessMessage enhancerProcessMessage, ProcessContext processContext) throws Exception, IOException {
        List<FulltextURL> uRLsForFulltext = NlmProcessorHelper.getURLsForFulltext(enhancerProcessMessage.getMessageNLM(), NlmConstants.XPATH_PDF_FULLTEXT_URL);
        List<FulltextURL> uRLsForFulltext2 = NlmProcessorHelper.getURLsForFulltext(enhancerProcessMessage.getMessageNLM(), NlmConstants.XPATH_XML_FULLTEXT_URL);
        List<FulltextURL> uRLsForFulltext3 = NlmProcessorHelper.getURLsForFulltext(enhancerProcessMessage.getMessageNLM(), NlmConstants.XPATH_XHTML_XML_FULLTEXT_URL);
        List<FulltextURL> uRLsForFulltext4 = NlmProcessorHelper.getURLsForFulltext(enhancerProcessMessage.getMessageNLM(), NlmConstants.XPATH_TXT_FULLTEXT_URL);
        List<FulltextURL> uRLsForFulltext5 = NlmProcessorHelper.getURLsForFulltext(enhancerProcessMessage.getMessageNLM(), NlmConstants.XPATH_TEX_FULLTEXT_URL);
        addFultextToMessage(enhancerProcessMessage, uRLsForFulltext, "src/eudml/content/pdf", "application/pdf");
        addFultextToMessage(enhancerProcessMessage, uRLsForFulltext2, "src/eudml/content/xml", "application/xml");
        addFultextToMessage(enhancerProcessMessage, uRLsForFulltext3, "src/eudml/content/xhtml", "application/xhtml+xml");
        addFultextToMessage(enhancerProcessMessage, uRLsForFulltext4, "src/eudml/content/txt", "text/plain");
        addFultextToMessage(enhancerProcessMessage, uRLsForFulltext5, "src/eudml/content/tex", "application/x-tex");
        return enhancerProcessMessage;
    }

    private void addFultextToMessage(EnhancerProcessMessage enhancerProcessMessage, List<FulltextURL> list, String str, String str2) throws Exception {
        String id = enhancerProcessMessage.getId();
        StoredContentPart.StoredContentPartBuilder storedContentPartBuilder = new StoredContentPart.StoredContentPartBuilder(id, str, str2);
        for (FulltextURL fulltextURL : list) {
            String href = fulltextURL.getHref();
            if (!href.isEmpty()) {
                String encodeQuery = URIUtil.encodeQuery(href, "UTF-8");
                this.log.trace("Downloading FULLTEXT of type: {} from: {} for item with id: {}", new String[]{str, encodeQuery, id});
                GetMethod getMethod = new GetMethod(encodeQuery);
                try {
                    try {
                        try {
                            int executeMethod = this.client.executeMethod(getMethod);
                            if (executeMethod != 200) {
                                throw new HttpException("Request to " + encodeQuery + " responded with code " + executeMethod);
                            }
                            byte[] byteArray = IOUtils.toByteArray(getMethod.getResponseBodyAsStream());
                            if (str.equals("src/eudml/content/pdf")) {
                                ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArray);
                                try {
                                    try {
                                        PDDocument.load(byteArrayInputStream).close();
                                        storedContentPartBuilder.addContent(getMethod.getPath(), byteArray, fulltextURL.getSpecificUsesAsStringList());
                                        byteArrayInputStream.close();
                                    } catch (IOException e) {
                                        this.log.trace("PDF downloaded from: {} for item with id: {} was corrupted.", encodeQuery, id);
                                        throw e;
                                    }
                                } catch (Throwable th) {
                                    byteArrayInputStream.close();
                                    throw th;
                                }
                            } else {
                                storedContentPartBuilder.addContent(getMethod.getPath(), byteArray, fulltextURL.getSpecificUsesAsStringList());
                            }
                        } catch (IOException e2) {
                            this.log.error("Error ocurred during downloading FULLTEXT of type: {} from: {} for item with id: {} Exception message: {}", new String[]{str, encodeQuery, id, e2.getMessage()});
                            throw e2;
                        }
                    } catch (HttpException e3) {
                        this.log.error("Error ocurred during downloading FULLTEXT of type: {} from: {} for item with id: {} Exception message: {}", new String[]{str, encodeQuery, id, e3.getMessage()});
                        throw e3;
                    }
                } finally {
                    getMethod.releaseConnection();
                }
            }
        }
        StoredContentPart build = storedContentPartBuilder.build();
        if (build != null) {
            enhancerProcessMessage.addContentPart(build);
        }
    }

    public void initialize(ProcessContext processContext) throws Exception {
        MultiThreadedHttpConnectionManager multiThreadedHttpConnectionManager = new MultiThreadedHttpConnectionManager();
        HttpConnectionManagerParams httpConnectionManagerParams = new HttpConnectionManagerParams();
        httpConnectionManagerParams.setMaxTotalConnections(1000);
        multiThreadedHttpConnectionManager.setParams(httpConnectionManagerParams);
        this.client = new HttpClient(multiThreadedHttpConnectionManager);
        this.client.getParams().setParameter("http.connection.timeout", Integer.valueOf(this.connectionTimeoutMillis));
        this.client.getParams().setParameter("http.socket.timeout", Integer.valueOf(this.socketTimeoutMillis));
    }

    public void finalize(ProcessContext processContext) throws Exception {
    }
}
