package pl.edu.icm.cermine;

import com.google.common.collect.Lists;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.jdom.Element;
import pl.edu.icm.cermine.bibref.model.BibEntry;
import pl.edu.icm.cermine.bibref.transformers.BibEntryToNLMElementConverter;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.structure.model.BxDocument;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.2-SNAPSHOT.jar:pl/edu/icm/cermine/ContentExtractor.class */
/**
 * Lazily extracts content from a single uploaded PDF stream and caches every
 * result until {@link #reset()} or the next {@link #uploadPDF(InputStream)}.
 *
 * <p>Each getter computes its value on first use (delegating to the configured
 * extractor) and returns the cached instance afterwards. This class performs
 * no synchronization.
 */
public class ContentExtractor {
    private PdfBxStructureExtractor structureExtractor = new PdfBxStructureExtractor();
    private PdfNLMMetadataExtractor metadataExtractor = new PdfNLMMetadataExtractor();
    private PdfBibEntryReferencesExtractor referencesExtractor = new PdfBibEntryReferencesExtractor();
    private PdfRawTextExtractor rawTextExtractor = new PdfRawTextExtractor();
    private PdfNLMTextExtractor textExtractor = new PdfNLMTextExtractor();

    /** Currently uploaded PDF stream, or {@code null} when nothing is uploaded. */
    private InputStream pdfFile;

    // Cached extraction results; null means "not computed yet".
    private BxDocument bxDocument;
    private Element nlmMetadata;
    private List<BibEntry> bibEntryReferences;
    private List<Element> nlmReferences;
    private String rawFullText;
    private Element nlmFullText;
    private Element nlmContent;

    /**
     * Discards all cached results, closes the previously uploaded stream (if
     * any) and stores the given stream as the current PDF.
     *
     * @param inputStream the PDF stream to extract from; it is closed by a
     *                    later {@link #reset()} or {@code uploadPDF} call
     * @throws IOException if closing the previously uploaded stream fails; in
     *                     that case the new stream is not stored
     */
    public void uploadPDF(InputStream inputStream) throws IOException {
        reset();
        this.pdfFile = inputStream;
    }

    /**
     * Returns the document structure, extracting it from the uploaded stream
     * on first use.
     *
     * @return the cached or freshly extracted structure
     * @throws AnalysisException if no PDF has been uploaded, or if the
     *                           structure extractor fails
     */
    public BxDocument getBxDocument() throws AnalysisException {
        if (this.pdfFile == null) {
            throw new AnalysisException("No PDF document uploaded!");
        }
        if (this.bxDocument == null) {
            this.bxDocument = this.structureExtractor.extractStructure(this.pdfFile);
        }
        return this.bxDocument;
    }

    /**
     * Returns the bibliographic references of the document as {@link BibEntry}
     * objects, extracting them on first use.
     *
     * @return the cached list of references
     * @throws AnalysisException if no PDF has been uploaded or extraction fails
     */
    public List<BibEntry> getBibEntryReferences() throws AnalysisException {
        if (this.bibEntryReferences == null) {
            BxDocument document = getBxDocument();
            this.bibEntryReferences = Lists.newArrayList(this.referencesExtractor.extractReferences(document));
        }
        return this.bibEntryReferences;
    }

    /**
     * Returns the raw full text of the document, extracting it on first use.
     *
     * @return the cached raw text
     * @throws AnalysisException if no PDF has been uploaded or extraction fails
     */
    public String getRawFullText() throws AnalysisException {
        if (this.rawFullText == null) {
            BxDocument document = getBxDocument();
            this.rawFullText = this.rawTextExtractor.extractText(document);
        }
        return this.rawFullText;
    }

    /**
     * Returns the document metadata as an XML element, extracting it on first
     * use.
     *
     * @return the cached metadata element
     * @throws AnalysisException if no PDF has been uploaded or extraction fails
     */
    public Element getNLMMetadata() throws AnalysisException {
        if (this.nlmMetadata == null) {
            BxDocument document = getBxDocument();
            this.nlmMetadata = this.metadataExtractor.extractMetadata(document);
        }
        return this.nlmMetadata;
    }

    /**
     * Returns the bibliographic references converted to XML elements, one per
     * entry of {@link #getBibEntryReferences()} and in the same order.
     *
     * @return the cached list of converted references
     * @throws AnalysisException if no PDF has been uploaded, extraction fails,
     *                           or any reference cannot be converted (the
     *                           {@link TransformationException} is the cause)
     */
    public List<Element> getNLMReferences() throws AnalysisException {
        if (this.nlmReferences == null) {
            List<BibEntry> entries = getBibEntryReferences();
            // Build into a local list so that a conversion failure part-way
            // through never leaves a truncated list in the cache.
            List<Element> converted = new ArrayList<Element>(entries.size());
            BibEntryToNLMElementConverter converter = new BibEntryToNLMElementConverter();
            try {
                for (BibEntry entry : entries) {
                    converted.add(converter.convert(entry, new Object[0]));
                }
            } catch (TransformationException e) {
                throw new AnalysisException(e);
            }
            this.nlmReferences = converted;
        }
        return this.nlmReferences;
    }

    /**
     * Returns the structured full text of the document as an XML element,
     * extracting it on first use.
     *
     * @return the cached full-text element
     * @throws AnalysisException if no PDF has been uploaded or extraction fails
     */
    public Element getNLMText() throws AnalysisException {
        if (this.nlmFullText == null) {
            BxDocument document = getBxDocument();
            this.nlmFullText = this.textExtractor.extractText(document);
        }
        return this.nlmFullText;
    }

    /**
     * Returns one combined XML element containing, in order: a copy of the
     * metadata's {@code front} child, a copy of the full text, and a
     * {@code back/ref-list} section holding one {@code ref} per reference.
     *
     * <p>All parts are cloned before being attached, so the elements returned
     * by {@link #getNLMMetadata()}, {@link #getNLMText()} and
     * {@link #getNLMReferences()} stay independent of the combined tree.
     *
     * @return the cached combined element
     * @throws AnalysisException if no PDF has been uploaded, any extraction
     *                           step fails, or the metadata has no
     *                           {@code front} child
     */
    public Element getNLMContent() throws AnalysisException {
        if (this.nlmContent == null) {
            Element metadata = getNLMMetadata();
            List<Element> references = getNLMReferences();
            Element fullText = getNLMText();

            Element front = metadata.getChild("front");
            if (front == null) {
                throw new AnalysisException("Extracted metadata contains no front element!");
            }

            // Assemble in a local variable; the cache field is only set once
            // the whole tree has been built successfully.
            // NOTE(review): the root name comes from BibEntry.TYPE_ARTICLE,
            // presumably "article" - confirm against BibEntry.
            Element content = new Element(BibEntry.TYPE_ARTICLE);
            content.addContent((Element) front.clone());
            // Clone so the cached full text keeps no parent and can still be
            // attached elsewhere by callers of getNLMText().
            content.addContent((Element) fullText.clone());

            Element refList = new Element("ref-list");
            for (Element reference : references) {
                Element ref = new Element("ref");
                ref.addContent((Element) reference.clone());
                refList.addContent(ref);
            }
            Element back = new Element("back");
            back.addContent(refList);
            content.addContent(back);

            this.nlmContent = content;
        }
        return this.nlmContent;
    }

    /**
     * Clears every cached result and closes and forgets the uploaded stream.
     *
     * @throws IOException if closing the uploaded stream fails; the extractor
     *                     is nevertheless left with no cached state and no
     *                     uploaded stream
     */
    public void reset() throws IOException {
        this.bxDocument = null;
        this.nlmMetadata = null;
        this.bibEntryReferences = null;
        this.nlmReferences = null;
        this.rawFullText = null;
        this.nlmFullText = null;
        this.nlmContent = null;

        // Detach the stream before closing it so a failing close() cannot
        // leave a stale stream behind for later calls.
        InputStream previous = this.pdfFile;
        this.pdfFile = null;
        if (previous != null) {
            previous.close();
        }
    }

    /**
     * Replaces the extractor used by {@link #getNLMMetadata()}.
     *
     * @param pdfNLMMetadataExtractor the metadata extractor to use from now on
     */
    public void setMetadataExtractor(PdfNLMMetadataExtractor pdfNLMMetadataExtractor) {
        this.metadataExtractor = pdfNLMMetadataExtractor;
    }

    /**
     * Replaces the extractor used by {@link #getRawFullText()}.
     *
     * @param pdfRawTextExtractor the raw text extractor to use from now on
     */
    public void setRawTextExtractor(PdfRawTextExtractor pdfRawTextExtractor) {
        this.rawTextExtractor = pdfRawTextExtractor;
    }

    /**
     * Replaces the extractor used by {@link #getBibEntryReferences()}.
     *
     * @param pdfBibEntryReferencesExtractor the references extractor to use from now on
     */
    public void setReferencesExtractor(PdfBibEntryReferencesExtractor pdfBibEntryReferencesExtractor) {
        this.referencesExtractor = pdfBibEntryReferencesExtractor;
    }

    /**
     * Replaces the extractor used by {@link #getBxDocument()}.
     *
     * @param pdfBxStructureExtractor the structure extractor to use from now on
     */
    public void setStructureExtractor(PdfBxStructureExtractor pdfBxStructureExtractor) {
        this.structureExtractor = pdfBxStructureExtractor;
    }

    /**
     * Replaces the extractor used by {@link #getNLMText()}.
     *
     * @param pdfNLMTextExtractor the full-text extractor to use from now on
     */
    public void setTextExtractor(PdfNLMTextExtractor pdfNLMTextExtractor) {
        this.textExtractor = pdfNLMTextExtractor;
    }
}
