package pl.edu.icm.cermine;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import org.jdom.Element;
import pl.edu.icm.cermine.content.LogicalStructureExtractor;
import pl.edu.icm.cermine.content.SVMLogicalStructureExtractor;
import pl.edu.icm.cermine.content.model.DocumentContentStructure;
import pl.edu.icm.cermine.content.transformers.DocContentStructToNLMElementConverter;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.tools.transformers.ModelToModelConverter;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.0-SNAPSHOT.jar:pl/edu/icm/cermine/PdfNLMTextExtractor.class */
/**
 * {@link DocumentTextExtractor} that produces a JDOM {@link Element}.
 *
 * <p>Extraction runs in three steps: a {@link DocumentStructureExtractor} turns the input
 * stream into a {@link BxDocument}, a {@link LogicalStructureExtractor} derives a
 * {@link DocumentContentStructure} from that document, and a
 * {@link DocContentStructToNLMElementConverter} converts the structure into the returned
 * element.
 */
public class PdfNLMTextExtractor implements DocumentTextExtractor<Element> {

    /** Classpath locations of the default model/range files used by the no-arg constructor. */
    private static final String FILTERING_MODEL = "/pl/edu/icm/cermine/content/filtering.model";
    private static final String FILTERING_RANGE = "/pl/edu/icm/cermine/content/filtering.range";
    private static final String HEADER_MODEL = "/pl/edu/icm/cermine/content/header.model";
    private static final String HEADER_RANGE = "/pl/edu/icm/cermine/content/header.range";

    /**
     * Charset used to decode model and range files, fixed so that decoding does not depend on
     * the platform default encoding.
     * NOTE(review): assumes these files are ASCII-compatible text - confirm against the
     * bundled resources and any caller-supplied files.
     */
    private static final Charset MODEL_CHARSET = Charset.forName("UTF-8");

    private final DocumentStructureExtractor strExtractor;
    private final LogicalStructureExtractor logicalExtractor;
    private final ModelToModelConverter<DocumentContentStructure, Element> converter;

    /**
     * Creates an extractor backed by a {@link PdfBxStructureExtractor} and an
     * {@link SVMLogicalStructureExtractor} configured from the model/range files bundled on
     * the classpath.
     *
     * @throws AnalysisException if a bundled resource cannot be found, or if one of the
     *         underlying extractors fails to initialize
     */
    public PdfNLMTextExtractor() throws AnalysisException {
        this.strExtractor = new PdfBxStructureExtractor();
        // NOTE(review): the readers are not closed here; whether SVMLogicalStructureExtractor
        // consumes them eagerly in its constructor is not visible from this class.
        this.logicalExtractor = new SVMLogicalStructureExtractor(
                openResource(FILTERING_MODEL),
                openResource(FILTERING_RANGE),
                openResource(HEADER_MODEL),
                openResource(HEADER_RANGE));
        this.converter = new DocContentStructToNLMElementConverter();
    }

    /**
     * Creates an extractor backed by a {@link PdfBxStructureExtractor} and an
     * {@link SVMLogicalStructureExtractor} configured from the given streams. The streams are
     * passed to the logical extractor in the order given, matching the order used by the
     * no-arg constructor (filtering model, filtering range, header model, header range).
     *
     * @param inputStream first stream handed to the logical structure extractor
     * @param inputStream2 second stream handed to the logical structure extractor
     * @param inputStream3 third stream handed to the logical structure extractor
     * @param inputStream4 fourth stream handed to the logical structure extractor
     * @throws AnalysisException if one of the underlying extractors fails to initialize
     */
    public PdfNLMTextExtractor(InputStream inputStream, InputStream inputStream2, InputStream inputStream3, InputStream inputStream4) throws AnalysisException {
        this.strExtractor = new PdfBxStructureExtractor();
        this.logicalExtractor = new SVMLogicalStructureExtractor(
                asReader(inputStream),
                asReader(inputStream2),
                asReader(inputStream3),
                asReader(inputStream4));
        this.converter = new DocContentStructToNLMElementConverter();
    }

    /**
     * Creates an extractor from ready-made components.
     *
     * @param documentStructureExtractor extractor producing a {@link BxDocument} from a stream
     * @param logicalStructureExtractor extractor producing the content structure from a
     *        {@link BxDocument}
     */
    public PdfNLMTextExtractor(DocumentStructureExtractor documentStructureExtractor, LogicalStructureExtractor logicalStructureExtractor) {
        this.strExtractor = documentStructureExtractor;
        this.logicalExtractor = logicalStructureExtractor;
        this.converter = new DocContentStructToNLMElementConverter();
    }

    /**
     * Extracts the document structure from the stream and converts it to an element.
     *
     * @param inputStream stream handed to the document structure extractor
     * @return the element produced by the converter
     * @throws AnalysisException if structure extraction or conversion fails
     */
    @Override
    public Element extractText(InputStream inputStream) throws AnalysisException {
        return extractText(this.strExtractor.extractStructure(inputStream));
    }

    /**
     * Derives the logical content structure of the document and converts it to an element.
     *
     * @param bxDocument document to process
     * @return the element produced by the converter
     * @throws AnalysisException if logical structure extraction fails, or if the conversion
     *         raises a {@link TransformationException} (kept as the cause)
     */
    @Override
    public Element extractText(BxDocument bxDocument) throws AnalysisException {
        try {
            return this.converter.convert(this.logicalExtractor.extractStructure(bxDocument), new Object[0]);
        } catch (TransformationException e) {
            throw new AnalysisException("Cannot extract text from document!", e);
        }
    }

    /** Wraps a stream in a buffered reader that decodes with {@link #MODEL_CHARSET}. */
    private static BufferedReader asReader(InputStream stream) {
        return new BufferedReader(new InputStreamReader(stream, MODEL_CHARSET));
    }

    /**
     * Opens a classpath resource as a buffered reader, failing with a message that names the
     * resource instead of letting a null stream surface as a bare NullPointerException.
     */
    private BufferedReader openResource(String resourcePath) throws AnalysisException {
        InputStream stream = getClass().getResourceAsStream(resourcePath);
        if (stream == null) {
            throw new AnalysisException("Cannot load classifier resource: " + resourcePath,
                    new IllegalStateException("Resource not found on classpath: " + resourcePath));
        }
        return asReader(stream);
    }
}
