package pl.edu.icm.cermine;

import java.io.InputStream;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.structure.CharacterExtractor;
import pl.edu.icm.cermine.structure.DocumentSegmenter;
import pl.edu.icm.cermine.structure.HierarchicalReadingOrderResolver;
import pl.edu.icm.cermine.structure.ITextCharacterExtractor;
import pl.edu.icm.cermine.structure.ParallelDocstrumSegmenter;
import pl.edu.icm.cermine.structure.ReadingOrderResolver;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.tools.BxModelUtils;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.1.jar:pl/edu/icm/cermine/PdfRawTextExtractor.class */
/**
 * Produces the text of a document as a single {@link String} by chaining three
 * pluggable stages: a {@link CharacterExtractor}, a {@link DocumentSegmenter}
 * and a {@link ReadingOrderResolver}.
 *
 * <p>Each stage can be supplied through the three-argument constructor or
 * replaced later through the corresponding setter. No null checks are
 * performed on the supplied stages.
 *
 * <p>NOTE(review): judging by the class name the input is expected to be a
 * PDF; the accepted format is actually determined by the configured
 * {@link CharacterExtractor} - confirm against that implementation.
 */
public class PdfRawTextExtractor implements DocumentTextExtractor<String> {
    /** Stage 1: turns the raw input stream into a {@link BxDocument}. */
    private CharacterExtractor characterExtractor;
    /** Stage 2: segments the document produced by stage 1. */
    private DocumentSegmenter documentSegmenter;
    /** Stage 3: resolves the reading order of the segmented document. */
    private ReadingOrderResolver roResolver;

    /**
     * Creates an extractor wired with the default stages:
     * {@link ITextCharacterExtractor}, {@link ParallelDocstrumSegmenter} and
     * {@link HierarchicalReadingOrderResolver}, instantiated in that order.
     *
     * @throws AnalysisException declared for the default stage setup
     */
    public PdfRawTextExtractor() throws AnalysisException {
        this(
                new ITextCharacterExtractor(),
                new ParallelDocstrumSegmenter(),
                new HierarchicalReadingOrderResolver());
    }

    /**
     * Creates an extractor that uses the given stages as-is.
     *
     * @param characterExtractor   stage used to read characters from the input stream
     * @param documentSegmenter    stage used to segment the extracted document
     * @param readingOrderResolver stage used to order the segmented document
     */
    public PdfRawTextExtractor(CharacterExtractor characterExtractor, DocumentSegmenter documentSegmenter, ReadingOrderResolver readingOrderResolver) {
        this.characterExtractor = characterExtractor;
        this.documentSegmenter = documentSegmenter;
        this.roResolver = readingOrderResolver;
    }

    /**
     * Extracts text from a stream: the configured character extractor builds a
     * document from the stream, which is then handed to
     * {@link #extractText(BxDocument)}.
     *
     * @param inputStream source passed to the character extractor
     * @return the text of the processed document
     * @throws AnalysisException if any stage of the pipeline fails
     */
    @Override // pl.edu.icm.cermine.DocumentTextExtractor
    public String extractText(InputStream inputStream) throws AnalysisException {
        BxDocument extracted = characterExtractor.extractCharacters(inputStream);
        return extractText(extracted);
    }

    /**
     * Extracts text from an already-built document: segments it, sets parent
     * links on the segmented result via {@link BxModelUtils#setParents}, resolves
     * the reading order and renders the outcome with {@code toText()}.
     *
     * @param bxDocument document to segment and order
     * @return the text of the reading-order-resolved document
     * @throws AnalysisException if segmentation or reading-order resolution fails
     */
    @Override // pl.edu.icm.cermine.DocumentTextExtractor
    public String extractText(BxDocument bxDocument) throws AnalysisException {
        BxDocument segmented = documentSegmenter.segmentDocument(bxDocument);
        // Parent links are set before the resolver runs, matching the original call order.
        BxModelUtils.setParents(segmented);
        return roResolver
                .resolve(segmented)
                .toText();
    }

    /**
     * Replaces the character-extraction stage.
     *
     * @param characterExtractor new stage used by {@link #extractText(InputStream)}
     */
    public void setGlyphExtractor(CharacterExtractor characterExtractor) {
        this.characterExtractor = characterExtractor;
    }

    /**
     * Replaces the segmentation stage.
     *
     * @param documentSegmenter new stage used by {@link #extractText(BxDocument)}
     */
    public void setPageSegmenter(DocumentSegmenter documentSegmenter) {
        this.documentSegmenter = documentSegmenter;
    }

    /**
     * Replaces the reading-order resolution stage.
     *
     * @param readingOrderResolver new stage used by {@link #extractText(BxDocument)}
     */
    public void setRoResolver(ReadingOrderResolver readingOrderResolver) {
        this.roResolver = readingOrderResolver;
    }
}
