package pl.edu.icm.cermine;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import org.jdom.Element;
import pl.edu.icm.cermine.bibref.model.BibEntry;
import pl.edu.icm.cermine.bibref.transformers.BibEntryToNLMElementConverter;
import pl.edu.icm.cermine.content.RawTextWithLabelsExtractor;
import pl.edu.icm.cermine.content.cleaning.ContentCleaner;
import pl.edu.icm.cermine.content.model.BxDocContentStructure;
import pl.edu.icm.cermine.content.model.DocumentContentStructure;
import pl.edu.icm.cermine.content.transformers.BxContentStructToDocContentStructConverter;
import pl.edu.icm.cermine.content.transformers.DocContentStructToNLMElementConverter;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.metadata.model.DocumentAffiliation;
import pl.edu.icm.cermine.metadata.model.DocumentMetadata;
import pl.edu.icm.cermine.metadata.transformers.DocumentMetadataToNLMElementConverter;
import pl.edu.icm.cermine.parsing.tools.ParsableStringParser;
import pl.edu.icm.cermine.structure.model.BxDocument;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.6-SNAPSHOT.jar:pl/edu/icm/cermine/ExtractionUtils.class */
/**
 * Static entry points that chain the components held by a
 * {@link ComponentConfiguration} into complete extraction pipelines.
 *
 * <p>Most operations come in two overloads: one taking a raw {@link InputStream},
 * which is first turned into a {@link BxDocument}, and one taking an already
 * built {@link BxDocument}. Methods whose name ends in {@code AsNLM} additionally
 * run the result through the matching {@code ...ToNLMElementConverter} and return
 * JDOM {@link Element}s.
 *
 * <p>None of the methods validates its arguments; passing {@code null} results in
 * whatever the underlying component (or a plain dereference) does with it.
 */
public class ExtractionUtils {

    /**
     * Empty trailing argument passed to every converter's {@code convert} call.
     * A zero-length array has no mutable state, so one instance can be shared.
     */
    private static final Object[] NO_EXTRA_ARGS = new Object[0];

    /**
     * Builds a document by passing the stream through the configured character
     * extractor, document segmenter, reading order resolver and initial
     * classifier, in that order.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param inputStream source handed to the character extractor
     * @return the document returned by the initial classifier
     * @throws AnalysisException if any of the invoked components throws it
     */
    public static BxDocument extractStructure(ComponentConfiguration componentConfiguration, InputStream inputStream) throws AnalysisException {
        return componentConfiguration.initialClassifier.classifyZones(
                segmentAndOrder(componentConfiguration, inputStream));
    }

    /**
     * Extracts metadata from a stream by first building its structure with
     * {@link #extractStructure(ComponentConfiguration, InputStream)}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param inputStream source document
     * @return the extracted metadata
     * @throws AnalysisException if any of the invoked components throws it
     */
    public static DocumentMetadata extractMetadata(ComponentConfiguration componentConfiguration, InputStream inputStream) throws AnalysisException {
        return extractMetadata(componentConfiguration, extractStructure(componentConfiguration, inputStream));
    }

    /**
     * Extracts metadata from a stream and converts it with
     * {@link DocumentMetadataToNLMElementConverter}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param inputStream source document
     * @return the converted metadata element
     * @throws AnalysisException if extraction fails, or wrapping the
     *         {@link TransformationException} raised during conversion
     */
    public static Element extractMetadataAsNLM(ComponentConfiguration componentConfiguration, InputStream inputStream) throws AnalysisException {
        try {
            DocumentMetadata metadata = extractMetadata(componentConfiguration, inputStream);
            return new DocumentMetadataToNLMElementConverter().convert(metadata, NO_EXTRA_ARGS);
        } catch (TransformationException e) {
            throw new AnalysisException("Cannot extract metadata from the document!", e);
        }
    }

    /**
     * Runs the metadata classifier and metadata extractor on the document, then
     * hands every affiliation of the result to the configured affiliation parser.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param bxDocument document to analyse
     * @return the metadata object produced by the metadata extractor, after each
     *         of its affiliations has been passed to the affiliation parser
     * @throws AnalysisException if any of the invoked components throws it
     */
    public static DocumentMetadata extractMetadata(ComponentConfiguration componentConfiguration, BxDocument bxDocument) throws AnalysisException {
        DocumentMetadata metadata = componentConfiguration.metadataExtractor.extractMetadata(
                componentConfiguration.metadataClassifier.classifyZones(bxDocument));
        // The affiliation itself is the argument of parse(); it must not be cast
        // to the parser type (that cast was a decompilation artifact).
        for (DocumentAffiliation affiliation : metadata.getAffiliations()) {
            componentConfiguration.affiliationParser.parse(affiliation);
        }
        return metadata;
    }

    /**
     * Extracts metadata from a document and converts it with
     * {@link DocumentMetadataToNLMElementConverter}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param bxDocument document to analyse
     * @return the converted metadata element
     * @throws AnalysisException if extraction fails, or wrapping the
     *         {@link TransformationException} raised during conversion
     */
    public static Element extractMetadataAsNLM(ComponentConfiguration componentConfiguration, BxDocument bxDocument) throws AnalysisException {
        try {
            DocumentMetadata metadata = extractMetadata(componentConfiguration, bxDocument);
            return new DocumentMetadataToNLMElementConverter().convert(metadata, NO_EXTRA_ARGS);
        } catch (TransformationException e) {
            throw new AnalysisException("Cannot extract metadata from the document!", e);
        }
    }

    /**
     * Produces cleaned raw text for a stream. Unlike
     * {@link #extractStructure(ComponentConfiguration, InputStream)}, the initial
     * classifier is not invoked here: the document goes straight from the reading
     * order resolver to {@link #extractRawText(ComponentConfiguration, BxDocument)}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param inputStream source document
     * @return the cleaned text
     * @throws AnalysisException if any of the invoked components throws it
     */
    public static String extractRawText(ComponentConfiguration componentConfiguration, InputStream inputStream) throws AnalysisException {
        return extractRawText(componentConfiguration, segmentAndOrder(componentConfiguration, inputStream));
    }

    /**
     * Returns {@code bxDocument.toText()} after passing it through
     * {@link ContentCleaner#cleanAll}.
     *
     * @param componentConfiguration not used by this overload
     * @param bxDocument document whose text is returned
     * @return the cleaned text
     * @throws AnalysisException declared for symmetry with the other overloads
     */
    public static String extractRawText(ComponentConfiguration componentConfiguration, BxDocument bxDocument) throws AnalysisException {
        return ContentCleaner.cleanAll(bxDocument.toText());
    }

    /**
     * Extracts parsed references from a stream by first building its structure
     * with {@link #extractStructure(ComponentConfiguration, InputStream)}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param inputStream source document
     * @return one parsed entry per extracted reference string
     * @throws AnalysisException if any of the invoked components throws it
     */
    public static BibEntry[] extractReferences(ComponentConfiguration componentConfiguration, InputStream inputStream) throws AnalysisException {
        return extractReferences(componentConfiguration, extractStructure(componentConfiguration, inputStream));
    }

    /**
     * Asks the reference extractor for the reference strings of the document and
     * feeds each one to the reference parser.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param bxDocument document to analyse
     * @return an array with the parser's result for each reference string, in the
     *         order the extractor returned them
     * @throws AnalysisException if any of the invoked components throws it
     */
    public static BibEntry[] extractReferences(ComponentConfiguration componentConfiguration, BxDocument bxDocument) throws AnalysisException {
        String[] rawReferences = componentConfiguration.bibReferenceExtractor.extractBibReferences(bxDocument);
        BibEntry[] parsed = new BibEntry[rawReferences.length];
        for (int i = 0; i < rawReferences.length; i++) {
            parsed[i] = componentConfiguration.bibReferenceParser.parseBibReference(rawReferences[i]);
        }
        return parsed;
    }

    /**
     * Extracts references from a stream and converts them with
     * {@link #convertReferences(BibEntry[])}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param inputStream source document
     * @return one element per extracted reference
     * @throws AnalysisException if extraction or conversion fails
     */
    public static Element[] extractReferencesAsNLM(ComponentConfiguration componentConfiguration, InputStream inputStream) throws AnalysisException {
        return convertReferences(extractReferences(componentConfiguration, inputStream));
    }

    /**
     * Extracts references from a document and converts them with
     * {@link #convertReferences(BibEntry[])}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param bxDocument document to analyse
     * @return one element per extracted reference
     * @throws AnalysisException if extraction or conversion fails
     */
    public static Element[] extractReferencesAsNLM(ComponentConfiguration componentConfiguration, BxDocument bxDocument) throws AnalysisException {
        return convertReferences(extractReferences(componentConfiguration, bxDocument));
    }

    /**
     * Converts every entry with a single {@link BibEntryToNLMElementConverter}.
     *
     * @param bibEntryArr entries to convert
     * @return an array of the same length holding the converted elements in the
     *         same order
     * @throws AnalysisException wrapping the {@link TransformationException}
     *         raised for the first entry that cannot be converted
     */
    public static Element[] convertReferences(BibEntry[] bibEntryArr) throws AnalysisException {
        // The result length is known up front, so fill a typed array directly
        // instead of going through a raw list and an unchecked toArray cast.
        Element[] converted = new Element[bibEntryArr.length];
        BibEntryToNLMElementConverter converter = new BibEntryToNLMElementConverter();
        try {
            for (int i = 0; i < bibEntryArr.length; i++) {
                converted[i] = converter.convert(bibEntryArr[i], NO_EXTRA_ARGS);
            }
        } catch (TransformationException e) {
            throw new AnalysisException("Cannot convert references!", e);
        }
        return converted;
    }

    /**
     * Extracts the content structure from a stream and converts it with
     * {@link DocContentStructToNLMElementConverter}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param inputStream source document
     * @return the converted content element
     * @throws AnalysisException if extraction fails, or wrapping the
     *         {@link TransformationException} raised during conversion
     */
    public static Element extractTextAsNLM(ComponentConfiguration componentConfiguration, InputStream inputStream) throws AnalysisException {
        try {
            // The content structure is the converter's input; it must not be cast
            // to the converter type (that cast was a decompilation artifact).
            DocumentContentStructure content = extractText(componentConfiguration, inputStream);
            return new DocContentStructToNLMElementConverter().convert(content, NO_EXTRA_ARGS);
        } catch (TransformationException e) {
            throw new AnalysisException("Cannot extract text from document!", e);
        }
    }

    /**
     * Extracts the content structure from a document and converts it with
     * {@link DocContentStructToNLMElementConverter}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param bxDocument document to analyse
     * @return the converted content element
     * @throws AnalysisException if extraction fails, or wrapping the
     *         {@link TransformationException} raised during conversion
     */
    public static Element extractTextAsNLM(ComponentConfiguration componentConfiguration, BxDocument bxDocument) throws AnalysisException {
        try {
            // See the InputStream overload: no cast on the converter's argument.
            DocumentContentStructure content = extractText(componentConfiguration, bxDocument);
            return new DocContentStructToNLMElementConverter().convert(content, NO_EXTRA_ARGS);
        } catch (TransformationException e) {
            throw new AnalysisException("Cannot extract text from document!", e);
        }
    }

    /**
     * Extracts the content structure from a stream by first building its
     * structure with {@link #extractStructure(ComponentConfiguration, InputStream)}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param inputStream source document
     * @return the extracted content structure
     * @throws AnalysisException if any of the invoked components throws it
     */
    public static DocumentContentStructure extractText(ComponentConfiguration componentConfiguration, InputStream inputStream) throws AnalysisException {
        return extractText(componentConfiguration, extractStructure(componentConfiguration, inputStream));
    }

    /**
     * Passes the document through the content filter and content header
     * extractor, lets the content cleaner process the resulting structure, and
     * converts it with {@link BxContentStructToDocContentStructConverter}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param bxDocument document to analyse
     * @return the converted content structure
     * @throws AnalysisException if a component throws it, or wrapping a
     *         {@link TransformationException} raised inside this method
     */
    public static DocumentContentStructure extractText(ComponentConfiguration componentConfiguration, BxDocument bxDocument) throws AnalysisException {
        try {
            BxDocContentStructure headers = componentConfiguration.contentHeaderExtractor.extractHeaders(
                    componentConfiguration.contentFilter.filter(bxDocument));
            componentConfiguration.contentCleaner.cleanupContent(headers);
            return new BxContentStructToDocContentStructConverter().convert(headers, NO_EXTRA_ARGS);
        } catch (TransformationException e) {
            throw new AnalysisException("Cannot extract content from the document!", e);
        }
    }

    /**
     * Produces the labelled raw text element for a stream by first building its
     * structure with {@link #extractStructure(ComponentConfiguration, InputStream)}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param inputStream source document
     * @return the element built by {@link RawTextWithLabelsExtractor}
     * @throws AnalysisException if any of the invoked components throws it
     */
    public static Element extractRawTextWithLabels(ComponentConfiguration componentConfiguration, InputStream inputStream) throws AnalysisException {
        return extractRawTextWithLabels(componentConfiguration, extractStructure(componentConfiguration, inputStream));
    }

    /**
     * Runs the metadata classifier, content filter and content header extractor
     * on the document and hands the original {@code bxDocument} reference together
     * with the resulting header structure to a new {@link RawTextWithLabelsExtractor}.
     *
     * @param componentConfiguration holder of the components to invoke
     * @param bxDocument document to analyse
     * @return the element built by {@link RawTextWithLabelsExtractor}
     * @throws AnalysisException if any of the invoked components throws it
     */
    public static Element extractRawTextWithLabels(ComponentConfiguration componentConfiguration, BxDocument bxDocument) throws AnalysisException {
        RawTextWithLabelsExtractor labelsExtractor = new RawTextWithLabelsExtractor();
        return labelsExtractor.extractRawTextWithLabels(
                bxDocument,
                componentConfiguration.contentHeaderExtractor.extractHeaders(
                        componentConfiguration.contentFilter.filter(
                                componentConfiguration.metadataClassifier.classifyZones(bxDocument))));
    }

    /**
     * Shared front of the stream-based pipelines: character extractor, then
     * document segmenter, then reading order resolver.
     */
    private static BxDocument segmentAndOrder(ComponentConfiguration componentConfiguration, InputStream inputStream) throws AnalysisException {
        return componentConfiguration.readingOrderResolver.resolve(
                componentConfiguration.documentSegmenter.segmentDocument(
                        componentConfiguration.characterExtractor.extractCharacters(inputStream)));
    }
}
