package pl.edu.icm.yadda.analysis.articlecontent;

import pl.edu.icm.yadda.analysis.AnalysisException;
import pl.edu.icm.yadda.analysis.articlecontent.model.BxDocContentStructure;
import pl.edu.icm.yadda.analysis.articlecontent.model.DocumentContentStructure;
import pl.edu.icm.yadda.analysis.classification.features.FeatureVectorBuilder;
import pl.edu.icm.yadda.analysis.classification.knn.model.KnnModel;
import pl.edu.icm.yadda.analysis.textr.model.BxDocument;
import pl.edu.icm.yadda.analysis.textr.model.BxLine;
import pl.edu.icm.yadda.analysis.textr.model.BxPage;
import pl.edu.icm.yadda.analysis.textr.model.BxZone;
import pl.edu.icm.yadda.analysis.textr.model.BxZoneLabel;

/* loaded from: input_file:pl/edu/icm/yadda/analysis/articlecontent/LogicalStructureExtractor.class */
/**
 * Chains the article-content processing steps (junk filtering, header
 * extraction, content cleanup) and wraps the outcome in a
 * {@link DocumentContentStructure}.
 */
public class LogicalStructureExtractor {

    /**
     * Produces a {@link DocumentContentStructure} for the given document.
     *
     * <p>The steps, in order: {@link ContentJunkFilter#filterJunk} is applied to
     * {@code bxDocument}; its result is handed to
     * {@link ContentHeaderExtractor#extractHeaders}; the resulting
     * {@link BxDocContentStructure} is passed through
     * {@link ContentCleaner#cleanupContent}; finally a new
     * {@link DocumentContentStructure} is built from it.
     *
     * @param knnModel model passed to the junk filter
     * @param knnModel2 model passed to the header extractor
     * @param featureVectorBuilder zone-level feature builder passed to the junk filter
     * @param featureVectorBuilder2 first line-level feature builder passed to the header extractor
     * @param featureVectorBuilder3 second line-level feature builder passed to the header extractor
     * @param bxDocument document to analyse
     * @return a freshly created structure built from the cleaned header-extraction result
     * @throws AnalysisException declared by this method; presumably raised by one of the
     *         delegated steps (their signatures are not visible here)
     */
    public DocumentContentStructure extractStructure(
            KnnModel<BxZoneLabel> knnModel,
            KnnModel<BxZoneLabel> knnModel2,
            FeatureVectorBuilder<BxZone, BxPage> featureVectorBuilder,
            FeatureVectorBuilder<BxLine, BxPage> featureVectorBuilder2,
            FeatureVectorBuilder<BxLine, BxPage> featureVectorBuilder3,
            BxDocument bxDocument) throws AnalysisException {
        // The header extractor is instantiated before the junk filter runs, so the
        // construction/evaluation order of the collaborators stays as it always was.
        ContentHeaderExtractor headerExtractor = new ContentHeaderExtractor();
        BxDocContentStructure headerStructure = headerExtractor.extractHeaders(
                knnModel2,
                featureVectorBuilder2,
                featureVectorBuilder3,
                new ContentJunkFilter().filterJunk(knnModel, featureVectorBuilder, bxDocument));

        // Cleanup operates on the extracted structure itself; no return value is used.
        ContentCleaner cleaner = new ContentCleaner();
        cleaner.cleanupContent(headerStructure);

        DocumentContentStructure result = new DocumentContentStructure();
        result.build(headerStructure);
        return result;
    }
}
