package pl.edu.icm.cermine.content.filtering;

import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;
import pl.edu.icm.cermine.content.filtering.features.AreaFeature;
import pl.edu.icm.cermine.content.filtering.features.FigureTableFeature;
import pl.edu.icm.cermine.content.filtering.features.GreekLettersFeature;
import pl.edu.icm.cermine.content.filtering.features.MathSymbolsFeature;
import pl.edu.icm.cermine.content.filtering.features.RelativeMeanLengthFeature;
import pl.edu.icm.cermine.content.filtering.features.XVarianceFeature;
import pl.edu.icm.cermine.evaluation.tools.EvaluationUtils;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxPage;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.model.BxZoneLabel;
import pl.edu.icm.cermine.structure.model.BxZoneLabelCategory;
import pl.edu.icm.cermine.tools.classification.features.FeatureVectorBuilder;
import pl.edu.icm.cermine.tools.classification.general.BxDocsToTrainingSamplesConverter;
import pl.edu.icm.cermine.tools.classification.general.ClassificationUtils;
import pl.edu.icm.cermine.tools.classification.general.TrainingSample;
import pl.edu.icm.cermine.tools.classification.sampleselection.OversamplingSelector;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.4-SNAPSHOT.jar:pl/edu/icm/cermine/content/filtering/ContentFilterTools.class */
/**
 * Static helpers that turn documents into zone training samples for content filtering.
 *
 * <p>This class is not instantiable; all members are static.
 */
public final class ContentFilterTools {

    /**
     * Feature vector builder passed to the training-sample converter by
     * {@link #toTrainingSamples(List)}. Its feature calculators are registered once,
     * in the static initializer directly below.
     */
    public static final FeatureVectorBuilder<BxZone, BxPage> VECTOR_BUILDER = new FeatureVectorBuilder<>();

    static {
        // Registers the feature calculators, in this order.
        VECTOR_BUILDER.setFeatureCalculators(Arrays.asList(
                new MathSymbolsFeature(),
                new RelativeMeanLengthFeature(),
                new AreaFeature(),
                new GreekLettersFeature(),
                new XVarianceFeature(),
                new FigureTableFeature()));
    }

    private ContentFilterTools() {
        // Utility class: no instances.
    }

    /**
     * Loads documents via {@code EvaluationUtils.getDocumentsFromPath(str)} and converts
     * them with {@link #toTrainingSamples(List)}.
     *
     * @param str path handed to {@code EvaluationUtils.getDocumentsFromPath}
     * @return the training samples produced for the loaded documents
     * @throws AnalysisException propagated from the underlying calls
     * @throws TransformationException propagated from the underlying calls
     */
    public static List<TrainingSample<BxZoneLabel>> toTrainingSamples(String str) throws AnalysisException, TransformationException {
        return toTrainingSamples(EvaluationUtils.getDocumentsFromPath(str));
    }

    /**
     * Converts the given documents into zone training samples.
     *
     * <p>The pipeline is: build zone samples with {@link #VECTOR_BUILDER} and a label map
     * containing the single entry {@code BODY_HEADING -> BODY_CONTENT}, filter them with
     * {@code ClassificationUtils.filterElements(..., BxZoneLabelCategory.CAT_BODY)}, then
     * pass the result through an {@code OversamplingSelector} constructed with {@code 1.0}.
     *
     * @param list documents to convert
     * @return the samples returned by the oversampling selector
     * @throws AnalysisException propagated from the underlying calls
     */
    public static List<TrainingSample<BxZoneLabel>> toTrainingSamples(List<BxDocument> list) throws AnalysisException {
        // NOTE(review): OversamplingSelector is used as a raw type; its declaration is not
        // visible from this file - parameterize it if the class turns out to be generic.
        OversamplingSelector oversamplingSelector = new OversamplingSelector(1.0d);

        // Typed map with the enum constant itself as the key. The previous code used a raw
        // EnumMap and cast the key to EnumMap, which can never succeed for an enum constant.
        // Presumably this entry makes the converter treat heading zones as body content -
        // confirm against BxDocsToTrainingSamplesConverter.
        EnumMap<BxZoneLabel, BxZoneLabel> labelMap = new EnumMap<>(BxZoneLabel.class);
        labelMap.put(BxZoneLabel.BODY_HEADING, BxZoneLabel.BODY_CONTENT);

        return oversamplingSelector.pickElements(
                ClassificationUtils.filterElements(
                        BxDocsToTrainingSamplesConverter.getZoneTrainingSamples(list, VECTOR_BUILDER, labelMap),
                        BxZoneLabelCategory.CAT_BODY));
    }
}
