package pl.edu.icm.cermine.structure;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AbstractFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AffiliationFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AtCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AtRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AuthorFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AuthorNameRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.BibinfoFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.CommaCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.CommaRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.CorrespondenceFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DigitCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DigitRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DistanceFromNearestNeighbourFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DotCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DotRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.EmptySpaceFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.EmptySpaceRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.FontHeightMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.FullWordsRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.HeightRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.HorizontalRelativeProminenceFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsAfterMetTitleFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsAnywhereElseFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsFirstPageFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsGreatestFontOnPageFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsHighestOnThePageFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsLowestOnThePageFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsOnSurroundingPagesFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsRightFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.KeywordsFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LastButOneZoneFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LicenseFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineHeightMaxMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineWidthMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineXPositionMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineXWidthPositionDiffFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LowercaseCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LowercaseRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.PageNumberFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.PreviousZoneFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.ProportionsFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.PunctuationRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.RelativeMeanLengthFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseWordCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseWordRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.VerticalProminenceFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WordLengthMedianFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.YPositionFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.YPositionRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.YearFeature;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxPage;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.model.BxZoneLabelCategory;
import pl.edu.icm.cermine.tools.classification.general.FeatureVectorBuilder;
import pl.edu.icm.cermine.tools.classification.svm.SVMZoneClassifier;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.11-SNAPSHOT.jar:pl/edu/icm/cermine/structure/SVMMetadataZoneClassifier.class */
/**
 * SVM-based zone classifier that re-labels zones whose current label belongs to
 * the {@link BxZoneLabelCategory#CAT_METADATA} category (or is accepted by
 * {@code isOfCategoryOrGeneral} for that category).
 *
 * <p>Every constructor installs the feature set produced by
 * {@link #getFeatureVectorBuilder()} and then loads a model through one of the
 * loader methods inherited from {@link SVMZoneClassifier}.
 */
public class SVMMetadataZoneClassifier extends SVMZoneClassifier {

    /**
     * Creates the classifier and loads its model from two readers.
     *
     * @param bufferedReader  first reader handed to {@code loadModelFromFile}
     *                        (presumably the SVM model data; confirm against the superclass)
     * @param bufferedReader2 second reader handed to {@code loadModelFromFile}
     *                        (presumably the feature range data; confirm against the superclass)
     * @throws AnalysisException if loading fails with an {@link IOException};
     *                           the I/O error is kept as the cause
     */
    public SVMMetadataZoneClassifier(BufferedReader bufferedReader, BufferedReader bufferedReader2) throws AnalysisException {
        super(getFeatureVectorBuilder());
        try {
            loadModelFromFile(bufferedReader, bufferedReader2);
        } catch (IOException cause) {
            throw new AnalysisException("Cannot create SVM classifier!", cause);
        }
    }

    /**
     * Creates the classifier and loads its model via {@code loadModelFromFile};
     * equivalent to calling the three-argument constructor with {@code false}.
     *
     * @param str  first location argument forwarded to the loader
     * @param str2 second location argument forwarded to the loader
     * @throws AnalysisException if the model cannot be loaded
     */
    public SVMMetadataZoneClassifier(String str, String str2) throws AnalysisException {
        this(str, str2, false);
    }

    /**
     * Creates the classifier and loads its model either through
     * {@code loadModelFromResources} or through {@code loadModelFromFile}.
     *
     * @param str  first location argument forwarded to the chosen loader
     * @param str2 second location argument forwarded to the chosen loader
     * @param z    {@code true} to use {@code loadModelFromResources},
     *             {@code false} to use {@code loadModelFromFile}
     * @throws AnalysisException if loading fails with an {@link IOException};
     *                           the I/O error is kept as the cause
     */
    public SVMMetadataZoneClassifier(String str, String str2, boolean z) throws AnalysisException {
        super(getFeatureVectorBuilder());
        try {
            if (!z) {
                loadModelFromFile(str, str2);
            } else {
                loadModelFromResources(str, str2);
            }
        } catch (IOException cause) {
            throw new AnalysisException("Cannot create SVM classifier!", cause);
        }
    }

    /**
     * Builds a new feature vector builder configured with the feature
     * calculators used by this classifier.
     *
     * <p>The calculators are registered in a fixed order; do not reorder the
     * list, since the position of each feature defines the vector layout.
     *
     * @return a freshly created, fully configured builder
     */
    public static FeatureVectorBuilder<BxZone, BxPage> getFeatureVectorBuilder() {
        FeatureVectorBuilder<BxZone, BxPage> builder = new FeatureVectorBuilder<>();
        builder.setFeatureCalculators(Arrays.asList(
                new LineHeightMaxMeanFeature(),
                new KeywordsFeature(),
                new IsRightFeature(),
                new BibinfoFeature(),
                new IsGreatestFontOnPageFeature(),
                new AuthorFeature(),
                new CorrespondenceFeature(),
                new IsLowestOnThePageFeature(),
                new AbstractFeature(),
                new AtCountFeature(),
                new IsAfterMetTitleFeature(),
                new LicenseFeature(),
                new DotCountFeature(),
                new AtRelativeCountFeature(),
                new WordLengthMedianFeature(),
                new AffiliationFeature(),
                new DigitCountFeature(),
                new YearFeature(),
                new IsHighestOnThePageFeature(),
                new AuthorNameRelativeFeature(),
                new CommaCountFeature(),
                new LineXPositionMeanFeature(),
                new IsOnSurroundingPagesFeature(),
                new UppercaseCountFeature(),
                new UppercaseWordCountFeature(),
                new IsAnywhereElseFeature(),
                new IsFirstPageFeature(),
                new PageNumberFeature(),
                new LastButOneZoneFeature(),
                new LineRelativeCountFeature(),
                new DotRelativeCountFeature(),
                new PreviousZoneFeature(),
                new FullWordsRelativeFeature(),
                new CommaRelativeCountFeature(),
                new HorizontalRelativeProminenceFeature(),
                new UppercaseWordRelativeCountFeature(),
                new LowercaseCountFeature(),
                new DigitRelativeCountFeature(),
                new PunctuationRelativeCountFeature(),
                new LineXWidthPositionDiffFeature(),
                new FontHeightMeanFeature(),
                new UppercaseRelativeCountFeature(),
                new LowercaseRelativeCountFeature(),
                new DistanceFromNearestNeighbourFeature(),
                new HeightRelativeFeature(),
                new EmptySpaceFeature(),
                new EmptySpaceRelativeFeature(),
                new VerticalProminenceFeature(),
                new YPositionFeature(),
                new YPositionRelativeFeature(),
                new LineWidthMeanFeature(),
                new ProportionsFeature(),
                new RelativeMeanLengthFeature()));
        return builder;
    }

    /**
     * Re-labels the metadata zones of the given document in place.
     *
     * <p>First, every zone produced by a page's iterator gets that page set as
     * its parent. Then each zone returned by {@code asZones()} whose current
     * label passes {@code isOfCategoryOrGeneral(CAT_METADATA)} receives the
     * label predicted for it in the context of its parent page; all other
     * zones keep their label.
     *
     * @param bxDocument the document to classify; it is modified in place
     * @return the same {@code bxDocument} instance that was passed in
     * @throws AnalysisException declared by the overridden method; this body
     *                           does not throw it directly
     */
    @Override
    public BxDocument classifyZones(BxDocument bxDocument) throws AnalysisException {
        // Pass 1: make sure each zone points back at the page it lives on,
        // because the prediction below is made against zone.getParent().
        for (Iterator<BxPage> pages = bxDocument.iterator(); pages.hasNext();) {
            BxPage page = pages.next();
            for (Iterator<BxZone> zones = page.iterator(); zones.hasNext();) {
                zones.next().setParent(page);
            }
        }

        // Pass 2: only zones currently in the metadata category are re-labelled.
        for (BxZone zone : bxDocument.asZones()) {
            if (!zone.getLabel().isOfCategoryOrGeneral(BxZoneLabelCategory.CAT_METADATA)) {
                continue;
            }
            zone.setLabel(predictLabel(zone, zone.getParent()));
        }
        return bxDocument;
    }
}
