package pl.edu.icm.cermine.structure;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AbstractFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AffiliationFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AuthorFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AuthorNameRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.BibinfoFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.CharCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.CharCountRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.ContributionFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DateFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DistanceFromNearestNeighbourFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DotCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DotRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.EmailFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.EmptySpaceRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.FontHeightMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.FreeSpaceWithinZoneFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.FullWordsRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.HeightFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.HeightRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.HorizontalRelativeProminenceFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsAfterMetTitleFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsFontBiggerThanNeighboursFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsGreatestFontOnPageFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsWidestOnThePageFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.KeywordsFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LastButOneZoneFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LetterCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LetterRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineHeightMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineWidthMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineXPositionMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineXWidthPositionDiffFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LowercaseCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LowercaseRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.PreviousZoneFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.ProportionsFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.PunctuationRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseWordCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseWordRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.VerticalProminenceFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WhitespaceCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WhitespaceRelativeCountLogFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WidthFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WidthRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WordCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WordCountRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WordLengthMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WordLengthMedianFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WordWidthMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.XPositionFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.XPositionRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.YPositionFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.YPositionRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.YearFeature;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxPage;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.model.BxZoneLabelCategory;
import pl.edu.icm.cermine.tools.classification.features.FeatureVectorBuilder;
import pl.edu.icm.cermine.tools.classification.svm.SVMZoneClassifier;

/* loaded from: input_file:pl/edu/icm/cermine/structure/SVMMetadataZoneClassifier.class */
/**
 * SVM-based zone classifier that re-labels the metadata zones of a document.
 *
 * <p>The classifier is configured with the feature calculators returned by
 * {@link #getFeatureVectorBuilder()} and a model loaded either from the bundled classpath
 * resources or from caller-supplied readers / paths.
 */
public class SVMMetadataZoneClassifier extends SVMZoneClassifier {
    /** Classpath location of the bundled model used by the no-argument constructor. */
    private static final String MODEL_FILE_PATH = "/pl/edu/icm/cermine/structure/meta_classification_svm_model";
    /** Classpath location of the range file that accompanies the bundled model. */
    private static final String RANGE_FILE_PATH = "/pl/edu/icm/cermine/structure/meta_classification_svm_model.range";
    /** Lazily created shared instance; accessed only through {@link #getDefaultInstance()}. */
    private static SVMMetadataZoneClassifier defaultInstance;

    /**
     * Creates a classifier backed by the model and range files bundled as classpath resources.
     *
     * @throws AnalysisException if loading the bundled resources fails with an {@link IOException}
     */
    public SVMMetadataZoneClassifier() throws AnalysisException {
        super(getFeatureVectorBuilder());
        try {
            loadModelFromResources(MODEL_FILE_PATH, RANGE_FILE_PATH);
        } catch (IOException e) {
            throw new AnalysisException("Cannot create SVM classifier!", e);
        }
    }

    /**
     * Creates a classifier whose model is read from the given readers.
     *
     * <p>NOTE(review): the argument order is presumably model first, range second, mirroring
     * the resource-based constructor; confirm against {@code SVMZoneClassifier.loadModelFromFile}.
     *
     * @param bufferedReader first reader handed to {@code loadModelFromFile}
     * @param bufferedReader2 second reader handed to {@code loadModelFromFile}
     * @throws AnalysisException if reading fails with an {@link IOException}
     */
    public SVMMetadataZoneClassifier(BufferedReader bufferedReader, BufferedReader bufferedReader2) throws AnalysisException {
        super(getFeatureVectorBuilder());
        try {
            loadModelFromFile(bufferedReader, bufferedReader2);
        } catch (IOException e) {
            throw new AnalysisException("Cannot create SVM classifier!", e);
        }
    }

    /**
     * Creates a classifier whose model is loaded via the two given strings.
     *
     * <p>NOTE(review): the strings are presumably the model file path followed by the range
     * file path; confirm against {@code SVMZoneClassifier.loadModelFromFile}.
     *
     * @param str first string handed to {@code loadModelFromFile}
     * @param str2 second string handed to {@code loadModelFromFile}
     * @throws AnalysisException if loading fails with an {@link IOException}
     */
    public SVMMetadataZoneClassifier(String str, String str2) throws AnalysisException {
        super(getFeatureVectorBuilder());
        try {
            loadModelFromFile(str, str2);
        } catch (IOException e) {
            throw new AnalysisException("Cannot create SVM classifier!", e);
        }
    }

    /**
     * Builds a new feature vector builder populated with the feature calculators used by
     * this classifier.
     *
     * @return a fresh builder; every call creates new calculator instances
     */
    public static FeatureVectorBuilder<BxZone, BxPage> getFeatureVectorBuilder() {
        FeatureVectorBuilder<BxZone, BxPage> featureVectorBuilder = new FeatureVectorBuilder<>();
        // NOTE(review): keep this order as is. The list is only partly alphabetical and the
        // position of each calculator presumably has to match the trained model - confirm
        // before reordering.
        featureVectorBuilder.setFeatureCalculators(Arrays.asList(
                new AbstractFeature(),
                new AffiliationFeature(),
                new AuthorFeature(),
                new AuthorNameRelativeFeature(),
                new BibinfoFeature(),
                new CharCountFeature(),
                new CharCountRelativeFeature(),
                new ContributionFeature(),
                new DateFeature(),
                new DistanceFromNearestNeighbourFeature(),
                new DotCountFeature(),
                new DotRelativeCountFeature(),
                new EmailFeature(),
                new EmptySpaceRelativeFeature(),
                new FontHeightMeanFeature(),
                new FreeSpaceWithinZoneFeature(),
                new FullWordsRelativeFeature(),
                new HeightFeature(),
                new HeightRelativeFeature(),
                new HorizontalRelativeProminenceFeature(),
                new IsAfterMetTitleFeature(),
                new IsFontBiggerThanNeighboursFeature(),
                new IsGreatestFontOnPageFeature(),
                new IsWidestOnThePageFeature(),
                new KeywordsFeature(),
                new LastButOneZoneFeature(),
                new LineCountFeature(),
                new LineRelativeCountFeature(),
                new LineHeightMeanFeature(),
                new LineWidthMeanFeature(),
                new LineXPositionMeanFeature(),
                new LineXWidthPositionDiffFeature(),
                new LetterCountFeature(),
                new LetterRelativeCountFeature(),
                new LowercaseCountFeature(),
                new LowercaseRelativeCountFeature(),
                new PreviousZoneFeature(),
                new ProportionsFeature(),
                new PunctuationRelativeCountFeature(),
                new UppercaseCountFeature(),
                new UppercaseRelativeCountFeature(),
                new UppercaseWordCountFeature(),
                new UppercaseWordRelativeCountFeature(),
                new VerticalProminenceFeature(),
                new WidthFeature(),
                new WordCountFeature(),
                new WordCountRelativeFeature(),
                new WordWidthMeanFeature(),
                new WordLengthMeanFeature(),
                new WordLengthMedianFeature(),
                new WhitespaceCountFeature(),
                new WhitespaceRelativeCountLogFeature(),
                new WidthRelativeFeature(),
                new XPositionFeature(),
                new XPositionRelativeFeature(),
                new YPositionFeature(),
                new YPositionRelativeFeature(),
                new YearFeature()));
        return featureVectorBuilder;
    }

    /**
     * Returns the shared classifier backed by the bundled model, creating it on first use.
     *
     * <p>The method is synchronized so that concurrent first calls create (and load the model
     * for) a single instance only, and so that the stored reference is safely published to
     * other threads. If construction fails, nothing is cached and the next call retries.
     *
     * @return the shared default instance
     * @throws AnalysisException if the default instance cannot be constructed
     * @throws IOException declared for backward compatibility; not thrown by this method's own code
     */
    public static synchronized SVMMetadataZoneClassifier getDefaultInstance() throws AnalysisException, IOException {
        if (defaultInstance == null) {
            defaultInstance = new SVMMetadataZoneClassifier();
        }
        return defaultInstance;
    }

    /**
     * Re-labels, in place, every zone whose current label passes
     * {@code isOfCategoryOrGeneral(BxZoneLabelCategory.CAT_METADATA)}.
     *
     * <p>Each zone's label is dereferenced without a null check, so zones are expected to be
     * labelled before this method is called.
     *
     * @param bxDocument the document whose zones are to be classified; modified in place
     * @return the same {@code bxDocument} instance that was passed in
     * @throws AnalysisException declared by the overridden method; presumably propagated from
     *         {@code predictLabel}
     */
    @Override
    public BxDocument classifyZones(BxDocument bxDocument) throws AnalysisException {
        // First pass: attach every zone to its page, because the prediction below passes
        // bxZone.getParent() as the context.
        for (BxPage bxPage : bxDocument.getPages()) {
            for (BxZone bxZone : bxPage.getZones()) {
                bxZone.setParent(bxPage);
            }
        }
        // Second pass: replace the label of the matching zones with the predicted one.
        for (BxZone bxZone : bxDocument.asZones()) {
            if (bxZone.getLabel().isOfCategoryOrGeneral(BxZoneLabelCategory.CAT_METADATA)) {
                bxZone.setLabel(predictLabel(bxZone, bxZone.getParent()));
            }
        }
        return bxDocument;
    }
}
