package pl.edu.icm.yadda.analysis.metadata.evaluation;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import libsvm.svm_parameter;
import org.apache.commons.cli.ParseException;
import pl.edu.icm.yadda.analysis.classification.features.FeatureVectorBuilder;
import pl.edu.icm.yadda.analysis.classification.features.SimpleFeatureVectorBuilder;
import pl.edu.icm.yadda.analysis.classification.hmm.training.TrainingElement;
import pl.edu.icm.yadda.analysis.classification.svm.SVMZoneClassifier;
import pl.edu.icm.yadda.analysis.classification.tools.ClassificationUtils;
import pl.edu.icm.yadda.analysis.metadata.sampleselection.OversamplingSelector;
import pl.edu.icm.yadda.analysis.metadata.sampleselection.SampleSelector;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.AbstractFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.AffiliationFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.AuthorFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.AuthorNameRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.BibinfoFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.CharCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.CharCountRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.DateFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.DistanceFromNearestNeighbourFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.DotCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.DotRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.EmailFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.EmptySpaceRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.FontHeightMeanFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.FreeSpaceWithinZoneFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.FullWordsRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.HeightFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.HeightRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.HorizontalRelativeProminenceFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.IsAfterMetTitleFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.IsFontBiggerThanNeighboursFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.IsGreatestFontOnPageFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.IsWidestOnThePageFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.KeywordsFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LetterCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LetterRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineHeightMeanFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineWidthMeanFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineXPositionMeanFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineXWidthPositionDiffFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LowercaseCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LowercaseRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.PreviousZoneFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.ProportionsFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.PunctuationRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.UppercaseCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.UppercaseRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.UppercaseWordCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.UppercaseWordRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.VerticalProminenceFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WhitespaceCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WhitespaceRelativeCountLogFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WidthFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WidthRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WordCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WordCountRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WordLengthMeanFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WordLengthMedianFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WordWidthMeanFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.XPositionFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.XPositionRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.YPositionFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.YPositionRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.YearFeature;
import pl.edu.icm.yadda.analysis.textr.ZoneClassifier;
import pl.edu.icm.yadda.analysis.textr.model.BxDocument;
import pl.edu.icm.yadda.analysis.textr.model.BxPage;
import pl.edu.icm.yadda.analysis.textr.model.BxZone;
import pl.edu.icm.yadda.analysis.textr.model.BxZoneLabel;
import pl.edu.icm.yadda.analysis.textr.model.BxZoneLabelCategory;

/**
 * Cross-validating evaluator that trains and scores an SVM zone classifier
 * restricted to zones of category {@link BxZoneLabelCategory#CAT_METADATA}.
 *
 * <p>Zones outside the metadata category are relabelled through
 * {@link BxZoneLabel#getLabelToGeneralMap()} before training, and only
 * metadata zones are taken into account when results are compared.
 */
public class SVMMetadataClassificationEvaluator extends CrossvalidatingZoneClassificationEvaluator {

    /**
     * Argument passed to the {@link OversamplingSelector} constructor; its
     * exact semantics are defined by that class.
     */
    private static final double OVERSAMPLING_FACTOR = 0.7d;

    /** Value assigned to {@code svm_parameter.gamma}. */
    private static final double SVM_GAMMA = 0.125d;

    /** Value assigned to {@code svm_parameter.C}. */
    private static final double SVM_COST = 16.0d;

    /**
     * Creates the feature vector builder used both for training and for
     * classification, configured with the full list of zone features below.
     *
     * @return a builder holding one instance of every listed feature calculator
     */
    @Override // pl.edu.icm.yadda.analysis.metadata.evaluation.CrossvalidatingZoneClassificationEvaluator
    @SuppressWarnings({"rawtypes", "unchecked"}) // the builder is used raw: the calculators' common
                                                  // element type is not imported by this file
    public FeatureVectorBuilder<BxZone, BxPage> getFeatureVectorBuilder() {
        SimpleFeatureVectorBuilder vectorBuilder = new SimpleFeatureVectorBuilder();
        vectorBuilder.setFeatureCalculators(Arrays.asList(
                new AbstractFeature(),
                new AffiliationFeature(),
                new AuthorFeature(),
                new AuthorNameRelativeFeature(),
                new BibinfoFeature(),
                new CharCountFeature(),
                new CharCountRelativeFeature(),
                new DateFeature(),
                new DistanceFromNearestNeighbourFeature(),
                new DotCountFeature(),
                new DotRelativeCountFeature(),
                new EmailFeature(),
                new EmptySpaceRelativeFeature(),
                new FontHeightMeanFeature(),
                new FreeSpaceWithinZoneFeature(),
                new FullWordsRelativeFeature(),
                new HeightFeature(),
                new HeightRelativeFeature(),
                new HorizontalRelativeProminenceFeature(),
                new IsAfterMetTitleFeature(),
                new IsFontBiggerThanNeighboursFeature(),
                new IsGreatestFontOnPageFeature(),
                new IsWidestOnThePageFeature(),
                new KeywordsFeature(),
                new LineCountFeature(),
                new LineRelativeCountFeature(),
                new LineHeightMeanFeature(),
                new LineWidthMeanFeature(),
                new LineXPositionMeanFeature(),
                new LineXWidthPositionDiffFeature(),
                new LetterCountFeature(),
                new LetterRelativeCountFeature(),
                new LowercaseCountFeature(),
                new LowercaseRelativeCountFeature(),
                new PreviousZoneFeature(),
                new ProportionsFeature(),
                new PunctuationRelativeCountFeature(),
                new UppercaseCountFeature(),
                new UppercaseRelativeCountFeature(),
                new UppercaseWordCountFeature(),
                new UppercaseWordRelativeCountFeature(),
                new VerticalProminenceFeature(),
                new WidthFeature(),
                new WordCountFeature(),
                new WordCountRelativeFeature(),
                new WordWidthMeanFeature(),
                new WordLengthMeanFeature(),
                new WordLengthMedianFeature(),
                new WhitespaceCountFeature(),
                new WhitespaceRelativeCountLogFeature(),
                new WidthRelativeFeature(),
                new XPositionFeature(),
                new XPositionRelativeFeature(),
                new YPositionFeature(),
                new YPositionRelativeFeature(),
                new YearFeature()));
        return vectorBuilder;
    }

    /**
     * Trains an SVM zone classifier on the given documents.
     *
     * <p>Note that the documents are modified in place: every zone whose label
     * is not in the metadata category has its label replaced by the value the
     * label-to-general map holds for it. The training elements are then
     * filtered by {@link BxZoneLabelCategory#CAT_METADATA} and passed through
     * an {@link OversamplingSelector} before the classifier is built.
     *
     * @param trainingDocuments documents to train on; their zone labels may be rewritten
     * @return the trained classifier (its weights are printed as a side effect)
     */
    @Override // pl.edu.icm.yadda.analysis.metadata.evaluation.CrossvalidatingZoneClassificationEvaluator
    protected SVMZoneClassifier getZoneClassifier(List<BxDocument> trainingDocuments) {
        FeatureVectorBuilder<BxZone, BxPage> featureVectorBuilder = getFeatureVectorBuilder();

        Map<BxZoneLabel, BxZoneLabel> labelToGeneral = BxZoneLabel.getLabelToGeneralMap();
        for (BxDocument document : trainingDocuments) {
            for (BxZone zone : document.asZones()) {
                if (zone.getLabel().getCategory() != BxZoneLabelCategory.CAT_METADATA) {
                    zone.setLabel(labelToGeneral.get(zone.getLabel()));
                }
            }
        }

        List<TrainingElement<BxZoneLabel>> metadataElements = ClassificationUtils.filterElements(
                EvaluationUtils.getTrainingElements(trainingDocuments, featureVectorBuilder),
                BxZoneLabelCategory.CAT_METADATA);
        SampleSelector<BxZoneLabel> selector =
                new OversamplingSelector<BxZoneLabel>(Double.valueOf(OVERSAMPLING_FACTOR));
        List<TrainingElement<BxZoneLabel>> trainingElements = selector.pickElements(metadataElements);

        SVMZoneClassifier classifier = new SVMZoneClassifier(featureVectorBuilder);
        svm_parameter param = SVMZoneClassifier.getDefaultParam();
        param.svm_type = svm_parameter.C_SVC;
        param.gamma = SVM_GAMMA;
        param.C = SVM_COST;
        param.kernel_type = svm_parameter.RBF;
        classifier.setParameter(param);

        classifier.buildClassifier(trainingElements);
        classifier.printWeigths(featureVectorBuilder);
        return classifier;
    }

    /**
     * Command-line entry point; delegates to
     * {@link CrossvalidatingZoneClassificationEvaluator#main(String[], CrossvalidatingZoneClassificationEvaluator)}
     * with a fresh instance of this evaluator.
     *
     * @param args command-line arguments, forwarded unchanged
     * @throws ParseException propagated from the delegated call
     */
    public static void main(String[] args) throws ParseException {
        CrossvalidatingZoneClassificationEvaluator.main(args, new SVMMetadataClassificationEvaluator());
    }

    /**
     * Returns a selector that keeps only the training elements whose label
     * belongs to {@link BxZoneLabelCategory#CAT_METADATA}.
     *
     * @return a selector producing a new list with the metadata elements, in input order
     */
    @Override // pl.edu.icm.yadda.analysis.metadata.evaluation.CrossvalidatingZoneClassificationEvaluator
    protected SampleSelector<BxZoneLabel> getSampleFilter() {
        return new SampleSelector<BxZoneLabel>() {
            @Override // pl.edu.icm.yadda.analysis.metadata.sampleselection.SampleSelector
            public List<TrainingElement<BxZoneLabel>> pickElements(List<TrainingElement<BxZoneLabel>> elements) {
                List<TrainingElement<BxZoneLabel>> metadataOnly = new ArrayList<TrainingElement<BxZoneLabel>>();
                for (TrainingElement<BxZoneLabel> element : elements) {
                    if (element.getLabel().getCategory() == BxZoneLabelCategory.CAT_METADATA) {
                        metadataOnly.add(element);
                    }
                }
                return metadataOnly;
            }
        };
    }

    /**
     * Compares two documents zone by zone, pairing zones by their index.
     *
     * <p>A pair contributes to the result only when the zone from the first
     * document is labelled with a metadata-category label. Each contributing
     * pair is also printed to standard output.
     *
     * @param firstDocument  document whose zone labels decide which pairs are compared
     *                       (presumably the ground truth; confirm against the superclass)
     * @param secondDocument document whose zones are compared against the first one
     * @return the accumulated comparison results
     */
    @Override // pl.edu.icm.yadda.analysis.metadata.evaluation.CrossvalidatingZoneClassificationEvaluator
    protected ClassificationResults compareDocuments(BxDocument firstDocument, BxDocument secondDocument) {
        // Fetch the zone lists once instead of on every loop iteration.
        List<BxZone> firstZones = firstDocument.asZones();
        List<BxZone> secondZones = secondDocument.asZones();
        assert firstZones.size() == secondZones.size()
                : "zone count mismatch: " + firstZones.size() + " vs " + secondZones.size();

        ClassificationResults results = newResults();
        for (int idx = 0; idx < firstZones.size(); idx++) {
            BxZone firstZone = firstZones.get(idx);
            BxZone secondZone = secondZones.get(idx);
            if (firstZone.getLabel().getCategory() == BxZoneLabelCategory.CAT_METADATA) {
                System.out.println("--- " + secondZone.getLabel() + " " + firstZone.getLabel());
                results.add(compareItems(firstZone, secondZone));
            }
        }
        return results;
    }

    /**
     * Intentionally a no-op: this evaluator does not alter documents before evaluation.
     *
     * @param document the document about to be evaluated (left untouched)
     */
    @Override // pl.edu.icm.yadda.analysis.metadata.evaluation.CrossvalidatingZoneClassificationEvaluator
    protected void preprocessDocumentForEvaluation(BxDocument document) {
    }
}
