package pl.edu.icm.yadda.analysis.metadata.zoneclassification;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import pl.edu.icm.yadda.analysis.classification.features.SimpleFeatureVectorBuilder;
import pl.edu.icm.yadda.analysis.classification.hmm.HMMServiceImpl;
import pl.edu.icm.yadda.analysis.classification.hmm.HMMZoneClassifier;
import pl.edu.icm.yadda.analysis.classification.hmm.probability.HMMProbabilityInfoFactory;
import pl.edu.icm.yadda.analysis.classification.tools.FileExtractor;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.AtCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.AtRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.CharCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.CharCountRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.CommaCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.CommaRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.DigitCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.DigitRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.DotCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.DotRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.HeightFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.HeightRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LetterCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LetterRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineHeightMeanFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineWidthMeanFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineXPositionDiffFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineXPositionMeanFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LineXWidthPositionDiffFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LowercaseCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.LowercaseRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.ProportionsFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.UppercaseCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.UppercaseRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.UppercaseWordCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.UppercaseWordRelativeCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WidthFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WidthRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WordCountFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WordCountRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.WordWidthMeanFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.XPositionFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.XPositionRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.YPositionFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.features.YPositionRelativeFeature;
import pl.edu.icm.yadda.analysis.metadata.zoneclassification.nodes.BxDocsToTrainingElementsConverterNode;
import pl.edu.icm.yadda.analysis.textr.model.BxDocument;
import pl.edu.icm.yadda.analysis.textr.model.BxPage;
import pl.edu.icm.yadda.analysis.textr.model.BxZone;
import pl.edu.icm.yadda.metadata.transformers.TransformationException;
import pl.edu.icm.yadda.process.ctx.ProcessContext;

/* loaded from: input_file:WEB-INF/lib/yadda-analysis-impl-1.12.0.jar:pl/edu/icm/yadda/analysis/metadata/zoneclassification/HMMZoneClassificationDemo.class */
/**
 * Command-line demo of HMM-based zone classification.
 *
 * <p>The program loads one sample document from the classpath, derives HMM
 * probability information from that same document, classifies the document's
 * zones with the resulting classifier and prints every zone's text followed by
 * its assigned label to standard output.
 */
public class HMMZoneClassificationDemo {
    /** Classpath location of the sample document used by the demo. */
    protected static final String hmmTestFile = "/pl/edu/icm/yadda/analysis/metadata/zoneclassification/09629351.xml";

    /**
     * Runs the demo.
     *
     * @param strArr command-line arguments; not used
     * @throws TransformationException declared for backward compatibility with the original signature
     * @throws Exception if the sample document cannot be found or loaded, or if any
     *                   of the conversion/classification steps fails
     */
    public static void main(String[] strArr) throws TransformationException, Exception {
        SimpleFeatureVectorBuilder featureVectorBuilder = createFeatureVectorBuilder();
        BxDocument document = loadTestDocument();

        // The sample document is the only input used to build the probability info.
        List<BxDocument> trainingDocuments = new ArrayList<BxDocument>(1);
        trainingDocuments.add(document);

        BxDocsToTrainingElementsConverterNode converter = new BxDocsToTrainingElementsConverterNode();
        converter.setFeatureVectorBuilder(featureVectorBuilder);

        // Note: the document being classified is the same one the probability
        // info was derived from, so this is a demonstration, not an evaluation.
        HMMZoneClassifier classifier = new HMMZoneClassifier(
                new HMMServiceImpl(),
                HMMProbabilityInfoFactory.getFVHMMProbability(
                        converter.process(trainingDocuments, (ProcessContext) null),
                        featureVectorBuilder),
                featureVectorBuilder);
        classifier.classifyZones(document);

        printZones(document);
    }

    /** Creates the feature vector builder configured with all zone features used by the demo. */
    private static SimpleFeatureVectorBuilder createFeatureVectorBuilder() {
        SimpleFeatureVectorBuilder featureVectorBuilder = new SimpleFeatureVectorBuilder();
        featureVectorBuilder.setFeatureCalculators(Arrays.asList(
                new ProportionsFeature(),
                new HeightFeature(),
                new WidthFeature(),
                new XPositionFeature(),
                new YPositionFeature(),
                new HeightRelativeFeature(),
                new WidthRelativeFeature(),
                new XPositionRelativeFeature(),
                new YPositionRelativeFeature(),
                new LineCountFeature(),
                new LineRelativeCountFeature(),
                new LineHeightMeanFeature(),
                new LineWidthMeanFeature(),
                new LineXPositionMeanFeature(),
                new LineXPositionDiffFeature(),
                new LineXWidthPositionDiffFeature(),
                new WordCountFeature(),
                new WordCountRelativeFeature(),
                new CharCountFeature(),
                new CharCountRelativeFeature(),
                new DigitCountFeature(),
                new DigitRelativeCountFeature(),
                new LetterCountFeature(),
                new LetterRelativeCountFeature(),
                new LowercaseCountFeature(),
                new LowercaseRelativeCountFeature(),
                new UppercaseCountFeature(),
                new UppercaseRelativeCountFeature(),
                new UppercaseWordCountFeature(),
                new UppercaseWordRelativeCountFeature(),
                new AtCountFeature(),
                new AtRelativeCountFeature(),
                new CommaCountFeature(),
                new CommaRelativeCountFeature(),
                new DotCountFeature(),
                new DotRelativeCountFeature(),
                new WordWidthMeanFeature()));
        return featureVectorBuilder;
    }

    /** Loads the sample document from the classpath, always releasing the underlying stream. */
    private static BxDocument loadTestDocument() throws Exception {
        InputStream input = HMMZoneClassificationDemo.class.getResourceAsStream(hmmTestFile);
        if (input == null) {
            // getResourceAsStream signals a missing resource with null; fail
            // here with a clear message instead of deep inside the extractor.
            throw new IllegalStateException("Classpath resource not found: " + hmmTestFile);
        }
        try {
            return new FileExtractor(input).getDocument();
        } finally {
            input.close();
        }
    }

    /** Prints, for every zone of every page, a blank line, the zone text and its label in brackets. */
    private static void printZones(BxDocument document) {
        for (BxPage page : document.getPages()) {
            for (BxZone zone : page.getZones()) {
                System.out.println();
                System.out.println(zone.toText());
                System.out.println("[" + zone.getLabel() + "]");
            }
        }
    }
}
