package pl.edu.icm.cermine.metadata.zoneclassification;

import com.thoughtworks.xstream.XStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.springframework.beans.PropertyAccessor;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AbstractFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AcknowledgementFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AffiliationFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AtCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AtRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.AuthorFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.BibinfoFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.BracketRelativeCount;
import pl.edu.icm.cermine.metadata.zoneclassification.features.BracketedLineRelativeCount;
import pl.edu.icm.cermine.metadata.zoneclassification.features.CharCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.CharCountRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.CommaCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.CommaRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.ContainsCuePhrasesFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.ContainsPageNumberFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.CuePhrasesRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DateFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DigitCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DigitRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DistanceFromNearestNeighbourFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DotCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.DotRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.EmptySpaceRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.FigureFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.FontHeightMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.FreeSpaceWithinZoneFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.HeightFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.HeightRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.HorizontalRelativeProminenceFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsFirstPageFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsFontBiggerThanNeighboursFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsHighestOnThePageFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsItemizeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsLastPageFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.IsLowestOnThePageFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.KeywordsFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LetterCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LetterRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineHeightMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineWidthMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineXPositionDiffFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineXPositionMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LineXWidthPositionDiffFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LowercaseCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.LowercaseRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.ProportionsFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.PunctuationRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.ReferencesFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.ReferencesTitleFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.StartsWithDigitFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseWordCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.UppercaseWordRelativeCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.VerticalProminenceFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WhitespaceCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WhitespaceRelativeCountLogFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WidthFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WidthRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WordCountFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WordCountRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WordLengthMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WordLengthMedianFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.WordWidthMeanFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.XPositionFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.XPositionRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.YPositionFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.YPositionRelativeFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.features.YearFeature;
import pl.edu.icm.cermine.metadata.zoneclassification.tools.BxDocsToHMMConverter;
import pl.edu.icm.cermine.structure.HMMInitialZoneClassifier;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxPage;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.tools.InitiallyClassifiedZonesPreprocessor;
import pl.edu.icm.cermine.structure.transformers.TrueVizToBxDocumentReader;
import pl.edu.icm.cermine.tools.classification.features.FeatureVectorBuilder;
import pl.edu.icm.cermine.tools.classification.general.ZipExtractor;
import pl.edu.icm.cermine.tools.classification.hmm.HMMServiceImpl;
import pl.edu.icm.cermine.tools.classification.hmm.model.HMMProbabilityInfo;
import pl.edu.icm.cermine.tools.classification.hmm.model.HMMProbabilityInfoFactory;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.1-SNAPSHOT.jar:pl/edu/icm/cermine/metadata/zoneclassification/HMMZoneGeneralClassificationBigDemo.class */
/**
 * Command-line demo: builds an HMM probability model from the documents in
 * {@link #HMM_TRAIN_FILE}, prints the model as XML, classifies the zones of a
 * bundled test document with it and prints every zone's text and label.
 *
 * <p>Not instantiable; all members are static.
 */
public final class HMMZoneGeneralClassificationBigDemo {
    /** Name of the archive handed to {@link ZipExtractor} to obtain the training documents. */
    protected static final String HMM_TRAIN_FILE = "xmls.zip";
    /** Absolute classpath location (leading slash) of the document that gets classified. */
    private static final String HMM_TEST_FILE = "/09629351.xml";

    private HMMZoneGeneralClassificationBigDemo() {
    }

    /**
     * Loads the bundled test document from the classpath.
     *
     * @return a new document whose pages were read from {@code HMM_TEST_FILE}
     * @throws TransformationException if the reader fails to transform the input
     * @throws AnalysisException declared for callers; propagated from the underlying calls
     * @throws IllegalStateException if the resource is missing from the classpath or
     *         its stream cannot be closed
     */
    public static BxDocument getTestFile() throws TransformationException, AnalysisException {
        // The path is absolute, so it is resolved from the classpath root of the
        // class loader that loaded HMMZoneClassificationDemo.
        InputStream stream = HMMZoneClassificationDemo.class.getResourceAsStream(HMM_TEST_FILE);
        if (stream == null) {
            // getResourceAsStream signals "not found" with null; fail with a clear message
            // instead of an anonymous NullPointerException inside InputStreamReader.
            throw new IllegalStateException("Test resource not found on classpath: " + HMM_TEST_FILE);
        }
        // Decode with a fixed charset rather than the platform default so the result does
        // not vary between machines.
        // NOTE(review): assumes the bundled TrueViz file is UTF-8 encoded - confirm.
        try (InputStreamReader reader = new InputStreamReader(stream, StandardCharsets.UTF_8)) {
            return new BxDocument().setPages(new TrueVizToBxDocumentReader().read(reader, new Object[0]));
        } catch (IOException e) {
            // Only the implicit close() can raise IOException here; the method signature
            // must stay unchanged, so it is reported unchecked with the cause preserved.
            throw new IllegalStateException("Failed to close test resource: " + HMM_TEST_FILE, e);
        }
    }

    /**
     * Runs the demo end to end; see the class description for the steps.
     *
     * @param strArr command-line arguments (ignored)
     * @throws TransformationException propagated from loading or converting documents
     * @throws AnalysisException propagated from preprocessing or classification
     * @throws IOException propagated from the underlying calls
     * @throws URISyntaxException propagated from the underlying calls
     */
    public static void main(String[] strArr) throws TransformationException, AnalysisException, IOException, URISyntaxException {
        FeatureVectorBuilder<BxZone, BxPage> featureVectorBuilder = createFeatureVectorBuilder();
        InitiallyClassifiedZonesPreprocessor preprocessor = new InitiallyClassifiedZonesPreprocessor();
        BxDocument testFile = getTestFile();

        // Every training document is run through the preprocessor before conversion.
        List<BxDocument> documents = new ZipExtractor(HMM_TRAIN_FILE).getDocuments();
        for (BxDocument document : documents) {
            preprocessor.process(document);
        }

        BxDocsToHMMConverter converter = new BxDocsToHMMConverter();
        converter.setFeatureVectorBuilder(featureVectorBuilder);
        HMMProbabilityInfo probabilityInfo =
                HMMProbabilityInfoFactory.getFVHMMProbability(converter.process(documents), featureVectorBuilder);
        System.out.println(new XStream().toXML(probabilityInfo));

        // The classifier's return value is not used; labels are read back from testFile below.
        new HMMInitialZoneClassifier(new HMMServiceImpl(), probabilityInfo, featureVectorBuilder).classifyZones(testFile);
        for (BxPage page : testFile.getPages()) {
            for (BxZone zone : page.getZones()) {
                System.out.println("--------");
                System.out.println(zone.toText());
                System.out.println("[" + zone.getLabel() + "]");
            }
        }
    }

    /**
     * Creates the feature vector builder used for both training and classification.
     * The calculators are registered in exactly this order.
     */
    private static FeatureVectorBuilder<BxZone, BxPage> createFeatureVectorBuilder() {
        FeatureVectorBuilder<BxZone, BxPage> builder = new FeatureVectorBuilder<>();
        builder.setFeatureCalculators(Arrays.asList(
                new AbstractFeature(),
                new AcknowledgementFeature(),
                new AffiliationFeature(),
                new AtCountFeature(),
                new AtRelativeCountFeature(),
                new AuthorFeature(),
                new BibinfoFeature(),
                new BracketRelativeCount(),
                new BracketedLineRelativeCount(),
                new CharCountFeature(),
                new CharCountRelativeFeature(),
                new CommaCountFeature(),
                new CommaRelativeCountFeature(),
                new ContainsCuePhrasesFeature(),
                new CuePhrasesRelativeCountFeature(),
                new DateFeature(),
                new DigitCountFeature(),
                new DigitRelativeCountFeature(),
                new DistanceFromNearestNeighbourFeature(),
                new DotCountFeature(),
                new DotRelativeCountFeature(),
                new EmptySpaceRelativeFeature(),
                new FontHeightMeanFeature(),
                new FigureFeature(),
                new FreeSpaceWithinZoneFeature(),
                new HeightFeature(),
                new HeightRelativeFeature(),
                new HorizontalRelativeProminenceFeature(),
                new IsFirstPageFeature(),
                new IsFontBiggerThanNeighboursFeature(),
                new IsHighestOnThePageFeature(),
                new IsLastPageFeature(),
                new IsLowestOnThePageFeature(),
                new IsItemizeFeature(),
                new KeywordsFeature(),
                new LineCountFeature(),
                new LineRelativeCountFeature(),
                new LineHeightMeanFeature(),
                new LineWidthMeanFeature(),
                new LineXPositionMeanFeature(),
                new LineXPositionDiffFeature(),
                new LineXWidthPositionDiffFeature(),
                new LetterCountFeature(),
                new LetterRelativeCountFeature(),
                new LowercaseCountFeature(),
                new LowercaseRelativeCountFeature(),
                new ContainsPageNumberFeature(),
                new ProportionsFeature(),
                new PunctuationRelativeCountFeature(),
                new ReferencesFeature(),
                new ReferencesTitleFeature(),
                new StartsWithDigitFeature(),
                new UppercaseCountFeature(),
                new UppercaseRelativeCountFeature(),
                new UppercaseWordCountFeature(),
                new UppercaseWordRelativeCountFeature(),
                new VerticalProminenceFeature(),
                new WidthFeature(),
                new WordCountFeature(),
                new WordCountRelativeFeature(),
                new WordWidthMeanFeature(),
                new WordLengthMeanFeature(),
                new WordLengthMedianFeature(),
                new WhitespaceCountFeature(),
                new WhitespaceRelativeCountLogFeature(),
                new WidthRelativeFeature(),
                new XPositionFeature(),
                new XPositionRelativeFeature(),
                new YPositionFeature(),
                new YPositionRelativeFeature(),
                new YearFeature()));
        return builder;
    }
}
