package pl.edu.icm.cermine.libsvm.export;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.metadata.zoneclassification.features.FeatureList;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.transformers.TrueVizToBxDocumentReader;
import pl.edu.icm.cermine.tools.classification.general.FeatureVector;
import pl.edu.icm.cermine.tools.classification.general.FeatureVectorBuilder;

/* loaded from: input_file:pl/edu/icm/cermine/libsvm/export/ZonesToCSVExporter.class */
/**
 * Command-line tool that prints the zones of TrueViz documents as CSV on standard output.
 *
 * <p>The first line is a header: {@code Zone,Label} followed by every feature name reported by
 * {@link FeatureList#VECTOR_BUILDER}. Every following line describes one zone: a quoted,
 * sanitized and truncated excerpt of the zone text, the zone label, and the value of each
 * feature in header order.
 */
public class ZonesToCSVExporter {

    /** Maximum number of characters of zone text written to the {@code Zone} column. */
    private static final int MAX_ZONE_TEXT_LENGTH = 50;

    /**
     * Matches every character that is not an ASCII letter, an ASCII digit or a space. Removing
     * these characters keeps quotes, commas and line breaks out of the quoted CSV column.
     */
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^a-zA-Z0-9 ]");

    /**
     * Charset used to decode input files. It is fixed so that the result does not depend on the
     * platform default charset of the machine running the export.
     * NOTE(review): assumes the TrueViz inputs are UTF-8 encoded - confirm for legacy data sets.
     */
    private static final Charset INPUT_CHARSET = Charset.forName("UTF-8");

    /**
     * Exports all zones of all matching files found recursively under a directory.
     *
     * @param strArr command-line arguments: {@code strArr[0]} is the directory to scan and
     *               {@code strArr[1]} is the file extension (without a dot) of the files to read
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws FileNotFoundException if an input file cannot be opened
     * @throws TransformationException if it is raised while a document is being processed
     */
    public static void main(String[] strArr) throws FileNotFoundException, TransformationException {
        // Validate before any output is produced so a usage error does not emit a partial CSV.
        if (strArr == null || strArr.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: ZonesToCSVExporter <input directory> <file extension>");
        }

        FeatureVectorBuilder featureVectorBuilder = FeatureList.VECTOR_BUILDER;
        List<String> featureNames = featureVectorBuilder.getFeatureNames();

        StringBuilder header = new StringBuilder("Zone,Label");
        for (String featureName : featureNames) {
            header.append(',').append(featureName);
        }
        System.out.println(header);

        // Iterating as Object keeps this loop valid whether or not listFiles is generified.
        for (Object entry : FileUtils.listFiles(new File(strArr[0]), new String[]{strArr[1]}, true)) {
            exportFile((File) entry, featureVectorBuilder, featureNames);
        }
    }

    /** Reads one TrueViz file and prints a CSV row for each of its zones. */
    private static void exportFile(File file, FeatureVectorBuilder featureVectorBuilder,
            List<String> featureNames) throws FileNotFoundException, TransformationException {
        FileInputStream stream = new FileInputStream(file);
        try {
            InputStreamReader reader = new InputStreamReader(stream, INPUT_CHARSET);
            BxDocument document = new BxDocument()
                    .setPages(new TrueVizToBxDocumentReader().read(reader, new Object[0]));
            for (BxZone bxZone : document.asZones()) {
                FeatureVector featureVector =
                        featureVectorBuilder.getFeatureVector(bxZone, bxZone.getParent());
                System.out.println(toCsvRow(bxZone, featureVector, featureNames));
            }
        } finally {
            // Release the file handle even when parsing fails; otherwise a large directory
            // can exhaust the process' file descriptors.
            try {
                stream.close();
            } catch (IOException e) {
                // The data has already been read, so a failed close only deserves a warning.
                System.err.println("Warning: could not close " + file + ": " + e.getMessage());
            }
        }
    }

    /** Builds the CSV line (without line terminator) describing a single zone. */
    private static String toCsvRow(BxZone bxZone, FeatureVector featureVector,
            List<String> featureNames) {
        String text = NON_ALPHANUMERIC.matcher(bxZone.toText()).replaceAll("");
        StringBuilder row = new StringBuilder();
        row.append('"')
                .append(text, 0, Math.min(MAX_ZONE_TEXT_LENGTH, text.length()))
                .append('"');
        row.append(',').append(bxZone.getLabel());
        for (String featureName : featureNames) {
            row.append(',').append(featureVector.getValue(featureName));
        }
        return row.toString();
    }
}
