package pl.edu.icm.cermine.metadata.optimization;

import com.itextpdf.text.html.HtmlTags;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.security.InvalidParameterException;
import java.util.Formatter;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;
import pl.edu.icm.cermine.evaluation.EvaluationUtils;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.structure.SVMInitialZoneClassifier;
import pl.edu.icm.cermine.structure.SVMMetadataZoneClassifier;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxPage;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.model.BxZoneLabel;
import pl.edu.icm.cermine.structure.model.BxZoneLabelCategory;
import pl.edu.icm.cermine.tools.classification.features.FeatureVectorBuilder;
import pl.edu.icm.cermine.tools.classification.general.BxDocsToTrainingSamplesConverter;
import pl.edu.icm.cermine.tools.classification.general.ClassificationUtils;
import pl.edu.icm.cermine.tools.classification.general.TrainingSample;
import pl.edu.icm.cermine.tools.classification.sampleselection.NormalSelector;
import pl.edu.icm.cermine.tools.classification.sampleselection.OversamplingSelector;
import pl.edu.icm.cermine.tools.classification.sampleselection.SampleSelector;
import pl.edu.icm.cermine.tools.classification.sampleselection.UndersamplingSelector;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.0-SNAPSHOT.jar:pl/edu/icm/cermine/metadata/optimization/LibSVMExporter.class */
/**
 * Command-line utility that converts labelled zone documents into a LibSVM-style
 * text file ({@code <label> <index>:<value> ...} per line).
 */
public class LibSVMExporter {
    /*
     * NOTE(review): the option names below are taken from unrelated Spring / iText
     * constants, which looks like a decompiler constant-matching artifact
     * (presumably the literals "meta" and "normal" - confirm before inlining).
     */
    private static final String OPT_META = BeanDefinitionParserDelegate.META_ELEMENT;
    private static final String OPT_INITIAL = "initial";
    private static final String OPT_UNDER = "under";
    private static final String OPT_OVER = "over";
    private static final String OPT_NORMAL = HtmlTags.NORMAL;

    /** Name of the file the exported samples are written to by {@link #main(String[])}. */
    private static final String OUTPUT_FILE = "zone_classification.dat";

    /**
     * Writes the given training samples to a file, one sample per line.
     *
     * <p>Each line starts with the ordinal of the sample's label, followed by
     * space-separated {@code index:value} pairs. Indices start at 1 and values are
     * printed with five decimal places using {@link Locale#US}, so the decimal
     * separator is always a dot.
     *
     * <p>Failures are handled on a best-effort basis: any exception raised while
     * writing is reported on {@code System.err} and the method returns normally.
     * {@code "Done."} is printed on {@code System.out} only after a successful export.
     *
     * @param list samples to export
     * @param str  path of the output file (created or overwritten)
     * @throws IOException declared for backward compatibility with existing callers
     */
    public static void toLibSVM(List<TrainingSample<BxZoneLabel>> list, String str) throws IOException {
        // try-with-resources closes (and flushes) the writer exactly once on every path.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(str))) {
            for (TrainingSample<BxZoneLabel> sample : list) {
                writer.write(String.valueOf(sample.getLabel().ordinal()));
                writer.write(" ");
                int featureIndex = 1; // feature indices in the output are 1-based
                for (Double value : sample.getFeatures().getFeatures()) {
                    writer.write(String.format(Locale.US, "%d:%.5f", featureIndex++, value));
                    writer.write(" ");
                }
                writer.write("\n");
            }
        } catch (Exception e) {
            // Deliberately best-effort: report the problem and return without rethrowing.
            System.err.println("Error: " + e.getMessage());
            return;
        }
        System.out.println("Done.");
    }

    /**
     * Entry point: reads documents from the input directory, converts them to
     * training samples, applies the chosen sampling strategy and writes the result
     * to {@code zone_classification.dat} (relative to the working directory).
     *
     * <p>Expected invocation: exactly three command-line tokens - one purpose flag
     * ({@code -initial} or the metadata flag), one sampling flag ({@code -under},
     * {@code -over} or the "normal" flag) and one input directory. Any other
     * combination prints usage information and exits with status 1.
     *
     * @param strArr raw command-line arguments
     * @throws ParseException          if the command line cannot be parsed
     * @throws IOException             declared for backward compatibility
     * @throws TransformationException declared for backward compatibility
     * @throws RuntimeException        if sample extraction or export fails; the
     *                                 original exception is attached as the cause
     */
    public static void main(String[] strArr) throws ParseException, IOException, TransformationException {
        Options options = new Options();
        options.addOption(OPT_META, false, "export data for metadata classification");
        options.addOption(OPT_INITIAL, false, "export data for initial classification");
        options.addOption(OPT_UNDER, false, "use undersampling for data selection");
        options.addOption(OPT_OVER, false, "use oversampling for data selection");
        options.addOption(OPT_NORMAL, false, "don't use any special strategy for data selection");
        CommandLine parse = new GnuParser().parse(options, strArr);

        boolean purposeChosen = parse.hasOption(OPT_INITIAL) ^ parse.hasOption(OPT_META);
        boolean samplingChosen = (parse.hasOption(OPT_UNDER) ^ parse.hasOption(OPT_OVER)) ^ parse.hasOption(OPT_NORMAL);
        // Also require the positional input directory, so getArgs()[0] below cannot fail.
        boolean inputGiven = parse.getArgs().length == 1;
        if (strArr.length != 3 || !purposeChosen || !samplingChosen || !inputGiven) {
            // Use the class name rather than strArr[0]: the latter is the first argument,
            // not the program name, and does not exist when no arguments are passed.
            new HelpFormatter().printHelp(LibSVMExporter.class.getSimpleName() + " [-options] input-directory", options);
            System.exit(1);
        }

        List<BxDocument> documentsFromPath = EvaluationUtils.getDocumentsFromPath(parse.getArgs()[0]);

        FeatureVectorBuilder<BxZone, BxPage> featureVectorBuilder;
        BxZoneLabelCategory bxZoneLabelCategory;
        Map<BxZoneLabel, BxZoneLabel> map = null;
        if (parse.hasOption(OPT_INITIAL)) {
            featureVectorBuilder = SVMInitialZoneClassifier.getFeatureVectorBuilder();
            map = BxZoneLabel.getLabelToGeneralMap();
            bxZoneLabelCategory = BxZoneLabelCategory.CAT_GENERAL;
        } else if (parse.hasOption(OPT_META)) {
            featureVectorBuilder = SVMMetadataZoneClassifier.getFeatureVectorBuilder();
            bxZoneLabelCategory = BxZoneLabelCategory.CAT_METADATA;
            // Zones outside the metadata category are relabelled with their general label.
            for (BxDocument document : documentsFromPath) {
                for (BxZone bxZone : document.asZones()) {
                    if (bxZone.getLabel().getCategory() != BxZoneLabelCategory.CAT_METADATA) {
                        bxZone.setLabel(bxZone.getLabel().getGeneralLabel());
                    }
                }
            }
        } else {
            throw new InvalidParameterException("Export purpose not specified!");
        }

        SampleSelector sampleSelector = null;
        if (parse.hasOption(OPT_OVER)) {
            sampleSelector = new OversamplingSelector(Double.valueOf(1.0d));
        } else if (parse.hasOption(OPT_UNDER)) {
            sampleSelector = new UndersamplingSelector(Double.valueOf(2.0d));
        } else if (parse.hasOption(OPT_NORMAL)) {
            sampleSelector = new NormalSelector();
        } else {
            System.err.println("Sampling strategy is not specified!");
            System.exit(1);
        }

        try {
            List<TrainingSample<BxZoneLabel>> zoneTrainingSamples = BxDocsToTrainingSamplesConverter.getZoneTrainingSamples(documentsFromPath, featureVectorBuilder, map);
            // Echo the label of every extracted sample to stdout before filtering.
            for (TrainingSample<BxZoneLabel> sample : zoneTrainingSamples) {
                System.out.println(sample.getLabel());
            }
            toLibSVM(sampleSelector.pickElements(ClassificationUtils.filterElements(zoneTrainingSamples, bxZoneLabelCategory)), OUTPUT_FILE);
        } catch (Exception e) {
            // Keep the original exception as the cause so the failure can be diagnosed.
            throw new RuntimeException("Unable to process the delivered training documents!", e);
        }
    }
}
