package pl.edu.icm.cermine.metadata.optimization;

import com.itextpdf.text.html.HtmlTags;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import pl.edu.icm.cermine.evaluation.tools.EvaluationUtils;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.structure.SVMInitialZoneClassifier;
import pl.edu.icm.cermine.structure.SVMMetadataZoneClassifier;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.model.BxZoneLabel;
import pl.edu.icm.cermine.structure.model.BxZoneLabelCategory;
import pl.edu.icm.cermine.tools.classification.general.BxDocsToTrainingSamplesConverter;
import pl.edu.icm.cermine.tools.classification.general.TrainingSample;
import pl.edu.icm.cermine.tools.classification.sampleselection.NormalSelector;
import pl.edu.icm.cermine.tools.classification.sampleselection.OversamplingSelector;
import pl.edu.icm.cermine.tools.classification.sampleselection.SampleSelector;
import pl.edu.icm.cermine.tools.classification.sampleselection.UndersamplingSelector;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.0-SNAPSHOT.jar:pl/edu/icm/cermine/metadata/optimization/LibSVMExporter.class */
/**
 * Command-line tool that converts zone training samples extracted from a directory of
 * documents into two LibSVM-formatted text files: one for the initial zone classifier and
 * one for the metadata zone classifier.
 */
public class LibSVMExporter {
    /** Flag selecting {@link UndersamplingSelector}. */
    private static final String OPTION_UNDER = "under";
    /** Flag selecting {@link OversamplingSelector}. */
    private static final String OPTION_OVER = "over";
    /** Flag selecting {@link NormalSelector}; the option name is whatever {@code HtmlTags.NORMAL} holds. */
    private static final String OPTION_NORMAL = HtmlTags.NORMAL;

    /** Output file receiving the samples built with the initial-classifier feature vector builder. */
    private static final String INITIAL_OUTPUT_FILE = "initial_zone_classification.dat";
    /** Output file receiving the metadata-category samples. */
    private static final String METADATA_OUTPUT_FILE = "meta_zone_classification.dat";

    /**
     * Writes the given samples to a text file, one sample per line, in the form
     * {@code <label ordinal> 1:<f1> 2:<f2> ... } where feature indices start at 1 and values
     * are printed with five decimal places using {@link Locale#US}. Samples whose label is
     * {@code null} are skipped.
     *
     * <p>This method is best-effort: any {@link Exception} raised while opening or writing
     * the file is reported on {@code System.err} and NOT rethrown, and {@code "Done."} is
     * printed to {@code System.out} only on success.
     *
     * @param list samples to export
     * @param str  path of the file to create or overwrite
     * @throws IOException only if closing the writer during cleanup fails
     */
    public static void toLibSVM(List<TrainingSample<BxZoneLabel>> list, String str) throws IOException {
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter(str));
            for (TrainingSample<BxZoneLabel> sample : list) {
                if (sample.getLabel() == null) {
                    continue;
                }
                writer.write(String.valueOf(sample.getLabel().ordinal()));
                writer.write(" ");
                int featureIndex = 1;
                for (Double value : sample.getFeatures().getFeatures()) {
                    writer.write(String.format(Locale.US, "%d:%.5f", featureIndex, value));
                    writer.write(" ");
                    featureIndex++;
                }
                writer.write("\n");
            }
            // Close inside the try block on purpose: a failure while flushing the last
            // buffer is then reported like any other write error instead of propagating.
            writer.close();
            System.out.println("Done.");
        } catch (Exception e) {
            System.err.println("Error: " + e.getMessage());
        } finally {
            // Releases the file on the failure path.
            if (writer != null) {
                writer.close();
            }
        }
    }

    /**
     * Entry point. Expects exactly one sampling flag ({@code -under}, {@code -over} or the
     * {@code HtmlTags.NORMAL} flag) and exactly one positional argument, the input directory.
     * On invalid usage the help text is printed and the JVM exits with status 1.
     *
     * <p>For every document, zones without a label are labelled {@code OTH_UNKNOWN} and
     * zones outside the metadata category are relabelled with their general label. Two sample
     * sets are then collected, passed through the chosen selector and written to
     * {@code initial_zone_classification.dat} and {@code meta_zone_classification.dat} in the
     * working directory.
     *
     * @param strArr command-line arguments
     * @throws ParseException          if the command line cannot be parsed
     * @throws IOException             declared by {@link #toLibSVM(List, String)}
     * @throws TransformationException declared by the document-processing calls
     * @throws AnalysisException       declared by the document-processing calls
     */
    public static void main(String[] strArr) throws ParseException, IOException, TransformationException, AnalysisException {
        Options options = new Options();
        options.addOption(OPTION_UNDER, false, "use undersampling for data selection");
        options.addOption(OPTION_OVER, false, "use oversampling for data selection");
        options.addOption(OPTION_NORMAL, false, "don't use any special strategy for data selection");
        CommandLine parse = new GnuParser().parse(options, strArr);

        // Validate the parsed positional arguments rather than the raw argv length: raw
        // length cannot tell a flag from a directory, so e.g. a repeated flag used to pass
        // the check and then fail on getArgs()[0].
        String[] positionalArgs = parse.getArgs();
        if (positionalArgs.length != 1 || countSamplingOptions(parse) != 1) {
            new HelpFormatter().printHelp(" [-options] input-directory", options);
            System.exit(1);
        }
        String inputDirectory = positionalArgs[0];

        // NOTE(review): SampleSelector is kept as a raw type because its declaration is not
        // visible from this file; parameterize it if the interface is generic.
        SampleSelector sampleSelector = null;
        if (parse.hasOption(OPTION_OVER)) {
            sampleSelector = new OversamplingSelector(Double.valueOf(1.0d));
        } else if (parse.hasOption(OPTION_UNDER)) {
            sampleSelector = new UndersamplingSelector(Double.valueOf(2.0d));
        } else if (parse.hasOption(OPTION_NORMAL)) {
            sampleSelector = new NormalSelector();
        } else {
            // Defensive: not expected to be reached after the validation above.
            System.err.println("Sampling pattern is not specified!");
            System.exit(1);
        }

        List<TrainingSample<BxZoneLabel>> initialSamples = new ArrayList<TrainingSample<BxZoneLabel>>();
        List<TrainingSample<BxZoneLabel>> metadataSamples = new ArrayList<TrainingSample<BxZoneLabel>>();
        int documentIndex = 0;
        Iterator<BxDocument> documents = new EvaluationUtils.DocumentsIterator(inputDirectory).iterator();
        while (documents.hasNext()) {
            BxDocument document = documents.next();
            System.out.println(documentIndex + ": " + document.getFilename());

            // Normalize labels in place before any samples are built from the document.
            for (BxZone zone : document.asZones()) {
                if (zone.getLabel() == null) {
                    zone.setLabel(BxZoneLabel.OTH_UNKNOWN);
                } else if (zone.getLabel().getCategory() != BxZoneLabelCategory.CAT_METADATA) {
                    zone.setLabel(zone.getLabel().getGeneralLabel());
                }
            }

            // Metadata classifier samples: only samples in the metadata category are kept,
            // but every sample's label is echoed.
            List<TrainingSample<BxZoneLabel>> metadataCandidates = BxDocsToTrainingSamplesConverter.getZoneTrainingSamples(
                    document, SVMMetadataZoneClassifier.getFeatureVectorBuilder(), BxZoneLabel.getIdentityMap());
            for (TrainingSample<BxZoneLabel> sample : metadataCandidates) {
                if (sample.getLabel().getCategory() == BxZoneLabelCategory.CAT_METADATA) {
                    metadataSamples.add(sample);
                }
            }
            for (TrainingSample<BxZoneLabel> sample : metadataCandidates) {
                System.out.println("M " + sample.getLabel());
            }

            // Initial classifier samples: all of them are kept.
            List<TrainingSample<BxZoneLabel>> initialCandidates = BxDocsToTrainingSamplesConverter.getZoneTrainingSamples(
                    document, SVMInitialZoneClassifier.getFeatureVectorBuilder(), BxZoneLabel.getLabelToGeneralMap());
            for (TrainingSample<BxZoneLabel> sample : initialCandidates) {
                System.out.println("I " + sample.getLabel());
            }
            initialSamples.addAll(initialCandidates);
            documentIndex++;
        }

        List<TrainingSample<BxZoneLabel>> pickedInitial = sampleSelector.pickElements(initialSamples);
        List<TrainingSample<BxZoneLabel>> pickedMetadata = sampleSelector.pickElements(metadataSamples);
        toLibSVM(pickedInitial, INITIAL_OUTPUT_FILE);
        toLibSVM(pickedMetadata, METADATA_OUTPUT_FILE);
    }

    /** Counts how many of the three mutually exclusive sampling flags are present. */
    private static int countSamplingOptions(CommandLine parse) {
        int selected = 0;
        if (parse.hasOption(OPTION_UNDER)) {
            selected++;
        }
        if (parse.hasOption(OPTION_OVER)) {
            selected++;
        }
        if (parse.hasOption(OPTION_NORMAL)) {
            selected++;
        }
        return selected;
    }
}
