package ws.palladian.extraction.entity.evaluation;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.classification.utils.ClassificationUtils;
import ws.palladian.core.Annotation;
import ws.palladian.extraction.entity.Annotations;
import ws.palladian.extraction.entity.FileFormatParser;
import ws.palladian.extraction.entity.TaggingFormat;
import ws.palladian.extraction.entity.TrainableNamedEntityRecognizer;
import ws.palladian.extraction.entity.dataset.DatasetProcessor;
import ws.palladian.extraction.entity.evaluation.EvaluationResult;
import ws.palladian.extraction.entity.tagger.PalladianNer;
import ws.palladian.extraction.entity.tagger.PalladianNerTrainingSettings;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.nlp.StringHelper;

/* loaded from: input_file:ws/palladian/extraction/entity/evaluation/Evaluator.class */
/**
 * Runs evaluation experiments for named entity recognizers and writes the resulting scores as
 * delimited text reports below {@code data/temp/nerEvaluation/}.
 *
 * <p>Every report row contains precision, recall and F1 in {@code EXACT_MATCH} mode followed by
 * the same three measures in {@code MUC} mode, first for the complete test set and then for the
 * evaluation that is given the set of training/seed annotation values (labelled "unseen" in the
 * report headers and log messages). Values are delimited with
 * {@link ClassificationUtils#DEFAULT_SEPARATOR}; the header rows are written with literal
 * {@code ;} characters.
 */
public class Evaluator {
    private static final Logger LOGGER = LoggerFactory.getLogger(Evaluator.class);

    /** Directory prefix for every model and report file written by this class. */
    private static final String EVALUATION_PATH = "data/temp/nerEvaluation/";

    /** First command line argument that makes {@link #main(String[])} run the training set size experiment. */
    private static final String TRAINING_SET_SIZE_MODE = "trainingSetSize";

    /**
     * Trains a {@link PalladianNer} on seed annotations only and evaluates it, once with the
     * English settings and once with the language independent settings (both sparse).
     *
     * <p>For each mode one report {@code evaluateSeedInputOnlyNER_<mode>.csv} is written; it has
     * one row per seed count in the range {@code [i, i2]}.
     *
     * @param str  file the seed annotations are read from via {@link FileFormatParser#getSeedAnnotations}
     * @param str2 test file in column format that the trained model is evaluated on
     * @param i    smallest number of seeds to evaluate with (inclusive)
     * @param i2   largest number of seeds to evaluate with (inclusive)
     */
    public void evaluateSeedInputOnly(String str, String str2, int i, int i2) {
        // NOTE(review): the stop watch is shared by both modes, so the time logged for the second
        // mode presumably includes the first one - confirm against StopWatch semantics.
        StopWatch stopWatch = new StopWatch();
        evaluateSeedInputOnly(PalladianNerTrainingSettings.Builder.english().sparse().m150create(), str, str2, i, i2, stopWatch);
        evaluateSeedInputOnly(PalladianNerTrainingSettings.Builder.languageIndependent().sparse().m150create(), str, str2, i, i2, stopWatch);
    }

    /** Runs the seed-only experiment for one settings object and writes that mode's report. */
    private void evaluateSeedInputOnly(PalladianNerTrainingSettings settings, String seedFilePath, String testFilePath, int minSeeds, int maxSeeds, StopWatch stopWatch) {
        LOGGER.info("start evaluating in {} mode", settings.getLanguageMode());

        StringBuilder report = new StringBuilder();
        report.append("TUDNER, mode = ").append(settings.getLanguageMode()).append("\n");
        report.append(";All;;;;;;Unseen only;;;;;;\n");
        report.append("Number of Seeds;Exact Precision;Exact Recall;Exact F1;MUC Precision;MUC Recall;MUC F1;Exact Precision;Exact Recall;Exact F1;MUC Precision;MUC Recall;MUC F1;\n");

        for (int seedCount = minSeeds; seedCount <= maxSeeds; seedCount++) {
            LOGGER.info("evaluating with {} seed entities", seedCount);
            PalladianNer trainer = new PalladianNer(settings);
            Annotations<Annotation> seedAnnotations = FileFormatParser.getSeedAnnotations(seedFilePath, seedCount);
            LOGGER.info("train on these annotations: {}", seedAnnotations);
            String modelPath = EVALUATION_PATH + "tudner_seedOnlyEvaluation_" + seedCount + "Seeds_" + settings.getLanguageMode() + "." + trainer.getModelFileEndingIfNotSetAutomatically();
            trainer.train(seedAnnotations, modelPath);

            // Each evaluation pass uses a fresh recognizer that loads the model just written.
            EvaluationResult allResult = loadRecognizer(settings, modelPath).evaluate(testFilePath, TaggingFormat.COLUMN);
            report.append(seedCount).append(ClassificationUtils.DEFAULT_SEPARATOR);
            appendOverallScores(report, allResult);

            EvaluationResult unseenResult = loadRecognizer(settings, modelPath).evaluate(testFilePath, TaggingFormat.COLUMN, getValues(seedAnnotations));
            appendOverallScores(report, unseenResult);
            report.append("\n");
        }

        FileHelper.writeToFile(EVALUATION_PATH + "evaluateSeedInputOnlyNER_" + settings.getLanguageMode() + ".csv", report);
        LOGGER.info("evaluated TUDNER in {} mode in {}", settings.getLanguageMode(), stopWatch.getElapsedTimeString());
    }

    /** Creates a new {@link PalladianNer} with the given settings and loads the model stored at {@code modelPath}. */
    private static PalladianNer loadRecognizer(PalladianNerTrainingSettings settings, String modelPath) {
        PalladianNer recognizer = new PalladianNer(settings);
        recognizer.loadModel(modelPath);
        return recognizer;
    }

    /**
     * Collects the distinct values of the given annotations.
     *
     * @param list annotations to read the values from
     * @return a new mutable set holding every annotation value exactly once
     */
    private static Set<String> getValues(List<? extends Annotation> list) {
        Set<String> values = new HashSet<>();
        for (Annotation annotation : list) {
            values.add(annotation.getValue());
        }
        return values;
    }

    /**
     * Appends precision, recall and F1 in {@code EXACT_MATCH} mode followed by the same measures
     * in {@code MUC} mode, each value followed by the default separator.
     */
    private static void appendOverallScores(StringBuilder target, EvaluationResult result) {
        target.append(result.getPrecision(EvaluationResult.EvaluationMode.EXACT_MATCH)).append(ClassificationUtils.DEFAULT_SEPARATOR);
        target.append(result.getRecall(EvaluationResult.EvaluationMode.EXACT_MATCH)).append(ClassificationUtils.DEFAULT_SEPARATOR);
        target.append(result.getF1(EvaluationResult.EvaluationMode.EXACT_MATCH)).append(ClassificationUtils.DEFAULT_SEPARATOR);
        target.append(result.getPrecision(EvaluationResult.EvaluationMode.MUC)).append(ClassificationUtils.DEFAULT_SEPARATOR);
        target.append(result.getRecall(EvaluationResult.EvaluationMode.MUC)).append(ClassificationUtils.DEFAULT_SEPARATOR);
        target.append(result.getF1(EvaluationResult.EvaluationMode.MUC)).append(ClassificationUtils.DEFAULT_SEPARATOR);
    }

    /** Same column layout as {@link #appendOverallScores}, but restricted to a single tag. */
    private static void appendTagScores(StringBuilder target, EvaluationResult result, String tag) {
        target.append(result.getPrecisionFor(tag, EvaluationResult.EvaluationMode.EXACT_MATCH)).append(ClassificationUtils.DEFAULT_SEPARATOR);
        target.append(result.getRecallFor(tag, EvaluationResult.EvaluationMode.EXACT_MATCH)).append(ClassificationUtils.DEFAULT_SEPARATOR);
        target.append(result.getF1For(tag, EvaluationResult.EvaluationMode.EXACT_MATCH)).append(ClassificationUtils.DEFAULT_SEPARATOR);
        target.append(result.getPrecisionFor(tag, EvaluationResult.EvaluationMode.MUC)).append(ClassificationUtils.DEFAULT_SEPARATOR);
        target.append(result.getRecallFor(tag, EvaluationResult.EvaluationMode.MUC)).append(ClassificationUtils.DEFAULT_SEPARATOR);
        target.append(result.getF1For(tag, EvaluationResult.EvaluationMode.MUC)).append(ClassificationUtils.DEFAULT_SEPARATOR);
    }

    /**
     * Trains and evaluates the recognizer on each file produced by
     * {@link DatasetProcessor#splitFile} and records one report row per split file in
     * {@code dependencyOnTrainingSetSize_<name>.csv}.
     *
     * @param trainableNamedEntityRecognizer recognizer to train and evaluate
     * @param str  training file that is split
     * @param str2 test file in column format
     * @param str3 second argument handed to {@code splitFile}; {@link #main(String[])} passes a
     *             document start marker here
     * @param i    third argument handed to {@code splitFile}; logged as the minimum number of documents
     * @param i2   fourth argument handed to {@code splitFile}; logged as the maximum number of documents
     * @param i3   fifth argument handed to {@code splitFile}; logged as the step size
     */
    public void evaluateDependencyOnTrainingSetSize(TrainableNamedEntityRecognizer trainableNamedEntityRecognizer, String str, String str2, String str3, int i, int i2, int i3) {
        StopWatch stopWatch = new StopWatch();
        LOGGER.info("evaluate {} on {} with {} to {} documents with step size {}", trainableNamedEntityRecognizer.getName(), str2, i, i2, i3);
        List<String> splitFiles = new DatasetProcessor().splitFile(str, str3, i, i2, i3);
        StringBuilder report = new StringBuilder();

        for (String splitFile : splitFiles) {
            stopWatch.start();
            LOGGER.info("train {} on {}", trainableNamedEntityRecognizer.getName(), splitFile);
            // The part of the split file name between "_sep_" and "." labels the report row.
            String splitLabel = StringHelper.getSubstringBetween(splitFile, "_sep_", ".");
            Annotations<Annotation> trainingAnnotations = FileFormatParser.getSeedAnnotations(splitFile, -1);
            trainableNamedEntityRecognizer.train(splitFile, EVALUATION_PATH + trainableNamedEntityRecognizer.getName() + "_nerModel_" + splitLabel + "." + trainableNamedEntityRecognizer.getModelFileEndingIfNotSetAutomatically());

            EvaluationResult allResult = trainableNamedEntityRecognizer.evaluate(str2, TaggingFormat.COLUMN);
            report.append(splitLabel).append(ClassificationUtils.DEFAULT_SEPARATOR);
            appendOverallScores(report, allResult);

            EvaluationResult unseenResult = trainableNamedEntityRecognizer.evaluate(str2, TaggingFormat.COLUMN, getValues(trainingAnnotations));
            appendOverallScores(report, unseenResult);
            report.append("\n");

            LOGGER.info("evaluated {} on {} documents in {}", trainableNamedEntityRecognizer.getName(), splitLabel, stopWatch.getTotalElapsedTimeString());
            // Write the rows collected so far after every split, so an aborted run keeps its results.
            FileHelper.writeToFile(EVALUATION_PATH + "dependencyOnTrainingSetSize_" + trainableNamedEntityRecognizer.getName() + ".csv", report);
        }

        // Final write; this is the only write when splitFile returned no files.
        FileHelper.writeToFile(EVALUATION_PATH + "dependencyOnTrainingSetSize_" + trainableNamedEntityRecognizer.getName() + ".csv", report);
    }

    /**
     * Trains the recognizer, evaluates it on the complete test set and with the training
     * annotation values passed as third evaluation argument, and writes a per-tag report to
     * {@code evaluatePerConcept_<name>_<i>.csv}.
     *
     * @param trainableNamedEntityRecognizer recognizer to train and evaluate
     * @param str  training file; also the source of the tags that get their own report row
     * @param str2 test file in column format
     * @param i    number used in the model file name, the report file name and the log output
     * @return the averaged scores row (complete test set followed by unseen data) without a
     *         leading label, each value followed by the default separator
     */
    public String evaluatePerConceptPerformance(TrainableNamedEntityRecognizer trainableNamedEntityRecognizer, String str, String str2, int i) {
        StopWatch stopWatch = new StopWatch();
        LOGGER.info("start evaluating per concept performance for {} on {} seeds", trainableNamedEntityRecognizer.getName(), i);
        String modelPath = EVALUATION_PATH + "evaluatePerConceptModel_" + trainableNamedEntityRecognizer.getName() + "_" + i + "." + trainableNamedEntityRecognizer.getModelFileEndingIfNotSetAutomatically();
        trainableNamedEntityRecognizer.train(str, modelPath);
        LOGGER.info("training {} took {}", trainableNamedEntityRecognizer.getName(), stopWatch.getElapsedTimeString());

        StringBuilder report = new StringBuilder();
        StringBuilder averagedScores = new StringBuilder();
        Annotations<Annotation> trainingAnnotations = FileFormatParser.getSeedAnnotations(str, -1);

        stopWatch.start();
        trainableNamedEntityRecognizer.loadModel(modelPath);
        EvaluationResult allResult = trainableNamedEntityRecognizer.evaluate(str2, TaggingFormat.COLUMN);
        LOGGER.info("evaluating {} on the complete data took {}", trainableNamedEntityRecognizer.getName(), stopWatch.getElapsedTimeString());

        stopWatch.start();
        EvaluationResult unseenResult = trainableNamedEntityRecognizer.evaluate(str2, TaggingFormat.COLUMN, getValues(trainingAnnotations));
        LOGGER.info("evaluating {} on the unseen data took {}", trainableNamedEntityRecognizer.getName(), stopWatch.getElapsedTimeString());

        Set<String> tags = FileFormatParser.getTagsFromColumnFile(str, "\t");
        report.append(";Complete Testset;;;;;;Unseen Data Only;;;;;\n");
        report.append(";Ex. Prec.;Ex. Rec.;Ex. F1;MUC Prec.;MUC Rec.;MUC F1;Ex. Prec.;Ex. Rec.;Ex. F1;MUC Prec.;MUC Rec.;MUC F1\n");
        for (String tag : tags) {
            report.append(tag).append(ClassificationUtils.DEFAULT_SEPARATOR);
            appendTagScores(report, allResult, tag);
            appendTagScores(report, unseenResult, tag);
            report.append("\n");
        }

        // The "Averaged" label belongs to the file only; the returned row must not contain it
        // because evaluateOnGeneratedTrainingset prefixes the row with its own label.
        report.append("Averaged").append(ClassificationUtils.DEFAULT_SEPARATOR);
        appendOverallScores(averagedScores, allResult);
        appendOverallScores(averagedScores, unseenResult);
        report.append(averagedScores);

        FileHelper.writeToFile(EVALUATION_PATH + "evaluatePerConcept_" + trainableNamedEntityRecognizer.getName() + "_" + i + ".csv", report);
        LOGGER.info("evaluated {} on {} in {}", trainableNamedEntityRecognizer.getName(), str, stopWatch.getTotalElapsedTimeString());
        return averagedScores.toString();
    }

    /**
     * Runs {@link #evaluatePerConceptPerformance} for every recognizer on each generated training
     * file {@code <str>seedsTest<n>.txt} and writes the averaged rows to
     * {@code autoGeneratedTests.txt}.
     *
     * <p>The number of training files is taken from {@code FileHelper.getFiles(str, "seedsTest")};
     * files are then addressed by index starting at 1.
     *
     * @param list recognizers to evaluate; their scores are written side by side in every row
     * @param str  folder containing the generated training files; it is concatenated directly
     *             with the file name, so it has to end with a path separator
     * @param str2 test file in column format
     */
    public void evaluateOnGeneratedTrainingset(List<TrainableNamedEntityRecognizer> list, String str, String str2) {
        StopWatch stopWatch = new StopWatch();
        StringBuilder report = new StringBuilder();
        int trainingFileCount = FileHelper.getFiles(str, "seedsTest").length;

        report.append(ClassificationUtils.DEFAULT_SEPARATOR);
        for (TrainableNamedEntityRecognizer recognizer : list) {
            report.append(recognizer.getName()).append(";;;;;;;;;;;;");
        }
        report.append("\n");
        report.append(";Complete Testset;;;;;;Unseen Data Only;;;;;\n");
        report.append(";Ex. Prec.;Ex. Rec.;Ex. F1;MUC Prec.;MUC Rec.;MUC F1;Ex. Prec.;Ex. Rec.;Ex. F1;MUC Prec.;MUC Rec.;MUC F1\n");

        for (int seedCount = 1; seedCount <= trainingFileCount; seedCount++) {
            LOGGER.info("start evaluating on generated training data using {} seeds", seedCount);
            report.append(seedCount).append(ClassificationUtils.DEFAULT_SEPARATOR);
            String trainingFilePath = str + "seedsTest" + seedCount + ".txt";
            for (TrainableNamedEntityRecognizer recognizer : list) {
                report.append(evaluatePerConceptPerformance(recognizer, trainingFilePath, str2, seedCount));
            }
            report.append("\n");
        }

        FileHelper.writeToFile(EVALUATION_PATH + "autoGeneratedTests.txt", report);
        LOGGER.info("finished evaluating {} NERs using generated training data in {}", list.size(), stopWatch.getTotalElapsedTimeString());
    }

    /** Creates the two recognizers used by {@link #main(String[])}: English and language independent. */
    private static List<TrainableNamedEntityRecognizer> createDefaultRecognizers() {
        List<TrainableNamedEntityRecognizer> recognizers = new ArrayList<>();
        recognizers.add(new PalladianNer(PalladianNerTrainingSettings.Builder.english().m150create()));
        recognizers.add(new PalladianNer(PalladianNerTrainingSettings.Builder.languageIndependent().m150create()));
        return recognizers;
    }

    /**
     * Runs one of the predefined experiments and then terminates the JVM with exit code 0.
     *
     * <p>Without arguments the per concept evaluation on the CoNLL data set is run. If the first
     * argument is {@code trainingSetSize}, the dependency on the training set size is evaluated
     * on the TUD 2011 data set instead; that experiment used to sit behind an unconditional
     * {@code System.exit(0)} and could never execute.
     *
     * @param strArr command line arguments; only the first one is inspected
     */
    public static void main(String[] strArr) {
        Evaluator evaluator = new Evaluator();
        boolean evaluateTrainingSetSize = strArr != null && strArr.length > 0 && TRAINING_SET_SIZE_MODE.equals(strArr[0]);

        if (evaluateTrainingSetSize) {
            for (TrainableNamedEntityRecognizer recognizer : createDefaultRecognizers()) {
                evaluator.evaluateDependencyOnTrainingSetSize(recognizer, "data/datasets/ner/tud/tud2011_train.txt", "data/datasets/ner/tud/tud2011_test.txt", "=-DOCSTART-\tO", 1, 61, 5);
            }
        } else {
            for (TrainableNamedEntityRecognizer recognizer : createDefaultRecognizers()) {
                evaluator.evaluatePerConceptPerformance(recognizer, "data/datasets/ner/conll/training.txt", "data/datasets/ner/conll/test_final.txt", 0);
            }
        }
        System.exit(0);
    }
}
