package ws.palladian.extraction.entity.tagger;

import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.StringUtils;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.core.Annotation;
import ws.palladian.extraction.entity.Annotations;
import ws.palladian.extraction.entity.FileFormatParser;
import ws.palladian.extraction.entity.TaggingFormat;
import ws.palladian.extraction.entity.TrainableNamedEntityRecognizer;
import ws.palladian.extraction.entity.evaluation.EvaluationResult;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.io.FileHelper;

/* loaded from: input_file:ws/palladian/extraction/entity/tagger/StanfordNer.class */
/**
 * Named entity recognizer backed by the Stanford NLP {@link CRFClassifier}.
 *
 * <p>Training writes a properties file for the CRF classifier, trains it and serializes the
 * resulting model. Tagging requires a model to be loaded via {@link #loadModel(String)} first.
 */
public class StanfordNer extends TrainableNamedEntityRecognizer {
    private static final Logger LOGGER = LoggerFactory.getLogger(StanfordNer.class);

    /** Classifier used for tagging; {@code null} until {@link #loadModel(String)} has succeeded. */
    private AbstractSequenceClassifier<CoreLabel> classifier;

    /**
     * Builds the content of the properties file handed to the CRF classifier for training.
     *
     * @param trainFilePath path written as the {@code trainFile} property
     * @param modelFilePath path written as the {@code serializeTo} property
     * @return the complete properties file content (no trailing newline)
     */
    private static String buildConfigFile(String trainFilePath, String modelFilePath) {
        return "#location of the training file\n"
                + "trainFile = " + escapePropertyValue(trainFilePath) + "\n"
                + "#location where you would like to save (serialize to) your\n"
                + "#classifier; adding .gz at the end automatically gzips the file,\n"
                + "#making it faster and smaller\n"
                + "serializeTo = " + escapePropertyValue(modelFilePath) + "\n"
                + "#structure of your training file; this tells the classifier\n"
                + "#that the word is in column 0 and the correct answer is in\n"
                + "#column 1\n"
                + "map = word=0,answer=1\n"
                + "#these are the features we'd like to train with\n"
                + "#some are discussed below, the rest can be\n"
                + "#understood by looking at NERFeatureFactory\n"
                + "useClassFeature=true\n"
                + "useWord=true\n"
                + "useNGrams=true\n"
                + "#no ngrams will be included that do not contain either the\n"
                + "#beginning or end of the word\n"
                + "noMidNGrams=true\n"
                + "useDisjunctive=true\n"
                + "maxNGramLeng=6\n"
                + "usePrev=true\n"
                + "useNext=true\n"
                + "useSequences=true\n"
                + "usePrevSequences=true\n"
                + "maxLeft=1\n"
                + "#the next 4 deal with word shape features\n"
                + "useTypeSeqs=true\n"
                + "useTypeSeqs2=true\n"
                + "useTypeySequences=true\n"
                + "wordShape=chris2useLC";
    }

    /**
     * Escapes a value for use in a {@code .properties} file.
     *
     * <p>A backslash starts an escape sequence in the properties format, so an unescaped Windows
     * path such as {@code C:\temp\x} would be read back corrupted. A {@code null} value is
     * rendered as {@code "null"}, which is what plain string concatenation would produce.
     */
    private static String escapePropertyValue(String value) {
        return String.valueOf(value).replace("\\", "\\\\");
    }

    /** Deletes a temporary file if it exists; a failure is logged and otherwise ignored. */
    private static void deleteTempFile(String path) {
        if (path == null) {
            return;
        }
        File file = new File(path);
        if (file.exists() && !file.delete()) {
            LOGGER.warn("Could not delete temporary file {}", path);
        }
    }

    /**
     * @return the file ending of serialized models, {@code "ser.gz"}
     */
    public String getModelFileEnding() {
        return "ser.gz";
    }

    /**
     * @return always {@code true}
     */
    public boolean setsModelFileEndingAutomatically() {
        return true;
    }

    /**
     * Trains a CRF classifier and serializes it.
     *
     * <p>The training file is first passed through
     * {@link FileFormatParser#removeWhiteSpaceInFirstColumn} (with {@code "_"} as third argument)
     * into a temporary file, which is then used as the classifier's training file. Both temporary
     * files created here are removed again before the method returns.
     *
     * @param str path of the training file
     * @param str2 path the trained model is serialized to
     * @return {@code true} if training and serialization completed without an exception
     */
    public boolean train(String str, String str2) {
        String trainingCopyPath = FileHelper.getTempFile().getPath();
        String configPath = null;
        try {
            FileFormatParser.removeWhiteSpaceInFirstColumn(str, trainingCopyPath, "_");
            String configContent = buildConfigFile(trainingCopyPath, str2);
            // Requested only now, after the first temp file has been written, exactly as before:
            // the naming scheme of getTempFile() is not visible here, so the call order is kept.
            configPath = FileHelper.getTempFile().getPath();
            FileHelper.writeToFile(configPath, configContent);
            CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(
                    StringUtils.argsToProperties(new String[] {"-props", configPath}));
            crf.train();
            crf.serializeClassifier(crf.flags.serializeTo);
            return true;
        } finally {
            deleteTempFile(trainingCopyPath);
            deleteTempFile(configPath);
        }
    }

    /**
     * Loads a serialized classifier and makes it the one used by {@link #getAnnotations(String)}.
     *
     * @param str path of the serialized model
     * @return {@code true} if the model was loaded, {@code false} if loading threw an exception
     *         (the exception is logged and a previously loaded classifier is left untouched)
     */
    public boolean loadModel(String str) {
        StopWatch stopWatch = new StopWatch();
        try {
            this.classifier = CRFClassifier.getClassifierNoExceptions(str);
            LOGGER.debug("Model {} successfully loaded in {}", str, stopWatch);
            return true;
        } catch (Exception e) {
            LOGGER.error("Exception when loading model from {}", str, e);
            return false;
        }
    }

    /**
     * Tags the given text with the loaded classifier.
     *
     * <p>The classifier's inline-XML output is parsed into annotations, and
     * {@code removeNested()} is applied before they are returned.
     *
     * @param str the text to tag
     * @return a new list holding the annotations
     * @throws IllegalStateException if no model has been loaded yet
     */
    // NOTE(review): Annotations is used as a raw type because its generic signature is not
    // visible from this file; hence the narrowly scoped suppression.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public List<Annotation> getAnnotations(String str) {
        if (this.classifier == null) {
            throw new IllegalStateException("No model loaded; call loadModel(String) before tagging.");
        }
        String taggedXml = this.classifier.classifyWithInlineXML(str);
        Annotations annotations = FileFormatParser.getAnnotationsFromXmlText(taggedXml);
        annotations.removeNested();
        return new ArrayList<Annotation>((Collection) annotations);
    }

    /**
     * @return the display name of this recognizer, {@code "Stanford NER"}
     */
    public String getName() {
        return "Stanford NER";
    }

    /**
     * Demo entry point: trains on a fixed dataset, loads the resulting model, evaluates it on a
     * fixed test set and prints the MUC and exact-match results.
     *
     * @param strArr ignored
     * @throws Exception if training, loading or evaluation fails
     */
    public static void main(String[] strArr) throws Exception {
        StanfordNer stanfordNer = new StanfordNer();
        stanfordNer.train("data/datasets/ner/tud/tud2011_train.txt", "data/temp/stanfordNER2.model");
        stanfordNer.loadModel("data/temp/stanfordNER2.model");
        EvaluationResult evaluate = stanfordNer.evaluate("data/datasets/ner/tud/tud2011_test.txt", TaggingFormat.COLUMN);
        System.out.println(evaluate.getMUCResultsReadable());
        System.out.println(evaluate.getExactMatchResultsReadable());
    }
}
