package ws.palladian.extraction.entity.tagger;

import java.io.IOException;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.classification.text.DictionaryBuilder;
import ws.palladian.classification.text.DictionaryModel;
import ws.palladian.classification.text.DictionaryTrieModel;
import ws.palladian.classification.text.ExperimentalScorers;
import ws.palladian.classification.text.PalladianTextClassifier;
import ws.palladian.classification.text.PruningStrategies;
import ws.palladian.core.Annotation;
import ws.palladian.core.AnnotationFilters;
import ws.palladian.core.CategoryEntries;
import ws.palladian.core.CategoryEntriesBuilder;
import ws.palladian.core.ClassifyingTagger;
import ws.palladian.core.ImmutableAnnotation;
import ws.palladian.core.Instance;
import ws.palladian.core.InstanceBuilder;
import ws.palladian.core.Tagger;
import ws.palladian.core.Token;
import ws.palladian.extraction.entity.Annotations;
import ws.palladian.extraction.entity.DateAndTimeTagger;
import ws.palladian.extraction.entity.FileFormatParser;
import ws.palladian.extraction.entity.RegExTagger;
import ws.palladian.extraction.entity.StringTagger;
import ws.palladian.extraction.entity.TaggingFormat;
import ws.palladian.extraction.entity.TrainableNamedEntityRecognizer;
import ws.palladian.extraction.entity.UrlTagger;
import ws.palladian.extraction.entity.evaluation.EvaluationResult;
import ws.palladian.extraction.entity.tagger.PalladianNerTrainingSettings;
import ws.palladian.extraction.location.ClassifiedAnnotation;
import ws.palladian.extraction.token.Tokenizer;
import ws.palladian.extraction.token.WordTokenizer;
import ws.palladian.helper.collection.Bag;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.functional.Predicates;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.io.LineAction;
import ws.palladian.helper.math.MathHelper;
import ws.palladian.helper.nlp.StringHelper;

/* loaded from: input_file:ws/palladian/extraction/entity/tagger/PalladianNer.class */
public class PalladianNer extends TrainableNamedEntityRecognizer implements ClassifyingTagger {
    /** Logger shared by all instances of this class. */
    private static final Logger LOGGER = LoggerFactory.getLogger(PalladianNer.class);
    /**
     * Pseudo tag assigned to wrongly extracted candidates during retraining; candidates which the
     * classifier predominantly assigns to this tag are dropped when tagging.
     */
    private static final String NO_ENTITY = "###NO_ENTITY###";
    /** Settings used for training; stays {@code null} when the instance was created from a model file. */
    private PalladianNerTrainingSettings trainingSettings;
    /** The trained or loaded model; not set by the training-settings constructor. */
    private PalladianNerModel model;
    /** Settings controlling the pre- and post-processing steps applied while tagging. */
    private PalladianNerTaggingSettings taggingSettings;

    /**
     * Creates a recognizer which still needs to be trained. The tagging settings are derived from
     * the language mode and training mode of the given training settings.
     *
     * @param palladianNerTrainingSettings the settings to train with, must not be {@code null}
     */
    public PalladianNer(PalladianNerTrainingSettings palladianNerTrainingSettings) {
        Validate.notNull(palladianNerTrainingSettings, "trainingSettings must not be null");
        trainingSettings = palladianNerTrainingSettings;
        taggingSettings = new PalladianNerTaggingSettings(
                palladianNerTrainingSettings.getLanguageMode(),
                palladianNerTrainingSettings.getTrainingMode());
    }

    /**
     * Creates a recognizer from a previously serialized model.
     *
     * @param str path to the model file, must not be empty
     * @throws IllegalStateException if the model cannot be read
     */
    public PalladianNer(String str) {
        this.loadModel(str);
    }

    /**
     * @return the file ending used for serialized models, {@code "model.gz"}
     */
    @Override
    public String getModelFileEnding() {
        final String ending = "model.gz";
        return ending;
    }

    /**
     * @return always {@code false}; this recognizer does not append the model file ending itself
     */
    @Override
    public boolean setsModelFileEndingAutomatically() {
        final boolean appendsEnding = false;
        return appendsEnding;
    }

    /**
     * Deserializes a model from the given path and adopts the tagging settings stored in it.
     *
     * @param str path to the serialized model, must not be empty
     * @return always {@code true}; failures are reported through an exception
     * @throws IllegalStateException if reading the file fails with an {@link IOException}
     */
    @Override
    public boolean loadModel(String str) {
        Validate.notEmpty(str, "configModelFilePath must not be empty");
        // Discard the current model up front, so that a failed load does not leave a stale one behind.
        model = null;
        final PalladianNerModel loaded;
        try {
            loaded = (PalladianNerModel) FileHelper.deserialize(str);
        } catch (IOException e) {
            throw new IllegalStateException("Error while loading model from \"" + str + "\".", e);
        }
        model = loaded;
        taggingSettings = loaded.getTaggingSettings();
        LOGGER.info("Model {} successfully loaded", str);
        return true;
    }

    /**
     * Logs the current model and serializes it to the given path.
     *
     * @param str destination path of the serialized model
     * @throws IllegalStateException if writing the file fails with an {@link IOException}
     */
    private void saveModel(String str) {
        LOGGER.info(model.toString());
        try {
            FileHelper.serialize(model, str);
        } catch (IOException e) {
            throw new IllegalStateException("Error while serializing to \"" + str + "\".", e);
        }
        LOGGER.info("Serialized Palladian NER to {}", str);
    }

    /**
     * Collects the words of the given text which predominantly have a case signature starting
     * with a lower case {@code a} when they do not directly follow a sentence boundary.
     * <p>
     * The first token of the text and every token following a {@code .}, {@code ?} or {@code !}
     * token is skipped; tokens of length one (after trimming) are ignored as well.
     *
     * @param str the training text
     * @return the lower-cased terms whose probability for the category {@code "a"} exceeds 0.5
     */
    Set<String> buildCaseDictionary(String str) {
        LOGGER.info("Building case dictionary");
        DictionaryBuilder builder = createDictionaryBuilder();
        Iterator<Token> tokens = new WordTokenizer().iterateTokens(str);
        boolean atSentenceStart = true;
        while (tokens.hasNext()) {
            String tokenValue = tokens.next().getValue();
            if (atSentenceStart) {
                // the token at a sentence start carries no reliable case information
                atSentenceStart = false;
                continue;
            }
            if (tokenValue.matches("[.?!]")) {
                atSentenceStart = true;
                continue;
            }
            String word = tokenValue.trim();
            if (word.length() <= 1) {
                continue;
            }
            String signature = StringHelper.getCaseSignature(word);
            if (!signature.toLowerCase().startsWith("a")) {
                continue;
            }
            // the category is the first signature character, i.e. either "a" or "A"
            builder.addDocument(Collections.singleton(word.toLowerCase()), signature.substring(0, 1));
        }
        DictionaryModel caseModel = (DictionaryModel) builder.create();
        Set<String> lowerCaseTerms = new HashSet<>();
        for (DictionaryModel.DictionaryEntry entry : caseModel) {
            String term = entry.getTerm();
            if (entry.getCategoryEntries().getProbability("a") > 0.5d) {
                lowerCaseTerms.add(term);
            }
        }
        return lowerCaseTerms;
    }

    /**
     * Trains on the given file without any additional annotations and saves the model.
     *
     * @param str path to the training file
     * @param str2 path where the model is saved
     * @return always {@code true}
     */
    @Override
    public boolean train(String str, String str2) {
        List<Annotation> noAdditionalAnnotations = Collections.emptyList();
        train(str, noAdditionalAnnotations, str2);
        return true;
    }

    /**
     * Trains a model using the mode given by the training settings and saves it afterwards.
     *
     * @param str path to the training file
     * @param list additional training annotations
     * @param str2 path where the model is saved
     */
    public void train(String str, List<Annotation> list, String str2) {
        LOGGER.info("Training with settings: {}", trainingSettings);
        boolean languageIndependent = trainingSettings.getLanguageMode() == PalladianNerTrainingSettings.LanguageMode.LanguageIndependent;
        if (!languageIndependent) {
            trainEnglish(str, list);
        } else {
            trainLanguageIndependent(str, list);
        }
        saveModel(str2);
    }

    /**
     * Replaces the entity dictionary of the model with the content of the given file.
     * <p>
     * The first line is split at {@code >} and stored as the concept likelihood order. Every
     * following line of the form {@code category###term} adds one dictionary entry; lines which do
     * not split into exactly two parts are ignored.
     *
     * @param str path to the dictionary file
     */
    public void setEntityDictionary(String str) {
        final DictionaryBuilder builder = createDictionaryBuilder();
        LineAction lineHandler = new LineAction() {
            @Override
            public void performAction(String str2, int i) {
                if (i == 0) {
                    PalladianNer.this.model.conceptLikelihoodOrder = CollectionHelper.newArrayList(str2.split("\\>"));
                    return;
                }
                String[] parts = str2.split("###");
                if (parts.length != 2) {
                    return;
                }
                builder.addDocument(Collections.singleton(parts[1]), parts[0]);
            }
        };
        FileHelper.performActionOnEveryLine(str, lineHandler);
        model.entityDictionary = (DictionaryModel) builder.create();
        LOGGER.info("Added {} entities to the dictionary", model.entityDictionary.getNumTerms());
    }

    /**
     * Creates a trie based dictionary builder. When training settings are available and demand a
     * minimum dictionary count above one, a term count pruning strategy with that count is set.
     *
     * @return a new dictionary builder
     */
    private DictionaryBuilder createDictionaryBuilder() {
        DictionaryTrieModel.Builder trieBuilder = new DictionaryTrieModel.Builder();
        // without training settings (model loaded from file) fall back to a count of one, i.e. no pruning
        int minCount = trainingSettings == null ? 1 : trainingSettings.getMinDictionaryCount();
        if (minCount > 1) {
            trieBuilder.setPruningStrategy(new PruningStrategies.TermCountPruningStrategy(minCount));
        }
        return trieBuilder;
    }

    /**
     * Rebuilds the entity dictionary and the annotation dictionary of the current model from the
     * given annotations and saves the model.
     *
     * @param list the annotations to train with
     * @param str path where the model is saved
     */
    public void train(List<Annotation> list, String str) {
        PalladianNerModel current = model;
        current.entityDictionary = buildEntityDictionary(list);
        current.annotationDictionary = buildAnnotationDictionary(list);
        saveModel(str);
    }

    /**
     * Builds a dictionary which maps each annotation value (as one single term) to its tag.
     *
     * @param iterable the annotations to add
     * @return the created dictionary
     */
    private DictionaryModel buildEntityDictionary(Iterable<Annotation> iterable) {
        LOGGER.info("Building entity dictionary");
        DictionaryBuilder builder = createDictionaryBuilder();
        for (Annotation entity : iterable) {
            String tag = entity.getTag();
            builder.addDocument(Collections.singleton(entity.getValue()), tag);
        }
        DictionaryModel dictionary = (DictionaryModel) builder.create();
        return dictionary;
    }

    /**
     * Trains a text classifier dictionary on the annotation values, using the tag of each
     * annotation as category.
     *
     * @param iterable the annotations to train with
     * @return the trained dictionary
     */
    private DictionaryModel buildAnnotationDictionary(Iterable<Annotation> iterable) {
        LOGGER.info("Building annotation dictionary");
        Function<Annotation, Instance> toInstance =
                annotation -> new InstanceBuilder().setText(annotation.getValue()).create(annotation.getTag());
        PalladianTextClassifier trainer = new PalladianTextClassifier(
                PalladianNerTrainingSettings.ANNOTATION_FEATURE_SETTING, createDictionaryBuilder());
        return trainer.train(CollectionHelper.convert(iterable, toInstance));
    }

    /**
     * Trains a fresh model in language independent mode. Left contexts, context dictionary and
     * entity dictionary are built from the annotations of the column file, whereas the annotation
     * dictionary is built from the token based annotations; the additional annotations are added
     * to both sets.
     *
     * @param str path to the training file in column format
     * @param list additional training annotations
     */
    private void trainLanguageIndependent(String str, List<Annotation> list) {
        String trainingText = FileFormatParser.getText(str, TaggingFormat.COLUMN);

        Annotations<Annotation> tokenBased = FileFormatParser.getAnnotationsFromColumnTokenBased(str);
        tokenBased.addAll(list);

        Annotations<Annotation> entityBased = FileFormatParser.getAnnotationsFromColumn(str);
        entityBased.addAll(list);

        PalladianNerModel fresh = new PalladianNerModel();
        model = fresh;
        fresh.languageMode = PalladianNerTrainingSettings.LanguageMode.LanguageIndependent;
        fresh.trainingMode = trainingSettings.getTrainingMode();
        fresh.leftContexts = buildLeftContexts(trainingText, entityBased);
        fresh.contextDictionary = buildContextDictionary(trainingText, entityBased);
        fresh.entityDictionary = buildEntityDictionary(entityBased);
        fresh.annotationDictionary = buildAnnotationDictionary(tokenBased);
    }

    /**
     * Trains a fresh model in English mode.
     * <p>
     * Builds the case dictionary, left contexts, context dictionary, entity dictionary and
     * annotation dictionary. In {@code Complete} training mode the freshly built model is
     * additionally evaluated on the training file and the annotation dictionary is rebuilt with
     * the {@code ERROR1} results added as {@link #NO_ENTITY} examples.
     *
     * @param str path to the training file in column format
     * @param list additional training annotations; only used for the entity and annotation
     *        dictionaries, not for the contexts
     */
    private void trainEnglish(String str, List<Annotation> list) {
        String text = FileFormatParser.getText(str, TaggingFormat.COLUMN);
        Annotations<Annotation> annotationsFromColumn = FileFormatParser.getAnnotationsFromColumn(str);
        this.model = new PalladianNerModel();
        this.model.languageMode = PalladianNerTrainingSettings.LanguageMode.English;
        this.model.trainingMode = this.trainingSettings.getTrainingMode();
        this.model.lowerCaseDictionary = buildCaseDictionary(text);
        if (this.trainingSettings.isEqualizeTypeCounts()) {
            // Equalize the tag distribution: sample as many annotations per tag as the least frequent tag has.
            Bag create = Bag.create(CollectionHelper.convert(annotationsFromColumn, Annotation.TAG_CONVERTER));
            // NOTE(review): presumably getMin() yields nothing usable for an empty training set — confirm
            int intValue = ((Integer) create.getMin().getValue()).intValue();
            Annotations<Annotation> annotations = new Annotations<>();
            Iterator it = create.uniqueItems().iterator();
            while (it.hasNext()) {
                annotations.addAll(new HashSet(MathHelper.sample(CollectionHelper.filter(annotationsFromColumn, AnnotationFilters.tag((String) it.next())), intValue)));
            }
            LOGGER.info("Original distribution {}; reduced from {} to {} for equalization", new Object[]{create, Integer.valueOf(annotationsFromColumn.size()), Integer.valueOf(annotations.size())});
            annotationsFromColumn = annotations;
        }
        // The contexts are built from the (possibly equalized) file annotations only.
        this.model.leftContexts = buildLeftContexts(text, annotationsFromColumn);
        this.model.contextDictionary = buildContextDictionary(text, annotationsFromColumn);
        // The entity and annotation dictionaries additionally get the extra annotations.
        Annotations annotations2 = new Annotations(annotationsFromColumn);
        if (list.size() > 0) {
            annotations2.addAll(list);
            LOGGER.info("Add {} additional training annotations", Integer.valueOf(list.size()));
        }
        this.model.entityDictionary = buildEntityDictionary(annotations2);
        this.model.annotationDictionary = buildAnnotationDictionary(annotations2);
        if (this.trainingSettings.getTrainingMode() == PalladianNerTrainingSettings.TrainingMode.Complete) {
            LOGGER.info("Start retraining (because of complete dataset, no sparse annotations)");
            // Must be initialized before evaluate() is called, which runs on the model built so far.
            this.model.removeAnnotations = new HashSet();
            EvaluationResult evaluate = evaluate(str, TaggingFormat.COLUMN);
            Set convertSet = CollectionHelper.convertSet(annotationsFromColumn, Token.VALUE_CONVERTER);
            // NOTE(review): ERROR1 presumably denotes annotations tagged although they are no entity — confirm
            for (Annotation annotation : evaluate.getAnnotations(EvaluationResult.ResultType.ERROR1)) {
                String value = annotation.getValue();
                annotations2.add(new ImmutableAnnotation(annotation.getStartPosition(), value, NO_ENTITY));
                // Values which never occur as an annotation in the training data are removed entirely when tagging.
                if (!convertSet.contains(value)) {
                    this.model.removeAnnotations.add(value.toLowerCase());
                }
            }
            LOGGER.info("{} annotations need to be completely removed", Integer.valueOf(this.model.removeAnnotations.size()));
            // Retrain the annotation dictionary including the NO_ENTITY examples.
            this.model.annotationDictionary = buildAnnotationDictionary(annotations2);
        }
    }

    /**
     * Classifies the given candidates with the annotation dictionary and keeps those whose
     * probability for {@link #NO_ENTITY} is below 0.5.
     *
     * @param collection the candidates to classify
     * @return the accepted candidates together with their classification result
     */
    private Annotations<ClassifiedAnnotation> classifyCandidates(Collection<Annotation> collection) {
        DictionaryModel dictionary = model.annotationDictionary;
        PalladianTextClassifier classifier = new PalladianTextClassifier(dictionary.getFeatureSetting());
        Annotations<ClassifiedAnnotation> accepted = new Annotations<>();
        for (Annotation candidate : collection) {
            CategoryEntries prediction = classifier.classify(candidate.getValue(), model.annotationDictionary);
            boolean likelyEntity = prediction.getProbability(NO_ENTITY) < 0.5d;
            if (likelyEntity) {
                accepted.add(new ClassifiedAnnotation(candidate, prediction));
            }
        }
        return accepted;
    }

    /**
     * Tags the given text. Depending on the tagging settings, URL and date annotations are added
     * to the recognized entities; nested annotations are removed at the end.
     *
     * @param str the text to tag
     * @return the classified annotations
     */
    @Override
    public List<ClassifiedAnnotation> getAnnotations(String str) {
        Annotations<ClassifiedAnnotation> result = getAnnotationsInternal(str);
        if (taggingSettings.isTagUrls()) {
            List<ClassifiedAnnotation> urls = getAnnotations(UrlTagger.INSTANCE, str);
            result.addAll(urls);
        }
        if (taggingSettings.isTagDates()) {
            List<ClassifiedAnnotation> dates = getAnnotations(DateAndTimeTagger.DEFAULT, str);
            result.addAll(dates);
        }
        result.removeNested();
        return result;
    }

    /**
     * Runs the given tagger and wraps each of its annotations into a classified annotation which
     * assigns the value 1.0 to the annotation's tag.
     *
     * @param tagger the tagger to run
     * @param str the text to tag
     * @return the wrapped annotations
     */
    private static List<ClassifiedAnnotation> getAnnotations(Tagger tagger, String str) {
        List<ClassifiedAnnotation> wrapped = new ArrayList<>();
        for (Annotation tagged : tagger.getAnnotations(str)) {
            CategoryEntries certainTag = new CategoryEntriesBuilder().set(tagged.getTag(), 1.0d).m76create();
            wrapped.add(new ClassifiedAnnotation(tagged, certainTag));
        }
        return wrapped;
    }

    /**
     * Applies the tag switching steps enabled in the tagging settings to the classified
     * annotations.
     * <ol>
     * <li>Context switching (only if the model has a context dictionary): each annotation is
     * re-scored with its surrounding character context.</li>
     * <li>Dictionary switching: annotations whose value is contained in the entity dictionary take
     * over the dictionary's categories. If the model defines a concept likelihood order, the first
     * concept of that order with a probability above zero is used exclusively.</li>
     * </ol>
     *
     * @param str the text which was tagged
     * @param annotations the classified annotations; this collection itself is not modified
     * @return the input collection if no step is enabled, a new collection otherwise
     */
    private Annotations<ClassifiedAnnotation> postProcessAnnotations(String str, Annotations<ClassifiedAnnotation> annotations) {
        LOGGER.debug("Start post processing annotations");
        NumberFormat percentFormat = NumberFormat.getNumberInstance(Locale.US);
        Annotations<ClassifiedAnnotation> current = annotations;

        if (taggingSettings.isSwitchTagAnnotationsUsingContext() && model.contextDictionary != null) {
            Annotations<ClassifiedAnnotation> switched = new Annotations<>();
            int changed = 0;
            for (ClassifiedAnnotation annotation : current) {
                ClassifiedAnnotation reclassified = applyContextAnalysis(annotation, str);
                if (!reclassified.sameTag(annotation)) {
                    LOGGER.debug("Changed {} from {} to {}, context: {}", annotation.getValue(), annotation.getTag(),
                            reclassified.getTag(), NerHelper.getCharacterContext(annotation, str, 40));
                    changed++;
                }
                switched.add(reclassified);
            }
            // changed > 0 implies a non-empty collection, so the division is safe
            double percentage = changed > 0 ? (100.0d * changed) / current.size() : 0.0d;
            LOGGER.debug("Changed {} % using patterns", percentFormat.format(percentage));
            current = switched;
        }

        if (taggingSettings.isSwitchTagAnnotationsUsingDictionary()) {
            Annotations<ClassifiedAnnotation> switched = new Annotations<>();
            int changed = 0;
            for (ClassifiedAnnotation annotation : current) {
                CategoryEntries dictionaryEntries = model.entityDictionary.getCategoryEntries(annotation.getValue());
                if (dictionaryEntries.size() <= 0) {
                    // not in the dictionary, keep the classification as it is
                    switched.add(annotation);
                    continue;
                }
                if (model.conceptLikelihoodOrder != null) {
                    for (String concept : model.conceptLikelihoodOrder) {
                        if (dictionaryEntries.getProbability(concept) > 0.0d) {
                            // the first matching concept of the predefined order wins
                            dictionaryEntries = new CategoryEntriesBuilder().set(concept, 1.0d).m76create();
                            break;
                        }
                    }
                }
                if (!annotation.getTag().equals(dictionaryEntries.getMostLikelyCategory())) {
                    LOGGER.debug("Changed {} from {} to {} with dictionary", annotation.getValue(), annotation.getTag(),
                            dictionaryEntries.getMostLikelyCategory());
                    changed++;
                }
                switched.add(new ClassifiedAnnotation(annotation, dictionaryEntries));
            }
            double percentage = changed > 0 ? (100.0d * changed) / current.size() : 0.0d;
            LOGGER.debug("Changed {} % using entity dictionary", percentFormat.format(percentage));
            current = switched;
        }
        return current;
    }

    /**
     * Extracts entity candidates from the text, pre-processes and classifies them, post-processes
     * the result and drops everything tagged as {@link #NO_ENTITY}. In language independent mode
     * the candidates are produced by a token-splitting regular expression tagger and the final
     * annotations are combined by {@code NerHelper.combineAnnotations}.
     *
     * @param str the text to tag
     * @return the recognized entities
     */
    private Annotations<ClassifiedAnnotation> getAnnotationsInternal(String str) {
        boolean languageIndependent = model.languageMode == PalladianNerTrainingSettings.LanguageMode.LanguageIndependent;
        Set<Annotation> candidates = new HashSet<>(
                (languageIndependent ? new RegExTagger(Tokenizer.TOKEN_SPLIT_REGEX, StringTagger.CANDIDATE_TAG) : StringTagger.INSTANCE)
                        .getAnnotations(str));
        preProcessAnnotations(candidates);
        Annotations<ClassifiedAnnotation> classified = classifyCandidates(candidates);
        Annotations<ClassifiedAnnotation> entities = postProcessAnnotations(str, classified);
        // keep only those annotations which are not tagged as NO_ENTITY
        CollectionHelper.remove(entities, Predicates.not(AnnotationFilters.tag(NO_ENTITY)));
        if (!languageIndependent) {
            return entities;
        }
        return NerHelper.combineAnnotations(entities);
    }

    /**
     * Applies the candidate cleaning steps enabled in the tagging settings, in a fixed order, to
     * the given candidates. Steps which depend on an optional part of the model are skipped when
     * that part is missing.
     *
     * @param set the entity candidates; modified in place
     */
    private void preProcessAnnotations(Set<Annotation> set) {
        LOGGER.debug("Start pre processing annotations");
        // removeAnnotations is only populated by English training in Complete mode, so guard
        // against a model without it, in line with the other optional model parts below
        if (taggingSettings.isRemoveIncorrectlyTaggedInTraining() && model.removeAnnotations != null) {
            removeIncorrectlyTaggedInTraining(set);
        }
        if (taggingSettings.isUnwrapEntities()) {
            NerHelper.unwrapEntities(set, model);
        }
        if (taggingSettings.isUnwrapEntitiesWithContext() && model.leftContexts != null) {
            unwrapWithContext(set);
        }
        if (taggingSettings.isRemoveDateFragments()) {
            NerHelper.removeDateFragments(set);
        }
        if (taggingSettings.isFixStartErrorsCaseDictionary() && model.lowerCaseDictionary != null) {
            fixStartErrorsWithCaseDictionary(set);
        }
        if (taggingSettings.isRemoveSentenceStartErrorsCaseDictionary() && model.lowerCaseDictionary != null) {
            removeSentenceStartErrors(set);
        }
        if (taggingSettings.isRemoveDates()) {
            NerHelper.removeDates(set);
        }
    }

    /**
     * Strips leading words which are contained in the lower case dictionary from multi-word
     * candidates. Stripping stops as soon as the remainder is contained in the entity dictionary
     * or the next word is not in the lower case dictionary. A candidate whose words are all
     * stripped is dropped; a partially stripped candidate is replaced by its remainder.
     *
     * @param set the entity candidates; modified in place
     */
    private void fixStartErrorsWithCaseDictionary(Set<Annotation> set) {
        Set<Annotation> corrected = new HashSet<>();
        Set<Annotation> obsolete = new HashSet<>();
        for (Annotation candidate : set) {
            String original = candidate.getValue();
            String[] words = original.split("\\s");
            if (words.length == 1) {
                continue;
            }
            int cutOffset = 0;
            String remainder = original;
            for (String word : words) {
                if (model.entityDictionaryContains(remainder)) {
                    LOGGER.trace("'{}' is in entity dictionary, stop correcting", remainder);
                    break;
                }
                boolean inLowerCaseDictionary = model.lowerCaseDictionary.contains(word.toLowerCase());
                if (!inLowerCaseDictionary) {
                    LOGGER.trace("Stop correcting '{}' at '{}' because of lc/uc ratio of {}", original, remainder, inLowerCaseDictionary);
                    break;
                }
                // skip the word and the following whitespace character
                cutOffset += word.length() + 1;
                if (cutOffset >= original.length()) {
                    break;
                }
                remainder = original.substring(cutOffset);
            }
            if (cutOffset >= original.length()) {
                LOGGER.debug("Drop '{}' completely because of lc/uc ratio", original);
                obsolete.add(candidate);
            } else if (cutOffset > 0) {
                LOGGER.debug("Correct '{}' to '{}' because of lc/uc ratios", original, remainder);
                int correctedStart = candidate.getStartPosition() + cutOffset;
                obsolete.add(candidate);
                corrected.add(new ImmutableAnnotation(correctedStart, remainder));
            }
        }
        LOGGER.debug("Adding {}, removing {} through case dictionary unwrapping", corrected.size(), obsolete.size());
        set.removeAll(obsolete);
        set.addAll(corrected);
    }

    /**
     * Splits candidates which contain one of the model's left contexts. A candidate qualifies if
     * it starts with {@code "<context> "} or contains {@code " <context> "}; the part after the
     * context becomes a new candidate with the original tag, and every sub phrase of the part up
     * to and including the context which is contained in the entity dictionary is added as a
     * further candidate. Candidates whose complete value is in the entity dictionary are left
     * untouched.
     * <p>
     * Only the first matching left context is applied per candidate. The previous loop shape
     * ({@code while (true)} around an {@code if (it.hasNext())} without any exit) never terminated
     * once the left contexts were exhausted.
     *
     * @param set the entity candidates; modified in place
     */
    private void unwrapWithContext(Set<Annotation> set) {
        Set<Annotation> toAdd = new HashSet<>();
        Set<Annotation> toRemove = new HashSet<>();
        for (Annotation annotation : set) {
            String value = annotation.getValue();
            if (model.entityDictionary.getCategoryEntries(value).getTotalCount() > 0) {
                // known entity, do not unwrap
                continue;
            }
            for (String leftContext : model.leftContexts) {
                int prefixIndex = value.indexOf(leftContext + " ");
                int innerIndex = value.indexOf(" " + leftContext + " ");
                int contextLength = -1;
                int contextIndex = -1;
                if (prefixIndex == 0) {
                    contextLength = leftContext.length() + 1;
                    contextIndex = prefixIndex;
                } else if (innerIndex > -1) {
                    // account for the additional leading space
                    contextLength = leftContext.length() + 2;
                    contextIndex = innerIndex;
                }
                if (contextIndex == -1) {
                    continue;
                }
                int splitOffset = contextIndex + contextLength;
                String remainder = value.substring(splitOffset);
                toAdd.add(new ImmutableAnnotation(annotation.getStartPosition() + splitOffset, remainder, annotation.getTag()));
                String prefix = value.substring(0, splitOffset);
                for (String subPhrase : StringHelper.getSubPhrases(prefix)) {
                    if (model.entityDictionaryContains(subPhrase)) {
                        toAdd.add(new ImmutableAnnotation(annotation.getStartPosition() + prefix.indexOf(subPhrase), subPhrase));
                        LOGGER.debug("Add from prefix {}", subPhrase);
                    }
                }
                toRemove.add(annotation);
                LOGGER.debug("Add {}, delete {} (left context: {})", remainder, value, leftContext);
                // the candidate has been replaced, no need to try further left contexts
                break;
            }
        }
        set.addAll(toAdd);
        set.removeAll(toRemove);
    }

    /**
     * Removes single-word candidates whose lower-cased value is contained in the lower case
     * dictionary of the model.
     *
     * @param set the entity candidates; modified in place
     */
    private void removeSentenceStartErrors(Set<Annotation> set) {
        // the predicate states which candidates are kept
        Predicate<Annotation> keep = candidate -> {
            String value = candidate.getValue();
            boolean singleWord = !value.contains(" ");
            if (singleWord && model.lowerCaseDictionary.contains(value.toLowerCase())) {
                LOGGER.debug("Remove by case signature: {}", candidate.getValue());
                return false;
            }
            return true;
        };
        int removed = CollectionHelper.remove(set, keep);
        LOGGER.debug("Removed {} words using case dictionary", removed);
    }

    /**
     * Removes candidates whose lower-cased value is contained in the model's set of annotations
     * to remove.
     *
     * @param set the entity candidates; modified in place
     */
    private void removeIncorrectlyTaggedInTraining(Set<Annotation> set) {
        // the predicate states which candidates are kept
        Predicate<Annotation> keep = candidate -> {
            String lowerCased = candidate.getValue().toLowerCase();
            return !model.removeAnnotations.contains(lowerCased);
        };
        int removed = CollectionHelper.remove(set, keep);
        LOGGER.debug("Removed {} incorrectly tagged entities in training data", removed);
    }

    /**
     * Combines the category entries of the annotation with the classification of its surrounding
     * character context (40 characters, as passed to {@code NerHelper.getCharacterContext}). The
     * context is only taken into account if it is longer than two characters after trimming.
     *
     * @param classifiedAnnotation the annotation to re-score
     * @param str the text the annotation was extracted from
     * @return a new annotation carrying the combined category entries
     */
    private ClassifiedAnnotation applyContextAnalysis(ClassifiedAnnotation classifiedAnnotation, String str) {
        CategoryEntriesBuilder combined = new CategoryEntriesBuilder();
        combined.add(classifiedAnnotation.getCategoryEntries());
        PalladianTextClassifier contextClassifier = new PalladianTextClassifier(
                model.contextDictionary.getFeatureSetting(), new ExperimentalScorers.CategoryEqualizationScorer());
        String context = NerHelper.getCharacterContext(classifiedAnnotation, str, 40);
        boolean contextUsable = context.trim().length() > 2;
        if (contextUsable) {
            CategoryEntries contextEntries = contextClassifier.classify(context, model.contextDictionary);
            combined.add(contextEntries);
        }
        return new ClassifiedAnnotation(classifiedAnnotation, combined.m76create());
    }

    /**
     * Determines the left contexts of the given annotations which are used for unwrapping
     * candidates while tagging.
     * <p>
     * Counted are, on the one hand, the left contexts of all annotations (as delivered by
     * {@code NerHelper.getLeftContexts} with the argument 3) and, on the other hand, all word
     * prefixes of the annotation values. An upper-case-starting left context is accepted if it
     * <ul>
     * <li>occurs at least twice as left context,</li>
     * <li>occurs less often as annotation prefix than as left context, and</li>
     * <li>reaches the minimum dictionary count with both counts summed up.</li>
     * </ul>
     *
     * @param str the training text
     * @param annotations the training annotations
     * @return the accepted left contexts
     */
    private Set<String> buildLeftContexts(String str, Annotations<Annotation> annotations) {
        LOGGER.info("Building left contexts");
        Bag<String> leftContextCounts = Bag.create();
        Bag<String> entityPrefixCounts = Bag.create();
        for (Annotation annotation : annotations) {
            leftContextCounts.addAll(NerHelper.getLeftContexts(annotation, str, 3));
            // count every word prefix of the value: "a b c" -> "a", "a b", "a b c"
            String[] words = annotation.getValue().split("\\s");
            StringBuilder prefix = new StringBuilder();
            for (int i = 0; i < words.length; i++) {
                if (i > 0) {
                    prefix.append(' ');
                }
                prefix.append(words[i]);
                entityPrefixCounts.add(prefix.toString());
            }
        }
        Set<String> leftContexts = new HashSet<>();
        int minDictionaryCount = trainingSettings.getMinDictionaryCount();
        for (Map.Entry<String, Integer> entry : leftContextCounts.unique()) {
            String leftContext = entry.getKey();
            if (!StringHelper.startsUppercase(leftContext)) {
                continue;
            }
            int asContext = entry.getValue();
            int asPrefix = entityPrefixCounts.count(leftContext);
            // "asPrefix < asContext" is the ratio check (prefix / context < 1) without the
            // integer division, which was only meaningful for positive context counts anyway
            if (asContext >= 2 && asPrefix < asContext && asContext + asPrefix >= minDictionaryCount) {
                leftContexts.add(leftContext);
            }
        }
        return leftContexts;
    }

    /**
     * Trains a text classifier dictionary on the character context (40 characters, as passed to
     * {@code NerHelper.getCharacterContext}) of each annotation, using the annotation's tag as
     * category.
     *
     * @param str the training text the annotations refer to
     * @param iterable the annotations to train with
     * @return the trained dictionary
     */
    private DictionaryModel buildContextDictionary(final String str, Iterable<Annotation> iterable) {
        LOGGER.info("Building context dictionary");
        Function<Annotation, Instance> toContextInstance = annotation -> {
            String context = NerHelper.getCharacterContext(annotation, str, 40);
            return new InstanceBuilder().setText(context).create(annotation.getTag());
        };
        PalladianTextClassifier trainer = new PalladianTextClassifier(
                PalladianNerTrainingSettings.CONTEXT_FEATURE_SETTING, createDictionaryBuilder());
        return trainer.train(CollectionHelper.convert(iterable, toContextInstance));
    }

    /**
     * @return the current model; not set until a model has been trained or loaded
     */
    public PalladianNerModel getModel() {
        return model;
    }

    /**
     * @return the display name of this recognizer, {@code "Palladian NER"}
     */
    @Override
    public String getName() {
        final String name = "Palladian NER";
        return name;
    }

    /**
     * @return the settings controlling the processing steps applied while tagging
     */
    public PalladianNerTaggingSettings getTaggingSettings() {
        return taggingSettings;
    }
}
