package ws.palladian.extraction.entity.tagger;

import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.CollectionObjectStream;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
import org.apache.commons.lang3.Validate;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.core.Annotation;
import ws.palladian.core.AnnotationFilters;
import ws.palladian.core.CategoryEntriesBuilder;
import ws.palladian.core.ClassifyingTagger;
import ws.palladian.extraction.entity.Annotations;
import ws.palladian.extraction.entity.FileFormatParser;
import ws.palladian.extraction.entity.TaggingFormat;
import ws.palladian.extraction.entity.TrainableNamedEntityRecognizer;
import ws.palladian.extraction.location.ClassifiedAnnotation;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.collection.LazyMap;
import ws.palladian.helper.functional.Factory;
import ws.palladian.helper.functional.Filters;
import ws.palladian.helper.io.FileHelper;

/* loaded from: input_file:ws/palladian/extraction/entity/tagger/OpenNlpNer.class */
/**
 * Named entity recognizer backed by OpenNLP's {@link NameFinderME}.
 *
 * <p>
 * Training writes one {@code openNLP_&lt;type&gt;.bin} model per entity type into a target directory;
 * {@link #loadModel(String)} reads every {@code .bin} file from such a directory. During tagging, each
 * loaded model is applied to every sentence and the type probabilities reported for an identical
 * character range are collected into a single {@link ClassifiedAnnotation}.
 * </p>
 */
public class OpenNlpNer extends TrainableNamedEntityRecognizer implements ClassifyingTagger {
    private static final Logger LOGGER = LoggerFactory.getLogger(OpenNlpNer.class);

    /** Parameters handed to {@link NameFinderME} when training a model. */
    private static final TrainingParameters TRAIN_PARAMS = TrainingParameters.defaultParams();

    /** No custom feature generator is passed to training (presumably OpenNLP then uses its default). */
    private static final AdaptiveFeatureGenerator FEATURE_GENERATOR = null;

    /** No additional resources are passed to training. */
    private static final Map<String, Object> RESOURCES = Collections.emptyMap();

    private final Tokenizer tokenizer;
    private final SentenceDetector sentenceDetector;

    /** Models populated by {@link #loadModel(String)}; {@code null} until a model directory was loaded. */
    private List<TokenNameFinderModel> nameFinderModels;

    /**
     * Creates a new recognizer.
     *
     * @param tokenizer the tokenizer used to split sentences into tokens, not {@code null}
     * @param sentenceDetector the detector used to split text into sentences, not {@code null}
     */
    public OpenNlpNer(Tokenizer tokenizer, SentenceDetector sentenceDetector) {
        Validate.notNull(tokenizer, "tokenizer must not be null");
        Validate.notNull(sentenceDetector, "sentenceDetector must not be null");
        this.tokenizer = tokenizer;
        this.sentenceDetector = sentenceDetector;
    }

    /**
     * Loads all {@code .bin} models found in the given directory, replacing previously loaded models.
     *
     * @param str path to an existing directory which contains at least one {@code .bin} model
     * @return always {@code true}; failures are signalled by exceptions
     * @throws IllegalStateException if one of the model files cannot be read or has an invalid format
     */
    public boolean loadModel(String str) {
        Validate.notNull(str, "configModelFilePath must not be null");
        File modelDirectory = new File(str);
        Validate.isTrue(modelDirectory.isDirectory(), "Model file path must be an existing directory.");
        List<File> modelFiles = FileHelper.getFiles(modelDirectory, Filters.fileExtension(new String[]{".bin"}), Filters.NONE);
        Validate.isTrue(!modelFiles.isEmpty(), "Model file path must at least provide one .bin model.");
        // Collect into a local list first, so that a failure half way through does not leave a partial model set.
        List<TokenNameFinderModel> loadedModels = new ArrayList<TokenNameFinderModel>();
        for (File modelFile : modelFiles) {
            LOGGER.info("Loading {}", modelFile);
            try {
                loadedModels.add(new TokenNameFinderModel(modelFile));
            } catch (InvalidFormatException e) {
                throw new IllegalStateException("InvalidFormatException when trying to load " + modelFile, e);
            } catch (IOException e) {
                throw new IllegalStateException("IOException when trying to load " + modelFile, e);
            }
        }
        this.nameFinderModels = loadedModels;
        LOGGER.info("{} models successfully loaded", modelFiles.size());
        return true;
    }

    /**
     * Tags the given text with all loaded models.
     *
     * @param str the text to tag, not {@code null}
     * @return the annotations, sorted via {@code Annotations#sort()}; each annotation carries the type
     *         probabilities which the models reported for its character range
     * @throws IllegalStateException if no model has been loaded
     */
    public List<ClassifiedAnnotation> getAnnotations(String str) {
        Validate.notNull(str, "text must not be null");
        if (this.nameFinderModels == null || this.nameFinderModels.isEmpty()) {
            throw new IllegalStateException("No model available; make sure to load an existing model.");
        }
        // Key: (start, end) character offsets within the full text; value: type probabilities for that range.
        Map<Pair<Integer, Integer>, CategoryEntriesBuilder> builders = LazyMap.create(new Factory<CategoryEntriesBuilder>() {
            @Override
            public CategoryEntriesBuilder create() {
                return new CategoryEntriesBuilder();
            }
        });
        for (Span sentenceSpan : this.sentenceDetector.sentPosDetect(str)) {
            String sentence = sentenceSpan.getCoveredText(str).toString();
            int sentenceOffset = sentenceSpan.getStart();
            Span[] tokenSpans = this.tokenizer.tokenizePos(sentence);
            String[] tokens = Span.spansToStrings(tokenSpans, sentence);
            for (TokenNameFinderModel model : this.nameFinderModels) {
                // A fresh finder per sentence and model, as before; reusing one instance across sentences
                // might carry state between calls, so this is deliberately left unchanged.
                NameFinderME nameFinder = new NameFinderME(model);
                Span[] nameSpans = nameFinder.find(tokens);
                double[] probabilities = nameFinder.probs(nameSpans);
                for (int i = 0; i < nameSpans.length; i++) {
                    Span nameSpan = nameSpans[i];
                    // Name spans are token indices (end exclusive); translate them to character offsets.
                    int start = sentenceOffset + tokenSpans[nameSpan.getStart()].getStart();
                    int end = sentenceOffset + tokenSpans[nameSpan.getEnd() - 1].getEnd();
                    builders.get(Pair.of(start, end)).add(nameSpan.getType(), probabilities[i]);
                }
            }
        }
        Annotations<ClassifiedAnnotation> annotations = new Annotations<ClassifiedAnnotation>();
        for (Map.Entry<Pair<Integer, Integer>, CategoryEntriesBuilder> entry : builders.entrySet()) {
            int start = entry.getKey().getLeft();
            int end = entry.getKey().getRight();
            annotations.add(new ClassifiedAnnotation(start, str.substring(start, end), entry.getValue().create()));
        }
        annotations.sort();
        return annotations;
    }

    /**
     * @return the file extension of the model files, {@code "bin"}
     */
    public String getModelFileEnding() {
        return "bin";
    }

    /**
     * @return {@code false}
     */
    public boolean setsModelFileEndingAutomatically() {
        return false;
    }

    /**
     * @return {@code true}, as {@link #train(String, String)} writes a separate model file per entity type
     */
    public boolean oneModelPerConcept() {
        return true;
    }

    /**
     * Trains one model per entity type found in the training data and writes each one as
     * {@code openNLP_&lt;type&gt;.bin} into the given directory.
     *
     * @param str path to the training file in column format
     * @param str2 path to the directory for the model files; it is created if it does not exist
     * @return always {@code true}; failures are signalled by exceptions
     * @throws IllegalArgumentException if the model directory does not exist and cannot be created
     * @throws IllegalStateException if an {@link IOException} occurs while training or writing a model
     */
    public boolean train(String str, String str2) {
        File modelDirectory = new File(str2);
        if (!modelDirectory.isDirectory() && !modelDirectory.mkdirs()) {
            throw new IllegalArgumentException("Directory " + str2 + " could not be created.");
        }
        List<? extends Annotation> annotations = FileFormatParser.getAnnotationsFromColumn(str);
        String text = FileFormatParser.getText(str, TaggingFormat.COLUMN);
        Set<String> types = CollectionHelper.convertSet(annotations, Annotation.TAG_CONVERTER);
        LOGGER.info("Training for types: {}", types);
        // The sentence boundaries do not depend on the entity type, so detect them only once.
        Span[] sentenceSpans = this.sentenceDetector.sentPosDetect(text);
        for (String type : types) {
            LOGGER.debug("Training {}", type);
            List<? extends Annotation> typeAnnotations = CollectionHelper.filterList(annotations, AnnotationFilters.tag(type));
            List<NameSample> samples = createSamples(text, sentenceSpans, typeAnnotations);
            trainAndWriteModel(type, samples, modelDirectory);
        }
        return true;
    }

    /**
     * Builds one training sample for each sentence which contains at least one of the given annotations.
     */
    private List<NameSample> createSamples(String text, Span[] sentenceSpans, List<? extends Annotation> typeAnnotations) {
        List<NameSample> samples = new ArrayList<NameSample>();
        for (Span sentenceSpan : sentenceSpans) {
            String sentence = sentenceSpan.getCoveredText(text).toString();
            int sentenceStart = sentenceSpan.getStart();
            int sentenceEnd = sentenceSpan.getEnd();
            Span[] tokenSpans = this.tokenizer.tokenizePos(sentence);
            String[] tokens = Span.spansToStrings(tokenSpans, sentence);
            List<? extends Annotation> sentenceAnnotations = CollectionHelper.filterList(typeAnnotations, AnnotationFilters.range(sentenceStart, sentenceEnd));
            if (!sentenceAnnotations.isEmpty()) {
                samples.add(new NameSample(tokens, getSpans(sentenceStart, sentenceAnnotations, tokenSpans), true));
            }
        }
        return samples;
    }

    /**
     * Trains a model for a single entity type from the given samples and serializes it into the directory.
     */
    private static void trainAndWriteModel(String type, List<NameSample> samples, File modelDirectory) {
        ObjectStream<NameSample> sampleStream = new CollectionObjectStream<NameSample>(samples);
        BufferedOutputStream modelOut = null;
        try {
            TokenNameFinderModel model = NameFinderME.train("en", type, sampleStream, TRAIN_PARAMS, FEATURE_GENERATOR, RESOURCES);
            modelOut = new BufferedOutputStream(new FileOutputStream(new File(modelDirectory, "openNLP_" + type + ".bin")));
            model.serialize(modelOut);
            // Flush explicitly, so that a write failure is reported here and not only while closing.
            modelOut.flush();
        } catch (IOException e) {
            throw new IllegalStateException("IOException during training", e);
        } finally {
            try {
                sampleStream.close();
            } catch (IOException e) {
                // Best effort: the samples are held in memory, a failure to close must not abort training.
                LOGGER.debug("Could not close the sample stream", e);
            }
            FileHelper.close(new Closeable[]{modelOut});
        }
    }

    /**
     * Converts character-based annotations into token-based spans for one sentence.
     *
     * <p>
     * An annotation is only converted when its start position coincides with the start of a token and its
     * end position coincides with the end of a token; otherwise a warning is logged and it is skipped.
     * </p>
     *
     * @param sentenceOffset character offset of the sentence within the full text
     * @param annotations the annotations to convert, with positions relative to the full text
     * @param tokenSpans the token spans, with positions relative to the sentence
     * @return the token spans (end index exclusive), each carrying the tag of its annotation
     */
    private static Span[] getSpans(int sentenceOffset, List<? extends Annotation> annotations, Span[] tokenSpans) {
        List<Span> spans = new ArrayList<Span>();
        for (Annotation annotation : annotations) {
            int startToken = -1;
            int endToken = -1;
            for (int tokenIdx = 0; tokenIdx < tokenSpans.length; tokenIdx++) {
                Span tokenSpan = tokenSpans[tokenIdx];
                if (sentenceOffset + tokenSpan.getStart() == annotation.getStartPosition()) {
                    startToken = tokenIdx;
                }
                if (sentenceOffset + tokenSpan.getEnd() == annotation.getEndPosition()) {
                    endToken = tokenIdx + 1;
                }
            }
            if (startToken == -1 || endToken == -1) {
                LOGGER.warn("Could not properly align {} (start={}, end={})", new Object[]{annotation, Integer.valueOf(startToken), Integer.valueOf(endToken)});
            } else {
                spans.add(new Span(startToken, endToken, annotation.getTag()));
            }
        }
        return spans.toArray(new Span[spans.size()]);
    }

    /**
     * @return the name of this recognizer, {@code "OpenNLP NER"}
     */
    public String getName() {
        return "OpenNLP NER";
    }
}
