package pl.edu.icm.yadda.categorization.classifier.impl;

import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.yadda.categorization.classifier.CategoryClassifier;
import pl.edu.icm.yadda.categorization.classifier.ClassifierFactory;
import pl.edu.icm.yadda.categorization.corpus.CategoryHistory;
import pl.edu.icm.yadda.categorization.corpus.Corpus;
import pl.edu.icm.yadda.categorization.corpus.CorpusChangedEvent;
import pl.edu.icm.yadda.categorization.corpus.CorpusHistory;
import pl.edu.icm.yadda.categorization.errors.CategorizationException;
import pl.edu.icm.yadda.common.utils.Utils;
import pl.edu.icm.yadda.service2.categorization.CDocument;
import pl.edu.icm.yadda.service2.categorization.CategoriesInfo;
import pl.edu.icm.yadda.service2.categorization.CategorizationResult;
import pl.edu.icm.yadda.service2.categorization.SimcatModelUtils;
import pl.edu.icm.yadda.tools.textcat.LanguageIdentifierBean;

/* loaded from: input_file:WEB-INF/lib/yadda-simcat-1.10.3.jar:pl/edu/icm/yadda/categorization/classifier/impl/GenericClassifier.class */
public class GenericClassifier implements CategoryClassifier {
    private static final Logger log = LoggerFactory.getLogger(GenericClassifier.class);
    private LanguageIdentifierBean langIdentifier;
    private ClassifierFactory classifierFactory;
    private ClassifierProperties properties;
    private Corpus corpus;

    public GenericClassifier() throws CategorizationException {
        try {
            this.langIdentifier = new LanguageIdentifierBean();
        } catch (Exception e) {
            throw new CategorizationException("Error occurred while creating generic classifier", e);
        }
    }

    @Override // pl.edu.icm.yadda.categorization.classifier.CategoryClassifier
    public CategorizationResult categorize(CDocument cDocument) throws CategorizationException {
        String language = cDocument.getLanguage();
        if (Utils.emptyStr(language)) {
            language = classifyLanguage(cDocument.getText());
            if (language == null) {
                log.warn("Language of document could not be recognized (doc id:" + cDocument.getId() + DefaultExpressionEngine.DEFAULT_INDEX_END);
                return CategorizationResult.createNonScoreResult(cDocument.getId(), CategorizationResult.Status.LANGUAGE_NOT_RECOGNIZED);
            }
        }
        String normalize = SimcatModelUtils.normalize(language);
        CategoryClassifier classifier = getClassifier(normalize, false);
        if (classifier != null) {
            return classifier.categorize(cDocument);
        }
        log.warn("Category classifier for language [" + normalize + "] does not exist (docid:" + cDocument.getId() + DefaultExpressionEngine.DEFAULT_INDEX_END);
        return CategorizationResult.createNonScoreResult(cDocument.getId(), CategorizationResult.Status.EMPTY_LANGUAGE_CORPUS);
    }

    @Override // pl.edu.icm.yadda.categorization.corpus.CorpusEventListener
    public void corpusChanged(CorpusChangedEvent corpusChangedEvent) {
        try {
            doCorpusChanged(corpusChangedEvent);
            log.debug("Generic classifier processed corpus changed event");
        } catch (CategorizationException e) {
            log.error("Error occured while processing corpus changes in generic classifier", (Throwable) e);
        }
    }

    public void init() throws CategorizationException {
        Date date = this.properties.getDate();
        Date date2 = new Date();
        String corpusBackendId = this.properties.getCorpusBackendId();
        String storageBackendId = this.corpus.getStorageBackendId();
        if (corpusBackendId == null) {
            this.properties.setCorpusBackendId(storageBackendId);
        } else if (!corpusBackendId.equals(storageBackendId)) {
            log.info("Corpus backend has changed (old=" + corpusBackendId + ", new=" + storageBackendId + DefaultExpressionEngine.DEFAULT_INDEX_END);
            date = null;
            this.classifierFactory.dropAllClassifiers();
            this.properties.setCorpusBackendId(storageBackendId);
        }
        CorpusChangedEvent corpusChangedEvent = new CorpusChangedEvent();
        corpusChangedEvent.setCorpus(this.corpus);
        corpusChangedEvent.setStartDate(date);
        corpusChangedEvent.setEndDate(date2);
        doCorpusChanged(corpusChangedEvent);
    }

    private void doCorpusChanged(CorpusChangedEvent corpusChangedEvent) throws CategorizationException {
        CorpusHistory history = this.corpus.getHistory(this.properties.getDate(), corpusChangedEvent.getEndDate(), null);
        HashSet hashSet = new HashSet();
        Iterator<CategoryHistory> it = history.getHistory().iterator();
        while (it.hasNext()) {
            hashSet.add(it.next().getLanguage());
        }
        Iterator it2 = hashSet.iterator();
        while (it2.hasNext()) {
            getClassifier((String) it2.next(), true);
        }
        for (CategoryClassifier categoryClassifier : this.classifierFactory.getClassifiers()) {
            categoryClassifier.corpusChanged(corpusChangedEvent);
            if (categoryClassifier.isEmpty()) {
                this.classifierFactory.dropClassifier(categoryClassifier);
            }
        }
        this.properties.setDate(corpusChangedEvent.getEndDate());
    }

    private String classifyLanguage(String str) {
        String classify = this.langIdentifier.classify(str);
        if ("**".equals(classify)) {
            return null;
        }
        return classify;
    }

    private CategoryClassifier getClassifier(String str, boolean z) throws CategorizationException {
        return this.classifierFactory.getClassifier(SimcatModelUtils.normalize(str), z);
    }

    public void setClassifierFactory(ClassifierFactory classifierFactory) {
        this.classifierFactory = classifierFactory;
    }

    public void setClassifierProperties(ClassifierProperties classifierProperties) {
        this.properties = classifierProperties;
    }

    public void setCorpus(Corpus corpus) {
        this.corpus = corpus;
    }

    @Override // pl.edu.icm.yadda.categorization.classifier.CategoryClassifier
    public boolean isEmpty() throws CategorizationException {
        return false;
    }

    @Override // pl.edu.icm.yadda.categorization.classifier.CategoryClassifier
    public CategoriesInfo getCategoriesInfo() throws CategorizationException {
        CategoriesInfoImpl categoriesInfoImpl = new CategoriesInfoImpl();
        Iterator<CategoryClassifier> it = this.classifierFactory.getClassifiers().iterator();
        while (it.hasNext()) {
            categoriesInfoImpl.addInfo(it.next().getCategoriesInfo());
        }
        return categoriesInfoImpl;
    }
}
