package pl.edu.icm.yadda.categorization.corpus.data.fs;

import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.yadda.categorization.corpus.data.CategorySet;
import pl.edu.icm.yadda.categorization.corpus.data.CorpusImportSet;
import pl.edu.icm.yadda.categorization.errors.CategorizationException;
import pl.edu.icm.yadda.service2.categorization.SimcatModelUtils;

/* loaded from: input_file:WEB-INF/lib/yadda-simcat-1.7.2-SNAPSHOT.jar:pl/edu/icm/yadda/categorization/corpus/data/fs/CorpusImportSetFSImpl.class */
/**
 * File-system backed {@link CorpusImportSet}.
 *
 * <p>The set is rooted at a single directory. The name of the set is the file name of that
 * directory, and every entry returned by {@code FileIterator.listDirs} for it becomes one
 * language-specific {@link CategorySet}, keyed by the normalized name of the entry.
 *
 * <p>See {@link #printHelp()} for the expected directory layout.
 */
public class CorpusImportSetFSImpl implements CorpusImportSet {
    private static final Logger log = LoggerFactory.getLogger(CorpusImportSetFSImpl.class);

    /** Root directory of the corpus import set; {@code null} until one has been set. */
    private File corpusSetDir;

    /** Name of the set, i.e. the file name of {@link #corpusSetDir}. */
    private String name;

    /** Language sets keyed by the normalized name of their directory. */
    private Map<String, CategorySet> languageSets = new HashMap<String, CategorySet>();

    /**
     * Creates an unconfigured instance; call {@link #setCorpusSetDir(File)} before use.
     */
    public CorpusImportSetFSImpl() {
    }

    /**
     * Creates an instance rooted at the given directory and loads its language sets.
     *
     * @param file root directory of the corpus import set
     * @throws CategorizationException if {@code file} is {@code null} or loading a language
     *         set fails
     */
    public CorpusImportSetFSImpl(File file) throws CategorizationException {
        setCorpusSetDir(file);
    }

    /**
     * Loads the language sets found under {@code dir} and, only once all of them have been
     * loaded, replaces the current state of this object with the result.
     *
     * <p>Building into locals first keeps the previous directory, name and language sets
     * intact when the directory is missing or a language set cannot be created.
     *
     * @param dir root directory of the corpus import set
     * @throws CategorizationException if {@code dir} is {@code null} or loading a language
     *         set fails
     */
    private void init(File dir) throws CategorizationException {
        if (dir == null) {
            throw new CategorizationException("Corpus set directory not set");
        }
        String setName = dir.getName();
        Map<String, CategorySet> loadedSets = new HashMap<String, CategorySet>();
        for (File languageDir : FileIterator.listDirs(dir)) {
            String language = SimcatModelUtils.normalize(languageDir.getName());
            loadedSets.put(language, new CategorySetFSImpl(setName, language, languageDir));
        }

        // Commit only after everything loaded successfully.
        this.corpusSetDir = dir;
        this.name = setName;
        this.languageSets = loadedSets;

        log.info("Corpus import set [{}] initialized (number of language sets: {})",
                setName, Integer.valueOf(loadedSets.size()));
    }

    /**
     * Returns the root directory of this corpus import set.
     *
     * @return the root directory, or {@code null} if none has been set
     */
    public File getCorpusSetDir() {
        return this.corpusSetDir;
    }

    /**
     * Sets the root directory and reloads the name and language sets from it.
     *
     * <p>If this method throws, the previously configured directory, name and language sets
     * are left unchanged.
     *
     * @param file root directory of the corpus import set
     * @throws CategorizationException if {@code file} is {@code null} or loading a language
     *         set fails
     */
    public void setCorpusSetDir(File file) throws CategorizationException {
        init(file);
    }

    /**
     * Returns the language sets of this corpus import set.
     *
     * @return a new collection holding the current language sets; modifying it does not
     *         affect this object
     */
    @Override // pl.edu.icm.yadda.categorization.corpus.data.CorpusImportSet
    public Collection<CategorySet> getLanguageCategoriesSets() {
        return new ArrayList<CategorySet>(this.languageSets.values());
    }

    /**
     * Returns the name of this corpus import set.
     *
     * @return the file name of the root directory, or {@code null} if no directory has been
     *         set
     */
    @Override // pl.edu.icm.yadda.categorization.corpus.data.CorpusImportSet
    public String getName() {
        return this.name;
    }

    /**
     * Looks up the language set for the given language.
     *
     * @param str language name; it is normalized with {@code SimcatModelUtils.normalize}
     *        before the lookup, the same way directory names are normalized on load
     * @return the matching language set, or {@code null} if there is none
     */
    @Override // pl.edu.icm.yadda.categorization.corpus.data.CorpusImportSet
    public CategorySet getLanguageCategoriesSet(String str) {
        return this.languageSets.get(SimcatModelUtils.normalize(str));
    }

    /**
     * Returns a textual description of the expected directory layout.
     *
     * @return multi-line help text
     */
    public static String printHelp() {
        return "Corpus import set directory structure (test dirs are optional):\n"
                + "+--corpusSetDirectory\n"
                + "..+--language1\n"
                + "....+--category1\n"
                + "......+--train\n"
                + "........files containing text of corpus documents\n"
                + "......+--test\n"
                + "........files containing text of test documents\n"
                + "....+--category2\n"
                + "....+--category3\n"
                + ".......\n"
                + "..+--language2\n"
                + "..+--language3\n"
                + ".......";
    }
}
