package pl.edu.icm.yadda.categorization.corpus.data.fs;

import java.io.File;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.yadda.categorization.corpus.data.CategorySet;
import pl.edu.icm.yadda.categorization.errors.CategorizationException;
import pl.edu.icm.yadda.service2.categorization.CDocument;
import pl.edu.icm.yadda.service2.categorization.CorpusDocument;
import pl.edu.icm.yadda.service2.categorization.SimcatModelUtils;

/* loaded from: input_file:WEB-INF/lib/yadda-simcat-1.10.1.jar:pl/edu/icm/yadda/categorization/corpus/data/fs/CategorySetFSImpl.class */
/**
 * {@link CategorySet} implementation backed by a directory tree on the local file system.
 *
 * <p>Every directory returned by {@code FileIterator.listDirs(setDir)} that contains a
 * {@code train} sub-directory is registered as a category, keyed by the directory name.
 * A {@code test} sub-directory is optional; its absence is only logged at debug level.
 *
 * <p>Documents are read lazily, one file per call to {@code next()} on the returned iterators.
 * This class performs no synchronization of its own.
 */
public class CategorySetFSImpl implements CategorySet {
    private static final Logger log = LoggerFactory.getLogger(CategorySetFSImpl.class);
    private static final String TRAIN_DIR = "train";
    private static final String TEST_DIR = "test";

    /** Name of this set; used as the first component of generated document ids. */
    private String name;
    /** Language assigned to every document produced by this set. */
    private String language;
    /** Root directory of the set; category directories are looked up beneath it. */
    private File setDir;
    /** Category name (directory name) mapped to the category directory. */
    private Map<String, File> categoryDirs = new HashMap<String, File>();

    /**
     * Adapts an iterator over files into an iterator over test documents.
     * Removal is not supported.
     */
    class TestDocsIterator implements Iterator<CDocument> {
        private String language;
        private Iterator<File> fileIterator;

        /**
         * @param str language assigned to every produced document
         * @param it  source of the files to read
         */
        TestDocsIterator(String str, Iterator<File> it) {
            this.language = str;
            this.fileIterator = it;
        }

        @Override
        public boolean hasNext() {
            return this.fileIterator.hasNext();
        }

        /**
         * Reads the next file and wraps its content in a {@link CDocument}.
         *
         * @throws RuntimeException if the file cannot be read or the document cannot be built;
         *         the original failure is preserved as the cause
         */
        @Override
        public CDocument next() {
            // Advance outside the try block: when the underlying iterator is exhausted its own
            // exception (normally NoSuchElementException) must reach the caller unwrapped, as
            // the Iterator contract requires.
            File docFile = this.fileIterator.next();
            try {
                // NOTE(review): readFileToString(File) decodes with the platform default
                // charset - confirm the corpus encoding matches the deployment default.
                String content = FileUtils.readFileToString(docFile);
                return new CDocument(CategorySetFSImpl.this.getDocumentId(docFile), this.language, content);
            } catch (Exception e) {
                throw new RuntimeException("Error occurred while iterating over test documents", e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Adapts an iterator over files into an iterator over training documents of one category.
     * Removal is not supported.
     */
    class TrainDocsIterator implements Iterator<CorpusDocument> {
        private String language;
        private String category;
        private Iterator<File> fileIterator;

        /**
         * @param str  language assigned to every produced document
         * @param str2 category assigned to every produced document
         * @param it   source of the files to read
         */
        TrainDocsIterator(String str, String str2, Iterator<File> it) {
            this.language = str;
            this.category = str2;
            this.fileIterator = it;
        }

        @Override
        public boolean hasNext() {
            return this.fileIterator.hasNext();
        }

        /**
         * Reads the next file and wraps its content in a {@link CorpusDocument}.
         *
         * @throws RuntimeException if the file cannot be read or the document cannot be built;
         *         the original failure is preserved as the cause
         */
        @Override
        public CorpusDocument next() {
            // Advance outside the try block so that exhaustion of the underlying iterator is
            // reported by its own exception instead of a generic wrapper.
            File docFile = this.fileIterator.next();
            try {
                // NOTE(review): readFileToString(File) decodes with the platform default
                // charset - confirm the corpus encoding matches the deployment default.
                String content = FileUtils.readFileToString(docFile);
                return new CorpusDocument(CategorySetFSImpl.this.getDocumentId(docFile), this.category, this.language, content);
            } catch (Exception e) {
                throw new RuntimeException("Error occurred while iterating over training corpus documents", e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Creates an unconfigured set. {@link #setTestSetDir(File)} must be called before any
     * category can be queried.
     */
    public CategorySetFSImpl() {
    }

    /**
     * Creates a set and immediately scans {@code file} for category directories.
     *
     * @param str  name of the set
     * @param str2 language of the documents in the set
     * @param file root directory of the set
     * @throws CategorizationException if {@code file} is {@code null}
     */
    public CategorySetFSImpl(String str, String str2, File file) throws CategorizationException {
        this.name = str;
        this.language = str2;
        setTestSetDir(file);
    }

    /**
     * Rebuilds the category map from the current root directory. Directories without a
     * {@code train} sub-directory are skipped with a warning.
     *
     * @throws CategorizationException if the root directory has not been set
     */
    private void init() throws CategorizationException {
        if (this.setDir == null) {
            throw new CategorizationException("Test directory not set");
        }
        this.categoryDirs.clear();
        for (File categoryDir : FileIterator.listDirs(this.setDir)) {
            if (!new File(categoryDir, TRAIN_DIR).exists()) {
                log.warn("Directory [{}] is not valid category directory (no {} directory)", categoryDir, TRAIN_DIR);
                continue;
            }
            if (!new File(categoryDir, TEST_DIR).exists()) {
                log.debug("Directory [{}] has no test directory", categoryDir);
            }
            this.categoryDirs.put(categoryDir.getName(), categoryDir);
        }
    }

    /**
     * @return names of all currently registered categories, in no particular order
     */
    @Override
    public String[] getCategories() {
        return this.categoryDirs.keySet().toArray(new String[0]);
    }

    /**
     * Unregisters a category from this set; nothing is deleted on disk.
     *
     * @param str category name; it is normalized before the lookup
     */
    @Override
    public void removeCategory(String str) {
        // NOTE(review): keys are stored as raw directory names in init() and looked up raw in
        // getTestDocuments/getTrainingDocuments, but removal uses the normalized name. If
        // normalize() can alter a directory name this call silently removes nothing - confirm.
        this.categoryDirs.remove(SimcatModelUtils.normalize(str));
    }

    /**
     * @param str category name
     * @return iterator over the documents in the category's {@code test} directory
     * @throws CategorizationException if the category is not registered in this set
     */
    @Override
    public Iterator<CDocument> getTestDocuments(String str) throws CategorizationException {
        return new TestDocsIterator(this.language, new FileIterator(getTestDir(str)));
    }

    /**
     * @param str category name
     * @return iterator over the documents in the category's {@code train} directory
     * @throws CategorizationException if the category is not registered in this set
     */
    @Override
    public Iterator<CorpusDocument> getTrainingDocuments(String str) throws CategorizationException {
        return new TrainDocsIterator(this.language, str, new FileIterator(getTrainDir(str)));
    }

    /** Resolves the {@code test} directory of the given category. */
    private File getTestDir(String str) throws CategorizationException {
        return new File(getCategoryDir(str), TEST_DIR);
    }

    /** Resolves the {@code train} directory of the given category. */
    private File getTrainDir(String str) throws CategorizationException {
        return new File(getCategoryDir(str), TRAIN_DIR);
    }

    /** Looks up the directory of a registered category or fails with a descriptive error. */
    private File getCategoryDir(String category) throws CategorizationException {
        File categoryDir = this.categoryDirs.get(category);
        if (categoryDir == null) {
            throw new CategorizationException("Category [" + category + "] does not exist in this data set");
        }
        return categoryDir;
    }

    /** @return name of this set */
    public String getName() {
        return this.name;
    }

    /** @param str name of this set */
    public void setName(String str) {
        this.name = str;
    }

    /** @return root directory of this set, or {@code null} if it has not been set */
    public File getTestSetDir() {
        return this.setDir;
    }

    /**
     * Sets the root directory and rescans it for categories.
     *
     * @param file root directory of the set
     * @throws CategorizationException if {@code file} is {@code null}; in that case the
     *         previously configured directory and categories are left untouched
     */
    public void setTestSetDir(File file) throws CategorizationException {
        // Validate before mutating: otherwise a null argument would leave setDir == null next
        // to stale category entries, and later document iteration would fail with an NPE.
        if (file == null) {
            throw new CategorizationException("Test directory not set");
        }
        this.setDir = file;
        init();
    }

    /** @param str language assigned to documents produced by this set */
    public void setLanguage(String str) {
        this.language = str;
    }

    /** @return language assigned to documents produced by this set */
    @Override
    public String getLanguage() {
        return this.language;
    }

    /**
     * Builds a document id of the form {@code <name>_<language>_<path relative to the set root>}.
     *
     * @param file document file located under the set root directory
     * @return the generated document id
     * @throws RuntimeException if the file's absolute path does not start with the root's
     */
    String getDocumentId(File file) {
        String filePath = file.getAbsolutePath();
        String rootPath = this.setDir.getAbsolutePath();
        if (!filePath.startsWith(rootPath)) {
            throw new RuntimeException("Unexpected file path (root=" + rootPath + ", path=" + filePath + ")");
        }
        String relativePath = filePath.substring(rootPath.length());
        // Drop the separator left between root and relative part. The platform separator is
        // checked as well so that ids do not start with a stray '\' where it is not '/'.
        if (relativePath.startsWith("/") || relativePath.startsWith(File.separator)) {
            relativePath = relativePath.substring(1);
        }
        return this.name + "_" + this.language + "_" + relativePath;
    }
}
