package ws.palladian.classification.text;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.lang3.Validate;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import ws.palladian.classification.text.DictionaryModel;
import ws.palladian.classification.text.DictionaryTrieModel;
import ws.palladian.core.CategoryEntries;
import ws.palladian.helper.ProgressMonitor;
import ws.palladian.helper.collection.AbstractIterator;
import ws.palladian.helper.functional.Filter;

/* loaded from: input_file:ws/palladian/classification/text/LuceneDictionaryModel.class */
public final class LuceneDictionaryModel extends AbstractDictionaryModel implements Closeable {
    /** Name of the stored string field which holds the term of a dictionary entry document. */
    static final String FIELD_TERM = "term";
    /** Field name handed to {@code CategoryEntriesDoc} for a term's category entries; read back via the document's term vector. */
    static final String FIELD_TERM_CAT = "termCategory";
    /** Field name handed to {@code CategoryEntriesDoc} for the dictionary's document counts; read back via the document's term vector. */
    static final String FIELD_DOC_CAT = "docCategory";
    /** Name of the marker field used to look up the single document which carries the document counts. */
    private static final String FIELD_COUNTS = "counts";
    /** Value of the {@link #FIELD_COUNTS} field on the document which carries the document counts. */
    private static final String VALUE_DOCUMENT_COUNTS = "documentCounts";
    /** Commit user data key under which the summed size of all entries' category entries is stored. */
    private static final String PROPERTY_NUM_ENTRIES = "numEntries";
    /** Commit user data key under which the dictionary's name is stored (only written when the name is not null). */
    private static final String PROPERTY_NAME = "name";
    /** Lucene version handed to the {@link IndexWriterConfig} when writing an index. */
    private static final Version VERSION = Version.LUCENE_47;
    /** Analyzer handed to the {@link IndexWriterConfig} when writing an index. */
    private static final Analyzer ANALYZER = new KeywordAnalyzer();
    /** Searcher operating on {@link #reader}. */
    private final IndexSearcher searcher;
    /** Reader on the underlying index; closed by {@link #close()}. */
    private final DirectoryReader reader;
    /** Feature setting restored from the commit user data, or <code>null</code> if none was stored. */
    private final FeatureSetting featureSetting;
    /** Dictionary name restored from the commit user data, or <code>null</code> if none was stored. */
    private final String name;
    /** Document counts, read once in the constructor. */
    private final CategoryEntries documentCounts;
    /** Term counts, computed once in the constructor from the terms of the {@link #FIELD_TERM_CAT} field. */
    private final CategoryEntries termCounts;
    /** Number of terms in the {@link #FIELD_TERM} field, determined once in the constructor. */
    private final int numUniqueTerms;
    /** Value of the {@link #PROPERTY_NUM_ENTRIES} commit property, parsed once in the constructor. */
    private final int numEntries;

    /* loaded from: input_file:ws/palladian/classification/text/LuceneDictionaryModel$Builder.class */
    /**
     * {@link DictionaryBuilder} which gathers all data in a {@link DictionaryTrieModel.Builder} and, once the model is
     * requested, writes the gathered dictionary to a Lucene index at the configured path (see
     * {@link LuceneDictionaryModel#index(DictionaryModel, File)}). All configuration methods forward to the wrapped builder
     * and return this builder.
     */
    public static final class Builder implements DictionaryBuilder {
        /** Directory which will receive the Lucene index. */
        private final File directoryPath;
        /** Builder which accumulates the dictionary before it is written to the index. */
        private final DictionaryBuilder delegate;

        /**
         * @param file The directory for the Lucene index, not <code>null</code>.
         */
        public Builder(File file) {
            delegate = new DictionaryTrieModel.Builder();
            Validate.notNull(file, "directoryPath must not be null", new Object[0]);
            directoryPath = file;
        }

        /**
         * Builds the dictionary with the wrapped builder and indexes it at the configured path.
         *
         * @return The model backed by the newly written Lucene index.
         */
        /* renamed from: create, reason: merged with bridge method [inline-methods] */
        public DictionaryModel m6create() {
            DictionaryModel builtDictionary = (DictionaryModel) delegate.create();
            return LuceneDictionaryModel.index(builtDictionary, directoryPath);
        }

        /** Forwards the name to the wrapped builder. */
        public DictionaryBuilder setName(String str) {
            delegate.setName(str);
            return this;
        }

        /** Forwards the feature setting to the wrapped builder. */
        public DictionaryBuilder setFeatureSetting(FeatureSetting featureSetting) {
            delegate.setFeatureSetting(featureSetting);
            return this;
        }

        /** Forwards the document (a collection of strings plus one further string argument) to the wrapped builder. */
        public DictionaryBuilder addDocument(Collection<String> collection, String str) {
            delegate.addDocument(collection, str);
            return this;
        }

        /** Forwards the document, including the additional integer argument, to the wrapped builder. */
        public DictionaryBuilder addDocument(Collection<String> collection, String str, int i) {
            delegate.addDocument(collection, str, i);
            return this;
        }

        /** Forwards the pruning strategy to the wrapped builder. */
        public DictionaryBuilder setPruningStrategy(Filter<? super CategoryEntries> filter) {
            delegate.setPruningStrategy(filter);
            return this;
        }

        /** Forwards the dictionary to the wrapped builder. */
        public DictionaryBuilder addDictionary(DictionaryModel dictionaryModel) {
            delegate.addDictionary(dictionaryModel);
            return this;
        }
    }

    /**
     * Writes the given dictionary into a new Lucene index at the given path and opens that index as a
     * {@link LuceneDictionaryModel}.
     * <p>
     * The index receives one document carrying the dictionary's document counts, followed by one document per dictionary
     * entry. The feature setting (if present), the name (if present) and the summed size of all entries' category entries
     * are stored as commit user data.
     *
     * @param dictionaryModel The dictionary to write, not <code>null</code>.
     * @param file The directory for the index, not <code>null</code>. The path must not exist yet.
     * @return The model backed by the newly written index.
     * @throws IllegalStateException In case the path already exists, or in case an {@link IOException} occurs while
     *             writing or opening the index.
     */
    public static LuceneDictionaryModel index(DictionaryModel dictionaryModel, File file) {
        Validate.notNull(dictionaryModel, "dictionary must not be null", new Object[0]);
        Validate.notNull(file, "directoryPath must not be null", new Object[0]);
        if (file.exists()) {
            throw new IllegalStateException("Path '" + file + " already exists. Delete first or pick different path.");
        }
        Directory directory = null;
        IndexWriter indexWriter = null;
        boolean success = false;
        try {
            directory = FSDirectory.open(file);
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(VERSION, ANALYZER));
            ProgressMonitor progressMonitor = new ProgressMonitor();
            progressMonitor.startTask("Writing Lucene dict.", dictionaryModel.getNumUniqTerms());
            // the document counts go into a dedicated document, which is identified through a marker field
            indexWriter.addDocument(new CategoryEntriesDoc(FIELD_DOC_CAT, dictionaryModel.getDocumentCounts(), new StringField(FIELD_COUNTS, VALUE_DOCUMENT_COUNTS, Field.Store.NO)));
            int numEntries = 0;
            Iterator<?> entries = dictionaryModel.iterator();
            while (entries.hasNext()) {
                DictionaryModel.DictionaryEntry dictionaryEntry = (DictionaryModel.DictionaryEntry) entries.next();
                CategoryEntries categoryEntries = dictionaryEntry.getCategoryEntries();
                indexWriter.addDocument(new CategoryEntriesDoc(FIELD_TERM_CAT, categoryEntries, new StringField(FIELD_TERM, dictionaryEntry.getTerm(), Field.Store.YES)));
                progressMonitor.increment();
                numEntries += categoryEntries.size();
            }
            Map<String, String> commitData = new HashMap<String, String>();
            if (dictionaryModel.getFeatureSetting() != null) {
                commitData.putAll(dictionaryModel.getFeatureSetting().toMap());
            }
            if (dictionaryModel.getName() != null) {
                commitData.put(PROPERTY_NAME, dictionaryModel.getName());
            }
            commitData.put(PROPERTY_NUM_ENTRIES, String.valueOf(numEntries));
            indexWriter.setCommitData(commitData);
            indexWriter.commit();
            // close explicitly here, so that a failure while closing is reported to the caller
            indexWriter.close();
            LuceneDictionaryModel luceneDictionaryModel = new LuceneDictionaryModel(directory);
            success = true;
            return luceneDictionaryModel;
        } catch (IOException e) {
            throw new IllegalStateException(e);
        } finally {
            // the writer may never have been created (e.g. when opening the directory failed), closeSilently copes with null
            closeSilently(indexWriter);
            if (!success) {
                // on success the directory stays open, because the returned model reads from it
                closeSilently(directory);
            }
        }
    }

    /**
     * Closes the given resource as a best-effort cleanup step.
     *
     * @param closeable The resource to close, may be <code>null</code>.
     */
    private static void closeSilently(Closeable closeable) {
        if (closeable == null) {
            return;
        }
        try {
            closeable.close();
        } catch (IOException ignored) {
            // deliberately ignored: this is only cleanup, and it must not mask an exception which is already propagating
        }
    }

    /**
     * Opens the Lucene index stored at the given file system path as dictionary model.
     *
     * @param file The directory which contains the index.
     * @throws IOException In case opening the directory fails.
     */
    public LuceneDictionaryModel(File file) throws IOException {
        this(FSDirectory.open(file));
    }

    /**
     * Opens the Lucene index in the given directory as dictionary model. Feature setting, name and number of entries are
     * restored from the commit user data; document counts, term counts and the number of unique terms are read from the
     * index once and kept in memory.
     * <p>
     * In case initialization fails after the reader was opened, the reader is closed again before the exception is
     * propagated.
     *
     * @param directory The directory which contains the index, not <code>null</code>.
     * @throws IllegalStateException In case an {@link IOException} occurs while accessing the directory.
     */
    public LuceneDictionaryModel(Directory directory) {
        Validate.notNull(directory, "directory must not be null", new Object[0]);
        DirectoryReader openedReader = null;
        boolean initialized = false;
        try {
            openedReader = DirectoryReader.open(directory);
            this.reader = openedReader;
            this.searcher = new IndexSearcher(openedReader);
            Map<String, String> userData = openedReader.getIndexCommit().getUserData();
            this.featureSetting = getFeatureSetting(userData);
            this.name = userData.get(PROPERTY_NAME);
            this.documentCounts = getCategoryEntries(new TermQuery(new Term(FIELD_COUNTS, VALUE_DOCUMENT_COUNTS)), FIELD_DOC_CAT);
            this.termCounts = fetchTermCounts();
            this.numUniqueTerms = countUniqueTerms(MultiFields.getTerms(openedReader, FIELD_TERM));
            this.numEntries = Integer.parseInt(userData.get(PROPERTY_NUM_ENTRIES));
            initialized = true;
        } catch (IOException e) {
            throw new IllegalStateException("Error while accessing the directory", e);
        } finally {
            if (!initialized && openedReader != null) {
                // nobody will be able to call close() on a half-constructed instance, so release the reader here
                try {
                    openedReader.close();
                } catch (IOException ignored) {
                    // deliberately ignored: must not mask the exception which made the initialization fail
                }
            }
        }
    }

    /**
     * Determines the number of terms in the given {@link Terms}.
     *
     * @param terms The terms, <code>null</code> in case the index has no such field.
     * @return The number of terms, 0 for <code>null</code>.
     * @throws IOException In case reading the terms fails.
     */
    private static int countUniqueTerms(Terms terms) throws IOException {
        if (terms == null) {
            return 0;
        }
        long size = terms.size();
        if (size < 0) {
            // the size is reported as -1 when it is not available; count by walking the terms instead
            size = 0;
            TermsEnum termsEnum = terms.iterator((TermsEnum) null);
            while (termsEnum.next() != null) {
                size++;
            }
        }
        return (int) size;
    }

    /**
     * Builds the term counts from the terms of the {@link #FIELD_TERM_CAT} field of the index.
     *
     * @return The category entries created from those terms.
     * @throws IOException In case reading from the index fails.
     */
    private CategoryEntries fetchTermCounts() throws IOException {
        Terms termCategoryTerms = MultiFields.getTerms(reader, FIELD_TERM_CAT);
        return getCategoryEntries(termCategoryTerms);
    }

    /**
     * Restores the feature setting from the given properties.
     *
     * @param map The properties (the commit user data of the index).
     * @return The feature setting created from the properties, or <code>null</code> in case the properties have no
     *         non-null value for the key "textFeatureType".
     */
    private FeatureSetting getFeatureSetting(Map<String, String> map) {
        String textFeatureType = map.get("textFeatureType");
        return textFeatureType == null ? null : new FeatureSetting(map);
    }

    /**
     * Runs the query and reads the category entries from the term vector of the top hit.
     *
     * @param query The query identifying the document.
     * @param str The name of the field whose term vector holds the category entries.
     * @return The category entries of the first matching document, or {@link CategoryEntries#EMPTY} in case the query
     *         has no hits.
     * @throws IOException In case reading from the index fails.
     */
    private CategoryEntries getCategoryEntries(Query query, String str) throws IOException {
        TopDocs topDocs = searcher.search(query, 1);
        if (topDocs.totalHits > 0) {
            int docId = topDocs.scoreDocs[0].doc;
            Terms termVector = reader.getTermVector(docId, str);
            return getCategoryEntries(termVector);
        }
        return CategoryEntries.EMPTY;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Converts the given terms to category entries: each term becomes a category name, its total term frequency becomes
     * the count.
     *
     * @param terms The terms to convert. May be <code>null</code>, which is what Lucene hands out when a field or a term
     *            vector does not exist.
     * @return The category entries, {@link CategoryEntries#EMPTY} in case the terms are <code>null</code>.
     * @throws IOException In case reading the terms fails.
     */
    public CategoryEntries getCategoryEntries(Terms terms) throws IOException {
        if (terms == null) {
            return CategoryEntries.EMPTY;
        }
        TermsEnum termsEnum = terms.iterator((TermsEnum) null);
        CountingCategoryEntriesBuilder builder = new CountingCategoryEntriesBuilder();
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            builder.add(term.utf8ToString(), (int) termsEnum.totalTermFreq());
        }
        return builder.create();
    }

    /**
     * @return The name read from the commit user data of the index, or <code>null</code> in case none was stored.
     */
    public String getName() {
        return name;
    }

    /**
     * @return The feature setting restored from the commit user data of the index, or <code>null</code> in case none
     *         was stored.
     */
    public FeatureSetting getFeatureSetting() {
        return featureSetting;
    }

    /**
     * Looks up the category entries for the given term in the index.
     *
     * @param str The term to look up.
     * @return The category entries of the term, or {@link CategoryEntries#EMPTY} in case the index has no document for
     *         the term.
     * @throws IllegalStateException In case an {@link IOException} occurs while reading from the index.
     */
    public CategoryEntries getCategoryEntries(String str) {
        Query termQuery = new TermQuery(new Term(FIELD_TERM, str));
        try {
            return getCategoryEntries(termQuery, FIELD_TERM_CAT);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * @return The number of unique terms, as determined when this model was opened.
     */
    public int getNumUniqTerms() {
        return numUniqueTerms;
    }

    /**
     * @return The document counts, as read from the index when this model was opened.
     */
    public CategoryEntries getDocumentCounts() {
        return documentCounts;
    }

    /**
     * @return The term counts, as computed from the index when this model was opened.
     */
    public CategoryEntries getTermCounts() {
        return termCounts;
    }

    /**
     * Iterates the dictionary entries by walking all document IDs of the index in ascending order. Documents without a
     * stored {@link #FIELD_TERM} value are skipped; for every other document, the entry is made up of the stored term and
     * the category entries from the document's {@link #FIELD_TERM_CAT} term vector.
     *
     * @return An iterator over the dictionary entries; it throws an {@link IllegalStateException} in case an
     *         {@link IOException} occurs while reading from the index.
     */
    public Iterator<DictionaryModel.DictionaryEntry> iterator() {
        return new AbstractIterator<DictionaryModel.DictionaryEntry>() { // from class: ws.palladian.classification.text.LuceneDictionaryModel.1
            /** ID of the document inspected last, -1 before the first call. */
            private int currentDoc = -1;

            /* JADX INFO: Access modifiers changed from: protected */
            /* renamed from: getNext, reason: merged with bridge method [inline-methods] */
            public DictionaryModel.DictionaryEntry m5getNext() throws AbstractIterator.Finished {
                DirectoryReader indexReader = LuceneDictionaryModel.this.reader;
                if (currentDoc >= indexReader.maxDoc()) {
                    throw FINISHED;
                }
                try {
                    // resume behind the document returned last, and stop at the first document which has a term
                    for (currentDoc++; currentDoc < indexReader.maxDoc(); currentDoc++) {
                        String term = indexReader.document(currentDoc).get(LuceneDictionaryModel.FIELD_TERM);
                        if (term == null) {
                            continue;
                        }
                        Terms termVector = indexReader.getTermVector(currentDoc, LuceneDictionaryModel.FIELD_TERM_CAT);
                        return new ImmutableDictionaryEntry(term, LuceneDictionaryModel.this.getCategoryEntries(termVector));
                    }
                    throw FINISHED;
                } catch (IOException e) {
                    throw new IllegalStateException(e);
                }
            }
        };
    }

    /**
     * @return The number of entries, as read from the commit user data of the index when this model was opened.
     */
    public int getNumEntries() {
        return numEntries;
    }

    /**
     * Closes the index reader held by this model.
     *
     * @throws IOException In case closing the reader fails.
     */
    @Override // java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
        reader.close();
    }
}
