package gate.creole.annic.lucene;

import com.thoughtworks.xstream.XStream;
import com.thoughtworks.xstream.io.xml.StaxDriver;
import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.creole.annic.Constants;
import gate.creole.annic.IndexException;
import gate.creole.annic.Indexer;
import gate.creole.annic.apache.lucene.analysis.Analyzer;
import gate.creole.annic.apache.lucene.index.IndexReader;
import gate.creole.annic.apache.lucene.index.IndexWriter;
import gate.creole.annic.apache.lucene.index.Term;
import gate.creole.annic.apache.lucene.search.Hits;
import gate.creole.annic.apache.lucene.search.IndexSearcher;
import gate.creole.annic.apache.lucene.search.TermQuery;
import gate.util.Files;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.FileUtils;

/* loaded from: input_file:gate/creole/annic/lucene/LuceneIndexer.class */
/**
 * {@link Indexer} implementation that stores an ANNIC index in a directory on
 * the local file system using the bundled Lucene classes.
 *
 * <p>The index parameters are kept in {@link #parameters}; after an index is
 * created they are also written to a {@code LuceneIndexDefinition.xml} file
 * inside the index directory so that they can be reloaded by the constructor.
 */
public class LuceneIndexer implements Indexer {
    /** Name of the file, inside the index directory, holding the saved index parameters. */
    private static final String INDEX_DEFINITION_FILE_NAME = "LuceneIndexDefinition.xml";

    /** When {@code true}, {@link #checkIndexParameters(Map)} prints the annotation types in use. */
    protected boolean DEBUG = false;

    /** Corpus indexed by {@link #createIndex(Map)}; may be {@code null}. */
    protected Corpus corpus;

    /** Current index parameters; {@code null} until loaded from disk or supplied to {@link #createIndex(Map)}. */
    protected Map<String, Object> parameters;

    /**
     * Creates an indexer and, when a location is given, loads previously saved
     * parameters from the definition file in that location (if the file exists).
     *
     * @param url location of an existing index directory, or {@code null} to
     *            start without parameters
     * @throws IOException if the definition file exists but cannot be read
     */
    public LuceneIndexer(URL url) throws IOException {
        if (url != null) {
            readParametersFromDisk(url);
        }
    }

    /**
     * Stores the supplied parameters on this indexer and validates them.
     *
     * <p>The index location must be a {@code file:} URL that either does not
     * exist yet or is a directory. A missing or blank base token annotation
     * type is replaced by {@code "Token"} in the supplied map.
     *
     * @param map the index parameters; modified in place when the base token
     *            annotation type is defaulted
     * @throws IndexException if the parameters are missing or invalid
     */
    protected void checkIndexParameters(Map<String, Object> map) throws IndexException {
        this.parameters = map;
        if (map == null) {
            throw new IndexException("No parameters provided!");
        }
        URL url = (URL) map.get(Constants.INDEX_LOCATION_URL);
        if (url == null) {
            throw new IndexException("You must provide a URL for INDEX_LOCATION");
        }
        if (!url.getProtocol().equalsIgnoreCase("file")) {
            throw new IndexException("Index Output Directory must be set to the empty directory on the file system");
        }
        File indexDirectory = toFile(url);
        if (indexDirectory.exists() && !indexDirectory.isDirectory()) {
            throw new IndexException("Index location exists but is not a directory: " + indexDirectory.getAbsolutePath());
        }
        String baseTokenType = (String) map.get(Constants.BASE_TOKEN_ANNOTATION_TYPE);
        if (baseTokenType == null || baseTokenType.trim().length() == 0) {
            baseTokenType = "Token";
            map.put(Constants.BASE_TOKEN_ANNOTATION_TYPE, baseTokenType);
        } else if (baseTokenType.indexOf(".") > -1 || baseTokenType.indexOf("=") > -1
                || baseTokenType.indexOf(";") > -1 || baseTokenType.indexOf(",") > -1) {
            throw new IndexException("Base token annotation type cannot have '.', '=', ',' or ';' in it");
        }
        String indexUnitType = (String) map.get(Constants.INDEX_UNIT_ANNOTATION_TYPE);
        if (this.DEBUG) {
            System.out.println("BTAT : " + baseTokenType);
            System.out.println("IUAT : " + indexUnitType);
        }
    }

    /**
     * Returns the current index parameters.
     *
     * @return the parameter map, possibly {@code null}
     */
    protected Map<String, Object> getIndexParameters() {
        return this.parameters;
    }

    /**
     * Creates a new index at the configured location, indexes every document
     * of the current corpus (if one is set) and saves the parameters to disk.
     *
     * <p>Documents that have a persistence id are released with
     * {@link Factory#deleteResource} once they have been indexed.
     *
     * @param map the index parameters, validated by {@link #checkIndexParameters(Map)}
     * @throws IndexException if the parameters are invalid or the index cannot be written
     */
    @Override
    public void createIndex(Map<String, Object> map) throws IndexException {
        checkIndexParameters(map);
        URL url = (URL) this.parameters.get(Constants.INDEX_LOCATION_URL);
        File indexDirectory = toFile(url);
        try {
            IndexWriter indexWriter = new IndexWriter(indexDirectory.getAbsolutePath(), (Analyzer) new LuceneAnalyzer(), true);
            try {
                if (this.corpus != null) {
                    for (int i = 0; i < this.corpus.size(); i++) {
                        Document document = (Document) this.corpus.get(i);
                        System.out.print("Indexing : " + (document.getLRPersistenceId() == null ? document.getName() : document.getLRPersistenceId().toString()) + " ...");
                        String corpusId = this.corpus.getLRPersistenceId() == null ? this.corpus.getName() : this.corpus.getLRPersistenceId().toString();
                        // NOTE(review): the URL string (not the file system path used by add())
                        // is passed as the index location here; kept as-is, confirm which form
                        // LuceneDocument.createDocuments expects.
                        List<gate.creole.annic.apache.lucene.document.Document> luceneDocuments = getLuceneDocuments(corpusId, document, url.toString());
                        if (luceneDocuments != null) {
                            for (gate.creole.annic.apache.lucene.document.Document luceneDocument : luceneDocuments) {
                                if (luceneDocument != null) {
                                    indexWriter.addDocument(luceneDocument);
                                }
                            }
                        }
                        if (document.getLRPersistenceId() != null) {
                            Factory.deleteResource(document);
                        }
                        System.out.println("Done");
                    }
                }
            } finally {
                // Close exactly once, whether or not indexing succeeded.
                indexWriter.close();
            }
            writeParametersToDisk();
        } catch (IOException e) {
            throw new IndexException(e);
        }
    }

    /**
     * Optimizes the existing index at the configured location.
     *
     * @throws IndexException if no index location is configured or the index cannot be optimized
     */
    @Override
    public void optimizeIndex() throws IndexException {
        // Use the file system path, as createIndex() and add() do, rather than the URL string.
        String indexPath = requireIndexDirectory().getAbsolutePath();
        try {
            IndexWriter indexWriter = new IndexWriter(indexPath, (Analyzer) new LuceneAnalyzer(), false);
            try {
                indexWriter.optimize();
            } finally {
                indexWriter.close();
            }
        } catch (IOException e) {
            throw new IndexException(e);
        }
    }

    /**
     * Deletes the index directory. Does nothing when no parameters are set.
     *
     * @throws IndexException if no index location is configured or the directory could not be deleted
     */
    @Override
    public void deleteIndex() throws IndexException {
        if (this.parameters == null) {
            return;
        }
        File indexDirectory = requireIndexDirectory();
        if (!FileUtils.deleteQuietly(indexDirectory)) {
            throw new IndexException("Can't delete directory " + indexDirectory.getAbsolutePath());
        }
    }

    /**
     * Adds the given documents to the existing index.
     *
     * @param str  identifier of the corpus the documents belong to; passed
     *             through to the Lucene document builder
     * @param list documents to index; {@code null} is treated as empty
     * @throws IndexException if no index location is configured or the index cannot be written
     */
    @Override
    public void add(String str, List<Document> list) throws IndexException {
        String indexPath = requireIndexDirectory().getAbsolutePath();
        try {
            IndexWriter indexWriter = new IndexWriter(indexPath, (Analyzer) new LuceneAnalyzer(), false);
            try {
                if (list != null) {
                    for (int i = 0; i < list.size(); i++) {
                        Document document = list.get(i);
                        System.out.print("Indexing : " + (document.getLRPersistenceId() == null ? document.getName() : document.getLRPersistenceId().toString()) + " ...");
                        List<gate.creole.annic.apache.lucene.document.Document> luceneDocuments = getLuceneDocuments(str, document, indexPath);
                        if (luceneDocuments != null) {
                            for (gate.creole.annic.apache.lucene.document.Document luceneDocument : luceneDocuments) {
                                // Skip null entries, consistent with createIndex().
                                if (luceneDocument != null) {
                                    indexWriter.addDocument(luceneDocument);
                                }
                            }
                        }
                        System.out.println("Done");
                    }
                }
            } finally {
                indexWriter.close();
            }
        } catch (IOException e) {
            throw new IndexException(e);
        }
    }

    /**
     * Replaces the characters {@code / : * ? " < > |} in a name with {@code _}
     * so that it can be used as a file or folder name.
     *
     * <p>NOTE(review): a literal backslash is not replaced by this pattern;
     * presumably this must stay in sync with the code that wrote the
     * serialized files, so confirm before changing it.
     */
    private String getCompatibleName(String str) {
        return str.replaceAll("[\\/:\\*\\?\"<>|]", "_");
    }

    /**
     * Removes the given documents from the index and deletes their serialized
     * {@code .annic} files from the index directory.
     *
     * @param list identifiers of the documents to remove (their
     *             {@code toString()} value is used); {@code null} is treated as empty
     * @throws IndexException if no index location is configured or the index cannot be updated
     */
    @Override
    public void remove(List<Object> list) throws IndexException {
        String indexPath = requireIndexDirectory().getAbsolutePath();
        try {
            IndexReader indexReader = IndexReader.open(indexPath);
            try {
                if (list != null) {
                    File serializedRoot = new File(indexPath, Constants.SERIALIZED_FOLDER_NAME);
                    for (int i = 0; i < list.size(); i++) {
                        String documentId = list.get(i).toString();
                        Set<String> serializedNames = getNamesOfSerializedFiles(documentId);
                        if (serializedNames.isEmpty()) {
                            continue;
                        }
                        System.out.print("Removing => " + documentId + "...");
                        File documentFolder = new File(serializedRoot, getCompatibleName(documentId));
                        for (String serializedName : serializedNames) {
                            indexReader.delete(new Term(Constants.DOCUMENT_ID_FOR_SERIALIZED_FILE, serializedName));
                            File serializedFile = new File(documentFolder, getCompatibleName(serializedName) + ".annic");
                            if (serializedFile.exists()) {
                                serializedFile.delete();
                            }
                        }
                        // File.delete() only removes the folder if it is now empty.
                        if (documentFolder.exists() && documentFolder.isDirectory()) {
                            documentFolder.delete();
                        }
                        System.out.println("Done ");
                    }
                }
            } finally {
                indexReader.close();
            }
        } catch (IOException e) {
            throw new IndexException(e);
        }
    }

    /**
     * Builds the Lucene documents for one GATE document using the current
     * index parameters.
     *
     * @param str      corpus identifier passed through to the document builder
     * @param document the GATE document to convert
     * @param str2     index location passed through to the document builder
     * @return the value returned by {@code LuceneDocument.createDocuments};
     *         callers in this class treat {@code null} as "nothing to index"
     * @throws IndexException if the document cannot be converted
     */
    private List<gate.creole.annic.apache.lucene.document.Document> getLuceneDocuments(String str, Document document, String str2) throws IndexException {
        String baseTokenType = (String) this.parameters.get(Constants.BASE_TOKEN_ANNOTATION_TYPE);
        String indexUnitType = (String) this.parameters.get(Constants.INDEX_UNIT_ANNOTATION_TYPE);
        List<String> featuresToExclude = copyOfListParameter(Constants.FEATURES_TO_EXCLUDE);
        List<String> featuresToInclude = copyOfListParameter(Constants.FEATURES_TO_INCLUDE);
        List<String> annotationSetsToExclude = copyOfListParameter(Constants.ANNOTATION_SETS_NAMES_TO_EXCLUDE);
        List<String> annotationSetsToInclude = copyOfListParameter(Constants.ANNOTATION_SETS_NAMES_TO_INCLUDE);
        Boolean createTokensAutomatically = (Boolean) this.parameters.get(Constants.CREATE_TOKENS_AUTOMATICALLY);
        if (createTokensAutomatically == null) {
            createTokensAutomatically = Boolean.TRUE;
        }
        String documentId = document.getLRPersistenceId() == null ? document.getName() : document.getLRPersistenceId().toString();
        return new LuceneDocument().createDocuments(str, document, documentId, annotationSetsToInclude, annotationSetsToExclude, featuresToInclude, featuresToExclude, str2, baseTokenType, createTokensAutomatically, indexUnitType);
    }

    /**
     * Returns a private copy of a list-valued parameter, or an empty list when
     * the parameter is absent (rather than failing with a NullPointerException).
     */
    @SuppressWarnings("unchecked")
    private List<String> copyOfListParameter(String key) {
        Object value = this.parameters.get(key);
        if (value == null) {
            return new ArrayList<String>();
        }
        // Unchecked: the parameter map is untyped; the value is expected to be a list of strings.
        return new ArrayList<String>((List<String>) value);
    }

    /**
     * Returns the corpus currently associated with this indexer.
     *
     * @return the corpus, possibly {@code null}
     */
    @Override
    public Corpus getCorpus() {
        return this.corpus;
    }

    /**
     * Associates a corpus with this indexer and marks it with the corpus index feature.
     *
     * <p>The field is assigned before the null check, so passing {@code null}
     * clears the current corpus and then throws. NOTE(review): this ordering is
     * preserved deliberately because callers may rely on it.
     *
     * @param corpus the corpus to index
     * @throws IndexException if {@code corpus} is {@code null}
     */
    @Override
    public void setCorpus(Corpus corpus) throws IndexException {
        this.corpus = corpus;
        if (corpus == null) {
            throw new IndexException("Corpus is not initialized");
        }
        corpus.getFeatures().put(Constants.CORPUS_INDEX_FEATURE, Constants.CORPUS_INDEX_FEATURE_VALUE);
    }

    /**
     * Loads the saved parameters from the definition file in the given index
     * directory, if that file exists, and records the index location in them.
     *
     * <p>NOTE(review): XStream instantiates whatever types the XML names, so
     * the index directory must come from a trusted source.
     *
     * @param url location of the index directory
     * @throws IOException if the definition file cannot be read
     */
    @SuppressWarnings("unchecked")
    private void readParametersFromDisk(URL url) throws IOException {
        File definitionFile = new File(toFile(url), INDEX_DEFINITION_FILE_NAME);
        if (!definitionFile.exists()) {
            return;
        }
        FileReader fileReader = new FileReader(definitionFile);
        try {
            this.parameters = (Map<String, Object>) new XStream(new StaxDriver()).fromXML(fileReader);
            this.parameters.put(Constants.INDEX_LOCATION_URL, url);
        } finally {
            fileReader.close();
        }
    }

    /**
     * Saves the current parameters to the definition file in the index
     * directory. The index location itself is not saved; the corpus index
     * feature and, when a corpus is set, the corpus size are added.
     *
     * @throws IOException if the definition file cannot be written
     */
    private void writeParametersToDisk() throws IOException {
        URL url = (URL) this.parameters.get(Constants.INDEX_LOCATION_URL);
        File definitionFile = new File(toFile(url), INDEX_DEFINITION_FILE_NAME);
        Map<Object, Object> persisted = new HashMap<Object, Object>();
        for (Map.Entry<String, Object> entry : this.parameters.entrySet()) {
            String key = entry.getKey();
            if (!key.equals(Constants.INDEX_LOCATION_URL)) {
                persisted.put(key, entry.getValue());
            }
        }
        persisted.put(Constants.CORPUS_INDEX_FEATURE, Constants.CORPUS_INDEX_FEATURE_VALUE);
        if (this.corpus != null) {
            persisted.put(Constants.CORPUS_SIZE, Integer.valueOf(this.corpus.getDocumentNames().size()));
        }
        // Open the file only once the content is ready, so a failure above
        // neither leaks the writer nor truncates an existing definition file.
        FileWriter fileWriter = new FileWriter(definitionFile);
        try {
            new XStream().toXML(persisted, fileWriter);
        } finally {
            fileWriter.close();
        }
    }

    /**
     * Returns the current index parameters.
     *
     * @return the parameter map, possibly {@code null}
     */
    @Override
    public Map<String, Object> getParameters() {
        return this.parameters;
    }

    /**
     * Searches the index for all entries whose document id equals the given
     * value and collects their serialized-file ids.
     *
     * @param str the document id to look up
     * @return the serialized-file ids found; empty when there is no match
     * @throws IndexException if no index location is configured or the index cannot be read
     */
    public Set<String> getNamesOfSerializedFiles(String str) throws IndexException {
        String indexPath = requireIndexDirectory().getAbsolutePath();
        Set<String> serializedNames = new HashSet<String>();
        try {
            IndexSearcher indexSearcher = new IndexSearcher(indexPath);
            try {
                TermQuery termQuery = new TermQuery(new Term(Constants.DOCUMENT_ID, str));
                Hits hits = indexSearcher.search(termQuery);
                for (int i = 0; i < hits.length(); i++) {
                    serializedNames.add(hits.doc(i).get(Constants.DOCUMENT_ID_FOR_SERIALIZED_FILE));
                }
                return serializedNames;
            } finally {
                indexSearcher.close();
            }
        } catch (IOException e) {
            throw new IndexException(e);
        }
    }

    /**
     * Converts an index location URL to a file, falling back to
     * {@link Files#fileFromURL(URL)} when the URL is not a valid URI. Every
     * method uses this single conversion so that they all address the same directory.
     */
    private static File toFile(URL url) {
        try {
            return new File(url.toURI());
        } catch (URISyntaxException e) {
            return Files.fileFromURL(url);
        }
    }

    /**
     * Returns the configured index directory.
     *
     * @throws IndexException if no parameters are set or they contain no index location
     */
    private File requireIndexDirectory() throws IndexException {
        if (this.parameters == null) {
            throw new IndexException("No parameters provided!");
        }
        URL url = (URL) this.parameters.get(Constants.INDEX_LOCATION_URL);
        if (url == null) {
            throw new IndexException("You must provide a URL for INDEX_LOCATION");
        }
        return toFile(url);
    }
}
