package uk.ac.shef.dcs.jate.app;

import com.google.gson.Gson;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.lucene.analysis.jate.ComplexShingleFilter;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.JATEProperties;
import uk.ac.shef.dcs.jate.algorithm.TermInfoCollector;
import uk.ac.shef.dcs.jate.feature.FrequencyTermBased;
import uk.ac.shef.dcs.jate.feature.FrequencyTermBasedFBMaster;
import uk.ac.shef.dcs.jate.model.JATEDocument;
import uk.ac.shef.dcs.jate.model.JATETerm;
import uk.ac.shef.dcs.jate.util.IOUtil;
import uk.ac.shef.dcs.jate.util.JATEUtil;

/* loaded from: input_file:uk/ac/shef/dcs/jate/app/App.class */
public abstract class App {
    /** Logger created per instance from the runtime class (see the constructors). */
    private final Logger log;
    /** When true, {@link #addAdditionalTermInfo} collects term information; defaults to false. */
    protected Boolean collectTermInfo;
    /** Minimum term score used by {@link #cutoff}; {@code null} when not configured. */
    protected Double cutoffThreshold;
    /** Number of top-ranked terms used by {@link #cutoff}; {@code null} when not configured. */
    protected Integer cutoffTopK;
    /** Fraction of top-ranked terms used by {@link #cutoff}; {@code null} when not configured. */
    protected Double cutoffTopKPercent;
    /** Path that {@link #write} exports terms to; {@code null} when no export was requested. */
    protected String outputFile;
    /** Minimum total term frequency applied by {@link #filterByTTF}; defaults to 0. */
    protected Integer prefilterMinTTF;
    /** Minimum term context frequency (used by co-occurrence based methods); defaults to 0. */
    protected Integer prefilterMinTCF;
    /** Path of the reference corpus frequency file, set by {@link #initalizeRefFreqParam}. */
    protected String referenceFrequencyFilePath;
    /** Builder for the term frequency feature; not assigned in this class beyond {@code null}. */
    protected FrequencyTermBasedFBMaster freqFeatureBuilder;
    /** Term frequency feature consulted by {@link #filterByTTF}; must be set before filtering. */
    protected FrequencyTermBased freqFeature;
    /** Fallback export file used when the requested output path is missing or cannot be opened. */
    private static final String DEFAULT_OUTPUT_FILE = "terms.txt";

    /**
     * Sets the path of the file that extracted terms are exported to.
     *
     * @param str output file path; stored as given, without validation
     */
    public void setOutputFile(String str) {
        outputFile = str;
    }

    /**
     * Returns the currently configured export file path.
     *
     * @return the output file path, or {@code null} if none has been set
     */
    public String getOutputFile() {
        return outputFile;
    }

    /**
     * Creates an application with default settings: no cut-off, no output file,
     * no reference frequency file, pre-filter minimums of zero and term
     * information collection switched off.
     */
    public App() {
        // Cut-off criteria are unset until configured.
        this.cutoffThreshold = null;
        this.cutoffTopK = null;
        this.cutoffTopKPercent = null;

        // Pre-filter minimums start at zero.
        this.prefilterMinTTF = Integer.valueOf(0);
        this.prefilterMinTCF = Integer.valueOf(0);

        // Optional inputs/outputs and features are absent by default.
        this.collectTermInfo = Boolean.FALSE;
        this.outputFile = null;
        this.referenceFrequencyFilePath = null;
        this.freqFeatureBuilder = null;
        this.freqFeature = null;

        this.log = LoggerFactory.getLogger(this.getClass());
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Tells whether the parameter map contains the output file key.
     *
     * @param map parsed command line parameters
     * @return {@code true} if the output file key is present (its value may still be null)
     */
    public static boolean isExport(Map<String, String> map) {
        final String outputKey = AppParams.OUTPUT_FILE.getParamKey();
        return map.containsKey(outputKey);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Tells whether a corpus location was supplied.
     *
     * @param str corpus directory argument, possibly {@code null}
     * @return {@code true} when the argument is neither {@code null} nor the empty string
     */
    public static boolean isCorpusProvided(String str) {
        return str != null && !str.isEmpty();
    }

    /**
     * Parses an integer command line value.
     *
     * @param str  human readable parameter description used in the error message
     * @param str2 raw value to parse
     * @return the parsed integer
     * @throws JATEException if the value is not a valid integer; the problem is also logged
     */
    private int parseIntParam(String str, String str2) throws JATEException {
        final int parsed;
        try {
            parsed = Integer.parseInt(str2);
        } catch (NumberFormatException notANumber) {
            final String message = String.format(
                    "%s is not set correctly. An integer value is expected. Actual input is %s", str, str2);
            log.error(message);
            throw new JATEException(message);
        }
        return parsed;
    }

    /**
     * Parses a floating point command line value.
     *
     * @param str  human readable parameter description used in the error message
     * @param str2 raw value to parse; may be {@code null}
     * @return the parsed value
     * @throws JATEException if the value is {@code null} or not a valid number; the problem is also logged
     */
    private double parseDoubleParam(String str, String str2) throws JATEException {
        try {
            return Double.parseDouble(str2);
        } catch (NumberFormatException | NullPointerException e) {
            // Double.parseDouble(null) throws NullPointerException (unlike Integer.parseInt),
            // so a key given without a value must be reported as a configuration error as well.
            String format = String.format("%s is not set correctly. A numeric (decimal) value is expected. Actual input is %s", str, str2);
            this.log.error(format);
            throw new JATEException(format);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    public App(Map<String, String> map) throws JATEException {
        String str;
        this.log = LoggerFactory.getLogger(getClass());
        this.collectTermInfo = false;
        this.cutoffThreshold = null;
        this.cutoffTopK = null;
        this.cutoffTopKPercent = null;
        this.outputFile = null;
        this.prefilterMinTTF = 0;
        this.prefilterMinTCF = 0;
        this.referenceFrequencyFilePath = null;
        this.freqFeatureBuilder = null;
        this.freqFeature = null;
        if (map.containsKey(AppParams.CUTOFF_TOP_K.getParamKey())) {
            String str2 = map.get(AppParams.CUTOFF_TOP_K.getParamKey());
            this.cutoffTopK = Integer.valueOf(parseIntParam("Cutoff parameter Top K " + AppParams.CUTOFF_TOP_K.getParamKey(), str2));
            this.log.debug(String.format("Cutoff parameter: top [%s] term candidates will be selected as final terms", str2));
        }
        if (map.containsKey(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey())) {
            String str3 = map.get(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey());
            this.cutoffTopKPercent = Double.valueOf(parseDoubleParam("Cutoff parameter Top K% " + AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), str3));
            this.log.debug(String.format("Cutoff parameter: top [%s] percent of term candidates will be selected as final terms", str3));
        }
        if (map.containsKey(AppParams.CUTOFF_THRESHOLD.getParamKey())) {
            String str4 = map.get(AppParams.CUTOFF_THRESHOLD.getParamKey());
            this.cutoffThreshold = Double.valueOf(parseDoubleParam("Cutoff parameter term score " + AppParams.CUTOFF_THRESHOLD.getParamKey(), str4));
            this.log.debug(String.format("Cutoff paramter: terms with a minimum score of [%s] will be selected as final terms", str4));
        }
        if (map.containsKey(AppParams.PREFILTER_MIN_TERM_CONTEXT_FREQUENCY.getParamKey())) {
            this.prefilterMinTCF = Integer.valueOf(parseIntParam("Pre-filter minimum term context frequency " + AppParams.PREFILTER_MIN_TERM_CONTEXT_FREQUENCY, map.get(AppParams.PREFILTER_MIN_TERM_CONTEXT_FREQUENCY.getParamKey())));
            this.log.debug(String.format("Pre-filter mininum term context frequency (used by co-occurrence based methods) is set to [%s]", this.prefilterMinTCF));
        }
        if (map.containsKey(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey())) {
            this.prefilterMinTTF = Integer.valueOf(parseIntParam("Pre-filter minimum total term frequency " + AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY, map.get(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey())));
            this.log.debug(String.format("Pre-filter mininum total term frequency is set to [%s]", this.prefilterMinTCF));
        }
        if (map.containsKey(AppParams.COLLECT_TERM_INFO.getParamKey()) && (str = map.get(AppParams.COLLECT_TERM_INFO.getParamKey())) != null && str.equalsIgnoreCase("true")) {
            this.collectTermInfo = true;
            this.log.debug("Term offsets will be collected and written to the output");
        }
        if (map.containsKey(AppParams.OUTPUT_FILE.getParamKey())) {
            String str5 = map.get(AppParams.OUTPUT_FILE.getParamKey());
            if (str5 == null) {
                this.log.warn("Output file is missing or its path is invalid (you can ignore this if you are running in the Plugin mode and do not require the list of terms to be exported to a file.) \nOutput will be written to a default file 'terms.txt' instead.");
                this.outputFile = DEFAULT_OUTPUT_FILE;
                return;
            }
            try {
                new PrintWriter(str5).close();
                this.outputFile = str5;
            } catch (IOException e) {
                this.log.warn("Output file is missing or its path is invalid (you can ignore this if you are running in the Plugin mode and do not require the list of terms to be exported to a file.) \nOutput will be written to a default file 'terms.txt' instead.");
                this.outputFile = DEFAULT_OUTPUT_FILE;
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Reads the reference corpus frequency file parameter and stores its path
     * in {@link #referenceFrequencyFilePath}.
     *
     * @param map parsed command line parameters
     * @throws JATEException if the parameter key is absent, its value is {@code null},
     *                       or the referenced file does not exist; the problem is also logged
     */
    public void initalizeRefFreqParam(Map<String, String> map) throws JATEException {
        final String paramKey = AppParams.REFERENCE_FREQUENCY_FILE.getParamKey();

        String candidatePath = null;
        String problem = null;
        if (!map.containsKey(paramKey)) {
            problem = String.format("Reference corpus frequency file (-r) %s is not set. A file path is expected.", paramKey);
        } else {
            candidatePath = map.get(paramKey);
            if (candidatePath == null) {
                problem = String.format("Reference corpus frequency file %s is not set. A file path is expected.", paramKey);
            } else if (!new File(candidatePath).exists()) {
                problem = String.format("Excepted reference corpus frequency file %s does not exist in %s.", paramKey, candidatePath);
            }
        }

        // Every failure mode is logged and then reported to the caller in the same way.
        if (problem != null) {
            log.error(problem);
            throw new JATEException(problem);
        }
        referenceFrequencyFilePath = candidatePath;
    }

    /**
     * Runs term extraction against an already opened Solr core. Implemented by
     * the concrete applications; {@link #extract(String, String, String)} calls
     * it with a core obtained from an embedded Solr server.
     *
     * @param solrCore the Solr core to extract terms from
     * @param str      forwarded unchanged from the third argument of
     *                 {@link #extract(String, String, String)}; presumably the JATE
     *                 properties file path (TODO confirm against implementations)
     * @return the extracted terms
     * @throws IOException   declared for implementations that read the index
     * @throws JATEException declared for implementation-specific extraction failures
     */
    public abstract List<JATETerm> extract(SolrCore solrCore, String str) throws IOException, JATEException;

    /**
     * Opens an embedded Solr server, runs {@link #extract(SolrCore, String)} on the
     * named core and drops terms that contain no ASCII letter or digit.
     *
     * <p>The core and the server are always released, whether extraction succeeds
     * or fails. After the server is closed, any {@code data/index/write.lock} file
     * under a core name still reported by the container is deleted.
     *
     * @param str  Solr home directory
     * @param str2 name of the Solr core to use
     * @param str3 passed through unchanged to {@link #extract(SolrCore, String)}
     * @return the extracted terms, without purely non-alphanumeric entries
     * @throws IOException   if closing the server or the delegated extraction fails with an I/O error
     * @throws JATEException if the delegated extraction fails
     */
    public List<JATETerm> extract(String str, String str2, String str3) throws IOException, JATEException {
        EmbeddedSolrServer embeddedSolrServer = null;
        SolrCore core = null;
        try {
            embeddedSolrServer = new EmbeddedSolrServer(Paths.get(str), str2);
            core = embeddedSolrServer.getCoreContainer().getCore(str2);
            List<JATETerm> extracted = extract(core, str3);
            // Discard candidates made up solely of punctuation/symbols.
            extracted.removeIf(term -> term.getString().replaceAll("[^a-zA-Z0-9]", "").length() == 0);
            return extracted;
        } finally {
            // CoreContainer.getCore increments the core's reference count; release it
            // so that closing the server can actually shut the core down.
            if (core != null) {
                core.close();
            }
            if (embeddedSolrServer != null) {
                embeddedSolrServer.close();
                // Best-effort removal of stale Lucene write locks left in the Solr home.
                embeddedSolrServer.getCoreContainer().getAllCoreNames().forEach(coreName -> {
                    File lockFile = Paths.get(str, coreName, "data", "index", "write.lock").toFile();
                    if (lockFile.exists()) {
                        lockFile.delete();
                    }
                });
            }
        }
    }

    /**
     * Indexes every file found under a corpus directory into an embedded Solr core
     * and commits the result.
     *
     * <p>Indexing is best-effort per file: a file that fails to index is logged
     * and skipped, and the remaining files are still processed.
     *
     * @param path  corpus directory whose files are indexed
     * @param path2 Solr home directory
     * @param str   name of the Solr core to index into
     * @param str2  JATE properties file path; {@code null} or empty selects the defaults
     * @throws JATEException if the properties cannot be loaded or the commit fails
     */
    public void index(Path path, Path path2, String str, String str2) throws JATEException {
        this.log.info(String.format("Indexing corpus from [%s] and perform candidate extraction ...", path));
        List<Path> loadFiles = JATEUtil.loadFiles(path);
        this.log.info(" [" + loadFiles.size() + "] files are scanned and will be indexed and analysed.");
        // Load the properties before opening the server so that a configuration
        // failure cannot leave an open (and locked) Solr index behind.
        JATEProperties jateProperties = getJateProperties(str2);
        EmbeddedSolrServer embeddedSolrServer = new EmbeddedSolrServer(path2, str);
        try {
            for (Path file : loadFiles) {
                try {
                    indexJATEDocuments(file, embeddedSolrServer, jateProperties, false);
                } catch (JATEException e) {
                    // Keep going: one unreadable document must not abort the whole corpus.
                    this.log.error(String.format("Failed to index [%s]; the file is skipped.", file), e);
                }
            }
            embeddedSolrServer.commit();
            this.log.info("all corpus are indexed with term candidates.");
        } catch (SolrServerException | IOException e) {
            throw new JATEException(String.format("Failed to index current corpus. Error:[%s]", e.toString()));
        } finally {
            try {
                embeddedSolrServer.close();
            } catch (Exception e2) {
                this.log.error("Unable to close solr index, error cause:");
                this.log.error(ExceptionUtils.getFullStackTrace(e2));
            }
        }
    }

    /**
     * Loads a single file as a JATE document and adds it to the Solr index.
     * A {@code null} path, or a document with no non-blank content, is silently skipped.
     *
     * @param path               file to index; may be {@code null}
     * @param embeddedSolrServer server to add the document to
     * @param jATEProperties     JATE configuration forwarded to the indexing helper
     * @param z                  forwarded unchanged to {@code JATEUtil.addNewDoc}
     *                           (presumably a commit flag - TODO confirm)
     * @throws JATEException if the file is missing, cannot be read, or Solr rejects the document
     */
    protected void indexJATEDocuments(Path path, EmbeddedSolrServer embeddedSolrServer, JATEProperties jATEProperties, boolean z) throws JATEException {
        if (path == null) {
            return;
        }
        try {
            JATEDocument loadJATEDocument = JATEUtil.loadJATEDocument(path);
            if (isNotEmpty(loadJATEDocument)) {
                JATEUtil.addNewDoc(embeddedSolrServer, loadJATEDocument.getId(), loadJATEDocument.getId(), loadJATEDocument.getContent(), jATEProperties, z);
            }
        } catch (FileNotFoundException e) {
            // Must precede IOException: FileNotFoundException is a subclass, and the
            // reverse order is rejected by the compiler as an unreachable handler.
            throw new JATEException(e.toString());
        } catch (IOException e) {
            throw new JATEException(String.format("failed to index [%s]", path.toString()) + e.toString());
        } catch (SolrServerException e) {
            throw new JATEException(String.format("failed to index [%s] ", path.toString()) + e.toString());
        }
    }

    /**
     * Tells whether a document carries any non-blank content.
     *
     * @param jATEDocument document to inspect; may be {@code null}
     * @return {@code false} for a {@code null} document, {@code null} content or
     *         content that is empty after trimming; {@code true} otherwise
     */
    private static boolean isNotEmpty(JATEDocument jATEDocument) {
        if (jATEDocument == null) {
            return false;
        }
        String content = jATEDocument.getContent();
        return content != null && content.trim().length() != 0;
    }

    /**
     * Attaches term information to every term in the list, using a
     * {@code TermInfoCollector} built over the given index reader. Progress is
     * logged after every 500 terms.
     *
     * @param list        terms to enrich; each receives its collected term info
     * @param indexReader reader handed to the collector
     * @param str         second collector constructor argument (field name, presumably - TODO confirm)
     * @param str2        third collector constructor argument (field name, presumably - TODO confirm)
     * @throws IOException if collecting information from the index fails
     */
    public void collectTermOffsets(List<JATETerm> list, IndexReader indexReader, String str, String str2) throws IOException {
        final TermInfoCollector collector = new TermInfoCollector(indexReader, str, str2);
        log.info("Gathering term information (e.g., provenance and offsets). This may take a while. Total=" + list.size());

        int processed = 0;
        for (JATETerm term : list) {
            term.setTermInfo(collector.collect(term.getString()));
            processed += 1;
            boolean reportProgress = processed % 500 == 0;
            if (reportProgress) {
                log.info("done " + processed);
            }
        }
    }

    /**
     * Collects term information for the given terms when {@link #collectTermInfo}
     * is enabled; otherwise does nothing.
     *
     * @param list              terms to enrich
     * @param solrIndexSearcher searcher whose slow atomic reader is used for the lookup
     * @param str               forwarded to {@link #collectTermOffsets}
     * @param str2              forwarded to {@link #collectTermOffsets}
     * @throws JATEException if reading the Solr index fails with an I/O error
     */
    public void addAdditionalTermInfo(List<JATETerm> list, SolrIndexSearcher solrIndexSearcher, String str, String str2) throws JATEException {
        if (!collectTermInfo) {
            return;
        }
        try {
            collectTermOffsets(list, solrIndexSearcher.getSlowAtomicReader(), str, str2);
        } catch (IOException ioFailure) {
            throw new JATEException("I/O exception when reading Solr index. " + ioFailure.toString());
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Removes, in place, every candidate whose total term frequency is below
     * {@link #prefilterMinTTF}. Nothing happens for a {@code null} or empty list,
     * or when no minimum is configured.
     *
     * @param list candidate terms; modified in place
     * @throws JATEException if {@link #freqFeature} has not been initialised
     */
    public void filterByTTF(List<String> list) throws JATEException {
        if (freqFeature == null) {
            throw new JATEException("FrequencyTermBased is not initialised for TTF term filtering.");
        }
        boolean nothingToFilter = list == null || list.size() == 0 || prefilterMinTTF == null;
        if (nothingToFilter) {
            return;
        }

        log.debug(String.format("Filter [%s] term candidates by total term frequency [%s] (exclusive)", list.size(), prefilterMinTTF));
        for (Iterator<String> candidates = list.iterator(); candidates.hasNext();) {
            String candidate = candidates.next();
            if (freqFeature.getTTF(candidate) < prefilterMinTTF.intValue()) {
                candidates.remove();
            }
        }
        log.debug(String.format("filtered term candidate size: [%s]", list.size()));
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Turns command line arguments into a key/value map.
     *
     * <p>Arguments are read as consecutive {@code key value} pairs starting at
     * index 0; the last two arguments are positional and never used as keys.
     * With an odd number of option tokens the final key is paired with the
     * first positional argument. Fewer than three arguments yield an empty map.
     *
     * @param strArr raw command line arguments
     * @return mutable map from option key to option value
     */
    public static Map<String, String> getParams(String[] strArr) {
        Map<String, String> options = new HashMap<>();
        if (strArr.length < 3) {
            return options;
        }
        // Keys may only start before the two trailing positional arguments.
        final int firstPositional = strArr.length - 2;
        for (int pos = 0; pos < firstPositional; pos += 2) {
            options.put(strArr[pos], strArr[pos + 1]);
        }
        return options;
    }

    /**
     * Serialises the terms as JSON into {@link #outputFile}.
     *
     * @param list terms to export
     * @throws IOException if no output file is configured or writing fails
     */
    public void write(List<JATETerm> list) throws IOException {
        if (this.outputFile == null) {
            throw new IOException("Output file is null");
        }
        this.log.info(String.format("Exporting terms to [%s]", this.outputFile));
        // try-with-resources closes the writer even when serialisation fails.
        try (Writer uTF8Writer = IOUtil.getUTF8Writer(this.outputFile)) {
            new Gson().toJson(list, uTF8Writer);
        }
        this.log.info("complete.");
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Applies the first configured cut-off, in the order score threshold,
     * top K, top K percent. With no cut-off configured the list is returned as is.
     *
     * @param list ranked terms
     * @return the terms remaining after the cut-off
     */
    public List<JATETerm> cutoff(List<JATETerm> list) {
        if (cutoffThreshold != null) {
            return cutoffByTermScoreThreshold(list, cutoffThreshold);
        }
        if (cutoffTopK != null) {
            return cutoffByTopK(list, cutoffTopK);
        }
        if (cutoffTopKPercent != null) {
            return cutoffByTopKPercent(list, cutoffTopKPercent);
        }
        return list;
    }

    /**
     * Returns a copy of the terms containing only those whose score is at least
     * the threshold. The input list is never modified.
     *
     * @param list terms to filter; {@code null} is treated as empty
     * @param d    minimum score (inclusive); {@code null} disables filtering
     * @return a new list with the retained terms
     */
    protected List<JATETerm> cutoffByTermScoreThreshold(List<JATETerm> list, Double d) {
        List<JATETerm> retained = new ArrayList<JATETerm>();
        if (list != null) {
            retained.addAll(list);
        }
        if (d != null && !retained.isEmpty()) {
            this.log.debug(String.format("cutoff [%s] term candidates by termhood/unithood based threshold [%s]", Integer.valueOf(retained.size()), d));
            final double threshold = d.doubleValue();
            retained.removeIf(term -> term.getScore() < threshold);
            // Report the size after filtering (the copy), not the untouched input list.
            this.log.debug(String.format("final filtered term candidate size [%s]", Integer.valueOf(retained.size())));
        }
        return retained;
    }

    /**
     * Keeps the first K terms of an already ranked list.
     *
     * <p>The list is returned unchanged when it is {@code null} or empty, when K
     * is {@code null}, or when K is not smaller than the list size. A negative K
     * is treated as zero. The result is a {@code subList} view of the input.
     *
     * @param list ranked terms
     * @param num  number of terms to keep (K)
     * @return the first K terms, or the input list when no cut-off applies
     */
    protected List<JATETerm> cutoffByTopK(List<JATETerm> list, Integer num) {
        // Short-circuit evaluation: the null checks must guard the dereferences that follow.
        if (num == null || list == null || list.isEmpty() || num.intValue() >= list.size()) {
            return list;
        }
        this.log.debug(String.format("cutoff [%s] term candidates by Top [%s] ...", Integer.valueOf(list.size()), num));
        // subList's upper bound is exclusive, so K (not K + 1) yields exactly K terms.
        List<JATETerm> top = list.subList(0, Math.max(num.intValue(), 0));
        this.log.debug(String.format("final filtered term list size is [%s]", Integer.valueOf(top.size())));
        return top;
    }

    /**
     * Keeps the top fraction of an already ranked list.
     *
     * <p>The number of terms to keep is {@code round(d * size)}. When that rounds
     * to zero or less, or when the list is {@code null} or empty or the fraction
     * is {@code null}, the list is returned unchanged.
     *
     * @param list ranked terms
     * @param d    fraction of terms to keep (for example 0.1 for the top 10 percent)
     * @return the terms remaining after the cut-off
     */
    protected List<JATETerm> cutoffByTopKPercent(List<JATETerm> list, Double d) {
        // Short-circuit evaluation: the null checks must guard the dereferences that follow.
        if (d == null || list == null || list.isEmpty()) {
            return list;
        }
        this.log.debug(String.format("filter [%s] term candidates by Top [%s] percent (rounded) ...", Integer.valueOf(list.size()), Double.valueOf(d.doubleValue() * 100.0d)));
        int topK = (int) Math.round(d.doubleValue() * list.size());
        if (topK > 0) {
            list = cutoffByTopK(list, Integer.valueOf(topK));
        }
        this.log.debug(String.format("final filtered term list size is [%s]", Integer.valueOf(list.size())));
        return list;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Looks up the JATE properties file parameter.
     *
     * @param map parsed command line parameters
     * @return the value stored under the properties file key, or {@code null} if the key is absent
     */
    public static String getJATEProperties(Map<String, String> map) {
        final String propertiesKey = AppParams.JATE_PROPERTIES_FILE.getParamKey();
        return map.containsKey(propertiesKey) ? map.get(propertiesKey) : null;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Looks up the corpus directory parameter.
     *
     * @param map parsed command line parameters
     * @return the value stored under the corpus directory key, or {@code null} if the key is absent
     */
    public static String getCorpusDir(Map<String, String> map) {
        final String corpusKey = AppParams.CORPUS_DIR.getParamKey();
        return map.containsKey(corpusKey) ? map.get(corpusKey) : null;
    }

    /**
     * Creates the JATE configuration.
     *
     * @param str properties file path; {@code null} or empty selects the no-argument configuration
     * @return the configuration built from the file, or the default one
     * @throws JATEException if the configuration cannot be constructed
     */
    public static JATEProperties getJateProperties(String str) throws JATEException {
        if (str != null && !str.isEmpty()) {
            return new JATEProperties(str);
        }
        return new JATEProperties();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Prints the command line usage, an example invocation and the list of
     * supported options to standard output, one option per line.
     */
    public static void printHelp() {
        StringBuilder sb = new StringBuilder("Usage:\n");
        sb.append("java -cp '[CLASSPATH]' ").append(App.class.getName()).append(ComplexShingleFilter.DEFAULT_TOKEN_SEPARATOR).append("[OPTIONS] [SOLR_HOME_PATH] [SOLR_CORE_NAME] ").append("\n\n");
        sb.append("Example: java -cp '/libs/*' /corpus/ /solr/server/solr jate  -prop jate.properties -cf.k 20  ...\n\n");
        sb.append("[OPTIONS]:\n");
        // Each option description ends with a newline so the options do not run together.
        sb.append("\t\t-corpusDir\t\t. The corpus to be indexed, from where term candidate will be extracted, ranked and weighted.").append("\n");
        sb.append("\t\t-prop\t\t. jate.properties file for the configuration of Solr schema.").append("\n");
        sb.append("\t\t-c\t\t'true' or 'false'. Whether to collect term information for exporting, e.g., offsets in documents. Default is false.\n");
        sb.append("\t\t-r\t\t. Reference corpus frequency file path (-r) is required by AppGlossEx, AppTermEx and AppWeirdness.\n");
        sb.append("\t\t-cf.t\t\tA number. Cutoff score threshold for selecting terms. If multiple -cf.* parameters are set the preference order will be cf.t, cf.k, cf.kp.").append("\n");
        sb.append("\t\t-cf.k\t\tA number. Cutoff top ranked K terms to be selected. If multiple -cf.* parameters are set the preference order will be cf.t, cf.k, cf.kp.").append("\n");
        sb.append("\t\t-cf.kp\t\tA number. Cutoff top ranked K% terms to be selected. If multiple -cf.* parameters are set the preference order will be cf.t, cf.k, cf.kp.").append("\n");
        sb.append("\t\t-pf.mttf\t\tA number. Pre-filter minimum total term frequency. \n");
        sb.append("\t\t-pf.mtcf\t\tA number. Pre-filter minimum context frequency of a term (used by co-occurrence based methods). \n");
        sb.append("\t\t-o\t\tA file path to save output. \n");
        System.out.println(sb);
    }
}
