package com.entopix.maui.filters;

import com.entopix.maui.stemmers.PorterStemmer;
import com.entopix.maui.stemmers.Stemmer;
import com.entopix.maui.stopwords.Stopwords;
import com.entopix.maui.stopwords.StopwordsEnglish;
import com.entopix.maui.util.Candidate;
import com.entopix.maui.util.Counter;
import com.entopix.maui.vocab.Vocabulary;
import com.entopix.maui.wikifeatures.WikiFeatures;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import weka.classifiers.Classifier;
import weka.classifiers.meta.Bagging;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;
import weka.filters.Filter;

/* loaded from: input_file:com/entopix/maui/filters/MauiFilter.class */
public class MauiFilter extends Filter {
    /** Logger shared by all instances of this filter. */
    private static final Logger log = LoggerFactory.getLogger(MauiFilter.class);
    private static final long serialVersionUID = 1;
    /** Vocabulary queried for senses, related terms and generality; transient, so it is not serialized with the filter. */
    transient Vocabulary vocabulary;
    // Positions of the file name, document text and keyphrase attributes in the incoming data.
    private int fileNameAtt = 0;
    private int documentAtt = 1;
    private int keyphrasesAtt = 2;
    // Bounds (in words) on the candidate phrases built in getCandidates.
    private int maxPhraseLength = 5;
    private int minPhraseLength = 1;
    /** Divisor applied to a keyphrase's count when the class attribute is numeric. */
    private int numIndexers = 1;
    /** When true the class attribute is nominal ("False"/"True"); otherwise it is numeric. */
    private boolean nominalClassValue = true;
    /** Enables the progress messages written through log.info. */
    private boolean debugMode = false;
    /** Candidates whose frequency is below this value are skipped. */
    private int minOccurFrequency = 1;
    private int totalCorrect = 0;
    /** Number of feature slots per candidate; the class value is stored at index numFeatures. */
    private int numFeatures = 13;
    // Slot of each feature inside the array produced by computeFeatureValues.
    private int tfIndex = 0;
    private int idfIndex = 1;
    private int tfidfIndex = 2;
    private int firstOccurIndex = 3;
    private int lastOccurIndex = 4;
    private int spreadOccurIndex = 5;
    private int domainKeyphIndex = 6;
    private int lengthIndex = 7;
    private int generalityIndex = 8;
    private int nodeDegreeIndex = 9;
    private int invWikipFreqIndex = 10;
    private int totalWikipKeyphrIndex = 11;
    private int wikipGeneralityIndex = 12;
    // Switches for the feature groups filled in by computeFeatureValues; a disabled group leaves its slots at 0.
    boolean useBasicFeatures = true;
    boolean useKeyphrasenessFeature = true;
    boolean useFrequencyFeatures = true;
    boolean usePositionsFeatures = true;
    boolean useThesaurusFeatures = true;
    boolean useLengthFeature = true;
    boolean useWikipediaFeatures = false;
    // Helper filters created in setInputFormat; numbersFilter is only created when vocabularyName is "none".
    private MauiPhraseFilter phraseFilter = null;
    private NumbersFilter numbersFilter = null;
    /** Model that scores candidates; a Bagging default is created in buildClassifier when none was supplied. */
    private Classifier classifier = null;
    /** Candidate key -> counter updated once per document whose candidate set contains the key; null until built. */
    public HashMap<String, Counter> globalDictionary = null;
    /** Keyphrase key -> summed counts of the given keyphrases over all buffered documents. */
    private HashMap<String, Counter> keyphraseDictionary = null;
    /** Candidates extracted for each buffered instance; transient, so it is not serialized. */
    transient HashMap<Instance, HashMap<String, Candidate>> allCandidates = null;
    /** Number of buffered instances at the time the dictionaries were built; used in the IDF computation. */
    private int numDocs = 0;
    /** Dataset of classifier instances; replaced by an empty copy with the same header once the classifier is built. */
    private Instances classifierData = null;
    private Stemmer stemmer = new PorterStemmer();
    private Stopwords stopwords = new StopwordsEnglish();
    public String documentLanguage = "en";
    /** Name of the vocabulary; the special value "none" switches the filter to vocabulary-free processing. */
    private String vocabularyName = "agrovoc";
    public String vocabularyFormat = "skos";
    /** Source of the Wikipedia-based feature values; only consulted when useWikipediaFeatures is true. */
    public WikiFeatures wikiFeatures = null;

    /* loaded from: input_file:com/entopix/maui/filters/MauiFilter$MauiFilterException.class */
    /**
     * Checked exception raised by this filter when its input is invalid or
     * when one of the steps it delegates to fails.
     */
    public class MauiFilterException extends Exception {
        private static final long serialVersionUID = 1;

        /**
         * Creates the exception with a human-readable description.
         *
         * @param message description of what went wrong
         */
        public MauiFilterException(String message) {
            super(message);
        }
    }

    /**
     * Returns the current value of the {@code totalCorrect} counter.
     *
     * @return the stored counter value
     */
    public int getTotalCorrect() {
        return totalCorrect;
    }

    /**
     * Switches the basic features (TFxIDF and first occurrence) on or off.
     *
     * @param enabled {@code true} to compute the basic features
     */
    public void setBasicFeatures(boolean enabled) {
        useBasicFeatures = enabled;
    }

    /**
     * Supplies the classifier used to score candidates. When none is set, a
     * default one is created while the model is being built.
     *
     * @param newClassifier classifier to use
     */
    public void setClassifier(Classifier newClassifier) {
        classifier = newClassifier;
    }

    /**
     * Switches the domain keyphraseness feature on or off.
     *
     * @param enabled {@code true} to compute the keyphraseness feature
     */
    public void setKeyphrasenessFeature(boolean enabled) {
        useKeyphrasenessFeature = enabled;
    }

    /**
     * Switches the frequency features (term frequency and IDF) on or off.
     *
     * @param enabled {@code true} to compute the frequency features
     */
    public void setFrequencyFeatures(boolean enabled) {
        useFrequencyFeatures = enabled;
    }

    /**
     * Switches the position features (last occurrence and spread) on or off.
     *
     * @param enabled {@code true} to compute the position features
     */
    public void setPositionsFeatures(boolean enabled) {
        usePositionsFeatures = enabled;
    }

    /**
     * Switches the thesaurus features (node degree and generality) on or off.
     *
     * @param enabled {@code true} to compute the thesaurus features
     */
    public void setThesaurusFeatures(boolean enabled) {
        useThesaurusFeatures = enabled;
    }

    /**
     * Switches the Wikipedia-based features on or off and sets their data source.
     *
     * <p>When the features are enabled and no source is supplied, a fresh
     * {@link WikiFeatures} instance is created, stored, and populated from the
     * bundled labels file (a path relative to the working directory).
     *
     * @param enabled      {@code true} to compute the Wikipedia features
     * @param wikiFeatures source of the Wikipedia feature values; may be
     *                     {@code null}, in which case a default one is loaded
     *                     when {@code enabled} is {@code true}
     */
    public void setWikipediaFeatures(boolean enabled, WikiFeatures wikiFeatures) {
        this.useWikipediaFeatures = enabled;
        this.wikiFeatures = wikiFeatures;
        if (enabled && wikiFeatures == null) {
            // The new instance must be stored in the field before loading;
            // previously it was discarded and load_csv hit a null field.
            this.wikiFeatures = new WikiFeatures();
            this.wikiFeatures.load_csv("src/main/resources/data/labels.csv.gzip", true);
        }
    }

    /**
     * Switches the phrase length feature on or off.
     *
     * @param enabled {@code true} to compute the length feature
     */
    public void setLengthFeature(boolean enabled) {
        useLengthFeature = enabled;
    }

    /**
     * Replaces the stopword list consulted while candidates are extracted.
     *
     * @param newStopwords stopword list to use
     */
    public void setStopwords(Stopwords newStopwords) {
        stopwords = newStopwords;
    }

    /**
     * Replaces the stemmer held by this filter.
     *
     * @param newStemmer stemmer to use
     */
    public void setStemmer(Stemmer newStemmer) {
        stemmer = newStemmer;
    }

    /**
     * Sets the number of indexers, used as the divisor of a keyphrase's count
     * when the class attribute is numeric.
     *
     * @param count number of indexers
     */
    public void setNumIndexers(int count) {
        numIndexers = count;
    }

    /**
     * Sets the minimum frequency a candidate needs in order to be considered.
     *
     * @param minimum smallest accepted candidate frequency
     */
    public void setMinNumOccur(int minimum) {
        minOccurFrequency = minimum;
    }

    /**
     * Sets the maximum number of words in a candidate phrase.
     *
     * @param length maximum phrase length in words
     */
    public void setMaxPhraseLength(int length) {
        maxPhraseLength = length;
    }

    /**
     * Sets the minimum number of words in a candidate phrase.
     *
     * @param length minimum phrase length in words
     */
    public void setMinPhraseLength(int length) {
        minPhraseLength = length;
    }

    /**
     * Sets the language code of the documents (the default is {@code "en"}).
     *
     * @param language language code to store
     */
    public void setDocumentLanguage(String language) {
        documentLanguage = language;
    }

    /**
     * Turns the filter's progress logging on or off.
     *
     * @param enabled {@code true} to emit progress messages
     */
    public void setDebug(boolean enabled) {
        debugMode = enabled;
    }

    /**
     * Sets the vocabulary name from a name or file path.
     *
     * <p>The special value {@code "none"} disables the thesaurus features and
     * is stored as is. Any other value is reduced to its last path segment
     * (after the final {@code '/'}) and cut at the first {@code '.'}; a value
     * without a dot is kept whole instead of failing.
     *
     * @param name vocabulary name or path, or {@code "none"}
     */
    public void setVocabularyName(String name) {
        if (name.equals("none")) {
            setThesaurusFeatures(false);
            this.vocabularyName = "none";
            return;
        }
        String fileName = name.substring(name.lastIndexOf("/") + 1);
        int dot = fileName.indexOf(".");
        // indexOf returns -1 for names without an extension; substring(0, -1) would throw.
        this.vocabularyName = dot >= 0 ? fileName.substring(0, dot) : fileName;
    }

    /**
     * Sets the format identifier of the vocabulary (the default is {@code "skos"}).
     *
     * @param format vocabulary format identifier
     */
    public void setVocabularyFormat(String format) {
        vocabularyFormat = format;
    }

    /**
     * Returns the index used for the normalized candidate form, which is the
     * document attribute index.
     *
     * @return the current document attribute index
     */
    public int getNormalizedFormIndex() {
        return documentAtt;
    }

    /**
     * Returns the index used for the candidate's output form, which is the
     * document attribute index.
     *
     * @return the current document attribute index
     */
    public int getOutputFormIndex() {
        return documentAtt;
    }

    /**
     * Returns the index of the probability value: the document attribute
     * index plus the number of features plus one.
     *
     * @return index of the probability attribute
     */
    public int getProbabilityIndex() {
        int offsetPastFeatures = numFeatures + 1;
        return documentAtt + offsetPastFeatures;
    }

    /**
     * Returns the index of the rank value, which directly follows the
     * probability.
     *
     * @return index of the rank attribute
     */
    public int getRankIndex() {
        int probabilityIndex = getProbabilityIndex();
        return probabilityIndex + 1;
    }

    /**
     * Returns the index of the attribute holding the document text.
     *
     * @return document attribute index
     */
    public int getDocumentAtt() {
        return documentAtt;
    }

    /**
     * Sets the index of the attribute holding the document text.
     *
     * @param index document attribute index
     */
    public void setDocumentAtt(int index) {
        documentAtt = index;
    }

    /**
     * Returns the index of the attribute holding the given keyphrases.
     *
     * @return keyphrase attribute index
     */
    public int getKeyphrasesAtt() {
        return keyphrasesAtt;
    }

    /**
     * Sets the index of the attribute holding the given keyphrases.
     *
     * @param index keyphrase attribute index
     */
    public void setKeyphrasesAtt(int index) {
        keyphrasesAtt = index;
    }

    /**
     * Supplies the vocabulary queried for senses, related terms and generality.
     *
     * @param newVocabulary vocabulary to use
     */
    public void setVocabulary(Vocabulary newVocabulary) {
        vocabulary = newVocabulary;
    }

    /**
     * Returns a one-sentence description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        final String description = "Converts incoming data into data appropriate for keyphrase classification.";
        return description;
    }

    /**
     * Validates the input format and configures the helper filters.
     *
     * <p>The phrase filter is always set up on the document attribute. When the
     * vocabulary name is {@code "none"} a numbers filter is chained after it and
     * its output format becomes this filter's input format; otherwise the phrase
     * filter's output format is used directly.
     *
     * @param instances header describing the incoming data
     * @return always {@code false}; the output format is only set once the
     *         pending instances have been converted
     * @throws MauiFilterException if a class index is set, if the keyphrase or
     *         document attribute is not a string attribute, or if one of the
     *         helper filters cannot be set up (the original exception is
     *         attached as the cause)
     */
    public boolean setInputFormat(Instances instances) throws MauiFilterException {
        if (instances.classIndex() >= 0) {
            throw new MauiFilterException("Don't know what do to if class index set!");
        }
        if (!instances.attribute(this.keyphrasesAtt).isString() || !instances.attribute(this.documentAtt).isString()) {
            throw new MauiFilterException("Keyphrase attribute and document attribute need to be string attributes.");
        }

        try {
            this.phraseFilter = new MauiPhraseFilter();
            this.phraseFilter.setAttributeIndicesArray(new int[]{this.documentAtt});
            this.phraseFilter.setInputFormat(instances);
        } catch (Exception e) {
            MauiFilterException failure = new MauiFilterException("Exception loading MauiPhraseFilter");
            failure.initCause(e);
            throw failure;
        }

        // Kept in a separate try block: when nested inside the phrase-filter try,
        // a failure here was re-caught and reported as a MauiPhraseFilter problem.
        try {
            if (this.vocabularyName.equals("none")) {
                this.numbersFilter = new NumbersFilter();
                this.numbersFilter.setInputFormat(this.phraseFilter.getOutputFormat());
                super.setInputFormat(this.numbersFilter.getOutputFormat());
            } else {
                super.setInputFormat(this.phraseFilter.getOutputFormat());
            }
        } catch (Exception e) {
            MauiFilterException failure = new MauiFilterException("Exception loading NumbersFilter");
            failure.initCause(e);
            throw failure;
        }
        return false;
    }

    /**
     * Describes what kind of data this filter accepts: every attribute type,
     * missing values, and any class (or none at all).
     *
     * @return the capabilities of this filter
     */
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.enableAllAttributes();
        Capabilities.Capability[] extras = {
            Capabilities.Capability.MISSING_VALUES,
            Capabilities.Capability.NOMINAL_CLASS,
            Capabilities.Capability.NO_CLASS
        };
        for (Capabilities.Capability extra : extras) {
            result.enable(extra);
        }
        result.enableAllClasses();
        return result;
    }

    /**
     * Feeds one instance to the filter.
     *
     * <p>The instance is first passed through the phrase filter and, when the
     * vocabulary name is {@code "none"}, through the numbers filter. While no
     * global dictionary exists yet the result is only buffered; afterwards it is
     * converted immediately and the resulting instances are pushed to the output
     * queue.
     *
     * @param instance the instance to process
     * @return {@code false} if the instance was buffered, {@code true} if it was
     *         converted and its output was queued
     * @throws MauiFilterException if no input format is defined or if any
     *         processing step fails (the original exception is attached as the
     *         cause)
     */
    public boolean input(Instance instance) throws MauiFilterException {
        if (getInputFormat() == null) {
            throw new MauiFilterException("No input instance format defined");
        }
        if (this.m_NewBatch) {
            resetQueue();
            this.m_NewBatch = false;
        }
        if (this.debugMode) {
            log.info("-- Reading instance");
        }

        Instance output;
        try {
            this.phraseFilter.input(instance);
            this.phraseFilter.batchFinished();
            output = this.phraseFilter.output();
        } catch (Exception e) {
            MauiFilterException failure = new MauiFilterException("Error applying PhraseFilter ");
            failure.initCause(e);
            throw failure;
        }

        // Each stage has its own try block so that a failure is reported under
        // the stage that actually failed instead of being relabelled.
        if (this.vocabularyName.equals("none")) {
            try {
                this.numbersFilter.input(output);
                this.numbersFilter.batchFinished();
                output = this.numbersFilter.output();
            } catch (Exception e) {
                MauiFilterException failure = new MauiFilterException("Error applying NumbersFilter ");
                failure.initCause(e);
                throw failure;
            }
        }

        try {
            if (this.globalDictionary == null) {
                bufferInput(output);
                return false;
            }
            Enumeration elements = convertInstance(output, false).elements();
            while (elements.hasMoreElements()) {
                push((Instance) elements.nextElement());
            }
            return true;
        } catch (RuntimeException e) {
            // Still surfaced as MauiFilterException so callers see the same exception type as before.
            MauiFilterException failure = new MauiFilterException("Error converting instance ");
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * Signals the end of a batch. On the first batch (no global dictionary yet)
     * this extracts the candidates, builds the dictionaries and the classifier,
     * and converts all buffered instances; in every case the input buffer is
     * flushed and a new batch is started.
     *
     * @return {@code true} if converted instances are waiting in the output queue
     * @throws MauiFilterException if no input format is defined or the
     *         classifier cannot be built
     */
    public boolean batchFinished() throws MauiFilterException {
        if (getInputFormat() == null) {
            throw new MauiFilterException("No input instance format defined");
        }

        boolean modelMissing = (globalDictionary == null);
        if (modelMissing) {
            selectCandidates();
            buildGlobalDictionaries();
            buildClassifier();
            convertPendingInstances();
        }

        flushInput();
        m_NewBatch = true;

        int pending = numPendingOutput();
        return pending != 0;
    }

    /**
     * Extracts the candidates of every buffered instance and stores them in
     * {@code allCandidates}, keyed by the instance they came from.
     */
    private void selectCandidates() {
        if (debugMode) {
            log.info("--- Computing candidates...");
        }
        allCandidates = new HashMap<>();

        Instances buffered = getInputFormat();
        int total = buffered.numInstances();
        for (int index = 0; index < total; index++) {
            Instance document = buffered.instance(index);
            String fileName = document.stringValue(fileNameAtt);
            if (debugMode) {
                log.info("---- Processing document " + fileName + ", " + (index + 1) + " out of " + total + "...");
            }

            HashMap<String, Candidate> found = getCandidates(document.stringValue(documentAtt));
            if (debugMode) {
                log.info("---- " + found.size() + " candidates");
            }
            allCandidates.put(document, found);
        }
    }

    /**
     * Builds the two dictionaries the features rely on.
     *
     * <p>The global dictionary receives one counter per candidate key, created
     * the first time a document's candidate set contains the key and incremented
     * for every further document. The keyphrase dictionary sums, per keyphrase,
     * the counts returned by {@code getGivenKeyphrases} for each buffered
     * instance. Finally the number of buffered instances is recorded.
     */
    public void buildGlobalDictionaries() {
        if (debugMode) {
            log.info("--- Building global frequency dictionary");
        }
        globalDictionary = new HashMap<>();
        for (HashMap<String, Candidate> documentCandidates : allCandidates.values()) {
            for (String key : documentCandidates.keySet()) {
                Counter seen = globalDictionary.get(key);
                if (seen != null) {
                    seen.increment();
                } else {
                    globalDictionary.put(key, new Counter());
                }
            }
        }

        if (debugMode) {
            log.info("--- Building keyphraseness dictionary");
        }
        keyphraseDictionary = new HashMap<>();
        for (int index = 0; index < getInputFormat().numInstances(); index++) {
            String keyphraseText = getInputFormat().instance(index).stringValue(keyphrasesAtt);
            HashMap<String, Counter> given = getGivenKeyphrases(keyphraseText);
            if (given == null) {
                continue;
            }
            for (String keyphrase : given.keySet()) {
                Counter documentCount = given.get(keyphrase);
                Counter total = keyphraseDictionary.get(keyphrase);
                if (total != null) {
                    total.increment(documentCount.value());
                } else {
                    keyphraseDictionary.put(keyphrase, new Counter(documentCount.value()));
                }
            }
        }

        if (debugMode) {
            log.info("--- Statistics about global dictionaries: ");
            log.info("\t" + globalDictionary.size() + " terms in the global dictionary");
            log.info("\t" + keyphraseDictionary.size() + " terms in the keyphrase dictionary");
        }
        numDocs = getInputFormat().numInstances();
    }

    /**
     * Builds the training data from the buffered instances and trains the
     * classifier on it.
     *
     * <p>The training header holds the numeric feature attributes followed by
     * the class attribute ("Keyphrase?"), which is nominal (False/True) or
     * numeric depending on {@code nominalClassValue}. One training instance is
     * created per candidate whose frequency reaches {@code minOccurFrequency}.
     * If no classifier was supplied, a bagged tree learner is configured as the
     * default. After training, the data is replaced by an empty copy of itself so
     * that only the header is kept.
     *
     * @throws MauiFilterException if the classifier cannot be built; the
     *         original exception is attached as the cause
     */
    private void buildClassifier() throws MauiFilterException {
        // Feature attributes in the order of the feature index fields.
        String[] featureNames = {
            "Term_frequency", "IDF", "TFxIDF", "First_occurrence", "Last_occurrence", "Spread",
            "Domain_keyphraseness", "Length", "Generality", "Node_degree",
            "Wikipedia_keyphraseness", "Wikipedia_inlinks", "Wikipedia_generality"
        };

        FastVector attributes = new FastVector();
        for (int i = 0; i < getInputFormat().numAttributes(); i++) {
            if (i == this.documentAtt) {
                for (String featureName : featureNames) {
                    attributes.addElement(new Attribute(featureName));
                }
            } else if (i == this.keyphrasesAtt) {
                if (this.nominalClassValue) {
                    FastVector classLabels = new FastVector(2);
                    classLabels.addElement("False");
                    classLabels.addElement("True");
                    attributes.addElement(new Attribute("Keyphrase?", classLabels));
                } else {
                    attributes.addElement(new Attribute("Keyphrase?"));
                }
            }
        }
        this.classifierData = new Instances("ClassifierData", attributes, 0);
        this.classifierData.setClassIndex(this.numFeatures);

        if (this.debugMode) {
            log.info("--- Converting instances for classifier");
        }
        int numInstances = getInputFormat().numInstances();
        for (int doc = 0; doc < numInstances; doc++) {
            Instance instance = getInputFormat().instance(doc);
            HashMap<String, Counter> givenKeyphrases = getGivenKeyphrases(instance.stringValue(this.keyphrasesAtt));
            HashMap<String, Candidate> candidates = this.allCandidates.get(instance);
            int positives = 0;
            int negatives = 0;
            if (this.debugMode) {
                // Reported 1-based, matching the progress message of candidate selection.
                log.info("--- Computing features for document " + (doc + 1) + " out of " + numInstances + "...");
            }
            for (Candidate candidate : candidates.values()) {
                if (candidate.getFrequency() < this.minOccurFrequency) {
                    continue;
                }
                double[] values = computeFeatureValues(candidate, true, givenKeyphrases, candidates);
                // The last slot is the class value; 0 marks a non-keyphrase.
                if (values[values.length - 1] == 0.0d) {
                    negatives++;
                } else {
                    positives++;
                }
                this.classifierData.add(new Instance(instance.weight(), values));
            }
            log.debug(positives + " positive; " + negatives + " negative instances");
        }

        log.debug("--- Building classifier");
        if (this.classifier == null) {
            String options = this.nominalClassValue
                    ? "-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"
                    : "-P 100 -S 1 -I 10 -W weka.classifiers.trees.M5P -- -U -M 7.0";
            this.classifier = new Bagging();
            try {
                this.classifier.setOptions(Utils.splitOptions(options));
            } catch (Exception e) {
                // Best effort: training continues with whatever options were applied.
                log.warn("Exception while loading classifier's options " + e.getMessage(), e);
            }
        }

        try {
            this.classifier.buildClassifier(this.classifierData);
            if (this.debugMode) {
                log.info(this.classifier.toString());
            }
            // Keep only the header; the training rows are no longer needed.
            this.classifierData = new Instances(this.classifierData, 0);
        } catch (Exception e) {
            MauiFilterException failure = new MauiFilterException("Exception while building classifier " + e.getMessage());
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * Computes the feature vector of one candidate.
     *
     * <p>The returned array has {@code numFeatures + 1} slots: one per feature
     * (at the positions given by the feature index fields) and the class value
     * in the last slot. Slots of disabled feature groups stay at 0.
     *
     * @param candidate       the candidate to describe
     * @param training        {@code true} when the candidate comes from a document
     *                        whose own contribution is part of the dictionaries;
     *                        that contribution is then subtracted from the
     *                        document frequency and keyphraseness counts
     * @param givenKeyphrases keyphrases assigned to the document, or {@code null}
     *                        if there are none
     * @param candidates      all candidates of the same document, used to count
     *                        related terms that also occur in it
     * @return the feature values followed by the class value
     */
    private double[] computeFeatureValues(Candidate candidate, boolean training, HashMap<String, Counter> givenKeyphrases, HashMap<String, Candidate> candidates) {
        double[] features = new double[this.numFeatures + 1];
        String name = candidate.getName();
        String bestFullForm = candidate.getBestFullForm();
        String title = candidate.getTitle();
        boolean noVocabulary = this.vocabularyName.equals("none");

        // Document frequency of the candidate, without the current document when training.
        Counter globalCount = this.globalDictionary.get(name);
        double documentFrequency = 0.0d;
        if (globalCount != null) {
            documentFrequency = globalCount.value();
            if (training) {
                documentFrequency -= 1.0d;
            }
        }
        double termFrequency = candidate.getTermFrequency();
        double idf = -Math.log((documentFrequency + 1.0d) / (this.numDocs + 1.0d));

        if (this.useBasicFeatures) {
            features[this.tfidfIndex] = termFrequency * idf;
            features[this.firstOccurIndex] = candidate.getFirstOccurrence();
        }
        if (this.useFrequencyFeatures) {
            features[this.tfIndex] = termFrequency;
            features[this.idfIndex] = idf;
        }
        if (this.usePositionsFeatures) {
            features[this.lastOccurIndex] = candidate.getLastOccurrence();
            features[this.spreadOccurIndex] = candidate.getSpread();
        }

        // Key used for the keyphraseness lookup and in the length warning below.
        String lookupKey = name;
        if (this.useKeyphrasenessFeature) {
            if (!noVocabulary) {
                lookupKey = title;
            }
            Counter domainCount = this.keyphraseDictionary.get(lookupKey);
            if (domainCount == null) {
                // Unknown keyphrase: also covers the training branch, which used to
                // dereference the missing counter.
                features[this.domainKeyphIndex] = 0.0d;
            } else if (training && givenKeyphrases != null && givenKeyphrases.containsKey(lookupKey)) {
                features[this.domainKeyphIndex] = domainCount.value() - 1;
            } else {
                features[this.domainKeyphIndex] = domainCount.value();
            }
        }

        if (this.useLengthFeature) {
            if (bestFullForm == null) {
                log.warn("Warning! Problem with candidate " + lookupKey);
                features[this.lengthIndex] = 1.0d;
            } else {
                features[this.lengthIndex] = bestFullForm.split(" ").length;
            }
        }

        if (this.useThesaurusFeatures && this.vocabulary != null) {
            // Node degree: number of related terms that are candidates of the same document.
            int nodeDegree = 0;
            ArrayList<String> related = this.vocabulary.getRelated(name);
            if (related != null) {
                for (String relatedId : related) {
                    if (candidates.get(relatedId) != null) {
                        nodeDegree++;
                    }
                }
            }
            features[this.nodeDegreeIndex] = nodeDegree;
            features[this.generalityIndex] = this.vocabulary.getGenerality(name);
        }

        if (this.useWikipediaFeatures) {
            double wikipediaKeyphraseness = 0.0d;
            for (String fullForm : candidate.getFullForms().keySet()) {
                wikipediaKeyphraseness += this.wikiFeatures.getWikipediaKeyphraseness(fullForm);
            }
            features[this.totalWikipKeyphrIndex] = wikipediaKeyphraseness;
            String wikipediaTitle = noVocabulary ? bestFullForm : title;
            features[this.invWikipFreqIndex] = this.wikiFeatures.getInversedWikipediaFrequency(wikipediaTitle);
            features[this.wikipGeneralityIndex] = this.wikiFeatures.getWikipediaGenerality(wikipediaTitle);
        }

        // Class value: is the candidate one of the given keyphrases?
        String classKey = noVocabulary ? lookupKey : title;
        if (givenKeyphrases == null || !givenKeyphrases.containsKey(classKey)) {
            features[this.numFeatures] = 0.0d;
        } else if (this.nominalClassValue) {
            features[this.numFeatures] = 1.0d;
        } else {
            // Force floating-point division so a fractional share is not truncated.
            features[this.numFeatures] = (double) givenKeyphrases.get(classKey).value() / this.numIndexers;
        }
        return features;
    }

    /**
     * Defines the output format and converts every buffered instance.
     *
     * <p>The output header is derived from the input header, skipping its first
     * attribute: the document attribute is replaced by two string attributes
     * (candidate name and original form), the numeric feature attributes, the
     * probability and the rank; the keyphrase attribute becomes the
     * "Keyphrase?" class attribute; any other attribute is copied. All converted
     * instances are pushed to the output queue.
     */
    private void convertPendingInstances() {
        if (debugMode) {
            log.info("--- Converting pending instances");
        }

        String[] numericColumns = {
            "Term_frequency", "IDF", "TFxIDF", "First_occurrence", "Last_occurrence", "Spread",
            "Domain_keyphraseness", "Length", "Generality", "Node_degree",
            "Wikipedia_keyphraseness", "Wikipedia_inlinks", "Wikipedia_generality",
            "Probability", "Rank"
        };

        FastVector header = new FastVector();
        for (int att = 1; att < getInputFormat().numAttributes(); att++) {
            if (att == documentAtt) {
                header.addElement(new Attribute("Candidate_name", (FastVector) null));
                header.addElement(new Attribute("Candidate_original", (FastVector) null));
                for (String column : numericColumns) {
                    header.addElement(new Attribute(column));
                }
            } else if (att == keyphrasesAtt) {
                if (nominalClassValue) {
                    FastVector labels = new FastVector(2);
                    labels.addElement("False");
                    labels.addElement("True");
                    header.addElement(new Attribute("Keyphrase?", labels));
                } else {
                    header.addElement(new Attribute("Keyphrase?"));
                }
            } else {
                header.addElement(getInputFormat().attribute(att));
            }
        }
        setOutputFormat(new Instances("mauidata", header, 0));

        for (int doc = 0; doc < getInputFormat().numInstances(); doc++) {
            FastVector converted = convertInstance(getInputFormat().instance(doc), true);
            Enumeration rows = converted.elements();
            while (rows.hasMoreElements()) {
                push((Instance) rows.nextElement());
            }
        }
    }

    /**
     * Converts one document instance into one output instance per candidate and
     * ranks the results.
     *
     * <p>For every candidate whose frequency reaches {@code minOccurFrequency}
     * the features are computed, the classifier's probability is obtained, and an
     * output row is assembled from the input attributes (skipping the first one).
     * The rows are then ordered by three successive stable sorts, the last one by
     * descending probability, and numbered: rows whose final sort key is at least
     * 1 (probability 0) receive {@code Integer.MAX_VALUE} as rank, all others
     * receive consecutive ranks starting at 1.
     *
     * @param instance the document instance to convert
     * @param training passed on to the feature computation; {@code true} when the
     *                 document contributed to the dictionaries
     * @return the ranked output instances
     */
    private FastVector convertInstance(Instance instance, boolean training) {
        FastVector converted = new FastVector();
        String fileName = instance.stringValue(this.fileNameAtt);
        if (this.debugMode) {
            log.info("-- Converting instance for document " + fileName);
        }

        HashMap<String, Counter> givenKeyphrases = instance.isMissing(this.keyphrasesAtt) ? null : getGivenKeyphrases(instance.stringValue(this.keyphrasesAtt));
        // Reuse the candidates extracted earlier for this instance, if any.
        HashMap<String, Candidate> candidates = (this.allCandidates == null || !this.allCandidates.containsKey(instance)) ? getCandidates(instance.stringValue(this.documentAtt)) : this.allCandidates.get(instance);
        if (this.debugMode) {
            log.info(candidates.size() + " candidates ");
        }

        // Output columns used as sort keys.
        // NOTE(review): with the default attribute positions these two offsets select
        // the IDF and TFxIDF columns of the output row - confirm these are the
        // intended tie-break columns.
        int secondSortColumn = this.documentAtt + 2;
        int firstSortColumn = this.documentAtt + 3;
        // Replaced by the real probability column as soon as a row has been assembled.
        int probabilityColumn = this.documentAtt + this.numFeatures;

        int positives = 0;
        int negatives = 0;
        for (Candidate candidate : candidates.values()) {
            if (candidate.getFrequency() < this.minOccurFrequency) {
                continue;
            }
            String name = candidate.getName();
            String original = this.vocabularyName.equals("none") ? candidate.getBestFullForm() : candidate.getTitle();

            Instance featureInstance = new Instance(instance.weight(), computeFeatureValues(candidate, training, givenKeyphrases, candidates));
            featureInstance.setDataset(this.classifierData);
            try {
                double[] distribution = this.classifier.distributionForInstance(featureInstance);
                double probability = this.nominalClassValue ? distribution[1] : distribution[0];

                double[] values = new double[instance.numAttributes() + this.numFeatures + 2];
                int out = 0;
                for (int att = 1; att < instance.numAttributes(); att++) {
                    if (att == this.documentAtt) {
                        // Two string columns: candidate name and its original form
                        // (falling back to the name when no form is available).
                        values[out] = outputFormatPeek().attribute(out).addStringValue(name);
                        out++;
                        values[out] = outputFormatPeek().attribute(out).addStringValue(original != null ? original : name);
                        out++;
                        values[out++] = featureInstance.value(this.tfIndex);
                        values[out++] = featureInstance.value(this.idfIndex);
                        values[out++] = featureInstance.value(this.tfidfIndex);
                        values[out++] = featureInstance.value(this.firstOccurIndex);
                        values[out++] = featureInstance.value(this.lastOccurIndex);
                        values[out++] = featureInstance.value(this.spreadOccurIndex);
                        values[out++] = featureInstance.value(this.domainKeyphIndex);
                        values[out++] = featureInstance.value(this.lengthIndex);
                        values[out++] = featureInstance.value(this.generalityIndex);
                        values[out++] = featureInstance.value(this.nodeDegreeIndex);
                        values[out++] = featureInstance.value(this.invWikipFreqIndex);
                        values[out++] = featureInstance.value(this.totalWikipKeyphrIndex);
                        values[out++] = featureInstance.value(this.wikipGeneralityIndex);
                        probabilityColumn = out;
                        values[out++] = probability;
                        // Rank is filled in after sorting.
                        values[out++] = Instance.missingValue();
                    } else if (att == this.keyphrasesAtt) {
                        values[out++] = featureInstance.classValue();
                    } else {
                        values[out++] = instance.value(att);
                    }
                }

                Instance row = new Instance(instance.weight(), values);
                row.setDataset(outputFormatPeek());
                converted.addElement(row);
                if (featureInstance.classValue() == 0.0d) {
                    negatives++;
                } else {
                    positives++;
                }
            } catch (Exception e) {
                // Best effort: the candidate is skipped, but the cause is now logged too.
                log.error("Exception while getting probability for candidate " + candidate.getName(), e);
            }
        }
        if (this.debugMode) {
            log.info(positives + " positive; " + negatives + " negative instances");
        }

        // Three stable sorts, least significant key first.
        double[] keys = new double[converted.size()];
        for (int k = 0; k < keys.length; k++) {
            keys[k] = ((Instance) converted.elementAt(k)).value(firstSortColumn);
        }
        FastVector afterFirstSort = new FastVector(converted.size());
        int[] order = Utils.stableSort(keys);
        for (int k = 0; k < keys.length; k++) {
            afterFirstSort.addElement(converted.elementAt(order[k]));
        }

        for (int k = 0; k < keys.length; k++) {
            keys[k] = -((Instance) afterFirstSort.elementAt(k)).value(secondSortColumn);
        }
        FastVector afterSecondSort = new FastVector(afterFirstSort.size());
        order = Utils.stableSort(keys);
        for (int k = 0; k < keys.length; k++) {
            afterSecondSort.addElement(afterFirstSort.elementAt(order[k]));
        }

        for (int k = 0; k < keys.length; k++) {
            keys[k] = 1.0d - ((Instance) afterSecondSort.elementAt(k)).value(probabilityColumn);
        }
        FastVector ranked = new FastVector(afterSecondSort.size());
        order = Utils.stableSort(keys);
        for (int k = 0; k < keys.length; k++) {
            ranked.addElement(afterSecondSort.elementAt(order[k]));
        }

        // Assign ranks; the rank column directly follows the probability column.
        // The keys array is not re-sorted, so keys[k] is still the pre-sort value
        // at position k, exactly as before.
        int rank = 1;
        for (int k = 0; k < keys.length; k++) {
            Instance current = (Instance) ranked.elementAt(k);
            if (Utils.grOrEq(keys[k], 1.0d)) {
                current.setValue(probabilityColumn + 1, 2.147483647E9d);
            } else {
                current.setValue(probabilityColumn + 1, rank++);
            }
        }
        return ranked;
    }

    /**
     * Collects the candidate phrases that occur in a document's text.
     *
     * <p>The text is split into lines on {@code '\n'} and each line into words on single
     * spaces. For every word that is not a stopword, all phrases that end at that word, lie
     * within the current line and are at most {@code maxPhraseLength} words long are built.
     * A phrase is kept when it has at least {@code minPhraseLength} words and, if it is
     * longer than one word, does not start with a stopword.
     *
     * <p>Each kept phrase form is mapped to candidate names: through
     * {@link #pseudoPhrase(String)} when {@code vocabularyName} is {@code "none"}, otherwise
     * through the senses returned by the vocabulary. The first time a name is seen a
     * {@code Candidate} is created; later sightings are recorded on the existing one.
     *
     * @param str the document text
     * @return map from candidate name to its {@code Candidate}; candidates whose frequency is
     *         below {@code minOccurFrequency} are removed, the remaining ones are normalized
     */
    public HashMap<String, Candidate> getCandidates(String str) {
        if (this.debugMode) {
            log.info("---- Extracting candidates... ");
        }
        HashMap<String, Candidate> candidates = new HashMap<>();

        // Sliding window over the most recent words; the newest word sits in the last slot.
        String[] window = new String[this.maxPhraseLength];
        final int newest = this.maxPhraseLength - 1;

        // Words read so far across the whole text (not reset per line).
        int wordCount = 0;
        // Incremented once per phrase form in "none" mode and once per candidate sighting.
        int occurrenceTotal = 0;

        StringTokenizer lines = new StringTokenizer(str, "\n");
        while (lines.hasMoreTokens()) {
            // Restarting the count per line keeps phrases from spanning a line break.
            int wordsInWindow = 0;
            StringTokenizer words = new StringTokenizer(lines.nextToken(), " ");
            while (words.hasMoreTokens()) {
                wordCount++;
                String word = words.nextToken();
                System.arraycopy(window, 1, window, 0, newest);
                window[newest] = word;
                wordsInWindow = Math.min(wordsInWindow + 1, this.maxPhraseLength);

                // Phrases must not end with a stopword.
                if (this.stopwords.isStopword(window[newest])) {
                    continue;
                }

                // Grow the phrase backwards, one word at a time, from the newest word.
                StringBuilder phrase = new StringBuilder();
                for (int length = 1; length <= wordsInWindow; length++) {
                    String firstWord = window[this.maxPhraseLength - length];
                    if (length > 1) {
                        phrase.insert(0, ' ');
                    }
                    phrase.insert(0, firstWord);

                    // Multi-word phrases must not start with a stopword.
                    if (length > 1 && this.stopwords.isStopword(firstWord)) {
                        continue;
                    }
                    if (length < this.minPhraseLength) {
                        continue;
                    }

                    String form = phrase.toString();
                    // Number of words that precede the first word of this phrase.
                    int position = wordCount - length;

                    ArrayList<String> names = new ArrayList<>();
                    if (this.vocabularyName.equals("none")) {
                        String pseudo = pseudoPhrase(form);
                        if (pseudo != null) {
                            names.add(pseudo);
                        }
                        occurrenceTotal++;
                    } else {
                        Iterator<String> senses = this.vocabulary.getSenses(form).iterator();
                        while (senses.hasNext()) {
                            String sense = senses.next();
                            log.debug("{} => {} {}", form, this.vocabulary.getTerm(sense), this.minOccurFrequency);
                            names.add(sense);
                        }
                    }

                    for (String name : names) {
                        Candidate candidate = candidates.get(name);
                        if (candidate == null) {
                            candidate = new Candidate(name, form, position);
                            if (!this.vocabularyName.equals("none")) {
                                candidate.setTitle(this.vocabulary.getTerm(name));
                            }
                            candidates.put(name, candidate);
                        } else {
                            candidate.recordOccurrence(form, position);
                        }
                        occurrenceTotal++;
                    }
                }
            }
        }

        // Drop infrequent candidates in place and normalize the ones that stay.
        Iterator<Candidate> remaining = candidates.values().iterator();
        while (remaining.hasNext()) {
            Candidate candidate = remaining.next();
            if (candidate.getFrequency() < this.minOccurFrequency) {
                remaining.remove();
            } else {
                candidate.normalize(occurrenceTotal, wordCount);
            }
        }
        return candidates;
    }

    /**
     * Parses the keyphrases given for a document, one listing per line.
     *
     * <p>A listing is either a keyphrase alone (counted once) or a keyphrase followed by a
     * tab and an integer count. When {@code vocabularyName} is {@code "none"} the keyphrase
     * is reduced with {@link #pseudoPhrase(String)}; listings that reduce to nothing are
     * skipped so that no {@code null} key is stored. Otherwise everything up to and
     * including the first colon plus one following character is dropped, and every sense the
     * vocabulary returns for the rest is counted under its term.
     *
     * <p>On success {@code totalCorrect} is set to the number of distinct keys found.
     *
     * @param str newline-separated keyphrase listings
     * @return map from keyphrase to its accumulated count, or {@code null} if no keyphrase
     *         was found
     * @throws NumberFormatException if the text after a tab is not an integer
     */
    private HashMap<String, Counter> getGivenKeyphrases(String str) {
        HashMap<String, Counter> keyphrases = new HashMap<>();
        StringTokenizer listings = new StringTokenizer(str, "\n");
        while (listings.hasMoreTokens()) {
            String listing = listings.nextToken().trim();
            String keyphrase = listing;
            int frequency = 1;
            int tab = listing.indexOf("\t");
            if (tab != -1) {
                keyphrase = listing.substring(0, tab);
                // Tolerate padding around the number, e.g. "phrase\t 3".
                frequency = Integer.parseInt(listing.substring(tab + 1).trim());
            }

            if (this.vocabularyName.equals("none")) {
                String pseudo = pseudoPhrase(keyphrase);
                if (pseudo == null) {
                    // Nothing left after stopword removal; a null key would inflate totalCorrect.
                    continue;
                }
                tallyKeyphrase(keyphrases, pseudo, frequency);
            } else {
                int colon = keyphrase.indexOf(":");
                if (colon != -1) {
                    // Skip the colon and the character after it; clamp so that a trailing
                    // colon yields an empty string instead of an out-of-range substring.
                    keyphrase = keyphrase.substring(Math.min(colon + 2, keyphrase.length()));
                }
                Iterator<String> senses = this.vocabulary.getSenses(keyphrase).iterator();
                while (senses.hasNext()) {
                    tallyKeyphrase(keyphrases, this.vocabulary.getTerm(senses.next()), frequency);
                }
            }
        }

        if (keyphrases.isEmpty()) {
            log.warn("Warning! This documents does not contain valid keyphrases");
            log.warn(str);
            return null;
        }
        log.debug("Found {}", keyphrases.size());
        this.totalCorrect = keyphrases.size();
        return keyphrases;
    }

    /** Adds {@code amount} to the counter stored under {@code key}, creating it if absent. */
    private static void tallyKeyphrase(HashMap<String, Counter> counts, String key, int amount) {
        Counter existing = counts.get(key);
        if (existing == null) {
            counts.put(key, new Counter(amount));
        } else {
            existing.increment(amount);
        }
    }

    /**
     * Builds the pseudo-phrase of a phrase: the phrase is lower-cased and split on single
     * spaces, the words are sorted, stopwords are dropped, each remaining word is cut at its
     * first apostrophe and stemmed, and the stems are joined with single spaces.
     *
     * @param str the phrase to reduce
     * @return the trimmed pseudo-phrase, or {@code null} when it is empty
     */
    public String pseudoPhrase(String str) {
        String[] words = str.toLowerCase().split(" ");
        Arrays.sort(words);

        StringBuilder stems = new StringBuilder();
        for (String word : words) {
            if (this.stopwords.isStopword(word)) {
                continue;
            }
            // Keep only the part before the first apostrophe, if there is one.
            int apostrophe = word.indexOf('\'');
            String bare = apostrophe == -1 ? word : word.substring(0, apostrophe);
            stems.append(this.stemmer.stem(bare)).append(" ");
        }

        String result = stems.toString().trim();
        return result.isEmpty() ? null : result;
    }

    /**
     * Command-line entry point. It performs no work: it only logs an error telling the user
     * to use MauiModelBuilder or MauiTopicExtractor.
     *
     * @param args command-line arguments (ignored)
     */
    public static void main(String[] args) {
        log.error("Use MauiModelBuilder or MauiTopicExtractor!");
    }
}
