package pl.edu.icm.yadda.analysis.packscanner;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import morfologik.stemming.PolishStemmer;
import morfologik.stemming.WordData;
import pl.edu.icm.yadda.bwmeta.model.YDescription;
import pl.edu.icm.yadda.bwmeta.model.YElement;
import pl.edu.icm.yadda.bwmeta.model.YLanguage;
import pl.edu.icm.yadda.bwmeta.model.YName;
import pl.edu.icm.yadda.bwmeta.model.YStructure;
import pl.edu.icm.yadda.common.utils.Pair;

/* loaded from: input_file:pl/edu/icm/yadda/analysis/packscanner/TesterAndClassifier.class */
public class TesterAndClassifier {
    /** Optional stemmer. While it is {@code null} (the default) words and keyphrases are used unstemmed. */
    PolishStemmer stemmer = null;

    // Summary counters reported by printInfo().
    // NOTE(review): no code visible in this file ever increments them - confirm whether
    // they are maintained elsewhere or are leftovers.
    int total = 0;
    int totalWithAbstact = 0;
    int totalWithContent = 0;
    int totalWithTitles = 0;
    int totalKeyowordsInstances = 0;
    int totalKeyowordsInstancesInAbstracts = 0;
    int totalKeyowordsInstancesInTitles = 0;
    int totalKeyowordsInstancesInTitlesOrAbstract = 0;

    /** Per-keyphrase records keyed by the normalized keyphrase; filled lazily by getKeyPhrasesFromElement(). */
    Hashtable<String, KeyPhraseStats> stats = new Hashtable<>();
    /** word (or stem) -> number of learned articles whose abstract/title texts contained it. */
    Hashtable<String, Integer> wordsInAbstractsCount = new Hashtable<>();
    /** normalized keyphrase -> occurrence count over the learned articles. */
    Hashtable<String, Integer> keyprasesCount = new Hashtable<>();
    /** Number of articles that processYElementAndLearn() actually learned from. */
    int numArticles = 0;
    /** (keyphrase, word) -> number of co-occurrences counted while learning. */
    Hashtable<Pair<String, String>, Integer> keyphraseWordCount = new Hashtable<>();
    /** word (or stem) -> keyphrases that were seen together with it while learning. */
    Hashtable<String, ArrayList<String>> wordsToPossibleKeyphrases = new Hashtable<>();

    /* loaded from: input_file:pl/edu/icm/yadda/analysis/packscanner/TesterAndClassifier$KeyPhraseStats.class */
    /**
     * Mutable record describing a single keyphrase: its normalized text, the
     * known textual variants and a set of occurrence counters.
     */
    public static class KeyPhraseStats {
        /** Normalized form of the keyphrase, as passed to the constructor. */
        String lowerCased;
        /** Textual variants of the keyphrase; starts out empty. */
        ArrayList<String> stemmed;

        // Occurrence counters. They rely on Java's default value of 0 for int fields.
        int appears;
        int appersInAbstract;
        int appearsInAbstractOrContent;
        int appearsInTitle;
        int appearsInAbstractOrTitle;
        int hasInAbstractOrContent;
        int hasAbstact;

        /**
         * Creates a record with all counters at zero and no variants.
         *
         * @param str normalized keyphrase text
         */
        public KeyPhraseStats(String str) {
            lowerCased = str;
            stemmed = new ArrayList<>();
        }
    }

    /* loaded from: input_file:pl/edu/icm/yadda/analysis/packscanner/TesterAndClassifier$TextType.class */
    public enum TextType {
        ABSTRACT,
        TITLE
    }

    /**
     * Brings a text to the canonical form used as a lookup key: lower case,
     * every run of whitespace collapsed to a single space, no leading or
     * trailing whitespace.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so the result does not depend on
     * the JVM's default locale (under a Turkish default locale {@code "I"} would
     * otherwise become a dotless {@code "ı"} and keys would stop matching).
     *
     * @param str text to normalize; must not be {@code null}
     * @return the normalized text
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public String normalize(String str) {
        return str.toLowerCase(Locale.ROOT).replaceAll("\\s+", " ").trim();
    }

    /**
     * Builds every phrase that can be formed by picking one alternative per
     * position, starting at position {@code i}, and joining the picks with a
     * single space. Alternatives of earlier positions vary slowest, so for
     * {@code [[a, b], [x, y]]} the result is {@code [a x, a y, b x, b y]}.
     *
     * <p>The returned list is always a fresh one; the input lists are never
     * handed out or modified. If any position has no alternatives the result
     * is empty.
     *
     * @param arrayListArr alternatives for each position
     * @param i            index of the first position to use
     * @return all combinations for positions {@code i..length-1}; an empty list
     *         when the array is {@code null}, empty, or {@code i} is past its end
     */
    public ArrayList<String> allPossibleTexts(ArrayList<String>[] arrayListArr, int i) {
        ArrayList<String> combined = new ArrayList<>();
        if (arrayListArr == null || i >= arrayListArr.length) {
            // Nothing to combine (previously an ArrayIndexOutOfBoundsException).
            return combined;
        }
        int last = arrayListArr.length - 1;
        combined.addAll(arrayListArr[last]);
        // Walk backwards, prefixing the phrases built so far with every
        // alternative of the current position.
        for (int pos = last - 1; pos >= i; pos--) {
            ArrayList<String> extended = new ArrayList<>();
            for (String head : arrayListArr[pos]) {
                for (String tail : combined) {
                    extended.add(head + " " + tail);
                }
            }
            combined = extended;
        }
        return combined;
    }

    /**
     * Returns the statistics records for all "keyword" tags of an element,
     * creating and registering a record in {@code stats} for every keyphrase
     * that has not been seen before.
     *
     * <p>A new record always lists the normalized keyphrase as its first
     * variant. When a stemmer is configured, each word of the keyphrase is
     * stemmed separately and every combination of the stems is added as a
     * further variant. A word the stemmer does not know is kept as is, so that
     * one unknown word does not discard the stems of the other words.
     *
     * @param yElement element to read the "keyword" tag list from
     * @return one record per keyword value, in tag order, or {@code null} when
     *         the element has no "keyword" tag list
     */
    ArrayList<KeyPhraseStats> getKeyPhrasesFromElement(YElement yElement) {
        if (yElement.getTagList("keyword") == null) {
            return null;
        }
        ArrayList<KeyPhraseStats> phrases = new ArrayList<>();
        for (String rawKeyword : yElement.getTagList("keyword").getValues()) {
            String normalized = normalize(rawKeyword);
            KeyPhraseStats phraseStats = this.stats.get(normalized);
            if (phraseStats == null) {
                phraseStats = new KeyPhraseStats(normalized);
                this.stats.put(normalized, phraseStats);
                phraseStats.stemmed.add(normalized);
                if (this.stemmer != null) {
                    String[] words = normalized.split("\\s");
                    @SuppressWarnings("unchecked") // generic array creation; only ArrayList<String> is stored
                    ArrayList<String>[] stemsPerWord = new ArrayList[words.length];
                    for (int i = 0; i < words.length; i++) {
                        ArrayList<String> stems = new ArrayList<>();
                        // Stem the i-th word, not the whole raw keyword.
                        for (WordData wordData : this.stemmer.lookup(words[i])) {
                            String stem = wordData.getStem().toString();
                            if (!stems.contains(stem)) {
                                stems.add(stem);
                            }
                        }
                        if (stems.isEmpty()) {
                            // Unknown to the stemmer: keep the surface form so the
                            // combinations below are not wiped out.
                            stems.add(words[i]);
                        }
                        stemsPerWord[i] = stems;
                    }
                    for (String variant : allPossibleTexts(stemsPerWord, 0)) {
                        if (!phraseStats.stemmed.contains(variant)) {
                            phraseStats.stemmed.add(variant);
                        }
                    }
                }
            }
            phrases.add(phraseStats);
        }
        return phrases;
    }

    /**
     * Writes the summary counters followed by one semicolon-separated line per
     * keyphrase, ordered by ascending {@code appears}.
     *
     * <p>Every percentage is an integer percentage and is reported as 0 when
     * its denominator is not positive, so a keyphrase whose {@code appears}
     * counter is still 0 no longer aborts the report with an
     * {@link ArithmeticException}.
     *
     * @param printStream destination of the report
     */
    void printInfo(PrintStream printStream) {
        printStream.println("Got Informations about :" + this.total + " elements");
        printStream.println(this.totalWithAbstact + " elements has abstracts");
        printStream.println(this.totalWithTitles + " elements has titles");
        printStream.println("was found diffrent: " + this.stats.size() + " keyphrases");
        printStream.println("was found: " + this.totalKeyowordsInstances + " keywords instances");
        printStream.println("and from them " + this.totalKeyowordsInstancesInAbstracts + " ("
                + percentOf(this.totalKeyowordsInstancesInAbstracts, this.totalKeyowordsInstances)
                + " %) appared in abstracts");
        printStream.println("and from them " + this.totalKeyowordsInstancesInTitles + " ("
                + percentOf(this.totalKeyowordsInstancesInTitles, this.totalKeyowordsInstances)
                + " %) appared in title");
        printStream.println("and from them " + this.totalKeyowordsInstancesInTitlesOrAbstract + " ("
                + percentOf(this.totalKeyowordsInstancesInTitlesOrAbstract, this.totalKeyowordsInstances)
                + " %) appared in title or abstract");
        printStream.println("Keywords statistics: ");

        List<KeyPhraseStats> ordered = new ArrayList<KeyPhraseStats>(this.stats.values());
        Collections.sort(ordered, new Comparator<KeyPhraseStats>() {
            @Override
            public int compare(KeyPhraseStats left, KeyPhraseStats right) {
                // Integer.compare cannot overflow, unlike subtracting the counters.
                return Integer.compare(left.appears, right.appears);
            }
        });

        printStream.println("phrase;appears;has absttract;appers with abstract;was in % abstracts;appearsInTitles;was in % titles;appers in  title or abstract; was in % titles or abstracts");
        for (KeyPhraseStats phrase : ordered) {
            printStream.println(phrase.lowerCased
                    + ";" + phrase.appears
                    + ";" + phrase.hasAbstact
                    + ";" + phrase.appersInAbstract
                    + ";" + percentOf(phrase.appersInAbstract, phrase.hasAbstact)
                    + ";" + phrase.appearsInTitle
                    + ";" + percentOf(phrase.appearsInTitle, phrase.appears)
                    + ";" + phrase.appearsInAbstractOrTitle
                    + ";" + percentOf(phrase.appearsInAbstractOrTitle, phrase.appears));
        }
    }

    /** Integer percentage of {@code part} in {@code whole}; 0 when {@code whole} is not positive. */
    private static int percentOf(int part, int whole) {
        return whole > 0 ? (100 * part) / whole : 0;
    }

    /**
     * Learns word/keyphrase co-occurrence statistics from one element.
     *
     * <p>The element is used only if it is a journal article, has a "keyword"
     * tag list and at least one abstract in the requested language. The words
     * of its abstracts and names (stems, when a stemmer is configured) are then
     * counted once per article, and every keyphrase of the article is counted
     * once together with each of those words.
     *
     * <p>Empty tokens produced by the tokenizer are skipped, and a keyphrase
     * listed several times on the same article is counted once. Otherwise a
     * (keyphrase, word) count could exceed the word's own count, giving a
     * conditional frequency above 1.
     *
     * @param yElement  element to learn from
     * @param yLanguage language that abstracts and names must have, or
     *                  {@code null} to accept every language
     */
    public void processYElementAndLearn(YElement yElement, YLanguage yLanguage) {
        YStructure structure = yElement.getStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
        if (structure == null
                || !"bwmeta1.level.hierarchy_Journal_Article".equalsIgnoreCase(structure.getCurrent().getLevel())) {
            return;
        }
        ArrayList<KeyPhraseStats> keyPhrases = getKeyPhrasesFromElement(yElement);
        if (keyPhrases == null) {
            return;
        }

        // Collect the texts to learn from; an article without an abstract is ignored.
        boolean hasAbstract = false;
        List<Pair<String, TextType>> texts = new ArrayList<Pair<String, TextType>>();
        for (YDescription description : yElement.getDescriptions()) {
            if ("abstract".equalsIgnoreCase(description.getType())
                    && (yLanguage == null || yLanguage.equals(description.getLanguage()))) {
                texts.add(new Pair<String, TextType>(normalize(description.getText()), TextType.ABSTRACT));
                hasAbstract = true;
            }
        }
        for (YName name : yElement.getNames()) {
            if (yLanguage == null || yLanguage.equals(name.getLanguage())) {
                texts.add(new Pair<String, TextType>(normalize(name.getText()), TextType.TITLE));
            }
        }
        if (!hasAbstract) {
            return;
        }

        // Distinct words of this article, in first-seen order.
        // NOTE(review): "\\W" is ASCII-only in Java unless UNICODE_CHARACTER_CLASS is set,
        // so letters with diacritics act as separators. Left unchanged because the
        // tokenization has to stay identical to the one in generateKeyWordsFor().
        Set<String> articleWords = new LinkedHashSet<String>();
        for (Pair<String, TextType> text : texts) {
            String[] tokens = text.getFirst().replaceAll("\\W", " ").replaceAll("\\s+", " ").split("\\s");
            for (String token : tokens) {
                if (token.isEmpty()) {
                    // A leading separator yields an empty first token; it is not a word.
                    continue;
                }
                if (this.stemmer != null) {
                    for (WordData wordData : this.stemmer.lookup(token)) {
                        String stem = wordData.getStem().toString();
                        if (articleWords.add(stem) && !this.wordsToPossibleKeyphrases.containsKey(stem)) {
                            this.wordsToPossibleKeyphrases.put(stem, new ArrayList<String>());
                        }
                    }
                } else if (articleWords.add(token) && !this.wordsToPossibleKeyphrases.containsKey(token)) {
                    this.wordsToPossibleKeyphrases.put(token, new ArrayList<String>());
                }
            }
        }

        for (String word : articleWords) {
            Integer seen = this.wordsInAbstractsCount.get(word);
            this.wordsInAbstractsCount.put(word, seen == null ? 1 : seen + 1);
        }

        // Distinct keyphrases of this article, in tag order.
        Set<String> articlePhrases = new LinkedHashSet<String>();
        for (KeyPhraseStats keyPhrase : keyPhrases) {
            articlePhrases.add(keyPhrase.lowerCased);
        }
        for (String phrase : articlePhrases) {
            Integer phraseSeen = this.keyprasesCount.get(phrase);
            this.keyprasesCount.put(phrase, phraseSeen == null ? 1 : phraseSeen + 1);
            for (String word : articleWords) {
                Pair<String, String> key = new Pair<String, String>(phrase, word);
                Integer pairSeen = this.keyphraseWordCount.get(key);
                this.keyphraseWordCount.put(key, pairSeen == null ? 1 : pairSeen + 1);
                ArrayList<String> candidates = this.wordsToPossibleKeyphrases.get(word);
                if (!candidates.contains(phrase)) {
                    candidates.add(phrase);
                }
            }
        }
        this.numArticles++;
    }

    /**
     * Drops words that carry no information about any keyphrase and prints the
     * remaining word statistics grouped by keyphrase.
     *
     * <p>A word is kept only if it occurred in more than one article and, for
     * at least one keyphrase seen with it, the keyphrase's overall frequency
     * ({@code keyprasesCount / numArticles}) differs from its frequency among
     * the articles containing the word ({@code keyphraseWordCount / wordCount})
     * by a squared difference above 1e-5. Removed words disappear from
     * {@code wordsInAbstractsCount}, {@code wordsToPossibleKeyphrases} and
     * {@code keyphraseWordCount}.
     *
     * <p>All frequencies are computed in floating point. With integer division
     * every ratio collapsed to 0 or 1, which made the threshold meaningless.
     *
     * @param printStream destination of the per-keyphrase listing; words are
     *                    listed by descending conditional frequency
     */
    public void reduceTables(PrintStream printStream) {
        List<String> uninformative = new ArrayList<String>();
        for (String word : this.wordsInAbstractsCount.keySet()) {
            boolean informative = false;
            int wordCount = this.wordsInAbstractsCount.get(word).intValue();
            if (wordCount > 1) {
                for (String phrase : this.wordsToPossibleKeyphrases.get(word)) {
                    double prior = (double) this.keyprasesCount.get(phrase).intValue() / this.numArticles;
                    double difference = prior - pairToWordRatio(phrase, word);
                    if (difference * difference > 1.0E-5d) {
                        informative = true;
                        break;
                    }
                }
            }
            if (!informative) {
                uninformative.add(word);
            }
        }

        // Remove only after the scan above, so the tables are not modified while iterated.
        for (String word : uninformative) {
            this.wordsInAbstractsCount.remove(word);
            for (String phrase : this.wordsToPossibleKeyphrases.get(word)) {
                this.keyphraseWordCount.remove(new Pair<String, String>(phrase, word));
            }
            this.wordsToPossibleKeyphrases.remove(word);
        }

        // Regroup the surviving (keyphrase, word) pairs by keyphrase.
        Hashtable<String, ArrayList<String>> wordsByPhrase = new Hashtable<String, ArrayList<String>>();
        for (Pair<String, String> pair : this.keyphraseWordCount.keySet()) {
            ArrayList<String> words = wordsByPhrase.get(pair.getFirst());
            if (words == null) {
                words = new ArrayList<String>();
                wordsByPhrase.put(pair.getFirst(), words);
            }
            words.add(pair.getSecond());
        }

        for (final String phrase : this.keyprasesCount.keySet()) {
            ArrayList<String> words = wordsByPhrase.get(phrase);
            if (words == null) {
                continue;
            }
            Collections.sort(words, new Comparator<String>() {
                @Override
                public int compare(String left, String right) {
                    // Descending by conditional frequency.
                    return Double.compare(pairToWordRatio(phrase, right), pairToWordRatio(phrase, left));
                }
            });
            printStream.println(phrase + " : " + this.keyprasesCount.get(phrase));
            for (String word : words) {
                printStream.println("\t" + word + "\t p= " + pairToWordRatio(phrase, word)
                        + " \t|pair| = " + this.keyphraseWordCount.get(new Pair<String, String>(phrase, word))
                        + "\t|word| " + this.wordsInAbstractsCount.get(word));
            }
            printStream.println();
        }
    }

    /** Fraction of the articles containing {@code word} that also carried {@code phrase}. */
    private double pairToWordRatio(String phrase, String word) {
        int pairCount = this.keyphraseWordCount.get(new Pair<String, String>(phrase, word)).intValue();
        return (double) pairCount / this.wordsInAbstractsCount.get(word).intValue();
    }

    /**
     * Proposes keyphrases for an element from the learned statistics and logs
     * the element's titles, its original keyphrases and two rankings.
     *
     * <p>Every learned word found in the element's abstracts and names
     * contributes, for each candidate keyphrase, the conditional frequency
     * {@code p = keyphraseWordCount / wordCount}. Two products are kept per
     * candidate: the product of {@code (1 - p)} and the product of {@code p}.
     * Candidates are ranked by {@code (1 - product(1 - p)) / keyprasesCount}
     * (the returned ranking) and by {@code product(p) / keyprasesCount}
     * (logged only).
     *
     * <p>{@code p} is computed in floating point and clamped to
     * {@code [1e-5, 0.99999]}. With integer division it was always 0 or 1, so
     * every word contributed one of the two clamp values.
     *
     * @param yElement    element to classify
     * @param printStream destination of the log output
     * @param yLanguage   language that abstracts and names must have, or
     *                    {@code null} to accept every language
     * @return up to ten best candidates, best first; {@code null} when the
     *         element is not a journal article or has no matching abstract
     */
    public ArrayList<String> generateKeyWordsFor(YElement yElement, PrintStream printStream, YLanguage yLanguage) {
        YStructure structure = yElement.getStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
        if (structure == null
                || !"bwmeta1.level.hierarchy_Journal_Article".equalsIgnoreCase(structure.getCurrent().getLevel())) {
            return null;
        }

        boolean hasAbstract = false;
        List<Pair<String, TextType>> texts = new ArrayList<Pair<String, TextType>>();
        for (YDescription description : yElement.getDescriptions()) {
            if ("abstract".equalsIgnoreCase(description.getType())
                    && (yLanguage == null || yLanguage.equals(description.getLanguage()))) {
                texts.add(new Pair<String, TextType>(normalize(description.getText()), TextType.ABSTRACT));
                hasAbstract = true;
            }
        }
        for (YName name : yElement.getNames()) {
            if (yLanguage == null || yLanguage.equals(name.getLanguage())) {
                texts.add(new Pair<String, TextType>(normalize(name.getText()), TextType.TITLE));
            }
        }
        if (!hasAbstract) {
            return null;
        }

        // Learned words present in this element (first-seen order) and the
        // candidate keyphrases they bring in, both products starting at 1.
        Set<String> knownWords = new LinkedHashSet<String>();
        final Hashtable<String, Double> absenceProduct = new Hashtable<String, Double>();
        final Hashtable<String, Double> presenceProduct = new Hashtable<String, Double>();
        for (Pair<String, TextType> text : texts) {
            String[] tokens = text.getFirst().replaceAll("\\W", " ").replaceAll("\\s+", " ").split("\\s");
            for (String token : tokens) {
                List<String> forms = new ArrayList<String>();
                if (this.stemmer != null) {
                    for (WordData wordData : this.stemmer.lookup(token)) {
                        forms.add(wordData.getStem().toString());
                    }
                } else {
                    forms.add(token);
                }
                for (String form : forms) {
                    if (!this.wordsToPossibleKeyphrases.containsKey(form) || !knownWords.add(form)) {
                        continue; // never learned, or already handled for this element
                    }
                    for (String phrase : this.wordsToPossibleKeyphrases.get(form)) {
                        if (!absenceProduct.containsKey(phrase)) {
                            absenceProduct.put(phrase, Double.valueOf(1.0d));
                            presenceProduct.put(phrase, Double.valueOf(1.0d));
                        }
                    }
                }
            }
        }

        for (String word : knownWords) {
            int wordCount = this.wordsInAbstractsCount.get(word).intValue();
            for (String phrase : absenceProduct.keySet()) {
                Integer pairCount = this.keyphraseWordCount.get(new Pair<String, String>(phrase, word));
                double p = (pairCount != null ? pairCount.intValue() : 0) / (double) wordCount;
                // Keep both products away from an absorbing 0.
                if (p <= 0.0d) {
                    p = 1.0E-5d;
                } else if (p >= 1.0d) {
                    p = 0.99999d;
                }
                // Replacing the value of an existing key does not disturb the keySet iteration.
                absenceProduct.put(phrase, Double.valueOf(absenceProduct.get(phrase).doubleValue() * (1.0d - p)));
                presenceProduct.put(phrase, Double.valueOf(presenceProduct.get(phrase).doubleValue() * p));
            }
        }

        List<String> ranked = new ArrayList<String>(absenceProduct.keySet());
        Collections.sort(ranked, new Comparator<String>() {
            @Override
            public int compare(String left, String right) {
                double leftScore = (1.0d - absenceProduct.get(left).doubleValue())
                        / TesterAndClassifier.this.keyprasesCount.get(left).intValue();
                double rightScore = (1.0d - absenceProduct.get(right).doubleValue())
                        / TesterAndClassifier.this.keyprasesCount.get(right).intValue();
                return Double.compare(rightScore, leftScore); // descending
            }
        });

        for (YName name : yElement.getNames()) {
            printStream.println("Title: " + name.getText());
        }
        ArrayList<KeyPhraseStats> originalPhrases = getKeyPhrasesFromElement(yElement);
        printStream.println("Original keyphrases: ");
        if (originalPhrases != null) {
            for (KeyPhraseStats original : originalPhrases) {
                printStream.println("\t" + original.lowerCased);
            }
        }

        final int maxResults = 10;
        ArrayList<String> proposed = new ArrayList<String>();
        printStream.println("Found keyphrase (reversed propblility): ");
        for (int i = 0; i < maxResults && i < ranked.size(); i++) {
            String phrase = ranked.get(i);
            double score = (1.0d - absenceProduct.get(phrase).doubleValue())
                    / this.keyprasesCount.get(phrase).intValue();
            printStream.println("\t\t" + phrase + " - " + score);
            proposed.add(phrase);
        }

        printStream.println("Found keyphrase (normal): ");
        Collections.sort(ranked, new Comparator<String>() {
            @Override
            public int compare(String left, String right) {
                double leftScore = presenceProduct.get(left).doubleValue()
                        / TesterAndClassifier.this.keyprasesCount.get(left).intValue();
                double rightScore = presenceProduct.get(right).doubleValue()
                        / TesterAndClassifier.this.keyprasesCount.get(right).intValue();
                return Double.compare(rightScore, leftScore); // descending
            }
        });
        for (int i = 0; i < maxResults && i < ranked.size(); i++) {
            String phrase = ranked.get(i);
            printStream.println("\t\t" + phrase + " - " + presenceProduct.get(phrase));
        }
        return proposed;
    }

    /**
     * Command-line entry point: learns from one directory of packs, prunes the
     * statistics and then proposes keyphrases for a second directory. All
     * output goes to a new {@code key-stats-<millis>.log} file in the working
     * directory.
     *
     * <p>Arguments:
     * <ol>
     *   <li>directory with files to learn from (required)</li>
     *   <li>directory with files to test on (required)</li>
     *   <li>{@code dostem} to enable Polish stemming (optional)</li>
     *   <li>language code passed to {@code YLanguage.byCode} (optional)</li>
     * </ol>
     *
     * @param strArr command-line arguments as described above
     * @throws FileNotFoundException if the log file cannot be created
     */
    public static void main(String[] strArr) throws FileNotFoundException {
        if (strArr.length < 2) {
            // Both directories are mandatory; going on would only fail on strArr[0] / strArr[1].
            System.err.println("two argument - directory with files to learn, directory with files to test");
            System.exit(1);
            return;
        }
        TesterAndClassifier classifier = new TesterAndClassifier();
        if (strArr.length > 2 && "dostem".equalsIgnoreCase(strArr[2])) {
            classifier.stemmer = new PolishStemmer();
        }
        YLanguage language = null;
        if (strArr.length > 3) {
            language = YLanguage.byCode(strArr[3]);
        }

        PackDirToYElementIterator learnSet = new PackDirToYElementIterator(new File(strArr[0]));
        while (learnSet.hasNext()) {
            classifier.processYElementAndLearn(learnSet.next(), language);
        }

        // try-with-resources closes the log even when classification fails midway.
        try (PrintStream log = new PrintStream(new FileOutputStream("key-stats-" + System.currentTimeMillis() + ".log"))) {
            classifier.reduceTables(log);
            PackDirToYElementIterator testSet = new PackDirToYElementIterator(new File(strArr[1]));
            while (testSet.hasNext()) {
                classifier.generateKeyWordsFor(testSet.next(), log, language);
            }
        }
        System.exit(0);
    }
}
