package pl.edu.icm.yadda.analysis.packscanner;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import morfologik.stemming.PolishStemmer;
import morfologik.stemming.WordData;
import pl.edu.icm.model.bwmeta.y.YElement;

/* loaded from: input_file:pl/edu/icm/yadda/analysis/packscanner/CountKeywordsStatistics.class */
public class CountKeywordsStatistics {
    // Optional Polish stemmer. Stays null unless main() receives "dostem" as its second
    // argument; when set, getKeyPhrases() also records stemmed variants of new keywords.
    PolishStemmer stemmer = null;
    // Printed by printInfo() as the number of elements information was gathered about.
    // NOTE(review): no visible code increments the counters below; presumably that happens
    // in processYElement(), whose body was not recovered - confirm against the original.
    int total = 0;
    // Printed by printInfo() as the number of elements that have abstracts.
    int totalWithAbstact = 0;
    // Not read by any code visible in this file.
    int totalWithContent = 0;
    // Printed by printInfo() as the number of elements that have titles.
    int totalWithTitles = 0;
    // Printed by printInfo() as the number of keyword instances found; also the
    // denominator of the three global percentages printed there.
    int totalKeyowordsInstances = 0;
    // Printed by printInfo() as the keyword instances that appeared in abstracts.
    int totalKeyowordsInstancesInAbstracts = 0;
    // Printed by printInfo() as the keyword instances that appeared in titles.
    int totalKeyowordsInstancesInTitles = 0;
    // Printed by printInfo() as the keyword instances that appeared in a title or an abstract.
    int totalKeyowordsInstancesInTitlesOrAbstract = 0;
    // Per-keyphrase statistics keyed by the normalized keyword text (see getKeyPhrases()).
    Hashtable<String, KeyPhraseStats> stats = new Hashtable<>();

    /* loaded from: input_file:pl/edu/icm/yadda/analysis/packscanner/CountKeywordsStatistics$KeyPhraseStats.class */
    /**
     * Mutable counters collected for a single key phrase.
     *
     * <p>Instances are created by {@code getKeyPhrases} and reported, one CSV-like row
     * each, by {@code printInfo}.
     */
    public static class KeyPhraseStats {
        /** Phrase text exactly as handed to the constructor (callers pass the normalized form). */
        String lowerCased;
        /** Textual variants of the phrase; starts empty and is filled in by the owner. */
        ArrayList<String> stemmed;
        /** Reported in the "appears" column; also the sort key used by printInfo. */
        int appears;
        /** Reported in the "appers with abstract" column. */
        int appersInAbstract;
        /** Not read by any code visible in this file. */
        int appearsInAbstractOrContent;
        /** Reported in the "appearsInTitles" column. */
        int appearsInTitle;
        /** Reported in the "appers in  title or abstract" column. */
        int appearsInAbstractOrTitle;
        /** Not read by any code visible in this file. */
        int hasInAbstractOrContent;
        /** Reported in the "has absttract" column; denominator of the abstract percentage. */
        int hasAbstact;

        /**
         * Creates an entry with all counters at zero and no variants.
         *
         * @param str phrase text to store in {@link #lowerCased}
         */
        public KeyPhraseStats(String str) {
            this.stemmed = new ArrayList<>();
            this.lowerCased = str;
        }
    }

    /* loaded from: input_file:pl/edu/icm/yadda/analysis/packscanner/CountKeywordsStatistics$TextType.class */
    /**
     * Kinds of element text.
     *
     * <p>Not referenced by any code recovered in this file.
     * NOTE(review): presumably used by processYElement() to tell abstracts from titles -
     * confirm against the original source.
     */
    public enum TextType {
        ABSTRACT,
        TITLE
    }

    /**
     * Produces the canonical form of a phrase used as the key of the statistics table.
     *
     * <p>The text is lower-cased (default-locale rules), every run of whitespace is
     * collapsed to a single space, and leading/trailing whitespace is removed.
     *
     * @param str text to canonicalize; must not be null
     * @return the canonical form of {@code str}
     */
    public String normalize(String str) {
        String lowered = str.toLowerCase();
        String singleSpaced = lowered.replaceAll("\\s+", " ");
        return singleSpaced.trim();
    }

    /**
     * Builds every space-separated combination that takes one string from each list
     * in {@code arrayListArr[i..]}, keeping the lists' order (cartesian product).
     *
     * <p>For the last position the list stored in the array is returned as-is, not a
     * copy, so callers must not modify the result if they still need the input.
     *
     * @param arrayListArr alternatives for each position; must not be null
     * @param i index of the first position to combine
     * @return all combinations starting at position {@code i}; an empty list when
     *         {@code i} is past the end of the array (including an empty array)
     */
    public ArrayList<String> allPossibleTexts(ArrayList<String>[] arrayListArr, int i) {
        // Without this guard an empty array (or an index past the end) never reaches the
        // base case below and recurses until the stack overflows.
        if (i >= arrayListArr.length) {
            return new ArrayList<>();
        }
        if (i == arrayListArr.length - 1) {
            return arrayListArr[i];
        }
        ArrayList<String> tails = allPossibleTexts(arrayListArr, i + 1);
        ArrayList<String> combined = new ArrayList<>();
        for (String head : arrayListArr[i]) {
            for (String tail : tails) {
                combined.add(head + " " + tail);
            }
        }
        return combined;
    }

    /**
     * Returns the statistics entries for every value in the element's "keyword" tag list,
     * creating and registering an entry in {@code stats} the first time a normalized
     * keyword is seen.
     *
     * <p>A new entry always lists the normalized keyword itself as its first variant.
     * When a stemmer is configured, every combination of the stems of its words is added
     * as a further variant.
     *
     * @param yElement element whose "keyword" tag list is read
     * @return one entry per keyword value, in tag-list order (keywords that normalize to
     *         the same text share one entry), or {@code null} when the element has no
     *         "keyword" tag list
     */
    ArrayList<KeyPhraseStats> getKeyPhrases(YElement yElement) {
        if (yElement.getTagList("keyword") == null) {
            return null;
        }
        ArrayList<KeyPhraseStats> phrases = new ArrayList<>();
        for (String keyword : yElement.getTagList("keyword").getValues()) {
            String normalized = normalize(keyword);
            KeyPhraseStats entry = this.stats.get(normalized);
            if (entry == null) {
                entry = new KeyPhraseStats(normalized);
                this.stats.put(normalized, entry);
                entry.stemmed.add(normalized);
                if (this.stemmer != null) {
                    addStemmedVariants(entry, normalized);
                }
            }
            phrases.add(entry);
        }
        return phrases;
    }

    /** Adds to {@code entry.stemmed} every combination of the stems of the phrase's words. */
    private void addStemmedVariants(KeyPhraseStats entry, String normalized) {
        String[] words = normalized.split("\\s");
        @SuppressWarnings("unchecked") // generic array creation; only ArrayList<String> is stored
        ArrayList<String>[] stemsPerWord = new ArrayList[words.length];
        for (int i = 0; i < words.length; i++) {
            stemsPerWord[i] = new ArrayList<>();
            // Look up the i-th word. The previous code looked up the whole raw keyword on
            // every iteration, so each position received the same lookup result.
            for (WordData wordData : this.stemmer.lookup(words[i])) {
                CharSequence stem = wordData.getStem();
                if (stem != null) {
                    // Copy to a String immediately instead of keeping the CharSequence.
                    stemsPerWord[i].add(stem.toString());
                }
            }
        }
        for (String variant : allPossibleTexts(stemsPerWord, 0)) {
            if (!entry.stemmed.contains(variant)) {
                entry.stemmed.add(variant);
            }
        }
    }

    /**
     * Writes the collected statistics to the given stream: the global counters first,
     * then one semicolon-separated row per key phrase, ordered by ascending
     * {@code appears} count.
     *
     * <p>All percentages are integer percentages and are reported as 0 when their
     * denominator is zero.
     *
     * @param printStream destination of the report; not closed by this method
     */
    void printInfo(PrintStream printStream) {
        printStream.println("Got Informations about :" + this.total + " elements");
        printStream.println(this.totalWithAbstact + " elements has abstracts");
        printStream.println(this.totalWithTitles + " elements has titles");
        printStream.println("was found diffrent: " + this.stats.values().size() + " keyphrases");
        printStream.println("was found: " + this.totalKeyowordsInstances + " keywords instances");
        printStream.println("and from them " + this.totalKeyowordsInstancesInAbstracts + " ("
                + percent(this.totalKeyowordsInstancesInAbstracts, this.totalKeyowordsInstances)
                + " %) appared in abstracts");
        printStream.println("and from them " + this.totalKeyowordsInstancesInTitles + " ("
                + percent(this.totalKeyowordsInstancesInTitles, this.totalKeyowordsInstances)
                + " %) appared in title");
        printStream.println("and from them " + this.totalKeyowordsInstancesInTitlesOrAbstract + " ("
                + percent(this.totalKeyowordsInstancesInTitlesOrAbstract, this.totalKeyowordsInstances)
                + " %) appared in title or abstract");
        printStream.println("Keywords statistics: ");

        // Sort a snapshot so the live table is left untouched.
        ArrayList<KeyPhraseStats> ordered = new ArrayList<KeyPhraseStats>(this.stats.values());
        Collections.sort(ordered, new Comparator<KeyPhraseStats>() {
            @Override
            public int compare(KeyPhraseStats left, KeyPhraseStats right) {
                return Integer.compare(left.appears, right.appears);
            }
        });

        printStream.println("phrase;appears;has absttract;appers with abstract;was in % abstracts;appearsInTitles;was in % titles;appers in  title or abstract; was in % titles or abstracts");
        for (KeyPhraseStats phrase : ordered) {
            // The title percentages used to divide by `appears` unguarded, which throws
            // ArithmeticException for an entry whose count is still zero.
            printStream.println(phrase.lowerCased
                    + ";" + phrase.appears
                    + ";" + phrase.hasAbstact
                    + ";" + phrase.appersInAbstract
                    + ";" + percent(phrase.appersInAbstract, phrase.hasAbstact)
                    + ";" + phrase.appearsInTitle
                    + ";" + percent(phrase.appearsInTitle, phrase.appears)
                    + ";" + phrase.appearsInAbstractOrTitle
                    + ";" + percent(phrase.appearsInAbstractOrTitle, phrase.appears));
        }
    }

    /** Integer percentage of {@code part} in {@code whole}; 0 when {@code whole} is not positive. */
    private static int percent(int part, int whole) {
        // Multiply in long so 100 * part cannot overflow int.
        return whole > 0 ? (int) ((100L * part) / whole) : 0;
    }

    /* JADX WARN: Can't fix incorrect switch cases order, some code will duplicate */
    /* JADX WARN: Removed duplicated region for block: B:131:0x039c  */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder for the per-element processing step.
     *
     * <p>The decompiler could not recover the original body, so this stub always throws.
     * main() calls it for every element it iterates over, so a run over a non-empty
     * input fails on the first element until the real body is restored.
     * NOTE(review): the original presumably updated the total* counters and the
     * KeyPhraseStats fields that printInfo() reports - restore it from the bytecode.
     *
     * @param r7 the element to process (unused by this stub)
     * @throws UnsupportedOperationException always
     */
    public void processYElement(pl.edu.icm.model.bwmeta.y.YElement r7) {
        /*
            Method dump skipped, instructions count: 1257
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: pl.edu.icm.yadda.analysis.packscanner.CountKeywordsStatistics.processYElement(pl.edu.icm.model.bwmeta.y.YElement):void");
    }

    /**
     * Command-line entry point: processes every element found under a directory and
     * writes the report to {@code key-stats-<currentTimeMillis>.log} in the working directory.
     *
     * <p>Arguments: the directory with files, optionally followed by {@code dostem}
     * (case-insensitive) to enable Polish stemming of keywords. Exits with status 1 when
     * the directory argument is missing, otherwise with status 0 once the report is written.
     *
     * @param strArr command-line arguments
     * @throws FileNotFoundException if the report file cannot be created
     */
    public static void main(String[] strArr) throws FileNotFoundException {
        String usage = "Usage: <directory with files> [dostem]";
        if (strArr.length < 1) {
            // Previously execution continued and failed on strArr[0] with an
            // ArrayIndexOutOfBoundsException.
            System.err.println(usage);
            System.exit(1);
            return;
        }
        if (strArr.length > 2) {
            // Extra arguments are ignored, as before; only warn about them.
            System.err.println(usage);
        }
        CountKeywordsStatistics countKeywordsStatistics = new CountKeywordsStatistics();
        if (strArr.length > 1 && "dostem".equalsIgnoreCase(strArr[1])) {
            countKeywordsStatistics.stemmer = new PolishStemmer();
        }
        PackDirToYElementIterator elements = new PackDirToYElementIterator(new File(strArr[0]));
        while (elements.hasNext()) {
            countKeywordsStatistics.processYElement(elements.next());
        }
        String reportName = "key-stats-" + System.currentTimeMillis() + ".log";
        // try-with-resources closes the report even if printInfo throws.
        try (PrintStream report = new PrintStream(new FileOutputStream(reportName))) {
            countKeywordsStatistics.printInfo(report);
        }
        System.exit(0);
    }
}
