package pl.edu.icm.cermine.bibref;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.jdom.JDOMException;
import org.xml.sax.InputSource;
import pl.edu.icm.cermine.bibref.parsing.model.Citation;
import pl.edu.icm.cermine.bibref.parsing.model.CitationToken;
import pl.edu.icm.cermine.bibref.parsing.model.CitationTokenLabel;
import pl.edu.icm.cermine.bibref.parsing.tools.CitationUtils;
import pl.edu.icm.cermine.bibref.parsing.tools.NlmCitationExtractor;
import pl.edu.icm.cermine.tools.CountMap;
import pl.edu.icm.cermine.tools.PrefixTree;

/* loaded from: input_file:pl/edu/icm/cermine/bibref/MalletTrainingFileGenerator.class */
/**
 * Command-line tool that reads citations from an input file via
 * {@link NlmCitationExtractor} and writes a Mallet-format training file plus
 * four auxiliary word lists.
 *
 * <p>Expected arguments (all are file paths):
 * <ol start="0">
 *   <li>input file handed to {@code NlmCitationExtractor.extractCitations}</li>
 *   <li>output: lines produced by {@code CitationUtils.citationToMalletInputFormat},
 *       with one empty line after each citation</li>
 *   <li>output: lower-cased token texts selected with {@code MIN_TERM_COUNT}</li>
 *   <li>output: lower-cased {@code SOURCE}-labelled texts selected with {@code MIN_JOURNAL_COUNT}</li>
 *   <li>output: lower-cased {@code SURNAME}-labelled texts selected with {@code MIN_SURNAME_COUNT}</li>
 *   <li>output: lower-cased {@code INSTITUTION}-labelled texts selected with {@code MIN_INST_COUNT}</li>
 * </ol>
 * All output files are written as UTF-8, one entry per line.
 */
public final class MalletTrainingFileGenerator {
    // Thresholds passed to CountMap.getSortedEntries(int); presumably a minimum
    // occurrence count - confirm against CountMap.
    private static final int MIN_TERM_COUNT = 3;
    private static final int MIN_JOURNAL_COUNT = 2;
    private static final int MIN_SURNAME_COUNT = 2;
    private static final int MIN_INST_COUNT = 1;

    /** Positions of the individual file paths in the argument array. */
    private static final int ARG_INPUT = 0;
    private static final int ARG_MALLET_OUT = 1;
    private static final int ARG_TERMS_OUT = 2;
    private static final int ARG_SOURCES_OUT = 3;
    private static final int ARG_SURNAMES_OUT = 4;
    private static final int ARG_INSTITUTIONS_OUT = 5;
    private static final int REQUIRED_ARG_COUNT = 6;

    private static final String PREFIX_TREE_START = "<START>";
    private static final String OUTPUT_ENCODING = "UTF-8";

    /**
     * Runs the generator.
     *
     * @param args six file paths, see the class description for their order
     * @throws IllegalArgumentException if fewer than six arguments are supplied
     * @throws JDOMException declared for the citation extraction step
     * @throws IOException if the input cannot be read or an output cannot be written
     */
    // Raw CountMap is kept on purpose: its generic signature is not visible in this file.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static void main(String[] args) throws JDOMException, IOException {
        // Fail before touching any file instead of half-way through with an index error.
        if (args == null || args.length < REQUIRED_ARG_COUNT) {
            throw new IllegalArgumentException(
                    "Expected " + REQUIRED_ARG_COUNT + " arguments: <input> <mallet-output> <terms-output>"
                    + " <sources-output> <surnames-output> <institutions-output>");
        }

        List<Citation> citations = readCitations(new File(args[ARG_INPUT]));

        CountMap termCounts = new CountMap();
        CountMap sourceCounts = new CountMap();
        CountMap surnameCounts = new CountMap();
        CountMap institutionCounts = new CountMap();
        for (Citation citation : citations) {
            // Element type of getTokens() is not visible here, hence the explicit cast.
            for (Object token : citation.getTokens()) {
                termCounts.add(((CitationToken) token).getText().toLowerCase(Locale.ENGLISH));
            }
            for (CitationToken token : citation.getConcatenatedTokens()) {
                String text = token.getText().toLowerCase(Locale.ENGLISH);
                CitationTokenLabel label = token.getLabel();
                if (label == CitationTokenLabel.SOURCE) {
                    sourceCounts.add(text);
                }
                if (label == CitationTokenLabel.SURNAME) {
                    surnameCounts.add(text);
                }
                if (label == CitationTokenLabel.INSTITUTION) {
                    institutionCounts.add(text);
                }
            }
        }

        HashSet<String> terms = selectKeys(termCounts, MIN_TERM_COUNT);
        HashSet<String> sources = selectKeys(sourceCounts, MIN_JOURNAL_COUNT);
        HashSet<String> surnames = selectKeys(surnameCounts, MIN_SURNAME_COUNT);
        HashSet<String> institutions = selectKeys(institutionCounts, MIN_INST_COUNT);

        PrefixTree sourceTree = buildPrefixTree(sources);
        PrefixTree surnameTree = buildPrefixTree(surnames);
        PrefixTree institutionTree = buildPrefixTree(institutions);

        // All five outputs are opened up front (same order as before) and every one
        // of them is closed on every exit path, including failures while opening a
        // later file or while writing.
        try (OutputStreamWriter malletOut = openWriter(args[ARG_MALLET_OUT]);
                OutputStreamWriter termsOut = openWriter(args[ARG_TERMS_OUT]);
                OutputStreamWriter sourcesOut = openWriter(args[ARG_SOURCES_OUT]);
                OutputStreamWriter surnamesOut = openWriter(args[ARG_SURNAMES_OUT]);
                OutputStreamWriter institutionsOut = openWriter(args[ARG_INSTITUTIONS_OUT])) {
            writeLines(termsOut, terms);
            writeLines(sourcesOut, sources);
            writeLines(surnamesOut, surnames);
            writeLines(institutionsOut, institutions);

            for (Citation citation : citations) {
                writeLines(malletOut, CitationUtils.citationToMalletInputFormat(
                        citation, terms, sourceTree, surnameTree, institutionTree));
                // Empty line after each citation's block of lines.
                malletOut.write("\n");
            }
        }
    }

    /** Extracts the citations from {@code input}; the stream is closed before returning. */
    private static List<Citation> readCitations(File input) throws JDOMException, IOException {
        try (FileInputStream in = new FileInputStream(input)) {
            return NlmCitationExtractor.extractCitations(new InputSource(in));
        }
    }

    /** Collects the keys of {@code counts.getSortedEntries(threshold)} into a set. */
    @SuppressWarnings("rawtypes")
    private static HashSet<String> selectKeys(CountMap counts, int threshold) {
        HashSet<String> keys = new HashSet<>();
        for (Object entry : counts.getSortedEntries(threshold)) {
            keys.add((String) ((Map.Entry<?, ?>) entry).getKey());
        }
        return keys;
    }

    /** Creates a prefix tree rooted at the {@code "<START>"} marker and builds it from {@code phrases}. */
    private static PrefixTree buildPrefixTree(HashSet<String> phrases) {
        PrefixTree tree = new PrefixTree(PREFIX_TREE_START);
        tree.build(phrases);
        return tree;
    }

    /** Opens {@code path} for writing as UTF-8 text. */
    private static OutputStreamWriter openWriter(String path) throws IOException {
        return new OutputStreamWriter(new FileOutputStream(path), OUTPUT_ENCODING);
    }

    /** Writes every element of {@code lines} (each must be a {@code String}) followed by a newline. */
    private static void writeLines(OutputStreamWriter out, Iterable<?> lines) throws IOException {
        for (Object line : lines) {
            out.write((String) line);
            out.write("\n");
        }
    }

    private MalletTrainingFileGenerator() {
    }
}
