package pl.edu.icm.cermine.bibref;

import com.google.common.collect.Lists;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.jdom.JDOMException;
import org.xml.sax.InputSource;
import pl.edu.icm.cermine.bibref.parsing.model.Citation;
import pl.edu.icm.cermine.bibref.parsing.model.CitationToken;
import pl.edu.icm.cermine.bibref.parsing.tools.CitationUtils;
import pl.edu.icm.cermine.bibref.parsing.tools.NlmCitationExtractor;

/* loaded from: input_file:pl/edu/icm/cermine/bibref/MalletTrainingFileGenerator.class */
/**
 * Command-line tool that reads citations from an input file and produces two
 * UTF-8 text files: a training file built from
 * {@link CitationUtils#citationToMalletInputFormat} and a list of frequently
 * occurring alphabetic terms.
 *
 * <p>Arguments:
 * <ol>
 *   <li>path of the input file handed to {@link NlmCitationExtractor#extractCitations};</li>
 *   <li>path of the training file to write;</li>
 *   <li>path of the frequent-terms file to write.</li>
 * </ol>
 */
public final class MalletTrainingFileGenerator {
    /** A term is kept only if it occurs strictly more often than this. */
    private static final int MIN_TERM_COUNT = 5;

    /** Tokens consisting solely of ASCII letters; compiled once instead of per token. */
    private static final Pattern ALPHABETIC_TOKEN = Pattern.compile("^[a-zA-Z]+$");

    /** Character encoding used for both output files. */
    private static final String OUTPUT_ENCODING = "UTF-8";

    /**
     * Runs the generator.
     *
     * @param strArr input path, training-file output path, terms-file output path
     * @throws IllegalArgumentException if fewer than three arguments are supplied
     * @throws JDOMException declared for the citation extraction step
     * @throws IOException if reading the input or writing either output fails
     */
    public static void main(String[] strArr) throws JDOMException, IOException {
        // Fail fast with a usable message rather than an index error after parsing.
        if (strArr == null || strArr.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: MalletTrainingFileGenerator <input> <training-output> <terms-output>");
        }

        List<Citation> citations = readCitations(new File(strArr[0]));
        // Declared with the concrete type because the parameter type expected by
        // CitationUtils.citationToMalletInputFormat is not visible from this file.
        HashSet<String> frequentTerms = selectFrequentTerms(countTerms(citations));

        // Both outputs are opened before anything is written, training file first.
        Writer trainingWriter = new OutputStreamWriter(new FileOutputStream(strArr[1]), OUTPUT_ENCODING);
        try {
            Writer termsWriter = new OutputStreamWriter(new FileOutputStream(strArr[2]), OUTPUT_ENCODING);
            try {
                writeLines(termsWriter, frequentTerms);
                termsWriter.flush();
            } finally {
                termsWriter.close();
            }

            for (Citation citation : citations) {
                writeLines(trainingWriter, CitationUtils.citationToMalletInputFormat(citation, frequentTerms));
                // Blank line terminates each citation's group of lines.
                trainingWriter.write("\n");
            }
            trainingWriter.flush();
        } finally {
            trainingWriter.close();
        }
    }

    /** Extracts all citations from the given file, always closing the stream. */
    private static List<Citation> readCitations(File file) throws JDOMException, IOException {
        FileInputStream input = new FileInputStream(file);
        try {
            return NlmCitationExtractor.extractCitations(new InputSource(input));
        } finally {
            input.close();
        }
    }

    /** Counts lower-cased occurrences of every purely alphabetic token text. */
    private static Map<String, Integer> countTerms(List<Citation> citations) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (Citation citation : citations) {
            for (CitationToken token : citation.getTokens()) {
                String text = token.getText();
                if (ALPHABETIC_TOKEN.matcher(text).matches()) {
                    String term = text.toLowerCase(Locale.ENGLISH);
                    Integer previous = counts.get(term);
                    counts.put(term, previous == null ? 1 : previous + 1);
                }
            }
        }
        return counts;
    }

    /** Returns the terms whose count is strictly greater than {@link #MIN_TERM_COUNT}. */
    private static HashSet<String> selectFrequentTerms(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> entries = Lists.newArrayList(counts.entrySet());
        // Highest count first; ties broken alphabetically.
        // NOTE(review): the HashSet below does not retain this ordering. The sort is
        // kept only so that the sequence of insertions into the set stays as before.
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCountDescending = right.getValue().compareTo(left.getValue());
                return byCountDescending != 0 ? byCountDescending : left.getKey().compareTo(right.getKey());
            }
        });

        HashSet<String> frequent = new HashSet<String>();
        for (Map.Entry<String, Integer> entry : entries) {
            if (entry.getValue() > MIN_TERM_COUNT) {
                frequent.add(entry.getKey());
            }
        }
        return frequent;
    }

    /** Writes each element followed by a single {@code "\n"}. */
    private static void writeLines(Writer writer, Iterable<String> lines) throws IOException {
        for (String line : lines) {
            writer.write(line);
            writer.write("\n");
        }
    }

    /** Not instantiable; this class only hosts {@link #main}. */
    private MalletTrainingFileGenerator() {
    }
}
