package pl.edu.icm.cermine.bibref.parsing.tools;

import com.hp.hpl.jena.util.FileManager;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.xml.sax.InputSource;
import pl.edu.icm.cermine.bibref.model.BibEntry;
import pl.edu.icm.cermine.bibref.parsing.model.Citation;
import pl.edu.icm.cermine.bibref.parsing.model.CitationToken;
import pl.edu.icm.cermine.bibref.parsing.model.CitationTokenLabel;
import pl.edu.icm.cermine.bibref.transformers.BibEntryToNLMElementConverter;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.7-SNAPSHOT.jar:pl/edu/icm/cermine/bibref/parsing/tools/PubMedToNLM.class */
/**
 * Command-line tool that builds a set of labelled bibliographic entries from pairs of
 * {@code .refs} / {@code .nxml} files and writes them out in three formats.
 *
 * <p>For every {@code *.refs} file found recursively under {@link #NLM_DIR}, the citations of
 * the sibling {@code .nxml} file are sampled at random; the token labels of a sampled citation
 * are copied onto every {@code .refs} citation whose text contains the sampled citation's title.
 * Sufficiently labelled results are collected and finally appended to the NLM, BibTeX and
 * plain-text output files.
 */
public final class PubMedToNLM {
    /** Directory scanned recursively for {@code *.refs} input files. */
    private static final String NLM_DIR = "/media/4CEE59EAEE59CCB8/data/refs/data/";
    /** Output file receiving one NLM XML element per entry. */
    private static final String OUT_NLM = "/home/domin/phd-metadata-extraction/results/citations/citations.nxml";
    /** Output file receiving one BibTeX record per entry. */
    private static final String OUT_BT = "/home/domin/phd-metadata-extraction/results/citations/citations.bibtex";
    /** Output file receiving the raw text of each entry. */
    private static final String OUT_TXT = "/home/domin/phd-metadata-extraction/results/citations/citations.txt";

    /** Number of random draws made from the {@code .nxml} citations of each input file. */
    private static final int SAMPLES_PER_FILE = 10;
    /** Largest accepted share of characters that may remain labelled as plain text. */
    private static final double MAX_TEXT_RATIO = 0.25d;

    /**
     * Processes every {@code *.refs} file under {@link #NLM_DIR}, printing a progress line per
     * file, and appends all collected entries to the three output files.
     *
     * @param strArr ignored; all paths are fixed constants of this class
     * @throws JDOMException           propagated from the citation extraction / conversion code
     * @throws IOException             if an input file cannot be read or an output file written
     * @throws AnalysisException       propagated from the citation processing code
     * @throws TransformationException propagated from the citation processing code
     */
    public static void main(String[] strArr) throws JDOMException, IOException, AnalysisException, TransformationException {
        // Materialise the listing so that its size is available for the progress report.
        List<File> refsFiles = new ArrayList<File>(FileUtils.listFiles(new File(NLM_DIR), new String[]{"refs"}, true));
        HashSet<BibEntry> entries = new HashSet<BibEntry>();
        int processed = 0;
        for (File refsFile : refsFiles) {
            entries.addAll(collectEntries(refsFile));
            processed++;
            System.out.println(processed + " " + ((processed * 100.0d) / refsFiles.size()) + "%");
        }
        writeEntries(entries);
    }

    /**
     * Produces the accepted entries for one {@code .refs} file and its sibling {@code .nxml} file.
     *
     * @param refsFile the {@code .refs} input file
     * @return entries with pairwise distinct text; empty when the {@code .nxml} file yields no citations
     */
    private static List<BibEntry> collectEntries(File refsFile) throws JDOMException, IOException, AnalysisException, TransformationException {
        List<Citation> rawCitations = readCitations(refsFile);
        List<Citation> labelledCitations = readCitations(new File(refsFile.getPath().replace(".refs", ".nxml")));
        List<BibEntry> accepted = new ArrayList<BibEntry>();
        if (labelledCitations.isEmpty()) {
            // Nothing to sample from; drawing a random index would otherwise fail.
            return accepted;
        }
        for (int draw = 0; draw < SAMPLES_PER_FILE; draw++) {
            Citation labelled = labelledCitations.get((int) (labelledCitations.size() * Math.random()));
            String title = CitationUtils.citationToBibref(labelled).getFirstFieldValue("title");
            if (title == null) {
                continue;
            }
            String normalizedTitle = title.replaceAll("\\s+", " ");
            for (Citation raw : rawCitations) {
                String rawText = raw.getText().replaceAll("\\s+", " ");
                if (!rawText.contains(normalizedTitle)) {
                    continue;
                }
                Citation candidate = CitationUtils.stringToCitation(rawText);
                // NOTE(review): transferLabels removes matched tokens from the list returned by
                // labelled.getTokens(); if that is the citation's backing list, the sampled
                // citation is consumed across matches and repeated draws - confirm intent.
                transferLabels(labelled.getTokens(), candidate.getTokens());
                List<CitationToken> candidateTokens = candidate.getTokens();
                fillSingleCharacterGaps(candidateTokens);
                markInitials(candidateTokens);
                BibEntry entry = CitationUtils.citationToBibref(candidate);
                if (isSufficientlyLabelled(candidate.getTokens()) && !containsText(accepted, entry)) {
                    accepted.add(entry);
                }
            }
        }
        return accepted;
    }

    /**
     * Extracts the citations of a file, always closing the underlying stream.
     * The throws clause mirrors {@code main} because the extractor's declared exceptions are
     * not visible from this class alone.
     */
    private static List<Citation> readCitations(File file) throws JDOMException, IOException, AnalysisException, TransformationException {
        FileInputStream input = new FileInputStream(file);
        try {
            return NlmCitationExtractor.extractCitations(new InputSource(input));
        } finally {
            input.close();
        }
    }

    /**
     * Resets every target token to {@code TEXT} and then, for non-separator tokens, copies the
     * label of the first source token with identical text; each source token is used at most
     * once because it is removed from {@code sourceTokens} after a match.
     */
    private static void transferLabels(List<CitationToken> sourceTokens, List<CitationToken> targetTokens) {
        for (CitationToken target : targetTokens) {
            target.setLabel(CitationTokenLabel.TEXT);
            if (isSeparator(target.getText())) {
                continue;
            }
            CitationToken match = null;
            for (CitationToken source : sourceTokens) {
                if (source.getText().equals(target.getText())) {
                    target.setLabel(source.getLabel());
                    match = source;
                    break;
                }
            }
            if (match != null) {
                sourceTokens.remove(match);
            }
        }
    }

    /** Tells whether the token text is one of the separators that never receive a copied label. */
    private static boolean isSeparator(String text) {
        // FileManager.PATH_DELIMITER is presumably ";" - confirm against the Jena version in use.
        return text.equals(",") || text.equals(".") || text.equals(":") || text.equals(FileManager.PATH_DELIMITER);
    }

    /**
     * Gives an interior one-character token the label of its neighbours when both neighbours
     * share the same non-{@code TEXT} label. The first and last tokens are never modified.
     */
    private static void fillSingleCharacterGaps(List<CitationToken> tokens) {
        for (int i = 1; i < tokens.size() - 1; i++) {
            CitationToken previous = tokens.get(i - 1);
            CitationToken current = tokens.get(i);
            CitationToken next = tokens.get(i + 1);
            if (current.getText().length() == 1
                    && previous.getLabel().equals(next.getLabel())
                    && !CitationTokenLabel.TEXT.equals(next.getLabel())) {
                current.setLabel(previous.getLabel());
            }
        }
    }

    /**
     * Labels single upper-case letters, and a "." directly following one, as {@code GIVENNAME}.
     * Scanning covers interior tokens only and stops at the first {@code ARTICLE_TITLE} token.
     */
    private static void markInitials(List<CitationToken> tokens) {
        for (int i = 1; i < tokens.size() - 1; i++) {
            CitationToken previous = tokens.get(i - 1);
            CitationToken current = tokens.get(i);
            if (current.getLabel().equals(CitationTokenLabel.ARTICLE_TITLE)) {
                break;
            }
            boolean isInitial = current.getText().matches("[A-Z]");
            boolean isDotAfterInitial = previous.getText().matches("[A-Z]") && current.getText().equals(".");
            if (isInitial || isDotAfterInitial) {
                current.setLabel(CitationTokenLabel.GIVENNAME);
            }
        }
    }

    /**
     * Returns {@code true} when the characters of {@code TEXT}-labelled tokens make up at most
     * {@link #MAX_TEXT_RATIO} of the characters of all tokens.
     */
    private static boolean isSufficientlyLabelled(List<CitationToken> tokens) {
        int textLength = 0;
        int totalLength = 0;
        for (CitationToken token : tokens) {
            int length = token.getText().length();
            totalLength += length;
            if (token.getLabel().equals(CitationTokenLabel.TEXT)) {
                textLength += length;
            }
        }
        return textLength <= MAX_TEXT_RATIO * totalLength;
    }

    /** Tells whether {@code entries} already holds an entry with the same text as {@code candidate}. */
    private static boolean containsText(List<BibEntry> entries, BibEntry candidate) {
        for (BibEntry existing : entries) {
            if (existing.getText().equals(candidate.getText())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Appends every entry, one per line, to the NLM, BibTeX and text output files. NLM elements
     * receive consecutive {@code id} attributes starting at 1 in iteration order.
     */
    private static void writeEntries(HashSet<BibEntry> entries) throws JDOMException, IOException, AnalysisException, TransformationException {
        File nlmFile = new File(OUT_NLM);
        File bibtexFile = new File(OUT_BT);
        File textFile = new File(OUT_TXT);
        BibEntryToNLMElementConverter converter = new BibEntryToNLMElementConverter();
        XMLOutputter outputter = new XMLOutputter(Format.getRawFormat());
        int nextId = 1;
        for (BibEntry entry : entries) {
            Element element = converter.convert(entry, new Object[0]);
            element.setAttribute("id", String.valueOf(nextId));
            nextId++;
            FileUtils.writeStringToFile(nlmFile, xMLString(outputter, element) + "\n", true);
            FileUtils.writeStringToFile(bibtexFile, entry.toBibTeX() + "\n", true);
            FileUtils.writeStringToFile(textFile, entry.getText() + "\n", true);
        }
    }

    /** Serialises one element with the given outputter. */
    private static String xMLString(XMLOutputter outputter, Element element) {
        return outputter.outputString(element);
    }

    private PubMedToNLM() {
    }
}
