package pl.edu.icm.cermine.bibref;

import edu.umass.cs.mallet.base.pipe.iterator.LineGroupIterator;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.types.LabelsSequence;
import edu.umass.cs.mallet.grmm.learning.ACRF;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.jdom.Element;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import pl.edu.icm.cermine.bibref.model.BibEntry;
import pl.edu.icm.cermine.bibref.parsing.model.Citation;
import pl.edu.icm.cermine.bibref.parsing.model.CitationTokenLabel;
import pl.edu.icm.cermine.bibref.parsing.tools.CitationUtils;
import pl.edu.icm.cermine.bibref.transformers.BibEntryToNLMElementConverter;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.6.jar:pl/edu/icm/cermine/bibref/CRFBibReferenceParser.class */
/**
 * Bibliographic reference parser backed by a serialized Mallet {@link ACRF} model.
 *
 * <p>A parser instance holds a deserialized model and a set of words read from the
 * {@code crf-train-words.txt} classpath resource. {@link #parseBibReference(String)}
 * tokenizes a reference string, asks the model for the best label sequence, copies the
 * labels onto the tokens and converts the labelled citation into a {@link BibEntry}.
 *
 * <p>NOTE(security): the model is read with Java native deserialization
 * ({@link ObjectInputStream#readObject()}); only load model files from trusted sources.
 */
public class CRFBibReferenceParser implements BibReferenceParser<BibEntry> {

    /** References longer than this many characters are returned unparsed (text only). */
    private static final int MAX_REFERENCE_LENGTH = 2000;

    /** Classpath location of the bundled, gzip-compressed serialized model. */
    private static final String DEFAULT_MODEL_FILE = "/pl/edu/icm/cermine/bibref/acrf.ser.gz";

    /** Classpath location of the word list passed to the feature generation step. */
    private static final String DEFAULT_WORDS_FILE = "/pl/edu/icm/cermine/bibref/crf-train-words.txt";

    private static final String LOGGING_CONFIG_PROPERTY = "java.util.logging.config.file";
    private static final String MALLET_LOGGING_CONFIG = "edu/umass/cs/mallet/base/util/resources/logging.properties";

    /** Separator pattern handed to {@link LineGroupIterator}; compiled once, patterns are immutable. */
    private static final Pattern INSTANCE_SEPARATOR = Pattern.compile("\\s*");

    private static final String OPTION_REFERENCE = "reference";
    private static final String OPTION_FORMAT = "format";
    private static final String FORMAT_BIBTEX = "bibtex";
    private static final String FORMAT_NLM = "nlm";

    private static final String USAGE =
            "Usage: CRFBibReferenceParser -reference <reference text> [-format <output format>]\n\n"
            + "Tool for extracting metadata from reference strings.\n\n"
            + "Arguments:\n"
            + "  -reference            the text of the reference\n"
            + "  -format (optional)    the format of the output,\n"
            + "                        possible values: BIBTEX (default) and NLM";

    private final ACRF model;
    private final Set<String> words;

    /**
     * Creates a parser using a gzip-compressed serialized model stored in a file.
     *
     * @param str path of the model file
     * @throws AnalysisException if the path is null, the file cannot be opened or read,
     *         the model class cannot be resolved, or the words resource cannot be loaded
     */
    public CRFBibReferenceParser(String str) throws AnalysisException {
        configureMalletLogging();
        if (str == null) {
            throw new AnalysisException("Cannot set model! Model file path is null.");
        }
        InputStream modelStream;
        try {
            modelStream = new FileInputStream(new File(str));
        } catch (IOException e) {
            throw new AnalysisException("Cannot set model!", e);
        }
        // loadModel takes ownership of the stream and closes it on every path.
        this.model = loadModel(modelStream);
        this.words = loadWords();
    }

    /**
     * Creates a parser using a gzip-compressed serialized model read from a stream.
     * The stream is consumed and closed by this constructor, whether or not loading succeeds.
     *
     * @param inputStream stream with the gzip-compressed serialized model
     * @throws AnalysisException if the stream is null or cannot be read, the model class
     *         cannot be resolved, or the words resource cannot be loaded
     */
    public CRFBibReferenceParser(InputStream inputStream) throws AnalysisException {
        configureMalletLogging();
        this.model = loadModel(inputStream);
        this.words = loadWords();
    }

    /**
     * Sets the {@code java.util.logging.config.file} system property to the Mallet
     * logging configuration path. This is a JVM-wide side effect and is done before
     * the model is deserialized, as in the original constructors.
     */
    private static void configureMalletLogging() {
        System.setProperty(LOGGING_CONFIG_PROPERTY, MALLET_LOGGING_CONFIG);
    }

    /**
     * Deserializes the model from a gzip-compressed object stream and closes the stream.
     *
     * @param inputStream raw (still compressed) model stream; closed before returning
     * @return the deserialized model
     * @throws AnalysisException if the stream is null, unreadable, or refers to an unknown class
     */
    private static ACRF loadModel(InputStream inputStream) throws AnalysisException {
        if (inputStream == null) {
            throw new AnalysisException("Cannot set model! Model input stream is null.");
        }
        ObjectInputStream objectInputStream = null;
        try {
            objectInputStream = new ObjectInputStream(
                    new BufferedInputStream(new GZIPInputStream(inputStream)));
            return (ACRF) objectInputStream.readObject();
        } catch (IOException e) {
            throw new AnalysisException("Cannot set model!", e);
        } catch (ClassNotFoundException e) {
            throw new AnalysisException("Cannot set model!", e);
        } finally {
            // Close quietly so that a failing close() never masks the real load error.
            // The raw stream is closed explicitly as well: if wrapping it failed, the
            // ObjectInputStream is still null and would not have closed it for us.
            IOUtils.closeQuietly(objectInputStream);
            IOUtils.closeQuietly(inputStream);
        }
    }

    /**
     * Reads the word list from the bundled classpath resource, one entry per line.
     *
     * @return a mutable set containing every line of the resource
     * @throws AnalysisException if the resource is missing or cannot be read
     */
    private static Set<String> loadWords() throws AnalysisException {
        InputStream wordsStream = CitationUtils.class.getResourceAsStream(DEFAULT_WORDS_FILE);
        if (wordsStream == null) {
            throw new AnalysisException("Cannot set words! Resource not found: " + DEFAULT_WORDS_FILE);
        }
        try {
            Set<String> loadedWords = new HashSet<String>();
            // NOTE(review): no charset is passed, so decoding presumably uses the platform
            // default; kept as-is to preserve behaviour - confirm the resource encoding.
            loadedWords.addAll(IOUtils.readLines(wordsStream));
            return loadedWords;
        } catch (IOException e) {
            throw new AnalysisException("Cannot set words!", e);
        } finally {
            IOUtils.closeQuietly(wordsStream);
        }
    }

    /**
     * Parses a reference string into a bibliographic entry.
     *
     * <p>References longer than {@value #MAX_REFERENCE_LENGTH} characters are not parsed;
     * a {@link BibEntry} carrying only the original text is returned for them.
     *
     * @param str text of the reference
     * @return the parsed entry, or a text-only entry for over-long input
     * @throws AnalysisException if {@code str} is null or no model is available
     */
    @Override // pl.edu.icm.cermine.bibref.BibReferenceParser
    public BibEntry parseBibReference(String str) throws AnalysisException {
        if (str == null) {
            throw new AnalysisException("Reference text is not set!");
        }
        if (str.length() > MAX_REFERENCE_LENGTH) {
            return new BibEntry().setText(str);
        }
        // readObject() may legitimately yield null, so the model can still be missing here.
        if (this.model == null) {
            throw new AnalysisException("Model object is not set!");
        }

        Citation citation = CitationUtils.stringToCitation(str);
        String malletInput = StringUtils.join(
                CitationUtils.citationToMalletInputFormat(citation, this.words), "\n");

        InstanceList instances = new InstanceList(this.model.getInputPipe());
        instances.add(new LineGroupIterator(new StringReader(malletInput), INSTANCE_SEPARATOR, true));

        // NOTE(review): get(0) assumes the model returns at least one label sequence and
        // that it is no longer than the token list - confirm for empty/blank references.
        LabelsSequence labels = (LabelsSequence) this.model.getBestLabels(instances).get(0);
        for (int i = 0; i < labels.size(); i++) {
            citation.getTokens().get(i).setLabel(CitationTokenLabel.valueOf(labels.get(i).toString()));
        }
        return CitationUtils.citationToBibref(citation);
    }

    /**
     * Creates a parser using the model bundled on the classpath.
     *
     * @return a new parser instance
     * @throws AnalysisException if the bundled model or words resource is missing or unreadable
     */
    public static CRFBibReferenceParser getInstance() throws AnalysisException {
        InputStream modelStream = CRFBibReferenceParser.class.getResourceAsStream(DEFAULT_MODEL_FILE);
        if (modelStream == null) {
            throw new AnalysisException("Cannot set model! Resource not found: " + DEFAULT_MODEL_FILE);
        }
        return new CRFBibReferenceParser(modelStream);
    }

    /**
     * Command-line entry point: parses one reference and prints it as BibTeX (default) or NLM XML.
     * Prints the usage text to standard error and exits with status 1 when the reference is
     * missing or the format is not recognised. The format name is matched case-insensitively.
     *
     * @param strArr command-line arguments ({@code -reference <text> [-format <bibtex|nlm>]})
     * @throws ParseException if the command line cannot be parsed
     * @throws AnalysisException if the parser cannot be created or the reference cannot be parsed
     * @throws TransformationException if conversion to NLM fails
     */
    public static void main(String[] strArr) throws ParseException, AnalysisException, TransformationException {
        Options options = new Options();
        options.addOption(OPTION_REFERENCE, true, "reference text");
        options.addOption(OPTION_FORMAT, true, "output format");

        CommandLine commandLine = new GnuParser().parse(options, strArr);
        String referenceText = commandLine.getOptionValue(OPTION_REFERENCE);
        String outputFormat = commandLine.getOptionValue(OPTION_FORMAT);

        boolean bibtexRequested = outputFormat == null || FORMAT_BIBTEX.equalsIgnoreCase(outputFormat);
        boolean nlmRequested = FORMAT_NLM.equalsIgnoreCase(outputFormat);

        if (referenceText == null || !(bibtexRequested || nlmRequested)) {
            System.err.println(USAGE);
            System.exit(1);
            return;
        }

        BibEntry bibEntry = getInstance().parseBibReference(referenceText);
        if (bibtexRequested) {
            System.out.println(bibEntry.toBibTeX());
            return;
        }
        Element nlmElement = new BibEntryToNLMElementConverter().convert(bibEntry, new Object[0]);
        System.out.println(new XMLOutputter(Format.getPrettyFormat()).outputString(nlmElement));
    }
}
