package edu.umass.cs.mallet.projects.seg_plus_coref.coreference;

import com.wcohen.secondstring.StringDistance;
import com.wcohen.secondstring.TFIDF;
import com.wcohen.secondstring.tokens.NGramTokenizer;
import com.wcohen.secondstring.tokens.SimpleTokenizer;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.pipe.iterator.LineGroupIterator;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.projects.seg_plus_coref.clustering.ClusterEvaluate;
import edu.umass.cs.mallet.projects.seg_plus_coref.clustering.PairEvaluate;
import edu.umass.cs.mallet.projects.seg_plus_coref.ie.IEInterface;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/projects/seg_plus_coref/coreference/CitationUtils.class */
/**
 * Static helpers for the citation coreference code: reading citation nodes
 * out of labelled reference files, building pair instances from those nodes,
 * grouping citations by label, and printing clustering scores.
 *
 * <p>All methods are static. The collections are raw (pre-generics) to match
 * the rest of this package.
 */
public class CitationUtils {
    private static final Logger logger = Logger.getLogger(CitationUtils.class.getName());

    /** Node type accepted by {@code computeNodes}: one node per reference. */
    public static final String PAPER = "PAPER";
    /** Node type accepted by {@code computeNodes}: nodes built from booktitle/journal tags. */
    public static final String VENUE = "VENUE";
    /** Node type accepted by {@code computeNodes}; author extraction is not implemented. */
    public static final String AUTHOR = "AUTHOR";

    /**
     * Separator markers. Element 1 is compiled as the regular expression that
     * splits an input file into line groups.
     */
    public static String[] SEPERATOR = new String[]{"<NEW_HEADER>", "<NEWREFERENCE>"};

    // Compiler-generated class-literal cache from the original bytecode; retained
    // only so that the package-visible members of this class are unchanged.
    static Class class$edu$umass$cs$mallet$projects$seg_plus_coref$coreference$CitationUtils = CitationUtils.class;

    /**
     * Reads {@link #PAPER} nodes from the given files.
     *
     * @see #computeNodes(ArrayList, IEInterface, boolean, int, int, String)
     */
    public static ArrayList computeNodes(ArrayList arrayList, IEInterface iEInterface, boolean z, int i, int i2) {
        return computeNodes(arrayList, iEInterface, z, i, i2, PAPER);
    }

    /**
     * Builds a {@code PaperCitation} from one line group and appends it to
     * {@code arrayList}.
     *
     * @param str         the line group; its "meta" tag must yield exactly two attributes
     * @param arrayList   list the new citation is appended to
     * @param iEInterface passed to the citation constructor when {@code z} is true
     * @param z           when true, the constructor taking {@code iEInterface}, {@code i}
     *                    and {@code i2} is used; otherwise the three-argument one
     * @param i           passed through to the citation constructor (only when {@code z})
     * @param i2          passed through to the citation constructor (only when {@code z})
     * @throws IllegalArgumentException if the "meta" tag does not yield exactly two attributes
     * @throws NumberFormatException    if the paper id attribute is not an integer
     */
    public static void addPaperFromLine(String str, ArrayList arrayList, IEInterface iEInterface, boolean z, int i, int i2) {
        HashMap metaAttributes = SGMLStringOperation.locateAttributes("meta", str);
        if (metaAttributes.size() != 2) {
            throw new IllegalArgumentException("Reference has no paper label tag: " + str);
        }
        String cluster = (String) metaAttributes.get(Citation.paperCluster);
        int id = Integer.parseInt((String) metaAttributes.get(Citation.paperID));
        if (z) {
            arrayList.add(new PaperCitation(str, cluster, id, iEInterface, i, i2));
        } else {
            arrayList.add(new PaperCitation(str, cluster, id));
        }
    }

    /**
     * Builds a {@code VenueCitation} from one line group and appends it to
     * {@code arrayList}. The "booktitle" tag is tried first, then "journal";
     * if neither yields exactly three attributes the line group is skipped
     * silently.
     *
     * @param str         the line group to inspect
     * @param arrayList   list the new citation is appended to
     * @param iEInterface passed to the citation constructor when {@code z} is true
     * @param z           when true, the constructor taking {@code iEInterface}, {@code i}
     *                    and {@code i2} is used; otherwise the three-argument one
     * @param i           passed through to the citation constructor (only when {@code z})
     * @param i2          passed through to the citation constructor (only when {@code z})
     * @throws IllegalArgumentException if the tag is present but has no venue cluster attribute
     * @throws NumberFormatException    if the venue id attribute is not an integer
     */
    public static void addVenuesFromLine(String str, ArrayList arrayList, IEInterface iEInterface, boolean z, int i, int i2) {
        HashMap venueAttributes = SGMLStringOperation.locateAttributes("booktitle", str);
        if (venueAttributes.size() != 3) {
            venueAttributes = SGMLStringOperation.locateAttributes("journal", str);
        }
        if (venueAttributes.size() != 3) {
            // Neither tag carries the expected attributes: nothing to add.
            return;
        }
        String cluster = (String) venueAttributes.get(Citation.venueCluster);
        if (cluster == null) {
            throw new IllegalArgumentException("bad venue line: " + str);
        }
        int id = Integer.parseInt((String) venueAttributes.get(Citation.venueID));
        if (z) {
            arrayList.add(new VenueCitation(str, cluster, id, iEInterface, i, i2));
        } else {
            arrayList.add(new VenueCitation(str, cluster, id));
        }
    }

    /**
     * Placeholder for author node extraction.
     *
     * @throws UnsupportedOperationException always
     */
    public static void addAuthorsFromLine(String str, ArrayList arrayList, IEInterface iEInterface, boolean z, int i, int i2) {
        throw new UnsupportedOperationException("Not yet implemented");
    }

    /**
     * Reads every file in {@code arrayList}, splits it into line groups on
     * {@code SEPERATOR[1]}, and turns each group into nodes of the requested
     * type.
     *
     * @param arrayList   list of {@link File} objects to read
     * @param iEInterface forwarded to the per-line helpers
     * @param z           forwarded to the per-line helpers
     * @param i           forwarded to the per-line helpers
     * @param i2          forwarded to the per-line helpers
     * @param str         node type: {@link #PAPER}, {@link #VENUE} or {@link #AUTHOR}
     * @return the nodes collected from all files, in reading order
     * @throws IllegalArgumentException with message "Can't read file ..." if anything
     *         fails while processing a file (I/O error, malformed line group,
     *         unrecognized node type); the original failure is attached as the cause
     */
    public static ArrayList computeNodes(ArrayList arrayList, IEInterface iEInterface, boolean z, int i, int i2, String str) {
        logger.fine("Computing nodes...");
        long startMillis = System.currentTimeMillis();
        ArrayList nodes = new ArrayList();
        for (int fileIndex = 0; fileIndex < arrayList.size(); fileIndex++) {
            File file = (File) arrayList.get(fileIndex);
            logger.fine(fileIndex + ": " + file.toString());
            FileReader reader = null;
            try {
                reader = new FileReader(file);
                LineGroupIterator groups = new LineGroupIterator(reader, Pattern.compile(SEPERATOR[1]), true);
                while (groups.hasNext()) {
                    String lineGroup = groups.getLineGroup();
                    if (str.equals(PAPER)) {
                        addPaperFromLine(lineGroup, nodes, iEInterface, z, i, i2);
                    } else if (str.equals(VENUE)) {
                        addVenuesFromLine(lineGroup, nodes, iEInterface, z, i, i2);
                    } else if (str.equals(AUTHOR)) {
                        addAuthorsFromLine(lineGroup, nodes, iEInterface, z, i, i2);
                    } else {
                        throw new IllegalArgumentException("Unrecognized node type: " + str);
                    }
                    groups.nextLineGroup();
                }
            } catch (Exception e) {
                throw cannotRead(file, e);
            } finally {
                closeQuietly(reader);
            }
        }
        double elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000.0d;
        logger.info("Time elapses " + elapsedSeconds + " seconds for computing " + nodes.size() + " nodes.");
        return nodes;
    }

    /**
     * Same as the six-argument overload with the boolean flag set to
     * {@code false}.
     *
     * @see #computeNodesWPubs(ArrayList, ArrayList, IEInterface, boolean, int, int)
     */
    public static ArrayList computeNodesWPubs(ArrayList arrayList, ArrayList arrayList2, IEInterface iEInterface, int i, int i2) {
        return computeNodesWPubs(arrayList, arrayList2, iEInterface, false, i, i2);
    }

    /**
     * Reads every file in {@code arrayList} and builds one {@code Citation}
     * per line group, using the text after the closing {@code </meta>} tag.
     * Citations are also grouped into {@code Publication} objects keyed by
     * their paper cluster attribute; each newly created publication is
     * appended to {@code arrayList2}.
     *
     * @param arrayList   list of {@link File} objects to read
     * @param arrayList2  output list that receives every newly created publication
     * @param iEInterface passed to the citation constructor when {@code z} is true
     * @param z           when true, the constructor taking {@code iEInterface}, {@code i}
     *                    and {@code i2} is used; otherwise the three-argument one
     * @param i           passed through to the citation constructor (only when {@code z})
     * @param i2          passed through to the citation constructor (only when {@code z})
     * @return all citations, in reading order
     * @throws IllegalArgumentException with message "Can't read file ..." if anything
     *         fails while processing a file; the original failure is attached as the cause
     */
    public static ArrayList computeNodesWPubs(ArrayList arrayList, ArrayList arrayList2, IEInterface iEInterface, boolean z, int i, int i2) {
        final String metaEnd = "</meta>";
        logger.fine("Computing nodes...");
        long startMillis = System.currentTimeMillis();
        ArrayList citations = new ArrayList();
        HashMap publicationsByCluster = new HashMap();
        for (int fileIndex = 0; fileIndex < arrayList.size(); fileIndex++) {
            File file = (File) arrayList.get(fileIndex);
            logger.fine(fileIndex + ": " + file.toString());
            FileReader reader = null;
            try {
                reader = new FileReader(file);
                LineGroupIterator groups = new LineGroupIterator(reader, Pattern.compile(SEPERATOR[1]), true);
                while (groups.hasNext()) {
                    String lineGroup = groups.getLineGroup();
                    HashMap metaAttributes = SGMLStringOperation.locateAttributes("meta", lineGroup);
                    if (metaAttributes.size() != 2) {
                        throw new IllegalArgumentException("Reference has no cluster or reference id: " + lineGroup);
                    }
                    String cluster = (String) metaAttributes.get(Citation.paperCluster);
                    int id = Integer.parseInt((String) metaAttributes.get(Citation.paperID));
                    // Keep only the text that follows the closing meta tag.
                    String body = lineGroup.substring(lineGroup.indexOf(metaEnd) + metaEnd.length(), lineGroup.length()).intern();
                    Citation citation;
                    if (z) {
                        citation = new Citation(body, cluster, id, iEInterface, i, i2);
                    } else {
                        citation = new Citation(body, cluster, id);
                    }
                    citations.add(citation);
                    Publication known = (Publication) publicationsByCluster.get(cluster);
                    if (known == null) {
                        Publication created = new Publication(citation);
                        publicationsByCluster.put(cluster, created);
                        arrayList2.add(created);
                    } else {
                        known.addNewCitation(citation);
                    }
                    groups.nextLineGroup();
                }
            } catch (Exception e) {
                throw cannotRead(file, e);
            } finally {
                closeQuietly(reader);
            }
        }
        double elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000.0d;
        logger.fine("Time elapses " + elapsedSeconds + " seconds for computing nodes.");
        return citations;
    }

    /**
     * Builds a {@code TFIDF} distance whose statistics are accumulated over
     * the wrapped strings of every citation in {@code arrayList}.
     *
     * @param arrayList list of {@code Citation} objects
     * @return the trained TFIDF distance
     */
    public static StringDistance computeDistanceMetric(ArrayList arrayList) {
        ArrayList wrappedStrings = new ArrayList();
        for (int index = 0; index < arrayList.size(); index++) {
            Citation citation = (Citation) arrayList.get(index);
            wrappedStrings.addAll(citation.getAllStringsWrapped());
        }
        TFIDF tfidf = new TFIDF();
        tfidf.accumulateStatistics(wrappedStrings.iterator());
        return tfidf;
    }

    /**
     * Accumulates statistics for a word-level TFIDF and a character-trigram
     * TFIDF over the wrapped strings of every citation in {@code list}.
     *
     * <p>NOTE(review): neither {@code stringDistance} nor
     * {@code stringDistance2} is read or written, and the two TFIDF objects
     * built here are discarded when the method returns, so callers observe no
     * result. This looks like an attempt to return values through the
     * parameters; fixing it requires an interface change, so the behavior is
     * left as is. Confirm against callers.
     *
     * @param list            citations to accumulate over; every element must be a {@code Citation}
     * @param stringDistance  unused
     * @param stringDistance2 unused
     */
    public static void makeDistMetric(List list, StringDistance stringDistance, StringDistance stringDistance2) {
        ArrayList wrappedStrings = new ArrayList();
        for (Iterator it = list.iterator(); it.hasNext();) {
            Citation citation = (Citation) it.next();
            wrappedStrings.addAll(citation.getAllStringsWrapped());
        }
        NGramTokenizer trigramTokenizer = new NGramTokenizer(3, 3, false, new SimpleTokenizer(true, true));
        TFIDF wordTfidf = new TFIDF();
        TFIDF trigramTfidf = new TFIDF(trigramTokenizer);
        wordTfidf.accumulateStatistics(wrappedStrings.iterator());
        trigramTfidf.accumulateStatistics(wrappedStrings.iterator());
    }

    /**
     * Builds pair instances from {@code arrayList}, passing {@code 1.0} as
     * the numeric argument of the pair iterator.
     *
     * @see #makePairs(Pipe, ArrayList, double)
     */
    public static InstanceList makePairs(Pipe pipe, ArrayList arrayList) {
        return makePairs(pipe, arrayList, 1.0d);
    }

    /**
     * Creates an {@code InstanceList} on {@code pipe} and fills it from a
     * {@code NodePairIterator} over {@code arrayList}.
     *
     * @param pipe      pipe the instance list is created with
     * @param arrayList nodes to pair
     * @param d         forwarded unchanged to the {@code NodePairIterator} constructor
     * @return the populated instance list
     */
    public static InstanceList makePairs(Pipe pipe, ArrayList arrayList, double d) {
        logger.fine("PairIterator...");
        long startMillis = System.currentTimeMillis();
        InstanceList pairs = new InstanceList(pipe);
        pairs.add(new NodePairIterator(arrayList, d));
        logger.fine("====");
        double elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000.0d;
        logger.fine("Time elapses " + elapsedSeconds + " seconds for computing pair iterator.");
        return pairs;
    }

    /**
     * Creates an {@code InstanceList} on {@code pipe} and fills it from a
     * {@code NodePairIterator} built from {@code arrayList} and {@code list}.
     *
     * @param pipe      pipe the instance list is created with
     * @param arrayList nodes to pair
     * @param list      forwarded unchanged to the {@code NodePairIterator} constructor
     * @return the populated instance list
     */
    public static InstanceList makePairs(Pipe pipe, ArrayList arrayList, List list) {
        logger.fine("PairIterator...");
        long startMillis = System.currentTimeMillis();
        InstanceList pairs = new InstanceList(pipe);
        pairs.add(new NodePairIterator(arrayList, list));
        logger.fine("====");
        double elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000.0d;
        logger.fine("Time elapses " + elapsedSeconds + " seconds for computing pair iterator.");
        return pairs;
    }

    /**
     * Groups citations by their label.
     *
     * @param arrayList list of {@code Citation} objects
     * @return a set containing one {@code LinkedHashSet} of citations per
     *         distinct {@code getLabel()} value; citations keep their input
     *         order inside each group
     */
    public static Collection makeCollections(ArrayList arrayList) {
        HashMap groupsByLabel = new HashMap();
        for (int index = 0; index < arrayList.size(); index++) {
            Citation citation = (Citation) arrayList.get(index);
            Object label = citation.getLabel();
            Collection group = (Collection) groupsByLabel.get(label);
            if (group == null) {
                group = new LinkedHashSet();
                groupsByLabel.put(label, group);
            }
            group.add(citation);
        }
        LinkedHashSet groups = new LinkedHashSet();
        for (Iterator it = groupsByLabel.values().iterator(); it.hasNext();) {
            groups.add((Collection) it.next());
        }
        return groups;
    }

    /**
     * Runs {@code ClusterEvaluate} and {@code PairEvaluate} on the two
     * clusterings and prints F1, recall and precision of both to standard
     * output, together with the experiment label and the size of
     * {@code collection2}.
     *
     * @param collection  first clustering, passed as the first constructor argument
     *                    of both evaluators
     * @param collection2 second clustering, passed as the second constructor argument;
     *                    its size is reported as "Number of clusters"
     * @param str         experiment label printed after "EXPT: "
     */
    public static void evaluateClustering(Collection collection, Collection collection2, String str) {
        ClusterEvaluate clusterEvaluate = new ClusterEvaluate(collection, collection2);
        clusterEvaluate.evaluate();
        clusterEvaluate.printVerbose();
        PairEvaluate pairEvaluate = new PairEvaluate(collection, collection2);
        pairEvaluate.evaluate();
        System.out.println("EXPT: " + str);
        System.out.println("TESTING ObjFn Cluster F1: " + clusterEvaluate.getF1());
        System.out.println("TESTING ObjFn Cluster Recall: " + clusterEvaluate.getRecall());
        System.out.println("TESTING ObjFn Cluster Precision: " + clusterEvaluate.getPrecision());
        System.out.println("Number of clusters " + collection2.size());
        System.out.println("TESTING ObjFn Pair F1: " + pairEvaluate.getF1());
        System.out.println("TESTING ObjFn Pair Recall: " + pairEvaluate.getRecall());
        System.out.println("TESTING ObjFn Pair Precision: " + pairEvaluate.getPrecision());
    }

    /**
     * Compiler-generated class lookup helper from the original bytecode,
     * retained so that the package-visible members of this class are
     * unchanged.
     *
     * @param str fully qualified class name
     * @return the loaded class
     * @throws NoClassDefFoundError if the class cannot be found; the
     *         {@link ClassNotFoundException} is attached as the cause
     */
    static Class class$(String str) {
        try {
            return Class.forName(str);
        } catch (ClassNotFoundException e) {
            // initCause returns Throwable, which cannot be thrown from here
            // directly; attach the cause first and throw the typed error.
            NoClassDefFoundError error = new NoClassDefFoundError();
            error.initCause(e);
            throw error;
        }
    }

    /** Wraps a per-file failure in the exception type callers expect, keeping the cause. */
    private static IllegalArgumentException cannotRead(File file, Exception cause) {
        IllegalArgumentException failure = new IllegalArgumentException("Can't read file " + file);
        failure.initCause(cause);
        return failure;
    }

    /** Closes {@code reader} if it was opened; a failing close is logged, not thrown. */
    private static void closeQuietly(FileReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            logger.fine("Could not close citation file reader: " + e);
        }
    }
}
