package edu.umass.cs.mallet.projects.seg_plus_coref.clustering;

import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.pipe.SerialPipes;
import edu.umass.cs.mallet.base.pipe.Target2Label;
import edu.umass.cs.mallet.base.pipe.iterator.FileIterator;
import edu.umass.cs.mallet.base.types.FeatureVector;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.types.Matrix2;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.AceTypeFeature;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.AcronymOf;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.AffixOfMentionPair;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.HobbsDistanceMentionPair;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.Mention;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.MentionPair;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.MentionPair2FeatureVector;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.MentionPairAntecedentPosition;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.MentionPairHeadIdentical;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.MentionPairIdentical;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.MentionPairIterator;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.MentionPairSentenceDistance;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.ModifierWordFeatures;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.NullAntecedentFeatureExtractor;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.PartOfSpeechMentionPair;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.ProperNounFilter;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.ProperNounFilterMUC;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.TUIGraph;
import edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.XMLFileFilter;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/* loaded from: input_file:edu/umass/cs/mallet/projects/seg_plus_coref/clustering/TUI.class */
/**
 * Command-line driver that trains a {@link ClusterLearnerAvg} on mention-pair
 * instances read from XML documents and then reports cluster-level and
 * pairwise F1 for the training and the test document sets.
 *
 * <p>Usage: {@code TUI <sourceType> <trainingDir> <testDir>}. When the
 * argument count is not exactly three, two built-in default directories are
 * used and the source type is left {@code null}.
 */
public class TUI {
    /** Row index of the "yes" (coreferent) weights in the lambda matrix. */
    static int yesIndex = 0;
    /** Row index of the "no" (not coreferent) weights in the lambda matrix. */
    static int noIndex = 1;
    public static final boolean QUANTIZE_EDGE_VALUES = false;

    /**
     * Builds the training and test instance lists, trains the averaged cluster
     * learner and prints evaluation results for both document sets.
     *
     * @param strArr either {@code {sourceType, trainingDir, testDir}} or
     *               anything else, in which case default directories are used
     */
    public static void main(String[] strArr) {
        String sourceType = null;
        String trainingDir;
        String testDir;
        if (strArr.length != 3) {
            trainingDir = "/usr/wod/tmp2/wellner/data/all-docs/training";
            testDir = "/usr/wod/tmp2/wellner/data/all-docs/test-annotated";
        } else {
            sourceType = strArr[0];
            trainingDir = strArr[1];
            testDir = strArr[2];
        }

        XMLFileFilter xmlFilter = new XMLFileFilter(".*xml");
        FileIterator trainingFiles = new FileIterator(new File(trainingDir), xmlFilter);
        FileIterator testFiles = new FileIterator(new File(testDir), xmlFilter);

        // Null-safe comparison: sourceType is null when the defaults are used,
        // and calling equals() on it directly would throw before any work is done.
        // NOTE(review): sourceType is still passed on as null in that case; how
        // MentionPairIterator treats a null source type is not visible here.
        ArrayList filters = new ArrayList();
        if ("MUC".equals(sourceType)) {
            filters.add(new ProperNounFilterMUC());
        } else {
            filters.add(new ProperNounFilter());
        }

        MentionPairIterator trainingPairs =
                new MentionPairIterator(trainingFiles, sourceType, false, true, true, filters);
        MentionPairIterator testPairs =
                new MentionPairIterator(testFiles, sourceType, false, true, true, filters);

        SerialPipes featurePipe = buildFeaturePipe();

        InstanceList trainingInstances = new InstanceList(featurePipe);
        trainingInstances.add(trainingPairs);
        System.out.println("About to partition training instances into associated doc sets");
        Set trainingDocs = MentionPairIterator.partitionIntoDocumentInstances(trainingInstances);
        System.out.println("Number of docInstance sets: " + trainingDocs.size());

        InstanceList testInstances = new InstanceList(featurePipe);
        testInstances.add(testPairs);
        Set testDocs = MentionPairIterator.partitionIntoDocumentInstances(testInstances);

        yesIndex = 0;
        noIndex = 1;
        // NOTE(review): 200 is presumably the number of training iterations - confirm
        // against ClusterLearnerAvg.
        ClusterLearnerAvg learner =
                new ClusterLearnerAvg(200, trainingDocs, featurePipe, yesIndex, noIndex);
        learner.startTrainingAvg(testDocs);
        // Result unused here; the call is kept in case it finalizes learner state
        // (its implementation is not visible from this file).
        learner.getFinalLambdas();
        System.out.println("Finished training...");

        System.out.println("TRAINING DATA");
        System.out.println("-------------------------------");
        runTrainedModel(trainingDocs.iterator(), learner, featurePipe);
        System.out.println("TEST DATA");
        System.out.println("-------------------------------");
        runTrainedModel(testDocs.iterator(), learner, featurePipe);
    }

    /** Assembles the feature-extraction pipeline applied to every mention pair. */
    private static SerialPipes buildFeaturePipe() {
        return new SerialPipes(new Pipe[]{
            new Target2Label(),
            new AffixOfMentionPair(),
            new AcronymOf(),
            new AceTypeFeature(),
            new MentionPairHeadIdentical(),
            new MentionPairIdentical(),
            new MentionPairSentenceDistance(),
            new PartOfSpeechMentionPair(),
            new HobbsDistanceMentionPair(),
            new MentionPairAntecedentPosition(),
            new NullAntecedentFeatureExtractor(),
            new ModifierWordFeatures(),
            new MentionPair2FeatureVector()
        });
    }

    /**
     * Clusters each document with the trained model and prints per-document and
     * averaged cluster / pairwise F1 scores.
     *
     * <p>If {@code it} yields no documents the two overall averages are printed
     * as {@code NaN} (0.0 divided by 0).
     *
     * @param it             iterator over documents; each element is a
     *                       {@link List} of mention-pair {@link Instance}s
     * @param clusterLearner trained learner supplying the final lambdas
     * @param pipe           pipe whose data alphabet the instances were built with
     */
    public static void runTrainedModel(Iterator it, ClusterLearner clusterLearner, Pipe pipe) {
        double clusterF1Sum = 0.0d;
        double pairF1Sum = 0.0d;
        int documentCount = 0;
        Clusterer clusterer = new Clusterer();

        while (it.hasNext()) {
            documentCount++;
            List pairs = (List) it.next();
            MappedGraph graph = new MappedGraph();
            KeyClustering keyClusters = TUIGraph.collectAllKeyClusters(pairs);
            System.out.println("Number of pairs: " + pairs.size());

            Iterator pairIterator = pairs.iterator();
            while (pairIterator.hasNext()) {
                constructEdgesUsingTrainedClusterer(
                        graph, (Instance) pairIterator.next(), clusterLearner.getFinalLambdas(), pipe);
            }

            clusterer.setGraph(graph);
            Clustering predicted = clusterer.getClustering();
            ClusterEvaluate clusterEval = new ClusterEvaluate(keyClusters, predicted);
            PairEvaluate pairEval = new PairEvaluate(keyClusters, predicted);
            clusterEval.evaluate();
            clusterEval.printErrors(true);
            pairEval.evaluate();

            System.out.println("Cluster F1 using Model: " + clusterEval.getF1());
            System.out.println("PairWise F1 using Model: " + pairEval.getF1());
            System.out.println("  -- recall " + pairEval.getRecall());
            System.out.println("  -- precision " + pairEval.getPrecision());
            clusterF1Sum += clusterEval.getF1();
            pairF1Sum += pairEval.getF1();
        }

        System.out.println("Overall Cluster F1: " + (clusterF1Sum / documentCount));
        System.out.println("Overall PairWise F1: " + (pairF1Sum / documentCount));
    }

    /**
     * Scores one mention pair with the trained weights and, unless it is a null
     * pair, adds an antecedent-referent edge weighted by the "yes" score minus
     * the "no" score.
     *
     * @param mappedGraph graph receiving the edge
     * @param instance    instance whose source is a {@link MentionPair} and whose
     *                    data is a {@link FeatureVector}
     * @param matrix2     trained lambdas; rows are indexed by {@code yesIndex} and
     *                    {@code noIndex}, and the column at the data-alphabet size
     *                    is read as an extra per-row term (presumably a bias)
     * @param pipe        pipe providing the data alphabet
     * @throws NullPointerException if {@code matrix2} is {@code null}
     */
    public static void constructEdgesUsingTrainedClusterer(MappedGraph mappedGraph, Instance instance, Matrix2 matrix2, Pipe pipe) {
        MentionPair mentionPair = (MentionPair) instance.getSource();
        Mention antecedent = mentionPair.getAntecedent();
        Mention referent = mentionPair.getReferent();
        int alphabetSize = pipe.getDataAlphabet().size();
        FeatureVector featureVector = (FeatureVector) instance.getData();
        assert featureVector.getAlphabet() == pipe.getDataAlphabet();

        // Check before use: the diagnostic used to come after the first
        // dereference and could therefore never be printed.
        if (matrix2 == null) {
            System.out.println("LAMBDAS NULL");
            throw new NullPointerException("matrix2 (lambdas) must not be null");
        }

        double yesScore = matrix2.value(yesIndex, alphabetSize)
                + matrix2.rowDotProduct(yesIndex, featureVector, alphabetSize, null);
        double noScore = matrix2.value(noIndex, alphabetSize)
                + matrix2.rowDotProduct(noIndex, featureVector, alphabetSize, null);
        double edgeWeight = yesScore - noScore;

        try {
            if (!mentionPair.nullPair()) {
                mappedGraph.addEdgeMap(antecedent, referent, edgeWeight);
            }
        } catch (Exception e) {
            // Best effort: a failure on one edge is reported but does not abort
            // processing of the remaining pairs.
            e.printStackTrace();
        }
    }
}
