package edu.umass.cs.mallet.projects.seg_plus_coref.anaphora;

import edu.umass.cs.mallet.base.pipe.iterator.AbstractPipeInputIterator;
import edu.umass.cs.mallet.base.pipe.iterator.FileIterator;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.Vector;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;

/* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/projects/seg_plus_coref/anaphora/MentionPairIterator.class */
public class MentionPairIterator extends AbstractPipeInputIterator {
    /** Look-back limit (20000); the antecedent loop in fillNodePairArray bounds its distance by the same value. */
    public static final int NP_LOOKBACK = 20000;
    /** Supplies the input files; each file's data is parsed into one document. */
    FileIterator fileIterator;
    /** Surface forms that referentPronoun matches exactly (case-sensitive). */
    public static final String[] pronouns = {"He", "he", "Him", "him", "His", "his", "She", "she", "Her", "her", "hers", "it", "It", "its", "Its", "itself", "himself", "herself"};
    /** Surface forms that findGender maps to "male". */
    public static final String[] malePronouns = {"He", "he", "Him", "him", "His", "his", "himself"};
    /** Surface forms that findGender maps to "female". */
    public static final String[] femalePronouns = {"She", "she", "Her", "her", "hers", "herself"};
    /** Number of entries in {@link #pronouns}. */
    public static final int pronounsSize = 18;
    /** Number of entries in {@link #malePronouns}. */
    public static final int numMalePronouns = 7;
    /** Number of entries in {@link #femalePronouns}. */
    public static final int numFemalePronouns = 6;
    /** Set to 0 by the constructors; not otherwise used in this file. */
    int refIndex;
    /** XML parser; created in constructAux. */
    SAXBuilder builder;
    /** The most recently parsed document. */
    Document currentDocument;
    /** MalletDocument built from {@link #currentDocument} and {@link #sourceType}. */
    MalletDocument malletDocument;
    /** Iterator over the mention pairs of the current document; stays null until a document has been parsed. */
    DocumentMentionPairIterator docNodePairIterator;
    /** Documents recorded by constructAux (in this file only the first parsed document is added). */
    Vector allDocuments;
    /** Absolute path of the current input file with ".sys" appended. */
    File targetDocPath;
    /** Set when a non-null pair is labelled "yes"; read and cleared when a null-antecedent pair is labelled. */
    boolean positiveAntecedent;
    /** Corpus format tag; "TB" and "MUC" are the only values this file tests for. */
    String sourceType;
    /** When true, a pair with a null antecedent is appended for every valid referent. */
    boolean addNullAntecedent;
    /** Value returned by getNumReferents; never incremented in this file. */
    int numberOfReferents;
    /** When true (and includeEverything is false), proper-noun heads are accepted as referents in addition to pronouns. */
    boolean includeProperNouns;
    /** When true, validReferent applies its most permissive rule (see validReferent). */
    boolean includeEverything;
    /** Filter objects consulted by filterPairs; null unless the six-argument constructor supplies a list. */
    List filters;
    /** Mention pairs generated for the current document. */
    ArrayList nodePairArray;
    /** Cache for the TUI Class object used in main's usage message. */
    static Class class$edu$umass$cs$mallet$projects$seg_plus_coref$anaphora$TUI;

    /* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/projects/seg_plus_coref/anaphora/MentionPairIterator$DocumentMentionPairIterator.class */
    public class DocumentMentionPairIterator implements Iterator {
        /** Number of pairs in the enclosing iterator's nodePairArray after filtering. */
        int pairCount;
        /** Index of the next pair to hand out. */
        int currentIndex;
        /** The enclosing MentionPairIterator instance (explicit outer reference). */
        private final MentionPairIterator this$0;

        /**
         * Generates and filters all mention pairs for the enclosing iterator's current document.
         *
         * <p>The element-name patterns used to select mentions depend on the source type:
         * "TB" uses {@code NP.*} and {@code lex}; "MUC" uses {@code NG} and {@code COREF};
         * any other source type leaves the pattern list empty.
         *
         * @param mentionPairIterator the enclosing iterator whose document is processed
         */
        public DocumentMentionPairIterator(MentionPairIterator mentionPairIterator) {
            this.this$0 = mentionPairIterator;
            mentionPairIterator.nodePairArray = new ArrayList();

            Vector namePatterns = new Vector();
            String source = mentionPairIterator.sourceType;
            if (source.equals("TB")) {
                namePatterns.add(Pattern.compile("NP.*"));
                namePatterns.add(Pattern.compile("lex"));
            } else if (source.equals("MUC")) {
                namePatterns.add(Pattern.compile("NG"));
                namePatterns.add(Pattern.compile("COREF"));
            }

            fillNodePairArray(namePatterns);
            ArrayList survivors = filterPairs();
            mentionPairIterator.nodePairArray = survivors;
            this.pairCount = survivors.size();
            System.out.println("Pair array size = " + this.pairCount);
            this.currentIndex = 0;
        }

        /**
         * Returns the generated pairs that no configured filter rejects.
         *
         * <p>A pair is rejected when {@code Filter.filters(pair)} returns true for at least one
         * filter. Each surviving pair appears exactly once, in its original order. When the
         * filter list is null or empty every pair is kept.
         *
         * <p>The previous implementation looped over filters on the outside, which duplicated
         * pairs accepted by several filters, kept pairs that some filter rejected, dropped
         * everything for an empty list and threw a NullPointerException for a null list.
         * NOTE(review): "all filters must accept" is the assumed intent for multiple filters.
         *
         * @return a new list holding the surviving pairs
         */
        private ArrayList filterPairs() {
            List activeFilters = this.this$0.filters;
            ArrayList kept = new ArrayList();
            Iterator pairs = this.this$0.nodePairArray.iterator();
            while (pairs.hasNext()) {
                MentionPair candidate = (MentionPair) pairs.next();
                boolean rejected = false;
                if (activeFilters != null) {
                    Iterator filterIt = activeFilters.iterator();
                    while (!rejected && filterIt.hasNext()) {
                        rejected = ((Filter) filterIt.next()).filters(candidate);
                    }
                }
                if (!rejected) {
                    kept.add(candidate);
                }
            }
            return kept;
        }

        /**
         * Tells whether an element qualifies as a mention node.
         *
         * <p>For source type "TB", an element with any direct child whose name matches
         * {@code NP.*} is rejected outright. Otherwise the element qualifies when its name
         * fully matches at least one of the supplied patterns.
         *
         * @param element the XML element to test
         * @param vector  the {@link Pattern}s to match the element name against
         * @return true if the element qualifies
         */
        private boolean compatible(Element element, Vector vector) {
            if (this.this$0.sourceType.equals("TB")) {
                Pattern nestedNounPhrase = Pattern.compile("NP.*");
                for (Iterator children = element.getChildren().iterator(); children.hasNext(); ) {
                    Element child = (Element) children.next();
                    if (nestedNounPhrase.matcher(child.getName()).matches()) {
                        return false;
                    }
                }
            }
            for (Iterator patterns = vector.iterator(); patterns.hasNext(); ) {
                Pattern namePattern = (Pattern) patterns.next();
                if (namePattern.matcher(element.getName()).matches()) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Prints one line per mention to standard output: its string form, then the mention object.
         *
         * @param vector the {@link Mention}s to print
         */
        private void printMentions(Vector vector) {
            for (Iterator mentions = vector.iterator(); mentions.hasNext(); ) {
                Mention current = (Mention) mentions.next();
                System.out.println("Mention: " + current.getString() + " -- " + current);
            }
        }

        /**
         * Populates the enclosing iterator's nodePairArray with (antecedent, referent) pairs.
         *
         * <p>Every mention that is compatible with the patterns and is a valid referent is paired
         * with each earlier compatible, non-empty mention, walking backwards and staying within
         * {@link MentionPairIterator#NP_LOOKBACK} positions. Sentence and gender are set on both
         * mentions each time a pair is created, so a referent with no antecedent pair does not
         * get them set here. When addNullAntecedent is on, one extra pair with a null antecedent
         * is appended per referent.
         *
         * @param vector the element-name {@link Pattern}s that select mention nodes
         * @return the number of non-null pairs added
         */
        private int fillNodePairArray(Vector vector) {
            Vector phrases = new Vector();
            getAllNodes(this.this$0.malletDocument, phrases, vector);
            Vector mentions = convertToMentions(phrases);

            int pairsAdded = 0;
            for (int refPos = 0; refPos < mentions.size(); refPos++) {
                Mention referent = (Mention) mentions.elementAt(refPos);
                if (!compatible(referent.getElement(), vector) || !validReferent(referent)) {
                    continue;
                }
                int antPos = refPos - 1;
                while (antPos > -1 && refPos - antPos < MentionPairIterator.NP_LOOKBACK) {
                    Mention antecedent = (Mention) mentions.elementAt(antPos);
                    Element antecedentElement = antecedent.getElement();
                    if (antecedent.getString().length() > 0 && compatible(antecedentElement, vector)) {
                        MentionPair pair = new MentionPair(antecedent, referent);
                        antecedent.setSentence(getSentenceParent(antecedent.getElement()));
                        referent.setSentence(getSentenceParent(referent.getElement()));
                        antecedent.setGender(findGender(antecedent.getString()));
                        referent.setGender(findGender(referent.getString()));
                        pair.setReferentIndex(refPos);
                        this.this$0.nodePairArray.add(pair);
                        pairsAdded++;
                    }
                    antPos--;
                }
                if (this.this$0.addNullAntecedent) {
                    MentionPair nullPair = new MentionPair(null, referent);
                    nullPair.setReferentIndex(refPos);
                    this.this$0.nodePairArray.add(nullPair);
                }
            }
            return pairsAdded;
        }

        /**
         * Wraps each phrase in a {@link Mention}, numbering them from 1 in input order.
         *
         * @param vector the {@link MalletPhrase}s to convert
         * @return a new vector of mentions, in the same order as the input
         */
        public Vector convertToMentions(Vector vector) {
            Vector mentions = new Vector();
            int ordinal = 1;
            for (Iterator phrases = vector.iterator(); phrases.hasNext(); ordinal++) {
                MalletPhrase phrase = (MalletPhrase) phrases.next();
                Mention mention = new Mention(phrase, this.this$0.targetDocPath, this.this$0.currentDocument, ordinal, this.this$0.sourceType);
                mentions.add(mention);
            }
            return mentions;
        }

        /**
         * Classifies a mention string by exact, case-sensitive match against the pronoun tables.
         *
         * @param str the mention text
         * @return "male", "female", or "unknown" when the text is in neither table
         */
        private String findGender(String str) {
            for (String male : MentionPairIterator.malePronouns) {
                if (male.equals(str)) {
                    return "male";
                }
            }
            for (String female : MentionPairIterator.femalePronouns) {
                if (female.equals(str)) {
                    return "female";
                }
            }
            return "unknown";
        }

        /**
         * Finds the nearest enclosing sentence element, i.e. the element itself or its closest
         * ancestor named "S" or "s".
         *
         * <p>The walk stops as soon as the parent is not an {@link Element}. The previous version
         * cast every parent to Element unconditionally, which throws ClassCastException at the
         * root when {@code getParent()} returns the owning Document (as in JDOM 1.x).
         *
         * @param element the starting element; may be null
         * @return the sentence element, or null if there is none
         */
        private Element getSentenceParent(Element element) {
            Object node = element;
            while (node instanceof Element) {
                Element current = (Element) node;
                String tag = current.getName();
                if (tag.equals("S") || tag.equals("s")) {
                    return current;
                }
                node = current.getParent();
            }
            return null;
        }

        /**
         * Decides whether a mention may act as a referent under the configured inclusion flags.
         *
         * <ul>
         *   <li>includeEverything: accepted if the source type is "MUC", or the element is not
         *       named "lex", or the mention is a pronoun.</li>
         *   <li>otherwise includeProperNouns: accepted if it is a proper noun or a pronoun.</li>
         *   <li>otherwise: accepted only if it is a pronoun.</li>
         * </ul>
         *
         * @param mention the candidate referent
         * @return true if the mention is an acceptable referent
         */
        private boolean validReferent(Mention mention) {
            MentionPairIterator owner = this.this$0;
            if (owner.includeEverything) {
                if (owner.sourceType.equals("MUC")) {
                    return true;
                }
                if (!mention.getElement().getName().equals("lex")) {
                    return true;
                }
                return MentionPairIterator.referentPronoun(mention);
            }
            if (owner.includeProperNouns && MentionPairIterator.referentProperNoun(mention)) {
                return true;
            }
            return MentionPairIterator.referentPronoun(mention);
        }

        /**
         * Collects the document's phrases that can serve as mentions.
         *
         * <p>A phrase is collected when its element is compatible with the patterns, or when its
         * element is named "lex" and carries an attribute whose name contains "OBJREF-".
         *
         * @param malletDocument the document whose phrases are scanned
         * @param vector         output: receives the selected {@link MalletPhrase}s
         * @param vector2        the element-name {@link Pattern}s
         */
        private void getAllNodes(MalletDocument malletDocument, Vector vector, Vector vector2) {
            for (Iterator phrases = malletDocument.getPhrases().iterator(); phrases.hasNext(); ) {
                MalletPhrase phrase = (MalletPhrase) phrases.next();
                boolean selected = compatible(phrase.getElement(), vector2);
                if (!selected) {
                    selected = phrase.getElement().getName().equals("lex")
                            && argumentsIncludeCoreferentialInfo(phrase);
                }
                if (selected) {
                    vector.add(phrase);
                }
            }
        }

        /**
         * Prints each phrase on one line of standard output, surrounded by its neighbouring
         * pre-terminals: the preceding pre-terminal (or "NULL"), the phrase's own pre-terminals,
         * then the following pre-terminal (or "NULL").
         *
         * @param vector the {@link MalletPhrase}s to print
         */
        public void printAllNodesInContext(Vector vector) {
            for (Iterator phrases = vector.iterator(); phrases.hasNext(); ) {
                MalletPhrase phrase = (MalletPhrase) phrases.next();

                if (phrase.getPreceedingPreTerm() == null) {
                    System.out.print("NULL ");
                } else {
                    System.out.print(phrase.getPreceedingPreTerm().getString() + StringUtils.SPACE);
                }

                phrase.printPreTerms();

                if (phrase.getFollowingPreTerm() == null) {
                    System.out.println(" NULL");
                } else {
                    System.out.println(StringUtils.SPACE + phrase.getFollowingPreTerm().getString());
                }
            }
        }

        /**
         * Set-based variant: prints each phrase on one line of standard output, surrounded by its
         * neighbouring pre-terminals (or "NULL" where a neighbour is absent), in the set's
         * iteration order.
         *
         * @param set the {@link MalletPhrase}s to print
         */
        public void printAllNodesInContext(Set set) {
            for (Iterator phrases = set.iterator(); phrases.hasNext(); ) {
                MalletPhrase phrase = (MalletPhrase) phrases.next();

                if (phrase.getPreceedingPreTerm() == null) {
                    System.out.print("NULL ");
                } else {
                    System.out.print(phrase.getPreceedingPreTerm().getString() + StringUtils.SPACE);
                }

                phrase.printPreTerms();

                if (phrase.getFollowingPreTerm() == null) {
                    System.out.println(" NULL");
                } else {
                    System.out.println(StringUtils.SPACE + phrase.getFollowingPreTerm().getString());
                }
            }
        }

        /**
         * Checks whether the phrase's element has any attribute whose name contains "OBJREF-".
         *
         * @param malletPhrase the phrase to inspect
         * @return true if such an attribute exists
         */
        private boolean argumentsIncludeCoreferentialInfo(MalletPhrase malletPhrase) {
            Pattern objectReference = Pattern.compile(".*OBJREF-.*");
            for (Iterator attributes = malletPhrase.getElement().getAttributes().iterator(); attributes.hasNext(); ) {
                Attribute attribute = (Attribute) attributes.next();
                if (objectReference.matcher(attribute.getName()).matches()) {
                    return true;
                }
            }
            return false;
        }

        /* JADX INFO: Access modifiers changed from: private */
        /**
         * Returns the pair's entity reference. nextInstance labels the pair "yes" when this
         * value is non-null and "no" when it is null.
         *
         * @param mentionPair the pair to query
         * @return the value of {@code mentionPair.getEntityReference()}
         */
        public String getTargetValue(MentionPair mentionPair) {
            return mentionPair.getEntityReference();
        }

        /**
         * Sets the referent's antecedent count to one more than the antecedent's own count.
         * Not called anywhere in this file. The antecedent is dereferenced unconditionally, so
         * this must not be called on a null-antecedent pair.
         *
         * @param mentionPair the pair whose referent is updated
         */
        private void updateMentionFeatures(MentionPair mentionPair) {
            Mention referent = mentionPair.getReferent();
            int chainLength = mentionPair.getAntecedent().getAntecedentCount() + 1;
            referent.setAntecedentCount(chainLength);
        }

        /**
         * Reports whether any pairs remain, by comparing the cursor with the pair count
         * captured at construction.
         *
         * @return true while currentIndex is below pairCount
         */
        private boolean hasNextNodePair() {
            return this.currentIndex < this.pairCount;
        }

        /**
         * Hands out the pair at the cursor and advances the cursor.
         *
         * @return the next pair, or null once all pairs have been handed out
         */
        private MentionPair nextNodePair() {
            if (this.currentIndex < this.pairCount) {
                return (MentionPair) this.this$0.nodePairArray.get(this.currentIndex++);
            }
            return null;
        }

        /**
         * Reports whether this document still has pairs to hand out.
         *
         * @return the result of hasNextNodePair()
         */
        @Override // java.util.Iterator
        public boolean hasNext() {
            return hasNextNodePair();
        }

        /**
         * Returns the next pair. Unlike the general Iterator contract, this returns null
         * when exhausted (via nextNodePair) instead of throwing; getNextMentionPair in the
         * enclosing class tests the result against null.
         *
         * @return the next MentionPair, or null if none remain
         */
        @Override // java.util.Iterator
        public Object next() {
            return nextNodePair();
        }

        /**
         * Removal is not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override // java.util.Iterator
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Creates an iterator with null-antecedent pairs enabled; delegates to the
     * three-argument constructor with {@code true}.
     *
     * @param fileIterator source of the input files
     * @param str          the source type (this file tests for "TB" and "MUC")
     */
    public MentionPairIterator(FileIterator fileIterator, String str) {
        this(fileIterator, str, true);
    }

    /**
     * Fully-parameterised constructor; stores the configuration and then loads the first
     * document through constructAux.
     *
     * @param fileIterator source of the input files
     * @param str          the source type (this file tests for "TB" and "MUC")
     * @param z            whether to add a null-antecedent pair for every referent
     * @param z2           whether proper nouns count as referents
     * @param z3           whether to use the most permissive referent rule
     * @param list         Filter objects applied to the generated pairs
     */
    public MentionPairIterator(FileIterator fileIterator, String str, boolean z, boolean z2, boolean z3, List list) {
        // Bookkeeping state starts out cleared.
        this.refIndex = 0;
        this.numberOfReferents = 0;
        this.positiveAntecedent = false;
        this.builder = null;

        // Caller-supplied configuration.
        this.sourceType = str;
        this.addNullAntecedent = z;
        this.includeProperNouns = z2;
        this.includeEverything = z3;
        this.filters = list;

        constructAux(fileIterator, str);
    }

    /**
     * Creates an iterator with includeEverything off and no filter list (filters stays null).
     *
     * @param fileIterator source of the input files
     * @param str          the source type (this file tests for "TB" and "MUC")
     * @param z            whether to add a null-antecedent pair for every referent
     * @param z2           whether proper nouns count as referents
     */
    public MentionPairIterator(FileIterator fileIterator, String str, boolean z, boolean z2) {
        this(fileIterator, str, z, z2, false, null);
    }

    /**
     * Creates an iterator that accepts only pronouns as referents (includeProperNouns and
     * includeEverything both off) and has no filter list.
     *
     * @param fileIterator source of the input files
     * @param str          the source type (this file tests for "TB" and "MUC")
     * @param z            whether to add a null-antecedent pair for every referent
     */
    public MentionPairIterator(FileIterator fileIterator, String str, boolean z) {
        this(fileIterator, str, z, false);
    }

    /**
     * Shared constructor tail: stores the file iterator, creates the XML builder and the
     * document list, and, if a file is available, parses the first one and builds its pair
     * iterator.
     *
     * <p>Parse failures (IOException / JDOMException) are printed and otherwise ignored, in
     * which case docNodePairIterator remains unset.
     *
     * @param fileIterator source of the input files
     * @param str          the source type passed to MalletDocument
     */
    private void constructAux(FileIterator fileIterator, String str) {
        this.fileIterator = fileIterator;
        this.builder = new SAXBuilder();
        this.allDocuments = new Vector();

        if (this.currentDocument != null || !this.fileIterator.hasNext()) {
            return;
        }
        try {
            File firstFile = (File) this.fileIterator.nextInstance().getData();
            String systemOutputPath = firstFile.getAbsolutePath().concat(".sys");
            this.targetDocPath = new File(systemOutputPath);
            this.currentDocument = this.builder.build(firstFile);
            this.malletDocument = new MalletDocument(this.currentDocument, str);
            this.allDocuments.add(this.currentDocument);
            this.docNodePairIterator = new DocumentMentionPairIterator(this);
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
        } catch (JDOMException parseFailure) {
            parseFailure.printStackTrace();
        }
    }

    /**
     * Returns the numberOfReferents field. The constructors set it to 0 and nothing in this
     * file changes it afterwards.
     *
     * @return the stored referent count
     */
    public int getNumReferents() {
        return this.numberOfReferents;
    }

    /**
     * Returns the internal document vector itself (not a copy). In this file only
     * constructAux adds to it, so it holds at most the first parsed document.
     *
     * @return the vector of recorded documents
     */
    public Vector getAllDocuments() {
        return this.allDocuments;
    }

    /**
     * Returns the next pair of the current document only; never advances to another file.
     *
     * @return the next pair, or null when the current document has none left
     */
    public MentionPair getNextMentionPairFromDocument() {
        return this.docNodePairIterator.hasNext()
                ? (MentionPair) this.docNodePairIterator.next()
                : null;
    }

    /**
     * Returns the next mention pair, moving on to further input files when the current
     * document is exhausted.
     *
     * <p>The previous version looped forever once the current document was exhausted and no
     * files remained, and after a parse failure it rebuilt the pair iterator over the stale
     * previous document. Both are avoided by going through advanceToDocumentWithPairs().
     *
     * @return the next pair, or null when neither the current document nor any remaining
     *         file yields one
     */
    public MentionPair getNextMentionPair() {
        if (!advanceToDocumentWithPairs()) {
            return null;
        }
        return (MentionPair) this.docNodePairIterator.next();
    }

    /**
     * Ensures docNodePairIterator has a pair available, consuming input files as needed.
     * Each loop round consumes one file, so the loop always terminates.
     *
     * @return true if a pair is available, false once all input is exhausted
     */
    private boolean advanceToDocumentWithPairs() {
        while (this.docNodePairIterator == null || !this.docNodePairIterator.hasNext()) {
            if (!this.fileIterator.hasNext()) {
                return false;
            }
            loadNextDocument();
        }
        return true;
    }

    /**
     * Parses the next input file and builds its pair iterator. The iterator's state is only
     * replaced after a successful parse; a failing file is reported and skipped.
     * NOTE(review): as before, documents loaded here are not added to allDocuments.
     */
    private void loadNextDocument() {
        File file = (File) this.fileIterator.nextInstance().getData();
        try {
            Document parsed = this.builder.build(file);
            MalletDocument wrapped = new MalletDocument(parsed, this.sourceType);
            this.targetDocPath = new File(file.getAbsolutePath().concat(".sys"));
            this.currentDocument = parsed;
            this.malletDocument = wrapped;
            this.docNodePairIterator = new DocumentMentionPairIterator(this);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (JDOMException e2) {
            e2.printStackTrace();
        }
    }

    /**
     * Produces the next training instance: data is the mention pair, target is "yes" or "no",
     * name is the fixed URI "nodePairURI", and source is null.
     *
     * <p>Labelling rules:
     * <ul>
     *   <li>Null-antecedent pair: "yes" if no positive pair was seen since the previous
     *       null-antecedent pair, otherwise "no"; the positive flag is then cleared.</li>
     *   <li>Ordinary pair: "yes" if the pair has an entity reference (which also sets the
     *       positive flag), otherwise "no".</li>
     * </ul>
     *
     * @return the next instance; if no pair is available, an instance whose data, target and
     *         name are all null
     */
    @Override // edu.umass.cs.mallet.base.pipe.iterator.AbstractPipeInputIterator, edu.umass.cs.mallet.base.pipe.iterator.PipeInputIterator
    public Instance nextInstance() {
        MentionPair mentionPair = null;
        String label = null;
        URI name = null;
        if (hasNext()) {
            mentionPair = getNextMentionPair();
        }
        if (mentionPair != null) {
            if (mentionPair.nullPair()) {
                label = this.positiveAntecedent ? "no" : "yes";
                this.positiveAntecedent = false;
            } else {
                String targetValue = this.docNodePairIterator.getTargetValue(mentionPair);
                if (targetValue != null) {
                    label = "yes";
                    mentionPair.setEntityReference(targetValue);
                    this.positiveAntecedent = true;
                } else {
                    mentionPair.setEntityReference(null);
                    label = "no";
                }
            }
            // Constant, syntactically valid URI, so URI.create cannot fail here.
            name = URI.create("nodePairURI");
        }
        return new Instance(mentionPair, label, name, null);
    }

    /**
     * Reports whether another mention pair can be produced. This looks ahead: if the current
     * document is exhausted, remaining files are parsed until one yields a pair, so a true
     * result guarantees that nextInstance() has a pair to return.
     *
     * @return true if at least one more pair is available
     */
    @Override // edu.umass.cs.mallet.base.pipe.iterator.AbstractPipeInputIterator, java.util.Iterator
    public boolean hasNext() {
        return advanceToDocumentWithPairs();
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override // edu.umass.cs.mallet.base.pipe.iterator.AbstractPipeInputIterator, java.util.Iterator
    public void remove() {
        throw new UnsupportedOperationException();
    }

    /**
     * Splits an instance list into consecutive runs that share the same document.
     *
     * <p>Each instance's source is expected to be a {@link MentionPair}; the document is taken
     * from its referent and compared by identity. A new group starts whenever the document
     * differs from that of the preceding instance.
     *
     * <p>Unlike the previous version, an empty input yields an empty set (not a set holding
     * null), and a first instance whose document is null opens a group instead of causing a
     * NullPointerException.
     *
     * @param instanceList the instances to partition, in document order
     * @return an insertion-ordered set of ArrayLists, one per document run
     */
    public static Set partitionIntoDocumentInstances(InstanceList instanceList) {
        LinkedHashSet groups = new LinkedHashSet();
        InstanceList.Iterator it = instanceList.iterator();
        Document groupDocument = null;
        ArrayList currentGroup = null;
        while (it.hasNext()) {
            Instance instance = (Instance) it.next();
            Document instanceDocument = ((MentionPair) instance.getSource()).getReferent().getDocument();
            if (currentGroup == null || groupDocument != instanceDocument) {
                if (currentGroup != null) {
                    // The finished group is only added once complete, so it is never
                    // mutated while it sits in the hash-based set.
                    groups.add(currentGroup);
                }
                currentGroup = new ArrayList();
                groupDocument = instanceDocument;
            }
            currentGroup.add(instance);
        }
        if (currentGroup != null) {
            groups.add(currentGroup);
        }
        return groups;
    }

    /**
     * Tells whether the head pre-terminal of the mention's phrase has part of speech "NNP".
     *
     * @param mention the mention to test
     * @return true if the head's part of speech is non-null and equals "NNP"
     */
    public static boolean referentProperNoun(Mention mention) {
        MalletPreTerm head = mention.getMalletPhrase().getHeadPreTerm();
        if (head.getPartOfSpeech() == null) {
            return false;
        }
        return head.getPartOfSpeech().equals("NNP");
    }

    /**
     * Tells whether the mention's string is exactly one of the entries in {@link #pronouns}.
     *
     * @param mention the mention to test
     * @return true on an exact, case-sensitive match
     */
    public static boolean referentPronoun(Mention mention) {
        String text = mention.getString();
        for (String pronoun : pronouns) {
            if (pronoun.equals(text)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Command-line driver: iterates over every instance produced from the given path using
     * source type "TB". With any argument count other than one it prints a usage line to
     * standard error and exits with status -1.
     *
     * @param strArr command-line arguments; exactly one path is expected
     */
    public static void main(String[] strArr) {
        if (strArr.length != 1) {
            // Resolve the TUI class lazily and cache it in the static field.
            Class tuiClass = class$edu$umass$cs$mallet$projects$seg_plus_coref$anaphora$TUI;
            if (tuiClass == null) {
                tuiClass = class$("edu.umass.cs.mallet.projects.seg_plus_coref.anaphora.TUI");
                class$edu$umass$cs$mallet$projects$seg_plus_coref$anaphora$TUI = tuiClass;
            }
            System.err.println("Usage: " + tuiClass.getName() + " <directory of ACE files>");
            System.exit(-1);
        }
        FileIterator inputFiles = new FileIterator(new File(strArr[0]));
        MentionPairIterator pairs = new MentionPairIterator(inputFiles, "TB");
        while (pairs.hasNext()) {
            pairs.next();
        }
    }

    /**
     * Looks a class up by name, converting the checked ClassNotFoundException into a
     * NoClassDefFoundError.
     *
     * <p>The error is built first and thrown as a NoClassDefFoundError: throwing the result of
     * {@code initCause} directly has static type Throwable, which this method may not throw.
     * The error now also carries the class name as its message.
     *
     * @param str the fully-qualified class name
     * @return the loaded class
     * @throws NoClassDefFoundError if the class cannot be found; the cause is the original
     *         ClassNotFoundException
     */
    static Class class$(String str) {
        try {
            return Class.forName(str);
        } catch (ClassNotFoundException e) {
            NoClassDefFoundError error = new NoClassDefFoundError(str);
            error.initCause(e);
            throw error;
        }
    }
}
