package edu.umass.cs.mallet.projects.seg_plus_coref.anaphora;

import com.itextpdf.text.html.HtmlTags;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.regex.Pattern;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Parent;

/* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/projects/seg_plus_coref/anaphora/MalletDocument.class */
/**
 * Wraps a JDOM {@link Document} and exposes the top-level sentences found in it,
 * together with the phrases those sentences contain.
 *
 * <p>Sentences are the elements named {@code "S"} or {@link HtmlTags#S} that are not
 * themselves nested inside another such element. Each one is wrapped in a
 * {@link MalletSentence}, and consecutive sentences are linked to each other through
 * {@code setPrev}/{@code setNext}.
 */
public class MalletDocument extends MalletDocumentElement {
    /** Sentences built by {@link #constructSentences}, in construction order. */
    LinkedHashSet sentences;
    /** Every phrase reachable from {@link #sentences}, in traversal order. */
    LinkedHashSet phrases;
    /** The JDOM document this instance was built from. */
    Document document;
    /** The source-type string handed to the constructor and to every sentence. */
    String sourceType;

    /**
     * Builds the sentence and phrase sets for the given document.
     *
     * @param document the JDOM document to scan for sentence elements
     * @param str      the source type; passed unchanged to each {@link MalletSentence}
     */
    public MalletDocument(Document document, String str) {
        this.sentences = constructSentences(document, str);
        this.phrases = collectPhrases(this.sentences);
        this.document = document;
        this.sourceType = str;
    }

    /**
     * Gathers every phrase of every sentence by following each sentence's phrase chain
     * ({@code getPhrases()} for the head, then {@code getNext()}) until it ends.
     *
     * @param linkedHashSet the sentences ({@link MalletSentence} instances) to walk
     * @return the phrases in the order they were encountered; empty if there are none
     */
    private LinkedHashSet collectPhrases(LinkedHashSet linkedHashSet) {
        LinkedHashSet collected = new LinkedHashSet();
        for (Iterator it = linkedHashSet.iterator(); it.hasNext();) {
            MalletSentence sentence = (MalletSentence) it.next();
            // Stop at the end of the chain (null); the previous loop had no exit
            // condition and spun forever once the chain was exhausted.
            for (MalletPhrase phrase = sentence.getPhrases(); phrase != null; phrase = phrase.getNext()) {
                collected.add(phrase);
            }
        }
        return collected;
    }

    /**
     * @return the phrases collected from all sentences of this document
     */
    public LinkedHashSet getPhrases() {
        return this.phrases;
    }

    /**
     * Keeps only the elements that have no ancestor element named {@code "S"} or
     * {@link HtmlTags#S}, i.e. drops sentences nested inside another sentence.
     *
     * @param linkedHashSet candidate sentence {@link Element}s
     * @return the candidates without a sentence ancestor, in their original order
     */
    private LinkedHashSet filterSubSentences(LinkedHashSet linkedHashSet) {
        LinkedHashSet topLevel = new LinkedHashSet();
        for (Iterator it = linkedHashSet.iterator(); it.hasNext();) {
            Element element = (Element) it.next();
            if (!hasSentenceAncestor(element)) {
                topLevel.add(element);
            }
        }
        return topLevel;
    }

    /**
     * Reports whether any ancestor element of {@code element} is a sentence element.
     *
     * <p>Uses {@code getParentElement()} rather than casting {@code getParent()} to
     * {@link Element}: the parent of the root element is the document itself, which is
     * not an {@code Element}, so the cast would fail when the walk reaches the top.
     */
    private static boolean hasSentenceAncestor(Element element) {
        for (Element ancestor = element.getParentElement(); ancestor != null; ancestor = ancestor.getParentElement()) {
            String name = ancestor.getName();
            // NOTE(review): HtmlTags.S is presumably the lowercase "s" tag name - confirm.
            if (name.equals("S") || name.equals(HtmlTags.S)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Creates one {@link MalletSentence} per top-level sentence element and links each
     * sentence to its predecessor and successor.
     *
     * @param document the document to search
     * @param str      the source type forwarded to each {@link MalletSentence}
     * @return the sentences in the order they were created; empty if the document
     *         contains no sentence elements
     */
    private LinkedHashSet constructSentences(Document document, String str) {
        LinkedHashSet result = new LinkedHashSet();

        LinkedHashSet tagPatterns = new LinkedHashSet();
        tagPatterns.add(Pattern.compile("S"));
        tagPatterns.add(Pattern.compile(HtmlTags.S));

        LinkedHashSet sentenceElements =
                filterSubSentences(collectElementsWithinDocument(document, tagPatterns, null));

        MalletSentence previous = null;
        int index = 0;
        for (Iterator it = sentenceElements.iterator(); it.hasNext();) {
            MalletSentence current = new MalletSentence((Element) it.next(), index, str);
            result.add(current);
            if (previous != null) {
                previous.setNext(current);
            }
            // For the first sentence 'previous' is still null, so this also covers
            // the explicit setPrev(null) the first element used to receive.
            current.setPrev(previous);
            previous = current;
            index++;
        }
        // Terminate the chain, but only if at least one sentence exists; calling this
        // unconditionally threw a NullPointerException for sentence-less documents.
        if (previous != null) {
            previous.setNext(null);
        }
        return result;
    }

    /**
     * @return the top-level sentences of this document
     */
    public LinkedHashSet getSentences() {
        return this.sentences;
    }
}
