package liner2.reader.parser;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import liner2.structure.Annotation;
import liner2.structure.Document;
import liner2.structure.Paragraph;
import liner2.structure.Sentence;
import liner2.structure.Tag;
import liner2.structure.Token;
import liner2.structure.TokenAttributeIndex;
import liner2.tools.DataFormatException;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import weka.gui.beans.xml.XMLBeans;

/* loaded from: input_file:liner2/reader/parser/CclSaxParser.class */
/**
 * SAX handler that builds a {@link Document} from an XML stream made of
 * {@code chunk} / {@code sentence} / {@code tok} / {@code lex} / {@code ann} elements.
 *
 * <p>The whole input is parsed inside the constructor; afterwards the result is
 * available through {@link #getDocument()}. The handler keeps mutable parsing state in
 * its fields, so one instance handles exactly one input stream.
 */
public class CclSaxParser extends DefaultHandler {
    private static final String TAG_ANN = "ann";
    private static final String TAG_BASE = "base";
    private static final String TAG_CHAN = "chan";
    private static final String TAG_CTAG = "ctag";
    private static final String TAG_DISAMB = "disamb";
    private static final String TAG_ID = XMLBeans.VAL_ID;
    private static final String TAG_ORTH = "orth";
    private static final String TAG_NS = "ns";
    private static final String TAG_PARAGRAPH = "chunk";
    private static final String TAG_PARAGRAPH_SET = "chunkSet";
    private static final String TAG_SENTENCE = "sentence";
    private static final String TAG_TAG = "lex";
    private static final String TAG_TOKEN = "tok";
    private static final String TAG_HEAD = "head";

    /** Attributes of the chunk currently being read, excluding its id attribute. */
    HashMap<String, String> chunkMetaData;
    /** Annotations of the current sentence, keyed by {@code channel#number}. */
    Hashtable<String, Annotation> annotations;
    InputStream is;
    /** Text collected since the most recent start tag. */
    String tmpValue;
    /** {@code chan} attribute of the {@code ann} element being read. */
    String chanName;
    /** {@code head} attribute of the {@code ann} element being read ("0" when absent). */
    String chanHead;
    /** Whether the token finished last had a tag marked as disambiguated. */
    boolean foundDisamb;
    TokenAttributeIndex attributeIndex;
    Document document;
    Paragraph currentParagraph = null;
    Sentence currentSentence = null;
    Token currentToken = null;
    String tmpBase = null;
    String tmpCtag = null;
    Boolean tmpDisamb = false;
    /** Index of the current token within the current sentence. */
    int idx = 0;
    ArrayList<Paragraph> paragraphs = new ArrayList<>();

    /** Identifies one annotation inside a sentence: channel name plus channel number. */
    class AnnChan {
        public String chan;
        public String number;
        public String head;

        public AnnChan(String str, String str2, String str3) {
            this.chan = str;
            this.number = str2;
            this.head = str3;
        }

        /** Returns the key used in the per-sentence annotation table: {@code chan#number}. */
        @Override
        public String toString() {
            return this.chan + "#" + this.number;
        }
    }

    /**
     * Parses {@code inputStream} immediately and builds the resulting document.
     *
     * @param str passed unchanged as the first argument of the {@link Document} constructor
     * @param inputStream XML input; it is read but not closed here
     * @param tokenAttributeIndex attribute index shared by all created paragraphs and tokens
     * @throws DataFormatException if the input cannot be read or parsed
     */
    public CclSaxParser(String str, InputStream inputStream, TokenAttributeIndex tokenAttributeIndex) throws DataFormatException {
        this.is = inputStream;
        this.attributeIndex = tokenAttributeIndex;
        parseDocument();
        this.document = new Document(str, this.paragraphs, this.attributeIndex);
    }

    /** Runs a default SAX parser over the input stream with this object as the handler. */
    private void parseDocument() throws DataFormatException {
        try {
            SAXParserFactory.newInstance().newSAXParser().parse(this.is, this);
        } catch (IOException e) {
            throw failure("Parse error (IOException)", e);
        } catch (ParserConfigurationException e) {
            throw failure("Parse error (ParserConfigurationException)", e);
        } catch (SAXException e) {
            throw failure("Parse error (SAXException)", e);
        }
    }

    /** Creates a {@link DataFormatException} that keeps the underlying exception as its cause. */
    private static DataFormatException failure(String message, Exception cause) {
        DataFormatException result = new DataFormatException(message);
        try {
            result.initCause(cause);
        } catch (IllegalStateException ignored) {
            // The exception class already fixed its own cause; the message alone is reported.
        }
        return result;
    }

    /**
     * Resolves every external entity to empty content, so the parser never reads the
     * location named by the system id.
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.EntityResolver
    public InputSource resolveEntity(String publicId, String systemId) {
        return new InputSource(new StringReader(""));
    }

    /**
     * Opens a new paragraph, sentence or token, or records the attributes of a
     * {@code lex} / {@code ann} / {@code ns} element. The text buffer is reset on every start tag.
     *
     * @throws SAXException if a {@code disamb} attribute is not an integer
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        this.tmpValue = "";
        if (qName.equalsIgnoreCase(TAG_PARAGRAPH)) {
            startParagraph(attributes);
        } else if (qName.equalsIgnoreCase(TAG_SENTENCE)) {
            this.currentSentence = new Sentence();
            this.annotations = new Hashtable<>();
            this.idx = 0;
            this.currentSentence.setId(attributes.getValue(TAG_ID));
        } else if (qName.equalsIgnoreCase(TAG_TOKEN)) {
            this.currentToken = new Token(this.attributeIndex);
            this.currentToken.setId(attributes.getValue(TAG_ID));
        } else if (qName.equalsIgnoreCase(TAG_TAG)) {
            this.tmpDisamb = parseDisamb(attributes.getValue(TAG_DISAMB));
        } else if (qName.equalsIgnoreCase(TAG_ANN)) {
            this.chanName = attributes.getValue(TAG_CHAN);
            String head = attributes.getValue(TAG_HEAD);
            this.chanHead = head != null ? head : "0";
        } else if (qName.equalsIgnoreCase(TAG_NS) && this.currentToken != null) {
            this.currentToken.setNoSpaceAfter(true);
        }
    }

    /** Creates the current paragraph and stores all non-id attributes as its metadata. */
    private void startParagraph(Attributes attributes) {
        this.chunkMetaData = new HashMap<>();
        for (int i = 0; i < attributes.getLength(); i++) {
            String name = attributes.getQName(i);
            if (name.equals(TAG_ID)) {
                continue;
            }
            // Any "<prefix>:href" attribute is stored under the fixed key "xlink:href".
            String key = name.contains(":href") ? "xlink:href" : name;
            this.chunkMetaData.put(key, attributes.getValue(i));
        }
        this.currentParagraph = new Paragraph(attributes.getValue(TAG_ID));
        this.currentParagraph.setChunkMetaData(this.chunkMetaData);
        this.currentParagraph.setAttributeIndex(this.attributeIndex);
    }

    /** Interprets a {@code disamb} attribute: absent or 0 means false, any other integer true. */
    private static boolean parseDisamb(String value) throws SAXException {
        if (value == null) {
            return false;
        }
        try {
            return Integer.parseInt(value) != 0;
        } catch (NumberFormatException e) {
            throw new SAXException("Invalid disamb value: " + value, e);
        }
    }

    /**
     * Closes the element: stores collected text, attaches finished tokens, sentences and
     * paragraphs to their parents, and registers annotation channel membership.
     *
     * @throws SAXException if the text of an {@code ann} element is not an integer
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endElement(String uri, String localName, String qName) throws SAXException {
        // Compared case-insensitively like the matching start tag, so a paragraph that was
        // opened is always added to the result.
        if (qName.equalsIgnoreCase(TAG_PARAGRAPH)) {
            this.paragraphs.add(this.currentParagraph);
        } else if (qName.equalsIgnoreCase(TAG_SENTENCE)) {
            for (Annotation annotation : this.annotations.values()) {
                this.currentSentence.addChunk(annotation);
            }
            this.currentParagraph.addSentence(this.currentSentence);
        } else if (qName.equalsIgnoreCase(TAG_TOKEN)) {
            finishToken();
        } else if (qName.equalsIgnoreCase(TAG_ORTH)) {
            this.currentToken.setAttributeValue(this.attributeIndex.getIndex(TAG_ORTH), this.tmpValue);
        } else if (qName.equalsIgnoreCase(TAG_TAG)) {
            this.currentToken.addTag(new Tag(this.tmpBase, this.tmpCtag, this.tmpDisamb.booleanValue()));
        } else if (qName.equalsIgnoreCase(TAG_BASE)) {
            this.tmpBase = this.tmpValue;
        } else if (qName.equalsIgnoreCase(TAG_CTAG)) {
            this.tmpCtag = this.tmpValue;
        } else if (qName.equalsIgnoreCase(TAG_ANN)) {
            finishAnnotation();
        }
    }

    /**
     * Sets the token's base and ctag attributes and appends the token to the current sentence.
     * The first tag marked as disambiguated is used; if none is marked, the first tag is used;
     * a token without any tag keeps both attributes unset.
     */
    private void finishToken() {
        ArrayList<Tag> tags = this.currentToken.getTags();
        Tag chosen = null;
        for (Tag tag : tags) {
            if (tag.getDisamb()) {
                chosen = tag;
                break;
            }
        }
        this.foundDisamb = chosen != null;
        if (chosen == null && !tags.isEmpty()) {
            chosen = tags.get(0);
        }
        if (chosen != null) {
            this.currentToken.setAttributeValue(this.attributeIndex.getIndex(TAG_BASE), chosen.getBase());
            this.currentToken.setAttributeValue(this.attributeIndex.getIndex(TAG_CTAG), chosen.getCtag());
        }
        this.currentSentence.addToken(this.currentToken);
        this.idx++;
    }

    /**
     * Adds the current token to the annotation named by the {@code ann} element just closed.
     * The text "0" means the token does not belong to the channel and is ignored.
     */
    private void finishAnnotation() throws SAXException {
        String channelNumber = this.tmpValue.trim();
        if (channelNumber.equals("0")) {
            return;
        }
        int number;
        try {
            number = Integer.parseInt(channelNumber);
        } catch (NumberFormatException e) {
            throw new SAXException("Invalid annotation channel number: " + channelNumber, e);
        }
        AnnChan annChan = new AnnChan(this.chanName, channelNumber, this.chanHead);
        String key = annChan.toString();
        Annotation annotation = this.annotations.get(key);
        if (annotation == null) {
            annotation = new Annotation(this.idx, annChan.chan, number, this.currentSentence);
            this.annotations.put(key, annotation);
        } else {
            annotation.addToken(this.idx);
        }
        if ("1".equals(annChan.head)) {
            annotation.setHead(this.idx);
        }
    }

    /**
     * Appends the reported characters to the text buffer. The parser may deliver the text of
     * one element in several calls, hence the append instead of an assignment.
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (length > 0) {
            this.tmpValue += new String(ch, start, length);
        }
    }

    /** Returns the document built from the input given to the constructor. */
    public Document getDocument() {
        return this.document;
    }
}
