package gate.corpora;

import gate.Document;
import gate.GateConstants;
import gate.Resource;
import gate.TextualDocument;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.AutoInstance;
import gate.creole.metadata.CreoleResource;
import gate.event.StatusListener;
import gate.util.DocumentFormatException;
import gate.util.Out;
import gate.xml.XmlDocumentHandler;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.commons.io.IOUtils;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

@CreoleResource(name = "GATE XML Document Format", isPrivate = true, autoinstances = {@AutoInstance(hidden = true)})
/* loaded from: input_file:gate/corpora/XmlDocumentFormat.class */
public class XmlDocumentFormat extends TextualDocumentFormat {
    private static final long serialVersionUID = 3205973554326782116L;
    private static XMLInputFactory staxFactory;

    /**
     * Indicates that this document format supports repositioning.
     *
     * @return always {@link Boolean#TRUE}
     */
    @Override // gate.DocumentFormat
    public Boolean supportsRepositioning() {
        // Use the shared constant: the Boolean(boolean) constructor is deprecated for removal
        // and allocates a needless new object on every call.
        return Boolean.TRUE;
    }

    /**
     * Unpacks the markup of {@code document} without recording any repositioning data.
     * Simply forwards to the three-argument overload with both {@link RepositioningInfo}
     * arguments set to {@code null}.
     *
     * @param document the document whose markup should be unpacked
     * @throws DocumentFormatException if the three-argument overload fails
     */
    @Override // gate.corpora.TextualDocumentFormat, gate.DocumentFormat
    public void unpackMarkup(Document document) throws DocumentFormatException {
        final RepositioningInfo noRepositioningInfo = null;
        final RepositioningInfo noAmpCodingInfo = null;
        unpackMarkup(document, noRepositioningInfo, noAmpCodingInfo);
    }

    /**
     * Unpacks the markup of {@code document}, choosing the parsing strategy from the document itself:
     * documents recognised by {@link #isGateXmlFormat(Document)} are handed to the GATE-format reader,
     * everything else goes through the general XML path.
     *
     * @param document           the document to unpack; must be non-null and have a source URL or content
     * @param repositioningInfo  forwarded to the general XML path (may be {@code null})
     * @param repositioningInfo2 forwarded to the general XML path (may be {@code null})
     * @throws DocumentFormatException if there is nothing to parse or parsing fails
     */
    @Override // gate.corpora.TextualDocumentFormat, gate.DocumentFormat
    public void unpackMarkup(Document document, RepositioningInfo repositioningInfo, RepositioningInfo repositioningInfo2) throws DocumentFormatException {
        boolean nothingToParse = document == null;
        if (!nothingToParse) {
            // Content is only consulted when there is no source URL.
            nothingToParse = document.getSourceUrl() == null && document.getContent() == null;
        }
        if (nothingToParse) {
            throw new DocumentFormatException("GATE document is null or no content found. Nothing to parse!");
        }

        // Relay status messages produced while parsing to this format's own listeners.
        final StatusListener relay = new StatusListener() {
            @Override // gate.event.StatusListener
            public void statusChanged(String message) {
                XmlDocumentFormat.this.fireStatusChanged(message);
            }
        };

        if (!isGateXmlFormat(document)) {
            unpackGeneralXmlMarkup(document, repositioningInfo, repositioningInfo2, relay);
            return;
        }
        unpackGateFormatMarkup(document, relay);
    }

    /**
     * Reads a GATE-format XML document through a StAX reader and lets
     * {@code DocumentStaxUtils.readGateXmlDocument} populate {@code document}.
     *
     * <p>The input is taken from the document content when {@code hasContentButNoValidUrl} says so,
     * otherwise from the source URL (decoded with the document's encoding for a
     * {@link TextualDocument}, or handed to the parser as a raw stream otherwise).
     *
     * <p>On an {@link XMLStreamException} the document is flagged with the {@code "parsingError"}
     * feature; the exception is rethrown only when the
     * {@link GateConstants#THROWEX_FORMAT_PROPERTY_NAME} feature is {@code true}, otherwise a
     * warning and stack trace are written to {@link Out}.
     *
     * @param document       the document to populate
     * @param statusListener listener passed on to the StAX utilities
     * @throws DocumentFormatException on I/O failure, or on a parse failure when the document asks
     *                                 for exceptions to be thrown
     */
    private void unpackGateFormatMarkup(Document document, StatusListener statusListener) throws DocumentFormatException {
        XMLStreamReader xmlStreamReader = null;
        Reader reader = null;
        InputStream inputStream = null;
        try {
            try {
                if (hasContentButNoValidUrl(document)) {
                    reader = new StringReader(document.getContent().toString());
                    xmlStreamReader = getInputFactory().createXMLStreamReader(reader);
                } else {
                    // Keep a handle on the raw stream so it is released even if wrapping it
                    // in a reader (e.g. unsupported encoding) or creating the parser fails.
                    inputStream = document.getSourceUrl().openStream();
                    String systemId = document.getSourceUrl().toExternalForm();
                    if (document instanceof TextualDocument) {
                        reader = new InputStreamReader(inputStream, ((TextualDocument) document).getEncoding());
                        xmlStreamReader = getInputFactory().createXMLStreamReader(systemId, reader);
                    } else {
                        xmlStreamReader = getInputFactory().createXMLStreamReader(systemId, inputStream);
                    }
                }
                // Advance to the root element before handing over to the reader utilities.
                xmlStreamReader.nextTag();
                DocumentStaxUtils.readGateXmlDocument(xmlStreamReader, document, statusListener);
            } finally {
                // Nested finally blocks guarantee every resource gets a close attempt
                // even when an earlier close throws.
                try {
                    if (xmlStreamReader != null) {
                        xmlStreamReader.close();
                    }
                } finally {
                    try {
                        if (reader != null) {
                            reader.close();
                        }
                    } finally {
                        if (inputStream != null) {
                            inputStream.close();
                        }
                    }
                }
            }
        } catch (XMLStreamException e) {
            document.getFeatures().put("parsingError", Boolean.TRUE);
            Boolean throwOnError = (Boolean) document.getFeatures().get(GateConstants.THROWEX_FORMAT_PROPERTY_NAME);
            if (throwOnError != null && throwOnError.booleanValue()) {
                throw new DocumentFormatException((Exception) e);
            }
            Out.println("Warning: Document remains unparsed. \n\n  Stack Dump: ");
            e.printStackTrace(Out.getPrintWriter());
        } catch (IOException e) {
            // String concatenation is null-safe; the source URL may be null for content-only documents.
            throw new DocumentFormatException("I/O exception for " + document.getSourceUrl(), e);
        }
    }

    /**
     * Returns the shared StAX input factory, creating and configuring it on first use.
     *
     * <p>The factory is non-validating, namespace aware, replaces entity references and supports
     * external entities. The method is {@code synchronized} so that concurrent first calls cannot
     * create two factories or observe one that is only partially configured.
     *
     * @return the lazily created, fully configured factory
     * @throws XMLStreamException declared for callers; not thrown by the current implementation
     */
    private static synchronized XMLInputFactory getInputFactory() throws XMLStreamException {
        if (staxFactory == null) {
            // Configure a local instance first and publish it only once it is complete.
            XMLInputFactory factory = XMLInputFactory.newInstance();
            factory.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.FALSE);
            factory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.TRUE);
            factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
            // NOTE(review): external entities are enabled, which exposes the parser to XXE when
            // documents come from untrusted sources. Left unchanged to preserve behaviour for
            // documents that rely on external entities - confirm whether this can be disabled.
            factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.TRUE);
            staxFactory = factory;
        }
        return staxFactory;
    }

    /**
     * Parses an arbitrary XML document with a non-validating, namespace-aware SAX parser, using an
     * {@link XmlDocumentHandler} as content, error, DTD and entity handler.
     *
     * <p>The input is the document content when {@code hasContentButNoValidUrl} says so; otherwise
     * the source URL, decoded with the document's encoding for a {@link TextualDocument} or left to
     * the parser to open by system id. After a successful parse the document's next annotation id
     * is taken from the handler.
     *
     * <p>On a {@link SAXException} the document is flagged with the {@code "parsingError"} feature;
     * the exception is rethrown only when the {@link GateConstants#THROWEX_FORMAT_PROPERTY_NAME}
     * feature is {@code true}, otherwise a warning and stack trace are written to {@link Out}.
     * The status listener is always detached from the handler again, whatever the outcome.
     *
     * @param document           the document to populate
     * @param repositioningInfo  passed to the handler via {@code setRepositioningInfo}
     * @param repositioningInfo2 passed to the handler via {@code setAmpCodingInfo}
     * @param statusListener     listener attached to the handler for the duration of the parse
     * @throws DocumentFormatException on parser configuration or I/O failure, or on a parse failure
     *                                 when the document asks for exceptions to be thrown
     */
    private void unpackGeneralXmlMarkup(Document document, RepositioningInfo repositioningInfo, RepositioningInfo repositioningInfo2, StatusListener statusListener) throws DocumentFormatException {
        boolean contentOnly = hasContentButNoValidUrl(document);
        XmlDocumentHandler handler = null;
        try {
            SAXParserFactory parserFactory = SAXParserFactory.newInstance();
            parserFactory.setValidating(false);
            parserFactory.setNamespaceAware(true);
            SAXParser saxParser = parserFactory.newSAXParser();

            handler = new XmlDocumentHandler(document, this.markupElementsMap, this.element2StringMap);
            handler.addStatusListener(statusListener);
            handler.setRepositioningInfo(repositioningInfo);
            handler.setAmpCodingInfo(repositioningInfo2);

            XMLReader xmlReader = saxParser.getXMLReader();
            xmlReader.setFeature("http://xml.org/sax/features/validation", false);
            xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);
            xmlReader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
            xmlReader.setContentHandler(handler);
            xmlReader.setErrorHandler(handler);
            xmlReader.setDTDHandler(handler);
            xmlReader.setEntityResolver(handler);

            InputStream urlStream = null;
            Reader urlReader = null;
            try {
                InputSource inputSource;
                if (contentOnly) {
                    inputSource = new InputSource(new StringReader(document.getContent().toString()));
                } else if (document instanceof TextualDocument) {
                    // Track the raw stream separately so it is closed even if creating the
                    // reader fails (e.g. unsupported encoding).
                    urlStream = document.getSourceUrl().openStream();
                    urlReader = new InputStreamReader(urlStream, ((TextualDocument) document).getEncoding());
                    inputSource = new InputSource(urlReader);
                    inputSource.setSystemId(document.getSourceUrl().toString());
                } else {
                    inputSource = new InputSource(document.getSourceUrl().toString());
                }
                xmlReader.parse(inputSource);
            } finally {
                try {
                    if (urlReader != null) {
                        urlReader.close();
                    }
                } finally {
                    if (urlStream != null) {
                        urlStream.close();
                    }
                }
            }
            ((DocumentImpl) document).setNextAnnotationId(handler.getCustomObjectsId());
        } catch (ParserConfigurationException e) {
            throw new DocumentFormatException("XML parser configuration exception ", e);
        } catch (SAXException e) {
            document.getFeatures().put("parsingError", Boolean.TRUE);
            Boolean throwOnError = (Boolean) document.getFeatures().get(GateConstants.THROWEX_FORMAT_PROPERTY_NAME);
            if (throwOnError != null && throwOnError.booleanValue()) {
                throw new DocumentFormatException(e);
            }
            Out.println("Warning: Document remains unparsed. \n\n  Stack Dump: ");
            e.printStackTrace(Out.getPrintWriter());
        } catch (IOException e) {
            throw new DocumentFormatException("I/O exception for " + document.getSourceUrl(), e);
        } finally {
            // Detach the listener on every exit path, not only after a successful parse.
            if (handler != null) {
                handler.removeStatusListener(statusListener);
            }
        }
    }

    /**
     * Tells whether the given text mentions a {@code GateDocument} element, i.e. whether it contains
     * either {@code "<GateDocument"} or {@code " GateDocument"}.
     *
     * @param str the text to inspect; must not be {@code null}
     * @return {@code true} if either marker occurs anywhere in {@code str}
     * @deprecated the {@code Document}-based overload is the one used when unpacking markup
     */
    @Deprecated
    protected static boolean isGateXmlFormat(String str) {
        if (str.contains("<GateDocument")) {
            return true;
        }
        return str.contains(" GateDocument");
    }

    /**
     * Decides whether {@code document} is in GATE's XML format by looking for the text
     * {@code "GateDocument"} near the start of the input.
     *
     * <p>For documents for which {@code hasContentButNoValidUrl} holds, the first 2048 characters of
     * the content are inspected. Otherwise up to 2048 bytes are read from the source URL and decoded
     * with the document's encoding; documents that are not a {@link TextualDocument} (or report no
     * encoding) are decoded as UTF-8. The URL stream is always closed again.
     *
     * <p>The search is done on decoded text rather than on encoded bytes: encoding the marker with
     * a charset such as UTF-16 prepends a byte-order mark, so a byte-level search for it would
     * never match inside the data.
     *
     * @param document the document to inspect
     * @return {@code true} if the inspected prefix contains {@code "GateDocument"}
     * @throws DocumentFormatException if the source cannot be read or the encoding is unsupported
     */
    protected static boolean isGateXmlFormat(Document document) throws DocumentFormatException {
        final int headerLimit = 2048;
        try {
            String header;
            if (hasContentButNoValidUrl(document)) {
                String content = document.getContent().toString();
                header = content.length() > headerLimit ? content.substring(0, headerLimit) : content;
            } else {
                String encoding = null;
                if (document instanceof TextualDocument) {
                    encoding = ((TextualDocument) document).getEncoding();
                }
                if (encoding == null) {
                    // The marker is plain ASCII, so UTF-8 is a safe fallback for detection.
                    encoding = "UTF-8";
                }
                byte[] buffer = new byte[headerLimit];
                int bytesRead;
                InputStream in = document.getSourceUrl().openStream();
                try {
                    bytesRead = IOUtils.read(in, buffer);
                } finally {
                    in.close();
                }
                header = new String(buffer, 0, bytesRead, encoding);
            }
            return header.contains("GateDocument");
        } catch (IOException e) {
            throw new DocumentFormatException(e);
        }
    }

    /**
     * Finds the first occurrence of {@code bArr2} inside {@code bArr} using the
     * Knuth-Morris-Pratt algorithm.
     *
     * @param bArr  the data to search in
     * @param bArr2 the byte pattern to look for
     * @return the index in {@code bArr} where the first match starts, or {@code -1} if there is
     *         none; empty data always yields {@code -1}, and an empty pattern matches at index
     *         {@code 0} of non-empty data
     */
    protected static int indexOf(byte[] bArr, byte[] bArr2) {
        if (bArr.length == 0) {
            return -1;
        }
        if (bArr2.length == 0) {
            // Guard: the matching loop below would otherwise index into an empty pattern.
            return 0;
        }
        int[] failure = computeFailure(bArr2);
        int matched = 0; // number of pattern bytes currently matched
        for (int pos = 0; pos < bArr.length; pos++) {
            // On a mismatch fall back to the longest shorter prefix that is still a match.
            while (matched > 0 && bArr2[matched] != bArr[pos]) {
                matched = failure[matched - 1];
            }
            if (bArr2[matched] == bArr[pos]) {
                matched++;
            }
            if (matched == bArr2.length) {
                return pos - bArr2.length + 1;
            }
        }
        return -1;
    }

    /**
     * Builds the KMP failure table for {@code bArr}: entry {@code k} is the length of the longest
     * proper prefix of {@code bArr[0..k]} that is also a suffix of it.
     *
     * @param bArr the pattern
     * @return the failure table, same length as the pattern
     */
    private static int[] computeFailure(byte[] bArr) {
        int[] failure = new int[bArr.length];
        int prefixLen = 0;
        for (int k = 1; k < bArr.length; k++) {
            while (prefixLen > 0 && bArr[prefixLen] != bArr[k]) {
                prefixLen = failure[prefixLen - 1];
            }
            if (bArr[prefixLen] == bArr[k]) {
                prefixLen++;
            }
            failure[k] = prefixLen;
        }
        return failure;
    }

    /**
     * Registers this format as the handler for XML: the {@code text/xml} MIME string is mapped to
     * this instance, {@code text/xml} and {@code application/xml} are mapped to the MIME type, as
     * are the {@code xml}, {@code xhtm} and {@code xhtml} suffixes and the {@code <?xml} magic
     * string. Finally the MIME type is set on this format.
     *
     * @return this instance
     * @throws ResourceInstantiationException declared by the overridden method
     */
    @Override // gate.corpora.TextualDocumentFormat, gate.creole.AbstractResource, gate.Resource
    public Resource init() throws ResourceInstantiationException {
        final MimeType xmlMime = new MimeType("text", "xml");
        final String mimeString = xmlMime.getType() + "/" + xmlMime.getSubtype();

        // MIME string registrations.
        mimeString2ClassHandlerMap.put(mimeString, this);
        mimeString2mimeTypeMap.put(mimeString, xmlMime);
        mimeString2mimeTypeMap.put("application/xml", xmlMime);

        // File suffix registrations.
        suffixes2mimeTypeMap.put("xml", xmlMime);
        suffixes2mimeTypeMap.put("xhtm", xmlMime);
        suffixes2mimeTypeMap.put("xhtml", xmlMime);

        // Magic-number registration.
        magic2mimeTypeMap.put("<?xml", xmlMime);

        setMimeType(xmlMime);
        return this;
    }
}
