package org.apache.any23;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.List;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.configuration.ModifiableConfiguration;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.microdata.MicrodataExtractor;
import org.apache.any23.filter.IgnoreAccidentalRDFa;
import org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments;
import org.apache.any23.http.DefaultHTTPClient;
import org.apache.any23.mime.MIMETypeDetector;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.HTTPDocumentSource;
import org.apache.any23.source.StringDocumentSource;
import org.apache.any23.util.FileUtils;
import org.apache.any23.util.StreamUtils;
import org.apache.any23.util.StringUtils;
import org.apache.any23.vocab.DCTerms;
import org.apache.any23.writer.CompositeTripleHandler;
import org.apache.any23.writer.CountingTripleHandler;
import org.apache.any23.writer.NTriplesWriter;
import org.apache.any23.writer.RDFXMLWriter;
import org.apache.any23.writer.ReportingTripleHandler;
import org.apache.any23.writer.RepositoryWriter;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
import org.apache.commons.io.IOUtils;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryResult;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.rio.RDFParseException;
import org.openrdf.sail.memory.MemoryStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/any23/Any23Test.class */
/**
 * Test case for the {@link Any23} facade.
 *
 * <p>Covers format detection, encoding handling, programmatic extraction with
 * different {@link ExtractionParameters} and a few regression scenarios.
 * Tests that would fetch content from the web are annotated with
 * {@link Ignore} and additionally guarded by {@code assumeOnlineAllowed()}.</p>
 */
public class Any23Test extends Any23OnlineTestBase {
    /** Document URI assigned to in-memory sources built by {@link #assertDetection(String, String...)}. */
    private static final String PAGE_URL = "http://bob.com";
    private static final DCTerms vDCTERMS = DCTerms.getInstance();
    private static final Logger logger = LoggerFactory.getLogger(Any23Test.class);

    @Test
    public void testTTLDetection() throws Exception {
        assertDetection("<a> <b> <c> .", "rdf-turtle");
    }

    @Test
    public void testN3Detection1() throws Exception {
        assertDetection("<Bob><brothers>(<Jim><Mark>).", "rdf-turtle");
    }

    @Test
    public void testN3Detection2() throws Exception {
        assertDetection("<http://example.org/path> <http://foo.com> <http://example.org/Document/foo#> .", "rdf-nt");
    }

    /**
     * No extractor names are supplied here, so {@link #assertDetection(String, String...)}
     * builds the {@link Any23} instance without an explicit extractor list.
     */
    @Test
    public void testHTMLBruteForceDetection() throws Exception {
        assertDetection("<html><body><div class=\"vcard fn\">Joe</div></body></html>");
    }

    @Test
    public void testExplicitEncoding() throws Exception {
        assertEncodingDetection("UTF-8", "/html/encoding-test.html", "Knud Möller");
    }

    @Test
    public void testImplicitEncoding() throws Exception {
        assertEncodingDetection(null, "/html/encoding-test.html", "Knud Möller");
    }

    @Test
    public void testRDFXMLDetectionAndExtraction() throws Exception {
        assertDetectionAndExtraction("<?xml version='1.0'?> <rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' xmlns:dc='http://purl.org/dc/elements/1.1/'><rdf:Description rdf:about='http://www.example.com'><dc:title>x</dc:title></rdf:Description></rdf:RDF>");
    }

    @Test
    public void testNTriplesDetectionAndExtraction() throws Exception {
        assertDetectionAndExtraction("<http://www.example.com> <http://purl.org/dc/elements/1.1/title> \"n3 . appo\" .");
    }

    @Test
    public void testNturtleDetectionAndExtraction() throws Exception {
        assertDetectionAndExtraction("@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n@prefix dc: <http://purl.org/dc/elements/1.1/> .\n@prefix ex: <http://example.org/stuff/1.0/> .\n\n<http://www.w3.org/TR/rdf-syntax-grammar>\n  dc:title \"RDF/XML Syntax Specification (Revised)\" ;\n  ex:editor [\n    ex:fullname \"Dave Beckett\";\n    ex:homePage <http://purl.org/net/dajobe/>\n  ] .");
    }

    /**
     * Extracts triples from an in-memory Turtle snippet and checks that some
     * N-Triples output is produced.
     */
    @Test
    public void testDemoCodeSnippet1() throws Exception {
        final Any23 runner = new Any23();
        final StringDocumentSource source = new StringDocumentSource("@prefix foo: <http://example.org/ns#> .   @prefix : <http://other.example.org/ns#> .foo:bar foo: : .                          :bar : foo:bar .                           ", "http://host.com/service");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final NTriplesWriter handler = new NTriplesWriter(out);
        try {
            runner.extract(source, handler);
        } finally {
            // Close exactly once, whether or not the extraction succeeded.
            handler.close();
        }
        final String nt = out.toString("UTF-8");
        logger.debug("nt: {}", nt);
        Assert.assertTrue(nt.length() > 0);
    }

    /**
     * Same as {@link #testDemoCodeSnippet1()} but reading the document over HTTP.
     */
    @Test
    @Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
    public void testDemoCodeSnippet2() throws Exception {
        assumeOnlineAllowed();
        final Any23 runner = new Any23();
        runner.setHTTPUserAgent("test-user-agent");
        final HTTPDocumentSource source = new HTTPDocumentSource(runner.getHTTPClient(), "http://dbpedia.org/resource/Trento");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final NTriplesWriter handler = new NTriplesWriter(out);
        try {
            runner.extract(source, handler);
        } finally {
            handler.close();
        }
        final String n3 = out.toString("UTF-8");
        logger.debug("n3: {}", n3);
        Assert.assertTrue(n3.length() > 0);
    }

    /**
     * Runs an extraction over a bundled RDFa page with a customised HTTP client
     * and checks the number of lines written by the N-Triples writer.
     */
    @Test
    public void testProgrammaticExtraction() throws ExtractionException, IOException, URISyntaxException {
        final Any23 runner = new Any23();
        runner.setHTTPUserAgent("Any23-Servlet");
        runner.setHTTPClient(new DefaultHTTPClient() {
            protected int getConnectionTimeout() {
                return 5000;
            }

            protected int getSoTimeout() {
                return 2000;
            }
        });
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final NTriplesWriter writer = new NTriplesWriter(out);
        final DocumentSource source = getDocumentSourceFromResource("/html/rdfa/ansa_2010-02-26_12645863.html", "http://host.com/service");
        final ExtractionReport report = runner.extract(source, new ReportingTripleHandler(new IgnoreAccidentalRDFa(writer)));
        Assert.assertTrue(report.hasMatchingExtractors());
        try {
            writer.close();
        } catch (TripleHandlerException e) {
            Assert.fail(e.getMessage());
        }
        final String n3 = out.toString("UTF-8");
        logger.debug(n3);
        // Compare by value: assertSame on boxed integers only works by accident of the Integer cache.
        Assert.assertEquals("Unexpected number of triples.", 16, StringUtils.countNL(n3));
    }

    @Test
    @Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
    public void testGZippedContent() throws IOException, URISyntaxException, ExtractionException {
        assumeOnlineAllowed();
        final Any23 runner = new Any23();
        runner.setHTTPUserAgent("test-user-agent");
        final HTTPDocumentSource source = new HTTPDocumentSource(runner.getHTTPClient(), "http://products.semweb.bestbuy.com/y/products/7590289/");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        runner.extract(source, new NTriplesWriter(out));
        final String n3 = out.toString("UTF-8");
        logger.debug("N3 {}", n3);
        Assert.assertTrue(n3.length() > 0);
    }

    /**
     * Extracts with {@link ExtractionParameters.ValidationMode#None} and checks
     * the number of triples seen by a {@link CountingTripleHandler}.
     */
    @Test
    public void testExtractionParameters() throws IOException, ExtractionException, TripleHandlerException {
        final Any23 runner = new Any23();
        final DocumentSource source = getDocumentSourceFromResource("/org/apache/any23/validator/missing-og-namespace.html", "http://www.test.com");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final CountingTripleHandler counter = new CountingTripleHandler();
        final NTriplesWriter writer = new NTriplesWriter(out);
        final CompositeTripleHandler composite = new CompositeTripleHandler();
        composite.addChild(counter);
        composite.addChild(writer);
        try {
            runner.extract(new ExtractionParameters(DefaultConfiguration.singleton(), ExtractionParameters.ValidationMode.None), source, composite);
        } finally {
            composite.close();
        }
        logger.info(out.toString("UTF-8"));
        Assert.assertEquals("Unexpected number of triples.", 9L, counter.getCount());
    }

    /**
     * Extracts the same document twice, first with the third
     * {@link ExtractionParameters} constructor argument set to {@code true} and
     * then to {@code false}, and checks the triple count of each run.
     */
    @Test
    public void testExtractionParametersWithNestingDisabled() throws IOException, ExtractionException, TripleHandlerException {
        final Any23 runner = new Any23();
        final DocumentSource source = getDocumentSourceFromResource("/microformats/nested-microformats-a1.html", "http://www.test.com");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();

        // First run: RDF/XML output.
        final CountingTripleHandler counter1 = new CountingTripleHandler();
        final RDFXMLWriter rdfXmlWriter = new RDFXMLWriter(out);
        final CompositeTripleHandler composite1 = new CompositeTripleHandler();
        composite1.addChild(counter1);
        composite1.addChild(rdfXmlWriter);
        try {
            runner.extract(new ExtractionParameters(DefaultConfiguration.singleton(), ExtractionParameters.ValidationMode.None, true), source, composite1);
        } finally {
            composite1.close();
        }
        logger.debug("Out1: {}", out.toString("UTF-8"));
        Assert.assertEquals("Unexpected number of triples.", 22L, counter1.getCount());

        // Second run: reuse the buffer for N-Triples output.
        out.reset();
        final CountingTripleHandler counter2 = new CountingTripleHandler();
        final NTriplesWriter ntWriter = new NTriplesWriter(out);
        final CompositeTripleHandler composite2 = new CompositeTripleHandler();
        composite2.addChild(counter2);
        composite2.addChild(ntWriter);
        try {
            runner.extract(new ExtractionParameters(DefaultConfiguration.singleton(), ExtractionParameters.ValidationMode.ValidateAndFix, false), source, composite2);
        } finally {
            composite2.close();
        }
        logger.debug("Out2: {}", out.toString("UTF-8"));
        Assert.assertEquals("Unexpected number of triples.", 19L, counter2.getCount());
    }

    /**
     * Checks that an {@link ExtractionException} raised for this document
     * carries an {@link RDFParseException} as its cause.
     *
     * <p>NOTE(review): the test also passes when no exception is thrown at all;
     * confirm whether a failure should be mandatory before tightening it.</p>
     */
    @Test
    public void testExceptionPropagation() throws IOException {
        final Any23 runner = new Any23();
        final DocumentSource source = getDocumentSourceFromResource("/application/turtle/geolinkeddata.ttl", "http://www.test.com");
        try {
            runner.extract(source, new CountingTripleHandler());
        } catch (ExtractionException e) {
            Assert.assertTrue(e.getCause() instanceof RDFParseException);
        }
    }

    /**
     * A document declared as {@code application/xml} must not activate any
     * extractor and must not produce any triple.
     */
    @Test
    public void testXMLMimeTypeManagement() throws IOException, ExtractionException {
        final StringDocumentSource source = new StringDocumentSource(StreamUtils.asString(getClass().getResourceAsStream("any23-xml-mimetype.xml")), "http://www.test.com/resource.xml", "application/xml");
        final Any23 runner = new Any23();
        // Keep a handle on the counter so its count can be asserted afterwards.
        final CountingTripleHandler counter = new CountingTripleHandler(false);
        final ExtractionReport report = runner.extract(source, new ReportingTripleHandler(counter));
        Assert.assertFalse(report.hasMatchingExtractors());
        Assert.assertEquals(0L, counter.getCount());
    }

    /**
     * Online variant of {@link #testXMLMimeTypeManagement()}.
     */
    @Test
    @Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
    public void testXMLMimeTypeManagementViaURL() throws IOException, ExtractionException {
        assumeOnlineAllowed();
        final Any23 runner = new Any23();
        runner.setHTTPUserAgent("test-user-agent");
        // Keep a handle on the counter so its count can be asserted afterwards.
        final CountingTripleHandler counter = new CountingTripleHandler(false);
        final ExtractionReport report = runner.extract("http://www.nativeremedies.com/XML/combos.xml", new ReportingTripleHandler(counter));
        Assert.assertFalse(report.hasMatchingExtractors());
        Assert.assertEquals(0L, counter.getCount());
    }

    @Test
    @Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
    public void testBlankNodesViaURL() throws IOException, ExtractionException {
        assumeOnlineAllowed();
        final Any23 runner = new Any23();
        runner.setHTTPUserAgent("test-user-agent");
        final ExtractionReport report = runner.extract("http://www.usarab.org/news/?tag=england", new ReportingTripleHandler(new CountingTripleHandler(false)));
        Assert.assertTrue(report.hasMatchingExtractors());
    }

    @Test
    public void testMicrodataSupport() throws Exception {
        final String html = IOUtils.toString(getClass().getResourceAsStream("/microdata/microdata-basic.html"));
        assertExtractorActivation(html, MicrodataExtractor.class);
    }

    /**
     * Regression test: extraction of this document must complete without error.
     * Only the output is logged; nothing is asserted on it.
     */
    @Test
    public void testAbstractMethodErrorIssue186_1() throws IOException, ExtractionException {
        final Any23 runner = new Any23();
        final StringDocumentSource source = new StringDocumentSource(FileUtils.readResourceContent("/html/rdfa/rdfa-issue186-1.xhtml"), "http://base.com");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        runner.extract(source, new NTriplesWriter(out));
        logger.debug(out.toString("UTF-8"));
    }

    /**
     * Regression test: extraction of this document must complete without error.
     * Only the output is logged; nothing is asserted on it.
     */
    @Test
    public void testAbstractMethodErrorIssue186_2() throws IOException, ExtractionException {
        final Any23 runner = new Any23();
        final StringDocumentSource source = new StringDocumentSource(FileUtils.readResourceContent("/html/rdfa/rdfa-issue186-2.xhtml"), "http://richard.cyganiak.de/");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        runner.extract(source, new NTriplesWriter(out));
        logger.debug(out.toString("UTF-8"));
    }

    /**
     * With {@code any23.extraction.metadata.timesize} set to {@code off} on a
     * modifiable configuration copy, the output must not mention the Sindice
     * {@code date} and {@code size} properties.
     */
    @Test
    public void testModifiableConfiguration_issue183() throws Exception {
        final ModifiableConfiguration configuration = DefaultConfiguration.copy();
        configuration.setProperty("any23.extraction.metadata.timesize", "off");
        final Any23 runner = new Any23(configuration);
        final StringDocumentSource source = new StringDocumentSource(FileUtils.readResourceContent("/rdf/rdf-issue183.ttl"), "http://base.com");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final NTriplesWriter handler = new NTriplesWriter(out);
        try {
            runner.extract(source, handler);
        } finally {
            handler.close();
        }
        final String nt = out.toString("UTF-8");
        logger.debug(nt);
        Assert.assertFalse("Should not contain triple with http://vocab.sindice.net/date", nt.contains("http://vocab.sindice.net/date"));
        Assert.assertFalse("Should not contain triple with http://vocab.sindice.net/size", nt.contains("http://vocab.sindice.net/size"));
    }

    /**
     * Runs an extraction over the given content using the fixed document URI
     * {@code http://host.com/path} and a discarded N-Triples output.
     *
     * @param in document content.
     * @return the report produced by the extraction.
     * @throws Exception if the extraction fails.
     */
    private ExtractionReport detectAndExtract(String in) throws Exception {
        final Any23 runner = new Any23();
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final ReportingTripleHandler handler = new ReportingTripleHandler(
                new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(new NTriplesWriter(out))));
        return runner.extract(in, "http://host.com/path", handler);
    }

    /**
     * Asserts that at least one extractor matched the given content.
     *
     * @param in document content.
     * @throws Exception if the extraction fails.
     */
    private void assertDetectionAndExtraction(String in) throws Exception {
        final ExtractionReport report = detectAndExtract(in);
        Assert.assertTrue("Detection and extraction failed, no matching extractors.", report.hasMatchingExtractors());
    }

    /**
     * Asserts that every listed extractor class is among the extractors that
     * matched the given content.
     *
     * @param in document content.
     * @param expectedExtractors extractor classes that must have been activated.
     * @throws Exception if the extraction fails.
     */
    private void assertExtractorActivation(String in, Class<? extends Extractor>... expectedExtractors) throws Exception {
        final ExtractionReport report = detectAndExtract(in);
        for (Class<? extends Extractor> expected : expectedExtractors) {
            Assert.assertTrue(
                    String.format("Detection and extraction failed, expected extractor [%s] not found.", expected),
                    containsClass(report.getMatchingExtractors(), expected));
        }
    }

    /**
     * Extracts a bundled resource into an in-memory repository using the given
     * encoding and asserts that every {@code dcterms:title} value found contains
     * the expected text.
     *
     * @param encoding encoding passed to the extraction, may be {@code null}.
     * @param input path of the resource to load.
     * @param expectedContent text that each extracted title must contain.
     * @throws Exception if the repository or the extraction fails.
     */
    private void assertEncodingDetection(String encoding, String input, String expectedContent) throws Exception {
        final DocumentSource source = getDocumentSourceFromResource(input);
        final Any23 runner = new Any23();
        final SailRepository repository = new SailRepository(new MemoryStore());
        repository.initialize();

        RepositoryConnection connection = null;
        TripleHandler repositoryWriter = null;
        try {
            connection = repository.getConnection();
            repositoryWriter = new RepositoryWriter(connection);
            final ExtractionReport report = runner.extract(source, repositoryWriter, encoding);
            Assert.assertTrue(report.hasMatchingExtractors());

            final RepositoryResult<Statement> statements =
                    connection.getStatements((Resource) null, vDCTERMS.title, (Value) null, false);
            try {
                while (statements.hasNext()) {
                    final Statement statement = statements.next();
                    printStatement(statement);
                    Assert.assertTrue(statement.getObject().stringValue().contains(expectedContent));
                }
            } finally {
                statements.close();
            }
        } finally {
            // Either reference may still be null if setup failed part-way through.
            if (connection != null) {
                connection.close();
            }
            if (repositoryWriter != null) {
                repositoryWriter.close();
            }
        }
    }

    /**
     * Extracts the given content and asserts that more than ten characters of
     * N-Triples output were produced.
     *
     * <p>When extractor names are given, the {@link Any23} instance is restricted
     * to them and its MIME type detector is set to {@code null}; otherwise the
     * instance is created with a {@code null} extractor list.</p>
     *
     * @param in document content.
     * @param expectedExtractors names of the extractors to enable, possibly none.
     * @throws Exception if the extraction fails.
     */
    private void assertDetection(String in, String... expectedExtractors) throws Exception {
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final Any23 runner = new Any23(expectedExtractors.length == 0 ? null : expectedExtractors);
        if (expectedExtractors.length != 0) {
            runner.setMIMETypeDetector((MIMETypeDetector) null);
        }
        final NTriplesWriter handler = new NTriplesWriter(out);
        try {
            runner.extract(new StringDocumentSource(in, PAGE_URL), handler);
        } finally {
            handler.close();
        }
        final String result = out.toString("us-ascii");
        Assert.assertNotNull(result);
        Assert.assertTrue(result.length() > 10);
    }

    /**
     * Logs subject, predicate and object of a statement, tab separated, at debug level.
     *
     * @param statement statement to log.
     */
    private void printStatement(Statement statement) {
        logger.debug(String.format("%s\t%s\t%s", statement.getSubject(), statement.getPredicate(), statement.getObject()));
    }

    /**
     * Tells whether the list holds an element whose runtime class is exactly
     * the given one (subclasses do not count).
     *
     * @param list elements to inspect.
     * @param clazz class to look for.
     * @return {@code true} if such an element exists, {@code false} otherwise.
     */
    private boolean containsClass(List<?> list, Class<?> clazz) {
        for (Object element : list) {
            if (element.getClass().equals(clazz)) {
                return true;
            }
        }
        return false;
    }
}
