package ws.palladian.extraction.content;

import com.gravity.goose.Article;
import com.gravity.goose.Configuration;
import com.gravity.goose.Goose;
import java.io.File;
import java.net.URL;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import ws.palladian.retrieval.HttpResult;

/* loaded from: input_file:ws/palladian/extraction/content/GooseContentExtractor.class */
/**
 * {@link WebPageContentExtractor} backed by the Goose article extraction library.
 *
 * <p>This extractor always hands the document's web address (an {@code http://} or
 * {@code https://} URL) to Goose. Local files, {@link HttpResult}s and documents without a
 * web URI are therefore rejected. The most recently extracted {@link Article} is kept in a
 * field and queried by {@link #getResultText()} and {@link #getResultTitle()}.
 */
public class GooseContentExtractor extends WebPageContentExtractor {
    private static final String LOCAL_FILES_NOT_SUPPORTED = "Local files are not supported";

    /** Result of the last successful extraction; {@code null} until a document has been set. */
    private Article article;

    /**
     * Always fails: local files cannot be processed by this extractor.
     *
     * @param file ignored
     * @param z ignored
     * @return never returns normally
     * @throws PageContentExtractorException always
     */
    public WebPageContentExtractor setDocument(File file, boolean z) throws PageContentExtractorException {
        throw new PageContentExtractorException(LOCAL_FILES_NOT_SUPPORTED);
    }

    /**
     * Always fails: {@link HttpResult}s cannot be processed by this extractor.
     *
     * @param httpResult ignored
     * @return never returns normally
     * @throws PageContentExtractorException always
     */
    public WebPageContentExtractor setDocument(HttpResult httpResult) throws PageContentExtractorException {
        throw new PageContentExtractorException("HttpResults are not supported");
    }

    /**
     * Extracts the article located at the given web URL using Goose (image fetching disabled).
     *
     * @param str the document location; must start with {@code http://} or {@code https://}
     * @param z not evaluated by this implementation
     * @return this extractor, holding the freshly extracted article
     * @throws PageContentExtractorException if {@code str} is {@code null} or not a web URL
     */
    public WebPageContentExtractor setDocument(String str, boolean z) throws PageContentExtractorException {
        // Drop any previous result first so a failed call can never expose stale content.
        this.article = null;
        if (str == null || !(str.startsWith("http://") || str.startsWith("https://"))) {
            // Previously such input was silently ignored, which surfaced later as an
            // unexplained NullPointerException (or as the previous document's content).
            throw new PageContentExtractorException("Only extraction from web URLs is supported: " + str);
        }
        Configuration configuration = new Configuration();
        configuration.setEnableImageFetching(false);
        this.article = new Goose(configuration).extractContent(str);
        return this;
    }

    /**
     * Extracts the article located at the given URL.
     *
     * @param url the document location; must not use the {@code file} protocol
     * @param z passed through to {@link #setDocument(String, boolean)}
     * @return this extractor
     * @throws PageContentExtractorException if the URL points to a local file or is not a web URL
     */
    public WebPageContentExtractor setDocument(URL url, boolean z) throws PageContentExtractorException {
        // Check the protocol rather than the string form: URL#toString() renders host-less
        // file URLs as "file:/path", which a "file://" prefix test would not catch.
        if ("file".equalsIgnoreCase(url.getProtocol())) {
            throw new PageContentExtractorException(LOCAL_FILES_NOT_SUPPORTED);
        }
        return setDocument(url.toString(), z);
    }

    /**
     * Extracts the article at the document's URI; equivalent to
     * {@code setDocument(document, true)}.
     *
     * @param document the document whose URI identifies the web page
     * @return this extractor
     * @throws PageContentExtractorException if the document URI is not a web URL
     * @throws IllegalArgumentException if the document has no URI or a {@code file:} URI
     */
    public WebPageContentExtractor setDocument(Document document) throws PageContentExtractorException {
        return setDocument(document, true);
    }

    /**
     * Extracts the article at the document's URI. Only the URI of the document is used; it
     * is handed to {@link #setDocument(String, boolean)}.
     *
     * @param document the document whose URI identifies the web page
     * @param z passed through to {@link #setDocument(String, boolean)}
     * @return this extractor
     * @throws PageContentExtractorException if the document URI is not a web URL
     * @throws IllegalArgumentException if the document has no URI or a {@code file:} URI
     */
    public WebPageContentExtractor setDocument(Document document, boolean z) throws PageContentExtractorException {
        String documentURI = document.getDocumentURI();
        if (documentURI == null || documentURI.startsWith("file:")) {
            throw new IllegalArgumentException("Only extraction from web URLs is supported.");
        }
        return setDocument(documentURI, z);
    }

    /**
     * Not available for this extractor.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public Node getResultNode() {
        throw new UnsupportedOperationException("Not supported by Goose");
    }

    /**
     * Returns the cleaned article text of the last extracted document.
     *
     * @return the cleaned article text as reported by Goose
     * @throws IllegalStateException if no document has been extracted successfully
     */
    public String getResultText() {
        return currentArticle().cleanedArticleText();
    }

    /**
     * Returns the title of the last extracted document.
     *
     * @return the article title as reported by Goose
     * @throws IllegalStateException if no document has been extracted successfully
     */
    public String getResultTitle() {
        return currentArticle().title();
    }

    /**
     * Returns the name of this extractor.
     *
     * @return the constant {@code "Goose"}
     */
    public String getExtractorName() {
        return "Goose";
    }

    /** Returns the current article or fails with a descriptive error if none is available. */
    private Article currentArticle() {
        if (this.article == null) {
            throw new IllegalStateException("No document has been extracted; call setDocument with a web URL first");
        }
        return this.article;
    }

    /**
     * Small demo: extracts a fixed article and prints its title and text to standard output.
     *
     * @param strArr command line arguments (unused)
     * @throws PageContentExtractorException if the document cannot be set
     */
    public static void main(String[] strArr) throws PageContentExtractorException {
        GooseContentExtractor gooseContentExtractor = new GooseContentExtractor();
        gooseContentExtractor.setDocument("http://techcrunch.com/2012/12/01/facebook-photo-sync-data/", true);
        System.out.println(gooseContentExtractor.getResultTitle());
        System.out.println(gooseContentExtractor.getResultText());
    }
}
