package ws.palladian.extraction.content;

import org.apache.commons.lang3.Validate;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import ws.palladian.classification.text.PalladianTextClassifier;
import ws.palladian.core.Instance;
import ws.palladian.helper.UrlHelper;
import ws.palladian.retrieval.HttpException;
import ws.palladian.retrieval.HttpResult;
import ws.palladian.retrieval.HttpRetriever;
import ws.palladian.retrieval.HttpRetrieverFactory;
import ws.palladian.retrieval.parser.json.JsonException;
import ws.palladian.retrieval.parser.json.JsonObject;

/* loaded from: input_file:ws/palladian/extraction/content/AlchemyApiContentExtractor.class */
/**
 * {@link WebPageContentExtractor} which delegates text extraction to the AlchemyAPI
 * {@code URLGetText} web service. The service is given the URL of a page and replies with a JSON
 * object from which the extracted text is read.
 *
 * <p>
 * Only text extraction is supported; {@link #getResultNode()} and {@link #getResultTitle()} throw
 * {@link UnsupportedOperationException}.
 *
 * <p>
 * NOTE(review): the request goes over plain HTTP, so the API key travels unencrypted. Switch the
 * endpoint to HTTPS if the service supports it -- confirm before changing.
 */
public class AlchemyApiContentExtractor extends WebPageContentExtractor {
    /** The name returned by {@link #getExtractorName()}. */
    private static final String EXTRACTOR_NAME = "AlchemyApi";

    /** Key which is sent with every request to the service. */
    private final String apiKey;

    /** Retriever used for issuing the HTTP requests. */
    private final HttpRetriever httpRetriever;

    /** Text of the most recently and successfully processed document. */
    private String extractedResult = Instance.NO_CATEGORY_DUMMY;

    /**
     * Creates a new extractor using the given API key.
     *
     * @param apiKey The key for accessing the service, must not be empty.
     */
    public AlchemyApiContentExtractor(String apiKey) {
        Validate.notEmpty(apiKey, "apiKey must not be empty");
        this.apiKey = apiKey;
        this.httpRetriever = HttpRetrieverFactory.getHttpRetriever();
    }

    /**
     * Sends the given URL to the service and stores the text from its response.
     *
     * <p>
     * The stored result is only replaced when the request and the parsing of the response both
     * succeed. On failure, the previously stored result is left untouched, so that
     * {@link #getResultText()} never returns an unparsed raw response.
     *
     * @param documentUrl The URL of the page from which to extract the text.
     * @return This instance.
     * @throws PageContentExtractorException In case the HTTP request fails or the response cannot
     *         be parsed.
     */
    @Override // ws.palladian.extraction.content.WebPageContentExtractor
    public WebPageContentExtractor setDocument(String documentUrl) throws PageContentExtractorException {
        String response;
        try {
            HttpResult httpResult = this.httpRetriever.httpGet(buildRequestUrl(documentUrl));
            response = httpResult.getStringContent();
        } catch (HttpException e) {
            throw new PageContentExtractorException(
                    "Error when contacting API for URL \"" + documentUrl + "\": " + e.getMessage(), e);
        }
        String text;
        try {
            // NOTE(review): presumably the constant names the text field of the response -- confirm
            text = new JsonObject(response).getString(PalladianTextClassifier.VECTOR_TEXT_IDENTIFIER);
        } catch (JsonException e) {
            throw new PageContentExtractorException(
                    "Error while parsing the JSON response '" + response + "': " + e.getMessage(), e);
        }
        // assign only after everything succeeded, see method comment
        this.extractedResult = text;
        return this;
    }

    /**
     * Processes the given document via its document URI; the content of the supplied document
     * itself is not used, only {@link Document#getDocumentURI()} is passed on to
     * {@link #setDocument(String)}.
     *
     * @param document The document, must not be <code>null</code>.
     * @return This instance.
     * @throws PageContentExtractorException In case the HTTP request fails or the response cannot
     *         be parsed.
     */
    @Override // ws.palladian.extraction.content.WebPageContentExtractor
    public WebPageContentExtractor setDocument(Document document) throws PageContentExtractorException {
        Validate.notNull(document, "document must not be null");
        return setDocument(document.getDocumentURI());
    }

    /**
     * Builds the URL for requesting the text of the given page from the service.
     *
     * @param documentUrl The URL of the page to process; it is encoded before being added as
     *        parameter.
     * @return The request URL including API key and the page's URL.
     */
    private String buildRequestUrl(String documentUrl) {
        return String.format(
                "http://access.alchemyapi.com/calls/url/URLGetText?apikey=%s&outputMode=json&url=%s",
                this.apiKey, UrlHelper.encodeParameter(documentUrl));
    }

    /**
     * Not supported by this extractor.
     *
     * @throws UnsupportedOperationException Always.
     */
    @Override // ws.palladian.extraction.content.WebPageContentExtractor
    public Node getResultNode() {
        throw new UnsupportedOperationException("The AlchemyApiContentExtractor does not support main node extraction.");
    }

    /**
     * @return The text stored by the last successful <code>setDocument</code> call, or the initial
     *         value of the result field if there was none.
     */
    @Override // ws.palladian.extraction.content.WebPageContentExtractor
    public String getResultText() {
        return this.extractedResult;
    }

    /**
     * Not supported by this extractor.
     *
     * @throws UnsupportedOperationException Always.
     */
    @Override // ws.palladian.extraction.content.WebPageContentExtractor
    public String getResultTitle() {
        throw new UnsupportedOperationException("The AlchemyApiContentExtractor does not support title extraction.");
    }

    /**
     * @return The name of this extractor.
     */
    @Override // ws.palladian.extraction.content.WebPageContentExtractor
    public String getExtractorName() {
        return EXTRACTOR_NAME;
    }

    /**
     * Small demo which prints the text extracted for a URL. The API key is taken from the command
     * line instead of being compiled into the class, so that no credential ends up in the source.
     *
     * @param args <code>args[0]</code>: the API key (required); <code>args[1]</code>: the URL to
     *        process (optional, a sample news article is used when omitted).
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Usage: AlchemyApiContentExtractor <apiKey> [url]");
            return;
        }
        String url = args.length > 1 ? args[1] : "http://www.bbc.co.uk/news/world-asia-17116595";
        System.out.println("text: " + new AlchemyApiContentExtractor(args[0]).getResultText(url));
    }
}
