package ws.palladian.retrieval.search.socialmedia;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.helper.UrlHelper;
import ws.palladian.helper.constants.Language;
import ws.palladian.retrieval.HttpException;
import ws.palladian.retrieval.HttpRetriever;
import ws.palladian.retrieval.HttpRetrieverFactory;
import ws.palladian.retrieval.parser.json.JsonArray;
import ws.palladian.retrieval.parser.json.JsonException;
import ws.palladian.retrieval.parser.json.JsonObject;
import ws.palladian.retrieval.resources.BasicWebContent;
import ws.palladian.retrieval.resources.WebContent;
import ws.palladian.retrieval.search.AbstractMultifacetSearcher;
import ws.palladian.retrieval.search.Facet;
import ws.palladian.retrieval.search.MultifacetQuery;
import ws.palladian.retrieval.search.SearchResults;
import ws.palladian.retrieval.search.SearcherException;

/* loaded from: input_file:ws/palladian/retrieval/search/socialmedia/MultifacetTopsySearcher.class */
public final class MultifacetTopsySearcher extends AbstractMultifacetSearcher<WebContent> {
    public static final String CONFIG_API_KEY = "api.topsy.key";
    public static final String SEARCHER_NAME = "Topsy";
    private final String apiKey;
    private final HttpRetriever retriever;
    private static final Logger LOGGER = LoggerFactory.getLogger(MultifacetTopsySearcher.class);
    private static final Set<Language> SUPPORTED_LANGUAGES = EnumSet.of(Language.ENGLISH, Language.JAPANESE, Language.CHINESE, Language.KOREAN);
    private static final Pattern URL_STATUS_PATTERN = Pattern.compile("https?://twitter.com/[A-Za-z0-9_]*/status/(\\d+)");
    private static final Pattern CONTENT_RETWEET_PATTERN = Pattern.compile("(?:^|\\s)RT @[A-Za-z0-9_]+");
    private static final Pattern CONTENT_HASHTAG_PATTERN = Pattern.compile("#([A-Za-z0-9]+)");

    /* loaded from: input_file:ws/palladian/retrieval/search/socialmedia/MultifacetTopsySearcher$ContentType.class */
    public enum ContentType implements Facet {
        IMAGE,
        TWEET,
        VIDEO;

        private static final String TOPSY_CONTENT_TYPE = "topsy.contentType";

        @Override // ws.palladian.retrieval.search.Facet
        public String getIdentifier() {
            return TOPSY_CONTENT_TYPE;
        }
    }

    public MultifacetTopsySearcher(String str) {
        Validate.notEmpty(str, "apiKey must not be empty", new Object[0]);
        this.apiKey = str;
        this.retriever = HttpRetrieverFactory.getHttpRetriever();
    }

    public MultifacetTopsySearcher(Configuration configuration) {
        this(configuration.getString(CONFIG_API_KEY));
    }

    @Override // ws.palladian.retrieval.search.Searcher
    public String getName() {
        return SEARCHER_NAME;
    }

    @Override // ws.palladian.retrieval.search.AbstractMultifacetSearcher, ws.palladian.retrieval.search.AbstractSearcher, ws.palladian.retrieval.search.Searcher
    public SearchResults<WebContent> search(MultifacetQuery multifacetQuery) throws SearcherException {
        ArrayList arrayList = new ArrayList();
        Long l = null;
        int i = 0;
        int i2 = 1;
        loop0: while (true) {
            String buildQueryUrl = buildQueryUrl(multifacetQuery, i2, this.apiKey);
            LOGGER.debug("Request URL = {}", buildQueryUrl);
            try {
                String stringContent = this.retriever.httpGet(buildQueryUrl).getStringContent();
                LOGGER.debug("JSON = {}", stringContent);
                try {
                    JsonObject jsonObject = new JsonObject(stringContent).getJsonObject("response");
                    if (l == null) {
                        l = Long.valueOf(jsonObject.getLong("total"));
                    }
                    JsonArray jsonArray = jsonObject.getJsonArray("list");
                    if (jsonArray.size() == 0) {
                        break;
                    }
                    for (int i3 = 0; i3 < jsonArray.size(); i3++) {
                        JsonObject jsonObject2 = jsonArray.getJsonObject(i3);
                        if (!isRetweet(jsonObject2)) {
                            arrayList.add(parse(jsonObject2));
                            if (arrayList.size() == multifacetQuery.getResultCount()) {
                                break loop0;
                            }
                        } else {
                            i++;
                        }
                    }
                    i2++;
                } catch (JsonException e) {
                    throw new SearcherException("Error parsing the JSON response " + e.getMessage() + ", JSON was \"" + stringContent + "\"", e);
                }
            } catch (HttpException e2) {
                throw new SearcherException("HTTP error while searching with URL \"" + multifacetQuery + "\": " + e2.getMessage(), e2);
            }
        }
        LOGGER.debug("Skipped {} retweets", Integer.valueOf(i));
        return new SearchResults<>(arrayList, l);
    }

    private boolean isRetweet(JsonObject jsonObject) {
        String tryGetString = jsonObject.tryGetString("content");
        if (tryGetString == null) {
            throw new IllegalStateException("content from JSON was null");
        }
        return CONTENT_RETWEET_PATTERN.matcher(tryGetString).find();
    }

    private WebContent parse(JsonObject jsonObject) throws JsonException {
        System.out.println(jsonObject);
        BasicWebContent.Builder builder = new BasicWebContent.Builder();
        String tryGetString = jsonObject.tryGetString("trackback_permalink");
        builder.setUrl(tryGetString);
        String tryGetString2 = jsonObject.tryGetString("title");
        if (tryGetString2 != null) {
            builder.setTitle(StringEscapeUtils.unescapeHtml4(tryGetString2));
        }
        Integer tryGetInt = jsonObject.tryGetInt("firstpost_date");
        if (tryGetInt != null && tryGetInt.intValue() != 0) {
            builder.setPublished(new Date(tryGetInt.intValue() * 1000));
        }
        Integer tryGetInt2 = jsonObject.tryGetInt("date");
        if (tryGetInt2 != null && tryGetInt2.intValue() != 0) {
            builder.setPublished(new Date(tryGetInt2.intValue() * 1000));
        }
        String tryGetString3 = jsonObject.tryGetString("content");
        if (tryGetString3 != null) {
            builder.setSummary(StringEscapeUtils.unescapeHtml4(tryGetString3));
        }
        builder.setTags(extractTags(tryGetString3));
        builder.setIdentifier(extractIdentifier(tryGetString));
        builder.setSource(SEARCHER_NAME);
        return builder.mo100create();
    }

    static Set<String> extractTags(String str) {
        if (!StringUtils.isNotEmpty(str)) {
            return Collections.emptySet();
        }
        Matcher matcher = CONTENT_HASHTAG_PATTERN.matcher(str);
        LinkedHashSet linkedHashSet = new LinkedHashSet();
        while (matcher.find()) {
            linkedHashSet.add(matcher.group(1));
        }
        return linkedHashSet;
    }

    static String extractIdentifier(String str) {
        if (!StringUtils.isNotEmpty(str)) {
            return null;
        }
        Matcher matcher = URL_STATUS_PATTERN.matcher(str);
        if (matcher.find()) {
            return matcher.group(1);
        }
        return null;
    }

    private String buildQueryUrl(MultifacetQuery multifacetQuery, int i, String str) throws SearcherException {
        StringBuilder sb = new StringBuilder();
        if (StringUtils.isNotBlank(multifacetQuery.getText())) {
            sb.append("http://otter.topsy.com/search.json");
            sb.append("?q=").append(UrlHelper.encodeParameter(multifacetQuery.getText()));
            Facet facet = multifacetQuery.getFacet("topsy.contentType");
            if (facet != null) {
                sb.append("&type=").append(((ContentType) facet).toString().toLowerCase());
            }
        } else {
            if (!StringUtils.isNotBlank(multifacetQuery.getUrl())) {
                throw new SearcherException("Either text or URL must be provided for the query.");
            }
            validateUrl(multifacetQuery.getUrl());
            sb.append("http://otter.topsy.com/trackbacks.json");
            sb.append("?url=").append(multifacetQuery.getUrl());
        }
        sb.append("&apikey=").append(str);
        sb.append("&page=").append(i);
        sb.append("&perpage=100");
        Language language = multifacetQuery.getLanguage();
        if (language != null && SUPPORTED_LANGUAGES.contains(language)) {
            sb.append("&allow_lang=").append(language.getIso6391());
        }
        if (multifacetQuery.getStartDate() != null) {
            sb.append("&mintime=").append(multifacetQuery.getStartDate().getTime() / 1000);
        }
        if (multifacetQuery.getEndDate() != null) {
            sb.append("&maxtime=").append(multifacetQuery.getEndDate().getTime() / 1000);
        }
        return sb.toString();
    }

    private static void validateUrl(String str) {
        if (!str.startsWith("http://") && !str.startsWith("https://")) {
            throw new IllegalArgumentException("Invalid parameter, only URLs are supported (was: \"" + str + "\")");
        }
    }

    @Override // ws.palladian.retrieval.search.AbstractSearcher, ws.palladian.retrieval.search.Searcher
    public boolean isDeprecated() {
        return true;
    }
}
