package ws.palladian.helper;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.validator.routines.UrlValidator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import ws.palladian.helper.collection.StringLengthComparator;
import ws.palladian.helper.html.XPathHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.io.LineAction;
import ws.palladian.helper.nlp.PatternHelper;
import ws.palladian.helper.nlp.StringHelper;

/* loaded from: input_file:ws/palladian/helper/UrlHelper.class */
public final class UrlHelper {
    /**
     * Regex alternation ("a|b|c") of top-level domains. Built in the static initializer from the entries of
     * {@code /domains.txt} that contain exactly one dot, with their first character removed.
     */
    private static final String TOP_LEVEL_DOMAINS;

    /**
     * Unmodifiable list of all non-blank, non-comment lines of {@code /domains.txt}, sorted with
     * {@link StringLengthComparator#INSTANCE}. NOTE(review): presumably longest suffix first so that multi-part
     * suffixes win over their shorter tails; confirm against the comparator.
     */
    private static final List<String> DOMAIN_SUFFIXES;

    /** Pattern used by {@link #extractUrls(String)}; assembled in the static initializer around the TLD list. */
    public static final Pattern URL_PATTERN;

    private static final Logger LOGGER = LoggerFactory.getLogger(UrlHelper.class);

    /** Names of the attributes whose values {@link #makeAbsoluteUrls(Document)} rewrites. */
    private static final List<String> LINK_ATTRIBUTES = Arrays.asList("href", "src");

    /**
     * Matches a session-id parameter (jsessionid, s, sid, PHPSESSID, sessionid) whose value is 12 to 200 characters
     * of letters, digits, underscore or hyphen, together with an optional leading '&amp;' or ';'.
     */
    private static final Pattern SESSION_ID_PATTERN = Pattern.compile("[&;]?(?<!\\w)(jsessionid=|s=|sid=|PHPSESSID=|sessionid=)[A-Za-z_0-9\\-]{12,200}(?!\\w)");

    /** Matches the first '?' and everything after it up to the next line terminator. */
    private static final Pattern URL_PARAM = Pattern.compile("\\?.*");

    /** Static utility class; not meant to be instantiated. */
    private UrlHelper() {
    }

    /**
     * Removes session-id parameters from a URL.
     *
     * <p>After the parameters matched by {@code SESSION_ID_PATTERN} are dropped, a '?' left dangling at the very end
     * is removed and a leading "?&amp;" is collapsed to "?".
     *
     * @param str the URL to clean, may be {@code null}
     * @return the URL without session-id parameters, or {@code null} if {@code str} was {@code null}
     */
    public static String removeSessionId(String str) {
        if (str != null) {
            String withoutSessionId = SESSION_ID_PATTERN.matcher(str).replaceAll("");
            String withoutTrailingMark = withoutSessionId.replaceAll("\\?$", "");
            return withoutTrailingMark.replaceAll("\\?&", "?");
        }
        return null;
    }

    /**
     * Rewrites, in place, every attribute listed in {@code LINK_ATTRIBUTES} of the document so that it holds the
     * value returned by {@link #makeFullUrl(String, String, String)} for the document URI and the document's base
     * URL. Attributes whose value would not change are left untouched.
     *
     * @param document the document to modify
     */
    public static void makeAbsoluteUrls(Document document) {
        final String pageUrl = document.getDocumentURI();
        final String baseHref = getBaseUrl(document);
        for (String attributeName : LINK_ATTRIBUTES) {
            String xPath = "//*[@" + attributeName + "]";
            for (Node element : XPathHelper.getXhtmlNodes(document, xPath)) {
                Node attribute = element.getAttributes().getNamedItem(attributeName);
                String currentValue = attribute.getNodeValue();
                String absoluteValue = makeFullUrl(pageUrl, baseHref, currentValue);
                if (absoluteValue.equals(currentValue)) {
                    continue;
                }
                LOGGER.trace("{} -> {}", currentValue, absoluteValue);
                attribute.setNodeValue(absoluteValue);
            }
        }
    }

    /**
     * Reads the {@code href} of the document's {@code <base>} element (XPath {@code //head/base/@href}).
     *
     * @param document the document to inspect
     * @return the text content of the attribute node, or {@code null} if no such node was found
     */
    public static String getBaseUrl(Document document) {
        Node baseHref = XPathHelper.getXhtmlNode(document, "//head/base/@href");
        return baseHref == null ? null : baseHref.getTextContent();
    }

    /**
     * Resolves a link against a page URL and an optional base URL.
     *
     * <p>A non-null base URL gets a trailing '/' appended if it lacks one. When both page URL and base URL are
     * given, the base URL is first resolved against the page URL; otherwise whichever of the two is present (if any)
     * serves as the context for resolving the link.
     *
     * @param str the URL of the page containing the link, may be {@code null}
     * @param str2 the base URL, may be {@code null}
     * @param str3 the link to resolve, must not be {@code null}
     * @return the result of {@link #makeFullUrl(String, String)} for the computed context and the link
     * @throws NullPointerException if {@code str3} is {@code null}
     */
    public static String makeFullUrl(String str, String str2, String str3) {
        if (str3 == null) {
            throw new NullPointerException("linkUrl must not be null");
        }
        String normalizedBase = str2;
        if (normalizedBase != null && !normalizedBase.endsWith("/")) {
            normalizedBase = normalizedBase + "/";
        }
        final String context;
        if (str != null && normalizedBase != null) {
            context = makeFullUrl(str, normalizedBase);
        } else if (str != null) {
            context = str;
        } else {
            context = normalizedBase;
        }
        return makeFullUrl(context, str3);
    }

    /**
     * Resolves a link against a context URL.
     *
     * <ul>
     * <li>Without a context ({@code str == null}) the link is returned unchanged.</li>
     * <li>A link starting with '?' replaces the query part of the context.</li>
     * <li>Otherwise the link is resolved with {@link URL#URL(URL, String)}. For links starting with '.', any
     * remaining "../" and "./" are stripped from the result; for links containing "/../", remaining
     * "segment/../" sequences are removed.</li>
     * <li>If either URL is malformed, the link is returned unchanged.</li>
     * </ul>
     *
     * @param str the context URL, may be {@code null}
     * @param str2 the link to resolve; must not be {@code null} when {@code str} is given
     * @return the resolved URL, or {@code str2} if it could not be resolved
     * @throws NullPointerException if {@code str} is non-null and {@code str2} is {@code null}
     */
    public static String makeFullUrl(String str, String str2) {
        if (str == null) {
            return str2;
        }
        if (str2.startsWith("?")) {
            return URL_PARAM.matcher(str).replaceAll("") + str2;
        }
        String resolved;
        try {
            resolved = new URL(new URL(str), str2).toString();
        } catch (MalformedURLException e) {
            // Best effort: fall back to the unresolved link, but leave a trace instead of failing silently.
            LOGGER.trace("could not resolve {} against {}: {}", str2, str, e.getMessage());
            return str2;
        }
        if (str2.startsWith(".")) {
            return resolved.replace("../", "").replace("./", "");
        }
        if (str2.contains("/../")) {
            // The dots must be escaped: unescaped, the expression matches any "segment/xy/" (two arbitrary
            // characters), which deletes ordinary two-character path segments and can swallow the host.
            Pattern parentSegment = PatternHelper.compileOrGet("[^/]+/\\.\\./");
            int upwardSteps = StringHelper.countOccurrences(str2, "../");
            for (int i = 0; i < upwardSteps; i++) {
                resolved = parentSegment.matcher(resolved).replaceAll("");
            }
        }
        return resolved;
    }

    /**
     * Strips a leading "https://", then a leading "http://", then a leading "www." from the given URL. The checks
     * run once each, in that order, and are case-sensitive.
     *
     * @param str the URL, may be {@code null}
     * @return the URL without those prefixes; the empty string if {@code str} was {@code null}
     */
    public static String getCleanUrl(String str) {
        String cleaned = (str == null) ? "" : str;
        // Order matters: each prefix is tested against the result of the previous removal.
        for (String prefix : new String[] {"https://", "http://", "www."}) {
            if (cleaned.startsWith(prefix)) {
                cleaned = cleaned.substring(prefix.length());
            }
        }
        return cleaned;
    }

    /**
     * Removes the anchor from a URL, i.e. the first '#' and everything after it up to the next line terminator.
     *
     * @param str the URL, may be {@code null}
     * @return the URL without anchor, or {@code null} if {@code str} was {@code null} (mirrors
     *         {@link #removeSessionId(String)} instead of failing with a {@link NullPointerException})
     */
    public static String removeAnchors(String str) {
        if (str == null) {
            return null;
        }
        return str.replaceAll("#.*", "");
    }

    /**
     * Extracts the domain (host) of a URL.
     *
     * <p>The URL is lower-cased (locale-independently) before parsing. With {@code z2 == false} the host is reduced
     * to the label directly in front of the first matching entry of {@code DOMAIN_SUFFIXES} plus that suffix. The
     * protocol prefix, if requested, is added after that reduction so it is never lost.
     *
     * @param str the URL, may be {@code null} or empty
     * @param z {@code true} to prefix the result with the protocol and "://"
     * @param z2 {@code true} to keep subdomains, {@code false} to strip them
     * @return the domain, or the empty string if {@code str} is {@code null}, empty, malformed or has no host
     */
    public static String getDomain(String str, boolean z, boolean z2) {
        String result = "";
        if (str == null || str.isEmpty()) {
            return result;
        }
        try {
            URL url = new URL(str.toLowerCase(Locale.ROOT));
            String host = url.getHost();
            if (host.isEmpty()) {
                LOGGER.trace("no domain specified {}", str);
                return result;
            }
            String domain = z2 ? host : removeSubdomains(host);
            result = z ? url.getProtocol() + "://" + domain : domain;
            LOGGER.trace("root url for {} -> {}", str, result);
        } catch (MalformedURLException e) {
            LOGGER.trace("could not determine domain for {}", str);
        }
        return result;
    }

    /** Reduces a host name to its last label before the first matching known domain suffix, plus that suffix. */
    private static String removeSubdomains(String host) {
        String suffix = "";
        for (String candidate : DOMAIN_SUFFIXES) {
            if (host.endsWith(candidate)) {
                suffix = candidate;
                break;
            }
        }
        String[] labels = host.substring(0, host.length() - suffix.length()).split("\\.");
        // split() drops trailing empty strings, so a remainder consisting only of dots yields no labels at all.
        return labels.length == 0 ? suffix : labels[labels.length - 1] + suffix;
    }

    /**
     * Shortcut for {@link #getDomain(String, boolean, boolean)} that always keeps subdomains.
     *
     * @param str the URL
     * @param z {@code true} to prefix the result with the protocol
     * @return the value returned by the three-argument overload with the last argument set to {@code true}
     */
    public static String getDomain(String str, boolean z) {
        return getDomain(str, z, true);
    }

    /**
     * Shortcut for {@link #getDomain(String, boolean, boolean)} that includes the protocol and keeps subdomains.
     *
     * @param str the URL
     * @return the value returned by the three-argument overload with both flags set to {@code true}
     */
    public static String getDomain(String str) {
        return getDomain(str, true, true);
    }

    /**
     * Builds a canonical form of a URL: protocol, lower-cased host, the port only if it is explicit and differs from
     * the protocol's default, a cleaned path, and the query parameters sorted alphabetically.
     *
     * <p>Path cleaning: segments are trimmed, empty segments are dropped, a ".." segment is dropped together with the
     * segment in front of it, and every kept segment is followed by '/'. If the last segment contains a '.', the
     * trailing '/' is removed. If the last segment contains "index" and there is no query, a trailing
     * "index.&lt;ext&gt;" is removed.
     *
     * @param str the URL, may be {@code null}
     * @return the canonical URL, or the empty string if {@code str} is {@code null} or malformed
     */
    public static String getCanonicalUrl(String str) {
        if (str == null) {
            return "";
        }
        final URL url;
        try {
            url = new URL(str);
        } catch (MalformedURLException e) {
            LOGGER.trace("could not determine canonical url for {}", str);
            return "";
        }

        String port = "";
        if (url.getPort() != -1 && url.getPort() != url.getDefaultPort()) {
            port = ":" + url.getPort();
        }
        // Locale.ROOT keeps host lower-casing independent of the JVM default locale (e.g. Turkish dotless i).
        String host = url.getHost().toLowerCase(Locale.ROOT);

        String[] queryParams = null;
        if (url.getQuery() != null) {
            queryParams = url.getQuery().split("&");
            Arrays.sort(queryParams);
        }

        String[] segments = url.getPath().split("/");
        String path = "/";
        if (segments.length > 0) {
            for (int i = 0; i < segments.length; i++) {
                segments[i] = segments[i].trim();
                if (segments[i].equals("..")) {
                    segments[i] = "";
                    if (i > 0) {
                        segments[i - 1] = "";
                    }
                }
            }
            StringBuilder joined = new StringBuilder("/");
            for (String segment : segments) {
                if (!segment.isEmpty()) {
                    joined.append(segment).append('/');
                }
            }
            path = joined.toString();
            String lastSegment = segments[segments.length - 1];
            if (lastSegment.contains(".")) {
                // Looks like a file name rather than a directory: no trailing slash.
                path = path.substring(0, path.length() - 1);
            }
            if (lastSegment.contains("index") && queryParams == null) {
                path = path.replaceAll("index\\..+$", "");
            }
        }

        String query = queryParams != null ? "?" + String.join("&", queryParams) : "";
        // The port belongs after the host ("host:port"), not in front of it.
        return url.getProtocol() + "://" + host + port + path + query;
    }

    /**
     * URL-decodes a string using {@code FileHelper.DEFAULT_ENCODING}.
     *
     * @param str the encoded string
     * @return the decoded string
     * @throws IllegalStateException if the encoding is not supported by the runtime
     */
    public static String decodeParameter(String str) {
        final String decoded;
        try {
            decoded = URLDecoder.decode(str, FileHelper.DEFAULT_ENCODING);
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("UTF-8 encoding unsupported. This should not happen.", e);
        }
        return decoded;
    }

    /**
     * Lenient variant of {@link #decodeParameter(String)}: if decoding fails with an
     * {@link IllegalArgumentException}, the input is returned as is.
     *
     * @param str the possibly encoded string
     * @return the decoded string, or {@code str} itself if it could not be decoded
     */
    private static String tryDecodeParameter(String str) {
        String result = str;
        try {
            result = decodeParameter(str);
        } catch (IllegalArgumentException ignored) {
            // Not decodable: keep the raw value.
        }
        return result;
    }

    /**
     * URL-encodes a string using {@code FileHelper.DEFAULT_ENCODING}.
     *
     * @param str the string to encode
     * @return the encoded string
     * @throws IllegalStateException if the encoding is not supported by the runtime
     */
    public static String encodeParameter(String str) {
        final String encoded;
        try {
            encoded = URLEncoder.encode(str, FileHelper.DEFAULT_ENCODING);
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("UTF-8 encoding unsupported. This should not happen.", e);
        }
        return encoded;
    }

    /**
     * Finds URLs in a text by applying {@link #URL_PATTERN}.
     *
     * @param str the text to search
     * @return the matches as returned by {@code StringHelper.getRegexpMatches}
     */
    public static List<String> extractUrls(String str) {
        return StringHelper.getRegexpMatches(URL_PATTERN, str);
    }

    /**
     * Tells whether a URL points to a local file, i.e. uses the "file" protocol (case-insensitive) and has no host.
     *
     * @param url the URL to check, must not be {@code null}
     * @return {@code true} for a "file" URL whose host is {@code null} or empty
     */
    public static boolean isLocalFile(URL url) {
        if (!"file".equalsIgnoreCase(url.getProtocol())) {
            return false;
        }
        String host = url.getHost();
        return host == null || host.isEmpty();
    }

    /**
     * Builds a query string ("k1=v1&amp;k2=v2") from key/value pairs. Keys and values are encoded with
     * {@link #encodeParameter(String)}; a pair whose value is {@code null} contributes only its key, without '='.
     *
     * @param list the parameters, must not be {@code null}
     * @return the parameter string, without a leading '?'
     * @throws NullPointerException if {@code list} is {@code null}
     */
    public static String createParameterString(List<Pair<String, String>> list) {
        Validate.notNull(list, "parameters must not be null", new Object[0]);
        StringBuilder query = new StringBuilder();
        String separator = "";
        for (Pair<String, String> parameter : list) {
            query.append(separator);
            separator = "&";
            query.append(encodeParameter(parameter.getKey()));
            String value = parameter.getValue();
            if (value == null) {
                continue;
            }
            query.append('=').append(encodeParameter(value));
        }
        return query.toString();
    }

    /**
     * Parses the query parameters of a URL into key/value pairs, in order of appearance.
     *
     * <p>The query is the part between the first '?' and the fragment (first '#'). Each "&amp;"-separated entry is
     * split at its first '='; key and value are URL-decoded leniently (undecodable text is kept as is). An entry
     * without '=' gets a {@code null} value, an entry ending in '=' gets an empty value.
     *
     * @param str the URL, must not be {@code null}
     * @return the parameters; empty if the URL has no query part
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static List<Pair<String, String>> parseParams(String str) {
        Validate.notNull(str, "parameterString must not be null", new Object[0]);
        List<Pair<String, String>> params = new ArrayList<>();
        // Cut the fragment first: a '?' behind '#' belongs to the fragment, not to a query.
        int fragmentStart = str.indexOf('#');
        String withoutFragment = fragmentStart == -1 ? str : str.substring(0, fragmentStart);
        int queryStart = withoutFragment.indexOf('?');
        if (queryStart == -1) {
            return params;
        }
        for (String entry : withoutFragment.substring(queryStart + 1).split("&")) {
            // Split on the raw text: the decoded key may differ in length from the encoded one, and
            // String.split("=") returns an empty array for a bare "=".
            int separator = entry.indexOf('=');
            String rawKey = separator == -1 ? entry : entry.substring(0, separator);
            String value = separator == -1 ? null : tryDecodeParameter(entry.substring(separator + 1));
            params.add(Pair.of(tryDecodeParameter(rawKey), value));
        }
        return params;
    }

    /**
     * Returns the URL without query string and fragment, i.e. everything before the first '?' or '#', whichever
     * comes first.
     *
     * @param str the URL, must not be {@code null}
     * @return the base part of the URL; {@code str} itself if it contains neither '?' nor '#'
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String parseBaseUrl(String str) {
        Validate.notNull(str, "url must not be null", new Object[0]);
        int queryStart = str.indexOf('?');
        int fragmentStart = str.indexOf('#');
        final int end;
        if (queryStart == -1) {
            end = fragmentStart;
        } else if (fragmentStart == -1) {
            end = queryStart;
        } else {
            // Both present: cut at the earlier one so a fragment in front of a '?' is not kept.
            end = Math.min(queryStart, fragmentStart);
        }
        return end == -1 ? str : str.substring(0, end);
    }

    /**
     * Checks a URL with a commons-validator {@link UrlValidator} configured for the schemes "http" and "https".
     *
     * @param str the URL to validate
     * @return the result of {@link UrlValidator#isValid(String)}
     */
    public static boolean isValidUrl(String str) {
        String[] allowedSchemes = {"http", "https"};
        UrlValidator validator = new UrlValidator(allowedSchemes);
        return validator.isValid(str);
    }

    /*
     * Loads the domain suffixes from the classpath resource /domains.txt (blank lines and lines starting with '#'
     * are skipped), sorts them with StringLengthComparator and derives the three constants that depend on them.
     * The order of the statements matters: the list must be sorted before it is published and before the
     * top-level-domain alternation is built, and URL_PATTERN needs TOP_LEVEL_DOMAINS.
     */
    static {
        final List<String> suffixes = new ArrayList<>();
        FileHelper.performActionOnEveryLine(UrlHelper.class.getResourceAsStream("/domains.txt"), new LineAction() {
            @Override
            public void performAction(String str, int i) {
                String line = str.trim();
                if (line.isEmpty() || line.startsWith("#")) {
                    return;
                }
                suffixes.add(line);
            }
        });
        suffixes.sort(StringLengthComparator.INSTANCE);
        // Entries with exactly one dot are plain top-level domains; substring(1) drops their first character.
        TOP_LEVEL_DOMAINS = suffixes.stream()
                .filter(suffix -> StringHelper.countOccurrences(suffix, ".") == 1)
                .map(suffix -> suffix.substring(1))
                .collect(Collectors.joining("|"));
        DOMAIN_SUFFIXES = Collections.unmodifiableList(suffixes);
        URL_PATTERN = Pattern.compile("\\b(?:https?://)?([0-9a-zäöü-]{1,63}?\\.)+(?:" + TOP_LEVEL_DOMAINS + ")(?:[?/](?:\\([^\\s()<>\\[\\]\"']{0,255}\\)|[^\\s()<>\\[\\]\"']{0,255})+(?:\\([^\\s()<>\\[\\]\"']{0,255}\\)|[^\\s.,;!?:()<>\\[\\]\"'])|/|\\b)", Pattern.CASE_INSENSITIVE);
    }
}
