package ws.palladian.retrieval;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import ws.palladian.helper.UrlHelper;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.html.XPathHelper;
import ws.palladian.helper.nlp.StringHelper;

/* loaded from: input_file:ws/palladian/retrieval/PageAnalyzer.class */
public final class PageAnalyzer {
    /** Logger used by the static helpers of this class. */
    public static final Logger LOGGER = LoggerFactory.getLogger(PageAnalyzer.class);

    /** Private so the class cannot be instantiated from outside; the members visible here are all static. */
    private PageAnalyzer() {
    }

    /**
     * Reads the text of the node that {@code XPathHelper.getXhtmlNode} returns for {@code //title}.
     *
     * @param document the document to query
     * @return the node's text content, or the marker string {@code "#error#"} when no node is returned
     */
    public static String getTitle(Document document) {
        Node titleNode = XPathHelper.getXhtmlNode(document, "//title");
        if (titleNode == null) {
            return "#error#";
        }
        return titleNode.getTextContent();
    }

    /**
     * Picks the most frequent table-cell XPath of the document as a candidate fact table.
     *
     * @param document the document to analyze
     * @return a three-element array: {@code [0]} the chosen XPath, {@code [1]} the constant {@code "0"},
     *         {@code [2]} a count rendered as a string (half the occurrences of the XPath, rounded up, or
     *         the full occurrence count when the {@code ...h} variant of the path was chosen)
     */
    public static String[] detectFactTable(Document document) {
        XPathSet cellPaths = getXPathSet(document);
        String tablePath = cellPaths.getHighestCountXPath(4);
        String rowCount = String.valueOf((int) Math.ceil(cellPaths.getCountOfXPath(tablePath) / 2.0d));

        if (tablePath.length() > 0) {
            // Same path with its last character replaced by 'h' (e.g. ".../td" -> ".../th").
            String headerVariant = tablePath.substring(0, tablePath.length() - 1) + "h";
            if (cellPaths.getCountOfXPath(headerVariant) == cellPaths.getCountOfXPath(tablePath)) {
                tablePath = headerVariant;
                rowCount = String.valueOf(cellPaths.getCountOfXPath(tablePath));
            }
        }

        return new String[]{tablePath, "0", rowCount};
    }

    /**
     * Collects the XPaths of all {@code td} and {@code th} nodes of the document, with every index removed
     * except those on {@code table} steps.
     *
     * @param document the document to scan
     * @return the set of collected cell XPaths
     */
    private static XPathSet getXPathSet(Document document) {
        XPathSet cellPaths = new XPathSet();
        String[] cellQueries = {"//td", "//th"};
        for (String query : cellQueries) {
            List<Node> cells = XPathHelper.getXhtmlNodes(document, query);
            if (cells == null) {
                continue;
            }
            for (Node cell : cells) {
                String cellPath = constructXPath(cell);
                cellPaths.add(removeXPathIndicesNot(cellPath, new String[]{"table"}));
            }
        }
        return cellPaths;
    }

    /**
     * Convenience overload of {@link #constructAllXPaths(Document, String, boolean, boolean)} that passes
     * {@code false} for both boolean flags.
     *
     * @param document the document to search
     * @param str the text to look for
     * @return the XPaths produced by the four-argument overload
     */
    public static Set<String> constructAllXPaths(Document document, String str) {
        return constructAllXPaths(document, str, false, false);
    }

    /**
     * Builds XPaths to all nodes below the document's last child that contain the given text.
     *
     * <p>Each found path is passed through {@code XPathHelper.addXhtmlNsToXPath}; afterwards every path that
     * is a strict prefix of the longest path is dropped.
     *
     * @param document the document to search; when {@code null} a warning is logged and an empty set returned
     * @param str the text to look for
     * @param z when {@code true}, numeric indices are stripped from every resulting path
     * @param z2 forwarded to the node traversal as its word-match flag
     * @return the resulting paths in discovery order
     */
    public static Set<String> constructAllXPaths(Document document, String str, boolean z, boolean z2) {
        Set<String> rawPaths = new LinkedHashSet<>();
        if (document == null) {
            LOGGER.warn("document was null when constructing xpaths");
            return rawPaths;
        }

        try {
            rawPaths = visit(document.getLastChild(), str, z2, rawPaths);
        } catch (Exception | StackOverflowError e) {
            LOGGER.error(document.getDocumentURI(), e);
        }

        // Normalize every path and remember the first one of maximal length.
        Set<String> qualifiedPaths = new LinkedHashSet<>();
        String longest = "";
        for (String rawPath : rawPaths) {
            String qualified = XPathHelper.addXhtmlNsToXPath(document, rawPath);
            if (z) {
                qualified = removeXPathIndices(qualified);
            }
            qualifiedPaths.add(qualified);
            if (qualified.length() > longest.length()) {
                longest = qualified;
            }
        }

        // Keep everything that is not a strict prefix of the longest path.
        Set<String> result = new LinkedHashSet<>();
        for (String candidate : qualifiedPaths) {
            boolean strictPrefixOfLongest = longest.length() > candidate.length() && longest.startsWith(candidate);
            if (!strictPrefixOfLongest) {
                result.add(candidate);
            }
        }
        return result;
    }

    /**
     * Filters XPaths by the tag name of their last step.
     *
     * <p>The last step is compared case-insensitively, without numeric index and without an
     * {@code xhtml:} prefix.
     *
     * @param set the XPaths to filter
     * @param strArr the accepted tag names
     * @return the XPaths whose last step names one of the accepted tags, in iteration order of {@code set}
     */
    public static Set<String> keepXPathPointingTo(Set<String> set, String[] strArr) {
        Set<String> acceptedTags = new HashSet<>();
        for (String tag : strArr) {
            acceptedTags.add(tag.toLowerCase());
        }

        Set<String> kept = new LinkedHashSet<>();
        for (String xPath : set) {
            String[] steps = removeXPathIndices(xPath).split("/");
            String targetTag = steps[steps.length - 1].toLowerCase().replaceAll("xhtml:", "");
            if (acceptedTags.contains(targetTag)) {
                kept.add(xPath);
            }
        }
        return kept;
    }

    /**
     * Derives one XPath that generalizes the given XPaths.
     *
     * <p>Steps: (1) determine the most frequent index-free path among the inputs; (2) pick as reference the
     * input whose index-free form agrees with that path on every comparable step, preferring more matching
     * steps and greater length; (3) for each step of the reference, drop its numeric index if any input has
     * the same step name at that position with a different index.
     *
     * @param set the XPaths to generalize
     * @return the generalized XPath, or an empty string when {@code set} is empty
     * @throws NumberFormatException if a compared step carries a non-numeric bracket expression
     */
    public static String makeMutualXPath(Set<String> set) {
        if (set.isEmpty()) {
            return "";
        }

        XPathSet indexFreePaths = new XPathSet();
        for (String xPath : set) {
            indexFreePaths.add(removeXPathIndices(xPath));
        }
        String[] dominantSteps = indexFreePaths.getHighestCountXPath().split("/");

        String reference = "";
        int bestMatches = 0;
        int bestLength = Integer.MIN_VALUE;
        for (String xPath : set) {
            String[] steps = removeXPathIndices(xPath).split("/");
            int comparable = Math.min(steps.length, dominantSteps.length);
            int matches = 0;
            while (matches < comparable && steps[matches].equals(dominantSteps[matches])) {
                matches++;
            }
            // The candidate qualifies only if no comparable step disagreed.
            boolean agrees = matches == comparable;
            LOGGER.trace("# matches={}, length={}, xPath={}", new Object[]{matches, steps.length, xPath});
            if (agrees && matches >= bestMatches && steps.length >= bestLength) {
                bestMatches = matches;
                bestLength = steps.length;
                reference = xPath;
            }
        }
        // Arguments are ordered to match the labels (length first, then matches).
        LOGGER.debug("mutual={}, length={}, matches={}", new Object[]{reference, bestLength, bestMatches});

        String[] referenceSteps = reference.split("/");
        boolean[] keepIndex = new boolean[referenceSteps.length];
        for (int k = 0; k < keepIndex.length; k++) {
            keepIndex[k] = true;
        }

        for (String xPath : set) {
            String[] steps = xPath.split("/");
            int comparable = Math.min(referenceSteps.length, steps.length);
            for (int k = 0; k < comparable; k++) {
                int referenceBracket = referenceSteps[k].indexOf("[");
                if (referenceBracket == -1) {
                    continue;
                }
                int otherBracket = steps[k].indexOf("[");
                if (otherBracket == -1) {
                    continue;
                }
                // Both steps are expected to look like "name[n]".
                int referenceIndex = Integer.parseInt(
                        referenceSteps[k].substring(referenceBracket + 1, referenceSteps[k].length() - 1));
                int otherIndex = Integer.parseInt(steps[k].substring(otherBracket + 1, steps[k].length() - 1));
                boolean sameName = referenceSteps[k].substring(0, referenceBracket)
                        .equals(steps[k].substring(0, otherBracket));
                if (sameName && referenceIndex != otherIndex) {
                    keepIndex[k] = false;
                }
            }
        }

        if (referenceSteps.length == 0) {
            // A reference consisting only of separators splits into zero steps.
            return "";
        }

        StringBuilder mutual = new StringBuilder();
        for (int k = 0; k < referenceSteps.length; k++) {
            String step = referenceSteps[k];
            int bracket = step.indexOf("[");
            if (bracket != -1 && !keepIndex[k]) {
                step = step.substring(0, bracket);
            }
            if (k > 0) {
                mutual.append('/');
            }
            mutual.append(step);
        }
        return mutual.toString();
    }

    /**
     * Recursively walks the children of {@code node} and records the XPath of every node that contains the
     * search text.
     *
     * <p>A node matches when its text content contains {@code str} (case-sensitive) or when it is not a
     * comment and its node value contains {@code str} ignoring case. With {@code z} set and a non-null node
     * value, the value must additionally contain {@code str} not directly surrounded by letters or
     * underscores.
     *
     * @param node the node whose descendants are visited
     * @param str the text to look for
     * @param z whether to require the stricter word-boundary match on node values
     * @param set the set collecting the XPaths; it is modified in place
     * @return the same set, for chaining
     */
    private static Set<String> visit(Node node, String str, boolean z, Set<String> set) {
        try {
            for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
                String value = child.getNodeValue();
                // getTextContent() is defined to return null for some node types (e.g. document type nodes).
                String text = child.getTextContent();
                boolean contained = (text != null && text.contains(str))
                        || (value != null && child.getNodeType() != Node.COMMENT_NODE
                                && value.toLowerCase().contains(str.toLowerCase()));
                if (contained) {
                    boolean accepted = !z || value == null
                            || Pattern.compile("(?<![A-Za-z_])" + Pattern.quote(str) + "(?![A-Za-z_])",
                                    Pattern.CASE_INSENSITIVE).matcher(value).find();
                    if (accepted) {
                        String xPath = constructXPath(child);
                        if (xPath.length() > 0) {
                            set.add(xPath);
                        }
                    }
                }
                set = visit(child, str, z, set);
            }
        } catch (Exception e) {
            // Pass the exception itself so the stack trace is kept (an NPE has a null message).
            LOGGER.error(e.getMessage(), e);
        }
        return set;
    }

    /**
     * Builds an absolute XPath for the given node by walking up its ancestors.
     *
     * <p>A step gets a 1-based index when the node has a following sibling or a preceding sibling of the
     * same name, except for {@code html} and {@code th} steps. A trailing {@code #text} step is cut off.
     * The first nine characters of the assembled path (the length of {@code "#document"}) are removed.
     *
     * @param node the node to describe
     * @return the XPath, or an empty string when the path leads through a {@code script} element
     */
    public static String constructXPath(Node node) {
        String path = "";
        Node current = node;
        do {
            String name = current.getNodeName();

            // 1-based position among the preceding siblings that share this node's name.
            int position = 1;
            for (Node sibling = current.getPreviousSibling(); sibling != null; sibling = sibling.getPreviousSibling()) {
                if (sibling.getNodeName().equalsIgnoreCase(name)) {
                    position++;
                }
            }

            boolean hasSiblingContext = current.getNextSibling() != null || position > 1;
            boolean indexable = !name.equalsIgnoreCase("html") && !name.equalsIgnoreCase("th");
            String step = (hasSiblingContext && indexable) ? name + "[" + position + "]" : name;

            path = step + "/" + path;
            int textStep = path.indexOf("/#text");
            if (textStep > -1) {
                path = path.substring(0, textStep);
            }
            current = current.getParentNode();
        } while (current != null);

        // Drops the leading "#document" (9 characters).
        String xPath = path.substring(9, path.length());
        String lowered = xPath.toLowerCase();
        if (lowered.contains("/script") || lowered.contains("/html:script")) {
            return "";
        }
        return xPath.endsWith("/") ? xPath.substring(0, xPath.length() - 1) : xPath;
    }

    /**
     * Builds a path from the node up to (but excluding) the {@code #document} node in which every step is
     * the node name followed by the id/class suffix from {@link #createIdClassString(Node)}.
     *
     * <p>The path is assembled leaf-first with each step reversed, then the whole string is reversed once.
     *
     * @param node the node to describe; must not be {@code null}
     * @return the assembled path
     */
    public static String constructIdClassXPath(Node node) {
        Validate.notNull(node, "node must not be null", new Object[0]);
        StringBuilder reversedPath = new StringBuilder();
        for (Node current = node; current != null; current = current.getParentNode()) {
            String name = current.getNodeName();
            if (name.equals("#document")) {
                break;
            }
            String step = name + createIdClassString(current);
            reversedPath.append(StringHelper.reverseString(step));
            reversedPath.append('/');
        }
        return StringHelper.reverseString(reversedPath.toString());
    }

    /**
     * Renders the {@code id} and {@code class} attributes of a node in selector-like form: every
     * whitespace-separated id token prefixed with {@code #}, followed by every class token prefixed with
     * {@code .}.
     *
     * @param node the node to inspect; must not be {@code null}
     * @return the rendered suffix, or an empty string when the node has no attributes or neither attribute
     */
    public static String createIdClassString(Node node) {
        Validate.notNull(node, "node must not be null", new Object[0]);
        StringBuilder suffix = new StringBuilder();
        NamedNodeMap attributes = node.getAttributes();
        if (attributes == null) {
            return suffix.toString();
        }

        String[] attributeNames = {"id", "class"};
        char[] prefixes = {'#', '.'};
        for (int k = 0; k < attributeNames.length; k++) {
            Node attribute = attributes.getNamedItem(attributeNames[k]);
            if (attribute == null) {
                continue;
            }
            String[] tokens = attribute.getNodeValue().trim().split("\\s+");
            for (String token : tokens) {
                if (!token.isEmpty()) {
                    suffix.append(prefixes[k]).append(token);
                }
            }
        }
        return suffix.toString();
    }

    /**
     * Checks whether one of the last steps of an XPath starts with {@code td} or {@code th} (optionally
     * prefixed with {@code xhtml:}), compared case-insensitively.
     *
     * @param str the XPath, with steps separated by {@code /}
     * @param i how many steps to inspect, counted from the end; the step at split index 0 is never inspected
     * @return {@code true} if such a step is found
     */
    public static boolean nodeInTable(String str, int i) {
        String[] steps = str.split("/");
        int lowerBoundExclusive = Math.max(0, (steps.length - i) - 1);
        for (int pos = steps.length - 1; pos > lowerBoundExclusive; pos--) {
            String step = steps[pos].toLowerCase();
            if (step.startsWith("td") || step.startsWith("xhtml:td")
                    || step.startsWith("th") || step.startsWith("xhtml:th")) {
                return true;
            }
        }
        return false;
    }

    /**
     * Truncates an XPath after its last step that starts with {@code td} or {@code th} (optionally prefixed
     * with {@code xhtml:}), compared case-insensitively.
     *
     * <p>The step at split index 0 (the text before the first {@code /}) is never part of the result.
     *
     * @param str the XPath, with steps separated by {@code /}
     * @return the path up to and including the last cell step, or all steps from index 1 when no cell step
     *         exists
     */
    public static String getTableCellPath(String str) {
        String[] steps = str.split("/");

        int cellIndex = steps.length - 1;
        while (cellIndex > 0) {
            String step = steps[cellIndex].toLowerCase();
            if (step.startsWith("td") || step.startsWith("xhtml:td")
                    || step.startsWith("th") || step.startsWith("xhtml:th")) {
                break;
            }
            cellIndex--;
        }
        int end = cellIndex > 0 ? cellIndex + 1 : steps.length;

        StringBuilder path = new StringBuilder();
        for (int pos = 1; pos < end; pos++) {
            path.append("/").append(steps[pos]);
        }
        return path.toString();
    }

    /**
     * Extracts the bare, lower-cased tag name of the last step of an XPath: the {@code xhtml:} prefix and
     * any numeric index are removed.
     *
     * @param str the XPath
     * @return the tag name of the last step, or an empty string when {@code str} contains no {@code /}
     */
    public static String getTargetNode(String str) {
        int lastSlash = str.lastIndexOf("/");
        if (lastSlash <= -1) {
            return "";
        }
        String lastStep = str.substring(lastSlash + 1).toLowerCase();
        String withoutPrefix = lastStep.replace("xhtml:", "");
        return withoutPrefix.replaceAll("\\[(\\d)+\\]", "");
    }

    /**
     * Checks whether one of the last steps of an XPath starts with {@code p} or {@code div} (optionally
     * prefixed with {@code xhtml:}), compared case-insensitively.
     *
     * @param str the XPath, with steps separated by {@code /}
     * @param i how many steps to inspect, counted from the end; the step at split index 0 is never inspected
     * @return {@code true} if such a step is found
     */
    public static boolean nodeInBox(String str, int i) {
        String[] steps = str.split("/");
        int lowerBoundExclusive = Math.max(0, (steps.length - i) - 1);
        for (int pos = steps.length - 1; pos > lowerBoundExclusive; pos--) {
            String step = steps[pos].toLowerCase();
            if (step.startsWith("p") || step.startsWith("xhtml:p")
                    || step.startsWith("div") || step.startsWith("xhtml:div")) {
                return true;
            }
        }
        return false;
    }

    /**
     * Truncates an XPath after its last step that starts with one of the box-like tag names
     * {@code p, div, td, th, li, h1..h6} (optionally prefixed with {@code xhtml:}), compared
     * case-insensitively.
     *
     * <p>The step at split index 0 (the text before the first {@code /}) is never part of the result.
     *
     * @param str the XPath, with steps separated by {@code /}
     * @return the path up to and including the last box step, or all steps from index 1 when none exists
     */
    public static String findLastBoxSection(String str) {
        String[] boxTags = {"p", "div", "td", "th", "li", "h1", "h2", "h3", "h4", "h5", "h6"};
        String[] steps = str.split("/");

        int end = steps.length;
        search:
        for (int pos = steps.length - 1; pos > 0; pos--) {
            String step = steps[pos].toLowerCase();
            for (String tag : boxTags) {
                if (step.startsWith(tag) || step.startsWith("xhtml:" + tag)) {
                    end = pos + 1;
                    break search;
                }
            }
        }

        StringBuilder path = new StringBuilder();
        for (int pos = 1; pos < end; pos++) {
            path.append("/").append(steps[pos]);
        }
        return path.toString();
    }

    /**
     * Convenience overload of {@link #getNextSibling(String, boolean)} that passes {@code false} for the
     * table-cell flag.
     *
     * @param str the XPath to advance
     * @return the result of the two-argument overload
     */
    public static String getNextSibling(String str) {
        return getNextSibling(str, false);
    }

    /**
     * Rewrites an XPath so that it addresses the next sibling.
     *
     * <p>If the last {@code /td} (or {@code /xhtml:td}) lies behind the examined index bracket and behind
     * any {@code /th}, the {@code td} steps from there on receive the index {@code [1]}. The analogous case
     * for {@code /th} turns those steps into {@code td[1]}. Otherwise the number inside the examined
     * bracket is incremented by one.
     *
     * @param str the XPath to advance
     * @param z when {@code true} the examined bracket is the one of the last {@code td[}/{@code TD[};
     *          otherwise it is the last bracket pair of the path
     * @return the rewritten XPath, or {@code str} unchanged when there is nothing to advance
     */
    public static String getNextSibling(String str, boolean z) {
        int open;
        int close;
        if (z) {
            // +2 moves from the 't' of "td[" onto the '['; a miss (-1) therefore yields 1.
            open = Math.max(str.lastIndexOf("td["), str.lastIndexOf("TD[")) + 2;
            close = str.indexOf("]", open);
        } else {
            open = str.lastIndexOf("[");
            close = str.lastIndexOf("]");
        }

        String lowered = str.toLowerCase();
        int lastTd = Math.max(lowered.lastIndexOf("/td"), lowered.lastIndexOf("/xhtml:td"));
        int lastTh = Math.max(lowered.lastIndexOf("/th"), lowered.lastIndexOf("/xhtml:th"));

        if (lastTd > close && lastTd > lastTh) {
            String tail = str.substring(lastTd)
                    .replace("/td", "/td[1]")
                    .replace("/TD", "/TD[1]")
                    .replace("/xhtml:td", "/xhtml:td[1]")
                    .replace("/xhtml:TD", "/xhtml:TD[1]");
            return str.substring(0, lastTd) + tail;
        }
        if (lastTh > close && lastTh > lastTd) {
            String tail = str.substring(lastTh)
                    .replace("/th", "/td[1]")
                    .replace("/TH", "/TD[1]")
                    .replace("/xhtml:th", "/xhtml:td[1]")
                    .replace("/xhtml:TH", "/xhtml:TD[1]");
            return str.substring(0, lastTh) + tail;
        }

        boolean hasUsableIndex = close > open && open != 1;
        if (!hasUsableIndex) {
            return str;
        }
        int nextIndex = Integer.parseInt(str.substring(open + 1, close)) + 1;
        return str.substring(0, open + 1) + nextIndex + str.substring(close);
    }

    /**
     * Shorthand for {@link #getNextSibling(String, boolean)} with the table-cell flag set to {@code true}.
     *
     * @param str the XPath to advance
     * @return the result of {@code getNextSibling(str, true)}
     */
    public static String getNextTableCell(String str) {
        return getNextSibling(str, true);
    }

    /**
     * Adds the index {@code [1]} to the {@code td} steps from the last {@code /td} on, provided that this
     * {@code /td} lies behind the bracket examined for the last {@code td[}/{@code TD[} and behind any
     * {@code /th}.
     *
     * @param str the XPath to rewrite
     * @return the rewritten XPath, or {@code str} unchanged when the condition does not hold
     */
    public static String getFirstTableCell(String str) {
        // +2 moves from the 't' of "td[" onto the '['; a miss (-1) therefore yields 1.
        int open = Math.max(str.lastIndexOf("td["), str.lastIndexOf("TD[")) + 2;
        int close = str.indexOf("]", open);

        String lowered = str.toLowerCase();
        int lastTd = Math.max(lowered.lastIndexOf("/td"), lowered.lastIndexOf("/xhtml:td"));
        int lastTh = Math.max(lowered.lastIndexOf("/th"), lowered.lastIndexOf("/xhtml:th"));

        if (lastTd > close && lastTd > lastTh) {
            String tail = str.substring(lastTd)
                    .replace("/td", "/td[1]")
                    .replace("/TD", "/TD[1]")
                    .replace("/xhtml:td", "/xhtml:td[1]")
                    .replace("/xhtml:TD", "/xhtml:TD[1]");
            return str.substring(0, lastTd) + tail;
        }
        return str;
    }

    /**
     * Counts the entries returned by {@link #getTableRows(Document, String, String)} for the given XPath
     * and its {@code getNextSibling(str, true)} counterpart.
     *
     * @param document the document containing the table
     * @param str an XPath into the table
     * @return the number of row entries found
     */
    public static int getNumberOfTableRows(Document document, String str) {
        return getTableRows(document, str, getNextSibling(str, true)).size();
    }

    /**
     * Convenience overload of {@link #getTableRows(Document, String, String)} that uses
     * {@code getNextSibling(str, true)} as the second XPath.
     *
     * @param document the document containing the table
     * @param str an XPath into the table
     * @return the row entries produced by the three-argument overload
     */
    public static List<String[]> getTableRows(Document document, String str) {
        return getTableRows(document, str, getNextSibling(str, true));
    }

    /**
     * Produces, for every {@code tr} child of the table section addressed by {@code str}, a pair of XPaths
     * in which the row index is replaced by that row's 1-based number.
     *
     * <p>If {@code str} has no indexed {@code tr[}/{@code TR[} step, both XPaths are first passed through
     * {@link #getNextTableRow(String)}. The bracket positions found in {@code str} are applied to
     * {@code str2} as well, i.e. both paths are expected to share the prefix up to the row index.
     *
     * @param document the document containing the table
     * @param str the first XPath
     * @param str2 the second XPath
     * @return one {@code String[2]} per row ({@code [0]} derived from {@code str}, {@code [1]} from
     *         {@code str2}); empty when no indexed row step or no matching parent node exists
     */
    public static List<String[]> getTableRows(Document document, String str, String str2) {
        List<String[]> rows = new ArrayList<>();
        String firstPath = str;
        String secondPath = str2;

        // +2 moves from the 't' of "tr[" onto the '['; a miss (-1) therefore yields 1.
        int open = Math.max(firstPath.lastIndexOf("tr["), firstPath.lastIndexOf("TR[")) + 2;
        int close = firstPath.indexOf("]", open);
        if (close <= open || open == 1) {
            firstPath = getNextTableRow(firstPath);
            secondPath = getNextTableRow(secondPath);
            open = Math.max(firstPath.lastIndexOf("tr["), firstPath.lastIndexOf("TR[")) + 2;
            close = firstPath.indexOf("]", open);
        }
        if (close <= open || open == 1) {
            return rows;
        }

        List<Node> sections = XPathHelper.getXhtmlNodes(document, getParentNode(firstPath.substring(0, open)));
        if (sections.size() == 0) {
            return rows;
        }

        NodeList children = sections.get(0).getChildNodes();
        int rowCount = 0;
        for (int k = 0; k < children.getLength(); k++) {
            if (children.item(k).getNodeName().toLowerCase().equals("tr")) {
                rowCount++;
            }
        }

        for (int row = 1; row <= rowCount; row++) {
            String first = firstPath.substring(0, open + 1) + row + firstPath.substring(close);
            String second = secondPath.substring(0, open + 1) + row + secondPath.substring(close);
            rows.add(new String[]{first, second});
        }
        return rows;
    }

    /**
     * Advances the last table-row step of an XPath: an unindexed {@code tr} step receives the index
     * {@code [1]}, an indexed one has its index incremented by one.
     *
     * <p>Only complete step names are considered: the case-insensitive {@code tr} must be preceded by
     * {@code /}, {@code :} or the start of the path and followed by {@code [}, {@code /} or the end of the
     * path. Occurrences inside other names (e.g. {@code strong}) are ignored, and a path ending in
     * {@code tr} is handled.
     *
     * @param str the XPath to rewrite
     * @return the rewritten XPath, or {@code str} unchanged when it contains no {@code tr} step
     * @throws NumberFormatException if the row step carries a non-numeric bracket expression
     */
    public static String getNextTableRow(String str) {
        int rowStart = -1;
        for (int pos = str.length() - 2; pos >= 0; pos--) {
            if (!str.regionMatches(true, pos, "tr", 0, 2)) {
                continue;
            }
            boolean startsStep = pos == 0 || str.charAt(pos - 1) == '/' || str.charAt(pos - 1) == ':';
            int after = pos + 2;
            boolean endsStep = after == str.length() || str.charAt(after) == '[' || str.charAt(after) == '/';
            if (startsStep && endsStep) {
                rowStart = pos;
                break;
            }
        }
        if (rowStart == -1) {
            return str;
        }

        int afterName = rowStart + 2;
        if (afterName == str.length() || str.charAt(afterName) != '[') {
            return str.substring(0, afterName) + "[1]" + str.substring(afterName);
        }

        int close = str.indexOf("]", afterName + 1);
        int nextIndex = Integer.parseInt(str.substring(afterName + 1, close)) + 1;
        return str.substring(0, afterName + 1) + nextIndex + str.substring(close);
    }

    /**
     * Cuts the last step off an XPath.
     *
     * @param str the XPath; it is expected to contain at least one {@code /}
     * @return everything before the last {@code /}
     */
    public static String getParentNode(String str) {
        int lastSlash = str.lastIndexOf("/");
        return str.substring(0, lastSlash);
    }

    /**
     * Estimates the number of columns of the table that the given XPath points into.
     *
     * <p>For every node matching the parent of the XPath's table-cell path, the widths of its
     * {@code td}/{@code th} children are summed (honoring {@code colspan}); the width shared by the most
     * such nodes wins.
     *
     * @param document the document containing the table
     * @param str an XPath into the table
     * @return the most frequent row width (at least 1), or 0 when no row node was found
     */
    public static int getNumberOfTableColumns(Document document, String str) {
        List<Node> rows = XPathHelper.getXhtmlNodes(document, getParentNode(getTableCellPath(str)));

        // Maps a row width to the number of rows having that width.
        Map<Integer, Integer> rowsPerWidth = new LinkedHashMap<>();
        for (Node row : rows) {
            List<Node> cells = XPathHelper.getXhtmlNodes(row, "./*[(self::xhtml:td) or (self::xhtml:th)]");
            int width = 0;
            for (Node cell : cells) {
                width += columnSpan(cell);
            }
            Integer seen = rowsPerWidth.get(width);
            rowsPerWidth.put(width, seen == null ? 1 : seen + 1);
        }

        if (rowsPerWidth.isEmpty()) {
            return 0;
        }
        Map<Integer, Integer> sorted = CollectionHelper.sortByValue(rowsPerWidth, CollectionHelper.Order.DESCENDING);
        int mostFrequentWidth = sorted.entrySet().iterator().next().getKey();
        return mostFrequentWidth == 0 ? 1 : mostFrequentWidth;
    }

    /** Returns the value of the cell's {@code colspan} attribute, or 1 when it is absent or not an integer. */
    private static int columnSpan(Node cell) {
        NamedNodeMap attributes = cell.getAttributes();
        if (attributes == null) {
            return 1;
        }
        for (int k = 0; k < attributes.getLength(); k++) {
            Node attribute = attributes.item(k);
            if (attribute.getNodeName().equalsIgnoreCase("colspan")) {
                try {
                    return Integer.parseInt(attribute.getNodeValue().trim());
                } catch (NumberFormatException e) {
                    // Real-world markup carries values such as "100%"; count the cell as one column.
                    return 1;
                }
            }
        }
        return 1;
    }

    /**
     * Concatenates the separated text contents of all nodes matching an XPath, each followed by a space.
     *
     * @param document the document to query
     * @param str the XPath
     * @return the collected text; an empty string when {@code document} is {@code null} or {@code str} is
     *         empty (a warning is logged); the marker {@code "#error#"} when extraction fails with an
     *         exception or an {@link OutOfMemoryError} (the failure is logged)
     */
    public static String getTextByXPath(Document document, String str) {
        if (document == null || str.length() == 0) {
            LOGGER.warn("document is NULL or xpath is empty");
            return "";
        }
        try {
            StringBuilder text = new StringBuilder();
            for (Node match : XPathHelper.getXhtmlNodes(document, str)) {
                StringBuilder nodeText = getSeparatedTextContents(match, new StringBuilder(""));
                text.append((CharSequence) nodeText).append(" ");
            }
            return text.toString();
        } catch (Exception | OutOfMemoryError e) {
            LOGGER.error(str + " " + e.getMessage());
            return "#error#";
        }
    }

    /**
     * Recursively appends the text below {@code node} to the builder: trimmed text-node values are followed
     * by a space, {@code br} children add a line break, and a line break is added after a {@code div}.
     *
     * <p>At most the first 50 children of each node are processed.
     *
     * @param node the node whose text is collected
     * @param sb the builder to append to
     * @return the builder holding the collected text
     */
    private static StringBuilder getSeparatedTextContents(Node node, StringBuilder sb) throws OutOfMemoryError {
        int processed = 0;
        Node child = node.getFirstChild();
        while (child != null && processed < 50) {
            String value = child.getNodeValue();
            if (value != null && child.getNodeType() == Node.TEXT_NODE) {
                String trimmed = StringHelper.trim(child.getNodeValue(), "-:.?!'\"");
                if (trimmed.length() > 0) {
                    sb.append(trimmed).append(" ");
                }
            }
            if (child.getNodeName().equalsIgnoreCase("br")) {
                sb.append("\n");
            }
            sb = getSeparatedTextContents(child, sb);
            child = child.getNextSibling();
            processed++;
        }
        if (node.getNodeName().equalsIgnoreCase("div")) {
            sb.append("\n");
        }
        return sb;
    }

    /**
     * Returns the text content of every node matching an XPath.
     *
     * @param document the document to query
     * @param str the XPath
     * @return one text per matching node, in match order; empty when {@code document} is {@code null}
     */
    public static List<String> getTextsByXPath(Document document, String str) {
        List<String> texts = new ArrayList<>();
        if (document != null) {
            for (Node match : XPathHelper.getXhtmlNodes(document, str)) {
                texts.add(match.getTextContent());
            }
        }
        return texts;
    }

    /**
     * Looks for the link on the page whose URL is most similar to the page's own URL.
     *
     * <p>Every {@code href} attribute value is resolved against the document URL, scored with
     * {@code StringHelper.calculateSimilarity} and stored unless it equals the document URL (ignoring
     * case). The highest-scoring URL is returned with spaces replaced by {@code %20}.
     *
     * @param document the page to analyze
     * @return the most similar linked URL, or an empty string when the document URL is {@code null} or a
     *         {@code file:} URL, when the {@code //@href} query yields {@code null}, or when no candidate
     *         was stored
     */
    public static String getSiblingPage(Document document) {
        String str = "";
        String domain = UrlHelper.getDomain(document.getDocumentURI(), true);
        String decodeParameter = UrlHelper.decodeParameter(document.getDocumentURI());
        if (decodeParameter == null || decodeParameter.startsWith("file:")) {
            return str;
        }
        String removeAnchors = UrlHelper.removeAnchors(decodeParameter);
        // Candidate URL -> similarity to the page URL.
        LinkedHashMap linkedHashMap = new LinkedHashMap();
        List nodes = XPathHelper.getNodes(document, "//@href");
        if (nodes == null) {
            return str;
        }
        for (int i = 0; i < nodes.size(); i++) {
            String makeFullUrl = UrlHelper.makeFullUrl(removeAnchors, UrlHelper.removeAnchors(((Node) nodes.get(i)).getTextContent().trim()));
            if (makeFullUrl.length() != 0) {
                String decodeParameter2 = UrlHelper.decodeParameter(makeFullUrl);
                double calculateSimilarity = StringHelper.calculateSimilarity(decodeParameter2, removeAnchors, false);
                // NOTE(review): the statements from here down to the equalsIgnoreCase check compute values
                // that are never used (page-URL extension bounds, link-URL file extension) and end in
                // empty branches. This looks like decompiler residue of a file-type filter
                // (css/js/xml/ico/rss) - confirm against the original source before relying on it.
                int lastIndexOf = removeAnchors.lastIndexOf(".");
                removeAnchors.length();
                if (lastIndexOf > domain.length() && removeAnchors.substring(lastIndexOf + 1).contains("?")) {
                    int indexOf = lastIndexOf + 1 + removeAnchors.substring(lastIndexOf + 1).indexOf("?");
                }
                int lastIndexOf2 = decodeParameter2.lastIndexOf(".");
                if (lastIndexOf2 > domain.length()) {
                    int length = decodeParameter2.length();
                    if (decodeParameter2.substring(lastIndexOf2 + 1).contains("?")) {
                        length = lastIndexOf2 + 1 + decodeParameter2.substring(lastIndexOf2 + 1).indexOf("?");
                    }
                    // Text between the last '.' and the query string (or the end of the URL).
                    String substring = decodeParameter2.substring(lastIndexOf2 + 1, length);
                    if (!substring.equalsIgnoreCase("css")) {
                        if (!substring.equalsIgnoreCase("js")) {
                            if (!substring.equalsIgnoreCase("xml")) {
                                if (!substring.equalsIgnoreCase("ico")) {
                                    if (substring.equalsIgnoreCase("rss")) {
                                    }
                                }
                            }
                        }
                    }
                }
                // The page itself is not a sibling candidate.
                if (!removeAnchors.equalsIgnoreCase(decodeParameter2)) {
                    linkedHashMap.put(decodeParameter2, Double.valueOf(calculateSimilarity));
                }
            }
        }
        Map sortByValue = CollectionHelper.sortByValue(linkedHashMap, CollectionHelper.Order.DESCENDING);
        if (sortByValue.entrySet().size() > 0) {
            try {
                // The encoded value is discarded; only spaces are escaped in the returned URL.
                URLEncoder.encode((String) ((Map.Entry) sortByValue.entrySet().iterator().next()).getKey(), "UTF-8");
                str = ((String) ((Map.Entry) sortByValue.entrySet().iterator().next()).getKey()).replace(" ", "%20");
            } catch (UnsupportedEncodingException e) {
                throw new IllegalStateException(e);
            }
        }
        LOGGER.info("sibling url: " + str);
        return str;
    }

    /**
     * Returns the text content of the first node matching {@code //title}.
     *
     * @param document the document to query
     * @return the title text, or an empty string when there is no match
     */
    public static String extractTitle(Document document) {
        for (Node title : XPathHelper.getXhtmlNodes(document, "//title")) {
            return title.getTextContent();
        }
        return "";
    }

    /**
     * Returns the text content of the first node matching {@code //body}.
     *
     * @param document the document to query
     * @return the body text, or an empty string when there is no match or when extraction fails with an
     *         exception or an {@link OutOfMemoryError} (the failure message is logged)
     */
    public static String extractBodyContent(Document document) {
        try {
            for (Object body : XPathHelper.getNodes(document, "//body")) {
                return ((Node) body).getTextContent();
            }
        } catch (Exception | OutOfMemoryError e) {
            LOGGER.error(e.getMessage());
        }
        return "";
    }

    /**
     * Collects the whitespace-separated tokens of every {@code meta} node whose {@code name} attribute
     * equals {@code description} (ignoring case) and that has a {@code content} attribute.
     *
     * @param document the document to query
     * @return the trimmed tokens of the matching {@code content} values, in document order
     */
    public static List<String> extractDescription(Document document) {
        List<String> tokens = new ArrayList<>();
        for (Object candidate : XPathHelper.getNodes(document, "//meta")) {
            Node meta = (Node) candidate;
            Node name = meta.getAttributes().getNamedItem("name");
            if (name == null) {
                continue;
            }
            Node content = meta.getAttributes().getNamedItem("content");
            if (content == null || !name.getTextContent().equalsIgnoreCase("description")) {
                continue;
            }
            for (String token : content.getTextContent().split("\\s")) {
                tokens.add(token.trim());
            }
        }
        return tokens;
    }

    /**
     * Maps the lower-cased {@code name} of every {@code meta} node to its {@code content}; nodes lacking
     * either attribute are skipped. A later node with the same name overwrites an earlier one.
     *
     * @param document the document to query
     * @return the collected name/content pairs
     */
    public static Map<String, String> extractMetaInformation(Document document) {
        Map<String, String> metaInformation = new HashMap<>();
        for (Node meta : XPathHelper.getXhtmlNodes(document, "//meta")) {
            Node name = meta.getAttributes().getNamedItem("name");
            if (name == null) {
                continue;
            }
            Node content = meta.getAttributes().getNamedItem("content");
            if (content == null) {
                continue;
            }
            String key = name.getTextContent().toLowerCase();
            metaInformation.put(key, content.getTextContent());
        }
        return metaInformation;
    }

    /**
     * Collects the comma-separated entries of every {@code meta} node whose {@code name} attribute equals
     * {@code keywords} (ignoring case) and that has a {@code content} attribute.
     *
     * @param document the document to query
     * @return the trimmed keyword entries, in document order
     */
    public static List<String> extractKeywords(Document document) {
        List<String> keywords = new ArrayList<>();
        for (Node meta : XPathHelper.getXhtmlNodes(document, "//meta")) {
            NamedNodeMap attributes = meta.getAttributes();
            Node name = attributes.getNamedItem("name");
            if (name == null) {
                continue;
            }
            Node content = attributes.getNamedItem("content");
            if (content == null || !name.getTextContent().equalsIgnoreCase("keywords")) {
                continue;
            }
            for (String keyword : content.getTextContent().split(",")) {
                keywords.add(keyword.trim());
            }
        }
        return keywords;
    }

    /**
     * Removes every numeric index ({@code [n]}) from an XPath.
     *
     * @param str the XPath
     * @return the XPath without numeric indices
     */
    public static String removeXPathIndices(String str) {
        Pattern numericIndex = Pattern.compile("\\[(\\d)+\\]");
        return numericIndex.matcher(str).replaceAll("");
    }

    /**
     * Removes one numeric index from an XPath by reversing the string, deleting the first reversed index
     * ({@code ]n[}) and reversing the result again.
     *
     * @param str the XPath
     * @return the XPath with that index removed, or the unchanged content when it has no numeric index
     */
    public static String removeXPathIndicesFromLastCountNode(String str) {
        String reversed = StringHelper.reverseString(str);
        String reversedWithoutIndex = reversed.replaceFirst("\\](\\d)+\\[", "");
        return StringHelper.reverseString(reversedWithoutIndex);
    }

    /**
     * Removes the numeric index from every occurrence of the given names in an XPath. Each name is used as
     * a regular expression immediately followed by {@code [n]}.
     *
     * @param str the XPath
     * @param strArr the names whose indices are removed
     * @return the XPath with those indices removed
     */
    public static String removeXPathIndices(String str, String[] strArr) {
        String result = str;
        for (int k = 0; k < strArr.length; k++) {
            String name = strArr[k];
            String indexedName = name + "\\[(\\d)+\\]";
            result = result.replaceAll(indexedName, name);
        }
        return result;
    }

    /**
     * Removes every numeric index from an XPath except those attached to the given names.
     *
     * <p>The protected indices are temporarily rewritten from {@code name[n]} to {@code name{n}}, all
     * remaining {@code [n]} are stripped, and the protected ones are restored. The capture group spans the
     * whole number, so indices with more than one digit are restored in full.
     *
     * @param str the XPath
     * @param strArr the names whose indices are kept; each is used as a regular expression
     * @return the XPath with only the protected indices left
     */
    public static String removeXPathIndicesNot(String str, String[] strArr) {
        String protectedPath = str;
        for (String name : strArr) {
            protectedPath = protectedPath.replaceAll(name + "\\[(\\d+)\\]", name + "\\{$1\\}");
        }
        String stripped = protectedPath.replaceAll("\\[(\\d)+\\]", "");
        for (String name : strArr) {
            stripped = stripped.replaceAll(name + "\\{(\\d+)\\}", name + "\\[$1\\]");
        }
        return stripped;
    }
}
