package ws.palladian.retrieval;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.date.DateHelper;
import ws.palladian.helper.html.XPathHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.retrieval.parser.ParserFactory;

/* loaded from: input_file:ws/palladian/retrieval/ScrapeTldList.class */
/**
 * Maintenance utility that rebuilds the combined {@code domains.txt} resource.
 *
 * <p>{@link #main(String[])} merges the entries scraped from tld-list.com and Wikipedia with the
 * entries of the existing {@code top-level-domains.txt} and {@code second-level-domains.txt}
 * files, sorts them and writes the result. {@link #compare()} reports the differences between the
 * generated file and the two older files.</p>
 *
 * <p>All file paths are relative, so the tool has to be started from a working directory in which
 * {@code ../palladian-commons/src/main/resources/} resolves.</p>
 */
class ScrapeTldList {
    private static final String RESOURCE_DIR = "../palladian-commons/src/main/resources/";
    private static final String TOP_LEVEL_DOMAINS_FILE = RESOURCE_DIR + "top-level-domains.txt";
    private static final String SECOND_LEVEL_DOMAINS_FILE = RESOURCE_DIR + "second-level-domains.txt";
    private static final String DOMAINS_FILE = RESOURCE_DIR + "domains.txt";

    private static final String TLD_LIST_URL = "https://tld-list.com/tlds-from-a-z";
    private static final String WIKIPEDIA_URL = "https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains";

    ScrapeTldList() {
    }

    /**
     * Scrapes both web sources, merges them with the two existing list files and writes the
     * de-duplicated, sorted result (preceded by a {@code # last update} header line) to
     * {@code domains.txt}.
     *
     * @param strArr command-line arguments; not used
     * @throws Exception if downloading, parsing or file access fails
     */
    public static void main(String[] strArr) throws Exception {
        // A set removes entries that occur in more than one source.
        Set<String> domains = new HashSet<>();
        domains.addAll(extractFromTLDList());
        domains.addAll(extractFromWikipedia());
        domains.addAll(FileHelper.readFileToArray(TOP_LEVEL_DOMAINS_FILE));
        domains.addAll(FileHelper.readFileToArray(SECOND_LEVEL_DOMAINS_FILE));
        // Blank lines or empty link texts are never valid domains; keep them out of the output.
        domains.removeIf(entry -> entry.trim().isEmpty());

        List<String> lines = new ArrayList<>(domains);
        lines.sort(Comparator.comparing(ScrapeTldList::reverseLabels));

        // Remember the count before the header is inserted so the summary reports domains only.
        int domainCount = lines.size();
        lines.add(0, "# last update: " + DateHelper.getCurrentDatetime() + " based on https://tld-list.com/tlds-from-a-z, https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains, and hand-curated entries");
        FileHelper.writeToFile(DOMAINS_FILE, lines);
        System.out.println("Wrote " + domainCount + " domains");
    }

    /**
     * Builds the sort key for a domain by reversing the order of its dot-separated labels, so
     * that entries are ordered by their last label first (e.g. {@code .co.uk} becomes
     * {@code uk.co.}).
     */
    private static String reverseLabels(String domain) {
        // Arrays.asList is a fixed-size view, but in-place reversal only swaps elements.
        List<String> labels = Arrays.asList(domain.split("\\."));
        Collections.reverse(labels);
        return String.join(".", labels);
    }

    /**
     * Downloads the given page, parses it as HTML and returns the trimmed text content of every
     * node matched by the XPath expression.
     */
    private static List<String> fetchTextContents(String url, String xPath) throws Exception {
        return XPathHelper.getXhtmlNodes(ParserFactory.createHtmlParser().parse(HttpRetrieverFactory.getHttpRetriever().httpGet(url)), xPath)
                .stream()
                .map(node -> node.getTextContent().trim())
                .collect(Collectors.toList());
    }

    /**
     * Reads the link texts of the {@code feature-list} entries on tld-list.com.
     *
     * @return the trimmed text of each matched link, in document order
     * @throws Exception if the page cannot be retrieved or parsed
     */
    protected static List<String> extractFromTLDList() throws Exception {
        return fetchTextContents(TLD_LIST_URL, "//ul[@class=\"feature-list\"]/li/a");
    }

    /**
     * Reads the first column of every {@code wikitable} on the Wikipedia TLD list page and
     * returns the tokens that start with a dot.
     *
     * @return the dot-prefixed tokens found in the first table column, in document order
     * @throws Exception if the page cannot be retrieved or parsed
     */
    protected static List<String> extractFromWikipedia() throws Exception {
        return fetchTextContents(WIKIPEDIA_URL, "//table[contains(@class,\"wikitable\")]/tbody/tr/td[1]")
                .stream()
                // Strip each bracketed marker on its own; a greedy ".*" would also swallow any
                // domains located between the first "[" and the last "]" of a cell.
                .map(text -> text.replaceAll("\\[[^\\]]*\\]", ""))
                // A cell may list several domains separated by whitespace and optional commas.
                .flatMap(text -> Arrays.stream(text.split(",?\\s")))
                .filter(token -> token.startsWith("."))
                .collect(Collectors.toList());
    }

    /**
     * Prints how many entries of the generated {@code domains.txt} are absent from the two older
     * list files and vice versa, followed by the entries that are absent from {@code domains.txt}.
     */
    protected static void compare() {
        // The generated file carries "#" header lines that are not domains.
        List<String> newEntries = FileHelper.readFileToArray(DOMAINS_FILE)
                .stream()
                .filter(line -> !line.startsWith("#"))
                .collect(Collectors.toList());

        List<String> oldEntries = new ArrayList<>(FileHelper.readFileToArray(TOP_LEVEL_DOMAINS_FILE));
        oldEntries.addAll(FileHelper.readFileToArray(SECOND_LEVEL_DOMAINS_FILE));

        Set<String> newSet = new HashSet<>(newEntries);
        Set<String> oldSet = new HashSet<>(oldEntries);
        Set<String> common = CollectionHelper.intersect(newSet, oldSet);

        List<String> missingInOld = new ArrayList<>(newEntries);
        missingInOld.removeAll(common);
        System.out.println("Missing in old list: " + missingInOld.size());

        List<String> missingInNew = new ArrayList<>(oldEntries);
        missingInNew.removeAll(common);
        System.out.println("Missing in new list: " + missingInNew.size());
        CollectionHelper.print(missingInNew);
    }
}
