package ws.palladian.extraction.location.scope.evaluation;

import java.util.Locale;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.classification.utils.ClassificationUtils;
import ws.palladian.helper.ThreadHelper;
import ws.palladian.helper.constants.Language;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.retrieval.wiki.MarkupCoordinate;
import ws.palladian.retrieval.wiki.MediaWikiDescriptor;
import ws.palladian.retrieval.wiki.MediaWikiUtil;
import ws.palladian.retrieval.wiki.WikiPage;
import ws.palladian.retrieval.wiki.WikiPageReference;

/* loaded from: input_file:ws/palladian/extraction/location/scope/evaluation/WikipediaScopeDatasetCreator.class */
/**
 * Command-line tool that repeatedly requests random English Wikipedia articles and, for every
 * article that carries a coordinate, stores the article markup as a {@code .mediawiki} file in
 * {@link #OUTPUT_DIR} and appends a {@code file name, latitude, longitude} line to
 * {@code _coordinates.csv} in the same directory.
 *
 * <p>The work is done by {@link #NUM_THREADS} worker threads which share the request, coordinate
 * and error counters of this class.
 *
 * <p>NOTE(review): the method names {@code m237create} and {@code mo241getCoordinate} look like
 * decompiler-generated aliases (presumably of {@code create} and {@code getCoordinate}); they are
 * kept unchanged here because their declarations are not visible from this file -- confirm.
 */
public class WikipediaScopeDatasetCreator {
    /** Number of worker threads started by {@link #main(String[])}. */
    private static final int NUM_THREADS = 10;

    /** Directory which receives the article files and the coordinate index. */
    private static final String OUTPUT_DIR = "/Users/pk/Desktop/wikipediaScopeDataset";

    /** CSV index with one line per stored article: file name, latitude, longitude. */
    private static final String COORDINATES_FILE = OUTPUT_DIR + "/_coordinates.csv";

    /** Number of errors without an intermediate stored article after which workers give up. */
    private static final int MAX_SUBSEQUENT_ERROR_COUNT = 10;

    /** Pause after a failed request before the worker tries again. */
    private static final long ERROR_BACKOFF_MILLIS = TimeUnit.SECONDS.toMillis(30L);

    private static final Logger LOGGER = LoggerFactory.getLogger(WikipediaScopeDatasetCreator.class);
    private static final MediaWikiDescriptor WIKIPEDIA_EN = MediaWikiDescriptor.Builder.wikimedia().language(Language.ENGLISH).m237create();

    /** Total number of random-article requests sent by all workers. */
    private static final AtomicInteger counter = new AtomicInteger();

    /** Number of articles with a coordinate that have been written to disk. */
    private static final AtomicInteger coordinateCounter = new AtomicInteger();

    /** Errors seen since the last stored article; shared by all workers. */
    private static final AtomicInteger errorCounter = new AtomicInteger();

    /**
     * Worker which fetches and stores random articles in an endless loop. The loop only ends when
     * the shared error counter reaches {@link #MAX_SUBSEQUENT_ERROR_COUNT}, in which case the
     * causing {@link IllegalStateException} is rethrown and terminates the thread.
     */
    private static final class RequestThread extends Thread {
        private RequestThread() {
        }

        @Override
        public void run() {
            while (true) {
                try {
                    processRandomArticle();
                } catch (IllegalStateException e) {
                    int subsequentErrors = errorCounter.incrementAndGet();
                    // ">=" rather than "==": the counter is shared, so only one worker ever
                    // observes the exact limit; every worker must stop once it is reached.
                    if (subsequentErrors >= MAX_SUBSEQUENT_ERROR_COUNT) {
                        throw e;
                    }
                    LOGGER.warn("Encountered {}, waiting for some seconds, {} subsequent errors so far", e.toString(), subsequentErrors);
                    ThreadHelper.deepSleep(ERROR_BACKOFF_MILLIS);
                }
            }
        }

        /**
         * Requests one random article, counts the request and stores the article if it is not a
         * "List of ..." page and has a coordinate.
         */
        private static void processRandomArticle() {
            WikiPageReference reference = MediaWikiUtil.retrieveRandomArticle(WIKIPEDIA_EN);
            counter.incrementAndGet();
            if (isListArticle(reference.getTitle())) {
                return;
            }
            WikiPage page = MediaWikiUtil.retrieveArticle(WIKIPEDIA_EN, reference.getTitle());
            MarkupCoordinate coordinate = page.mo241getCoordinate();
            if (coordinate == null) {
                return;
            }
            storeArticle(page, coordinate);
            coordinateCounter.incrementAndGet();
            // An article was stored, so the streak of errors is over.
            errorCounter.set(0);
            printProgress();
        }

        /** Returns whether the title starts with "list of", ignoring case. */
        private static boolean isListArticle(String title) {
            // Locale.ROOT keeps the case mapping independent of the JVM's default locale.
            return title.toLowerCase(Locale.ROOT).startsWith("list of");
        }

        /** Writes the article markup to its own file and appends its coordinate to the index. */
        private static void storeArticle(WikiPage page, MarkupCoordinate coordinate) {
            String fileName = toFileName(page.getTitle());
            FileHelper.writeToFile(OUTPUT_DIR + "/" + fileName, page.getMarkup());
            String csvLine = fileName + ClassificationUtils.DEFAULT_SEPARATOR + coordinate.getLatitude() + ClassificationUtils.DEFAULT_SEPARATOR + coordinate.getLongitude() + "\n";
            FileHelper.appendFile(COORDINATES_FILE, csvLine);
        }

        /**
         * Derives the file name for an article: whitespace, ';' and '/' become '_', runs of '_'
         * are collapsed to a single one and the extension ".mediawiki" is appended.
         */
        private static String toFileName(String title) {
            String sanitized = title.replaceAll("\\s", "_").replace(';', '_').replace('/', '_').replaceAll("_+", "_");
            return sanitized + ".mediawiki";
        }

        /** Prints the number of requests sent and the fraction of them that had a coordinate. */
        private static void printProgress() {
            // Snapshot both counters once so the printed values are consistent with each other.
            int requests = counter.get();
            int withCoordinate = coordinateCounter.get();
            // Divide as double; int division would truncate the fraction to 0.
            double fraction = (double) withCoordinate / requests;
            System.out.println(requests + " requests sent, coordinate fraction: " + fraction);
        }
    }

    /**
     * Starts {@link #NUM_THREADS} worker threads.
     *
     * @param strArr command-line arguments; not used
     */
    public static void main(String[] strArr) {
        for (int i = 0; i < NUM_THREADS; i++) {
            new RequestThread().start();
        }
    }
}
