package ws.palladian.extraction.location.sources.importers;

import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import ws.palladian.extraction.location.AlternativeName;
import ws.palladian.extraction.location.ImmutableLocation;
import ws.palladian.extraction.location.LocationType;
import ws.palladian.extraction.location.sources.LocationStore;
import ws.palladian.helper.ProgressReporter;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.functional.Consumer;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.io.ProgressReporterInputStream;
import ws.palladian.retrieval.wiki.InfoboxTypeMapper;
import ws.palladian.retrieval.wiki.MarkupCoordinate;
import ws.palladian.retrieval.wiki.MediaWikiUtil;
import ws.palladian.retrieval.wiki.MultiStreamBZip2InputStream;
import ws.palladian.retrieval.wiki.WikiPage;
import ws.palladian.retrieval.wiki.WikiTemplate;

/* loaded from: input_file:ws/palladian/extraction/location/sources/importers/WikipediaLocationImporter.class */
/**
 * Imports locations from a bzip2-compressed MediaWiki XML dump into a {@link LocationStore}.
 *
 * <p>
 * The import runs in up to two passes over the same dump file: the first pass stores every
 * non-redirect article page which has a mappable infobox type and a coordinate; the optional second
 * pass (enabled through {@link AlternativeNameExtraction#REDIRECTS}) stores the titles of redirect
 * pages as alternative names of the locations collected in the first pass.
 */
public class WikipediaLocationImporter {
    private static final Logger LOGGER = LoggerFactory.getLogger(WikipediaLocationImporter.class);

    /** Titles matching this pattern are skipped, even if they carry an infobox and a coordinate. */
    private static final Pattern IGNORED_PAGES = Pattern.compile("(?:Geography|Battle) of .*");

    private final LocationStore locationStore;

    /** Maps the page title of each imported location to its page ID (without the ID offset). */
    private final Map<String, Integer> locationNamesIds;

    /** Value added to every page ID to obtain the ID under which a location is stored. */
    private final int idOffset;

    private final Set<AlternativeNameExtraction> nameExtraction;

    private final ProgressReporter progressReporter;

    /** Sources from which alternative location names are extracted. */
    public enum AlternativeNameExtraction {
        /** Use the titles of redirect pages which point to a location page. */
        REDIRECTS,
        /** Use the alternative titles provided by the location page itself. */
        PAGE
    }

    /**
     * Creates a new importer.
     *
     * @param locationStore The store which receives the locations, not <code>null</code>.
     * @param idOffset The offset added to each page ID when saving a location, greater than or equal
     *            to zero.
     * @param progressReporter The reporter for tracking progress; it is dereferenced by
     *            {@link #importDumpBz2(File)} without a null check.
     * @param nameExtraction The sources for alternative names; none means that no alternative names
     *            are imported.
     */
    public WikipediaLocationImporter(LocationStore locationStore, int idOffset, ProgressReporter progressReporter, AlternativeNameExtraction... nameExtraction) {
        Validate.notNull(locationStore, "locationStore must not be null");
        Validate.isTrue(idOffset >= 0);
        this.locationStore = locationStore;
        this.idOffset = idOffset;
        this.locationNamesIds = new HashMap<String, Integer>();
        this.nameExtraction = new HashSet<AlternativeNameExtraction>(Arrays.asList(nameExtraction));
        this.progressReporter = progressReporter;
    }

    /**
     * Imports the locations (and, if {@link AlternativeNameExtraction#REDIRECTS} is enabled, the
     * alternative names from redirects) from the given dump file.
     *
     * @param file The dump file; must be an existing file whose name ends with <code>.bz2</code>.
     * @throws IllegalArgumentException In case the given file does not exist, is no file, or does not
     *             have the <code>.bz2</code> extension.
     * @throws IllegalStateException In case reading or parsing the dump fails; the original exception
     *             is attached as cause.
     */
    public void importDumpBz2(File file) {
        Validate.notNull(file, "dumpXml must not be null");
        if (!file.isFile()) {
            throw new IllegalArgumentException("At least one of the given dump paths does not exist or is no file");
        }
        if (!file.getName().endsWith(".bz2")) {
            throw new IllegalArgumentException("XML dump file must be of type .bz2");
        }
        StopWatch stopWatch = new StopWatch();
        progressReporter.startTask((String) null, -1L);
        locationStore.startImport();
        boolean importRedirects = nameExtraction.contains(AlternativeNameExtraction.REDIRECTS);
        InputStream pageStream = null;
        InputStream redirectStream = null;
        try {
            // when redirects are imported, the file is read twice and each pass gets half of the progress
            pageStream = new MultiStreamBZip2InputStream(new ProgressReporterInputStream(file, progressReporter.createSubProgress(importRedirects ? 0.5d : 1.0d)));
            LOGGER.info("Reading location data from {}", file);
            importLocationPages(pageStream);
            if (importRedirects) {
                // second pass; relies on the title-to-ID mapping which was filled by the first pass
                redirectStream = new MultiStreamBZip2InputStream(new ProgressReporterInputStream(file, progressReporter.createSubProgress(0.5d)));
                LOGGER.info("Reading location alternative names from redirects in {}", file);
                importAlternativeNames(redirectStream);
            } else {
                LOGGER.info("Skip reading location alternative names from redirects.");
            }
        } catch (IOException e) {
            // also covers FileNotFoundException
            throw new IllegalStateException(e);
        } catch (ParserConfigurationException e) {
            throw new IllegalStateException(e);
        } catch (SAXException e) {
            throw new IllegalStateException(e);
        } finally {
            // release both streams on success and on failure; a stream which was never opened is null
            FileHelper.close(pageStream, redirectStream);
        }
        locationStore.finishImport();
        LOGGER.info("Finished import in {}", stopWatch);
    }

    /**
     * Reads all pages from the given dump stream and saves those which qualify as location.
     *
     * @param inputStream The (uncompressed) stream of the XML dump.
     * @throws ParserConfigurationException In case the parser could not be created.
     * @throws SAXException In case parsing fails.
     * @throws IOException In case reading the stream fails.
     */
    void importLocationPages(InputStream inputStream) throws ParserConfigurationException, SAXException, IOException {
        // single-element array, so that the anonymous class is able to modify the counter
        final int[] locationCount = {0};
        MediaWikiUtil.parseDump(inputStream, new Consumer<WikiPage>() {
            public void process(WikiPage page) {
                if (importLocationPage(page)) {
                    locationCount[0]++;
                }
            }
        });
        LOGGER.info("Finished importing {} locations", locationCount[0]);
    }

    /**
     * Reads all pages from the given dump stream and saves the titles of redirects which point to a
     * previously imported location as alternative names.
     *
     * @param inputStream The (uncompressed) stream of the XML dump.
     * @throws ParserConfigurationException In case the parser could not be created.
     * @throws SAXException In case parsing fails.
     * @throws IOException In case reading the stream fails.
     */
    void importAlternativeNames(InputStream inputStream) throws ParserConfigurationException, SAXException, IOException {
        // single-element array, so that the anonymous class is able to modify the counter
        final int[] nameCount = {0};
        MediaWikiUtil.parseDump(inputStream, new Consumer<WikiPage>() {
            public void process(WikiPage page) {
                if (importRedirect(page)) {
                    nameCount[0]++;
                }
            }
        });
        LOGGER.info("Finished importing {} alternative names", nameCount[0]);
    }

    /**
     * Saves the given page as location, provided that it is a non-redirect page in namespace 0, is
     * not blacklisted, has an infobox with a mapped location type, and has a coordinate.
     *
     * @return <code>true</code> in case the page was saved as location, <code>false</code> otherwise.
     */
    private boolean importLocationPage(WikiPage page) {
        if (page.getNamespaceId() != 0 || page.isRedirect()) {
            return false;
        }
        String title = page.getTitle();
        if (IGNORED_PAGES.matcher(title).matches()) {
            LOGGER.debug("Ignoring '{}' by blacklist", title);
            return false;
        }
        List<WikiTemplate> infoboxes = page.getInfoboxes();
        if (infoboxes.isEmpty()) {
            LOGGER.debug("Page '{}' has no infobox; skip", title);
            return false;
        }
        LocationType type = mapLocationType(infoboxes);
        if (type == null) {
            LOGGER.debug("Unmapped type for '{}'; ignore", title);
            return false;
        }
        MarkupCoordinate coordinate = determineCoordinate(page, infoboxes);
        if (coordinate == null) {
            return false;
        }
        String cleanTitle = page.getCleanTitle();
        int pageId = Integer.parseInt(page.getIdentifier());
        int locationId = pageId + idOffset;
        locationStore.save(new ImmutableLocation(locationId, cleanTitle, type, coordinate, coordinate.getPopulation()));
        LOGGER.trace("Saved location with ID {}, name {}", page.getIdentifier(), cleanTitle);
        // remember the raw page ID under the raw title; redirects are resolved against this mapping
        locationNamesIds.put(title, pageId);
        if (nameExtraction.contains(AlternativeNameExtraction.PAGE)) {
            saveAlternativeTitles(page, locationId, cleanTitle);
        }
        return true;
    }

    /** Returns the location type of the first infobox whose name can be mapped, or <code>null</code>. */
    private static LocationType mapLocationType(List<WikiTemplate> infoboxes) {
        for (WikiTemplate infobox : infoboxes) {
            LocationType type = InfoboxTypeMapper.getLocationType(infobox.getName());
            if (type != null) {
                return type;
            }
        }
        return null;
    }

    /** Returns the page's own coordinate, or falls back to the infoboxes' coordinates; may be <code>null</code>. */
    private static MarkupCoordinate determineCoordinate(WikiPage page, List<WikiTemplate> infoboxes) {
        MarkupCoordinate coordinate = page.mo241getCoordinate();
        if (coordinate == null) {
            // NOTE(review): there is no early exit here, so the last infobox which provides
            // coordinates wins -- confirm whether this is intended.
            for (WikiTemplate infobox : infoboxes) {
                Set<MarkupCoordinate> coordinates = infobox.getCoordinates();
                if (coordinates.size() > 0) {
                    coordinate = (MarkupCoordinate) CollectionHelper.getFirst(coordinates);
                }
            }
        }
        return coordinate;
    }

    /** Saves the page's non-blank alternative titles which differ from the clean title. */
    private void saveAlternativeTitles(WikiPage page, int locationId, String cleanTitle) {
        List<String> alternativeTitles = page.getAlternativeTitles();
        if (alternativeTitles.isEmpty()) {
            return;
        }
        Set<AlternativeName> alternativeNames = new HashSet<AlternativeName>();
        for (String alternativeTitle : alternativeTitles) {
            if (StringUtils.isNotBlank(alternativeTitle) && !alternativeTitle.equals(cleanTitle)) {
                alternativeNames.add(new AlternativeName(alternativeTitle));
            }
        }
        locationStore.addAlternativeNames(locationId, alternativeNames);
        LOGGER.debug("Extracted {} alternative names from page", alternativeNames.size());
    }

    /**
     * Saves the title of the given page as alternative name, provided that the page is a redirect in
     * namespace 0 which points to a previously imported location.
     *
     * @return <code>true</code> in case an alternative name was saved, <code>false</code> otherwise.
     */
    private boolean importRedirect(WikiPage page) {
        if (page.getNamespaceId() != 0 || !page.isRedirect()) {
            return false;
        }
        String targetTitle = page.getRedirectTitle();
        if (targetTitle.contains("#")) {
            LOGGER.debug("Skip anchor redirect '{}'", targetTitle);
            return false;
        }
        Integer pageId = locationNamesIds.get(targetTitle);
        if (pageId == null) {
            // the redirect's target was not imported as location
            return false;
        }
        String cleanTitle = page.getCleanTitle();
        if (cleanTitle.startsWith(targetTitle + "/")) {
            LOGGER.debug("Skip redirect from '{}' to '{}'", cleanTitle, targetTitle);
            return false;
        }
        locationStore.addAlternativeNames(pageId.intValue() + idOffset, Collections.singleton(new AlternativeName(cleanTitle)));
        LOGGER.debug("Save alternative name {} for location with ID {}", cleanTitle, pageId);
        return true;
    }
}
