package ws.palladian.extraction.location.scope.evaluation;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.commons.lang3.Validate;
import ws.palladian.core.Instance;
import ws.palladian.extraction.location.ImmutableLocation;
import ws.palladian.extraction.location.LocationType;
import ws.palladian.extraction.location.evaluation.LocationDocument;
import ws.palladian.helper.ProgressMonitor;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.geo.GeoCoordinate;
import ws.palladian.helper.geo.ImmutableGeoCoordinate;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.retrieval.wiki.WikiPage;

/* loaded from: input_file:ws/palladian/extraction/location/scope/evaluation/WikipediaLocationScopeIterator.class */
/**
 * Iterates over a directory of MediaWiki page dumps and exposes each page as a
 * {@link LocationDocument}.
 *
 * <p>The list of files is collected once in the constructor via
 * {@code FileHelper.getFiles(path, "mediawiki")}; every call to {@link #iterator()} walks that same
 * list from the beginning. Each file is read and parsed lazily, when the iterator reaches it.
 */
public final class WikipediaLocationScopeIterator implements Iterable<LocationDocument> {
    /** Directory passed to the constructor; only used for {@link #toString()}. */
    private final File datasetPath;

    /** Files found in {@link #datasetPath}, optionally shuffled at construction time. */
    private final File[] wikiPages;

    /**
     * Creates an iterator over the given directory, keeping the file order returned by
     * {@code FileHelper.getFiles}.
     *
     * @param file the dataset directory, must not be {@code null}
     * @throws IllegalArgumentException if {@code file} is not a directory or no wiki pages are found
     */
    public WikipediaLocationScopeIterator(File file) {
        this(file, false);
    }

    /**
     * Creates an iterator over the given directory.
     *
     * @param file the dataset directory, must not be {@code null}
     * @param z    if {@code true}, the collected file array is shuffled once via
     *             {@code CollectionHelper.shuffle} before any iteration takes place
     * @throws IllegalArgumentException if {@code file} is not a directory or no wiki pages are found
     */
    public WikipediaLocationScopeIterator(File file, boolean z) {
        Validate.notNull(file, "datasetPath must not be null");
        if (!file.isDirectory()) {
            throw new IllegalArgumentException(file + " does not point to a directory.");
        }
        this.datasetPath = file;
        this.wikiPages = FileHelper.getFiles(file.getPath(), "mediawiki");
        if (this.wikiPages.length == 0) {
            throw new IllegalArgumentException("No wiki pages found at " + file + ".");
        }
        if (z) {
            CollectionHelper.shuffle(this.wikiPages);
        }
    }

    /**
     * Returns a fresh iterator starting at the first wiki page. A new {@link ProgressMonitor} task
     * sized to the number of files is started for every iterator created.
     *
     * @return an iterator producing one {@link LocationDocument} per wiki page file; it does not
     *         support {@link Iterator#remove()}
     */
    @Override
    public Iterator<LocationDocument> iterator() {
        return new Iterator<LocationDocument>() {
            private int idx = 0;
            private final ProgressMonitor monitor = new ProgressMonitor();

            {
                // The cast selects the String overload of startTask; a bare null could be ambiguous.
                this.monitor.startTask((String) null, WikipediaLocationScopeIterator.this.wikiPages.length);
            }

            @Override
            public boolean hasNext() {
                return this.idx < WikipediaLocationScopeIterator.this.wikiPages.length;
            }

            /**
             * Reads and parses the next wiki page.
             *
             * @throws NoSuchElementException if all pages have already been returned
             * @throws IllegalStateException  if the page file cannot be read
             */
            @Override
            public LocationDocument next() {
                // Honour the Iterator contract: fail with NoSuchElementException (instead of an
                // ArrayIndexOutOfBoundsException) and do not advance the monitor past its size.
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                this.monitor.increment();
                // The index is advanced before reading, so a page that fails to load is skipped
                // on the following call rather than retried.
                File pageFile = WikipediaLocationScopeIterator.this.wikiPages[this.idx++];
                try {
                    String markup = FileHelper.readFileToString(pageFile);
                    WikiPage wikiPage = new WikiPage(0, 0, Instance.NO_CATEGORY_DUMMY, markup);
                    GeoCoordinate coordinate = wikiPage.mo219getCoordinate();
                    if (coordinate != null) {
                        // Keep only latitude/longitude in an immutable copy.
                        coordinate = new ImmutableGeoCoordinate(coordinate.getLatitude(), coordinate.getLongitude());
                    }
                    // The page's coordinate (possibly null) is wrapped in a placeholder location
                    // built from the UNDETERMINED constants and the id -1.
                    ImmutableLocation scope = new ImmutableLocation(-1, LocationDocument.UNDETERMINED, LocationType.UNDETERMINED, coordinate, null);
                    return new LocationDocument(pageFile.getName(), wikiPage.getCleanText(), null, scope);
                } catch (IOException e) {
                    throw new IllegalStateException("Could not read '" + pageFile + "': " + e.getMessage(), e);
                }
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }

    @Override
    public String toString() {
        return "WikipediaLocationScopeIterator [datasetPath=" + this.datasetPath + ", numFiles=" + this.wikiPages.length + "]";
    }
}
