package ws.palladian.extraction.location.scope.evaluation;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import org.apache.commons.lang3.Validate;
import ws.palladian.classification.text.DictionaryTrieModel;
import ws.palladian.classification.text.FeatureSetting;
import ws.palladian.classification.text.FeatureSettingBuilder;
import ws.palladian.classification.text.PruningStrategies;
import ws.palladian.extraction.location.ImmutableLocation;
import ws.palladian.extraction.location.LocationType;
import ws.palladian.extraction.location.evaluation.LocationDocument;
import ws.palladian.extraction.location.scope.DictionaryScopeDetector;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.functional.Consumer;
import ws.palladian.helper.functional.ConsumerIteratorAdapter;
import ws.palladian.helper.functional.Filter;
import ws.palladian.helper.functional.Function;
import ws.palladian.helper.geo.ImmutableGeoCoordinate;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.retrieval.wiki.MarkupCoordinate;
import ws.palladian.retrieval.wiki.MediaWikiUtil;
import ws.palladian.retrieval.wiki.WikiPage;

/* loaded from: input_file:ws/palladian/extraction/location/scope/evaluation/WikipediaBigDatasetEvaluation.class */
public class WikipediaBigDatasetEvaluation {
    /**
     * Dump file that {@code main} hands to {@code MediaWikiUtil.parseDump}. The path is absolute and points to a
     * volume of one particular machine, so it has to be adjusted before running this class anywhere else.
     */
    private static final File WIKI_DUMP = new File("/Volumes/iMac HD/temp/enwiki-20140707-pages-articles.xml.bz2");
    /**
     * Turns a {@link WikiPage} into a {@link LocationDocument}: title and clean text are taken over from the page, the
     * annotation list is passed as {@code null}, and the page's coordinate becomes the document's single location.
     * The page must carry a coordinate; a page without one makes the conversion fail with a
     * {@link NullPointerException}.
     */
    private static final Function<WikiPage, LocationDocument> CONVERTER = new Function<WikiPage, LocationDocument>() {
        /** Name given to the synthetic location, as no real location name is known for the page. */
        private static final String UNDETERMINED = "undetermined";

        public LocationDocument compute(WikiPage wikiPage) {
            final MarkupCoordinate markup = wikiPage.getCoordinate();
            return new LocationDocument(wikiPage.getTitle(), wikiPage.getCleanText(), (List) null, undeterminedLocationAt(markup));
        }

        /** Wraps the latitude/longitude of the markup coordinate in a location with ID -1 and undetermined type. */
        private ImmutableLocation undeterminedLocationAt(MarkupCoordinate markup) {
            final ImmutableGeoCoordinate position = new ImmutableGeoCoordinate(markup.getLatitude(), markup.getLongitude());
            return new ImmutableLocation(-1, UNDETERMINED, LocationType.UNDETERMINED, position, (Long) null);
        }
    };

    /* loaded from: input_file:ws/palladian/extraction/location/scope/evaluation/WikipediaBigDatasetEvaluation$ModSplitter.class */
    /**
     * Filter that accepts a {@link WikiPage} when the remainder of its ID divided by {@code mod} lies within the
     * inclusive range {@code [min, max]}. Instances with the same {@code mod} and non-overlapping ranges therefore
     * select disjoint subsets of the pages.
     */
    private static final class ModSplitter implements Filter<WikiPage> {
        /** Divisor applied to the page ID. */
        private final int mod;
        /** Smallest accepted remainder (inclusive). */
        private final int min;
        /** Largest accepted remainder (inclusive). */
        private final int max;

        /**
         * Creates a two-way split on the parity of the page ID.
         *
         * @param evenIds {@code true} to accept IDs with remainder 0 modulo 2, {@code false} to accept those with
         *            remainder 1.
         */
        public ModSplitter(boolean evenIds) {
            this(2, parityRemainder(evenIds), parityRemainder(evenIds));
        }

        /**
         * Creates a split accepting remainders within an inclusive range.
         *
         * @param divisor The divisor applied to the page ID, must be greater than 1.
         * @param lowest The smallest accepted remainder, must not be negative.
         * @param highest The largest accepted remainder, must not be smaller than {@code lowest}.
         * @throws IllegalArgumentException presumably, via {@code Validate.isTrue}, when a constraint is violated.
         */
        public ModSplitter(int divisor, int lowest, int highest) {
            Validate.isTrue(divisor > 1, "mod must be greater 1");
            Validate.isTrue(lowest >= 0, "min must be greater/equal 0");
            Validate.isTrue(highest >= lowest, "max must be greater/equal min");
            this.mod = divisor;
            this.min = lowest;
            this.max = highest;
        }

        /** Maps the parity flag to the remainder it selects: 0 for {@code true}, 1 for {@code false}. */
        private static int parityRemainder(boolean evenIds) {
            if (evenIds) {
                return 0;
            }
            return 1;
        }

        public boolean accept(WikiPage wikiPage) {
            final int remainder = wikiPage.getId() % mod;
            if (remainder < min) {
                return false;
            }
            return remainder <= max;
        }
    }

    /**
     * Builds a dictionary scope detector model from the pages of {@link #WIKI_DUMP} and serializes it to
     * {@code enwiki-20140614-locations-1-2-words-1.0.ser} in the working directory.
     * <p>
     * Only pages in namespace 0 that carry a coordinate and whose title does not start with "list of" (ignoring
     * case) are used, and of those only the pages whose ID modulo 10 lies in 0..8.
     *
     * @param args Command line arguments; not evaluated.
     * @throws Exception declared for the adapter set-up; an {@link IOException} during serialization is caught and
     *             printed instead of being propagated.
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): no method is invoked on the adapter after construction; presumably the
        // ConsumerIteratorAdapter constructor itself drives produce/consume -- confirm against that class.
        new ConsumerIteratorAdapter<WikiPage>() {
            protected void produce(Consumer<WikiPage> consumer) throws Exception {
                MediaWikiUtil.parseDump(WIKI_DUMP, consumer);
            }

            protected void consume(Iterable<WikiPage> iterable) {
                Filter<WikiPage> geotaggedArticles = new Filter<WikiPage>() {
                    public boolean accept(WikiPage wikiPage) {
                        // Locale.ROOT keeps the prefix check independent of the JVM's default locale; with a
                        // Turkish or Azeri default, "LIST OF" would lowercase to "lıst of" and slip through.
                        return wikiPage.getNamespaceId() == 0
                                && !wikiPage.getTitle().toLowerCase(Locale.ROOT).startsWith("list of")
                                && wikiPage.getCoordinate() != null;
                    }
                };
                // Remainders 0..8 of the page ID modulo 10 are kept; remainder 9 is left out.
                Iterable convert = CollectionHelper.convert(CollectionHelper.filter(CollectionHelper.filter(iterable, geotaggedArticles), new ModSplitter(10, 0, 8)), CONVERTER);
                FeatureSetting create = FeatureSettingBuilder.words(1, 2).create();
                DictionaryTrieModel.Builder builder = new DictionaryTrieModel.Builder();
                builder.setPruningStrategy(new PruningStrategies.TermCountPruningStrategy(2));
                try {
                    // NOTE(review): the output name says 20140614 while WIKI_DUMP says 20140707 -- confirm which
                    // one is intended before relying on the file name.
                    FileHelper.serialize(new DictionaryScopeDetector.DictionaryScopeDetectorLearner(create, builder, 1.0d).train(convert), "enwiki-20140614-locations-1-2-words-1.0.ser");
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        };
    }
}
