package ws.palladian.extraction.location.disambiguation;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.Validate;
import ws.palladian.core.CategoryEntries;
import ws.palladian.core.InstanceBuilder;
import ws.palladian.extraction.location.ClassifiedAnnotation;
import ws.palladian.extraction.location.Location;
import ws.palladian.extraction.location.LocationExtractorUtils;
import ws.palladian.extraction.location.LocationFilters;
import ws.palladian.extraction.location.LocationSet;
import ws.palladian.extraction.location.LocationType;
import ws.palladian.extraction.location.scope.ScopeDetector;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.collection.DefaultMultiMap;
import ws.palladian.helper.collection.MultiMap;
import ws.palladian.helper.functional.Filters;
import ws.palladian.helper.geo.GeoCoordinate;
import ws.palladian.helper.nlp.StringHelper;
import ws.palladian.retrieval.search.Searcher;
import ws.palladian.retrieval.search.SearcherException;

/* loaded from: input_file:ws/palladian/extraction/location/disambiguation/ConfigurableFeatureExtractor.class */
/**
 * {@link LocationFeatureExtractor} whose output is driven by a {@link FeatureExtractorSetting}.
 *
 * <p>
 * For every candidate {@link Location} of every annotation, a feature vector is assembled from the
 * annotation text, the location's own properties, its relation to the other locations mentioned in
 * the same text, and (depending on the setting) text scopes, index hit counts, location markers and
 * debug information.
 */
public class ConfigurableFeatureExtractor implements LocationFeatureExtractor {
    /** Determines which feature groups are created and with which parameters. */
    private final FeatureExtractorSetting setting;

    /**
     * Creates an extractor using the given setting.
     *
     * @param featureExtractorSetting the setting to use, not <code>null</code>
     */
    public ConfigurableFeatureExtractor(FeatureExtractorSetting featureExtractorSetting) {
        Validate.notNull(featureExtractorSetting, "settings must not be null");
        this.setting = featureExtractorSetting;
    }

    /** Creates an extractor using {@link FeatureExtractorSetting#DEFAULT}. */
    public ConfigurableFeatureExtractor() {
        this(FeatureExtractorSetting.DEFAULT);
    }

    /**
     * Builds one {@link ClassifiableLocation} per candidate location.
     *
     * <p>
     * Annotations without candidates are skipped. When several annotations share the same value,
     * only the first one encountered during iteration is processed.
     *
     * @param str the text from which the annotations were taken; passed to the scope detectors and,
     *        in debug mode, hashed into the {@code textHash} feature
     * @param multiMap the annotations together with their candidate locations
     * @return the candidate locations with their feature vectors
     */
    @Override
    public Set<ClassifiableLocation> extract(String str, MultiMap<ClassifiedAnnotation, Location> multiMap) {
        Set<ClassifiableLocation> result = new HashSet<>();

        // Sets which depend on the whole text only, computed once up front.
        LocationSet allLocations = new LocationSet(multiMap.allValues());
        LocationSet uniqueLocations = new LocationSet(getUniqueLocations(multiMap.values()));
        LocationSet continents = allLocations.where(LocationFilters.type(LocationType.CONTINENT));
        LocationSet countries = allLocations.where(LocationFilters.type(LocationType.COUNTRY));
        LocationSet units = allLocations.where(LocationFilters.type(LocationType.UNIT));
        List<GeoCoordinate> scopes = determineTextScopes(str);
        MultiMap<Location, String> mentions = createMentionMap(multiMap);

        Set<String> processedValues = new HashSet<>();
        for (Map.Entry<ClassifiedAnnotation, Collection<Location>> entry : multiMap.entrySet()) {
            ClassifiedAnnotation annotation = entry.getKey();
            Collection<Location> candidates = entry.getValue();
            if (candidates.isEmpty()) {
                continue;
            }
            String value = annotation.getValue();
            if (!processedValues.add(value)) {
                continue; // an annotation with the same value has been handled already
            }

            // Everything below is identical for all candidates of the current annotation.
            String normalizedValue = LocationExtractorUtils.normalizeName(value);
            LocationSet candidateSet = new LocationSet(candidates);
            LocationSet others = allLocations.where(Filters.not(Filters.equal(candidates)));
            int numCharacters = value.length();
            int numTokens = value.split("\\s").length;
            // either all upper case letters ("USA") or upper case letters each followed by a dot ("U.S.A.")
            boolean acronym = value.matches("[A-Z]+|([A-Z]\\.)+");
            String caseSignature = StringHelper.getCaseSignature(normalizedValue);
            double nameAmbiguity = 1.0d / candidates.size();
            double geoDiversity = candidateSet.largestDistance();
            boolean unique = candidateSet.where(LocationFilters.coordinate()).largestDistance() < this.setting.getEqualDistance();
            Map<String, Long> indexCounts = getIndexCounts(normalizedValue);

            for (Location location : candidates) {
                // fall back to neutral values, so that the features below can always be computed
                long population = CollectionHelper.coalesce(location.getPopulation(), 0L);
                GeoCoordinate coordinate = CollectionHelper.coalesce(location.getCoordinate(), GeoCoordinate.NULL);

                // NOTE: the order of the set(...) calls is kept exactly as it was; do not reorder
                // without checking whether consumers rely on the feature order.
                InstanceBuilder builder = new InstanceBuilder();
                builder.set("numCharacters", numCharacters);
                builder.set("numTokens", numTokens);
                builder.set("acronym", acronym);
                builder.set("caseSignature", caseSignature);
                createMarkerFeatures(value, builder);
                builder.set("locationType", location.getType().toString());
                builder.set("population", population);
                if (this.setting.useHierarchyFeatures()) {
                    builder.set("hierarchyDepth", location.getAncestorIds().size());
                }
                builder.set("nameAmbiguity", nameAmbiguity);
                if (this.setting.useHierarchyFeatures()) {
                    // a leaf has no child among the candidates of the same annotation
                    builder.set("leaf", candidateSet.where(LocationFilters.childOf(location)).size() == 0);
                }
                builder.set("nameDiversity", 1.0d / location.collectAlternativeNames().size());
                builder.set("geoDiversity", geoDiversity);
                builder.set("unique", unique);
                builder.set("altMention", mentions.get(location).size() > 1);

                if (this.setting.useHierarchyFeatures()) {
                    int numAncestors = others.where(LocationFilters.ancestorOf(location)).size();
                    int numChildren = others.where(LocationFilters.childOf(location)).size();
                    int numDescendants = others.where(LocationFilters.descendantOf(location)).size();
                    int numParents = others.where(LocationFilters.parentOf(location)).size();
                    int numSiblings = others.where(LocationFilters.siblingOf(location)).size();
                    builder.set("contains(ancestor)", numAncestors > 0);
                    builder.set("contains(child)", numChildren > 0);
                    builder.set("contains(descendant)", numDescendants > 0);
                    builder.set("contains(parent)", numParents > 0);
                    builder.set("contains(sibling)", numSiblings > 0);
                    // there is no "num(parent)" feature; only the boolean variant is emitted
                    builder.set("num(ancestor)", numAncestors);
                    builder.set("num(child)", numChildren);
                    builder.set("num(descendant)", numDescendants);
                    builder.set("num(sibling)", numSiblings);
                }

                for (int distance : this.setting.getDistanceValues()) {
                    LocationSet othersInRadius = others.where(LocationFilters.radius(coordinate, distance));
                    LocationSet allInRadius = allLocations.where(LocationFilters.radius(coordinate, distance));
                    builder.set(String.format("numLocIn(%d)", distance), othersInRadius.size());
                    builder.set(String.format("popIn(%d,true)", distance), allInRadius.totalPopulation());
                    builder.set(String.format("popIn(%d,false)", distance),
                            othersInRadius.where(Filters.not(Filters.equal(location))).totalPopulation());
                    builder.set(String.format("uniqueIn(%d)", distance),
                            uniqueLocations.where(LocationFilters.radius(coordinate, distance)).size() > 0);
                }

                for (int minPopulation : this.setting.getPopulationValues()) {
                    // "false" variants ignore the other candidates of this annotation and the location itself
                    double distanceToOther = others.where(LocationFilters.population(minPopulation))
                            .where(Filters.not(Filters.equal(location))).minDistance(coordinate);
                    double distanceToAny = allLocations.where(LocationFilters.population(minPopulation)).minDistance(coordinate);
                    builder.set(String.format("distLoc(%d,true)", minPopulation), distanceToAny);
                    builder.set(String.format("distLoc(%d,false)", minPopulation), distanceToOther);
                    for (int distance : this.setting.getDistanceValues()) {
                        builder.set(String.format("hasLoc(%d,%d,true)", minPopulation, distance), distanceToAny < distance);
                        builder.set(String.format("hasLoc(%d,%d,false)", minPopulation, distance), distanceToOther < distance);
                    }
                }

                builder.set("primaryName", value.equalsIgnoreCase(location.getPrimaryName()));
                if (this.setting.useHierarchyFeatures()) {
                    builder.set("inContinent", continents.where(LocationFilters.ancestorOf(location)).size() > 0);
                    builder.set("inCountry", countries.where(LocationFilters.ancestorOf(location)).size() > 0);
                    builder.set("inUnit", units.where(LocationFilters.ancestorOf(location)).size() > 0);
                }

                CategoryEntries categoryEntries = annotation.getCategoryEntries();
                for (String category : this.setting.getEntityCategories()) {
                    builder.set(String.format("category(%s)", category), categoryEntries.getProbability(category));
                }

                for (int i = 0; i < scopes.size(); i++) {
                    // a scope detector may not have delivered a scope for the text
                    GeoCoordinate scope = scopes.get(i);
                    builder.set("scopeDistance-" + i, coordinate.distance(scope != null ? scope : GeoCoordinate.NULL));
                }

                for (Map.Entry<String, Long> indexEntry : indexCounts.entrySet()) {
                    String indexName = indexEntry.getKey();
                    long count = indexEntry.getValue();
                    builder.set(String.format("indexCount(%s)", indexName), count);
                    // +1 avoids a division by zero; this is an integer division (long / long), the
                    // fractional part is dropped. NOTE(review): kept as is for compatibility with
                    // existing models -- confirm whether a floating point quotient was intended.
                    builder.set(String.format("indexPopulationQuotient(%s)", indexName), population / (count + 1));
                }

                if (this.setting.isDebug()) {
                    builder.set("textHash", StringHelper.sha1(str));
                    builder.set("annotationOffset", annotation.getStartPosition());
                    builder.set("annotationValue", value);
                    builder.set("locationId", String.valueOf(location.getId()));
                }

                result.add(new ClassifiableLocation(location, builder.create()));
            }
        }
        return result;
    }

    /**
     * Inverts the annotation-to-location mapping: for each location, collects the distinct
     * annotation values for which it is a candidate.
     *
     * @param locations the annotations with their candidate locations
     * @return mapping from location to the annotation values referring to it
     */
    private MultiMap<Location, String> createMentionMap(MultiMap<ClassifiedAnnotation, Location> locations) {
        MultiMap<Location, String> mentionMap = DefaultMultiMap.createWithSet();
        for (Map.Entry<ClassifiedAnnotation, Collection<Location>> entry : locations.entrySet()) {
            String value = entry.getKey().getValue();
            for (Location location : entry.getValue()) {
                mentionMap.add(location, value);
            }
        }
        return mentionMap;
    }

    /**
     * Runs every configured {@link ScopeDetector} on the text.
     *
     * @param text the text to analyze
     * @return one entry per scope detector, in the order given by the setting; entries are stored
     *         as returned by the detector (the caller treats <code>null</code> entries as "no scope")
     */
    private List<GeoCoordinate> determineTextScopes(String text) {
        List<GeoCoordinate> scopes = new ArrayList<>();
        for (ScopeDetector scopeDetector : this.setting.getScopeDetectors()) {
            scopes.add(scopeDetector.getScope(text));
        }
        return scopes;
    }

    /**
     * Queries every configured index searcher for the number of results for the given name; the
     * name is wrapped in double quotes before it is sent.
     *
     * @param name the (normalized) name to look up
     * @return mapping from searcher name to total result count
     * @throws IllegalStateException if a searcher fails; the {@link SearcherException} is the cause
     */
    private Map<String, Long> getIndexCounts(String name) {
        Map<String, Long> counts = new HashMap<>();
        String query = String.format("\"%s\"", name);
        for (Searcher<?> searcher : this.setting.getIndexSearchers()) {
            try {
                counts.put(searcher.getName(), Long.valueOf(searcher.getTotalResultCount(query)));
            } catch (SearcherException e) {
                throw new IllegalStateException(String.format("Error while searching for %s with %s: %s", query, searcher, e.getMessage()), e);
            }
        }
        return counts;
    }

    /**
     * Adds one boolean feature per configured location marker, telling whether the marker occurs
     * as word in the annotation value, plus {@code marker(*)} which is true if any marker matched.
     * Nothing is added when no markers are configured.
     *
     * @param value the annotation value
     * @param builder the builder receiving the features
     */
    private void createMarkerFeatures(String value, InstanceBuilder builder) {
        String[] markers = this.setting.getLocationMarkers();
        if (markers.length == 0) {
            return;
        }
        boolean anyMarker = false;
        for (String marker : markers) {
            boolean containsMarker = StringHelper.containsWord(marker, value);
            builder.set("marker(" + marker.toLowerCase() + ")", containsMarker);
            anyMarker |= containsMarker;
        }
        builder.set("marker(*)", anyMarker);
    }

    /**
     * Collects the locations of all candidate groups which can be considered unambiguous, i.e.
     * where the largest distance within the group (after applying
     * {@link LocationFilters#coordinate()}) is below the setting's equal distance.
     *
     * @param locationGroups the candidate groups, one per annotation
     * @return all locations from the unambiguous groups
     */
    private Set<Location> getUniqueLocations(Collection<Collection<Location>> locationGroups) {
        Set<Location> uniqueLocations = new HashSet<>();
        for (Collection<Location> group : locationGroups) {
            LocationSet withCoordinates = new LocationSet(group).where(LocationFilters.coordinate());
            if (withCoordinates.largestDistance() < this.setting.getEqualDistance()) {
                uniqueLocations.addAll(group);
            }
        }
        return uniqueLocations;
    }

    @Override
    public String toString() {
        return "ConfigurableFeatureExtractor [" + this.setting + "]";
    }
}
