package ws.palladian.extraction.entity.tagger;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
import ws.palladian.core.Annotation;
import ws.palladian.core.ImmutableAnnotation;
import ws.palladian.extraction.entity.Annotations;
import ws.palladian.extraction.entity.NamedEntityRecognizer;
import ws.palladian.helper.html.XPathHelper;
import ws.palladian.retrieval.HttpException;
import ws.palladian.retrieval.HttpRequest;
import ws.palladian.retrieval.HttpResult;
import ws.palladian.retrieval.HttpRetriever;
import ws.palladian.retrieval.HttpRetrieverFactory;
import ws.palladian.retrieval.parser.DocumentParser;
import ws.palladian.retrieval.parser.ParserException;
import ws.palladian.retrieval.parser.ParserFactory;

/* loaded from: input_file:ws/palladian/extraction/entity/tagger/DigmapNer.class */
/**
 * Named entity recognizer backed by the remote Digmap geoparser web service.
 *
 * <p>The input text is cut into chunks of at most {@link #MAXIMUM_TEXT_LENGTH} characters, each
 * chunk is POSTed to the service wrapped in an XML request, and every entry of the returned
 * {@code gp:EntryCollection} is turned into an {@link Annotation}.</p>
 *
 * <p>NOTE(review): annotation offsets are taken as returned for the individual chunk and are not
 * shifted by the chunk's position in the complete text. For inputs that are split into more than
 * one chunk the offsets are therefore chunk-relative — confirm whether callers expect offsets
 * relative to the whole text.</p>
 */
public class DigmapNer extends NamedEntityRecognizer {
    private static final Logger LOGGER = LoggerFactory.getLogger(DigmapNer.class);

    /** Name reported by {@link #getName()}. */
    private static final String NER_NAME = "Digmap NER";

    /** Maximum length of a single text chunk handed to the service. */
    private static final int MAXIMUM_TEXT_LENGTH = 10000;

    /** Endpoint the requests are POSTed to. */
    private static final String SERVICE_URL = "http://geoparser.digmap.eu/geoparser-dispatch";

    /** Request body template; the single {@code %s} placeholder receives the text chunk. */
    private static final String XML_REQUEST_TEMPLATE = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
            + "<GetFeature xmlns=\"http://www.opengis.net/gp\""
            + " xmlns:wfs=\"http://www.opengis.net/wfs\""
            + " xmlns:xsi=\"http://www.w3.org/2000/10/XMLSchema-instance\""
            + " xsi:schemaLocation=\"http://www.opengis.net/gp ../gp/GetFeatureRequest.xsd http://www.opengis.net/wfs ../wfs/GetFeatureRequest.xsd\""
            + " wfs:outputFormat=\"GML2\">"
            + "<wfs:Query wfs:TypeName=\"PlaceName\" />"
            + "<wfs:Query wfs:TypeName=\"DateTime\" />"
            + "<wfs:Query wfs:TypeName=\"People\" />"
            + "<wfs:Query wfs:TypeName=\"Organizations\" />"
            + "<Resource mime=\"text/plain\">%s</Resource>"
            + "</GetFeature>";

    /** Prefix-to-URI mapping used when evaluating the XPath expressions on the response. */
    private static final Map<String, String> NAMESPACE_MAPPING = new HashMap<>();

    static {
        NAMESPACE_MAPPING.put("gp", "http://www.opengis.net/gp");
    }

    private final HttpRetriever httpRetriever = HttpRetrieverFactory.getHttpRetriever();
    private final DocumentParser xmlParser = ParserFactory.createXmlParser();

    /**
     * Sends the given text chunk-wise to the service and collects the annotations from all
     * responses.
     *
     * @param str the text to annotate
     * @return the sorted annotations extracted from all chunks
     * @throws IllegalStateException if an HTTP request fails, a response cannot be parsed, or a
     *         response entry lacks a label or carries unusable offsets
     */
    @Override // ws.palladian.extraction.entity.NamedEntityRecognizer, ws.palladian.core.Tagger
    public Annotations<Annotation> getAnnotations(String str) {
        Annotations<Annotation> annotations = new Annotations<>();
        // '&' is swapped for '+' (same length, so offsets are unaffected) before the text is
        // embedded in the XML template.
        // NOTE(review): other XML-significant characters such as '<' are embedded unescaped.
        String escapedText = str.replace("&", "+");
        List<String> chunks = NerHelper.createSentenceChunks(escapedText, MAXIMUM_TEXT_LENGTH);
        LOGGER.debug("Sending {} text chunks, total text length {}", chunks.size(), str.length());
        for (String chunk : chunks) {
            HttpResult httpResult = null;
            try {
                httpResult = getHttpResult(chunk);
                for (Node entry : XPathHelper.getNodes(this.xmlParser.parse(httpResult), "//gp:EntryCollection/*", NAMESPACE_MAPPING)) {
                    annotations.add(toAnnotation(entry, chunk));
                }
            } catch (ParserException e) {
                throw new IllegalStateException("Error while parsing the result XML: " + e.getMessage() + ", XML content was: " + httpResult.getStringContent(), e);
            } catch (HttpException e2) {
                throw new IllegalStateException("Error while performing HTTP request: " + e2.getMessage(), e2);
            }
        }
        annotations.sort();
        return annotations;
    }

    /**
     * Converts one child element of the response's {@code gp:EntryCollection} into an annotation.
     * The annotated value is cut out of the chunk using the entry's start/end offsets; the entry's
     * label text is passed on as the third constructor argument.
     *
     * @param entry the response element describing one recognized entity
     * @param chunk the text chunk that was sent for this response
     * @return the annotation for the entry
     * @throws IllegalStateException if label or offsets are missing, not numeric, or outside the chunk
     */
    private static ImmutableAnnotation toAnnotation(Node entry, String chunk) {
        Node labelNode = XPathHelper.getNode(entry, "./gp:Label/text()", NAMESPACE_MAPPING);
        // "Ocurrence" (single 'c') is the element name the original query uses; left untouched.
        Node startNode = XPathHelper.getNode(entry, "./gp:Ocurrence/gp:Range/@start", NAMESPACE_MAPPING);
        Node endNode = XPathHelper.getNode(entry, "./gp:Ocurrence/gp:Range/@end", NAMESPACE_MAPPING);
        // Presumably the XPath helper yields null for a missing node; fail with context instead
        // of a bare NullPointerException.
        if (labelNode == null || startNode == null || endNode == null) {
            throw new IllegalStateException("Unexpected response entry '" + entry.getNodeName()
                    + "': label or range offsets are missing");
        }
        int start = parseOffset(startNode, "start");
        int end = parseOffset(endNode, "end");
        if (start < 0 || end < start || end > chunk.length()) {
            throw new IllegalStateException("Response entry '" + entry.getNodeName() + "' has range [" + start
                    + ", " + end + ") outside of the sent text chunk of length " + chunk.length());
        }
        return new ImmutableAnnotation(start, chunk.substring(start, end), labelNode.getTextContent());
    }

    /**
     * Reads an integer offset from the text content of the given node.
     *
     * @param offsetNode the node holding the offset
     * @param attributeName name used in the error message
     * @return the parsed offset
     * @throws IllegalStateException if the content is not a valid integer
     */
    private static int parseOffset(Node offsetNode, String attributeName) {
        String rawValue = offsetNode.getTextContent();
        try {
            return Integer.parseInt(rawValue);
        } catch (NumberFormatException e) {
            throw new IllegalStateException("Response contains a non-numeric '" + attributeName + "' offset: " + rawValue, e);
        }
    }

    /**
     * POSTs one text chunk, wrapped in the XML request template, to the service.
     *
     * @param str the text chunk to send
     * @return the raw HTTP result
     * @throws HttpException if executing the request fails
     */
    private HttpResult getHttpResult(String str) throws HttpException {
        HttpRequest httpRequest = new HttpRequest(HttpRequest.HttpMethod.POST, SERVICE_URL);
        httpRequest.addHeader("Accept", "application/xml");
        httpRequest.addHeader("Content-type", "application/x-www-form-urlencoded");
        httpRequest.addParameter("request", String.format(XML_REQUEST_TEMPLATE, str));
        httpRequest.addParameter("button", "GeoParse");
        return this.httpRetriever.execute(httpRequest);
    }

    /**
     * @return the display name of this recognizer
     */
    @Override // ws.palladian.extraction.entity.NamedEntityRecognizer
    public String getName() {
        return NER_NAME;
    }

    /**
     * Small manual demo: tags a sample sentence and prints the result. Requires network access
     * to the service.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        System.out.println(new DigmapNer().tag("John J. Smith and the Nexus One location mention Seattle in the text John J. Smith lives in Seattle. He wants to buy an iPhone 4 or a Samsung i7110 phone."));
    }
}
