package pl.edu.icm.cermine.parsing.tools;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Text;
import org.jdom.filter.Filter;
import org.jdom.input.SAXBuilder;
import org.xml.sax.InputSource;
import pl.edu.icm.cermine.parsing.model.ParsableString;
import pl.edu.icm.cermine.parsing.model.Token;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.11-SNAPSHOT.jar:pl/edu/icm/cermine/parsing/tools/NLMParsableStringExtractor.class */
/**
 * Extracts labelled parsable strings from an XML document.
 *
 * <p>Every element whose name is listed by {@link #getTags()} yields one parsable string. The
 * element's content is walked in document order: plain text is tokenized and labelled with the
 * label registered under {@link #getKeyText()}, child elements whose name is a key of
 * {@link #getTagLabelMap()} are tokenized as a whole and labelled with the mapped label, and any
 * other child element is descended into recursively.
 *
 * @param <L> label type assigned to tokens
 * @param <T> token type
 * @param <P> parsable string type produced by this extractor
 */
public abstract class NLMParsableStringExtractor<L, T extends Token<L>, P extends ParsableString<T>> {

    /** Matches text that is empty or whitespace-only; compiled once instead of per text node. */
    private static final Pattern BLANK_TEXT = Pattern.compile("^[\\s]*$");

    /**
     * @return names of the XML elements that are turned into parsable strings
     */
    protected abstract List<String> getTags();

    /**
     * @return the key of {@link #getTagLabelMap()} whose label is given to tokens that come from
     *         plain text nodes
     */
    protected abstract String getKeyText();

    /**
     * @return mapping from element name (or the {@link #getKeyText()} key) to the label assigned
     *         to the tokens of that content
     */
    protected abstract Map<String, L> getTagLabelMap();

    /**
     * @return a new parsable string that will be filled with the content of one matched element
     */
    protected abstract P createParsableString();

    /**
     * @param str text to build the parsable string from
     * @return a parsable string for {@code str}; its tokens are read by this class and re-based
     *         onto the string being assembled
     */
    protected abstract P createParsableString(String str);

    /**
     * Parses the XML from {@code inputSource} and builds one parsable string for every descendant
     * element whose name is contained in {@link #getTags()}.
     *
     * @param inputSource source of the XML document
     * @return the extracted parsable strings, in the order the matching elements are iterated
     * @throws JDOMException if the document cannot be built
     * @throws IOException if reading the source fails
     */
    public List<P> extractStrings(InputSource inputSource) throws JDOMException, IOException {
        SAXBuilder builder = new SAXBuilder("org.apache.xerces.parsers.SAXParser");
        // Validation and DTD loading are switched off; the document is only read for its content.
        builder.setValidation(false);
        builder.setFeature("http://xml.org/sax/features/validation", false);
        builder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // NOTE(review): external general/parameter entity resolution is not explicitly disabled
        // here; consider doing so if the input can come from an untrusted party.

        Filter taggedElements = new Filter() {
            @Override
            public boolean matches(Object obj) {
                return (obj instanceof Element)
                        && NLMParsableStringExtractor.this.getTags().contains(((Element) obj).getName());
            }
        };

        List<P> extracted = new ArrayList<P>();
        Iterator<?> descendants = builder.build(inputSource).getDescendants(taggedElements);
        while (descendants.hasNext()) {
            P parsable = createParsableString();
            // The filter only lets Element instances through, so the cast is safe.
            readElement((Element) descendants.next(), parsable);
            extracted.add(parsable);
        }
        return extracted;
    }

    /**
     * Appends the content of {@code element} to {@code p}, recursing into child elements that have
     * no label of their own.
     *
     * @param element element whose content is read
     * @param p parsable string being assembled
     */
    private void readElement(Element element, P p) {
        for (Object node : element.getContent()) {
            if (node instanceof Text) {
                String text = ((Text) node).getText();
                if (BLANK_TEXT.matcher(text).matches()) {
                    // Whitespace-only (or empty) text contributes a single space and no tokens.
                    p.appendText(" ");
                } else {
                    appendLabelled(p, text, getTagLabelMap().get(getKeyText()));
                }
            } else if (node instanceof Element) {
                Element child = (Element) node;
                String childName = child.getName();
                if (getTagLabelMap().containsKey(childName)) {
                    // A labelled child is taken as a whole: its full text value gets one label.
                    appendLabelled(p, child.getValue(), getTagLabelMap().get(childName));
                } else {
                    readElement(child, p);
                }
            }
            // Other node kinds (comments, processing instructions, ...) are skipped.
        }
        // clean() runs at the end of every invocation, nested ones included; what it does is
        // defined by the P implementation.
        p.clean();
    }

    /**
     * Tokenizes {@code text}, shifts every token by the current length of {@code target}'s raw
     * text, labels it and adds it to {@code target}; afterwards appends {@code text} itself.
     *
     * @param target parsable string being assembled
     * @param text text to tokenize and append
     * @param label label set on every token produced from {@code text}
     */
    private void appendLabelled(P target, String text, L label) {
        for (T token : createParsableString(text).getTokens()) {
            // Token indices are relative to `text`; re-base them onto the text gathered so far.
            int offset = target.getRawText().length();
            token.setStartIndex(token.getStartIndex() + offset);
            token.setEndIndex(token.getEndIndex() + offset);
            token.setLabel(label);
            target.addToken(token);
        }
        target.appendText(text);
    }
}
