package ws.palladian.retrieval.wiki;

import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import ws.palladian.classification.text.PalladianTextClassifier;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.functional.Consumer;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:ws/palladian/retrieval/wiki/MediaWikiPageContentHandler.class */
/**
 * SAX handler which assembles {@link WikiPage} instances from the elements of a MediaWiki XML
 * stream and hands each finished page to a callback.
 *
 * <p>
 * The handler buffers the character content of the {@code title}, {@code ns}, text and page-level
 * {@code id} elements. {@code id} elements nested inside a {@code revision} element are ignored.
 * When a {@code page} element ends, a {@link WikiPage} is created from the values collected so far
 * and passed to the callback.
 *
 * <p>
 * NOTE(review): the collected values are not cleared between pages, so a page which lacks one of
 * the elements above is reported with the value of the preceding page. Confirm whether the input
 * guarantees that every page carries all four elements.
 */
public class MediaWikiPageContentHandler extends DefaultHandler {
    private static final Logger LOGGER = LoggerFactory.getLogger(MediaWikiPageContentHandler.class);

    /** Number of processed pages between two throughput log messages. */
    private static final int LOG_INTERVAL = 1000;

    /** Used to convert the elapsed milliseconds into a per-second rate. */
    private static final double MILLIS_PER_SECOND = TimeUnit.SECONDS.toMillis(1);

    /*
     * NOTE(review): this constant comes from the text classifier and looks like a decompiler
     * substitution for an inlined string literal naming the page text element; verify its value
     * before replacing it.
     */
    private static final String ELEMENT_TEXT = PalladianTextClassifier.VECTOR_TEXT_IDENTIFIER;
    private static final String ELEMENT_TITLE = "title";
    private static final String ELEMENT_NAMESPACE = "ns";
    private static final String ELEMENT_ID = "id";
    private static final String ELEMENT_REVISION = "revision";
    private static final String ELEMENT_PAGE = "page";

    /** Number of pages handed to the callback so far. */
    private int pageCounter;

    /** Created in the constructor; its elapsed time is the basis of the throughput log. */
    private final StopWatch stopWatch;

    /** Receives every completed page. */
    private final Consumer<WikiPage> callback;

    /** Collects character data of the element currently being read. */
    private StringBuilder buffer = new StringBuilder();

    /** Whether {@link #characters(char[], int, int)} should append to {@link #buffer}. */
    private boolean bufferText = false;

    /** Whether the parser is currently inside a {@code revision} element. */
    private boolean inRevision = false;

    private String title;
    private int pageId;
    private int namespaceId;
    private String text;

    /**
     * Creates a new handler. (The decompiler reports that this constructor was originally
     * package-private.)
     *
     * @param callback The consumer which receives each parsed page, not <code>null</code>.
     */
    public MediaWikiPageContentHandler(Consumer<WikiPage> callback) {
        Validate.notNull(callback, "callback must not be null");
        this.callback = callback;
        this.stopWatch = new StopWatch();
    }

    /**
     * Enables buffering for elements whose content is needed and tracks entry into a
     * {@code revision} element.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        boolean pageLevelId = qName.equals(ELEMENT_ID) && !inRevision;
        if (qName.equals(ELEMENT_TEXT) || qName.equals(ELEMENT_TITLE) || qName.equals(ELEMENT_NAMESPACE)
                || pageLevelId) {
            bufferText = true;
        }
        if (qName.equals(ELEMENT_REVISION)) {
            inRevision = true;
        }
    }

    /**
     * Stores the buffered content of a finished element in the matching field and emits the page
     * once its {@code page} element ends.
     *
     * @throws NumberFormatException If the content of a page-level {@code id} or an {@code ns}
     *         element is not an integer.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (qName.equals(ELEMENT_REVISION)) {
            inRevision = false;
        } else if (qName.equals(ELEMENT_ID) && !inRevision) {
            // trim, so that whitespace around the number does not make parsing fail
            pageId = Integer.parseInt(takeBuffer().trim());
        } else if (qName.equals(ELEMENT_TEXT)) {
            text = takeBuffer();
        } else if (qName.equals(ELEMENT_TITLE)) {
            title = takeBuffer();
        } else if (qName.equals(ELEMENT_NAMESPACE)) {
            namespaceId = Integer.parseInt(takeBuffer().trim());
        } else if (qName.equals(ELEMENT_PAGE)) {
            processPage();
        }
    }

    /**
     * Counts the page, logs the throughput every {@value #LOG_INTERVAL} pages and passes the page
     * to the callback.
     */
    private void processPage() {
        pageCounter++;
        if (pageCounter % LOG_INTERVAL == 0) {
            // Divide by milliseconds instead of truncated whole seconds; the latter yields a
            // division by zero during the first second and a distorted rate for short runs.
            long elapsedMillis = Math.max(1L, stopWatch.getElapsedTime());
            long pagesPerSecond = Math.round(pageCounter * MILLIS_PER_SECOND / elapsedMillis);
            LOGGER.debug("Processed {} pages, throughput {} pages/second.", pageCounter, pagesPerSecond);
        }
        callback.process(new WikiPage(pageId, namespaceId, title, text));
    }

    /** Appends character data to the buffer while buffering is enabled. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (bufferText) {
            buffer.append(ch, start, length);
        }
    }

    /**
     * Returns the buffered content, replaces the buffer with an empty one and disables buffering.
     *
     * @return The content collected since buffering was last enabled.
     */
    private String takeBuffer() {
        String content = buffer.toString();
        buffer = new StringBuilder();
        bufferText = false;
        return content;
    }
}
