package com.marklogic.mapreduce.examples;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XPathCompiler;
import net.sf.saxon.s9api.XPathSelector;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.xml.sax.SAXException;

/* JADX INFO: Access modifiers changed from: package-private */
/* compiled from: LinkCountHDFS.java */
/* loaded from: input_file:com/marklogic/mapreduce/examples/LinkRecordReader.class */
/**
 * Record reader that extracts link titles from an XML file.
 *
 * <p>On {@link #initialize}, the whole file referenced by the split is parsed
 * into a DOM (the split's offset and length are not consulted), wrapped for
 * Saxon, and queried with {@link #PATH_EXPRESSION}. Every matching
 * {@code @title} attribute becomes one record: the key is the zero-based
 * position of the match and the value is the attribute's string value.
 *
 * <p>Instances keep mutable iteration state ({@code count}, {@code items}) and
 * are not written for concurrent use; only the XML builders are kept per thread.
 */
public class LinkRecordReader extends RecordReader<IntWritable, Text> {
    /**
     * Selects the {@code title} attribute of every {@code wp:a} element that has
     * both {@code title} and {@code href}, skipping hrefs that start with
     * {@code #}, {@code http://}, {@code File:} or {@code Image:}.
     */
    private static final String PATH_EXPRESSION = "//wp:a[@title and @href and not (starts-with(@href, '#') or starts-with(@href, 'http://') or starts-with(@href, 'File:')  or starts-with(@href, 'Image:'))]/@title";

    /** Namespace bound to the {@code wp} prefix used in {@link #PATH_EXPRESSION}. */
    private static final String WP_NAMESPACE = "http://www.mediawiki.org/xml/export-0.4/";

    /** Reused key holder; created lazily by {@link #nextKeyValue()}. */
    private IntWritable key;

    /** Reused value holder; created lazily by {@link #nextKeyValue()}. */
    private Text value;

    /** Index of the next item to emit, i.e. the number of records already returned. */
    private int count = 0;

    /** XPath matches collected by {@link #initialize}; {@code null} until it has run. */
    private List<XdmItem> items;

    /**
     * One JAXP DocumentBuilder per thread.
     * NOTE(review): the factory is used with its default settings (no DTD /
     * external-entity hardening) - confirm the input files are trusted.
     */
    private static final ThreadLocal<DocumentBuilder> builderLocal = new ThreadLocal<DocumentBuilder>() {
        @Override
        protected DocumentBuilder initialValue() {
            try {
                return DocumentBuilderFactory.newInstance().newDocumentBuilder();
            } catch (ParserConfigurationException e) {
                // Fail here with the real cause instead of handing out null and
                // triggering an unexplained NullPointerException in initialize().
                throw new IllegalStateException("Unable to create an XML DocumentBuilder", e);
            }
        }
    };

    /** Saxon processor shared by all readers; created with {@code false} as its constructor argument. */
    private static final Processor proc = new Processor(false);

    /** One Saxon DocumentBuilder per thread, created from {@link #proc}. */
    private static final ThreadLocal<net.sf.saxon.s9api.DocumentBuilder> saxonBuilderLocal = new ThreadLocal<net.sf.saxon.s9api.DocumentBuilder>() {
        @Override
        protected net.sf.saxon.s9api.DocumentBuilder initialValue() {
            return LinkRecordReader.proc.newDocumentBuilder();
        }
    };

    LinkRecordReader() {
    }

    /** Nothing to release: the input stream is already closed by {@link #initialize}. */
    public void close() throws IOException {
    }

    /**
     * Returns the key set by the last successful {@link #nextKeyValue()} call.
     *
     * @return the current key, or {@code null} if no record has been read yet
     */
    public IntWritable getCurrentKey() throws IOException, InterruptedException {
        return this.key;
    }

    /**
     * Returns the value set by the last successful {@link #nextKeyValue()} call.
     *
     * @return the current value, or {@code null} if no record has been read yet
     */
    public Text getCurrentValue() throws IOException, InterruptedException {
        return this.value;
    }

    /**
     * Decompiler-mangled alias of {@link #getCurrentKey()}, retained only so that
     * any existing caller of this name keeps compiling.
     *
     * @return same as {@link #getCurrentKey()}
     */
    @Deprecated
    public IntWritable m13getCurrentKey() throws IOException, InterruptedException {
        return getCurrentKey();
    }

    /**
     * Decompiler-mangled alias of {@link #getCurrentValue()}, retained only so
     * that any existing caller of this name keeps compiling.
     *
     * @return same as {@link #getCurrentValue()}
     */
    @Deprecated
    public Text m12getCurrentValue() throws IOException, InterruptedException {
        return getCurrentValue();
    }

    /**
     * Reports the fraction of collected items that have been emitted.
     *
     * @return {@code 0} before {@link #initialize} has run, {@code 1} when the
     *         document produced no matches, otherwise {@code count / items.size()}
     *         computed in floating point
     */
    public float getProgress() throws IOException, InterruptedException {
        if (this.items == null) {
            return 0.0f;
        }
        if (this.items.isEmpty()) {
            // Nothing to emit, so the reader is already finished; this also
            // avoids dividing by zero.
            return 1.0f;
        }
        // Cast first: int / int would truncate to 0 for every partial result.
        return (float) this.count / this.items.size();
    }

    /**
     * Parses the file behind the split and collects all XPath matches.
     *
     * @param inputSplit         split to read; must be a {@link FileSplit}
     * @param taskAttemptContext supplies the configuration used to resolve the file system
     * @throws IOException if the file cannot be read, is not well-formed XML,
     *                     or the XPath expression cannot be compiled or evaluated
     */
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        Path path = ((FileSplit) inputSplit).getPath();
        InputStream in = path.getFileSystem(taskAttemptContext.getConfiguration()).open(path);
        try {
            XdmNode document = saxonBuilderLocal.get().wrap(builderLocal.get().parse(in));
            XPathCompiler compiler = proc.newXPathCompiler();
            compiler.declareNamespace("wp", WP_NAMESPACE);
            XPathSelector selector = compiler.compile(PATH_EXPRESSION).load();
            selector.setContextItem(document);

            List<XdmItem> matches = new ArrayList<XdmItem>();
            for (XdmItem item : selector) {
                matches.add(item);
            }
            // Publish only a fully built list so a failure never leaves partial state.
            this.items = matches;
        } catch (SAXException e) {
            throw new IOException("Failed to parse XML from " + path, e);
        } catch (SaxonApiException e) {
            // Previously only printed; that left items null and surfaced later
            // as a NullPointerException in nextKeyValue().
            throw new IOException("Failed to evaluate link XPath on " + path, e);
        } finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /**
     * Advances to the next collected item.
     *
     * @return {@code true} if a key/value pair was loaded; {@code false} when all
     *         items have been emitted or {@link #initialize} has not run
     */
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (this.items == null || this.count >= this.items.size()) {
            return false;
        }
        if (this.key == null) {
            this.key = new IntWritable();
        }
        if (this.value == null) {
            this.value = new Text();
        }
        // The key is the index of the item being emitted; advance afterwards.
        this.key.set(this.count);
        this.value.set(this.items.get(this.count).getStringValue());
        this.count++;
        return true;
    }
}
