package org.apache.mahout.classifier.bayes;

import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

/**
 * Input format that turns every region of a file delimited by a configured start tag and end tag
 * into one record.
 *
 * <p>The tags are read from the job configuration under {@link #START_TAG_KEY} and
 * {@link #END_TAG_KEY} and are matched byte-for-byte against the raw file content; no XML parsing
 * takes place.
 */
public class XmlInputFormat extends TextInputFormat {
    /** Configuration key holding the text that opens a record. */
    public static final String START_TAG_KEY = "xmlinput.start";
    /** Configuration key holding the text that closes a record. */
    public static final String END_TAG_KEY = "xmlinput.end";

    /**
     * Record reader that scans a file split for {@code startTag ... endTag} regions and returns
     * each region, tags included, as one {@link Text} value.
     *
     * <p>A record is produced by the split in which its start tag begins; the reader keeps reading
     * past the end of the split until the matching end tag has been consumed.
     */
    public static class XmlRecordReader implements RecordReader<LongWritable, Text> {
        private final byte[] startTag;
        private final byte[] endTag;
        /** Prefix-fallback tables used to resume matching after a partial tag match fails. */
        private final int[] startTagFallback;
        private final int[] endTagFallback;
        private final long start;
        private final long end;
        private final FSDataInputStream fsin;
        private final DataOutputBuffer buffer = new DataOutputBuffer();

        /**
         * Opens the file behind {@code fileSplit} and positions the stream at the split start.
         *
         * @param fileSplit the part of the file this reader is responsible for
         * @param jobConf job configuration providing the start and end tags
         * @throws IllegalArgumentException if either tag is missing or empty in {@code jobConf}
         * @throws IOException if the file cannot be opened or positioned
         */
        public XmlRecordReader(FileSplit fileSplit, JobConf jobConf) throws IOException {
            // Validate the configuration before opening the file so a bad job setup cannot leak a stream.
            this.startTag = encodeTag(XmlInputFormat.START_TAG_KEY, jobConf.get(XmlInputFormat.START_TAG_KEY));
            this.endTag = encodeTag(XmlInputFormat.END_TAG_KEY, jobConf.get(XmlInputFormat.END_TAG_KEY));
            this.startTagFallback = buildFallbackTable(this.startTag);
            this.endTagFallback = buildFallbackTable(this.endTag);
            this.start = fileSplit.getStart();
            this.end = this.start + fileSplit.getLength();
            this.fsin = fileSplit.getPath().getFileSystem(jobConf).open(fileSplit.getPath());
            this.fsin.seek(this.start);
        }

        /**
         * Reads the next tag-delimited record.
         *
         * @param longWritable receives the stream position immediately after the record's end tag
         * @param text receives the record bytes, from the start tag through the end tag inclusive
         * @return {@code true} if a complete record was read; {@code false} if the split is
         *     exhausted or the file ends before an end tag is found
         * @throws IOException if reading from the underlying stream fails
         */
        public boolean next(LongWritable longWritable, Text text) throws IOException {
            if (this.fsin.getPos() >= this.end || !readUntilMatch(this.startTag, this.startTagFallback, false)) {
                return false;
            }
            try {
                // The start tag was consumed while searching, so put it back in front of the body.
                this.buffer.write(this.startTag);
                if (!readUntilMatch(this.endTag, this.endTagFallback, true)) {
                    return false;
                }
                longWritable.set(this.fsin.getPos());
                text.set(this.buffer.getData(), 0, this.buffer.getLength());
                return true;
            } finally {
                // Always leave the scratch buffer empty, whether a record was produced or not.
                this.buffer.reset();
            }
        }

        /** Returns a new, empty key object for use with {@link #next}. */
        public LongWritable createKey() {
            return new LongWritable();
        }

        /** Returns a new, empty value object for use with {@link #next}. */
        public Text createValue() {
            return new Text();
        }

        /** Returns the current position of the underlying stream. */
        public long getPos() throws IOException {
            return this.fsin.getPos();
        }

        /** Closes the underlying stream. */
        public void close() throws IOException {
            this.fsin.close();
        }

        /**
         * Returns the fraction of the split consumed so far, in the range {@code [0, 1]}.
         *
         * <p>A zero-length split reports {@code 0} instead of dividing by zero, and the result is
         * capped at {@code 1} because the last record of a split may extend past the split end.
         */
        public float getProgress() throws IOException {
            long splitLength = this.end - this.start;
            if (splitLength <= 0) {
                return 0.0f;
            }
            return Math.min(1.0f, (this.fsin.getPos() - this.start) / (float) splitLength);
        }

        /**
         * Consumes bytes from the stream until {@code tag} has been read in full.
         *
         * @param tag the byte sequence to look for; never empty
         * @param fallback the table produced by {@link #buildFallbackTable} for {@code tag}
         * @param withinBlock {@code true} while reading a record body: every consumed byte is
         *     appended to the buffer and the split end is ignored; {@code false} while searching
         *     for a start tag: nothing is buffered and the search stops once no candidate that
         *     begins inside the split remains
         * @return {@code true} if the tag was found, {@code false} on end of file or end of split
         */
        private boolean readUntilMatch(byte[] tag, int[] fallback, boolean withinBlock) throws IOException {
            int matched = 0;
            while (true) {
                int read = this.fsin.read();
                if (read == -1) {
                    return false;
                }
                if (withinBlock) {
                    this.buffer.write(read);
                }
                // read() yields 0..255 while tag bytes are signed; compare both as bytes so that
                // tags containing non-ASCII (UTF-8 multi-byte) characters can match.
                byte current = (byte) read;
                // On a mismatch fall back to the longest tag prefix that is still a suffix of the
                // bytes seen, instead of discarding the current byte (which would miss "<<tag>").
                while (matched > 0 && current != tag[matched]) {
                    matched = fallback[matched - 1];
                }
                if (current == tag[matched]) {
                    matched++;
                    if (matched == tag.length) {
                        return true;
                    }
                }
                // The current candidate starts at (position - matched). A start tag beginning at
                // or after the split end belongs to the next split and must not be claimed here.
                if (!withinBlock && this.fsin.getPos() - matched >= this.end) {
                    return false;
                }
            }
        }

        /** Encodes a configured tag as UTF-8 bytes, rejecting a missing or empty value. */
        private static byte[] encodeTag(String key, String value) throws IOException {
            if (value == null || value.isEmpty()) {
                throw new IllegalArgumentException(
                        "Job configuration must define a non-empty value for '" + key + "'");
            }
            return value.getBytes("utf-8");
        }

        /**
         * Builds the Knuth-Morris-Pratt prefix table for {@code pattern}: entry {@code i} is the
         * length of the longest proper prefix of {@code pattern[0..i]} that is also its suffix.
         */
        private static int[] buildFallbackTable(byte[] pattern) {
            int[] table = new int[pattern.length];
            int prefixLength = 0;
            for (int i = 1; i < pattern.length; i++) {
                while (prefixLength > 0 && pattern[i] != pattern[prefixLength]) {
                    prefixLength = table[prefixLength - 1];
                }
                if (pattern[i] == pattern[prefixLength]) {
                    prefixLength++;
                }
                table[i] = prefixLength;
            }
            return table;
        }
    }

    /**
     * Creates an {@link XmlRecordReader} for the given split.
     *
     * @param inputSplit the split to read; cast to {@link FileSplit}
     * @param jobConf job configuration providing the start and end tags
     * @param reporter not used by this implementation
     * @return a reader producing one record per start-tag/end-tag region
     * @throws IOException if the underlying file cannot be opened
     */
    public RecordReader<LongWritable, Text> getRecordReader(InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException {
        return new XmlRecordReader((FileSplit) inputSplit, jobConf);
    }
}
