package org.apache.crunch.contrib.text;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/* loaded from: input_file:org/apache/crunch/contrib/text/AbstractSimpleExtractor.class */
/**
 * Skeleton {@link Extractor} that turns one input string into one value of type {@code T}.
 *
 * <p>Subclasses supply the parsing logic in {@link #doExtract(Tokenizer)}. {@link #extract(String)}
 * wraps that call: any {@link Exception} raised while creating the tokenizer or parsing is caught,
 * counted, flagged for {@link #errorOnLastRecord()}, and the configured default value is returned
 * in place of a parsed result.
 *
 * <p>The error counter and last-record flag are plain mutable fields; this class performs no
 * synchronization on them.
 *
 * @param <T> the type of value produced by this extractor
 */
public abstract class AbstractSimpleExtractor<T> implements Extractor<T> {
    private static final Log LOG = LogFactory.getLog(AbstractSimpleExtractor.class);

    /** Maximum number of parse failures that are written to the log per initialization. */
    private static final int LOG_ERROR_LIMIT = 100;

    /** Number of failed extractions since the last call to {@link #initialize()}. */
    private int errors;

    /** Whether the most recent call to {@link #extract(String)} failed. */
    private boolean errorOnLast;

    /** Value returned by {@link #extract(String)} when parsing fails. */
    private final T defaultValue;

    /** Factory used to build a {@link Tokenizer} for each input string. */
    private final TokenizerFactory scannerFactory;

    /**
     * Creates an extractor that uses {@link TokenizerFactory#getDefaultInstance()} to tokenize
     * its input.
     *
     * @param t the value to return when an input cannot be parsed
     */
    public AbstractSimpleExtractor(T t) {
        this(t, TokenizerFactory.getDefaultInstance());
    }

    /**
     * Creates an extractor with an explicit tokenizer factory.
     *
     * @param t the value to return when an input cannot be parsed
     * @param tokenizerFactory the factory used to create a {@link Tokenizer} for each input
     */
    protected AbstractSimpleExtractor(T t, TokenizerFactory tokenizerFactory) {
        this.defaultValue = t;
        this.scannerFactory = tokenizerFactory;
    }

    /** Resets the error counter and the last-record error flag. */
    @Override
    public void initialize() {
        this.errors = 0;
        this.errorOnLast = false;
    }

    /**
     * Parses {@code str} by delegating to {@link #doExtract(Tokenizer)}.
     *
     * <p>If tokenizer creation or parsing throws an {@link Exception}, the failure is counted,
     * {@link #errorOnLastRecord()} reports {@code true} until the next call, and the default
     * value is returned. The first {@code LOG_ERROR_LIMIT} failures are logged together with
     * the causing exception; later failures are only counted.
     *
     * @param str the raw input to parse
     * @return the parsed value, or the default value if parsing failed
     */
    @Override
    public T extract(String str) {
        this.errorOnLast = false;
        try {
            return doExtract(this.scannerFactory.create(str));
        } catch (Exception e) {
            this.errorOnLast = true;
            this.errors++;
            // The counter is incremented before the check, so "<=" is what lets exactly
            // LOG_ERROR_LIMIT failures reach the log.
            if (this.errors <= LOG_ERROR_LIMIT) {
                // Pass the exception along so the log keeps the cause and stack trace.
                LOG.error(
                        String.format("Error occurred parsing input '%s' using extractor %s", str, this),
                        e);
            }
            return this.defaultValue;
        }
    }

    /**
     * @return {@code true} if the most recent call to {@link #extract(String)} failed
     */
    @Override
    public boolean errorOnLastRecord() {
        return this.errorOnLast;
    }

    /**
     * @return the value returned by {@link #extract(String)} when parsing fails
     */
    @Override
    public T getDefaultValue() {
        return this.defaultValue;
    }

    /**
     * @return a new {@link ExtractorStats} built from the current error count
     */
    @Override
    public ExtractorStats getStats() {
        return new ExtractorStats(this.errors);
    }

    /**
     * Performs the actual parsing for one input.
     *
     * <p>Exceptions thrown from this method are caught by {@link #extract(String)} and recorded
     * as an extraction error.
     *
     * @param tokenizer the tokenizer created for the current input string
     * @return the parsed value
     */
    protected abstract T doExtract(Tokenizer tokenizer);
}
