package pl.edu.icm.coansys.output.merge.citations;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

/* loaded from: input_file:pl/edu/icm/coansys/output/merge/citations/ReadMatchedCitationsUDF.class */
/**
 * Pig UDF that splits a matched-citation pair into its identifiers.
 *
 * <p>Input: a tuple of exactly two chararray fields.
 * <ol>
 *   <li>a source citation key containing {@code cit_<srcDocId>_<refNumber>};</li>
 *   <li>a reference document key containing {@code <digits/dots>:doc_<refDocId>}.</li>
 * </ol>
 *
 * <p>Output: a three-field tuple {@code (srcDocId, refNumber, refDocId)}. All three
 * fields are appended as strings exactly as captured by the regular expressions;
 * the reference number is not converted to a numeric type.
 */
public class ReadMatchedCitationsUDF extends EvalFunc<Tuple> {
    /** Type code required for both arguments; 55 is Pig's {@code DataType.CHARARRAY}. */
    private static final byte CHARARRAY_TYPE = 55;

    /** Captures the source document id (group 1) and the reference number (group 2). */
    private static final Pattern SRC_DOC_PATTERN = Pattern.compile("cit_(.*)_(\\d+)");

    /** Captures the referenced document id (group 1). */
    private static final Pattern REF_DOC_PATTERN = Pattern.compile("[\\d\\.]+:doc_(.*)");

    private final TupleFactory tupleFactory = TupleFactory.getInstance();

    /**
     * Extracts the source document id, the reference number and the referenced
     * document id from the two input strings.
     *
     * @param tuple input tuple; must be non-null and hold exactly two chararray fields
     * @return a new tuple {@code (srcDocId, refNumber, refDocId)}
     * @throws IOException if the input does not have the expected shape, or if either
     *         string does not contain the expected pattern
     */
    /* renamed from: exec, reason: merged with bridge method [inline-methods] */
    public Tuple m2exec(Tuple tuple) throws IOException {
        if (tuple == null
                || tuple.size() != 2
                || tuple.getType(0) != CHARARRAY_TYPE
                || tuple.getType(1) != CHARARRAY_TYPE) {
            throw new IOException(getClass().getName() + " expects 2 argument, both chararrays");
        }
        String srcKey = (String) tuple.get(0);
        String refKey = (String) tuple.get(1);

        // Matchers are method-local: they carry per-call state and must not be
        // kept on the instance between invocations.
        // find() (not matches()) is used, so the pattern may occur anywhere in the key.
        Matcher srcMatcher = SRC_DOC_PATTERN.matcher(srcKey);
        if (!srcMatcher.find()) {
            throw new IOException("cannot extract src doc id and reference number from " + srcKey);
        }
        Matcher refMatcher = REF_DOC_PATTERN.matcher(refKey);
        if (!refMatcher.find()) {
            throw new IOException("cannot extract reference doc id from " + refKey);
        }

        Tuple result = this.tupleFactory.newTuple();
        result.append(srcMatcher.group(1));
        result.append(srcMatcher.group(2));
        result.append(refMatcher.group(1));
        return result;
    }
}
