package pl.edu.icm.coansys.similarity.pig.udf;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.tools.pigstats.PigStatusReporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.coansys.commons.java.DiacriticsRemover;
import pl.edu.icm.coansys.commons.java.StackTraceExtractor;
import pl.edu.icm.coansys.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/similarity/pig/udf/DocSimDemo_Authors.class */
/**
 * Pig UDF that turns a serialized {@code DocumentWrapper} into a bag of
 * {@code (doi, index, authorName)} tuples, one per distinct normalized author name.
 *
 * <p>Author names are stripped of diacritics, reduced to ASCII letters and rendered as
 * {@code "Surname, F."} (or just {@code "Surname"} when no usable initial is available).
 * Problems while reading the input are reported through Pig counters in the group
 * {@value #COUNTER_GROUP} and result in a {@code null} return value.
 */
public class DocSimDemo_Authors extends EvalFunc<DataBag> {
    private static final Logger logger = LoggerFactory.getLogger(DocSimDemo_Authors.class);

    /** Counter group under which all extraction problems of this UDF are reported. */
    private static final String COUNTER_GROUP = "extraction problems [Auth]";

    /** Matches every character that is not an ASCII letter. */
    private static final Pattern NON_ASCII_LETTER = Pattern.compile("[^A-Za-z]");

    /** Matches a run of whitespace characters. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s++");

    PigStatusReporter myreporter;

    /**
     * Extracts the DOI and the distinct normalized author names from the document stored in
     * the first field of {@code tuple}.
     *
     * <p>NOTE(review): the decompiler reports this method was renamed from {@code exec}
     * ("merged with bridge method"); confirm whether the name has to be restored for the
     * class to satisfy {@code EvalFunc}.
     *
     * @param tuple input tuple whose first field is a {@link DataByteArray} holding a
     *              serialized {@code DocumentWrapper}
     * @return a bag of {@code (doi, index, authorName)} tuples, or {@code null} when the
     *         tuple is {@code null}/empty, the payload cannot be read or parsed, or the
     *         document has no DOI
     * @throws IOException when an unexpected failure escapes the per-step handling
     */
    public DataBag m10exec(Tuple tuple) throws IOException {
        this.myreporter = PigStatusReporter.getInstance();
        if (tuple == null || tuple.size() == 0) {
            return null;
        }
        try {
            TupleFactory tupleFactory = TupleFactory.getInstance();

            // Step 1: pull the raw bytes out of the tuple; failures here get their own counter.
            byte[] serialized;
            try {
                serialized = ((DataByteArray) tuple.get(0)).get();
            } catch (Exception e) {
                countProblem("DataByteArray from tuple");
                return null;
            }

            // Step 2: parse the document, require a DOI and build the author bag.
            try {
                DocumentProtos.DocumentMetadata metadata =
                        DocumentProtos.DocumentWrapper.parseFrom(serialized).getDocumentMetadata();
                String doi = WHITESPACE_RUN.matcher(metadata.getBasicMetadata().getDoi())
                        .replaceAll(" ").trim();
                if (doi.isEmpty()) {
                    countProblem("document metadata | lack of doi");
                    return null;
                }
                return buildAuthorBag(doi, metadata, tupleFactory);
            } catch (Exception e) {
                countProblem("document metadata | lack of doi");
                return null;
            }
        } catch (Exception e) {
            // Reached only when the handling above itself fails (e.g. a counter is unavailable).
            logger.debug(StackTraceExtractor.getStackTrace(e));
            throw new IOException(e);
        }
    }

    /** Builds the output bag with one {@code (doi, index, name)} tuple per distinct author name. */
    private DataBag buildAuthorBag(String doi, DocumentProtos.DocumentMetadata metadata,
            TupleFactory tupleFactory) {
        DataBag authors = new DefaultDataBag();
        Set<String> seen = new HashSet<String>();
        int index = 0;
        for (DocumentProtos.Author author : metadata.getBasicMetadata().getAuthorList()) {
            // Declared per iteration so a failure cannot leak the previous author's name.
            String name = null;
            try {
                name = normalizeAuthorName(author.getSurname(), author.getForenames());
            } catch (Exception e) {
                // Best effort: a single unreadable author must not drop the whole document.
                logger.debug(StackTraceExtractor.getStackTrace(e));
            }
            // Set.add returns false for a name already emitted, which skips duplicates.
            if (name != null && seen.add(name)) {
                Tuple row = tupleFactory.newTuple();
                row.append(doi);
                row.append(Integer.valueOf(index));
                row.append(name);
                authors.add(row);
                index++;
            }
        }
        return authors;
    }

    /**
     * Renders an author as {@code "Surname, F."}, or returns {@code null} when either the
     * surname or the forenames are missing or blank.
     */
    private static String normalizeAuthorName(String surname, String forenames) {
        if (surname == null || surname.trim().isEmpty()
                || forenames == null || forenames.trim().isEmpty()) {
            return null;
        }
        String cleanSurname = lettersOnly(DiacriticsRemover.removeDiacritics(surname));
        String cleanForenames = lettersOnly(DiacriticsRemover.removeDiacritics(forenames));
        // NOTE(review): a one-letter forename yields the bare surname without an initial;
        // kept as in the original - confirm this is intended.
        if (cleanForenames.length() > 1) {
            return cleanSurname + ", " + cleanForenames.substring(0, 1) + ".";
        }
        return cleanSurname;
    }

    /** Replaces non-ASCII-letter characters with spaces, collapses whitespace runs and trims. */
    private static String lettersOnly(String text) {
        String spaced = NON_ASCII_LETTER.matcher(text).replaceAll(" ");
        return WHITESPACE_RUN.matcher(spaced).replaceAll(" ").trim();
    }

    /** Increments the named counter in this UDF's counter group by one. */
    private void countProblem(String counterName) {
        this.myreporter.getCounter(COUNTER_GROUP, counterName).increment(1L);
    }
}
