package pl.edu.icm.coansys.disambiguation.author.pig.extractor;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.tools.pigstats.PigStatusReporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.coansys.commons.java.StackTraceExtractor;
import pl.edu.icm.coansys.disambiguation.author.features.extractors.DisambiguationExtractorFactory;
import pl.edu.icm.coansys.disambiguation.author.features.extractors.indicators.DisambiguationExtractor;
import pl.edu.icm.coansys.disambiguation.author.features.extractors.indicators.DisambiguationExtractorAuthor;
import pl.edu.icm.coansys.disambiguation.author.features.extractors.indicators.DisambiguationExtractorDocument;
import pl.edu.icm.coansys.disambiguation.features.FeatureInfo;
import pl.edu.icm.coansys.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/disambiguation/author/pig/extractor/EXTRACT_CONTRIBDATA_GIVENDATA.class */
/**
 * Pig UDF that converts one serialized {@code DocumentWrapper} into a bag of contributor tuples.
 *
 * <p>For every author of the document one tuple is emitted, holding (in this order): the document
 * key, a name-based UUID computed from the author's serialized bytes, the surname used for
 * blocking, and a map from extractor name (or short id) to the bag of extracted feature values.
 *
 * <p>The UDF is configured through a single space-separated option string parsed by JCommander;
 * see the {@code @Parameter} fields below. While running it reports, through Pig counters, how
 * often each extractor produced data and how often it did not.
 */
public class EXTRACT_CONTRIBDATA_GIVENDATA extends EvalFunc<DataBag> {
    private static final Logger logger = LoggerFactory.getLogger(EXTRACT_CONTRIBDATA_GIVENDATA.class);

    /** Document-level extractors, in the order given by the feature description. */
    private final List<DisambiguationExtractorDocument> des4Doc = new ArrayList<DisambiguationExtractorDocument>();
    /** Author-level extractors, in the order given by the feature description. */
    private final List<DisambiguationExtractorAuthor> des4Author = new ArrayList<DisambiguationExtractorAuthor>();
    /** Output map keys for {@link #des4Doc}; index-aligned with that list. */
    private final List<String> des4DocNameOrId = new ArrayList<String>();
    /** Output map keys for {@link #des4Author}; index-aligned with that list. */
    private final List<String> des4AuthorNameOrId = new ArrayList<String>();

    @Parameter(names = {"-lang", "-language"}, description = "Filter metadata by language", converter = LangConverter.class)
    private String language = null;

    // arity = 1: each boolean switch below expects one explicit value on the option string.
    @Parameter(names = {"-skipEmptyFeatures"}, arity = 1, description = "Skip contributor's features, when feature bag is empty (no data for feature).")
    private boolean skipEmptyFeatures = false;

    @Parameter(names = {"-snameToString"}, arity = 1, description = "Does not normalize surname used to blocking when true. Use only for debuging.")
    private boolean snameToString = false;

    @Parameter(names = {"-useIdsForExtractors"}, arity = 1, description = "Use short ids for extractors (features) names in temporary sequance files.")
    private boolean useIdsForExtractors = false;

    @Parameter(names = {"-returnNull"}, arity = 1, description = "Return null data bag after processing. Use only for debuging.")
    private boolean returnNull = false;

    @Parameter(names = {"-featureinfo", "-featureInfo"}, required = true, description = "Features description - model for calculating affinity and contributors clustering.")
    private String featureinfo = null;

    private final DisambiguationExtractorFactory extrFactory = new DisambiguationExtractorFactory();

    private PigStatusReporter myreporter = null;
    /** Per document extractor: [MISS] and [EXIST] counters. */
    private Counter[][] counters4Doc;
    /** Per author extractor: [MISS] and [EXIST] counters. */
    private Counter[][] counters4Author;
    private Counter[] counterNormalizedSname;
    private Counter[] counterOriginalSname;
    /** Probe counter; stays {@code null} until the reporter hands out counters and all of them are bound. */
    private Counter countersExist;

    /** Counter group names and the indices used inside the two-element counter arrays. */
    static final class REPORTER_CONST {
        public static final String CONTRIB_EX = "Contrib_Existing";
        public static final String CONTRIB_MS = "Contrib_Missing";
        public static final String DOC_EX = "Doc_Existing";
        public static final String DOC_MS = "Doc_Missing";
        public static final int MISS = 0;
        public static final int EXIST = 1;

        private REPORTER_CONST() {
            // constants holder, not meant to be instantiated
        }
    }

    /**
     * Declares the output of this UDF as a bag with no further nested field types.
     *
     * @param schema input schema (not used)
     * @return the generated output schema
     * @throws IllegalStateException when Pig cannot build the schema
     */
    @Override
    public Schema outputSchema(Schema schema) {
        try {
            // 120 is the byte code of Pig's DataType.BAG.
            return Schema.generateNestedSchema((byte) 120, new byte[0]);
        } catch (FrontendException e) {
            logger.error("Error in creating output schema:", e);
            throw new IllegalStateException(e);
        }
    }

    /**
     * Creates the extractors described by the feature description and sorts them into the
     * document-level and author-level lists, together with the key each one is stored under.
     *
     * @param str feature description string
     * @throws IllegalArgumentException when {@code str} is null or empty
     * @throws ClassNotFoundException when a created extractor derives from neither
     *         {@code DisambiguationExtractorDocument} nor {@code DisambiguationExtractorAuthor}
     */
    private void setDisambiguationExtractor(String str) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
        if (str == null || str.isEmpty()) {
            throw new IllegalArgumentException("FeatureInfo model is required");
        }
        final String docBaseName = DisambiguationExtractorDocument.class.getSimpleName();
        final String authorBaseName = DisambiguationExtractorAuthor.class.getSimpleName();

        List<FeatureInfo> features = FeatureInfo.parseFeatureInfoString(str);
        for (FeatureInfo feature : features) {
            DisambiguationExtractor extractor = this.extrFactory.create(feature);
            String extractorName = extractor.getClass().getSimpleName();

            // Walk up past every class whose simple name starts with "EX_"; the first class
            // without that prefix decides which kind of extractor this is.
            Class<?> base = extractor.getClass();
            while (base.getSimpleName().startsWith("EX_")) {
                base = base.getSuperclass();
            }
            String baseName = base.getSimpleName();

            String nameOrId = this.useIdsForExtractors ? this.extrFactory.toExId(extractorName) : extractorName;

            if (baseName.equals(docBaseName)) {
                this.des4Doc.add((DisambiguationExtractorDocument) extractor);
                this.des4DocNameOrId.add(nameOrId);
            } else if (baseName.equals(authorBaseName)) {
                this.des4Author.add((DisambiguationExtractorAuthor) extractor);
                this.des4AuthorNameOrId.add(nameOrId);
            } else {
                String message = "Cannot create extractor: " + extractorName + ". Its superclass: " + baseName + " does not match to any superclass.";
                logger.error(message);
                throw new ClassNotFoundException(message);
            }
        }
    }

    /**
     * Parses the option string and builds the configured extractors.
     *
     * @param str space-separated options, e.g. {@code "-featureinfo <model> -lang <code>"}
     */
    public EXTRACT_CONTRIBDATA_GIVENDATA(String str) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
        new JCommander(this, str.split(" "));
        setDisambiguationExtractor(this.featureinfo);
    }

    /**
     * Returns the options that differ from their defaults, keyed by option name.
     *
     * @return map of option name to its current value; boolean options appear only when true,
     *         string options only when set
     */
    public Map<String, Object> debugComponents() {
        Map<String, Object> components = new HashMap<String, Object>();
        if (this.language != null) {
            components.put("-lang", this.language);
        }
        if (this.skipEmptyFeatures) {
            components.put("-skipEmptyFeatures", Boolean.TRUE);
        }
        if (this.snameToString) {
            components.put("-snameToString", Boolean.TRUE);
        }
        if (this.useIdsForExtractors) {
            components.put("-useIdsForExtractors", Boolean.TRUE);
        }
        if (this.returnNull) {
            components.put("-returnNull", Boolean.TRUE);
        }
        if (this.featureinfo != null) {
            components.put("-featureinfo", this.featureinfo);
        }
        return components;
    }

    /**
     * Extracts contributor data from one input row.
     *
     * @param tuple row whose first field is a {@code DataByteArray} with a serialized
     *        {@code DocumentWrapper}
     * @return {@code null} for a null or empty tuple; an empty bag when the document has no
     *         authors; otherwise one tuple per author, or {@code null} when {@code -returnNull}
     *         is set
     * @throws IOException when anything fails while processing the row; the original exception
     *         is attached as the cause
     */
    @Override
    public DataBag exec(Tuple tuple) throws IOException {
        initializePigReporterWithZeroes();
        if (tuple == null || tuple.size() == 0) {
            return null;
        }
        try {
            DataByteArray serialized = (DataByteArray) tuple.get(0);
            DocumentProtos.DocumentMetadata metadata = DocumentProtos.DocumentWrapper.parseFrom(serialized.get()).getDocumentMetadata();
            String docKey = metadata.getKey();
            DataBag contributors = new DefaultDataBag();

            List<DocumentProtos.Author> authors = metadata.getBasicMetadata().getAuthorList();
            reportAuthors(authors);
            if (authors.isEmpty()) {
                return contributors;
            }

            // Document-level features are computed once and shared by every author of the document.
            Map<String, DataBag> docFeatures = extractDocBasedFeatures(metadata);
            DisambiguationExtractor normalizer = new DisambiguationExtractor();
            TupleFactory tupleFactory = TupleFactory.getInstance();

            for (int authorIndex = 0; authorIndex < authors.size(); authorIndex++) {
                DocumentProtos.Author author = authors.get(authorIndex);
                String surname = author.getSurname();
                Object blockingSname = this.snameToString ? surname.toLowerCase() : normalizer.normalizeExtracted(surname);
                reportSname(surname, blockingSname);

                String contributorId = UUID.nameUUIDFromBytes(author.toByteArray()).toString();
                Map<String, DataBag> features = extractAuthBasedFeatures(metadata, docFeatures, authorIndex);
                contributors.add(tupleFactory.newTuple(Arrays.<Object>asList(docKey, contributorId, blockingSname, features)));
            }
            return this.returnNull ? null : contributors;
        } catch (Exception e) {
            logger.error("Error in processing input row:", e);
            throw new IOException("Caught exception processing input row:\n" + StackTraceExtractor.getStackTrace(e), e);
        }
    }

    /**
     * Builds the feature map of a single author: a copy of the document-level features plus
     * the output of every author-level extractor.
     *
     * @param documentMetadata metadata of the processed document
     * @param map document-level features; copied, never modified
     * @param i index of the author within the document's author list
     * @return new map from extractor name/id to extracted values
     */
    private Map<String, DataBag> extractAuthBasedFeatures(DocumentProtos.DocumentMetadata documentMetadata, Map<String, DataBag> map, int i) {
        Map<String, DataBag> features = new HashMap<String, DataBag>(map);
        for (int ex = 0; ex < this.des4Author.size(); ex++) {
            DataBag extracted = this.des4Author.get(ex).extract(documentMetadata, i, this.language);
            reportAuthorDataExistance(extracted, ex);
            if (shouldStore(extracted)) {
                features.put(this.des4AuthorNameOrId.get(ex), extracted);
            }
        }
        return features;
    }

    /**
     * Runs every document-level extractor on the document.
     *
     * @param documentMetadata metadata of the processed document
     * @return map from extractor name/id to extracted values
     */
    private Map<String, DataBag> extractDocBasedFeatures(DocumentProtos.DocumentMetadata documentMetadata) {
        Map<String, DataBag> features = new HashMap<String, DataBag>();
        for (int ex = 0; ex < this.des4Doc.size(); ex++) {
            DataBag extracted = this.des4Doc.get(ex).extract(documentMetadata, this.language);
            reportDocumentDataExistence(extracted, ex);
            if (shouldStore(extracted)) {
                features.put(this.des4DocNameOrId.get(ex), extracted);
            }
        }
        return features;
    }

    /** A null bag is never stored; an empty bag is stored only when empty features are not skipped. */
    private boolean shouldStore(DataBag extracted) {
        if (extracted == null) {
            return false;
        }
        return extracted.size() != 0 || !this.skipEmptyFeatures;
    }

    /**
     * Binds all Pig counters once and touches each with a zero increment.
     *
     * <p>Does nothing when the counters are already bound. When the reporter returns no counter
     * for the probe request, nothing is bound and the next call tries again.
     */
    private void initializePigReporterWithZeroes() {
        if (this.countersExist != null) {
            return;
        }
        this.myreporter = PigStatusReporter.getInstance();
        Counter probe = this.myreporter.getCounter("unused", "unused");
        if (probe == null) {
            return;
        }

        this.counters4Doc = new Counter[this.des4Doc.size()][2];
        for (int ex = 0; ex < this.des4Doc.size(); ex++) {
            String extractorName = this.des4Doc.get(ex).getClass().getSimpleName();
            this.counters4Doc[ex][REPORTER_CONST.MISS] = this.myreporter.getCounter(REPORTER_CONST.DOC_MS, extractorName);
            this.counters4Doc[ex][REPORTER_CONST.EXIST] = this.myreporter.getCounter(REPORTER_CONST.DOC_EX, extractorName);
            this.counters4Doc[ex][REPORTER_CONST.MISS].increment(0L);
            this.counters4Doc[ex][REPORTER_CONST.EXIST].increment(0L);
        }

        this.counters4Author = new Counter[this.des4Author.size()][2];
        for (int ex = 0; ex < this.des4Author.size(); ex++) {
            String extractorName = this.des4Author.get(ex).getClass().getSimpleName();
            this.counters4Author[ex][REPORTER_CONST.MISS] = this.myreporter.getCounter(REPORTER_CONST.CONTRIB_MS, extractorName);
            this.counters4Author[ex][REPORTER_CONST.EXIST] = this.myreporter.getCounter(REPORTER_CONST.CONTRIB_EX, extractorName);
            this.counters4Author[ex][REPORTER_CONST.MISS].increment(0L);
            this.counters4Author[ex][REPORTER_CONST.EXIST].increment(0L);
        }

        this.counterNormalizedSname = new Counter[2];
        this.counterNormalizedSname[REPORTER_CONST.MISS] = this.myreporter.getCounter(REPORTER_CONST.CONTRIB_MS, "Normalized sname");
        this.counterNormalizedSname[REPORTER_CONST.EXIST] = this.myreporter.getCounter(REPORTER_CONST.CONTRIB_EX, "Normalized sname");
        this.counterOriginalSname = new Counter[2];
        this.counterOriginalSname[REPORTER_CONST.MISS] = this.myreporter.getCounter(REPORTER_CONST.CONTRIB_MS, "Original sname");
        this.counterOriginalSname[REPORTER_CONST.EXIST] = this.myreporter.getCounter(REPORTER_CONST.CONTRIB_EX, "Original sname");
        this.counterNormalizedSname[REPORTER_CONST.MISS].increment(0L);
        this.counterNormalizedSname[REPORTER_CONST.EXIST].increment(0L);
        this.counterOriginalSname[REPORTER_CONST.MISS].increment(0L);
        this.counterOriginalSname[REPORTER_CONST.EXIST].increment(0L);

        // Published last, so a failure above leaves the counters unbound and the next call retries.
        this.countersExist = probe;
    }

    /** Counts whether the author extractor at index {@code i} produced any data. */
    private void reportAuthorDataExistance(DataBag dataBag, int i) {
        if (this.countersExist == null) {
            return;
        }
        this.counters4Author[i][existenceIndex(dataBag)].increment(1L);
    }

    /** Counts whether the document extractor at index {@code i} produced any data. */
    private void reportDocumentDataExistence(DataBag dataBag, int i) {
        if (this.countersExist == null) {
            return;
        }
        this.counters4Doc[i][existenceIndex(dataBag)].increment(1L);
    }

    /** Maps a null or empty bag to the MISS index, anything else to the EXIST index. */
    private static int existenceIndex(DataBag dataBag) {
        boolean missing = dataBag == null || dataBag.size() == 0;
        return missing ? REPORTER_CONST.MISS : REPORTER_CONST.EXIST;
    }

    /**
     * Counts presence of the original and of the blocking surname.
     *
     * @param obj original surname
     * @param obj2 surname after normalization (or lower-casing)
     */
    private void reportSname(Object obj, Object obj2) {
        if (this.countersExist == null) {
            return;
        }
        this.counterNormalizedSname[existenceIndex(obj2)].increment(1L);
        this.counterOriginalSname[existenceIndex(obj)].increment(1L);
    }

    /** Maps a null value or one with an empty string form to the MISS index, anything else to EXIST. */
    private static int existenceIndex(Object value) {
        boolean missing = value == null || value.toString().isEmpty();
        return missing ? REPORTER_CONST.MISS : REPORTER_CONST.EXIST;
    }

    /** Counts the document as processed (it has authors) or unprocessed (it has none). */
    private void reportAuthors(Collection<DocumentProtos.Author> collection) {
        if (this.countersExist == null) {
            return;
        }
        boolean noAuthors = collection.isEmpty();
        // Both counters are touched on every call so that each shows up even when it stays at zero.
        this.myreporter.getCounter(REPORTER_CONST.DOC_MS, "Any author (unprocessed documents)").increment(noAuthors ? 1L : 0L);
        this.myreporter.getCounter(REPORTER_CONST.DOC_EX, "Any author (processed documents)").increment(noAuthors ? 0L : 1L);
    }
}
