package pl.edu.icm.coansys.disambiguation.author.pig.extractor;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.tools.pigstats.PigStatusReporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.coansys.commons.java.StackTraceExtractor;
import pl.edu.icm.coansys.disambiguation.author.pig.normalizers.ToEnglishLowerCase;
import pl.edu.icm.coansys.disambiguation.features.FeatureInfo;
import pl.edu.icm.coansys.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/disambiguation/author/pig/extractor/EXTRACT_CONTRIBDATA_GIVENDATA.class */
/**
 * Pig UDF that turns one serialized {@code DocumentWrapper} into a bag of per-contributor tuples.
 *
 * <p>Every output tuple has the shape {@code (documentKey, authorKey, normalizedSurname, featureMap)}.
 * {@code featureMap} maps an extractor's simple class name (or its id when
 * {@code useIdsForExtractors} is set) to the bag produced by that extractor. Document-level
 * features are computed once per document and copied into every contributor's map;
 * author-level features are computed per contributor.
 *
 * <p>For every configured extractor the UDF also maintains two Pig counters (data existing /
 * data missing), grouped under the names declared in {@link REPORTER_CONST}.
 */
public class EXTRACT_CONTRIBDATA_GIVENDATA extends EvalFunc<DataBag> {
    private static final Logger logger = LoggerFactory.getLogger(EXTRACT_CONTRIBDATA_GIVENDATA.class);

    // Prefixes recognised by the single-string constructor.
    private static final String OPT_FEATURE_INFO = "featureinfo=";
    private static final String OPT_LANG = "lang=";
    private static final String OPT_SKIP_EMPTY_FEATURES = "skipEmptyFeatures=";
    private static final String OPT_SNAME_TO_STRING = "snameToString=";
    private static final String OPT_USE_IDS_FOR_EXTRACTORS = "useIdsForExtractors=";
    private static final String OPT_RETURN_NULL = "returnNull=";

    private PigStatusReporter reporter = null;
    private final List<DisambiguationExtractorDocument> des4Doc = new ArrayList<DisambiguationExtractorDocument>();
    private final List<DisambiguationExtractorAuthor> des4Author = new ArrayList<DisambiguationExtractorAuthor>();
    // Parallel to des4Doc / des4Author: the key under which each extractor's bag is published.
    private final List<String> des4DocNameOrId = new ArrayList<String>();
    private final List<String> des4AuthorNameOrId = new ArrayList<String>();
    private String language = null;
    private boolean skipEmptyFeatures = false;
    private boolean snameToString = false;
    private boolean useIdsForExtractors = false;
    private final DisambiguationExtractorFactory extrFactory = new DisambiguationExtractorFactory();
    private boolean returnNull = false;

    /** Counter group names used when reporting whether an extractor produced any data. */
    static class REPORTER_CONST {
        public static final String CONTRIB_EX = "Contrib_Existing";
        public static final String CONTRIB_MS = "Contrib_Missing";
        public static final String DOC_EX = "Doc_Existing";
        public static final String DOC_MS = "Doc_Missing";

        REPORTER_CONST() {
        }
    }

    /**
     * Declares the output as a bag with no inner field schema.
     *
     * @param schema input schema (not consulted)
     * @return the nested bag schema
     * @throws IllegalStateException if Pig fails to build the schema
     */
    public Schema outputSchema(Schema schema) {
        try {
            return Schema.generateNestedSchema(DataType.BAG);
        } catch (FrontendException e) {
            logger.error("Error in creating output schema:", e);
            throw new IllegalStateException(e);
        }
    }

    /**
     * Instantiates the extractors described by a feature-info string and registers each one as a
     * document-level or an author-level extractor, depending on the simple name of its direct
     * superclass.
     *
     * <p>Reads {@code useIdsForExtractors}, so that flag must be final before this is called.
     *
     * @param featureInfoString description understood by {@code FeatureInfo.parseFeatureInfoString}
     * @throws ClassNotFoundException if an extractor's direct superclass is neither
     *         {@code DisambiguationExtractorDocument} nor {@code DisambiguationExtractorAuthor}
     */
    private void setDisambiguationExtractor(String featureInfoString) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
        String documentBaseName = DisambiguationExtractorDocument.class.getSimpleName();
        String authorBaseName = DisambiguationExtractorAuthor.class.getSimpleName();
        List<?> featureInfos = FeatureInfo.parseFeatureInfoString(featureInfoString);
        for (Object item : featureInfos) {
            DisambiguationExtractor extractor = this.extrFactory.create((FeatureInfo) item);
            String extractorName = extractor.getClass().getSimpleName();
            String baseName = extractor.getClass().getSuperclass().getSimpleName();
            String nameOrId = this.useIdsForExtractors ? this.extrFactory.toExId(extractorName) : extractorName;
            if (baseName.equals(documentBaseName)) {
                this.des4Doc.add((DisambiguationExtractorDocument) extractor);
                this.des4DocNameOrId.add(nameOrId);
            } else if (baseName.equals(authorBaseName)) {
                this.des4Author.add((DisambiguationExtractorAuthor) extractor);
                this.des4AuthorNameOrId.add(nameOrId);
            } else {
                String message = "Cannot create extractor: " + extractorName + ". Its superclass: " + baseName + " does not match to any superclass.";
                logger.error(message);
                throw new ClassNotFoundException(message);
            }
        }
    }

    /**
     * Creates the UDF from a single option string made of space-separated {@code name=value}
     * tokens. Recognised names: {@code featureinfo}, {@code lang}, {@code skipEmptyFeatures},
     * {@code snameToString}, {@code useIdsForExtractors}, {@code returnNull}. Unrecognised tokens
     * are ignored.
     *
     * <p>Extractors are built only after every token has been read, so {@code useIdsForExtractors}
     * takes effect regardless of where it appears relative to {@code featureinfo}.
     *
     * @param str the option string
     */
    public EXTRACT_CONTRIBDATA_GIVENDATA(String str) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
        List<String> featureInfoStrings = new ArrayList<String>();
        for (String option : str.split(" ")) {
            if (option.startsWith(OPT_FEATURE_INFO)) {
                featureInfoStrings.add(option.substring(OPT_FEATURE_INFO.length()));
            } else if (option.startsWith(OPT_LANG)) {
                this.language = parseLng(option.substring(OPT_LANG.length()));
            } else if (option.startsWith(OPT_SKIP_EMPTY_FEATURES)) {
                this.skipEmptyFeatures = Boolean.parseBoolean(option.substring(OPT_SKIP_EMPTY_FEATURES.length()));
            } else if (option.startsWith(OPT_SNAME_TO_STRING)) {
                this.snameToString = Boolean.parseBoolean(option.substring(OPT_SNAME_TO_STRING.length()));
            } else if (option.startsWith(OPT_USE_IDS_FOR_EXTRACTORS)) {
                this.useIdsForExtractors = Boolean.parseBoolean(option.substring(OPT_USE_IDS_FOR_EXTRACTORS.length()));
            } else if (option.startsWith(OPT_RETURN_NULL)) {
                this.returnNull = Boolean.parseBoolean(option.substring(OPT_RETURN_NULL.length()));
            }
        }
        for (String featureInfoString : featureInfoStrings) {
            setDisambiguationExtractor(featureInfoString);
        }
    }

    /**
     * Creates the UDF with the given extractors and language; all boolean options stay false.
     *
     * @param str feature-info string describing the extractors
     * @param str2 language filter; {@code null}, {@code ""}, {@code "all"} and {@code "null"}
     *        (case-insensitive) mean "no filter"
     */
    public EXTRACT_CONTRIBDATA_GIVENDATA(String str, String str2) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
        this.language = parseLng(str2);
        setDisambiguationExtractor(str);
    }

    /**
     * Creates the UDF with the given extractors, language and two boolean options.
     *
     * @param str feature-info string describing the extractors
     * @param str2 language filter; {@code null}, {@code ""}, {@code "all"} and {@code "null"}
     *        (case-insensitive) mean "no filter"
     * @param str3 {@code skipEmptyFeatures}, parsed with {@link Boolean#parseBoolean(String)}
     * @param str4 {@code useIdsForExtractors}, parsed with {@link Boolean#parseBoolean(String)}
     */
    public EXTRACT_CONTRIBDATA_GIVENDATA(String str, String str2, String str3, String str4) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
        this.language = parseLng(str2);
        this.skipEmptyFeatures = Boolean.parseBoolean(str3);
        this.useIdsForExtractors = Boolean.parseBoolean(str4);
        // Must run after useIdsForExtractors is set, because it is read while registering extractors.
        setDisambiguationExtractor(str);
    }

    /**
     * Maps the "no language filter" spellings to {@code null}.
     *
     * @return {@code null} for {@code null}, {@code ""}, {@code "all"} or {@code "null"}
     *         (the latter two case-insensitive); otherwise the argument unchanged
     */
    private String parseLng(String lang) {
        if (lang == null || lang.equals("") || lang.equalsIgnoreCase("all") || lang.equalsIgnoreCase("null")) {
            return null;
        }
        return lang;
    }

    /**
     * Alias of {@link #exec(Tuple)} under the name the decompiler gave it ("renamed from: exec,
     * merged with bridge method"). Kept so that code referring to the decompiled name still works.
     *
     * @deprecated call {@link #exec(Tuple)}
     */
    @Deprecated
    public DataBag m29exec(Tuple tuple) throws IOException {
        return exec(tuple);
    }

    /**
     * Extracts contributor data from one input row.
     *
     * @param tuple row whose first field is a {@code DataByteArray} holding a serialized
     *        {@code DocumentWrapper}
     * @return {@code null} if the tuple is null or empty, or if the {@code returnNull} option is
     *         set (the extraction is still performed in that case); otherwise a bag with one
     *         tuple {@code (documentKey, authorKey, normalizedSurname, featureMap)} per distinct
     *         author key
     * @throws IOException wrapping any exception raised while processing the row
     */
    public DataBag exec(Tuple tuple) throws IOException {
        this.reporter = PigStatusReporter.getInstance();
        initializePigReporterWithZeroes();
        if (tuple == null || tuple.size() == 0) {
            return null;
        }
        try {
            DocumentProtos.DocumentMetadata documentMetadata =
                    DocumentProtos.DocumentWrapper.parseFrom(((DataByteArray) tuple.get(0)).get()).getDocumentMetadata();
            String documentKey = documentMetadata.getKey();
            List<DocumentProtos.Author> authorList = documentMetadata.getBasicMetadata().getAuthorList();
            DisambiguationExtractor normalizer = new DisambiguationExtractor();

            // De-duplicate authors by key (the last occurrence wins) while remembering each kept
            // author's real position in authorList. That position, not the iteration order of
            // the de-duplicated collection, is what gets handed to the author extractors below.
            Map<String, Integer> authorIndexByKey = new LinkedHashMap<String, Integer>();
            for (int index = 0; index < authorList.size(); index++) {
                DocumentProtos.Author author = authorList.get(index);
                Integer previousIndex = authorIndexByKey.put(author.getKey(), Integer.valueOf(index));
                if (previousIndex == null) {
                    continue;
                }
                DocumentProtos.Author previous = authorList.get(previousIndex.intValue());
                String surname = author.getSurname();
                String previousSurname = previous.getSurname();
                if (author.equals(previous)) {
                    logger.info("Author metadata clones with key: " + author.getKey() + " in document with key: " + documentKey);
                } else {
                    Object normalized = normalizer.normalizeExtracted(surname);
                    Object previousNormalized = normalizer.normalizeExtracted(previousSurname);
                    boolean sameSurname = normalized == null ? previousNormalized == null : normalized.equals(previousNormalized);
                    if (sameSurname) {
                        logger.info("Duplicated author key: " + author.getKey() + " for different metadata (except surname!) in document with key: " + documentKey);
                    } else {
                        logger.error("Duplicated aurhor key: " + author.getKey() + " for different authors: " + surname + ", " + previousSurname + " in document with key: " + documentKey);
                    }
                }
            }

            // Document-level features: computed once, shared by every contributor.
            Map<String, DataBag> documentFeatures = new HashMap<String, DataBag>();
            for (int i = 0; i < this.des4Doc.size(); i++) {
                DisambiguationExtractorDocument extractor = this.des4Doc.get(i);
                DataBag extracted = extractor.extract(documentMetadata, this.language);
                reportDataExistence(extracted, REPORTER_CONST.DOC_MS, REPORTER_CONST.DOC_EX, extractor);
                if (isRetained(extracted)) {
                    documentFeatures.put(this.des4DocNameOrId.get(i), extracted);
                }
            }

            DataBag contributors = new DefaultDataBag();
            for (Map.Entry<String, Integer> entry : authorIndexByKey.entrySet()) {
                int authorIndex = entry.getValue().intValue();
                DocumentProtos.Author author = authorList.get(authorIndex);
                Object normalizedSurname = this.snameToString
                        ? new ToEnglishLowerCase().normalize(author.getSurname())
                        : normalizer.normalizeExtracted(author);
                Map<String, DataBag> features = new HashMap<String, DataBag>(documentFeatures);
                for (int j = 0; j < this.des4Author.size(); j++) {
                    DisambiguationExtractorAuthor extractor = this.des4Author.get(j);
                    DataBag extracted = extractor.extract(documentMetadata, authorIndex, this.language);
                    reportDataExistence(extracted, REPORTER_CONST.CONTRIB_MS, REPORTER_CONST.CONTRIB_EX, extractor);
                    if (isRetained(extracted)) {
                        features.put(this.des4AuthorNameOrId.get(j), extracted);
                    }
                }
                contributors.add(TupleFactory.getInstance().newTuple(
                        Arrays.<Object>asList(documentKey, author.getKey(), normalizedSurname, features)));
            }
            if (this.returnNull) {
                return null;
            }
            return contributors;
        } catch (Exception e) {
            logger.error("Error in processing input row:", e);
            // Message kept as before; the original exception is additionally chained as the cause.
            throw new IOException("Caught exception processing input row:\n" + StackTraceExtractor.getStackTrace(e), e);
        }
    }

    /** Tells whether an extracted bag should be published: non-null, and non-empty unless empty features are allowed. */
    private boolean isRetained(DataBag bag) {
        return bag != null && (bag.size() != 0 || !this.skipEmptyFeatures);
    }

    /** Touches every counter with a zero increment so that each one exists even if it is never hit. */
    private void initializePigReporterWithZeroes() {
        for (DisambiguationExtractorDocument extractor : this.des4Doc) {
            String counterName = extractor.getClass().getSimpleName();
            incrementCounter(REPORTER_CONST.DOC_MS, counterName, 0L);
            incrementCounter(REPORTER_CONST.DOC_EX, counterName, 0L);
        }
        for (DisambiguationExtractorAuthor extractor : this.des4Author) {
            String counterName = extractor.getClass().getSimpleName();
            incrementCounter(REPORTER_CONST.CONTRIB_MS, counterName, 0L);
            incrementCounter(REPORTER_CONST.CONTRIB_EX, counterName, 0L);
        }
    }

    /**
     * Bumps the "missing" counter when the bag is null or empty, the "existing" counter otherwise.
     * The counter name is the extractor's simple class name.
     */
    private void reportDataExistence(DataBag bag, String missingGroup, String existingGroup, Object extractor) {
        boolean missing = bag == null || bag.size() == 0;
        incrementCounter(missing ? missingGroup : existingGroup, extractor.getClass().getSimpleName(), 1L);
    }

    /**
     * Increments a Pig counter, doing nothing when no reporter or counter is available.
     * The counter is looked up twice so that its type need not be named in this file.
     */
    private void incrementCounter(String group, String counterName, long delta) {
        if (this.reporter == null || this.reporter.getCounter(group, counterName) == null) {
            return;
        }
        this.reporter.getCounter(group, counterName).increment(delta);
    }
}
