package pl.edu.icm.coansys.similarity.pig.udf;

import com.google.common.base.Joiner;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import pl.edu.icm.coansys.commons.java.StackTraceExtractor;
import pl.edu.icm.coansys.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/similarity/pig/udf/DocumentProtobufToTupleMap.class */
public final class DocumentProtobufToTupleMap extends EvalFunc<Tuple> {
    private Map<String, Integer> fieldNumberMap = new HashMap<String, Integer>() { // from class: pl.edu.icm.coansys.similarity.pig.udf.DocumentProtobufToTupleMap.1
        {
            put(C.KEY, 0);
            put(C.TITLE, 1);
            put(C.ABSTRACT_TEXT, 2);
            put(C.KEYWORDS, 3);
            put(C.CONTRIBUTORS, 4);
        }
    };

    /* loaded from: input_file:pl/edu/icm/coansys/similarity/pig/udf/DocumentProtobufToTupleMap$C.class */
    private static final class C {
        public static final String KEY = "key";
        public static final String TITLE = "title";
        public static final String ABSTRACT_TEXT = "abstract";
        public static final String KEYWORDS = "keywords";
        public static final String CONTRIBUTORS = "contributors";

        private C() {
        }
    }

    public Schema outputSchema(Schema schema) {
        try {
            Schema schema2 = new Schema(new Schema.FieldSchema("keyword", new Schema(new Schema.FieldSchema("value", (byte) 55)), (byte) 110));
            Schema schema3 = new Schema(new Schema.FieldSchema("contributor", new Schema(Arrays.asList(new Schema.FieldSchema(C.KEY, (byte) 55), new Schema.FieldSchema("name", (byte) 55))), (byte) 110));
            Schema schema4 = new Schema();
            schema4.add(new Schema.FieldSchema(C.KEY, (byte) 55));
            schema4.add(new Schema.FieldSchema(C.TITLE, (byte) 55));
            schema4.add(new Schema.FieldSchema(C.ABSTRACT_TEXT, (byte) 55));
            schema4.add(new Schema.FieldSchema(C.KEYWORDS, schema2, (byte) 120));
            schema4.add(new Schema.FieldSchema(C.CONTRIBUTORS, schema3, (byte) 120));
            return new Schema(new Schema.FieldSchema(getSchemaName(getClass().getName().toLowerCase(), schema), schema4, (byte) 110));
        } catch (FrontendException e) {
            this.log.error("Error in the output Schema creation", e);
            this.log.error(StackTraceExtractor.getStackTrace(e));
            throw new RuntimeException(e);
        }
    }

    /* renamed from: exec, reason: merged with bridge method [inline-methods] */
    public Tuple m5exec(Tuple tuple) throws IOException {
        return addDocumentMetatdataFields(DocumentProtos.DocumentWrapper.parseFrom(((DataByteArray) tuple.get(0)).get()).getDocumentMetadata(), TupleFactory.getInstance().newTuple(this.fieldNumberMap.size()));
    }

    private Tuple addDocumentMetatdataFields(DocumentProtos.DocumentMetadata documentMetadata, Tuple tuple) throws ExecException {
        tuple.set(this.fieldNumberMap.get(C.KEY).intValue(), documentMetadata.getKey());
        appendToOutput(tuple, C.TITLE, documentMetadata.getBasicMetadata().getTitleList());
        appendToOutput(tuple, C.ABSTRACT_TEXT, documentMetadata.getDocumentAbstractList());
        ArrayList arrayList = new ArrayList();
        Iterator it = documentMetadata.getKeywordsList().iterator();
        while (it.hasNext()) {
            Iterator it2 = ((DocumentProtos.KeywordsList) it.next()).getKeywordsList().iterator();
            while (it2.hasNext()) {
                arrayList.add((String) it2.next());
            }
        }
        tuple.set(this.fieldNumberMap.get(C.KEYWORDS).intValue(), listToDataBag(arrayList));
        ArrayList arrayList2 = new ArrayList();
        ArrayList arrayList3 = new ArrayList();
        for (DocumentProtos.Author author : documentMetadata.getBasicMetadata().getAuthorList()) {
            arrayList2.add(author.getKey());
            arrayList3.add(author.getName());
        }
        tuple.set(this.fieldNumberMap.get(C.CONTRIBUTORS).intValue(), listToDataBag(arrayList2, arrayList3));
        return tuple;
    }

    private void appendToOutput(Tuple tuple, String str, List<DocumentProtos.TextWithLanguage> list) throws ExecException {
        ArrayList arrayList = new ArrayList();
        Iterator<DocumentProtos.TextWithLanguage> it = list.iterator();
        while (it.hasNext()) {
            arrayList.add(it.next().getText());
        }
        tuple.set(this.fieldNumberMap.get(str).intValue(), Joiner.on(" ").join(arrayList));
    }

    private <T> DataBag listToDataBag(List<T> list) {
        DataBag newDefaultBag = BagFactory.getInstance().newDefaultBag();
        Iterator<T> it = list.iterator();
        while (it.hasNext()) {
            newDefaultBag.add(TupleFactory.getInstance().newTuple(it.next()));
        }
        return newDefaultBag;
    }

    private <T1, T2> DataBag listToDataBag(List<T1> list, List<T2> list2) throws ExecException {
        DataBag newDefaultBag = BagFactory.getInstance().newDefaultBag();
        for (int i = 0; i < Math.min(list.size(), list2.size()); i++) {
            Tuple newTuple = TupleFactory.getInstance().newTuple(2);
            newTuple.set(0, list.get(i));
            newTuple.set(1, list2.get(i));
            newDefaultBag.add(newTuple);
        }
        return newDefaultBag;
    }
}
