package pl.edu.icm.coansys.classification.documents.pig.extractors;

import com.google.common.base.Joiner;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import pl.edu.icm.coansys.classification.documents.auxil.StackTraceExtractor;
import pl.edu.icm.coansys.importers.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/classification/documents/pig/extractors/EXTRACT_MAP_WHEN_CATEG_LIM.class */
/**
 * Pig evaluation function that turns a serialized {@code DocumentMetadata}
 * message into a map of selected fields, but only for documents that carry
 * at least a requested number of classification-code values.
 *
 * <p>Input tuple layout, as read by {@link #exec(Tuple)}:
 * <ul>
 *   <li>field 0 - a {@link DataByteArray} holding the serialized
 *       {@code DocumentProtos.DocumentMetadata} message,</li>
 *   <li>field 1 - a {@code String} parsed as the minimum number of category
 *       values the document must have.</li>
 * </ul>
 *
 * <p>The resulting map uses the keys {@code "key"}, {@code "title"},
 * {@code "keywords"}, {@code "abstract"} and {@code "categories"}.
 */
public class EXTRACT_MAP_WHEN_CATEG_LIM extends EvalFunc<Map> {

    /** Separator placed between concatenated text fragments. */
    private static final String SEPARATOR = " ";

    /**
     * Declares the output schema of this function.
     *
     * @param schema the input schema (not used)
     * @return the nested schema generated for the output type
     * @throws IllegalStateException if Pig fails to generate the schema
     */
    public Schema outputSchema(Schema schema) {
        try {
            // NOTE(review): type code 100 presumably corresponds to Pig's DataType.MAP - confirm.
            return Schema.generateNestedSchema((byte) 100, new byte[0]);
        } catch (FrontendException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Extracts the document fields into a map when the document has enough
     * category values.
     *
     * @param tuple input tuple: field 0 is the serialized document metadata,
     *              field 1 is the minimum category count given as a string
     * @return a map with the extracted fields, or {@code null} when the
     *         document has fewer category values than requested
     * @throws IOException if anything goes wrong while processing the row;
     *         the original exception is attached as the cause
     */
    public Map exec(Tuple tuple) throws IOException {
        try {
            DocumentProtos.DocumentMetadata metadata =
                    DocumentProtos.DocumentMetadata.parseFrom(((DataByteArray) tuple.get(0)).get());

            List<String> titleTexts = new ArrayList<String>();
            for (DocumentProtos.TextWithLanguage title : metadata.getBasicMetadata().getTitleList()) {
                titleTexts.add(title.getText());
            }
            String titles = Joiner.on(SEPARATOR).join(titleTexts);

            int minCategories = Integer.parseInt((String) tuple.get(1));
            DataBag categories = getCategories(metadata.getBasicMetadata().getClassifCodeList());
            if (categories.size() < minCategories) {
                // Too few categories: the document is filtered out.
                return null;
            }

            Map<String, Object> result = new HashMap<String, Object>();
            result.put("key", metadata.getKey());
            result.put("title", titles);
            result.put("keywords", getConcatenated(metadata.getKeywordList()));
            // NOTE(review): "abstract" is filled with the joined titles, exactly as the
            // previous code did (it iterated the title list twice). This looks like a
            // copy-paste slip; confirm which accessor should supply the abstract text.
            result.put("abstract", titles);
            result.put("categories", categories);
            return result;
        } catch (Exception e) {
            // Keep the historical message format, but also preserve the cause.
            throw new IOException(
                    "Caught exception processing input row:\n" + StackTraceExtractor.getStackTrace(e), e);
        }
    }

    /**
     * Alias of {@link #exec(Tuple)} kept for backward compatibility: the
     * decompiler renamed {@code exec} to this name when it merged the bridge
     * method.
     *
     * @param tuple see {@link #exec(Tuple)}
     * @return see {@link #exec(Tuple)}
     * @throws IOException see {@link #exec(Tuple)}
     */
    public Map m19exec(Tuple tuple) throws IOException {
        return exec(tuple);
    }

    /**
     * Collects every value of every classification code into a bag.
     *
     * @param list classification codes of the document
     * @return a bag with one single-field tuple per code value
     */
    private DataBag getCategories(List<DocumentProtos.ClassifCode> list) {
        DataBag bag = new DefaultDataBag();
        TupleFactory tupleFactory = TupleFactory.getInstance();
        for (DocumentProtos.ClassifCode code : list) {
            for (String value : code.getValueList()) {
                bag.add(tupleFactory.newTuple(value));
            }
        }
        return bag;
    }

    /**
     * Joins the text of all given entries with a single space.
     *
     * @param list entries to concatenate; may be {@code null}
     * @return the concatenated texts, or {@code null} when the list is
     *         {@code null} or empty
     */
    private String getConcatenated(List<DocumentProtos.TextWithLanguage> list) {
        if (list == null || list.isEmpty()) {
            return null;
        }
        StringBuilder sb = new StringBuilder();
        // Use getText() for the first entry too; appending the message object
        // itself would insert its toString() rendering instead of the text.
        sb.append(list.get(0).getText());
        for (int i = 1; i < list.size(); i++) {
            sb.append(SEPARATOR).append(list.get(i).getText());
        }
        return sb.toString();
    }
}
