package pl.edu.icm.coansys.classification.documents.pig.extractors;

import com.google.common.base.Joiner;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.zookeeper.KeeperException;
import pl.edu.icm.coansys.classification.documents.auxil.StackTraceExtractor;
import pl.edu.icm.coansys.disambiguation.auxil.Pair;
import pl.edu.icm.coansys.importers.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/classification/documents/pig/extractors/EXTRACT_MAP_WHEN_CATEG_LIM.class */
public class EXTRACT_MAP_WHEN_CATEG_LIM extends EvalFunc<Map> {
    private String language;

    public EXTRACT_MAP_WHEN_CATEG_LIM(String str) {
        this.language = null;
        this.language = str;
    }

    public EXTRACT_MAP_WHEN_CATEG_LIM() {
        this.language = null;
    }

    public Schema outputSchema(Schema schema) {
        try {
            return Schema.generateNestedSchema((byte) 100, new byte[0]);
        } catch (FrontendException e) {
            throw new IllegalStateException((Throwable) e);
        }
    }

    /* renamed from: exec, reason: merged with bridge method [inline-methods] */
    public Map m20exec(Tuple tuple) throws IOException {
        try {
            DataByteArray dataByteArray = (DataByteArray) tuple.get(0);
            int intValue = ((Integer) tuple.get(1)).intValue();
            DocumentProtos.DocumentMetadata parseFrom = DocumentProtos.DocumentMetadata.parseFrom(dataByteArray.get());
            return this.language != null ? generateConcreteLanguageMap(parseFrom, intValue) : generateAllLanguageMap(parseFrom, intValue);
        } catch (Exception e) {
            throw new IOException("Caught exception processing input row:\n" + StackTraceExtractor.getStackTrace(e));
        }
    }

    protected Map generateConcreteLanguageMap(DocumentProtos.DocumentMetadata documentMetadata, int i) {
        String extractLangTitle = extractLangTitle(documentMetadata);
        if (extractLangTitle == null) {
            return null;
        }
        String extractLangAbstract = extractLangAbstract(documentMetadata);
        Pair<String, DataBag> extractLangKeywords = extractLangKeywords(documentMetadata);
        if (((DataBag) extractLangKeywords.getY()).size() <= i) {
            return null;
        }
        HashMap hashMap = new HashMap();
        hashMap.put("key", documentMetadata.getKey());
        hashMap.put("title", extractLangTitle);
        hashMap.put("keywords", extractLangKeywords.getX());
        hashMap.put("abstract", extractLangAbstract);
        hashMap.put("categories", extractLangKeywords.getY());
        return hashMap;
    }

    private Pair<String, DataBag> extractLangKeywords(DocumentProtos.DocumentMetadata documentMetadata) {
        ArrayList arrayList = new ArrayList();
        HashSet hashSet = new HashSet();
        for (DocumentProtos.TextWithLanguage textWithLanguage : documentMetadata.getKeywordList()) {
            if (this.language.equalsIgnoreCase(textWithLanguage.getLanguage())) {
                String text = textWithLanguage.getText();
                if (isClassifCode(text)) {
                    hashSet.add(text);
                } else {
                    arrayList.add(text);
                }
            }
        }
        Iterator it = documentMetadata.getBasicMetadata().getClassifCodeList().iterator();
        while (it.hasNext()) {
            Iterator it2 = ((DocumentProtos.ClassifCode) it.next()).getValueList().iterator();
            while (it2.hasNext()) {
                hashSet.add((String) it2.next());
            }
        }
        DefaultDataBag defaultDataBag = new DefaultDataBag();
        Iterator it3 = hashSet.iterator();
        while (it3.hasNext()) {
            defaultDataBag.add(TupleFactory.getInstance().newTuple((String) it3.next()));
        }
        return new Pair<>(Joiner.on(" ").join(arrayList), defaultDataBag);
    }

    private boolean isClassifCode(String str) {
        return isMSc(str);
    }

    private boolean isMSc(String str) {
        return str.toUpperCase().matches("[0-9][0-9][A-Z][0-9][0-9]");
    }

    private String extractLangAbstract(DocumentProtos.DocumentMetadata documentMetadata) {
        ArrayList arrayList = new ArrayList();
        for (DocumentProtos.TextWithLanguage textWithLanguage : documentMetadata.getDocumentAbstractList()) {
            if (this.language.equalsIgnoreCase(textWithLanguage.getLanguage())) {
            }
            arrayList.add(textWithLanguage.getText());
        }
        return Joiner.on(" ").join(arrayList);
    }

    private String extractLangTitle(DocumentProtos.DocumentMetadata documentMetadata) {
        String join;
        ArrayList arrayList = new ArrayList();
        for (DocumentProtos.TextWithLanguage textWithLanguage : documentMetadata.getBasicMetadata().getTitleList()) {
            if (this.language.equalsIgnoreCase(textWithLanguage.getLanguage())) {
            }
            arrayList.add(textWithLanguage.getText());
        }
        switch (arrayList.size()) {
            case 0:
                System.out.println("No title IN GIVEN LANG (" + this.language + ") out of " + documentMetadata.getBasicMetadata().getTitleCount() + " titles. Ignoring record!");
                return null;
            case 1:
                join = (String) arrayList.get(0);
                break;
            default:
                System.out.println("Number of titles IN GIVEN LANGUAGE (" + this.language + ") is more then one. Titles will be concatenated");
                join = Joiner.on(" ").join(arrayList);
                break;
        }
        if (join.trim().isEmpty()) {
            return null;
        }
        return join;
    }

    protected Map generateAllLanguageMap(DocumentProtos.DocumentMetadata documentMetadata, int i) throws KeeperException.UnimplementedException {
        throw new KeeperException.UnimplementedException();
    }

    private DataBag getCategories(List<DocumentProtos.ClassifCode> list) {
        DefaultDataBag defaultDataBag = new DefaultDataBag();
        Iterator<DocumentProtos.ClassifCode> it = list.iterator();
        while (it.hasNext()) {
            Iterator it2 = it.next().getValueList().iterator();
            while (it2.hasNext()) {
                defaultDataBag.add(TupleFactory.getInstance().newTuple((String) it2.next()));
            }
        }
        return defaultDataBag;
    }

    private String getConcatenated(List<DocumentProtos.TextWithLanguage> list) {
        if (list == null || list.isEmpty()) {
            return null;
        }
        return Joiner.on(" ").join(list);
    }
}
