package pl.edu.icm.coansys.disambiguation.author.pig.extractor;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.TupleFactory;
import pl.edu.icm.coansys.disambiguation.author.pig.normalizers.PigNormalizer;
import pl.edu.icm.coansys.disambiguation.author.pig.normalizers.ToEnglishLowerCase;
import pl.edu.icm.coansys.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/disambiguation/author/pig/extractor/EX_KEYWORDS_SPLIT.class */
public class EX_KEYWORDS_SPLIT extends DisambiguationExtractorDocument {
    public EX_KEYWORDS_SPLIT() {
    }

    public EX_KEYWORDS_SPLIT(PigNormalizer[] pigNormalizerArr) {
        super(pigNormalizerArr);
    }

    @Override // pl.edu.icm.coansys.disambiguation.author.pig.extractor.DisambiguationExtractorDocument
    public DataBag extract(Object obj, String str) {
        String str2;
        Object normalizeExtracted;
        DefaultDataBag defaultDataBag = new DefaultDataBag();
        HashSet hashSet = new HashSet();
        ToEnglishLowerCase toEnglishLowerCase = new ToEnglishLowerCase();
        for (DocumentProtos.KeywordsList keywordsList : ((DocumentProtos.DocumentMetadata) obj).getKeywordsList()) {
            if (str == null || keywordsList.getLanguage().equalsIgnoreCase(str)) {
                for (String str3 : keywordsList.getKeywordsList()) {
                    if (!str3.isEmpty() && !isClassifCode(str3) && (str2 = (String) toEnglishLowerCase.normalize(str3)) != null) {
                        for (String str4 : str2.split("[\\W]+")) {
                            if (!str4.isEmpty() && (normalizeExtracted = normalizeExtracted(str4)) != null) {
                                hashSet.add(normalizeExtracted);
                            }
                        }
                    }
                }
            }
        }
        for (Object obj2 : hashSet.toArray()) {
            defaultDataBag.add(TupleFactory.getInstance().newTuple(obj2));
        }
        return defaultDataBag;
    }

    @Override // pl.edu.icm.coansys.disambiguation.author.pig.extractor.DisambiguationExtractor
    public String getId() {
        return "6";
    }
}
