package pl.edu.icm.coansys.disambiguation.author.features.extractors;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import pl.edu.icm.coansys.disambiguation.author.features.extractors.indicators.DisambiguationExtractorDocument;
import pl.edu.icm.coansys.disambiguation.author.normalizers.DiacriticsRemover;
import pl.edu.icm.coansys.disambiguation.author.normalizers.PigNormalizer;
import pl.edu.icm.coansys.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/disambiguation/author/features/extractors/EX_KEYWORDS_SPLIT.class */
public class EX_KEYWORDS_SPLIT extends DisambiguationExtractorDocument {
    public EX_KEYWORDS_SPLIT() {
    }

    public EX_KEYWORDS_SPLIT(PigNormalizer[] pigNormalizerArr) {
        super(pigNormalizerArr);
    }

    @Override // pl.edu.icm.coansys.disambiguation.author.features.extractors.indicators.DisambiguationExtractorDocument
    public Collection<Integer> extract(Object obj, String str) {
        String normalize;
        Integer normalizeExtracted;
        HashSet hashSet = new HashSet();
        DiacriticsRemover diacriticsRemover = new DiacriticsRemover();
        for (DocumentProtos.KeywordsList keywordsList : ((DocumentProtos.DocumentMetadata) obj).getKeywordsList()) {
            if (str == null || keywordsList.getLanguage().equalsIgnoreCase(str)) {
                for (String str2 : keywordsList.getKeywordsList()) {
                    if (!str2.isEmpty() && !isClassifCode(str2) && (normalize = diacriticsRemover.normalize(str2)) != null) {
                        for (String str3 : normalize.split("[\\W]+")) {
                            if (!str3.isEmpty() && (normalizeExtracted = normalizeExtracted(str3)) != null) {
                                hashSet.add(normalizeExtracted);
                            }
                        }
                    }
                }
            }
        }
        return hashSet;
    }

    @Override // pl.edu.icm.coansys.disambiguation.author.features.extractors.indicators.DisambiguationExtractor
    public String getId() {
        return "6";
    }
}
