package pl.edu.icm.coansys.disambiguation.author.pig.extractor;

import java.util.regex.Pattern;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.TupleFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.coansys.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/disambiguation/author/pig/extractor/EX_TITLE_SPLIT.class */
/**
 * Extractor that breaks a document title into individual tokens and emits each
 * token, after {@code normalizeExtracted}, as a single-field tuple in a bag.
 *
 * <p>Tokens are the pieces of the title text separated by runs of non-word
 * characters (regex {@code [\W]+}). Empty pieces are never emitted.
 */
public class EX_TITLE_SPLIT extends DisambiguationExtractorDocument {
    private static final Logger logger = LoggerFactory.getLogger(EX_TITLE_SPLIT.class);

    /** Separator between title tokens; compiled once instead of on every call. */
    private static final Pattern TOKEN_SEPARATOR = Pattern.compile("[\\W]+");

    /**
     * Tokenizes the first title of the given document.
     *
     * @param obj the document, expected to be a {@code DocumentProtos.DocumentMetadata}
     * @return a bag with one tuple per non-empty token of the first title;
     *         an empty bag when the document has no titles
     */
    @Override
    public DataBag extract(Object obj) {
        DocumentProtos.DocumentMetadata document = (DocumentProtos.DocumentMetadata) obj;
        DataBag tokens = new DefaultDataBag();
        // Guard against documents without any title: get(0) would throw otherwise.
        if (document.getBasicMetadata().getTitleList().isEmpty()) {
            return tokens;
        }
        DocumentProtos.TextWithLanguage firstTitle = document.getBasicMetadata().getTitleList().get(0);
        addTokens(tokens, firstTitle.getText());
        return tokens;
    }

    /**
     * Tokenizes the first title whose language matches {@code str}
     * (case-insensitive comparison).
     *
     * @param obj the document, expected to be a {@code DocumentProtos.DocumentMetadata}
     * @param str the language to look for
     * @return a bag with one tuple per non-empty token of the matching title,
     *         or {@code null} when no title is in the requested language
     */
    @Override
    public DataBag extract(Object obj, String str) {
        DocumentProtos.DocumentMetadata document = (DocumentProtos.DocumentMetadata) obj;
        for (DocumentProtos.TextWithLanguage title : document.getBasicMetadata().getTitleList()) {
            if (str.equalsIgnoreCase(title.getLanguage())) {
                DataBag tokens = new DefaultDataBag();
                addTokens(tokens, title.getText());
                // Only the first title in the requested language is used.
                return tokens;
            }
        }
        logger.info("No title IN GIVEN LANG ({}) out of {} titles!",
                str, document.getBasicMetadata().getTitleCount());
        return null;
    }

    /** Splits {@code text} on the token separator and adds every non-empty, normalized piece to {@code bag}. */
    private void addTokens(DataBag bag, String text) {
        for (String token : TOKEN_SEPARATOR.split(text)) {
            // split() yields a leading "" when the text starts with a separator; skip it.
            if (token.length() != 0) {
                bag.add(TupleFactory.getInstance().newTuple(normalizeExtracted(token)));
            }
        }
    }
}
