package pl.edu.icm.coansys.classification.documents.jobs;

import com.google.common.base.Joiner;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.coansys.commons.java.PorterStemmer;
import pl.edu.icm.coansys.disambiguation.auxil.DiacriticsRemover;
import pl.edu.icm.coansys.disambiguation.auxil.LoggingInDisambiguation;
import pl.edu.icm.coansys.disambiguation.auxil.TextArrayWritable;
import pl.edu.icm.coansys.disambiguation.auxil.TextTextArrayMapWritable;
import pl.edu.icm.coansys.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/classification/documents/jobs/WordCountMapper_Proto.class */
public class WordCountMapper_Proto extends TableMapper<TextArrayWritable, IntWritable> {
    private static Logger logger = LoggerFactory.getLogger(LoggingInDisambiguation.class);
    private static final IntWritable one = new IntWritable(1);
    private Text key = null;

    protected void map(ImmutableBytesWritable immutableBytesWritable, Result result, Mapper<ImmutableBytesWritable, Result, TextArrayWritable, IntWritable>.Context context) throws IOException, InterruptedException {
        DocumentProtos.DocumentMetadata parseFrom = DocumentProtos.DocumentMetadata.parseFrom(result.getValue(Bytes.toBytes("m"), Bytes.toBytes("mproto")));
        this.key = new Text(parseFrom.getKey());
        StringBuilder sb = new StringBuilder();
        Iterator it = parseFrom.getDocumentAbstractList().iterator();
        while (it.hasNext()) {
            sb.append(((DocumentProtos.TextWithLanguage) it.next()).getText()).append(" ");
        }
        Iterator it2 = parseFrom.getKeywordsList().iterator();
        while (it2.hasNext()) {
            sb.append(Joiner.on(" ").join(((DocumentProtos.KeywordsList) it2.next()).getKeywordsList())).append(" ");
        }
        ArrayList arrayList = new ArrayList();
        Iterator it3 = parseFrom.getBasicMetadata().getTitleList().iterator();
        while (it3.hasNext()) {
            arrayList.add(((DocumentProtos.TextWithLanguage) it3.next()).getText());
        }
        sb.append(Joiner.on(" ").join(arrayList));
        PorterStemmer porterStemmer = new PorterStemmer();
        for (String str : DiacriticsRemover.removeDiacritics(sb.toString()).toLowerCase().split(" ")) {
            porterStemmer.add(str.toCharArray(), 0);
            porterStemmer.stem();
            context.write(new TextArrayWritable(new Text[]{this.key, new Text(porterStemmer.toString())}), one);
        }
    }

    protected void logAllFeaturesExtractedForOneAuthor(String str, TextTextArrayMapWritable textTextArrayMapWritable) {
        logger.debug("MAPPER: output key: " + str);
        logger.debug("MAPPER: output value: " + textTextArrayMapWritable);
    }

    protected /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
        map((ImmutableBytesWritable) obj, (Result) obj2, (Mapper<ImmutableBytesWritable, Result, TextArrayWritable, IntWritable>.Context) context);
    }
}
