package pl.edu.icm.coansys.input.medline;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.coansys.hbase.mapper.pair.prepare.PrepareMapperPair;

/* loaded from: input_file:pl/edu/icm/coansys/input/medline/MedlineInitialMapper.class */
public class MedlineInitialMapper extends Mapper<Text, BytesWritable, ImmutableBytesWritable, Put> {
    private static final Logger log = LoggerFactory.getLogger(MedlineInitialMapper.class);
    PrepareMapperPair mapPairPreparator = new PrepareMapperPair();
    String singleItemStart = "<MedlineCitation ";

    protected void map(Text text, BytesWritable bytesWritable, Mapper<Text, BytesWritable, ImmutableBytesWritable, Put>.Context context) throws IOException, InterruptedException {
        Scanner scanner = new Scanner(new GZIPInputStream(new ByteArrayInputStream(bytesWritable.copyBytes())));
        scanner.useDelimiter(this.singleItemStart);
        Pattern compile = Pattern.compile("<PMID .*[^D]>.*</PMID>");
        String next = scanner.next();
        while (scanner.hasNext()) {
            String next2 = scanner.next();
            Matcher matcher = compile.matcher(next2);
            if (matcher.find()) {
                String str = matcher.group().split(">")[1].split("<")[0];
                StringBuilder sb = new StringBuilder();
                sb.append(next);
                sb.append(this.singleItemStart);
                sb.append(next2);
                if (scanner.hasNext()) {
                    sb.append("</MedlineCitationSet>");
                }
                Pair<ImmutableBytesWritable, Put> prepareDocument = this.mapPairPreparator.prepareDocument(sb.toString().getBytes(), prepareIdentifier(str), "nlm_record", "pb/nlmRecord");
                context.write(prepareDocument.getFirst(), prepareDocument.getSecond());
            } else {
                log.error("No PMID !!!");
            }
        }
    }

    String prepareIdentifier(String str) {
        return "http://comac.icm.edu.pl/elements/medline/pmid_" + str;
    }

    protected /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
        map((Text) obj, (BytesWritable) obj2, (Mapper<Text, BytesWritable, ImmutableBytesWritable, Put>.Context) context);
    }
}
