package pl.edu.icm.coansys.hbase2sfbw2;

import com.google.protobuf.InvalidProtocolBufferException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.coansys.hbase2sfbw2.HbToProtosMRKey;
import pl.edu.icm.coansys.input.filters.EmptyTitleFilter;
import pl.edu.icm.coansys.input.filters.HBaseToSfProtosFilter;
import pl.edu.icm.coansys.input.filters.NoAuthorFilter;
import pl.edu.icm.coansys.models.DocumentProtos;
import pl.edu.icm.coansys.models.OrganizationProtos;
import pl.edu.icm.coansys.models.PersonProtos;
import pl.edu.icm.coansys.models.ProjectProtos;
import pl.edu.icm.coansys.transformers.ProtoMediaMetadataToMetadata;
import pl.edu.icm.coansys.transformers.bwmetaToBW2Proto.BwMeta2bwProtoTransformer;
import pl.edu.icm.coansys.transformers.crossref.CrossrefJsonMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.crossref.UnixrefMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.dataciteToBW2Proto.Datacite2bwProtoTransformer;
import pl.edu.icm.coansys.transformers.dcOaiToBW2Proto.OaiDcMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.gsprotoToBW2Proto.GsMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.nlmToBW2Proto.NlMMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.openaire.OafMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.pbnToBW2Proto.PBNToBW2ProtoTransformer;
import pl.edu.icm.coansys.transformers.umultirank.UMultiRankToBw2Metadata;
import pl.edu.icm.model.transformers.coansys.MultiTypeParseResult;

/* loaded from: input_file:pl/edu/icm/coansys/hbase2sfbw2/HBaseToProtosMapper.class */
/**
 * HBase table mapper that turns one HBase row into serialized protobuf wrappers.
 *
 * <p>For each row the mapper reads an optional {@code DocumentMetadata} cell and an optional
 * {@code MediaContainer} cell. Depending on {@link #improveWheneOriginallyMetadata} it either
 * emits the stored metadata directly, or runs the registered
 * {@link ProtoMediaMetadataToMetadata} transformers over the media entries and emits every
 * person, organization, project and document they produced that is accepted by all registered
 * {@link HBaseToSfProtosFilter}s.
 *
 * <p>NOTE(review): the constructor calls the overridable {@link #initTransformers()} and
 * {@link #initFilters()}; subclasses overriding them must not rely on their own fields being
 * initialized at that point.
 */
public class HBaseToProtosMapper extends TableMapper<HbToProtosMRKey, BytesWritable> {
    private static final Logger LOGGER = LoggerFactory.getLogger(HBaseToProtosMapper.class);

    /** Column family / qualifier of the cell parsed as {@code DocumentProtos.MediaContainer}. */
    private static final byte[] MEDIA_CONTAINER_FAMILY = Bytes.toBytes("c");
    private static final byte[] MEDIA_CONTAINER_QUALIFIER = Bytes.toBytes("cproto");

    /** Column family / qualifier of the cell parsed as {@code DocumentProtos.DocumentMetadata}. */
    private static final byte[] DOCUMENT_METADATA_FAMILY = Bytes.toBytes("m");
    private static final byte[] DOCUMENT_METADATA_QUALIFIER = Bytes.toBytes("mproto");

    /** Transformers tried, in order, against every media entry of a row. */
    protected List<ProtoMediaMetadataToMetadata> transformers = new ArrayList<>();
    /** Filters consulted before a transformed entity is written to the output. */
    protected List<HBaseToSfProtosFilter> filters;
    DocumentProtoUtil dpu;
    /**
     * When {@code false}, a row that already carries document metadata is emitted as-is and the
     * transformers are skipped; when {@code true}, the stored metadata is added to the parse
     * result and the transformers are run as well.
     */
    boolean improveWheneOriginallyMetadata;

    /** Creates a mapper with the default transformers and filters registered. */
    public HBaseToProtosMapper() {
        initTransformers();
        this.filters = new ArrayList<>();
        initFilters();
        this.dpu = new DocumentProtoUtil();
        this.improveWheneOriginallyMetadata = false;
    }

    /**
     * Converts the HBase row key into the key used for emitted documents.
     *
     * @param immutableBytesWritable the HBase row key
     * @return a {@link Text} wrapping a copy of the row key bytes
     */
    Text transformKey(ImmutableBytesWritable immutableBytesWritable) {
        return new Text(immutableBytesWritable.copyBytes());
    }

    /**
     * Replaces the list of transformers.
     *
     * @param list the transformers to use from now on
     */
    public void setTransformers(List<ProtoMediaMetadataToMetadata> list) {
        this.transformers = list;
    }

    /**
     * Replaces the list of filters.
     *
     * @param list the filters to use from now on
     */
    public void setFilters(List<HBaseToSfProtosFilter> list) {
        this.filters = list;
    }

    /** Registers the default set of media transformers. */
    protected void initTransformers() {
        this.transformers.add(new GsMediaToBw2Metadata());
        this.transformers.add(new OaiDcMediaToBw2Metadata());
        this.transformers.add(new BwMeta2bwProtoTransformer());
        this.transformers.add(new NlMMediaToBw2Metadata());
        this.transformers.add(new UnixrefMediaToBw2Metadata());
        this.transformers.add(new CrossrefJsonMediaToBw2Metadata());
        this.transformers.add(new OafMediaToBw2Metadata());
        this.transformers.add(new Datacite2bwProtoTransformer());
        this.transformers.add(new UMultiRankToBw2Metadata());
        this.transformers.add(new PBNToBW2ProtoTransformer());
    }

    /** Registers the default set of output filters. */
    protected void initFilters() {
        this.filters.add(new EmptyTitleFilter());
        this.filters.add(new NoAuthorFilter());
    }

    /**
     * Runs the superclass setup and then lets every filter configure itself from the job context.
     *
     * @param context the task context
     * @throws IOException if the superclass setup fails
     * @throws InterruptedException if the task is interrupted
     */
    protected void setup(Mapper<ImmutableBytesWritable, Result, HbToProtosMRKey, BytesWritable>.Context context) throws IOException, InterruptedException {
        super.setup(context);
        for (HBaseToSfProtosFilter filter : this.filters) {
            filter.setupFilterFromContext(context);
        }
    }

    /**
     * Processes a single HBase row.
     *
     * @param immutableBytesWritable the HBase row key
     * @param result the row content
     * @param context the task context that receives the output records
     * @throws IOException if the media container cannot be parsed or writing fails
     * @throws InterruptedException if the task is interrupted
     */
    protected void map(ImmutableBytesWritable immutableBytesWritable, Result result, Mapper<ImmutableBytesWritable, Result, HbToProtosMRKey, BytesWritable>.Context context) throws IOException, InterruptedException {
        // Decode keys explicitly as UTF-8 (Bytes.toString) instead of the platform default charset.
        String rowId = Bytes.toString(immutableBytesWritable.copyBytes());
        context.setStatus("hbase row id: " + rowId);
        String documentKey = Bytes.toString(transformKey(immutableBytesWritable).copyBytes());

        DocumentProtos.MediaContainer mediaContainer = prepareMediaContainer(result);
        DocumentProtos.DocumentMetadata storedMetadata = prepareDocumentProtosDocumentMetadata(result);
        MultiTypeParseResult parseResult = new MultiTypeParseResult();

        if (storedMetadata != null) {
            DocumentProtos.DocumentWrapper.Builder wrapper = DocumentProtos.DocumentWrapper.newBuilder();
            wrapper.setDocumentMetadata(storedMetadata);
            if (mediaContainer != null) {
                wrapper.setMediaContainer(mediaContainer);
            }
            this.dpu.setIDFor(wrapper, documentKey);
            HbToProtosMRKey outputKey = new HbToProtosMRKey(HbToProtosMRKey.Type.DOCUMENT, documentKey);
            if (!this.improveWheneOriginallyMetadata) {
                // Stored metadata is emitted directly, bypassing transformers and filters.
                LOGGER.info("row id: {}", rowId);
                LOGGER.info("Metadata has been generated already");
                context.write(outputKey, new BytesWritable(wrapper.build().toByteArray()));
                return;
            }
            parseResult.add(wrapper);
        }

        if (!hasMediaContainer(mediaContainer) && storedMetadata == null) {
            LOGGER.info("row id: {}", rowId);
            LOGGER.info("No Media {}", "there is no media container");
            return;
        }

        if (transformAndReturnSuccess(rowId, parseResult, mediaContainer, documentKey, false)) {
            writeToContextAndLog(context, rowId, parseResult);
            return;
        }

        LOGGER.info("row id: {}", rowId);
        if (mediaContainer != null) {
            LOGGER.debug("There were no suitable transformation. Available media types are: ");
            for (DocumentProtos.Media media : mediaContainer.getMediaList()) {
                LOGGER.debug("key: {}, type: {}", media.getKey(), media.getMediaType());
            }
        }
    }

    /**
     * Tells whether the row carries a media container.
     *
     * @param mediaContainerOrBuilder the container read from the row, possibly {@code null}
     * @return {@code true} if the container is not {@code null}
     */
    protected boolean hasMediaContainer(DocumentProtos.MediaContainerOrBuilder mediaContainerOrBuilder) {
        return mediaContainerOrBuilder != null;
    }

    /**
     * Writes every entity of the parse result that passes all filters, then reports progress.
     *
     * @param context the task context that receives the output records
     * @param rowId the HBase row id, used only in log messages
     * @param parseResult the entities produced for the row
     * @throws IOException if writing fails
     * @throws InterruptedException if the task is interrupted
     */
    private void writeToContextAndLog(Mapper<ImmutableBytesWritable, Result, HbToProtosMRKey, BytesWritable>.Context context, String rowId, MultiTypeParseResult parseResult) throws IOException, InterruptedException {
        writePersons(context, rowId, parseResult);
        writeOrganizations(context, rowId, parseResult);
        writeProjects(context, rowId, parseResult);
        writeDocuments(context, rowId, parseResult);
        context.progress();
    }

    /** Writes the persons accepted by every filter; each rejecting filter is logged. */
    private void writePersons(Mapper<ImmutableBytesWritable, Result, HbToProtosMRKey, BytesWritable>.Context context, String rowId, MultiTypeParseResult parseResult) throws IOException, InterruptedException {
        for (PersonProtos.PersonWrapperOrBuilder person : parseResult.getPersons()) {
            boolean rejected = false;
            // Deliberately no early exit: every rejecting filter gets its own log entry.
            for (HBaseToSfProtosFilter filter : this.filters) {
                if (!filter.shouldPersonBeSavedToSequenceFile(person)) {
                    LOGGER.info("row id: {} person {}", rowId, person.getRowId());
                    LOGGER.info("Person has been filtered out by : {}", filter.getName());
                    rejected = true;
                }
            }
            if (rejected) {
                continue;
            }
            HbToProtosMRKey outputKey = new HbToProtosMRKey(HbToProtosMRKey.Type.PERSON, person.getRowId());
            byte[] payload = person instanceof PersonProtos.PersonWrapper
                    ? ((PersonProtos.PersonWrapper) person).toByteArray()
                    : ((PersonProtos.PersonWrapper.Builder) person).build().toByteArray();
            context.write(outputKey, new BytesWritable(payload));
        }
    }

    /** Writes the organizations accepted by every filter; each rejecting filter is logged. */
    private void writeOrganizations(Mapper<ImmutableBytesWritable, Result, HbToProtosMRKey, BytesWritable>.Context context, String rowId, MultiTypeParseResult parseResult) throws IOException, InterruptedException {
        for (OrganizationProtos.OrganizationWrapperOrBuilder organization : parseResult.getOrganizations()) {
            boolean rejected = false;
            for (HBaseToSfProtosFilter filter : this.filters) {
                if (!filter.shouldOrganizationBeSavedToSequenceFile(organization)) {
                    LOGGER.info("row id: {} organization {}", rowId, organization.getRowId());
                    LOGGER.info("Organization has been filtered out by : {}", filter.getName());
                    rejected = true;
                }
            }
            if (rejected) {
                continue;
            }
            HbToProtosMRKey outputKey = new HbToProtosMRKey(HbToProtosMRKey.Type.ORGANIZATION, organization.getRowId());
            byte[] payload = organization instanceof OrganizationProtos.OrganizationWrapper
                    ? ((OrganizationProtos.OrganizationWrapper) organization).toByteArray()
                    : ((OrganizationProtos.OrganizationWrapper.Builder) organization).build().toByteArray();
            context.write(outputKey, new BytesWritable(payload));
        }
    }

    /** Writes the projects accepted by every filter; each rejecting filter is logged. */
    private void writeProjects(Mapper<ImmutableBytesWritable, Result, HbToProtosMRKey, BytesWritable>.Context context, String rowId, MultiTypeParseResult parseResult) throws IOException, InterruptedException {
        for (ProjectProtos.ProjectWrapperOrBuilder project : parseResult.getProjects()) {
            boolean rejected = false;
            for (HBaseToSfProtosFilter filter : this.filters) {
                if (!filter.shouldProjectBeSavedToSequenceFile(project)) {
                    LOGGER.info("row id: {} project {}", rowId, project.getRowId());
                    LOGGER.info("Project has been filtered out by : {}", filter.getName());
                    rejected = true;
                }
            }
            if (rejected) {
                continue;
            }
            HbToProtosMRKey outputKey = new HbToProtosMRKey(HbToProtosMRKey.Type.PROJECT, project.getRowId());
            byte[] payload = project instanceof ProjectProtos.ProjectWrapper
                    ? ((ProjectProtos.ProjectWrapper) project).toByteArray()
                    : ((ProjectProtos.ProjectWrapper.Builder) project).build().toByteArray();
            context.write(outputKey, new BytesWritable(payload));
        }
    }

    /** Writes the documents accepted by every filter; each rejecting filter is logged. */
    private void writeDocuments(Mapper<ImmutableBytesWritable, Result, HbToProtosMRKey, BytesWritable>.Context context, String rowId, MultiTypeParseResult parseResult) throws IOException, InterruptedException {
        for (DocumentProtos.DocumentWrapperOrBuilder document : parseResult.getDocuments()) {
            boolean rejected = false;
            for (HBaseToSfProtosFilter filter : this.filters) {
                if (!filter.shouldDocumentBeSavedToSequenceFile(document)) {
                    LOGGER.info("row id: {} document {}", rowId, document.getRowId());
                    LOGGER.info("Document has been filtered out by : {}", filter.getName());
                    rejected = true;
                }
            }
            if (rejected) {
                continue;
            }
            HbToProtosMRKey outputKey = new HbToProtosMRKey(HbToProtosMRKey.Type.DOCUMENT, document.getRowId());
            byte[] payload = document instanceof DocumentProtos.DocumentWrapper
                    ? ((DocumentProtos.DocumentWrapper) document).toByteArray()
                    : ((DocumentProtos.DocumentWrapper.Builder) document).build().toByteArray();
            context.write(outputKey, new BytesWritable(payload));
        }
    }

    /**
     * Applies every transformer to every media entry whose key matches the transformer's
     * supported key (case-insensitively) and collects the outcome in the parse result.
     *
     * <p>An exception thrown by a transformer is logged and does not stop the remaining
     * transformations.
     *
     * @param str the HBase row id, used only in error log messages
     * @param multiTypeParseResult receives the entities produced by the transformers
     * @param mediaContainerOrBuilder the media container of the row; when {@code null} there is
     *     nothing to transform and {@code z} is returned unchanged, so an entity already placed
     *     in the parse result by the caller is not reported as a success
     * @param str2 the key handed to each transformer
     * @param z the initial success flag
     * @return {@code true} if {@code z} was {@code true} or at least one transformer reported success
     */
    boolean transformAndReturnSuccess(String str, MultiTypeParseResult multiTypeParseResult, DocumentProtos.MediaContainerOrBuilder mediaContainerOrBuilder, String str2, boolean z) {
        boolean success = z;
        if (mediaContainerOrBuilder == null) {
            // Reached when a row has stored metadata but no media cell; avoids a NullPointerException.
            return success;
        }
        for (ProtoMediaMetadataToMetadata transformer : this.transformers) {
            for (DocumentProtos.Media media : mediaContainerOrBuilder.getMediaList()) {
                if (!media.hasKey() || !transformer.getSupportedKey().equalsIgnoreCase(media.getKey())) {
                    continue;
                }
                try {
                    // transform(...) is evaluated first so that it always runs, even after an earlier success.
                    success = transformer.transform(media, str2, multiTypeParseResult, mediaContainerOrBuilder) || success;
                } catch (Exception e) {
                    LOGGER.error("exception at row id: " + str + " transfomer: " + transformer.getSupportedKey(), e);
                }
            }
        }
        return success;
    }

    /**
     * Reads and parses the media container cell of a row.
     *
     * @param result the HBase row
     * @return the parsed container, or {@code null} if the row has no such cell
     * @throws InvalidProtocolBufferException if the cell content is not a valid media container
     */
    public static DocumentProtos.MediaContainer prepareMediaContainer(Result result) throws InvalidProtocolBufferException {
        byte[] serialized = result.getValue(MEDIA_CONTAINER_FAMILY, MEDIA_CONTAINER_QUALIFIER);
        if (serialized == null) {
            return null;
        }
        return DocumentProtos.MediaContainer.parseFrom(serialized);
    }

    /**
     * Reads and parses the document metadata cell of a row.
     *
     * <p>Parsing is best-effort: a cell that cannot be parsed is logged at debug level and
     * treated as absent.
     *
     * @param result the HBase row
     * @return the parsed metadata, or {@code null} if the cell is missing or unparsable
     */
    public static DocumentProtos.DocumentMetadata prepareDocumentProtosDocumentMetadata(Result result) {
        byte[] serialized = result.getValue(DOCUMENT_METADATA_FAMILY, DOCUMENT_METADATA_QUALIFIER);
        if (serialized == null) {
            return null;
        }
        try {
            return DocumentProtos.DocumentMetadata.parseFrom(serialized);
        } catch (Exception e) {
            LOGGER.debug(e.getMessage(), e);
            return null;
        }
    }

    /**
     * Erased-signature entry point marked as a bridge/synthetic method in this source; it casts
     * its arguments and delegates to the typed {@code map} method.
     */
    protected /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
        map((ImmutableBytesWritable) obj, (Result) obj2, (Mapper<ImmutableBytesWritable, Result, HbToProtosMRKey, BytesWritable>.Context) context);
    }
}
