package pl.edu.icm.coansys.output.merge.strategies;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.coansys.models.DocumentProtos;
import pl.edu.icm.coansys.models.PICProtos;

/* loaded from: input_file:pl/edu/icm/coansys/output/merge/strategies/CitationMerge.class */
/**
 * {@link IMerge} strategy that copies citation-matching output into a document's references.
 *
 * <p>The citation-matching result is read from one field of a Pig tuple as a serialized
 * {@code PICProtos.PicOut} message. Every reference of the document whose position appears in
 * that result gets an additional external id keyed {@code "coansys/cit-match"} whose value is
 * the matched document id.
 */
public class CitationMerge implements IMerge {
    private static final Logger LOGGER = LoggerFactory.getLogger(CitationMerge.class);

    /** Key of the external id added to matched references; also used as the provenance marker id. */
    private static final String CIT_MATCH_KEY = "coansys/cit-match";

    /**
     * Merges the citation-matching output stored in field {@code i} of {@code tuple} into
     * {@code builder}.
     *
     * <p>This is best-effort: when no matches are found, or when any exception is raised while
     * parsing or enriching, a warning is logged and {@code builder} is returned.
     *
     * @param tuple   input tuple; field 0 is used as the document identifier in log messages
     * @param i       index of the tuple field holding the serialized citation-matching output
     * @param builder document builder to enrich; it is modified in place
     * @return the same {@code builder} instance
     * @throws IOException if {@code tuple.get(0)} throws while a failure is being logged
     */
    @Override
    public DocumentProtos.DocumentWrapper.Builder execute(Tuple tuple, int i, DocumentProtos.DocumentWrapper.Builder builder) throws IOException {
        try {
            Map<Integer, String> matches = parseCitMatch(tuple, i);
            if (matches.isEmpty()) {
                LOGGER.warn("No citation output for the document {}", tuple.get(0));
                return builder;
            }
            return enrichDocumentMetadata(builder, matches);
        } catch (Exception e) {
            // Deliberately broad: a failure in citation merging must not fail the whole merge.
            LOGGER.warn("No citation output for the document {}", tuple.get(0), e);
            return builder;
        }
    }

    /**
     * Rebuilds the reference list of the document, adding a {@code "coansys/cit-match"} external
     * id to every reference whose position is a key of {@code map}.
     *
     * <p>References without a match are rebuilt unchanged. The order of references is preserved.
     * The added external id carries a provenance entry stamped with the current time in
     * milliseconds.
     *
     * @param builder document builder whose references are replaced in place
     * @param map     reference position mapped to the matched document id
     * @return the same {@code builder} instance
     */
    public DocumentProtos.DocumentWrapper.Builder enrichDocumentMetadata(DocumentProtos.DocumentWrapper.Builder builder, Map<Integer, String> map) {
        List<DocumentProtos.ReferenceMetadata.Builder> referenceBuilders = builder.getDocumentMetadataBuilder().getReferenceBuilderList();
        List<DocumentProtos.ReferenceMetadata> updatedReferences =
                new ArrayList<DocumentProtos.ReferenceMetadata>(referenceBuilders.size());
        for (DocumentProtos.ReferenceMetadata.Builder referenceBuilder : referenceBuilders) {
            Integer position = Integer.valueOf(referenceBuilder.getPosition());
            if (!map.containsKey(position)) {
                updatedReferences.add(referenceBuilder.build());
                continue;
            }

            DocumentProtos.ProvenanceInfo.SingleProvenanceInfo.Builder currentProvenance =
                    DocumentProtos.ProvenanceInfo.SingleProvenanceInfo.newBuilder();
            currentProvenance.setLastModificationMarkerId(CIT_MATCH_KEY);
            currentProvenance.setLastModificationDate(System.currentTimeMillis());

            DocumentProtos.ProvenanceInfo.Builder provenance = DocumentProtos.ProvenanceInfo.newBuilder();
            provenance.setCurrentProvenance(currentProvenance);

            DocumentProtos.KeyValue.Builder citMatchId = DocumentProtos.KeyValue.newBuilder();
            citMatchId.setKey(CIT_MATCH_KEY);
            citMatchId.setValue(map.get(position));
            citMatchId.setProvenance(provenance);

            // Work on a copy built from the current state of the reference, then append the id.
            DocumentProtos.ReferenceMetadata.Builder enriched =
                    DocumentProtos.ReferenceMetadata.newBuilder(referenceBuilder.build());
            enriched.addExtId(citMatchId);
            updatedReferences.add(enriched.build());
        }
        // Replace the whole reference list only after every reference has been rebuilt.
        builder.getDocumentMetadataBuilder().clearReference();
        builder.getDocumentMetadataBuilder().addAllReference(updatedReferences);
        return builder;
    }

    /**
     * Parses field {@code i} of {@code tuple} as a {@code PICProtos.PicOut} message and collects
     * its references into a map.
     *
     * <p>Any exception raised while reading, casting or parsing the field is logged as a warning
     * and results in an empty map. If several references share a reference number, the last one
     * in the list wins.
     *
     * @param tuple input tuple; field 0 is used as the row id in the log message
     * @param i     index of the field expected to hold a {@link DataByteArray}
     * @return reference number mapped to matched document id; never {@code null}, possibly empty
     * @throws IOException if {@code tuple.get(0)} throws while the parse failure is being logged
     */
    public Map<Integer, String> parseCitMatch(Tuple tuple, int i) throws IOException {
        Map<Integer, String> matches = new HashMap<Integer, String>();
        PICProtos.PicOut picOut = null;
        try {
            picOut = PICProtos.PicOut.parseFrom(((DataByteArray) tuple.get(i)).get());
        } catch (Exception e) {
            // Deliberately broad: missing, mistyped or malformed input all mean "no citations".
            LOGGER.warn("no citation for document rowId {}", tuple.get(0), e);
        }
        if (picOut != null) {
            for (PICProtos.Reference reference : picOut.getRefsList()) {
                matches.put(Integer.valueOf(reference.getRefNum()), reference.getDocId());
            }
        }
        return matches;
    }
}
