package pl.edu.icm.coansys.transformers.gsprotoToBW2Proto;

import com.google.protobuf.InvalidProtocolBufferException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.coansys.models.DocumentProtos;
import pl.edu.icm.coansys.transformers.DocumentBasicMediaMetadataToMetadata;
import pl.edu.icm.generated.protobuf.commoncrawl.ScholarRecordProtos;

/* loaded from: input_file:pl/edu/icm/coansys/transformers/gsprotoToBW2Proto/GsMediaToBw2Metadata.class */
public class GsMediaToBw2Metadata extends DocumentBasicMediaMetadataToMetadata {
    private static final Logger LOGGER = LoggerFactory.getLogger(GsMediaToBw2Metadata.class);

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:pl/edu/icm/coansys/transformers/gsprotoToBW2Proto/GsMediaToBw2Metadata$AuthorData.class */
    /**
     * Mutable counter used to hand out sequential author keys for one document.
     */
    public static class AuthorData {
        /** Prefix of every generated key; assigned by the code that creates this object. */
        String docId;
        /** Sequence number that the next generated key will carry; starts at 1. */
        int num = 1;

        AuthorData() {
        }

        /**
         * Builds the key for the next author and advances the counter.
         *
         * <p>The key has the form {@code <docId>#c<NNN>}, where the number is left-padded
         * with zeros to at least three characters (longer numbers are not truncated).
         *
         * @return the key for the current sequence number
         */
        public String getNextAuthorKey() {
            StringBuilder suffix = new StringBuilder().append(this.num);
            // Left-pad with zeros until the suffix is at least three characters long.
            while (suffix.length() < 3) {
                suffix.insert(0, '0');
            }
            this.num++;
            return this.docId + "#c" + suffix;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:pl/edu/icm/coansys/transformers/gsprotoToBW2Proto/GsMediaToBw2Metadata$HWInfo.class */
    /**
     * Holder for page information collected while the headers of one record are scanned.
     * All fields start out as {@code null}, meaning "not seen".
     */
    public static class HWInfo {
        /** Value of the {@code citation_firstpage} header, if one was seen. */
        String hwFirstPage;
        /** Value of the {@code citation_lastpage} header, if one was seen. */
        String hwLastPage;
        /** Ready-made page range; when set it takes precedence over first/last page. */
        String hwPages;

        HWInfo() {
        }
    }

    /** Key value reported by {@link #getSupportedKey()}. */
    private static final String SUPPORTED_KEY = "scholar_record";

    /**
     * Returns the constant key {@code "scholar_record"}.
     * NOTE(review): presumably the caller matches this against a media entry's key to pick
     * a transformer - confirm against the base class.
     *
     * @return the supported key, never {@code null}
     */
    @Override // pl.edu.icm.coansys.transformers.ProtoMediaMetadataToMetadata
    public String getSupportedKey() {
        return SUPPORTED_KEY;
    }

    /**
     * Compares two author-related strings, ignoring surrounding whitespace and letter case.
     *
     * <p>Two {@code null} values are considered equal; {@code null} never equals a
     * non-null value.
     *
     * @param str  first value, may be {@code null}
     * @param str2 second value, may be {@code null}
     * @return {@code true} when both values are {@code null} or match after normalisation
     */
    boolean areAuthorStringsEqual(String str, String str2) {
        if (str == null || str2 == null) {
            return str == null && str2 == null;
        }
        // NOTE(review): the explicit default-locale toLowerCase() is largely redundant with
        // equalsIgnoreCase(); it is kept so the comparison result stays exactly as before.
        String left = str.trim().toLowerCase();
        String right = str2.trim().toLowerCase();
        return left.equalsIgnoreCase(right);
    }

    /**
     * Adds an author to the metadata unless an equivalent author is already present.
     *
     * <p>A value containing a comma with at least two parts is read as
     * {@code "surname, forenames"} (any further parts are ignored); it is a duplicate when
     * an existing author has both the same surname and the same forenames. Any other value
     * is treated as a plain name and is a duplicate when an existing author has the same
     * name. Comparisons ignore case and surrounding whitespace.
     *
     * <p>{@code null} and blank values are ignored, so that empty segments produced by
     * splitting an author list do not create nameless authors or consume author keys.
     *
     * @param str        raw author string, may be {@code null} or blank
     * @param authorData source of the key assigned to a newly added author
     * @param builder    metadata builder that receives the author
     */
    void addAuthor(String str, AuthorData authorData, DocumentProtos.BasicMetadata.Builder builder) {
        if (StringUtils.isBlank(str)) {
            return;
        }
        LOGGER.debug("Testing next author: {}", str);
        final String fullName = str.trim();
        String surname = null;
        String forenames = null;
        // Split once; two or more parts imply the "surname, forenames" form.
        final String[] parts = str.split(",");
        if (parts.length >= 2) {
            surname = parts[0].trim();
            forenames = parts[1].trim();
        }
        for (DocumentProtos.AuthorOrBuilder existing : builder.getAuthorOrBuilderList()) {
            final boolean duplicate;
            if (surname != null) {
                duplicate = existing.hasSurname()
                        && areAuthorStringsEqual(existing.getSurname(), surname)
                        && existing.hasForenames()
                        && areAuthorStringsEqual(existing.getForenames(), forenames);
            } else {
                duplicate = areAuthorStringsEqual(fullName, existing.getName());
            }
            if (duplicate) {
                return;
            }
        }
        DocumentProtos.Author.Builder author = DocumentProtos.Author.newBuilder();
        if (surname != null) {
            author.setSurname(surname);
            author.setForenames(forenames);
        }
        author.setName(fullName);
        author.setKey(authorData.getNextAuthorKey());
        builder.addAuthor(author);
        LOGGER.info("adding author {}", str);
    }

    /**
     * Appends a title to the metadata unless a title with the same text (ignoring case)
     * is already present.
     *
     * @param str     title text to add
     * @param builder metadata builder that receives the title
     */
    void addTitle(String str, DocumentProtos.BasicMetadata.Builder builder) {
        for (DocumentProtos.TextWithLanguageOrBuilder existing : builder.getTitleOrBuilderList()) {
            String existingText = existing.getText();
            if (existingText != null && existingText.equalsIgnoreCase(str)) {
                // Already known - nothing to add.
                return;
            }
        }
        builder.addTitle(DocumentProtos.TextWithLanguage.newBuilder().setText(str));
    }

    /**
     * Translates a single Dublin Core style header ({@code dc.*}) into document metadata.
     *
     * <p>Handled name prefixes (matched case-insensitively and independently of the default
     * locale): {@code dc.contributor} (author), {@code dc.title} (title), {@code dc.date}
     * (year) and {@code dc.subject} (comma-separated keywords). Everything else, including
     * {@code dc.description}, is ignored.
     *
     * @param str        header name
     * @param str2       header content
     * @param builder    document builder that receives keywords
     * @param builder2   basic metadata builder that receives authors, titles and the year
     * @param authorData source of keys for newly added authors
     * @return {@code true} when the header was recognised and applied; {@code false} when it
     *         was not recognised or a {@code dc.date} value contained no year
     */
    boolean processDcHeader(String str, String str2, DocumentProtos.DocumentMetadata.Builder builder, DocumentProtos.BasicMetadata.Builder builder2, AuthorData authorData) {
        if (hasPrefixIgnoreCase(str, "dc.contributor")) {
            addAuthor(str2, authorData, builder2);
            return true;
        }
        if (hasPrefixIgnoreCase(str, "dc.title")) {
            addTitle(str2, builder2);
            return true;
        }
        if (hasPrefixIgnoreCase(str, "dc.date")) {
            try {
                builder2.setYear(getYearFromDate(str2));
                return true;
            } catch (IllegalStateException e) {
                // No four-digit year in the value: report it and treat the header as unhandled.
                LOGGER.warn("wrong date: {}", str2, e);
                return false;
            }
        }
        if (!hasPrefixIgnoreCase(str, "dc.subject")) {
            return false;
        }
        DocumentProtos.KeywordsList.Builder keywords = DocumentProtos.KeywordsList.newBuilder();
        for (String rawKeyword : str2.split(",")) {
            // Drop the whitespace that follows separators ("a, b") and skip empty tokens.
            String keyword = rawKeyword.trim();
            if (!keyword.isEmpty()) {
                keywords.addKeywords(keyword);
            }
        }
        builder.addKeywords(keywords);
        return true;
    }

    /**
     * Tells whether {@code value} starts with {@code prefix}, ignoring case. Uses a
     * per-character comparison, so the result does not depend on the default locale.
     */
    private static boolean hasPrefixIgnoreCase(String value, String prefix) {
        return value.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /** First run of four consecutive digits; compiled once because patterns are reusable. */
    private static final Pattern YEAR_PATTERN = Pattern.compile("\\d{4}");

    /**
     * Extracts the first four-digit sequence from a date string.
     *
     * @param str text that is expected to contain a year, must not be {@code null}
     * @return the first four consecutive digits found in {@code str}
     * @throws IllegalStateException when {@code str} contains no four-digit sequence
     */
    public static String getYearFromDate(String str) {
        Matcher matcher = YEAR_PATTERN.matcher(str);
        if (!matcher.find()) {
            // Same exception type as Matcher.group() without a match, but with context.
            throw new IllegalStateException("No four-digit year found in: " + str);
        }
        return matcher.group();
    }

    /**
     * Translates a single {@code citation_*} header into document metadata.
     *
     * <p>A leading {@code bepress_} prefix is stripped from the name before matching; names
     * are then compared case-insensitively. First and last page are only recorded in
     * {@code hWInfo}; they are combined later, once all headers have been seen.
     *
     * @param str        header name
     * @param str2       header content
     * @param builder    document builder that receives external identifiers
     * @param builder2   basic metadata builder that receives the bibliographic fields
     * @param authorData source of keys for newly added authors
     * @param hWInfo     accumulator for page information
     * @return {@code true} when the header was recognised and consumed; {@code false} for
     *         ignored or unknown headers and for a {@code citation_date} without a year
     */
    boolean processHWHeader(String str, String str2, DocumentProtos.DocumentMetadata.Builder builder, DocumentProtos.BasicMetadata.Builder builder2, AuthorData authorData, HWInfo hWInfo) {
        String substring = str.startsWith("bepress_") ? str.substring(8) : str;
        if (substring.equalsIgnoreCase("citation_journal_title")) {
            // The first journal title wins; later ones are consumed but ignored.
            if (!builder2.hasJournal()) {
                builder2.setJournal(str2);
            }
            return true;
        }
        if (substring.equalsIgnoreCase("citation_publisher")) {
            return false;
        }
        if (substring.equalsIgnoreCase("citation_authors")) {
            // ';' separates authors, while ',' separates surname and forenames inside one
            // author. A value without ';' is therefore a single author, including a plain
            // name that has no comma at all.
            for (String author : str2.split(";")) {
                if (StringUtils.isNotBlank(author)) {
                    addAuthor(author, authorData, builder2);
                }
            }
            return true;
        }
        if (substring.equalsIgnoreCase("citation_title")) {
            addTitle(str2, builder2);
            return true;
        }
        if (substring.equalsIgnoreCase("citation_date")) {
            try {
                builder2.setYear(getYearFromDate(str2));
                return true;
            } catch (IllegalStateException e) {
                // No four-digit year in the value: report it and treat the header as unhandled.
                LOGGER.warn("wrong date: {}", str2, e);
                LOGGER.info("Got new unknown atribute: {} value: {}", substring, str2);
                return false;
            }
        }
        if (substring.equalsIgnoreCase("citation_volume")) {
            builder2.setVolume(str2);
            return true;
        }
        if (substring.equalsIgnoreCase("citation_issue")) {
            builder2.setIssue(str2);
            return true;
        }
        if (substring.equalsIgnoreCase("citation_firstpage")) {
            hWInfo.hwFirstPage = str2;
            return true;
        }
        if (substring.equalsIgnoreCase("citation_lastpage")) {
            hWInfo.hwLastPage = str2;
            return true;
        }
        if (substring.equalsIgnoreCase("citation_pmid")) {
            builder.addExtId(DocumentProtos.KeyValue.newBuilder().setKey("bwmeta1.id-class.PMID").setValue(str2));
            return true;
        }
        if (substring.equalsIgnoreCase("citation_keywords")) {
            return false;
        }
        if (substring.equalsIgnoreCase("citation_doi")) {
            builder2.setDoi(str2);
            return true;
        }
        if (substring.startsWith("citation")) {
            // A citation_* header this method has no mapping for.
            LOGGER.info("Got new unknown atribute: {} value: {}", substring, str2);
        }
        return false;
    }

    /**
     * Writes the page information gathered in {@code hWInfo} to the metadata.
     *
     * <p>Precedence: an explicit page range, then {@code first-last}, then the first page
     * alone. Nothing is written when neither a range nor a first page is available.
     *
     * @param builder    not used by this method
     * @param builder2   basic metadata builder that receives the pages value
     * @param authorData not used by this method
     * @param hWInfo     page information collected from the headers
     */
    private void postprocessHWHeader(DocumentProtos.DocumentMetadata.Builder builder, DocumentProtos.BasicMetadata.Builder builder2, AuthorData authorData, HWInfo hWInfo) {
        final String pages;
        if (hWInfo.hwPages != null) {
            pages = hWInfo.hwPages;
        } else if (hWInfo.hwFirstPage == null) {
            // A last page without a first page is not enough to describe a range.
            return;
        } else if (hWInfo.hwLastPage == null) {
            pages = hWInfo.hwFirstPage;
        } else {
            pages = hWInfo.hwFirstPage + "-" + hWInfo.hwLastPage;
        }
        builder2.setPages(pages);
    }

    /**
     * Applies every non-blank meta header of a scholar record to the metadata builders.
     *
     * <p>Each header is first offered to {@code processHWHeader}; only when that declines is
     * it offered to {@code processDcHeader}. Page information is finalised after all
     * headers have been visited.
     *
     * @param scholarRecordP record whose meta-name list is translated
     * @param builder        document builder passed on to the header processors
     * @param builder2       basic metadata builder passed on to the header processors
     * @param authorData     source of keys for newly added authors
     * @return {@code true} when at least one header was consumed by either processor
     */
    boolean translateGoogleScholarToDocumentMetadata(ScholarRecordProtos.ScholarRecordP scholarRecordP, DocumentProtos.DocumentMetadata.Builder builder, DocumentProtos.BasicMetadata.Builder builder2, AuthorData authorData) {
        HWInfo pageInfo = new HWInfo();
        boolean anyHandled = false;
        for (ScholarRecordProtos.MetaNameP header : scholarRecordP.getMetaNameList()) {
            if (!StringUtils.isNotBlank(header.getName()) || !StringUtils.isNotBlank(header.getContent())) {
                continue;
            }
            boolean handled = processHWHeader(header.getName(), header.getContent(), builder, builder2, authorData, pageInfo);
            if (!handled) {
                handled = processDcHeader(header.getName(), header.getContent(), builder, builder2, authorData);
            }
            if (handled) {
                anyHandled = true;
            }
        }
        postprocessHWHeader(builder, builder2, authorData, pageInfo);
        return anyHandled;
    }

    /**
     * Parses the media content as a scholar record and fills the document metadata from
     * its meta headers.
     *
     * @param media    media entry whose content holds a serialised {@code ScholarRecordP}
     * @param str      document id; used as the prefix of generated author keys
     * @param builder  document builder that receives the translated metadata
     * @param builder2 not used by this method
     * @return {@code true} when at least one header was translated and the basic metadata
     *         was set on {@code builder}; {@code false} when nothing was translated or the
     *         content could not be parsed
     */
    @Override // pl.edu.icm.coansys.transformers.DocumentBasicMediaMetadataToMetadata
    public boolean transform(DocumentProtos.Media media, String str, DocumentProtos.DocumentMetadata.Builder builder, DocumentProtos.DocumentWrapper.Builder builder2) {
        DocumentProtos.BasicMetadata.Builder basicMetadata = DocumentProtos.BasicMetadata.newBuilder();
        try {
            // toByteArray() already hands out a fresh array, so no defensive copy is needed.
            ScholarRecordProtos.ScholarRecordP record = ScholarRecordProtos.ScholarRecordP.parseFrom(media.getContent().toByteArray());
            AuthorData authorData = new AuthorData();
            authorData.docId = str;
            if (!translateGoogleScholarToDocumentMetadata(record, builder, basicMetadata, authorData)) {
                return false;
            }
            builder.setBasicMetadata(basicMetadata);
            return true;
        } catch (InvalidProtocolBufferException e) {
            // Report through the class logger, like every other message in this class.
            LOGGER.error("Could not parse scholar record for document {}", str, e);
            return false;
        }
    }
}
