package pl.edu.icm.synat.content.authors.impl;

import java.util.Locale;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.synat.content.authors.AuthorParser;
import pl.edu.icm.synat.content.bibmeta.model.PublicationAuthor;
import pl.edu.icm.synat.content.regexp.Node;
import pl.edu.icm.synat.content.regexp.RegexpParser;

/* loaded from: input_file:pl/edu/icm/synat/content/authors/impl/RegexpAuthorParser.class */
/**
 * {@link AuthorParser} implementation that delegates the actual text analysis to
 * {@link RegexpParser} and maps the resulting {@link Node} tree onto
 * {@link PublicationAuthor} objects.
 *
 * <p>Two parsers are built from the same constructor argument: one rooted at the
 * {@code "author"} rule (used by {@link #parse(String)}) and one rooted at the
 * {@code "surname"} rule (used by {@link #getSurnameSortKey(String)}).
 */
public class RegexpAuthorParser implements AuthorParser {
    private static final Logger log = LoggerFactory.getLogger(RegexpAuthorParser.class);

    /** Root node name of the author parser; also passed to it as its second constructor argument. */
    private static final String AUTHOR_NODE = "author";
    /** Root node name of the surname parser and name of the surname field inside an author node. */
    private static final String SURNAME_NODE = "surname";
    private static final String FORENAMES_FIELD = "forenames";
    private static final String SORT_KEY_FIELD = "sortKey";

    // Compiled once; String.replaceAll would recompile the expression on every call.
    private static final Pattern SPACE_RUN = Pattern.compile(" ++");
    private static final Pattern NON_LETTER = Pattern.compile("[^\\p{L}]");
    /** A letter followed by at least one more letter; group 1 captures the leading letter. */
    private static final Pattern MULTI_LETTER_WORD = Pattern.compile("(\\p{L})\\p{L}++");
    private static final Pattern NON_LETTER_OR_SPACE = Pattern.compile("[^\\p{L} ]");

    private final RegexpParser parser;
    private final RegexpParser surnameParser;

    /**
     * Creates the author and surname parsers.
     *
     * @param str value handed unchanged to both {@link RegexpParser} constructors
     *            (NOTE(review): presumably the grammar definition or its location —
     *            confirm against {@code RegexpParser})
     * @throws Exception if either {@link RegexpParser} cannot be constructed
     */
    public RegexpAuthorParser(String str) throws Exception {
        this.parser = new RegexpParser(str, AUTHOR_NODE);
        this.surnameParser = new RegexpParser(str, SURNAME_NODE);
    }

    /**
     * Parses an author string into its forenames, surname and sort key.
     *
     * <p>Runs of spaces in every extracted value are collapsed to a single space and the
     * value is trimmed. When the surname node has no {@code "sortKey"} field, the surname
     * itself is used as the sort key. Fields missing from the parse tree are left unset, so
     * the returned author may have a {@code null} surname and/or forenames.
     *
     * @param str the text to parse
     * @return the parsed author, or {@code null} if the underlying parser returned no node
     * @throws IllegalStateException if the root node is not named {@code "author"}
     */
    @Override // pl.edu.icm.synat.content.authors.AuthorParser
    public PublicationAuthor parse(String str) {
        Node root = this.parser.parse(str);
        if (root == null) {
            return null;
        }
        requireNodeName(root, AUTHOR_NODE);

        PublicationAuthor author = new PublicationAuthor();
        Node forenames = root.getFirstField(FORENAMES_FIELD);
        if (forenames != null) {
            author.setForenames(collapseSpaces(forenames.getValue()));
        }
        Node surname = root.getFirstField(SURNAME_NODE);
        if (surname != null) {
            author.setSurname(collapseSpaces(surname.getValue()));
            // Fall back to the whole surname when no dedicated sort key was recognised.
            Node sortKey = surname.getFirstField(SORT_KEY_FIELD);
            Node sortKeySource = sortKey != null ? sortKey : surname;
            author.setSortKey(collapseSpaces(sortKeySource.getValue()));
        }
        return author;
    }

    /**
     * Extracts the sort key from a surname string.
     *
     * <p>Unlike {@link #parse(String)}, the value is returned exactly as produced by the
     * parser (no whitespace clean-up) and there is no fallback to the surname itself.
     *
     * @param str the surname text to parse
     * @return the raw value of the {@code "sortKey"} field, or {@code null} if the text
     *         could not be parsed or the field is absent
     * @throws IllegalStateException if the root node is not named {@code "surname"}
     */
    @Override // pl.edu.icm.synat.content.authors.AuthorParser
    public String getSurnameSortKey(String str) {
        Node root = this.surnameParser.parse(str);
        if (root == null) {
            return null;
        }
        requireNodeName(root, SURNAME_NODE);
        Node sortKey = root.getFirstField(SORT_KEY_FIELD);
        return sortKey == null ? null : sortKey.getValue();
    }

    /**
     * Builds a normalized, lower-case form of an author name: forename initials followed by
     * the surname stripped of everything except letters and spaces.
     *
     * <p>Example: forenames {@code "Jan Maria"} and surname {@code "Kowalski-Nowak"} yield
     * {@code "j m kowalskinowak"}. A {@code null} forenames or surname value is treated as
     * an empty string, so authors produced by {@link #parse(String)} without a recognised
     * surname no longer cause a {@link NullPointerException}.
     *
     * @param publicationAuthor the author to normalize; must not be {@code null}
     * @return the initials, a single space when there is at least one initial, then the
     *         cleaned surname
     */
    @Override // pl.edu.icm.synat.content.authors.AuthorParser
    public String normalize(PublicationAuthor publicationAuthor) {
        String forenames = publicationAuthor.getForenames();
        if (forenames == null) {
            forenames = "";
        }
        String surname = publicationAuthor.getSurname();
        if (surname == null) {
            // parse() leaves the surname unset when the tree has no "surname" field.
            surname = "";
        }

        // Non-letters become separators, then every multi-letter word is cut to its first letter.
        String spacedForenames = NON_LETTER.matcher(forenames).replaceAll(" ");
        String initials = collapseSpaces(MULTI_LETTER_WORD.matcher(spacedForenames).replaceAll("$1"))
                .toLowerCase(Locale.ENGLISH);

        // Punctuation inside the surname is dropped rather than replaced with a space.
        String cleanSurname = collapseSpaces(NON_LETTER_OR_SPACE.matcher(surname).replaceAll(""))
                .toLowerCase(Locale.ENGLISH);

        String separator = initials.isEmpty() ? "" : " ";
        return initials + separator + cleanSurname;
    }

    /**
     * Parses the given text and normalizes the resulting author.
     *
     * @param str the author text
     * @return the normalized name, or {@code str} unchanged when it cannot be parsed
     */
    @Override // pl.edu.icm.synat.content.authors.AuthorParser
    public String normalize(String str) {
        PublicationAuthor author = parse(str);
        return author == null ? str : normalize(author);
    }

    /** Collapses every run of spaces into a single space and trims the result. */
    private static String collapseSpaces(String value) {
        return SPACE_RUN.matcher(value).replaceAll(" ").trim();
    }

    /** Fails with {@link IllegalStateException} unless the node carries the expected name. */
    private static void requireNodeName(Node node, String expectedName) {
        if (!expectedName.equals(node.getName())) {
            throw new IllegalStateException("Unexpected node");
        }
    }
}
