package pl.edu.icm.yadda.imports.mhp;

import java.io.File;
import java.io.FileInputStream;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.openrdf.http.protocol.transaction.TransactionXMLConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.model.bwmeta.y.constants.NameTypes;
import pl.edu.icm.yadda.common.utils.PositionMatcher;
import pl.edu.icm.yadda.common.utils.Utils;
import pl.edu.icm.yadda.imports.ImportException;
import pl.edu.icm.yadda.imports.export.ImportContext;
import pl.edu.icm.yadda.imports.export.impl.GenericParser;
import pl.edu.icm.yadda.imports.utils.ImportUtils;
import pl.edu.icm.yadda.repo.id.YaddaIdConstants;
import pl.edu.icm.yadda.repo.model.Contributor;
import pl.edu.icm.yadda.repo.model.ContributorConstants;
import pl.edu.icm.yadda.repo.model.Element;
import pl.edu.icm.yadda.repo.model.ElementDate;
import pl.edu.icm.yadda.repo.model.Note;
import pl.edu.icm.yadda.repo.model.builder.AttributableBuilder;
import pl.edu.icm.yadda.repo.model.builder.ContributorBuilder;
import pl.edu.icm.yadda.repo.model.builder.DescriptableBuilder;
import pl.edu.icm.yadda.repo.model.builder.ElementBuilder;
import pl.edu.icm.yadda.tools.textcat.LanguagesIso639_1;

/* loaded from: input_file:WEB-INF/lib/bwmeta-import-1.13.0.jar:pl/edu/icm/yadda/imports/mhp/MhpParser.class */
/**
 * Imports MHP bibliographic records from Excel ({@code .xls}) workbooks.
 *
 * <p>Every sheet whose first row matches {@link #headerNames} is read row by row. Rows with an
 * empty ID cell are counted as empty and skipped. Rows that produce the same identifying string
 * (journal, year, volume, number, pages, title) contribute to one article element: the first such
 * row creates it, later rows only add contributors. Journal, year, volume and number elements are
 * created on demand and linked below the fixed MHP publisher element.
 */
public class MhpParser extends GenericParser {
    /** Title cell content that marks a record without a title. */
    private static final String NO_TITLE_MHP = "[brak]";
    /** Attribute set to {@code "true"} when square brackets were stripped from the title. */
    public static final String ATTR_TITLE_NON_EXPLICIT = "title.nonexplicit";
    protected static final String ID_PREFIX = "bwmeta1.element.mhp-";
    protected static final String MHP_PUBLISHER_ID = "bwmeta1.element.mhp-publisher";
    protected static final String ID_TYPE = "mhp";
    /** A four-digit year starting with 1 or 2, optionally followed by "-" and a second such year. */
    private static final Pattern YEAR_PATTERN = Pattern.compile("[12]\\d{3}(-[12]\\d{3})?");
    /** A pair of square brackets with no bracket inside; group 1 is the enclosed text. */
    private static final Pattern BRACKETS_PATTERN = Pattern.compile("\\[([^\\[\\]]*)]");
    /**
     * Non-alphanumeric, non-whitespace characters accepted in string cells; anything else is
     * reported through {@code MhpContext.charErrors}.
     */
    private static final String ALLOWED_SPECIAL_CHARS = ",.?'\"-:/<>|()!=_{}[]\\;+*&@#`"
            + "\u201C\u2019\u2026\u2020\u00B0\u00AB\u00BB\u2192\u2264\u2265";

    /**
     * Expected header row, in column order.
     * NOTE(review): the first three names come from constants of unrelated libraries - presumably a
     * decompilation artifact; confirm their values before replacing them with literals.
     */
    String[] headerNames = {SchemaSymbols.ATTVAL_ID, TransactionXMLConstants.PREFIX_ATT, NameTypes.NM_SUFFIX, "aut:Nazwisko", "aut:Imię", "Rozmówca", "translator", "editor", "Spisał", "Redaktor", "Tytuł", "language", "journal", "Rok", "RokPublikacji", "volume", "TytułTomu", "Zeszyt", "Strony", "TypForm", "TypRodz", "Odnośniki", "Liczba autorów", "Uwagi inne", "abstract1", "abstract2", "abstract3", "abstract4"};

    // Column indices; they follow the order of headerNames (column 22, "Liczba autorów", is not read).
    private static final short CELL_ID = 0;
    private static final short CELL_PREFIX = 1;
    private static final short CELL_SUFFIX = 2;
    private static final short CELL_AUTHOR_SURNAME = 3;
    private static final short CELL_AUTHOR_NAME = 4;
    private static final short CELL_INTERLOCUTOR = 5;
    private static final short CELL_TRANSLATOR = 6;
    private static final short CELL_EDITOR = 7;
    private static final short CELL_WRITER = 8;
    private static final short CELL_REDACTOR = 9;
    private static final short CELL_TITLE = 10;
    private static final short CELL_LANG = 11;
    private static final short CELL_JOURNAL = 12;
    private static final short CELL_YEAR = 13;
    private static final short CELL_YEAR_OF_PUBL = 14;
    private static final short CELL_VOLUME = 15;
    private static final short CELL_VOLUME_TITLE = 16;
    private static final short CELL_NUMBER = 17;
    private static final short CELL_PAGES = 18;
    private static final short CELL_TYPE_FORM = 19;
    private static final short CELL_TYPE_RODZ = 20;
    private static final short CELL_REFERENCES = 21;
    private static final short CELL_OTHER_NOTICES = 23;
    private static final short CELL_ABSTRACT_1 = 24;
    private static final short CELL_ABSTRACT_2 = 25;
    private static final short CELL_ABSTRACT_3 = 26;
    private static final short CELL_ABSTRACT_4 = 27;

    private static final Logger log = LoggerFactory.getLogger(MhpParser.class);
    /** Language used when the language cell is empty, and for all hierarchy elements. */
    protected static final String DEF_LANG = LanguagesIso639_1.Polish.getCode();
    /** Language codes accepted in the language column. */
    private static final Set<String> EXPECTED_LANGUAGES = new HashSet<String>();

    static {
        EXPECTED_LANGUAGES.add(LanguagesIso639_1.English.getCode());
        EXPECTED_LANGUAGES.add(LanguagesIso639_1.Polish.getCode());
    }

    public MhpParser() {
        this.elementIdPrefix = ID_PREFIX;
        this.importFileSuffix = ".xls";
        this.importNamePrefix = "MHP import ";
    }

    /**
     * Parses every sheet of the given workbook into the import context.
     *
     * @param file Excel workbook to read
     * @param importContext must be an {@code MhpContext}
     * @throws ImportException if the context has the wrong type, a sheet cannot be parsed, or
     *         unknown characters were found in string cells
     * @throws RuntimeException if the workbook itself cannot be opened or read
     */
    @Override
    protected void doParseFile(File file, ImportContext importContext) throws ImportException {
        if (!(importContext instanceof MhpContext)) {
            String actualType = importContext == null ? "null" : importContext.getClass().getCanonicalName();
            throw new ImportException("Expected MhpContext but was " + actualType);
        }
        MhpContext mhpContext = (MhpContext) importContext;
        HSSFWorkbook workbook = loadWorkbook(file);
        for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) {
            parseSheet(workbook, sheetIndex, file, mhpContext);
        }
    }

    /** Loads the workbook and closes the file stream; any failure is reported as an unchecked IO error. */
    private HSSFWorkbook loadWorkbook(File file) {
        try {
            FileInputStream input = new FileInputStream(file);
            try {
                return new HSSFWorkbook(new POIFSFileSystem(input));
            } finally {
                input.close();
            }
        } catch (Exception e) {
            throw new RuntimeException("IO error while accessing excel file " + file.getAbsolutePath(), e);
        }
    }

    /**
     * Parses one sheet: validates the header row, parses the remaining rows and flushes the context.
     * Import errors propagate unchanged; any other failure is wrapped in an {@code ImportException}
     * that names the file, sheet and row counter.
     */
    private void parseSheet(HSSFWorkbook workbook, int sheetIndex, File file, MhpContext mhpContext) throws ImportException {
        int rowCount = 0;
        int emptyRowCount = 0;
        try {
            log.info("Parsing sheet #" + sheetIndex);
            Iterator<?> rows = workbook.getSheetAt(sheetIndex).rowIterator();
            boolean headerFound = false;
            if (rows.hasNext()) {
                headerFound = parseHeaderRow((HSSFRow) rows.next(), mhpContext);
                rowCount = 1;
            }
            if (!headerFound) {
                log.info("Sheet #" + sheetIndex + " will not be processed! No header row found");
                return;
            }
            while (rows.hasNext()) {
                if (!parseRow((HSSFRow) rows.next(), mhpContext)) {
                    emptyRowCount++;
                }
                rowCount++;
                if (rowCount % 1000 == 0) {
                    log.info(rowCount + " rows parsed, " + mhpContext.getArticleCount() + " articles created, " + mhpContext.getObjects().size() + " total elements created");
                }
            }
            log.info("All " + rowCount + " rows parsed (" + emptyRowCount + " empty rows), " + mhpContext.getArticleCount() + " articles created, " + mhpContext.getObjects().size() + " total elements created");
            if (!mhpContext.charErrors.isEmpty()) {
                Iterator<String> errors = mhpContext.charErrors.iterator();
                while (errors.hasNext()) {
                    log.error(errors.next());
                }
                throw new ImportException("There were character errors in file " + file.getAbsolutePath());
            }
            mhpContext.flush();
        } catch (ImportException e) {
            throw e;
        } catch (Exception e) {
            throw new ImportException("Error occured while parsing excel file " + file.getAbsolutePath() + " (sheet #" + sheetIndex + " row #" + rowCount + DefaultExpressionEngine.DEFAULT_INDEX_END, e);
        }
    }

    /**
     * Runs the superclass preparation and then registers the MHP publisher element in the context.
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override
    public void beforeParsing(ImportContext importContext) throws ImportException {
        super.beforeParsing(importContext);
        importContext.addObject(createPublisherElement());
    }

    /** Builds the publisher element that journal elements use as their parent. */
    private Element createPublisherElement() {
        Element publisher = new Element();
        publisher.setExtId(MHP_PUBLISHER_ID);
        publisher.setLangs(DEF_LANG);
        DescriptableBuilder.setDefaultName(publisher, "MHP", DEF_LANG);
        Contributor contributor = new Contributor();
        contributor.setRole("publisher");
        contributor.setTitle("MHP");
        AttributableBuilder.addAttribute(contributor, "institution.name", "MHP");
        publisher.addContributor(contributor);
        ElementBuilder.addLevel(publisher, YaddaIdConstants.ID_LEVEL_JOURNAL_PUBLISHER, null);
        return publisher;
    }

    /**
     * Parses a data row.
     *
     * @return {@code false} if the row has an empty ID cell and was skipped, {@code true} otherwise
     */
    private boolean parseRow(HSSFRow row, MhpContext mhpContext) throws ImportException {
        if (Utils.emptyStr(getCellValue(row, CELL_ID, mhpContext))) {
            return false;
        }
        String identifyingString = createIdentifyingString(row, mhpContext);
        MhpElement article = mhpContext.getArticle(identifyingString);
        if (article == null) {
            mhpContext.addArticle(identifyingString, createElement(row, identifyingString, mhpContext));
        } else {
            updateElement(article, row, mhpContext);
        }
        return true;
    }

    /**
     * Resolves (creating when missing) the journal, year and optional volume and number elements
     * for the row, then attaches the article to the deepest one, with page position or range when
     * the pages cell can be parsed.
     */
    private void resolveJournalHierarchy(MhpElement article, HSSFRow row, MhpContext mhpContext) throws ImportException {
        String journal = getCellValue(row, CELL_JOURNAL, mhpContext);
        String parentId = resolveJournalHierarchyElement(ImportUtils.identifyingString(journal), "bwmeta1.element.mhp-journal-", journal, YaddaIdConstants.ID_LEVEL_JOURNAL_JOURNAL, MHP_PUBLISHER_ID, mhpContext, null);

        String year = getCellValue(row, CELL_YEAR, mhpContext);
        parentId = resolveJournalHierarchyElement(ImportUtils.identifyingString(journal, year), "bwmeta1.element.mhp-year-", year, YaddaIdConstants.ID_LEVEL_JOURNAL_YEAR, parentId, mhpContext, null);

        String volume = getCellValue(row, CELL_VOLUME, mhpContext);
        if (!Utils.emptyStr(volume)) {
            String volumeTitle = getCellValue(row, CELL_VOLUME_TITLE, mhpContext);
            parentId = resolveJournalHierarchyElement(ImportUtils.identifyingString(journal, year, volume), "bwmeta1.element.mhp-volume-", volume, YaddaIdConstants.ID_LEVEL_JOURNAL_VOLUME, parentId, mhpContext, volumeTitle);
        }

        String number = getCellValue(row, CELL_NUMBER, mhpContext);
        if (!Utils.emptyStr(number)) {
            parentId = resolveJournalHierarchyElement(ImportUtils.identifyingString(journal, year, volume, number), "bwmeta1.element.mhp-number-", number, YaddaIdConstants.ID_LEVEL_JOURNAL_NUMBER, parentId, mhpContext, null);
        }

        String pages = getCellValue(row, CELL_PAGES, mhpContext);
        if (Utils.emptyStr(pages)) {
            ElementBuilder.addLevel(article, YaddaIdConstants.ID_LEVEL_JOURNAL_ARTICLE, parentId);
        } else {
            // The pages cell is matched directly: createElement stores it as the
            // "bibliographical.description" attribute only after this method has run,
            // so the attribute cannot be read here yet.
            int[] range = PositionMatcher.match(pages);
            if (range == null || range.length != 2) {
                ElementBuilder.addLevel(article, YaddaIdConstants.ID_LEVEL_JOURNAL_ARTICLE, parentId);
                log.warn("Unknown pages info format (" + pages + DefaultExpressionEngine.DEFAULT_INDEX_END);
            } else if (range[0] == range[1]) {
                ElementBuilder.addLevelWithPosition(article, YaddaIdConstants.ID_LEVEL_JOURNAL_ARTICLE, parentId, String.valueOf(range[0]));
            } else {
                ElementBuilder.addLevelWithRange(article, YaddaIdConstants.ID_LEVEL_JOURNAL_ARTICLE, parentId, String.valueOf(range[0]), String.valueOf(range[1]));
            }
        }
        article.setParentExtid(parentId);
    }

    /**
     * Returns the id already mapped to {@code identifyingString} in the context, or creates a new
     * hierarchy element, registers it in the context and returns its id.
     *
     * @param volumeTitle optional value for the "volume.title" attribute of a newly created element
     */
    private String resolveJournalHierarchyElement(String identifyingString, String idPrefix, String name, String level, String parentId, ImportContext importContext, String volumeTitle) throws ImportException {
        String knownId = importContext.getBwmetaIdForString(identifyingString);
        if (knownId != null) {
            return knownId;
        }
        Element created = createHigherLevelElement(identifyingString, ID_TYPE, idPrefix, name, DEF_LANG, level, parentId);
        if (!Utils.emptyStr(volumeTitle)) {
            AttributableBuilder.addAttribute(created, "volume.title", volumeTitle);
        }
        importContext.mapStringToBwmetaId(identifyingString, created.getExtId());
        importContext.addObject(created);
        return created.getExtId();
    }

    /** Builds the article key from the journal, year, volume, number, pages and title cells. */
    private String createIdentifyingString(HSSFRow row, MhpContext mhpContext) {
        return ImportUtils.identifyingString(
                getCellValue(row, CELL_JOURNAL, mhpContext),
                getCellValue(row, CELL_YEAR, mhpContext),
                getCellValue(row, CELL_VOLUME, mhpContext),
                getCellValue(row, CELL_NUMBER, mhpContext),
                getCellValue(row, CELL_PAGES, mhpContext),
                getCellValue(row, CELL_TITLE, mhpContext));
    }

    /**
     * Creates an article element from a row.
     *
     * @throws ImportException if the language is not one of the expected ones or the title is empty
     */
    private MhpElement createElement(HSSFRow row, String identifyingString, MhpContext mhpContext) throws ImportException {
        MhpElement article = new MhpElement();
        article.setExtId(findBwmetaId(identifyingString, ID_TYPE, "bwmeta1.element.mhp-article-"));

        String lang = getCellValue(row, CELL_LANG, mhpContext);
        if (Utils.emptyStr(lang)) {
            lang = DEF_LANG;
        } else if (!EXPECTED_LANGUAGES.contains(lang)) {
            throw new ImportException("Unknown language '" + lang + "'");
        }
        article.setLangs(lang);

        String title = getCellValue(row, CELL_TITLE, mhpContext);
        if (Utils.emptyStr(title)) {
            throw new ImportException("Empty title");
        }
        if (NO_TITLE_MHP.equals(title)) {
            title = "";
        } else {
            String unbracketed = removeBrackets(title);
            if (!unbracketed.equals(title)) {
                // Brackets were present: record that and keep the title without them.
                AttributableBuilder.addAttribute(article, ATTR_TITLE_NON_EXPLICIT, "true");
                title = unbracketed;
            }
        }
        DescriptableBuilder.setDefaultName(article, title, lang);

        addContributors(article, row, mhpContext);
        resolveJournalHierarchy(article, row, mhpContext);
        addEntityLevel(article, mhpContext);
        addDates(article, getCellValue(row, CELL_YEAR, mhpContext), getCellValue(row, CELL_YEAR_OF_PUBL, mhpContext));
        addAttribute(article, "mhp.reference", getCellValue(row, CELL_REFERENCES, mhpContext));

        String otherNotices = getCellValue(row, CELL_OTHER_NOTICES, mhpContext);
        if (!Utils.emptyStr(otherNotices)) {
            Note note = new Note();
            note.setLang(lang);
            note.setText(otherNotices);
            article.addNote(note);
        }

        addAttribute(article, "bibliographical.description", getCellValue(row, CELL_PAGES, mhpContext));
        addAttribute(article, "mhp.typ.form", getCellValue(row, CELL_TYPE_FORM, mhpContext));
        addAttribute(article, "mhp.typ.rodz", getCellValue(row, CELL_TYPE_RODZ, mhpContext));
        addAbstracts(article, row, lang, mhpContext);
        return article;
    }

    /** Adds the non-empty abstract cells; the first becomes the default description. */
    private void addAbstracts(MhpElement article, HSSFRow row, String lang, MhpContext mhpContext) {
        String[] abstracts = {
            getCellValue(row, CELL_ABSTRACT_1, mhpContext),
            getCellValue(row, CELL_ABSTRACT_2, mhpContext),
            getCellValue(row, CELL_ABSTRACT_3, mhpContext),
            getCellValue(row, CELL_ABSTRACT_4, mhpContext)
        };
        boolean defaultSet = false;
        for (String text : abstracts) {
            if (Utils.emptyStr(text)) {
                continue;
            }
            if (defaultSet) {
                DescriptableBuilder.addDescription(article, text, lang);
            } else {
                DescriptableBuilder.setDefaultDescription(article, text, lang);
                defaultSet = true;
            }
        }
    }

    /** Merges a further row of an already known article: only its contributors are added. */
    private void updateElement(MhpElement article, HSSFRow row, MhpContext mhpContext) {
        addContributors(article, row, mhpContext);
    }

    /** Adds the author and all contributor columns of the row to the article. */
    private void addContributors(MhpElement article, HSSFRow row, MhpContext mhpContext) {
        addAuthor(article, row, mhpContext);
        addContributors(article, getCellValue(row, CELL_TRANSLATOR, mhpContext), "translator", null);
        addContributors(article, getCellValue(row, CELL_EDITOR, mhpContext), "editor", null);
        addContributors(article, getCellValue(row, CELL_INTERLOCUTOR, mhpContext), ContributorConstants.ROLE_INTERLOCUTOR, null);
        addContributors(article, getCellValue(row, CELL_REDACTOR, mhpContext), ContributorConstants.ROLE_REDACTOR, null);
        addContributors(article, getCellValue(row, CELL_WRITER, mhpContext), "other", "spisał");
    }

    /**
     * Adds the author described by the prefix, first name, surname and suffix cells. Nothing is
     * added when all four are empty; a warning is logged when only prefix or suffix is present.
     */
    private void addAuthor(MhpElement article, HSSFRow row, MhpContext mhpContext) {
        String prefix = getCellValue(row, CELL_PREFIX, mhpContext);
        String suffix = getCellValue(row, CELL_SUFFIX, mhpContext);
        String firstName = getCellValue(row, CELL_AUTHOR_NAME, mhpContext);
        String surname = getCellValue(row, CELL_AUTHOR_SURNAME, mhpContext);
        if (Utils.emptyStrings(prefix, suffix, firstName, surname)) {
            return;
        }
        if (Utils.emptyStrings(firstName, surname)) {
            log.warn("Author has empty firstname and surname but not prefix or suffix (prefix='" + prefix + "' suffix='" + suffix + "')");
            return;
        }
        // Display name: "prefix firstName surname suffix", skipping empty parts.
        StringBuilder displayName = new StringBuilder();
        for (String part : new String[] {prefix, firstName, surname, suffix}) {
            if (!Utils.emptyStr(part)) {
                displayName.append(part).append(' ');
            }
        }
        addContributor(article, "author", displayName.toString().trim(), firstName, surname, null);
    }

    /**
     * Adds the contributors listed in one cell. Entries are separated by ";"; each entry must
     * consist of exactly two parts separated by "," or, when there is no comma, by a single space.
     * The first part is used as the surname and the second as the names.
     *
     * @throws RuntimeException if an entry does not have exactly two non-empty parts
     */
    private void addContributors(MhpElement article, String cellText, String role, String originalRole) {
        if (Utils.emptyStr(cellText)) {
            return;
        }
        for (String entry : cellText.split(";")) {
            String[] parts = entry.split(",");
            if (parts.length == 1) {
                parts = parts[0].split(" ");
            }
            if (parts.length != 2) {
                throw new RuntimeException("Invalid contributor format (" + entry + DefaultExpressionEngine.DEFAULT_INDEX_END);
            }
            String surname = removeBrackets(parts[0].trim());
            String names = removeBrackets(parts[1].trim());
            if (Utils.emptyStr(surname)) {
                throw new RuntimeException("Empty contributor's surname (" + entry + DefaultExpressionEngine.DEFAULT_INDEX_END);
            }
            if (Utils.emptyStr(names)) {
                throw new RuntimeException("Empty contributor's names (" + entry + DefaultExpressionEngine.DEFAULT_INDEX_END);
            }
            addContributor(article, role, (names + " " + surname).trim(), names, surname, originalRole);
        }
    }

    /** Builds a contributor and adds it to the article via {@code addContributorIfNotAdded}. */
    private void addContributor(MhpElement article, String role, String title, String firstName, String lastName, String originalRole) {
        ContributorBuilder builder = new ContributorBuilder();
        builder.setRole(role);
        builder.setTitle(title);
        if (!Utils.emptyStr(firstName)) {
            builder.setFirstName(firstName);
        }
        if (!Utils.emptyStr(lastName)) {
            builder.setLastName(lastName);
        }
        if (originalRole != null) {
            builder.addAttribute("original-role", originalRole);
        }
        article.addContributorIfNotAdded(builder.build());
    }

    /**
     * Adds the "published" date from {@code publishedYear} and, when it differs, an "issued" date
     * from {@code year}.
     *
     * @throws RuntimeException if a non-empty value does not match the year pattern
     */
    private void addDates(MhpElement article, String year, String publishedYear) {
        if (Utils.emptyStr(year) && Utils.emptyStr(publishedYear)) {
            return;
        }
        if (!Utils.emptyStr(year) && !YEAR_PATTERN.matcher(year).matches()) {
            throw new RuntimeException("Invalid published year format (" + year + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }
        if (!Utils.emptyStr(publishedYear) && !YEAR_PATTERN.matcher(publishedYear).matches()) {
            // Report the value that actually failed validation.
            throw new RuntimeException("Invalid published year format (" + publishedYear + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }
        addDate(article, publishedYear, "published");
        if (year != null && !year.equals(publishedYear)) {
            addDate(article, year, "issued");
        }
    }

    /** Adds a date of the given type unless the text is empty. */
    private void addDate(MhpElement article, String text, String type) {
        if (Utils.emptyStr(text)) {
            return;
        }
        ElementDate date = new ElementDate();
        date.setText(text);
        date.setType(type);
        article.addDate(date);
    }

    /** Adds an attribute unless the value is empty. */
    private void addAttribute(MhpElement article, String name, String value) {
        if (Utils.emptyStr(value)) {
            return;
        }
        AttributableBuilder.addAttribute(article, name, value);
    }

    /**
     * Reads a cell as text.
     *
     * <p>Numeric cells are rendered with {@code String.valueOf(double)} minus a trailing ".0";
     * string cells are trimmed and every character outside the accepted set is recorded in
     * {@code mhpContext.charErrors}; boolean cells are rendered as "true"/"false".
     * NOTE(review): large numeric values come out in scientific notation (e.g. "1.0E7") - confirm
     * whether such values can occur in the imported columns.
     *
     * @return the cell text, or {@code null} for a missing or blank cell
     * @throws RuntimeException for any other cell type (e.g. formula cells)
     */
    private String getCellValue(HSSFRow row, short columnIndex, MhpContext mhpContext) {
        HSSFCell cell = row.getCell(columnIndex);
        if (cell == null) {
            return null;
        }
        switch (cell.getCellType()) {
            case HSSFCell.CELL_TYPE_NUMERIC:
                String number = String.valueOf(cell.getNumericCellValue());
                return number.endsWith(".0") ? number.substring(0, number.length() - 2) : number;
            case HSSFCell.CELL_TYPE_STRING:
                String text = cell.getStringCellValue().trim();
                for (int i = 0; i < text.length(); i++) {
                    char c = text.charAt(i);
                    if (!isAllowedCharacter(c)) {
                        mhpContext.charErrors.add("Unknown character (" + c + "), [row=" + row.getRowNum() + " cell index=" + ((int) columnIndex) + "]");
                    }
                }
                return text;
            case HSSFCell.CELL_TYPE_BLANK:
                return null;
            case HSSFCell.CELL_TYPE_BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            default:
                throw new RuntimeException("Unknown cell type (" + cell.getCellType() + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }
    }

    /** Tells whether a character is a letter, digit, whitespace or one of the accepted special characters. */
    private static boolean isAllowedCharacter(char c) {
        return Character.isLetter(c)
                || Character.isWhitespace(c)
                || Character.isDigit(c)
                || ALLOWED_SPECIAL_CHARS.indexOf(c) >= 0;
    }

    /**
     * Removes square brackets that enclose bracket-free text, keeping the enclosed text.
     *
     * @param str text to process; must not be {@code null}
     * @return the text with each such bracket pair replaced by its content
     */
    public static String removeBrackets(String str) {
        return BRACKETS_PATTERN.matcher(str).replaceAll("$1");
    }

    /**
     * Checks the first row of a sheet against {@link #headerNames}.
     *
     * @return {@code false} if the first cell is not the expected ID header (the sheet is then
     *         skipped), {@code true} if every header cell matches
     * @throws RuntimeException if the first cell matches but a later header cell does not
     */
    private boolean parseHeaderRow(HSSFRow row, MhpContext mhpContext) {
        if (!this.headerNames[0].equals(getCellValue(row, CELL_ID, mhpContext))) {
            return false;
        }
        // Column 0 has just been verified, so checking starts at column 1.
        for (short column = 1; column < this.headerNames.length; column++) {
            String actual = getCellValue(row, column, mhpContext);
            if (!this.headerNames[column].equals(actual)) {
                throw new RuntimeException("Invalid header column name (expected=[" + this.headerNames[column] + "] but was [" + actual + "]");
            }
        }
        log.info("Header OK!");
        return true;
    }
}
