package pl.edu.icm.yadda.imports.oldspringer;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.codehaus.groovy.tools.shell.util.ANSI;
import pl.edu.icm.yadda.bwmeta.model.YConstants;
import pl.edu.icm.yadda.desklight.model.Address;
import pl.edu.icm.yadda.desklight.model.Affiliation;
import pl.edu.icm.yadda.desklight.model.AttributeNode;
import pl.edu.icm.yadda.desklight.model.AttributedDate;
import pl.edu.icm.yadda.desklight.model.Content;
import pl.edu.icm.yadda.desklight.model.ContentFile;
import pl.edu.icm.yadda.desklight.model.Contributor;
import pl.edu.icm.yadda.desklight.model.Element;
import pl.edu.icm.yadda.desklight.model.ElementLevel;
import pl.edu.icm.yadda.desklight.model.Identified;
import pl.edu.icm.yadda.desklight.model.Identifier;
import pl.edu.icm.yadda.desklight.model.Institution;
import pl.edu.icm.yadda.desklight.model.KeywordSet;
import pl.edu.icm.yadda.desklight.model.LocalizedString;
import pl.edu.icm.yadda.desklight.serialization.BwmetaSerializer105;
import pl.edu.icm.yadda.imports.oldspringer.model.XAbstract;
import pl.edu.icm.yadda.imports.oldspringer.model.XAddress;
import pl.edu.icm.yadda.imports.oldspringer.model.XAffiliation;
import pl.edu.icm.yadda.imports.oldspringer.model.XArticle;
import pl.edu.icm.yadda.imports.oldspringer.model.XAuthor;
import pl.edu.icm.yadda.imports.oldspringer.model.XAuthorGroup;
import pl.edu.icm.yadda.imports.oldspringer.model.XConference;
import pl.edu.icm.yadda.imports.oldspringer.model.XHistoryEntry;
import pl.edu.icm.yadda.imports.oldspringer.model.XIssue;
import pl.edu.icm.yadda.imports.oldspringer.model.XKeywordGroup;
import pl.edu.icm.yadda.imports.oldspringer.model.XTitle;
import pl.edu.icm.yadda.imports.virlib.Languages;
import pl.edu.icm.yadda.process.license.LicenseConstants;
import pl.edu.icm.yadda.repo.id.YaddaIdConstants;
import pl.edu.icm.yadda.repo.model.AttributeConstants;
import pl.edu.icm.yadda.repo.model.LocationConstants;
import pl.edu.icm.yadda.tools.textcat.LanguageIdentifierBean;

/* loaded from: input_file:WEB-INF/lib/bwmeta-import-1.10.3-SNAPSHOT.jar:pl/edu/icm/yadda/imports/oldspringer/OldSpringerConverter.class */
public class OldSpringerConverter {
    /** Language classifier; instantiated by the constructor. */
    LanguageIdentifierBean languageIdentifier;
    /**
     * Optional "Key words" / "Keywords or phrases" lead-in followed by a list of
     * items separated by ';', ',' or an en dash; the list is captured as group 1.
     */
    Pattern keywordPattern = Pattern.compile("(?:Key\\s*words?(?:\\sor\\sphrases)?[\\.:]?)?\\s*((?:[^;,–]+)(?:[;,–]\\s*(?:[^;,–]+))*)\\.?");
    /**
     * Two word characters, a dot, two word characters, an optional dot-separated
     * third group and optional trailing text. Presumably matches PACS codes —
     * the use site is not in this part of the file, confirm there.
     */
    Pattern pacsPattern = Pattern.compile("(?m)(?s)(\\w\\w)[^\\w\\.\\+\\-]*\\.[^\\w\\.\\+\\-]*(\\w\\w)[^\\w\\.\\+\\-]*(?:\\.\\W*([\\w\\+\\-]\\w))?([^\\.].*)?");
    /**
     * One word character followed by an optional one- or two-digit number.
     * Presumably matches JEL codes — confirm against the use site.
     */
    Pattern jelPattern = Pattern.compile("\\W*(\\w)\\s*(\\d\\d?)?");
    /** Serializer used to turn model objects into the XML stored in the package. */
    BwmetaSerializer105 bwmetaSerializer = new BwmetaSerializer105();
    /** Publisher names seen while parsing. */
    protected Set<String> publishers = new TreeSet<String>();
    /** Author roles seen while building articles. */
    protected Set<String> roles = new TreeSet<String>();
    /** Article categories seen while building articles. */
    protected Set<String> articleCategories = new TreeSet<String>();
    /** Abstract parts shorter than 25 characters, which are treated as headers. */
    protected Set<String> abstractHeaders = new TreeSet<String>();
    /** NOTE(review): not used in the visible part of the file; presumably collects keyword classes. */
    protected Set<String> keywordClasses = new TreeSet<String>();
    /** NOTE(review): not used in the visible part of the file; presumably collects unrecognised keywords. */
    protected Set<String> unknownKeywords = new TreeSet<String>();
    /** Custom history-entry descriptions other than "Electronic Publication". */
    protected Set<String> dateTypes = new TreeSet<String>();
    /** Resolved AMS classification codes. */
    protected Set<String> amsCategories = new TreeSet<String>();
    /** Resolved PACS classification codes. */
    protected Set<String> pacsCategories = new TreeSet<String>();
    /** NOTE(review): not used in the visible part of the file; presumably collects JEL codes. */
    protected Set<String> jelCategories = new TreeSet<String>();
    /** Source paths of content files that have already been written to a package. */
    protected Set<String> storedContents = new TreeSet<String>();
    /** Article ext id to the source file it was first built from; used to report duplicates. */
    protected Map<String, String> existingAtricles = new HashMap<String, String>();
    /** NOTE(review): not used in the visible part of the file. */
    protected int counter = 0;

    /**
     * Creates a converter and instantiates its {@link LanguageIdentifierBean}.
     *
     * @throws RuntimeException if the language identifier cannot be created; the
     *         message is that of the original exception, which is attached as the cause
     */
    public OldSpringerConverter() {
        try {
            this.languageIdentifier = new LanguageIdentifierBean();
        } catch (Exception e) {
            // Chain the cause so the original stack trace travels with the exception
            // instead of being printed separately and lost to the caller.
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Serializes the given object and writes it to the archive as
     * {@code fragments/element/<ext id without the element prefix>.xml}.
     *
     * @param identified      object to serialize
     * @param zipOutputStream archive receiving the new entry
     * @throws IOException if the entry cannot be created or written
     */
    protected void storeElement(Identified identified, ZipOutputStream zipOutputStream) throws IOException {
        final String extId = identified.getExtId();
        // Serialize before opening the entry so a serialization failure leaves no empty entry behind.
        final String xml = this.bwmetaSerializer.toString(extId, identified);
        final byte[] payload = xml.getBytes("UTF8");
        final String entryName = "fragments/element/" + extId.substring(YConstants.EXT_PREFIX_ELEMENT.length()) + ".xml";
        zipOutputStream.putNextEntry(new ZipEntry(entryName));
        zipOutputStream.write(payload);
    }

    /**
     * Copies a content file into the archive, at most once per source path.
     *
     * @param str             path of the source file on disk
     * @param str2            name of the archive entry to create
     * @param zipOutputStream archive receiving the entry
     * @throws IOException if the source cannot be read or the entry cannot be written
     */
    protected void storeContent(String str, String str2, ZipOutputStream zipOutputStream) throws IOException {
        if (this.storedContents.contains(str)) {
            System.out.println("Content " + str + "has been already stored.");
            return;
        }
        this.storedContents.add(str);
        zipOutputStream.putNextEntry(new ZipEntry(str2));
        FileInputStream fileInputStream = new FileInputStream(str);
        try {
            IOUtils.copy(fileInputStream, zipOutputStream);
        } finally {
            // Release the file handle even when the copy fails.
            fileInputStream.close();
        }
    }

    /**
     * Converts every {@code *.xml} file found (recursively) under the given directory
     * and writes the result to {@code <directory name>.zip} next to that directory.
     *
     * <p>For each article the publisher, journal, year, optional series, optional
     * volume and optional issue elements are created and stored the first time their
     * id is seen. The article itself is then built and stored, unless an article with
     * the same ext id was already stored in this run, in which case both source files
     * are reported.
     *
     * <p>If the archive already exists, nothing is done. If processing fails, the
     * streams are closed, the partial archive is deleted and the exception is rethrown.
     * The duplicate-detection map is cleared when the method finishes, whether it
     * succeeded or failed.
     *
     * @param str path of the directory to convert
     * @throws Exception if any file cannot be parsed, converted or written
     */
    protected void parseDirectory(String str) throws Exception {
        File file = new File(str);
        File file2 = new File(file.getParentFile(), file.getName() + ".zip");
        if (file2.exists()) {
            System.out.println("Package exists");
            return;
        }
        FileOutputStream fileOutputStream = new FileOutputStream(file2);
        ZipOutputStream zipOutputStream = new ZipOutputStream(fileOutputStream);
        boolean completed = false;
        try {
            // Ids of hierarchy objects already written to this archive.
            Map<String, Identified> stored = new HashMap<String, Identified>();
            OldSpringerParser parser = new OldSpringerParser();
            for (File xmlFile : FileUtils.listFiles(file, new String[]{"xml"}, true)) {
                try {
                    // Path of the XML file relative to the parent of the converted directory.
                    String relativePath = xmlFile.getCanonicalPath().substring(file.getParentFile().getCanonicalPath().length());
                    XArticle article;
                    FileInputStream xmlInput = new FileInputStream(xmlFile);
                    try {
                        article = parser.parse(xmlInput);
                    } finally {
                        // The parser is not known to close its input, so release the handle here.
                        xmlInput.close();
                    }
                    XIssue issue = article.getIssue();

                    String publisherName = issue.getPublishers().getName();
                    this.publishers.add(publisherName);
                    String publisherId = buildId("bwmeta1.element.publisher-custom-", publisherName);
                    String publisherInstitutionId = buildId("bwmeta1.institution.publisherinst-custom-", publisherName);
                    if (!stored.containsKey(publisherId)) {
                        Institution publisherInstitution = buildPublisherInstitution(publisherInstitutionId, article);
                        stored.put(publisherInstitutionId, publisherInstitution);
                        storeElement(publisherInstitution, zipOutputStream);
                        Element publisher = buildPublisher(publisherId, publisherInstitutionId, article, xmlFile.toString());
                        stored.put(publisherId, publisher);
                        storeElement(publisher, zipOutputStream);
                    }

                    String journalId = "bwmeta1.element.springer-journal-" + issue.getJournal().getId();
                    if (!stored.containsKey(journalId)) {
                        Element journal = buildJournal(journalId, publisherId, issue);
                        journal.getIdentifiers().add(new Identifier(YaddaIdConstants.IDENTIFIER_CLASS_SPRINGERIMPORT, xmlFile.toString()));
                        stored.put(journalId, journal);
                        storeElement(journal, zipOutputStream);
                    }

                    // The year comes from the print cover date; a missing or unreadable
                    // date falls back to a placeholder year element.
                    String yearName;
                    String yearId;
                    if (issue.getPrintCoverDate() == null) {
                        yearName = "????";
                        yearId = journalId + "-xxxx";
                        System.out.println("No print cover date for " + xmlFile.toString());
                    } else {
                        try {
                            yearName = String.valueOf(1900 + issue.getPrintCoverDate().getDate().getYear());
                            yearId = journalId + "-" + yearName;
                        } catch (Exception dateFailure) {
                            yearName = "????";
                            yearId = journalId + "-xxxx";
                            System.out.println("Invalid cover date for " + xmlFile.toString());
                            dateFailure.printStackTrace();
                        }
                    }
                    if (!stored.containsKey(yearId)) {
                        Element year = buildYear(yearId, journalId, issue, yearName);
                        year.getIdentifiers().add(new Identifier(YaddaIdConstants.IDENTIFIER_CLASS_SPRINGERIMPORT, xmlFile.toString()));
                        stored.put(yearId, year);
                        storeElement(year, zipOutputStream);
                    }

                    // parentId / parentLevel describe the element the next, deeper one hangs under.
                    // NOTE(review): buildVolume and buildIssue receive the level of their parent as
                    // their level argument, whereas buildJournal and buildYear use the element's
                    // own level - confirm this is intended.
                    String parentId = yearId;
                    String parentLevel = YaddaIdConstants.ID_LEVEL_JOURNAL_YEAR;
                    if (issue.getSeries() != null) {
                        String seriesId = "bwmeta1.element.springer-series-" + issue.getSeries().getId();
                        if (!stored.containsKey(seriesId)) {
                            Element series = buildSeries(seriesId, journalId, issue);
                            series.getIdentifiers().add(new Identifier(YaddaIdConstants.IDENTIFIER_CLASS_SPRINGERIMPORT, xmlFile.toString()));
                            stored.put(seriesId, series);
                            storeElement(series, zipOutputStream);
                        }
                        parentId = seriesId;
                        parentLevel = "bwmeta1.level.hierarchy_Journal_Series";
                    }
                    if (issue.getVolumeId() != null) {
                        String volumeId = "bwmeta1.element.springer-volume-" + issue.getJournal().getId() + "-" + issue.getVolumeId();
                        if (!stored.containsKey(volumeId)) {
                            Element volume = buildVolume(volumeId, parentId, parentLevel, issue);
                            volume.getIdentifiers().add(new Identifier(YaddaIdConstants.IDENTIFIER_CLASS_SPRINGERIMPORT, xmlFile.toString()));
                            stored.put(volumeId, volume);
                            storeElement(volume, zipOutputStream);
                        }
                        parentId = volumeId;
                        parentLevel = YaddaIdConstants.ID_LEVEL_JOURNAL_VOLUME;
                    }
                    if (issue.getId() != null) {
                        String issueId = "bwmeta1.element.springer-issue-" + issue.getJournal().getId() + "-" + issue.getVolumeId() + "-" + issue.getId();
                        if (!stored.containsKey(issueId)) {
                            Element issueElement = buildIssue(issueId, parentId, parentLevel, issue);
                            issueElement.getIdentifiers().add(new Identifier(YaddaIdConstants.IDENTIFIER_CLASS_SPRINGERIMPORT, xmlFile.toString()));
                            stored.put(issueId, issueElement);
                            storeElement(issueElement, zipOutputStream);
                        }
                        parentId = issueId;
                        parentLevel = YaddaIdConstants.ID_LEVEL_JOURNAL_NUMBER;
                    }

                    String articleId = "bwmeta1.element.springer-article-" + issue.getJournal().getId() + "-" + issue.getVolumeId() + "-" + issue.getId() + "-" + article.getId();
                    Element articleElement = buildArticle(articleId, parentId, parentLevel, article, xmlFile, zipOutputStream, relativePath);
                    if (this.existingAtricles.containsKey(articleElement.getExtId())) {
                        System.out.println("Duplicate files: ");
                        System.out.println(this.existingAtricles.get(articleElement.getExtId()));
                        System.out.println(xmlFile.toString());
                    } else {
                        this.existingAtricles.put(articleElement.getExtId(), xmlFile.toString());
                        storeElement(articleElement, zipOutputStream);
                    }
                } catch (Exception failure) {
                    // Name the offending file on its own line before the exception propagates.
                    System.out.println("Exception processing " + xmlFile);
                    throw failure;
                }
            }
            zipOutputStream.close();
            fileOutputStream.close();
            completed = true;
        } finally {
            // Duplicate detection is per package; never carry entries over to the next run.
            this.existingAtricles.clear();
            if (!completed) {
                // Close both streams first: an open handle can prevent the delete, and
                // ZipOutputStream.close() may fail before it reaches the underlying stream.
                try {
                    zipOutputStream.close();
                } catch (IOException ignored) {
                    // The archive is being discarded; the original failure is what matters.
                }
                try {
                    fileOutputStream.close();
                } catch (IOException ignored) {
                    // Same as above.
                }
                file2.delete();
            }
        }
    }

    /**
     * Builds an identifier by appending a normalised form of {@code str2} to the
     * prefix {@code str}: ASCII letters and digits are kept (letters lower-cased),
     * every Unicode space character becomes {@code '-'}, and all other characters
     * are dropped.
     *
     * @param str  prefix, copied verbatim
     * @param str2 text to normalise
     * @return the prefix followed by the normalised text
     */
    protected String buildId(String str, String str2) {
        StringBuilder id = new StringBuilder().append(str);
        for (int i = 0; i < str2.length(); i++) {
            char ch = str2.charAt(i);
            boolean asciiDigit = ch >= '0' && ch <= '9';
            boolean asciiLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
            if (asciiDigit || asciiLetter) {
                id.append(Character.toLowerCase(ch));
                continue;
            }
            if (Character.isSpaceChar(ch)) {
                id.append('-');
            }
        }
        return id.toString();
    }

    /**
     * Creates the institution record for the publisher of the given article.
     * The institution gets the publisher name as its English name and one address
     * per publisher location.
     *
     * @param str      ext id to assign to the institution
     * @param xArticle article whose issue carries the publisher data
     * @return the populated institution
     */
    protected Institution buildPublisherInstitution(String str, XArticle xArticle) {
        final Institution publisherInstitution = new Institution();
        publisherInstitution.setExtId(str);
        publisherInstitution.setName(Languages.LG_ENGLISH, xArticle.getIssue().getPublishers().getName());
        for (String location : xArticle.getIssue().getPublishers().getLocations()) {
            final Address locationAddress = new Address();
            locationAddress.setText(location);
            publisherInstitution.getAddresses().add(locationAddress);
        }
        return publisherInstitution;
    }

    /**
     * Creates the publisher element at the top of the journal hierarchy.
     * The element carries one institutional contributor with role "publisher"
     * that points at the publisher institution.
     *
     * @param str      ext id of the publisher element
     * @param str2     ext id of the publisher institution the contributor refers to
     * @param xArticle article whose issue carries the publisher name
     * @param str3     source file path, recorded as the import identifier
     * @return the populated publisher element
     */
    protected Element buildPublisher(String str, String str2, XArticle xArticle, String str3) {
        final Element publisher = new Element();
        publisher.setExtId(str);
        publisher.setName(Languages.LG_ENGLISH, xArticle.getIssue().getPublishers().getName());

        final Contributor publishingInstitution = new Contributor();
        publishingInstitution.setPersonalityType(Contributor.ContributorType.INSTITUTION);
        publishingInstitution.setRole("publisher");
        publishingInstitution.setText(xArticle.getIssue().getPublishers().getName());
        publishingInstitution.setIndex("0");
        publishingInstitution.setExtId(str2);
        publisher.addContributor(publishingInstitution);

        // The publisher has no parent in the hierarchy.
        final ElementLevel rootLevel = new ElementLevel(null, YaddaIdConstants.ID_LEVEL_JOURNAL_PUBLISHER, "bwmeta1.hierarchy-class.hierarchy_Journal");
        publisher.addLevel(rootLevel);

        final Identifier importSource = new Identifier(YaddaIdConstants.IDENTIFIER_CLASS_SPRINGERIMPORT, str3.toString());
        publisher.getIdentifiers().add(importSource);
        return publisher;
    }

    /**
     * Creates the journal element for the given issue.
     * The title becomes the name and the subtitle (if any) the description. The ISSN
     * is always recorded as an identifier and the CODEN when present. The alternative
     * title and abbreviation are stored as attributes when present.
     *
     * @param str    ext id of the journal element
     * @param str2   ext id of the parent (publisher) element
     * @param xIssue issue whose journal data is used
     * @return the populated journal element
     */
    protected Element buildJournal(String str, String str2, XIssue xIssue) {
        Element element = new Element();
        element.setExtId(str);
        element.setName("**", xIssue.getJournal().getTitle());
        if (xIssue.getJournal().getSubtitle() != null) {
            element.setDescription("**", xIssue.getJournal().getSubtitle());
        }
        element.getIdentifiers().add(new Identifier("bwmeta1.id-class.ISSN", xIssue.getJournal().getIssn()));
        if (xIssue.getJournal().getCoden() != null) {
            // The CODEN identifier must carry the CODEN itself, not the ISSN.
            element.getIdentifiers().add(new Identifier(YaddaIdConstants.IDENTIFIER_CLASS_CODEN, xIssue.getJournal().getCoden()));
        }
        if (xIssue.getJournal().getAlternativeTitle() != null) {
            element.getAttributes().add(new AttributeNode(AttributeConstants.ADDITIONAL_TITLE, xIssue.getJournal().getAlternativeTitle()));
        }
        if (xIssue.getJournal().getAbbreviation() != null) {
            element.getAttributes().add(new AttributeNode(AttributeConstants.TITLE_ABBREVIATION, xIssue.getJournal().getAbbreviation()));
        }
        element.addLevel(new ElementLevel(str2, YaddaIdConstants.ID_LEVEL_JOURNAL_JOURNAL, "bwmeta1.hierarchy-class.hierarchy_Journal"));
        return element;
    }

    /**
     * Creates the year element that sits directly under a journal.
     *
     * @param str    ext id of the year element
     * @param str2   ext id of the parent journal element
     * @param xIssue issue being processed (not read by this method)
     * @param str3   display name of the year
     * @return the populated year element
     * @throws ParseException declared for callers; not thrown by this implementation
     */
    protected Element buildYear(String str, String str2, XIssue xIssue, String str3) throws ParseException {
        final Element year = new Element();
        year.setExtId(str);
        year.setName("**", str3);
        final ElementLevel underJournal = new ElementLevel(str2, YaddaIdConstants.ID_LEVEL_JOURNAL_YEAR, "bwmeta1.hierarchy-class.hierarchy_Journal");
        year.addLevel(underJournal);
        return year;
    }

    /**
     * Creates the series element for the given issue.
     * The series title becomes the name. The series subtitle, alternative title and
     * abbreviation are stored only when the series itself provides them.
     *
     * @param str    ext id of the series element
     * @param str2   ext id of the parent element
     * @param xIssue issue whose series data is used; its series must not be null
     * @return the populated series element
     */
    protected Element buildSeries(String str, String str2, XIssue xIssue) {
        Element element = new Element();
        element.setExtId(str);
        element.setName("**", String.valueOf(xIssue.getSeries().getTitle()));
        // NOTE(review): the series is registered with the YEAR level id, while the caller
        // treats it as "bwmeta1.level.hierarchy_Journal_Series" - confirm which is intended.
        element.addLevel(new ElementLevel(str2, YaddaIdConstants.ID_LEVEL_JOURNAL_YEAR, "bwmeta1.hierarchy-class.hierarchy_Journal"));
        // Guard on the series' own values; guarding on the journal's could store nulls
        // or silently drop data the series does have.
        if (xIssue.getSeries().getSubtitle() != null) {
            element.setDescription("**", xIssue.getSeries().getSubtitle());
        }
        if (xIssue.getSeries().getAlternativeTitle() != null) {
            element.getAttributes().add(new AttributeNode(AttributeConstants.ADDITIONAL_TITLE, xIssue.getSeries().getAlternativeTitle()));
        }
        if (xIssue.getSeries().getAbbreviation() != null) {
            element.getAttributes().add(new AttributeNode(AttributeConstants.TITLE_ABBREVIATION, xIssue.getSeries().getAbbreviation()));
        }
        return element;
    }

    /**
     * Creates the volume element, named after the issue's volume id.
     *
     * @param str    ext id of the volume element
     * @param str2   ext id of the parent element
     * @param str3   level id to register the volume with
     * @param xIssue issue providing the volume id
     * @return the populated volume element
     */
    protected Element buildVolume(String str, String str2, String str3, XIssue xIssue) {
        final Element volume = new Element();
        volume.setExtId(str);
        volume.setName("**", xIssue.getVolumeId());
        final ElementLevel placement = new ElementLevel(str2, str3, "bwmeta1.hierarchy-class.hierarchy_Journal");
        volume.addLevel(placement);
        return volume;
    }

    /**
     * Creates the issue element, named after the issue id.
     * The print cover date is stored as a "printed" date and the online cover date as
     * a "published" date. The issue type is stored as an attribute. Each is added only
     * when present.
     *
     * @param str    ext id of the issue element
     * @param str2   ext id of the parent element
     * @param str3   level id to register the issue with
     * @param xIssue issue providing id, dates and type
     * @return the populated issue element
     */
    protected Element buildIssue(String str, String str2, String str3, XIssue xIssue) {
        final Element issue = new Element();
        issue.setExtId(str);
        issue.setName("**", xIssue.getId());
        final ElementLevel placement = new ElementLevel(str2, str3, "bwmeta1.hierarchy-class.hierarchy_Journal");
        issue.addLevel(placement);

        final boolean hasPrintDate = xIssue.getPrintCoverDate() != null;
        if (hasPrintDate) {
            issue.getDates().add(new AttributedDate("printed", xIssue.getPrintCoverDate()));
        }
        final boolean hasOnlineDate = xIssue.getOnlineCoverDate() != null;
        if (hasOnlineDate) {
            issue.getDates().add(new AttributedDate("published", xIssue.getOnlineCoverDate()));
        }
        final boolean hasType = xIssue.getType() != null;
        if (hasType) {
            issue.getAttributes().add(new AttributeNode(AttributeConstants.SPRINGER_VOLUME_TYPE, xIssue.getType().toString()));
        }
        return issue;
    }

    protected Element buildArticle(String str, String str2, String str3, XArticle xArticle, File file, ZipOutputStream zipOutputStream, String str4) throws IOException {
        String str5;
        String str6;
        Element element = new Element();
        element.setExtId(str);
        for (XTitle xTitle : xArticle.getTitles()) {
            if (xTitle.getPurpose() == XTitle.Purpose.NORMAL) {
                element.setName(safeLang(xTitle.getLanguage()), xTitle.getMainTitle());
                if (xTitle.getSubtitle() != null) {
                    element.setAttribute(AttributeConstants.SPRINGER_SUBTITLE_PREFIX + xTitle.getLanguage(), xTitle.getSubtitle());
                }
            }
        }
        ElementLevel elementLevel = new ElementLevel(str2, YaddaIdConstants.ID_LEVEL_JOURNAL_ARTICLE, "bwmeta1.hierarchy-class.hierarchy_Journal");
        if (xArticle.getFirstPage() == null || !xArticle.getFirstPage().equals(xArticle.getLastPage())) {
            elementLevel.setRangeFrom(xArticle.getFirstPage());
            elementLevel.setRangeTo(xArticle.getLastPage());
        } else {
            elementLevel.setPosition(xArticle.getFirstPage());
        }
        element.addLevel(elementLevel);
        for (XHistoryEntry xHistoryEntry : xArticle.getHistory()) {
            switch (xHistoryEntry.getType()) {
                case ACCEPTED:
                    str6 = "accepted";
                    break;
                case RECEIVED:
                    str6 = "received";
                    break;
                case REVISED:
                    str6 = "approved";
                    break;
                default:
                    str6 = "unknown";
                    break;
            }
            if (xHistoryEntry.getCustomDescription() != null) {
                if (xHistoryEntry.getCustomDescription().equals("Electronic Publication")) {
                    str6 = "published";
                } else {
                    this.dateTypes.add(xHistoryEntry.getCustomDescription());
                }
            }
            element.getDates().add(new AttributedDate(str6, xHistoryEntry.getDate()));
        }
        for (XAuthorGroup xAuthorGroup : xArticle.getAuthorGroups()) {
            for (XAuthor xAuthor : xAuthorGroup.getAuthors()) {
                Contributor contributor = new Contributor();
                contributor.setPersonalityType(Contributor.ContributorType.PERSON);
                contributor.setRole("author");
                String str7 = "";
                for (String str8 : xAuthor.getDegrees()) {
                    contributor.setAttribute("person.degree", str8);
                    str7 = str7 + str8 + ANSI.Renderer.CODE_TEXT_SEPARATOR;
                }
                for (String str9 : xAuthor.getFirstnames()) {
                    contributor.setAttribute("person.firstname", str9);
                    str7 = str7 + str9 + ANSI.Renderer.CODE_TEXT_SEPARATOR;
                }
                for (String str10 : xAuthor.getSurnames()) {
                    contributor.setAttribute("person.surname", str10);
                    str7 = str7 + str10 + ANSI.Renderer.CODE_TEXT_SEPARATOR;
                }
                Iterator<String> it = xAuthor.getOther().iterator();
                while (it.hasNext()) {
                    contributor.setAttribute(AttributeConstants.CONTRIBUTOR_PERSON_OTHER, it.next());
                }
                if (!xAuthor.getRoles().isEmpty()) {
                    Iterator<String> it2 = xAuthor.getRoles().iterator();
                    while (it2.hasNext()) {
                        this.roles.add(it2.next());
                    }
                }
                String trim = str7.trim();
                if (xAuthor.getText() == null && !StringUtils.isEmpty(trim)) {
                    contributor.setText(trim);
                } else if (xAuthor.getText() == null) {
                    contributor.setText("[unknown author]");
                    System.out.println(str4 + " Author text is null");
                } else {
                    contributor.setText(xAuthor.getText());
                }
                if (xAuthor.getOrganizationRef() != null) {
                    XAffiliation xAffiliation = xAuthorGroup.getAffiliations().get(xAuthor.getOrganizationRef());
                    if (xAffiliation == null) {
                        throw new IllegalStateException();
                    }
                    String str11 = "";
                    Affiliation affiliation = new Affiliation();
                    if (xAffiliation.getOrganizationName() != null) {
                        str11 = str11 + ANSI.Renderer.CODE_TEXT_SEPARATOR + xAffiliation.getOrganizationName();
                        affiliation.getAttributes().add(new AttributeNode(AttributeConstants.AFFILIATION_DETAILS_INSTITUTION, xAffiliation.getOrganizationName()));
                    }
                    if (xAffiliation.getOrganizationRef() != null) {
                        affiliation.getAttributes().add(new AttributeNode(AttributeConstants.AFFILIATION_DETAILS_INSTITUTION_REF, xAffiliation.getOrganizationRef()));
                    }
                    if (xAffiliation.getOrganizationDivision() != null) {
                        str11 = str11 + ANSI.Renderer.CODE_TEXT_SEPARATOR + xAffiliation.getOrganizationDivision();
                        affiliation.getAttributes().add(new AttributeNode(AttributeConstants.AFFILIATION_DETAILS_DIVISION, xAffiliation.getOrganizationDivision()));
                    }
                    if (xAffiliation.getDivisionRef() != null) {
                        affiliation.getAttributes().add(new AttributeNode(AttributeConstants.AFFILIATION_DETAILS_DIVISION_REF, xAffiliation.getDivisionRef()));
                    }
                    if (xAffiliation.getOrganizationAddress() != null) {
                        XAddress organizationAddress = xAffiliation.getOrganizationAddress();
                        if (organizationAddress.getCustomAddress() != null) {
                            String customAddress = organizationAddress.getCustomAddress();
                            if (customAddress.startsWith(",")) {
                                customAddress = customAddress.substring(1).trim();
                            }
                            affiliation.getAttributes().add(new AttributeNode(AttributeConstants.AFFILIATION_DETAILS_ADDRESS, customAddress));
                            str11 = str11 + ANSI.Renderer.CODE_TEXT_SEPARATOR + customAddress;
                        } else {
                            System.out.println("CUSTOM ADDRESS");
                        }
                    }
                    affiliation.setText(str11.trim());
                    contributor.getAffiliations().add(affiliation);
                }
                element.addContributor(contributor);
            }
        }
        for (XAbstract xAbstract : xArticle.getAbstracts()) {
            String str12 = "";
            String str13 = "";
            int i = 0;
            for (String str14 : xAbstract.getParts()) {
                if (str14.length() >= 25) {
                    str5 = str12 + str14 + "\n";
                } else {
                    this.abstractHeaders.add(str14);
                    if (!StringUtils.isEmpty(str12) && str13 != null) {
                        str13 = str12;
                    }
                    str5 = "";
                }
                str12 = str5;
                i++;
            }
            if (!StringUtils.isEmpty(str12) && str13 != null) {
                str13 = str12;
            }
            for (String str15 : new String[]{"<p>", "\noindent", "—", "¶"}) {
                if (str13.startsWith(str15)) {
                    str13 = str13.substring(str15.length());
                }
            }
            if (!StringUtils.isEmpty(str13) && !Character.isLetterOrDigit(str13.charAt(0)) && !Character.isWhitespace(str13.charAt(0))) {
                System.out.println(str13.charAt(0));
            }
            String classify = this.languageIdentifier.classify(str13);
            if (xAbstract.getLanguage() == null) {
                xAbstract.setLanguage(classify);
            }
            if (!classify.equalsIgnoreCase(xAbstract.getLanguage()) && !"**".equals(classify)) {
                System.out.println("Invalid language. Expected " + xAbstract.getLanguage() + " found " + classify + " : " + str13);
            }
            if (element.getDescription(xAbstract.getLanguage()) != null) {
                if (classify.equalsIgnoreCase(xAbstract.getLanguage())) {
                    System.out.println("ERROR: Multiple descriptions " + str4);
                } else {
                    element.setDescription(classify.toUpperCase(), str13);
                }
            }
            element.setDescription(safeLang(xAbstract.getLanguage()), str13);
        }
        element.getIdentifiers().add(new Identifier(YaddaIdConstants.IDENTIFIER_CLASS_SPRINGERIMPORT, str4));
        element.getIdentifiers().add(new Identifier("bwmeta1.id-class.Springer", xArticle.getIssue().getJournal().getId() + "/" + xArticle.getIssue().getVolumeId() + "/" + xArticle.getIssue().getId() + "/" + xArticle.getId()));
        if (xArticle.getIssue().getDoi() != null) {
            element.getIdentifiers().add(new Identifier("bwmeta1.id-class.DOI", xArticle.getIssue().getDoi()));
            element.setExtId("bwmeta1.element.springer-article-doi-" + xArticle.getIssue().getDoi().substring("10.1007/".length()).replace("/", "-").trim());
        }
        if (xArticle.getFigureCount() != null) {
            element.setAttribute("bibliographical.description", xArticle.getFigureCount().toString());
        }
        if (xArticle.getPageCount() != null) {
            element.setAttribute("bibliographical.description.page-count", xArticle.getPageCount().toString());
        }
        if (xArticle.getReferenceCount() != null) {
            element.setAttribute(AttributeConstants.BIB_REFERENCE_COUNT, xArticle.getReferenceCount().toString());
        }
        if (xArticle.getTableCount() != null) {
            element.setAttribute(AttributeConstants.BIB_TABLE_COUNT, xArticle.getTableCount().toString());
        }
        if (xArticle.getWordCount() != null) {
            element.setAttribute(AttributeConstants.BIB_WORD_COUNT, xArticle.getWordCount().toString());
        }
        element.setAttribute(AttributeConstants.SPRINGER_ARTICLE_TYPE, xArticle.getType().toString());
        for (String str16 : xArticle.getCategories()) {
            element.getAttributes().add(new AttributeNode(AttributeConstants.SPRINGER_ARTICLE_CATEGORY, str16));
            this.articleCategories.add(str16);
        }
        for (XConference xConference : xArticle.getConferences()) {
            LinkedList linkedList = new LinkedList();
            if (xConference.getFrom() != null) {
                linkedList.add(new AttributeNode(AttributeConstants.CONFERENCE_DETAILS_FROM, xConference.getFrom().getValue()));
            }
            if (xConference.getTo() != null) {
                linkedList.add(new AttributeNode(AttributeConstants.CONFERENCE_DETAILS_TO, xConference.getTo().getValue()));
            }
            if (xConference.getNumber() != null) {
                linkedList.add(new AttributeNode(AttributeConstants.CONFERENCE_DETAILS_NUMBER, xConference.getNumber()));
            }
            Iterator<String> it3 = xConference.getSponsor().iterator();
            while (it3.hasNext()) {
                linkedList.add(new AttributeNode(AttributeConstants.CONFERENCE_DETAILS_SPONSOR, it3.next()));
            }
            if (xConference.getPlace() != null) {
                linkedList.add(new AttributeNode(AttributeConstants.CONFERENCE_DETAILS_LOCATION, xConference.getPlace()));
            }
            element.getAttributes().add(new AttributeNode("conference", xConference.getName(), linkedList));
            element.getAttributes().add(new AttributeNode("conference.title", xConference.getName()));
        }
        Iterator<String> it4 = xArticle.getGrants().iterator();
        while (it4.hasNext()) {
            element.getAttributes().add(new AttributeNode(AttributeConstants.GRANT, it4.next()));
        }
        Iterator<String> it5 = xArticle.getDedications().iterator();
        while (it5.hasNext()) {
            element.getAttributes().add(new AttributeNode(AttributeConstants.DEDICATION, it5.next()));
        }
        Iterator<String> it6 = xArticle.getPrsentations().iterator();
        while (it6.hasNext()) {
            element.getAttributes().add(new AttributeNode(AttributeConstants.PRESENTATION, it6.next()));
        }
        Iterator<String> it7 = xArticle.getSponsors().iterator();
        while (it7.hasNext()) {
            element.getAttributes().add(new AttributeNode(AttributeConstants.SPONSOR, it7.next()));
        }
        File file2 = new File(file.getParentFile().getParentFile().getParentFile(), "papers/" + file.getParentFile().getName() + "/" + FilenameUtils.getBaseName(file.getName()) + ".pdf");
        if (file2.exists()) {
            String str17 = "archive/springer" + FilenameUtils.removeExtension(str4) + ".pdf";
            Content content = new Content(0, LicenseConstants.DATA_TYPE_CONTENT);
            ContentFile contentFile = new ContentFile();
            contentFile.setAddress(str17);
            contentFile.setMimeType(LocationConstants.MIME_PDF);
            contentFile.setRemote(false);
            contentFile.setAddressType("URI");
            contentFile.setLength(file2.length());
            content.getLocations().add(contentFile);
            element.getContents().add(content);
            storeContent(file2.toString(), str17, zipOutputStream);
        } else {
            System.out.println("Content file " + file2 + "doesnt exist");
        }
        for (XKeywordGroup xKeywordGroup : xArticle.getKeywordGroups()) {
            String keywordLanguage = xKeywordGroup.getKeywordLanguage() != null ? xKeywordGroup.getKeywordLanguage() : "**";
            LinkedList linkedList2 = new LinkedList();
            for (String str18 : xKeywordGroup.getKeywords()) {
                List<String> resolvePacs = resolvePacs(str18);
                if (!resolvePacs.isEmpty()) {
                    for (String str19 : resolvePacs) {
                        element.getAttributes().add(new AttributeNode(AttributeConstants.PACS_NUMBERS, str18));
                        this.pacsCategories.add(str19);
                    }
                } else if (str18.startsWith("2000 Mathematics Subject Classification")) {
                    String substring = str18.substring("2000 Mathematics Subject Classification".length());
                    if (substring.length() > 3) {
                        for (String str20 : substring.split("\\W")) {
                            if (!StringUtils.isEmpty(str20)) {
                                String resolveAMS = resolveAMS(str20.trim());
                                if (resolveAMS == null) {
                                    System.out.println("Unknown AMS class:" + str20);
                                } else {
                                    element.getCategoryExtIds().add("bwmeta1.category.ams-" + resolveAMS);
                                    this.amsCategories.add(resolveAMS);
                                }
                            }
                        }
                    }
                } else {
                    linkedList2.addAll(resolveKeywords(str18));
                }
            }
            if (!linkedList2.isEmpty()) {
                if (xKeywordGroup.getKeywordClass() == null) {
                    element.getKeywords().add(new KeywordSet(keywordLanguage, linkedList2));
                    this.unknownKeywords.addAll(linkedList2);
                } else if (containsSome(xKeywordGroup.getKeywordClass().toLowerCase(), new String[]{"keyword", "key word", "keywoed"})) {
                    element.getKeywords().add(new KeywordSet(keywordLanguage, new LinkedList(linkedList2)));
                } else if (containsSome(xKeywordGroup.getKeywordClass().toLowerCase(), new String[]{"schl&252;sselw&246;rte", "Schl&252;sselworte", "schl&252;sselw&246;orte", "schlagw&246;rter", "schl&252;sselworte", "sch&252;sselw&246;rte", "schl&252;ssenw&246;rte", "schlagworte:"})) {
                    element.getKeywords().add(new KeywordSet(Languages.LG_GERMAN, new LinkedList(linkedList2)));
                } else if (xKeywordGroup.getKeywordClass().contains("Mots")) {
                    element.getKeywords().add(new KeywordSet("FR", new LinkedList(linkedList2)));
                } else if (containsSome(xKeywordGroup.getKeywordClass().toLowerCase(), new String[]{"ams subject classification 2000", "mathematics subject classification 2000", "(2000)mathematics subject classification", "2000 mathematics subject classification", "ams subject classification (2000)", "ams 2000 subject classification", "msc", "mathematics subject classification", "msc:", "mathematical subject classification", "ams classifications", "ams subject classification", "2000 mr subject classification", "mr (2000) subject classification", "mr(2000) subject classification"})) {
                    Iterator it8 = linkedList2.iterator();
                    while (it8.hasNext()) {
                        String trim2 = ((String) it8.next()).trim();
                        if (trim2.contains("no classification")) {
                            break;
                        }
                        if (trim2.startsWith("Primary:")) {
                            trim2 = trim2.substring("Primary:".length());
                        }
                        if (trim2.startsWith("Secondary:")) {
                            trim2 = trim2.substring("Secondary:".length());
                        }
                        if (trim2.startsWith("Primary")) {
                            trim2 = trim2.substring("Primary".length());
                        }
                        if (trim2.startsWith("Secondary")) {
                            trim2 = trim2.substring("Secondary".length());
                        }
                        if (trim2.endsWith(".")) {
                            trim2 = trim2.substring(0, trim2.lastIndexOf("."));
                        }
                        for (String str21 : trim2.split("\\W")) {
                            if (!StringUtils.isEmpty(str21)) {
                                String resolveAMS2 = resolveAMS(str21);
                                if (resolveAMS2 == null) {
                                    System.out.println("Unknown AMS class:" + str21);
                                } else {
                                    element.getCategoryExtIds().add("bwmeta1.category.ams-" + resolveAMS2);
                                    this.amsCategories.add(resolveAMS2);
                                }
                            }
                        }
                    }
                } else if (containsSome(xKeywordGroup.getKeywordClass().toLowerCase(), new String[]{"jel classification"})) {
                    Iterator it9 = linkedList2.iterator();
                    while (it9.hasNext()) {
                        Matcher matcher = this.jelPattern.matcher((String) it9.next());
                        if (matcher.matches()) {
                            String group = matcher.group(1);
                            if (matcher.group(2) != null) {
                                group = group + matcher.group(2);
                            }
                            this.jelCategories.add(group);
                            element.getCategoryExtIds().add("bwmeta1.category.jel-" + group);
                        }
                    }
                } else if (xKeywordGroup.getKeywordClass().startsWith("PACS")) {
                    for (String str22 : xKeywordGroup.getKeywords()) {
                        List<String> resolvePacs2 = resolvePacs(str22.trim());
                        if (resolvePacs2.isEmpty()) {
                            System.out.println("Invalid PACS category " + str22.trim());
                        } else {
                            for (String str23 : resolvePacs2) {
                                element.getAttributes().add(new AttributeNode(AttributeConstants.PACS_NUMBERS, str23));
                                this.pacsCategories.add(str23);
                            }
                        }
                    }
                } else {
                    this.keywordClasses.add(xKeywordGroup.getKeywordClass());
                    element.getKeywords().add(new KeywordSet(keywordLanguage, linkedList2));
                    this.unknownKeywords.addAll(linkedList2);
                }
            }
        }
        if (xArticle.getCopyrightNote() != null) {
            element.setAttribute("copyright", xArticle.getCopyrightNote());
            element.getNotes().setDefault(new LocalizedString("**", "Copyright " + xArticle.getCopyrightNote()));
        }
        return element;
    }

    /**
     * Splits a raw keyword string into individual keywords.
     *
     * <p>When the whole input matches {@code keywordPattern} and that pattern declares at least
     * one capturing group, the text of group 1 is split on a separator character ({@code ;},
     * {@code ,} or an en dash) followed by one whitespace character, and every non-blank piece is
     * returned trimmed. Otherwise the trimmed input is returned as the only keyword.
     *
     * @param str raw keyword text; must not be {@code null}
     * @return the extracted keywords in order of appearance; never {@code null}
     */
    protected List<String> resolveKeywords(String str) {
        Matcher matcher = this.keywordPattern.matcher(str);
        List<String> keywords = new ArrayList<String>();
        if (!matcher.matches() || matcher.groupCount() <= 0) {
            // No recognizable list structure: treat the whole input as one keyword.
            keywords.add(str.trim());
            return keywords;
        }
        for (String piece : matcher.group(1).split("[;,–]\\s")) {
            // Trim before testing for emptiness so whitespace-only pieces are dropped
            // instead of being stored as empty keywords.
            String keyword = piece.trim();
            if (!keyword.isEmpty()) {
                keywords.add(keyword);
            }
        }
        return keywords;
    }

    /**
     * Extracts PACS codes from a raw keyword string.
     *
     * <p>A leading {@code "PACS:"} or {@code "PACS."} prefix is removed, the remainder is split on
     * {@code ;}, and each trimmed piece that fully matches {@code pacsPattern} is converted into a
     * dotted code built from the pattern's groups: {@code g1.g2.g3} when group 3 took part in the
     * match, {@code g1.g2} otherwise. Pieces that do not match are skipped.
     *
     * @param str raw keyword text; must not be {@code null}
     * @return the recognized codes in order of appearance; empty when nothing matched
     */
    protected List<String> resolvePacs(String str) {
        String codes = str;
        if (str.startsWith("PACS:") || str.startsWith("PACS.")) {
            // Both prefixes have the same length, so one substring offset serves for either.
            codes = str.substring("PACS:".length());
        }
        List<String> resolved = new ArrayList<String>(2);
        for (String candidate : codes.split("[;]")) {
            Matcher matcher = this.pacsPattern.matcher(candidate.trim());
            if (!matcher.matches()) {
                continue;
            }
            String code = matcher.group(1) + "." + matcher.group(2);
            if (matcher.group(3) != null) {
                code = code + "." + matcher.group(3);
            }
            resolved.add(code);
        }
        return resolved;
    }

    /**
     * Shape of an AMS classification token accepted by {@link #resolveAMS(String)}: one or two
     * digits, optionally followed by a single letter, which in turn may be followed by one or two
     * digits. Compiled once instead of on every call.
     */
    private static final Pattern AMS_CODE_PATTERN =
            Pattern.compile("(\\d(?:\\d)?)(?:(\\p{Alpha})(\\d\\d?)?)?");

    /**
     * Normalizes an AMS classification token such as {@code "5a3"} to its canonical form
     * ({@code "05A03"}): both numeric parts are zero-padded to two digits and the letter is
     * upper-cased.
     *
     * @param str the token to normalize; must not be {@code null}
     * @return the normalized code, or {@code null} when the token does not have the expected shape
     */
    protected String resolveAMS(String str) {
        Matcher matcher = AMS_CODE_PATTERN.matcher(str);
        if (!matcher.matches()) {
            return null;
        }
        String major = matcher.group(1);
        String letter = matcher.group(2);
        String minor = matcher.group(3);

        StringBuilder code = new StringBuilder(5);
        // The pattern limits each numeric part to one or two digits, so padding to width two
        // means prepending at most a single zero.
        if (major.length() < 2) {
            code.append('0');
        }
        code.append(major);
        if (letter != null) {
            // Locale.ROOT keeps the result independent of the default locale
            // (e.g. under a Turkish locale "i" would otherwise become a dotted capital I).
            code.append(letter.toUpperCase(java.util.Locale.ROOT));
        }
        if (minor != null) {
            if (minor.length() < 2) {
                code.append('0');
            }
            code.append(minor);
        }
        return code.toString();
    }

    /**
     * Returns the given language code unchanged, substituting the placeholder {@code "**"} when
     * the code is {@code null}.
     *
     * @param str language code, possibly {@code null}
     * @return {@code str} itself when non-null, otherwise {@code "**"}
     */
    protected String safeLang(String str) {
        if (str == null) {
            return "**";
        }
        return str;
    }

    /**
     * Tells whether the text contains at least one of the candidate substrings.
     *
     * @param str the text to search in
     * @param strArr the candidate substrings, checked in array order
     * @return {@code true} as soon as one candidate occurs in {@code str}; {@code false} when
     *     none does (including when the array is empty)
     */
    protected boolean containsSome(String str, String[] strArr) {
        int index = 0;
        while (index < strArr.length) {
            String candidate = strArr[index];
            if (str.indexOf(candidate) >= 0) {
                return true;
            }
            index++;
        }
        return false;
    }

    /**
     * Command-line entry point. Runs {@code parseDirectory} on every sub-directory of the root
     * directory whose name starts with {@code "sjou"}, echoes every other entry, and finally
     * prints the values the converter collected while processing (abstract headers, article
     * categories, unknown keyword classes, and so on).
     *
     * <p>A failure in one directory is reported and does not stop the remaining directories from
     * being processed.
     *
     * @param strArr optional arguments; when present, the first one overrides the default root
     *     directory {@code /space/springer}
     * @throws Exception if the converter cannot be created or the root directory cannot be listed
     */
    public static void main(String[] strArr) throws Exception {
        OldSpringerConverter oldSpringerConverter = new OldSpringerConverter();
        String rootPath = (strArr != null && strArr.length > 0) ? strArr[0] : "/space/springer";
        File[] entries = new File(rootPath).listFiles();
        if (entries == null) {
            // listFiles() returns null (rather than throwing) for a missing or unreadable
            // directory; fail with a clear message instead of a bare NullPointerException.
            throw new IOException("Cannot list directory " + rootPath);
        }
        for (File file : entries) {
            if (file.isDirectory() && file.getName().startsWith("sjou")) {
                try {
                    System.out.println("Processing " + file);
                    oldSpringerConverter.parseDirectory(file.getAbsolutePath());
                } catch (Exception e) {
                    // Best effort: report the failing directory and carry on with the next one.
                    System.err.println("Failed to process " + file);
                    e.printStackTrace();
                }
            } else {
                System.out.println(file);
            }
        }
        printSection("ABSTRACT HEADERS:", oldSpringerConverter.abstractHeaders);
        printSection("ARTICLE CATEGORIES:", oldSpringerConverter.articleCategories);
        printSection("UNKNOWN KEYWORD CLASSES:", oldSpringerConverter.keywordClasses);
        printSection("UNKNOWN DATE TYPES:", oldSpringerConverter.dateTypes);
        printSection("UNKNOWN KEYWORDS:", oldSpringerConverter.unknownKeywords);
        printSection("JEL Categories:", oldSpringerConverter.jelCategories);
        printSection("parserTest.AMS CATEGORIES:", oldSpringerConverter.amsCategories);
        printSection("PACS CATEGORIES:", oldSpringerConverter.pacsCategories);
        printSection("ROLES:", oldSpringerConverter.roles);
        printSection("PUBLISHERS:", oldSpringerConverter.publishers);
    }

    /** Prints a heading line followed by each value on its own line. */
    private static void printSection(String title, Iterable<String> values) {
        System.out.println(title);
        for (String value : values) {
            System.out.println(value);
        }
    }
}
