package pl.edu.icm.yadda.imports.baztech;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.StringWriter;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.dbcp.BasicDataSource;
import org.apache.commons.lang.StringUtils;
import org.apache.tools.ant.taskdefs.XSLTLiaison;
import org.hsqldb.DatabaseURL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.yadda.common.utils.Utils;
import pl.edu.icm.yadda.imports.baztech.model.BaztechJournal;
import pl.edu.icm.yadda.imports.baztech.model.BaztechLibrary;
import pl.edu.icm.yadda.imports.baztech.model.BaztechPaper;
import pl.edu.icm.yadda.repo.export.BwmetaWriter;
import pl.edu.icm.yadda.repo.export.ExportException;
import pl.edu.icm.yadda.repo.export.impl.VelocityBwmetaWriterImpl;
import pl.edu.icm.yadda.repo.model.ExtIdObject;
import pl.edu.icm.yadda.repo.model.IExportableEntity;
import pl.edu.icm.yadda.service2.editor.IEditorFacade;

/* loaded from: input_file:WEB-INF/lib/bwmeta-import-1.7.3.jar:pl/edu/icm/yadda/imports/baztech/BaztechExporter.class */
/**
 * Exports journals and papers obtained from a {@link BaztechDataProvider} into
 * numbered XML files wrapped in a {@code <bwmeta>} root element.
 *
 * <p>The sequence used by {@link #main(String[])} is: configure the exporter,
 * export journals, cache the journal buffer, export papers (flushing the
 * context buffer whenever it grows past the configured size), flush the cached
 * journals and finally flush whatever is left in the context buffer.
 */
public class BaztechExporter {
    private static final Logger log = LoggerFactory.getLogger(BaztechExporter.class);

    /** Matches ASCII control characters except TAB, LF and CR; matches are removed from the output. */
    private static final Pattern STRIP_PATTERN = Pattern.compile("[\\p{Cntrl}&&[^\\x09\\x0a\\x0d]]");
    private static final String REPLACE_STRIPPED = "";

    /** Prefix removed from an external id to obtain the id reported as "baztech id" in the character dump. */
    private static final Pattern BAZTECH_ID_PREFIX = Pattern.compile("bwmeta1.element.baztech-[^-]*-");

    /** Looked up once instead of once per substituted character. */
    private static final Charset CP1250 = Charset.forName("cp1250");
    private static final Charset UTF8 = Charset.forName("UTF-8");

    private static final String CLASSPATH_PREFIX = "classpath://";
    private static final String DEFAULT_PROPERTIES_LOCATION = "classpath://pl/edu/icm/yadda/imports/baztech/provider.properties";
    private static final String UNKNOWN_CHARS_FILE = "./unknown_chars.txt";

    private BaztechExportConfiguration configuration = new BaztechExportConfiguration();
    private JournalProcessor journalProc;
    private PaperProcessor paperProc;
    private BaztechDataProvider provider;
    private BaztechContext ctx = new BaztechContext();

    /** Journal objects set aside by {@link #cacheJournals()} and written by {@link #doFlushJournals()}. */
    protected Set<?> journalsBuffer;
    private IEditorFacade<String> editor;
    private BwmetaWriter bwmetaWriter;

    /** Number used in the name of the next flush file. */
    private int flushNumber = 1;

    /** Whether non-whitelisted characters found in the output are reported to {@link #unknownCharactersStream}. */
    boolean dumpBadCharacters;
    PrintStream unknownCharactersStream;

    /** Non-alphanumeric characters that are never reported by the unknown-character dump. */
    public static final String acceptableChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~°÷×¬¤€£¼½↑⁺⁻⁰₀¹₁²₂³₃⁴₄⁵₅⁶₆⁷₇⁸₈⁹₉©®§¨≠·™≥¸±≤≈´∑’”¦‡†↔→•“˙‘?–„—−“‰‘‹∈˛∞›∼⊥˘⊂∩˜∫˝√≅≡∂⊆∇∆═∝∋∅∗∏◊⓪⑳∃⇔‾‚\u00ad…⋅";

    /** Creates an exporter that does not dump unknown characters. */
    public BaztechExporter() {
        this(false);
    }

    /**
     * Creates an exporter.
     *
     * <p>Initialisation failures are logged rather than thrown. If the dump file
     * cannot be opened, dumping is switched off so that later flushes do not fail
     * on a missing stream.
     *
     * @param z when {@code true}, characters that are neither letters, digits,
     *          whitespace nor listed in {@link #acceptableChars} are written to
     *          {@code ./unknown_chars.txt} (platform default encoding)
     */
    public BaztechExporter(boolean z) {
        this.dumpBadCharacters = z;
        try {
            this.bwmetaWriter = new VelocityBwmetaWriterImpl();
        } catch (ExportException e) {
            log.error("Exception caught", e);
        }
        if (this.dumpBadCharacters) {
            try {
                this.unknownCharactersStream = new PrintStream(UNKNOWN_CHARS_FILE);
            } catch (FileNotFoundException e) {
                log.error("Exception caught", e);
                // Without a stream there is nowhere to dump to.
                this.dumpBadCharacters = false;
            }
        }
    }

    /** Moves the current content of the context buffer into {@link #journalsBuffer}. */
    public void cacheJournals() {
        this.journalsBuffer = this.ctx.resetBuffer();
    }

    /**
     * Writes the cached journals to the flush file numbered {@code 000}.
     *
     * <p>Cached {@link ExtIdObject}s whose external id is present in the context's
     * reference marks with a {@code false} value are dropped first. The regular
     * flush counter is restored afterwards, also when the flush fails.
     *
     * @throws Exception if writing the flush file fails
     */
    public void doFlushJournals() throws Exception {
        if (this.journalsBuffer == null) {
            log.warn("No journals cached - nothing to flush");
            return;
        }
        log.info("flushing journals : {}", Integer.valueOf(this.journalsBuffer.size()));
        // Iterate over a snapshot so that entries can be removed from the live buffer.
        for (Object candidate : new HashSet<Object>(this.journalsBuffer)) {
            if (!(candidate instanceof ExtIdObject)) {
                continue;
            }
            Boolean referenced = this.ctx.referenceMark.get(((ExtIdObject) candidate).getExtId());
            if (referenced != null && !referenced.booleanValue()) {
                this.journalsBuffer.remove(candidate);
            }
        }
        int savedFlushNumber = this.flushNumber;
        this.flushNumber = 0;
        try {
            doFlush(this.journalsBuffer);
        } finally {
            this.flushNumber = savedFlushNumber;
        }
    }

    public IEditorFacade<String> getEditor() {
        return this.editor;
    }

    public void setEditor(IEditorFacade<String> iEditorFacade) {
        this.editor = iEditorFacade;
    }

    public JournalProcessor getJournalProc() {
        return this.journalProc;
    }

    /**
     * Sets the journal processor and passes it the continuations CSV settings
     * from the current configuration.
     *
     * @param journalProcessor processor to use; must not be {@code null}
     */
    public void setJournalProc(JournalProcessor journalProcessor) {
        this.journalProc = journalProcessor;
        journalProcessor.setContinuationsCSV(
                this.configuration.getCfg(BaztechExportConfiguration.PARAM_CONTINUATIONS_CSV),
                this.configuration.getCfg(BaztechExportConfiguration.PARAM_CONTINUATIONS_DELIMITER));
    }

    public PaperProcessor getPaperProc() {
        return this.paperProc;
    }

    public void setPaperProc(PaperProcessor paperProcessor) {
        this.paperProc = paperProcessor;
    }

    public BaztechDataProvider getProvider() {
        return this.provider;
    }

    public void setProvider(BaztechDataProvider baztechDataProvider) {
        this.provider = baztechDataProvider;
    }

    /**
     * Replaces the configuration with one built from {@code map} and applies it
     * to the journal processor, the paper processor and the data provider.
     *
     * <p>Does nothing when no provider has been set. The journal and paper
     * processors must have been set beforehand.
     *
     * @param map configuration entries
     * @throws Exception if a collaborator rejects its configuration
     */
    public void setupProperties(Map<Object, Object> map) throws Exception {
        if (this.provider == null) {
            return;
        }
        this.configuration = new BaztechExportConfiguration(map);

        BasicDataSource dataSource = new BasicDataSource();
        dataSource.setDriverClassName("com.ibm.db2.jcc.DB2Driver");
        dataSource.setDefaultReadOnly(true);
        dataSource.setUrl(this.configuration.getCfg(BaztechExportConfiguration.PARAM_DBURL));
        dataSource.setUsername(this.configuration.getCfg(BaztechExportConfiguration.PARAM_DBUSER));
        dataSource.setPassword(this.configuration.getCfg(BaztechExportConfiguration.PARAM_DBPASS));

        this.journalProc.setContinuationsCSV(
                this.configuration.getCfg(BaztechExportConfiguration.PARAM_CONTINUATIONS_CSV),
                this.configuration.getCfg(BaztechExportConfiguration.PARAM_CONTINUATIONS_DELIMITER));
        this.journalProc.setContentPrefix(this.configuration.getCfg(BaztechExportConfiguration.PARAM_CONTENT_PREFIX));
        this.journalProc.setBaztechImport(this.configuration.getCfg(BaztechExportConfiguration.PARAM_BAZTECH_IMPORT));
        this.journalProc.setImageBaseUrl(this.configuration.getCfg(BaztechExportConfiguration.PARAM_IMAGE_BASE_URL));
        this.journalProc.setJournalContentsList(this.configuration.getCfg(BaztechExportConfiguration.PARAM_JOURNAL_CONTENTS_LIST));
        // A missing value is treated as "false" instead of failing on trim().
        String publisherOnly = this.configuration.getCfg(BaztechExportConfiguration.PARAM_PUBLISHER_INSTITUTION_ONLY);
        this.journalProc.setPublisherOnlyInstitution(publisherOnly != null && "true".equalsIgnoreCase(publisherOnly.trim()));

        this.paperProc.setBaztechImport(this.configuration.getCfg(BaztechExportConfiguration.PARAM_BAZTECH_IMPORT));
        this.paperProc.setArticleContentsList(this.configuration.getCfg(BaztechExportConfiguration.PARAM_ARTICLE_CONTENTS_LIST));
        this.paperProc.setContentsCsv(
                this.configuration.getCfg(BaztechExportConfiguration.PARAM_CONTENT_CSV),
                this.configuration.getCfg(BaztechExportConfiguration.PARAM_CONTENT_DELIMITER));
        this.paperProc.setContentPrefix(this.configuration.getCfg(BaztechExportConfiguration.PARAM_CONTENT_PREFIX));

        String citationsPath = this.configuration.getCfg(BaztechExportConfiguration.PARAM_CITATIONS_PATH);
        if (Utils.emptyStr(citationsPath)) {
            log.warn("Citations path not specified.");
        } else {
            this.paperProc.setCitationsPath(new File(citationsPath.trim()));
        }

        Double uncertaintyThreshold = null;
        String thresholdText = this.configuration.getCfg(BaztechExportConfiguration.PARAM_LANGUAGE_UNCERTAINTY_TRESHOLD);
        if (StringUtils.isNotBlank(thresholdText)) {
            uncertaintyThreshold = Double.valueOf(thresholdText);
        }
        this.paperProc.initLanguageIdentifier(uncertaintyThreshold);

        this.provider.setDataSource(dataSource);
    }

    /**
     * Processes every journal returned by the provider for the configured
     * journals filter, then records in the context, per journal and subscription
     * year, the set of libraries subscribing to it.
     *
     * @throws Exception if processing a journal fails; the error is logged and rethrown
     */
    public void doExportJournals() throws Exception {
        List<BaztechJournal> allJournals = this.provider.getAllJournals(this.configuration.getCfg(BaztechExportConfiguration.PARAM_JOURNALS_FILTER));
        Map<Integer, BaztechJournal> journalsById = new HashMap<Integer, BaztechJournal>();
        for (BaztechJournal journal : allJournals) {
            journalsById.put(Integer.valueOf(journal.getId()), journal);
            try {
                this.journalProc.doProcessJournal(this.ctx, journal);
            } catch (Exception e) {
                log.error("Unexpected error", e);
                throw e;
            }
        }
        this.journalProc.doPostProcessJournal(this.ctx);

        for (BaztechLibrary library : this.provider.getAllLibraries()) {
            for (BaztechLibrary.Subscription subscription : library.getSubscriptions()) {
                BaztechJournal journal = journalsById.get(Integer.valueOf(subscription.journalId));
                if (journal == null) {
                    // Subscription refers to a journal that was not exported.
                    continue;
                }
                String key = BaztechContext.SUBSCRIBERS_PREFIX
                        + this.ctx.getBaztechIdGenerator().generateYearId(journal.getTitle(), journal.getIssn(), subscription.year);
                if (!this.ctx.containsKey(key)) {
                    this.ctx.put(key, (Object) new HashSet<BaztechLibrary>());
                }
                Object stored = this.ctx.get(key);
                @SuppressWarnings("unchecked") // only this method stores values under subscriber keys
                Set<BaztechLibrary> subscribers = (Set<BaztechLibrary>) stored;
                subscribers.add(library);
            }
        }
    }

    /**
     * Reads papers from {@code BPSJC.PAPER}, processes each of them and flushes
     * the context buffer whenever it holds more objects than the configured
     * flush size.
     *
     * <p>The configured papers filter is appended verbatim to the query, so it
     * must come from trusted configuration only. A {@link SQLException} is
     * logged and ends the export without being propagated; any other failure
     * while processing a paper is logged and rethrown. The result set, statement
     * and connection are always released.
     *
     * @throws Exception if processing a paper or flushing fails
     */
    public void doExportPapers() throws Exception {
        String papersFilter = this.configuration.getCfg(BaztechExportConfiguration.PARAM_PAPERS_FILTER);
        int flushSize = Integer.parseInt(this.configuration.getCfg(BaztechExportConfiguration.PARAM_FLUSH_SIZE));
        Connection connection = null;
        PreparedStatement statement = null;
        ResultSet rows = null;
        try {
            connection = this.provider.getDataSource().getConnection();
            connection.setReadOnly(true);
            statement = connection.prepareStatement("SELECT * FROM BPSJC.PAPER " + papersFilter);
            rows = statement.executeQuery();
            int rowNumber = 0;
            while (rows.next()) {
                BaztechPaper paper = (BaztechPaper) this.provider.paperMapper.mapRow(rows, rowNumber++);
                try {
                    this.paperProc.doProcessPaper(this.ctx, paper);
                    if (this.ctx.buffer.size() > flushSize) {
                        doFlush();
                    }
                } catch (Exception e) {
                    log.error("Error processing paper " + paper.id, e);
                    throw e;
                }
            }
            log.info("Baztech import language statistics:\n" + this.ctx.titleLangConflicts + " title language conflicts,\n" + this.ctx.titleSameLang + " documents with main and alternative title in the same language,\n" + this.ctx.abstractSameLang + " documents with abstracts in the same language");
        } catch (SQLException e) {
            log.error("Exception caught", e);
        } finally {
            closeQuietly(rows);
            closeQuietly(statement);
            closeQuietly(connection);
        }
    }

    /** Closes the result set, logging instead of propagating a failure; {@code null} is ignored. */
    private static void closeQuietly(ResultSet resultSet) {
        if (resultSet != null) {
            try {
                resultSet.close();
            } catch (SQLException e) {
                log.error("Exception caught", e);
            }
        }
    }

    /** Closes the statement, logging instead of propagating a failure; {@code null} is ignored. */
    private static void closeQuietly(PreparedStatement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                log.error("Exception caught", e);
            }
        }
    }

    /** Closes the connection, logging instead of propagating a failure; {@code null} is ignored. */
    private static void closeQuietly(Connection connection) {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                log.error("Exception caught", e);
            }
        }
    }

    /**
     * Serialises {@code obj} with the BWMeta writer.
     *
     * @param obj object to serialise
     * @return the serialised text, or {@code null} when {@code obj} is not an
     *         {@link IExportableEntity} or the writer reports an {@link ExportException}
     * @throws IllegalStateException if the writer could not be created when this exporter was constructed
     */
    public String doWrite(Object obj) {
        if (!(obj instanceof IExportableEntity)) {
            return null;
        }
        if (this.bwmetaWriter == null) {
            throw new IllegalStateException("BWMeta writer is not available; its initialisation failed (see earlier log output)");
        }
        StringWriter serialized = new StringWriter();
        try {
            this.bwmetaWriter.write(serialized, (IExportableEntity) obj, (Map<String, String>) null);
            return serialized.toString();
        } catch (ExportException e) {
            log.error("Cannot write exportable entity " + obj, e);
            return null;
        }
    }

    /** Decodes the low byte of {@code c} as a cp1250 character. */
    private char getCharForCp1250(char c) {
        return CP1250.decode(ByteBuffer.wrap(new byte[]{(byte) c})).charAt(0);
    }

    /**
     * Replaces every character in the range 128-159 with the character obtained
     * by decoding its value as a cp1250 byte and, when dumping is enabled,
     * reports characters outside the accepted set.
     *
     * @param str serialised text of {@code obj}
     * @param obj object the text was produced from; used only to label dump lines
     * @return {@code str} with the substitutions applied
     */
    private String checkCharsDoSubstitution(String str, Object obj) {
        StringBuilder substituted = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            substituted.append(c < 128 || c >= 160 ? c : getCharForCp1250(c));
        }
        String result = substituted.toString();
        if (this.dumpBadCharacters && this.unknownCharactersStream != null) {
            dumpUnknownCharacters(result, obj);
        }
        return result;
    }

    /** Writes one dump line for each character of {@code text} that is not a letter, digit, whitespace or accepted symbol. */
    private void dumpUnknownCharacters(String text, Object obj) {
        String details = null;
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (Character.isLetterOrDigit(c) || Character.isSpaceChar(c) || Character.isWhitespace(c)
                    || acceptableChars.indexOf(c) >= 0) {
                continue;
            }
            if (details == null) {
                // The label is identical for every character of the same object; build it once.
                details = describeForDump(obj);
            }
            this.unknownCharactersStream.println(c + details);
        }
    }

    /** Builds the part of a dump line that identifies {@code obj}; fields that cannot be read stay empty. */
    private static String describeForDump(Object obj) {
        String yaddaId = "";
        String baztechId = "";
        String title = "";
        try {
            if (obj instanceof ExtIdObject) {
                ExtIdObject extIdObject = (ExtIdObject) obj;
                yaddaId = extIdObject.getExtId();
                title = extIdObject.getDescriptable().getDefaultName().getText();
                baztechId = BAZTECH_ID_PREFIX.matcher(yaddaId).replaceAll("");
            }
        } catch (Exception e) {
            // Best effort: the label is diagnostic only, so keep whatever was collected.
            log.debug("Cannot describe object for unknown-character dump", e);
        }
        return "\tyadda id:" + yaddaId + " baztech id: " + baztechId + " title: " + title;
    }

    /**
     * Writes the exportable objects of {@code set} to the next numbered flush
     * file ({@code <output prefix><NNN>.xml}, UTF-8) and advances the flush counter.
     *
     * <p>Objects for which {@link #doWrite(Object)} returns {@code null} are
     * skipped. The writer is closed even when writing fails.
     *
     * @param set objects to write; {@code null} or empty means nothing is written
     * @throws Exception if the file cannot be created or written
     */
    public void doFlush(Set<?> set) throws Exception {
        if (set == null || set.isEmpty()) {
            return;
        }
        log.info("flushing {} objects", Integer.valueOf(set.size()));
        File file = new File(this.configuration.getCfg(BaztechExportConfiguration.PARAM_OUTPUT_PREFIX) + String.format("%03d", Integer.valueOf(this.flushNumber)) + ".xml");
        File parent = file.getParentFile();
        if (parent != null) {
            // A prefix without a directory part has no parent to create.
            parent.mkdirs();
        }
        this.flushNumber++;
        log.info("Writing next flush file: " + file.getAbsolutePath());
        OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(file), UTF8);
        try {
            out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
            out.write(this.configuration.getCfg(BaztechExportConfiguration.PARAM_DTD_STRING));
            out.write("\n");
            out.write("<bwmeta>\n");
            for (Object obj : set) {
                String serialized = doWrite(obj);
                if (serialized != null) {
                    out.write(STRIP_PATTERN.matcher(checkCharsDoSubstitution(serialized, obj)).replaceAll(REPLACE_STRIPPED));
                }
            }
            out.write("</bwmeta>\n");
        } finally {
            out.close();
        }
    }

    /**
     * Flushes and resets the context buffer.
     *
     * @throws Exception if writing the flush file fails
     */
    public void doFlush() throws Exception {
        doFlush(this.ctx.resetBuffer());
    }

    /**
     * Command-line entry point.
     *
     * @param strArr {@code [0]} optional location of the properties file: a
     *               {@code classpath://} resource, a URL or a file path (a
     *               bundled default is used when absent); {@code [1]} optional
     *               {@code -log-chars} switch enabling the unknown-character dump
     * @throws Exception if the properties cannot be loaded or the export fails
     */
    public static void main(String[] strArr) throws Exception {
        String propertiesLocation = strArr.length >= 1 ? strArr[0] : DEFAULT_PROPERTIES_LOCATION;
        boolean logChars = strArr.length == 2 && "-log-chars".equalsIgnoreCase(strArr[1]);
        Properties properties = loadProperties(propertiesLocation);

        BaztechDataProvider dataProvider = new BaztechDataProvider();
        BaztechExporter exporter = new BaztechExporter(logChars);
        exporter.setPaperProc(new PaperProcessor());
        exporter.setJournalProc(new JournalProcessor());
        exporter.setProvider(dataProvider);
        exporter.setupProperties(properties);
        exporter.doExportJournals();
        exporter.cacheJournals();
        log.debug("Journals Cached");
        exporter.doExportPapers();
        log.debug("Papers exported - Flashing journals");
        exporter.doFlushJournals();
        log.debug("Flushing remaining objects");
        exporter.doFlush();
    }

    /**
     * Loads properties from a classpath resource, a URL or a file, closing the
     * underlying stream afterwards.
     *
     * @throws FileNotFoundException if a {@code classpath://} resource does not exist
     * @throws IOException if the location cannot be opened or read
     */
    private static Properties loadProperties(String location) throws IOException {
        InputStream in;
        if (location.startsWith(CLASSPATH_PREFIX)) {
            String resource = location.substring(CLASSPATH_PREFIX.length());
            in = BaztechExporter.class.getClassLoader().getResourceAsStream(resource);
            if (in == null) {
                throw new FileNotFoundException("Classpath resource not found: " + resource);
            }
        } else if (location.startsWith(DatabaseURL.S_HTTP) || location.startsWith(DatabaseURL.S_HTTPS) || location.startsWith(XSLTLiaison.FILE_PROTOCOL_PREFIX)) {
            in = new URL(location).openStream();
        } else {
            in = new FileInputStream(new File(location));
        }
        try {
            Properties properties = new Properties();
            properties.load(in);
            return properties;
        } finally {
            in.close();
        }
    }
}
