package pl.edu.icm.yadda.analysis.relations.auxil.parallel.nlm2mallettrain;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.OutputStreamWriter;
import java.security.InvalidParameterException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.NotImplementedException;
import org.apache.xml.serialize.LineSeparator;
import pl.edu.icm.yadda.analysis.relations.auxil.parallel.Operation;
import pl.edu.icm.yadda.analysis.relations.auxil.parallel.Parallel;
import pl.edu.icm.yadda.analysis.relations.auxil.trash.YToCatObjProcessingNode;
import pl.edu.icm.yadda.analysis.zentralblatteudmlmixer.MixFileIteratorBuilder;
import pl.edu.icm.yadda.analysis.zentralblatteudmlmixer.auxil.MixRecord;
import pl.edu.icm.yadda.bwmeta.model.YAttribute;
import pl.edu.icm.yadda.bwmeta.model.YCategoryRef;
import pl.edu.icm.yadda.bwmeta.model.YConstants;
import pl.edu.icm.yadda.bwmeta.model.YContributor;
import pl.edu.icm.yadda.bwmeta.model.YElement;
import pl.edu.icm.yadda.bwmeta.model.YExportable;
import pl.edu.icm.yadda.bwmeta.model.YId;
import pl.edu.icm.yadda.bwmeta.model.YName;
import pl.edu.icm.yadda.bwmeta.model.YRelation;
import pl.edu.icm.yadda.bwmeta.model.YTagList;
import pl.edu.icm.yadda.bwmeta.transformers.Bwmeta2_0ToYTransformer;
import pl.edu.icm.yadda.bwmeta.transformers.BwmetaTransformers;
import pl.edu.icm.yadda.metadata.transformers.IMetadataReader;
import pl.edu.icm.yadda.process.ctx.ProcessContext;
import pl.edu.icm.yadda.process.iterator.IIdExtractor;
import pl.edu.icm.yadda.process.iterator.ISourceIterator;
import pl.edu.icm.yadda.process.iterator.ISourceIteratorBuilder;
import pl.edu.icm.yadda.service2.CatalogObject;
import pl.edu.icm.yadda.service2.CatalogObjectPart;

/* loaded from: input_file:WEB-INF/lib/yadda-analysis-impl-1.10.0-RC5.jar:pl/edu/icm/yadda/analysis/relations/auxil/parallel/nlm2mallettrain/FromNlmToBwmeta_IteratorBuilder.class */
public class FromNlmToBwmeta_IteratorBuilder implements ISourceIteratorBuilder<File> {
    /** Key of the auxiliary parameter that, when present, overrides {@code sourceDir} in both {@code build} methods. */
    public static final String AUX_PARAM_SOURCE_DIR = "source_dir";
    /** Default directory to list files from; used when no "source_dir" parameter is supplied to {@code build}. */
    private String sourceDir;
    /** Extensions handed to {@code FileUtils.listFiles} to select the files to iterate over. */
    private String[] extensions;
    /** Files found by the most recent {@code build} call. */
    private Collection<File> files;
    /** Reader requested for the BWMETA 2.0 to Y conversion; used by {@code getZblData} and {@code toEnhance}. */
    static IMetadataReader reader = BwmetaTransformers.BTF.getReader(BwmetaTransformers.BWMETA_2_0, BwmetaTransformers.Y);
    /** Not referenced by any other code visible in this class. */
    static Bwmeta2_0ToYTransformer transformer = new Bwmeta2_0ToYTransformer();
    /** Shared builder instance that {@code main} configures and uses to list the input files. */
    static FromNlmToBwmeta_IteratorBuilder o1 = new FromNlmToBwmeta_IteratorBuilder();
    /** Parameter map that {@code main} fills with "source_dir" and passes to {@code o1.build}. */
    static HashMap hm = new HashMap();
    /** Node that {@code main} uses to turn the enhanced elements into catalog objects before writing them out. */
    static YToCatObjProcessingNode o3 = new YToCatObjProcessingNode();
    /** Directory the enhanced files are written to; overridable with the {@code ENHANCE_NLM=...} argument. */
    static String ENHANCE_NLM = "/home/pdendek/sample/ENHANCE_2/";
    /** Directory {@code main} reads the input files from; overridable with the {@code NLM=...} argument. */
    static String NLM = "/home/pdendek/sample/CEDRAM/";
    /** Location of the {@code Zbl<id>.bwmeta.xml} files read by {@code getZblData}; overridable with {@code ZBL=...}. */
    static String ZBL = "/home/pdendek/sample/ZBL/";
    /** Path of the mix file scanned by {@code mapExtZblToZbl}; overridable with the {@code MIX=...} argument. */
    static String MIX = "/home/pdendek/MIX.txt";

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:WEB-INF/lib/yadda-analysis-impl-1.10.0-RC5.jar:pl/edu/icm/yadda/analysis/relations/auxil/parallel/nlm2mallettrain/FromNlmToBwmeta_IteratorBuilder$NLMFieleIterator.class */
    /**
     * Source iterator that walks over an already collected set of files.
     *
     * <p>Iteration is delegated to the iterator of the collection given at construction time;
     * the collection itself is kept only to report its size.
     */
    public static class NLMFieleIterator implements ISourceIterator<File> {
        /** Collection the iterator was created from; consulted for the size estimate. */
        private Collection<File> source;
        /** Cursor over {@code source}, obtained once in the constructor. */
        private Iterator<File> cursor;

        /**
         * @param collection files to iterate over
         */
        public NLMFieleIterator(Collection<File> collection) {
            source = collection;
            cursor = collection.iterator();
        }

        /** @return {@code true} while the underlying cursor has more files */
        @Override // java.util.Iterator
        public boolean hasNext() {
            return cursor.hasNext();
        }

        /** @return the next file delivered by the underlying cursor */
        @Override // java.util.Iterator
        public File next() {
            return cursor.next();
        }

        /** Removal is not supported; always throws {@link NotImplementedException}. */
        @Override // java.util.Iterator
        public void remove() {
            throw new NotImplementedException();
        }

        /** @return the size of the collection this iterator was built from */
        @Override // pl.edu.icm.yadda.process.iterator.ISourceIterator
        public int getEstimatedSize() throws UnsupportedOperationException {
            return source.size();
        }

        /** Nothing to release; intentionally empty. */
        @Override // pl.edu.icm.yadda.process.iterator.ISourceIterator
        public void clean() {
        }
    }

    /** @return the default source directory used when no "source_dir" parameter is supplied */
    public String getSourceDir() {
        return sourceDir;
    }

    /** @param str default source directory to use when no "source_dir" parameter is supplied */
    public void setSourceDir(String str) {
        sourceDir = str;
    }

    /** @return the extension filter array itself (not a copy) */
    public String[] getExtensions() {
        return extensions;
    }

    /** @param strArr extensions used to filter the listed files; the array is stored as given */
    public void setExtensions(String[] strArr) {
        extensions = strArr;
    }

    /** @return the name of the auxiliary parameter that overrides the source directory */
    public static String getAuxParamSourceDir() {
        return AUX_PARAM_SOURCE_DIR;
    }

    /**
     * Lists the files of the source directory and returns an iterator over them.
     *
     * <p>The directory is the {@code "source_dir"} entry of {@code map} when that entry is
     * non-null, otherwise the directory configured on this builder. Files are listed
     * non-recursively and filtered by the configured extensions.
     *
     * @param map auxiliary parameters; may be {@code null} or lack the {@code "source_dir"} key
     * @return iterator over the files found in the directory
     * @throws InvalidParameterException if no directory is configured or the path is not a directory
     * @throws Exception declared for compatibility with existing callers
     */
    public ISourceIterator<File> build(Map<String, String> map) throws Exception {
        String dirPath = this.sourceDir;
        // Single lookup; a null map is treated like an empty one.
        String override = (map == null) ? null : map.get(AUX_PARAM_SOURCE_DIR);
        if (override != null) {
            dirPath = override;
        }
        if (dirPath == null) {
            // new File((String) null) would otherwise fail with a message-less NullPointerException.
            throw new InvalidParameterException("Source directory is not set: configure sourceDir or pass '"
                    + AUX_PARAM_SOURCE_DIR + "'");
        }
        File dir = new File(dirPath);
        if (!dir.isDirectory()) {
            throw new InvalidParameterException(dir.getAbsolutePath() + " is not a directory!");
        }
        this.files = FileUtils.listFiles(dir, this.extensions, false);
        return new NLMFieleIterator(this.files);
    }

    /**
     * Lists the files of the source directory and returns an iterator over them.
     *
     * <p>The directory is the {@code "source_dir"} auxiliary parameter of the context when it is
     * present and non-null, otherwise the directory configured on this builder. Files are listed
     * non-recursively and filtered by the configured extensions.
     *
     * @param processContext process context; may be {@code null}, in which case only the
     *                       configured directory is considered
     * @return iterator over the files found in the directory
     * @throws InvalidParameterException if no directory is configured or the path is not a directory
     * @throws Exception declared by the implemented interface
     */
    @Override // pl.edu.icm.yadda.process.iterator.ISourceIteratorBuilder
    public ISourceIterator<File> build(ProcessContext processContext) throws Exception {
        String dirPath = this.sourceDir;
        if (processContext != null && processContext.containsAuxParam(AUX_PARAM_SOURCE_DIR)) {
            String override = (String) processContext.getAuxParam(AUX_PARAM_SOURCE_DIR);
            // A parameter that is present but null must not wipe out the configured default.
            if (override != null) {
                dirPath = override;
            }
        }
        if (dirPath == null) {
            // new File((String) null) would otherwise fail with a message-less NullPointerException.
            throw new InvalidParameterException("Source directory is not set: configure sourceDir or pass '"
                    + AUX_PARAM_SOURCE_DIR + "'");
        }
        File dir = new File(dirPath);
        if (!dir.isDirectory()) {
            throw new InvalidParameterException(dir.getAbsolutePath() + " is not a directory!");
        }
        this.files = FileUtils.listFiles(dir, this.extensions, false);
        return new NLMFieleIterator(this.files);
    }

    /**
     * Provides the id extractor for the iterated files.
     *
     * @return an extractor that yields the constant id {@code "files"} for every file
     */
    @Override // pl.edu.icm.yadda.process.iterator.ISourceIteratorBuilder
    public IIdExtractor<File> getIdExtractor() {
        final IIdExtractor<File> constantIdExtractor = new IIdExtractor<File>() {
            @Override // pl.edu.icm.yadda.process.iterator.IIdExtractor
            public String getId(File file) {
                return "files";
            }
        };
        return constantIdExtractor;
    }

    /**
     * Command-line entry point: lists the {@code *5.bwmeta.xml} files of the {@code NLM} directory,
     * enhances their elements with data from the matching Zbl files and writes each result to
     * the {@code ENHANCE_NLM} directory as {@code <name without ".xml">enhanced.xml}.
     *
     * <p>Recognised arguments are {@code NLM=}, {@code ENHANCE_NLM=}, {@code ZBL=} and
     * {@code MIX=} (see {@code proceedArgs}). Failures for a single file are reported on standard
     * output and do not stop the run.
     *
     * @param strArr command-line arguments
     * @throws Throwable if listing the input directory or the parallel call fails
     */
    public static void main(String[] strArr) throws Throwable {
        proceedArgs(strArr);
        File outputDir = new File(ENHANCE_NLM);
        if (!outputDir.exists()) {
            outputDir.mkdirs();
        }
        hm.put(AUX_PARAM_SOURCE_DIR, NLM);
        o1.setExtensions(new String[]{"5.bwmeta.xml"});
        ISourceIterator<File> build = o1.build(hm);
        long startNanos = System.nanoTime();
        System.out.println("Przetworze teraz " + build.getEstimatedSize() + " obiektów CEDRAMowych w formacie BWMETA");

        // Raw list kept on purpose: the signature of Parallel.For is not visible from this file.
        LinkedList collected = new LinkedList();
        while (build.hasNext()) {
            collected.add(build.next());
        }
        // Instantiation and call kept exactly as before; their effects are defined outside this file.
        new Parallel();
        Parallel.For(collected, (Operation) null);

        // NOTE(review): the iterator was drained into the list above, so hasNext() is already
        // false here and the sequential loop below does not execute as written. Confirm whether
        // it should iterate over the collected files or be removed in favour of Parallel.For.
        int i = 1; // 1-based position of the file being processed; at most 99 files are handled
        while (build.hasNext() && i < 100) {
            // Reset per iteration so the error report never names the previous file.
            File file = null;
            try {
                file = build.next();
                System.out.println(LineSeparator.Macintosh + i + "/" + build.getEstimatedSize() + "     " + file.getAbsolutePath());
                List<YElement> enhance = toEnhance(file);
                for (YElement yElement : enhance) {
                    enhanceFromZbl(yElement);
                }
                writeEnhanced(file, enhance);
                long now = System.nanoTime();
                System.out.println("Time till now: " + ((now - startNanos) / 1.0E9d) + " sec.");
                System.out.println("Time till now per BWMETA(CEDRAM) file: " + ((now - startNanos) / (i * 1.0E9d)) + " sec.");
            } catch (Exception e) {
                System.out.println("Following exception occurred in file: "
                        + (file == null ? "<unknown>" : file.getAbsolutePath()));
                e.printStackTrace();
            }
            i++;
        }
        long endNanos = System.nanoTime();
        // i is one past the number of handled files; guard against dividing by zero.
        int handled = Math.max(i - 1, 1);
        System.out.println("Total time: " + ((endNanos - startNanos) / 1.0E9d) + " sec.");
        System.out.println("Time per BWMETA(CEDRAM) file: " + ((endNanos - startNanos) / (handled * 1.0E9d)) + " sec.");
    }

    /** Enhances one element and its reference relations with the data of the matching Zbl records. */
    private static void enhanceFromZbl(YElement yElement) throws Exception {
        String extZblId = null;
        for (YId id : yElement.getIds()) {
            if (YConstants.EXT_SCHEME_ZBL.equals(id.getScheme())) {
                extZblId = id.getValue();
                break;
            }
        }
        if (extZblId != null) {
            String zblId = mapExtZblToZbl(extZblId);
            if (zblId != null) {
                enhanceArticleElement(yElement, getZblData(zblId));
            }
        }
        for (YRelation yRelation : yElement.getRelations()) {
            String type = yRelation.getType();
            if (!YConstants.RL_REFERENCE_TO.equals(type) && !"related-to".equals(type)) {
                continue;
            }
            String referencedExtId = yRelation.getOneAttributeSimpleValue(YConstants.AT_REFERENCE_PARSED_ID_ZBL);
            if (referencedExtId == null) {
                continue;
            }
            String referencedZblId = mapExtZblToZbl(referencedExtId);
            if (referencedZblId == null) {
                continue;
            }
            List<YElement> zblData = getZblData(referencedZblId);
            if (zblData.size() != 0) {
                enhanceRelationElement(yRelation, zblData);
            }
        }
    }

    /** Serialises the enhanced elements of one input file into the output directory as UTF-8. */
    private static void writeEnhanced(File file, List<YElement> enhance) throws Exception {
        String inputName = file.getName();
        // File(parent, child) inserts the separator when ENHANCE_NLM lacks a trailing one.
        File target = new File(ENHANCE_NLM, inputName.substring(0, inputName.length() - 4) + "enhanced.xml");
        // FileOutputStream creates or truncates the target, so no explicit delete/create is needed.
        OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(target), "UTF-8");
        try {
            Iterator<CatalogObject<String>> objects = o3.process(enhance, (ProcessContext) null).iterator();
            while (objects.hasNext()) {
                Iterator<CatalogObjectPart<String>> parts = objects.next().getParts().iterator();
                while (parts.hasNext()) {
                    out.write(parts.next().getData());
                }
            }
        } finally {
            // Closing flushes the writer and releases the file handle even when writing fails.
            out.close();
        }
    }

    /**
     * Applies {@code KEY=VALUE} command-line arguments to the static path settings.
     *
     * <p>Recognised keys are {@code NLM}, {@code ENHANCE_NLM}, {@code ZBL} and {@code MIX}.
     * The value is everything after the first {@code '='}, so values may themselves contain
     * {@code '='}. Arguments that are null, have no {@code '='}, or have an empty key or value
     * are ignored instead of aborting with an {@code ArrayIndexOutOfBoundsException}.
     *
     * @param strArr command-line arguments; may be {@code null}
     */
    private static void proceedArgs(String[] strArr) {
        if (strArr == null) {
            return;
        }
        for (String arg : strArr) {
            if (arg == null) {
                continue;
            }
            int separator = arg.indexOf('=');
            if (separator <= 0 || separator == arg.length() - 1) {
                // Missing '=', empty key, or empty value: nothing to apply.
                continue;
            }
            String key = arg.substring(0, separator);
            String value = arg.substring(separator + 1);
            if ("NLM".equals(key)) {
                NLM = value;
            } else if ("ENHANCE_NLM".equals(key)) {
                ENHANCE_NLM = value;
            } else if ("ZBL".equals(key)) {
                ZBL = value;
            } else if ("MIX".equals(key)) {
                MIX = value;
            }
        }
    }

    /**
     * Copies data of the given Zbl elements onto a relation as attributes.
     *
     * <p>For every element the following attributes are added to {@code yRelation}, in this order:
     * a {@code "category"} group with its category references, one author attribute per
     * contributor, a group with its ISSN/ISBN/Zbl identifiers, the same identifiers as individual
     * parsed-id attributes, one attribute per name and one attribute per tag list.
     *
     * @param yRelation relation to enrich
     * @param list      Zbl elements providing the data
     */
    private static void enhanceRelationElement(YRelation yRelation, List<YElement> list) {
        for (YElement zblElement : list) {
            YAttribute categories = new YAttribute("category", "");
            for (YCategoryRef categoryRef : zblElement.getCategoryRefs()) {
                categories.addAttribute(new YAttribute(categoryRef.getClassification(), categoryRef.getCode()));
            }
            yRelation.addAttribute(categories);

            for (YContributor contributor : zblElement.getContributors()) {
                // Always start from a fresh attribute: a contributor without a canonical name
                // must not write into the category group or the previous author's attribute.
                YName canonical = contributor.getOneName(YConstants.NM_CANONICAL);
                YAttribute author = new YAttribute(YConstants.AT_REFERENCE_PARSED_AUTHOR,
                        canonical != null ? canonical.getText() : "");
                if (contributor.getAttributes(YConstants.AT_ZBL_AUTHOR_FINGERPRINT).size() > 0) {
                    author.addAttribute(YConstants.AT_ZBL_AUTHOR_FINGERPRINT,
                            contributor.getAttributes(YConstants.AT_ZBL_AUTHOR_FINGERPRINT).get(0).getValue());
                }
                YName forenames = contributor.getOneName("forenames");
                if (forenames != null) {
                    author.addAttribute(YConstants.AT_REFERENCE_PARSED_AUTHOR_FORENAMES, forenames.getText());
                }
                // A surname may be absent just like the other names; skip it rather than fail.
                YName surname = contributor.getOneName("surname");
                if (surname != null) {
                    author.addAttribute(YConstants.AT_REFERENCE_PARSED_AUTHOR_SURNAME, surname.getText());
                }
                yRelation.addAttribute(author);
            }

            // NOTE(review): this identifier group reuses the attribute name "category";
            // confirm whether a dedicated name was intended.
            YAttribute identifiers = new YAttribute("category", "");
            for (YId id : zblElement.getIds()) {
                String scheme = id.getScheme();
                // Constant-first comparison: an id without a scheme is simply skipped.
                if ("bwmeta1.id-class.ISSN".equals(scheme)) {
                    identifiers.addAttribute(new YAttribute("bwmeta1.id-class.ISSN", id.getValue()));
                } else if ("bwmeta1.id-class.ISBN".equals(scheme)) {
                    identifiers.addAttribute(new YAttribute("bwmeta1.id-class.ISBN", id.getValue()));
                } else if (YConstants.EXT_SCHEME_ZBL.equals(scheme)) {
                    identifiers.addAttribute(new YAttribute(YConstants.EXT_SCHEME_ZBL, id.getValue()));
                }
            }
            yRelation.addAttribute(identifiers);

            for (YId id : zblElement.getIds()) {
                String scheme = id.getScheme();
                if ("bwmeta1.id-class.ISSN".equals(scheme)) {
                    yRelation.addAttribute(new YAttribute(YConstants.AT_REFERENCE_PARSED_ID_ISSN, id.getValue()));
                } else if ("bwmeta1.id-class.ISBN".equals(scheme)) {
                    yRelation.addAttribute(new YAttribute(YConstants.AT_REFERENCE_PARSED_ID_ISBN, id.getValue()));
                } else if (YConstants.EXT_SCHEME_ZBL.equals(scheme)) {
                    yRelation.addAttribute(new YAttribute(YConstants.AT_REFERENCE_PARSED_ID_ZBL, id.getValue()));
                }
            }

            for (YName name : zblElement.getNames()) {
                YAttribute nameAttribute = new YAttribute(YConstants.AT_ENHANCED_FROM_ZBL_NAME, "");
                nameAttribute.addAttribute(new YAttribute("lang", name.getLanguage().getName()));
                nameAttribute.addAttribute(new YAttribute("type", name.getType()));
                nameAttribute.addAttribute(new YAttribute("value", name.getText()));
                yRelation.addAttribute(nameAttribute);
            }

            for (YTagList tagList : zblElement.getTagLists()) {
                YAttribute tagAttribute = new YAttribute(YConstants.AT_ENHANCED_FROM_ZBL_TAG, "");
                tagAttribute.addAttribute(new YAttribute("type", tagList.getType()));
                tagAttribute.addAttribute(new YAttribute("lang", tagList.getLanguage().getName()));
                Iterator<String> values = tagList.getValues().iterator();
                while (values.hasNext()) {
                    tagAttribute.addAttribute(new YAttribute("value", values.next()));
                }
                yRelation.addAttribute(tagAttribute);
            }
        }
    }

    /**
     * Merges data of the given Zbl elements into an article element.
     *
     * <p>For every Zbl element: category references not yet present are added; article
     * contributors whose surname equals that of a Zbl contributor receive the Zbl author
     * fingerprint attributes and, when the Zbl forenames consist of more space-separated parts
     * than the article's, a new set of names (forenames, surname, canonical); identifiers are
     * added for schemes the article has no id for; all tag lists are appended.
     *
     * @param yElement article element to enrich (modified in place)
     * @param list     Zbl elements providing the data
     */
    private static void enhanceArticleElement(YElement yElement, List<YElement> list) {
        for (YElement zblElement : list) {
            for (YCategoryRef categoryRef : zblElement.getCategoryRefs()) {
                if (!yElement.getCategoryRefs().contains(categoryRef)) {
                    yElement.addCategoryRef(categoryRef);
                }
            }

            for (YContributor zblContributor : zblElement.getContributors()) {
                YName zblSurname = zblContributor.getOneName("surname");
                if (zblSurname == null) {
                    // Contributors are matched by surname only; without one nothing can match.
                    continue;
                }
                for (YContributor articleContributor : yElement.getContributors()) {
                    String surname = null;
                    String forenames = null;
                    String canonical = null;
                    for (YName name : articleContributor.getNames()) {
                        if (YConstants.NM_CANONICAL.equals(name.getType())) {
                            canonical = name.getText();
                        } else if ("forenames".equals(name.getType())) {
                            forenames = name.getText();
                        } else if ("surname".equals(name.getType())) {
                            surname = name.getText();
                        }
                    }
                    if (surname == null || !surname.equals(zblSurname.getText())) {
                        continue;
                    }

                    // Either side may lack forenames; a missing value counts as zero parts
                    // instead of causing a NullPointerException.
                    YName zblForenames = zblContributor.getOneName("forenames");
                    String zblForenamesText = zblForenames != null ? zblForenames.getText() : null;
                    if (zblForenamesText != null
                            && countSpaceSeparatedParts(zblForenamesText) > countSpaceSeparatedParts(forenames)) {
                        YName newForenames = new YName(zblForenamesText);
                        newForenames.setType("forenames");
                        YName newSurname = new YName(surname);
                        newSurname.setType("surname");
                        YName newCanonical = canonical != null
                                ? new YName(canonical)
                                : new YName(zblForenamesText + " " + surname);
                        newCanonical.setType(YConstants.NM_CANONICAL);
                        LinkedList<YName> replacement = new LinkedList<YName>();
                        replacement.add(newForenames);
                        replacement.add(newSurname);
                        replacement.add(newCanonical);
                        articleContributor.setNames(replacement);
                    }

                    Iterator<YAttribute> fingerprints =
                            zblContributor.getAttributes(YConstants.AT_ZBL_AUTHOR_FINGERPRINT).iterator();
                    while (fingerprints.hasNext()) {
                        articleContributor.addAttribute(fingerprints.next());
                    }
                }
            }

            for (YId id : zblElement.getIds()) {
                if (yElement.getId(id.getScheme()) == null) {
                    yElement.addId(id);
                }
            }

            Iterator<YTagList> tagLists = zblElement.getTagLists().iterator();
            while (tagLists.hasNext()) {
                yElement.addTagList(tagLists.next());
            }
        }
    }

    /** Counts the parts of {@code text} separated by single spaces; {@code null} counts as zero. */
    private static int countSpaceSeparatedParts(String text) {
        return text == null ? 0 : text.split(" ").length;
    }

    /**
     * Reads the elements stored in the Zbl file {@code Zbl<str>.bwmeta.xml} under {@code ZBL}.
     *
     * @param str identifier that forms the middle part of the file name
     * @return the {@code YElement}s found in the file, or an empty list when the file does not exist
     * @throws Exception if the file cannot be opened or parsed
     */
    private static List<YElement> getZblData(String str) throws Exception {
        // File(parent, child) inserts the separator when ZBL lacks a trailing one.
        File file = new File(ZBL, "Zbl" + str + ".bwmeta.xml");
        if (!file.exists()) {
            return Collections.<YElement>emptyList();
        }
        // NOTE(review): FileReader decodes with the platform charset; confirm the files' encoding.
        FileReader input = new FileReader(file);
        List<YExportable> read;
        try {
            read = reader.read(input, (Object[]) null);
        } finally {
            // Release the file handle whether or not parsing succeeded.
            input.close();
        }
        List<YElement> elements = new LinkedList<YElement>();
        for (YExportable exportable : read) {
            if (exportable instanceof YElement) {
                elements.add((YElement) exportable);
            }
        }
        return elements;
    }

    /**
     * Looks up the 10-digit id recorded in the {@code MIX} file for the given dot id.
     *
     * <p>The mix file is scanned from the start on every call and the first record whose dot id
     * equals {@code str} wins.
     *
     * @param str dot id to look up; anything other than a 10-character string yields {@code null}
     * @return the 10-digit id of the first matching record, or {@code null} when there is none
     * @throws Exception if the mix file cannot be opened or read
     */
    private static String mapExtZblToZbl(String str) throws Exception {
        if (str == null || str.length() != 10) {
            return null;
        }
        ISourceIterator<MixRecord> records = new MixFileIteratorBuilder(new File(MIX)).build(null);
        try {
            while (records.hasNext()) {
                MixRecord record = records.next();
                if (str.equals(record.getDotId())) {
                    return record.get10DigitId();
                }
            }
            return null;
        } finally {
            // Clean up the iterator on every exit path, including exceptions during iteration.
            records.clean();
        }
    }

    /**
     * Parses a file and returns the {@code YElement}s it contains.
     *
     * @param file file to read
     * @return the {@code YElement}s among the objects read; other object types are dropped
     * @throws Exception if the file cannot be opened or parsed
     */
    private static List<YElement> toEnhance(File file) throws Exception {
        FileReader input = new FileReader(file);
        List<YExportable> read;
        try {
            read = reader.read(input, (Object[]) null);
        } finally {
            // Release the file handle whether or not parsing succeeded.
            input.close();
        }
        List<YElement> elements = new LinkedList<YElement>();
        for (YExportable exportable : read) {
            if (exportable instanceof YElement) {
                elements.add((YElement) exportable);
            }
        }
        return elements;
    }
}
