package pl.edu.icm.ceon.converters.cermineDir;

import com.google.common.net.MediaType;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.vfs2.FileFilter;
import org.apache.commons.vfs2.FileFilterSelector;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSelectInfo;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileType;
import org.jdom.Document;
import org.jdom.output.XMLOutputter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.ceon.commons.CeonGeneralException;
import pl.edu.icm.ceon.converters.commons.ContentPart;
import pl.edu.icm.ceon.converters.commons.DataBatch;
import pl.edu.icm.ceon.converters.commons.ElementContentSource;
import pl.edu.icm.ceon.converters.commons.FileObjectBasedEntry;
import pl.edu.icm.ceon.converters.commons.IMetadataSource;
import pl.edu.icm.ceon.converters.commons.MetadataPart;
import pl.edu.icm.ceon.tools.textcat.LanguageIdentifierBean;
import pl.edu.icm.cermine.PdfNLMContentExtractor;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.model.bwmeta.y.YAncestor;
import pl.edu.icm.model.bwmeta.y.YContentFile;
import pl.edu.icm.model.bwmeta.y.YContributor;
import pl.edu.icm.model.bwmeta.y.YCurrent;
import pl.edu.icm.model.bwmeta.y.YDescription;
import pl.edu.icm.model.bwmeta.y.YElement;
import pl.edu.icm.model.bwmeta.y.YExportable;
import pl.edu.icm.model.bwmeta.y.YId;
import pl.edu.icm.model.bwmeta.y.YLanguage;
import pl.edu.icm.model.bwmeta.y.YName;
import pl.edu.icm.model.bwmeta.y.YStructure;
import pl.edu.icm.model.transformers.utils.AncestorsManagement;
import pl.edu.icm.yadda.imports.transformers.nlm.NlmIdGeneratorWithFallbeckToGivenId;
import pl.edu.icm.yadda.imports.transformers.nlm.jats.NlmToYTransformer;

/* loaded from: input_file:pl/edu/icm/ceon/converters/cermineDir/DirectoryReader.class */
public class DirectoryReader implements IMetadataSource, ElementContentSource {
    /** Class-wide SLF4J logger. */
    private static final Logger log = LoggerFactory.getLogger(DirectoryReader.class);
    // Names of the optional text files looked up (case-insensitively, via foundFile)
    // inside a journal directory. The "JOUNAL" spelling is part of the
    // package-visible identifiers and is therefore left untouched.
    static String JOUNAL_TITLE_FILE = "title.txt";
    static String JOUNAL_ISSN_FILE = "issn.txt";
    static String JOUNAL_PUBLISHER_FILE = "publisher.txt";
    static String JOUNAL_DESCRIPTION_FILE = "description.txt";
    // Optional file holding the display name of a year, volume or issue directory.
    static String VOLUME_ISSUE_NAME_FILE = "name.txt";
    // Per-thread language identifier; used to classify the journal description text.
    ThreadLocal<LanguageIdentifierBean> langIdent;
    // Maps an article id (PDF base name without its last four characters) to the PDF
    // it was extracted from; filled by processSinglePdf, read by getDataForElement.
    HashMap<String, FileObject> idsToPdfFIles;
    // Classpath directory of this converter's resources.
    public static final String MODELS_PREFIX = "/pl/edu/icm/ceon/converters/cermineDir/";
    // Suffixes appended to a journal's model location (a value from extractorsForIds)
    // to obtain the classpath resources of the metadata / initial models and their ranges.
    public static final String MODEL_META = "model-metadata";
    public static final String MODEL_META_RANGE = "model-metadata.range";
    public static final String MODEL_INITIAL = "model-initial";
    public static final String MODEL_INITIAL_RANGE = "model-initial.range";
    // Per-thread properties mapping a journal directory name to its model location.
    ThreadLocal<Properties> extractorsForIds;
    // Per-thread extractor currently in use; replaced by setPdfExtractorForJournal.
    ThreadLocal<PdfNLMContentExtractor> extractor;
    // Per-thread id generator; receives the PDF-derived id as fallback before each read.
    ThreadLocal<NlmIdGeneratorWithFallbeckToGivenId> idGen;
    // Per-thread NLM reader, wired to the same thread's idGen.
    ThreadLocal<NlmToYTransformer> nlmReader;
    // Journal directories to be processed, one per batch.
    List<FileObject> fileList;
    /* loaded from: input_file:pl/edu/icm/ceon/converters/cermineDir/DirectoryReader$DirToken.class */
    /**
     * Cursor over a list of journal directories. An instance is handed out by
     * {@code getBatch} as the resumption token, and {@code pos} is advanced by
     * {@code processJournalDir} after each successfully processed journal.
     */
    public static class DirToken implements Serializable {
        /** Journal directories this token iterates over. */
        List<FileObject> journalList;
        /** Index of the next journal directory to process. */
        int pos;

        public DirToken(List<FileObject> list) {
            this.pos = 0;
            this.journalList = list;
        }

        /** @return {@code true} once {@code pos} has moved past the last journal */
        boolean isFullyUsed() {
            return this.pos >= this.journalList.size();
        }
    }

    /**
     * Looks for a direct child of {@code fileObject} whose base name equals
     * {@code str}, ignoring case.
     *
     * @param str        base name to look for; {@code null} yields {@code null}
     * @param fileObject directory whose children are scanned
     * @return the first matching child, or {@code null} when there is none
     * @throws FileSystemException if the children cannot be listed
     */
    static FileObject foundFile(String str, FileObject fileObject) throws FileSystemException {
        if (str != null) {
            FileObject[] children = fileObject.getChildren();
            for (int i = 0; i < children.length; i++) {
                FileObject candidate = children[i];
                String candidateName = candidate.getName().getBaseName();
                if (str.equalsIgnoreCase(candidateName)) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Creates a fresh extractor through its no-argument constructor, i.e. without
     * any journal-specific models being installed by this class.
     *
     * @return a new extractor instance
     * @throws AnalysisException if the extractor cannot be created
     */
    public PdfNLMContentExtractor getDefaultPdfNLMContentExtractor() throws AnalysisException {
        PdfNLMContentExtractor defaultExtractor = new PdfNLMContentExtractor();
        return defaultExtractor;
    }

    /**
     * Installs, for the current thread, the extractor to use for the given journal.
     * <p>
     * When the per-thread properties contain an entry for {@code str}, a new
     * extractor is configured with the metadata and initial models found under that
     * entry's value; otherwise (no entry, or the properties could not be loaded) a
     * default extractor is installed. If building the extractor fails with an
     * {@link AnalysisException}, the error is logged and the thread keeps whatever
     * extractor it had before.
     *
     * @param str journal identifier used as the key in the model properties
     */
    public void setPdfExtractorForJournal(String str) {
        try {
            // The properties ThreadLocal yields null when loading failed; treat that
            // the same as "no special model configured" instead of failing with an NPE.
            Properties modelLocations = this.extractorsForIds.get();
            String property = modelLocations != null ? modelLocations.getProperty(str) : null;
            if (property == null) {
                log.info("special model for {} not found switching to default", str);
                this.extractor.set(getDefaultPdfNLMContentExtractor());
                return;
            }
            log.info("special model for {} found", str);
            PdfNLMContentExtractor pdfNLMContentExtractor = new PdfNLMContentExtractor();
            // NOTE(review): the model streams are handed over unclosed because it is not
            // visible here whether the extractor reads them eagerly - confirm and close them.
            pdfNLMContentExtractor.buildMetadataExtractor(getClass().getResourceAsStream(property + MODEL_META), getClass().getResourceAsStream(property + MODEL_META_RANGE));
            pdfNLMContentExtractor.buildStructureExtractor(getClass().getResourceAsStream(property + MODEL_INITIAL), getClass().getResourceAsStream(property + MODEL_INITIAL_RANGE));
            this.extractor.set(pdfNLMContentExtractor);
        } catch (AnalysisException e) {
            log.error(e.getMessage(), e);
        }
    }

    /**
     * Runs the current thread's extractor on one PDF, converts the resulting NLM
     * document and appends the journal article it contains to {@code list}.
     * <p>
     * The article id is the PDF's base name without its last four characters (the
     * ".pdf" suffix). The article's structures are replaced by a single journal
     * hierarchy structure at article level whose ancestors are copied from
     * {@code list2}. The PDF is remembered under the article id so that
     * {@code getDataForElement} can serve it later. When the conversion yields no
     * journal-article element, an error is logged and nothing is added.
     *
     * @param fileObject the PDF file
     * @param list       output list receiving the article
     * @param list2      ancestors (journal, year, ...) of the article, outermost first
     * @throws AnalysisException if the extractor fails on the PDF
     * @throws IOException       if the PDF cannot be read or the NLM cannot be serialised
     */
    void processSinglePdf(FileObject fileObject, List<YExportable> list, List<YAncestor> list2) throws FileNotFoundException, AnalysisException, IOException {
        String baseName = fileObject.getName().getBaseName();
        String substring = baseName.substring(0, baseName.length() - 4);
        Document document;
        // Close the PDF stream as soon as extraction is done, also on failure.
        try (InputStream pdfStream = fileObject.getContent().getInputStream()) {
            document = new Document(this.extractor.get().extractContent(pdfStream));
        }
        StringWriter stringWriter = new StringWriter();
        new XMLOutputter().output(document, stringWriter);
        String nlm = stringWriter.toString();
        log.debug("{} nlm:\n{}", substring, nlm);
        this.idGen.get().setCurrentIdFallback(substring);
        List<?> read = this.nlmReader.get().read(nlm, new Object[0]);
        // Pick the first element that really is a journal article; anything else
        // produced by the reader must not be mistaken for the article.
        YElement article = null;
        for (Object candidate : read) {
            if (candidate instanceof YElement && isJournalArticle((YElement) candidate)) {
                article = (YElement) candidate;
                break;
            }
        }
        if (article == null) {
            log.error("id: {} cermine provide no valid article, skipping", substring);
            return;
        }
        // Discard the hierarchy produced by the reader; the directory layout is authoritative.
        article.setStructures(new ArrayList<YStructure>());
        YStructure yStructure = new YStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
        yStructure.setCurrent(new YCurrent("bwmeta1.level.hierarchy_Journal_Article"));
        for (YAncestor ancestor : list2) {
            yStructure.addAncestor(ancestor);
        }
        article.addStructure(yStructure);
        article.setId(substring);
        list.add(article);
        this.idsToPdfFIles.put(substring, fileObject);
    }

    /** Tells whether the element sits at article level of the journal hierarchy. */
    private static boolean isJournalArticle(YElement element) {
        YStructure structure = element.getStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
        return structure != null
                && structure.getCurrent() != null
                && "bwmeta1.level.hierarchy_Journal_Article".equalsIgnoreCase(structure.getCurrent().getLevel());
    }

    /**
     * Processes every file selected from {@code fileObject} whose extension is
     * "pdf" (case-insensitive) with {@code processSinglePdf}.
     *
     * @param fileObject directory to scan
     * @param list       output list receiving the extracted articles
     * @param list2      ancestors to attach to every article found here
     * @throws IOException       if listing or reading files fails
     * @throws AnalysisException if extraction of a PDF fails
     */
    void addPdfsFromDir(FileObject fileObject, List<YExportable> list, List<YAncestor> list2) throws IOException, AnalysisException {
        FileFilter pdfFilter = new FileFilter() {
            @Override
            public boolean accept(FileSelectInfo fileSelectInfo) {
                return fileSelectInfo.getFile().getName().getExtension().equalsIgnoreCase("pdf");
            }
        };
        FileObject[] pdfFiles = fileObject.findFiles(new FileFilterSelector(pdfFilter));
        if (pdfFiles == null) {
            // findFiles is documented to return null when the base file does not exist.
            return;
        }
        for (FileObject pdfFile : pdfFiles) {
            processSinglePdf(pdfFile, list, list2);
        }
    }

    /**
     * Creates the issue ("Number" level) element for a directory and processes the
     * PDFs it contains.
     * <p>
     * The element id is the directory's base name; its name is taken from the
     * optional name file when that file is present and not blank. While the PDFs
     * are processed the issue is temporarily appended to {@code list2} so that the
     * articles get it as an ancestor.
     *
     * @param fileObject issue directory
     * @param list       output list receiving the issue element and its articles
     * @param list2      ancestors of the issue; restored to its initial content on return
     * @throws IOException       if a file cannot be listed or read
     * @throws AnalysisException if extraction of a PDF fails
     */
    void processIssueDir(FileObject fileObject, List<YExportable> list, List<YAncestor> list2) throws IOException, AnalysisException {
        String baseName = fileObject.getName().getBaseName();
        String issueName = null;
        FileObject nameFile = foundFile(VOLUME_ISSUE_NAME_FILE, fileObject);
        if (nameFile != null) {
            // NOTE(review): IOUtils.toString(InputStream) decodes with the platform charset.
            try (InputStream in = nameFile.getContent().getInputStream()) {
                issueName = IOUtils.toString(in);
            }
        }
        YElement yElement = new YElement(baseName);
        // Added exactly once; a second add would put the same element in the output twice.
        list.add(yElement);
        if (StringUtils.isNotBlank(issueName)) {
            yElement.addName(new YName(issueName));
        }
        YStructure yStructure = new YStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
        yStructure.setCurrent(new YCurrent("bwmeta1.level.hierarchy_Journal_Number"));
        // Snapshot: list2 is mutated below and by sibling directories.
        yStructure.setAncestors(new ArrayList<YAncestor>(list2));
        yElement.addStructure(yStructure);
        YAncestor yAncestor = new YAncestor("bwmeta1.level.hierarchy_Journal_Number", baseName);
        AncestorsManagement.copyDataToAncestor(yElement, yAncestor);
        list2.add(yAncestor);
        try {
            addPdfsFromDir(fileObject, list, list2);
        } finally {
            list2.remove(yAncestor);
        }
    }

    /**
     * Creates the volume element for a directory, processes the PDFs lying directly
     * in it and then treats every sub-folder as an issue directory.
     * <p>
     * The element id is the directory's base name; its name is taken from the
     * optional name file when that file is present and not blank. While children
     * are processed the volume is temporarily appended to {@code list2}.
     *
     * @param fileObject volume directory
     * @param list       output list receiving the volume element and everything below it
     * @param list2      ancestors of the volume; restored to its initial content on return
     * @throws IOException       if a file cannot be listed or read
     * @throws AnalysisException if extraction of a PDF fails
     */
    void processVolumeDir(FileObject fileObject, List<YExportable> list, List<YAncestor> list2) throws IOException, AnalysisException {
        String baseName = fileObject.getName().getBaseName();
        String volumeName = null;
        FileObject nameFile = foundFile(VOLUME_ISSUE_NAME_FILE, fileObject);
        if (nameFile != null) {
            // NOTE(review): IOUtils.toString(InputStream) decodes with the platform charset.
            try (InputStream in = nameFile.getContent().getInputStream()) {
                volumeName = IOUtils.toString(in);
            }
        }
        YElement yElement = new YElement(baseName);
        // Added exactly once; a second add would put the same element in the output twice.
        list.add(yElement);
        if (StringUtils.isNotBlank(volumeName)) {
            yElement.addName(new YName(volumeName));
        }
        YStructure yStructure = new YStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
        yStructure.setCurrent(new YCurrent("bwmeta1.level.hierarchy_Journal_Volume"));
        // Snapshot: list2 is mutated below and by sibling directories.
        yStructure.setAncestors(new ArrayList<YAncestor>(list2));
        yElement.addStructure(yStructure);
        YAncestor yAncestor = new YAncestor("bwmeta1.level.hierarchy_Journal_Volume", baseName);
        AncestorsManagement.copyDataToAncestor(yElement, yAncestor);
        list2.add(yAncestor);
        try {
            addPdfsFromDir(fileObject, list, list2);
            for (FileObject child : fileObject.getChildren()) {
                if (child.getType() == FileType.FOLDER) {
                    processIssueDir(child, list, list2);
                }
            }
        } finally {
            list2.remove(yAncestor);
        }
    }

    /**
     * Creates the year element for a directory, processes the PDFs lying directly
     * in it and then treats every sub-folder as a volume directory.
     * <p>
     * The element id is the directory's base name; its name is taken from the
     * optional name file when that file is present and not blank. While children
     * are processed the year is temporarily appended to {@code list2}.
     *
     * @param fileObject year directory
     * @param list       output list receiving the year element and everything below it
     * @param list2      ancestors of the year; restored to its initial content on return
     * @throws IOException       if a file cannot be listed or read
     * @throws AnalysisException if extraction of a PDF fails
     */
    void processYearDir(FileObject fileObject, List<YExportable> list, List<YAncestor> list2) throws IOException, AnalysisException {
        String baseName = fileObject.getName().getBaseName();
        String yearName = null;
        FileObject nameFile = foundFile(VOLUME_ISSUE_NAME_FILE, fileObject);
        if (nameFile != null) {
            // NOTE(review): IOUtils.toString(InputStream) decodes with the platform charset.
            try (InputStream in = nameFile.getContent().getInputStream()) {
                yearName = IOUtils.toString(in);
            }
        }
        YElement yElement = new YElement(baseName);
        // Added exactly once; a second add would put the same element in the output twice.
        list.add(yElement);
        if (StringUtils.isNotBlank(yearName)) {
            yElement.addName(new YName(yearName));
        }
        YStructure yStructure = new YStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
        yStructure.setCurrent(new YCurrent("bwmeta1.level.hierarchy_Journal_Year"));
        // Snapshot: list2 is mutated below and by sibling directories.
        yStructure.setAncestors(new ArrayList<YAncestor>(list2));
        yElement.addStructure(yStructure);
        YAncestor yAncestor = new YAncestor("bwmeta1.level.hierarchy_Journal_Year", baseName);
        AncestorsManagement.copyDataToAncestor(yElement, yAncestor);
        list2.add(yAncestor);
        try {
            addPdfsFromDir(fileObject, list, list2);
            for (FileObject child : fileObject.getChildren()) {
                if (child.getType() == FileType.FOLDER) {
                    processVolumeDir(child, list, list2);
                }
            }
        } finally {
            list2.remove(yAncestor);
        }
    }

    /**
     * Processes the journal directory the token currently points at and advances
     * the token.
     * <p>
     * Selects the extractor for the journal, builds the journal element (id = the
     * directory's base name; ISSN, name, description and publisher come from the
     * optional text files when present and not blank), processes the PDFs lying
     * directly in the directory and treats every sub-folder as a year directory.
     * The token position is only incremented when processing completes normally.
     *
     * @param dirToken cursor over the journal directories; must not be fully used
     * @return a metadata part whose id is the journal id and whose entities are all
     *         elements created for this journal
     * @throws IOException       if a file cannot be listed or read
     * @throws AnalysisException if extraction of a PDF fails
     */
    MetadataPart processJournalDir(DirToken dirToken) throws IOException, AnalysisException {
        FileObject fileObject = dirToken.journalList.get(dirToken.pos);
        // Raw type kept on purpose: the parameter type of MetadataPart.setEntities
        // is not visible from this class.
        ArrayList entities = new ArrayList();
        List<YAncestor> ancestors = new ArrayList<YAncestor>();
        String baseName = fileObject.getName().getBaseName();
        setPdfExtractorForJournal(baseName);
        String title = readJournalFile(JOUNAL_TITLE_FILE, fileObject);
        String issn = readJournalFile(JOUNAL_ISSN_FILE, fileObject);
        String description = readJournalFile(JOUNAL_DESCRIPTION_FILE, fileObject);
        String publisher = readJournalFile(JOUNAL_PUBLISHER_FILE, fileObject);
        YElement yElement = new YElement(baseName);
        // Added exactly once; a second add would put the same element in the output twice.
        entities.add(yElement);
        if (StringUtils.isNotBlank(issn)) {
            yElement.addId(new YId("bwmeta1.id-class.ISSN", issn));
        }
        if (StringUtils.isNotBlank(title)) {
            yElement.addName(new YName(title));
        }
        if (StringUtils.isNotBlank(description)) {
            // NOTE(review): langIdent yields null when the identifier failed to initialise.
            yElement.addDescription(new YDescription(YLanguage.byCode(this.langIdent.get().classify(description)), description, "note"));
        }
        if (StringUtils.isNotBlank(publisher)) {
            YContributor yContributor = new YContributor("publisher", true);
            yContributor.addName(new YName(publisher));
            yElement.addContributor(yContributor);
        }
        YStructure yStructure = new YStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
        yStructure.setCurrent(new YCurrent("bwmeta1.level.hierarchy_Journal_Journal"));
        yElement.addStructure(yStructure);
        YAncestor yAncestor = new YAncestor("bwmeta1.level.hierarchy_Journal_Journal", baseName);
        AncestorsManagement.copyDataToAncestor(yElement, yAncestor);
        ancestors.add(yAncestor);
        addPdfsFromDir(fileObject, entities, ancestors);
        for (FileObject child : fileObject.getChildren()) {
            if (child.getType() == FileType.FOLDER) {
                processYearDir(child, entities, ancestors);
            }
        }
        MetadataPart metadataPart = new MetadataPart();
        metadataPart.setId(baseName);
        metadataPart.setEntities(entities);
        dirToken.pos++;
        return metadataPart;
    }

    /**
     * Reads the whole content of the named file from a journal directory, closing
     * the stream afterwards; returns {@code null} when the file is absent.
     */
    private static String readJournalFile(String name, FileObject dir) throws IOException {
        FileObject file = foundFile(name, dir);
        if (file == null) {
            return null;
        }
        // NOTE(review): IOUtils.toString(InputStream) decodes with the platform charset.
        try (InputStream in = file.getContent().getInputStream()) {
            return IOUtils.toString(in);
        }
    }

    /**
     * Capability flag; this implementation always answers {@code false}.
     *
     * @return {@code false}
     */
    public boolean doKnowsAboutFiles() {
        return false;
    }

    /**
     * Capability flag; always {@code false}, consistent with the by-id lookup of
     * this class throwing {@link UnsupportedOperationException}.
     *
     * @return {@code false}
     */
    public boolean isRandomAccessSupported() {
        return false;
    }

    /**
     * Capability flag; always {@code true}. Data is served batch by batch through
     * the {@code getBatch} methods.
     *
     * @return {@code true}
     */
    public boolean isSequentialAccessSupported() {
        return true;
    }

    /**
     * By-id lookup of a metadata part; not implemented by this reader.
     * <p>
     * NOTE(review): the decompiler renamed this method from {@code getData}
     * (reason given: "merged with bridge method [inline-methods]"), so the original
     * source presumably declares {@code getData(String)} - confirm before relying
     * on the current name.
     *
     * @param str id of the requested part (ignored)
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public MetadataPart m58getData(String str) {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Creates a reader over the given journal directories.
     * <p>
     * All helper objects are created lazily, once per thread, through
     * {@link ThreadLocal}s. A helper whose creation fails is logged and left as
     * {@code null} for that thread, except for the model properties, which fall
     * back to an empty set so that every journal simply uses the default extractor.
     *
     * @param list journal directories to process; the list is stored, not copied
     */
    public DirectoryReader(List<FileObject> list) {
        this.langIdent = new ThreadLocal<LanguageIdentifierBean>() {
            @Override
            protected LanguageIdentifierBean initialValue() {
                try {
                    return new LanguageIdentifierBean();
                } catch (IOException | CeonGeneralException e) {
                    DirectoryReader.log.error(e.getMessage(), e);
                    return null;
                }
            }
        };
        this.idsToPdfFIles = new HashMap<>();
        this.extractorsForIds = new ThreadLocal<Properties>() {
            @Override
            protected Properties initialValue() {
                String resource = DirectoryReader.MODELS_PREFIX + "idsToModelsDirs.properties";
                Properties properties = new Properties();
                // try-with-resources tolerates a null resource, so the missing-file
                // case is handled inside the block without a separate close path.
                try (InputStream in = getClass().getResourceAsStream(resource)) {
                    if (in == null) {
                        DirectoryReader.log.error("Resource {} not found; no journal-specific models will be used", resource);
                    } else {
                        properties.load(in);
                    }
                } catch (IOException e) {
                    DirectoryReader.log.error(e.getMessage(), e);
                }
                return properties;
            }
        };
        this.extractor = new ThreadLocal<PdfNLMContentExtractor>() {
            @Override
            protected PdfNLMContentExtractor initialValue() {
                try {
                    return DirectoryReader.this.getDefaultPdfNLMContentExtractor();
                } catch (AnalysisException e) {
                    DirectoryReader.log.error(e.getMessage(), e);
                    return null;
                }
            }
        };
        this.idGen = new ThreadLocal<NlmIdGeneratorWithFallbeckToGivenId>() {
            @Override
            protected NlmIdGeneratorWithFallbeckToGivenId initialValue() {
                return new NlmIdGeneratorWithFallbeckToGivenId();
            }
        };
        this.nlmReader = new ThreadLocal<NlmToYTransformer>() {
            @Override
            protected NlmToYTransformer initialValue() {
                NlmToYTransformer nlmToYTransformer = new NlmToYTransformer();
                // Wire the reader to the id generator of the same thread.
                nlmToYTransformer.setIdGenerator(DirectoryReader.this.idGen.get());
                return nlmToYTransformer;
            }
        };
        this.fileList = list;
    }

    /**
     * Creates a reader over every sub-folder of the given directory; each
     * sub-folder is treated as one journal directory. All other initialisation is
     * delegated to {@link #DirectoryReader(List)}.
     *
     * @param fileObject directory whose child folders are the journals
     * @throws FileSystemException if the children cannot be listed or inspected
     */
    public DirectoryReader(FileObject fileObject) throws FileSystemException {
        this(listSubfolders(fileObject));
    }

    /** Collects the direct children of {@code parent} that are folders, in listing order. */
    private static List<FileObject> listSubfolders(FileObject parent) throws FileSystemException {
        List<FileObject> folders = new ArrayList<FileObject>();
        for (FileObject child : parent.getChildren()) {
            if (child.getType() == FileType.FOLDER) {
                folders.add(child);
            }
        }
        return folders;
    }

    /**
     * Starts a new pass over all journal directories and returns its first batch.
     * Both date arguments are ignored.
     *
     * @param date  unused
     * @param date2 unused
     * @return the batch produced for a fresh token positioned at the first journal
     */
    public DataBatch<MetadataPart> getBatch(Date date, Date date2) {
        DirToken startToken = new DirToken(this.fileList);
        return getBatch(startToken);
    }

    /**
     * Produces the batch for the journal the given token points at.
     * <p>
     * The batch carries exactly one metadata part. The token, already advanced, is
     * set as resumption token while journals remain. A token with nothing left
     * (including one built from an empty journal list) yields an empty batch
     * without a resumption token.
     *
     * @param serializable a {@link DirToken} previously handed out by this reader
     * @return the batch, or {@code null} when processing the journal failed (the
     *         failure is logged and the token is not advanced)
     */
    public DataBatch<MetadataPart> getBatch(Serializable serializable) {
        try {
            DirToken dirToken = (DirToken) serializable;
            DataBatch<MetadataPart> dataBatch = new DataBatch<>();
            if (dirToken.isFullyUsed()) {
                // Nothing left to read: avoid indexing past the end of the journal list.
                dataBatch.setPayload(Collections.<MetadataPart>emptyList());
                return dataBatch;
            }
            dataBatch.setPayload(Collections.singletonList(processJournalDir(dirToken)));
            if (!dirToken.isFullyUsed()) {
                dataBatch.setResumptionToken(dirToken);
            }
            return dataBatch;
        } catch (IOException | AnalysisException e) {
            log.error(e.getMessage(), e);
            return null;
        }
    }

    /**
     * Builds the content part (the source PDF) for an article previously produced
     * by this reader.
     *
     * @param str article id
     * @return a content part holding one "full-text" PDF file entry, or
     *         {@code null} when the id is unknown or the file size cannot be read
     *         (the latter is logged)
     */
    public ContentPart getDataForElement(String str) {
        if (!this.idsToPdfFIles.containsKey(str)) {
            return null;
        }
        FileObject pdf = this.idsToPdfFIles.get(str);
        try {
            ContentPart part = new ContentPart();
            part.setId(str);

            YContentFile pdfDescriptor = new YContentFile();
            pdfDescriptor.setId("file-pdf");
            long sizeInBytes = pdf.getContent().getSize();
            pdfDescriptor.setSize(Long.valueOf(sizeInBytes));
            pdfDescriptor.setFormat(MediaType.PDF.toString());
            pdfDescriptor.setType("full-text");
            String fileName = pdf.getName().getBaseName();
            pdfDescriptor.addName(new YName(YLanguage.NoLinguisticContent, fileName, "file-name"));

            part.addFile(new FileObjectBasedEntry(pdfDescriptor, pdf));
            return part;
        } catch (FileSystemException e) {
            log.error(e.getMessage(), e);
            return null;
        }
    }
}
