package pl.edu.icm.cermine.tools;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxPage;
import pl.edu.icm.cermine.structure.transformers.TrueVizToBxDocumentReader;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.13-SNAPSHOT.jar:pl/edu/icm/cermine/tools/DirExtractor.class */
/**
 * {@link DocumentsExtractor} that builds {@link BxDocument}s from the files of a single directory.
 *
 * <p>Every regular file directly inside the directory whose name ends with {@code "xml"} is read
 * as UTF-8 text through a {@link TrueVizToBxDocumentReader}; sub-directories are not descended
 * into.
 */
public class DirExtractor implements DocumentsExtractor {
    /** Directory whose files are read by {@link #getDocuments()}. */
    protected File directory;

    /**
     * Creates an extractor for the directory at the given path.
     *
     * @param str path of the source directory
     * @throws RuntimeException if the path does not denote an existing directory
     */
    public DirExtractor(String str) {
        this.directory = new File(str);
        // isDirectory() is already false for a path that does not exist.
        if (!this.directory.isDirectory()) {
            throw new RuntimeException("Source directory for documents doesn't exist: " + str);
        }
    }

    /**
     * Creates an extractor for the given directory.
     *
     * <p>Unlike {@link #DirExtractor(String)}, no validation happens here; an unusable
     * directory is reported when {@link #getDocuments()} is called.
     *
     * @param file the source directory
     */
    public DirExtractor(File file) {
        this.directory = file;
    }

    /**
     * Reads all matching files of the directory and returns one document per file.
     *
     * <p>The documents are returned in the order in which {@link File#list()} reports the
     * file names. Each document gets the bare file name (without the directory part) as its
     * filename.
     *
     * @return the documents read from the directory; empty when no file matches
     * @throws TransformationException if a file cannot be opened, the encoding is not
     *         supported, or a stream cannot be closed
     * @throws IllegalStateException if the directory cannot be listed (it is not a directory
     *         or an I/O error occurred), or if the reader raised it for one of the files
     */
    @Override // pl.edu.icm.cermine.tools.DocumentsExtractor
    public List<BxDocument> getDocuments() throws TransformationException {
        String dirPath = this.directory.getPath();
        if (!dirPath.endsWith(File.separator)) {
            dirPath = dirPath + File.separator;
        }

        // File.list() returns null instead of throwing when the path is not a directory or
        // cannot be read; fail with a clear message rather than a NullPointerException.
        String[] filenames = this.directory.list();
        if (filenames == null) {
            throw new IllegalStateException(
                    "Cannot list source directory for documents: " + this.directory.getPath());
        }

        TrueVizToBxDocumentReader reader = new TrueVizToBxDocumentReader();
        List<BxDocument> documents = new ArrayList<BxDocument>();
        for (String filename : filenames) {
            String filePath = dirPath + filename;
            if (new File(filePath).isFile() && filename.endsWith("xml")) {
                documents.add(readDocument(reader, filePath, filename));
            }
        }
        return documents;
    }

    /**
     * Reads a single file into a document and wires the pages to it.
     *
     * @param reader the reader used to parse the file content
     * @param filePath full path of the file to open
     * @param filename bare file name stored on the resulting document
     * @return the document built from the pages the reader produced
     * @throws TransformationException if the file cannot be opened, the encoding is not
     *         supported, or the stream cannot be closed
     */
    private BxDocument readDocument(TrueVizToBxDocumentReader reader, String filePath, String filename)
            throws TransformationException {
        FileInputStream input = null;
        try {
            input = new FileInputStream(filePath);
            List<BxPage> pages = reader.read(new InputStreamReader(input, "UTF-8"), new Object[0]);
            BxDocument document = new BxDocument();
            for (BxPage page : pages) {
                page.setParent(document);
            }
            document.setFilename(filename);
            document.setPages(pages);
            return document;
        } catch (IllegalStateException e) {
            // Report which file triggered the failure before propagating it unchanged.
            System.err.println(e.getMessage());
            System.err.println(filePath);
            throw e;
        } catch (FileNotFoundException e) {
            throw new TransformationException("File not found!", e);
        } catch (UnsupportedEncodingException e) {
            throw new TransformationException("Unsupported encoding!", e);
        } finally {
            // The stream is closed on both the success and the failure path; a failing close
            // is reported as a TransformationException.
            if (input != null) {
                try {
                    input.close();
                } catch (IOException e) {
                    throw new TransformationException("Cannot close stream!", e);
                }
            }
        }
    }
}
