package pl.edu.icm.yadda.analysis.classification.tools;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import pl.edu.icm.yadda.analysis.textr.model.BxDocument;
import pl.edu.icm.yadda.analysis.textr.model.BxPage;
import pl.edu.icm.yadda.analysis.textr.transformers.TrueVizToBxDocumentReader;
import pl.edu.icm.yadda.metadata.transformers.TransformationException;

/* loaded from: input_file:WEB-INF/lib/yadda-analysis-impl-1.11.0-RC1.jar:pl/edu/icm/yadda/analysis/classification/tools/DirExtractor.class */
public class DirExtractor implements DocumentsExtractor {
    protected File directory;

    public DirExtractor(String str) {
        this.directory = new File(str);
    }

    public DirExtractor(File file) {
        this.directory = file;
    }

    @Override // pl.edu.icm.yadda.analysis.classification.tools.DocumentsExtractor
    public List<BxDocument> getDocuments() throws TransformationException, FileNotFoundException {
        String path = this.directory.getPath();
        TrueVizToBxDocumentReader trueVizToBxDocumentReader = new TrueVizToBxDocumentReader();
        ArrayList arrayList = new ArrayList();
        if (!path.endsWith(File.separator)) {
            path = path + File.separator;
        }
        for (String str : this.directory.list()) {
            if (new File(path + str).isFile() && str.endsWith("xml")) {
                List<BxPage> read = trueVizToBxDocumentReader.read(new InputStreamReader(new FileInputStream(path + str)), new Object[0]);
                BxDocument bxDocument = new BxDocument();
                Iterator<BxPage> it = read.iterator();
                while (it.hasNext()) {
                    it.next().setContext(bxDocument);
                }
                bxDocument.setFilename(str);
                bxDocument.setPages(read);
                arrayList.add(bxDocument);
            }
        }
        return arrayList;
    }
}
