package pl.edu.icm.cermine.content.transformers;

import com.itextpdf.text.html.HtmlTags;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import pl.edu.icm.cermine.content.model.ContentStructure;
import pl.edu.icm.cermine.content.model.DocumentSection;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.tools.transformers.FormatToModelReader;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.12.jar:pl/edu/icm/cermine/content/transformers/HTMLToDocContentReader.class */
/**
 * Reads a flat, HTML-like XML document into a {@link ContentStructure}.
 *
 * <p>The direct children of the document root are processed in order. Header
 * elements ({@code h1}, {@code h2}, ...) open sections and paragraph elements
 * ({@code p}) supply section text. Nesting is derived from header levels: a
 * section ends when the next header of the same level as its own header is
 * met; everything in between belongs to it and is split recursively in the
 * same way.
 *
 * <p>Element names are matched case-insensitively. Any element that appears
 * where a header is required but is not named {@code h<digits>} causes a
 * {@link TransformationException}.
 */
public class HTMLToDocContentReader implements FormatToModelReader<ContentStructure> {

    /** Level assigned to sections attached directly to the content structure. */
    private static final int TOP_SECTION_LEVEL = 1;

    /** Marker returned by {@link #getHeaderLevel(Element)} for non-header elements. */
    private static final int NOT_A_HEADER = -1;

    /**
     * Header element names: the letter "h" followed by a decimal level. The digit
     * count is capped at nine so the level always fits into an {@code int}.
     */
    private static final Pattern HEADER_NAME = Pattern.compile("[hH][0-9]{1,9}");

    /**
     * Parses the given markup and converts it into a content structure.
     *
     * @param str the markup to parse
     * @param objArr not used by this reader
     * @return the content structure built from the markup
     * @throws TransformationException if the markup cannot be parsed or does not
     *         follow the expected header/paragraph layout
     */
    @Override
    public ContentStructure read(String str, Object... objArr) throws TransformationException {
        return read((Reader) new StringReader(str), objArr);
    }

    /**
     * Parses the markup supplied by the reader and converts it into a content
     * structure. Paragraphs preceding the first header are ignored. An empty root,
     * or a root holding only paragraphs, yields a structure without sections.
     *
     * @param reader source of the markup to parse
     * @param objArr not used by this reader
     * @return the content structure built from the markup
     * @throws TransformationException if the markup cannot be parsed or does not
     *         follow the expected header/paragraph layout
     */
    @Override
    public ContentStructure read(Reader reader, Object... objArr) throws TransformationException {
        try {
            Element root = getRoot(reader);
            // getChildren() is consumed as an untyped list here; every entry is used as an Element.
            @SuppressWarnings("unchecked")
            List<Element> children = root.getChildren();

            ContentStructure contentStructure = new ContentStructure();

            // Paragraphs that come before the first header do not belong to any section.
            int firstHeader = 0;
            while (firstHeader < children.size() && isParagraph(children.get(firstHeader))) {
                firstHeader++;
            }

            for (List<Element> part : splitIntoParts(children, firstHeader)) {
                contentStructure.addSection(createPart(part, TOP_SECTION_LEVEL));
            }
            return contentStructure;
        } catch (IOException e) {
            throw new TransformationException(e);
        } catch (JDOMException e) {
            throw new TransformationException(e);
        }
    }

    /**
     * Parses the input and returns the root element of the resulting document.
     *
     * @param reader source of the markup to parse
     * @return the root element of the parsed document
     * @throws JDOMException if the builder reports a parsing problem
     * @throws IOException if reading the input fails
     */
    private Element getRoot(Reader reader) throws JDOMException, IOException {
        // NOTE(review): the parser runs with its default configuration; no feature is set
        // here to restrict DTDs or external entities. If this reader can receive untrusted
        // input, it should be hardened against XXE - confirm with the callers first, since
        // that may affect documents carrying a DOCTYPE.
        SAXBuilder builder = new SAXBuilder("org.apache.xerces.parsers.SAXParser");
        return builder.build(reader).getRootElement();
    }

    /**
     * Builds a section from a run of elements: the first element provides the
     * title, the paragraphs directly after it become the section's paragraphs and
     * the remaining elements are turned into subsections one level deeper.
     *
     * @param list elements of the section, starting with its header
     * @param i level to assign to the section
     * @return the section; an empty list yields a section with nothing set
     * @throws TransformationException if the first element after the leading
     *         paragraphs is not a header
     */
    private DocumentSection createPart(List<Element> list, int i) throws TransformationException {
        DocumentSection section = new DocumentSection();
        if (list.isEmpty()) {
            return section;
        }
        section.setLevel(i);
        section.setTitle(list.get(0).getValue());

        int index = 1;
        while (index < list.size() && isParagraph(list.get(index))) {
            section.addParagraph(list.get(index).getValue());
            index++;
        }

        for (List<Element> part : splitIntoParts(list, index)) {
            section.addSection(createPart(part, i + 1));
        }
        return section;
    }

    /**
     * Splits {@code elements}, beginning at {@code start}, into consecutive parts.
     * The element at {@code start} must be a header; its level is the split level,
     * and a new part begins at every later header of that same level.
     *
     * @param elements elements to split
     * @param start index of the first element to consider
     * @return the parts in document order; empty when {@code start} is past the end
     * @throws TransformationException if the element at {@code start} is not a header
     */
    private List<List<Element>> splitIntoParts(List<Element> elements, int start)
            throws TransformationException {
        List<List<Element>> parts = new ArrayList<List<Element>>();
        if (start >= elements.size()) {
            return parts;
        }

        Element first = elements.get(start);
        int splitLevel = getHeaderLevel(first);
        if (splitLevel == NOT_A_HEADER) {
            // Report malformed input through the declared checked exception instead of
            // letting a NumberFormatException escape from level parsing.
            throw new TransformationException(new IllegalArgumentException(
                    "Expected a header element (h1, h2, ...) but found <" + first.getName() + ">"));
        }

        List<Element> current = new ArrayList<Element>();
        for (int index = start; index < elements.size(); index++) {
            Element element = elements.get(index);
            // NOT_A_HEADER never equals a real level, so only headers can start a new part.
            if (getHeaderLevel(element) == splitLevel && !current.isEmpty()) {
                parts.add(current);
                current = new ArrayList<Element>();
            }
            current.add(element);
        }
        parts.add(current);
        return parts;
    }

    /**
     * Tells whether the element is a paragraph, comparing its name with
     * {@link HtmlTags#P} regardless of case.
     */
    private boolean isParagraph(Element element) {
        return HtmlTags.P.equalsIgnoreCase(element.getName());
    }

    /**
     * Returns the numeric level of a header element.
     *
     * @param element element to inspect
     * @return the number following the leading "h" in the element name, or
     *         {@link #NOT_A_HEADER} when the name is not of the form {@code h<digits>}
     */
    private int getHeaderLevel(Element element) {
        String name = element.getName();
        if (!HEADER_NAME.matcher(name).matches()) {
            return NOT_A_HEADER;
        }
        return Integer.parseInt(name.substring(1));
    }

    /**
     * Not implemented by this reader.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public List<ContentStructure> readAll(String str, Object... objArr) throws TransformationException {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Not implemented by this reader.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public List<ContentStructure> readAll(Reader reader, Object... objArr) throws TransformationException {
        throw new UnsupportedOperationException("Not supported yet.");
    }
}
