package pl.edu.icm.cermine.content;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.jdom.Element;
import pl.edu.icm.cermine.content.cleaning.ContentCleaner;
import pl.edu.icm.cermine.content.model.BxContentStructure;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxLine;
import pl.edu.icm.cermine.structure.model.BxZoneLabel;
import pl.edu.icm.cermine.structure.model.BxZoneLabelCategory;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.10-SNAPSHOT.jar:pl/edu/icm/cermine/content/RawTextWithLabelsExtractor.class */
/**
 * Builds a JDOM {@code <document>} element in which the lines of a
 * {@link BxDocument} are grouped into consecutive {@code <zone>} children,
 * each carrying a {@code label} attribute and the text of the lines in that run.
 */
public class RawTextWithLabelsExtractor {

    /**
     * Walks all lines of {@code bxDocument} in the order returned by
     * {@code asLines()} and groups consecutive lines that share the same
     * effective label into one {@code <zone>} element.
     *
     * <p>The effective label of a line is:
     * <ul>
     *   <li>{@link BxZoneLabel#BODY_HEADING} if the line is one of the header
     *       lines of any part of {@code bxContentStructure};</li>
     *   <li>otherwise {@link BxZoneLabel#BODY_CONTENT} if the label of the
     *       line's parent satisfies
     *       {@code isOfCategoryOrGeneral(BxZoneLabelCategory.CAT_BODY)};</li>
     *   <li>otherwise the label of the line's parent, unchanged.</li>
     * </ul>
     *
     * <p>Each line's text is followed by a {@code "\n"} inside its zone, and the
     * accumulated zone text is passed through {@link ContentCleaner#cleanAll}
     * before being added to the element.
     *
     * @param bxDocument the document whose lines are exported
     * @param bxContentStructure the content structure supplying the header lines
     * @return a {@code <document>} element with one {@code <zone>} child per run
     *         of equally labelled lines; it has no children when the document
     *         has no lines
     * @throws AnalysisException declared for callers; kept as part of the
     *         existing method contract
     */
    public Element extractRawTextWithLabels(BxDocument bxDocument, BxContentStructure bxContentStructure) throws AnalysisException {
        Set<BxLine> headerLines = new HashSet<BxLine>();
        for (BxContentStructure.BxDocContentPart part : bxContentStructure.getParts()) {
            headerLines.addAll(part.getHeaderLines());
        }

        Element document = new Element("document");
        // null means "no run started yet"; this avoids emitting an empty zone
        // before the first line when that line's label gets remapped.
        BxZoneLabel currentLabel = null;
        StringBuilder zoneText = new StringBuilder();

        for (BxLine line : bxDocument.asLines()) {
            BxZoneLabel lineLabel = effectiveLabel(line, headerLines);
            if (!lineLabel.equals(currentLabel)) {
                // The label changed: close the run collected so far (if any)
                // and start a new one.
                if (currentLabel != null) {
                    addZone(document, currentLabel, zoneText.toString());
                }
                zoneText = new StringBuilder();
                currentLabel = lineLabel;
            }
            zoneText.append(line.toText());
            zoneText.append("\n");
        }

        // Flush the last run; skipped only when the document had no lines.
        if (currentLabel != null) {
            addZone(document, currentLabel, zoneText.toString());
        }
        return document;
    }

    /**
     * Determines the label under which a line is exported: header lines become
     * {@code BODY_HEADING}, other lines whose parent label matches the body
     * category become {@code BODY_CONTENT}, and everything else keeps the
     * parent's label.
     */
    private static BxZoneLabel effectiveLabel(BxLine line, Set<BxLine> headerLines) {
        // Read the parent label first, as the original code did for every line.
        BxZoneLabel parentLabel = line.getParent().getLabel();
        if (headerLines.contains(line)) {
            return BxZoneLabel.BODY_HEADING;
        }
        if (parentLabel.isOfCategoryOrGeneral(BxZoneLabelCategory.CAT_BODY)) {
            return BxZoneLabel.BODY_CONTENT;
        }
        return parentLabel;
    }

    /**
     * Appends a {@code <zone>} child to {@code element} whose {@code label}
     * attribute is {@code bxZoneLabel.toString()} and whose content is
     * {@code str} after {@link ContentCleaner#cleanAll}.
     */
    private void addZone(Element element, BxZoneLabel bxZoneLabel, String str) {
        Element zone = new Element("zone");
        zone.setAttribute("label", bxZoneLabel.toString());
        zone.addContent(ContentCleaner.cleanAll(str));
        element.addContent(zone);
    }
}
