package pl.edu.icm.cermine.content.cleaning;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.cli.HelpFormatter;
import pl.edu.icm.cermine.content.model.BxDocContentStructure;
import pl.edu.icm.cermine.structure.model.BxLine;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.1.jar:pl/edu/icm/cermine/content/cleaning/ContentCleaner.class */
/**
 * Produces cleaned-up text for every part of a {@link BxDocContentStructure}.
 *
 * <p>For each part, the header lines are joined into a single header string and the
 * content lines are grouped into paragraph strings. Lines ending with a hyphen are
 * de-hyphenated and typographic ligatures are replaced by plain letter sequences.
 * The results are stored back on the part via {@code setCleanHeaderText} and
 * {@code setCleanContentTexts}.
 */
public class ContentCleaner {
    /** Default for the maximum paragraph indent, expressed as a fraction of the widest content line. */
    public static final double DEFAULT_PAR_LINE_MULT = 0.5d;
    /** Default for the minimum horizontal offset that counts as a paragraph indent. */
    public static final double DEFAULT_MIN_PAR_IND = 5.0d;
    /** Default fraction of the widest content line below which a line counts as "short". */
    public static final double DEFAULT_LAST_PAR_LINE_MULT = 0.8d;
    /** Default minimum score a line needs to be treated as the first line of a paragraph. */
    public static final double DEFAULT_FIRST_PAR_LINE_SCORE = 3.0d;

    /** Trailing character that marks a line as ending in a hyphenated word. */
    private static final String HYPHEN = "-";

    /** Matches a line whose first character is an ASCII capital letter; compiled once, reused per line. */
    private static final Pattern STARTS_WITH_UPPERCASE = Pattern.compile("^[A-Z].*$");

    private double paragraphLineIndentMultiplier = DEFAULT_PAR_LINE_MULT;
    private double minParagraphIndent = DEFAULT_MIN_PAR_IND;
    private double lastParagraphLineLengthMult = DEFAULT_LAST_PAR_LINE_MULT;
    private double firstParagraphLineMinScore = DEFAULT_FIRST_PAR_LINE_SCORE;

    /**
     * Computes and stores the clean header text and the clean paragraph texts
     * for every part of the given content structure.
     *
     * @param bxDocContentStructure the structure whose parts are updated in place
     */
    public void cleanupContent(BxDocContentStructure bxDocContentStructure) {
        for (BxDocContentStructure.BxDocContentPart part : bxDocContentStructure.getParts()) {
            part.setCleanHeaderText(buildHeaderText(part.getHeaderLines()));
            part.setCleanContentTexts(buildParagraphs(part.getContentLines()));
        }
    }

    /**
     * Joins header lines into one string: lines are separated by a single space,
     * except that a line ending with a hyphen loses the hyphen and is glued
     * directly to the following line.
     */
    private String buildHeaderText(List<BxLine> headerLines) {
        StringBuilder header = new StringBuilder();
        for (BxLine line : headerLines) {
            String text = line.toText();
            if (text.endsWith(HYPHEN)) {
                // Drop the hyphen and append no separator, so the next line continues the word.
                header.append(text, 0, text.length() - 1);
            } else {
                header.append(text).append(' ');
            }
        }
        return cleanLigatures(header.toString().trim());
    }

    /**
     * Groups content lines into paragraphs. A new paragraph is started whenever a
     * line's score reaches {@code firstParagraphLineMinScore}; lines inside a
     * paragraph are separated by {@code '\n'}.
     */
    private List<String> buildParagraphs(List<BxLine> contentLines) {
        List<String> paragraphs = new ArrayList<String>();

        // The widest line is the yardstick for the indent and "short line" thresholds.
        double maxWidth = Double.NEGATIVE_INFINITY;
        for (BxLine line : contentLines) {
            double width = line.getWidth();
            if (width > maxWidth) {
                maxWidth = width;
            }
        }

        StringBuilder paragraph = new StringBuilder();
        for (BxLine line : contentLines) {
            String text = line.toText();
            if (paragraphStartScore(line, text, maxWidth) >= this.firstParagraphLineMinScore) {
                flushParagraph(paragraph, paragraphs);
            }
            appendContentLine(paragraph, text);
        }
        flushParagraph(paragraph, paragraphs);
        return paragraphs;
    }

    /**
     * Counts how many of the "first line of a paragraph" cues hold for a line:
     * it starts with a capital letter, it is indented relative to the previous line,
     * the previous line is short, the previous line ends with a full stop, and
     * it is indented relative to the next line.
     */
    private int paragraphStartScore(BxLine line, String text, double maxWidth) {
        int score = 0;
        if (STARTS_WITH_UPPERCASE.matcher(text).matches()) {
            score++;
        }
        BxLine prev = line.getPrev();
        if (prev != null) {
            if (isIndentedRelativeTo(line, prev, maxWidth)) {
                score++;
            }
            if (prev.getWidth() < this.lastParagraphLineLengthMult * maxWidth) {
                score++;
            }
            if (prev.toText().endsWith(".")) {
                score++;
            }
        }
        BxLine next = line.getNext();
        if (next != null && isIndentedRelativeTo(line, next, maxWidth)) {
            score++;
        }
        return score;
    }

    /**
     * Tells whether {@code line} starts to the right of {@code neighbour} by more than
     * {@code minParagraphIndent} but less than {@code paragraphLineIndentMultiplier * maxWidth}.
     */
    private boolean isIndentedRelativeTo(BxLine line, BxLine neighbour, double maxWidth) {
        double lineX = line.getX();
        double neighbourX = neighbour.getX();
        double offset = lineX - neighbourX;
        return lineX > neighbourX
                && offset < this.paragraphLineIndentMultiplier * maxWidth
                && offset > this.minParagraphIndent;
    }

    /**
     * Appends one content line to the paragraph being built. A trailing hyphen is removed
     * and the line break is moved in front of the hyphenated word, so that the word is
     * completed by the text of the following line.
     */
    private void appendContentLine(StringBuilder paragraph, String text) {
        if (!text.endsWith(HYPHEN)) {
            paragraph.append(text).append('\n');
            return;
        }
        String dehyphenated = text.substring(0, text.length() - 1);
        int lastSpace = dehyphenated.lastIndexOf(' ');
        if (lastSpace < 0) {
            // Single word on the line: nothing to break before, just continue the word.
            paragraph.append(dehyphenated);
        } else {
            paragraph.append(dehyphenated, 0, lastSpace)
                    .append('\n')
                    .append(dehyphenated, lastSpace + 1, dehyphenated.length());
        }
    }

    /** Stores the accumulated paragraph (trimmed, ligatures cleaned) if it is non-empty, then resets the buffer. */
    private void flushParagraph(StringBuilder paragraph, List<String> paragraphs) {
        if (paragraph.length() > 0) {
            paragraphs.add(cleanLigatures(paragraph.toString().trim()));
        }
        paragraph.setLength(0);
    }

    /**
     * Replaces typographic ligature characters with plain letter sequences.
     * Literal replacement is used; no regular-expression semantics are needed.
     */
    private String cleanLigatures(String str) {
        return str.replace("ﬀ", "ff")
                .replace("ﬁ", "fi")
                .replace("ﬂ", "fl")
                .replace("ﬃ", "ffi")
                .replace("ﬄ", "ffl")
                .replace("ﬅ", "ft")
                .replace("ﬆ", "st");
    }

    /**
     * Sets the minimum score a line must reach to start a new paragraph.
     *
     * @param d the new threshold
     */
    public void setFirstParagraphLineMinScore(double d) {
        this.firstParagraphLineMinScore = d;
    }

    /**
     * Sets the fraction of the widest content line below which the previous line
     * is considered short.
     *
     * @param d the new multiplier
     */
    public void setLastParagraphLineLengthMult(double d) {
        this.lastParagraphLineLengthMult = d;
    }

    /**
     * Sets the minimum horizontal offset that counts as a paragraph indent.
     *
     * @param d the new minimum offset
     */
    public void setMinParagraphIndent(double d) {
        this.minParagraphIndent = d;
    }

    /**
     * Sets the maximum paragraph indent as a fraction of the widest content line.
     *
     * @param d the new multiplier
     */
    public void setParagraphLineIndentMultiplier(double d) {
        this.paragraphLineIndentMultiplier = d;
    }
}
