package pl.edu.icm.yadda.analysis.metadata.extraction.enhancers;

import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import pl.edu.icm.model.bwmeta.y.YElement;
import pl.edu.icm.yadda.analysis.textr.model.BxChunk;
import pl.edu.icm.yadda.analysis.textr.model.BxDocument;
import pl.edu.icm.yadda.analysis.textr.model.BxLine;
import pl.edu.icm.yadda.analysis.textr.model.BxPage;
import pl.edu.icm.yadda.analysis.textr.model.BxWord;
import pl.edu.icm.yadda.analysis.textr.model.BxZone;
import pl.edu.icm.yadda.analysis.textr.model.BxZoneLabel;

/* loaded from: input_file:pl/edu/icm/yadda/analysis/metadata/extraction/enhancers/AffiliationGeometricEnhancer.class */
public class AffiliationGeometricEnhancer extends AbstractSimpleEnhancer {
    /**
     * Lines whose whole text matches this pattern ("Email:" followed by at least one
     * character, any letter case) are not passed to the processor.
     */
    private static final Pattern SKIPPED_LINE_PATTERN = Pattern.compile("Email:.+", Pattern.CASE_INSENSITIVE);

    /** Maximum absolute difference at which {@code eq} treats two coordinates as equal. */
    private static final double EPSILON = 0.001d;

    /** Header strings registered through {@code setHeaders}, stored lower-cased. */
    private final Set<String> headers = new HashSet<String>();

    /* loaded from: input_file:pl/edu/icm/yadda/analysis/metadata/extraction/enhancers/AffiliationGeometricEnhancer$Processor.class */
    /**
     * Incrementally assembles affiliation texts keyed by their reference marker.
     *
     * <p>The caller feeds text with {@link #addText(String)}, splits a word into parts with
     * {@link #nextPart()}, closes a word with {@link #endWord()} and closes a zone with
     * {@link #endZone()}. When the first part of a word is a recognised reference (the next
     * expected numeric index, "*" or "†"), the affiliation collected so far is closed and a
     * new one is started under that reference. Everything else is appended to the current
     * affiliation text.
     */
    private static class Processor {
        /** Text fully matching this pattern (case-insensitively) is never stored as an affiliation. */
        private static final Pattern NONAFFILIATION_PATTERN =
                Pattern.compile("Correspondence:.+|Contributed equally", Pattern.CASE_INSENSITIVE);

        /** Collected affiliations: reference marker (possibly empty) to affiliation text. */
        private final Map<String, String> affiliations;
        /** Numeric index that the next numeric reference must equal to be accepted. */
        int nextIndex;
        /** True while the part being collected is the first part of the current word. */
        private boolean firstPart;
        /** Reference under which the affiliation being collected will be stored. */
        private String affiliationRef;
        /** Text of the affiliation being collected. */
        private final StringBuilder affiliationBuilder;
        /** Text of the word part being collected. */
        private final StringBuilder partBuilder;

        private Processor() {
            this.affiliations = new HashMap<String, String>();
            this.nextIndex = 1;
            this.firstPart = true;
            this.affiliationRef = "";
            this.affiliationBuilder = new StringBuilder();
            this.partBuilder = new StringBuilder();
        }

        /**
         * Closes the affiliation being collected. Non-empty text is stored under the current
         * reference unless it matches {@link #NONAFFILIATION_PATTERN} or the reference is
         * neither empty nor accepted by {@link #isIndex(String)}. The text buffer is cleared
         * either way; the current reference is left unchanged.
         */
        private void endAffiliation() {
            if (this.affiliationBuilder.length() == 0) {
                return;
            }
            String text = this.affiliationBuilder.toString();
            boolean refAccepted = isIndex(this.affiliationRef) || this.affiliationRef.equals("");
            if (!NONAFFILIATION_PATTERN.matcher(text).matches() && refAccepted) {
                this.affiliations.put(this.affiliationRef, text);
            }
            this.affiliationBuilder.setLength(0);
        }

        /** Delegates to {@code Enhancers.isAffiliationIndex}. */
        private boolean isIndex(String str) {
            return Enhancers.isAffiliationIndex(str);
        }

        /**
         * Tells whether {@code str} is an affiliation index whose integer value equals
         * {@link #nextIndex}. A string accepted by {@link #isIndex(String)} that does not
         * parse as an integer yields {@code false}.
         */
        private boolean isNextIndex(String str) {
            if (!isIndex(str)) {
                return false;
            }
            try {
                return Integer.parseInt(str) == this.nextIndex;
            } catch (NumberFormatException e) {
                // Not a decimal number, so it cannot be the next numeric index.
                return false;
            }
        }

        /**
         * Returns {@code str} when it is a usable reference: the next expected numeric index
         * (which also advances {@link #nextIndex}), "*" or "†". Returns {@code null} otherwise.
         */
        private String canonizeRef(String str) {
            if (isNextIndex(str)) {
                this.nextIndex++;
                return str;
            }
            if (str.equals("*") || str.equals("†")) {
                return str;
            }
            return null;
        }

        /**
         * Flushes the current word part. A first part that is a reference starts a new
         * affiliation; any other first part is appended to the affiliation text, preceded by
         * a space when the text is not empty. Later parts of a word are appended directly,
         * without a separator and without a reference check.
         */
        private void endPart() {
            if (this.firstPart) {
                String ref = canonizeRef(this.partBuilder.toString());
                if (ref != null) {
                    endAffiliation();
                    this.affiliationRef = ref;
                } else {
                    if (this.affiliationBuilder.length() > 0) {
                        this.affiliationBuilder.append(' ');
                    }
                    this.affiliationBuilder.append((CharSequence) this.partBuilder);
                }
            } else {
                this.affiliationBuilder.append((CharSequence) this.partBuilder);
            }
            this.partBuilder.setLength(0);
        }

        /** Flushes the current part and marks the next part as the first part of a new word. */
        public void endWord() {
            endPart();
            this.firstPart = true;
        }

        /** Closes the affiliation being collected at the end of a zone. */
        public void endZone() {
            endAffiliation();
        }

        /** Appends {@code str} to the word part being collected. */
        public void addText(String str) {
            this.partBuilder.append(str);
        }

        /** Flushes the current part and starts a further part of the same word. */
        public void nextPart() {
            endPart();
            this.firstPart = false;
        }

        /**
         * Closes any affiliation still being collected and returns the internal map of
         * collected affiliations (not a copy).
         */
        public Map<String, String> fetchAffiliations() {
            endAffiliation();
            return this.affiliations;
        }
    }

    /**
     * Creates the enhancer and registers {@code BxZoneLabel.MET_AFFILIATION} as the zone
     * label to search for.
     */
    public AffiliationGeometricEnhancer() {
        super();
        setSearchedZoneLabels(BxZoneLabel.MET_AFFILIATION);
    }

    /**
     * Replaces the set of known header strings with the given ones.
     *
     * <p>Each entry is stored lower-cased (default locale), matching the lower-cased lookup
     * performed in {@code enhanceMetadata}.
     *
     * @param collection header strings; must not be {@code null} nor contain {@code null}
     */
    public void setHeaders(Collection<String> collection) {
        this.headers.clear();
        for (String header : collection) {
            this.headers.add(header.toLowerCase());
        }
    }

    /**
     * Lists the metadata fields this enhancer handles.
     *
     * @return a set containing only {@code EnhancedField.AFFILIATION}
     */
    @Override // pl.edu.icm.yadda.analysis.metadata.extraction.enhancers.AbstractSimpleEnhancer
    protected Set<EnhancedField> getEnhancedFields() {
        Set<EnhancedField> enhancedFields = EnumSet.of(EnhancedField.AFFILIATION);
        return enhancedFields;
    }

    /**
     * Extracts affiliations from the filtered zones of every filtered page and stores them
     * on {@code yElement}.
     *
     * <p>For each zone, the first line is skipped when its lower-cased text is one of the
     * configured headers, and any line matching {@code SKIPPED_LINE_PATTERN} is skipped.
     * The remaining words are fed to a per-page {@code Processor}; each resulting affiliation
     * is cleaned up and written through {@code Enhancers.getOrCreateAffiliationByRef}.
     *
     * @param bxDocument document whose pages are examined
     * @param yElement   element that receives the affiliations
     * @return {@code true} if at least one affiliation text was set
     */
    @Override // pl.edu.icm.yadda.analysis.metadata.extraction.enhancers.AbstractSimpleEnhancer
    protected boolean enhanceMetadata(BxDocument bxDocument, YElement yElement) {
        boolean enhanced = false;
        for (BxPage bxPage : filterPages(bxDocument)) {
            Processor processor = new Processor();
            Iterator<BxZone> zones = filterZones(bxPage).iterator();
            while (zones.hasNext()) {
                boolean firstLine = true;
                for (BxLine bxLine : zones.next().getLines()) {
                    String lineText = bxLine.toText();
                    if (firstLine) {
                        firstLine = false;
                        // A zone may start with a header line; it is not part of any affiliation.
                        if (this.headers.contains(lineText.toLowerCase())) {
                            continue;
                        }
                    }
                    if (SKIPPED_LINE_PATTERN.matcher(lineText).matches()) {
                        continue;
                    }
                    // The element type of getWords() is not visible from this file, so the
                    // explicit cast of the original code is kept.
                    Iterator<?> words = bxLine.getWords().iterator();
                    while (words.hasNext()) {
                        feedWordToProcessor(processor, (BxWord) words.next());
                    }
                }
                processor.endZone();
            }
            for (Map.Entry<String, String> entry : processor.fetchAffiliations().entrySet()) {
                Enhancers.getOrCreateAffiliationByRef(yElement, entry.getKey())
                        .setText(cleanAffiliationText(entry.getValue()));
                enhanced = true;
            }
        }
        return enhanced;
    }

    /**
     * Feeds the chunks of one word to the processor, starting a new part whenever a chunk's
     * bottom edge (y + height) differs from that of the preceding chunk. A word without
     * chunks is ignored entirely.
     */
    private static void feedWordToProcessor(Processor processor, BxWord bxWord) {
        Iterator<?> chunks = bxWord.getChunks().iterator();
        if (!chunks.hasNext()) {
            return;
        }
        BxChunk firstChunk = (BxChunk) chunks.next();
        processor.addText(firstChunk.toText());
        double previousBottom = firstChunk.getBounds().getY() + firstChunk.getBounds().getHeight();
        while (chunks.hasNext()) {
            BxChunk chunk = (BxChunk) chunks.next();
            double bottom = chunk.getBounds().getY() + chunk.getBounds().getHeight();
            if (eq(previousBottom, bottom)) {
                processor.addText(chunk.toText());
            } else {
                previousBottom = bottom;
                processor.nextPart();
                processor.addText(chunk.getText());
            }
        }
        processor.endWord();
    }

    /**
     * Strips trailing non-affiliation content from a collected affiliation text: a
     * "corresponding author" tail, a trailing " and", everything from the first token
     * containing '@', "email(s):" and "e-mail(s):" tails, surrounding whitespace and one
     * final '.', ',' or ';'.
     */
    private static String cleanAffiliationText(String text) {
        return text
                .replaceFirst("[Cc]orresponding [Aa]uthor.*$", "")
                .replaceFirst(" and$", "")
                .replaceFirst("\\S+@.*$", "")
                .replaceFirst("[Ee]mails?:.*$", "")
                .replaceFirst("[Ee]-[Mm]ails?:.*$", "")
                .trim()
                .replaceFirst("[\\.,;]$", "");
    }

    /**
     * Compares two coordinates with a tolerance.
     *
     * @return {@code true} when the absolute difference of the arguments is at most
     *         {@code EPSILON}
     */
    private static boolean eq(double d, double d2) {
        final double distance = Math.abs(d - d2);
        return distance <= EPSILON;
    }
}
