package pl.edu.icm.cermine.metadata.extraction.enhancers;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.itextpdf.text.html.HtmlTags;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import pl.edu.icm.cermine.bibref.model.BibEntry;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.metadata.model.DocumentMetadata;
import pl.edu.icm.cermine.structure.HierarchicalReadingOrderResolver;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxLine;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.model.BxZoneLabel;
import pl.edu.icm.cermine.structure.tools.BxBoundsBuilder;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.7-SNAPSHOT.jar:pl/edu/icm/cermine/metadata/extraction/enhancers/AffiliationAuthorSplitterEnhancer.class */
/**
 * Splits a zone labelled {@link BxZoneLabel#MET_AFFILIATION} into a leading
 * {@link BxZoneLabel#MET_AUTHOR} zone and a trailing
 * {@link BxZoneLabel#MET_AFFILIATION} zone.
 *
 * <p>The split is attempted only when the first page has no zone labelled as
 * author and no two candidate zones have Y coordinates closer than
 * {@link #SAME_ROW_TOLERANCE}. Inside a candidate zone the affiliation part
 * starts at the first line that both differs in style from the preceding line
 * and contains one of the {@link #KEYWORDS}.
 *
 * <p>This enhancer only restructures the zones of the first page; it reports no
 * enhanced metadata fields.
 */
public class AffiliationAuthorSplitterEnhancer extends AbstractSimpleEnhancer {

    /**
     * Lower-case fragments whose presence in a line marks it as affiliation text.
     * NOTE(review): {@code BibEntry.FIELD_SCHOOL} and {@code HtmlTags.ALIGN_CENTER}
     * are presumably the strings "school" and "center" resolved to unrelated
     * constants by the decompiler - confirm before replacing them with literals.
     */
    private static final Set<String> KEYWORDS = Sets.newHashSet("department", "departament", "universit", "institute", BibEntry.FIELD_SCHOOL, "college", "univ.", "instituto", "facultad", "universidad", HtmlTags.ALIGN_CENTER, "labs");

    /** Two candidate zones whose Y coordinates differ by less than this block the split. */
    private static final double SAME_ROW_TOLERANCE = 10.0d;

    /** A line shorter than its predecessor by more than this counts as a style change. */
    private static final double HEIGHT_DROP_THRESHOLD = 1.0d;

    /** Creates the enhancer and restricts the searched zones to affiliation zones. */
    public AffiliationAuthorSplitterEnhancer() {
        setSearchedZoneLabels(EnumSet.of(BxZoneLabel.MET_AFFILIATION));
    }

    /**
     * Replaces a mixed author/affiliation zone on the first page with two
     * separate zones and re-runs reading order resolution on the document.
     *
     * <p>When several candidate zones can be split, only the last one
     * encountered is replaced.
     *
     * <p>NOTE(review): a decompiler note on the original source says the access
     * modifier was changed from {@code protected}; {@code public} is kept so the
     * visible signature stays unchanged.
     *
     * @param bxDocument document whose first page is inspected and possibly modified
     * @param documentMetadata metadata object; not used by this enhancer
     * @return always {@code false}
     */
    @Override // pl.edu.icm.cermine.metadata.extraction.enhancers.AbstractSimpleEnhancer
    public boolean enhanceMetadata(BxDocument bxDocument, DocumentMetadata documentMetadata) {
        if (containsAuthorZone(bxDocument) || hasCandidatesAtSameHeight(bxDocument)) {
            return false;
        }

        BxZone zoneToReplace = null;
        BxZone authorReplacement = null;
        BxZone affiliationReplacement = null;
        for (BxZone candidate : filterZones(bxDocument.getFirstChild())) {
            BxZone authorZone = new BxZone();
            authorZone.setLabel(BxZoneLabel.MET_AUTHOR);
            BxBoundsBuilder authorBounds = new BxBoundsBuilder();
            BxZone affiliationZone = new BxZone();
            affiliationZone.setLabel(BxZoneLabel.MET_AFFILIATION);
            BxBoundsBuilder affiliationBounds = new BxBoundsBuilder();

            // Once the affiliation part has started, every remaining line belongs to it.
            boolean inAffiliation = false;
            BxLine previous = null;
            for (BxLine line : candidate) {
                if (!inAffiliation && previous != null
                        && isStyleBreak(previous, line) && containsAffiliationKeyword(line)) {
                    inAffiliation = true;
                }
                if (inAffiliation) {
                    affiliationZone.addLine(line);
                    affiliationBounds.expand(line.getBounds());
                } else {
                    authorZone.addLine(line);
                    authorBounds.expand(line.getBounds());
                }
                previous = line;
            }
            authorZone.setBounds(authorBounds.getBounds());
            affiliationZone.setBounds(affiliationBounds.getBounds());

            // Only a split that yields two non-empty parts is worth applying.
            if (authorZone.hasChildren() && affiliationZone.hasChildren()) {
                zoneToReplace = candidate;
                authorReplacement = authorZone;
                affiliationReplacement = affiliationZone;
            }
        }
        if (zoneToReplace == null) {
            return false;
        }

        List<BxZone> zones = Lists.newArrayList(bxDocument.getFirstChild());
        zones.remove(zoneToReplace);
        zones.add(authorReplacement);
        zones.add(affiliationReplacement);
        bxDocument.getFirstChild().setZones(zones);
        try {
            new HierarchicalReadingOrderResolver().resolve(bxDocument);
        } catch (AnalysisException ignored) {
            // Best effort: the zones have already been replaced, so a failure to
            // re-resolve the reading order is deliberately not propagated.
        }
        return false;
    }

    /** Returns {@code true} if any zone of the first page is labelled as author. */
    private boolean containsAuthorZone(BxDocument bxDocument) {
        for (BxZone zone : bxDocument.getFirstChild()) {
            if (BxZoneLabel.MET_AUTHOR.equals(zone.getLabel())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns {@code true} if two distinct candidate zones have Y coordinates
     * closer than {@link #SAME_ROW_TOLERANCE}.
     */
    private boolean hasCandidatesAtSameHeight(BxDocument bxDocument) {
        for (BxZone first : filterZones(bxDocument.getFirstChild())) {
            for (BxZone second : filterZones(bxDocument.getFirstChild())) {
                if (!first.equals(second) && Math.abs(first.getY() - second.getY()) < SAME_ROW_TOLERANCE) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Returns {@code true} if {@code current} uses a different most popular font
     * than {@code previous} or is shorter by more than {@link #HEIGHT_DROP_THRESHOLD}.
     */
    private static boolean isStyleBreak(BxLine previous, BxLine current) {
        return !previous.getMostPopularFontName().equals(current.getMostPopularFontName())
                || previous.getHeight() - current.getHeight() > HEIGHT_DROP_THRESHOLD;
    }

    /** Returns {@code true} if the lower-cased line text contains any of the {@link #KEYWORDS}. */
    private static boolean containsAffiliationKeyword(BxLine line) {
        // Locale.ROOT keeps the match independent of the JVM default locale
        // (e.g. Turkish dotless-i lower-casing would otherwise break "universit").
        String text = line.toText().toLowerCase(Locale.ROOT);
        for (String keyword : KEYWORDS) {
            if (text.contains(keyword)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reports which metadata fields this enhancer fills in.
     *
     * @return an empty set
     */
    @Override // pl.edu.icm.cermine.metadata.extraction.enhancers.AbstractSimpleEnhancer
    protected Set<EnhancedField> getEnhancedFields() {
        return EnumSet.noneOf(EnhancedField.class);
    }
}
