package pl.edu.icm.cermine.metadata.extraction.enhancers;

import com.google.common.base.CharMatcher;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import pl.edu.icm.cermine.metadata.model.DocumentMetadata;
import pl.edu.icm.cermine.structure.model.BxChunk;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxLine;
import pl.edu.icm.cermine.structure.model.BxPage;
import pl.edu.icm.cermine.structure.model.BxWord;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.model.BxZoneLabel;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.3.jar:pl/edu/icm/cermine/metadata/extraction/enhancers/AuthorEnhancer.class */
/**
 * Enhancer that scans zones labelled {@link BxZoneLabel#MET_AUTHOR} and
 * registers the author names found there on the {@link DocumentMetadata}.
 *
 * <p>The zone text is consumed from left to right by a small state machine.
 * Whitespace, separators, the word "and", a fixed list of titles/degrees and
 * reference marks (digits, symbols, bracketed tokens, a few Greek letters) are
 * recognised at the head of the remaining text; every other character is
 * treated as part of an author name, unless it is a single letter {@code a-f}
 * whose geometry deviates from the rest of its line, in which case it is
 * recorded as a reference mark. Reference marks are collected into a list that
 * is handed to {@code DocumentMetadata.addAuthor} together with the name.
 */
public class AuthorEnhancer extends AbstractSimpleEnhancer {

    /** Alternation of titles and degrees that are skipped when they start a word. */
    private static final String TITLES =
            "MD|Prof.|PhD|Phd|MPH|RD|LD|BCh|BAO|PharmD|BSc|FRCP|PA-C|RAC|MBA|DrPH|MBChB|BM|RGN|BA|FCCP";

    /** Leading run of whitespace followed by the rest of the text. */
    private static final Pattern LEADING_WHITESPACE = Pattern.compile("(\\s+)(.*)");

    /** Leading reference mark followed by the rest of the text. */
    private static final Pattern LEADING_REFERENCE = Pattern.compile(
            "(\\d+|\\*|∗|⁎|†|‡|§|\\(..?\\)|\\{|¶|\\[..?\\]|\\+|\\||⊥|\\^|¹|²|³|#|α|β|λ|ξ|ψ)(.*)");

    /** Leading title that is followed by a non-letter. */
    private static final Pattern LEADING_TITLE = Pattern.compile("(" + TITLES + ")([^a-zA-Z].*)");

    /** Remaining text consisting of a title only. */
    private static final Pattern ONLY_TITLE = Pattern.compile("(" + TITLES + ")");

    /** Leading author separator followed by the rest of the text. */
    private static final Pattern LEADING_SEPARATOR = Pattern.compile("(,|;|&|•|·|Æ)(.*)");

    /** Leading word "and" followed by the rest of the text. */
    private static final Pattern LEADING_AND = Pattern.compile("(and|AND)\\b(.*)");

    /** Remaining text consisting of the word "and" only. */
    private static final Pattern ONLY_AND = Pattern.compile("(and|AND)");

    /** Single letters that may act as reference marks when displaced from their line. */
    private static final Pattern REFERENCE_LETTER = Pattern.compile("[a-f]");

    /** A candidate name must contain at least one ASCII letter. */
    private static final Pattern CONTAINS_LETTER = Pattern.compile(".*[a-zA-Z].*");

    /** Largest combined Y/height deviation from the line mean still treated as normal text. */
    private static final double MAX_LINE_DEVIATION = 2.0d;

    /** Restricts the enhancer to zones labelled {@link BxZoneLabel#MET_AUTHOR}. */
    public AuthorEnhancer() {
        setSearchedZoneLabels(EnumSet.of(BxZoneLabel.MET_AUTHOR));
    }

    /**
     * Extracts authors from the author zones of the document.
     *
     * <p>Pages are visited in the order returned by {@code filterPages}. All
     * filtered zones of a page are processed; as soon as a page contained at
     * least one zone that was actually parsed, the method returns without
     * looking at later pages.
     *
     * @param bxDocument document whose pages are searched
     * @param documentMetadata metadata object that receives the authors
     * @return {@code true} if at least one zone was parsed, {@code false} otherwise
     */
    // Decompiler note: access was widened from protected to public; kept public for callers.
    @Override
    public boolean enhanceMetadata(BxDocument bxDocument, DocumentMetadata documentMetadata) {
        for (BxPage page : filterPages(bxDocument)) {
            boolean parsedAnyZone = false;
            for (BxZone zone : filterZones(page)) {
                if (extractAuthors(zone, documentMetadata)) {
                    parsedAnyZone = true;
                }
            }
            if (parsedAnyZone) {
                return true;
            }
        }
        return false;
    }

    /**
     * Parses one zone and adds every author found in it to the metadata.
     *
     * @param zone zone to parse
     * @param metadata metadata object that receives the authors
     * @return {@code false} if the zone was skipped because its text contains
     *         both "vol" and "no" (case-insensitively), {@code true} otherwise
     */
    private static boolean extractAuthors(BxZone zone, DocumentMetadata metadata) {
        String rest = zone.toText().replaceAll("\n", " ");
        // Locale.ROOT keeps the keyword checks independent of the JVM default locale.
        String lowered = rest.toLowerCase(Locale.ROOT);
        if (lowered.contains("vol") && lowered.contains("no")) {
            return false;
        }

        // NOTE(review): 'pos' walks 'chunks' in step with the characters of 'rest';
        // this assumes every chunk contributes exactly one character to
        // zone.toText() -- confirm against BxZone/BxLine/BxWord.toText().
        List<BxChunk> chunks = collectChunks(zone);
        int pos = 0;

        Matcher whitespace = LEADING_WHITESPACE.matcher("");
        Matcher separator = LEADING_SEPARATOR.matcher("");
        Matcher leadingAnd = LEADING_AND.matcher("");
        Matcher onlyAnd = ONLY_AND.matcher("");
        Matcher leadingTitle = LEADING_TITLE.matcher("");
        Matcher onlyTitle = ONLY_TITLE.matcher("");
        Matcher reference = LEADING_REFERENCE.matcher("");

        StringBuilder name = new StringBuilder();
        List<String> refs = new ArrayList<String>();
        // True when the previous token ended a word, so "and"/titles may be recognised.
        boolean atWordStart = true;
        // True while the most recent non-whitespace token was a name character.
        boolean inName = false;

        while (!rest.isEmpty()) {
            if (whitespace.reset(rest).matches()) {
                String blanks = whitespace.group(1);
                pos += blanks.length();
                name.append(blanks);
                rest = whitespace.group(2);
                atWordStart = true;
            } else if (separator.reset(rest).matches()) {
                pos += separator.group(1).length();
                rest = separator.group(2);
                atWordStart = true;
                inName = false;
            } else if (atWordStart && leadingAnd.reset(rest).matches()) {
                pos += leadingAnd.group(1).length();
                rest = leadingAnd.group(2);
                inName = false;
            } else if (atWordStart && onlyAnd.reset(rest).matches()) {
                // Trailing "and" with nothing after it: nothing left to parse.
                rest = "";
            } else if (atWordStart && leadingTitle.reset(rest).matches()) {
                pos += leadingTitle.group(1).length();
                rest = leadingTitle.group(2);
            } else if (atWordStart && onlyTitle.reset(rest).matches()) {
                // Trailing title with nothing after it: nothing left to parse.
                rest = "";
            } else if (reference.reset(rest).matches()) {
                pos += reference.group(1).length();
                refs.add(reference.group(1));
                rest = reference.group(2);
                atWordStart = true;
                inName = false;
            } else {
                String character = rest.substring(0, 1);
                if (isDisplacedReferenceLetter(chunks.get(pos))) {
                    refs.add(character);
                    atWordStart = true;
                    inName = false;
                } else {
                    if (!inName && !name.toString().trim().isEmpty()) {
                        // A new name starts after a separator/reference: flush the previous one.
                        addAuthorIfValid(metadata, name.toString(), refs);
                        name.setLength(0);
                        refs.clear();
                    }
                    name.append(character);
                    inName = true;
                    atWordStart = false;
                }
                pos++;
                rest = rest.substring(1);
            }
        }

        if (name.length() > 0) {
            String lastName = name.toString();
            if (!lastName.toLowerCase(Locale.ROOT).endsWith("introduction")) {
                addAuthorIfValid(metadata, lastName, refs);
            }
        }
        return true;
    }

    /**
     * Flattens the zone into a list of its chunks in reading order, appending
     * one synthetic single-space chunk (with {@code null} as first constructor
     * argument) after the chunks of every word.
     */
    private static List<BxChunk> collectChunks(BxZone zone) {
        List<BxChunk> chunks = new ArrayList<BxChunk>();
        for (BxLine line : zone.getLines()) {
            for (BxWord word : line.getWords()) {
                for (BxChunk chunk : word.getChunks()) {
                    chunks.add(chunk);
                }
                chunks.add(new BxChunk(null, " "));
            }
        }
        return chunks;
    }

    /**
     * Tells whether the chunk is a single letter {@code a-f} whose Y position
     * and height together deviate from the mean of all chunks on its line by
     * more than {@link #MAX_LINE_DEVIATION}.
     */
    private static boolean isDisplacedReferenceLetter(BxChunk chunk) {
        if (!REFERENCE_LETTER.matcher(chunk.toText()).matches()) {
            return false;
        }
        double sumY = 0.0d;
        double sumHeight = 0.0d;
        int count = 0;
        for (BxWord word : chunk.getParent().getParent().getWords()) {
            for (BxChunk sibling : word.getChunks()) {
                sumY += sibling.getY();
                sumHeight += sibling.getHeight();
                count++;
            }
        }
        double deviation = Math.abs(chunk.getY() - sumY / count)
                + Math.abs(sumHeight / count - chunk.getHeight());
        // Negated "<=" (rather than ">") so that a NaN deviation counts as displaced.
        return !(deviation <= MAX_LINE_DEVIATION);
    }

    /**
     * Trims the raw name and adds it as an author, unless it equals
     * "article info" (ignoring case) or contains no ASCII letter.
     */
    private static void addAuthorIfValid(DocumentMetadata metadata, String rawName, List<String> refs) {
        String author = CharMatcher.WHITESPACE.trimFrom(rawName);
        if (!author.toLowerCase(Locale.ROOT).equals("article info")
                && CONTAINS_LETTER.matcher(author).matches()) {
            // NOTE(review): the caller clears 'refs' right after this call, so this
            // relies on addAuthor not retaining the list instance -- confirm.
            metadata.addAuthor(author, refs);
        }
    }

    /** Declares {@link EnhancedField#AUTHORS} as the only field this enhancer fills. */
    @Override
    protected Set<EnhancedField> getEnhancedFields() {
        return EnumSet.of(EnhancedField.AUTHORS);
    }
}
