package pl.edu.icm.cermine.metadata.extraction.enhancers;

import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.jdom.Element;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxLine;
import pl.edu.icm.cermine.structure.model.BxPage;
import pl.edu.icm.cermine.structure.model.BxWord;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.model.BxZoneLabel;

/* loaded from: input_file:pl/edu/icm/cermine/metadata/extraction/enhancers/KeywordsEnhancer.class */
public class KeywordsEnhancer extends AbstractSimpleEnhancer {
    private static final Pattern PREFIX = Pattern.compile("^key\\s?words[:-]?", 2);

    public KeywordsEnhancer() {
        setSearchedZoneLabels(EnumSet.of(BxZoneLabel.MET_KEYWORDS));
        setSearchedFirstPageOnly(true);
    }

    @Override // pl.edu.icm.cermine.metadata.extraction.enhancers.AbstractSimpleEnhancer
    protected Set<EnhancedField> getEnhancedFields() {
        return EnumSet.of(EnhancedField.KEYWORDS);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    @Override // pl.edu.icm.cermine.metadata.extraction.enhancers.AbstractSimpleEnhancer
    public boolean enhanceMetadata(BxDocument bxDocument, Element element) {
        Iterator<BxPage> it = filterPages(bxDocument).iterator();
        while (it.hasNext()) {
            for (BxZone bxZone : filterZones(it.next())) {
                String replaceFirst = PREFIX.matcher(bxZone.toText().replace("\n", "<eol>")).replaceFirst("");
                if (replaceFirst.matches(".*[:;,.·—].*")) {
                    for (String str : replaceFirst.split("[:;,.·—]")) {
                        Enhancers.addKeyword(element, str.trim().replaceFirst("\\.$", "").replace("-<eol>", "").replace("<eol>", " "));
                    }
                    return true;
                }
                ArrayList arrayList = new ArrayList();
                for (BxLine bxLine : bxZone.getLines()) {
                    ArrayList<BxWord> arrayList2 = new ArrayList();
                    Iterator<BxWord> it2 = bxLine.getWords().iterator();
                    while (it2.hasNext()) {
                        arrayList2.add(it2.next());
                    }
                    if (PREFIX.matcher(((BxWord) arrayList2.get(0)).toText()).matches()) {
                        arrayList2.remove(0);
                    } else if (arrayList2.size() > 1 && PREFIX.matcher(((BxWord) arrayList2.get(0)).toText() + " " + ((BxWord) arrayList2.get(1)).toText()).matches()) {
                        arrayList2.remove(0);
                        arrayList2.remove(0);
                    }
                    if (!arrayList2.isEmpty()) {
                        if (((BxWord) arrayList2.get(0)).toText().charAt(0) < 'a' || ((BxWord) arrayList2.get(0)).toText().charAt(0) > 'z' || arrayList.isEmpty()) {
                            arrayList.add(((BxWord) arrayList2.get(0)).toText());
                        } else {
                            String str2 = ((String) arrayList.get(arrayList.size() - 1)) + " " + ((BxWord) arrayList2.get(0)).toText();
                            arrayList.remove(arrayList.size() - 1);
                            arrayList.add(str2);
                        }
                        for (BxWord bxWord : arrayList2) {
                            if (arrayList2.indexOf(bxWord) < arrayList2.size() - 1) {
                                if ((bxWord.getNext().getX() - bxWord.getX()) - bxWord.getWidth() > 6.0d) {
                                    arrayList.add(bxWord.getNext().toText());
                                } else {
                                    String str3 = ((String) arrayList.get(arrayList.size() - 1)) + " " + bxWord.getNext().toText();
                                    arrayList.remove(arrayList.size() - 1);
                                    arrayList.add(str3);
                                }
                            }
                        }
                    }
                }
                Iterator it3 = arrayList.iterator();
                while (it3.hasNext()) {
                    Enhancers.addKeyword(element, ((String) it3.next()).trim().replaceFirst("\\.$", ""));
                }
            }
        }
        return false;
    }
}
