package pl.edu.icm.cermine.metadata.extraction.enhancers;

import java.util.Iterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import pl.edu.icm.cermine.bibref.BibReferenceParser;
import pl.edu.icm.cermine.bibref.CRFBibReferenceParser;
import pl.edu.icm.cermine.bibref.model.BibEntry;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.metadata.model.DocumentMetadata;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxPage;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.model.BxZoneLabel;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.8-SNAPSHOT.jar:pl/edu/icm/cermine/metadata/extraction/enhancers/CiteAsEnhancer.class */
/**
 * Metadata enhancer that looks for a "Cite this article as: ..." statement in
 * zones labelled {@link BxZoneLabel#MET_BIB_INFO}, parses the text following
 * that phrase as a bibliographic reference, and copies the journal, volume and
 * issue values of the parsed reference into the document metadata.
 *
 * <p>A field is written only when it is not yet present in the set of already
 * enhanced fields and the parsed reference provides a non-null value for it.
 */
public class CiteAsEnhancer extends AbstractFilterEnhancer {
    /** Classpath location of the serialized model passed to the default reference parser. */
    private static final String MODEL_FILE = "/pl/edu/icm/cermine/bibref/acrf.ser.gz";

    /**
     * Captures everything that follows the "Cite this article as: " phrase.
     * {@link Pattern#DOTALL} lets the captured citation span line terminators.
     */
    private static final Pattern PATTERN = Pattern.compile("Cite this article as: (.*)", Pattern.DOTALL);

    /** Parser applied to the captured citation; {@code null} disables this enhancer. */
    private BibReferenceParser<BibEntry> referenceParser;

    /**
     * Creates an enhancer restricted to {@link BxZoneLabel#MET_BIB_INFO} zones and
     * tries to build the default CRF-based reference parser from {@link #MODEL_FILE}.
     * If the parser cannot be created, the enhancer stays usable but does nothing
     * until a parser is supplied through {@link #setReferenceParser}.
     */
    public CiteAsEnhancer() {
        setSearchedZoneLabels(BxZoneLabel.MET_BIB_INFO);
        try {
            this.referenceParser = new CRFBibReferenceParser(CiteAsEnhancer.class.getResourceAsStream(MODEL_FILE));
        } catch (AnalysisException ignored) {
            // Best effort: without a parser enhanceMetadata() becomes a no-op.
            this.referenceParser = null;
        }
    }

    /**
     * Replaces the reference parser used for the captured citation text.
     *
     * @param bibReferenceParser parser to use; passing {@code null} turns
     *                           {@link #enhanceMetadata} into a no-op
     */
    public void setReferenceParser(BibReferenceParser<BibEntry> bibReferenceParser) {
        this.referenceParser = bibReferenceParser;
    }

    /**
     * Scans the filtered pages and zones of the document for the citation phrase
     * and fills in journal, volume and issue from each parsed citation.
     *
     * <p>Processing stops for the whole document as soon as the parser reports an
     * {@link AnalysisException}; values set before that point are kept.
     *
     * @param bxDocument       document whose zones are searched
     * @param documentMetadata metadata object receiving the extracted values
     * @param set              fields that are already enhanced; consulted before
     *                         each write and extended after each successful write
     */
    @Override
    public void enhanceMetadata(BxDocument bxDocument, DocumentMetadata documentMetadata, Set<EnhancedField> set) {
        if (this.referenceParser == null) {
            return;
        }
        for (BxPage page : filterPages(bxDocument)) {
            for (BxZone zone : filterZones(page)) {
                Matcher matcher = PATTERN.matcher(zone.toText());
                if (!matcher.find()) {
                    continue;
                }
                try {
                    BibEntry citation = this.referenceParser.parseBibReference(matcher.group(1));
                    // Each lookup is performed only when the field is still missing.
                    if (!set.contains(EnhancedField.JOURNAL)) {
                        String journal = citation.getFirstFieldValue("journal");
                        if (journal != null) {
                            documentMetadata.setJournal(journal);
                            set.add(EnhancedField.JOURNAL);
                        }
                    }
                    if (!set.contains(EnhancedField.VOLUME)) {
                        String volume = citation.getFirstFieldValue("volume");
                        if (volume != null) {
                            documentMetadata.setVolume(volume);
                            set.add(EnhancedField.VOLUME);
                        }
                    }
                    if (!set.contains(EnhancedField.ISSUE)) {
                        // The issue is read from the reference's "number" field.
                        String issue = citation.getFirstFieldValue("number");
                        if (issue != null) {
                            documentMetadata.setIssue(issue);
                            set.add(EnhancedField.ISSUE);
                        }
                    }
                } catch (AnalysisException ignored) {
                    // Best effort: a parser failure ends enhancement for this document.
                    return;
                }
            }
        }
    }
}
