package pl.edu.icm.yadda.imports.baztech.citations;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.io.filefilter.DirectoryFileFilter;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.yadda.common.utils.Utils;

/* loaded from: input_file:WEB-INF/lib/bwmeta-import-1.12.0-SNAPSHOT.jar:pl/edu/icm/yadda/imports/baztech/citations/CitationsProviderImpl.class */
public class CitationsProviderImpl implements CitationsProvider {
    /** Name of the character encoding used when opening citation files. */
    private String encoding;

    /** Citation lines of every accepted article, keyed by article identifier. */
    private final Map<String, List<String>> citations;

    /** First character of a "special" line (a variable definition or, possibly, a comment). */
    private static final char SPECIAL_CHAR = '@';

    private static final Logger log = LoggerFactory.getLogger(CitationsProviderImpl.class);

    /** Filter built for the {@code .txt} file-name suffix; never reassigned, hence final. */
    private static final FileFilter txtFileFilter = new SuffixFileFilter(".txt");

    /** Filter selecting directories; never reassigned, hence final. */
    private static final FileFilter dirFileFilter = DirectoryFileFilter.INSTANCE;

    /**
     * One or more characters, none of them in {@code \p{Alnum}}. Used with {@code matches()},
     * i.e. against the whole line. Note that {@code \p{Alnum}} is a POSIX class in
     * {@code java.util.regex} and by default covers US-ASCII letters and digits only.
     */
    private static final Pattern NO_ALPHANUM_PATTERN = Pattern.compile("[^\\p{Alnum}]+");

    /** Line prefix of a numbered citation: digits in square brackets, or digits followed by a dot. */
    private static final Pattern CITATION_START = Pattern.compile("^((\\[\\d+\\])|(\\d+\\.))");

    /**
     * Line prefix of an article definition: 3-4 alphanumerics, a dash, 4-5 digits, a dash and
     * 4-5 digits (group 1, the article identifier), followed either by optional spaces/tabs and
     * a colon (group 3) or by a single space/tab (group 4).
     */
    private static final Pattern ARTICLE_START = Pattern.compile("^(\\p{Alnum}{3,4}-\\d{4,5}-\\d{4,5})(([ \\t]*:)|([ \\t]))");

    /** Looser prefix (3+ alphanumerics, dash, 4+ digits, dash, 4+ digits) used to spot malformed article definitions. */
    private static final Pattern ARTICLE_START_LOOKALIKE = Pattern.compile("^(\\p{Alnum}{3,}-\\d{4,}-\\d{4,})");

    /** Articles-count variable: optional {@code @}, the variable name, {@code =}, and the value (group 1). */
    private static final Pattern ARTICLES_COUNT = Pattern.compile("@?artyku[łl]y\\s*=\\s*(\\S+)");

    /** Citations-count variable: optional {@code @}, the variable name, {@code =}, and the value (group 1). */
    private static final Pattern CITATIONS_COUNT = Pattern.compile("@?cytowania\\s*=\\s*(\\S+)");

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/bwmeta-import-1.12.0-SNAPSHOT.jar:pl/edu/icm/yadda/imports/baztech/citations/CitationsProviderImpl$CitationsParsingContext.class */
    public static final class CitationsParsingContext {
        private final String path;
        private String articleId;
        private final Map<String, List<String>> fileCitations = new HashMap();
        private int expectedArticles = 0;
        private int errors = 0;
        private int articleExpectedCitationCount = 0;
        private List<String> articleCitations = new ArrayList();

        public CitationsParsingContext(String str) {
            this.path = str;
        }

        public void consumeCurrentArticleCitations() {
            if (CollectionUtils.isNotEmpty(this.articleCitations)) {
                if (citationsSizeAsExpected()) {
                    this.fileCitations.put(this.articleId, this.articleCitations);
                }
            } else if (this.articleId != null) {
                CitationsProviderImpl.log.error("Artykuł '{}'' nie ma żadnych cytowań (plik: {})", this.articleId, this.path);
                this.errors++;
            }
            this.articleExpectedCitationCount = 0;
            this.articleId = null;
            this.articleCitations = new ArrayList();
        }

        public boolean hasNoErrors() {
            boolean z = true;
            if (this.expectedArticles > 0 && getArticleCount() != this.expectedArticles) {
                CitationsProviderImpl.log.error("Nieprawidłowa liczba artykułów (zadeklarowana=" + this.expectedArticles + ", faktyczna=" + getArticleCount() + ") w pliku " + this.path + DefaultExpressionEngine.DEFAULT_INDEX_END);
                z = false;
            } else if (this.errors > 0) {
                z = false;
            }
            return z;
        }

        public String getPath() {
            return this.path;
        }

        public Map<String, List<String>> getFileCitations() {
            return this.fileCitations;
        }

        public int getExpectedArticles() {
            return this.expectedArticles;
        }

        public void setExpectedArticles(int i) {
            this.expectedArticles = i;
        }

        public String getArticleId() {
            return this.articleId;
        }

        public void setArticleId(String str) {
            this.articleId = str;
        }

        public void setArticleExpectedCitationCount(int i) {
            this.articleExpectedCitationCount = i;
        }

        public int getArticleExpectedCitationCount() {
            return this.articleExpectedCitationCount;
        }

        public void addCitation(String str) {
            this.articleCitations.add(str);
        }

        private int getArticleCount() {
            return this.fileCitations.size() + this.errors;
        }

        private boolean citationsSizeAsExpected() {
            boolean z = true;
            int size = this.articleCitations.size();
            if (this.articleExpectedCitationCount > 0 && size != this.articleExpectedCitationCount) {
                CitationsProviderImpl.log.error("Nieprawidłowa liczba cytowań dla artykułu [" + this.articleId + "]  (zadeklarowana=" + this.articleExpectedCitationCount + ", faktyczna=" + size + DefaultExpressionEngine.DEFAULT_INDEX_END + " w pliku " + this.path);
                this.errors++;
                z = false;
            }
            return z;
        }
    }

    /** Creates a provider that reads citation files as UTF-16. */
    public CitationsProviderImpl() {
        this("UTF-16");
    }

    /**
     * Creates a provider that reads citation files using the given encoding.
     *
     * @param str charset name passed to the file reader; a {@code null} or blank value falls
     *            back to UTF-16 instead of failing later when the first file is opened
     */
    public CitationsProviderImpl(String str) {
        this.encoding = StringUtils.isNotBlank(str) ? str : "UTF-16";
        this.citations = new HashMap<String, List<String>>(12000);
    }

    /**
     * Looks up the citation lines collected for one article.
     *
     * @param str  article identifier used as the lookup key
     * @param str2 not used by this implementation
     * @return the stored citation lines, or {@code null} when nothing is stored for {@code str}
     */
    @Override
    public List<String> getCitations(String str, String str2) {
        final List<String> articleCitations = this.citations.get(str);
        return articleCitations;
    }

    /**
     * Loads citations starting at the given file or directory and logs a start and an end
     * message around the traversal.
     *
     * @param file a citations file or a directory to traverse
     * @throws Exception whatever the traversal or parsing throws
     */
    @Override
    public void prepareCitations(File file) throws Exception {
        final String rootPath = file.getAbsolutePath();
        log.info("Przygotowanie cytowań rozpoczęte... (ścieżka: " + rootPath + DefaultExpressionEngine.DEFAULT_INDEX_END);

        visit(file);

        final int articleCount = this.citations.size();
        log.info("Przygotowanie cytowań zakończone. Przetworzono " + articleCount + " artykułów.");
    }

    /**
     * @return the number of entries in the citations map, i.e. the number of articles that have
     *         citations stored (not the number of citation lines)
     */
    protected int getCitationsCount() {
        final Map<String, List<String>> collected = this.citations;
        return collected.size();
    }

    /**
     * Parses a single citations file, or recursively every {@code .txt} file found under a
     * directory (files of a directory first, then its sub-directories).
     *
     * @param file file or directory to process
     * @throws IOException when the content of a directory cannot be listed
     * @throws Exception   whatever parsing a file throws
     */
    private void visit(File file) throws Exception {
        if (!file.isDirectory()) {
            if (txtFileFilter.accept(file)) {
                parse(file);
            } else {
                log.warn("Brak plików z cytowaniami (ścieżka: " + file.getAbsolutePath() + DefaultExpressionEngine.DEFAULT_INDEX_END);
            }
            return;
        }
        for (File textFile : listDirectory(file, txtFileFilter)) {
            parse(textFile);
        }
        for (File subdirectory : listDirectory(file, dirFileFilter)) {
            visit(subdirectory);
        }
    }

    /**
     * Lists the entries of a directory accepted by the filter, turning the {@code null} that
     * {@link File#listFiles(FileFilter)} returns on failure into a descriptive exception
     * (previously this surfaced as a NullPointerException in the calling loop).
     */
    private File[] listDirectory(File directory, FileFilter filter) throws IOException {
        File[] entries = directory.listFiles(filter);
        if (entries == null) {
            throw new IOException("Nie można odczytać zawartości katalogu: " + directory.getAbsolutePath());
        }
        return entries;
    }

    /**
     * Interprets a line as the articles-count variable of the file.
     *
     * @param str                     trimmed input line
     * @param citationsParsingContext state of the file being parsed
     * @return {@code 0} when the line is not an articles-count definition; the declared count
     *         (positive) when it is valid and the first one in the file; {@code -1} when the
     *         value is not positive, when the variable was already defined, or when the context
     *         already holds {@code -1}
     */
    private int checkArticlesCountLine(String str, CitationsParsingContext citationsParsingContext) {
        final int alreadyDeclared = citationsParsingContext.getExpectedArticles();
        if (alreadyDeclared == -1) {
            // A previous definition was rejected; keep reporting the file-level count as invalid.
            return -1;
        }

        final Matcher definition = ARTICLES_COUNT.matcher(str);
        if (!definition.matches()) {
            return 0;
        }

        final String rawValue = definition.group(1);
        final int declared = Utils.atoi(rawValue);
        if (declared <= 0) {
            log.error("Nieprawidłowa wartość zmiennej @artykuły (" + rawValue + ") w pliku " + citationsParsingContext.getPath());
            return -1;
        }
        if (alreadyDeclared > 0) {
            log.error("Zmienna @artykuły występuje więcej niż jeden raz w pliku " + citationsParsingContext.getPath());
            return -1;
        }
        return declared;
    }

    /**
     * Interprets a line as the citations-count variable of the current article.
     *
     * @param str                     trimmed input line
     * @param citationsParsingContext state of the file being parsed
     * @return {@code 0} when the line is not a citations-count definition; the declared count
     *         (positive) when it is valid and none is set yet for the current article;
     *         {@code -1} when the value is not positive or a count was already set
     */
    private int checkCitationsCountLine(String str, CitationsParsingContext citationsParsingContext) {
        Matcher matcher = CITATIONS_COUNT.matcher(str);
        if (!matcher.matches()) {
            return 0;
        }
        String group = matcher.group(1);
        int atoi = Utils.atoi(group);
        if (atoi <= 0) {
            // The raw value comes from the input file, so it is passed as an argument rather than
            // concatenated into the format string: a value containing "{}" would otherwise be
            // treated as a placeholder and shift the article id and the path.
            log.error("Nieprawidłowa wartość zmiennej @cytowania ({}) dla artykułu [{}] w pliku {}",
                    new Object[] {group, citationsParsingContext.getArticleId(), citationsParsingContext.getPath()});
            return -1;
        }
        if (citationsParsingContext.getArticleExpectedCitationCount() == 0) {
            return atoi;
        }
        log.error("Więcej niż jedna zmienna @cytowania dla artykułu [{}] w pliku {}", citationsParsingContext.getArticleId(), citationsParsingContext.getPath());
        return -1;
    }

    /**
     * Opens a citations file with the configured encoding and parses it.
     *
     * <p>Both the underlying stream and the reader are managed by try-with-resources, so the
     * file handle is released even when the encoding name is rejected while the reader is being
     * created, and a failure during close no longer replaces an exception thrown by parsing
     * (it is attached to it as a suppressed exception instead).
     *
     * @param file citations file to read
     * @throws Exception when the file cannot be opened, the encoding is not supported, or
     *                   reading fails
     */
    private void parse(File file) throws Exception {
        try (FileInputStream input = new FileInputStream(file);
                BufferedReader reader = new BufferedReader(new InputStreamReader(input, this.encoding))) {
            parse(reader, file.getAbsolutePath());
        }
    }

    /**
     * Reads the given input line by line and feeds every non-empty trimmed line to the line
     * processor. When the whole input is consumed the last article is closed, and the articles
     * of this file are merged into the provider's map only if the file reports no errors.
     *
     * @param bufferedReader source of the file content
     * @param str            path of the file, used for diagnostics
     * @throws IOException when reading a line fails
     */
    private void parse(BufferedReader bufferedReader, String str) throws IOException {
        final CitationsParsingContext context = new CitationsParsingContext(str);

        String rawLine;
        while ((rawLine = bufferedReader.readLine()) != null) {
            final String line = rawLine.trim();
            if (line.isEmpty()) {
                continue;
            }
            processCitationLine(line, context);
        }

        // The final article has no following article line to trigger its consumption.
        context.consumeCurrentArticleCitations();
        if (!context.hasNoErrors()) {
            return;
        }
        this.citations.putAll(context.getFileCitations());
    }

    /**
     * Classifies one non-empty, trimmed line and updates the parsing context accordingly.
     *
     * <p>In order: lines without alphanumeric characters are skipped; special lines (count
     * variables) are handled by {@link #processConditionallySpecialLine}; an article definition
     * closes the previous article and starts a new one; anything else is a citation of the
     * current article, or is skipped when no article has been started yet.
     *
     * <p>A line that merely resembles an article definition is warned about and then treated as
     * a citation, exactly as the warning announces. Previously such a line was silently dropped
     * after the warning, which also made the declared citation count fail for that article.
     *
     * @param str                     trimmed, non-empty input line
     * @param citationsParsingContext state of the file being parsed
     */
    private void processCitationLine(String str, CitationsParsingContext citationsParsingContext) {
        if (isLineWithoutAlphanumericCharacters(str)) {
            log.debug("Skipping citation line [{}] ({})", str, citationsParsingContext.getPath());
            return;
        }
        if (processConditionallySpecialLine(str, citationsParsingContext)) {
            return;
        }
        Matcher matcher = ARTICLE_START.matcher(str);
        if (matcher.find()) {
            // Group 3 is the colon alternative; presumably Utils.emptyStr is a null-or-empty check,
            // so this fires when the id was followed by plain whitespace instead of a colon.
            if (Utils.emptyStr(matcher.group(3))) {
                log.warn("Linia będzie traktowana jako definicja artykułu pomimo tego, że brakuje dwukropka [{}] w pliku {}", str, citationsParsingContext.getPath());
            }
            citationsParsingContext.consumeCurrentArticleCitations();
            citationsParsingContext.setArticleId(matcher.group(1));
            return;
        }
        boolean lookalike = ARTICLE_START_LOOKALIKE.matcher(str).find();
        if (lookalike) {
            log.warn("Linia wygląda jak definicja artykułu, ale nie jest prawidłową definicją i zostanie zinterpretowana jako cytowanie [{}] w pliku {}", str, citationsParsingContext.getPath());
        }
        if (citationsParsingContext.getArticleId() == null) {
            // No article to attach the line to; storing it would end up under a null key.
            log.debug("Skipping citation line (before first article in file) [{}] w pliku {}", str, citationsParsingContext.getPath());
            return;
        }
        if (!lookalike && !CITATION_START.matcher(str).find()) {
            log.debug("Unknown citation format [{}] w pliku {}", str, citationsParsingContext.getPath());
        }
        citationsParsingContext.addCitation(str);
    }

    /**
     * @param str line to test
     * @return {@code true} when the whole line matches {@code NO_ALPHANUM_PATTERN}, i.e. it is
     *         non-empty and contains no character of the {@code \p{Alnum}} class
     */
    private boolean isLineWithoutAlphanumericCharacters(String str) {
        final Matcher wholeLine = NO_ALPHANUM_PATTERN.matcher(str);
        return wholeLine.matches();
    }

    /**
     * Handles lines that define a count variable.
     *
     * <p>A line starting with the special character is always consumed: it is tried first as a
     * citations-count definition, then as an articles-count definition, and otherwise ignored
     * with a warning. A line without the special character is consumed only when no citation
     * count is set for the current article and the line is a valid citations-count definition.
     *
     * @param str                     trimmed, non-empty input line
     * @param citationsParsingContext state of the file being parsed
     * @return {@code true} when the line was consumed here and needs no further processing
     */
    private boolean processConditionallySpecialLine(String str, CitationsParsingContext citationsParsingContext) {
        if (str.charAt(0) != SPECIAL_CHAR) {
            if (citationsParsingContext.getArticleExpectedCitationCount() != 0) {
                return false;
            }
            final int unmarkedCount = checkCitationsCountLine(str, citationsParsingContext);
            if (unmarkedCount <= 0) {
                return false;
            }
            log.warn("Linia będzie traktowana jako definicja liczby cytowań pomimo braku @ [{}] w pliku {}", str, citationsParsingContext.getPath());
            citationsParsingContext.setArticleExpectedCitationCount(unmarkedCount);
            return true;
        }

        final int citationCount = checkCitationsCountLine(str, citationsParsingContext);
        if (citationCount != 0) {
            // Both a valid count and the -1 error marker are recorded.
            citationsParsingContext.setArticleExpectedCitationCount(citationCount);
            return true;
        }

        final int articleCount = checkArticlesCountLine(str, citationsParsingContext);
        if (articleCount == 0) {
            log.warn("Zignorowana linia specjalna (być może komentarz): [{}] w pliku {}", str, citationsParsingContext.getPath());
        } else {
            citationsParsingContext.setExpectedArticles(articleCount);
        }
        return true;
    }

    /**
     * @return the provider's own map of article identifier to citation lines; this is the live
     *         internal map, not a copy
     */
    @Override
    public Map<String, List<String>> getAllCitations() {
        final Map<String, List<String>> allCitations = this.citations;
        return allCitations;
    }
}
