package pl.edu.icm.yadda.imports.baztech.content;

import au.com.bytecode.opencsv.CSVReader;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.io.filefilter.DirectoryFileFilter;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.apache.solr.common.util.ContentStreamBase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.yadda.imports.ImportException;

/* loaded from: input_file:WEB-INF/lib/bwmeta-import-1.10.0-RC6.jar:pl/edu/icm/yadda/imports/baztech/content/ContentUrlProviderImpl.class */
/**
 * Collects content URLs from CSV files and exposes them keyed by the value of the first CSV column
 * (the "id baztech" column of the header).
 *
 * <p>A CSV file is only read when it contains a header line matching {@link #HEADER_LINE}; the
 * field separator is taken from that header. Each file is first read as {@code windows-1250} and,
 * if no header is found, re-read using {@link ContentStreamBase#DEFAULT_CHARSET}.
 *
 * <p>The class performs no synchronization of its own.
 */
public class ContentUrlProviderImpl implements ContentUrlProvider {
    private static final Logger log = LoggerFactory.getLogger(ContentUrlProviderImpl.class);

    /** Accepts files whose name ends with {@code .csv}. */
    private static final FileFilter csvFileFilter = new SuffixFileFilter(".csv");

    /** Accepts directories only; used to recurse into sub-directories. */
    private static final FileFilter dirFileFilter = DirectoryFileFilter.INSTANCE;

    /**
     * Header line pattern (matched against the trimmed, lower-cased line). Groups 1 and 2 capture
     * the single character standing between the column names, i.e. the separator candidate.
     */
    private static final Pattern HEADER_LINE = Pattern.compile("\"?id baztech\"?(.)\"?tytuł artykułu\"?(.)\"?url\"?");

    /** Encoding tried first for every CSV file. */
    private static final String PRIMARY_ENCODING = "windows-1250";

    /** Quote character handed to the CSV reader. */
    private char quoteChar = '\"';

    /** Collected URLs: first-column id -> URLs in the order they were encountered. */
    private final Map<String, List<String>> contentUrls = new HashMap<String, List<String>>();

    /**
     * Reads content URLs from the given CSV file, or from every CSV file found below the given
     * directory, adding them to the URLs collected so far.
     *
     * @param file a CSV file or a directory to scan recursively
     * @throws Exception if a file cannot be read or a header line uses two different separators
     */
    @Override // pl.edu.icm.yadda.imports.baztech.content.ContentUrlProvider
    public void prepareContentUrls(File file) throws Exception {
        log.info("Preparation of content URLs from " + file.getAbsolutePath() + " started...");
        visit(file);
        log.info("Preparation of content URLs finished. URLs has been found for " + this.contentUrls.size() + " articles");
    }

    /**
     * Parses {@code file} if it is a CSV file; if it is a directory, parses its CSV files and then
     * descends into its sub-directories.
     */
    private void visit(File file) throws Exception {
        if (!file.isDirectory()) {
            if (csvFileFilter.accept(file)) {
                parse(file);
            } else {
                log.warn("No content url files in path " + file.getAbsolutePath());
            }
            return;
        }

        File[] csvFiles = file.listFiles(csvFileFilter);
        File[] subDirectories = file.listFiles(dirFileFilter);
        // File.listFiles returns null when the directory cannot be listed (I/O error, no access).
        if (csvFiles == null || subDirectories == null) {
            log.warn("Could not list contents of directory " + file.getAbsolutePath());
            return;
        }

        for (File csvFile : csvFiles) {
            // The suffix filter also accepts directories named "*.csv"; those are handled by the
            // recursion below instead of being opened as a file.
            if (csvFile.isFile()) {
                parse(csvFile);
            }
        }
        for (File subDirectory : subDirectories) {
            visit(subDirectory);
        }
    }

    /**
     * Consumes lines from the reader up to and including the first line matching
     * {@link #HEADER_LINE} and returns the separator character found in it.
     *
     * @return the separator, or {@code null} if the end of input is reached without a header line
     * @throws ImportException if the two separators in the header line differ
     */
    private static final Character detectSeparator(BufferedReader bufferedReader) throws Exception {
        for (String line = bufferedReader.readLine(); line != null; line = bufferedReader.readLine()) {
            // Locale.ENGLISH keeps lower-casing independent of the default locale
            // (e.g. under a Turkish locale "ID" would not become "id").
            Matcher matcher = HEADER_LINE.matcher(line.trim().toLowerCase(Locale.ENGLISH));
            if (!matcher.find()) {
                continue;
            }
            String first = matcher.group(1);
            String second = matcher.group(2);
            if (!first.equals(second)) {
                throw new ImportException("Two different types of separators in header line ([" + first + "][" + second + "])");
            }
            return Character.valueOf(first.charAt(0));
        }
        return null;
    }

    /** Opens {@code file} for reading in the given charset, releasing the stream if that fails. */
    private static BufferedReader openReader(File file, String charsetName) throws Exception {
        FileInputStream in = new FileInputStream(file);
        try {
            return new BufferedReader(new InputStreamReader(in, charsetName));
        } catch (Exception e) {
            in.close();
            throw e;
        }
    }

    /**
     * Reads one CSV file and records every row that has a non-empty first column (id) and a
     * non-empty third column (URL). Rows preceding the header line are skipped by separator
     * detection. The reader is always closed, also on early return or failure.
     */
    private void parse(File file) throws Exception {
        String encoding = PRIMARY_ENCODING;
        BufferedReader reader = openReader(file, encoding);
        try {
            Character separator = detectSeparator(reader);
            if (separator == null) {
                // No header found with the primary encoding: start over with the fallback one.
                reader.close();
                encoding = ContentStreamBase.DEFAULT_CHARSET;
                reader = openReader(file, encoding);
                separator = detectSeparator(reader);
                if (separator == null) {
                    log.warn("Could not determine cvs separator in a contents file " + file.getAbsolutePath());
                    return;
                }
            }
            log.debug("CSV file " + file.getAbsolutePath() + " info: detected separator:[" + separator + "], encoding: " + encoding);

            // The reader is positioned just after the header line, so only data rows remain.
            CSVReader csvReader = new CSVReader(reader, separator.charValue(), this.quoteChar);
            int urlCount = 0;
            int recordNo = 0;
            for (String[] fields : csvReader.readAll()) {
                recordNo++;
                if (fields.length < 3) {
                    log.warn("Content URL error: line has less than 3 elements (record #" + recordNo + " after header, file "
                            + file.getAbsolutePath() + DefaultExpressionEngine.DEFAULT_INDEX_END);
                    continue;
                }
                String id = fields[0].trim();
                if (id.length() == 0) {
                    // Rows without an id are ignored silently.
                    continue;
                }
                String url = fields[2].trim();
                if (url.length() == 0) {
                    log.warn("Empty content url for baztech id '" + id + "' (record #" + recordNo + " after header, file "
                            + file.getAbsolutePath() + DefaultExpressionEngine.DEFAULT_INDEX_END);
                    continue;
                }
                urlCount++;
                List<String> urls = this.contentUrls.get(id);
                if (urls == null) {
                    urls = new LinkedList<String>();
                    this.contentUrls.put(id, urls);
                }
                urls.add(url);
            }

            if (urlCount == 0) {
                log.warn("File " + file.getAbsolutePath() + " contains no content URLs");
            } else {
                log.info("File " + file.getAbsolutePath() + " contains " + urlCount + " content URLs");
            }
        } finally {
            // Closing the underlying reader also releases the file used by the CSV reader.
            reader.close();
        }
    }

    /**
     * Returns the URLs collected for the given id.
     *
     * @return the list stored for {@code str}, or {@code null} if none was collected
     */
    @Override // pl.edu.icm.yadda.imports.baztech.content.ContentUrlProvider
    public List<String> getContentUrls(String str) {
        return this.contentUrls.get(str);
    }

    /** Returns the internal id-to-URLs map itself (not a copy). */
    @Override // pl.edu.icm.yadda.imports.baztech.content.ContentUrlProvider
    public Map<String, List<String>> getAllContentUrls() {
        return this.contentUrls;
    }

    /** Returns the number of distinct ids for which at least one URL was collected. */
    public int getArticleCount() {
        return this.contentUrls.size();
    }

    /** Returns the quote character passed to the CSV reader. */
    public char getQuoteChar() {
        return this.quoteChar;
    }

    /** Sets the quote character passed to the CSV reader for subsequently parsed files. */
    @Override // pl.edu.icm.yadda.imports.baztech.content.ContentUrlProvider
    public void setQuoteChar(char c) {
        this.quoteChar = c;
    }
}
