package pl.edu.icm.sedno.HMM.main;

import java.net.URISyntaxException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.dbcp.BasicDataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import pl.edu.icm.sedno.HMM.constant.Labels;
import pl.edu.icm.sedno.HMM.container.Record;
import pl.edu.icm.sedno.HMM.container.Token;
import pl.edu.icm.sedno.HMM.features.common.IsAllDigitsFeature;
import pl.edu.icm.sedno.HMM.features.common.IsAllLettersFeature;
import pl.edu.icm.sedno.HMM.features.common.IsAllLettersOrDigitsFeature;
import pl.edu.icm.sedno.HMM.features.common.IsContainManyWordsBeforeFeature;
import pl.edu.icm.sedno.HMM.features.common.IsContainManyWordsFeature;
import pl.edu.icm.sedno.HMM.features.common.IsContainManyWordsIgnoreCaseFeature;
import pl.edu.icm.sedno.HMM.features.common.IsContainOneWordFeature;
import pl.edu.icm.sedno.HMM.features.common.IsDashBetweenNumbersFeature;
import pl.edu.icm.sedno.HMM.features.common.IsDigitDashDigitFeature;
import pl.edu.icm.sedno.HMM.features.common.IsOneOfWordsIgnoreCaseFeature;
import pl.edu.icm.sedno.HMM.features.common.IsRomanNumber;
import pl.edu.icm.sedno.HMM.features.common.IsSmallIntegerFeature;
import pl.edu.icm.sedno.HMM.features.common.IsWordFeature;
import pl.edu.icm.sedno.HMM.features.common.LengthFeature;
import pl.edu.icm.sedno.HMM.features.common.LetterRelativeCountFeature;
import pl.edu.icm.sedno.HMM.features.common.NumberOfTokensFeature;
import pl.edu.icm.sedno.HMM.features.common.RelativePositionFeature;
import pl.edu.icm.sedno.HMM.features.yearVolPage.IsNumberTextFeature;
import pl.edu.icm.sedno.HMM.features.yearVolPage.IsYearFeature;
import pl.edu.icm.sedno.HMM.features.yearVolPage.IsYears2000_NowFeature;
import pl.edu.icm.sedno.HMM.importer.LearningDataImporter;
import pl.edu.icm.sedno.HMM.sevices.HMMMasterService;
import pl.edu.icm.yadda.analysis.hmm.features.FeatureCalculator;

/* loaded from: input_file:pl/edu/icm/sedno/HMM/main/HMMYearVolPage.class */
/**
 * Command-line tool that splits a combined free-text bibliographic column into
 * separate volume and page columns.
 *
 * <p>For each processed table the tool reads {@code id} and the combined text
 * column, tokenizes the text, lets {@link HMMMasterService} label the tokens
 * (trained on the records loaded by {@link LearningDataImporter}) and writes
 * the tokens labelled {@code TOM}/{@code TOM_E} to column {@code TOM} and the
 * tokens labelled {@code STRONY}/{@code STRONY_E} to column {@code STRONY}.
 */
public class HMMYearVolPage {
    /** Training file used when no path is passed as the first command-line argument. */
    public static final String hmmTrainFile = "D:/moje dokumenty/icm/publikacje2010/publikacja_all.csv";
    private static final String DATA_SOURCE_BEAN = "sednoCoreDB";
    /** A progress line is logged every this many rows during the update phase. */
    private static final int PROGRESS_LOG_INTERVAL = 100;
    private static final Logger logger = LoggerFactory.getLogger(HMMYearVolPage.class);
    /** Feature calculators in a fixed order; populated once by the static initializer below. */
    private static final List<FeatureCalculator> featureList = new ArrayList<FeatureCalculator>();

    private BasicDataSource sednoCoreDB;
    private List<Labels> labels;
    private List<Record> learning;

    /**
     * Loads the training data, obtains the data source from the Spring context
     * and processes the {@code s_publikacje} and {@code s_monografia} tables.
     *
     * @param strArr optional; {@code strArr[0]} overrides {@link #hmmTrainFile}
     * @throws Exception if the training data cannot be loaded, the Spring
     *                   context cannot be created or a database error aborts a table
     */
    public static void main(String[] strArr) throws URISyntaxException, Exception {
        HMMYearVolPage splitter = new HMMYearVolPage();
        splitter.labels = new ArrayList<Labels>(Arrays.asList(Labels.values()));
        String trainFile = strArr.length > 0 ? strArr[0] : hmmTrainFile;
        splitter.learning = new LearningDataImporter().run(trainFile, splitter.labels);

        ClassPathXmlApplicationContext context = null;
        try {
            if (splitter.sednoCoreDB == null) {
                context = new ClassPathXmlApplicationContext("spring/applicationContext-database.xml");
                splitter.sednoCoreDB = (BasicDataSource) context.getBean(DATA_SOURCE_BEAN);
            }
            splitter.split("s_publikacje", "rok_tom_str");
            splitter.split("s_monografia", "miejsce_rok_tom_str");
        } finally {
            // Release the Spring context (and the beans it manages) once both tables are done.
            if (context != null) {
                context.close();
            }
        }
    }

    /**
     * Reads every row of a table, classifies the tokens of the combined text
     * column and writes the extracted volume and pages back to the same table.
     *
     * <p>The table and column names are concatenated into the SQL text because
     * identifiers cannot be bound as parameters; they must therefore only ever
     * come from trusted constants, as they do in {@link #main(String[])}.
     *
     * @param str  name of the table to process
     * @param str2 name of the column holding the combined text
     * @throws SQLException if no connection can be obtained or the UPDATE
     *                      statement cannot be prepared
     */
    private void split(String str, String str2) throws SQLException {
        List<Record> records = new ArrayList<Record>();
        List<Integer> ids = new ArrayList<Integer>();
        Connection connection = this.sednoCoreDB.getConnection();
        try {
            loadRecords(connection, str, str2, records, ids);
            new HMMMasterService(getFeatureList(), this.labels).classify(this.learning, records);
            updateRows(connection, str, records, ids);
        } finally {
            try {
                connection.close();
            } catch (SQLException e) {
                // Logged rather than rethrown so it cannot mask an exception from the work above.
                logger.warn("Could not close connection after processing " + str, e);
            }
        }
        logger.info("done");
    }

    /**
     * Loads {@code id} and the combined text column of every row into the two
     * parallel lists; {@code ids.get(i)} is the key of {@code records.get(i)}.
     * A failure while reading is logged and loading stops; rows read up to that
     * point are kept so that they can still be processed.
     */
    private void loadRecords(Connection connection, String table, String column,
            List<Record> records, List<Integer> ids) {
        String query = "SELECT id, " + column + " FROM " + table;
        Statement statement = null;
        ResultSet rows = null;
        try {
            statement = connection.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
            rows = statement.executeQuery(query);
            while (rows.next()) {
                int id = rows.getInt("id");
                String text = rows.getString(column);
                Record record = new Record();
                // A NULL column is treated as empty text.
                record.setText(text == null ? "" : text);
                record.fillTokens(HMMYearVolPageCrossValidate.SEPARATOR);
                // Both lists are appended together so they always stay aligned.
                ids.add(Integer.valueOf(id));
                records.add(record);
            }
        } catch (Exception e) {
            // Best effort: report the problem and continue with what was loaded.
            logger.error("Error during load " + table, e);
        } finally {
            closeQuietly(rows);
            closeQuietly(statement);
        }
    }

    /**
     * Writes the classified volume and pages of every record back to the table.
     * Each row is attempted exactly once; a row that fails is logged and
     * skipped so that a single bad row can neither stop nor stall the run.
     *
     * @throws SQLException if the UPDATE statement cannot be prepared
     */
    private void updateRows(Connection connection, String table, List<Record> records, List<Integer> ids)
            throws SQLException {
        String update = "UPDATE " + table + " SET TOM=?, STRONY=? WHERE id=?";
        logger.info("start " + table + " update loop");
        int updated = 0;
        int failed = 0;
        // Prepared once and reused for every row.
        PreparedStatement statement = connection.prepareStatement(update);
        try {
            for (int i = 0; i < records.size(); i++) {
                if (i % PROGRESS_LOG_INTERVAL == 0) {
                    logger.info(updated + " rows updated");
                }
                try {
                    Record record = records.get(i);
                    statement.setString(1, joinTokenText(record, Labels.TOM, Labels.TOM_E));
                    statement.setString(2, joinTokenText(record, Labels.STRONY, Labels.STRONY_E));
                    statement.setInt(3, ids.get(i).intValue());
                    statement.executeUpdate();
                    updated++;
                } catch (Exception e) {
                    failed++;
                    logger.error("Error during write to " + table + " (id=" + ids.get(i) + ")", e);
                }
            }
        } finally {
            closeQuietly(statement);
        }
        logger.info(updated + " rows updated");
        if (failed > 0) {
            logger.warn(failed + " rows of " + table + " could not be updated");
        }
    }

    /**
     * Concatenates, in token order and separated by single spaces, the text of
     * all tokens of the record that carry one of the two given labels. Tokens
     * without a label never match.
     */
    private static String joinTokenText(Record record, Labels first, Labels second) {
        StringBuilder joined = new StringBuilder();
        int tokenCount = record.getTokens().size();
        for (int j = 0; j < tokenCount; j++) {
            Token token = (Token) record.getTokens().get(j);
            Object label = token.getLabel();
            if (first.equals(label) || second.equals(label)) {
                joined.append(token.getText()).append(' ');
            }
        }
        return joined.toString().trim();
    }

    /** Closes the statement if it is non-null, logging instead of propagating a failure. */
    private static void closeQuietly(Statement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                logger.warn("Could not close statement", e);
            }
        }
    }

    /** Closes the result set if it is non-null, logging instead of propagating a failure. */
    private static void closeQuietly(ResultSet resultSet) {
        if (resultSet != null) {
            try {
                resultSet.close();
            } catch (SQLException e) {
                logger.warn("Could not close result set", e);
            }
        }
    }

    /**
     * Returns the shared list of feature calculators. The returned list is the
     * internal instance, not a copy.
     */
    @SuppressWarnings("unchecked")
    public static List<FeatureCalculator<Token, Record>> getFeatureList() {
        // The backing field uses the raw element type, so an explicit unchecked cast is required here.
        return (List<FeatureCalculator<Token, Record>>) (List<?>) featureList;
    }

    /** Builds a fresh, mutable word list for a feature constructor. */
    private static ArrayList<String> words(String... items) {
        return new ArrayList<String>(Arrays.asList(items));
    }

    // The registration order below is kept unchanged: calculators are stored in
    // a list and every word-based feature receives its own list instance.
    static {
        featureList.add(new IsSmallIntegerFeature());
        featureList.add(new IsRomanNumber());
        featureList.add(new IsDashBetweenNumbersFeature());
        featureList.add(new IsYears2000_NowFeature());
        featureList.add(new IsAllDigitsFeature());
        featureList.add(new IsAllLettersFeature());
        featureList.add(new IsAllLettersOrDigitsFeature());
        featureList.add(new IsNumberTextFeature());
        featureList.add(new IsYearFeature());
        featureList.add(new LengthFeature());
        featureList.add(new NumberOfTokensFeature());
        featureList.add(new RelativePositionFeature());
        featureList.add(new LetterRelativeCountFeature());
        featureList.add(new IsDigitDashDigitFeature());
        featureList.add(new IsContainOneWordFeature("bibliogr."));
        featureList.add(new IsContainOneWordFeature("abstr."));
        featureList.add(new IsContainOneWordFeature("il."));
        featureList.add(new IsContainOneWordFeature("poz."));
        featureList.add(new IsContainOneWordFeature("."));
        featureList.add(new IsContainOneWordFeature("\""));
        featureList.add(new IsContainOneWordFeature("'"));
        featureList.add(new IsContainOneWordFeature("\\"));
        featureList.add(new IsContainOneWordFeature("/"));
        featureList.add(new IsContainOneWordFeature("()"));
        featureList.add(new IsWordFeature("-"));
        // Volume / issue markers (exact word, case-insensitive).
        featureList.add(new IsOneOfWordsIgnoreCaseFeature(
                words("nr", "seria", "no.", "zeszyt", "supl.", "vol.", "t.", "z.", "tom", "issue")));
        // Page markers (exact word, case-insensitive).
        featureList.add(new IsOneOfWordsIgnoreCaseFeature(
                words("s.", "ss.", "str.", "pp.", "p.")));
        // Polish month names.
        featureList.add(new IsContainManyWordsIgnoreCaseFeature(
                words("styczeń", "luty", "marzec", "kwiecień", "maj", "czerwiec", "lipiec",
                        "sierpień", "wrzesień", "październik", "listopad", "grudzień")));
        // Polish city names.
        featureList.add(new IsContainManyWordsIgnoreCaseFeature(
                words("Warszawa", "Kraków", "Łódź", "Wrocław", "Poznań", "Gdańsk", "Szczecin",
                        "Bydgoszcz", "Lublin", "Katowice", "Białystok", "Gdynia", "Częstochowa",
                        "Radom", "Sosnowiec", "Toruń", "Kielce")));
        // Page markers.
        featureList.add(new IsContainManyWordsIgnoreCaseFeature(
                words("s.", "ss.", "str.", "pp.", "p.")));
        // Volume / issue markers; "no" and "vol" are deliberately kept without the trailing dot.
        featureList.add(new IsContainManyWordsIgnoreCaseFeature(
                words("nr", "seria", "no", "zeszyt", "supl.", "vol", "t.", "z.", "tom", "issue")));
        featureList.add(new IsContainManyWordsFeature(words("(", ")")));
        featureList.add(new IsContainManyWordsIgnoreCaseFeature(words("http", "www.")));
        featureList.add(new IsContainManyWordsBeforeFeature(words("nr", "tom", "Nr", "Tom")));
        featureList.add(new IsContainManyWordsBeforeFeature(words("-")));
    }
}
