package pl.edu.icm.yadda.parsing.deprec.crfadapter;

import ch.qos.logback.classic.spi.CallerData;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.codehaus.plexus.util.LineOrientedInterpolatingReader;
import org.springframework.aop.framework.autoproxy.target.QuickTargetSourceCreator;
import org.springframework.beans.factory.BeanFactory;
import pl.edu.icm.yadda.parsing.deprec.auxil.Feature2;
import pl.edu.icm.yadda.parsing.deprec.auxil.MInstance;
import pl.edu.icm.yadda.parsing.deprec.auxil.MReference;

/* loaded from: input_file:WEB-INF/lib/yadda-content-4.2.2-SNAPSHOT.jar:pl/edu/icm/yadda/parsing/deprec/crfadapter/TextToMinorTagACRF.class */
public class TextToMinorTagACRF {
    /**
     * Token regex: a run of ASCII letters, or a run of ASCII digits, or one
     * punctuation character from the listed set, or one angle bracket.
     * Compiled in {@code plainTextToMReferences} and applied to every
     * whitespace-separated word.
     */
    private static final String CZASTKA_TAGU = "([a-zA-Z]+)|([0-9]+)|([\\-\\.,\\(\\)\\&\\:\"/'`~;\\[\\]])|([<>])";
    /** Matches a single character that is not an ASCII letter. */
    private static final String NIE_LANCUCH_LITER = "([^a-zA-Z])";
    /** Matches a single character that is not an ASCII digit. */
    private static final String NIE_LANCUCH_CYFR = "([^0-9])";
    /** Platform line separator, appended after every emitted feature line. */
    public static final String NEW_LINE = System.getProperty("line.separator");
    /**
     * Prefix prepended to {@code <n>.txt} when output files are created;
     * {@code main} replaces it with its single command-line argument.
     */
    private static String referencesFileLocation = "/tmp/";

    /**
     * Parses tagged reference text and appends the resulting feature lines to
     * {@code sb}.
     *
     * <p>The text is read line by line; an empty line ends the current
     * reference once it has been started. Feature lines of every started
     * reference are appended to {@code sb}.
     *
     * @param str tagged reference text, one tagged fragment per line
     * @param sb  builder that receives the feature lines
     * @return the first parsed reference; it is an un-started reference when
     *         the text produced no instances before the first separator
     * @throws IOException if reading the text fails
     */
    public static MReference convertStringToFeatureStringBuilder(String str, StringBuilder sb) throws IOException {
        // Typed list instead of a raw LinkedList: no unchecked calls, no casts.
        LinkedList<MReference> references = new LinkedList<MReference>();
        // The parser appends instances to the last element, so seed the list.
        references.add(new MReference());
        plainTextToMReferences(new BufferedReader(new StringReader(str)), references);
        sb.append(mReferencesToStringBuilder(references));
        return references.getFirst();
    }

    /**
     * Renders every started reference as feature lines.
     *
     * <p>One line is produced per instance:
     * {@code <category> ---- <own features> <neighbour features>@<offset> ...}
     * followed by {@link #NEW_LINE}. Neighbour features come from the
     * instances of the same reference at most five positions away, each
     * suffixed with its signed offset from the current instance.
     *
     * @param linkedList references to render; un-started ones are skipped
     * @return builder holding all rendered lines
     */
    private static StringBuilder mReferencesToStringBuilder(LinkedList<MReference> linkedList) {
        final int neighbourWindow = 5;
        StringBuilder rendered = new StringBuilder("");
        for (MReference reference : linkedList) {
            if (!reference.isStarted()) {
                continue;
            }
            int position = 0;
            for (MInstance current : reference.instances) {
                rendered.append(current.category).append(" ---- ");
                for (String ownFeature : current.feature) {
                    rendered.append(ownFeature).append(" ");
                }
                int neighbourPosition = 0;
                for (MInstance neighbour : reference.instances) {
                    int offset = neighbourPosition - position;
                    if (offset != 0 && Math.abs(offset) <= neighbourWindow) {
                        for (String neighbourFeature : neighbour.feature) {
                            rendered.append(neighbourFeature).append("@").append(offset).append(" ");
                        }
                    }
                    neighbourPosition++;
                }
                rendered.append(NEW_LINE);
                position++;
            }
        }
        return rendered;
    }

    /**
     * Reads the bundled {@code label-extractor.txt} resource, parses it into
     * references and writes their feature lines to the output files.
     *
     * @param strArr when exactly one argument is given it replaces the
     *               output-file prefix; any other argument count is ignored
     * @throws FileNotFoundException if the resource is not on the classpath
     * @throws IOException           if reading the resource or writing the
     *                               output files fails
     */
    public static void main(String[] strArr) throws FileNotFoundException, IOException {
        if (strArr.length == 1) {
            referencesFileLocation = strArr[0];
        }
        String resourceName = "pl/edu/icm/yadda/parsing/label-extractor.txt";
        // getResourceAsStream returns null for a missing resource; fail with a
        // descriptive error instead of a NullPointerException further down.
        InputStream resource = TextToMinorTagACRF.class.getClassLoader().getResourceAsStream(resourceName);
        if (resource == null) {
            throw new FileNotFoundException("Classpath resource not found: " + resourceName);
        }
        // NOTE(review): the resource is decoded with the platform default
        // charset, as before - confirm the file's encoding.
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(resource));
        LinkedList<MReference> references = new LinkedList<MReference>();
        // The parser appends instances to the last element, so seed the list.
        references.add(new MReference());
        plainTextToMReferences(bufferedReader, references);
        mReferencesToFiles(references);
    }

    /** List index of the last reference that goes into the first output file. */
    private static final int FIRST_FILE_LAST_REFERENCE_INDEX = 557;
    /** Number of list positions that go into the second output file. */
    private static final int SECOND_FILE_REFERENCE_COUNT = 200;
    /** Maximum distance of a neighbour whose features are added to a line. */
    private static final int FILE_NEIGHBOUR_WINDOW = 7;

    /**
     * Writes the feature lines of the references to two files named
     * {@code referencesFileLocation + "1.txt"} and
     * {@code referencesFileLocation + "2.txt"}.
     *
     * <p>List positions 0..557 go to the first file and the following 200
     * positions to the second; anything beyond that is ignored. Positions are
     * counted over all references, including un-started ones, which produce
     * no output. The second file is created only when the list is long enough
     * to reach it. Each line has the form
     * {@code <category> ---- <own features> <neighbour features>@<offset> ...}
     * where neighbours are instances of the same reference at most seven
     * positions away.
     *
     * @param linkedList references to write
     * @throws IOException if a file cannot be opened or written
     */
    private static void mReferencesToFiles(LinkedList<MReference> linkedList) throws IOException {
        int fileNumber = 1;
        BufferedWriter writer = openReferencesFile(fileNumber);
        try {
            int referenceIndex = 0;
            for (MReference reference : linkedList) {
                if (referenceIndex == FIRST_FILE_LAST_REFERENCE_INDEX + 1) {
                    // First file is full: continue in the second one.
                    writer.close();
                    fileNumber++;
                    writer = openReferencesFile(fileNumber);
                }
                if (referenceIndex > FIRST_FILE_LAST_REFERENCE_INDEX + SECOND_FILE_REFERENCE_COUNT) {
                    break;
                }
                if (reference.isStarted()) {
                    int position = 0;
                    for (MInstance current : reference.instances) {
                        StringBuilder line = new StringBuilder();
                        line.append(current.category).append(" ---- ");
                        for (String ownFeature : current.feature) {
                            line.append(ownFeature).append(" ");
                        }
                        int neighbourPosition = 0;
                        for (MInstance neighbour : reference.instances) {
                            int offset = neighbourPosition - position;
                            if (offset != 0 && Math.abs(offset) <= FILE_NEIGHBOUR_WINDOW) {
                                for (String neighbourFeature : neighbour.feature) {
                                    line.append(neighbourFeature).append("@").append(offset).append(" ");
                                }
                            }
                            neighbourPosition++;
                        }
                        line.append(NEW_LINE);
                        // No per-line flush: close() flushes the buffer.
                        writer.write(line.toString());
                        position++;
                    }
                }
                referenceIndex++;
            }
        } finally {
            // Runs on the error path too, so the file handle never leaks;
            // closing an already closed BufferedWriter is a no-op.
            writer.close();
        }
    }

    /** Opens (creating or truncating) the output file with the given number. */
    private static BufferedWriter openReferencesFile(int fileNumber) throws IOException {
        // FileWriter creates the file itself; no separate createNewFile() needed.
        return new BufferedWriter(new FileWriter(referencesFileLocation + fileNumber + ".txt"));
    }

    /**
     * Reads tagged text line by line and fills {@code linkedList} with the
     * instances it contains.
     *
     * <p>A non-empty line is stripped of its leading category tag and its
     * tokens are appended to the last reference in the list. An empty line
     * appends a fresh reference, but only when the last one has already been
     * started, so consecutive empty lines do not create empty references.
     * The reader is always closed, also when reading or parsing fails.
     *
     * @param bufferedReader source of the tagged text; closed by this method
     * @param linkedList     target list; must already contain at least one
     *                       reference
     * @throws IOException if reading or closing the reader fails
     */
    private static void plainTextToMReferences(BufferedReader bufferedReader, LinkedList<MReference> linkedList) throws IOException {
        Pattern tokenPattern = Pattern.compile(CZASTKA_TAGU);
        try {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                if (line.length() == 0) {
                    if (linkedList.getLast().isStarted()) {
                        linkedList.add(new MReference());
                    }
                    continue;
                }
                // The buffer receives the category name of the line.
                StringBuffer category = new StringBuffer("");
                String content = getCategoryAndTrimString(line, category);
                if (content != null) {
                    extractInstances(content, category.toString(), linkedList, tokenPattern);
                }
            }
        } finally {
            // Previously the reader was closed only after a complete read.
            bufferedReader.close();
        }
    }

    /**
     * Splits {@code str} on whitespace and turns every pattern match inside
     * each word into an instance of the last reference in the list.
     *
     * <p>Within a word the first match is flagged as starter and gets
     * {@code Feature2.TAG_STARTOWY} as its first feature; the last match is
     * flagged as ender and gets {@code Feature2.TAG_KONCOWY} as its last
     * feature. A word with a single match receives both.
     *
     * @param str        text of one line with the category tag removed
     * @param str2       category assigned to every created instance
     * @param linkedList references; instances go to the last element
     * @param pattern    token pattern applied to each word
     */
    private static void extractInstances(String str, String str2, LinkedList<MReference> linkedList, Pattern pattern) {
        String[] words = str.split("\\s");
        for (int w = 0; w < words.length; w++) {
            String word = words[w];
            if (word.length() == 0) {
                continue;
            }
            Matcher tokens = pattern.matcher(word);
            MInstance lastInWord = null;
            while (tokens.find()) {
                boolean firstInWord = lastInWord == null;
                MInstance instance = new MInstance();
                instance.starter = false;
                instance.ender = false;
                if (firstInWord) {
                    instance.feature.add(Feature2.TAG_STARTOWY);
                    instance.starter = true;
                }
                instance.name = tokens.group();
                instance.category = str2;
                addOneSignSpecialFeature(instance);
                addChainYearLowercaseUppercaseFeature(instance);
                linkedList.getLast().addInstance(instance);
                lastInWord = instance;
            }
            if (lastInWord == null) {
                continue;
            }
            lastInWord.feature.add(Feature2.TAG_KONCOWY);
            lastInWord.ender = true;
        }
    }

    /** Compiled once; finds any character that is not an ASCII digit. */
    private static final Pattern NON_DIGIT_PATTERN = Pattern.compile(NIE_LANCUCH_CYFR);
    /** Compiled once; finds any character that is not an ASCII letter. */
    private static final Pattern NON_LETTER_PATTERN = Pattern.compile(NIE_LANCUCH_LITER);

    /**
     * Adds the digit-chain / letter-chain features of a token.
     *
     * <ul>
     * <li>Digits only: {@code LANCUCH_CYFR}, plus {@code ZAWIERA_ROK} when the
     *     value lies strictly between 1900 and 2099.</li>
     * <li>ASCII letters only: {@code LANCUCH_LITER}, plus a feature for the
     *     case of the first letter and, when the whole token shares that
     *     case, the matching all-lowercase / all-uppercase feature.</li>
     * <li>Anything else: no feature is added.</li>
     * </ul>
     *
     * @param mInstance token whose {@code name} is inspected and whose
     *                  {@code feature} collection is extended
     */
    private static void addChainYearLowercaseUppercaseFeature(MInstance mInstance) {
        String name = mInstance.name;
        if (!NON_DIGIT_PATTERN.matcher(name).find()) {
            mInstance.feature.add(Feature2.LANCUCH_CYFR);
            if (isYearBetween1901And2098(name)) {
                mInstance.feature.add(Feature2.ZAWIERA_ROK);
            }
            return;
        }
        if (NON_LETTER_PATTERN.matcher(name).find()) {
            return;
        }
        mInstance.feature.add(Feature2.LANCUCH_LITER);
        // Here the name is non-empty and consists of ASCII letters only, so
        // the first letter is either lower case or upper case.
        if (Character.isLowerCase(name.charAt(0))) {
            mInstance.feature.add(Feature2.ZACZYNA_SIE_Z_MALEJ_LITERY);
            if (name.toLowerCase().equals(name)) {
                mInstance.feature.add(Feature2.ZAWIERA_MALE_ZNAKI);
            }
        } else {
            mInstance.feature.add(Feature2.ZACZYNA_SIE_Z_DUZEJ_LITERY);
            if (name.toUpperCase().equals(name)) {
                mInstance.feature.add(Feature2.ZAWIERA_DUZE_ZNAKI);
            }
        }
    }

    /**
     * Tells whether a digit string denotes a number strictly between 1900 and
     * 2099. Digit runs that do not fit into an {@code int} (long identifiers
     * such as ISBNs) are not years and no longer abort parsing.
     */
    private static boolean isYearBetween1901And2098(String digits) {
        try {
            int value = Integer.parseInt(digits);
            return value > 1900 && value < 2099;
        } catch (NumberFormatException ignored) {
            // Value out of int range (or empty): cannot be a year.
            return false;
        }
    }

    /**
     * Adds {@code MA_DLUGOSC_JEDEN} to a one-character token and
     * {@code MA_DLUGOSC_DWA} to a two-character token; longer or empty
     * tokens get no feature.
     *
     * @param mInstance token whose {@code name} length is inspected
     */
    private static void addLengthFeature(MInstance mInstance) {
        switch (mInstance.name.length()) {
            case 1:
                mInstance.feature.add(Feature2.MA_DLUGOSC_JEDEN);
                break;
            case 2:
                mInstance.feature.add(Feature2.MA_DLUGOSC_DWA);
                break;
            default:
                break;
        }
    }

    /**
     * Adds the punctuation feature of a token that consists of exactly one
     * special character; any other token is left untouched.
     *
     * <p>The characters are spelled out as literals. The previous version
     * compared six of them against constants of unrelated libraries
     * ({@code BeanFactory.FACTORY_BEAN_PREFIX} for {@code &},
     * {@code LineOrientedInterpolatingReader.DEFAULT_ESCAPE_SEQ} for
     * {@code \}, {@code DefaultExpressionEngine.DEFAULT_INDEX_END} and
     * {@code DEFAULT_INDEX_START} for {@code )} and {@code (},
     * {@code QuickTargetSourceCreator.PREFIX_THREAD_LOCAL} for {@code %},
     * {@code CallerData.NA} for {@code ?}), which tied tokenisation to those
     * libraries' internals.
     *
     * @param mInstance token whose {@code name} is inspected and whose
     *                  {@code feature} collection is extended
     */
    private static void addOneSignSpecialFeature(MInstance mInstance) {
        String name = mInstance.name;
        if (name.length() != 1) {
            return;
        }
        switch (name.charAt(0)) {
            case '&':
                mInstance.feature.add(Feature2.JEST_AMPERSANDEM);
                break;
            case '\'':
                mInstance.feature.add(Feature2.JEST_APOSTROFEM);
                break;
            case '*':
                // The old chain also had a later "*" branch adding
                // JEST_GWIAZDKA; it was unreachable and has been removed.
                mInstance.feature.add(Feature2.JEST_ASTERIKSEM);
                break;
            case '\\':
                mInstance.feature.add(Feature2.JEST_BACK_SLASHEM);
                break;
            case '^':
                mInstance.feature.add(Feature2.JEST_CARET);
                break;
            case ':':
                mInstance.feature.add(Feature2.JEST_DWUKROPKIEM);
                break;
            case '.':
                mInstance.feature.add(Feature2.JEST_KROPKA);
                break;
            case '@':
                mInstance.feature.add(Feature2.JEST_MALPA);
                break;
            case '-':
                mInstance.feature.add(Feature2.JEST_MYSLNIKIEM);
                break;
            case '>':
                mInstance.feature.add(Feature2.JEST_NAWIASEM_KATOWYM_KONC);
                break;
            case '<':
                mInstance.feature.add(Feature2.JEST_NAWIASEM_KATOWYM_POCZ);
                break;
            case '}':
                mInstance.feature.add(Feature2.JEST_NAWIASEM_KLAMROWYM_KONC);
                break;
            case '{':
                mInstance.feature.add(Feature2.JEST_NAWIASEM_KLAMROWYM_POCZ);
                break;
            case ']':
                mInstance.feature.add(Feature2.JEST_NAWIASEM_KWADRATOWYM_KONC);
                break;
            case '[':
                mInstance.feature.add(Feature2.JEST_NAWIASEM_KWADRATOWYM_POCZ);
                break;
            case ')':
                mInstance.feature.add(Feature2.JEST_NAWIASEM_OKRAGLYM_KONC);
                break;
            case '(':
                mInstance.feature.add(Feature2.JEST_NAWIASEM_OKRAGLYM_POCZ);
                break;
            case '%':
                mInstance.feature.add(Feature2.JEST_PROCENTEM);
                break;
            case ',':
                mInstance.feature.add(Feature2.JEST_PRZECINKIEM);
                break;
            case '?':
                mInstance.feature.add(Feature2.JEST_PYTAJNIKIEM);
                break;
            case '/':
                mInstance.feature.add(Feature2.JEST_SLASHEM);
                break;
            case ';':
                mInstance.feature.add(Feature2.JEST_SREDNIKIEM);
                break;
            case '~':
                mInstance.feature.add(Feature2.JEST_TYLDA);
                break;
            case '!':
                mInstance.feature.add(Feature2.JEST_WYKRZYKNIKIEM);
                break;
            case '#':
                mInstance.feature.add(Feature2.JEST_HASHEM);
                break;
            case '$':
                mInstance.feature.add(Feature2.JEST_DOLAREM);
                break;
            case '=':
                mInstance.feature.add(Feature2.JEST_ROWNA_SIE);
                break;
            case '"':
                mInstance.feature.add(Feature2.JEST_CUDZYSLOWEM);
                break;
            case '|':
                mInstance.feature.add(Feature2.JEST_KRESKA);
                break;
            default:
                break;
        }
    }

    /** Category names recognised as a leading {@code <name>} tag. */
    private static final String[] LEADING_TAG_CATEGORIES = {
        "author", "year", "title", "type", "institution", "address", "note", "journal",
        "volume", "pages", "booktitle", "month", "publisher", "editor", "date"
    };

    /**
     * Detects the category tag at the start of a line.
     *
     * <p>When the line starts with one of the known {@code <name>} tags, the
     * name is appended to {@code stringBuffer} and the rest of the line is
     * returned. Otherwise {@code "author"} is appended and the line is
     * returned unchanged.
     *
     * @param str          line to inspect
     * @param stringBuffer receives the category name
     * @return the line without its leading tag, or the line itself when no
     *         known tag was found
     */
    private static String getCategoryAndTrimString(String str, StringBuffer stringBuffer) {
        for (String category : LEADING_TAG_CATEGORIES) {
            String openingTag = "<" + category + ">";
            if (str.startsWith(openingTag)) {
                stringBuffer.append(category);
                return str.substring(openingTag.length());
            }
        }
        stringBuffer.append("author");
        return str;
    }
}
