package edu.umass.cs.mallet.share.casutton.ner;

import com.itextpdf.text.html.HtmlTags;
import edu.umass.cs.mallet.base.extract.StringSpan;
import edu.umass.cs.mallet.base.extract.StringTokenization;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.LabelAlphabet;
import edu.umass.cs.mallet.base.types.LabelSequence;
import edu.umass.cs.mallet.base.types.Token;
import edu.umass.cs.mallet.projects.seg_plus_coref.coreference.Citation;
import java.util.regex.Pattern;
import org.apache.commons.cli.HelpFormatter;

/* loaded from: input_file:edu/umass/cs/mallet/share/casutton/ner/ConllNer2003Sentence2TokenSequence.class */
public class ConllNer2003Sentence2TokenSequence extends Pipe {
    static final String[] endings = {"ing", "ed", "ogy", HtmlTags.S, "ly", "ion", "tion", "ity", "ies"};
    static Pattern[] endingPatterns = new Pattern[endings.length];
    static final String[][][] endingNames = new String[2][3][endings.length];
    boolean saveSource;
    boolean doConjunctions;
    boolean doTags;
    boolean doPhrases;
    boolean doSpelling;
    boolean doDigitCollapses;
    boolean doDowncasing;
    private static final long serialVersionUID = -7326674871670572522L;
    static Class class$edu$umass$cs$mallet$base$types$LabelAlphabet;

    /* JADX WARN: Illegal instructions before constructor call */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public ConllNer2003Sentence2TokenSequence() {
        /*
            Method dump skipped, instructions count: 201
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: edu.umass.cs.mallet.share.casutton.ner.ConllNer2003Sentence2TokenSequence.<init>():void");
    }

    /* JADX WARN: Illegal instructions before constructor call */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public ConllNer2003Sentence2TokenSequence(boolean r7, boolean r8) {
        /*
            Method dump skipped, instructions count: 219
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: edu.umass.cs.mallet.share.casutton.ner.ConllNer2003Sentence2TokenSequence.<init>(boolean, boolean):void");
    }

    @Override // edu.umass.cs.mallet.base.pipe.Pipe
    public Instance pipe(Instance instance) {
        String str;
        String[] split = ((String) instance.getData()).split("\n");
        LabelSequence labelSequence = new LabelSequence((LabelAlphabet) getTargetAlphabet(), split.length);
        boolean[][] zArr = new boolean[3][endings.length];
        boolean[][] zArr2 = new boolean[3][endings.length];
        boolean[][] zArr3 = new boolean[3][endings.length];
        StringBuffer stringBuffer = this.saveSource ? new StringBuffer() : null;
        StringTokenization stringTokenization = new StringTokenization(stringBuffer);
        String str2 = "NOLABEL";
        Pattern compile = Pattern.compile("I-.*");
        String str3 = null;
        String str4 = null;
        String str5 = null;
        for (int i = 0; i < split.length; i++) {
            if (split[i].length() != 0) {
                try {
                    String[] split2 = split[i].split(HelpFormatter.DEFAULT_LONG_OPT_SEPARATOR);
                    int i2 = 0 + 1;
                    str = split2[0];
                    if (this.doTags) {
                        i2++;
                        str3 = split2[i2];
                    }
                    if (this.doPhrases) {
                        int i3 = i2;
                        i2++;
                        str4 = split2[i3];
                    }
                    if (isTargetProcessing()) {
                        int i4 = i2;
                        int i5 = i2 + 1;
                        str5 = split2[i4];
                    }
                } catch (ArrayIndexOutOfBoundsException e) {
                    throw new IllegalArgumentException(new StringBuffer().append("Invalid line ").append(split[i]).append(" : expected word ").append(this.doTags ? ", tag" : "").append(this.doPhrases ? ", phrase" : "").append(isTargetProcessing() ? ", target" : "").append(".").toString());
                }
            } else {
                str = "-<S>-";
                str3 = "-<S>-";
                str4 = "-<S>-";
                str5 = Citation.other;
            }
            if (this.doDigitCollapses) {
                if (str.matches("19\\d\\d")) {
                    str = "<YEAR>";
                } else if (str.matches("19\\d\\ds")) {
                    str = "<YEARDECADE>";
                } else if (str.matches("19\\d\\d-\\d+")) {
                    str = "<YEARSPAN>";
                } else if (str.matches("\\d+\\\\/\\d")) {
                    str = "<FRACTION>";
                } else if (str.matches("\\d[\\d,\\.]*")) {
                    str = "<DIGITS>";
                } else if (str.matches("19\\d\\d-\\d\\d-\\d--d")) {
                    str = "<DATELINEDATE>";
                } else if (str.matches("19\\d\\d-\\d\\d-\\d\\d")) {
                    str = "<DATELINEDATE>";
                } else if (str.matches(".*-led")) {
                    str = "<LED>";
                } else if (str.matches(".*-sponsored")) {
                    str = "<LED>";
                }
            }
            if (this.doDowncasing) {
                str = str.toLowerCase();
            }
            int length = stringBuffer.length();
            if (this.saveSource) {
                if (str.equals("-<S>-")) {
                    stringBuffer.append("\n\n");
                }
                stringBuffer.append(str);
                stringBuffer.append(HelpFormatter.DEFAULT_LONG_OPT_SEPARATOR);
            }
            StringSpan stringSpan = new StringSpan(stringBuffer, length, stringBuffer.length() - 1);
            if (this.doSpelling) {
                for (int i6 = 0; i6 < endings.length; i6++) {
                    zArr[2][i6] = zArr[1][i6];
                    zArr[1][i6] = zArr[0][i6];
                    zArr[0][i6] = endingPatterns[i6].matcher(str).matches();
                    if (zArr[0][i6]) {
                        stringSpan.setFeatureValue(endingNames[0][0][i6], 1.0d);
                    }
                }
            }
            if (this.doTags) {
                stringSpan.setFeatureValue(new StringBuffer().append("T=").append(str3).toString(), 1.0d);
            }
            if (this.doPhrases) {
                stringSpan.setFeatureValue(new StringBuffer().append("P=").append(str4).toString(), 1.0d);
            }
            stringTokenization.add((Token) stringSpan);
            if (isTargetProcessing()) {
                String str6 = str5;
                if (compile.matcher(str5).matches() && (str2.length() < 3 || !str2.substring(2).equals(str5.substring(2)))) {
                    str5 = new StringBuffer().append("B").append(str6.substring(1)).toString();
                }
                str2 = str6;
                labelSequence.add(str5);
            }
        }
        instance.setData(stringTokenization);
        if (isTargetProcessing()) {
            instance.setTarget(labelSequence);
        }
        if (this.saveSource) {
            instance.setSource(stringBuffer);
        }
        return instance;
    }

    static Class class$(String str) {
        try {
            return Class.forName(str);
        } catch (ClassNotFoundException e) {
            throw new NoClassDefFoundError().initCause(e);
        }
    }
}
