package edu.umass.cs.mallet.share.weili.ner.enron;

import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.LabelAlphabet;
import edu.umass.cs.mallet.base.types.LabelSequence;
import edu.umass.cs.mallet.base.types.Token;
import edu.umass.cs.mallet.base.types.TokenSequence;
import edu.umass.cs.mallet.share.weili.ner.WordTransformation;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.HashSet;
import java.util.StringTokenizer;
import org.apache.xml.serialize.LineSeparator;
import pl.edu.icm.model.bwmeta.desklight.utils.DeskLightTypes;
import pl.edu.icm.yadda.exports.zentralblatt.YElementToZentralBlattConverter;

/* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/share/weili/ner/enron/EnronMessage2TokenSequence.class */
public class EnronMessage2TokenSequence extends Pipe implements Serializable {
    /** When true, pipe() also records every emitted token, one per line, and stores that buffer as the instance source. */
    boolean saveSource;
    /** Lower-case markers; a body line is dropped when it contains one preceded only by '-', '>' or spaces. */
    public static String[] skip;
    /**
     * Lower-case markers; a body line containing one (preceded only by '-', '>' or spaces) is dropped
     * together with the lines that follow it up to the next empty line. pipe() additionally treats
     * entries 5 through 9 as address header prefixes, so the order of this array matters.
     */
    public static String[] skipToBlankLine;
    /** Entity type names accepted in the TYPE attribute of ENAMEX/TIMEX/NUMEX tags. */
    public static String[] labels;
    /** Name parts taken from "@enron.com" addresses in message headers; matching tokens get the HEADER-PERSON feature. */
    HashSet headerPersonNames;
    private static final long serialVersionUID = 1;
    // Not referenced by the visible code; writeObject writes the literal 0, which equals this value.
    private static final int CURRENT_SERIAL_VERSION = 0;
    // Compiler-style cache slot for the LabelAlphabet Class object.
    static Class class$edu$umass$cs$mallet$base$types$LabelAlphabet;
    // Set once in the static initializer to the negation of this class's desired assertion status.
    static final boolean $assertionsDisabled;
    // Cache slot for this class's own Class object, filled by the static initializer.
    static Class class$edu$umass$cs$mallet$share$weili$ner$enron$EnronMessage2TokenSequence;

    /* JADX WARN: Illegal instructions before constructor call */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public EnronMessage2TokenSequence() {
        /*
            r5 = this;
            r0 = r5
            r1 = 0
            java.lang.Class r2 = edu.umass.cs.mallet.share.weili.ner.enron.EnronMessage2TokenSequence.class$edu$umass$cs$mallet$base$types$LabelAlphabet
            if (r2 != 0) goto L14
            java.lang.String r2 = "edu.umass.cs.mallet.base.types.LabelAlphabet"
            java.lang.Class r2 = class$(r2)
            r3 = r2
            edu.umass.cs.mallet.share.weili.ner.enron.EnronMessage2TokenSequence.class$edu$umass$cs$mallet$base$types$LabelAlphabet = r3
            goto L17
        L14:
            java.lang.Class r2 = edu.umass.cs.mallet.share.weili.ner.enron.EnronMessage2TokenSequence.class$edu$umass$cs$mallet$base$types$LabelAlphabet
        L17:
            r0.<init>(r1, r2)
            r0 = r5
            r1 = 0
            r0.saveSource = r1
            r0 = r5
            java.util.HashSet r1 = new java.util.HashSet
            r2 = r1
            r2.<init>()
            r0.headerPersonNames = r1
            return
        */
        throw new UnsupportedOperationException("Method not decompiled: edu.umass.cs.mallet.share.weili.ner.enron.EnronMessage2TokenSequence.<init>():void");
    }

    /**
     * Converts an Enron message file into a token sequence with one entity label per token.
     *
     * <p>The instance data must be a {@link File}. The header section (everything up to the first
     * empty line) is scanned for "@enron.com" addresses whose name parts are remembered in
     * {@link #headerPersonNames}. The remaining lines form the body, minus lines matched by
     * {@link #skip} and blocks introduced by {@link #skipToBlankLine}. The body is then split on
     * ENAMEX/TIMEX/NUMEX tags and punctuation; tokens inside a recognised tag are labelled
     * {@code B-TYPE} (first token) and {@code I-TYPE} (following tokens), everything else {@code O}.
     *
     * <p>An {@link IOException} while reading is printed to {@code System.err}; whatever body text
     * was read before the failure is still tokenized.
     *
     * @param instance carrier whose data is the message file
     * @return the same instance, with data replaced by the token sequence, target replaced by the
     *         label sequence and, when {@link #saveSource} is set, source replaced by the token text
     */
    @Override // edu.umass.cs.mallet.base.pipe.Pipe
    public Instance pipe(Instance instance) {
        TokenSequence tokenSequence = new TokenSequence();
        LabelSequence labelSequence = new LabelSequence((LabelAlphabet) getTargetAlphabet());
        StringBuffer source = this.saveSource ? new StringBuffer() : null;
        WordTransformation wordTransformation = new WordTransformation();
        File file = (File) instance.getData();
        StringBuffer body = new StringBuffer();

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(file));
            scanAddressHeaders(reader);
            appendMessageBody(reader, body);
        } catch (IOException e) {
            // Best effort: report the problem and tokenize what has been read so far.
            System.err.println(e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a reader fails.
                }
            }
        }

        String label = "O";
        // "<" and ">" are returned as their own segments so tags can be recognised.
        StringTokenizer segments = new StringTokenizer(body.toString(), "<>", true);
        // Segment that was read while probing for a tag but turned out to be ordinary text.
        String pending = null;
        while (pending != null || segments.hasMoreTokens()) {
            String text;
            if (pending != null) {
                text = pending;
                pending = null;
            } else {
                text = segments.nextToken();
            }

            if (text.equals("<")) {
                // A "<" at the very end of the body has nothing after it to probe.
                String tag = segments.hasMoreTokens() ? segments.nextToken() : null;
                if (tag != null && (tag.equals("/ENAMEX") || tag.equals("/TIMEX") || tag.equals("/NUMEX"))) {
                    consumeTagClose(segments);
                    label = "O";
                } else if (tag != null && (tag.startsWith("ENAMEX") || tag.startsWith("TIMEX") || tag.startsWith("NUMEX"))) {
                    String attributes = tag.substring(tag.indexOf(" ") + 1);
                    if (!$assertionsDisabled && !attributes.startsWith("TYPE=")) {
                        throw new AssertionError();
                    }
                    int openQuote = attributes.indexOf("\"");
                    int closeQuote = attributes.lastIndexOf("\"");
                    // Without a quoted value there is no type to look up; the label stays unchanged.
                    if (openQuote != -1 && closeQuote > openQuote) {
                        String type = attributes.substring(openQuote + 1, closeQuote);
                        for (int i = 0; i < labels.length; i++) {
                            if (labels[i].equals(type)) {
                                label = new StringBuffer().append("B-").append(type).toString();
                                break;
                            }
                        }
                    }
                    consumeTagClose(segments);
                } else {
                    // Not a tag: the "<" is literal text and the probed segment still needs processing.
                    tokenSequence.add(new Token("<"));
                    labelSequence.add(label);
                    if (this.saveSource) {
                        source.append("<");
                        source.append("\n");
                    }
                    pending = tag;
                }
                // Restart so tag markup is never tokenized as words and the probed segment is
                // handled exactly once (it may itself be another "<").
                continue;
            }

            StringTokenizer words = new StringTokenizer(text, "~`!@#$%^&*()_-+={[}]|\\:;\"',<.>?/ \t\n\r", true);
            while (words.hasMoreTokens()) {
                String word = words.nextToken();
                // Whitespace delimiters are returned by the tokenizer but are not tokens of the message.
                if (word.equals(" ") || word.equals("\t") || word.equals("\n") || word.equals(LineSeparator.Macintosh)) {
                    continue;
                }
                Token token = wordTransformation.transformedToken(word);
                if (this.headerPersonNames.contains(word.toLowerCase())) {
                    token.setFeatureValue("HEADER-PERSON", 1.0d);
                }
                tokenSequence.add(token);
                labelSequence.add(label);
                if (this.saveSource) {
                    source.append(word);
                    source.append("\n");
                }
                // Only the first token of an entity keeps the "B-" prefix.
                if (label.startsWith("B-")) {
                    label = new StringBuffer().append("I-").append(label.substring(2)).toString();
                }
            }
        }

        instance.setData(tokenSequence);
        instance.setTarget(labelSequence);
        if (this.saveSource) {
            instance.setSource(source);
        }
        return instance;
    }

    /** Reads the header section up to the first empty line (or end of input) and records names from address headers. */
    private void scanAddressHeaders(BufferedReader reader) throws IOException {
        String line = reader.readLine();
        while (line != null && !line.equals("")) {
            String lowered = line.toLowerCase();
            String prefix = addressHeaderPrefix(lowered);
            if (prefix == null) {
                line = reader.readLine();
                continue;
            }
            StringBuffer addresses = new StringBuffer(lowered.substring(prefix.length()));
            // Lines starting with a space or tab continue the current header.
            line = reader.readLine();
            while (line != null && !line.equals("") && (line.startsWith(" ") || line.startsWith("\t"))) {
                // Lower-cased like the first line, because lookups against the name set are lower-case.
                addresses.append(line.toLowerCase());
                line = reader.readLine();
            }
            rememberHeaderPersonNames(addresses.toString());
        }
    }

    /** Returns the entry among skipToBlankLine[5..9] that the lower-cased line starts with, or null if none does. */
    private static String addressHeaderPrefix(String loweredLine) {
        final int first = 5;
        final int last = Math.min(9, skipToBlankLine.length - 1);
        for (int i = first; i <= last; i++) {
            if (loweredLine.startsWith(skipToBlankLine[i])) {
                return skipToBlankLine[i];
            }
        }
        return null;
    }

    /** Adds the name parts (two or more characters long) of every "first.last@enron.com"-style address to headerPersonNames. */
    private void rememberHeaderPersonNames(String addresses) {
        final String domain = "@enron.com";
        StringTokenizer entries = new StringTokenizer(addresses, " \t,");
        while (entries.hasMoreTokens()) {
            String address = entries.nextToken();
            if (!address.endsWith(domain)) {
                continue;
            }
            String localPart = address.substring(0, address.length() - domain.length());
            int firstDot = localPart.indexOf(".");
            if (firstDot == -1) {
                continue;
            }
            int lastDot = localPart.lastIndexOf(".");
            if (firstDot == lastDot) {
                // Exactly one dot: both sides are name parts.
                if (firstDot > 1) {
                    this.headerPersonNames.add(localPart.substring(0, firstDot));
                }
                if (firstDot + 1 < localPart.length() - 1) {
                    this.headerPersonNames.add(localPart.substring(firstDot + 1));
                }
            } else if (firstDot == lastDot - 1) {
                // Two adjacent dots: only the part after the second dot is recorded.
                if (lastDot + 1 < localPart.length() - 1) {
                    this.headerPersonNames.add(localPart.substring(lastDot + 1));
                }
            }
        }
    }

    /** Appends the remaining lines to body, leaving out lines and blocks selected by the skip marker arrays. */
    private static void appendMessageBody(BufferedReader reader, StringBuffer body) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (isMarkerLine(line, skip)) {
                continue;
            }
            if (isMarkerLine(line, skipToBlankLine)) {
                // Drop everything up to and including the next empty line; end of input also ends the block.
                String skipped = reader.readLine();
                while (skipped != null && !skipped.equals("")) {
                    skipped = reader.readLine();
                }
                continue;
            }
            body.append(line);
            body.append("\n");
        }
    }

    /** Tells whether the line contains one of the markers preceded by nothing but '-', '>' and whitespace that trim() removes or spaces. */
    private static boolean isMarkerLine(String line, String[] markers) {
        String lowered = line.toLowerCase();
        for (int m = 0; m < markers.length; m++) {
            int at = lowered.indexOf(markers[m]);
            if (at == -1) {
                continue;
            }
            // The prefix is taken from the lower-cased text so the index always refers to the string it came from.
            String prefix = lowered.substring(0, at).trim();
            boolean quotingOnly = true;
            for (int c = 0; c < prefix.length(); c++) {
                char ch = prefix.charAt(c);
                if (ch != '-' && ch != '>' && ch != ' ') {
                    quotingOnly = false;
                    break;
                }
            }
            if (quotingOnly) {
                return true;
            }
        }
        return false;
    }

    /** Consumes the segment that should be the ">" closing a tag; does nothing when the input ends inside the tag. */
    private static void consumeTagClose(StringTokenizer segments) {
        if (!segments.hasMoreTokens()) {
            return;
        }
        String close = segments.nextToken();
        if (!$assertionsDisabled && !close.equals(">")) {
            throw new AssertionError();
        }
    }

    /**
     * Serializes the collected header person names to the given file.
     *
     * <p>Failures are reported on {@code System.err} and not propagated. The underlying file
     * stream is closed even when writing fails.
     *
     * @param file destination file
     */
    public void write(File file) {
        try {
            FileOutputStream fileStream = new FileOutputStream(file);
            try {
                ObjectOutputStream objectOutputStream = new ObjectOutputStream(fileStream);
                objectOutputStream.writeObject(this.headerPersonNames);
                objectOutputStream.close();
            } finally {
                // Closing again after a successful close above has no effect; this covers the failure path.
                fileStream.close();
            }
        } catch (IOException e) {
            System.err.println(new StringBuffer().append("Exception writing file ").append(file).append(YElementToZentralBlattConverter.SUGGESTED_DICTIONARY_VALUE_SEPARATOR).append(e).toString());
        }
    }

    /**
     * Custom serialization: writes a format version of 0, then the saveSource flag, then the
     * header person name set, in that order.
     */
    private void writeObject(ObjectOutputStream objectOutputStream) throws IOException {
        final int formatVersion = 0;
        objectOutputStream.writeInt(formatVersion);

        final boolean keepSource = this.saveSource;
        objectOutputStream.writeBoolean(keepSource);

        final HashSet names = this.headerPersonNames;
        objectOutputStream.writeObject(names);
    }

    /**
     * Custom deserialization: reads the leading format version int (its value is not used), then
     * restores the saveSource flag and the header person name set.
     */
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        // The version is consumed only to advance the stream.
        objectInputStream.readInt();

        final boolean keepSource = objectInputStream.readBoolean();
        this.saveSource = keepSource;

        final Object names = objectInputStream.readObject();
        this.headerPersonNames = (HashSet) names;
    }

    /**
     * Looks up a class by its fully qualified name.
     *
     * @param str fully qualified class name
     * @return the Class object for that name
     * @throws NoClassDefFoundError if the class cannot be found; the original
     *         {@link ClassNotFoundException} is attached as the cause
     */
    static Class class$(String str) {
        try {
            return Class.forName(str);
        } catch (ClassNotFoundException e) {
            // initCause returns Throwable, so the error is kept in a typed local and thrown from there.
            NoClassDefFoundError error = new NoClassDefFoundError(str);
            error.initCause(e);
            throw error;
        }
    }

    static {
        Class cls;
        if (class$edu$umass$cs$mallet$share$weili$ner$enron$EnronMessage2TokenSequence == null) {
            cls = class$("edu.umass.cs.mallet.share.weili.ner.enron.EnronMessage2TokenSequence");
            class$edu$umass$cs$mallet$share$weili$ner$enron$EnronMessage2TokenSequence = cls;
        } else {
            cls = class$edu$umass$cs$mallet$share$weili$ner$enron$EnronMessage2TokenSequence;
        }
        $assertionsDisabled = !cls.desiredAssertionStatus();
        skip = new String[]{"=_part_", "sent by:"};
        skipToBlankLine = new String[]{"subject:", "original message", "content-type:", "content-transfer-encoding:", "forwarded by", "from:", "sent:", "to:", "bcc:", "cc:"};
        labels = new String[]{"DATE", "TIME", "LOCATION", DeskLightTypes.TYPE_PERSON, "ORGANIZATION", "ACRONYM", "PHONE", "MONEY", "PERCENT"};
    }
}
