package edu.stanford.nlp.sequences;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.objectbank.IteratorFromReaderFactory;
import edu.stanford.nlp.objectbank.LineIterator;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.util.StringUtils;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:WEB-INF/lib/stanford-corenlp-3.2.0.jar:edu/stanford/nlp/sequences/TrueCasingForNISTDocumentReaderAndWriter.class */
/**
 * Reads whitespace-tokenised text, one document per line, and labels every token with its
 * casing class ({@code LOWER}, {@code UPPER}, {@code INIT_UPPER} or {@code O}); also writes
 * labelled tokens back out with the casing implied by the guessed label.
 *
 * <p>When the system property {@code 3class} is {@code true}, the {@code UPPER} class is not
 * produced by the reader and is not applied by the writer.
 */
public class TrueCasingForNISTDocumentReaderAndWriter implements DocumentReaderAndWriter<CoreLabel> {
    private static final long serialVersionUID = -3000389291781534479L;

    /** Label for tokens matching the all-lower-case pattern. */
    private static final String LABEL_LOWER = "LOWER";
    /** Label for tokens matching the all-upper-case pattern (not used in three-class mode). */
    private static final String LABEL_UPPER = "UPPER";
    /** Label for tokens with an upper-case first letter and no upper-case letters after it. */
    private static final String LABEL_INIT_UPPER = "INIT_UPPER";
    /** Label for every other token. */
    private static final String LABEL_OTHER = "O";

    /** Line-based iterator factory; created by {@link #init(SeqClassifierFlags)}. */
    private IteratorFromReaderFactory<List<CoreLabel>> factory;
    // Kept as a boxed Boolean so the type of this instance field is unchanged.
    private Boolean verboseForTrueCasing = false;
    public static final String THREE_CLASSES_PROPERTY = "3class";
    public static final boolean THREE_CLASSES = Boolean.parseBoolean(System.getProperty(THREE_CLASSES_PROPERTY, "false"));
    private static final Pattern alphabet = Pattern.compile("[A-Za-z]+");
    /**
     * Optional set of known (lower-cased) words consulted by {@link #known(String)}.
     * Left as a raw, public, mutable field so existing callers that assign it keep compiling.
     */
    public static Set knownWords = null;

    /* loaded from: input_file:WEB-INF/lib/stanford-corenlp-3.2.0.jar:edu/stanford/nlp/sequences/TrueCasingForNISTDocumentReaderAndWriter$LineToTrueCasesParser.class */
    /**
     * Converts one line of text into a list of {@link CoreLabel}s. Each token stores its
     * lower-cased form as the word, its casing class as both the answer and the gold answer,
     * and its zero-based index in the line as the position.
     */
    public static class LineToTrueCasesParser implements Function<String, List<CoreLabel>> {
        private static final Pattern allLower = Pattern.compile("[^A-Z]*?[a-z]+[^A-Z]*?");
        private static final Pattern allUpper = Pattern.compile("[^a-z]*?[A-Z]+[^a-z]*?");
        private static final Pattern startUpper = Pattern.compile("[A-Z].*");

        /**
         * Splits {@code str} on single spaces and labels every resulting token.
         *
         * <p>Consecutive spaces yield empty tokens; these are kept (labelled {@code O}) so that
         * positions stay aligned with the result of {@code str.split(" ")}.
         *
         * @param str one line of input text
         * @return one label per token, in input order
         */
        @Override // edu.stanford.nlp.util.Function
        public List<CoreLabel> apply(String str) {
            List<CoreLabel> labels = new ArrayList<CoreLabel>();
            int position = 0;
            for (String token : str.split(" ")) {
                String casing = classify(token);
                CoreLabel label = new CoreLabel();
                label.set(CoreAnnotations.AnswerAnnotation.class, casing);
                label.set(CoreAnnotations.GoldAnswerAnnotation.class, casing);
                label.setWord(token.toLowerCase());
                label.set(CoreAnnotations.PositionAnnotation.class, String.valueOf(position));
                labels.add(label);
                position++;
            }
            return labels;
        }

        /**
         * Returns the casing class of a single token.
         *
         * @param token the token in its original casing
         * @return {@code LOWER}, {@code UPPER}, {@code INIT_UPPER} or {@code O}
         */
        private static String classify(String token) {
            if (allLower.matcher(token).matches()) {
                return LABEL_LOWER;
            }
            if (!TrueCasingForNISTDocumentReaderAndWriter.THREE_CLASSES && allUpper.matcher(token).matches()) {
                return LABEL_UPPER;
            }
            // INIT_UPPER needs at least two characters: an upper-case first letter followed by
            // a remainder that is unchanged by lower-casing.
            boolean restIsLower = false;
            if (token.length() > 1) {
                String rest = token.substring(1);
                restIsLower = rest.equals(rest.toLowerCase());
            }
            if (restIsLower && startUpper.matcher(token).matches()) {
                return LABEL_INIT_UPPER;
            }
            return LABEL_OTHER;
        }
    }

    /**
     * Command-line driver: reads the file named by the first argument and prints every token
     * label, with a separator line after each document.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is the input file path
     * @throws IOException if the file cannot be opened or closed
     * @throws IllegalArgumentException if no input file is given
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr == null || strArr.length < 1) {
            throw new IllegalArgumentException("usage: TrueCasingForNISTDocumentReaderAndWriter <input-file>");
        }
        BufferedReader bufferedReader = new BufferedReader(new FileReader(strArr[0]));
        try {
            TrueCasingForNISTDocumentReaderAndWriter readerAndWriter = new TrueCasingForNISTDocumentReaderAndWriter();
            // No flags are available here; init() treats null as "not verbose".
            readerAndWriter.init(null);
            Iterator<List<CoreLabel>> documents = readerAndWriter.getIterator(bufferedReader);
            while (documents.hasNext()) {
                for (CoreLabel label : documents.next()) {
                    System.out.println(label);
                }
                System.out.println("========================================");
            }
        } finally {
            // Release the file handle even if reading or printing fails.
            bufferedReader.close();
        }
    }

    /**
     * Initialises this reader/writer.
     *
     * @param seqClassifierFlags flags supplying {@code verboseForTrueCasing}; may be
     *     {@code null}, in which case verbose output is disabled
     */
    @Override // edu.stanford.nlp.sequences.DocumentReaderAndWriter
    public void init(SeqClassifierFlags seqClassifierFlags) {
        // main() passes null, so the flags must not be dereferenced unconditionally.
        this.verboseForTrueCasing = (seqClassifierFlags == null)
                ? Boolean.FALSE
                : Boolean.valueOf(seqClassifierFlags.verboseForTrueCasing);
        this.factory = LineIterator.getFactory(new LineToTrueCasesParser());
    }

    /**
     * Tests whether the lower-cased form of {@code str} is in {@link #knownWords}.
     *
     * @param str the word to look up
     * @return {@code true} if the word is known; {@code false} if it is not or if
     *     {@link #knownWords} has not been set
     */
    public static boolean known(String str) {
        Set words = knownWords;
        return words != null && words.contains(str.toLowerCase());
    }

    /**
     * Returns an iterator over the documents (one per line) read from {@code reader}.
     * {@link #init(SeqClassifierFlags)} must have been called first.
     */
    @Override // edu.stanford.nlp.objectbank.IteratorFromReaderFactory
    public Iterator<List<CoreLabel>> getIterator(Reader reader) {
        return this.factory.getIterator(reader);
    }

    /**
     * Writes one document as a single space-separated line, re-casing each word according to
     * its guessed label, and reports the number of guess/gold mismatches on standard error.
     *
     * <p>Words labelled {@code O} that consist only of ASCII letters get a {@code /MIX} suffix.
     * In verbose mode every token also gets {@code /GOLD-x/GUESS-y} appended. Tokens whose
     * guessed label is none of the four known classes contribute an empty surface form.
     *
     * @param list the labelled tokens of one document
     * @param printWriter destination for the re-cased line
     */
    @Override // edu.stanford.nlp.sequences.DocumentReaderAndWriter
    public void printAnswers(List<CoreLabel> list, PrintWriter printWriter) {
        List<String> rendered = new ArrayList<String>();
        int wrong = 0;
        for (CoreLabel token : list) {
            String guess = token.get(CoreAnnotations.AnswerAnnotation.class);
            String gold = token.get(CoreAnnotations.GoldAnswerAnnotation.class);
            // Null-safe comparison: an unset guess counts as wrong unless gold is unset too.
            if (guess == null ? gold != null : !guess.equals(gold)) {
                wrong++;
            }
            StringBuilder sb = new StringBuilder();
            if (!THREE_CLASSES && LABEL_UPPER.equals(guess)) {
                sb.append(token.word().toUpperCase());
            } else if (LABEL_LOWER.equals(guess)) {
                sb.append(token.word().toLowerCase());
            } else if (LABEL_INIT_UPPER.equals(guess)) {
                String word = token.word();
                // Empty words arise from consecutive spaces in the input; nothing to capitalise.
                if (!word.isEmpty()) {
                    sb.append(word.substring(0, 1).toUpperCase()).append(word.substring(1));
                }
            } else if (LABEL_OTHER.equals(guess)) {
                String word = token.word();
                sb.append(word);
                if (alphabet.matcher(word).matches()) {
                    sb.append("/MIX");
                }
            }
            if (this.verboseForTrueCasing.booleanValue()) {
                sb.append("/GOLD-").append(gold).append("/GUESS-").append(guess);
            }
            rendered.add(sb.toString());
        }
        printWriter.print(StringUtils.join(rendered, " "));
        System.err.printf("> wrong = %d ; total = %d\n", Integer.valueOf(wrong), Integer.valueOf(list.size()));
        printWriter.println();
    }
}
