package org.apache.ctakes.coreference.cc;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Scanner;
import libsvm.svm;
import libsvm.svm_node;
import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
import org.apache.ctakes.constituency.parser.util.TreeUtils;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.resource.FileResource;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.coreference.type.BooleanLabeledFS;
import org.apache.ctakes.coreference.type.DemMarkable;
import org.apache.ctakes.coreference.type.Markable;
import org.apache.ctakes.coreference.type.MarkablePairSet;
import org.apache.ctakes.coreference.type.NEMarkable;
import org.apache.ctakes.coreference.util.CorefConsts;
import org.apache.ctakes.coreference.util.FSIteratorToList;
import org.apache.ctakes.coreference.util.GoldStandardLabeler;
import org.apache.ctakes.coreference.util.MarkableTreeUtils;
import org.apache.ctakes.coreference.util.PairAttributeCalculator;
import org.apache.ctakes.coreference.util.SvmVectorCreator;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.utils.tree.SimpleTree;
import org.apache.log4j.Logger;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.collection.CasConsumer_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSList;
import org.apache.uima.jcas.cas.NonEmptyFSList;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
import org.apache.uima.util.ProcessTrace;

/* loaded from: input_file:org/apache/ctakes/coreference/cc/ODIEVectorFileWriter.class */
/**
 * CAS consumer that dumps coreference training instances to disk.
 *
 * <p>For every {@link MarkablePairSet} in a CAS, the (anaphor, candidate antecedent) pairs are
 * labelled against the gold standard and written, depending on configuration, as:
 * <ul>
 *   <li>libsvm feature vectors, one file per document and anaphor type, under
 *       {@code <outputDir>/<type>/vectors/<docName>.libsvm} ({@code writeVectors});</li>
 *   <li>path trees, one file per anaphor type for the whole collection, under
 *       {@code <outputDir>/<type>/trees.txt} ({@code writeTrees});</li>
 *   <li>anaphoricity vectors, accumulated in memory and written at the end of the collection to
 *       {@code <outputDir>/anaphor.trainingvectors.libsvm} ({@code anaphora} together with
 *       {@code writeVectors}).</li>
 * </ul>
 *
 * <p>Configuration parameters read: {@code outputDir}, {@code goldStandardDir},
 * {@code writeVectors}, {@code writeTrees}, {@code anaphora}. External resources read:
 * {@code stopWords}, {@code treeFrags}.
 */
public class ODIEVectorFileWriter extends CasConsumer_ImplBase {
    private static final Integer NGRAM_THRESHOLD = 0;

    /** Document name used when the CAS carries no document id (value kept from the original code). */
    private static final String DEFAULT_DOC_NAME = "141471681_1";

    private HashSet<String> stopwords;
    private ArrayList<String> treeFrags;

    // Configuration flags, read in initialize().
    private boolean printVectors;
    private boolean printTrees;
    private boolean anaphora;

    private Logger log = Logger.getLogger(getClass());
    private String outputDir = null;
    private String goldStandardDir = null;

    // Collection-scoped writers: opened in initialize(), closed in collectionProcessComplete().
    private PrintWriter anaphOut = null;
    // Document-scoped writers: opened and closed inside each processCas() call.
    private PrintWriter neOut = null;
    private PrintWriter pronOut = null;
    private PrintWriter demOut = null;
    // Collection-scoped writers (only when printTrees is set).
    private PrintWriter neTreeOut = null;
    private PrintWriter pronTreeOut = null;
    private PrintWriter demTreeOut = null;
    private PrintWriter debug = null;

    private boolean initialized = false;

    // Running instance counts per anaphor type and label.
    private int posNeInst = 0;
    private int negNeInst = 0;
    private int posDemInst = 0;
    private int negDemInst = 0;
    private int posPronInst = 0;
    private int negPronInst = 0;
    private int posAnaphInst = 0;
    private int negAnaphInst = 0;

    // Anaphoricity instances buffered until collectionProcessComplete(); the two lists are parallel.
    private ArrayList<Integer> anaphLabels = new ArrayList<>();
    private ArrayList<svm_node[]> anaphNodes = new ArrayList<>();

    private PairAttributeCalculator attr = null;
    private SvmVectorCreator vecCreator = null;
    private GoldStandardLabeler labeler = null;
    private boolean useFrags = true;

    /**
     * Reads the configuration, creates the output directories, opens the collection-scoped
     * writers and loads the stop-word list, the anaphoricity model and the tree fragments.
     *
     * @throws ResourceInitializationException if any of the above fails; the underlying
     *         exception is attached as the cause
     */
    public void initialize() throws ResourceInitializationException {
        this.outputDir = (String) getConfigParameterValue("outputDir");
        this.goldStandardDir = (String) getConfigParameterValue("goldStandardDir");
        this.printVectors = ((Boolean) getConfigParameterValue("writeVectors")).booleanValue();
        this.printTrees = ((Boolean) getConfigParameterValue("writeTrees")).booleanValue();
        this.anaphora = ((Boolean) getConfigParameterValue("anaphora")).booleanValue();
        try {
            new File(this.outputDir + "/" + CorefConsts.NE + "/vectors/").mkdirs();
            new File(this.outputDir + "/" + CorefConsts.PRON + "/vectors/").mkdirs();
            new File(this.outputDir + "/" + CorefConsts.DEM + "/vectors/").mkdirs();

            if (this.printVectors && this.anaphora) {
                this.anaphOut = new PrintWriter(this.outputDir + "/anaphor.trainingvectors.libsvm");
            }
            if (this.printTrees) {
                this.neTreeOut = new PrintWriter(this.outputDir + "/" + CorefConsts.NE + "/trees.txt");
                this.demTreeOut = new PrintWriter(this.outputDir + "/" + CorefConsts.DEM + "/trees.txt");
                this.pronTreeOut = new PrintWriter(this.outputDir + "/" + CorefConsts.PRON + "/trees.txt");
                // Auto-flushing wrapper so the debug file is usable even after an abnormal stop.
                this.debug = new PrintWriter((Writer) new PrintWriter(this.outputDir + "/" + CorefConsts.NE + "/fulltrees_debug.txt"), true);
            }

            this.stopwords = loadStopwords(((FileResource) super.getUimaContext().getResourceObject("stopWords")).getFile());
            this.vecCreator = new SvmVectorCreator(this.stopwords, svm.svm_load_model(FileLocator.locateFile("anaphoricity.mayo.rbf.model").getAbsolutePath()));

            if (this.useFrags) {
                this.treeFrags = loadTreeFragments(((FileResource) super.getUimaContext().getResourceObject("treeFrags")).getFile());
                this.vecCreator.setFrags(this.treeFrags);
            }
            this.initialized = true;
        } catch (Exception e) {
            // Do not leak writers that were opened before the failure.
            closeCollectionWriters();
            this.log.error("Error initializing file writers.", e);
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Labels every anaphor/antecedent candidate pair of the CAS against the gold standard and
     * writes the configured outputs. Does nothing if {@link #initialize()} did not complete.
     *
     * <p>For each pair set, candidates are visited in list order and processing of that set stops
     * after the first gold pair has been written.
     *
     * @param cas the CAS to process
     * @throws ResourceProcessException if the per-document vector files cannot be created
     */
    public void processCas(CAS cas) throws ResourceProcessException {
        if (!this.initialized) {
            return;
        }
        try {
            JCas jCas = cas.getCurrentView().getJCas();

            // Use the part of the document id after the last '/' as the file name stem.
            String documentID = DocumentIDAnnotationUtil.getDocumentID(jCas);
            String docName = documentID == null
                    ? DEFAULT_DOC_NAME
                    : documentID.substring(documentID.lastIndexOf('/') + 1);

            System.out.print("creating vectors for " + docName);
            this.labeler = new GoldStandardLabeler(this.goldStandardDir, docName, FSIteratorToList.convert(jCas.getAnnotationIndex(Markable.type).iterator()));

            if (this.printVectors) {
                openVectorWriters(docName);
            }
            try {
                writeInstances(jCas);
            } finally {
                // Per-document files are always closed, even if an instance could not be written.
                closeVectorWriters();
            }
        } catch (CASException e) {
            // Best effort, as before: a CAS that cannot be viewed as a JCas is skipped.
            this.log.error("No processing done in ODIEVectoFileWriter!", e);
        }
    }

    /** Iterates over all pair sets of the CAS and emits one instance per visited candidate. */
    private void writeInstances(JCas jCas) throws CASException, ResourceProcessException {
        FSIterator pairSets = jCas.getJFSIndexRepository().getAllIndexedFS(MarkablePairSet.type);
        while (pairSets.hasNext()) {
            MarkablePairSet pairSet = (MarkablePairSet) pairSets.next();
            Markable anaphor = pairSet.getAnaphor();
            String type = anaphor instanceof NEMarkable ? CorefConsts.NE : anaphor instanceof DemMarkable ? CorefConsts.DEM : CorefConsts.PRON;

            FSList candidates = pairSet.getAntecedentList();
            while (candidates instanceof NonEmptyFSList) {
                NonEmptyFSList node = (NonEmptyFSList) candidates;
                BooleanLabeledFS labeled = (BooleanLabeledFS) node.getHead();

                if (this.anaphora) {
                    bufferAnaphoricityInstance(labeled.getLabel(), anaphor, jCas);
                }

                Markable antecedent = labeled.getFeature();
                boolean isGold = this.labeler.isGoldPair(anaphor, antecedent);
                countInstance(type, isGold);

                if (this.printVectors) {
                    PrintWriter out = writerFor(type, this.neOut, this.pronOut, this.demOut);
                    writeLibsvmLine(out, isGold ? 1 : 0, this.vecCreator.getNodeFeatures(anaphor, antecedent, jCas));
                    out.flush();
                }
                if (this.printTrees) {
                    writeTreeInstance(jCas, anaphor, antecedent, type, isGold);
                }

                candidates = node.getTail();
                if (isGold) {
                    // Candidates after the first gold antecedent are not used as instances.
                    break;
                }
            }
        }
    }

    /** Records one anaphoricity instance (label plus feature vector) for the end-of-collection dump. */
    private void bufferAnaphoricityInstance(boolean label, Markable anaphor, JCas jCas) throws CASException {
        int anaphLabel = label ? 1 : 0;
        if (label) {
            this.posAnaphInst++;
        } else {
            this.negAnaphInst++;
        }
        this.anaphLabels.add(Integer.valueOf(anaphLabel));
        this.anaphNodes.add(this.vecCreator.createAnaphoricityVector(anaphor, jCas));
    }

    /** Updates the positive/negative instance counter that belongs to the given anaphor type. */
    private void countInstance(String type, boolean positive) {
        if (type.equals(CorefConsts.NE)) {
            if (positive) {
                this.posNeInst++;
            } else {
                this.negNeInst++;
            }
        } else if (type.equals(CorefConsts.DEM)) {
            if (positive) {
                this.posDemInst++;
            } else {
                this.negDemInst++;
            }
        } else if (type.equals(CorefConsts.PRON)) {
            if (positive) {
                this.posPronInst++;
            } else {
                this.negPronInst++;
            }
        }
    }

    /** Writes the path tree between antecedent and anaphor as one labelled line of the type's tree file. */
    private void writeTreeInstance(JCas jCas, Markable anaphor, Markable antecedent, String type, boolean positive) throws CASException {
        TreebankNode antecedentNode = MarkableTreeUtils.markableNode(jCas, antecedent.getBegin(), antecedent.getEnd());
        TreebankNode anaphorNode = MarkableTreeUtils.markableNode(jCas, anaphor.getBegin(), anaphor.getEnd());
        this.debug.println(TreeUtils.tree2str(antecedentNode));
        this.debug.println(TreeUtils.tree2str(anaphorNode));

        SimpleTree pathTree = TreeExtractor.extractPathTree(antecedentNode, anaphorNode);
        // NOTE(review): the result of this call was already discarded in the original code; the
        // call is retained in case it has side effects -- confirm and remove if it has none.
        TreeExtractor.extractPathEnclosedTree(antecedentNode, anaphorNode, jCas);
        String treeString = pathTree.toString();

        PrintWriter out = writerFor(type, this.neTreeOut, this.pronTreeOut, this.demTreeOut);
        out.print(positive ? "+1" : "-1");
        out.print(" |BT| ");
        out.print(treeString.replaceAll("\\) \\(", ")("));
        out.println(" |ET|");
    }

    /** Picks the writer matching the anaphor type; returns {@code null} for an unknown type. */
    private static PrintWriter writerFor(String type, PrintWriter ne, PrintWriter pron, PrintWriter dem) {
        if (type.equals(CorefConsts.NE)) {
            return ne;
        }
        if (type.equals(CorefConsts.PRON)) {
            return pron;
        }
        if (type.equals(CorefConsts.DEM)) {
            return dem;
        }
        return null;
    }

    /** Writes one instance in libsvm text format: {@code <label> <index>:<value> ...}. */
    private static void writeLibsvmLine(PrintWriter out, int label, svm_node[] features) {
        out.print(label);
        for (svm_node feature : features) {
            out.print(" ");
            out.print(feature.index);
            out.print(":");
            out.print(feature.value);
        }
        out.println();
    }

    /**
     * Opens the three per-document vector files.
     *
     * @throws ResourceProcessException if a file cannot be created; writers opened so far are closed
     */
    private void openVectorWriters(String docName) throws ResourceProcessException {
        try {
            this.neOut = new PrintWriter(this.outputDir + "/" + CorefConsts.NE + "/vectors/" + docName + ".libsvm");
            this.demOut = new PrintWriter(this.outputDir + "/" + CorefConsts.DEM + "/vectors/" + docName + ".libsvm");
            this.pronOut = new PrintWriter(this.outputDir + "/" + CorefConsts.PRON + "/vectors/" + docName + ".libsvm");
        } catch (FileNotFoundException e) {
            // Failing here is better than a NullPointerException on the first write.
            closeVectorWriters();
            throw new ResourceProcessException(e);
        }
    }

    /** Closes (and thereby flushes) the per-document vector writers that are currently open. */
    private void closeVectorWriters() {
        this.neOut = closeAndClear(this.neOut);
        this.demOut = closeAndClear(this.demOut);
        this.pronOut = closeAndClear(this.pronOut);
    }

    /** Closes (and thereby flushes) the writers that live for the whole collection. */
    private void closeCollectionWriters() {
        this.anaphOut = closeAndClear(this.anaphOut);
        this.neTreeOut = closeAndClear(this.neTreeOut);
        this.demTreeOut = closeAndClear(this.demTreeOut);
        this.pronTreeOut = closeAndClear(this.pronTreeOut);
        this.debug = closeAndClear(this.debug);
    }

    /** Null-safe close; always returns {@code null} so the caller can clear its field in one step. */
    private static PrintWriter closeAndClear(PrintWriter writer) {
        if (writer != null) {
            writer.close();
        }
        return null;
    }

    /**
     * Reads the stop-word file: one entry per line, blank lines ignored. If a line contains
     * {@code '|'}, only the text before the first {@code '|'} is used; a line starting with
     * {@code '|'} contributes nothing.
     */
    private static HashSet<String> loadStopwords(File file) throws IOException {
        HashSet<String> words = new HashSet<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.length() == 0) {
                    continue;
                }
                int bar = trimmed.indexOf('|');
                if (bar > 0) {
                    words.add(trimmed.substring(0, bar).trim());
                } else if (bar < 0) {
                    words.add(trimmed);
                }
            }
        }
        return words;
    }

    /**
     * Reads the tree-fragment file and returns the second space-separated field of every
     * non-blank line, in file order.
     *
     * @throws IllegalArgumentException if a non-blank line has fewer than two fields
     */
    private static ArrayList<String> loadTreeFragments(File file) throws FileNotFoundException {
        ArrayList<String> fragments = new ArrayList<>();
        try (Scanner scanner = new Scanner(file)) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (line.trim().length() == 0) {
                    // Tolerate blank lines (e.g. a trailing newline) instead of failing on them.
                    continue;
                }
                String[] fields = line.split(" ");
                if (fields.length < 2) {
                    throw new IllegalArgumentException("Malformed tree fragment line (expected at least two space-separated fields): " + line);
                }
                fragments.add(fields[1]);
            }
        }
        return fragments;
    }

    /** Parses the first character of {@code str} as an integer label. */
    private int getLabel(String str) {
        return Integer.parseInt(str.substring(0, 1));
    }

    /**
     * Writes the buffered anaphoricity instances (if that output is enabled) and closes every
     * writer that is still open. Does nothing beyond the superclass call if
     * {@link #initialize()} did not complete.
     *
     * @param processTrace passed through to the superclass
     */
    public void collectionProcessComplete(ProcessTrace processTrace) throws ResourceProcessException, IOException {
        super.collectionProcessComplete(processTrace);
        if (!this.initialized) {
            return;
        }
        try {
            // anaphOut is only opened when both "anaphora" and "writeVectors" are set.
            if (this.anaphora && this.anaphOut != null) {
                for (int i = 0; i < this.anaphNodes.size(); i++) {
                    writeLibsvmLine(this.anaphOut, this.anaphLabels.get(i).intValue(), this.anaphNodes.get(i));
                }
            }
        } finally {
            // Closing flushes; tree and debug files are closed in every mode so no output is lost.
            closeVectorWriters();
            closeCollectionWriters();
        }
    }

    /** Copies a list of integers into a {@code double[]} of the same length and order. */
    private double[] listToDoubleArray(ArrayList<Integer> arrayList) {
        double[] dArr = new double[arrayList.size()];
        for (int i = 0; i < arrayList.size(); i++) {
            dArr[i] = arrayList.get(i).intValue();
        }
        return dArr;
    }
}
