package uk.ac.shef.dcs.jate.util;

import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Paths;
import org.junit.Assert;
import org.junit.Test;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.model.JATEDocument;

/* loaded from: input_file:uk/ac/shef/dcs/jate/util/JATEUtilTest.class */
/**
 * Unit tests for {@link JATEUtil}: text cleaning and loading of ACL RD-TEC corpus documents.
 *
 * <p>All checks use JUnit's {@link Assert} methods rather than the Java {@code assert} keyword,
 * so they are evaluated regardless of whether the JVM was started with {@code -ea}.
 *
 * <p>Corpus fixtures are resolved relative to the {@code user.dir} system property, so the tests
 * expect to be launched with the working directory set to the directory containing {@code src}.
 */
public class JATEUtilTest {
    /** Base directory against which fixture paths are resolved (value of {@code user.dir}). */
    static String workingDir = System.getProperty("user.dir");

    /**
     * Opens the named fixture under {@code src/test/resource/eval/ACL_RD-TEC/corpus/util_test},
     * passes it to {@link JATEUtil#loadACLRDTECDocument}, and closes the stream afterwards
     * (also when loading fails).
     *
     * @param fileName file name of the fixture inside the {@code util_test} directory
     * @return the document returned by the loader
     * @throws JATEException propagated from the loader
     * @throws IOException if the fixture cannot be opened, read, or closed
     */
    private static JATEDocument loadTestDocument(String fileName) throws JATEException, IOException {
        try (FileInputStream fileInputStream = new FileInputStream(
                Paths.get(workingDir, "src", "test", "resource", "eval", "ACL_RD-TEC", "corpus", "util_test", fileName).toFile())) {
            return JATEUtil.loadACLRDTECDocument(fileInputStream);
        }
    }

    /**
     * Asserts that the document content is present and contains the given passage verbatim.
     *
     * @param document the loaded document
     * @param expectedPassage text that must occur in the document content
     */
    private static void assertContentContains(JATEDocument document, String expectedPassage) {
        String content = document.getContent();
        Assert.assertNotNull("document content must not be null", content);
        Assert.assertTrue("document content is missing expected passage: " + expectedPassage,
                content.contains(expectedPassage));
    }

    /** Checks the output of {@link JATEUtil#cleanText} for a sample containing letter-spaced words. */
    @Test
    public void cleanTextTest() {
        String expected = "Previous Efforts, CHAT - 8 0 , PRAT - 8 9 and HSQL Trondheim is a small city with a university and 140000 inhabitants.";
        String actual = JATEUtil.cleanText("P r e v i o u s Efforts, C H A T - 8 0 , P R A T - 8 9 and HSQL Trondheim is a small city with a university and 140000 inhabitants.");
        // assertEquals (rather than assertTrue(equals)) reports both strings on failure.
        Assert.assertEquals(expected, actual);
    }

    /** Loads {@code A00-1001_cln.xml} and checks its id and that the content is longer than 200 characters. */
    @Test
    public void loadDocumentText() throws JATEException, IOException {
        JATEDocument document = loadTestDocument("A00-1001_cln.xml");
        Assert.assertNotNull("loaded document must not be null", document);
        Assert.assertEquals("A00-1001", document.getId());
        Assert.assertNotNull("document content must not be null", document.getContent());
        Assert.assertTrue("document content should be longer than 200 characters",
                document.getContent().length() > 200);
    }

    /** Loads {@code E06-2023_cln.xml} and checks its id and that several known passages occur in the content. */
    @Test
    public void loadDocumentText2() throws JATEException, IOException {
        JATEDocument document = loadTestDocument("E06-2023_cln.xml");
        Assert.assertNotNull("loaded document must not be null", document);
        Assert.assertEquals("E06-2023", document.getId());
        Assert.assertNotNull("document content must not be null", document.getContent());
        assertContentContains(document, "According to linguistic theory, morphemes are considered to be the smallest meaning-bearing elements of a language. However, no adequate language-independent definition of the word as a unit has been agreed upon. If effective methods can be devised for the unsupervised discovery of morphemes, they could aid the formulation of a linguistic theory of morphology for a new language. The utilization of morphemes as basic representational units in a statistical language model instead of words seems a promising course [Creutz, 2004].");
        assertContentContains(document, "Many natural language processing tasks, including parsing, semantic modeling, information retrieval, and machine translation, frequently require a morphological analysis of the language at hand. The task of a morphological analyzer is to identify the lexeme, citation form, or inflection class of surface word forms in a language. It seems that even approximate automated morphological analysis would be beneficial for many NL applications dealing with large vocabularies (e.g. text retrieval applications).");
        assertContentContains(document, "[Monson 2004] presents a framework for unsupervised induction of natural language morphology, wherein candidate suffixes are grouped into candidate inflection classes, which are then placed in a lattice structure. With similar arranged inflection classes placed near one candidate in the lattice, it proposes this structure to be an ideal search space in which to isolate the true inflection classes of a language. [Schone and Jurafsky 2000] presents an unsupervised model in which knowledge-free distributional cues are combined orthography-based with information automatically extracted from semantic word co-occurrence patterns in the input corpus.");
        assertContentContains(document, "For example we can derive more than 200 words from the stem of the verb \"raftan\" (to go).");
    }

    /**
     * Loads {@code E06-2023_cln.xml} and checks its id and that a set of specific strings
     * does not occur anywhere in the loaded content.
     */
    @Test
    public void testLoadACLRDTECDocument() throws JATEException, IOException {
        JATEDocument document = loadTestDocument("E06-2023_cln.xml");
        Assert.assertNotNull("loaded document must not be null", document);
        Assert.assertNotNull("document id must not be null", document.getId());
        Assert.assertEquals("E06-2023", document.getId());

        String content = document.getContent();
        Assert.assertNotNull("document content must not be null", content);
        // indexOf returns -1 when the fragment is absent from the content.
        Assert.assertEquals(-1, content.indexOf("generation.Introduction"));
        Assert.assertEquals(-1, content.indexOf("5.0.Fast"));
        Assert.assertEquals(-1, content.indexOf("Fast decoding and optimal"));
        Assert.assertEquals(-1, content.indexOf("generation.Statistical"));
    }

    /** Loads {@code P06-1139_cln.xml} and checks its id and that a known passage occurs in the content. */
    @Test
    public void loadDocumentText3() throws IOException, JATEException {
        JATEDocument document = loadTestDocument("P06-1139_cln.xml");
        Assert.assertNotNull("loaded document must not be null", document);
        Assert.assertEquals("P06-1139", document.getId());
        Assert.assertNotNull("document content must not be null", document.getContent());
        assertContentContains(document, "each contiguous span a18a92a75a37a10a9 a21 over a Chinese string a11a22a52a13a12a61 is considered a possible \"constituent\", and the \"non-terminals\" associated with each constituent are the English phrase translations a61 a69 a52a13a12a61 that correspond in the translation table to the Chinese string a11a56a52a13a12a61 .");
    }
}
