package liner2.chunker.factory;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.regex.Matcher;
import liner2.LinerOptions;
import liner2.Main;
import liner2.chunker.Chunker;
import liner2.chunker.CrfppChunker;
import liner2.features.TokenFeatureGenerator;
import liner2.reader.AbstractDocumentReader;
import liner2.reader.ReaderFactory;
import liner2.structure.Document;
import liner2.tools.Template;
import liner2.tools.TemplateFactory;
import org.ini4j.Ini;
import org.ini4j.Profile;
import weka.core.xml.XMLInstances;

/* loaded from: input_file:liner2/chunker/factory/ChunkerFactoryItemCrfpp.class */
/**
 * Factory item that builds a {@link CrfppChunker} from a description of the form
 * {@code crfpp:<path-to-ini-file>}.
 *
 * <p>The referenced INI file is read for a {@code main} section whose {@code mode} option
 * selects between training a new model ({@code train}) and loading a stored one
 * ({@code load}). In the {@code store} and {@code source} options the literal
 * <code>{INI_PATH}</code> is replaced with the parent directory of the INI file.
 */
public class ChunkerFactoryItemCrfpp extends ChunkerFactoryItem {

    /** Placeholder in INI values that is replaced with the INI file's parent directory. */
    private static final String INI_PATH_PLACEHOLDER = "{INI_PATH}";

    /** Registers the {@code crfpp:<ini-path>} description pattern with the base class. */
    public ChunkerFactoryItemCrfpp() {
        super("crfpp:([^:]*)");
    }

    /**
     * Creates a CRF++ chunker for the given description.
     *
     * <p>When {@code chunkerManager.opts.libCRFPPLoaded} is false, the native library
     * {@code lib/libCRFPP.so} is loaded first from a path derived from the code-source
     * location of {@link Main}; if that fails, a message is printed to standard error and
     * the JVM exits with status 1. This method does not itself update the flag.
     *
     * @param str            chunker description, expected to contain {@code crfpp:<ini-path>}
     * @param chunkerManager manager whose options supply features and templates
     * @return the trained or loaded chunker, or {@code null} when {@code str} does not
     *         match the pattern of this factory item
     * @throws Exception if the INI file cannot be read, training or loading fails, or the
     *                   {@code mode} option is missing or is neither {@code train} nor
     *                   {@code load}
     */
    @Override
    public Chunker getChunker(String str, ChunkerManager chunkerManager) throws Exception {
        if (!chunkerManager.opts.libCRFPPLoaded) {
            try {
                String baseDir = Main.class.getProtectionDomain().getCodeSource().getLocation().getPath()
                        .replace("liner2.jar", "");
                System.load(baseDir + "lib/libCRFPP.so");
            } catch (UnsatisfiedLinkError e) {
                System.err.println("Cannot load the libCRFPP.so native code.\nIf you are using liner as an imported jar specify correct path as CRFlib parameter in config.\n" + e);
                System.exit(1);
            }
        }

        Matcher matcher = this.pattern.matcher(str);
        if (!matcher.find()) {
            return null;
        }

        Ini ini = new Ini(new File(matcher.group(1)));
        String mode = ini.get("main", "mode");
        // Constant-first comparison: a missing "mode" option yields null and must end in the
        // descriptive exception below rather than in a NullPointerException.
        if ("train".equals(mode)) {
            return train(ini, chunkerManager);
        }
        if ("load".equals(mode)) {
            return load(ini);
        }
        throw new Exception("Unrecognized mode for CRFPP chunker: " + mode + "(Valid: train/load)");
    }

    /**
     * Deserializes a previously stored model.
     *
     * @param ini configuration whose {@code main/store} option names the model file
     * @return a chunker initialised from the stored model
     * @throws IOException declared for failures while reading the model
     */
    private Chunker load(Ini ini) throws IOException {
        Profile.Section main = (Profile.Section) ini.get("main");
        String modelPath = ((String) main.get("store"))
                .replace(INI_PATH_PLACEHOLDER, ini.getFile().getParent());
        Main.log("--> CRFPP Chunker deserialize from " + modelPath);
        CrfppChunker chunker = new CrfppChunker();
        chunker.deserialize(modelPath);
        return chunker;
    }

    /**
     * Trains a new model on the documents read from the configured source.
     *
     * <p>Options read: {@code main/threads}, {@code main/store}, the template name from
     * {@code main}, and from {@code data} the input format, {@code source} and an optional
     * comma-separated list of types. The template is written to a temporary {@code .tpl}
     * file once all documents have been processed, and only then is training started.
     *
     * @param ini            training configuration
     * @param chunkerManager manager whose options supply the feature list and templates
     * @return the trained chunker
     * @throws Exception if the configuration is invalid or reading/training fails
     */
    private Chunker train(Ini ini, ChunkerManager chunkerManager) throws Exception {
        Main.log("--> CRFPP Chunker train");
        String iniDir = ini.getFile().getParent();
        Profile.Section main = (Profile.Section) ini.get("main");
        Profile.Section data = (Profile.Section) ini.get("data");

        int threads = Integer.parseInt((String) main.get("threads"));
        String inputFormat = (String) data.get(XMLInstances.ATT_FORMAT);
        String inputFile = ((String) data.get("source")).replace(INI_PATH_PLACEHOLDER, iniDir);
        String modelFile = ((String) main.get("store")).replace(INI_PATH_PLACEHOLDER, iniDir);

        // An absent or empty types option leaves the set empty.
        HashSet<String> types = new HashSet<String>();
        String typesValue = (String) data.get(LinerOptions.OPTION_TYPES);
        if (typesValue != null && typesValue.length() > 0) {
            for (String type : typesValue.split(",")) {
                types.add(type);
            }
        }

        Main.log("--> Training on file=" + inputFile);
        AbstractDocumentReader reader = ReaderFactory.get().getStreamReader(inputFile, inputFormat);
        TokenFeatureGenerator featureGenerator = new TokenFeatureGenerator(chunkerManager.opts.features);
        Template template = chunkerManager.opts.getTemplate((String) main.get(LinerOptions.OPTION_TEMPLATE));
        File templateFile = File.createTempFile(LinerOptions.OPTION_TEMPLATE, ".tpl");

        CrfppChunker chunker = new CrfppChunker(threads, types);
        chunker.setTemplateFilename(templateFile.getAbsolutePath());
        chunker.setModelFilename(modelFile);

        // NOTE(review): the reader is never closed here; confirm whether
        // AbstractDocumentReader holds a resource that should be released.
        for (Document document = reader.nextDocument(); document != null; document = reader.nextDocument()) {
            featureGenerator.generateFeatures(document);
            chunker.addTrainingData(document);
        }

        // The template is stored after feature generation, using the generator's attribute index.
        TemplateFactory.store(template, templateFile.getAbsolutePath(), featureGenerator.getAttributeIndex());
        chunker.train();
        return chunker;
    }
}
