package pl.edu.icm.synat.content.categorization.lingpipe.app;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.config.PropertyPlaceholderConfigurer;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import pl.edu.icm.synat.content.categorization.CategorizationModule;
import pl.edu.icm.synat.content.categorization.exception.CategorizationException;
import pl.edu.icm.synat.content.categorization.model.CorpusEntry;
import pl.edu.icm.synat.content.categorization.model.PublicationMetadataDocument;

/* loaded from: input_file:pl/edu/icm/synat/content/categorization/lingpipe/app/CategorizationApp.class */
/**
 * Command-line entry point that reads training documents from a ZIP archive and
 * passes them to the {@link CategorizationModule} bean obtained from the Spring
 * context defined in {@code categorization-app-context.xml}.
 *
 * <p>Every archive entry name is split on {@code _}; the parts at positions 0, 1
 * and 2 are used as category, language and document id respectively.
 */
public class CategorizationApp {
    private static final Logger logger = LoggerFactory.getLogger(CategorizationApp.class);
    private static final String CONTEXT_PATH = "categorization-app-context.xml";
    private static final String SPLIT_REGEX = "_";
    private static final String CHARACTER_ENCODING = "UTF-8";
    private static final String ZIP = ".zip";

    /** Positions of the parts of an archive entry name after splitting on {@link #SPLIT_REGEX}. */
    private static final int CATEGORY = 0;
    private static final int ID = 2;
    private static final int LANGUAGE = 1;

    /** Positions of the command-line arguments accepted by {@link #main(String...)}. */
    private static final int ARG_TRAINING_DATA = 0;
    private static final int ARG_STORAGE_DIR = 1;

    /** Value used for {@code categorization.storage.dir} when no second argument is given. */
    private static final String DEFAULT_STORAGE_DIR = "TfIdfClassifierFactoryStorageDir";

    // NOTE(review): the single-String constructor of ClassPathXmlApplicationContext presumably
    // refreshes the context immediately, so the context may be refreshed twice (here and in the
    // constructor below) - confirm against the Spring version in use.
    private ClassPathXmlApplicationContext applicationContext = new ClassPathXmlApplicationContext(CONTEXT_PATH);

    /**
     * Registers a placeholder configurer that supplies {@code categorization.storage.dir}
     * and refreshes the application context.
     *
     * @param str value bound to the {@code categorization.storage.dir} property
     */
    public CategorizationApp(String str) {
        Properties properties = new Properties();
        properties.put("categorization.storage.dir", str);
        PropertyPlaceholderConfigurer propertyPlaceholderConfigurer = new PropertyPlaceholderConfigurer();
        propertyPlaceholderConfigurer.setProperties(properties);
        this.applicationContext.addBeanFactoryPostProcessor(propertyPlaceholderConfigurer);
        this.applicationContext.refresh();
    }

    /**
     * Closes the application context as a last resort. Finalization is not guaranteed
     * to run, which is why {@link #main(String...)} also closes the context explicitly.
     */
    @Override
    protected void finalize() {
        this.applicationContext.close();
    }

    /**
     * Trains the categorization module from a ZIP archive.
     *
     * @param strArr {@code [0]} path to the training archive (required);
     *               {@code [1]} storage directory (optional, defaults to
     *               {@code TfIdfClassifierFactoryStorageDir})
     * @throws Exception if no argument is given, no training documents are found,
     *                   or reading/training fails
     */
    public static void main(String... strArr) throws Exception {
        if (strArr.length == 0) {
            throw new IllegalArgumentException("Not enough arguments");
        }
        String storageDir = strArr.length > ARG_STORAGE_DIR ? strArr[ARG_STORAGE_DIR] : DEFAULT_STORAGE_DIR;
        CategorizationApp categorizationApp = new CategorizationApp(storageDir);
        try {
            List<CorpusEntry<PublicationMetadataDocument>> trainingData =
                    categorizationApp.returnDataForTraining(strArr[ARG_TRAINING_DATA]);
            if (trainingData == null || trainingData.isEmpty()) {
                throw new IllegalStateException("No training documents found");
            }
            logger.info("Training the corpus using {} files. Result will be stored in '{}'.",
                    trainingData.size(), storageDir);
            categorizationApp.trainModule(trainingData);
            logger.info("Logging has been finished");
        } finally {
            // Release the Spring context deterministically instead of relying on finalize().
            categorizationApp.applicationContext.close();
        }
    }

    /**
     * Loads training documents from the given path.
     *
     * @param str path to the training data; only paths ending in {@code .zip} are handled
     * @return the corpus entries read from the archive, or {@code null} when {@code str}
     *         is {@code null} or does not end in {@code .zip}
     * @throws CategorizationException if the archive cannot be read or an entry is invalid
     */
    public List<CorpusEntry<PublicationMetadataDocument>> returnDataForTraining(String str) {
        if (str == null || !str.endsWith(ZIP)) {
            return null;
        }
        return extractFilesAndCreateCorpusEntryListWithPublicationMetadataDocument(new File(str));
    }

    /** Delegates training to the {@link CategorizationModule} bean of the application context. */
    private void trainModule(List<CorpusEntry<PublicationMetadataDocument>> list) {
        ((CategorizationModule) this.applicationContext.getBean(CategorizationModule.class)).trainModule(list);
    }

    /**
     * Reads every entry of the archive and converts it into a corpus entry.
     *
     * @param file the ZIP archive to read
     * @return one corpus entry per archive entry, in archive order
     * @throws CategorizationException wrapping any failure; a failed validation (for example
     *         a directory entry) is reported with a {@link NotImplementedException} as cause
     */
    private List<CorpusEntry<PublicationMetadataDocument>> extractFilesAndCreateCorpusEntryListWithPublicationMetadataDocument(File file) {
        List<CorpusEntry<PublicationMetadataDocument>> corpusEntries = new ArrayList<>();
        // try-with-resources closes the archive on every exit path, including failures.
        try (ZipFile zipFile = new ZipFile(file)) {
            Enumeration<? extends ZipEntry> entries = zipFile.entries();
            while (entries.hasMoreElements()) {
                try {
                    ZipEntry entry = entries.nextElement();
                    Validate.notNull(entry);
                    Validate.isTrue(!entry.isDirectory());
                    corpusEntries.add(readCorpusEntry(zipFile, entry));
                } catch (IllegalArgumentException e) {
                    throw new NotImplementedException(e);
                }
            }
            return corpusEntries;
        } catch (Exception e) {
            throw new CategorizationException("An exception occurred while preparing data.", e);
        }
    }

    /** Reads one archive entry as UTF-8 text and builds its corpus entry, closing the stream afterwards. */
    private CorpusEntry<PublicationMetadataDocument> readCorpusEntry(ZipFile zipFile, ZipEntry entry) throws IOException {
        try (InputStream inputStream = zipFile.getInputStream(entry)) {
            Validate.notNull(inputStream);
            String content = IOUtils.toString(inputStream, CHARACTER_ENCODING);
            Validate.notNull(content);
            CorpusEntry<PublicationMetadataDocument> corpusEntry = createCorpusEntryDocument(entry.getName(), content);
            Validate.notNull(corpusEntry);
            return corpusEntry;
        }
    }

    /**
     * Builds a corpus entry from an archive entry name and its text content.
     *
     * @param str  entry name; split on {@code _} into category, language and id
     * @param str2 text content of the entry
     * @return a corpus entry pairing the document with its category; the document language
     *         is {@code null} when the language part of the name is empty
     * @throws IllegalArgumentException if the name has fewer than three {@code _}-separated parts
     * @throws IOException declared for overriding implementations; not thrown here
     */
    protected CorpusEntry<PublicationMetadataDocument> createCorpusEntryDocument(String str, String str2) throws IOException {
        String[] nameParts = str.split(SPLIT_REGEX);
        // Fail with a descriptive message instead of an ArrayIndexOutOfBoundsException.
        Validate.isTrue(nameParts.length > ID,
                "Entry name must contain category, language and id separated by '_': " + str);
        String category = nameParts[CATEGORY];
        String id = nameParts[ID];
        String language = nameParts[LANGUAGE].isEmpty() ? null : nameParts[LANGUAGE];
        PublicationMetadataDocument document = new PublicationMetadataDocument(id, language, str2);
        return new CorpusEntry<>(document, category);
    }
}
