package ws.palladian.extraction.date.rater;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.classification.dt.QuickDtClassifier;
import ws.palladian.classification.dt.QuickDtLearner;
import ws.palladian.classification.dt.QuickDtModel;
import ws.palladian.classification.utils.CsvDatasetReaderConfig;
import ws.palladian.core.CategoryEntries;
import ws.palladian.core.FeatureVector;
import ws.palladian.core.dataset.Dataset;
import ws.palladian.extraction.date.PageDateType;
import ws.palladian.extraction.date.dates.ContentDate;
import ws.palladian.extraction.date.dates.RatedDate;
import ws.palladian.helper.Cache;
import ws.palladian.helper.io.FileHelper;

/* loaded from: input_file:ws/palladian/extraction/date/rater/ContentDateRater.class */
/**
 * Rates {@link ContentDate}s with a pre-trained QuickDt classifier model.
 *
 * <p>One of two serialized, gzip-compressed models is loaded from the classpath depending on the
 * {@link PageDateType} passed to the constructor: {@value #CLASSIFIER_MODEL_PUB} for
 * {@link PageDateType#PUBLISH}, {@value #CLASSIFIER_MODEL_MOD} for every other type. Loaded models
 * are stored in the shared {@link Cache} under their resource path so they are deserialized at most
 * once per cache lifetime.
 */
public class ContentDateRater extends TechniqueDateRater<ContentDate> {
    private static final Logger LOGGER = LoggerFactory.getLogger(ContentDateRater.class);

    /** Classpath location of the model used for publish dates. */
    private static final String CLASSIFIER_MODEL_PUB = "/dates_pub_model.gz";

    /** Classpath location of the model used for all non-publish (i.e. modified) dates. */
    private static final String CLASSIFIER_MODEL_MOD = "/dates_mod_model.gz";

    /** The model matching the date type this rater was created for. */
    private final QuickDtModel model;

    /** Classifier which applies {@link #model} to a feature vector. */
    private final QuickDtClassifier predictor;

    /**
     * Creates a rater for the given date type and loads the corresponding model.
     *
     * @param pageDateType the type of date to rate; {@link PageDateType#PUBLISH} selects the publish
     *        model, anything else selects the modified model
     * @throws IllegalStateException if the model resource is missing or cannot be deserialized
     */
    public ContentDateRater(PageDateType pageDateType) {
        super(pageDateType);
        this.predictor = new QuickDtClassifier();
        if (pageDateType == PageDateType.PUBLISH) {
            this.model = loadModel(CLASSIFIER_MODEL_PUB);
        } else {
            this.model = loadModel(CLASSIFIER_MODEL_MOD);
        }
    }

    /**
     * Returns the model stored at the given classpath location, consulting the shared {@link Cache}
     * first and populating it after a successful load.
     *
     * @param resourcePath absolute classpath path of the gzip-compressed, serialized model; also
     *        used as the cache key
     * @return the cached or freshly deserialized model
     * @throws IllegalStateException if the resource does not exist or reading/deserializing it fails
     */
    private QuickDtModel loadModel(String resourcePath) {
        QuickDtModel cachedModel = (QuickDtModel) Cache.getInstance().getDataObject(resourcePath);
        if (cachedModel != null) {
            return cachedModel;
        }
        // Open the resource that was actually requested; the cache key and the loaded file must
        // refer to the same model.
        InputStream resource = getClass().getResourceAsStream(resourcePath);
        if (resource == null) {
            throw new IllegalStateException("Could not load model file \"" + resourcePath + "\"");
        }
        // Declaring the raw stream as its own resource guarantees it is closed even when the
        // GZIP or object stream constructor fails while reading its header.
        try (InputStream rawStream = resource;
                ObjectInputStream objectStream = new ObjectInputStream(new GZIPInputStream(rawStream))) {
            QuickDtModel loadedModel = (QuickDtModel) objectStream.readObject();
            Cache.getInstance().putDataObject(resourcePath, loadedModel);
            return loadedModel;
        } catch (IOException | ClassNotFoundException e) {
            throw new IllegalStateException(
                    "Error loading the model file \"" + resourcePath + "\": " + e.getMessage(), e);
        }
    }

    /**
     * Rates each of the given dates.
     *
     * <p>When this rater handles publish dates, a date found in the URL is rated {@code 1.0}
     * without consulting the classifier. Every other date is classified and rated with the
     * probability of its most likely category. A date whose classification throws is logged and
     * left out of the result.
     *
     * @param list the dates to rate
     * @return the rated dates in input order, minus those whose classification failed
     */
    @Override // ws.palladian.extraction.date.rater.TechniqueDateRater
    public List<RatedDate<ContentDate>> rate(List<ContentDate> list) {
        List<RatedDate<ContentDate>> ratedDates = new ArrayList<>();
        for (ContentDate contentDate : list) {
            if (this.dateType.equals(PageDateType.PUBLISH) && contentDate.isInUrl()) {
                ratedDates.add(RatedDate.create(contentDate, 1.0d));
                continue;
            }
            FeatureVector featureVector = DateInstanceFactory.createFeatureVector(contentDate);
            try {
                CategoryEntries classification = this.predictor.classify(featureVector, this.model);
                double probability = classification.getProbability(classification.getMostLikelyCategory());
                ratedDates.add(RatedDate.create(contentDate, probability));
            } catch (Exception e) {
                // Best effort: one unclassifiable date must not abort rating of the others.
                LOGGER.error("Exception {} {}", contentDate.getDateString(), featureVector, e);
            }
        }
        return ratedDates;
    }

    /**
     * Trains a random forest from a CSV data set (with header row) and serializes it to a file.
     *
     * @param inputCsvPath path of the CSV training data
     * @param outputModelPath path the serialized model is written to
     * @throws IOException declared for failures while reading the data or writing the model
     */
    private static void buildModel(String inputCsvPath, String outputModelPath) throws IOException {
        // NOTE(review): "m62create" looks like a decompiler-renamed "create()"; left untouched
        // because the real method name cannot be confirmed from this file.
        // NOTE(review): 10 is presumably the number of trees in the forest; verify against
        // QuickDtLearner.randomForest.
        FileHelper.serialize(QuickDtLearner.randomForest(10).train((Dataset) CsvDatasetReaderConfig.filePath(new File(inputCsvPath)).readHeader(true).m62create()), outputModelPath);
    }

    /**
     * Rebuilds both bundled models from hard-coded local training files.
     *
     * @param strArr ignored
     * @throws IOException if building either model fails
     */
    public static void main(String[] strArr) throws IOException {
        buildModel("D:\\Dates_Pub_Mod\\dates_mod.csv", "src/main/resources/dates_mod_model.gz");
        buildModel("D:\\Dates_Pub_Mod\\dates_pub.csv", "src/main/resources/dates_pub_model.gz");
    }
}
