package ws.palladian.classification.featureselection;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.classification.discretization.Discretization;
import ws.palladian.classification.text.CountingCategoryEntriesBuilder;
import ws.palladian.core.CategoryEntries;
import ws.palladian.core.CategoryEntriesBuilder;
import ws.palladian.core.Instance;
import ws.palladian.core.dataset.Dataset;
import ws.palladian.core.dataset.statistics.DatasetStatistics;
import ws.palladian.core.value.Value;
import ws.palladian.helper.ProgressReporter;

/* loaded from: input_file:ws/palladian/classification/featureselection/InformationGainFeatureRanker.class */
/**
 * Feature ranker that scores every feature of a dataset by its information gain with respect to
 * the instance category.
 *
 * <p>The score of a feature {@code F} is {@code H(C) - H(C|F)}, where {@code H(C)} is the entropy of
 * the category counts of the dataset and {@code H(C|F)} is obtained as {@code H(C,F) - H(F)} from
 * occurrence counts over the dataset after it has been passed through a {@link Discretization}.
 * All entropies are the values reported by {@link CategoryEntries#getEntropy()}.
 */
public final class InformationGainFeatureRanker extends AbstractFeatureRanker {
    private static final Logger LOGGER = LoggerFactory.getLogger(InformationGainFeatureRanker.class);

    /** Separator placed between category and feature value when building joint-occurrence keys. */
    private static final String JOINT_KEY_SEPARATOR = "###";

    /**
     * Computes the information gain of each feature named in the dataset's feature information.
     *
     * @param dataset the dataset to analyse; must not be {@code null}
     * @param progressReporter receives progress updates; half of the progress is assigned to the
     *        discretization step and half to the gain calculation; must not be {@code null}
     * @return a ranking that maps every feature name to its information gain
     * @throws NullPointerException if {@code dataset} or {@code progressReporter} is {@code null}
     */
    @Override
    public FeatureRanking rankFeatures(Dataset dataset, ProgressReporter progressReporter) {
        Validate.notNull(dataset, "dataset must not be null");
        Validate.notNull(progressReporter, "progressReporter must not be null");
        HashMap<String, Double> gainByFeature = new HashMap<>();
        progressReporter.startTask("Information Gain", -1L);

        LOGGER.debug("Calculating discretization");
        Discretization discretization = new Discretization(dataset, progressReporter.createSubProgress(0.5d));
        Dataset discretized = dataset.transform(discretization);

        // H(C): entropy of the category counts, taken from the statistics of the untransformed dataset.
        double categoryEntropy = new CategoryEntriesBuilder(new DatasetStatistics(dataset).getCategoryStatistics().getMap())
                .m64create()
                .getEntropy();

        Set<String> featureNames = dataset.getFeatureInformation().getFeatureNames();
        ProgressReporter gainProgress = progressReporter.createSubProgress(0.5d);
        LOGGER.debug("Calculating gain");
        gainProgress.startTask("Calculating gain", featureNames.size());
        for (String featureName : featureNames) {
            double gain = categoryEntropy - conditionalEntropy(featureName, discretized);
            gainByFeature.put(featureName, gain);
            gainProgress.increment();
        }
        gainProgress.finishTask();
        return new FeatureRanking(gainByFeature);
    }

    /**
     * Computes {@code H(C|F) = H(C,F) - H(F)} for one feature.
     *
     * <p>Joint (category, value) occurrences and plain value occurrences are counted in a single
     * pass, so the instances are iterated only once per feature.
     *
     * @param str name of the feature whose values are counted
     * @param iterable the instances to count over
     * @return the difference between the joint entropy and the feature entropy
     */
    private static double conditionalEntropy(String str, Iterable<? extends Instance> iterable) {
        CountingCategoryEntriesBuilder jointCounts = new CountingCategoryEntriesBuilder();
        CountingCategoryEntriesBuilder featureCounts = new CountingCategoryEntriesBuilder();
        for (Instance instance : iterable) {
            String featureValue = ((Value) instance.getVector().get(str)).toString();
            jointCounts.add(instance.getCategory() + JOINT_KEY_SEPARATOR + featureValue, 1);
            featureCounts.add(featureValue, 1);
        }
        CategoryEntries joint = jointCounts.m28create();
        CategoryEntries feature = featureCounts.m28create();
        return joint.getEntropy() - feature.getEntropy();
    }
}
