package ws.palladian.classification.encode;

import java.util.Iterator;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.core.AppendedVector;
import ws.palladian.core.FeatureVector;
import ws.palladian.core.InstanceBuilder;
import ws.palladian.core.dataset.AbstractDatasetFeatureVectorTransformer;
import ws.palladian.core.dataset.Dataset;
import ws.palladian.core.dataset.FeatureInformation;
import ws.palladian.core.dataset.FeatureInformationBuilder;
import ws.palladian.core.dataset.statistics.DatasetStatistics;
import ws.palladian.core.dataset.statistics.NominalValueStatistics;
import ws.palladian.core.value.ImmutableFloatValue;
import ws.palladian.core.value.NominalValue;
import ws.palladian.core.value.Value;
import ws.palladian.helper.functional.Predicates;

/* loaded from: input_file:ws/palladian/classification/encode/FrequencyEncoder.class */
/**
 * Appends a frequency-encoded companion feature for every nominal feature.
 *
 * <p>The set of nominal feature names, the per-feature value statistics and the
 * total instance count are all taken from the dataset handed to the constructor.
 * For each of those features, {@link #apply(FeatureVector)} adds a feature named
 * {@code <name>_frequency} whose value is the count of the instance's nominal
 * value in that dataset divided by the dataset size. The original features are
 * kept; the new ones are appended.
 *
 * <p>The division is carried out in floating point, so a dataset reporting a
 * size of zero does not raise an arithmetic exception but yields a non-finite
 * result.
 */
public class FrequencyEncoder extends AbstractDatasetFeatureVectorTransformer {
    private static final Logger LOGGER = LoggerFactory.getLogger(FrequencyEncoder.class);

    /** Suffix appended to a nominal feature's name to form the encoded feature's name. */
    private static final String FEATURE_SUFFIX = "_frequency";

    /** Statistics computed over the nominal features of the construction dataset. */
    private final DatasetStatistics statistics;

    /** Names of all features typed as {@link NominalValue} in the construction dataset. */
    private final Set<String> nominalValueNames;

    /** Value of {@code dataset.size()} at construction time; the denominator of every frequency. */
    private final long totalCount;

    /** How null nominal values are encoded; never {@code null}. */
    private final NullValueStrategy nullValueStrategy;

    /** Determines what is written to the encoded feature when the nominal value is null. */
    public enum NullValueStrategy {
        /** The encoded feature is set to null as well. */
        KEEP_NULL,
        /** The encoded feature is set to the number of null values divided by the dataset size. */
        ASSIGN_FREQUENCY
    }

    /**
     * Creates an encoder that uses {@link NullValueStrategy#KEEP_NULL}.
     *
     * @param dataset the dataset from which nominal features and value counts are taken
     */
    public FrequencyEncoder(Dataset dataset) {
        this(dataset, NullValueStrategy.KEEP_NULL);
    }

    /**
     * Creates an encoder with an explicit strategy for null values.
     *
     * @param dataset the dataset from which nominal features and value counts are taken
     * @param nullValueStrategy how null nominal values are encoded; must not be {@code null}
     * @throws NullPointerException if {@code nullValueStrategy} is {@code null}
     */
    public FrequencyEncoder(Dataset dataset, NullValueStrategy nullValueStrategy) {
        // Fail fast, before the statistics are built: with a null strategy, apply() would
        // otherwise silently omit the encoded feature for null values and thereby
        // contradict the schema announced by getFeatureInformation().
        if (nullValueStrategy == null) {
            throw new NullPointerException("nullValueStrategy must not be null");
        }
        LOGGER.info("Start initializing FrequencyEncoder");
        this.nominalValueNames = dataset.getFeatureInformation().getFeatureNamesOfType(NominalValue.class);
        this.statistics = new DatasetStatistics(dataset.filterFeatures(Predicates.equal(this.nominalValueNames)));
        this.totalCount = dataset.size();
        this.nullValueStrategy = nullValueStrategy;
    }

    /**
     * Extends the given feature information by one {@code <name>_frequency} entry of type
     * {@link ImmutableFloatValue} per nominal feature known to this encoder.
     *
     * @param featureInformation the feature information of the untransformed data
     * @return the given feature information plus the encoded features
     */
    @Override
    public FeatureInformation getFeatureInformation(FeatureInformation featureInformation) {
        FeatureInformationBuilder builder = new FeatureInformationBuilder();
        builder.add(featureInformation);
        for (String featureName : this.nominalValueNames) {
            builder.set(featureName + FEATURE_SUFFIX, ImmutableFloatValue.class);
        }
        // NOTE(review): "m85create" looks like a decompiler-generated alias of create();
        // confirm against the library sources before renaming.
        return builder.m85create();
    }

    /**
     * Appends the frequency-encoded features to the given feature vector.
     *
     * <p>A non-null nominal value is encoded as its count divided by the dataset size.
     * A null value is encoded according to the configured {@link NullValueStrategy}.
     *
     * @param featureVector the vector to transform
     * @return the given vector with the encoded features appended
     */
    @Override
    public FeatureVector apply(FeatureVector featureVector) {
        InstanceBuilder builder = new InstanceBuilder();
        for (String featureName : this.nominalValueNames) {
            String encodedName = featureName + FEATURE_SUFFIX;
            Value value = (Value) featureVector.get(featureName);
            NominalValueStatistics valueStatistics = (NominalValueStatistics) this.statistics.getValueStatistics(featureName);
            if (!value.isNull()) {
                builder.set(encodedName, valueStatistics.getCount(((NominalValue) value).getString()) / ((float) this.totalCount));
                continue;
            }
            switch (this.nullValueStrategy) {
                case ASSIGN_FREQUENCY:
                    builder.set(encodedName, valueStatistics.getNumNullValues() / ((float) this.totalCount));
                    break;
                case KEEP_NULL:
                    builder.setNull(encodedName);
                    break;
                default:
                    // Guards against a strategy constant added later without handling here.
                    throw new IllegalStateException("Unsupported null value strategy: " + this.nullValueStrategy);
            }
        }
        return new AppendedVector(featureVector, builder.create());
    }
}
