package ws.palladian.classification.encode;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.core.AppendedVector;
import ws.palladian.core.FeatureVector;
import ws.palladian.core.InstanceBuilder;
import ws.palladian.core.dataset.AbstractDatasetFeatureVectorTransformer;
import ws.palladian.core.dataset.Dataset;
import ws.palladian.core.dataset.FeatureInformation;
import ws.palladian.core.dataset.FeatureInformationBuilder;
import ws.palladian.core.dataset.statistics.DatasetStatistics;
import ws.palladian.core.dataset.statistics.NominalValueStatistics;
import ws.palladian.core.value.ImmutableIntegerValue;
import ws.palladian.core.value.NominalValue;
import ws.palladian.core.value.NullValue;
import ws.palladian.core.value.Value;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.functional.Predicates;

/* loaded from: input_file:ws/palladian/classification/encode/LabelEncoder.class */
/**
 * Feature vector transformer that adds an integer-encoded companion feature for every nominal feature of the
 * dataset it was initialized with.
 *
 * <p>
 * For each nominal feature {@code f}, the distinct values reported by the dataset statistics are sorted and
 * handed to {@link CollectionHelper#createIndexMap} (presumably yielding each value's position in the sorted
 * list -- confirm against that helper). On transformation, a feature named {@code f_labelEncoded} holding the
 * mapped integer is appended to the vector. Null values and values which were not present at initialization
 * time are encoded as {@link NullValue#NULL}.
 */
public class LabelEncoder extends AbstractDatasetFeatureVectorTransformer {
    private static final Logger LOGGER = LoggerFactory.getLogger(LabelEncoder.class);

    /** Suffix appended to a nominal feature's name to form the name of its encoded counterpart. */
    private static final String ENCODED_SUFFIX = "_labelEncoded";

    /** Nominal feature name -> (nominal value -> integer code). */
    private final Map<String, Map<String, Integer>> mappings;

    /**
     * Builds the value-to-integer mappings for all nominal features of the given dataset.
     *
     * @param dataset The dataset from which the nominal features and their distinct values are taken.
     */
    public LabelEncoder(Dataset dataset) {
        LOGGER.info("Start initializing LabelEncoder");
        StopWatch stopWatch = new StopWatch();
        Set<String> nominalFeatureNames = dataset.getFeatureInformation().getFeatureNamesOfType(NominalValue.class);
        // Statistics are computed on the nominal features only.
        DatasetStatistics statistics = new DatasetStatistics(dataset.filterFeatures(Predicates.equal(nominalFeatureNames)));
        Map<String, Map<String, Integer>> featureMappings = new HashMap<>();
        for (String featureName : nominalFeatureNames) {
            NominalValueStatistics valueStatistics = (NominalValueStatistics) statistics.getValueStatistics(featureName);
            // Sort so that the assigned codes do not depend on the iteration order of the statistics' value set.
            List<String> sortedValues = new ArrayList<>(valueStatistics.getValues());
            Collections.sort(sortedValues);
            Map<String, Integer> indexMap = CollectionHelper.createIndexMap(sortedValues);
            LOGGER.debug("# unique values for {}: {}", featureName, sortedValues.size());
            featureMappings.put(featureName, indexMap);
        }
        LOGGER.info("Initialized LabelEncoder in {}", stopWatch);
        this.mappings = featureMappings;
    }

    /**
     * Describes the features of a transformed vector: everything from the given feature information, plus one
     * {@link ImmutableIntegerValue} feature named {@code <feature>_labelEncoded} per encoded nominal feature.
     *
     * @param featureInformation The feature information of the untransformed data.
     * @return The feature information of the transformed data.
     */
    @Override
    public FeatureInformation getFeatureInformation(FeatureInformation featureInformation) {
        FeatureInformationBuilder builder = new FeatureInformationBuilder();
        builder.add(featureInformation);
        for (String featureName : mappings.keySet()) {
            builder.set(featureName + ENCODED_SUFFIX, ImmutableIntegerValue.class);
        }
        return builder.create();
    }

    /**
     * Appends the encoded features to the given vector.
     *
     * @param featureVector The vector to transform.
     * @return An {@link AppendedVector} combining the given vector with the encoded features; a feature is
     *         {@link NullValue#NULL} when the source value is null or has no mapping.
     */
    @Override
    public FeatureVector apply(FeatureVector featureVector) {
        InstanceBuilder encodedBuilder = new InstanceBuilder();
        for (Map.Entry<String, Map<String, Integer>> mapping : mappings.entrySet()) {
            String featureName = mapping.getKey();
            Value original = (Value) featureVector.get(featureName);
            Value encoded = NullValue.NULL;
            if (!original.isNull()) {
                Integer code = mapping.getValue().get(((NominalValue) original).getString());
                // Values which were not seen during initialization have no code and stay NULL.
                if (code != null) {
                    encoded = ImmutableIntegerValue.valueOf(code.intValue());
                }
            }
            encodedBuilder.set(featureName + ENCODED_SUFFIX, encoded);
        }
        return new AppendedVector(featureVector, encodedBuilder.create());
    }
}
