package ws.palladian.core.dataset;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import java.util.Queue;
import org.apache.commons.lang3.Validate;
import ws.palladian.classification.utils.CsvDatasetReaderConfig;
import ws.palladian.core.FeatureVector;
import ws.palladian.core.Instance;
import ws.palladian.core.InstanceBuilder;
import ws.palladian.core.dataset.csv.CsvDatasetWriter;
import ws.palladian.core.value.Value;
import ws.palladian.extraction.feature.FeatureRenamer;
import ws.palladian.helper.ProgressMonitor;
import ws.palladian.helper.ProgressReporter;
import ws.palladian.helper.collection.DefaultMultiMap;
import ws.palladian.helper.collection.MultiMap;
import ws.palladian.helper.io.CloseableIterator;

/* loaded from: input_file:ws/palladian/core/dataset/DatasetJoiner.class */
/**
 * Static utility for joining two {@link Dataset}s on a shared feature. Not instantiable.
 */
public final class DatasetJoiner {
    /**
     * Joins two datasets on the feature named {@code str}.
     * <p>
     * All feature vectors of {@code dataset} are read eagerly and indexed in memory by their value of the join
     * feature. The returned dataset iterates {@code dataset2} lazily: for every instance of {@code dataset2} it
     * emits one joined instance per indexed vector of {@code dataset} that has the same join value. Instances of
     * {@code dataset2} without a matching vector are skipped, and vectors of {@code dataset} that are never matched
     * are not emitted. Each joined instance carries the category of the {@code dataset2} instance.
     *
     * @param dataset the dataset which is read completely and held in memory, not {@code null}
     * @param dataset2 the dataset which is streamed while iterating the result, not {@code null}
     * @param str name of the feature to join on, not {@code null} or empty
     * @return a dataset producing the joined instances; its {@code size()} is reported as {@code -1}
     */
    public static Dataset join(final Dataset dataset, final Dataset dataset2, final String str) {
        Validate.notNull(dataset, "d1 must not be null");
        Validate.notNull(dataset2, "d2 must not be null");
        Validate.notEmpty(str, "joinColumn must not be null");
        ProgressMonitor progressMonitor = new ProgressMonitor();
        progressMonitor.startTask("Joining datasets", -1L);
        ProgressReporter d1Progress = progressMonitor.createSubProgress(0.5d);
        d1Progress.startTask("Reading d1", dataset.size());

        // Index every vector of the first dataset by its join value.
        final MultiMap<Value, FeatureVector> vectorsByJoinValue = DefaultMultiMap.createWithList();
        // NOTE(review): this iterator is never closed here -- confirm whether it holds resources.
        Iterator<Instance> d1Iterator = dataset.iterator2();
        while (d1Iterator.hasNext()) {
            FeatureVector vector = d1Iterator.next().getVector();
            vectorsByJoinValue.add(vector.get(str), vector);
            d1Progress.increment();
        }

        // NOTE(review): this task is started but never incremented while d2 is consumed.
        progressMonitor.createSubProgress(0.5d).startTask("Reading d2", dataset2.size());

        return new AbstractDataset() {
            @Override
            public long size() {
                // The number of joined instances is not known without performing the join.
                return -1L;
            }

            @Override
            public CloseableIterator<Instance> iterator2() {
                return new CloseableIterator<Instance>() {
                    /** Iterator over the streamed (second) dataset. */
                    private final CloseableIterator<Instance> i2 = dataset2.iterator2();

                    /** Vectors of the first dataset which still have to be joined with {@link #instance2}. */
                    private final Queue<FeatureVector> vectorsToJoin = new LinkedList<FeatureVector>();

                    /** The instance of the second dataset the queued vectors belong to. */
                    private Instance instance2;

                    @Override
                    public boolean hasNext() {
                        return fillQueue();
                    }

                    @Override
                    public Instance next() {
                        if (!fillQueue()) {
                            throw new NoSuchElementException();
                        }
                        return DatasetJoiner.join(this.vectorsToJoin.poll(), this.instance2);
                    }

                    /**
                     * Advances the second dataset until an instance with at least one join partner is found, or
                     * the dataset is exhausted. Looking ahead here keeps {@code hasNext()} truthful when the
                     * remaining instances have no match.
                     *
                     * @return {@code true} if at least one vector is queued for joining
                     */
                    private boolean fillQueue() {
                        while (this.vectorsToJoin.isEmpty() && this.i2.hasNext()) {
                            this.instance2 = this.i2.next();
                            Collection<FeatureVector> matches = vectorsByJoinValue.get(this.instance2.getVector().get(str));
                            if (matches != null) {
                                this.vectorsToJoin.addAll(matches);
                            }
                        }
                        return !this.vectorsToJoin.isEmpty();
                    }

                    @Override
                    public void close() throws IOException {
                        this.i2.close();
                    }
                };
            }

            @Override
            public FeatureInformation getFeatureInformation() {
                // Feature information of the first dataset followed by that of the second one.
                FeatureInformationBuilder featureInformationBuilder = new FeatureInformationBuilder();
                featureInformationBuilder.add(dataset.getFeatureInformation());
                featureInformationBuilder.add(dataset2.getFeatureInformation());
                return featureInformationBuilder.m80create();
            }
        };
    }

    /**
     * Builds a single joined instance from a feature vector and an instance.
     *
     * @param featureVector the vector whose features are added first
     * @param instance the instance whose vector is added second and whose category is used for the result
     * @return the joined instance
     */
    public static Instance join(FeatureVector featureVector, Instance instance) {
        InstanceBuilder instanceBuilder = new InstanceBuilder();
        instanceBuilder.add(featureVector);
        instanceBuilder.add(instance.getVector());
        return instanceBuilder.create(instance.getCategory());
    }

    private DatasetJoiner() {
        // utility class, no instances
    }

    /**
     * Ad-hoc driver: joins the activity test CSV with the people CSV on {@code people_id} and writes the result to a
     * new CSV file. All paths are hard-coded.
     *
     * @param strArr ignored
     * @throws IOException declared for the CSV reading and writing performed here
     */
    public static void main(String[] strArr) throws IOException {
        // Prefixes every feature not already starting with people/activity/outcome with "activity_".
        FeatureRenamer featureRenamer = new FeatureRenamer("^((?!people|activity|outcome).+)$", "activity_$1");

        CsvDatasetReaderConfig.Builder trainConfig = CsvDatasetReaderConfig.filePath(new File("/Users/pk/Desktop/kaggle-red-hat-business-value/act_train.csv"));
        trainConfig.setFieldSeparator(',');
        trainConfig.readHeader(true);
        trainConfig.readClassFromLastColumn(true);
        trainConfig.treatAsNullValue(Instance.NO_CATEGORY_DUMMY);
        // NOTE(review): the transformed training dataset is discarded; only the test data is joined below.
        trainConfig.m56create().transform(featureRenamer);

        CsvDatasetReaderConfig.Builder testConfig = CsvDatasetReaderConfig.filePath(new File("/Users/pk/Desktop/kaggle-red-hat-business-value/act_test.csv"));
        testConfig.setFieldSeparator(',');
        testConfig.readHeader(true);
        testConfig.readClassFromLastColumn(false);
        testConfig.treatAsNullValue(Instance.NO_CATEGORY_DUMMY);
        Dataset activityTest = testConfig.m56create().transform(featureRenamer);

        CsvDatasetReaderConfig.Builder peopleConfig = CsvDatasetReaderConfig.filePath(new File("/Users/pk/Desktop/kaggle-red-hat-business-value/people.csv"));
        peopleConfig.setFieldSeparator(',');
        peopleConfig.readHeader(true);
        peopleConfig.readClassFromLastColumn(false);
        peopleConfig.treatAsNullValue(Instance.NO_CATEGORY_DUMMY);
        Dataset people = peopleConfig.m56create().transform(new FeatureRenamer("^((?!people).+)$", "people_$1"));

        // The people data is held in memory, the activity data is streamed.
        Dataset joined = join(people, activityTest, "people_id");
        new CsvDatasetWriter(new File("/Users/pk/Desktop/kaggle-red-hat-business-value/act_test_people_joined.csv")).write(joined);
    }
}
