package ws.palladian.kaggle.restaurants.dataset;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.function.Predicate;
import java.util.stream.Stream;
import ws.palladian.core.Instance;
import ws.palladian.core.InstanceBuilder;
import ws.palladian.core.dataset.AbstractDataset;
import ws.palladian.core.dataset.Dataset;
import ws.palladian.core.dataset.FeatureInformation;
import ws.palladian.core.dataset.FeatureInformationBuilder;
import ws.palladian.core.value.ImmutableIntegerValue;
import ws.palladian.core.value.ImmutableStringValue;
import ws.palladian.core.value.NullValue;
import ws.palladian.core.value.Value;
import ws.palladian.dataset.ImageValue;
import ws.palladian.helper.ProgressMonitor;
import ws.palladian.helper.ProgressReporter;
import ws.palladian.helper.collection.AbstractIterator;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.io.CloseableIterator;
import ws.palladian.kaggle.restaurants.utils.Config;
import ws.palladian.utils.ModuloFilter;

/* loaded from: input_file:ws/palladian/kaggle/restaurants/dataset/YelpKaggleDatasetReader.class */
public class YelpKaggleDatasetReader extends AbstractDataset {
    /** CSV file iterated line by line; the first line is skipped and the first two comma-separated columns are read as photo id and business id. */
    private final File photoToBizCsv;
    /** Directory against which image files are resolved as {@code <photoId>.jpg}. */
    private final File baseImagePath;
    /** Label ids per business id, parsed from the labels CSV; stays empty when no labels file was given. */
    private final Map<Integer, int[]> businessIdToLabels;
    /** Number of lines in {@link #photoToBizCsv} minus one (the skipped first line). */
    private final long numPhotos;
    /** {@code true} when a labels file was passed to the constructor; enables per-label features on each instance. */
    private final boolean training;

    /* loaded from: input_file:ws/palladian/kaggle/restaurants/dataset/YelpKaggleDatasetReader$BusinessFilter.class */
    /**
     * Partitions instances by the parity of their {@code businessId} feature, so that
     * {@link #TRAIN} and {@link #VALIDATE} accept disjoint sets of businesses.
     */
    public enum BusinessFilter implements Predicate<Instance> {
        /** Accepts instances whose business id is odd. */
        TRAIN(true),
        /** Accepts instances whose business id is even. */
        VALIDATE(false);

        /** When {@code true} the filter accepts odd ids, otherwise even ids. */
        private final boolean acceptOdd;

        BusinessFilter(boolean z) {
            this.acceptOdd = z;
        }

        /**
         * Tests whether the instance belongs to this partition.
         *
         * @param instance the instance to test; must carry a {@code businessId} feature
         * @return {@code true} if the parity of the business id matches this constant
         * @throws IllegalArgumentException if the {@code businessId} feature is absent or the null value
         * @throws NumberFormatException if the feature's string form is not an integer
         */
        @Override // java.util.function.Predicate
        public boolean test(Instance instance) {
            // Typed as Value: the decompiled original declared this local as NullValue,
            // which cannot hold an arbitrary Value.
            Value businessId = (Value) instance.getVector().get("businessId");
            if (businessId == null || businessId == NullValue.NULL) {
                throw new IllegalArgumentException("businessId is missing");
            }
            boolean even = Integer.parseInt(businessId.toString()) % 2 == 0;
            return even ^ this.acceptOdd;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:ws/palladian/kaggle/restaurants/dataset/YelpKaggleDatasetReader$DatasetIterator.class */
    /**
     * Iterates the photo-to-business CSV of the enclosing reader and turns every data line
     * into an {@link Instance}. The underlying line stream stays open until {@link #close()}
     * is called.
     */
    public final class DatasetIterator extends AbstractIterator<Instance> implements CloseableIterator<Instance> {
        private final Stream<String> lines;
        private final Iterator<String> iterator;
        private final ProgressReporter progress;

        /**
         * Opens the CSV, skips its first line and starts progress reporting.
         *
         * @throws IllegalStateException if the file cannot be opened
         */
        DatasetIterator() {
            Stream<String> opened;
            try {
                opened = Files.lines(YelpKaggleDatasetReader.this.photoToBizCsv.toPath());
            } catch (IOException e) {
                throw new IllegalStateException(e);
            }
            this.lines = opened;
            try {
                this.iterator = opened.iterator();
                // Skip the header line.
                this.iterator.next();
                this.progress = new ProgressMonitor(0.5d);
                this.progress.startTask(YelpKaggleDatasetReader.this.photoToBizCsv.toString(), YelpKaggleDatasetReader.this.numPhotos);
            } catch (RuntimeException | Error e) {
                // Nobody can call close() on a half-constructed iterator, so release the file here.
                opened.close();
                throw e;
            }
        }

        /** Closes the underlying line stream. */
        public void close() throws IOException {
            this.lines.close();
        }

        /**
         * Builds the instance for the next CSV line: {@code photoId}, {@code businessId},
         * one boolean feature per {@code Label} when the reader has labels, and the
         * {@code image} feature pointing at {@code <photoId>.jpg} below the base image path.
         *
         * @return the next instance
         * @throws AbstractIterator.Finished when no lines are left
         * @throws IllegalStateException if labels are enabled but the business id has no entry
         */
        /* JADX INFO: Access modifiers changed from: protected */
        /* renamed from: getNext, reason: merged with bridge method [inline-methods] */
        // NOTE(review): the name looks like a decompiler rename of getNext() — confirm before relying on it.
        public Instance m25getNext() throws AbstractIterator.Finished {
            if (!this.iterator.hasNext()) {
                throw FINISHED;
            }
            this.progress.increment();
            String[] columns = this.iterator.next().split(",");
            int photoId = Integer.parseInt(columns[0]);
            String businessId = columns[1];
            InstanceBuilder builder = new InstanceBuilder();
            builder.set("photoId", photoId);
            builder.set("businessId", businessId);
            if (YelpKaggleDatasetReader.this.training) {
                int[] assignedLabels = YelpKaggleDatasetReader.this.businessIdToLabels.get(Integer.valueOf(businessId));
                if (assignedLabels == null) {
                    throw new IllegalStateException("No entry for businessId " + businessId);
                }
                int labelCount = Label.values().length;
                for (int labelId = 0; labelId < labelCount; labelId++) {
                    // Lambdas may only capture effectively final locals.
                    final int current = labelId;
                    boolean present = Arrays.stream(assignedLabels).anyMatch(id -> id == current);
                    builder.set(Label.getById(current).toString(), present);
                }
            }
            builder.set("image", new ImageValue(new File(YelpKaggleDatasetReader.this.baseImagePath, photoId + ".jpg")));
            return builder.create(false);
        }
    }

    /* loaded from: input_file:ws/palladian/kaggle/restaurants/dataset/YelpKaggleDatasetReader$Subset.class */
    /**
     * Named dataset sizes. Each constant carries the modulus that
     * {@code subset(Subset, boolean)} hands to its {@code ModuloFilter}.
     */
    public enum Subset {
        SMALL(200),
        MEDIUM(20),
        FULL(2);

        /** Modulus passed to the modulo filter. */
        public final int mod;

        Subset(int i) {
            this.mod = i;
        }

        /**
         * Returns the constant name in lower case.
         *
         * @return the lower-cased name, e.g. {@code "medium"}
         */
        @Override // java.lang.Enum
        public String toString() {
            // Locale.ROOT keeps the result stable under default locales with special
            // casing rules (e.g. Turkish would map 'I' to a dotless 'ı').
            return name().toLowerCase(Locale.ROOT);
        }
    }

    /**
     * Creates a reader over the photo-to-business CSV and, optionally, a labels CSV.
     *
     * <p>The photo CSV is read once to count its lines (minus the header). If a labels file
     * is given, every line after its header is split at {@code ","}: the first column is the
     * business id and the second, when present, a space-separated list of label ids.
     *
     * @param file the photo-to-business CSV; must not be {@code null}
     * @param file2 the labels CSV, or {@code null} to read without labels
     * @param file3 the directory containing the image files; must not be {@code null}
     * @throws IOException if one of the CSV files cannot be opened
     * @throws NullPointerException if {@code file} or {@code file3} is {@code null}
     * @throws NumberFormatException if a business id or label id is not an integer
     */
    public YelpKaggleDatasetReader(File file, File file2, File file3) throws IOException {
        this.businessIdToLabels = new HashMap<>();
        this.photoToBizCsv = Objects.requireNonNull(file);
        this.baseImagePath = Objects.requireNonNull(file3);
        long lineCount;
        // Files.lines keeps the file open until the stream is closed.
        try (Stream<String> photoLines = Files.lines(file.toPath())) {
            lineCount = photoLines.count();
        }
        this.numPhotos = lineCount - 1;
        this.training = file2 != null;
        if (file2 != null) {
            try (Stream<String> labelLines = Files.lines(file2.toPath())) {
                // skip(1) drops the header line.
                labelLines.skip(1).forEach(line -> {
                    String[] columns = line.split(",");
                    Integer businessId = Integer.valueOf(columns[0]);
                    // A business without labels has no second column after the split.
                    int[] labelIds = columns.length == 2
                            ? Arrays.stream(columns[1].split(" ")).mapToInt(Integer::parseInt).toArray()
                            : new int[0];
                    this.businessIdToLabels.put(businessId, labelIds);
                });
            }
        }
    }

    /**
     * Creates a reader without a labels file by delegating to the three-argument
     * constructor with {@code null} as the labels CSV, so instances carry no label features.
     *
     * @param file the photo-to-business CSV
     * @param file2 the directory containing the image files
     * @throws IOException if the CSV cannot be opened
     * @deprecated no replacement is stated in this file; presumably the three-argument
     *             constructor is meant (confirm).
     */
    @Deprecated
    public YelpKaggleDatasetReader(File file, File file2) throws IOException {
        this(file, null, file2);
    }

    /* renamed from: iterator, reason: merged with bridge method [inline-methods] */
    /**
     * Opens a fresh iterator over the photo CSV. The returned iterator holds an open
     * line stream and should be closed by the caller.
     *
     * <p>NOTE(review): the name looks like a decompiler rename of {@code iterator()} — confirm.
     *
     * @return a new {@link DatasetIterator}
     */
    public CloseableIterator<Instance> m23iterator() {
        return new DatasetIterator();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Returns a filtered view of this dataset built from a {@link ModuloFilter}.
     *
     * <p>The filter is created with the subset's modulus and a second argument of
     * {@code 0} when {@code z} is {@code true}, otherwise {@code 1}.
     * NOTE(review): the meaning of that second argument is defined by
     * {@code ModuloFilter} — presumably the remainder to keep; confirm.
     *
     * @param subset the size preset supplying the modulus; must not be {@code null}
     * @param z selects {@code 0} ({@code true}) or {@code 1} ({@code false}) as the second filter argument
     * @return the filtered instances
     * @throws NullPointerException if {@code subset} is {@code null}
     */
    public Iterable<Instance> subset(Subset subset, boolean z) {
        Objects.requireNonNull(subset);
        int selector;
        if (z) {
            selector = 0;
        } else {
            selector = 1;
        }
        ModuloFilter moduloFilter = new ModuloFilter(subset.mod, selector);
        return CollectionHelper.filter(this, moduloFilter);
    }

    /**
     * Describes the features of this dataset: {@code photoId} as an integer value and
     * {@code businessId} as a string value.
     *
     * <p>NOTE(review): the label and {@code image} features that the iterator also sets
     * are not declared here — confirm whether that is intended.
     *
     * @return the feature information for {@code photoId} and {@code businessId}
     */
    public FeatureInformation getFeatureInformation() {
        FeatureInformationBuilder builder = new FeatureInformationBuilder();
        builder.set("photoId", ImmutableIntegerValue.class);
        builder.set("businessId", ImmutableStringValue.class);
        FeatureInformation featureInformation = builder.create();
        return featureInformation;
    }

    /**
     * Returns the number of photos, i.e. the line count of the photo CSV minus one,
     * as computed in the constructor.
     *
     * @return the number of data lines in the photo CSV
     */
    public long size() {
        return this.numPhotos;
    }

    /**
     * Loads the training dataset from the configured paths, splits it with
     * {@link BusinessFilter#TRAIN} and {@link BusinessFilter#VALIDATE}, and prints
     * the number of instances in each part.
     *
     * @param strArr command-line arguments (unused)
     * @throws IOException if the dataset files cannot be read
     */
    public static void main(String[] strArr) throws IOException {
        File photoToBizCsv = Config.getFilePath("dataset.yelp.restaurants.train.photoToBizCsv");
        File labelsCsv = Config.getFilePath("dataset.yelp.restaurants.train.csv");
        File photoDirectory = Config.getFilePath("dataset.yelp.restaurants.train.photos");
        YelpKaggleDatasetReader reader = new YelpKaggleDatasetReader(photoToBizCsv, labelsCsv, photoDirectory);

        Dataset trainingPart = reader.subset(BusinessFilter.TRAIN);
        Dataset validationPart = reader.subset(BusinessFilter.VALIDATE);

        System.out.println("# training = " + CollectionHelper.count(trainingPart.iterator()));
        System.out.println("# validation = " + CollectionHelper.count(validationPart.iterator()));
    }
}
