package ws.palladian.classification.utils;

import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.core.ImmutableInstance;
import ws.palladian.core.Instance;
import ws.palladian.core.dataset.AbstractDataset;
import ws.palladian.core.dataset.FeatureInformation;
import ws.palladian.core.dataset.FeatureInformationBuilder;
import ws.palladian.core.featurevector.FlyweightVectorBuilder;
import ws.palladian.core.featurevector.FlyweightVectorSchema;
import ws.palladian.core.value.NullValue;
import ws.palladian.core.value.Value;
import ws.palladian.core.value.io.ValueParser;
import ws.palladian.core.value.io.ValueParserException;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.collection.AbstractIterator2;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.io.CloseableIterator;
import ws.palladian.helper.io.CsvReader;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.nlp.StringPool;

/* loaded from: input_file:ws/palladian/classification/utils/CsvDatasetReader.class */
/**
 * Dataset that reads its instances from a CSV source described by a {@link CsvDatasetReaderConfig}.
 *
 * <p>On construction the first non-empty line (and, when a header is configured, the line following
 * it) is read to determine the column count, the feature names and one {@link ValueParser} per
 * feature column. The rows themselves are read when the dataset is iterated; every call to
 * {@link #iterator2()} opens the input again.
 */
public class CsvDatasetReader extends AbstractDataset {
    private static final Logger LOGGER = LoggerFactory.getLogger(CsvDatasetReader.class);

    /** Reading progress is logged every time this many instances have been read. */
    private static final int LOG_EVERY_N_LINES = 100000;

    private final CsvDatasetReaderConfig config;

    /** Feature name per feature column; {@code null} marks a column that is skipped. */
    private final String[] headNames;

    /** Number of fields in the first non-empty line; every later line must have the same count. */
    private final int expectedColumns;

    /** Pool through which the category strings are obtained. */
    private final StringPool stringPool = new StringPool();

    /** Parser per feature column, index-aligned with {@link #headNames}; {@code null} for skipped columns. */
    private final ValueParser[] parsers;

    private final FlyweightVectorSchema vectorSchema;

    /** Cached result of {@link #size()}; {@code -1} while not yet computed. */
    private long size = -1L;

    /**
     * Iterator that reads the CSV input row by row and converts each row into an {@link Instance}.
     * It owns a {@link CsvReader} which is released by {@link #close()}.
     */
    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:ws/palladian/classification/utils/CsvDatasetReader$CsvDatasetIterator.class */
    public final class CsvDatasetIterator extends AbstractIterator2<Instance> implements CloseableIterator<Instance> {
        final CsvReader csvReader;
        /** Number of instances produced so far (the header line is not counted). */
        int instanceCounter;
        boolean didReadHeader = false;
        final StopWatch stopWatch = new StopWatch();

        /**
         * Opens the configured input.
         *
         * @throws IllegalStateException if the input cannot be opened; the I/O exception is attached as cause
         */
        CsvDatasetIterator() {
            try {
                this.csvReader = new CsvReader(config.openInputStream(), config.fieldSeparator(), config.quoteCharacter(), config.isUnescapeDoubleQuotes());
            } catch (FileNotFoundException e) {
                throw new IllegalStateException(config.filePath() + " not found.", e);
            } catch (IOException e) {
                throw new IllegalStateException("IOException for " + config.filePath(), e);
            }
        }

        /**
         * Produces the next instance.
         *
         * @return the next instance, or the value of {@code finished()} once the configured limit is
         *         reached or the input is exhausted
         * @throws IllegalStateException if a line has fewer than two fields or a field count different
         *         from the first line, or if a value cannot be parsed
         */
        /* JADX INFO: Access modifiers changed from: protected */
        /* renamed from: getNext, reason: merged with bridge method [inline-methods] */
        public Instance m54getNext() {
            long limit = config.getLimit();
            // Stop as soon as 'limit' instances were produced. The former test
            // 'instanceCounter == limit + 1' let one instance too many through,
            // which contradicted size() (min(lines, limit)).
            if (this.instanceCounter >= limit) {
                LOGGER.debug("Limit of {} reached, stopping", Long.valueOf(limit));
                return (Instance) finished();
            }
            if (!this.csvReader.hasNext()) {
                LOGGER.debug("Finished reading {} instances", Integer.valueOf(this.instanceCounter));
                return (Instance) finished();
            }
            List<String> fields = (List) this.csvReader.next();
            if (!this.didReadHeader && config.readHeader()) {
                // The line just consumed was the header; hand out the element after it.
                this.didReadHeader = true;
                return (Instance) next();
            }
            if (fields.size() < 2) {
                throw new IllegalStateException("Separator '" + config.fieldSeparator() + "' was not found, lines cannot be split ('" + this.csvReader.getLineNumber() + "').");
            }
            if (expectedColumns != fields.size()) {
                throw new IllegalStateException("Unexpected number of entries in line " + this.csvReader.getLineNumber() + " (" + fields.size() + ", but should be " + expectedColumns + ")");
            }
            this.instanceCounter++;
            Instance instance = parseInstance(fields);
            if (this.instanceCounter % LOG_EVERY_N_LINES == 0) {
                LOGGER.debug("Read {} lines in {}", Integer.valueOf(this.instanceCounter), this.stopWatch);
            }
            return instance;
        }

        /**
         * Converts the fields of one line into an instance.
         *
         * @param list the fields of the line; the caller has verified that their number equals the expected column count
         * @return instance holding the parsed feature values and the category
         * @throws IllegalStateException if a value is rejected by the parser of its column
         */
        private Instance parseInstance(List<String> list) {
            FlyweightVectorBuilder builder = vectorSchema.builder();
            boolean classInLastColumn = config.readClassFromLastColumn();
            int featureColumns = classInLastColumn ? list.size() - 1 : list.size();
            for (int i = 0; i < featureColumns; i++) {
                String name = headNames[i];
                if (name == null) {
                    // Column was marked as skipped when the header was read.
                    continue;
                }
                String raw = list.get(i);
                if (config.isTrim()) {
                    raw = raw.trim();
                }
                Value value;
                if (config.isNullValue(raw)) {
                    value = NullValue.NULL;
                } else {
                    try {
                        value = parsers[i].parse(raw);
                    } catch (ValueParserException e) {
                        throw new IllegalStateException("Could not parse value \"" + raw + "\" in column \"" + name + "\", row " + this.csvReader.getLineNumber() + " using " + parsers[i].getClass().getName() + ".", e);
                    }
                }
                builder.set(name, value);
            }
            String category;
            if (classInLastColumn) {
                String rawCategory = list.get(list.size() - 1);
                if (config.isTrim()) {
                    rawCategory = rawCategory.trim();
                }
                category = stringPool.get(rawCategory);
            } else {
                category = Instance.NO_CATEGORY_DUMMY;
            }
            return new ImmutableInstance(builder.m94create(), category);
        }

        /** Closes the underlying CSV reader. */
        public void close() throws IOException {
            this.csvReader.close();
        }
    }

    /**
     * Creates a reader for the given file which expects a header line.
     *
     * @deprecated use {@link #CsvDatasetReader(CsvDatasetReaderConfig)}
     */
    @Deprecated
    public CsvDatasetReader(File file) {
        this(file, true);
    }

    /**
     * Creates a reader for the given file using {@code ClassificationUtils.DEFAULT_SEPARATOR}.
     *
     * @param z whether the first line is a header
     * @deprecated use {@link #CsvDatasetReader(CsvDatasetReaderConfig)}
     */
    @Deprecated
    public CsvDatasetReader(File file, boolean z) {
        this(file, z, ClassificationUtils.DEFAULT_SEPARATOR);
    }

    /**
     * Creates a reader for the given file.
     *
     * @param z whether the first line is a header
     * @param str the field separator
     * @deprecated use {@link #CsvDatasetReader(CsvDatasetReaderConfig)}
     */
    @Deprecated
    public CsvDatasetReader(File file, boolean z, String str) {
        this(CsvDatasetReaderConfig.filePath(file).readHeader(z).setFieldSeparator(str).createConfig());
    }

    /**
     * Creates a reader from a configuration and inspects the beginning of the input: leading empty
     * lines are skipped, the first non-empty line fixes the expected column count, and a sample
     * line is used to choose the value parsers.
     *
     * @param csvDatasetReaderConfig the configuration, must not be {@code null}
     * @throws IllegalStateException if the input cannot be opened or read, contains no non-empty
     *         line, contains a header but no line after it, or the line after the header has a
     *         different number of fields than the header
     */
    public CsvDatasetReader(CsvDatasetReaderConfig csvDatasetReaderConfig) {
        Validate.notNull(csvDatasetReaderConfig, "config must not be null", new Object[0]);
        this.config = csvDatasetReaderConfig;
        // NOTE(review): the iterator additionally passes config.isUnescapeDoubleQuotes() to CsvReader,
        // this constructor does not - confirm that the three-argument default is intended here.
        try (CsvReader csvReader = new CsvReader(csvDatasetReaderConfig.openInputStream(), csvDatasetReaderConfig.fieldSeparator(), csvDatasetReaderConfig.quoteCharacter())) {
            List<String> firstLine = null;
            while (firstLine == null && csvReader.hasNext()) {
                List<String> candidate = (List) csvReader.next();
                if (!candidate.isEmpty()) {
                    firstLine = candidate;
                }
            }
            if (firstLine == null) {
                throw new IllegalStateException("No lines in file.");
            }
            this.expectedColumns = firstLine.size();
            int featureColumns = csvDatasetReaderConfig.readClassFromLastColumn() ? firstLine.size() - 1 : firstLine.size();
            this.headNames = new String[featureColumns];
            List<String> sampleLine;
            if (csvDatasetReaderConfig.readHeader()) {
                List<String> keptNames = new ArrayList<>();
                for (int i = 0; i < featureColumns; i++) {
                    String name = firstLine.get(i);
                    if (csvDatasetReaderConfig.isSkippedColumn(name)) {
                        // headNames[i] stays null, which marks the column as skipped.
                        LOGGER.debug("Skipping column {}", name);
                    } else {
                        if (csvDatasetReaderConfig.isTrim()) {
                            name = name.trim();
                        }
                        this.headNames[i] = name;
                        keptNames.add(name);
                    }
                }
                this.vectorSchema = new FlyweightVectorSchema(keptNames.toArray(new String[0]));
                if (!csvReader.hasNext()) {
                    throw new IllegalStateException("No lines after the header in file.");
                }
                sampleLine = (List) csvReader.next();
                if (sampleLine.size() != this.expectedColumns) {
                    throw new IllegalStateException("Unexpected number of entries in line " + csvReader.getLineNumber() + " (" + sampleLine.size() + ", but should be " + this.expectedColumns + ")");
                }
            } else {
                // Without a header the features are named after their column index.
                for (int i = 0; i < featureColumns; i++) {
                    this.headNames[i] = String.valueOf(i);
                }
                this.vectorSchema = new FlyweightVectorSchema(this.headNames);
                sampleLine = firstLine;
            }
            this.parsers = detectParsers(sampleLine);
        } catch (FileNotFoundException e) {
            throw new IllegalStateException(csvDatasetReaderConfig.filePath() + " not found.", e);
        } catch (IOException e) {
            throw new IllegalStateException("IOException for " + csvDatasetReaderConfig.filePath(), e);
        }
    }

    /**
     * Reads all instances into a list and closes the iterator afterwards, also when reading fails.
     *
     * @return list with all instances delivered by {@link #iterator2()}
     */
    @Deprecated
    public List<Instance> readAll() {
        Iterator<Instance> iterator = iterator2();
        try {
            return CollectionHelper.newArrayList(iterator);
        } finally {
            if (iterator instanceof Closeable) {
                FileHelper.close(new Closeable[]{(Closeable) iterator});
            }
        }
    }

    /**
     * Chooses a parser for every non-skipped feature column: the parser configured for the column
     * name if there is one, otherwise the first default parser that accepts the sample value.
     *
     * @param list fields of the sample line
     * @return parsers aligned with the feature columns; entries of skipped columns are {@code null}
     * @throws IllegalStateException if no parser accepts the sample value of a column
     */
    private ValueParser[] detectParsers(List<String> list) {
        int featureColumns = this.config.readClassFromLastColumn() ? list.size() - 1 : list.size();
        ValueParser[] detected = new ValueParser[featureColumns];
        for (int i = 0; i < featureColumns; i++) {
            String name = this.headNames[i];
            if (name == null) {
                continue;
            }
            ValueParser parser = this.config.getParser(name);
            if (parser == null) {
                String sample = list.get(i);
                Iterator<ValueParser> candidates = this.config.getDefaultParsers().iterator();
                while (parser == null && candidates.hasNext()) {
                    ValueParser candidate = candidates.next();
                    if (candidate.canParse(sample)) {
                        parser = candidate;
                    }
                }
                if (parser == null) {
                    // Fail with a descriptive message instead of a NullPointerException below.
                    throw new IllegalStateException("No parser found for column \"" + name + "\" (sample value \"" + sample + "\").");
                }
            }
            LOGGER.debug("Parser for {}: {}", name, parser.getClass().getName());
            detected[i] = parser;
        }
        return detected;
    }

    /**
     * Creates a new iterator which reads the input from its beginning.
     */
    @Override // ws.palladian.core.dataset.Dataset, java.lang.Iterable
    /* renamed from: iterator */
    public Iterator<Instance> iterator2() {
        return new CsvDatasetIterator();
    }

    /**
     * Builds the feature information from the non-skipped column names and the value type reported
     * by the parser of each column.
     */
    @Override // ws.palladian.core.dataset.Dataset
    public FeatureInformation getFeatureInformation() {
        FeatureInformationBuilder featureInformationBuilder = new FeatureInformationBuilder();
        for (int i = 0; i < this.headNames.length; i++) {
            String name = this.headNames[i];
            if (name != null) {
                featureInformationBuilder.set(name, this.parsers[i].getType());
            }
        }
        return featureInformationBuilder.m80create();
    }

    /**
     * Returns the number of instances: the number of lines in the input, minus one when a header is
     * configured, capped by the configured limit and never negative. The value is computed on the
     * first call and cached.
     *
     * @throws IllegalStateException if the input cannot be read; the I/O exception is attached as cause
     */
    @Override // ws.palladian.core.dataset.Dataset
    public long size() {
        if (this.size == -1) {
            try (InputStream inputStream = this.config.openInputStream()) {
                long lineCount = FileHelper.getNumberOfLines(inputStream);
                long instanceLines = this.config.readHeader() ? lineCount - 1 : lineCount;
                // Clamp at zero so that the result never collides with the "not computed" marker -1.
                this.size = Math.max(0L, Math.min(instanceLines, this.config.getLimit()));
            } catch (IOException e) {
                throw new IllegalStateException("IOException for " + this.config.filePath(), e);
            }
        }
        return this.size;
    }
}
