package org.apache.flink.formats.parquet;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import javax.annotation.Nullable;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.connector.base.source.reader.SourceReaderOptions;
import org.apache.flink.connector.file.src.FileSourceSplit;
import org.apache.flink.connector.file.src.reader.BulkFormat;
import org.apache.flink.connector.file.src.util.CheckpointedPosition;
import org.apache.flink.connector.file.src.util.Pool;
import org.apache.flink.core.fs.Path;
import org.apache.flink.formats.parquet.utils.SerializableConfiguration;
import org.apache.flink.formats.parquet.vector.ColumnBatchFactory;
import org.apache.flink.formats.parquet.vector.ParquetDecimalVector;
import org.apache.flink.formats.parquet.vector.ParquetSplitReaderUtil;
import org.apache.flink.formats.parquet.vector.reader.AbstractColumnReader;
import org.apache.flink.formats.parquet.vector.reader.ColumnReader;
import org.apache.flink.table.data.vector.ColumnVector;
import org.apache.flink.table.data.vector.VectorizedColumnBatch;
import org.apache.flink.table.data.vector.writable.WritableColumnVector;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.LogicalTypeRoot;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.util.FlinkRuntimeException;
import org.apache.flink.util.Preconditions;
import org.apache.hudi.org.apache.parquet.column.ColumnDescriptor;
import org.apache.hudi.org.apache.parquet.column.page.PageReadStore;
import org.apache.hudi.org.apache.parquet.filter2.compat.RowGroupFilter;
import org.apache.hudi.org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.hudi.org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.hudi.org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.hudi.org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.hudi.org.apache.parquet.schema.GroupType;
import org.apache.hudi.org.apache.parquet.schema.MessageType;
import org.apache.hudi.org.apache.parquet.schema.Type;
import org.apache.hudi.org.apache.parquet.schema.Types;

/* loaded from: input_file:org/apache/flink/formats/parquet/ParquetVectorizedInputFormat.class */
public abstract class ParquetVectorizedInputFormat<T, SplitT extends FileSourceSplit> implements BulkFormat<T, SplitT> {
    /** Serialization version identifier of this format. */
    private static final long serialVersionUID = 1;
    /** Hadoop configuration wrapper; its {@code conf()} is used to read the footer and open the file. */
    private final SerializableConfiguration hadoopConfig;
    /** Names of the projected fields, taken from the row type handed to the constructor. */
    private final String[] projectedFields;
    /** Logical types of the projected fields, taken from the same row type as {@link #projectedFields}. */
    private final LogicalType[] projectedTypes;
    /** Factory that combines a split with its column vectors into the batch handed to the reader batch. */
    private final ColumnBatchFactory<SplitT> batchFactory;
    /** Upper bound on the number of rows loaded into a single batch. */
    private final int batchSize;
    /** Flag forwarded to {@code ParquetSplitReaderUtil.createColumnReader} when column readers are built. */
    private final boolean isUtcTimestamp;
    /** Selects exact-name matching (true) or lower-cased matching (false) when clipping the file schema. */
    private final boolean isCaseSensitive;
    /** Assertion switch surfaced by the decompiler; set in the static initializer and read by restoreReader. */
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/flink/formats/parquet/ParquetVectorizedInputFormat$ParquetReader.class */
    public class ParquetReader implements BulkFormat.Reader<T> {
        private ParquetFileReader reader;
        private final MessageType requestedSchema;
        private final long totalRowCount;
        private final Pool<ParquetReaderBatch<T>> pool;
        private long rowsReturned;
        private long totalCountLoadedSoFar;
        private ColumnReader[] columnReaders;
        private long recordsToSkip;

        private ParquetReader(ParquetFileReader parquetFileReader, MessageType messageType, long j, Pool<ParquetReaderBatch<T>> pool) {
            this.reader = parquetFileReader;
            this.requestedSchema = messageType;
            this.totalRowCount = j;
            this.pool = pool;
            this.rowsReturned = 0L;
            this.totalCountLoadedSoFar = 0L;
            this.recordsToSkip = 0L;
        }

        @Nullable
        public BulkFormat.RecordIterator<T> readBatch() throws IOException {
            ParquetReaderBatch<T> cachedEntry = getCachedEntry();
            long j = this.rowsReturned;
            if (!nextBatch(cachedEntry)) {
                cachedEntry.recycle();
                return null;
            }
            BulkFormat.RecordIterator<T> convertAndGetIterator = cachedEntry.convertAndGetIterator(j);
            skipRecord(convertAndGetIterator);
            return convertAndGetIterator;
        }

        private boolean nextBatch(ParquetReaderBatch<T> parquetReaderBatch) throws IOException {
            for (WritableColumnVector writableColumnVector : ((ParquetReaderBatch) parquetReaderBatch).writableVectors) {
                writableColumnVector.reset();
            }
            parquetReaderBatch.columnarBatch.setNumRows(0);
            if (this.rowsReturned >= this.totalRowCount) {
                return false;
            }
            if (this.rowsReturned == this.totalCountLoadedSoFar) {
                readNextRowGroup();
            }
            int min = (int) Math.min(ParquetVectorizedInputFormat.this.batchSize, this.totalCountLoadedSoFar - this.rowsReturned);
            for (int i = 0; i < this.columnReaders.length; i++) {
                this.columnReaders[i].readToVector(min, ((ParquetReaderBatch) parquetReaderBatch).writableVectors[i]);
            }
            this.rowsReturned += min;
            parquetReaderBatch.columnarBatch.setNumRows(min);
            return true;
        }

        private void readNextRowGroup() throws IOException {
            PageReadStore readNextRowGroup = this.reader.readNextRowGroup();
            if (readNextRowGroup == null) {
                throw new IOException("expecting more rows but reached last block. Read " + this.rowsReturned + " out of " + this.totalRowCount);
            }
            List<ColumnDescriptor> columns = this.requestedSchema.getColumns();
            this.columnReaders = new AbstractColumnReader[columns.size()];
            for (int i = 0; i < columns.size(); i++) {
                this.columnReaders[i] = ParquetSplitReaderUtil.createColumnReader(ParquetVectorizedInputFormat.this.isUtcTimestamp, ParquetVectorizedInputFormat.this.projectedTypes[i], columns.get(i), readNextRowGroup.getPageReader(columns.get(i)));
            }
            this.totalCountLoadedSoFar += readNextRowGroup.getRowCount();
        }

        public void seek(long j) {
            if (this.totalCountLoadedSoFar != 0) {
                throw new UnsupportedOperationException("Only support seek at first.");
            }
            for (BlockMetaData blockMetaData : this.reader.getRowGroups()) {
                if (blockMetaData.getRowCount() > j) {
                    break;
                }
                this.reader.skipNextRowGroup();
                this.rowsReturned += blockMetaData.getRowCount();
                this.totalCountLoadedSoFar += blockMetaData.getRowCount();
                j -= blockMetaData.getRowCount();
            }
            this.recordsToSkip = j;
        }

        private ParquetReaderBatch<T> getCachedEntry() throws IOException {
            try {
                return (ParquetReaderBatch) this.pool.pollEntry();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted");
            }
        }

        private void skipRecord(BulkFormat.RecordIterator<T> recordIterator) {
            while (this.recordsToSkip > 0 && recordIterator.next() != null) {
                this.recordsToSkip--;
            }
        }

        public void close() throws IOException {
            if (this.reader != null) {
                this.reader.close();
                this.reader = null;
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:org/apache/flink/formats/parquet/ParquetVectorizedInputFormat$ParquetReaderBatch.class */
    /**
     * Reusable unit of work for the reader: the writable vectors that get filled, the
     * columnar batch exposing them, and the recycler that returns this object to its pool.
     *
     * @param <T> type of the records produced from the batch
     */
    public abstract static class ParquetReaderBatch<T> {
        /** Vectors the column readers write into. */
        private final WritableColumnVector[] writableVectors;
        /** Batch view whose row count is updated after every read. */
        protected final VectorizedColumnBatch columnarBatch;
        /** Callback that hands this batch back to the pool it came from. */
        private final Pool.Recycler<ParquetReaderBatch<T>> recycler;

        /**
         * @param writableColumnVectorArr vectors to be filled by the column readers
         * @param vectorizedColumnBatch batch built on top of those vectors
         * @param recycler callback used by {@link #recycle()}
         */
        public ParquetReaderBatch(WritableColumnVector[] writableColumnVectorArr, VectorizedColumnBatch vectorizedColumnBatch, Pool.Recycler<ParquetReaderBatch<T>> recycler) {
            this.recycler = recycler;
            this.columnarBatch = vectorizedColumnBatch;
            this.writableVectors = writableColumnVectorArr;
        }

        /** Returns this batch to the pool through its recycler. */
        public void recycle() {
            Pool.Recycler<ParquetReaderBatch<T>> owner = this.recycler;
            owner.recycle(this);
        }

        /**
         * Converts the filled batch into a record iterator.
         *
         * @param j value of the reader's returned-row counter before this batch was read
         * @return iterator over the records of this batch
         * @throws IOException if the conversion fails
         */
        public abstract BulkFormat.RecordIterator<T> convertAndGetIterator(long j) throws IOException;
    }

    public ParquetVectorizedInputFormat(SerializableConfiguration serializableConfiguration, RowType rowType, ColumnBatchFactory<SplitT> columnBatchFactory, int i, boolean z, boolean z2) {
        this.hadoopConfig = serializableConfiguration;
        this.projectedFields = (String[]) rowType.getFieldNames().toArray(new String[0]);
        this.projectedTypes = (LogicalType[]) rowType.getChildren().toArray(new LogicalType[0]);
        this.batchFactory = columnBatchFactory;
        this.batchSize = i;
        this.isUtcTimestamp = z;
        this.isCaseSensitive = z2;
    }

    /**
     * Opens a reader for the given split.
     *
     * <p>The footer is read for the byte range of the split, row groups are filtered with the
     * filter found in the Hadoop configuration, and the file schema is clipped to the
     * projected fields. Schema validation and batch-pool creation happen before the file is
     * opened, so a failure in either of them cannot leave an open file reader behind.
     *
     * @param configuration source configuration; supplies the element queue capacity used as pool size
     * @param splitt split to read
     * @return a reader positioned at the first selected row group
     * @throws IOException if the footer or file cannot be read, or a required column is missing
     */
    public ParquetVectorizedInputFormat<T, SplitT>.ParquetReader createReader(Configuration configuration, SplitT splitt) throws IOException {
        org.apache.hadoop.fs.Path hadoopPath = new org.apache.hadoop.fs.Path(splitt.path().toUri());
        long splitStart = splitt.offset();
        long splitEnd = splitStart + splitt.length();

        ParquetMetadata footer = ParquetFileReader.readFooter(this.hadoopConfig.conf(), hadoopPath, ParquetMetadataConverter.range(splitStart, splitEnd));
        MessageType fileSchema = footer.getFileMetaData().getSchema();
        List<BlockMetaData> rowGroups = RowGroupFilter.filterRowGroups(org.apache.hudi.org.apache.parquet.hadoop.ParquetInputFormat.getFilter(this.hadoopConfig.conf()), footer.getBlocks(), fileSchema);

        MessageType requestedSchema = clipParquetSchema(fileSchema);
        // Validate first: nothing that needs closing has been opened yet.
        checkSchema(fileSchema, requestedSchema);

        long totalRowCount = 0;
        for (BlockMetaData rowGroup : rowGroups) {
            totalRowCount += rowGroup.getRowCount();
        }

        Pool<ParquetReaderBatch<T>> batches = createPoolOfBatches(splitt, requestedSchema, configuration.getInteger(SourceReaderOptions.ELEMENT_QUEUE_CAPACITY));

        // Open the file last; from here on only field assignments follow, so the handle cannot leak.
        ParquetFileReader fileReader = new ParquetFileReader(this.hadoopConfig.conf(), footer.getFileMetaData(), hadoopPath, rowGroups, requestedSchema.getColumns());
        return new ParquetReader(fileReader, requestedSchema, totalRowCount, batches);
    }

    /**
     * Re-creates a reader for a split that carries a checkpointed position and advances it
     * by the number of records recorded in that position.
     *
     * @param configuration source configuration, forwarded to {@code createReader}
     * @param splitt split with a reader position whose offset is {@code NO_OFFSET}
     * @return a reader positioned after the already-processed records
     * @throws IllegalArgumentException if the checkpointed offset is not {@code NO_OFFSET}
     * @throws IOException if the reader cannot be created
     */
    public ParquetVectorizedInputFormat<T, SplitT>.ParquetReader restoreReader(Configuration configuration, SplitT splitt) throws IOException {
        if (!$assertionsDisabled && !splitt.getReaderPosition().isPresent()) {
            throw new AssertionError();
        }
        CheckpointedPosition position = splitt.getReaderPosition().get();
        Preconditions.checkArgument(position.getOffset() == CheckpointedPosition.NO_OFFSET, "The offset of CheckpointedPosition should always be NO_OFFSET");
        ParquetReader restored = createReader(configuration, splitt);
        try {
            restored.seek(position.getRecordsAfterOffset());
        } catch (RuntimeException | Error seekFailure) {
            // The caller never receives the reader, so release the file handle here.
            try {
                restored.close();
            } catch (IOException | RuntimeException closeFailure) {
                seekFailure.addSuppressed(closeFailure);
            }
            throw seekFailure;
        }
        return restored;
    }

    /**
     * Reports whether files of this format may be divided into several splits.
     *
     * @return always {@code true}
     */
    public boolean isSplittable() {
        return true;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Builds the requested schema by picking, in projection order, the file-schema field that
     * matches each projected field name.
     *
     * <p>In case-sensitive mode names must match exactly. Otherwise names are compared after
     * lower-casing with {@link Locale#ROOT}, and the file schema must not contain two fields
     * that collide under that comparison.
     *
     * @param groupType schema of the Parquet file
     * @return message type named {@code flink-parquet} containing only the projected fields
     * @throws IllegalArgumentException if a projected field is not present in the file schema
     * @throws FlinkRuntimeException if case-insensitive matching finds duplicate field names
     */
    private MessageType clipParquetSchema(GroupType groupType) {
        Type[] typeArr = new Type[this.projectedFields.length];
        if (this.isCaseSensitive) {
            for (int i = 0; i < this.projectedFields.length; i++) {
                String fieldName = this.projectedFields[i];
                // getFieldIndex() throws for unknown names instead of returning a negative
                // index, so test for presence explicitly to raise the intended error.
                if (!groupType.containsField(fieldName)) {
                    throw new IllegalArgumentException(fieldName + " does not exist");
                }
                typeArr[i] = groupType.getType(fieldName);
            }
        } else {
            HashMap<String, Type> fieldsByLowerCaseName = new HashMap<>();
            for (Type field : groupType.getFields()) {
                String key = field.getName().toLowerCase(Locale.ROOT);
                if (fieldsByLowerCaseName.put(key, field) != null) {
                    throw new FlinkRuntimeException("Parquet with case insensitive mode should have no duplicate key: " + key);
                }
            }
            for (int i = 0; i < this.projectedFields.length; i++) {
                Type match = fieldsByLowerCaseName.get(this.projectedFields[i].toLowerCase(Locale.ROOT));
                if (match == null) {
                    throw new IllegalArgumentException(this.projectedFields[i] + " does not exist");
                }
                typeArr[i] = match;
            }
        }
        return (MessageType) Types.buildMessage().addFields(typeArr).named("flink-parquet");
    }

    /**
     * Verifies that the requested schema can be served from the file schema.
     *
     * <p>Every requested field must be a non-repeated primitive. If its path exists in the
     * file, the column descriptors must be equal; if it does not exist, the column is only
     * tolerated when its maximum definition level is greater than zero.
     *
     * @param messageType schema of the Parquet file
     * @param messageType2 requested (clipped) schema
     * @throws RuntimeException if the number of requested fields differs from the projection
     * @throws UnsupportedOperationException for complex types or differing column descriptors
     * @throws IOException if a column with maximum definition level zero is missing from the file
     */
    private void checkSchema(MessageType messageType, MessageType messageType2) throws IOException, UnsupportedOperationException {
        int fieldCount = messageType2.getFieldCount();
        if (this.projectedFields.length != fieldCount) {
            throw new RuntimeException("The quality of field type is incompatible with the request schema!");
        }
        // Fetch the derived lists once instead of on every loop iteration.
        List<Type> requestedFields = messageType2.getFields();
        List<String[]> requestedPaths = messageType2.getPaths();
        List<ColumnDescriptor> requestedColumns = messageType2.getColumns();
        for (int i = 0; i < fieldCount; i++) {
            Type field = requestedFields.get(i);
            if (!field.isPrimitive() || field.isRepetition(Type.Repetition.REPEATED)) {
                throw new UnsupportedOperationException("Complex types not supported.");
            }
            String[] path = requestedPaths.get(i);
            ColumnDescriptor requestedColumn = requestedColumns.get(i);
            if (messageType.containsPath(path)) {
                if (!messageType.getColumnDescription(path).equals(requestedColumn)) {
                    throw new UnsupportedOperationException("Schema evolution not supported.");
                }
            } else if (requestedColumn.getMaxDefinitionLevel() == 0) {
                throw new IOException("Required column is missing in data file. Col: " + Arrays.toString(path));
            }
        }
    }

    /**
     * Creates a pool pre-filled with {@code i} reader batches for the given split.
     *
     * @param splitt split the batches are created for
     * @param messageType requested schema used to size and type the vectors
     * @param i number of batches to create, also used as the pool size
     * @return the filled pool
     */
    private Pool<ParquetReaderBatch<T>> createPoolOfBatches(SplitT splitt, MessageType messageType, int i) {
        Pool<ParquetReaderBatch<T>> pool = new Pool<>(i);
        for (int created = 0; created < i; created++) {
            // Pass the split as-is; it is already of type SplitT.
            pool.add(createReaderBatch(splitt, messageType, pool.recycler()));
        }
        return pool;
    }

    /**
     * Assembles one reader batch for a split: writable vectors, their readable views, the
     * columnar batch from the batch factory, and finally the concrete batch object.
     *
     * @param splitt split the batch belongs to
     * @param messageType requested schema
     * @param recycler callback returning the batch to its pool
     * @return the new reader batch
     */
    private ParquetReaderBatch<T> createReaderBatch(SplitT splitt, MessageType messageType, Pool.Recycler<ParquetReaderBatch<T>> recycler) {
        WritableColumnVector[] writable = createWritableVectors(messageType);
        ColumnVector[] readable = createReadableVectors(writable);
        VectorizedColumnBatch batch = this.batchFactory.create(splitt, readable);
        return createReaderBatch(writable, batch, recycler);
    }

    /**
     * Allocates one writable vector per projected type, sized to the batch size and typed
     * after the primitive type of the column at the same index of the requested schema.
     *
     * @param messageType requested schema
     * @return the writable vectors, in projection order
     */
    private WritableColumnVector[] createWritableVectors(MessageType messageType) {
        // Look the column list up once rather than once per projected field.
        List<ColumnDescriptor> columns = messageType.getColumns();
        WritableColumnVector[] vectors = new WritableColumnVector[this.projectedTypes.length];
        for (int i = 0; i < this.projectedTypes.length; i++) {
            vectors[i] = ParquetSplitReaderUtil.createWritableColumnVector(this.batchSize, this.projectedTypes[i], columns.get(i).getPrimitiveType());
        }
        return vectors;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Produces the read-side view of the writable vectors: decimal columns are wrapped in a
     * {@link ParquetDecimalVector}, all other columns are exposed unchanged.
     *
     * @param writableColumnVectorArr vectors filled by the column readers
     * @return readable vectors, index-aligned with the input
     */
    private ColumnVector[] createReadableVectors(WritableColumnVector[] writableColumnVectorArr) {
        int count = writableColumnVectorArr.length;
        ColumnVector[] readable = new ColumnVector[count];
        for (int idx = 0; idx < count; idx++) {
            boolean isDecimal = this.projectedTypes[idx].getTypeRoot() == LogicalTypeRoot.DECIMAL;
            if (isDecimal) {
                readable[idx] = new ParquetDecimalVector(writableColumnVectorArr[idx]);
            } else {
                readable[idx] = writableColumnVectorArr[idx];
            }
        }
        return readable;
    }

    /**
     * Creates the concrete reader batch for the given vectors. Invoked by the split-aware
     * {@code createReaderBatch} overload once per pool entry.
     *
     * @param writableColumnVectorArr vectors the column readers write into
     * @param vectorizedColumnBatch batch produced by the batch factory for those vectors
     * @param recycler callback that returns the batch to its pool
     * @return the reader batch
     */
    protected abstract ParquetReaderBatch<T> createReaderBatch(WritableColumnVector[] writableColumnVectorArr, VectorizedColumnBatch vectorizedColumnBatch, Pool.Recycler<ParquetReaderBatch<T>> recycler);

    /* JADX WARN: Multi-variable type inference failed */
    /* renamed from: restoreReader, reason: collision with other method in class */
    /**
     * Decompiler-renamed bridge for {@code restoreReader}: narrows the split to {@code SplitT}
     * and delegates to the typed method.
     *
     * @param configuration source configuration
     * @param fileSourceSplit split to restore; must actually be a {@code SplitT}
     * @return the restored reader
     * @throws IOException if the reader cannot be created
     */
    @SuppressWarnings("unchecked") // bridge semantics: the split is narrowed to the type parameter
    public /* bridge */ /* synthetic */ BulkFormat.Reader m1549restoreReader(Configuration configuration, FileSourceSplit fileSourceSplit) throws IOException {
        return restoreReader(configuration, (SplitT) fileSourceSplit);
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* renamed from: createReader, reason: collision with other method in class */
    /**
     * Decompiler-renamed bridge for {@code createReader}: narrows the split to {@code SplitT}
     * and delegates to the typed method.
     *
     * @param configuration source configuration
     * @param fileSourceSplit split to read; must actually be a {@code SplitT}
     * @return the new reader
     * @throws IOException if the reader cannot be created
     */
    @SuppressWarnings("unchecked") // bridge semantics: the split is narrowed to the type parameter
    public /* bridge */ /* synthetic */ BulkFormat.Reader m1550createReader(Configuration configuration, FileSourceSplit fileSourceSplit) throws IOException {
        return createReader(configuration, (SplitT) fileSourceSplit);
    }

    // Initializes the assertion flag consulted by restoreReader: it is true when assertions
    // are NOT enabled for this class, in which case the assertion-style check is skipped.
    static {
        $assertionsDisabled = !ParquetVectorizedInputFormat.class.desiredAssertionStatus();
    }
}
