package org.apache.iceberg.spark.source;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.spark.SparkSchemaUtil;
import org.apache.iceberg.spark.data.RandomData;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.ByteBuffers;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

/* loaded from: input_file:org/apache/iceberg/spark/source/TestWriteMetricsConfig.class */
/**
 * Checks which column metrics end up on the data files written through the Spark
 * {@code "iceberg"} data source when the table is created with different
 * {@code write.metadata.metrics.*} properties.
 *
 * <p>Every test creates a fresh Hadoop table in a temporary folder, appends rows as Parquet,
 * plans a scan with column stats included, and asserts on the per-column maps exposed by each
 * planned {@link DataFile}.
 */
public class TestWriteMetricsConfig {

    /** Table property holding the metrics mode applied to columns without an override. */
    private static final String DEFAULT_METRICS_MODE = "write.metadata.metrics.default";

    /** Prefix of the per-column metrics mode properties; the column name is appended. */
    private static final String COLUMN_METRICS_MODE_PREFIX = "write.metadata.metrics.column.";

    private static final Configuration CONF = new Configuration();

    private static final Schema SIMPLE_SCHEMA = new Schema(
            Types.NestedField.optional(1, "id", Types.IntegerType.get()),
            Types.NestedField.optional(2, "data", Types.StringType.get()));

    private static final Schema COMPLEX_SCHEMA = new Schema(
            Types.NestedField.required(1, "longCol", Types.IntegerType.get()),
            Types.NestedField.optional(2, "strCol", Types.StringType.get()),
            Types.NestedField.required(3, "record", Types.StructType.of(
                    Types.NestedField.required(4, "id", Types.IntegerType.get()),
                    Types.NestedField.required(5, "data", Types.StringType.get()))));

    private static SparkSession spark = null;
    private static JavaSparkContext sc = null;

    @Rule
    public TemporaryFolder temp = new TemporaryFolder();

    /** Starts the local Spark session shared by all tests in this class. */
    @BeforeClass
    public static void startSpark() {
        spark = SparkSession.builder().master("local[2]").getOrCreate();
        sc = new JavaSparkContext(spark.sparkContext());
    }

    /** Clears the shared Spark handles and stops the session if one was started. */
    @AfterClass
    public static void stopSpark() {
        SparkSession currentSpark = spark;
        spark = null;
        sc = null;
        // Guard against a failed startSpark(): an NPE here would obscure the original failure.
        if (currentSpark != null) {
            currentSpark.stop();
        }
    }

    /** With the default mode set to "full", both columns have counts and bounds. */
    @Test
    public void testFullMetricsCollectionForParquet() throws IOException {
        String tableLocation = temp.newFolder("iceberg-table").toString();
        Map<String, String> properties = Maps.newHashMap();
        properties.put(DEFAULT_METRICS_MODE, "full");
        Table table = createSimpleTable(tableLocation, properties);

        appendSimpleRecords(tableLocation);

        for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) {
            DataFile dataFile = task.file();
            Assert.assertEquals(2, dataFile.nullValueCounts().size());
            Assert.assertEquals(2, dataFile.valueCounts().size());
            Assert.assertEquals(2, dataFile.lowerBounds().size());
            Assert.assertEquals(2, dataFile.upperBounds().size());
        }
    }

    /** With the default mode set to "counts", both columns have counts but no bounds. */
    @Test
    public void testCountMetricsCollectionForParquet() throws IOException {
        String tableLocation = temp.newFolder("iceberg-table").toString();
        Map<String, String> properties = Maps.newHashMap();
        properties.put(DEFAULT_METRICS_MODE, "counts");
        Table table = createSimpleTable(tableLocation, properties);

        appendSimpleRecords(tableLocation);

        for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) {
            DataFile dataFile = task.file();
            Assert.assertEquals(2, dataFile.nullValueCounts().size());
            Assert.assertEquals(2, dataFile.valueCounts().size());
            Assert.assertTrue(dataFile.lowerBounds().isEmpty());
            Assert.assertTrue(dataFile.upperBounds().isEmpty());
        }
    }

    /** With the default mode set to "none", no counts and no bounds are recorded. */
    @Test
    public void testNoMetricsCollectionForParquet() throws IOException {
        String tableLocation = temp.newFolder("iceberg-table").toString();
        Map<String, String> properties = Maps.newHashMap();
        properties.put(DEFAULT_METRICS_MODE, "none");
        Table table = createSimpleTable(tableLocation, properties);

        appendSimpleRecords(tableLocation);

        for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) {
            DataFile dataFile = task.file();
            Assert.assertTrue(dataFile.nullValueCounts().isEmpty());
            Assert.assertTrue(dataFile.valueCounts().isEmpty());
            Assert.assertTrue(dataFile.lowerBounds().isEmpty());
            Assert.assertTrue(dataFile.upperBounds().isEmpty());
        }
    }

    /** A "full" override on {@code id} over a "counts" default yields bounds for {@code id} only. */
    @Test
    public void testCustomMetricCollectionForParquet() throws IOException {
        String tableLocation = temp.newFolder("iceberg-table").toString();
        Map<String, String> properties = Maps.newHashMap();
        properties.put(DEFAULT_METRICS_MODE, "counts");
        properties.put(COLUMN_METRICS_MODE_PREFIX + "id", "full");
        Table table = createSimpleTable(tableLocation, properties);

        appendSimpleRecords(tableLocation);

        Types.NestedField id = table.schema().findField("id");
        for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) {
            DataFile dataFile = task.file();
            Assert.assertEquals(2, dataFile.nullValueCounts().size());
            Assert.assertEquals(2, dataFile.valueCounts().size());
            Assert.assertEquals(1, dataFile.lowerBounds().size());
            Assert.assertTrue(dataFile.lowerBounds().containsKey(id.fieldId()));
            Assert.assertEquals(1, dataFile.upperBounds().size());
            Assert.assertTrue(dataFile.upperBounds().containsKey(id.fieldId()));
        }
    }

    /**
     * Per-column overrides addressed by dotted path ({@code record.id}, {@code record.data})
     * apply to nested fields: three columns have counts, two have bounds, and the bounds of the
     * {@code truncate(2)} column are two bytes long.
     */
    @Test
    public void testCustomMetricCollectionForNestedParquet() throws IOException {
        String tableLocation = temp.newFolder("iceberg-table").toString();
        HadoopTables tables = new HadoopTables(CONF);
        PartitionSpec spec = PartitionSpec.builderFor(COMPLEX_SCHEMA).identity("strCol").build();
        Map<String, String> properties = Maps.newHashMap();
        properties.put(DEFAULT_METRICS_MODE, "none");
        properties.put(COLUMN_METRICS_MODE_PREFIX + "longCol", "counts");
        properties.put(COLUMN_METRICS_MODE_PREFIX + "record.id", "full");
        properties.put(COLUMN_METRICS_MODE_PREFIX + "record.data", "truncate(2)");
        Table table = tables.create(COMPLEX_SCHEMA, spec, properties, tableLocation);

        // Ten generated rows (seed 0) are turned into a DataFrame and appended as Parquet.
        spark.internalCreateDataFrame(
                        JavaRDD.toRDD(sc.parallelize(
                                Lists.newArrayList(RandomData.generateSpark(COMPLEX_SCHEMA, 10, 0L)))),
                        SparkSchemaUtil.convert(COMPLEX_SCHEMA),
                        false)
                .coalesce(1)
                .write()
                .format("iceberg")
                .option("write-format", "parquet")
                .mode("append")
                .save(tableLocation);

        Schema schema = table.schema();
        Types.NestedField longCol = schema.findField("longCol");
        Types.NestedField recordId = schema.findField("record.id");
        Types.NestedField recordData = schema.findField("record.data");

        for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) {
            DataFile dataFile = task.file();

            Map<Integer, Long> nullValueCounts = dataFile.nullValueCounts();
            Assert.assertEquals(3, nullValueCounts.size());
            Assert.assertTrue(nullValueCounts.containsKey(longCol.fieldId()));
            Assert.assertTrue(nullValueCounts.containsKey(recordId.fieldId()));
            Assert.assertTrue(nullValueCounts.containsKey(recordData.fieldId()));

            Map<Integer, Long> valueCounts = dataFile.valueCounts();
            Assert.assertEquals(3, valueCounts.size());
            Assert.assertTrue(valueCounts.containsKey(longCol.fieldId()));
            Assert.assertTrue(valueCounts.containsKey(recordId.fieldId()));
            Assert.assertTrue(valueCounts.containsKey(recordData.fieldId()));

            Map<Integer, ByteBuffer> lowerBounds = dataFile.lowerBounds();
            Assert.assertEquals(2, lowerBounds.size());
            Assert.assertTrue(lowerBounds.containsKey(recordId.fieldId()));
            Assert.assertEquals(2, ByteBuffers.toByteArray(lowerBounds.get(recordData.fieldId())).length);

            Map<Integer, ByteBuffer> upperBounds = dataFile.upperBounds();
            Assert.assertEquals(2, upperBounds.size());
            Assert.assertTrue(upperBounds.containsKey(recordId.fieldId()));
            Assert.assertEquals(2, ByteBuffers.toByteArray(upperBounds.get(recordData.fieldId())).length);
        }
    }

    /** Creates an unpartitioned {@link #SIMPLE_SCHEMA} table at {@code location} with the given properties. */
    private Table createSimpleTable(String location, Map<String, String> properties) {
        HadoopTables tables = new HadoopTables(CONF);
        return tables.create(SIMPLE_SCHEMA, PartitionSpec.unpartitioned(), properties, location);
    }

    /** Appends the three fixed {@link SimpleRecord} rows to the table at {@code location} as Parquet. */
    private void appendSimpleRecords(String location) {
        spark.createDataFrame(
                        Lists.newArrayList(
                                new SimpleRecord(1, "a"),
                                new SimpleRecord(2, "b"),
                                new SimpleRecord(3, "c")),
                        SimpleRecord.class)
                .select("id", "data")
                .coalesce(1)
                .write()
                .format("iceberg")
                .option("write-format", "parquet")
                .mode("append")
                .save(location);
    }
}
