This is an automated email from the ASF dual-hosted git repository.
etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 2899b5a751 API, Core: Introduce classes for content stats (#13933)
2899b5a751 is described below
commit 2899b5a75106c698ea8e59fe0b93c4857acaadee
Author: Eduard Tudenhoefner <[email protected]>
AuthorDate: Wed Nov 12 07:47:53 2025 +0100
API, Core: Introduce classes for content stats (#13933)
---
.../org/apache/iceberg/stats/ContentStats.java | 41 +++
.../org/apache/iceberg/stats/FieldStatistic.java | 110 +++++++
.../java/org/apache/iceberg/stats/FieldStats.java | 54 ++++
.../java/org/apache/iceberg/stats/StatsUtil.java | 195 +++++++++++++
.../org/apache/iceberg/stats/TestStatsUtil.java | 214 ++++++++++++++
.../org/apache/iceberg/stats/BaseContentStats.java | 259 +++++++++++++++++
.../org/apache/iceberg/stats/BaseFieldStats.java | 318 +++++++++++++++++++++
.../org/apache/iceberg/stats/TestContentStats.java | 306 ++++++++++++++++++++
.../org/apache/iceberg/stats/TestFieldStats.java | 214 ++++++++++++++
9 files changed, 1711 insertions(+)
diff --git a/api/src/main/java/org/apache/iceberg/stats/ContentStats.java
b/api/src/main/java/org/apache/iceberg/stats/ContentStats.java
new file mode 100644
index 0000000000..b39db25651
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/stats/ContentStats.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.stats;
+
+import java.util.List;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.types.Types;
+
+/**
+ * A container of per-field {@link FieldStats}.
+ *
+ * <p>The stats can be listed as a whole or looked up by field ID, and {@link #statsStruct()}
+ * exposes the struct type describing them.
+ */
+public interface ContentStats extends StructLike {
+
+  /** Returns a list of all the {@link FieldStats}. */
+  List<FieldStats<?>> fieldStats();
+
+  /**
+   * Returns a {@link FieldStats} instance holding field stats for the given field ID.
+   *
+   * @param fieldId the field ID to retrieve {@link FieldStats} for
+   * @param <T> the type of the underlying {@link FieldStats} instance
+   * @return a {@link FieldStats} instance holding field stats for the given field ID
+   */
+  <T> FieldStats<T> statsFor(int fieldId);
+
+  /** Returns the stats struct holding nested structs with their respective field stats. */
+  Types.StructType statsStruct();
+}
diff --git a/api/src/main/java/org/apache/iceberg/stats/FieldStatistic.java
b/api/src/main/java/org/apache/iceberg/stats/FieldStatistic.java
new file mode 100644
index 0000000000..7715359ea2
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/stats/FieldStatistic.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.stats;
+
+import static org.apache.iceberg.types.Types.NestedField.optional;
+
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+
+public enum FieldStatistic {
+ VALUE_COUNT(0, "value_count"),
+ NULL_VALUE_COUNT(1, "null_value_count"),
+ NAN_VALUE_COUNT(2, "nan_value_count"),
+ AVG_VALUE_SIZE(3, "avg_value_size"),
+ MAX_VALUE_SIZE(4, "max_value_size"),
+ LOWER_BOUND(5, "lower_bound"),
+ UPPER_BOUND(6, "upper_bound"),
+ EXACT_BOUNDS(7, "exact_bounds");
+
+ private final int offset;
+ private final String fieldName;
+
+ FieldStatistic(int offset, String fieldName) {
+ this.offset = offset;
+ this.fieldName = fieldName;
+ }
+
+ public int offset() {
+ return offset;
+ }
+
+ public String fieldName() {
+ return fieldName;
+ }
+
+ public static FieldStatistic fromOffset(int offset) {
+ switch (offset) {
+ case 0:
+ return VALUE_COUNT;
+ case 1:
+ return NULL_VALUE_COUNT;
+ case 2:
+ return NAN_VALUE_COUNT;
+ case 3:
+ return AVG_VALUE_SIZE;
+ case 4:
+ return MAX_VALUE_SIZE;
+ case 5:
+ return LOWER_BOUND;
+ case 6:
+ return UPPER_BOUND;
+ case 7:
+ return EXACT_BOUNDS;
+ default:
+ throw new IllegalArgumentException("Invalid statistic offset: " +
offset);
+ }
+ }
+
+ public static Types.StructType fieldStatsFor(Type type, int fieldId) {
+ return Types.StructType.of(
+ optional(
+ fieldId + VALUE_COUNT.offset(),
+ VALUE_COUNT.fieldName(),
+ Types.LongType.get(),
+ "Total value count, including null and NaN"),
+ optional(
+ fieldId + NULL_VALUE_COUNT.offset(),
+ NULL_VALUE_COUNT.fieldName(),
+ Types.LongType.get(),
+ "Total null value count"),
+ optional(
+ fieldId + NAN_VALUE_COUNT.offset(),
+ NAN_VALUE_COUNT.fieldName(),
+ Types.LongType.get(),
+ "Total NaN value count"),
+ optional(
+ fieldId + AVG_VALUE_SIZE.offset(),
+ AVG_VALUE_SIZE.fieldName(),
+ Types.IntegerType.get(),
+ "Avg value size of variable-length types (String, Binary)"),
+ optional(
+ fieldId + MAX_VALUE_SIZE.offset(),
+ MAX_VALUE_SIZE.fieldName(),
+ Types.IntegerType.get(),
+ "Max value size of variable-length types (String, Binary)"),
+ optional(fieldId + LOWER_BOUND.offset(), LOWER_BOUND.fieldName(),
type, "Lower bound"),
+ optional(fieldId + UPPER_BOUND.offset(), UPPER_BOUND.fieldName(),
type, "Upper bound"),
+ optional(
+ fieldId + EXACT_BOUNDS.offset(),
+ EXACT_BOUNDS.fieldName(),
+ Types.BooleanType.get(),
+ "Whether the upper/lower bound is exact or not"));
+ }
+}
diff --git a/api/src/main/java/org/apache/iceberg/stats/FieldStats.java
b/api/src/main/java/org/apache/iceberg/stats/FieldStats.java
new file mode 100644
index 0000000000..6411b479af
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/stats/FieldStats.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.stats;
+
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.types.Type;
+
+/**
+ * The statistics of a single field.
+ *
+ * @param <T> the type of the lower and upper bound
+ */
+public interface FieldStats<T> extends StructLike {
+  /** Returns the field ID of the statistic. */
+  int fieldId();
+
+  /** Returns the field type of the statistic. */
+  Type type();
+
+  /** Returns the total value count, including null and NaN. */
+  Long valueCount();
+
+  /** Returns the total null value count. */
+  Long nullValueCount();
+
+  /** Returns the total NaN value count. */
+  Long nanValueCount();
+
+  /** Returns the avg value size of variable-length types (String, Binary). */
+  Integer avgValueSize();
+
+  /** Returns the max value size of variable-length types (String, Binary). */
+  Integer maxValueSize();
+
+  /** Returns the lower bound. */
+  T lowerBound();
+
+  /** Returns the upper bound. */
+  T upperBound();
+
+  /** Returns whether the upper/lower bound is exact or not. */
+  boolean hasExactBounds();
+}
diff --git a/api/src/main/java/org/apache/iceberg/stats/StatsUtil.java
b/api/src/main/java/org/apache/iceberg/stats/StatsUtil.java
new file mode 100644
index 0000000000..1e2bef98a7
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/stats/StatsUtil.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.stats;
+
+import static org.apache.iceberg.types.Types.NestedField.optional;
+
+import java.util.Comparator;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.types.Types;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class StatsUtil {
+ private static final Logger LOG = LoggerFactory.getLogger(StatsUtil.class);
+ // the number of reserved field IDs from the reserved field ID space as
defined in
+ // https://iceberg.apache.org/spec/#reserved-field-ids
+ static final int NUM_RESERVED_FIELD_IDS = 200;
+ // the starting field ID of the reserved field ID space
+ static final int RESERVED_FIELD_IDS_START = Integer.MAX_VALUE -
NUM_RESERVED_FIELD_IDS;
+ // the number of supported stats per table column
+ static final int NUM_SUPPORTED_STATS_PER_COLUMN = 200;
+ // the starting field ID of the stats space for data field IDs
+ static final int STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS = 10_000;
+ // the starting field ID of the stats space for metadata field IDs
+ static final int STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS =
2_147_000_000;
+ // support stats for only up to this amount of data field IDs
+ static final int MAX_DATA_FIELD_ID = 1_000_000;
+ static final int MAX_DATA_STATS_FIELD_ID = 200_010_000;
+
+ private StatsUtil() {}
+
+  /**
+   * Maps a field ID to the ID of the field holding its stats.
+   *
+   * <p>IDs starting at {@code RESERVED_FIELD_IDS_START} are mapped as reserved fields, all other
+   * IDs are mapped as data fields.
+   *
+   * @param fieldId the field ID to map
+   * @return the stats field ID, or -1 if the field ID cannot be mapped
+   */
+  public static int statsFieldIdForField(int fieldId) {
+    if (fieldId >= RESERVED_FIELD_IDS_START) {
+      return statsFieldIdForReservedField(fieldId);
+    }
+
+    return statsFieldIdForDataField(fieldId);
+  }
+
+  /**
+   * Maps a data field ID into the stats space for data fields.
+   *
+   * @return the stats field ID, or -1 if the ID is negative or above {@code MAX_DATA_FIELD_ID}
+   */
+  private static int statsFieldIdForDataField(int fieldId) {
+    boolean supported = fieldId >= 0 && fieldId <= MAX_DATA_FIELD_ID;
+    if (!supported) {
+      return -1;
+    }
+
+    // cannot overflow: at most 10_000 + 200 * 1_000_000 for a supported field ID
+    return STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS + NUM_SUPPORTED_STATS_PER_COLUMN * fieldId;
+  }
+
+  /**
+   * Maps a reserved field ID into the stats space for metadata fields.
+   *
+   * @return the stats field ID, or -1 if the computed ID is negative or lies above {@code
+   *     RESERVED_FIELD_IDS_START}
+   */
+  private static int statsFieldIdForReservedField(int fieldId) {
+    // position of the field ID within the reserved field ID space
+    int position = NUM_RESERVED_FIELD_IDS - (Integer.MAX_VALUE - fieldId);
+
+    // computed as long so that an overflowing ID can be detected
+    long candidate =
+        STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS
+            + (NUM_SUPPORTED_STATS_PER_COLUMN * (long) position);
+    boolean withinStatsSpace = candidate >= 0 && candidate <= RESERVED_FIELD_IDS_START;
+
+    return withinStatsSpace ? (int) candidate : -1;
+  }
+
+  /**
+   * Maps a stats field ID back to the ID of the field the stats belong to.
+   *
+   * <p>This is the inverse of {@link #statsFieldIdForField(int)}: only stats field IDs that can be
+   * produced by that method are mapped back, every other ID yields -1.
+   *
+   * @param statsFieldId the stats field ID to map back
+   * @return the field ID, or -1 if the given ID is not the stats field ID of any field
+   */
+  public static int fieldIdForStatsField(int statsFieldId) {
+    if (statsFieldId < STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS
+        || statsFieldId % NUM_SUPPORTED_STATS_PER_COLUMN != 0) {
+      // below the stats space or not aligned to the start of a per-column ID range
+      return -1;
+    }
+
+    return statsFieldId < STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS
+        ? fieldIdForStatsFieldFromDataField(statsFieldId)
+        : fieldIdForStatsFieldFromReservedField(statsFieldId);
+  }
+
+  /** Maps an aligned stats field ID from the data stats space back to its data field ID. */
+  private static int fieldIdForStatsFieldFromDataField(int statsFieldId) {
+    if (statsFieldId > MAX_DATA_STATS_FIELD_ID) {
+      // would yield a field ID above MAX_DATA_FIELD_ID, which statsFieldIdForField never maps
+      return -1;
+    }
+
+    return (statsFieldId - STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS)
+        / NUM_SUPPORTED_STATS_PER_COLUMN;
+  }
+
+  /** Maps an aligned stats field ID from the metadata stats space back to its reserved field ID. */
+  private static int fieldIdForStatsFieldFromReservedField(int statsFieldId) {
+    int position =
+        (statsFieldId - STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS)
+            / NUM_SUPPORTED_STATS_PER_COLUMN;
+
+    // computed as long so that IDs past the reserved space are rejected explicitly instead of
+    // relying on int overflow
+    long fieldId = (long) RESERVED_FIELD_IDS_START + position;
+    return fieldId <= Integer.MAX_VALUE ? (int) fieldId : -1;
+  }
+
+  /**
+   * Creates the {@code content_stats} field for the given schema.
+   *
+   * <p>Field IDs that no stats schema could be created for are logged as a warning.
+   *
+   * @param schema the schema to create the content stats field for
+   * @return the content stats field
+   */
+  public static Types.NestedField contentStatsFor(Schema schema) {
+    ContentStatsSchemaVisitor statsVisitor = new ContentStatsSchemaVisitor();
+    Types.NestedField contentStats = TypeUtil.visit(schema, statsVisitor);
+
+    Set<Integer> skipped = statsVisitor.skippedFieldIds;
+    if (skipped.isEmpty()) {
+      return contentStats;
+    }
+
+    LOG.warn("Could not create stats schema for field ids: {}", skipped);
+    return contentStats;
+  }
+
+ private static class ContentStatsSchemaVisitor extends
TypeUtil.SchemaVisitor<Types.NestedField> {
+ private final List<Types.NestedField> statsFields = Lists.newArrayList();
+ private final Set<Integer> skippedFieldIds = Sets.newLinkedHashSet();
+
+ @Override
+ public Types.NestedField schema(Schema schema, Types.NestedField
structResult) {
+ return optional(
+ 146,
+ "content_stats",
+ Types.StructType.of(
+ statsFields.stream()
+ .filter(Objects::nonNull)
+ .sorted(Comparator.comparing(Types.NestedField::fieldId))
+ .collect(Collectors.toList())));
+ }
+
+ @Override
+ public Types.NestedField list(Types.ListType list, Types.NestedField
elementResult) {
+ list.fields()
+ .forEach(
+ field -> {
+ Types.NestedField result = field(field, null);
+ if (null != result) {
+ statsFields.add(result);
+ }
+ });
+ return null;
+ }
+
+ @Override
+ public Types.NestedField map(
+ Types.MapType map, Types.NestedField keyResult, Types.NestedField
valueResult) {
+ map.fields()
+ .forEach(
+ field -> {
+ Types.NestedField result = field(field, null);
+ if (null != result) {
+ statsFields.add(result);
+ }
+ });
+ return null;
+ }
+
+ @Override
+ public Types.NestedField struct(Types.StructType struct,
List<Types.NestedField> fields) {
+ statsFields.addAll(fields);
+ return null;
+ }
+
+ @Override
+ public Types.NestedField field(Types.NestedField field, Types.NestedField
fieldResult) {
+ if (field.type().isNestedType() || field.type().isVariantType()) {
+ return null;
+ }
+
+ int fieldId = StatsUtil.statsFieldIdForField(field.fieldId());
+ if (fieldId >= 0) {
+ Types.StructType structType =
FieldStatistic.fieldStatsFor(field.type(), fieldId + 1);
+ return optional(fieldId, Integer.toString(field.fieldId()),
structType);
+ } else {
+ skippedFieldIds.add(field.fieldId());
+ }
+
+ return null;
+ }
+
+ @Override
+ public Types.NestedField variant(Types.VariantType variant) {
+ return null;
+ }
+ }
+}
diff --git a/api/src/test/java/org/apache/iceberg/stats/TestStatsUtil.java
b/api/src/test/java/org/apache/iceberg/stats/TestStatsUtil.java
new file mode 100644
index 0000000000..cf7f2fc3f9
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/stats/TestStatsUtil.java
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.stats;
+
+import static org.apache.iceberg.types.Types.NestedField.optional;
+import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.util.concurrent.ThreadLocalRandom;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.Test;
+
+public class TestStatsUtil {
+
+ @Test
+ public void statsIdsForTableColumns() {
+ int offset = 0;
+ for (int id = 0; id < StatsUtil.MAX_DATA_FIELD_ID; id++) {
+ int statsFieldId = StatsUtil.statsFieldIdForField(id);
+ int expected = StatsUtil.STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS +
offset;
+ assertThat(statsFieldId).as("at pos %s", id).isEqualTo(expected);
+ offset += StatsUtil.NUM_SUPPORTED_STATS_PER_COLUMN;
+ assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).as("at pos %s",
id).isEqualTo(id);
+ }
+
+ // also verify hardcoded field IDs from docs
+ int fieldId = 0;
+ int statsFieldId = 10_000;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+
+ fieldId = 1;
+ statsFieldId = 10_200;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+
+ fieldId = 2;
+ statsFieldId = 10_400;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+
+ fieldId = 5;
+ statsFieldId = 11_000;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+
+ fieldId = 100;
+ statsFieldId = 30_000;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+
+ fieldId = StatsUtil.MAX_DATA_FIELD_ID;
+ statsFieldId = StatsUtil.MAX_DATA_STATS_FIELD_ID;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+
+ fieldId = -1;
+ statsFieldId = -1;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+ }
+
+  @Test
+  public void statsIdsOverflowForTableColumns() {
+    // pick 100 random IDs that are > MAX_DATA_FIELD_ID and <
+    // STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS as going over the entire ID range takes too
+    // long
+    int invalidFieldId = -1;
+    ThreadLocalRandom random = ThreadLocalRandom.current();
+    for (int i = 0; i < 100; i++) {
+      int id =
+          random.nextInt(
+              StatsUtil.MAX_DATA_FIELD_ID + 1,
+              StatsUtil.STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS);
+      assertThat(StatsUtil.statsFieldIdForField(id)).as("at pos %s", id).isEqualTo(invalidFieldId);
+    }
+
+    // stats field IDs that are negative, below the stats space, or not aligned to a column range
+    int[] invalidStatsFieldIds = {-1, 5_000, 10_001, 10_201, 10_500, 10_900};
+    for (int statsFieldId : invalidStatsFieldIds) {
+      assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(invalidFieldId);
+    }
+  }
+
+ @Test
+ public void statsIdsForReservedColumns() {
+ int offset = 0;
+ for (int id = StatsUtil.RESERVED_FIELD_IDS_START; id < Integer.MAX_VALUE;
id++) {
+ int statsFieldId = StatsUtil.statsFieldIdForField(id);
+ int expected = StatsUtil.STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS
+ offset;
+ assertThat(statsFieldId).as("at pos %s", id).isEqualTo(expected);
+ offset = offset + StatsUtil.NUM_SUPPORTED_STATS_PER_COLUMN;
+ assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).as("at pos %s",
id).isEqualTo(id);
+ }
+
+ // also verify hardcoded IDs that are mentioned in the docs
+ int fieldId = 2_147_483_447;
+ int statsFieldId = 2_147_000_000;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+
+ fieldId = 2_147_483_448;
+ statsFieldId = 2_147_000_200;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+
+ fieldId = 2_147_483_541;
+ statsFieldId = 2_147_018_800;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+
+ fieldId = 2_147_483_645;
+ statsFieldId = 2_147_039_600;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+
+ fieldId = 2_147_483_646;
+ statsFieldId = 2_147_039_800;
+
assertThat(StatsUtil.statsFieldIdForField(fieldId)).isEqualTo(statsFieldId);
+
assertThat(StatsUtil.fieldIdForStatsField(statsFieldId)).isEqualTo(fieldId);
+ }
+
+  @Test
+  public void contentStatsForSimpleSchema() {
+    Schema schema =
+        new Schema(
+            required(0, "i", Types.IntegerType.get()),
+            required(2, "f", Types.FloatType.get()),
+            required(4, "s", Types.StringType.get()),
+            required(6, "b", Types.BooleanType.get()),
+            required(1_000_000, "u", Types.UUIDType.get()));
+
+    // one stats struct per column, named after the column's field ID
+    Types.StructType expectedContentStats =
+        Types.StructType.of(
+            optional(10000, "0", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 10001)),
+            optional(10400, "2", FieldStatistic.fieldStatsFor(Types.FloatType.get(), 10401)),
+            optional(10800, "4", FieldStatistic.fieldStatsFor(Types.StringType.get(), 10801)),
+            optional(11200, "6", FieldStatistic.fieldStatsFor(Types.BooleanType.get(), 11201)),
+            optional(
+                200010000,
+                "1000000",
+                FieldStatistic.fieldStatsFor(Types.UUIDType.get(), 200010001)));
+    Schema expectedStatsSchema =
+        new Schema(optional(146, "content_stats", expectedContentStats));
+
+    Types.NestedField contentStats = StatsUtil.contentStatsFor(schema);
+    Schema statsSchema = new Schema(contentStats);
+
+    assertThat(statsSchema.asStruct()).isEqualTo(expectedStatsSchema.asStruct());
+  }
+
+  @Test
+  public void contentStatsForComplexSchema() {
+    Types.StructType simpleStruct =
+        Types.StructType.of(
+            optional(7, "int", Types.IntegerType.get()),
+            optional(8, "string", Types.StringType.get()));
+    Schema schema =
+        new Schema(
+            required(0, "i", Types.IntegerType.get()),
+            required(2, "list", Types.ListType.ofOptional(3, Types.IntegerType.get())),
+            required(6, "simple_struct", simpleStruct),
+            required(
+                20,
+                "b",
+                Types.MapType.ofOptional(22, 24, Types.IntegerType.get(), Types.StringType.get())),
+            required(30, "variant", Types.VariantType.get()),
+            required(100_000, "u", Types.UUIDType.get()));
+
+    // stats are expected for the list element, struct fields and map key/value, but not for the
+    // list, struct, map and variant columns themselves
+    Types.StructType expectedContentStats =
+        Types.StructType.of(
+            optional(10000, "0", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 10001)),
+            optional(10600, "3", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 10601)),
+            optional(11400, "7", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 11401)),
+            optional(11600, "8", FieldStatistic.fieldStatsFor(Types.StringType.get(), 11601)),
+            optional(14400, "22", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 14401)),
+            optional(14800, "24", FieldStatistic.fieldStatsFor(Types.StringType.get(), 14801)),
+            optional(
+                20010000,
+                "100000",
+                FieldStatistic.fieldStatsFor(Types.UUIDType.get(), 20010001)));
+    Schema expectedStatsSchema =
+        new Schema(optional(146, "content_stats", expectedContentStats));
+
+    Types.NestedField contentStats = StatsUtil.contentStatsFor(schema);
+    Schema statsSchema = new Schema(contentStats);
+
+    assertThat(statsSchema.asStruct()).isEqualTo(expectedStatsSchema.asStruct());
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/stats/BaseContentStats.java
b/core/src/main/java/org/apache/iceberg/stats/BaseContentStats.java
new file mode 100644
index 0000000000..9ba885568f
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/stats/BaseContentStats.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.stats;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.data.GenericRecord;
+import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+
+public class BaseContentStats implements ContentStats, Serializable {
+
+ private final List<FieldStats<?>> fieldStats;
+ private final Map<Integer, FieldStats<?>> fieldStatsById;
+ private final Types.StructType statsStruct;
+
+  /**
+   * Creates an instance holding one {@link FieldStats} per field of the given projection, each
+   * carrying only the field ID and the type.
+   *
+   * <p>Used by Avro reflection to instantiate this class when reading manifest files.
+   *
+   * @param projection the stats struct, all fields of which must be struct types
+   */
+  public BaseContentStats(Types.StructType projection) {
+    List<Types.NestedField> projectedFields = projection.fields();
+    this.statsStruct = projection;
+    this.fieldStats = Lists.newArrayListWithCapacity(projectedFields.size());
+    this.fieldStatsById = Maps.newLinkedHashMapWithExpectedSize(projectedFields.size());
+
+    for (Types.NestedField field : projectedFields) {
+      Preconditions.checkArgument(
+          field.type().isStructType(), "Field stats must be a struct type: %s", field.type());
+      Types.StructType structType = field.type().asStructType();
+
+      // the type of the field is taken from the lower bound, or else from the upper bound
+      Types.NestedField bound = structType.field(FieldStatistic.LOWER_BOUND.fieldName());
+      if (null == bound) {
+        bound = structType.field(FieldStatistic.UPPER_BOUND.fieldName());
+      }
+
+      Type type = null == bound ? null : bound.type();
+
+      fieldStats.add(
+          BaseFieldStats.builder()
+              .fieldId(StatsUtil.fieldIdForStatsField(field.fieldId()))
+              .type(type)
+              .build());
+    }
+  }
+
+  /** Creates an instance for the given stats struct that holds a copy of the given stats. */
+  private BaseContentStats(Types.StructType struct, List<FieldStats<?>> fieldStats) {
+    int expectedSize = fieldStats.size();
+    this.statsStruct = struct;
+    this.fieldStats = Lists.newArrayList(fieldStats);
+    // stays empty until the first lookup through statsFor()
+    this.fieldStatsById = Maps.newLinkedHashMapWithExpectedSize(expectedSize);
+  }
+
+  /** Returns the list backing this instance, it is not copied. */
+  @Override
+  public List<FieldStats<?>> fieldStats() {
+    return this.fieldStats;
+  }
+
+  /** Returns the stats struct this instance was created with. */
+  @Override
+  public Types.StructType statsStruct() {
+    return this.statsStruct;
+  }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T> FieldStats<T> statsFor(int fieldId) {
+ if (fieldStatsById.isEmpty() && !fieldStats.isEmpty()) {
+ fieldStats.stream()
+ .filter(Objects::nonNull)
+ .forEach(stat -> fieldStatsById.put(stat.fieldId(), stat));
+ }
+
+ return (FieldStats<T>) fieldStatsById.get(fieldId);
+ }
+
+  /** Returns the number of field stats held by this instance. */
+  @Override
+  public int size() {
+    return fieldStats.size();
+  }
+
+  /**
+   * Returns the field stats that belong to the stats struct field at the given position.
+   *
+   * @return the field stats, or null if the position lies past the stats struct or no stats exist
+   *     for the field
+   * @throws IllegalArgumentException if the field stats are not an instance of the given class
+   */
+  @Override
+  public <T> T get(int pos, Class<T> javaClass) {
+    List<Types.NestedField> structFields = statsStruct.fields();
+    if (pos >= structFields.size()) {
+      // return null in case there are more stats schemas than actual stats available as Avro
+      // calls get() for all available stats schemas of a given table
+      return null;
+    }
+
+    int fieldId = StatsUtil.fieldIdForStatsField(structFields.get(pos).fieldId());
+    FieldStats<?> value = statsFor(fieldId);
+    if (null != value && !javaClass.isInstance(value)) {
+      throw new IllegalArgumentException(
+          String.format(
+              "Wrong class, expected %s but was %s for object: %s",
+              javaClass.getName(), value.getClass().getName(), value));
+    }
+
+    return javaClass.cast(value);
+  }
+
+ @SuppressWarnings({"unchecked", "rawtypes", "CyclomaticComplexity"})
+ @Override
+ public <T> void set(int pos, T value) {
+ if (value instanceof GenericRecord) {
+ GenericRecord record = (GenericRecord) value;
+ FieldStats<?> stat = fieldStats.get(pos);
+ BaseFieldStats.Builder builder = BaseFieldStats.buildFrom(stat);
+ Type type = stat.type();
+ if (null != record.getField(FieldStatistic.VALUE_COUNT.fieldName())) {
+ builder.valueCount((Long)
record.getField(FieldStatistic.VALUE_COUNT.fieldName()));
+ }
+
+ if (null != record.getField(FieldStatistic.NAN_VALUE_COUNT.fieldName()))
{
+ builder.nanValueCount((Long)
record.getField(FieldStatistic.NAN_VALUE_COUNT.fieldName()));
+ }
+
+ if (null !=
record.getField(FieldStatistic.NULL_VALUE_COUNT.fieldName())) {
+ builder.nullValueCount((Long)
record.getField(FieldStatistic.NULL_VALUE_COUNT.fieldName()));
+ }
+
+ if (null != record.getField(FieldStatistic.AVG_VALUE_SIZE.fieldName())) {
+ builder.avgValueSize((Integer)
record.getField(FieldStatistic.AVG_VALUE_SIZE.fieldName()));
+ }
+
+ if (null != record.getField(FieldStatistic.MAX_VALUE_SIZE.fieldName())) {
+ builder.maxValueSize((Integer)
record.getField(FieldStatistic.MAX_VALUE_SIZE.fieldName()));
+ }
+
+ Object lowerBound =
record.getField(FieldStatistic.LOWER_BOUND.fieldName());
+ if (null != type && null != lowerBound) {
+ Preconditions.checkArgument(
+ type.typeId().javaClass().isInstance(lowerBound),
+ "Invalid lower bound type, expected a subtype of %s: %s",
+ type.typeId().javaClass(),
+ lowerBound.getClass().getName());
+ builder.lowerBound(type.typeId().javaClass().cast(lowerBound));
+ }
+
+ Object upperBound =
record.getField(FieldStatistic.UPPER_BOUND.fieldName());
+ if (null != type && null != upperBound) {
+ Preconditions.checkArgument(
+ type.typeId().javaClass().isInstance(upperBound),
+ "Invalid upper bound type, expected a subtype of %s: %s",
+ type.typeId().javaClass(),
+ upperBound.getClass().getName());
+ builder.upperBound(type.typeId().javaClass().cast(upperBound));
+ }
+
+ if (null != record.getField(FieldStatistic.EXACT_BOUNDS.fieldName())) {
+ Boolean exactBounds = (Boolean)
record.getField(FieldStatistic.EXACT_BOUNDS.fieldName());
+ builder.hasExactBounds(null != exactBounds && exactBounds);
+ }
+
+ BaseFieldStats<?> newStat = builder.build();
+ fieldStats.set(pos, newStat);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return MoreObjects.toStringHelper(this).add("fieldStats",
fieldStats).toString();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof BaseContentStats)) {
+ return false;
+ }
+
+ BaseContentStats that = (BaseContentStats) o;
+ return Objects.equals(fieldStats, that.fieldStats)
+ && Objects.equals(statsStruct, that.statsStruct);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(fieldStats, statsStruct);
+ }
+
+  /** Returns a new, empty builder. */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /** Returns a builder holding the stats struct and all field stats of the given stats. */
+  public static Builder buildFrom(ContentStats stats) {
+    Builder copy = builder();
+    copy.withStatsStruct(stats.statsStruct());
+    copy.withFieldStats(stats.fieldStats());
+    return copy;
+  }
+
+  /**
+   * Returns a builder holding the stats struct of the given stats and only the field stats of the
+   * requested columns.
+   *
+   * @param stats the stats to copy from
+   * @param requestedColumnIds the field IDs to keep stats for, or null to keep all field stats
+   */
+  public static Builder buildFrom(ContentStats stats, Set<Integer> requestedColumnIds) {
+    if (null == requestedColumnIds) {
+      return buildFrom(stats);
+    }
+
+    Builder filtered = builder().withStatsStruct(stats.statsStruct());
+    for (FieldStats<?> stat : stats.fieldStats()) {
+      if (requestedColumnIds.contains(stat.fieldId())) {
+        filtered.withFieldStats(stat);
+      }
+    }
+
+    return filtered;
+  }
+
+  /**
+   * Builder for {@link BaseContentStats}.
+   *
+   * <p>Exactly one of the stats struct and the table schema must be set before calling {@link
+   * #build()}.
+   */
+  public static class Builder {
+    private final List<FieldStats<?>> stats = Lists.newArrayList();
+    private Types.StructType statsStruct;
+    private Schema schema;
+
+    private Builder() {}
+
+    /** Sets the stats struct to use as is. */
+    public Builder withStatsStruct(Types.StructType struct) {
+      this.statsStruct = struct;
+      return this;
+    }
+
+    /** Sets the table schema that the stats struct is derived from when building. */
+    public Builder withTableSchema(Schema tableSchema) {
+      this.schema = tableSchema;
+      return this;
+    }
+
+    /** Adds the given field stats. */
+    public Builder withFieldStats(FieldStats<?> fieldStats) {
+      stats.add(fieldStats);
+      return this;
+    }
+
+    /** Adds all the given field stats. */
+    public Builder withFieldStats(List<FieldStats<?>> fieldStats) {
+      stats.addAll(fieldStats);
+      return this;
+    }
+
+    /**
+     * Builds the content stats from the stats struct, or the struct derived from the table
+     * schema, and the field stats added so far.
+     *
+     * @throws IllegalArgumentException if neither or both of stats struct and table schema are set
+     */
+    public BaseContentStats build() {
+      Preconditions.checkArgument(
+          null != statsStruct || null != schema, "Either stats struct or table schema must be set");
+      Preconditions.checkArgument(
+          null == statsStruct || null == schema, "Cannot set stats struct and table schema");
+
+      // resolve into a local instead of the statsStruct field: writing the derived struct back
+      // would make the check above fail when build() is called again on the same builder
+      Types.StructType struct =
+          null != schema ? StatsUtil.contentStatsFor(schema).type().asStructType() : statsStruct;
+
+      return new BaseContentStats(struct, stats);
+    }
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/stats/BaseFieldStats.java
b/core/src/main/java/org/apache/iceberg/stats/BaseFieldStats.java
new file mode 100644
index 0000000000..338d1d04a5
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/stats/BaseFieldStats.java
@@ -0,0 +1,318 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.stats;
+
+import java.io.Serializable;
+import java.util.Objects;
+import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.types.Type;
+
+/**
+ * Base {@link FieldStats} implementation holding the statistics of a single field.
+ *
+ * <p>All fields are final and {@link #set(int, Object)} is unsupported; instances are created
+ * through {@link #builder()} or {@link #buildFrom(FieldStats)}. Individual statistics may be null
+ * when they were never set on the builder.
+ *
+ * @param <T> the Java type of the lower and upper bound
+ */
+public class BaseFieldStats<T> implements FieldStats<T>, Serializable {
+  private final int fieldId;
+  private final Type type;
+  private final Long valueCount;
+  private final Long nullValueCount;
+  private final Long nanValueCount;
+  private final Integer avgValueSize;
+  private final Integer maxValueSize;
+  private final T lowerBound;
+  private final T upperBound;
+  private final boolean hasExactBounds;
+
+  private BaseFieldStats(
+      int fieldId,
+      Type type,
+      Long valueCount,
+      Long nullValueCount,
+      Long nanValueCount,
+      Integer avgValueSize,
+      Integer maxValueSize,
+      T lowerBound,
+      T upperBound,
+      boolean hasExactBounds) {
+    this.fieldId = fieldId;
+    this.type = type;
+    this.valueCount = valueCount;
+    this.nullValueCount = nullValueCount;
+    this.nanValueCount = nanValueCount;
+    this.avgValueSize = avgValueSize;
+    this.maxValueSize = maxValueSize;
+    this.lowerBound = lowerBound;
+    this.upperBound = upperBound;
+    this.hasExactBounds = hasExactBounds;
+  }
+
+  @Override
+  public int fieldId() {
+    return fieldId;
+  }
+
+  @Override
+  public Type type() {
+    return type;
+  }
+
+  @Override
+  public Long valueCount() {
+    return valueCount;
+  }
+
+  @Override
+  public Long nullValueCount() {
+    return nullValueCount;
+  }
+
+  @Override
+  public Long nanValueCount() {
+    return nanValueCount;
+  }
+
+  @Override
+  public Integer avgValueSize() {
+    return avgValueSize;
+  }
+
+  @Override
+  public Integer maxValueSize() {
+    return maxValueSize;
+  }
+
+  @Override
+  public T lowerBound() {
+    return lowerBound;
+  }
+
+  @Override
+  public T upperBound() {
+    return upperBound;
+  }
+
+  @Override
+  public boolean hasExactBounds() {
+    return hasExactBounds;
+  }
+
+  /** Returns the number of positions that {@link #get(int, Class)} can serve. */
+  @Override
+  public int size() {
+    // get() handles eight statistics (VALUE_COUNT through EXACT_BOUNDS); reporting only seven
+    // would hide the exact-bounds flag from callers that iterate over [0, size()).
+    return 8;
+  }
+
+  /**
+   * Returns the statistic stored at the given position, cast to the requested class.
+   *
+   * @param pos the statistic offset, resolved through {@link FieldStatistic#fromOffset(int)}
+   * @param javaClass the class the statistic is cast to
+   * @return the statistic at {@code pos}, possibly null
+   * @throws ClassCastException if the statistic is not an instance of {@code javaClass}
+   * @throws UnsupportedOperationException if the resolved statistic is not handled here
+   */
+  @Override
+  public <X> X get(int pos, Class<X> javaClass) {
+    switch (FieldStatistic.fromOffset(pos)) {
+      case VALUE_COUNT:
+        return javaClass.cast(valueCount);
+      case NULL_VALUE_COUNT:
+        return javaClass.cast(nullValueCount);
+      case NAN_VALUE_COUNT:
+        return javaClass.cast(nanValueCount);
+      case AVG_VALUE_SIZE:
+        return javaClass.cast(avgValueSize);
+      case MAX_VALUE_SIZE:
+        return javaClass.cast(maxValueSize);
+      case LOWER_BOUND:
+        return javaClass.cast(lowerBound);
+      case UPPER_BOUND:
+        return javaClass.cast(upperBound);
+      case EXACT_BOUNDS:
+        return javaClass.cast(hasExactBounds);
+      default:
+        throw new UnsupportedOperationException("Unknown field ordinal: " + pos);
+    }
+  }
+
+  /** Always throws; values can only be provided through the {@link Builder}. */
+  @Override
+  public void set(int pos, Object value) {
+    throw new UnsupportedOperationException("set() not supported");
+  }
+
+  @Override
+  public String toString() {
+    return MoreObjects.toStringHelper(this)
+        .add("fieldId", fieldId)
+        .add("type", type)
+        .add("valueCount", valueCount)
+        .add("nullValueCount", nullValueCount)
+        .add("nanValueCount", nanValueCount)
+        .add("avgValueSize", avgValueSize)
+        .add("maxValueSize", maxValueSize)
+        .add("lowerBound", lowerBound)
+        .add("upperBound", upperBound)
+        .add("hasExactBounds", hasExactBounds)
+        .toString();
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+
+    if (!(o instanceof BaseFieldStats)) {
+      return false;
+    }
+
+    BaseFieldStats<?> that = (BaseFieldStats<?>) o;
+    return fieldId == that.fieldId
+        && Objects.equals(type, that.type)
+        && Objects.equals(valueCount, that.valueCount)
+        && Objects.equals(nullValueCount, that.nullValueCount)
+        && Objects.equals(nanValueCount, that.nanValueCount)
+        && Objects.equals(avgValueSize, that.avgValueSize)
+        && Objects.equals(maxValueSize, that.maxValueSize)
+        && Objects.equals(lowerBound, that.lowerBound)
+        && Objects.equals(upperBound, that.upperBound)
+        && hasExactBounds == that.hasExactBounds;
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(
+        fieldId,
+        type,
+        valueCount,
+        nullValueCount,
+        nanValueCount,
+        avgValueSize,
+        maxValueSize,
+        lowerBound,
+        upperBound,
+        hasExactBounds);
+  }
+
+  /** Creates a new, empty {@link Builder}. */
+  public static <X> Builder<X> builder() {
+    return new Builder<>();
+  }
+
+  /**
+   * Creates a {@link Builder} pre-populated with every value of the given field stats.
+   *
+   * @param value the field stats to copy from
+   * @return a builder carrying all values of {@code value}
+   * @throws IllegalArgumentException if {@code value} is null
+   */
+  public static <X> Builder<X> buildFrom(FieldStats<X> value) {
+    Preconditions.checkArgument(null != value, "Invalid column stats: null");
+    return BaseFieldStats.<X>builder()
+        .type(value.type())
+        .fieldId(value.fieldId())
+        .valueCount(value.valueCount())
+        .nullValueCount(value.nullValueCount())
+        .nanValueCount(value.nanValueCount())
+        .avgValueSize(value.avgValueSize())
+        .maxValueSize(value.maxValueSize())
+        .lowerBound(value.lowerBound())
+        .upperBound(value.upperBound())
+        .hasExactBounds(value.hasExactBounds());
+  }
+
+  /**
+   * Builder for {@link BaseFieldStats}. Unset object values stay null, the field ID defaults to 0
+   * and the exact-bounds flag defaults to false.
+   *
+   * @param <T> the Java type of the lower and upper bound
+   */
+  public static class Builder<T> {
+    private int fieldId;
+    private Type type;
+    private Long valueCount;
+    private Long nullValueCount;
+    private Long nanValueCount;
+    private Integer avgValueSize;
+    private Integer maxValueSize;
+    private T lowerBound;
+    private T upperBound;
+    private boolean hasExactBounds;
+
+    private Builder() {}
+
+    public Builder<T> type(Type newType) {
+      this.type = newType;
+      return this;
+    }
+
+    public Builder<T> valueCount(Long newValueCount) {
+      this.valueCount = newValueCount;
+      return this;
+    }
+
+    public Builder<T> nullValueCount(Long newNullValueCount) {
+      this.nullValueCount = newNullValueCount;
+      return this;
+    }
+
+    public Builder<T> nanValueCount(Long newNanValueCount) {
+      this.nanValueCount = newNanValueCount;
+      return this;
+    }
+
+    public Builder<T> avgValueSize(Integer newAvgValueSize) {
+      this.avgValueSize = newAvgValueSize;
+      return this;
+    }
+
+    public Builder<T> maxValueSize(Integer newMaxValueSize) {
+      this.maxValueSize = newMaxValueSize;
+      return this;
+    }
+
+    public Builder<T> lowerBound(T newLowerBound) {
+      this.lowerBound = newLowerBound;
+      return this;
+    }
+
+    public Builder<T> upperBound(T newUpperBound) {
+      this.upperBound = newUpperBound;
+      return this;
+    }
+
+    public Builder<T> fieldId(int newFieldId) {
+      this.fieldId = newFieldId;
+      return this;
+    }
+
+    public Builder<T> hasExactBounds(boolean newHasExactBounds) {
+      this.hasExactBounds = newHasExactBounds;
+      return this;
+    }
+
+    /** Marks the bounds as exact; shorthand for {@code hasExactBounds(true)}. */
+    public Builder<T> hasExactBounds() {
+      return hasExactBounds(true);
+    }
+
+    /**
+     * Builds the field stats after validating the bounds.
+     *
+     * @return the field stats holding the values set on this builder
+     * @throws IllegalArgumentException if a bound is set without a type, or if a bound is not an
+     *     instance of the Java class associated with the type
+     */
+    public BaseFieldStats<T> build() {
+      // the lower bound is validated first, so its error wins when both bounds are invalid
+      validateBound("lower", lowerBound);
+      validateBound("upper", upperBound);
+
+      return new BaseFieldStats<>(
+          fieldId,
+          type,
+          valueCount,
+          nullValueCount,
+          nanValueCount,
+          avgValueSize,
+          maxValueSize,
+          lowerBound,
+          upperBound,
+          hasExactBounds);
+    }
+
+    /** Checks that a non-null bound comes with a type and matches the type's Java class. */
+    private void validateBound(String boundName, T bound) {
+      if (null == bound) {
+        return;
+      }
+
+      // NOTE(review): this message mentions the lower bound even when the upper bound is being
+      // validated. The text is kept unchanged because TestFieldStats asserts it verbatim for both
+      // bounds; message and test should be updated together.
+      Preconditions.checkArgument(
+          null != type, "Invalid type (required when lower bound is set): null");
+      Preconditions.checkArgument(
+          type.typeId().javaClass().isInstance(bound),
+          "Invalid %s bound type, expected a subtype of %s: %s",
+          boundName,
+          type.typeId().javaClass().getName(),
+          bound.getClass().getName());
+    }
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/stats/TestContentStats.java
b/core/src/test/java/org/apache/iceberg/stats/TestContentStats.java
new file mode 100644
index 0000000000..7e64b9f11e
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/stats/TestContentStats.java
@@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.stats;
+
+import static org.apache.iceberg.stats.FieldStatistic.AVG_VALUE_SIZE;
+import static org.apache.iceberg.stats.FieldStatistic.EXACT_BOUNDS;
+import static org.apache.iceberg.stats.FieldStatistic.LOWER_BOUND;
+import static org.apache.iceberg.stats.FieldStatistic.MAX_VALUE_SIZE;
+import static org.apache.iceberg.stats.FieldStatistic.NAN_VALUE_COUNT;
+import static org.apache.iceberg.stats.FieldStatistic.NULL_VALUE_COUNT;
+import static org.apache.iceberg.stats.FieldStatistic.UPPER_BOUND;
+import static org.apache.iceberg.stats.FieldStatistic.VALUE_COUNT;
+import static org.apache.iceberg.types.Types.NestedField.optional;
+import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.data.GenericRecord;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.Test;
+
+/** Tests for building {@link BaseContentStats} and for reading and writing it by position. */
+public class TestContentStats {
+
+  /** Field stats that carry nothing but a field ID. */
+  private static BaseFieldStats<?> statsWithId(int fieldId) {
+    return BaseFieldStats.builder().fieldId(fieldId).build();
+  }
+
+  /** Schema with the optional int columns id (1) and id2 (2). */
+  private static Schema twoIntColumns() {
+    return new Schema(
+        optional(1, "id", Types.IntegerType.get()), optional(2, "id2", Types.IntegerType.get()));
+  }
+
+  /** Schema with the optional int columns id (1), id2 (2) and id3 (3). */
+  private static Schema threeIntColumns() {
+    return new Schema(
+        optional(1, "id", Types.IntegerType.get()),
+        optional(2, "id2", Types.IntegerType.get()),
+        optional(3, "id3", Types.IntegerType.get()));
+  }
+
+  /** Schema with five required columns of different primitive types, field IDs 1 to 5. */
+  private static Schema fiveMixedColumns() {
+    return new Schema(
+        required(1, "id", Types.IntegerType.get()),
+        required(2, "id2", Types.StringType.get()),
+        required(3, "id3", Types.DoubleType.get()),
+        required(4, "id4", Types.LongType.get()),
+        required(5, "id5", Types.FloatType.get()));
+  }
+
+  /** String bounds for field ID 2. */
+  private static BaseFieldStats<Object> stringBoundsForFieldTwo() {
+    return BaseFieldStats.builder()
+        .fieldId(2)
+        .type(Types.StringType.get())
+        .lowerBound("aaa")
+        .upperBound("zzz")
+        .build();
+  }
+
+  /** Float bounds for field ID 5. */
+  private static BaseFieldStats<Object> floatBoundsForFieldFive() {
+    return BaseFieldStats.builder()
+        .fieldId(5)
+        .type(Types.FloatType.get())
+        .lowerBound(1.0f)
+        .upperBound(5.0f)
+        .build();
+  }
+
+  /** Root content stats struct for a table with a single required int column "id" (1). */
+  private static Types.StructType rootStatsStructForSingleIntColumn() {
+    Schema tableSchema = new Schema(required(1, "id", Types.IntegerType.get()));
+    return StatsUtil.contentStatsFor(tableSchema).type().asStructType();
+  }
+
+  @Test
+  public void contentStatsWithoutStatsStruct() {
+    assertThatThrownBy(() -> BaseContentStats.builder().build())
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Either stats struct or table schema must be set");
+
+    assertThatThrownBy(
+            () ->
+                BaseContentStats.builder()
+                    .withTableSchema(new Schema())
+                    .withStatsStruct(new Schema().asStruct())
+                    .build())
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Cannot set stats struct and table schema");
+  }
+
+  @Test
+  public void emptyContentStats() {
+    BaseContentStats stats = BaseContentStats.builder().withTableSchema(new Schema()).build();
+    assertThat(stats).isNotNull();
+    assertThat(stats.fieldStats()).isEmpty();
+  }
+
+  @Test
+  public void validContentStats() {
+    BaseFieldStats<?> fieldStatsOne = statsWithId(1);
+    BaseFieldStats<?> fieldStatsTwo = statsWithId(2);
+    BaseContentStats stats =
+        BaseContentStats.builder()
+            .withTableSchema(twoIntColumns())
+            .withFieldStats(fieldStatsOne)
+            .withFieldStats(fieldStatsTwo)
+            .build();
+
+    assertThat(stats.fieldStats()).containsExactly(fieldStatsOne, fieldStatsTwo);
+    assertThat(stats.size()).isEqualTo(stats.fieldStats().size()).isEqualTo(2);
+  }
+
+  @Test
+  public void buildFromExistingStats() {
+    BaseFieldStats<?> fieldStatsOne = statsWithId(1);
+    BaseFieldStats<?> fieldStatsTwo = statsWithId(2);
+    BaseFieldStats<?> fieldStatsThree = statsWithId(3);
+
+    BaseContentStats existing =
+        BaseContentStats.builder()
+            .withTableSchema(threeIntColumns())
+            .withFieldStats(fieldStatsOne)
+            .withFieldStats(fieldStatsTwo)
+            .build();
+
+    // stats added after buildFrom() are appended to the ones copied from the existing stats
+    BaseContentStats stats =
+        BaseContentStats.buildFrom(existing).withFieldStats(fieldStatsThree).build();
+    assertThat(stats.fieldStats()).containsExactly(fieldStatsOne, fieldStatsTwo, fieldStatsThree);
+  }
+
+  @Test
+  public void buildFromExistingStatsWithRequestedIds() {
+    BaseFieldStats<?> fieldStatsOne = statsWithId(1);
+    BaseFieldStats<?> fieldStatsTwo = statsWithId(2);
+    BaseFieldStats<?> fieldStatsThree = statsWithId(3);
+
+    BaseContentStats stats =
+        BaseContentStats.builder()
+            .withTableSchema(threeIntColumns())
+            .withFieldStats(fieldStatsOne)
+            .withFieldStats(fieldStatsTwo)
+            .withFieldStats(fieldStatsThree)
+            .build();
+
+    assertThat(BaseContentStats.buildFrom(stats, null).build()).isEqualTo(stats);
+    assertThat(BaseContentStats.buildFrom(stats, ImmutableSet.of(1, 3)).build().fieldStats())
+        .containsExactly(fieldStatsOne, fieldStatsThree);
+    assertThat(BaseContentStats.buildFrom(stats, ImmutableSet.of(2)).build().fieldStats())
+        .containsExactly(fieldStatsTwo);
+    assertThat(
+            BaseContentStats.buildFrom(stats, ImmutableSet.of(2, 5, 10, 12)).build().fieldStats())
+        .containsExactly(fieldStatsTwo);
+    assertThat(BaseContentStats.buildFrom(stats, ImmutableSet.of(5, 10, 12)).build().fieldStats())
+        .isEmpty();
+  }
+
+  @Test
+  public void retrievalByPosition() {
+    BaseFieldStats<?> fieldStatsOne = statsWithId(1);
+    BaseFieldStats<?> fieldStatsTwo = statsWithId(2);
+    BaseContentStats stats =
+        BaseContentStats.builder()
+            .withTableSchema(twoIntColumns())
+            .withFieldStats(fieldStatsOne)
+            .withFieldStats(fieldStatsTwo)
+            .build();
+
+    assertThat(stats.get(0, FieldStats.class)).isEqualTo(fieldStatsOne);
+    assertThat(stats.get(1, FieldStats.class)).isEqualTo(fieldStatsTwo);
+    assertThat(stats.get(2, FieldStats.class)).isNull();
+    assertThat(stats.get(10, FieldStats.class)).isNull();
+
+    assertThatThrownBy(() -> stats.get(0, Long.class))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining(
+            "Wrong class, expected java.lang.Long but was org.apache.iceberg.stats.BaseFieldStats for object:");
+  }
+
+  @Test
+  public void retrievalByFieldId() {
+    BaseFieldStats<Object> fieldStatsTwo = stringBoundsForFieldTwo();
+    BaseFieldStats<Object> fieldStatsFive = floatBoundsForFieldFive();
+
+    // table schema has 5 columns, but we only have stats for field IDs 2 and 5 and hold the stats
+    // in an inverse order
+    BaseContentStats stats =
+        BaseContentStats.builder()
+            .withTableSchema(fiveMixedColumns())
+            .withFieldStats(fieldStatsFive)
+            .withFieldStats(fieldStatsTwo)
+            .build();
+
+    assertThat(stats.statsFor(1)).isNull();
+    assertThat(stats.statsFor(2)).isEqualTo(fieldStatsTwo);
+    assertThat(stats.statsFor(3)).isNull();
+    assertThat(stats.statsFor(4)).isNull();
+    assertThat(stats.statsFor(5)).isEqualTo(fieldStatsFive);
+    assertThat(stats.statsFor(100)).isNull();
+  }
+
+  @Test
+  public void retrievalByPositionWithPartialStats() {
+    BaseFieldStats<Object> fieldStatsTwo = stringBoundsForFieldTwo();
+    BaseFieldStats<Object> fieldStatsFive = floatBoundsForFieldFive();
+
+    // table schema has 5 columns, but we only have stats for field IDs 2 and 5 and hold the stats
+    // in an inverse order
+    BaseContentStats stats =
+        BaseContentStats.builder()
+            .withTableSchema(fiveMixedColumns())
+            .withFieldStats(fieldStatsFive)
+            .withFieldStats(fieldStatsTwo)
+            .build();
+
+    assertThat(stats.get(0, FieldStats.class)).isNull();
+    assertThat(stats.get(1, FieldStats.class)).isEqualTo(fieldStatsTwo);
+    assertThat(stats.get(2, FieldStats.class)).isNull();
+    assertThat(stats.get(3, FieldStats.class)).isNull();
+    assertThat(stats.get(4, FieldStats.class)).isEqualTo(fieldStatsFive);
+  }
+
+  @Test
+  public void setByPosition() {
+    Types.StructType rootStatsStruct = rootStatsStructForSingleIntColumn();
+    Types.StructType statsStructForIdField = rootStatsStruct.fields().get(0).type().asStructType();
+
+    BaseFieldStats<Integer> fieldStats =
+        BaseFieldStats.<Integer>builder()
+            .type(Types.IntegerType.get())
+            .fieldId(1)
+            .valueCount(10L)
+            .nullValueCount(2L)
+            .nanValueCount(3L)
+            .avgValueSize(30)
+            .maxValueSize(70)
+            .lowerBound(5)
+            .upperBound(20)
+            .hasExactBounds()
+            .build();
+
+    // mirror every statistic of the expected field stats into a generic record
+    GenericRecord record = GenericRecord.create(statsStructForIdField);
+    record.set(VALUE_COUNT.offset(), fieldStats.valueCount());
+    record.set(NULL_VALUE_COUNT.offset(), fieldStats.nullValueCount());
+    record.set(NAN_VALUE_COUNT.offset(), fieldStats.nanValueCount());
+    record.set(AVG_VALUE_SIZE.offset(), fieldStats.avgValueSize());
+    record.set(MAX_VALUE_SIZE.offset(), fieldStats.maxValueSize());
+    record.set(LOWER_BOUND.offset(), fieldStats.lowerBound());
+    record.set(UPPER_BOUND.offset(), fieldStats.upperBound());
+    record.set(EXACT_BOUNDS.offset(), fieldStats.hasExactBounds());
+
+    // this is typically called by Avro reflection code
+    BaseContentStats stats = new BaseContentStats(rootStatsStruct);
+    stats.set(0, record);
+    assertThat(stats.fieldStats()).containsExactly(fieldStats);
+  }
+
+  @Test
+  public void setByPositionWithInvalidLowerAndUpperBound() {
+    Types.StructType rootStatsStruct = rootStatsStructForSingleIntColumn();
+    Types.StructType statsStructForIdField = rootStatsStruct.fields().get(0).type().asStructType();
+
+    GenericRecord record = GenericRecord.create(statsStructForIdField);
+    // this is typically called by Avro reflection code
+    BaseContentStats stats = new BaseContentStats(rootStatsStruct);
+
+    // invalid lower bound
+    record.set(LOWER_BOUND.offset(), 5.0);
+    assertThatThrownBy(() -> stats.set(0, record))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage(
+            "Invalid lower bound type, expected a subtype of class java.lang.Integer: java.lang.Double");
+
+    // set valid lower bound so that upper bound is evaluated
+    record.set(LOWER_BOUND.offset(), 5);
+
+    // invalid upper bound
+    record.set(UPPER_BOUND.offset(), "20");
+    assertThatThrownBy(() -> stats.set(0, record))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage(
+            "Invalid upper bound type, expected a subtype of class java.lang.Integer: java.lang.String");
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/stats/TestFieldStats.java
b/core/src/test/java/org/apache/iceberg/stats/TestFieldStats.java
new file mode 100644
index 0000000000..d5d0ae2309
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/stats/TestFieldStats.java
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.stats;
+
+import static org.apache.iceberg.stats.FieldStatistic.AVG_VALUE_SIZE;
+import static org.apache.iceberg.stats.FieldStatistic.EXACT_BOUNDS;
+import static org.apache.iceberg.stats.FieldStatistic.LOWER_BOUND;
+import static org.apache.iceberg.stats.FieldStatistic.MAX_VALUE_SIZE;
+import static org.apache.iceberg.stats.FieldStatistic.NAN_VALUE_COUNT;
+import static org.apache.iceberg.stats.FieldStatistic.NULL_VALUE_COUNT;
+import static org.apache.iceberg.stats.FieldStatistic.UPPER_BOUND;
+import static org.apache.iceberg.stats.FieldStatistic.VALUE_COUNT;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.Test;
+
+/** Tests for building {@link BaseFieldStats} and for reading its statistics by position. */
+public class TestFieldStats {
+
+  /**
+   * Builder for integer field stats with field ID 23 and every statistic set except the
+   * exact-bounds flag, which individual tests set as needed.
+   */
+  private static BaseFieldStats.Builder<Integer> populatedIntegerStats() {
+    return BaseFieldStats.<Integer>builder()
+        .type(Types.IntegerType.get())
+        .fieldId(23)
+        .valueCount(10L)
+        .nullValueCount(2L)
+        .nanValueCount(3L)
+        .avgValueSize(30)
+        .maxValueSize(70)
+        .lowerBound(5)
+        .upperBound(20);
+  }
+
+  @Test
+  public void empty() {
+    BaseFieldStats<?> empty = BaseFieldStats.builder().build();
+    assertThat(empty.fieldId()).isEqualTo(0);
+    assertThat(empty.type()).isNull();
+    assertThat(empty.valueCount()).isNull();
+    assertThat(empty.nullValueCount()).isNull();
+    assertThat(empty.nanValueCount()).isNull();
+    assertThat(empty.avgValueSize()).isNull();
+    assertThat(empty.maxValueSize()).isNull();
+    assertThat(empty.lowerBound()).isNull();
+    assertThat(empty.upperBound()).isNull();
+    // the exact-bounds flag is the only primitive besides the field ID and must default to false
+    assertThat(empty.hasExactBounds()).isFalse();
+  }
+
+  @Test
+  public void validIndividualValues() {
+    BaseFieldStats<Integer> fieldStats = populatedIntegerStats().hasExactBounds().build();
+
+    assertThat(fieldStats.type()).isEqualTo(Types.IntegerType.get());
+    assertThat(fieldStats.fieldId()).isEqualTo(23);
+    assertThat(fieldStats.valueCount()).isEqualTo(10L);
+    assertThat(fieldStats.nullValueCount()).isEqualTo(2L);
+    assertThat(fieldStats.nanValueCount()).isEqualTo(3L);
+    assertThat(fieldStats.avgValueSize()).isEqualTo(30);
+    assertThat(fieldStats.maxValueSize()).isEqualTo(70);
+    assertThat(fieldStats.lowerBound()).isEqualTo(5);
+    assertThat(fieldStats.upperBound()).isEqualTo(20);
+    assertThat(fieldStats.hasExactBounds()).isTrue();
+  }
+
+  @Test
+  public void buildFromExistingStats() {
+    // values set after buildFrom() override the ones copied from the existing stats
+    BaseFieldStats<Integer> fieldStats =
+        BaseFieldStats.buildFrom(populatedIntegerStats().build())
+            .lowerBound(2)
+            .upperBound(50)
+            .maxValueSize(90)
+            .hasExactBounds()
+            .build();
+    assertThat(fieldStats.type()).isEqualTo(Types.IntegerType.get());
+    assertThat(fieldStats.fieldId()).isEqualTo(23);
+    assertThat(fieldStats.valueCount()).isEqualTo(10L);
+    assertThat(fieldStats.nullValueCount()).isEqualTo(2L);
+    assertThat(fieldStats.nanValueCount()).isEqualTo(3L);
+    assertThat(fieldStats.avgValueSize()).isEqualTo(30);
+    assertThat(fieldStats.maxValueSize()).isEqualTo(90);
+    assertThat(fieldStats.lowerBound()).isEqualTo(2);
+    assertThat(fieldStats.upperBound()).isEqualTo(50);
+    assertThat(fieldStats.hasExactBounds()).isTrue();
+  }
+
+  @Test
+  public void validFieldStats() {
+    assertThat(BaseFieldStats.builder().build()).isNotNull();
+    assertThat(BaseFieldStats.builder().fieldId(1).build()).isNotNull();
+    assertThat(BaseFieldStats.builder().valueCount(3L).build()).isNotNull();
+    assertThat(BaseFieldStats.builder().nullValueCount(3L).build()).isNotNull();
+    assertThat(BaseFieldStats.builder().nanValueCount(3L).build()).isNotNull();
+    assertThat(BaseFieldStats.builder().type(Types.IntegerType.get()).build()).isNotNull();
+    assertThat(BaseFieldStats.builder().avgValueSize(3).build()).isNotNull();
+    assertThat(BaseFieldStats.builder().maxValueSize(3).build()).isNotNull();
+
+    assertThat(BaseFieldStats.builder().type(Types.LongType.get()).lowerBound(3L).build())
+        .isNotNull();
+    assertThat(BaseFieldStats.builder().type(Types.LongType.get()).upperBound(10L).build())
+        .isNotNull();
+    assertThat(
+            BaseFieldStats.builder()
+                .type(Types.LongType.get())
+                .lowerBound(3L)
+                .upperBound(10L)
+                .build())
+        .isNotNull();
+    assertThat(
+            BaseFieldStats.<Long>builder()
+                .type(Types.LongType.get())
+                .lowerBound(3L)
+                .upperBound(10L)
+                .build())
+        .isNotNull();
+  }
+
+  @Test
+  public void missingTypeWithUpperOrLowerBound() {
+    assertThatThrownBy(() -> BaseFieldStats.builder().lowerBound(3).build())
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Invalid type (required when lower bound is set): null");
+    // the builder reports the same message text when only the upper bound is set
+    assertThatThrownBy(() -> BaseFieldStats.builder().upperBound(3).build())
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Invalid type (required when lower bound is set): null");
+  }
+
+  @Test
+  public void invalidType() {
+    assertThatThrownBy(
+            () -> BaseFieldStats.builder().type(Types.LongType.get()).lowerBound(3).build())
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage(
+            "Invalid lower bound type, expected a subtype of java.lang.Long: java.lang.Integer");
+    assertThatThrownBy(
+            () ->
+                BaseFieldStats.<Integer>builder().type(Types.LongType.get()).lowerBound(3).build())
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage(
+            "Invalid lower bound type, expected a subtype of java.lang.Long: java.lang.Integer");
+
+    assertThatThrownBy(
+            () -> BaseFieldStats.builder().type(Types.LongType.get()).upperBound(3).build())
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage(
+            "Invalid upper bound type, expected a subtype of java.lang.Long: java.lang.Integer");
+    assertThatThrownBy(
+            () ->
+                BaseFieldStats.<Integer>builder().type(Types.LongType.get()).upperBound(3).build())
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage(
+            "Invalid upper bound type, expected a subtype of java.lang.Long: java.lang.Integer");
+  }
+
+  @Test
+  public void retrievalByPosition() {
+    BaseFieldStats<Integer> fieldStats = populatedIntegerStats().hasExactBounds().build();
+
+    assertThat(fieldStats.get(VALUE_COUNT.offset(), Long.class)).isEqualTo(10L);
+    assertThat(fieldStats.get(NULL_VALUE_COUNT.offset(), Long.class)).isEqualTo(2L);
+    assertThat(fieldStats.get(NAN_VALUE_COUNT.offset(), Long.class)).isEqualTo(3L);
+    assertThat(fieldStats.get(AVG_VALUE_SIZE.offset(), Integer.class)).isEqualTo(30);
+    assertThat(fieldStats.get(MAX_VALUE_SIZE.offset(), Integer.class)).isEqualTo(70);
+    assertThat(fieldStats.get(LOWER_BOUND.offset(), Integer.class)).isEqualTo(5);
+    assertThat(fieldStats.get(UPPER_BOUND.offset(), Integer.class)).isEqualTo(20);
+    assertThat(fieldStats.get(EXACT_BOUNDS.offset(), Boolean.class)).isEqualTo(true);
+
+    // the calls below are expected to throw, so they are invoked directly in the lambda
+    assertThatThrownBy(() -> fieldStats.get(10, Long.class))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Invalid statistic offset: 10");
+    assertThatThrownBy(() -> fieldStats.get(VALUE_COUNT.offset(), Double.class))
+        .isInstanceOf(ClassCastException.class)
+        .hasMessage("Cannot cast java.lang.Long to java.lang.Double");
+    assertThatThrownBy(() -> fieldStats.get(AVG_VALUE_SIZE.offset(), Long.class))
+        .isInstanceOf(ClassCastException.class)
+        .hasMessage("Cannot cast java.lang.Integer to java.lang.Long");
+  }
+}