This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new 17891bc4e3 API, Core: Align offsets of field stats with Design doc / Spec (#15432)
17891bc4e3 is described below

commit 17891bc4e300c49b3678117be3189864aaf3866b
Author: Eduard Tudenhoefner <[email protected]>
AuthorDate: Wed Feb 25 20:09:20 2026 +0100

    API, Core: Align offsets of field stats with Design doc / Spec (#15432)
---
 .../org/apache/iceberg/stats/FieldStatistic.java   | 92 +++++++++++++---------
 .../java/org/apache/iceberg/stats/StatsUtil.java   |  2 +-
 .../org/apache/iceberg/stats/TestStatsUtil.java    | 24 +++---
 .../org/apache/iceberg/stats/BaseFieldStats.java   |  4 +-
 .../org/apache/iceberg/stats/TestContentStats.java | 22 +++---
 .../org/apache/iceberg/stats/TestFieldStats.java   | 22 +++---
 6 files changed, 91 insertions(+), 75 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/stats/FieldStatistic.java b/api/src/main/java/org/apache/iceberg/stats/FieldStatistic.java
index 7715359ea2..8d13ba5567 100644
--- a/api/src/main/java/org/apache/iceberg/stats/FieldStatistic.java
+++ b/api/src/main/java/org/apache/iceberg/stats/FieldStatistic.java
@@ -24,14 +24,14 @@ import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.Types;
 
 public enum FieldStatistic {
-  VALUE_COUNT(0, "value_count"),
-  NULL_VALUE_COUNT(1, "null_value_count"),
-  NAN_VALUE_COUNT(2, "nan_value_count"),
-  AVG_VALUE_SIZE(3, "avg_value_size"),
-  MAX_VALUE_SIZE(4, "max_value_size"),
-  LOWER_BOUND(5, "lower_bound"),
-  UPPER_BOUND(6, "upper_bound"),
-  EXACT_BOUNDS(7, "exact_bounds");
+  VALUE_COUNT(1, "value_count"),
+  NULL_VALUE_COUNT(2, "null_value_count"),
+  NAN_VALUE_COUNT(3, "nan_value_count"),
+  AVG_VALUE_SIZE(4, "avg_value_size"),
+  MAX_VALUE_SIZE(5, "max_value_size"),
+  LOWER_BOUND(6, "lower_bound"),
+  UPPER_BOUND(7, "upper_bound"),
+  EXACT_BOUNDS(8, "exact_bounds");
 
   private final int offset;
   private final String fieldName;
@@ -41,68 +41,84 @@ public enum FieldStatistic {
     this.fieldName = fieldName;
   }
 
+  /**
+   * The offset from the field ID of the base stats structure
+   *
+   * @return The offset from the field ID of the base stats structure
+   */
   public int offset() {
     return offset;
   }
 
+  /**
+   * The ordinal position (0-based) within the stats structure
+   *
+   * @return The ordinal position (0-based) within the stats structure
+   */
+  public int position() {
+    return offset - 1;
+  }
+
+  /**
+   * The field name
+   *
+   * @return The field name
+   */
   public String fieldName() {
     return fieldName;
   }
 
-  public static FieldStatistic fromOffset(int offset) {
-    switch (offset) {
-      case 0:
-        return VALUE_COUNT;
-      case 1:
-        return NULL_VALUE_COUNT;
-      case 2:
-        return NAN_VALUE_COUNT;
-      case 3:
-        return AVG_VALUE_SIZE;
-      case 4:
-        return MAX_VALUE_SIZE;
-      case 5:
-        return LOWER_BOUND;
-      case 6:
-        return UPPER_BOUND;
-      case 7:
-        return EXACT_BOUNDS;
-      default:
-        throw new IllegalArgumentException("Invalid statistic offset: " + offset);
-    }
+  /**
+   * Returns the {@link FieldStatistic} from its ordinal position (0-based) in the stats structure
+   *
+   * @param position The ordinal position (0-based) in the stats structure
+   * @return The {@link FieldStatistic} from its ordinal position (0-based) in the stats structure
+   */
+  public static FieldStatistic fromPosition(int position) {
+    return switch (position) {
+      case 0 -> VALUE_COUNT;
+      case 1 -> NULL_VALUE_COUNT;
+      case 2 -> NAN_VALUE_COUNT;
+      case 3 -> AVG_VALUE_SIZE;
+      case 4 -> MAX_VALUE_SIZE;
+      case 5 -> LOWER_BOUND;
+      case 6 -> UPPER_BOUND;
+      case 7 -> EXACT_BOUNDS;
+      default -> throw new IllegalArgumentException("Invalid statistic position: " + position);
+    };
   }
 
-  public static Types.StructType fieldStatsFor(Type type, int fieldId) {
+  public static Types.StructType fieldStatsFor(Type type, int baseFieldId) {
     return Types.StructType.of(
         optional(
-            fieldId + VALUE_COUNT.offset(),
+            baseFieldId + VALUE_COUNT.offset(),
             VALUE_COUNT.fieldName(),
             Types.LongType.get(),
             "Total value count, including null and NaN"),
         optional(
-            fieldId + NULL_VALUE_COUNT.offset(),
+            baseFieldId + NULL_VALUE_COUNT.offset(),
             NULL_VALUE_COUNT.fieldName(),
             Types.LongType.get(),
             "Total null value count"),
         optional(
-            fieldId + NAN_VALUE_COUNT.offset(),
+            baseFieldId + NAN_VALUE_COUNT.offset(),
             NAN_VALUE_COUNT.fieldName(),
             Types.LongType.get(),
             "Total NaN value count"),
         optional(
-            fieldId + AVG_VALUE_SIZE.offset(),
+            baseFieldId + AVG_VALUE_SIZE.offset(),
             AVG_VALUE_SIZE.fieldName(),
             Types.IntegerType.get(),
             "Avg value size of variable-length types (String, Binary)"),
         optional(
-            fieldId + MAX_VALUE_SIZE.offset(),
+            baseFieldId + MAX_VALUE_SIZE.offset(),
             MAX_VALUE_SIZE.fieldName(),
             Types.IntegerType.get(),
             "Max value size of variable-length types (String, Binary)"),
-        optional(fieldId + LOWER_BOUND.offset(), LOWER_BOUND.fieldName(), type, "Lower bound"),
-        optional(fieldId + UPPER_BOUND.offset(), UPPER_BOUND.fieldName(), type, "Upper bound"),
+        optional(baseFieldId + LOWER_BOUND.offset(), LOWER_BOUND.fieldName(), type, "Lower bound"),
+        optional(baseFieldId + UPPER_BOUND.offset(), UPPER_BOUND.fieldName(), type, "Upper bound"),
         optional(
-            fieldId + EXACT_BOUNDS.offset(),
+            baseFieldId + EXACT_BOUNDS.offset(),
             EXACT_BOUNDS.fieldName(),
             Types.BooleanType.get(),
             "Whether the upper/lower bound is exact or not"));
diff --git a/api/src/main/java/org/apache/iceberg/stats/StatsUtil.java b/api/src/main/java/org/apache/iceberg/stats/StatsUtil.java
index 1e2bef98a7..349f9fe75b 100644
--- a/api/src/main/java/org/apache/iceberg/stats/StatsUtil.java
+++ b/api/src/main/java/org/apache/iceberg/stats/StatsUtil.java
@@ -178,7 +178,7 @@ public class StatsUtil {
 
       int fieldId = StatsUtil.statsFieldIdForField(field.fieldId());
       if (fieldId >= 0) {
-        Types.StructType structType = FieldStatistic.fieldStatsFor(field.type(), fieldId + 1);
+        Types.StructType structType = FieldStatistic.fieldStatsFor(field.type(), fieldId);
         return optional(fieldId, Integer.toString(field.fieldId()), structType);
       } else {
         skippedFieldIds.add(field.fieldId());
diff --git a/api/src/test/java/org/apache/iceberg/stats/TestStatsUtil.java b/api/src/test/java/org/apache/iceberg/stats/TestStatsUtil.java
index cf7f2fc3f9..4a17081ab7 100644
--- a/api/src/test/java/org/apache/iceberg/stats/TestStatsUtil.java
+++ b/api/src/test/java/org/apache/iceberg/stats/TestStatsUtil.java
@@ -153,17 +153,17 @@ public class TestStatsUtil {
                 "content_stats",
                 Types.StructType.of(
                     optional(
-                        10000, "0", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 10001)),
+                        10000, "0", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 10000)),
                     optional(
-                        10400, "2", FieldStatistic.fieldStatsFor(Types.FloatType.get(), 10401)),
+                        10400, "2", FieldStatistic.fieldStatsFor(Types.FloatType.get(), 10400)),
                     optional(
-                        10800, "4", FieldStatistic.fieldStatsFor(Types.StringType.get(), 10801)),
+                        10800, "4", FieldStatistic.fieldStatsFor(Types.StringType.get(), 10800)),
                     optional(
-                        11200, "6", FieldStatistic.fieldStatsFor(Types.BooleanType.get(), 11201)),
+                        11200, "6", FieldStatistic.fieldStatsFor(Types.BooleanType.get(), 11200)),
                     optional(
                         200010000,
                         "1000000",
-                        FieldStatistic.fieldStatsFor(Types.UUIDType.get(), 200010001)))));
+                        FieldStatistic.fieldStatsFor(Types.UUIDType.get(), 200010000)))));
     Schema statsSchema = new Schema(StatsUtil.contentStatsFor(schema));
     assertThat(statsSchema.asStruct()).isEqualTo(expectedStatsSchema.asStruct());
   }
@@ -193,21 +193,21 @@ public class TestStatsUtil {
                 "content_stats",
                 Types.StructType.of(
                     optional(
-                        10000, "0", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 10001)),
+                        10000, "0", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 10000)),
                     optional(
-                        10600, "3", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 10601)),
+                        10600, "3", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 10600)),
                     optional(
-                        11400, "7", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 11401)),
+                        11400, "7", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 11400)),
                     optional(
-                        11600, "8", FieldStatistic.fieldStatsFor(Types.StringType.get(), 11601)),
+                        11600, "8", FieldStatistic.fieldStatsFor(Types.StringType.get(), 11600)),
                     optional(
-                        14400, "22", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 14401)),
+                        14400, "22", FieldStatistic.fieldStatsFor(Types.IntegerType.get(), 14400)),
                     optional(
-                        14800, "24", FieldStatistic.fieldStatsFor(Types.StringType.get(), 14801)),
+                        14800, "24", FieldStatistic.fieldStatsFor(Types.StringType.get(), 14800)),
                     optional(
                         20010000,
                         "100000",
-                        FieldStatistic.fieldStatsFor(Types.UUIDType.get(), 20010001)))));
+                        FieldStatistic.fieldStatsFor(Types.UUIDType.get(), 20010000)))));
     Schema statsSchema = new Schema(StatsUtil.contentStatsFor(schema));
     assertThat(statsSchema.asStruct()).isEqualTo(expectedStatsSchema.asStruct());
   }
diff --git a/core/src/main/java/org/apache/iceberg/stats/BaseFieldStats.java b/core/src/main/java/org/apache/iceberg/stats/BaseFieldStats.java
index 26f26d2a5f..f26294213c 100644
--- a/core/src/main/java/org/apache/iceberg/stats/BaseFieldStats.java
+++ b/core/src/main/java/org/apache/iceberg/stats/BaseFieldStats.java
@@ -145,12 +145,12 @@ public class BaseFieldStats<T> implements FieldStats<T>, Serializable {
 
   @Override
   public int size() {
-    return 7;
+    return 8;
   }
 
   @Override
   public <X> X get(int pos, Class<X> javaClass) {
-    return switch (FieldStatistic.fromOffset(pos)) {
+    return switch (FieldStatistic.fromPosition(pos)) {
       case VALUE_COUNT -> javaClass.cast(valueCount);
       case NULL_VALUE_COUNT -> javaClass.cast(nullValueCount);
       case NAN_VALUE_COUNT -> javaClass.cast(nanValueCount);
diff --git a/core/src/test/java/org/apache/iceberg/stats/TestContentStats.java b/core/src/test/java/org/apache/iceberg/stats/TestContentStats.java
index 7e64b9f11e..d083e73065 100644
--- a/core/src/test/java/org/apache/iceberg/stats/TestContentStats.java
+++ b/core/src/test/java/org/apache/iceberg/stats/TestContentStats.java
@@ -261,14 +261,14 @@ public class TestContentStats {
             .hasExactBounds()
             .build();
 
-    record.set(VALUE_COUNT.offset(), fieldStats.valueCount());
-    record.set(NULL_VALUE_COUNT.offset(), fieldStats.nullValueCount());
-    record.set(NAN_VALUE_COUNT.offset(), fieldStats.nanValueCount());
-    record.set(AVG_VALUE_SIZE.offset(), fieldStats.avgValueSize());
-    record.set(MAX_VALUE_SIZE.offset(), fieldStats.maxValueSize());
-    record.set(LOWER_BOUND.offset(), fieldStats.lowerBound());
-    record.set(UPPER_BOUND.offset(), fieldStats.upperBound());
-    record.set(EXACT_BOUNDS.offset(), fieldStats.hasExactBounds());
+    record.set(VALUE_COUNT.position(), fieldStats.valueCount());
+    record.set(NULL_VALUE_COUNT.position(), fieldStats.nullValueCount());
+    record.set(NAN_VALUE_COUNT.position(), fieldStats.nanValueCount());
+    record.set(AVG_VALUE_SIZE.position(), fieldStats.avgValueSize());
+    record.set(MAX_VALUE_SIZE.position(), fieldStats.maxValueSize());
+    record.set(LOWER_BOUND.position(), fieldStats.lowerBound());
+    record.set(UPPER_BOUND.position(), fieldStats.upperBound());
+    record.set(EXACT_BOUNDS.position(), fieldStats.hasExactBounds());
 
     // this is typically called by Avro reflection code
     BaseContentStats stats = new BaseContentStats(rootStatsStruct);
@@ -287,17 +287,17 @@ public class TestContentStats {
     BaseContentStats stats = new BaseContentStats(rootStatsStruct);
 
     // invalid lower bound
-    record.set(LOWER_BOUND.offset(), 5.0);
+    record.set(LOWER_BOUND.position(), 5.0);
     assertThatThrownBy(() -> stats.set(0, record))
         .isInstanceOf(IllegalArgumentException.class)
         .hasMessage(
             "Invalid lower bound type, expected a subtype of class java.lang.Integer: java.lang.Double");
 
     // set valid lower bound so that upper bound is evaluated
-    record.set(LOWER_BOUND.offset(), 5);
+    record.set(LOWER_BOUND.position(), 5);
 
     // invalid upper bound
-    record.set(UPPER_BOUND.offset(), "20");
+    record.set(UPPER_BOUND.position(), "20");
     assertThatThrownBy(() -> stats.set(0, record))
         .isInstanceOf(IllegalArgumentException.class)
         .hasMessage(
diff --git a/core/src/test/java/org/apache/iceberg/stats/TestFieldStats.java b/core/src/test/java/org/apache/iceberg/stats/TestFieldStats.java
index eb56439f85..ffd91efd8a 100644
--- a/core/src/test/java/org/apache/iceberg/stats/TestFieldStats.java
+++ b/core/src/test/java/org/apache/iceberg/stats/TestFieldStats.java
@@ -201,22 +201,22 @@ public class TestFieldStats {
             .hasExactBounds()
             .build();
 
-    assertThat(fieldStats.get(VALUE_COUNT.offset(), Long.class)).isEqualTo(10L);
-    assertThat(fieldStats.get(NULL_VALUE_COUNT.offset(), Long.class)).isEqualTo(2L);
-    assertThat(fieldStats.get(NAN_VALUE_COUNT.offset(), Long.class)).isEqualTo(3L);
-    assertThat(fieldStats.get(AVG_VALUE_SIZE.offset(), Integer.class)).isEqualTo(30);
-    assertThat(fieldStats.get(MAX_VALUE_SIZE.offset(), Integer.class)).isEqualTo(70);
-    assertThat(fieldStats.get(LOWER_BOUND.offset(), Integer.class)).isEqualTo(5);
-    assertThat(fieldStats.get(UPPER_BOUND.offset(), Integer.class)).isEqualTo(20);
-    assertThat(fieldStats.get(EXACT_BOUNDS.offset(), Boolean.class)).isEqualTo(true);
+    assertThat(fieldStats.get(VALUE_COUNT.position(), Long.class)).isEqualTo(10L);
+    assertThat(fieldStats.get(NULL_VALUE_COUNT.position(), Long.class)).isEqualTo(2L);
+    assertThat(fieldStats.get(NAN_VALUE_COUNT.position(), Long.class)).isEqualTo(3L);
+    assertThat(fieldStats.get(AVG_VALUE_SIZE.position(), Integer.class)).isEqualTo(30);
+    assertThat(fieldStats.get(MAX_VALUE_SIZE.position(), Integer.class)).isEqualTo(70);
+    assertThat(fieldStats.get(LOWER_BOUND.position(), Integer.class)).isEqualTo(5);
+    assertThat(fieldStats.get(UPPER_BOUND.position(), Integer.class)).isEqualTo(20);
+    assertThat(fieldStats.get(EXACT_BOUNDS.position(), Boolean.class)).isEqualTo(true);
 
     assertThatThrownBy(() -> assertThat(fieldStats.get(10, Long.class)))
         .isInstanceOf(IllegalArgumentException.class)
-        .hasMessage("Invalid statistic offset: 10");
-    assertThatThrownBy(() -> assertThat(fieldStats.get(VALUE_COUNT.offset(), Double.class)))
+        .hasMessage("Invalid statistic position: 10");
+    assertThatThrownBy(() -> assertThat(fieldStats.get(VALUE_COUNT.position(), Double.class)))
         .isInstanceOf(ClassCastException.class)
         .hasMessage("Cannot cast java.lang.Long to java.lang.Double");
-    assertThatThrownBy(() -> assertThat(fieldStats.get(AVG_VALUE_SIZE.offset(), Long.class)))
+    assertThatThrownBy(() -> assertThat(fieldStats.get(AVG_VALUE_SIZE.position(), Long.class)))
         .isInstanceOf(ClassCastException.class)
         .hasMessage("Cannot cast java.lang.Integer to java.lang.Long");
   }

Reply via email to