This is an automated email from the ASF dual-hosted git repository.

timbrown pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-xtable.git


The following commit(s) were added to refs/heads/main by this push:
     new 22f4026f Delta source: extract nested field's stats (#769)
22f4026f is described below

commit 22f4026f00b05069e952d7bfbefee7dda10d79c3
Author: Hanzhi Wang <[email protected]>
AuthorDate: Wed Dec 17 06:49:13 2025 -0800

    Delta source: extract nested field's stats (#769)
    
    Co-authored-by: Hanzhi Wang <[email protected]>
---
 .../apache/xtable/delta/DeltaConversionSource.java |  2 +-
 .../xtable/delta/DeltaDataFileExtractor.java       |  2 +-
 .../apache/xtable/testutil/ColumnStatMapUtil.java  | 35 ++++++++++++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
index 97804d5f..bb40a315 100644
--- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
+++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
@@ -124,7 +124,7 @@ public class DeltaConversionSource implements ConversionSource<Long> {
                 snapshotAtVersion,
                 fileFormat,
                 tableAtVersion.getPartitioningFields(),
-                tableAtVersion.getReadSchema().getFields(),
+                tableAtVersion.getReadSchema().getAllFields(),
                 true,
                 DeltaPartitionExtractor.getInstance(),
                 DeltaStatsExtractor.getInstance());
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java
index cffbcbaa..c91bd6bc 100644
--- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java
+++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java
@@ -64,7 +64,7 @@ public class DeltaDataFileExtractor {
         Snapshot snapshot, InternalSchema schema, boolean includeColumnStats) {
       this.fileFormat =
           actionsConverter.convertToFileFormat(snapshot.metadata().format().provider());
-      this.fields = schema.getFields();
+      this.fields = schema.getAllFields();
       this.partitionFields =
           partitionExtractor.convertFromDeltaPartitionFormat(
               schema, snapshot.metadata().partitionSchema());
diff --git a/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java b/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
index 1703b916..03016a0d 100644
--- a/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
+++ b/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
@@ -200,6 +200,24 @@ public class ColumnStatMapUtil {
           .schema(InternalSchema.builder().name("double").dataType(InternalType.DOUBLE).build())
           .build();
 
+  private static final InternalField NESTED_STRING_FIELD =
+      InternalField.builder()
+          .name("nested_string_field")
+          .parentPath("nested_struct_field_primitive")
+          .schema(InternalSchema.builder().name("a_string").dataType(InternalType.STRING).build())
+          .build();
+
+  private static final InternalField NESTED_STRUCT_FIELD_PRIMITIVE =
+      InternalField.builder()
+          .name("nested_struct_field_primitive")
+          .schema(
+              InternalSchema.builder()
+                  .name("nested_struct_field_primitive")
+                  .dataType(InternalType.RECORD)
+                  .fields(Arrays.asList(NESTED_STRING_FIELD))
+                  .build())
+          .build();
+
   public static InternalSchema getSchema() {
     return InternalSchema.builder()
         .name("record")
@@ -216,6 +234,7 @@ public class ColumnStatMapUtil {
                 ARRAY_LONG_FIELD,
                 MAP_STRING_LONG_FIELD,
                 NESTED_STRUCT_FIELD,
+                NESTED_STRUCT_FIELD_PRIMITIVE,
                 DECIMAL_FIELD,
                 FLOAT_FIELD,
                 DOUBLE_FIELD))
@@ -343,7 +362,21 @@ public class ColumnStatMapUtil {
             .numValues(50)
             .totalSize(123)
             .build();
+    ColumnStat nestedStringColumnStats =
+        ColumnStat.builder()
+            .field(NESTED_STRING_FIELD)
+            .numNulls(1)
+            .range(Range.vector("alice", "zion"))
+            .numValues(50)
+            .totalSize(500)
+            .build();
 
+    ColumnStat ignoredColumnStatsNestedStructFieldPrimitive =
+        ColumnStat.builder()
+            .field(NESTED_STRUCT_FIELD_PRIMITIVE)
+            .numNulls(0)
+            .range(Range.scalar("IGNORED"))
+            .build();
     ColumnStat ignoredColumnStatsArrayLongField =
         ColumnStat.builder()
             .field(ARRAY_LONG_FIELD)
@@ -385,6 +418,8 @@ public class ColumnStatMapUtil {
         decimalColumnStats,
         floatColumnStats,
         doubleColumnStats,
+        nestedStringColumnStats,
+        ignoredColumnStatsNestedStructFieldPrimitive,
         ignoredColumnStatsArrayLongField,
         ignoredColumnStatsMapStringField,
         ignoredColumnStatsNestedStructField,

Reply via email to