This is an automated email from the ASF dual-hosted git repository.
timbrown pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-xtable.git
The following commit(s) were added to refs/heads/main by this push:
new 22f4026f Delta source: extract nested field's stats (#769)
22f4026f is described below
commit 22f4026f00b05069e952d7bfbefee7dda10d79c3
Author: Hanzhi Wang <[email protected]>
AuthorDate: Wed Dec 17 06:49:13 2025 -0800
Delta source: extract nested field's stats (#769)
Co-authored-by: Hanzhi Wang <[email protected]>
---
.../apache/xtable/delta/DeltaConversionSource.java | 2 +-
.../xtable/delta/DeltaDataFileExtractor.java | 2 +-
.../apache/xtable/testutil/ColumnStatMapUtil.java | 35 ++++++++++++++++++++++
3 files changed, 37 insertions(+), 2 deletions(-)
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
index 97804d5f..bb40a315 100644
--- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
+++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
@@ -124,7 +124,7 @@ public class DeltaConversionSource implements ConversionSource<Long> {
snapshotAtVersion,
fileFormat,
tableAtVersion.getPartitioningFields(),
- tableAtVersion.getReadSchema().getFields(),
+ tableAtVersion.getReadSchema().getAllFields(),
true,
DeltaPartitionExtractor.getInstance(),
DeltaStatsExtractor.getInstance());
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java
index cffbcbaa..c91bd6bc 100644
--- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java
+++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java
@@ -64,7 +64,7 @@ public class DeltaDataFileExtractor {
Snapshot snapshot, InternalSchema schema, boolean includeColumnStats) {
this.fileFormat =
actionsConverter.convertToFileFormat(snapshot.metadata().format().provider());
- this.fields = schema.getFields();
+ this.fields = schema.getAllFields();
this.partitionFields =
partitionExtractor.convertFromDeltaPartitionFormat(
schema, snapshot.metadata().partitionSchema());
diff --git a/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java b/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
index 1703b916..03016a0d 100644
--- a/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
+++ b/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
@@ -200,6 +200,24 @@ public class ColumnStatMapUtil {
.schema(InternalSchema.builder().name("double").dataType(InternalType.DOUBLE).build())
.build();
+ private static final InternalField NESTED_STRING_FIELD =
+ InternalField.builder()
+ .name("nested_string_field")
+ .parentPath("nested_struct_field_primitive")
+ .schema(InternalSchema.builder().name("a_string").dataType(InternalType.STRING).build())
+ .build();
+
+ private static final InternalField NESTED_STRUCT_FIELD_PRIMITIVE =
+ InternalField.builder()
+ .name("nested_struct_field_primitive")
+ .schema(
+ InternalSchema.builder()
+ .name("nested_struct_field_primitive")
+ .dataType(InternalType.RECORD)
+ .fields(Arrays.asList(NESTED_STRING_FIELD))
+ .build())
+ .build();
+
public static InternalSchema getSchema() {
return InternalSchema.builder()
.name("record")
@@ -216,6 +234,7 @@ public class ColumnStatMapUtil {
ARRAY_LONG_FIELD,
MAP_STRING_LONG_FIELD,
NESTED_STRUCT_FIELD,
+ NESTED_STRUCT_FIELD_PRIMITIVE,
DECIMAL_FIELD,
FLOAT_FIELD,
DOUBLE_FIELD))
@@ -343,7 +362,21 @@ public class ColumnStatMapUtil {
.numValues(50)
.totalSize(123)
.build();
+ ColumnStat nestedStringColumnStats =
+ ColumnStat.builder()
+ .field(NESTED_STRING_FIELD)
+ .numNulls(1)
+ .range(Range.vector("alice", "zion"))
+ .numValues(50)
+ .totalSize(500)
+ .build();
+ ColumnStat ignoredColumnStatsNestedStructFieldPrimitive =
+ ColumnStat.builder()
+ .field(NESTED_STRUCT_FIELD_PRIMITIVE)
+ .numNulls(0)
+ .range(Range.scalar("IGNORED"))
+ .build();
ColumnStat ignoredColumnStatsArrayLongField =
ColumnStat.builder()
.field(ARRAY_LONG_FIELD)
@@ -385,6 +418,8 @@ public class ColumnStatMapUtil {
decimalColumnStats,
floatColumnStats,
doubleColumnStats,
+ nestedStringColumnStats,
+ ignoredColumnStatsNestedStructFieldPrimitive,
ignoredColumnStatsArrayLongField,
ignoredColumnStatsMapStringField,
ignoredColumnStatsNestedStructField,