This is an automated email from the ASF dual-hosted git repository.

etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new b35c7ec1b0 Core: Fix NAN_VALUE_COUNTS serialization for ContentFile (#14721)
b35c7ec1b0 is described below

commit b35c7ec1b03e3897da68960cd556d635b2f5ae54
Author: Huaxin Gao <[email protected]>
AuthorDate: Mon Dec 1 00:45:09 2025 -0800

    Core: Fix NAN_VALUE_COUNTS serialization for ContentFile (#14721)
---
 .../java/org/apache/iceberg/ContentFileParser.java |  2 +-
 .../org/apache/iceberg/TestContentFileParser.java  | 39 ++++++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java
index b48334d822..3d0deba3df 100644
--- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java
+++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java
@@ -228,7 +228,7 @@ public class ContentFileParser {
           DataFile.NULL_VALUE_COUNTS.type(), contentFile.nullValueCounts(), generator);
     }
 
-    if (contentFile.nullValueCounts() != null) {
+    if (contentFile.nanValueCounts() != null) {
       generator.writeFieldName(NAN_VALUE_COUNTS);
       SingleValueParser.toJson(
           DataFile.NAN_VALUE_COUNTS.type(), contentFile.nanValueCounts(), generator);
diff --git a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java
index 3f463f722e..f8f0d77b0f 100644
--- a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java
+++ b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java
@@ -64,6 +64,21 @@ public class TestContentFileParser {
         .hasMessage("Invalid partition spec: null");
   }
 
+  @Test
+  public void testNanCountsOnlyWritesNanValueCounts() throws Exception {
+    PartitionSpec spec = PartitionSpec.unpartitioned();
+    DataFile dataFile = dataFileWithOnlyNanCounts(spec);
+    String jsonStr = ContentFileParser.toJson(dataFile, spec);
+    // ensure nan counts are present and null counts are not emitted
+    assertThat(jsonStr).contains("\"nan-value-counts\"");
+    assertThat(jsonStr).doesNotContain("\"null-value-counts\"");
+    JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr);
+    ContentFile<?> deserialized =
+        ContentFileParser.fromJson(jsonNode, Map.of(TestBase.SPEC.specId(), spec));
+    assertThat(deserialized).isInstanceOf(DataFile.class);
+    assertContentFileEquals(dataFile, deserialized, spec);
+  }
+
   @ParameterizedTest
   @MethodSource("provideSpecAndDataFile")
   public void testDataFile(PartitionSpec spec, DataFile dataFile, String expectedJson)
@@ -125,6 +140,30 @@ public class TestContentFileParser {
     return builder.build();
   }
 
+  private static DataFile dataFileWithOnlyNanCounts(PartitionSpec spec) {
+    DataFiles.Builder builder =
+        DataFiles.builder(spec)
+            .withPath("/path/to/data-nan-only.parquet")
+            .withMetrics(
+                new Metrics(
+                    1L, // record count
+                    null, // column sizes
+                    null, // value counts
+                    null, // null value counts (intentionally null)
+                    ImmutableMap.of(3, 0L), // nan value counts present
+                    null, // lower bounds
+                    null // upper bounds
+                    ))
+            .withFileSizeInBytes(10)
+            .withRecordCount(1);
+
+    if (spec.isPartitioned()) {
+      builder.withPartitionPath("data_bucket=1");
+    }
+
+    return builder.build();
+  }
+
   private static String dataFileJsonWithRequiredOnly(PartitionSpec spec) {
     if (spec.isUnpartitioned()) {
       return "{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\",\"file-format\":\"PARQUET\","

Reply via email to