This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch 1.10.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/1.10.x by this push:
     new 25ff57a22e Spark: Fix Z-order UDF to correctly handle DateType 
(#14108) (#14556)
25ff57a22e is described below

commit 25ff57a22e066977d6b869b2ebd5ae98d64fca64
Author: Huaxin Gao <[email protected]>
AuthorDate: Mon Nov 10 15:45:16 2025 -0800

    Spark: Fix Z-order UDF to correctly handle DateType (#14108) (#14556)
    
    * Fix Z-order UDF to correctly handle DateType using unix_date
    
    * Update 
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java
    
    ---------
    
    
    (cherry picked from commit bd1b8900ad17a0625349aec3f73c87e79ad2af66)
    
    Co-authored-by: Ron Kapoor <[email protected]>
    Co-authored-by: Eduard Tudenhoefner <[email protected]>
---
 .../apache/iceberg/spark/actions/SparkZOrderUDF.java  |  2 +-
 .../spark/actions/TestRewriteDataFilesAction.java     | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git 
a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
 
b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
index db359fdd62..d142e3fd1a 100644
--- 
a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
+++ 
b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
@@ -310,7 +310,7 @@ class SparkZOrderUDF implements Serializable {
     } else if (type instanceof TimestampType) {
       return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType));
     } else if (type instanceof DateType) {
-      return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType));
+      return 
longToOrderedBytesUDF().apply(functions.unix_date(column).cast(DataTypes.LongType));
     } else {
       throw new IllegalArgumentException(
           String.format(
diff --git 
a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java
 
b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java
index 7e9462424d..255938eb3f 100644
--- 
a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java
+++ 
b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java
@@ -21,6 +21,7 @@ package org.apache.iceberg.spark.actions;
 import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES;
 import static org.apache.iceberg.types.Types.NestedField.optional;
 import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.apache.spark.sql.functions.col;
 import static org.apache.spark.sql.functions.current_date;
 import static org.apache.spark.sql.functions.date_add;
 import static org.apache.spark.sql.functions.expr;
@@ -128,6 +129,7 @@ import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
 import org.apache.spark.sql.internal.SQLConf;
+import org.apache.spark.sql.types.DataTypes;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.TestTemplate;
@@ -2110,6 +2112,23 @@ public class TestRewriteDataFilesAction extends TestBase 
{
         .isFalse();
   }
 
+  @TestTemplate
+  public void testZOrderUDFWithDateType() {
+    SparkZOrderUDF zorderUDF = new SparkZOrderUDF(1, 16, 1024);
+    Dataset<Row> result =
+        spark
+            .sql("SELECT DATE '2025-01-01' as test_col")
+            .withColumn(
+                "zorder_result",
+                zorderUDF.sortedLexicographically(col("test_col"), 
DataTypes.DateType));
+
+    
assertThat(result.schema().apply("zorder_result").dataType()).isEqualTo(DataTypes.BinaryType);
+    List<Row> rows = result.collectAsList();
+    Row row = rows.get(0);
+    byte[] zorderBytes = row.getAs("zorder_result");
+    assertThat(zorderBytes).isNotNull().isNotEmpty();
+  }
+
   protected void shouldRewriteDataFilesWithPartitionSpec(Table table, int 
outputSpecId) {
     List<DataFile> rewrittenFiles = currentDataFiles(table);
     assertThat(rewrittenFiles).allMatch(file -> file.specId() == outputSpecId);

Reply via email to