This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 94ed08a364 fix: dictionary encoded column to partition column casting bug (#15652)
94ed08a364 is described below

commit 94ed08a3645758a77b9d62a99f801a103532311e
Author: haruband <harub...@gmail.com>
AuthorDate: Thu Apr 10 00:37:23 2025 +0900

    fix: dictionary encoded column to partition column casting bug (#15652)
    
    * Fix partition values bugs from dictionary encoded column
    
    * Add some sqllogictests
---
 datafusion/datasource/src/write/demux.rs          | 10 ++++------
 datafusion/sqllogictest/test_files/dictionary.slt |  7 +++++++
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/datafusion/datasource/src/write/demux.rs b/datafusion/datasource/src/write/demux.rs
index fc2e5daf92..49c3a64d24 100644
--- a/datafusion/datasource/src/write/demux.rs
+++ b/datafusion/datasource/src/write/demux.rs
@@ -28,8 +28,8 @@ use datafusion_common::error::Result;
 use datafusion_physical_plan::SendableRecordBatchStream;
 
 use arrow::array::{
-    builder::UInt64Builder, cast::AsArray, downcast_dictionary_array, RecordBatch,
-    StringArray, StructArray,
+    builder::UInt64Builder, cast::AsArray, downcast_dictionary_array, ArrayAccessor,
+    RecordBatch, StringArray, StructArray,
 };
 use arrow::datatypes::{DataType, Schema};
 use datafusion_common::cast::{
@@ -482,10 +482,8 @@ fn compute_partition_keys_by_row<'a>(
                             .ok_or(exec_datafusion_err!("it is not yet supported to write to hive partitions with datatype {}",
                             dtype))?;
 
-                        for val in array.values() {
-                            partition_values.push(
-                                Cow::from(val.ok_or(exec_datafusion_err!("Cannot partition by null value for column {}", col))?),
-                            );
+                        for i in 0..rb.num_rows() {
+                            partition_values.push(Cow::from(array.value(i)));
                         }
                     },
                     _ => unreachable!(),
diff --git a/datafusion/sqllogictest/test_files/dictionary.slt b/datafusion/sqllogictest/test_files/dictionary.slt
index 778b3537d1..1769f42c2d 100644
--- a/datafusion/sqllogictest/test_files/dictionary.slt
+++ b/datafusion/sqllogictest/test_files/dictionary.slt
@@ -450,3 +450,10 @@ query I
 select dense_rank() over (order by arrow_cast('abc', 'Dictionary(UInt16, Utf8)'));
 ----
 1
+
+# Test dictionary encoded column to partition column casting
+statement ok
+CREATE TABLE test0 AS VALUES ('foo',1), ('bar',2), ('foo',3);
+
+statement ok
+COPY (SELECT arrow_cast(column1, 'Dictionary(Int32, Utf8)') AS column1, column2 FROM test0) TO 'test_files/scratch/copy/part_dict_test' STORED AS PARQUET PARTITIONED BY (column1);
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to