This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git

The following commit(s) were added to refs/heads/main by this push:
     new 94ed08a364 fix: dictionary encoded column to partition column casting bug (#15652)
94ed08a364 is described below

commit 94ed08a3645758a77b9d62a99f801a103532311e
Author: haruband <harub...@gmail.com>
AuthorDate: Thu Apr 10 00:37:23 2025 +0900

    fix: dictionary encoded column to partition column casting bug (#15652)

    * Fix partition values bugs from dictionary encoded column

    * Add some sqllogictests
---
 datafusion/datasource/src/write/demux.rs          | 10 ++++------
 datafusion/sqllogictest/test_files/dictionary.slt |  7 +++++++
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/datafusion/datasource/src/write/demux.rs b/datafusion/datasource/src/write/demux.rs
index fc2e5daf92..49c3a64d24 100644
--- a/datafusion/datasource/src/write/demux.rs
+++ b/datafusion/datasource/src/write/demux.rs
@@ -28,8 +28,8 @@ use datafusion_common::error::Result;
 use datafusion_physical_plan::SendableRecordBatchStream;
 
 use arrow::array::{
-    builder::UInt64Builder, cast::AsArray, downcast_dictionary_array, RecordBatch,
-    StringArray, StructArray,
+    builder::UInt64Builder, cast::AsArray, downcast_dictionary_array, ArrayAccessor,
+    RecordBatch, StringArray, StructArray,
 };
 use arrow::datatypes::{DataType, Schema};
 use datafusion_common::cast::{
@@ -482,10 +482,8 @@ fn compute_partition_keys_by_row<'a>(
                             .ok_or(exec_datafusion_err!("it is not yet supported to write to hive partitions with datatype {}",
                             dtype))?;
 
-                        for val in array.values() {
-                            partition_values.push(
-                                Cow::from(val.ok_or(exec_datafusion_err!("Cannot partition by null value for column {}", col))?),
-                            );
+                        for i in 0..rb.num_rows() {
+                            partition_values.push(Cow::from(array.value(i)));
                         }
                     },
                     _ => unreachable!(),
diff --git a/datafusion/sqllogictest/test_files/dictionary.slt b/datafusion/sqllogictest/test_files/dictionary.slt
index 778b3537d1..1769f42c2d 100644
--- a/datafusion/sqllogictest/test_files/dictionary.slt
+++ b/datafusion/sqllogictest/test_files/dictionary.slt
@@ -450,3 +450,10 @@ query I
 select dense_rank() over (order by arrow_cast('abc', 'Dictionary(UInt16, Utf8)'));
 ----
 1
+
+# Test dictionary encoded column to partition column casting
+statement ok
+CREATE TABLE test0 AS VALUES ('foo',1), ('bar',2), ('foo',3);
+
+statement ok
+COPY (SELECT arrow_cast(column1, 'Dictionary(Int32, Utf8)') AS column1, column2 FROM test0) TO 'test_files/scratch/copy/part_dict_test' STORED AS PARQUET PARTITIONED BY (column1);
\ No newline at end of file

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org