This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new b38c731ec7 fix: write hive partitions for any int/uint/float (#15337)
b38c731ec7 is described below

commit b38c731ec7bde6794322edf36d20855481192191
Author: christophermcdermott <[email protected]>
AuthorDate: Mon Mar 24 15:02:51 2025 -0400

    fix: write hive partitions for any int/uint/float (#15337)
    
    Co-authored-by: christophermcdermott <masked>
---
 datafusion/datasource/src/write/demux.rs    | 60 ++++++++++++++++++++++++++++-
 datafusion/sqllogictest/test_files/copy.slt | 34 ++++++++++------
 2 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/datafusion/datasource/src/write/demux.rs b/datafusion/datasource/src/write/demux.rs
index 111d22060c..fc2e5daf92 100644
--- a/datafusion/datasource/src/write/demux.rs
+++ b/datafusion/datasource/src/write/demux.rs
@@ -33,8 +33,10 @@ use arrow::array::{
 };
 use arrow::datatypes::{DataType, Schema};
 use datafusion_common::cast::{
-    as_boolean_array, as_date32_array, as_date64_array, as_int32_array, as_int64_array,
-    as_string_array, as_string_view_array,
+    as_boolean_array, as_date32_array, as_date64_array, as_float16_array,
+    as_float32_array, as_float64_array, as_int16_array, as_int32_array, as_int64_array,
+    as_int8_array, as_string_array, as_string_view_array, as_uint16_array,
+    as_uint32_array, as_uint64_array, as_uint8_array,
 };
 use datafusion_common::{exec_datafusion_err, not_impl_err, DataFusionError};
 use datafusion_common_runtime::SpawnedTask;
@@ -407,6 +409,18 @@ fn compute_partition_keys_by_row<'a>(
                     partition_values.push(Cow::from(date));
                 }
             }
+            DataType::Int8 => {
+                let array = as_int8_array(col_array)?;
+                for i in 0..rb.num_rows() {
+                    partition_values.push(Cow::from(array.value(i).to_string()));
+                }
+            }
+            DataType::Int16 => {
+                let array = as_int16_array(col_array)?;
+                for i in 0..rb.num_rows() {
+                    partition_values.push(Cow::from(array.value(i).to_string()));
+                }
+            }
             DataType::Int32 => {
                 let array = as_int32_array(col_array)?;
                 for i in 0..rb.num_rows() {
@@ -419,6 +433,48 @@ fn compute_partition_keys_by_row<'a>(
                     partition_values.push(Cow::from(array.value(i).to_string()));
                 }
             }
+            DataType::UInt8 => {
+                let array = as_uint8_array(col_array)?;
+                for i in 0..rb.num_rows() {
+                    partition_values.push(Cow::from(array.value(i).to_string()));
+                }
+            }
+            DataType::UInt16 => {
+                let array = as_uint16_array(col_array)?;
+                for i in 0..rb.num_rows() {
+                    partition_values.push(Cow::from(array.value(i).to_string()));
+                }
+            }
+            DataType::UInt32 => {
+                let array = as_uint32_array(col_array)?;
+                for i in 0..rb.num_rows() {
+                    partition_values.push(Cow::from(array.value(i).to_string()));
+                }
+            }
+            DataType::UInt64 => {
+                let array = as_uint64_array(col_array)?;
+                for i in 0..rb.num_rows() {
+                    partition_values.push(Cow::from(array.value(i).to_string()));
+                }
+            }
+            DataType::Float16 => {
+                let array = as_float16_array(col_array)?;
+                for i in 0..rb.num_rows() {
+                    partition_values.push(Cow::from(array.value(i).to_string()));
+                }
+            }
+            DataType::Float32 => {
+                let array = as_float32_array(col_array)?;
+                for i in 0..rb.num_rows() {
+                    partition_values.push(Cow::from(array.value(i).to_string()));
+                }
+            }
+            DataType::Float64 => {
+                let array = as_float64_array(col_array)?;
+                for i in 0..rb.num_rows() {
+                    partition_values.push(Cow::from(array.value(i).to_string()));
+                }
+            }
             DataType::Dictionary(_, _) => {
                 downcast_dictionary_array!(
                     col_array =>  {
diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt
index e2bb23e357..925f96bd4a 100644
--- a/datafusion/sqllogictest/test_files/copy.slt
+++ b/datafusion/sqllogictest/test_files/copy.slt
@@ -110,24 +110,36 @@ a
 
 # Copy to directory as partitioned files
 query I
-COPY (values (1::int, 2::bigint, 19968::date, arrow_cast(1725235200000, 'Date64'), false, 'x'),
-    (11::int, 22::bigint, 19969::date, arrow_cast(1725148800000, 'Date64'), true, 'y')
+COPY (values (arrow_cast(1, 'Int8'), arrow_cast(2, 'UInt8'), arrow_cast(3, 'Int16'), arrow_cast(4, 'UInt16'),
+        arrow_cast(5, 'Int32'), arrow_cast(6, 'UInt32'), arrow_cast(7, 'Int64'), arrow_cast(8, 'UInt64'),
+        arrow_cast(9.1015625, 'Float16'), arrow_cast(10.1, 'Float32'), arrow_cast(11.1, 'Float64'), 19968::date,
+        arrow_cast(1725235200000, 'Date64'), false, 'x'),
+    (arrow_cast(11, 'Int8'), arrow_cast(22, 'UInt8'), arrow_cast(33, 'Int16'), arrow_cast(44, 'UInt16'),
+         arrow_cast(55, 'Int32'), arrow_cast(66, 'UInt32'), arrow_cast(77, 'Int64'), arrow_cast(88, 'UInt64'),
+         arrow_cast(9.203125, 'Float16'), arrow_cast(10.2, 'Float32'), arrow_cast(11.2, 'Float64'), 19969::date,
+         arrow_cast(1725148800000, 'Date64'), true, 'y')
 )
-TO 'test_files/scratch/copy/partitioned_table5/' STORED AS parquet PARTITIONED BY (column1, column2, column3, column4, column5)
+TO 'test_files/scratch/copy/partitioned_table5/' STORED AS parquet PARTITIONED BY (column1, column2, column3, column4,
+    column5, column6, column7, column8, column9, column10, column11, column12, column13, column14)
 OPTIONS ('format.compression' 'zstd(10)');
 ----
 2
 
 # validate partitioning
 statement ok
-CREATE EXTERNAL TABLE validate_partitioned_parquet5 (column1 int, column2 bigint, column3 date, column4 date, column5 boolean, column6 varchar) STORED AS PARQUET
-LOCATION 'test_files/scratch/copy/partitioned_table5/' PARTITIONED BY (column1, column2, column3, column4, column5);
-
-query IIDDBT
-select column1, column2, column3, column4, column5, column6 from validate_partitioned_parquet5 order by column1,column2,column3,column4,column5;
-----
-1 2 2024-09-02 2024-09-02 false x
-11 22 2024-09-03 2024-09-01 true y
+CREATE EXTERNAL TABLE validate_partitioned_parquet5 (column1 int, column2 int, column3 int, column4 int, column5 int,
+    column6 int, column7 bigint, column8 bigint, column9 float, column10 float, column11 float, column12 date,
+    column13 date, column14 boolean, column15 varchar) STORED AS PARQUET
+LOCATION 'test_files/scratch/copy/partitioned_table5/' PARTITIONED BY (column1, column2, column3, column4, column5,
+    column6, column7, column8, column9, column10, column11, column12, column13, column14);
+
+query IIIIIIIIRRRDDBT
+select column1, column2, column3, column4, column5, column6, column7, column8, column9, column10, column11, column12,
+    column13, column14, column15 from validate_partitioned_parquet5 order by column1, column2, column3, column4,
+    column5, column6, column7, column8, column9, column10, column11, column12, column13;
+----
+1 2 3 4 5 6 7 8 9.1015625 10.1 11.1 2024-09-02 2024-09-02 false x
+11 22 33 44 55 66 77 88 9.203125 10.2 11.2 2024-09-03 2024-09-01 true y
 
 
 statement ok


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to