coderfender commented on code in PR #2099: URL: https://github.com/apache/datafusion-comet/pull/2099#discussion_r2318024241
########## native/spark-expr/src/static_invoke/char_varchar_utils/read_side_padding.rs: ########## @@ -43,17 +42,69 @@ fn spark_read_side_padding2( match args { [ColumnarValue::Array(array), ColumnarValue::Scalar(ScalarValue::Int32(Some(length)))] => { match array.data_type() { - DataType::Utf8 => spark_read_side_padding_internal::<i32>(array, *length, truncate), - DataType::LargeUtf8 => { - spark_read_side_padding_internal::<i64>(array, *length, truncate) + DataType::Utf8 => spark_read_side_padding_internal::<i32>( + array, + truncate, + ColumnarValue::Scalar(ScalarValue::Int32(Some(*length))), + ), + DataType::LargeUtf8 => spark_read_side_padding_internal::<i64>( + array, + truncate, + ColumnarValue::Scalar(ScalarValue::Int32(Some(*length))), + ), + // Dictionary support required for SPARK-48498 + DataType::Dictionary(_, value_type) => { + let dict = as_dictionary_array::<Int32Type>(array); + let col = if value_type.as_ref() == &DataType::Utf8 { + spark_read_side_padding_internal::<i32>( + dict.values(), + truncate, + ColumnarValue::Scalar(ScalarValue::Int32(Some(*length))), + )? + } else { + spark_read_side_padding_internal::<i64>( + dict.values(), + truncate, + ColumnarValue::Scalar(ScalarValue::Int32(Some(*length))), + )? + }; + // col consists of an array, so arg of to_array() is not used. 
Can be anything + let values = col.to_array(0)?; + let result = DictionaryArray::try_new(dict.keys().clone(), values)?; + Ok(ColumnarValue::Array(make_array(result.into()))) } + other => Err(DataFusionError::Internal(format!( + "Unsupported data type {other:?} for function rpad/read_side_padding", + ))), + } + } + [ColumnarValue::Array(array), ColumnarValue::Array(array_int)] => { + match array.data_type() { + DataType::Utf8 => spark_read_side_padding_internal::<i32>( + array, + truncate, + ColumnarValue::Array(Arc::<dyn arrow::array::Array>::clone(array_int)), + ), + DataType::LargeUtf8 => spark_read_side_padding_internal::<i64>( + array, + truncate, + ColumnarValue::Array(Arc::<dyn arrow::array::Array>::clone(array_int)), + ), // Dictionary support required for SPARK-48498 Review Comment: Great catch! My understanding is that dictionary support ensures SQL-compliant CHAR type literals, which always have a fixed length (this change already existed by the time I picked up this issue). Therefore, my support for the array argument is obsolete. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org