This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 16fa35fa25 Allow base64 encoding of fixedsizebinary arrays (#18950)
16fa35fa25 is described below
commit 16fa35fa2597c991e37218d9577c8963daafc983
Author: Max Burke <[email protected]>
AuthorDate: Sun Dec 14 04:12:34 2025 -0800
Allow base64 encoding of fixedsizebinary arrays (#18950)
## Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax. For example
`Closes #123` indicates that this PR will close issue #123.
-->
- Closes #18949
## Rationale for this change
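`encode` did not accept `FixedSizeBinary` inputs. This change lets `FixedSizeBinary` arrays and scalars be encoded (base64 and hex), the same way `Binary` and `LargeBinary` already are.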
## Are these changes tested?
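Yes: a unit test (`test_encode_fsb` in `datafusion/functions/src/encoding/inner.rs`) and a sqllogictest case in `datafusion/sqllogictest/test_files/encoding.slt`.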
## Are there any user-facing changes?
No
---------
Co-authored-by: Tim Saucer <[email protected]>
---
datafusion/functions/src/encoding/inner.rs | 49 ++++++++++++++++++++++++-
datafusion/sqllogictest/test_files/encoding.slt | 13 +++++++
2 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs
index cb3f45b60a..d60c39a25d 100644
--- a/datafusion/functions/src/encoding/inner.rs
+++ b/datafusion/functions/src/encoding/inner.rs
@@ -31,7 +31,9 @@ use base64::{
use datafusion_common::{DataFusionError, Result};
use datafusion_common::{ScalarValue, exec_err, internal_datafusion_err};
use datafusion_common::{
- cast::{as_generic_binary_array, as_generic_string_array},
+ cast::{
+ as_fixed_size_binary_array, as_generic_binary_array,
as_generic_string_array,
+ },
not_impl_err, plan_err,
utils::take_function_args,
};
@@ -105,6 +107,7 @@ impl ScalarUDFImpl for EncodeFunc {
Utf8View => Utf8,
Binary => Utf8,
LargeBinary => LargeUtf8,
+ FixedSizeBinary(_) => Utf8,
Null => Null,
_ => {
return plan_err!(
@@ -135,6 +138,9 @@ impl ScalarUDFImpl for EncodeFunc {
DataType::LargeUtf8 => Ok(vec![DataType::LargeUtf8,
DataType::Utf8]),
DataType::Binary => Ok(vec![DataType::Binary, DataType::Utf8]),
DataType::LargeBinary => Ok(vec![DataType::LargeBinary,
DataType::Utf8]),
+ DataType::FixedSizeBinary(sz) => {
+ Ok(vec![DataType::FixedSizeBinary(*sz), DataType::Utf8])
+ }
_ => plan_err!(
"1st argument should be Utf8 or Binary or Null, got {:?}",
arg_types[0]
@@ -246,6 +252,9 @@ fn encode_process(value: &ColumnarValue, encoding: Encoding) -> Result<ColumnarV
DataType::Utf8View =>
encoding.encode_utf8_array::<i32>(a.as_ref()),
DataType::Binary =>
encoding.encode_binary_array::<i32>(a.as_ref()),
DataType::LargeBinary =>
encoding.encode_binary_array::<i64>(a.as_ref()),
+ DataType::FixedSizeBinary(_) => {
+ encoding.encode_fixed_size_binary_array(a.as_ref())
+ }
other => exec_err!(
"Unsupported data type {other:?} for function
encode({encoding})"
),
@@ -265,6 +274,9 @@ fn encode_process(value: &ColumnarValue, encoding: Encoding) -> Result<ColumnarV
),
ScalarValue::LargeBinary(a) => Ok(encoding
.encode_large_scalar(a.as_ref().map(|v: &Vec<u8>|
v.as_slice()))),
+ ScalarValue::FixedSizeBinary(_, a) => Ok(
+ encoding.encode_scalar(a.as_ref().map(|v: &Vec<u8>|
v.as_slice()))
+ ),
other => exec_err!(
"Unsupported data type {other:?} for function
encode({encoding})"
),
@@ -401,6 +413,15 @@ impl Encoding {
Ok(ColumnarValue::Array(array))
}
+ fn encode_fixed_size_binary_array(self, value: &dyn Array) ->
Result<ColumnarValue> {
+ let input_value = as_fixed_size_binary_array(value)?;
+ let array: ArrayRef = match self {
+ Self::Base64 => encode_to_array!(base64_encode, input_value),
+ Self::Hex => encode_to_array!(hex_encode, input_value),
+ };
+ Ok(ColumnarValue::Array(array))
+ }
+
fn encode_utf8_array<T>(self, value: &dyn Array) -> Result<ColumnarValue>
where
T: OffsetSizeTrait,
@@ -553,3 +574,29 @@ fn decode(args: &[ColumnarValue]) -> Result<ColumnarValue> {
}?;
decode_process(expression, encoding)
}
+
+#[cfg(test)]
+mod tests {
+ #[test]
+ fn test_encode_fsb() {
+ use super::*;
+
+ let value = vec![0u8; 16];
+ let array =
arrow::array::FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+ vec![Some(value)].into_iter(),
+ 16,
+ )
+ .unwrap();
+ let value = ColumnarValue::Array(Arc::new(array));
+
+ let ColumnarValue::Array(result) =
+ encode_process(&value, Encoding::Base64).unwrap()
+ else {
+ panic!("unexpected value");
+ };
+
+ let string_array =
result.as_any().downcast_ref::<StringArray>().unwrap();
+ let result_value = string_array.value(0);
+ assert_eq!(result_value, "AAAAAAAAAAAAAAAAAAAAAA");
+ }
+}
diff --git a/datafusion/sqllogictest/test_files/encoding.slt b/datafusion/sqllogictest/test_files/encoding.slt
index 300294f6e1..57fb76b6c8 100644
--- a/datafusion/sqllogictest/test_files/encoding.slt
+++ b/datafusion/sqllogictest/test_files/encoding.slt
@@ -125,3 +125,16 @@ query T
select encode(digest('hello', 'sha256'), 'hex');
----
2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824
+
+# test for FixedSizeBinary support for encode
+statement ok
+CREATE TABLE test_fsb AS
+SELECT arrow_cast(X'0123456789ABCDEF', 'FixedSizeBinary(8)') as fsb_col;
+
+query TT
+SELECT
+ encode(fsb_col, 'base64') AS fsb_base64,
+ encode(fsb_col, 'hex') AS fsb_hex
+FROM test_fsb;
+----
+ASNFZ4mrze8 0123456789abcdef
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]