Jefffrey commented on code in PR #19503:
URL: https://github.com/apache/datafusion/pull/19503#discussion_r2648743126
##########
datafusion/functions/src/string/to_hex.rs:
##########
@@ -37,42 +37,142 @@ use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl,
Signature, Volatility};
use datafusion_expr_common::signature::TypeSignature::Exact;
use datafusion_macros::user_doc;
+/// Hex lookup table for fast conversion
+const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
+
/// Converts the number to its equivalent hexadecimal representation.
/// to_hex(2147483647) = '7fffffff'
fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
where
- T::Native: std::fmt::LowerHex,
+ T::Native: ToHex,
{
let integer_array = as_primitive_array::<T>(&args[0])?;
+ let len = integer_array.len();
- let mut result = GenericStringBuilder::<i32>::with_capacity(
- integer_array.len(),
- // * 8 to convert to bits, / 4 bits per hex char
- integer_array.len() * (T::Native::get_byte_width() * 8 / 4),
- );
+ // Max hex string length: 16 chars for u64/i64
+ let max_hex_len = T::Native::get_byte_width() * 2;
- for integer in integer_array {
- if let Some(value) = integer {
- if let Some(value_usize) = value.to_usize() {
- write!(result, "{value_usize:x}")?;
- } else if let Some(value_isize) = value.to_isize() {
- write!(result, "{value_isize:x}")?;
- } else {
- return exec_err!(
- "Unsupported data type {integer:?} for function to_hex"
- );
- }
- result.append_value("");
- } else {
- result.append_null();
- }
+ // Pre-allocate buffers - avoid the builder API overhead
+ let mut offsets: Vec<i32> = Vec::with_capacity(len + 1);
+ let mut values: Vec<u8> = Vec::with_capacity(len * max_hex_len);
+
+ // Reusable buffer for hex conversion
+ let mut hex_buffer = [0u8; 16];
+
+ // Start with offset 0
+ offsets.push(0);
+
+ // Process all values directly (including null slots - we write empty strings for nulls)
Review Comment:
nit: I don't think this code is writing empty strings for nulls; it seems to
process whatever value is in the null slot normally.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]