This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 619f368dc chore: use datafusion impl of hex function (#2915)
619f368dc is described below

commit 619f368dc4d437be8ef0d9605d1c4d152932ad92
Author: Kazantsev Maksim <[email protected]>
AuthorDate: Tue Dec 16 08:02:07 2025 -0800

    chore: use datafusion impl of hex function (#2915)
    
    * impl map_from_entries
    
    * Use datafusion impl of hex function
    
    * Revert "impl map_from_entries"
    
    This reverts commit 768b3e90f261c7aea58bdb98dc698b90deeeae34.
    
    ---------
    
    Co-authored-by: Kazantsev Maksim <[email protected]>
---
 native/core/src/execution/jni_api.rs        |   2 +
 native/spark-expr/src/comet_scalar_funcs.rs |   8 +-
 native/spark-expr/src/lib.rs                |   4 +-
 native/spark-expr/src/math_funcs/hex.rs     | 291 ----------------------------
 native/spark-expr/src/math_funcs/mod.rs     |   2 -
 5 files changed, 6 insertions(+), 301 deletions(-)

diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs
index a24d99305..aaed1fc74 100644
--- a/native/core/src/execution/jni_api.rs
+++ b/native/core/src/execution/jni_api.rs
@@ -47,6 +47,7 @@ use datafusion_spark::function::datetime::date_sub::SparkDateSub;
 use datafusion_spark::function::hash::sha1::SparkSha1;
 use datafusion_spark::function::hash::sha2::SparkSha2;
 use datafusion_spark::function::math::expm1::SparkExpm1;
+use datafusion_spark::function::math::hex::SparkHex;
 use datafusion_spark::function::string::char::CharFunc;
 use datafusion_spark::function::string::concat::SparkConcat;
 use futures::poll;
@@ -337,6 +338,7 @@ fn register_datafusion_spark_function(session_ctx: &SessionContext) {
     session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSha1::default()));
     session_ctx.register_udf(ScalarUDF::new_from_impl(SparkConcat::default()));
     session_ctx.register_udf(ScalarUDF::new_from_impl(SparkBitwiseNot::default()));
+    session_ctx.register_udf(ScalarUDF::new_from_impl(SparkHex::default()));
 }
 
 /// Prepares arrow arrays for output.
diff --git a/native/spark-expr/src/comet_scalar_funcs.rs b/native/spark-expr/src/comet_scalar_funcs.rs
index e9f1caa8a..56b12a9e4 100644
--- a/native/spark-expr/src/comet_scalar_funcs.rs
+++ b/native/spark-expr/src/comet_scalar_funcs.rs
@@ -21,8 +21,8 @@ use crate::math_funcs::checked_arithmetic::{checked_add, checked_div, checked_mu
 use crate::math_funcs::modulo_expr::spark_modulo;
 use crate::{
     spark_array_repeat, spark_ceil, spark_decimal_div, spark_decimal_integral_div, spark_floor,
-    spark_hex, spark_isnan, spark_lpad, spark_make_decimal, spark_read_side_padding, spark_round,
-    spark_rpad, spark_unhex, spark_unscaled_value, EvalMode, SparkBitwiseCount, SparkDateTrunc,
+    spark_isnan, spark_lpad, spark_make_decimal, spark_read_side_padding, spark_round, spark_rpad,
+    spark_unhex, spark_unscaled_value, EvalMode, SparkBitwiseCount, SparkDateTrunc,
     SparkStringSpace,
 };
 use arrow::datatypes::DataType;
@@ -130,10 +130,6 @@ pub fn create_comet_physical_fun_with_eval_mode(
         "make_decimal" => {
             make_comet_scalar_udf!("make_decimal", spark_make_decimal, data_type)
         }
-        "hex" => {
-            let func = Arc::new(spark_hex);
-            make_comet_scalar_udf!("hex", func, without data_type)
-        }
         "unhex" => {
             let func = Arc::new(spark_unhex);
             make_comet_scalar_udf!("unhex", func, without data_type)
diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs
index 932fcbe53..2903061d6 100644
--- a/native/spark-expr/src/lib.rs
+++ b/native/spark-expr/src/lib.rs
@@ -74,8 +74,8 @@ pub use hash_funcs::*;
 pub use json_funcs::ToJson;
 pub use math_funcs::{
     create_modulo_expr, create_negate_expr, spark_ceil, spark_decimal_div,
-    spark_decimal_integral_div, spark_floor, spark_hex, spark_make_decimal, spark_round,
-    spark_unhex, spark_unscaled_value, CheckOverflow, NegativeExpr, NormalizeNaNAndZero,
+    spark_decimal_integral_div, spark_floor, spark_make_decimal, spark_round, spark_unhex,
+    spark_unscaled_value, CheckOverflow, NegativeExpr, NormalizeNaNAndZero,
 };
 pub use string_funcs::*;
 
diff --git a/native/spark-expr/src/math_funcs/hex.rs b/native/spark-expr/src/math_funcs/hex.rs
deleted file mode 100644
index 5f476a673..000000000
--- a/native/spark-expr/src/math_funcs/hex.rs
+++ /dev/null
@@ -1,291 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::sync::Arc;
-
-use arrow::array::StringArray;
-use arrow::datatypes::DataType;
-use arrow::{
-    array::{as_dictionary_array, as_largestring_array, as_string_array},
-    datatypes::Int32Type,
-};
-use datafusion::common::{
-    cast::{as_binary_array, as_fixed_size_binary_array, as_int64_array},
-    exec_err, DataFusionError,
-};
-use datafusion::logical_expr::ColumnarValue;
-use std::fmt::Write;
-
-fn hex_int64(num: i64) -> String {
-    format!("{num:X}")
-}
-
-#[inline(always)]
-fn hex_encode<T: AsRef<[u8]>>(data: T, lower_case: bool) -> String {
-    let mut s = String::with_capacity(data.as_ref().len() * 2);
-    if lower_case {
-        for b in data.as_ref() {
-            // Writing to a string never errors, so we can unwrap here.
-            write!(&mut s, "{b:02x}").unwrap();
-        }
-    } else {
-        for b in data.as_ref() {
-            // Writing to a string never errors, so we can unwrap here.
-            write!(&mut s, "{b:02X}").unwrap();
-        }
-    }
-    s
-}
-
-#[inline(always)]
-fn hex_bytes<T: AsRef<[u8]>>(bytes: T) -> Result<String, std::fmt::Error> {
-    let hex_string = hex_encode(bytes, false);
-    Ok(hex_string)
-}
-
-/// Spark-compatible `hex` function
-pub fn spark_hex(args: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
-    if args.len() != 1 {
-        return Err(DataFusionError::Internal(
-            "hex expects exactly one argument".to_string(),
-        ));
-    }
-
-    match &args[0] {
-        ColumnarValue::Array(array) => match array.data_type() {
-            DataType::Int64 => {
-                let array = as_int64_array(array)?;
-
-                let hexed_array: StringArray = array.iter().map(|v| v.map(hex_int64)).collect();
-
-                Ok(ColumnarValue::Array(Arc::new(hexed_array)))
-            }
-            DataType::Utf8 => {
-                let array = as_string_array(array);
-
-                let hexed: StringArray = array
-                    .iter()
-                    .map(|v| v.map(hex_bytes).transpose())
-                    .collect::<Result<_, _>>()?;
-
-                Ok(ColumnarValue::Array(Arc::new(hexed)))
-            }
-            DataType::LargeUtf8 => {
-                let array = as_largestring_array(array);
-
-                let hexed: StringArray = array
-                    .iter()
-                    .map(|v| v.map(hex_bytes).transpose())
-                    .collect::<Result<_, _>>()?;
-
-                Ok(ColumnarValue::Array(Arc::new(hexed)))
-            }
-            DataType::Binary => {
-                let array = as_binary_array(array)?;
-
-                let hexed: StringArray = array
-                    .iter()
-                    .map(|v| v.map(hex_bytes).transpose())
-                    .collect::<Result<_, _>>()?;
-
-                Ok(ColumnarValue::Array(Arc::new(hexed)))
-            }
-            DataType::FixedSizeBinary(_) => {
-                let array = as_fixed_size_binary_array(array)?;
-
-                let hexed: StringArray = array
-                    .iter()
-                    .map(|v| v.map(hex_bytes).transpose())
-                    .collect::<Result<_, _>>()?;
-
-                Ok(ColumnarValue::Array(Arc::new(hexed)))
-            }
-            DataType::Dictionary(_, value_type) => {
-                let dict = as_dictionary_array::<Int32Type>(&array);
-
-                let values = match **value_type {
-                    DataType::Int64 => as_int64_array(dict.values())?
-                        .iter()
-                        .map(|v| v.map(hex_int64))
-                        .collect::<Vec<_>>(),
-                    DataType::Utf8 => as_string_array(dict.values())
-                        .iter()
-                        .map(|v| v.map(hex_bytes).transpose())
-                        .collect::<Result<_, _>>()?,
-                    DataType::Binary => as_binary_array(dict.values())?
-                        .iter()
-                        .map(|v| v.map(hex_bytes).transpose())
-                        .collect::<Result<_, _>>()?,
-                    _ => exec_err!(
-                        "hex got an unexpected argument type: {:?}",
-                        array.data_type()
-                    )?,
-                };
-
-                let new_values: Vec<Option<String>> = dict
-                    .keys()
-                    .iter()
-                    .map(|key| key.map(|k| values[k as usize].clone()).unwrap_or(None))
-                    .collect();
-
-                let string_array_values = StringArray::from(new_values);
-
-                Ok(ColumnarValue::Array(Arc::new(string_array_values)))
-            }
-            _ => exec_err!(
-                "hex got an unexpected argument type: {:?}",
-                array.data_type()
-            ),
-        },
-        _ => exec_err!("native hex does not support scalar values at this time"),
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use std::sync::Arc;
-
-    use arrow::array::{Int64Array, StringArray};
-    use arrow::{
-        array::{
-            as_string_array, BinaryDictionaryBuilder, PrimitiveDictionaryBuilder, StringBuilder,
-            StringDictionaryBuilder,
-        },
-        datatypes::{Int32Type, Int64Type},
-    };
-    use datafusion::logical_expr::ColumnarValue;
-
-    #[test]
-    fn test_dictionary_hex_utf8() {
-        let mut input_builder = StringDictionaryBuilder::<Int32Type>::new();
-        input_builder.append_value("hi");
-        input_builder.append_value("bye");
-        input_builder.append_null();
-        input_builder.append_value("rust");
-        let input = input_builder.finish();
-
-        let mut string_builder = StringBuilder::new();
-        string_builder.append_value("6869");
-        string_builder.append_value("627965");
-        string_builder.append_null();
-        string_builder.append_value("72757374");
-        let expected = string_builder.finish();
-
-        let columnar_value = ColumnarValue::Array(Arc::new(input));
-        let result = super::spark_hex(&[columnar_value]).unwrap();
-
-        let result = match result {
-            ColumnarValue::Array(array) => array,
-            _ => panic!("Expected array"),
-        };
-
-        let result = as_string_array(&result);
-
-        assert_eq!(result, &expected);
-    }
-
-    #[test]
-    fn test_dictionary_hex_int64() {
-        let mut input_builder = PrimitiveDictionaryBuilder::<Int32Type, Int64Type>::new();
-        input_builder.append_value(1);
-        input_builder.append_value(2);
-        input_builder.append_null();
-        input_builder.append_value(3);
-        let input = input_builder.finish();
-
-        let mut string_builder = StringBuilder::new();
-        string_builder.append_value("1");
-        string_builder.append_value("2");
-        string_builder.append_null();
-        string_builder.append_value("3");
-        let expected = string_builder.finish();
-
-        let columnar_value = ColumnarValue::Array(Arc::new(input));
-        let result = super::spark_hex(&[columnar_value]).unwrap();
-
-        let result = match result {
-            ColumnarValue::Array(array) => array,
-            _ => panic!("Expected array"),
-        };
-
-        let result = as_string_array(&result);
-
-        assert_eq!(result, &expected);
-    }
-
-    #[test]
-    fn test_dictionary_hex_binary() {
-        let mut input_builder = BinaryDictionaryBuilder::<Int32Type>::new();
-        input_builder.append_value("1");
-        input_builder.append_value("j");
-        input_builder.append_null();
-        input_builder.append_value("3");
-        let input = input_builder.finish();
-
-        let mut expected_builder = StringBuilder::new();
-        expected_builder.append_value("31");
-        expected_builder.append_value("6A");
-        expected_builder.append_null();
-        expected_builder.append_value("33");
-        let expected = expected_builder.finish();
-
-        let columnar_value = ColumnarValue::Array(Arc::new(input));
-        let result = super::spark_hex(&[columnar_value]).unwrap();
-
-        let result = match result {
-            ColumnarValue::Array(array) => array,
-            _ => panic!("Expected array"),
-        };
-
-        let result = as_string_array(&result);
-
-        assert_eq!(result, &expected);
-    }
-
-    #[test]
-    fn test_hex_int64() {
-        let num = 1234;
-        let hexed = super::hex_int64(num);
-        assert_eq!(hexed, "4D2".to_string());
-
-        let num = -1;
-        let hexed = super::hex_int64(num);
-        assert_eq!(hexed, "FFFFFFFFFFFFFFFF".to_string());
-    }
-
-    #[test]
-    fn test_spark_hex_int64() {
-        let int_array = Int64Array::from(vec![Some(1), Some(2), None, Some(3)]);
-        let columnar_value = ColumnarValue::Array(Arc::new(int_array));
-
-        let result = super::spark_hex(&[columnar_value]).unwrap();
-        let result = match result {
-            ColumnarValue::Array(array) => array,
-            _ => panic!("Expected array"),
-        };
-
-        let string_array = as_string_array(&result);
-        let expected_array = StringArray::from(vec![
-            Some("1".to_string()),
-            Some("2".to_string()),
-            None,
-            Some("3".to_string()),
-        ]);
-
-        assert_eq!(string_array, &expected_array);
-    }
-}
diff --git a/native/spark-expr/src/math_funcs/mod.rs b/native/spark-expr/src/math_funcs/mod.rs
index 7df87eb9f..35c1dc650 100644
--- a/native/spark-expr/src/math_funcs/mod.rs
+++ b/native/spark-expr/src/math_funcs/mod.rs
@@ -20,7 +20,6 @@ mod ceil;
 pub(crate) mod checked_arithmetic;
 mod div;
 mod floor;
-pub(crate) mod hex;
 pub mod internal;
 pub mod modulo_expr;
 mod negative;
@@ -32,7 +31,6 @@ pub use ceil::spark_ceil;
 pub use div::spark_decimal_div;
 pub use div::spark_decimal_integral_div;
 pub use floor::spark_floor;
-pub use hex::spark_hex;
 pub use internal::*;
 pub use modulo_expr::create_modulo_expr;
 pub use negative::{create_negate_expr, NegativeExpr};


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to