(datafusion) branch main updated: feat: Support log for Decimal32 and Decimal64 (#18999)

github-bot Thu, 18 Dec 2025 18:49:01 -0800

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new c2747eb109 feat: Support log for Decimal32 and Decimal64 (#18999)
c2747eb109 is described below

commit c2747eb10957b646f9052a9d6a530a5f38db97b8
Author: Nimalan <[email protected]>
AuthorDate: Fri Dec 19 08:18:32 2025 +0530

    feat: Support log for Decimal32 and Decimal64 (#18999)
    
    ## Which issue does this PR close?
    
    - Part of #17555 .
    
    ## Rationale for this change
    
    ### Analysis
    
    Other engines:
    1. ClickHouse seems to only consider `"(U)Int*", "Float*", "Decimal*"`
    as arguments for log
    
https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/log.cpp#L47-L63
    
    Libraries
    
    1. There is a C++ library libdecimal which internally uses [Intel Decimal
    Floating Point
    
Library](https://www.intel.com/content/www/us/en/developer/articles/tool/intel-decimal-floating-point-math-library.html)
    for its
    
[decimal32](https://github.com/GaryHughes/stddecimal/blob/main/libdecimal/decimal_cmath.cpp#L150-L159)
    operations. Intel's library itself converts the decimal32 to double and
    calls `log`.
    
https://github.com/karlorz/IntelRDFPMathLib20U2/blob/main/LIBRARY/src/bid32_log.c
    2. There was another C++ library based on IBM's decimal decNumber
    library https://github.com/semihc/CppDecimal . This one's implementation
    of
    
[`log`](https://github.com/semihc/CppDecimal/blob/main/src/decNumber.c#L1384-L1518)
    is implemented fully in decimal, but I don't think this would be a very
    performant way to do this
    
    I'm going to go with an approach similar to the one inside Intel's
    decimal library. To begin with, the `decimal32 -> double` conversion is
    done by simple scaling.
    
    ## What changes are included in this PR?
    
    1. Support Decimal32 and Decimal64 for log
    
    ## Are these changes tested?
    
    Yes, unit tests have been added, and I've tested this from the
    datafusion cli for Decimal32
    
    ```
    > select log(2.0, arrow_cast(12345.67, 'Decimal32(9, 2)'));
    +-----------------------------------------------------------------------+
    | log(Float64(2),arrow_cast(Float64(12345.67),Utf8("Decimal32(9, 2)"))) |
    +-----------------------------------------------------------------------+
    | 13.591717513271785                                                    |
    +-----------------------------------------------------------------------+
    1 row(s) fetched.
    Elapsed 0.021 seconds.
    ```
    
    ## Are there any user-facing changes?
    
    1. The precision of the result for Decimal32 will change; the precision
    loss in #18524 does not occur in this PR
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/functions/src/math/log.rs           |  75 ++++++++++++--
 datafusion/functions/src/utils.rs              | 137 +++++++++++++++++++++++++
 datafusion/sqllogictest/test_files/decimal.slt |  11 ++
 3 files changed, 212 insertions(+), 11 deletions(-)

diff --git a/datafusion/functions/src/math/log.rs 
b/datafusion/functions/src/math/log.rs
index 72a9cf4555..18229fb076 100644
--- a/datafusion/functions/src/math/log.rs
+++ b/datafusion/functions/src/math/log.rs
@@ -21,11 +21,13 @@ use std::any::Any;
 
 use super::power::PowerFunc;
 
-use crate::utils::{calculate_binary_math, decimal128_to_i128};
+use crate::utils::{
+    calculate_binary_math, decimal32_to_i32, decimal64_to_i64, 
decimal128_to_i128,
+};
 use arrow::array::{Array, ArrayRef};
-use arrow::compute::kernels::cast;
 use arrow::datatypes::{
-    DataType, Decimal128Type, Decimal256Type, Float16Type, Float32Type, 
Float64Type,
+    DataType, Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, 
Float16Type,
+    Float32Type, Float64Type,
 };
 use arrow::error::ArrowError;
 use arrow_buffer::i256;
@@ -102,6 +104,54 @@ impl LogFunc {
     }
 }
 
+/// Binary function to calculate logarithm of Decimal32 `value` using `base` 
base
+/// Returns error if base is invalid
+fn log_decimal32(value: i32, scale: i8, base: f64) -> Result<f64, ArrowError> {
+    if !base.is_finite() || base.trunc() != base {
+        return Err(ArrowError::ComputeError(format!(
+            "Log cannot use non-integer base: {base}"
+        )));
+    }
+    if (base as u32) < 2 {
+        return Err(ArrowError::ComputeError(format!(
+            "Log base must be greater than 1: {base}"
+        )));
+    }
+
+    let unscaled_value = decimal32_to_i32(value, scale)?;
+    if unscaled_value > 0 {
+        let log_value: u32 = unscaled_value.ilog(base as i32);
+        Ok(log_value as f64)
+    } else {
+        // Reflect f64::log behaviour
+        Ok(f64::NAN)
+    }
+}
+
+/// Binary function to calculate logarithm of Decimal64 `value` using `base` 
base
+/// Returns error if base is invalid
+fn log_decimal64(value: i64, scale: i8, base: f64) -> Result<f64, ArrowError> {
+    if !base.is_finite() || base.trunc() != base {
+        return Err(ArrowError::ComputeError(format!(
+            "Log cannot use non-integer base: {base}"
+        )));
+    }
+    if (base as u32) < 2 {
+        return Err(ArrowError::ComputeError(format!(
+            "Log base must be greater than 1: {base}"
+        )));
+    }
+
+    let unscaled_value = decimal64_to_i64(value, scale)?;
+    if unscaled_value > 0 {
+        let log_value: u32 = unscaled_value.ilog(base as i64);
+        Ok(log_value as f64)
+    } else {
+        // Reflect f64::log behaviour
+        Ok(f64::NAN)
+    }
+}
+
 /// Binary function to calculate an integer logarithm of Decimal128 `value` 
using `base` base
 /// Returns error if base is invalid
 fn log_decimal128(value: i128, scale: i8, base: f64) -> Result<f64, 
ArrowError> {
@@ -223,15 +273,18 @@ impl ScalarUDFImpl for LogFunc {
                     |value, base| Ok(value.log(base)),
                 )?
             }
-            // TODO: native log support for decimal 32 & 64; right now upcast
-            //       to decimal128 to calculate
-            //       https://github.com/apache/datafusion/issues/17555
-            DataType::Decimal32(precision, scale)
-            | DataType::Decimal64(precision, scale) => {
-                calculate_binary_math::<Decimal128Type, Float64Type, 
Float64Type, _>(
-                    &cast(&value, &DataType::Decimal128(*precision, *scale))?,
+            DataType::Decimal32(_, scale) => {
+                calculate_binary_math::<Decimal32Type, Float64Type, 
Float64Type, _>(
+                    &value,
                     &base,
-                    |value, base| log_decimal128(value, *scale, base),
+                    |value, base| log_decimal32(value, *scale, base),
+                )?
+            }
+            DataType::Decimal64(_, scale) => {
+                calculate_binary_math::<Decimal64Type, Float64Type, 
Float64Type, _>(
+                    &value,
+                    &base,
+                    |value, base| log_decimal64(value, *scale, base),
                 )?
             }
             DataType::Decimal128(_, scale) => {
diff --git a/datafusion/functions/src/utils.rs 
b/datafusion/functions/src/utils.rs
index c4f15d0cca..e160eb68d5 100644
--- a/datafusion/functions/src/utils.rs
+++ b/datafusion/functions/src/utils.rs
@@ -219,6 +219,40 @@ pub fn decimal128_to_i128(value: i128, scale: i8) -> 
Result<i128, ArrowError> {
     }
 }
 
+pub fn decimal32_to_i32(value: i32, scale: i8) -> Result<i32, ArrowError> {
+    if scale < 0 {
+        Err(ArrowError::ComputeError(
+            "Negative scale is not supported".into(),
+        ))
+    } else if scale == 0 {
+        Ok(value)
+    } else {
+        match 10_i32.checked_pow(scale as u32) {
+            Some(divisor) => Ok(value / divisor),
+            None => Err(ArrowError::ComputeError(format!(
+                "Cannot get a power of {scale}"
+            ))),
+        }
+    }
+}
+
+pub fn decimal64_to_i64(value: i64, scale: i8) -> Result<i64, ArrowError> {
+    if scale < 0 {
+        Err(ArrowError::ComputeError(
+            "Negative scale is not supported".into(),
+        ))
+    } else if scale == 0 {
+        Ok(value)
+    } else {
+        match i64::from(10).checked_pow(scale as u32) {
+            Some(divisor) => Ok(value / divisor),
+            None => Err(ArrowError::ComputeError(format!(
+                "Cannot get a power of {scale}"
+            ))),
+        }
+    }
+}
+
 #[cfg(test)]
 pub mod test {
     /// $FUNC ScalarUDFImpl to test
@@ -334,6 +368,7 @@ pub mod test {
     }
 
     use arrow::datatypes::DataType;
+    use itertools::Either;
     pub(crate) use test_function;
 
     use super::*;
@@ -376,4 +411,106 @@ pub mod test {
             }
         }
     }
+
+    #[test]
+    fn test_decimal32_to_i32() {
+        let cases: [(i32, i8, Either<i32, String>); _] = [
+            (123, 0, Either::Left(123)),
+            (1230, 1, Either::Left(123)),
+            (123000, 3, Either::Left(123)),
+            (1234567, 2, Either::Left(12345)),
+            (-1234567, 2, Either::Left(-12345)),
+            (1, 0, Either::Left(1)),
+            (
+                123,
+                -3,
+                Either::Right("Negative scale is not supported".into()),
+            ),
+            (
+                123,
+                i8::MAX,
+                Either::Right("Cannot get a power of 127".into()),
+            ),
+            (999999999, 0, Either::Left(999999999)),
+            (999999999, 3, Either::Left(999999)),
+        ];
+
+        for (value, scale, expected) in cases {
+            match decimal32_to_i32(value, scale) {
+                Ok(actual) => {
+                    let expected_value =
+                        expected.left().expect("Got value but expected none");
+                    assert_eq!(
+                        actual, expected_value,
+                        "{value} and {scale} vs {expected_value:?}"
+                    );
+                }
+                Err(ArrowError::ComputeError(msg)) => {
+                    assert_eq!(
+                        msg,
+                        expected.right().expect("Got error but expected value")
+                    );
+                }
+                Err(_) => {
+                    assert!(expected.is_right())
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_decimal64_to_i64() {
+        let cases: [(i64, i8, Either<i64, String>); _] = [
+            (123, 0, Either::Left(123)),
+            (1234567890, 2, Either::Left(12345678)),
+            (-1234567890, 2, Either::Left(-12345678)),
+            (
+                123,
+                -3,
+                Either::Right("Negative scale is not supported".into()),
+            ),
+            (
+                123,
+                i8::MAX,
+                Either::Right("Cannot get a power of 127".into()),
+            ),
+            (
+                999999999999999999i64,
+                0,
+                Either::Left(999999999999999999i64),
+            ),
+            (
+                999999999999999999i64,
+                3,
+                Either::Left(999999999999999999i64 / 1000),
+            ),
+            (
+                -999999999999999999i64,
+                3,
+                Either::Left(-999999999999999999i64 / 1000),
+            ),
+        ];
+
+        for (value, scale, expected) in cases {
+            match decimal64_to_i64(value, scale) {
+                Ok(actual) => {
+                    let expected_value =
+                        expected.left().expect("Got value but expected none");
+                    assert_eq!(
+                        actual, expected_value,
+                        "{value} and {scale} vs {expected_value:?}"
+                    );
+                }
+                Err(ArrowError::ComputeError(msg)) => {
+                    assert_eq!(
+                        msg,
+                        expected.right().expect("Got error but expected value")
+                    );
+                }
+                Err(_) => {
+                    assert!(expected.is_right())
+                }
+            }
+        }
+    }
 }
diff --git a/datafusion/sqllogictest/test_files/decimal.slt 
b/datafusion/sqllogictest/test_files/decimal.slt
index a6b6dd0488..143cd786ab 100644
--- a/datafusion/sqllogictest/test_files/decimal.slt
+++ b/datafusion/sqllogictest/test_files/decimal.slt
@@ -794,6 +794,11 @@ select log(arrow_cast(100, 'Decimal32(9, 2)'));
 ----
 2
 
+query R
+select log(2.0, arrow_cast(12345.67, 'Decimal32(9, 2)'));
+----
+13
+
 # log for small decimal64
 query R
 select log(arrow_cast(100, 'Decimal64(18, 0)'));
@@ -805,6 +810,12 @@ select log(arrow_cast(100, 'Decimal64(18, 2)'));
 ----
 2
 
+query R
+select log(2.0, arrow_cast(12345.6789, 'Decimal64(15, 4)'));
+----
+13
+
+
 # log for small decimal128
 query R
 select log(arrow_cast(100, 'Decimal128(38, 0)'));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) branch main updated: feat: Support log for Decimal32 and Decimal64 (#18999)

Reply via email to