[arrow-rs] branch master updated: Cast numeric to decimal256 (#2923)

viirya Thu, 27 Oct 2022 16:53:26 -0700

This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git



The following commit(s) were added to refs/heads/master by this push:
     new 73416f8e6 Cast numeric to decimal256 (#2923)
73416f8e6 is described below

commit 73416f8e67efe1d0d8a8529c96c099429ab1b366
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Thu Oct 27 16:53:14 2022 -0700

    Cast numeric to decimal256 (#2923)
    
    * Cast numeric to decimal256
    
    * For review
    
    * Check scale overflow
    
    * Fix clippy
---
 arrow-buffer/src/bigint.rs        |  16 +++
 arrow/src/compute/kernels/cast.rs | 243 ++++++++++++++++++++++++++++++++++----
 2 files changed, 239 insertions(+), 20 deletions(-)

diff --git a/arrow-buffer/src/bigint.rs b/arrow-buffer/src/bigint.rs
index fe135b329..892c6c99d 100644
--- a/arrow-buffer/src/bigint.rs
+++ b/arrow-buffer/src/bigint.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use num::cast::AsPrimitive;
 use num::BigInt;
 use std::cmp::Ordering;
 
@@ -395,6 +396,21 @@ fn mulx(a: u128, b: u128) -> (u128, u128) {
     (low, high)
 }
 
+macro_rules! define_as_primitive {
+    ($native_ty:ty) => {
+        impl AsPrimitive<i256> for $native_ty {
+            fn as_(self) -> i256 {
+                i256::from_i128(self as i128)
+            }
+        }
+    };
+}
+
+define_as_primitive!(i8);
+define_as_primitive!(i16);
+define_as_primitive!(i32);
+define_as_primitive!(i64);
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index d354a95f0..73868dd98 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -88,6 +88,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
         (Decimal256(_, _), Decimal128(_, _)) => true,
         // signed numeric to decimal
         (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal128(_, _)) |
+        (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal256(_, _)) |
         // decimal to signed numeric
         (Decimal128(_, _), Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64)
         | (
@@ -305,8 +306,8 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
     cast_with_options(array, to_type, &DEFAULT_CAST_OPTIONS)
 }
 
-/// Cast the primitive array to defined decimal data type array
-fn cast_primitive_to_decimal<T: ArrayAccessor, F>(
+/// Cast the primitive array to defined decimal128 data type array
+fn cast_primitive_to_decimal128<T: ArrayAccessor, F>(
     array: T,
     op: F,
     precision: u8,
@@ -324,7 +325,26 @@ where
     Ok(Arc::new(decimal_array))
 }
 
-fn cast_integer_to_decimal<T: ArrowNumericType>(
+/// Cast the primitive array to defined decimal256 data type array
+fn cast_primitive_to_decimal256<T: ArrayAccessor, F>(
+    array: T,
+    op: F,
+    precision: u8,
+    scale: u8,
+) -> Result<ArrayRef>
+where
+    F: Fn(T::Item) -> i256,
+{
+    #[allow(clippy::redundant_closure)]
+    let decimal_array = ArrayIter::new(array)
+        .map(|v| v.map(|v| op(v)))
+        .collect::<Decimal256Array>()
+        .with_precision_and_scale(precision, scale)?;
+
+    Ok(Arc::new(decimal_array))
+}
+
+fn cast_integer_to_decimal128<T: ArrowNumericType>(
     array: &PrimitiveArray<T>,
     precision: u8,
     scale: u8,
@@ -334,12 +354,30 @@ where
 {
     let mul: i128 = 10_i128.pow(scale as u32);
 
-    // with_precision_and_scale validates the
-    // value is within range for the output precision
-    cast_primitive_to_decimal(array, |v| v.as_() * mul, precision, scale)
+    cast_primitive_to_decimal128(array, |v| v.as_() * mul, precision, scale)
+}
+
+fn cast_integer_to_decimal256<T: ArrowNumericType>(
+    array: &PrimitiveArray<T>,
+    precision: u8,
+    scale: u8,
+) -> Result<ArrayRef>
+where
+    <T as ArrowPrimitiveType>::Native: AsPrimitive<i256>,
+{
+    let mul: i256 = i256::from_i128(10_i128)
+        .checked_pow(scale as u32)
+        .ok_or_else(|| {
+            ArrowError::CastError(format!(
+                "Cannot cast to Decimal256({}, {}). The scale causes overflow.",
+                precision, scale
+            ))
+        })?;
+
+    cast_primitive_to_decimal256(array, |v| v.as_().wrapping_mul(mul), precision, scale)
 }
 
-fn cast_floating_point_to_decimal<T: ArrowNumericType>(
+fn cast_floating_point_to_decimal128<T: ArrowNumericType>(
     array: &PrimitiveArray<T>,
     precision: u8,
     scale: u8,
@@ -349,13 +387,22 @@ where
 {
     let mul = 10_f64.powi(scale as i32);
 
-    cast_primitive_to_decimal(
+    cast_primitive_to_decimal128(array, |v| (v.as_() * mul) as i128, precision, scale)
+}
+
+fn cast_floating_point_to_decimal256<T: ArrowNumericType>(
+    array: &PrimitiveArray<T>,
+    precision: u8,
+    scale: u8,
+) -> Result<ArrayRef>
+where
+    <T as ArrowPrimitiveType>::Native: AsPrimitive<f64>,
+{
+    let mul = 10_f64.powi(scale as i32);
+
+    cast_primitive_to_decimal256(
         array,
-        |v| {
-            // with_precision_and_scale validates the
-            // value is within range for the output precision
-            (v.as_() * mul) as i128
-        },
+        |v| i256::from_i128((v.as_() * mul) as i128),
         precision,
         scale,
     )
@@ -545,32 +592,73 @@ pub fn cast_with_options(
             // cast data to decimal
             match from_type {
                 // TODO now just support signed numeric to decimal, support decimal to numeric later
-                Int8 => cast_integer_to_decimal(
+                Int8 => cast_integer_to_decimal128(
+                    as_primitive_array::<Int8Type>(array),
+                    *precision,
+                    *scale,
+                ),
+                Int16 => cast_integer_to_decimal128(
+                    as_primitive_array::<Int16Type>(array),
+                    *precision,
+                    *scale,
+                ),
+                Int32 => cast_integer_to_decimal128(
+                    as_primitive_array::<Int32Type>(array),
+                    *precision,
+                    *scale,
+                ),
+                Int64 => cast_integer_to_decimal128(
+                    as_primitive_array::<Int64Type>(array),
+                    *precision,
+                    *scale,
+                ),
+                Float32 => cast_floating_point_to_decimal128(
+                    as_primitive_array::<Float32Type>(array),
+                    *precision,
+                    *scale,
+                ),
+                Float64 => cast_floating_point_to_decimal128(
+                    as_primitive_array::<Float64Type>(array),
+                    *precision,
+                    *scale,
+                ),
+                Null => Ok(new_null_array(to_type, array.len())),
+                _ => Err(ArrowError::CastError(format!(
+                    "Casting from {:?} to {:?} not supported",
+                    from_type, to_type
+                ))),
+            }
+        }
+        (_, Decimal256(precision, scale)) => {
+            // cast data to decimal
+            match from_type {
+                // TODO now just support signed numeric to decimal, support decimal to numeric later
+                Int8 => cast_integer_to_decimal256(
                     as_primitive_array::<Int8Type>(array),
                     *precision,
                     *scale,
                 ),
-                Int16 => cast_integer_to_decimal(
+                Int16 => cast_integer_to_decimal256(
                     as_primitive_array::<Int16Type>(array),
                     *precision,
                     *scale,
                 ),
-                Int32 => cast_integer_to_decimal(
+                Int32 => cast_integer_to_decimal256(
                     as_primitive_array::<Int32Type>(array),
                     *precision,
                     *scale,
                 ),
-                Int64 => cast_integer_to_decimal(
+                Int64 => cast_integer_to_decimal256(
                     as_primitive_array::<Int64Type>(array),
                     *precision,
                     *scale,
                 ),
-                Float32 => cast_floating_point_to_decimal(
+                Float32 => cast_floating_point_to_decimal256(
                     as_primitive_array::<Float32Type>(array),
                     *precision,
                     *scale,
                 ),
-                Float64 => cast_floating_point_to_decimal(
+                Float64 => cast_floating_point_to_decimal256(
                     as_primitive_array::<Float64Type>(array),
                     *precision,
                     *scale,
@@ -3071,7 +3159,7 @@ mod tests {
 
     #[test]
     #[cfg(not(feature = "force_validate"))]
-    fn test_cast_numeric_to_decimal() {
+    fn test_cast_numeric_to_decimal128() {
         // test negative cast type
         let decimal_type = DataType::Decimal128(38, 6);
         assert!(!can_cast_types(&DataType::UInt64, &decimal_type));
@@ -3184,6 +3272,121 @@ mod tests {
         );
     }
 
+    #[test]
+    #[cfg(not(feature = "force_validate"))]
+    fn test_cast_numeric_to_decimal256() {
+        // test negative cast type
+        let decimal_type = DataType::Decimal256(58, 6);
+        assert!(!can_cast_types(&DataType::UInt64, &decimal_type));
+
+        // i8, i16, i32, i64
+        let input_datas = vec![
+            Arc::new(Int8Array::from(vec![
+                Some(1),
+                Some(2),
+                Some(3),
+                None,
+                Some(5),
+            ])) as ArrayRef, // i8
+            Arc::new(Int16Array::from(vec![
+                Some(1),
+                Some(2),
+                Some(3),
+                None,
+                Some(5),
+            ])) as ArrayRef, // i16
+            Arc::new(Int32Array::from(vec![
+                Some(1),
+                Some(2),
+                Some(3),
+                None,
+                Some(5),
+            ])) as ArrayRef, // i32
+            Arc::new(Int64Array::from(vec![
+                Some(1),
+                Some(2),
+                Some(3),
+                None,
+                Some(5),
+            ])) as ArrayRef, // i64
+        ];
+        for array in input_datas {
+            generate_cast_test_case!(
+                &array,
+                Decimal256Array,
+                &decimal_type,
+                vec![
+                    Some(i256::from_i128(1000000_i128)),
+                    Some(i256::from_i128(2000000_i128)),
+                    Some(i256::from_i128(3000000_i128)),
+                    None,
+                    Some(i256::from_i128(5000000_i128))
+                ]
+            );
+        }
+
+        // test i8 to decimal type with overflow the result type
+        // the 100 will be converted to 1000_i128, but it is out of range for max value in the precision 3.
+        let array = Int8Array::from(vec![1, 2, 3, 4, 100]);
+        let array = Arc::new(array) as ArrayRef;
+        let casted_array = cast(&array, &DataType::Decimal256(3, 1));
+        assert!(casted_array.is_ok());
+        let array = casted_array.unwrap();
+        let array: &Decimal256Array = as_primitive_array(&array);
+        let err = array.validate_decimal_precision(3);
+        assert_eq!("Invalid argument error: 1000 is too large to store in a Decimal256 of precision 3. Max is 999", err.unwrap_err().to_string());
+
+        // test f32 to decimal type
+        let array = Float32Array::from(vec![
+            Some(1.1),
+            Some(2.2),
+            Some(4.4),
+            None,
+            Some(1.123_456_7),
+            Some(1.123_456_7),
+        ]);
+        let array = Arc::new(array) as ArrayRef;
+        generate_cast_test_case!(
+            &array,
+            Decimal256Array,
+            &decimal_type,
+            vec![
+                Some(i256::from_i128(1100000_i128)),
+                Some(i256::from_i128(2200000_i128)),
+                Some(i256::from_i128(4400000_i128)),
+                None,
+                Some(i256::from_i128(1123456_i128)),
+                Some(i256::from_i128(1123456_i128)),
+            ]
+        );
+
+        // test f64 to decimal type
+        let array = Float64Array::from(vec![
+            Some(1.1),
+            Some(2.2),
+            Some(4.4),
+            None,
+            Some(1.123_456_789_123_4),
+            Some(1.123_456_789_012_345_6),
+            Some(1.123_456_789_012_345_6),
+        ]);
+        let array = Arc::new(array) as ArrayRef;
+        generate_cast_test_case!(
+            &array,
+            Decimal256Array,
+            &decimal_type,
+            vec![
+                Some(i256::from_i128(1100000_i128)),
+                Some(i256::from_i128(2200000_i128)),
+                Some(i256::from_i128(4400000_i128)),
+                None,
+                Some(i256::from_i128(1123456_i128)),
+                Some(i256::from_i128(1123456_i128)),
+                Some(i256::from_i128(1123456_i128)),
+            ]
+        );
+    }
+
     #[test]
     fn test_cast_i32_to_f64() {
         let a = Int32Array::from(vec![5, 6, 7, 8, 9]);

[arrow-rs] branch master updated: Cast numeric to decimal256 (#2923)

Reply via email to