This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 73153fec8 Add Decimal128Iter and Decimal256Iter and do maximum
precision/scale check (#2140)
73153fec8 is described below
commit 73153fec814a8871fe9e6ea6a7bc66198118cd25
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Sun Jul 24 20:45:49 2022 -0700
Add Decimal128Iter and Decimal256Iter and do maximum precision/scale check
(#2140)
* Add precision/scale check. Add decimal128 and decimal256 iters.
* Trigger Build
* Improve deprecation hint
---
arrow/src/array/array_binary.rs | 1 -
arrow/src/array/array_decimal.rs | 127 ++++++++++++++++++++++++++++----------
arrow/src/array/iterator.rs | 17 ++++-
arrow/src/compute/kernels/cast.rs | 4 +-
arrow/src/datatypes/datatype.rs | 4 +-
arrow/src/util/decimal.rs | 76 +++++++++++++++++++++--
6 files changed, 185 insertions(+), 44 deletions(-)
diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index b01696b03..4848a25a0 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -24,7 +24,6 @@ use super::{
FixedSizeListArray, GenericBinaryIter, GenericListArray, OffsetSizeTrait,
};
use crate::array::array::ArrayAccessor;
-pub use crate::array::DecimalIter;
use crate::buffer::Buffer;
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs
index 494b4e9d1..473160858 100644
--- a/arrow/src/array/array_decimal.rs
+++ b/arrow/src/array/array_decimal.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+use crate::array::{ArrayAccessor, Decimal128Iter, Decimal256Iter};
use std::borrow::Borrow;
use std::convert::From;
use std::fmt;
@@ -24,13 +25,11 @@ use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData,
FixedSizeListArray,
};
use super::{BooleanBufferBuilder, FixedSizeBinaryArray};
+#[allow(deprecated)]
pub use crate::array::DecimalIter;
use crate::buffer::Buffer;
-use crate::datatypes::DataType;
-use crate::datatypes::{
- validate_decimal_precision, DECIMAL_DEFAULT_SCALE, DECIMAL_MAX_PRECISION,
- DECIMAL_MAX_SCALE,
-};
+use crate::datatypes::{validate_decimal_precision, DECIMAL_DEFAULT_SCALE};
+use crate::datatypes::{DataType, DECIMAL128_MAX_PRECISION,
DECIMAL128_MAX_SCALE};
use crate::error::{ArrowError, Result};
use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};
@@ -103,11 +102,18 @@ pub trait BasicDecimalArray<T: BasicDecimal, U:
From<ArrayData>>:
/// Returns the element at index `i`.
fn value(&self, i: usize) -> T {
- let data = self.data();
- assert!(i < data.len(), "Out of bounds access");
+ assert!(i < self.data().len(), "Out of bounds access");
+ unsafe { self.value_unchecked(i) }
+ }
+
+ /// Returns the element at index `i`.
+ /// # Safety
+ /// Caller is responsible for ensuring that the index is within the bounds
of the array
+ unsafe fn value_unchecked(&self, i: usize) -> T {
+ let data = self.data();
let offset = i + data.offset();
- let raw_val = unsafe {
+ let raw_val = {
let pos = self.value_offset_at(offset);
std::slice::from_raw_parts(
self.raw_value_data_ptr().offset(pos as isize),
@@ -270,24 +276,24 @@ impl Decimal128Array {
/// specified precision.
///
/// Returns an Error if:
- /// 1. `precision` is larger than [`DECIMAL_MAX_PRECISION`]
- /// 2. `scale` is larger than [`DECIMAL_MAX_SCALE`];
+ /// 1. `precision` is larger than [`DECIMAL128_MAX_PRECISION`]
+ /// 2. `scale` is larger than [`DECIMAL128_MAX_SCALE`];
/// 3. `scale` is > `precision`
pub fn with_precision_and_scale(
mut self,
precision: usize,
scale: usize,
) -> Result<Self> {
- if precision > DECIMAL_MAX_PRECISION {
+ if precision > DECIMAL128_MAX_PRECISION {
return Err(ArrowError::InvalidArgumentError(format!(
"precision {} is greater than max {}",
- precision, DECIMAL_MAX_PRECISION
+ precision, DECIMAL128_MAX_PRECISION
)));
}
- if scale > DECIMAL_MAX_SCALE {
+ if scale > DECIMAL128_MAX_SCALE {
return Err(ArrowError::InvalidArgumentError(format!(
"scale {} is greater than max {}",
- scale, DECIMAL_MAX_SCALE
+ scale, DECIMAL128_MAX_SCALE
)));
}
if scale > precision {
@@ -302,7 +308,7 @@ impl Decimal128Array {
// decreased
if precision < self.precision {
for v in self.iter().flatten() {
- validate_decimal_precision(v, precision)?;
+ validate_decimal_precision(v.as_i128(), precision)?;
}
}
@@ -322,7 +328,7 @@ impl Decimal128Array {
/// The default precision and scale used when not specified.
pub fn default_type() -> DataType {
// Keep maximum precision
- DataType::Decimal(DECIMAL_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
+ DataType::Decimal(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
}
}
@@ -368,19 +374,13 @@ impl From<ArrayData> for Decimal256Array {
}
}
-impl<'a> IntoIterator for &'a Decimal128Array {
- type Item = Option<i128>;
- type IntoIter = DecimalIter<'a>;
-
- fn into_iter(self) -> Self::IntoIter {
- DecimalIter::<'a>::new(self)
- }
-}
-
impl<'a> Decimal128Array {
- /// constructs a new iterator
- pub fn iter(&'a self) -> DecimalIter<'a> {
- DecimalIter::new(self)
+ /// Constructs a new iterator that iterates `Decimal128` values as i128
values.
+ /// This is kept mostly for back-compatibility purpose.
+ /// Suggests to use `iter()` that returns `Decimal128Iter`.
+ #[allow(deprecated)]
+ pub fn i128_iter(&'a self) -> DecimalIter<'a> {
+ DecimalIter::<'a>::new(self)
}
}
@@ -421,7 +421,7 @@ impl<Ptr: Borrow<Option<i128>>> FromIterator<Ptr> for
Decimal128Array {
}
macro_rules! def_decimal_array {
- ($ty:ident, $array_name:expr) => {
+ ($ty:ident, $array_name:expr, $decimal_ty:ident, $iter_ty:ident) => {
impl private_decimal::DecimalArrayPrivate for $ty {
fn raw_value_data_ptr(&self) -> *const u8 {
self.value_data.as_ptr()
@@ -463,15 +463,55 @@ macro_rules! def_decimal_array {
write!(f, "]")
}
}
+
+ impl<'a> ArrayAccessor for &'a $ty {
+ type Item = $decimal_ty;
+
+ fn value(&self, index: usize) -> Self::Item {
+ $ty::value(self, index)
+ }
+
+ unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
+ $ty::value_unchecked(self, index)
+ }
+ }
+
+ impl<'a> IntoIterator for &'a $ty {
+ type Item = Option<$decimal_ty>;
+ type IntoIter = $iter_ty<'a>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ $iter_ty::<'a>::new(self)
+ }
+ }
+
+ impl<'a> $ty {
+ /// constructs a new iterator
+ pub fn iter(&'a self) -> $iter_ty<'a> {
+ $iter_ty::<'a>::new(self)
+ }
+ }
};
}
-def_decimal_array!(Decimal128Array, "Decimal128Array");
-def_decimal_array!(Decimal256Array, "Decimal256Array");
+def_decimal_array!(
+ Decimal128Array,
+ "Decimal128Array",
+ Decimal128,
+ Decimal128Iter
+);
+def_decimal_array!(
+ Decimal256Array,
+ "Decimal256Array",
+ Decimal256,
+ Decimal256Iter
+);
#[cfg(test)]
mod tests {
+ use crate::array::Decimal256Builder;
use crate::{array::Decimal128Builder, datatypes::Field};
+ use num::{BigInt, Num};
use super::*;
@@ -567,7 +607,7 @@ mod tests {
let data = vec![Some(-100), None, Some(101)];
let array: Decimal128Array = data.clone().into_iter().collect();
- let collected: Vec<_> = array.iter().collect();
+ let collected: Vec<_> = array.iter().map(|d| d.map(|v|
v.as_i128())).collect();
assert_eq!(data, collected);
}
@@ -576,7 +616,8 @@ mod tests {
let data = vec![Some(-100), None, Some(101)];
let array: Decimal128Array = data.clone().into_iter().collect();
- let collected: Vec<_> = array.into_iter().collect();
+ let collected: Vec<_> =
+ array.into_iter().map(|d| d.map(|v| v.as_i128())).collect();
assert_eq!(data, collected);
}
@@ -750,4 +791,24 @@ mod tests {
assert!(decimal.is_null(0));
assert_eq!(decimal.value_as_string(1), "56".to_string());
}
+
+ #[test]
+ fn test_decimal256_iter() {
+ // TODO: Impl FromIterator for Decimal256Array
+ let mut builder = Decimal256Builder::new(30, 76, 6);
+ let value = BigInt::from_str_radix("12345", 10).unwrap();
+ let decimal1 = Decimal256::from_big_int(&value, 76, 6).unwrap();
+ builder.append_value(&decimal1).unwrap();
+
+ builder.append_null();
+
+ let value = BigInt::from_str_radix("56789", 10).unwrap();
+ let decimal2 = Decimal256::from_big_int(&value, 76, 6).unwrap();
+ builder.append_value(&decimal2).unwrap();
+
+ let array: Decimal256Array = builder.finish();
+
+ let collected: Vec<_> = array.iter().collect();
+ assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected);
+ }
}
diff --git a/arrow/src/array/iterator.rs b/arrow/src/array/iterator.rs
index a4853d7d7..8ee9f2544 100644
--- a/arrow/src/array/iterator.rs
+++ b/arrow/src/array/iterator.rs
@@ -16,7 +16,7 @@
// under the License.
use crate::array::array::ArrayAccessor;
-use crate::array::BasicDecimalArray;
+use crate::array::{BasicDecimalArray, Decimal256Array};
use super::{
Array, BooleanArray, Decimal128Array, GenericBinaryArray, GenericListArray,
@@ -104,15 +104,28 @@ pub type GenericStringIter<'a, T> = ArrayIter<&'a
GenericStringArray<T>>;
pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray<T>>;
pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray<O>>;
+/// an iterator that returns `Some(Decimal128)` or `None`, that can be used on
a
+/// [`Decimal128Array`]
+pub type Decimal128Iter<'a> = ArrayIter<&'a Decimal128Array>;
+
+/// an iterator that returns `Some(Decimal256)` or `None`, that can be used on
a
+/// [`Decimal256Array`]
+pub type Decimal256Iter<'a> = ArrayIter<&'a Decimal256Array>;
+
/// an iterator that returns `Some(i128)` or `None`, that can be used on a
/// [`Decimal128Array`]
#[derive(Debug)]
+#[deprecated(note = "Please use `Decimal128Iter` instead. \
+ `DecimalIter` iterates `Decimal128` values as i128 values. \
+ This is kept mostly for back-compatibility purpose. Suggests to use
`Decimal128Array.iter()` \
+ that returns `Decimal128Iter`.")]
pub struct DecimalIter<'a> {
array: &'a Decimal128Array,
current: usize,
current_end: usize,
}
+#[allow(deprecated)]
impl<'a> DecimalIter<'a> {
pub fn new(array: &'a Decimal128Array) -> Self {
Self {
@@ -123,6 +136,7 @@ impl<'a> DecimalIter<'a> {
}
}
+#[allow(deprecated)]
impl<'a> std::iter::Iterator for DecimalIter<'a> {
type Item = Option<i128>;
@@ -150,6 +164,7 @@ impl<'a> std::iter::Iterator for DecimalIter<'a> {
}
/// iterator has known size.
+#[allow(deprecated)]
impl<'a> std::iter::ExactSizeIterator for DecimalIter<'a> {}
#[cfg(test)]
diff --git a/arrow/src/compute/kernels/cast.rs
b/arrow/src/compute/kernels/cast.rs
index 954acef76..781f199a6 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -1205,7 +1205,7 @@ fn cast_decimal_to_decimal(
let div = 10_i128.pow((input_scale - output_scale) as u32);
array
.iter()
- .map(|v| v.map(|v| v / div))
+ .map(|v| v.map(|v| v.as_i128() / div))
.collect::<Decimal128Array>()
} else {
// For example, input_scale is 3 and output_scale is 4;
@@ -1213,7 +1213,7 @@ fn cast_decimal_to_decimal(
let mul = 10_i128.pow((output_scale - input_scale) as u32);
array
.iter()
- .map(|v| v.map(|v| v * mul))
+ .map(|v| v.map(|v| v.as_i128() * mul))
.collect::<Decimal128Array>()
}
.with_precision_and_scale(*output_precision, *output_scale)?;
diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
index 74a2ab450..429a94f24 100644
--- a/arrow/src/datatypes/datatype.rs
+++ b/arrow/src/datatypes/datatype.rs
@@ -431,10 +431,10 @@ pub const MIN_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
];
/// The maximum precision for [DataType::Decimal] values
-pub const DECIMAL_MAX_PRECISION: usize = 38;
+pub const DECIMAL128_MAX_PRECISION: usize = 38;
/// The maximum scale for [DataType::Decimal] values
-pub const DECIMAL_MAX_SCALE: usize = 38;
+pub const DECIMAL128_MAX_SCALE: usize = 38;
/// The maximum precision for [DataType::Decimal256] values
pub const DECIMAL256_MAX_PRECISION: usize = 76;
diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs
index 4d6724564..8f9d394ef 100644
--- a/arrow/src/util/decimal.rs
+++ b/arrow/src/util/decimal.rs
@@ -17,13 +17,22 @@
//! Decimal related utils
+use crate::datatypes::{
+ DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION,
+ DECIMAL256_MAX_SCALE,
+};
use crate::error::{ArrowError, Result};
use num::bigint::BigInt;
+use num::Signed;
use std::cmp::{min, Ordering};
pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq {
/// The bit-width of the internal representation.
const BIT_WIDTH: usize;
+ /// The maximum precision.
+ const MAX_PRECISION: usize;
+ /// The maximum scale.
+ const MAX_SCALE: usize;
/// Tries to create a decimal value from precision, scale and bytes.
/// If the length of bytes isn't same as the bit width of this decimal,
@@ -36,6 +45,21 @@ pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq {
where
Self: Sized,
{
+ if precision > Self::MAX_PRECISION {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "precision {} is greater than max {}",
+ precision,
+ Self::MAX_PRECISION
+ )));
+ }
+ if scale > Self::MAX_SCALE {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "scale {} is greater than max {}",
+ scale,
+ Self::MAX_SCALE
+ )));
+ }
+
if precision < scale {
return Err(ArrowError::InvalidArgumentError(format!(
"Precision {} is less than scale {}",
@@ -138,10 +162,30 @@ pub struct Decimal256 {
value: [u8; 32],
}
+impl Decimal256 {
+ /// Constructs a `Decimal256` value from a `BigInt`.
+ pub fn from_big_int(
+ num: &BigInt,
+ precision: usize,
+ scale: usize,
+ ) -> Result<Decimal256> {
+ let mut bytes = if num.is_negative() {
+ vec![255; 32]
+ } else {
+ vec![0; 32]
+ };
+ let num_bytes = &num.to_signed_bytes_le();
+ bytes[0..num_bytes.len()].clone_from_slice(num_bytes);
+ Decimal256::try_new_from_bytes(precision, scale, &bytes)
+ }
+}
+
macro_rules! def_decimal {
- ($ty:ident, $bit:expr) => {
+ ($ty:ident, $bit:expr, $max_p:expr, $max_s:expr) => {
impl BasicDecimal for $ty {
const BIT_WIDTH: usize = $bit;
+ const MAX_PRECISION: usize = $max_p;
+ const MAX_SCALE: usize = $max_s;
fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self {
$ty {
@@ -201,12 +245,23 @@ macro_rules! def_decimal {
};
}
-def_decimal!(Decimal128, 128);
-def_decimal!(Decimal256, 256);
+def_decimal!(
+ Decimal128,
+ 128,
+ DECIMAL128_MAX_PRECISION,
+ DECIMAL128_MAX_SCALE
+);
+def_decimal!(
+ Decimal256,
+ 256,
+ DECIMAL256_MAX_PRECISION,
+ DECIMAL256_MAX_SCALE
+);
#[cfg(test)]
mod tests {
use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};
+ use num::{BigInt, Num};
#[test]
fn decimal_128_to_string() {
@@ -281,10 +336,10 @@ mod tests {
// smaller than i128 minimum
bytes = vec![255; 32];
bytes[31] = 128;
- let value = Decimal256::try_new_from_bytes(79, 4, &bytes).unwrap();
+ let value = Decimal256::try_new_from_bytes(76, 4, &bytes).unwrap();
assert_eq!(
value.to_string(),
-
"-5744373177007483132341216834415376678658315645522012356644966081642565415.7313"
+
"-574437317700748313234121683441537667865831564552201235664496608164256541.5731"
);
bytes = vec![255; 32];
@@ -302,4 +357,15 @@ mod tests {
let integer = i128_func(value);
assert_eq!(integer, 100);
}
+
+ #[test]
+ fn bigint_to_decimal256() {
+ let num = BigInt::from_str_radix("123456789", 10).unwrap();
+ let value = Decimal256::from_big_int(&num, 30, 2).unwrap();
+ assert_eq!(value.to_string(), "1234567.89");
+
+ let num =
BigInt::from_str_radix("-5744373177007483132341216834415376678658315645522012356644966081642565415731",
10).unwrap();
+ let value = Decimal256::from_big_int(&num, 76, 4).unwrap();
+ assert_eq!(value.to_string(),
"-574437317700748313234121683441537667865831564552201235664496608164256541.5731");
+ }
}