This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 6948373f1 fix: Don't instantiate the scalar composition code
quadratically for dictionaries (#2391)
6948373f1 is described below
commit 6948373f147815753ac3d34bb3a73a8ba2079873
Author: Markus Westerlind <[email protected]>
AuthorDate: Thu Aug 11 14:06:58 2022 +0200
fix: Don't instantiate the scalar composition code quadratically for
dictionaries (#2391)
* fix: Don't instantiate the scalar composition code quadratically for
dictionaries
Instead, re-use the ones normal function. Reduces how much code
`datafusion-physical-expr` generated significantly (since the functions are
generic, and not instantiated in `arrow` itself, it only shows up downstream).
https://github.com/apache/arrow-datafusion
There is technically an extra indirect call now as the recursive call to
`eq_dyn_scalar` etc coerces to a `dyn Array` again but that seems unlikely to
matter.
## cargo llvm-lines -p datafusion-physical-expr
### Before
```
Lines Copies Function name
----- ------ -------------
2270242 (100%) 38377 (100%) (TOTAL)
245854 (10.8%) 5580 (14.5%) core::option::Option<T>::ok_or_else
58690 (2.6%) 10 (0.0%)
arrow::compute::kernels::comparison::eq_dyn_scalar
58690 (2.6%) 10 (0.0%)
arrow::compute::kernels::comparison::gt_dyn_scalar
58690 (2.6%) 10 (0.0%)
arrow::compute::kernels::comparison::gt_eq_dyn_scalar
58690 (2.6%) 10 (0.0%)
arrow::compute::kernels::comparison::lt_dyn_scalar
58690 (2.6%) 10 (0.0%)
arrow::compute::kernels::comparison::lt_eq_dyn_scalar
58690 (2.6%) 10 (0.0%)
arrow::compute::kernels::comparison::neq_dyn_scalar
55800 (2.5%) 900 (2.3%)
arrow::compute::kernels::comparison::eq_dyn_scalar::{{closure}}
55800 (2.5%) 900 (2.3%)
arrow::compute::kernels::comparison::gt_dyn_scalar::{{closure}}
55800 (2.5%) 900 (2.3%)
arrow::compute::kernels::comparison::gt_eq_dyn_scalar::{{closure}}
55800 (2.5%) 900 (2.3%)
arrow::compute::kernels::comparison::lt_dyn_scalar::{{closure}}
55800 (2.5%) 900 (2.3%)
arrow::compute::kernels::comparison::lt_eq_dyn_scalar::{{closure}}
55800 (2.5%) 900 (2.3%)
arrow::compute::kernels::comparison::neq_dyn_scalar::{{closure}}
44929 (2.0%) 900 (2.3%) core::option::Option<T>::map
40986 (1.8%) 162 (0.4%) <arrow::array::array_boolean::BooleanArray
as core::iter::traits::collect::FromIterator<Ptr>>::from_iter
37528 (1.7%) 508 (1.3%) core::iter::traits::iterator::Iterator::fold
30595 (1.3%) 245 (0.6%) <alloc::vec::Vec<T> as
alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
29272 (1.3%) 46 (0.1%)
<core::iter::adapters::flatten::FlattenCompat<I,U> as
core::iter::traits::iterator::Iterator>::size_hint
27815 (1.2%) 285 (0.7%)
core::iter::traits::iterator::Iterator::try_fold
26014 (1.1%) 1 (0.0%)
datafusion_physical_expr::expressions::binary::BinaryExpr::evaluate_array_scalar
25095 (1.1%) 441 (1.1%)
core::iter::adapters::map::map_fold::{{closure}}
22849 (1.0%) 174 (0.5%) <core::iter::adapters::GenericShunt<I,R> as
core::iter::traits::iterator::Iterator>::try_fold::{{closure}}
21888 (1.0%) 96 (0.3%)
arrow::compute::kernels::comparison::compare_op_scalar
21464 (0.9%) 56 (0.1%)
<arrow::array::array_string::GenericStringArray<OffsetSize> as
core::iter::traits::collect::FromIterator<core::option::Option<Ptr>>>::from_iter
21461 (0.9%) 441 (1.1%) <core::iter::adapters::map::Map<I,F> as
core::iter::traits::iterator::Iterator>::fold
19918 (0.9%) 118 (0.3%)
arrow::buffer::mutable::MutableBuffer::from_trusted_len_iter
16916 (0.7%) 246 (0.6%) <alloc::vec::Vec<T,A> as
alloc::vec::spec_extend::SpecExtend<T,I>>::spec_extend
```
### After
```
Lines Copies Function name
----- ------ -------------
1475122 (100%) 28777 (100%) (TOTAL)
44929 (3.0%) 900 (3.1%) core::option::Option<T>::map
40986 (2.8%) 162 (0.6%) <arrow::array::array_boolean::BooleanArray
as core::iter::traits::collect::FromIterator<Ptr>>::from_iter
37528 (2.5%) 508 (1.8%) core::iter::traits::iterator::Iterator::fold
34174 (2.3%) 780 (2.7%) core::option::Option<T>::ok_or_else
30595 (2.1%) 245 (0.9%) <alloc::vec::Vec<T> as
alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
29272 (2.0%) 46 (0.2%)
<core::iter::adapters::flatten::FlattenCompat<I,U> as
core::iter::traits::iterator::Iterator>::size_hint
27815 (1.9%) 285 (1.0%)
core::iter::traits::iterator::Iterator::try_fold
26014 (1.8%) 1 (0.0%)
datafusion_physical_expr::expressions::binary::BinaryExpr::evaluate_array_scalar
25095 (1.7%) 441 (1.5%)
core::iter::adapters::map::map_fold::{{closure}}
22849 (1.5%) 174 (0.6%) <core::iter::adapters::GenericShunt<I,R> as
core::iter::traits::iterator::Iterator>::try_fold::{{closure}}
21888 (1.5%) 96 (0.3%)
arrow::compute::kernels::comparison::compare_op_scalar
21464 (1.5%) 56 (0.2%)
<arrow::array::array_string::GenericStringArray<OffsetSize> as
core::iter::traits::collect::FromIterator<core::option::Option<Ptr>>>::from_iter
21461 (1.5%) 441 (1.5%) <core::iter::adapters::map::Map<I,F> as
core::iter::traits::iterator::Iterator>::fold
19918 (1.4%) 118 (0.4%)
arrow::buffer::mutable::MutableBuffer::from_trusted_len_iter
16916 (1.1%) 246 (0.9%) <alloc::vec::Vec<T,A> as
alloc::vec::spec_extend::SpecExtend<T,I>>::spec_extend
16146 (1.1%) 960 (3.3%) core::iter::adapters::map::Map<I,F>::new
15492 (1.1%) 427 (1.5%)
core::iter::traits::iterator::Iterator::for_each
14921 (1.0%) 111 (0.4%) alloc::vec::Vec<T,A>::extend_desugared
14670 (1.0%) 126 (0.4%) core::iter::adapters::try_process
13918 (0.9%) 1 (0.0%)
datafusion_physical_expr::expressions::binary::BinaryExpr::evaluate_scalar_array
13120 (0.9%) 64 (0.2%)
<arrow::array::array_primitive::PrimitiveArray<T> as
core::iter::traits::collect::FromIterator<Ptr>>::from_iter
12963 (0.9%) 52 (0.2%)
<core::iter::adapters::flatten::FlattenCompat<I,U> as
core::iter::traits::iterator::Iterator>::try_fold
12245 (0.8%) 180 (0.6%)
<core::iter::adapters::enumerate::Enumerate<I> as
core::iter::traits::iterator::Iterator>::fold::enumerate::{{closure}}
12201 (0.8%) 81 (0.3%)
arrow::buffer::mutable::MutableBuffer::extend_from_iter
11826 (0.8%) 162 (0.6%) <arrow::array::array_boolean::BooleanArray
as core::iter::traits::collect::FromIterator<Ptr>>::from_iter::{{closure}}
11536 (0.8%) 960 (3.3%) core::iter::traits::iterator::Iterator::map
11200 (0.8%) 32 (0.1%) alloc::raw_vec::RawVec<T,A>::grow_amortized
```
* refactor: Avoid instantiating a quadratic number of closures due to
try_to_type in comparisons (-4%)
Reduces the number of llvm-lines in datafusion-physical-expr by another 4%
---
arrow/src/compute/kernels/comparison.rs | 71 +++++++++++----------------------
1 file changed, 24 insertions(+), 47 deletions(-)
diff --git a/arrow/src/compute/kernels/comparison.rs
b/arrow/src/compute/kernels/comparison.rs
index e4187ef87..1d0bc938e 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -987,18 +987,19 @@ pub fn gt_eq_utf8_scalar<OffsetSize: OffsetSizeTrait>(
compare_op_scalar(left, |a| a >= right)
}
+// Avoids creating a closure for each combination of `$RIGHT` and `$TY`
+fn try_to_type_result<T>(value: Option<T>, right: &str, ty: &str) -> Result<T>
{
+ value.ok_or_else(|| {
+ ArrowError::ComputeError(format!("Could not convert {} with {}",
right, ty,))
+ })
+}
+
/// Calls $RIGHT.$TY() (e.g. `right.to_i128()`) with a nice error message.
/// Type of expression is `Result<.., ArrowError>`
macro_rules! try_to_type {
- ($RIGHT: expr, $TY: ident) => {{
- $RIGHT.$TY().ok_or_else(|| {
- ArrowError::ComputeError(format!(
- "Could not convert {} with {}",
- stringify!($RIGHT),
- stringify!($TY)
- ))
- })
- }};
+ ($RIGHT: expr, $TY: ident) => {
+ try_to_type_result($RIGHT.$TY(), stringify!($RIGHT), stringify!($TYPE))
+ };
}
macro_rules! dyn_compare_scalar {
@@ -1068,59 +1069,35 @@ macro_rules! dyn_compare_scalar {
match $KT.as_ref() {
DataType::UInt8 => {
let left = as_dictionary_array::<UInt8Type>($LEFT);
- unpack_dict_comparison(
- left,
- dyn_compare_scalar!(left.values(), $RIGHT, $OP)?,
- )
+ unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::UInt16 => {
let left = as_dictionary_array::<UInt16Type>($LEFT);
- unpack_dict_comparison(
- left,
- dyn_compare_scalar!(left.values(), $RIGHT, $OP)?,
- )
+ unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::UInt32 => {
let left = as_dictionary_array::<UInt32Type>($LEFT);
- unpack_dict_comparison(
- left,
- dyn_compare_scalar!(left.values(), $RIGHT, $OP)?,
- )
+ unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::UInt64 => {
let left = as_dictionary_array::<UInt64Type>($LEFT);
- unpack_dict_comparison(
- left,
- dyn_compare_scalar!(left.values(), $RIGHT, $OP)?,
- )
+ unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::Int8 => {
let left = as_dictionary_array::<Int8Type>($LEFT);
- unpack_dict_comparison(
- left,
- dyn_compare_scalar!(left.values(), $RIGHT, $OP)?,
- )
+ unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::Int16 => {
let left = as_dictionary_array::<Int16Type>($LEFT);
- unpack_dict_comparison(
- left,
- dyn_compare_scalar!(left.values(), $RIGHT, $OP)?,
- )
+ unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::Int32 => {
let left = as_dictionary_array::<Int32Type>($LEFT);
- unpack_dict_comparison(
- left,
- dyn_compare_scalar!(left.values(), $RIGHT, $OP)?,
- )
+ unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::Int64 => {
let left = as_dictionary_array::<Int64Type>($LEFT);
- unpack_dict_comparison(
- left,
- dyn_compare_scalar!(left.values(), $RIGHT, $OP)?,
- )
+ unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
_ => Err(ArrowError::ComputeError(format!(
"Unsupported dictionary key type {:?}",
@@ -1186,7 +1163,7 @@ where
{
match left.data_type() {
DataType::Dictionary(key_type, _value_type) => {
- dyn_compare_scalar!(left, right, key_type, eq_scalar)
+ dyn_compare_scalar!(left, right, key_type, eq_dyn_scalar)
}
_ => dyn_compare_scalar!(left, right, eq_scalar),
}
@@ -1200,7 +1177,7 @@ where
{
match left.data_type() {
DataType::Dictionary(key_type, _value_type) => {
- dyn_compare_scalar!(left, right, key_type, lt_scalar)
+ dyn_compare_scalar!(left, right, key_type, lt_dyn_scalar)
}
_ => dyn_compare_scalar!(left, right, lt_scalar),
}
@@ -1214,7 +1191,7 @@ where
{
match left.data_type() {
DataType::Dictionary(key_type, _value_type) => {
- dyn_compare_scalar!(left, right, key_type, lt_eq_scalar)
+ dyn_compare_scalar!(left, right, key_type, lt_eq_dyn_scalar)
}
_ => dyn_compare_scalar!(left, right, lt_eq_scalar),
}
@@ -1228,7 +1205,7 @@ where
{
match left.data_type() {
DataType::Dictionary(key_type, _value_type) => {
- dyn_compare_scalar!(left, right, key_type, gt_scalar)
+ dyn_compare_scalar!(left, right, key_type, gt_dyn_scalar)
}
_ => dyn_compare_scalar!(left, right, gt_scalar),
}
@@ -1242,7 +1219,7 @@ where
{
match left.data_type() {
DataType::Dictionary(key_type, _value_type) => {
- dyn_compare_scalar!(left, right, key_type, gt_eq_scalar)
+ dyn_compare_scalar!(left, right, key_type, gt_eq_dyn_scalar)
}
_ => dyn_compare_scalar!(left, right, gt_eq_scalar),
}
@@ -1256,7 +1233,7 @@ where
{
match left.data_type() {
DataType::Dictionary(key_type, _value_type) => {
- dyn_compare_scalar!(left, right, key_type, neq_scalar)
+ dyn_compare_scalar!(left, right, key_type, neq_dyn_scalar)
}
_ => dyn_compare_scalar!(left, right, neq_scalar),
}