This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 57280e42dc Minor: reduce replication for nested comparison (#11149)
57280e42dc is described below
commit 57280e42dc2391ab65c24c0fb52032942d3d85a8
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Jun 27 20:26:46 2024 -0400
Minor: reduce replication for nested comparison (#11149)
---
datafusion/physical-expr-common/src/datum.rs | 2 +-
datafusion/physical-plan/src/joins/hash_join.rs | 12 ++++--------
2 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/datafusion/physical-expr-common/src/datum.rs b/datafusion/physical-expr-common/src/datum.rs
index fd3f2587e2..96c903180e 100644
--- a/datafusion/physical-expr-common/src/datum.rs
+++ b/datafusion/physical-expr-common/src/datum.rs
@@ -87,7 +87,7 @@ pub fn apply_cmp_for_nested(
}
/// Compare on nested type List, Struct, and so on
-fn compare_op_for_nested(
+pub fn compare_op_for_nested(
op: &Operator,
lhs: &dyn Datum,
rhs: &dyn Datum,
diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs
index 7d268839df..b2f9ef5607 100644
--- a/datafusion/physical-plan/src/joins/hash_join.rs
+++ b/datafusion/physical-plan/src/joins/hash_join.rs
@@ -52,15 +52,13 @@ use arrow::array::{
Array, ArrayRef, BooleanArray, BooleanBufferBuilder, PrimitiveArray, UInt32Array,
UInt64Array,
};
-use arrow::buffer::NullBuffer;
use arrow::compute::kernels::cmp::{eq, not_distinct};
use arrow::compute::{and, concat_batches, take, FilterBuilder};
use arrow::datatypes::{Schema, SchemaRef};
use arrow::record_batch::RecordBatch;
use arrow::util::bit_util;
use arrow_array::cast::downcast_array;
-use arrow_ord::ord::make_comparator;
-use arrow_schema::{ArrowError, SortOptions};
+use arrow_schema::ArrowError;
use datafusion_common::utils::memory::estimate_memory_size;
use datafusion_common::{
internal_datafusion_err, internal_err, plan_err, project_schema, DataFusionError,
@@ -75,6 +73,8 @@ use datafusion_physical_expr::expressions::UnKnownColumn;
use datafusion_physical_expr::{PhysicalExpr, PhysicalExprRef};
use ahash::RandomState;
+use datafusion_expr::Operator;
+use datafusion_physical_expr_common::datum::compare_op_for_nested;
use futures::{ready, Stream, StreamExt, TryStreamExt};
use parking_lot::Mutex;
@@ -1216,11 +1216,7 @@ fn eq_dyn_null(
// implementation
// <https://github.com/apache/datafusion/issues/10749>
if left.data_type().is_nested() && null_equals_null {
- let cmp = make_comparator(left, right, SortOptions::default())?;
- let len = left.len().min(right.len());
- let values = (0..len).map(|i| cmp(i, i).is_eq()).collect();
- let nulls = NullBuffer::union(left.nulls(), right.nulls());
- return Ok(BooleanArray::new(values, nulls));
+ return Ok(compare_op_for_nested(&Operator::Eq, &left, &right)?);
}
match (left.data_type(), right.data_type()) {
_ if null_equals_null => not_distinct(&left, &right),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]