xudong963 commented on a change in pull request #1135:
URL: https://github.com/apache/arrow-datafusion/pull/1135#discussion_r743614121
##########
File path: datafusion/src/logical_plan/builder.rs
##########
@@ -488,6 +488,7 @@ impl LogicalPlanBuilder {
right: &LogicalPlan,
Review comment:
Nice idea! Then we can call join_detailed in `INTESECT` and other places
keep original.
##########
File path: datafusion/src/sql/planner.rs
##########
@@ -191,23 +191,32 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
left,
right,
all,
- } => match (op, all) {
+ } => {
+ let left_plan = self.set_expr_to_plan(left.as_ref(), None,
ctes)?;
+ let right_plan = self.set_expr_to_plan(right.as_ref(), None,
ctes)?;
+ match (op, all) {
(SetOperator::Union, true) => {
- let left_plan = self.set_expr_to_plan(left.as_ref(), None,
ctes)?;
- let right_plan = self.set_expr_to_plan(right.as_ref(),
None, ctes)?;
union_with_alias(left_plan, right_plan, alias)
}
(SetOperator::Union, false) => {
- let left_plan = self.set_expr_to_plan(left.as_ref(), None,
ctes)?;
- let right_plan = self.set_expr_to_plan(right.as_ref(),
None, ctes)?;
let union_plan = union_with_alias(left_plan, right_plan,
alias)?;
LogicalPlanBuilder::from(union_plan).distinct()?.build()
}
+ (SetOperator::Intersect, true) => {
+ let join_keys =
left_plan.schema().fields().iter().zip(right_plan.schema().fields().iter()).map(|(left_field,
right_field)| ((Column::from_name(left_field.name())),
(Column::from_name(right_field.name())))).unzip();
+ LogicalPlanBuilder::from(left_plan).join(&right_plan,
JoinType::Semi, join_keys, true)?.build()
+ }
+ (SetOperator::Intersect, false) => {
+ let distinct_left_plan =
LogicalPlanBuilder::from(left_plan).distinct()?.build()?;
+ let join_keys =
distinct_left_plan.schema().fields().iter().zip(right_plan.schema().fields().iter()).map(|(left_field,
right_field)| ((Column::from_name(left_field.name())),
(Column::from_name(right_field.name())))).unzip();
+
LogicalPlanBuilder::from(distinct_left_plan).join(&right_plan, JoinType::Semi,
join_keys, true)?.build()
Review comment:
Good caught!
##########
File path: datafusion/tests/sql.rs
##########
@@ -5385,3 +5385,70 @@ async fn query_nested_get_indexed_field() -> Result<()> {
assert_eq!(expected, actual);
Ok(())
}
+
+#[tokio::test]
+async fn intersect_with_null_not_equal() {
+ let sql = "SELECT * FROM (SELECT null AS id1, 1 AS id2) t1
+ INTERSECT SELECT * FROM (SELECT null AS id1, 2 AS id2) t2";
+
+ let expected: &[&[&str]] = &[];
+
+ let mut ctx = create_join_context_qualified().unwrap();
+ let actual = execute(&mut ctx, sql).await;
Review comment:
The first two tests contain `NULL` in results. I noticed the return
value format of `execute` is clearer.
https://github.com/apache/arrow-datafusion/blob/3d4db909e83ea8354e9b9134c20b1cbef06a5beb/datafusion/tests/sql.rs#L3453
##########
File path: datafusion/src/physical_plan/hash_join.rs
##########
@@ -751,12 +788,19 @@ fn build_join_indexes(
}
macro_rules! equal_rows_elem {
- ($array_type:ident, $l: ident, $r: ident, $left: ident, $right: ident) =>
{{
+ ($array_type:ident, $l: ident, $r: ident, $left: ident, $right: ident,
$null_equal_null: ident) => {{
let left_array = $l.as_any().downcast_ref::<$array_type>().unwrap();
let right_array = $r.as_any().downcast_ref::<$array_type>().unwrap();
match (left_array.is_null($left), right_array.is_null($right)) {
(false, false) => left_array.value($left) ==
right_array.value($right),
+ (true, true) => {
+ if $null_equal_null {
Review comment:
Good point, I think you mean `$null_equal_null` not `!$null_equal_null` 😁
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]