mithuncy commented on code in PR #22104:
URL: https://github.com/apache/datafusion/pull/22104#discussion_r3227582724
##########
datafusion/proto/tests/cases/roundtrip_logical_plan.rs:
##########
@@ -3173,3 +3173,105 @@ async fn roundtrip_empty_table_scan_with_projection()
-> Result<()> {
);
Ok(())
}
+
+// Regression test for https://github.com/apache/datafusion/issues/22065
+#[tokio::test]
+async fn roundtrip_join_null_aware() -> Result<()> {
+ use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
+ use datafusion_expr::JoinType;
+
+ let ctx = SessionContext::new();
+ let sql = "
+ SELECT id
+ FROM (VALUES (1), (2), (3)) AS t1(id)
+ WHERE id NOT IN (
+ SELECT bad_id
+ FROM (VALUES (CAST(1 AS INT)), (CAST(NULL AS INT))) AS
excludes(bad_id)
+ )
+ ";
+
+ let df = ctx.sql(sql).await?;
+ let plan = ctx.state().optimize(df.logical_plan())?;
+
+ let mut found_null_aware = false;
+ plan.apply(|n| {
+ if let LogicalPlan::Join(j) = n
+ && j.join_type == JoinType::LeftAnti
+ && j.null_aware
+ {
+ found_null_aware = true;
+ }
+ Ok(TreeNodeRecursion::Continue)
+ })?;
+ assert!(found_null_aware);
+
+ let bytes = logical_plan_to_bytes(&plan)?;
+ let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx.task_ctx())?;
+ assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}"));
+
+ let direct: usize = ctx
+ .execute_logical_plan(plan)
+ .await?
+ .collect()
+ .await?
+ .iter()
+ .map(|b| b.num_rows())
+ .sum();
+ let rt: usize = ctx
+ .execute_logical_plan(logical_round_trip)
+ .await?
+ .collect()
+ .await?
+ .iter()
+ .map(|b| b.num_rows())
+ .sum();
+ assert_eq!(direct, 0);
+ assert_eq!(rt, direct);
+
+ Ok(())
+}
+
+// Regression test for null_equality round-trip (related to #22065).
+#[tokio::test]
+async fn roundtrip_join_null_equality() -> Result<()> {
Review Comment:
The first test exercises null_aware = true (via NOT IN SQL); the second
forces null_equality = NullEqualsNull. They both use the round-trip Debug
compare, but each puts a different field into a non-default state, so each one
catches a decoder that drops its field to the default. The first test uses the
default null_equality
(NullEqualsNothing), so a regression of that exact shape on null_equality would
slip past it.
Happy to consolidate into a single test that exercises both non-default
fields on the same Join if you'd prefer — just wanted to flag the coverage
tradeoff before deleting either one.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]