This is an automated email from the ASF dual-hosted git repository. milenkovicm pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion-ballista.git
The following commit(s) were added to refs/heads/main by this push: new 15fc7987 fix: Disable CollectLeft as it is broken in ballista (#1301) 15fc7987 is described below commit 15fc7987870f483eea9d0e3b2ec1282005f50ebc Author: Marko Milenković <milenkov...@users.noreply.github.com> AuthorDate: Mon Sep 1 20:18:53 2025 +0100 fix: Disable CollectLeft as it is broken in ballista (#1301) --- ballista/client/tests/context_checks.rs | 35 +++++++++++++++++++++++++++++++++ ballista/core/src/extension.rs | 14 +++++++++++++ 2 files changed, 49 insertions(+) diff --git a/ballista/client/tests/context_checks.rs b/ballista/client/tests/context_checks.rs index 1d1c710a..025cc4fb 100644 --- a/ballista/client/tests/context_checks.rs +++ b/ballista/client/tests/context_checks.rs @@ -517,6 +517,41 @@ mod supported { Ok(()) } + // As mentioned in https://github.com/apache/datafusion-ballista/issues/1055 + // "Left/full outer join incorrect for CollectLeft / broadcast" + // + // To guarantee correct results (at the cost of performance), CollectLeft + // has been disabled until the underlying issue is fixed + + #[rstest] + #[case::standalone(standalone_context())] + #[case::remote(remote_context())] + #[tokio::test] + async fn should_disable_collect_left( + #[future(awt)] + #[case] + ctx: SessionContext, + ) -> datafusion::error::Result<()> { + let result = ctx + .sql("select name, value from information_schema.df_settings where name in ('datafusion.optimizer.hash_join_single_partition_threshold', 'datafusion.optimizer.hash_join_single_partition_threshold_rows') order by name limit 2") + .await?
+ .collect() + .await?; + + let expected = [ + "+----------------------------------------------------------------+-------+", + "| name | value |", + "+----------------------------------------------------------------+-------+", + "| datafusion.optimizer.hash_join_single_partition_threshold | 0 |", + "| datafusion.optimizer.hash_join_single_partition_threshold_rows | 0 |", + "+----------------------------------------------------------------+-------+", + ]; + + assert_batches_eq!(expected, &result); + + Ok(()) + } + #[rstest] #[case::standalone(standalone_context())] #[case::remote(remote_context())] diff --git a/ballista/core/src/extension.rs b/ballista/core/src/extension.rs index 88992f87..f1c0c200 100644 --- a/ballista/core/src/extension.rs +++ b/ballista/core/src/extension.rs @@ -396,6 +396,20 @@ impl SessionConfigHelperExt for SessionConfig { ) // same like previous comment .set_bool("datafusion.sql_parser.map_string_types_to_utf8view", false) + // + // As mentioned in https://github.com/apache/datafusion-ballista/issues/1055 + // "Left/full outer join incorrect for CollectLeft / broadcast" + // + // To guarantee correct results (at the cost of performance), CollectLeft + // has been disabled until the underlying issue is fixed + .set_u64( + "datafusion.optimizer.hash_join_single_partition_threshold", + 0, + ) + .set_u64( + "datafusion.optimizer.hash_join_single_partition_threshold_rows", + 0, + ) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org