This is an automated email from the ASF dual-hosted git repository.

milenkovicm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-ballista.git


The following commit(s) were added to refs/heads/main by this push:
     new 15fc7987 fix: Disable CollectLeft as it is broken in ballista (#1301)
15fc7987 is described below

commit 15fc7987870f483eea9d0e3b2ec1282005f50ebc
Author: Marko Milenković <milenkov...@users.noreply.github.com>
AuthorDate: Mon Sep 1 20:18:53 2025 +0100

    fix: Disable CollectLeft as it is broken in ballista (#1301)
---
 ballista/client/tests/context_checks.rs | 35 +++++++++++++++++++++++++++++++++
 ballista/core/src/extension.rs          | 14 +++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/ballista/client/tests/context_checks.rs b/ballista/client/tests/context_checks.rs
index 1d1c710a..025cc4fb 100644
--- a/ballista/client/tests/context_checks.rs
+++ b/ballista/client/tests/context_checks.rs
@@ -517,6 +517,41 @@ mod supported {
         Ok(())
     }
 
+    // As mentioned in https://github.com/apache/datafusion-ballista/issues/1055
+    // "Left/full outer join incorrect for CollectLeft / broadcast"
+    //
+    // In order to make correct results (decreasing performance) CollectLeft
+    // has been disabled until fixed
+
+    #[rstest]
+    #[case::standalone(standalone_context())]
+    #[case::remote(remote_context())]
+    #[tokio::test]
+    async fn should_disable_collect_left(
+        #[future(awt)]
+        #[case]
+        ctx: SessionContext,
+    ) -> datafusion::error::Result<()> {
+        let result = ctx
+            .sql("select name, value from information_schema.df_settings where name in ('datafusion.optimizer.hash_join_single_partition_threshold', 'datafusion.optimizer.hash_join_single_partition_threshold_rows') order by name limit 2")
+            .await?
+            .collect()
+            .await?;
+
+        let expected = [
+            "+----------------------------------------------------------------+-------+",
+            "| name                                                           | value |",
+            "+----------------------------------------------------------------+-------+",
+            "| datafusion.optimizer.hash_join_single_partition_threshold      | 0     |",
+            "| datafusion.optimizer.hash_join_single_partition_threshold_rows | 0     |",
+            "+----------------------------------------------------------------+-------+",
+        ];
+
+        assert_batches_eq!(expected, &result);
+
+        Ok(())
+    }
+
     #[rstest]
     #[case::standalone(standalone_context())]
     #[case::remote(remote_context())]
diff --git a/ballista/core/src/extension.rs b/ballista/core/src/extension.rs
index 88992f87..f1c0c200 100644
--- a/ballista/core/src/extension.rs
+++ b/ballista/core/src/extension.rs
@@ -396,6 +396,20 @@ impl SessionConfigHelperExt for SessionConfig {
             )
             // same like previous comment
             .set_bool("datafusion.sql_parser.map_string_types_to_utf8view", false)
+            //
+            // As mentioned in https://github.com/apache/datafusion-ballista/issues/1055
+            // "Left/full outer join incorrect for CollectLeft / broadcast"
+            //
+            // In order to make correct results (decreasing performance) CollectLeft
+            // has been disabled until fixed
+            .set_u64(
+                "datafusion.optimizer.hash_join_single_partition_threshold",
+                0,
+            )
+            .set_u64(
+                "datafusion.optimizer.hash_join_single_partition_threshold_rows",
+                0,
+            )
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to