This is an automated email from the ASF dual-hosted git repository.

berkay pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 2176330916 Revert the removal of reservation in HashJoin (#13792)
2176330916 is described below

commit 2176330916a22ada58100d530ac43eeccb458cef
Author: Jay Zhan <[email protected]>
AuthorDate: Mon Dec 16 15:37:53 2024 +0800

    Revert the removal of reservation in HashJoin (#13792)
    
    * fix: restore memory reservation in JoinLeftData for accurate memory accounting in HashJoin
    
    This commit reintroduces the `_reservation` field in the `JoinLeftData` structure to ensure proper tracking of memory resources during join operations. The absence of this field could lead to inconsistent memory usage reporting and potential out-of-memory issues as upstream operators increase their memory consumption.
    
    * fmt
    
    Signed-off-by: Jay Zhan <[email protected]>
    
    ---------
    
    Signed-off-by: Jay Zhan <[email protected]>
---
 datafusion/physical-plan/src/joins/hash_join.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs
index 9fcb39e65d..532a91da75 100644
--- a/datafusion/physical-plan/src/joins/hash_join.rs
+++ b/datafusion/physical-plan/src/joins/hash_join.rs
@@ -90,6 +90,11 @@ struct JoinLeftData {
     /// Counter of running probe-threads, potentially
     /// able to update `visited_indices_bitmap`
     probe_threads_counter: AtomicUsize,
+    /// We need to keep this field to maintain accurate memory accounting, even though we don't directly use it.
+    /// Without holding onto this reservation, the recorded memory usage would become inconsistent with actual usage.
+    /// This could hide potential out-of-memory issues, especially when upstream operators increase their memory consumption.
+    /// The MemoryReservation ensures proper tracking of memory resources throughout the join operation's lifecycle.
+    _reservation: MemoryReservation,
 }
 
 impl JoinLeftData {
@@ -99,12 +104,14 @@ impl JoinLeftData {
         batch: RecordBatch,
         visited_indices_bitmap: SharedBitmapBuilder,
         probe_threads_counter: AtomicUsize,
+        reservation: MemoryReservation,
     ) -> Self {
         Self {
             hash_map,
             batch,
             visited_indices_bitmap,
             probe_threads_counter,
+            _reservation: reservation,
         }
     }
 
@@ -897,6 +904,7 @@ async fn collect_left_input(
         single_batch,
         Mutex::new(visited_indices_bitmap),
         AtomicUsize::new(probe_threads_count),
+        reservation,
     );
 
     Ok(data)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to