This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 80f4322429 Minor: reuse Rows buffer in GroupValuesRows (#10980)
80f4322429 is described below

commit 80f43224299bf05f18ffa3be3909303473b2cd36
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jun 18 17:01:12 2024 -0400

    Minor: reuse Rows buffer in GroupValuesRows (#10980)
---
 .../physical-plan/src/aggregates/group_values/row.rs     | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs
index 3b7480cd29..96a12d7b62 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/row.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs
@@ -59,9 +59,12 @@ pub struct GroupValuesRows {
     /// [`Row`]: arrow::row::Row
     group_values: Option<Rows>,
 
-    // buffer to be reused to store hashes
+    /// reused buffer to store hashes
     hashes_buffer: Vec<u64>,
 
+    /// reused buffer to store rows
+    rows_buffer: Rows,
+
     /// Random state for creating hashes
     random_state: RandomState,
 }
@@ -78,6 +81,10 @@ impl GroupValuesRows {
 
         let map = RawTable::with_capacity(0);
 
+        let starting_rows_capacity = 1000;
+        let starting_data_capacity = 64 * starting_rows_capacity;
+        let rows_buffer =
+            row_converter.empty_rows(starting_rows_capacity, starting_data_capacity);
         Ok(Self {
             schema,
             row_converter,
@@ -85,6 +92,7 @@ impl GroupValuesRows {
             map_size: 0,
             group_values: None,
             hashes_buffer: Default::default(),
+            rows_buffer,
             random_state: Default::default(),
         })
     }
@@ -93,8 +101,9 @@ impl GroupValuesRows {
 impl GroupValues for GroupValuesRows {
     fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> Result<()> {
         // Convert the group keys into the row format
-        // Avoid reallocation when https://github.com/apache/arrow-rs/issues/4479 is available
-        let group_rows = self.row_converter.convert_columns(cols)?;
+        let group_rows = &mut self.rows_buffer;
+        group_rows.clear();
+        self.row_converter.append(group_rows, cols)?;
         let n_rows = group_rows.num_rows();
 
         let mut group_values = match self.group_values.take() {
@@ -150,6 +159,7 @@ impl GroupValues for GroupValuesRows {
         self.row_converter.size()
             + group_values_size
             + self.map_size
+            + self.rows_buffer.size()
             + self.hashes_buffer.allocated_size()
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to