This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 80f4322429 Minor: reuse Rows buffer in GroupValuesRows (#10980)
80f4322429 is described below
commit 80f43224299bf05f18ffa3be3909303473b2cd36
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jun 18 17:01:12 2024 -0400
Minor: reuse Rows buffer in GroupValuesRows (#10980)
---
.../physical-plan/src/aggregates/group_values/row.rs | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs
index 3b7480cd29..96a12d7b62 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/row.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs
@@ -59,9 +59,12 @@ pub struct GroupValuesRows {
/// [`Row`]: arrow::row::Row
group_values: Option<Rows>,
- // buffer to be reused to store hashes
+ /// reused buffer to store hashes
hashes_buffer: Vec<u64>,
+ /// reused buffer to store rows
+ rows_buffer: Rows,
+
/// Random state for creating hashes
random_state: RandomState,
}
@@ -78,6 +81,10 @@ impl GroupValuesRows {
let map = RawTable::with_capacity(0);
+ let starting_rows_capacity = 1000;
+ let starting_data_capacity = 64 * starting_rows_capacity;
+ let rows_buffer =
+ row_converter.empty_rows(starting_rows_capacity, starting_data_capacity);
Ok(Self {
schema,
row_converter,
@@ -85,6 +92,7 @@ impl GroupValuesRows {
map_size: 0,
group_values: None,
hashes_buffer: Default::default(),
+ rows_buffer,
random_state: Default::default(),
})
}
@@ -93,8 +101,9 @@ impl GroupValuesRows {
impl GroupValues for GroupValuesRows {
fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> Result<()> {
// Convert the group keys into the row format
- // Avoid reallocation when https://github.com/apache/arrow-rs/issues/4479 is available
- let group_rows = self.row_converter.convert_columns(cols)?;
+ let group_rows = &mut self.rows_buffer;
+ group_rows.clear();
+ self.row_converter.append(group_rows, cols)?;
let n_rows = group_rows.num_rows();
let mut group_values = match self.group_values.take() {
@@ -150,6 +159,7 @@ impl GroupValues for GroupValuesRows {
self.row_converter.size()
+ group_values_size
+ self.map_size
+ + self.rows_buffer.size()
+ self.hashes_buffer.allocated_size()
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]