alamb commented on code in PR #19910:
URL: https://github.com/apache/datafusion/pull/19910#discussion_r2722483850
##########
datafusion/common/src/utils/proxy.rs:
##########
@@ -161,22 +162,24 @@ where
) {
let hash = hasher(&x);
- // NOTE: `find_entry` does NOT grow!
- match self.find_entry(hash, |y| y == &x) {
- Ok(_occupied) => {}
- Err(_absent) => {
- if self.len() == self.capacity() {
- // need to request more memory
- let bump_elements = self.capacity().max(16);
- let bump_size = bump_elements * size_of::<T>();
- *accounting =
(*accounting).checked_add(bump_size).expect("overflow");
+ if cfg!(debug_assertions) {
+ // In debug mode, check that the element is not already present
+ debug_assert!(
+ self.find_entry(hash, |y| y == &x).is_err(),
+ "attempted to insert duplicate element into
HashTableAllocExt::insert_accounted"
+ );
+ }
- self.reserve(bump_elements, &hasher);
- }
+ if self.len() == self.capacity() {
+ // need to request more memory
+ let bump_elements = self.capacity().max(16);
Review Comment:
I double checked that capacity is in terms of elements --
https://docs.rs/hashbrown/latest/hashbrown/struct.HashTable.html#method.capacity
So this makes sense
##########
datafusion/common/src/utils/proxy.rs:
##########
@@ -161,22 +162,24 @@ where
) {
let hash = hasher(&x);
- // NOTE: `find_entry` does NOT grow!
- match self.find_entry(hash, |y| y == &x) {
- Ok(_occupied) => {}
- Err(_absent) => {
- if self.len() == self.capacity() {
- // need to request more memory
- let bump_elements = self.capacity().max(16);
- let bump_size = bump_elements * size_of::<T>();
- *accounting =
(*accounting).checked_add(bump_size).expect("overflow");
+ if cfg!(debug_assertions) {
Review Comment:
Maybe it would be nice to update the docs of `HashTableAllocExt` to update
```rust
/// This method assumes that the element is not already present
```
To say
```rust
/// Panics:
/// Assumes the element is not already present, and may panic if it does
```
##########
datafusion/common/src/utils/proxy.rs:
##########
@@ -161,22 +162,24 @@ where
) {
let hash = hasher(&x);
- // NOTE: `find_entry` does NOT grow!
- match self.find_entry(hash, |y| y == &x) {
- Ok(_occupied) => {}
- Err(_absent) => {
- if self.len() == self.capacity() {
- // need to request more memory
- let bump_elements = self.capacity().max(16);
- let bump_size = bump_elements * size_of::<T>();
- *accounting =
(*accounting).checked_add(bump_size).expect("overflow");
+ if cfg!(debug_assertions) {
+ // In debug mode, check that the element is not already present
+ debug_assert!(
+ self.find_entry(hash, |y| y == &x).is_err(),
+ "attempted to insert duplicate element into
HashTableAllocExt::insert_accounted"
+ );
+ }
- self.reserve(bump_elements, &hasher);
- }
+ if self.len() == self.capacity() {
+ // need to request more memory
+ let bump_elements = self.capacity().max(16);
+ let bump_size = bump_elements * size_of::<T>();
+ *accounting =
(*accounting).checked_add(bump_size).expect("overflow");
- // still need to insert the element since first try failed
- self.entry(hash, |y| y == &x, hasher).insert(x);
- }
+ self.reserve(bump_elements, &hasher);
}
+
+ // still need to insert the element since first try failed
Review Comment:
I don't see a first attempt to insert -- maybe this comment needs to be
updated
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]