pitrou commented on a change in pull request #12032:
URL: https://github.com/apache/arrow/pull/12032#discussion_r790574210
##########
File path: cpp/src/arrow/compute/exec/key_hash.cc
##########
@@ -286,34 +286,40 @@ void Hashing::HashMultiColumn(const
std::vector<KeyEncoder::KeyColumnArray>& col
bool is_first = true;
for (size_t icol = 0; icol < cols.size(); ++icol) {
- if (cols[icol].metadata().is_fixed_length) {
- uint32_t col_width = cols[icol].metadata().fixed_length;
- if (col_width == 0) {
- util::bit_util::bits_to_bytes(ctx->hardware_flags, num_rows,
cols[icol].data(1),
- byte_temp_buf.mutable_data(),
- cols[icol].bit_offset(1));
- }
- Hashing::hash_fixed(
- ctx->hardware_flags, num_rows, col_width == 0 ? 1 : col_width,
- col_width == 0 ? byte_temp_buf.mutable_data() : cols[icol].data(1),
- is_first ? out_hash : hash_temp_buf.mutable_data());
+ // Set the hash value as zero for a null type col
+ if (cols[icol].metadata().is_null_type) {
+ uint32_t* dst_hash = is_first ? out_hash : hash_temp_buf.mutable_data();
Review comment:
Can you factor out the `dst_hash` calculation for clarity?
##########
File path: cpp/src/arrow/compute/kernels/hash_aggregate.cc
##########
@@ -322,14 +333,20 @@ struct GrouperFastImpl : Grouper {
group_ids, AllocateBuffer(sizeof(uint32_t) * num_rows,
ctx_->memory_pool()));
for (int icol = 0; icol < num_columns; ++icol) {
- const uint8_t* non_nulls = nullptr;
- if (batch[icol].array()->buffers[0] != NULLPTR) {
- non_nulls = batch[icol].array()->buffers[0]->data();
- }
- const uint8_t* fixedlen = batch[icol].array()->buffers[1]->data();
- const uint8_t* varlen = nullptr;
- if (!col_metadata_[icol].is_fixed_length) {
- varlen = batch[icol].array()->buffers[2]->data();
+ const uint8_t* non_nulls = NULLPTR;
+ const uint8_t* fixedlen = NULLPTR;
+ const uint8_t* varlen = NULLPTR;
+
+ // Skip if the key's type is NULL
+ if (key_types_[icol]->id() != Type::NA) {
+ if (batch[icol].array()->buffers[0] != NULLPTR) {
+ non_nulls = batch[icol].array()->buffers[0]->data();
+ }
+ auto array = batch[icol].array();
Review comment:
What is this variable for? Did you mean to use it?
##########
File path: cpp/src/arrow/compute/kernels/hash_aggregate.cc
##########
@@ -322,14 +333,20 @@ struct GrouperFastImpl : Grouper {
group_ids, AllocateBuffer(sizeof(uint32_t) * num_rows,
ctx_->memory_pool()));
for (int icol = 0; icol < num_columns; ++icol) {
- const uint8_t* non_nulls = nullptr;
- if (batch[icol].array()->buffers[0] != NULLPTR) {
- non_nulls = batch[icol].array()->buffers[0]->data();
- }
- const uint8_t* fixedlen = batch[icol].array()->buffers[1]->data();
- const uint8_t* varlen = nullptr;
- if (!col_metadata_[icol].is_fixed_length) {
- varlen = batch[icol].array()->buffers[2]->data();
+ const uint8_t* non_nulls = NULLPTR;
+ const uint8_t* fixedlen = NULLPTR;
+ const uint8_t* varlen = NULLPTR;
+
+ // Skip if the key's type is NULL
+ if (key_types_[icol]->id() != Type::NA) {
+ if (batch[icol].array()->buffers[0] != NULLPTR) {
+ non_nulls = batch[icol].array()->buffers[0]->data();
+ }
+ auto array = batch[icol].array();
Review comment:
What is this variable for? Did you mean to use it?
##########
File path: cpp/src/arrow/compute/exec/key_compare.cc
##########
@@ -334,6 +334,14 @@ void KeyCompare::CompareColumnsToRows(uint32_t
num_rows_to_compare,
bool is_first_column = true;
for (size_t icol = 0; icol < cols.size(); ++icol) {
const KeyEncoder::KeyColumnArray& col = cols[icol];
+ if (col.metadata().is_null_type) {
+ // If this null type col is the first column, the match_bytevector_A
needs to be
+ // initialized with 0xFF. Otherwise, the calculation can be skipped
Review comment:
Why 0xFF? Can you explain?
##########
File path: cpp/src/arrow/compute/kernels/hash_aggregate.cc
##########
@@ -237,6 +245,9 @@ struct GrouperFastImpl : Grouper {
} else if (is_binary_like(key->id())) {
impl->col_metadata_[icol] =
arrow::compute::KeyEncoder::KeyColumnMetadata(false,
sizeof(uint32_t));
+ } else if (key->id() == Type::NA) {
+ impl->col_metadata_[icol] =
+ arrow::compute::KeyEncoder::KeyColumnMetadata(true, 0, true);
Review comment:
Can you please denominate arguments for clarity? Such as:
```c++
impl->col_metadata_[icol] =
arrow::compute::KeyEncoder::KeyColumnMetadata(/*arg1=*/true, 0,
/*arg2=*/true);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]