This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9451382428 [Improvement](aggregate) optimization for AggregationMethodKeysFixed::insert_keys_into_columns (#22216)
9451382428 is described below
commit 9451382428cdb6cb55b48b7d2e1941c0fbe72701
Author: Pxl <[email protected]>
AuthorDate: Wed Jul 26 16:19:15 2023 +0800
[Improvement](aggregate) optimization for AggregationMethodKeysFixed::insert_keys_into_columns (#22216)
optimization for AggregationMethodKeysFixed::insert_keys_into_columns
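Roughly, the change replaces the per-row insert_key_into_columns helpers (one virtual insert per key per row) with batched insert_keys_into_columns implementations that resize each key column once and then memcpy the fixed-width slice of every packed key straight into the column's raw buffer. A minimal standalone sketch of that idea, with simplified types (std::vector columns and a hypothetical 8-byte packed key, not the Doris classes):

    // Standalone sketch (not Doris code): decode a batch of fixed-size packed
    // keys into per-column buffers in one pass instead of row-by-row inserts.
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Hypothetical 8-byte packed key holding an int32 followed by an int16.
    struct PackedKey { unsigned char bytes[8]; };

    void insert_keys_into_columns(const std::vector<PackedKey>& keys,
                                  std::vector<int32_t>& col0, std::vector<int16_t>& col1) {
        const size_t num_rows = keys.size();
        col0.resize(num_rows);  // size each key column once up front
        col1.resize(num_rows);
        for (size_t j = 0; j < num_rows; ++j) {
            // copy each fixed-width slice straight into the column's buffer
            memcpy(&col0[j], keys[j].bytes, sizeof(int32_t));
            memcpy(&col1[j], keys[j].bytes + sizeof(int32_t), sizeof(int16_t));
        }
    }

Row-at-a-time insertion pays a virtual call per key per row; the batched form amortizes the resize and keeps the copies in a tight loop.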
---
be/src/vec/columns/column_vector.h | 4 +-
be/src/vec/common/aggregation_common.h | 24 +++----
be/src/vec/exec/vaggregation_node.h | 111 ++++++++++-----------------------
3 files changed, 46 insertions(+), 93 deletions(-)
diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h
index b8c119a217..04908d8711 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -227,9 +227,7 @@ public:
use by date, datetime, basic type
*/
void insert_many_fix_len_data(const char* data_ptr, size_t num) override {
- if constexpr (!std::is_same_v<T, vectorized::Int64>) {
- insert_many_in_copy_way(data_ptr, num);
- } else if (IColumn::is_date) {
+ if (IColumn::is_date) {
insert_date_column(data_ptr, num);
} else if (IColumn::is_date_time) {
insert_datetime_column(data_ptr, num);
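For context, insert_many_fix_len_data appends num fixed-width values from a raw pointer; with the special case above removed, non-date types presumably fall through to the generic copy path below this hunk. A simplified sketch of such a bulk fixed-length append (standalone illustration assuming a plain std::vector backing store, not the actual ColumnVector internals):

    #include <cstring>
    #include <vector>

    // Append num fixed-width elements from a raw byte pointer with one
    // grow + memcpy rather than one insert per element.
    template <typename T>
    void insert_many_fix_len(std::vector<T>& data, const char* data_ptr, size_t num) {
        const size_t old_size = data.size();
        data.resize(old_size + num);
        memcpy(data.data() + old_size, data_ptr, num * sizeof(T));
    }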
diff --git a/be/src/vec/common/aggregation_common.h b/be/src/vec/common/aggregation_common.h
index 39beb8b25f..31f19e7418 100644
--- a/be/src/vec/common/aggregation_common.h
+++ b/be/src/vec/common/aggregation_common.h
@@ -166,17 +166,18 @@ T pack_fixed(size_t i, size_t keys_size, const ColumnRawPtrs& key_columns, const
}
for (size_t j = 0; j < keys_size; ++j) {
- bool is_null;
+ bool is_null = false;
- if (!has_bitmap)
- is_null = false;
- else {
+ if (has_bitmap) {
size_t bucket = j / 8;
size_t off = j % 8;
is_null = ((bitmap[bucket] >> off) & 1) == 1;
}
- if (is_null) continue;
+ if (is_null) {
+ offset += key_sizes[j];
+ continue;
+ }
switch (key_sizes[j]) {
case 1:
@@ -184,28 +185,24 @@ T pack_fixed(size_t i, size_t keys_size, const ColumnRawPtrs& key_columns, const
static_cast<const ColumnVectorHelper*>(key_columns[j])->get_raw_data_begin<1>() +
i,
1);
- offset += 1;
break;
case 2:
memcpy(bytes + offset,
static_cast<const ColumnVectorHelper*>(key_columns[j])->get_raw_data_begin<2>() +
i * 2,
2);
- offset += 2;
break;
case 4:
memcpy(bytes + offset,
static_cast<const ColumnVectorHelper*>(key_columns[j])->get_raw_data_begin<4>() +
i * 4,
4);
- offset += 4;
break;
case 8:
memcpy(bytes + offset,
static_cast<const ColumnVectorHelper*>(key_columns[j])->get_raw_data_begin<8>() +
i * 8,
8);
- offset += 8;
break;
default:
memcpy(bytes + offset,
@@ -214,6 +211,8 @@ T pack_fixed(size_t i, size_t keys_size, const ColumnRawPtrs& key_columns, const
key_sizes[j]);
offset += key_sizes[j];
}
+
+ offset += key_sizes[j];
}
return key;
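The notable behavioral piece in the pack_fixed hunks above is that a null key now still consumes its slot: offset advances by key_sizes[j] whether or not anything was copied, so key j always sits at the same byte position regardless of which keys are null. A standalone sketch of that packing invariant (simplified: no leading null bitmap, made-up container types, not the Doris code):

    #include <array>
    #include <cstring>
    #include <vector>

    using PackedKey = std::array<unsigned char, 16>;

    // Pack one row of fixed-width key values into a single packed key. Null
    // keys are left zeroed but still reserve their slot, so key j always
    // starts at sum(key_sizes[0..j-1]).
    PackedKey pack_row(const std::vector<const char*>& key_data,
                       const std::vector<size_t>& key_sizes,
                       const std::vector<bool>& is_null, size_t row) {
        PackedKey key {};
        size_t offset = 0;
        for (size_t j = 0; j < key_sizes.size(); ++j) {
            if (!is_null[j]) {
                memcpy(key.data() + offset, key_data[j] + row * key_sizes[j], key_sizes[j]);
            }
            offset += key_sizes[j]; // advance even when null: the slot stays reserved
        }
        return key;
    }

That fixed layout is what lets the rewritten insert_keys_into_columns further down read each key column at a constant position within every packed key.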
@@ -224,7 +223,9 @@ inline UInt128 hash128(size_t i, size_t keys_size, const ColumnRawPtrs& key_colu
UInt128 key;
SipHash hash;
- for (size_t j = 0; j < keys_size; ++j) key_columns[j]->update_hash_with_value(i, hash);
+ for (size_t j = 0; j < keys_size; ++j) {
+ key_columns[j]->update_hash_with_value(i, hash);
+ }
hash.get128(key.low, key.high);
@@ -253,8 +254,9 @@ inline StringRef serialize_keys_to_pool_contiguous(size_t i, size_t keys_size,
const char* begin = nullptr;
size_t sum_size = 0;
- for (size_t j = 0; j < keys_size; ++j)
+ for (size_t j = 0; j < keys_size; ++j) {
sum_size += key_columns[j]->serialize_value_into_arena(i, pool, begin).size;
+ }
return {begin, sum_size};
}
diff --git a/be/src/vec/exec/vaggregation_node.h b/be/src/vec/exec/vaggregation_node.h
index 9d6f4c4979..e31240cdbc 100644
--- a/be/src/vec/exec/vaggregation_node.h
+++ b/be/src/vec/exec/vaggregation_node.h
@@ -154,14 +154,6 @@ struct AggregationMethodSerialized {
return max_one_row_byte_size;
}
- static void insert_key_into_columns(const StringRef& key, MutableColumns& key_columns,
- const Sizes&) {
- auto pos = key.data;
- for (auto& column : key_columns) {
- pos = column->deserialize_and_insert_from_arena(pos);
- }
- }
-
static void insert_keys_into_columns(std::vector<StringRef>& keys, MutableColumns& key_columns,
const size_t num_rows, const Sizes&) {
for (auto& column : key_columns) {
@@ -215,11 +207,6 @@ struct AggregationMethodStringNoCache {
static const bool low_cardinality_optimization = false;
- static void insert_key_into_columns(const StringRef& key, MutableColumns& key_columns,
- const Sizes&) {
- key_columns[0]->insert_data(key.data, key.size);
- }
-
static void insert_keys_into_columns(std::vector<StringRef>& keys, MutableColumns& key_columns,
const size_t num_rows, const Sizes&) {
key_columns[0]->reserve(num_rows);
@@ -256,14 +243,6 @@ struct AggregationMethodOneNumber {
using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type, Mapped, FieldType,
consecutive_keys_optimization>;
- // Insert the key from the hash table into columns.
- static void insert_key_into_columns(const Key& key, MutableColumns& key_columns,
- const Sizes& /*key_sizes*/) {
- const auto* key_holder = reinterpret_cast<const char*>(&key);
- auto* column = static_cast<ColumnVectorHelper*>(key_columns[0].get());
- column->insert_raw_data<sizeof(FieldType)>(key_holder);
- }
-
static void insert_keys_into_columns(std::vector<Key>& keys, MutableColumns& key_columns,
const size_t num_rows, const Sizes&) {
key_columns[0]->reserve(num_rows);
@@ -328,59 +307,44 @@ struct AggregationMethodKeysFixed {
using State = ColumnsHashing::HashMethodKeysFixed<typename Data::value_type, Key, Mapped,
has_nullable_keys, false>;
- static void insert_key_into_columns(const Key& key, MutableColumns& key_columns,
- const Sizes& key_sizes) {
- size_t keys_size = key_columns.size();
-
- static constexpr auto bitmap_size =
- has_nullable_keys ? std::tuple_size<KeysNullMap<Key>>::value : 0;
- /// In any hash key value, column values to be read start just after the bitmap, if it exists.
- size_t pos = bitmap_size;
-
- for (size_t i = 0; i < keys_size; ++i) {
- IColumn* observed_column;
- ColumnUInt8* null_map;
-
- bool column_nullable = false;
- if constexpr (has_nullable_keys) {
- column_nullable = is_column_nullable(*key_columns[i]);
- }
-
- /// If we have a nullable column, get its nested column and its null map.
- if (column_nullable) {
+ static void insert_keys_into_columns(std::vector<Key>& keys, MutableColumns& key_columns,
+ const size_t num_rows, const Sizes& key_sizes) {
+ // In any hash key value, column values to be read start just after the bitmap, if it exists.
+ size_t pos = has_nullable_keys ? std::tuple_size<KeysNullMap<Key>>::value : 0;
+
+ for (size_t i = 0; i < key_columns.size(); ++i) {
+ size_t size = key_sizes[i];
+ key_columns[i]->resize(num_rows);
+ // If we have a nullable column, get its nested column and its null map.
+ if (is_column_nullable(*key_columns[i])) {
ColumnNullable& nullable_col = assert_cast<ColumnNullable&>(*key_columns[i]);
- observed_column = &nullable_col.get_nested_column();
- null_map = assert_cast<ColumnUInt8*>(&nullable_col.get_null_map_column());
- } else {
- observed_column = key_columns[i].get();
- null_map = nullptr;
- }
- bool is_null = false;
- if (column_nullable) {
- /// The current column is nullable. Check if the value of the
- /// corresponding key is nullable. Update the null map accordingly.
+ char* data =
+ const_cast<char*>(nullable_col.get_nested_column().get_raw_data().data);
+ UInt8* nullmap = assert_cast<ColumnUInt8*>(&nullable_col.get_null_map_column())
+ ->get_data()
+ .data();
+
+ // The current column is nullable. Check if the value of the
+ // corresponding key is nullable. Update the null map accordingly.
size_t bucket = i / 8;
size_t offset = i % 8;
- UInt8 val = (reinterpret_cast<const UInt8*>(&key)[bucket] >> offset) & 1;
- null_map->insert_value(val);
- is_null = val == 1;
- }
-
- if (has_nullable_keys && is_null) {
- observed_column->insert_default();
+ for (size_t j = 0; j < num_rows; j++) {
+ const Key& key = keys[j];
+ UInt8 val = (reinterpret_cast<const UInt8*>(&key)[bucket] >> offset) & 1;
+ nullmap[j] = val;
+ if (!val) {
+ memcpy(data + j * size, reinterpret_cast<const char*>(&key) + pos, size);
+ }
+ }
} else {
- size_t size = key_sizes[i];
- observed_column->insert_data(reinterpret_cast<const char*>(&key) + pos, size);
- pos += size;
+ char* data = const_cast<char*>(key_columns[i]->get_raw_data().data);
+ for (size_t j = 0; j < num_rows; j++) {
+ const Key& key = keys[j];
+ memcpy(data + j * size, reinterpret_cast<const char*>(&key) + pos, size);
+ }
}
- }
- }
-
- static void insert_keys_into_columns(std::vector<Key>& keys, MutableColumns& key_columns,
- const size_t num_rows, const Sizes& key_sizes) {
- for (size_t i = 0; i != num_rows; ++i) {
- insert_key_into_columns(keys[i], key_columns, key_sizes);
+ pos += size;
}
}
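Read column by column, the new code above resizes each key column to num_rows, then for every row either copies the fixed-width slice out of the packed key or, for nullable columns, consults one bit of the leading null bitmap first. A standalone sketch of that per-column unpack loop (simplified types; pos is assumed to already include the bitmap bytes at the front of the key, as in the code above, and the containers are not the Doris classes):

    #include <array>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    using PackedKey = std::array<unsigned char, 16>;

    // Unpack key column i: its value occupies `size` bytes at byte `pos` of
    // every packed key, and its null flag is bit i of the leading bitmap.
    void unpack_column(const std::vector<PackedKey>& keys, size_t i, size_t pos, size_t size,
                       char* dst, uint8_t* null_map) {
        const size_t bucket = i / 8;
        const size_t bit = i % 8;
        for (size_t j = 0; j < keys.size(); ++j) {
            const unsigned char* key = keys[j].data();
            const uint8_t is_null = (key[bucket] >> bit) & 1;
            null_map[j] = is_null;
            if (!is_null) {
                memcpy(dst + j * size, key + pos, size);
            }
        }
    }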
@@ -411,17 +375,6 @@ struct AggregationMethodSingleNullableColumn : public SingleColumnMethod {
using State = ColumnsHashing::HashMethodSingleLowNullableColumn<BaseState, Mapped, true>;
- static void insert_key_into_columns(const Key& key, MutableColumns& key_columns,
- const Sizes& /*key_sizes*/) {
- auto col = key_columns[0].get();
-
- if constexpr (std::is_same_v<Key, StringRef>) {
- col->insert_data(key.data, key.size);
- } else {
- col->insert_data(reinterpret_cast<const char*>(&key), sizeof(key));
- }
- }
-
static void insert_keys_into_columns(std::vector<Key>& keys, MutableColumns& key_columns,
const size_t num_rows, const Sizes&) {
auto col = key_columns[0].get();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]