This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b5249a9e471 [opt](cloud) reduce cache hotspot table write amplification (#45557)
b5249a9e471 is described below
commit b5249a9e47149bd5ff33c25933a9ae882c6e6b45
Author: zhengyu <[email protected]>
AuthorDate: Fri Dec 20 00:08:33 2024 +0800
[opt](cloud) reduce cache hotspot table write amplification (#45557)
1. batch insert cloud_cache_hotspot in FE
2. enlarge polling interval in FE
3. shrink bucket num to 1 for cloud_cache_hotspot table
4. ignore stable statistics and only report the changed (dynamic) ones in BE
Signed-off-by: zhengyu <[email protected]>
---
be/src/cloud/cloud_tablet_hotspot.cpp | 94 +++++++++++++---------
be/src/cloud/cloud_tablet_hotspot.h | 19 +++++
.../main/java/org/apache/doris/common/Config.java | 4 +-
.../apache/doris/cloud/CacheHotspotManager.java | 2 +-
.../doris/cloud/CacheHotspotManagerUtils.java | 3 +-
5 files changed, 80 insertions(+), 42 deletions(-)
diff --git a/be/src/cloud/cloud_tablet_hotspot.cpp
b/be/src/cloud/cloud_tablet_hotspot.cpp
index dd197268646..6391a2dc5c4 100644
--- a/be/src/cloud/cloud_tablet_hotspot.cpp
+++ b/be/src/cloud/cloud_tablet_hotspot.cpp
@@ -57,18 +57,55 @@ TabletHotspot::~TabletHotspot() {
}
}
-struct MapKeyHash {
- int64_t operator()(const std::pair<int64_t, int64_t>& key) const {
- return std::hash<int64_t> {}(key.first) + std::hash<int64_t>
{}(key.second);
+void get_return_partitions(
+ const std::unordered_map<TabletHotspotMapKey,
+ std::unordered_map<int64_t,
TabletHotspotMapValue>, MapKeyHash>&
+ hot_partition,
+ const std::unordered_map<TabletHotspotMapKey,
+ std::unordered_map<int64_t,
TabletHotspotMapValue>, MapKeyHash>&
+ last_hot_partition,
+ std::vector<THotTableMessage>* hot_tables, int& return_partitions, int
N) {
+ for (const auto& [key, partition_to_value] : hot_partition) {
+ THotTableMessage msg;
+ msg.table_id = key.first;
+ msg.index_id = key.second;
+ for (const auto& [partition_id, value] : partition_to_value) {
+ if (return_partitions > N) {
+ return;
+ }
+ auto last_value_iter = last_hot_partition.find(key);
+ if (last_value_iter != last_hot_partition.end()) {
+ auto last_partition_iter =
last_value_iter->second.find(partition_id);
+ if (last_partition_iter != last_value_iter->second.end()) {
+ const auto& last_value = last_partition_iter->second;
+ if (std::abs(static_cast<int64_t>(value.qpd) -
+ static_cast<int64_t>(last_value.qpd)) < 5 &&
+ std::abs(static_cast<int64_t>(value.qpw) -
+ static_cast<int64_t>(last_value.qpw)) < 10 &&
+ std::abs(static_cast<int64_t>(value.last_access_time) -
+
static_cast<int64_t>(last_value.last_access_time)) < 60) {
+ LOG(INFO) << "skip partition_id=" << partition_id << "
qpd=" << value.qpd
+ << " qpw=" << value.qpw
+ << " last_access_time=" <<
value.last_access_time
+ << " last_qpd=" << last_value.qpd
+ << " last_qpw=" << last_value.qpw
+ << " last_access_time=" <<
last_value.last_access_time;
+ continue;
+ }
+ }
+ }
+ THotPartition hot_partition;
+ hot_partition.__set_partition_id(partition_id);
+ hot_partition.__set_query_per_day(value.qpd);
+ hot_partition.__set_query_per_week(value.qpw);
+ hot_partition.__set_last_access_time(value.last_access_time);
+ msg.hot_partitions.push_back(hot_partition);
+ return_partitions++;
+ }
+ msg.__isset.hot_partitions = !msg.hot_partitions.empty();
+ hot_tables->push_back(std::move(msg));
}
-};
-struct TabletHotspotMapValue {
- uint64_t qpd = 0; // query per day
- uint64_t qpw = 0; // query per week
- int64_t last_access_time;
-};
-
-using TabletHotspotMapKey = std::pair<int64_t, int64_t>;
+}
void TabletHotspot::get_top_n_hot_partition(std::vector<THotTableMessage>*
hot_tables) {
// map<pair<table_id, index_id>, map<partition_id, value>> for day
@@ -108,33 +145,14 @@ void
TabletHotspot::get_top_n_hot_partition(std::vector<THotTableMessage>* hot_t
});
constexpr int N = 50;
int return_partitions = 0;
- auto get_return_partitions =
- [=, &return_partitions](
- const std::unordered_map<TabletHotspotMapKey,
- std::unordered_map<int64_t,
TabletHotspotMapValue>,
- MapKeyHash>& hot_partition) {
- for (const auto& [key, partition_to_value] : hot_partition) {
- THotTableMessage msg;
- msg.table_id = key.first;
- msg.index_id = key.second;
- for (const auto& [partition_id, value] :
partition_to_value) {
- if (return_partitions > N) {
- return;
- }
- THotPartition hot_partition;
- hot_partition.__set_partition_id(partition_id);
- hot_partition.__set_query_per_day(value.qpd);
- hot_partition.__set_query_per_week(value.qpw);
-
hot_partition.__set_last_access_time(value.last_access_time);
- msg.hot_partitions.push_back(hot_partition);
- return_partitions++;
- }
- msg.__isset.hot_partitions = !msg.hot_partitions.empty();
- hot_tables->push_back(std::move(msg));
- }
- };
- get_return_partitions(day_hot_partitions);
- get_return_partitions(week_hot_partitions);
+
+ get_return_partitions(day_hot_partitions, _last_day_hot_partitions,
hot_tables,
+ return_partitions, N);
+ get_return_partitions(week_hot_partitions, _last_week_hot_partitions,
hot_tables,
+ return_partitions, N);
+
+ _last_day_hot_partitions = std::move(day_hot_partitions);
+ _last_week_hot_partitions = std::move(week_hot_partitions);
}
void HotspotCounter::make_dot_point() {
diff --git a/be/src/cloud/cloud_tablet_hotspot.h
b/be/src/cloud/cloud_tablet_hotspot.h
index af98f99a558..0be1c085a6c 100644
--- a/be/src/cloud/cloud_tablet_hotspot.h
+++ b/be/src/cloud/cloud_tablet_hotspot.h
@@ -49,6 +49,19 @@ struct HotspotCounter {
};
using HotspotCounterPtr = std::shared_ptr<HotspotCounter>;
+using TabletHotspotMapKey = std::pair<int64_t, int64_t>;
+
+struct TabletHotspotMapValue {
+ uint64_t qpd = 0; // query per day
+ uint64_t qpw = 0; // query per week
+ int64_t last_access_time;
+};
+
+struct MapKeyHash {
+ int64_t operator()(const std::pair<int64_t, int64_t>& key) const {
+ return std::hash<int64_t> {}(key.first) + std::hash<int64_t>
{}(key.second);
+ }
+};
class TabletHotspot {
public:
@@ -71,6 +84,12 @@ private:
bool _closed {false};
std::mutex _mtx;
std::condition_variable _cond;
+ std::unordered_map<TabletHotspotMapKey, std::unordered_map<int64_t,
TabletHotspotMapValue>,
+ MapKeyHash>
+ _last_day_hot_partitions;
+ std::unordered_map<TabletHotspotMapKey, std::unordered_map<int64_t,
TabletHotspotMapValue>,
+ MapKeyHash>
+ _last_week_hot_partitions;
};
} // namespace doris
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index c601a492162..935300dee6f 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -3190,11 +3190,11 @@ public class Config extends ConfigBase {
public static boolean enable_fetch_cluster_cache_hotspot = true;
@ConfField(mutable = true)
- public static long fetch_cluster_cache_hotspot_interval_ms = 600000;
+ public static long fetch_cluster_cache_hotspot_interval_ms = 3600000;
// to control the max num of values inserted into cache hotspot internal
table
// insert into cache table when the size of batch values reaches this limit
@ConfField(mutable = true)
- public static long batch_insert_cluster_cache_hotspot_num = 1000;
+ public static long batch_insert_cluster_cache_hotspot_num = 5000;
/**
* intervals between be status checks for CloudUpgradeMgr
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java
b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java
index 0b83baa94d6..f4c7392eb75 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java
@@ -159,9 +159,9 @@ public class CacheHotspotManager extends MasterDaemon {
}
});
}
- triggerBatchInsert();
});
});
+ triggerBatchInsert();
idToTable.clear();
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java
index 20de42f8cdc..72710debaef 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java
@@ -70,9 +70,10 @@ public class CacheHotspotManagerUtils {
+ " last_access_time DATETIMEV2)\n"
+ " UNIQUE KEY(cluster_id, backend_id, table_id, index_id,
partition_id, insert_day)\n"
+ " PARTITION BY RANGE (insert_day) ()\n"
- + " DISTRIBUTED BY HASH (cluster_id)\n"
+ + " DISTRIBUTED BY HASH (cluster_id) BUCKETS 1\n"
+ " PROPERTIES (\n"
+ " \"dynamic_partition.enable\" = \"true\",\n"
+ + " \"dynamic_partition.buckets\" = \"1\",\n"
+ " \"dynamic_partition.time_unit\" = \"DAY\",\n"
+ " \"dynamic_partition.start\" = \"-7\",\n"
+ " \"dynamic_partition.end\" = \"3\",\n"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]