This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 53760a54dd6 [improvement](create tablet) be choose disk tolerate with
little skew (#30354)
53760a54dd6 is described below
commit 53760a54dd61fa62cfff2f9cb7b83cfbadbf9174
Author: yujun <[email protected]>
AuthorDate: Thu Jan 25 23:59:37 2024 +0800
[improvement](create tablet) be choose disk tolerate with little skew
(#30354)
---
be/src/common/config.cpp | 3 +++
be/src/common/config.h | 3 +++
be/src/olap/storage_engine.cpp | 53 +++++++++++++++++++++++++++++++++---------
be/src/olap/storage_engine.h | 4 +---
4 files changed, 49 insertions(+), 14 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index f006fa43342..6954de836ca 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1164,6 +1164,9 @@ DEFINE_mInt32(report_query_statistics_interval_ms,
"3000");
// 30s
DEFINE_mInt32(query_statistics_reserve_timeout_ms, "30000");
+// consider two high-usage disks to be at the same available level if their usages differ by less than this value.
+DEFINE_mDouble(high_disk_avail_level_diff_usages, "0.15");
+
// create tablet in partition random robin idx lru size, default 10000
DEFINE_Int32(partition_disk_index_lru_size, "10000");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 64555adbbb7..8a33c8c19d1 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1240,6 +1240,9 @@ DECLARE_Int32(ignore_invalid_partition_id_rowset_num);
DECLARE_mInt32(report_query_statistics_interval_ms);
DECLARE_mInt32(query_statistics_reserve_timeout_ms);
+// consider two high-usage disks to be at the same available level if their usages differ by less than this value.
+DECLARE_mDouble(high_disk_avail_level_diff_usages);
+
// create tablet in partition random robin idx lru size, default 10000
DECLARE_Int32(partition_disk_index_lru_size);
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 2090127f41c..069734d8acd 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -456,16 +456,6 @@ Status StorageEngine::set_cluster_id(int32_t cluster_id) {
return Status::OK();
}
-StorageEngine::DiskRemainingLevel get_available_level(double
disk_usage_percent) {
- assert(disk_usage_percent <= 1);
- if (disk_usage_percent < 0.7) {
- return StorageEngine::DiskRemainingLevel::LOW;
- } else if (disk_usage_percent < 0.85) {
- return StorageEngine::DiskRemainingLevel::MID;
- }
- return StorageEngine::DiskRemainingLevel::HIGH;
-}
-
int StorageEngine::_get_and_set_next_disk_index(int64 partition_id,
TStorageMedium::type
storage_medium) {
auto key = CreateTabletIdxCache::get_key(partition_id, storage_medium);
@@ -481,6 +471,7 @@ int StorageEngine::_get_and_set_next_disk_index(int64
partition_id,
void StorageEngine::_get_candidate_stores(TStorageMedium::type storage_medium,
std::vector<DirInfo>& dir_infos) {
+ std::vector<double> usages;
for (auto& it : _store_map) {
DataDir* data_dir = it.second.get();
if (data_dir->is_used()) {
@@ -489,11 +480,51 @@ void
StorageEngine::_get_candidate_stores(TStorageMedium::type storage_medium,
!data_dir->reach_capacity_limit(0)) {
DirInfo dir_info;
dir_info.data_dir = data_dir;
- dir_info.available_level =
get_available_level(data_dir->get_usage(0));
+ dir_info.available_level = 0;
+ usages.push_back(data_dir->get_usage(0));
dir_infos.push_back(dir_info);
}
}
}
+
+ if (dir_infos.size() <= 1) {
+ return;
+ }
+
+ std::sort(usages.begin(), usages.end());
+ if (usages.back() < 0.7) {
+ return;
+ }
+
+ std::vector<double> level_min_usages;
+ level_min_usages.push_back(usages[0]);
+ for (auto usage : usages) {
+ // usages below 0.7 are treated as one level, with a small tolerance for skew
+ if (usage < 0.7 - (config::high_disk_avail_level_diff_usages / 2.0)) {
+ continue;
+ }
+
+ // at high usages, each level spans high_disk_avail_level_diff_usages (default 15%)
+ // for example, if the disk usages are 0.66, 0.72 and 0.83,
+ // then level_min_usages = [0.66, 0.83], dividing the disks into 2 levels: [0.66, 0.72], [0.83]
+ if (usage >= level_min_usages.back() +
config::high_disk_avail_level_diff_usages) {
+ level_min_usages.push_back(usage);
+ }
+ }
+ for (auto& dir_info : dir_infos) {
+ double usage = dir_info.data_dir->get_usage(0);
+ for (size_t i = 1; i < level_min_usages.size() && usage >=
level_min_usages[i]; i++) {
+ dir_info.available_level++;
+ }
+
+ // when usage is too high, stop considering balance
+ // and move the disk to a higher level.
+ // for example, with two disks whose usages are 0.85 and 0.92, let tablets fall on the first disk.
+ // by default, storage_flood_stage_usage_percent = 90
+ if (usage > config::storage_flood_stage_usage_percent / 100.0) {
+ dir_info.available_level++;
+ }
+ }
}
std::vector<DataDir*> StorageEngine::get_stores_for_create_tablet(
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index b2b72b6d523..bc581aa329a 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -150,8 +150,6 @@ public:
StorageEngine(const EngineOptions& options);
~StorageEngine() override;
- enum class DiskRemainingLevel { LOW, MID, HIGH };
-
Status open() override;
Status create_tablet(const TCreateTabletReq& request, RuntimeProfile*
profile);
@@ -541,7 +539,7 @@ public:
struct DirInfo {
DataDir* data_dir;
- StorageEngine::DiskRemainingLevel available_level;
+ int available_level = 0;
bool operator<(const DirInfo& other) const {
if (available_level != other.available_level) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]