This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 3c85740a1e5 branch-2.1: [Opt](metric) Add metrics for invalid delete bitmap key count (#51654)
3c85740a1e5 is described below
commit 3c85740a1e5735e93292e3f8f6a10246018eea93
Author: bobhan1 <[email protected]>
AuthorDate: Fri Jun 13 11:24:54 2025 +0800
branch-2.1: [Opt](metric) Add metrics for invalid delete bitmap key count (#51654)
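### What problem does this PR solve?
Adds two BE gauge metrics that count delete bitmap keys summed over all tablets: keys whose rowset id the owning tablet contains (valid) and keys whose rowset id it does not contain (invalid). A daemon thread refreshes both every `report_delete_bitmap_metrics_interval_s` seconds (default 60), but only while `enable_report_delete_bitmap_metrics` is true (default false). Example output: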
```
# TYPE doris_be_invalid_delete_bitmap_key_count gauge
doris_be_invalid_delete_bitmap_key_count 0
# TYPE doris_be_valid_delete_bitmap_key_count gauge
doris_be_valid_delete_bitmap_key_count 409
```
https://github.com/apache/doris/pull/50080
---
be/src/common/config.cpp | 3 +++
be/src/common/config.h | 3 +++
be/src/common/daemon.cpp | 22 ++++++++++++++++++++++
be/src/common/daemon.h | 1 +
be/src/olap/tablet.cpp | 23 +++++++++++++++++++++++
be/src/olap/tablet.h | 3 +++
be/src/olap/tablet_manager.cpp | 16 ++++++++++++++++
be/src/olap/tablet_manager.h | 2 ++
be/src/olap/tablet_meta.cpp | 21 +++++++++++++++++++++
be/src/olap/tablet_meta.h | 4 ++++
be/src/util/doris_metrics.cpp | 4 ++++
be/src/util/doris_metrics.h | 2 ++
12 files changed, 104 insertions(+)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 41fea7aa651..7db8f200600 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1373,6 +1373,9 @@ DEFINE_mInt32(load_trigger_compaction_version_percent, "66");
DEFINE_mInt64(base_compaction_interval_seconds_since_last_operation, "86400");
DEFINE_mBool(enable_compaction_pause_on_high_memory, "true");
+DEFINE_mBool(enable_report_delete_bitmap_metrics, "false");
+DEFINE_mInt32(report_delete_bitmap_metrics_interval_s, "60");
+
// clang-format off
#ifdef BE_TEST
// test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 1bb1ea0ccc2..c0a60419cea 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1445,6 +1445,9 @@ DECLARE_mInt32(load_trigger_compaction_version_percent);
DECLARE_mInt64(base_compaction_interval_seconds_since_last_operation);
DECLARE_mBool(enable_compaction_pause_on_high_memory);
+DECLARE_mBool(enable_report_delete_bitmap_metrics);
+DECLARE_mInt32(report_delete_bitmap_metrics_interval_s);
+
#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index 11050c233c7..60f875ff1d4 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -365,6 +365,23 @@ void Daemon::report_runtime_query_statistics_thread() {
}
}
+void Daemon::report_delete_bitmap_metrics_thread() {
+ while (!_stop_background_threads_latch.wait_for(
+ std::chrono::seconds(config::report_delete_bitmap_metrics_interval_s))) {
+ if (config::enable_report_delete_bitmap_metrics) {
+ auto* metrics = DorisMetrics::instance();
+ metrics->valid_delete_bitmap_key_count->set_value(
+ StorageEngine::instance()
+ ->tablet_manager()
+ ->get_valid_delete_bitmap_key_count());
+ metrics->invalid_delete_bitmap_key_count->set_value(
+ StorageEngine::instance()
+ ->tablet_manager()
+ ->get_invalid_delete_bitmap_key_count());
+ }
+ }
+}
+
void Daemon::je_purge_dirty_pages_thread() const {
do {
std::unique_lock<std::mutex> l(doris::MemInfo::je_purge_dirty_pages_lock);
@@ -455,6 +472,11 @@ void Daemon::start() {
[this]() { this->report_runtime_query_statistics_thread(); },
&_threads.emplace_back());
CHECK(st.ok()) << st;
+ st = Thread::create(
+ "Daemon", "delete_bitmap_metrics_thread",
+ [this]() { this->report_delete_bitmap_metrics_thread(); }, &_threads.emplace_back());
+ CHECK(st.ok()) << st;
+
st = Thread::create(
"Daemon", "wg_weighted_memory_ratio_refresh_thread",
[this]() { this->wg_weighted_memory_ratio_refresh_thread(); },
diff --git a/be/src/common/daemon.h b/be/src/common/daemon.h
index 9674b139f00..85b41e6f4ca 100644
--- a/be/src/common/daemon.h
+++ b/be/src/common/daemon.h
@@ -45,6 +45,7 @@ private:
void je_purge_dirty_pages_thread() const;
void cache_prune_stale_thread();
void report_runtime_query_statistics_thread();
+ void report_delete_bitmap_metrics_thread();
void wg_weighted_memory_ratio_refresh_thread();
void calculate_workload_group_metrics_thread();
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index fe229202701..4661f5eae8d 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -4279,4 +4279,27 @@ Status Tablet::show_nested_index_file(std::string* json_meta) {
return Status::OK();
}
+uint64_t Tablet::valid_delete_bitmap_key_count() {
+ uint64_t ret {0};
+ std::shared_lock<std::shared_mutex> rlock {_meta_lock};
+ _tablet_meta->delete_bitmap().traverse_rowset_id_prefix(
+ [&](const DeleteBitmap& self, const RowsetId& rowset_id) {
+ if (_contains_rowset(rowset_id)) {
+ ret += self.count_key_with_rowset_id_unlocked(rowset_id);
+ }
+ });
+ return ret;
+}
+
+uint64_t Tablet::invalid_delete_bitmap_key_count() {
+ uint64_t ret {0};
+ std::shared_lock<std::shared_mutex> rlock {_meta_lock};
+ _tablet_meta->delete_bitmap().traverse_rowset_id_prefix(
+ [&](const DeleteBitmap& self, const RowsetId& rowset_id) {
+ if (!_contains_rowset(rowset_id)) {
+ ret += self.count_key_with_rowset_id_unlocked(rowset_id);
+ }
+ });
+ return ret;
+}
} // namespace doris
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 362ffcd2e06..a3f2c40a612 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -614,6 +614,9 @@ public:
int32_t* rowset_count, int64_t* file_count);
Status show_nested_index_file(std::string* json_meta);
+ uint64_t valid_delete_bitmap_key_count();
+ uint64_t invalid_delete_bitmap_key_count();
+
private:
Status _init_once_action();
void _print_missed_versions(const std::vector<Version>& missed_versions) const;
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 63c30b1f2ce..911b758bc81 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -685,6 +685,22 @@ uint64_t TabletManager::get_segment_nums() {
return segment_nums;
}
+uint64_t TabletManager::get_valid_delete_bitmap_key_count() {
+ uint64_t valid_delete_bitmap_key_count = 0;
+ for_each_tablet([&](const TabletSharedPtr& tablet) {
+ valid_delete_bitmap_key_count += tablet->valid_delete_bitmap_key_count();
+ });
+ return valid_delete_bitmap_key_count;
+}
+
+uint64_t TabletManager::get_invalid_delete_bitmap_key_count() {
+ uint64_t invalid_delete_bitmap_key_count = 0;
+ for_each_tablet([&](const TabletSharedPtr& tablet) {
+ invalid_delete_bitmap_key_count += tablet->invalid_delete_bitmap_key_count();
+ });
+ return invalid_delete_bitmap_key_count;
+}
+
bool TabletManager::get_tablet_id_and_schema_hash_from_path(const string& path,
TTabletId* tablet_id,
TSchemaHash* schema_hash) {
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index f4f3765df65..bf000fad95b 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -97,6 +97,8 @@ public:
uint64_t get_rowset_nums();
uint64_t get_segment_nums();
+ uint64_t get_valid_delete_bitmap_key_count();
+ uint64_t get_invalid_delete_bitmap_key_count();
// Extract tablet_id and schema_hash from given path.
//
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index 7db33c66151..c5fdb21479d 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -27,6 +27,7 @@
#include <time.h>
#include <cstdint>
+#include <limits>
#include <memory>
#include <set>
#include <utility>
@@ -1159,6 +1160,26 @@ bool DeleteBitmap::has_calculated_for_multi_segments(const RowsetId& rowset_id)
return contains({rowset_id, INVALID_SEGMENT_ID, TEMP_VERSION_COMMON}, ROWSET_SENTINEL_MARK);
}
+void DeleteBitmap::traverse_rowset_id_prefix(
+ const std::function<void(const DeleteBitmap&, const RowsetId& rowsetId)>& func) const {
+ std::shared_lock rlock {lock};
+ auto it = delete_bitmap.cbegin();
+ while (it != delete_bitmap.cend()) {
+ RowsetId rowset_id = std::get<0>(it->first);
+ func(*this, rowset_id);
+ // find next rowset id
+ it = delete_bitmap.upper_bound({rowset_id, std::numeric_limits<SegmentId>::max(),
+ std::numeric_limits<Version>::max()});
+ }
+}
+
+uint64_t DeleteBitmap::count_key_with_rowset_id_unlocked(const RowsetId& rowset_id) const {
+ auto lower_bound = delete_bitmap.lower_bound({rowset_id, 0, 0});
+ auto upper_bound = delete_bitmap.upper_bound({rowset_id, std::numeric_limits<SegmentId>::max(),
+ std::numeric_limits<Version>::max()});
+ return std::distance(lower_bound, upper_bound);
+}
+
// We cannot just copy the underlying memory to construct a string
// due to equivalent objects may have different padding bytes.
// Reading padding bytes is undefined behavior, neither copy nor
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 7767e6d2184..ba7dbbaf20d 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -511,6 +511,10 @@ public:
bool has_calculated_for_multi_segments(const RowsetId& rowset_id) const;
+ void traverse_rowset_id_prefix(
+ const std::function<void(const DeleteBitmap&, const RowsetId&
rowsetId)>& func) const;
+ uint64_t count_key_with_rowset_id_unlocked(const RowsetId& rowset_id)
const;
+
class AggCachePolicy : public LRUCachePolicyTrackingManual {
public:
AggCachePolicy(size_t capacity)
diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp
index d55c09a4b94..76f93fbb969 100644
--- a/be/src/util/doris_metrics.cpp
+++ b/be/src/util/doris_metrics.cpp
@@ -137,6 +137,8 @@
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(tablet_base_max_compaction_score, MetricUnit:
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(all_rowsets_num, MetricUnit::NOUNIT);
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(all_segments_num, MetricUnit::NOUNIT);
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(valid_delete_bitmap_key_count, MetricUnit::NOUNIT);
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(invalid_delete_bitmap_key_count, MetricUnit::NOUNIT);
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(compaction_used_permits, MetricUnit::NOUNIT);
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(compaction_waitting_permits, MetricUnit::NOUNIT);
@@ -269,6 +271,8 @@ DorisMetrics::DorisMetrics() : _metric_registry(_s_registry_name) {
INT_GAUGE_METRIC_REGISTER(_server_metric_entity, all_rowsets_num);
INT_GAUGE_METRIC_REGISTER(_server_metric_entity, all_segments_num);
+ INT_GAUGE_METRIC_REGISTER(_server_metric_entity, valid_delete_bitmap_key_count);
+ INT_GAUGE_METRIC_REGISTER(_server_metric_entity, invalid_delete_bitmap_key_count);
INT_GAUGE_METRIC_REGISTER(_server_metric_entity, compaction_used_permits);
INT_GAUGE_METRIC_REGISTER(_server_metric_entity, compaction_waitting_permits);
diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h
index 2f1dc4da557..0b37882f77d 100644
--- a/be/src/util/doris_metrics.h
+++ b/be/src/util/doris_metrics.h
@@ -133,6 +133,8 @@ public:
IntGauge* all_rowsets_num = nullptr;
IntGauge* all_segments_num = nullptr;
+ IntGauge* valid_delete_bitmap_key_count {nullptr};
+ IntGauge* invalid_delete_bitmap_key_count {nullptr};
// permits have been used for all compaction tasks
IntGauge* compaction_used_permits = nullptr;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]