This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new b72fc6255 [metrics] Add metrics for create and delete op time
b72fc6255 is described below

commit b72fc6255a77418ef87fc5f07476528b027c6f99
Author: kedeng <kdeng...@gmail.com>
AuthorDate: Wed Apr 17 14:10:27 2024 +0800

    [metrics] Add metrics for create and delete op time
    
    Add server-level histogram metrics that track how long the
    create tablet and delete tablet operations take to run.
    These metrics help with tracking and analyzing past issues,
    and make it easier to configure monitoring alerts.
    
    Change-Id: I02bd52013caa94a33143cb16ff3831a49b74bac4
    Reviewed-on: http://gerrit.cloudera.org:8080/21316
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <ale...@apache.org>
---
 src/kudu/tserver/tablet_server-test.cc     |  7 +++++
 src/kudu/tserver/ts_tablet_manager-test.cc | 41 ++++++++++++++++++++++++++++++
 src/kudu/tserver/ts_tablet_manager.cc      | 30 ++++++++++++++++++++++
 src/kudu/tserver/ts_tablet_manager.h       |  4 +++
 4 files changed, 82 insertions(+)

diff --git a/src/kudu/tserver/tablet_server-test.cc b/src/kudu/tserver/tablet_server-test.cc
index 6b1229776..69678c03e 100644
--- a/src/kudu/tserver/tablet_server-test.cc
+++ b/src/kudu/tserver/tablet_server-test.cc
@@ -238,6 +238,7 @@ METRIC_DECLARE_gauge_size(slow_scans);
 METRIC_DECLARE_histogram(flush_dms_duration);
 METRIC_DECLARE_histogram(op_apply_queue_length);
 METRIC_DECLARE_histogram(op_apply_queue_time);
+METRIC_DECLARE_histogram(delete_tablet_run_time);
 
 
 namespace kudu {
@@ -4051,6 +4052,9 @@ TEST_F(TabletServerTest, TestDeleteTablet) {
 }
 
 TEST_F(TabletServerTest, TestDeleteTablet_TabletNotCreated) {
+  scoped_refptr<Histogram> delete_tablet_run_time =
+      METRIC_delete_tablet_run_time.Instantiate(mini_server_->server()->metric_entity());
+  ASSERT_EQ(0, delete_tablet_run_time->TotalCount());
   DeleteTabletRequestPB req;
   DeleteTabletResponsePB resp;
   RpcController rpc;
@@ -4067,6 +4071,9 @@ TEST_F(TabletServerTest, TestDeleteTablet_TabletNotCreated) {
     ASSERT_TRUE(resp.has_error());
     ASSERT_EQ(TabletServerErrorPB::TABLET_NOT_FOUND, resp.error().code());
   }
+
+  // Check that the histogram is not populated.
+  ASSERT_EQ(0, delete_tablet_run_time->TotalCount());
 }
 
 TEST_F(TabletServerTest, TestDeleteTabletBenchmark) {
diff --git a/src/kudu/tserver/ts_tablet_manager-test.cc b/src/kudu/tserver/ts_tablet_manager-test.cc
index f9b2df870..a3bf78889 100644
--- a/src/kudu/tserver/ts_tablet_manager-test.cc
+++ b/src/kudu/tserver/ts_tablet_manager-test.cc
@@ -51,6 +51,7 @@
 #include "kudu/tserver/mini_tablet_server.h"
 #include "kudu/tserver/tablet_server.h"
 #include "kudu/util/logging.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/oid_generator.h"
@@ -68,6 +69,9 @@ DECLARE_bool(tablet_bootstrap_skip_opening_tablet_for_testing);
 DECLARE_int32(tablet_metadata_load_inject_latency_ms);
 DECLARE_int32(update_tablet_metrics_interval_ms);
 
+METRIC_DECLARE_histogram(create_tablet_run_time);
+METRIC_DECLARE_histogram(delete_tablet_run_time);
+
 #define ASSERT_REPORT_HAS_UPDATED_TABLET(report, tablet_id) \
   NO_FATALS(AssertReportHasUpdatedTablet(report, tablet_id))
 
@@ -81,6 +85,7 @@ using kudu::master::TabletReportPB;
 using kudu::pb_util::SecureShortDebugString;
 using kudu::tablet::LocalTabletWriter;
 using kudu::tablet::Tablet;
+using kudu::tablet::TabletDataState;
 using kudu::tablet::TabletReplica;
 using std::nullopt;
 using std::optional;
@@ -183,6 +188,42 @@ class TsTabletManagerTest : public KuduTest {
   RaftConfigPB config_;
 };
 
+class TestCreateAndDeleteMetrics :
+    public TsTabletManagerTest,
+    public ::testing::WithParamInterface<TabletDataState> {
+};
+
+INSTANTIATE_TEST_SUITE_P(Params, TestCreateAndDeleteMetrics,
+                         ::testing::Values(TabletDataState::TABLET_DATA_DELETED,
+                                           TabletDataState::TABLET_DATA_TOMBSTONED));
+
+TEST_P(TestCreateAndDeleteMetrics, TestCreateAndDifferentModeDeleteMetrics) {
+  TabletDataState data_state = GetParam();
+  scoped_refptr<Histogram> create_tablet_run_time =
+    METRIC_create_tablet_run_time.Instantiate(mini_server_->server()->metric_entity());
+  ASSERT_EQ(0, create_tablet_run_time->TotalCount());
+
+  scoped_refptr<Histogram> delete_tablet_run_time =
+    METRIC_delete_tablet_run_time.Instantiate(mini_server_->server()->metric_entity());
+  ASSERT_EQ(0, delete_tablet_run_time->TotalCount());
+
+  string test_tablet = "ffffffffffffffffffffffffffffffff";
+  scoped_refptr<TabletReplica> test_replica;
+
+  // Create a new tablet.
+  ASSERT_OK(CreateNewTablet(test_tablet, schema_, true, nullopt, nullopt, &test_replica));
+  ASSERT_EQ(test_tablet, test_replica->tablet()->tablet_id());
+  // Metrics should be incremented.
+  ASSERT_EQ(1, create_tablet_run_time->TotalCount());
+
+  ASSERT_EQ(0, delete_tablet_run_time->TotalCount());
+  ASSERT_OK(tablet_manager_->DeleteTablet(test_tablet,
+                                          data_state,
+                                          nullopt));
+  // Metrics should be incremented.
+  ASSERT_EQ(1, delete_tablet_run_time->TotalCount());
+}
+
 TEST_F(TsTabletManagerTest, TestCreateTablet) {
   string tablet1 = "0fffffffffffffffffffffffffffffff";
   string tablet2 = "1fffffffffffffffffffffffffffffff";
diff --git a/src/kudu/tserver/ts_tablet_manager.cc b/src/kudu/tserver/ts_tablet_manager.cc
index 8b3ae9a24..64550806c 100644
--- a/src/kudu/tserver/ts_tablet_manager.cc
+++ b/src/kudu/tserver/ts_tablet_manager.cc
@@ -69,6 +69,7 @@
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/flag_validators.h"
 #include "kudu/util/logging.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/scoped_cleanup.h"
@@ -266,6 +267,20 @@ METRIC_DEFINE_gauge_int64(server, tablets_opening_time_startup,
                           "Time taken to start the tablets during server startup",
                           kudu::MetricLevel::kDebug);
 
+METRIC_DEFINE_histogram(server, create_tablet_run_time,
+                        "Create Tablet Operation Run Time",
+                        kudu::MetricUnit::kMicroseconds,
+                        "The runtime of the create tablet operation.",
+                        kudu::MetricLevel::kDebug,
+                        10000000, 2);
+
+METRIC_DEFINE_histogram(server, delete_tablet_run_time,
+                        "Delete Tablet Operation Run Time",
+                        kudu::MetricUnit::kMicroseconds,
+                        "The runtime of the delete tablet operation.",
+                        kudu::MetricLevel::kDebug,
+                        10000000, 2);
+
 DECLARE_int32(heartbeat_interval_ms);
 
 using kudu::consensus::ConsensusMetadata;
@@ -379,6 +394,11 @@ TSTabletManager::TSTabletManager(TabletServer* server)
 
   tablets_num_opened_startup_ = METRIC_tablets_num_opened_startup.Instantiate(
       server->metric_entity(), 0);
+
+  create_tablet_run_time_ =
+      METRIC_create_tablet_run_time.Instantiate(server->metric_entity());
+  delete_tablet_run_time_ =
+      METRIC_delete_tablet_run_time.Instantiate(server->metric_entity());
 }
 
 // Base class for tasks submitted against TSTabletManager threadpools whose
@@ -633,6 +653,7 @@ Status TSTabletManager::CreateNewTablet(const string& table_id,
 
   // Set the initial opid_index for a RaftConfigPB to -1.
   config.set_opid_index(consensus::kInvalidOpIdIndex);
+  MonoTime time_started = MonoTime::Now();
 
   scoped_refptr<TransitionInProgressDeleter> deleter;
   {
@@ -686,6 +707,10 @@ Status TSTabletManager::CreateNewTablet(const string& table_id,
   if (replica) {
     *replica = new_replica;
   }
+
+  create_tablet_run_time_->Increment(
+      (MonoTime::Now() - time_started).ToMicroseconds());
+
   return Status::OK();
 }
 
@@ -1118,6 +1143,7 @@ Status TSTabletManager::DeleteTablet(
   }
 
   TRACE("Deleting tablet $0", tablet_id);
+  MonoTime time_started = MonoTime::Now();
 
   if (PREDICT_FALSE(FLAGS_delete_tablet_inject_latency_ms > 0)) {
     LOG(WARNING) << "Injecting " << FLAGS_delete_tablet_inject_latency_ms
@@ -1139,6 +1165,8 @@ Status TSTabletManager::DeleteTablet(
   // If a tablet is already tombstoned, then a request to tombstone
   // the same tablet should become a no-op.
   if (delete_type == TABLET_DATA_TOMBSTONED && data_state == TABLET_DATA_TOMBSTONED) {
+    delete_tablet_run_time_->Increment(
+        (MonoTime::Now() - time_started).ToMicroseconds());
     return Status::OK();
   }
 
@@ -1197,6 +1225,8 @@ Status TSTabletManager::DeleteTablet(
     CHECK_EQ(1, tablet_map_.erase(tablet_id)) << tablet_id;
     InsertOrDie(&perm_deleted_tablet_ids_, tablet_id);
   }
+  delete_tablet_run_time_->Increment(
+      (MonoTime::Now() - time_started).ToMicroseconds());
 
   return Status::OK();
 }
diff --git a/src/kudu/tserver/ts_tablet_manager.h b/src/kudu/tserver/ts_tablet_manager.h
index d00464816..144afbe95 100644
--- a/src/kudu/tserver/ts_tablet_manager.h
+++ b/src/kudu/tserver/ts_tablet_manager.h
@@ -501,6 +501,10 @@ class TSTabletManager : public tserver::TabletReplicaLookupIf {
   // ensures we do not attempt to collect metrics while calling the destructor.
   FunctionGaugeDetacher metric_detacher_;
 
+  // Track the creation and deletion time of tablets on the node.
+  scoped_refptr<Histogram> create_tablet_run_time_;
+  scoped_refptr<Histogram> delete_tablet_run_time_;
+
   DISALLOW_COPY_AND_ASSIGN(TSTabletManager);
 };
 

Reply via email to