This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 44d687c  [client] add logging on slow metacache operations
44d687c is described below

commit 44d687caf3633d75b0cc64f1a25f444484ce43ad
Author: Alexey Serbin <ale...@apache.org>
AuthorDate: Tue Mar 23 14:08:46 2021 -0700

    [client] add logging on slow metacache operations
    
    This patch adds 50ms threshold slow logging for metacache operations:
      * ProcessGetTableLocationsResponse
      * ProcessGetTabletLocationsResponse
      * LookupEntryByIdFastPath
      * LookupEntryByKeyFastPath
    
    Change-Id: I5eb0593794f9a971f3daea7d346f70b0dc32bfa7
    Reviewed-on: http://gerrit.cloudera.org:8080/17221
    Tested-by: Kudu Jenkins
    Reviewed-by: Grant Henke <granthe...@apache.org>
---
 src/kudu/client/meta_cache.cc | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/kudu/client/meta_cache.cc b/src/kudu/client/meta_cache.cc
index 7bb692b..131795e 100644
--- a/src/kudu/client/meta_cache.cc
+++ b/src/kudu/client/meta_cache.cc
@@ -56,6 +56,7 @@
 #include "kudu/util/net/sockaddr.h"
 #include "kudu/util/pb_util.h"
 #include "kudu/util/scoped_cleanup.h"
+#include "kudu/util/stopwatch.h"
 
 using kudu::consensus::RaftPeerPB;
 using kudu::master::ANY_REPLICA;
@@ -284,8 +285,9 @@ void RemoteTablet::MarkReplicaFailed(RemoteTabletServer *ts,
   std::lock_guard<simple_spinlock> l(lock_);
   VLOG(2) << "Tablet " << tablet_id_ << ": Current remote replicas in meta 
cache: "
           << ReplicasAsStringUnlocked();
-  KLOG_EVERY_N_SECS(WARNING, 1) << "Tablet " << tablet_id_ << ": Replica " << 
ts->ToString()
-               << " has failed: " << status.ToString();
+  KLOG_EVERY_N_SECS(WARNING, 1) << Substitute(
+      "tablet $0: replica $1 has failed: $2",
+                                     tablet_id_, ts->ToString(), 
status.ToString());
   for (RemoteReplica& rep : replicas_) {
     if (rep.ts == ts) {
       rep.failed = true;
@@ -965,6 +967,7 @@ Status MetaCache::ProcessLookupResponse(const LookupRpc& 
rpc,
 Status MetaCache::ProcessGetTabletLocationsResponse(const string& tablet_id,
                                                     const 
GetTabletLocationsResponsePB& resp,
                                                     MetaCacheEntry* 
cache_entry) {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, 50, "processing tablet locations 
response");
   MonoTime expiration_time = MonoTime::Now() +
       MonoDelta::FromMilliseconds(FLAGS_client_tablet_locations_by_id_ttl_ms);
   const auto& tablet_locations = resp.tablet_locations();
@@ -1020,6 +1023,7 @@ Status MetaCache::ProcessGetTableLocationsResponse(const 
KuduTable* table,
                                                    const 
GetTableLocationsResponsePB& resp,
                                                    MetaCacheEntry* cache_entry,
                                                    int max_returned_locations) 
{
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, 50, "processing table locations 
response");
   MonoTime expiration_time = MonoTime::Now() +
       MonoDelta::FromMilliseconds(resp.ttl_millis());
 
@@ -1177,6 +1181,7 @@ Status MetaCache::ProcessGetTableLocationsResponse(const 
KuduTable* table,
 bool MetaCache::LookupEntryByKeyFastPath(const KuduTable* table,
                                          const string& partition_key,
                                          MetaCacheEntry* entry) {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, 50, "slow lookup of entry by key");
   shared_lock<rw_spinlock> l(lock_.get_lock());
   const TabletMap* tablets = FindOrNull(tablets_by_table_and_key_, 
table->id());
   if (PREDICT_FALSE(!tablets)) {
@@ -1229,6 +1234,7 @@ Status MetaCache::DoFastPathLookup(const KuduTable* table,
 
 bool MetaCache::LookupEntryByIdFastPath(const string& tablet_id,
                                         MetaCacheEntry* entry) {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, 50, "slow lookup of entry by ID");
   shared_lock<rw_spinlock> l(lock_.get_lock());
   const auto* cache_entry = FindOrNull(entry_by_tablet_id_, tablet_id);
   if (PREDICT_FALSE(!cache_entry)) {
@@ -1328,11 +1334,12 @@ void MetaCache::LookupTabletById(KuduClient* client,
 
 void MetaCache::MarkTSFailed(RemoteTabletServer* ts,
                              const Status& status) {
-  LOG(INFO) << "Marking tablet server " << ts->ToString() << " as failed.";
-  Status ts_status = status.CloneAndPrepend("TS failed");
+  LOG(INFO) << Substitute("marking tablet server $0 as failed", 
ts->ToString());
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, 50, "marking tablet server as failed");
+  const auto ts_status = status.CloneAndPrepend("TS failed");
 
   shared_lock<rw_spinlock> l(lock_.get_lock());
-  // TODO: replace with a ts->tablet multimap for faster lookup?
+  // TODO(adar): replace with a ts->tablet multimap for faster lookup?
   for (const auto& tablet : tablets_by_id_) {
     // We just loop on all tablets; if a tablet does not have a replica on this
     // TS, MarkReplicaFailed() returns false and we ignore the return value.

Reply via email to