This is an automated email from the ASF dual-hosted git repository. achennaka pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push: new 1214a38e5 [log] use THROTTLE_MSG for KLOG_EVERY_N_SECS when necessary 1214a38e5 is described below commit 1214a38e55231ff3aad342ef9efbc326f7a6e659 Author: Alexey Serbin <ale...@apache.org> AuthorDate: Mon Dec 18 17:55:08 2023 -0800 [log] use THROTTLE_MSG for KLOG_EVERY_N_SECS when necessary As a follow-up to e8f459284 and 8bc0be58e, this patch updates call sites of KLOG_EVERY_N_SECS w.r.t. using THROTTLE_MSG to report on the number of suppressed messages. The rationale behind this update is to provide extra information that is useful for troubleshooting, according to my findings while troubleshooting a few recent issues. In addition, I added one micro-test for KLOG_EVERY_N_SECS when used without THROTTLE_MSG and removed useless logging in another test: it seems the logging was added there just for debugging while implementing the test. I also took the liberty of updating corresponding messages and the logging frequency to be more appropriate. Otherwise, this patch doesn't contain any functional modifications. Change-Id: I1ef2952b86b15f15cd8cd806619e5113ad1c758f Reviewed-on: http://gerrit.cloudera.org:8080/20855 Tested-by: Kudu Jenkins Reviewed-by: Abhishek Chennaka <achenn...@cloudera.com> --- src/kudu/master/master_service.cc | 6 ++++-- src/kudu/master/ts_manager.cc | 5 +++-- src/kudu/rpc/proxy.cc | 2 +- src/kudu/rpc/server_negotiation.cc | 19 +++++++++++++------ src/kudu/security/token-test.cc | 8 +------- src/kudu/security/token_verifier.cc | 8 ++++++-- src/kudu/tablet/rowset_info.cc | 4 ++-- src/kudu/tablet/tablet_replica.cc | 11 +++++++---- src/kudu/thrift/sasl_client_transport.cc | 5 +++-- src/kudu/util/logging-test.cc | 24 ++++++++++++++++++++++++ src/kudu/util/maintenance_manager.cc | 6 +++--- 11 files changed, 67 insertions(+), 31 deletions(-) diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc index b4bb01c99..54d5c21f7 100644 --- a/src/kudu/master/master_service.cc +++ b/src/kudu/master/master_service.cc @@ -881,8 +881,10 @@ void MasterServiceImpl::ConnectToMaster(const ConnectToMasterRequestPB* /*req*/, resp->mutable_error()->mutable_status()); resp->mutable_error()->set_code(MasterErrorPB::UNKNOWN_ERROR); rpc->RespondSuccess(); - KLOG_EVERY_N_SECS(WARNING, 60) << Substitute("invalid request from $0: $1", - rpc->requestor_string(), kErrMsg); + KLOG_EVERY_N_SECS(WARNING, 60) + << Substitute("invalid request from $0: $1", + rpc->requestor_string(), kErrMsg) + << THROTTLE_MSG; return; } diff --git a/src/kudu/master/ts_manager.cc b/src/kudu/master/ts_manager.cc index 13a81d289..aede186cb 100644 --- a/src/kudu/master/ts_manager.cc +++ b/src/kudu/master/ts_manager.cc @@ -25,6 +25,7 @@ #include <mutex> #include <optional> #include <ostream> +#include <type_traits> #include <utility> #include <gflags/gflags.h> @@ -155,7 +156,7 @@ Status TSManager::RegisterTS(const NodeInstancePB& instance, ? uuid : registration.rpc_addresses(0).host(); TRACE(Substitute("tablet server $0: assigning location", uuid)); string location_str; - const auto s = location_cache_->GetLocation(cmd_arg, &location_str); + auto s = location_cache_->GetLocation(cmd_arg, &location_str); TRACE(Substitute( "tablet server $0: assigned location '$1'", uuid, location_str)); @@ -166,7 +167,7 @@ Status TSManager::RegisterTS(const NodeInstancePB& instance, KLOG_EVERY_N_SECS(ERROR, 60) << Substitute( "Unable to assign location to tablet server $0: $1", Substitute("$0 ($1:$2)", uuid, addr.host(), addr.port()), - s.ToString()); + s.ToString()) << THROTTLE_MSG; return s; } location.emplace(std::move(location_str)); diff --git a/src/kudu/rpc/proxy.cc b/src/kudu/rpc/proxy.cc index b6b355960..2ec1244f0 100644 --- a/src/kudu/rpc/proxy.cc +++ b/src/kudu/rpc/proxy.cc @@ -238,7 +238,7 @@ void Proxy::AsyncRequest(const string& method, if (PREDICT_FALSE(!controller->status().ok())) { KLOG_EVERY_N_SECS(WARNING, 5) << Substitute("Call had error, refreshing address and retrying: $0", - controller->status().ToString()); + controller->status().ToString()) << THROTTLE_MSG; auto req_payload = controller->ReleaseRequestPayload(); controller->Reset(); RefreshDnsAndEnqueueRequest(method, std::move(req_payload), response, controller, callback); diff --git a/src/kudu/rpc/server_negotiation.cc b/src/kudu/rpc/server_negotiation.cc index 2f4481ee4..188cd0b35 100644 --- a/src/kudu/rpc/server_negotiation.cc +++ b/src/kudu/rpc/server_negotiation.cc @@ -74,6 +74,7 @@ using std::set; using std::string; using std::unique_ptr; using std::vector; +using strings::Substitute; // Fault injection flags. DEFINE_double(rpc_inject_invalid_authn_token_ratio, 0, @@ -504,12 +505,18 @@ Status ServerNegotiation::HandleNegotiate(const NegotiatePB& request) { authn_types.insert(AuthenticationType::JWT); } break; - case AuthenticationTypePB::TYPE_NOT_SET: { + case AuthenticationTypePB::TYPE_NOT_SET: + default: { Sockaddr addr; - RETURN_NOT_OK(socket_->GetPeerAddress(&addr)); + const auto s = socket_->GetPeerAddress(&addr); + WARN_NOT_OK(s, "unable to get peer address"); + constexpr const char* const kFormat = + "client at $0 supports unknown authentication type $1, consider updating server"; KLOG_EVERY_N_SECS(WARNING, 60) - << "client supports unknown authentication type, consider updating server, address: " - << addr.ToString(); + << Substitute(kFormat, + s.ok() ? addr.ToString() : "<unknown address>", + static_cast<uint32_t>(type.type_case())) + << THROTTLE_MSG; break; } } @@ -699,8 +706,8 @@ Status ServerNegotiation::AuthenticateBySasl(faststring* recv_buf) { // locally for the purposes of group mapping, ACLs, etc. string local_name; RETURN_NOT_OK_PREPEND(security::MapPrincipalToLocalName(principal, &local_name), - strings::Substitute("could not map krb5 principal '$0' to username", - principal)); + Substitute("could not map krb5 principal '$0' to username", + principal)); authenticated_user_.SetAuthenticatedByKerberos(std::move(local_name), std::move(principal)); } else { authenticated_user_.SetUnauthenticated(c_username); diff --git a/src/kudu/security/token-test.cc b/src/kudu/security/token-test.cc index 158cb91f5..34a6c1d87 100644 --- a/src/kudu/security/token-test.cc +++ b/src/kudu/security/token-test.cc @@ -21,6 +21,7 @@ #include <ostream> #include <string> #include <thread> +#include <type_traits> #include <utility> #include <vector> @@ -35,7 +36,6 @@ #include "kudu/security/token_signing_key.h" #include "kudu/security/token_verifier.h" #include "kudu/util/countdown_latch.h" -#include "kudu/util/logging.h" #include "kudu/util/monotime.h" #include "kudu/util/openssl_util.h" #include "kudu/util/pb_util.h" @@ -80,8 +80,6 @@ Status SignUntilRotatePast(TokenSigner* signer, TokenGenerator generate_token, auto cur_seq_num = seq_num; while (cur_seq_num == seq_num) { SleepFor(MonoDelta::FromMilliseconds(50)); - KLOG_EVERY_N_SECS(INFO, 1) << - Substitute("Generating $0 token for activity interval $1", token_type, seq_num); RETURN_NOT_OK_PREPEND(signer->TryRotateKey(), "Failed to attempt to rotate key"); SignedTokenPB signed_token; RETURN_NOT_OK_PREPEND(generate_token(&signed_token), @@ -816,18 +814,14 @@ TEST_F(TokenTest, TestKeyValidity) { const double key_validity_seconds = signer.key_validity_seconds_; threads.emplace_back([&first_tsk_validity_latch, key_validity_seconds] { SleepFor(MonoDelta::FromSeconds(key_validity_seconds)); - LOG(INFO) << Substitute("First TSK's validity interval of $0 secs has finished!", - key_validity_seconds); first_tsk_validity_latch.CountDown(); }); // Set up a second TSK so our threads can rotate TSKs when the time comes. while (true) { - KLOG_EVERY_N_SECS(INFO, 1) << "Waiting for a second key..."; unique_ptr<TokenSigningPrivateKey> tsk; ASSERT_OK(signer.CheckNeedKey(&tsk)); if (tsk) { - LOG(INFO) << "Added second key!"; ASSERT_OK(signer.AddKey(std::move(tsk))); break; } diff --git a/src/kudu/security/token_verifier.cc b/src/kudu/security/token_verifier.cc index 373c27258..155ecfa1c 100644 --- a/src/kudu/security/token_verifier.cc +++ b/src/kudu/security/token_verifier.cc @@ -28,6 +28,7 @@ #include <glog/logging.h> #include "kudu/gutil/map-util.h" +#include "kudu/gutil/strings/substitute.h" #include "kudu/gutil/walltime.h" #include "kudu/security/token.pb.h" #include "kudu/security/token_signing_key.h" @@ -40,6 +41,7 @@ using std::string; using std::transform; using std::unique_ptr; using std::vector; +using strings::Substitute; namespace kudu { namespace security { @@ -123,8 +125,10 @@ TokenVerificationResult TokenVerifier::VerifyTokenSignature( for (auto flag : token->incompatible_features()) { if (!TokenPB::Feature_IsValid(flag)) { - KLOG_EVERY_N_SECS(WARNING, 60) << "received token with unknown feature; " - "server needs to be updated"; + constexpr const char* const kFormat = + "received token with unknown feature $0; consider updating server"; + KLOG_EVERY_N_SECS(WARNING, 60) << Substitute( + kFormat, static_cast<uint32_t>(flag)) << THROTTLE_MSG; return TokenVerificationResult::INCOMPATIBLE_FEATURE; } } diff --git a/src/kudu/tablet/rowset_info.cc b/src/kudu/tablet/rowset_info.cc index 3abbfc208..770295401 100644 --- a/src/kudu/tablet/rowset_info.cc +++ b/src/kudu/tablet/rowset_info.cc @@ -298,9 +298,9 @@ void RowSetInfo::ComputeCdfAndCollectOrdered( if (rs->IsAvailableForCompaction()) { if (is_on_memory_budget && !(*is_on_memory_budget)(rs.get())) { // Skip rowsets filtered out by the memory budgeting. - KLOG_EVERY_N_SECS(INFO, 600) << Substitute( + KLOG_EVERY_N_SECS(INFO, 60) << Substitute( "$0 removed from compaction input due to memory constraints", - rs->ToString()); + rs->ToString()) << THROTTLE_MSG; continue; } available_rowsets.push_back(rs); diff --git a/src/kudu/tablet/tablet_replica.cc b/src/kudu/tablet/tablet_replica.cc index 0536580c0..ee2d99343 100644 --- a/src/kudu/tablet/tablet_replica.cc +++ b/src/kudu/tablet/tablet_replica.cc @@ -1184,8 +1184,10 @@ Status TabletReplica::TxnOpDispatcher::Dispatch( const auto txn_id = op->request()->txn_id(); std::lock_guard<simple_spinlock> guard(lock_); if (PREDICT_FALSE(unregistered_)) { - KLOG_EVERY_N_SECS(WARNING, 10) << Substitute( - "received request for unregistered TxnOpDispatcher (txn ID $0)", txn_id); + KLOG_EVERY_N_SECS(WARNING, 10) + << Substitute("received request for unregistered TxnOpDispatcher (txn ID $0)", + txn_id) + << THROTTLE_MSG; // TODO(aserbin): Status::ServiceUnavailable() is more appropriate here? return Status::IllegalState( "tablet replica could not accept txn write operation"); @@ -1293,8 +1295,9 @@ Status TabletReplica::TxnOpDispatcher::Submit() { void TabletReplica::TxnOpDispatcher::Cancel(const Status& status, TabletServerErrorPB::Code code) { CHECK(!status.ok()); - KLOG_EVERY_N_SECS(WARNING, 1) << Substitute("$0: cancelling pending write operations", - status.ToString()); + KLOG_EVERY_N_SECS(WARNING, 1) + << Substitute("$0: cancelling pending write operations", + status.ToString()) << THROTTLE_MSG; decltype(ops_queue_) ops; { std::lock_guard<simple_spinlock> guard(lock_); diff --git a/src/kudu/thrift/sasl_client_transport.cc b/src/kudu/thrift/sasl_client_transport.cc index a1766c38b..e0d5b4052 100644 --- a/src/kudu/thrift/sasl_client_transport.cc +++ b/src/kudu/thrift/sasl_client_transport.cc @@ -24,9 +24,9 @@ #include <memory> #include <ostream> #include <string> +#include <type_traits> #include <glog/logging.h> -#include <thrift/transport/TTransport.h> #include "kudu/gutil/endian.h" #include "kudu/gutil/port.h" @@ -159,7 +159,8 @@ void SaslClientTransport::ReadFrame() { if (payload_len > 1024 * 1024) { KLOG_EVERY_N_SECS(WARNING, 60) << "Received large Thrift SASL frame: " - << HumanReadableNumBytes::ToString(payload_len); + << HumanReadableNumBytes::ToString(payload_len) + << THROTTLE_MSG; if (payload_len > max_recv_buf_size_) { throw TTransportException(Substitute("Thrift SASL frame is too long: $0/$1", HumanReadableNumBytes::ToString(payload_len), diff --git a/src/kudu/util/logging-test.cc b/src/kudu/util/logging-test.cc index 6855a6d70..78994faf8 100644 --- a/src/kudu/util/logging-test.cc +++ b/src/kudu/util/logging-test.cc @@ -64,6 +64,30 @@ TEST(LoggingTest, TestThrottledLogging) { EXPECT_THAT(msgs[1], testing::ContainsRegex("\\[suppressed [0-9]{3,} similar messages\\]")); } +TEST(LoggingTest, ThrottledLoggingNoThrottleMsg) { + SKIP_IF_SLOW_NOT_ALLOWED(); + StringVectorSink sink; + ScopedRegisterSink srs(&sink); + + for (int i = 0; i < 10000; i++) { + KLOG_EVERY_N_SECS(INFO, 1) << "test"; + SleepFor(MonoDelta::FromMilliseconds(1)); + if (sink.logged_msgs().size() >= 2) { + break; + } + } + const vector<string>& msgs = sink.logged_msgs(); + ASSERT_GE(msgs.size(), 2); + + for (const auto& m: msgs) { + // All the lines should contain the message logged. + ASSERT_THAT(m, testing::ContainsRegex("test$")); + // Since the special THROTTLE_MSG isn't used, there isn't any report on + // suppressed messages. + ASSERT_STR_NOT_CONTAINS(m, "suppressed"); + } +} + // Test the KLOG_EVERY_N_SECS(...) macro with slow-paced messages, making sure // no messages are lost or suppressed if they come staggered by more than // the suppression time interval. diff --git a/src/kudu/util/maintenance_manager.cc b/src/kudu/util/maintenance_manager.cc index 898fb0e3f..5145cd929 100644 --- a/src/kudu/util/maintenance_manager.cc +++ b/src/kudu/util/maintenance_manager.cc @@ -335,15 +335,15 @@ void MaintenanceManager::RunSchedulerThread() { bool prev_iter_found_no_work = false; while (true) { - if (!FLAGS_enable_maintenance_manager) { + if (PREDICT_FALSE(!FLAGS_enable_maintenance_manager)) { { - std::unique_lock<Mutex> guard(lock_); + std::lock_guard<Mutex> guard(lock_); if (shutdown_) { VLOG_AND_TRACE_WITH_PREFIX("maintenance", 1) << "Shutting down maintenance manager."; return; } } - KLOG_EVERY_N_SECS(INFO, 1200) + KLOG_EVERY_N_SECS(INFO, 300) << "Maintenance manager is disabled (check --enable_maintenance_manager)."; SleepFor(polling_interval_); continue;