This is an automated email from the ASF dual-hosted git repository. achennaka pushed a commit to branch branch-1.17.x in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/branch-1.17.x by this push: new ceaffc5f6 [clock] KUDU-3521 fix crash when clock is synchronized by PTPd ceaffc5f6 is described below commit ceaffc5f6f50745f8eaf687668d3b4ac767eea76 Author: Alexey Serbin <ale...@apache.org> AuthorDate: Wed Oct 25 16:38:21 2023 -0700 [clock] KUDU-3521 fix crash when clock is synchronized by PTPd There was an earlier attempt to address the issue [1], but the fix hasn't received +2 since there was not enough evidence behind the root cause analysis. With the report on #kudu-general Slack channel [2], from the analysis of the code [3] it's easy to see there isn't any other way to get such a manifestation of the issue but a negative value for the 'maxerror' field of the 'timex' structure returned by the ntp_adjtime()/adjtimex() system call. The essence of the problem is that in Kudu the maximum error is supposed to be a non-negative number. This patch addresses the issue. [1] https://gerrit.cloudera.org/#/c/12149/ [2] https://getkudu.slack.com/archives/C0CPXJ3CH/p1698246065354269 [3] https://github.com/apache/kudu/blob/04fdbd0974f4418295d57c0daa4b67de3e777a43/src/kudu/clock/hybrid_clock.cc#L627-L706 Change-Id: Ibbe1a50c4857b9742d2ffde35440d0dee082edc0 Reviewed-on: http://gerrit.cloudera.org:8080/20626 Tested-by: Kudu Jenkins Reviewed-by: Yingchun Lai <laiyingc...@apache.org> Reviewed-by: Abhishek Chennaka <achenn...@cloudera.com> (cherry picked from commit 4859d290277bf36f0bd84891c4764194c2cf9521) Reviewed-on: http://gerrit.cloudera.org:8080/20644 --- src/kudu/clock/builtin_ntp.cc | 12 ++++++------ src/kudu/clock/hybrid_clock.cc | 27 +++++++++++++++++++-------- src/kudu/clock/system_ntp.cc | 3 ++- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/src/kudu/clock/builtin_ntp.cc b/src/kudu/clock/builtin_ntp.cc index f5f416e82..348a85e8b 100644 --- a/src/kudu/clock/builtin_ntp.cc +++ b/src/kudu/clock/builtin_ntp.cc @@ -24,7 +24,7 @@ #include <cerrno> #include <cstdint> -#include <cstring> +#include <cstdlib> #include <deque> #include <functional> #include <limits> @@ -569,19 +569,19 @@ Status BuiltInNtp::WalltimeWithError(uint64_t* now_usec, uint64_t* error_usec) { const auto mono = GetMonoTimeMicrosRaw(); DCHECK_GE(mono, last.mono); - const int64_t delta_t = mono - last.mono; + const int64_t delta = mono - last.mono; if (PREDICT_FALSE(MonoDelta::FromSeconds( FLAGS_builtin_ntp_true_time_refresh_max_interval_s) < - MonoDelta::FromMicroseconds(delta_t))) { + MonoDelta::FromMicroseconds(delta))) { return Status::ServiceUnavailable(Substitute( "$0: too long after last true time refresh", - MonoDelta::FromMicroseconds(delta_t).ToString())); + MonoDelta::FromMicroseconds(delta).ToString())); } // TODO(KUDU-2940): apply measured local clock skew against the true time // clock when computing projected wallclock reading and error - *now_usec = delta_t + last.wall; - *error_usec = last.error + delta_t * kSkewPpm / 1e6; + *now_usec = delta + last.wall; + *error_usec = std::abs(last.error + (delta * kSkewPpm / 1000000)); return Status::OK(); } diff --git a/src/kudu/clock/hybrid_clock.cc b/src/kudu/clock/hybrid_clock.cc index 820d93c4e..f9c0a7816 100644 --- a/src/kudu/clock/hybrid_clock.cc +++ b/src/kudu/clock/hybrid_clock.cc @@ -672,9 +672,9 @@ Status HybridClock::InitWithTimeSource(TimeSource time_source) { } Status HybridClock::WalltimeWithError(uint64_t* now_usec, uint64_t* error_usec) { - auto read_time_before = MonoTime::Now(); + const auto read_time_before = MonoTime::Now(); Status s = time_service_->WalltimeWithError(now_usec, error_usec); - auto read_time_after = MonoTime::Now(); + const auto read_time_after = MonoTime::Now(); bool is_extrapolated = false; if (PREDICT_TRUE(s.ok())) { @@ -697,8 +697,15 @@ Status HybridClock::WalltimeWithError(uint64_t* now_usec, uint64_t* error_usec) // The max likelihood estimate is that 'B' corresponds to the average of 'A' // and 'C'. Then we need to add in this uncertainty (half of C - A) into any // future clock readings that we extrapolate from this estimate. - int64_t read_duration_us = (read_time_after - read_time_before).ToMicroseconds(); - int64_t read_time_error_us = read_duration_us / 2; + const int64_t read_duration_us = (read_time_after - read_time_before).ToMicroseconds(); + + // Per manual page of clock_gettime(CLOCK_MONOTONIC) [1], 'read_duration_us' + // is guaranteed to be a non-negative number. Just out of paranoia, + // transform it into 0 if it turns to be a negative number. + // + // [1] https://man7.org/linux/man-pages/man3/clock_gettime.3.html + const uint64_t read_time_error_us = + (read_duration_us > 0) ? read_duration_us / 2 : 0; MonoTime read_time_max_likelihood = read_time_before + MonoDelta::FromMicroseconds(read_time_error_us); @@ -724,12 +731,16 @@ Status HybridClock::WalltimeWithError(uint64_t* now_usec, uint64_t* error_usec) is_extrapolating_ = true; extrapolating_->set_value(is_extrapolating_); } - if (!last_clock_read_time_.Initialized()) { + if (PREDICT_FALSE(!last_clock_read_time_.Initialized())) { RETURN_NOT_OK_PREPEND(s, "could not read system time source"); } MonoDelta time_since_last_read = read_time_after - last_clock_read_time_; - int64_t micros_since_last_read = time_since_last_read.ToMicroseconds(); - int64_t accum_error_us = (micros_since_last_read * time_service_->skew_ppm()) / 1000000; + const int64_t micros_since_last_read = time_since_last_read.ToMicroseconds(); + if (PREDICT_FALSE(micros_since_last_read < 0)) { + RETURN_NOT_OK_PREPEND(s, "inconsistent readings of the monotonic clock"); + } + const uint64_t accum_error_us = + std::abs(micros_since_last_read * time_service_->skew_ppm()) / 1000000; *now_usec = last_clock_read_physical_ + micros_since_last_read; *error_usec = last_clock_read_error_ + accum_error_us; is_extrapolated = true; @@ -749,7 +760,7 @@ Status HybridClock::WalltimeWithError(uint64_t* now_usec, uint64_t* error_usec) *error_usec, is_extrapolated ? "unsynchronized" : "synchronized")); } - return kudu::Status::OK(); + return Status::OK(); } // Used to get the timestamp for metrics. diff --git a/src/kudu/clock/system_ntp.cc b/src/kudu/clock/system_ntp.cc index 4c2debbc7..af28d7c99 100644 --- a/src/kudu/clock/system_ntp.cc +++ b/src/kudu/clock/system_ntp.cc @@ -21,6 +21,7 @@ #include <sys/timex.h> #include <cerrno> +#include <cstdlib> #include <functional> #include <limits> #include <ostream> @@ -170,7 +171,7 @@ Status SystemNtp::WalltimeWithError(uint64_t* now_usec, uint64_t* error_usec) { uint64_t now = static_cast<uint64_t>(t.time.tv_sec) * 1000000 + t.time.tv_usec; #endif - *error_usec = t.maxerror; + *error_usec = std::abs(t.maxerror); *now_usec = now; return Status::OK(); }