This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push: new 60a5cb66a [ranger] enhance the robustness of key retrieval 60a5cb66a is described below commit 60a5cb66a986b3a6901b7737253c10094fe6318b Author: kedeng <kdeng...@gmail.com> AuthorDate: Wed Aug 14 17:10:49 2024 +0800 [ranger] enhance the robustness of key retrieval In real-world scenarios where encryption keys are generated using Ranger, we might encounter the following error when starting the cluster: 'master_main.cc:42] Remote error: RunMasterServer() failed: Could not create new FS layout: unable to create instance metadata: failed to generate server key: HTTP 403'. This error can be resolved by simply restarting without making any changes. Upon investigation, it was found that after a new user is added to Ranger, it can take up to about 30 seconds for that user to become effective, as referenced in [1]. To enhance the robustness of the Kudu code, I have added a retry mechanism for key retrieval in this patch to mitigate the impact of Ranger user activation periods on the startup process. Since only retry logic was added, no new unit tests were introduced. However, I still verified the success rate of the new patch in a real-world installation, and it reached 100%, which is a significant improvement compared to the previous 50%. 
[1]https://github.com/apache/ranger/blob/4e365456f6533ee5515c5070c92e355198922c81/agents-common/src/main/java/org/apache/ranger/plugin/util/PolicyRefresher.java#L92 Change-Id: I1fd3263ad6ba6d8e444036bb7d2158986098cb4b Reviewed-on: http://gerrit.cloudera.org:8080/21673 Reviewed-by: Alexey Serbin <ale...@apache.org> Tested-by: KeDeng <kdeng...@gmail.com> --- src/kudu/ranger-kms/ranger_kms_client.cc | 48 +++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/kudu/ranger-kms/ranger_kms_client.cc b/src/kudu/ranger-kms/ranger_kms_client.cc index 79cd44c17..3bfd87d2d 100644 --- a/src/kudu/ranger-kms/ranger_kms_client.cc +++ b/src/kudu/ranger-kms/ranger_kms_client.cc @@ -17,18 +17,34 @@ #include "kudu/ranger-kms/ranger_kms_client.h" +#include <ostream> #include <string> #include <vector> +#include <gflags/gflags.h> #include <glog/logging.h> #include <rapidjson/document.h> +#include "kudu/gutil/port.h" #include "kudu/gutil/strings/escaping.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/util/curl_util.h" #include "kudu/util/easy_json.h" #include "kudu/util/faststring.h" #include "kudu/util/jsonreader.h" +#include "kudu/util/monotime.h" + +// We should set a value greater than the Apache Ranger to ensure the +// robustness of the key generation. +// The default value of Apache Ranger we can get from [1]. +// +// [1]https://github.com/apache/ranger/blob/4e365456f6533ee5515c5070c92e355198922c81/agents-common/src/main/java/org/apache/ranger/plugin/util/PolicyRefresher.java#L92 +DEFINE_int32(ranger_kms_client_generate_key_max_retry_time_s, 40, + "The maximum retry time for generating encryption keys using " + "the Ranger KMS client. 
The maximum effective time for adding " + "a new account to Apache Ranger is about 30 seconds, and the retry " + "time for using the client to generate the key should be greater " + "than this value."); using rapidjson::Value; using std::string; @@ -91,8 +107,38 @@ Status RangerKMSClient::GenerateEncryptionKeyFromKMS(const string& key_name, urls.emplace_back(Substitute("$0/v1/key/$1/_eek?eek_op=generate&num_keys=1", url, key_name)); } + faststring resp; - RETURN_NOT_OK_PREPEND(curl.FetchURL(urls, &resp), "failed to generate encryption key"); + const MonoTime deadline = MonoTime::Now() + + MonoDelta::FromSeconds(FLAGS_ranger_kms_client_generate_key_max_retry_time_s); + Status s; + int backoff_ms = 300; + constexpr const char* const kErrorMsg = "Failed to generate server key."; + + do { + s = curl.FetchURL(urls, &resp); + if (s.ok()) { + break; + } + + LOG(WARNING) << kErrorMsg << " Status: " << s.ToString(); + if (MonoTime::Now() >= deadline) { + // Timeout + break; + } + SleepFor(MonoDelta::FromMilliseconds(backoff_ms)); + + backoff_ms += 300; + // Sleep for a maximum of 1800 milliseconds. + if (backoff_ms > 1800) { + backoff_ms = 1800; + } + } while (true); + + if (PREDICT_FALSE(!s.ok())) { + LOG_AND_RETURN(ERROR, s.CloneAndPrepend(kErrorMsg)); + } + JsonReader r(resp.ToString()); RETURN_NOT_OK(r.Init()); vector<const Value*> keys;