This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 9eac4a594c6 branch-4.1: [fix](s3) Avoid retrying object storage 
SlowDown errors #63776 (#64327)
9eac4a594c6 is described below

commit 9eac4a594c6a2a0de67c51fb8bb2a5d0102f6c1e
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Jun 10 14:08:37 2026 +0800

    branch-4.1: [fix](s3) Avoid retrying object storage SlowDown errors #63776 
(#64327)
    
    Cherry-picked from #63776
    
    Co-authored-by: Yixuan Wang <[email protected]>
---
 be/src/util/s3_util.cpp            |  3 +--
 cloud/src/common/config.h          |  2 ++
 cloud/src/recycler/s3_accessor.cpp |  8 +++++---
 common/cpp/obj_retry_strategy.cpp  | 10 ++++++++--
 common/cpp/obj_retry_strategy.h    |  7 +++++--
 5 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index d977b0aca87..14915b1d19c 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -349,7 +349,6 @@ std::shared_ptr<io::ObjStorageClient> 
S3ClientFactory::_create_azure_client(
     }
 
     Azure::Storage::Blobs::BlobClientOptions options;
-    
options.Retry.StatusCodes.insert(Azure::Core::Http::HttpStatusCode::TooManyRequests);
     options.Retry.MaxRetries = config::max_s3_client_retry;
     
options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>());
     if (_ca_cert_file_path.empty()) {
@@ -526,7 +525,7 @@ std::shared_ptr<io::ObjStorageClient> 
S3ClientFactory::_create_s3_client(
     }
 
     aws_config.retryStrategy = std::make_shared<S3CustomRetryStrategy>(
-            config::max_s3_client_retry /*scaleFactor = 25*/);
+            config::max_s3_client_retry /*scaleFactor = 25*/, 
/*retry_slow_down=*/false);
 
     std::shared_ptr<Aws::S3::S3Client> new_client = 
std::make_shared<Aws::S3::S3Client>(
             get_aws_credentials_provider(s3_conf), std::move(aws_config),
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index 85aaaf6f0d2..ad430ede29b 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -314,6 +314,8 @@ CONF_Validator(s3_client_http_scheme, [](const std::string& 
config) -> bool {
 
 // Max retry times for object storage request
 CONF_mInt64(max_s3_client_retry, "10");
+// Whether to retry on S3 SlowDown (429/503) errors
+CONF_Bool(s3_client_retry_slow_down, "false");
 
 // Max byte getting delete bitmap can return, default is 1GB
 CONF_mInt64(max_get_delete_bitmap_byte, "1073741824");
diff --git a/cloud/src/recycler/s3_accessor.cpp 
b/cloud/src/recycler/s3_accessor.cpp
index 9d0094437eb..84b94a0803d 100644
--- a/cloud/src/recycler/s3_accessor.cpp
+++ b/cloud/src/recycler/s3_accessor.cpp
@@ -393,7 +393,9 @@ int S3Accessor::init() {
     case S3Conf::AZURE: {
 #ifdef USE_AZURE
         Azure::Storage::Blobs::BlobClientOptions options;
-        
options.Retry.StatusCodes.insert(Azure::Core::Http::HttpStatusCode::TooManyRequests);
+        if (config::s3_client_retry_slow_down) {
+            
options.Retry.StatusCodes.insert(Azure::Core::Http::HttpStatusCode::TooManyRequests);
+        }
         options.Retry.MaxRetries = config::max_s3_client_retry;
         auto cred =
                 
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(conf_.ak, 
conf_.sk);
@@ -405,7 +407,7 @@ int S3Accessor::init() {
         // In Azure's HTTP requests, all policies in the vector are called in 
a chained manner following the HTTP pipeline approach.
         // Within the RetryPolicy, the nextPolicy is called multiple times 
inside a loop.
         // All policies in the PerRetryPolicies are downstream of the 
RetryPolicy.
-        // Therefore, you only need to add a policy to check if the response 
code is 429 and if the retry count meets the condition, it can record the retry 
count.
+        // Therefore, the policy can record retries after the RetryPolicy has 
handled the response.
         
options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>());
         auto container_client = 
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(
                 uri_, cred, std::move(options));
@@ -439,7 +441,7 @@ int S3Accessor::init() {
             aws_config.scheme = Aws::Http::Scheme::HTTP;
         }
         aws_config.retryStrategy = std::make_shared<S3CustomRetryStrategy>(
-                config::max_s3_client_retry /*scaleFactor = 25*/);
+                config::max_s3_client_retry, 
config::s3_client_retry_slow_down);
 
         if (_ca_cert_file_path.empty()) {
             _ca_cert_file_path =
diff --git a/common/cpp/obj_retry_strategy.cpp 
b/common/cpp/obj_retry_strategy.cpp
index 6da4c23980a..58ca47f6a68 100644
--- a/common/cpp/obj_retry_strategy.cpp
+++ b/common/cpp/obj_retry_strategy.cpp
@@ -25,7 +25,8 @@ namespace doris {
 
 bvar::Adder<int64_t> object_request_retry_count("object_request_retry_count");
 
-S3CustomRetryStrategy::S3CustomRetryStrategy(int maxRetries) : 
DefaultRetryStrategy(maxRetries) {}
+S3CustomRetryStrategy::S3CustomRetryStrategy(int maxRetries, bool 
retry_slow_down)
+        : DefaultRetryStrategy(maxRetries), _retry_slow_down(retry_slow_down) 
{}
 
 S3CustomRetryStrategy::~S3CustomRetryStrategy() = default;
 
@@ -35,6 +36,11 @@ bool S3CustomRetryStrategy::ShouldRetry(const 
Aws::Client::AWSError<Aws::Client:
         return false;
     }
 
+    if (!_retry_slow_down && error.GetExceptionName() == "SlowDown" &&
+        error.GetResponseCode() == 
Aws::Http::HttpResponseCode::SERVICE_UNAVAILABLE) {
+        return false;
+    }
+
     if (Aws::Http::IsRetryableHttpResponseCode(error.GetResponseCode()) || 
error.ShouldRetry()) {
         object_request_retry_count << 1;
         LOG(INFO) << "retry due to error: " << error << ", attempt: " << 
attemptedRetries + 1 << "/"
@@ -74,4 +80,4 @@ std::unique_ptr<AzureRetryRecordPolicy::HttpPolicy> 
AzureRetryRecordPolicy::Clon
     return std::make_unique<AzureRetryRecordPolicy>(*this);
 }
 #endif
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/common/cpp/obj_retry_strategy.h b/common/cpp/obj_retry_strategy.h
index dd98f871716..aaf35f6b100 100644
--- a/common/cpp/obj_retry_strategy.h
+++ b/common/cpp/obj_retry_strategy.h
@@ -27,11 +27,14 @@
 namespace doris {
 class S3CustomRetryStrategy final : public Aws::Client::DefaultRetryStrategy {
 public:
-    S3CustomRetryStrategy(int maxRetries);
+    S3CustomRetryStrategy(int maxRetries, bool retry_slow_down = true);
     ~S3CustomRetryStrategy() override;
 
     bool ShouldRetry(const Aws::Client::AWSError<Aws::Client::CoreErrors>& 
error,
                      long attemptedRetries) const override;
+
+private:
+    bool _retry_slow_down;
 };
 
 #ifdef USE_AZURE
@@ -47,4 +50,4 @@ public:
             Azure::Core::Context const& context) const override;
 };
 #endif
-} // namespace doris
\ No newline at end of file
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to