This is an automated email from the ASF dual-hosted git repository.

kirs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ebdf4e53edb [feat](be) Enhance CA cert troubleshooting in Azure object 
storage client (#61102)
ebdf4e53edb is described below

commit ebdf4e53edb14d40b7890ee772025c17dbec091b
Author: Calvin Kirs <[email protected]>
AuthorDate: Wed Mar 11 14:30:37 2026 +0800

    [feat](be) Enhance CA cert troubleshooting in Azure object storage client 
(#61102)
    
    ### Problem Summary:
    1. Azure Blob requests can fail with TLS/CA verification errors, but
    the current error messages do not provide enough CA-related debug context.
    2. Azure client creation does not explicitly bind the Doris-selected CA
    file to the curl transport, which can make the CA source ambiguous across
    environments.
    
    ### What is changed:
    1. In Azure client factory, resolve CA file from
    `config::ca_cert_file_paths` and set `CurlTransportOptions::CAInfo`.
    2. Build a `tls_debug(...)` context at client creation time, including:
         - configured CA paths,
         - selected CA file,
         - whether the selected CA file exists and is readable,
         - SSL-related env vars (`SSL_CERT_FILE`, `CURL_CA_BUNDLE`,
           `SSL_CERT_DIR`).
    3. Pass TLS debug context into `AzureObjStorageClient`.
    4. Append TLS debug suffix only when error message matches TLS CA
    failure patterns.
    5. Add unit tests for helper logic in
    `azure_obj_storage_client_test.cpp`:
         - `detects_tls_ca_error`
         - `appends_debug_suffix_only_for_tls_ca_error`
    
    Example output after this change:

    ```
    W20260306 16:38:44.997861 86429 file_reader.cpp:36] [INTERNAL_ERROR]Azure request failed because Fail to get a new connection for: https://<azure-storage-endpoint>.blob.core.windows.net. Problem with the SSL CA cert (path? access rights?), error msg , http code 0, path msg bucket <container-name>, key <object-key>, prefix , path , tls_debug(ca_cert_file_paths='<ca-cert-path>', selected_ca_file='<ca-cert-path>', selected_ca_exists=true, selected_ca_readable=true, SSL_CERT_FILE='', CURL_CA_BUNDLE='', SSL_CERT_DIR='')failed to read

    W20260306 16:38:44.997881 86429 internal_service.cpp:888] failed to init reader, errmsg=[INTERNAL_ERROR]Azure request failed because Fail to get a new connection for: https://<azure-storage-endpoint>.blob.core.windows.net. Problem with the SSL CA cert (path? access rights?), error msg , http code 0, path msg bucket <container-name>, key <object-key>, prefix , path , tls_debug(ca_cert_file_paths='<ca-cert-path>', selected_ca_file='<ca-cert-path>', selected_ca_exists=true, selected_ca_readable=true, SSL_CERT_FILE='', CURL_CA_BUNDLE='', SSL_CERT_DIR='')failed to read

    W20260306 16:38:51.028247 85476 pipeline_fragment_context.cpp:1813] Failed to send report for query <query-id>, fragment 0: [E-241]

    ERROR 1105 (HY000): errCode = 2, detailMessage = Can not build FunctionGenTable 's3'. error: errCode = 2, detailMessage = (<backend-ip>)[INTERNAL_ERROR]Azure request failed because Fail to get a new connection for: https://<azure-storage-endpoint>.blob.core.windows.net. Problem with the SSL CA cert (path? access rights?), error msg , http code 0, path msg bucket <container-name>, key <object-key>, prefix , path , tls_debug(ca_cert_file_paths='<ca-cert-path>', selected_ca_file='<ca-cert-path>', selected_ca_exists=true, selected_ca_readable=true, SSL_CERT_FILE='', CURL_CA_BUNDLE='', SSL_CERT_DIR='')failed to read
    ```
---
 be/src/io/fs/azure_obj_storage_client.cpp       | 79 ++++++++++++++++++-------
 be/src/io/fs/azure_obj_storage_client.h         | 15 ++++-
 be/src/util/s3_util.cpp                         | 42 ++++++++++++-
 be/test/io/fs/azure_obj_storage_client_test.cpp | 23 ++++++-
 4 files changed, 133 insertions(+), 26 deletions(-)

diff --git a/be/src/io/fs/azure_obj_storage_client.cpp 
b/be/src/io/fs/azure_obj_storage_client.cpp
index 026d5b56b93..53ea5837f66 100644
--- a/be/src/io/fs/azure_obj_storage_client.cpp
+++ b/be/src/io/fs/azure_obj_storage_client.cpp
@@ -33,10 +33,12 @@
 #include <azure/storage/common/account_sas_builder.hpp>
 #include <azure/storage/common/storage_credential.hpp>
 #include <azure/storage/common/storage_exception.hpp>
+#include <cctype>
 #include <chrono>
 #include <exception>
 #include <iterator>
 #include <ranges>
+#include <string_view>
 
 #include "common/exception.h"
 #include "common/logging.h"
@@ -54,6 +56,13 @@ std::string wrap_object_storage_path_msg(const 
doris::io::ObjectStoragePathOptio
                        opts.path.native());
 }
 
+std::string to_lower_ascii(std::string_view input) {
+    std::string lowered(input);
+    std::transform(lowered.begin(), lowered.end(), lowered.begin(),
+                   [](unsigned char ch) { return 
static_cast<char>(std::tolower(ch)); });
+    return lowered;
+}
+
 auto base64_encode_part_num(int part_num) {
     uint8_t buf[4];
     doris::encode_fixed32_le(buf, static_cast<uint32_t>(part_num));
@@ -93,22 +102,42 @@ namespace doris::io {
 // > Each batch request supports a maximum of 256 subrequests.
 constexpr size_t BlobBatchMaxOperations = 256;
 
+bool is_azure_tls_ca_error_message(std::string_view message) {
+    std::string lower = to_lower_ascii(message);
+    return lower.find("ssl ca cert") != std::string::npos ||
+           lower.find("peer failed verification") != std::string::npos ||
+           lower.find("unable to get local issuer certificate") != 
std::string::npos ||
+           lower.find("problem with the ssl ca cert") != std::string::npos;
+}
+
+std::string build_azure_tls_debug_suffix(std::string_view error_message,
+                                         std::string_view tls_debug_context) {
+    if (tls_debug_context.empty() || 
!is_azure_tls_ca_error_message(error_message)) {
+        return "";
+    }
+    return fmt::format(", {}", tls_debug_context);
+}
+
 template <typename Func>
-ObjectStorageResponse do_azure_client_call(Func f, const 
ObjectStoragePathOptions& opts) {
+ObjectStorageResponse do_azure_client_call(Func f, const 
ObjectStoragePathOptions& opts,
+                                           std::string_view tls_debug_context) 
{
     try {
         f();
     } catch (Azure::Core::RequestFailedException& e) {
+        auto tls_debug_suffix = build_azure_tls_debug_suffix(
+                fmt::format("{} {}", e.what(), e.Message), tls_debug_context);
         auto msg = fmt::format(
-                "Azure request failed because {}, error msg {}, http code {}, 
path msg {}",
+                "Azure request failed because {}, error msg {}, http code {}, 
path msg {}{}",
                 e.what(), e.Message, static_cast<int>(e.StatusCode),
-                wrap_object_storage_path_msg(opts));
+                wrap_object_storage_path_msg(opts), tls_debug_suffix);
         LOG_WARNING(msg);
         return {.status = 
convert_to_obj_response(Status::InternalError<false>(std::move(msg))),
                 .http_code = static_cast<int>(e.StatusCode),
                 .request_id = std::move(e.RequestId)};
     } catch (std::exception& e) {
-        auto msg = fmt::format("Azure request failed because {}, path msg {}", 
e.what(),
-                               wrap_object_storage_path_msg(opts));
+        auto msg = fmt::format("Azure request failed because {}, path msg 
{}{}", e.what(),
+                               wrap_object_storage_path_msg(opts),
+                               build_azure_tls_debug_suffix(e.what(), 
tls_debug_context));
         LOG_WARNING(msg);
         return {.status = 
convert_to_obj_response(Status::InternalError<false>(std::move(msg)))};
     }
@@ -116,8 +145,12 @@ ObjectStorageResponse do_azure_client_call(Func f, const 
ObjectStoragePathOption
 }
 
 struct AzureBatchDeleter {
-    AzureBatchDeleter(BlobContainerClient* client, const 
ObjectStoragePathOptions& opts)
-            : _client(client), _batch(client->CreateBatch()), _opts(opts) {}
+    AzureBatchDeleter(BlobContainerClient* client, const 
ObjectStoragePathOptions& opts,
+                      std::string_view tls_debug_context)
+            : _client(client),
+              _batch(client->CreateBatch()),
+              _opts(opts),
+              _tls_debug_context(tls_debug_context) {}
     // Submit one blob to be deleted in `AzureBatchDeleter::execute`
     void delete_blob(const std::string& blob_name) {
         deferred_resps.emplace_back(_batch.DeleteBlob(blob_name));
@@ -133,7 +166,7 @@ struct AzureBatchDeleter {
                         _client->SubmitBatch(_batch);
                     });
                 },
-                _opts);
+                _opts, _tls_debug_context);
         if (resp.status.code != ErrorCode::OK) {
             return resp;
         }
@@ -154,9 +187,12 @@ struct AzureBatchDeleter {
                     continue;
                 }
                 auto msg = fmt::format(
-                        "Azure request failed because {}, error msg {}, http 
code {}, path msg {}",
+                        "Azure request failed because {}, error msg {}, http 
code {}, path msg "
+                        "{}{}",
                         e.what(), e.Message, static_cast<int>(e.StatusCode),
-                        wrap_object_storage_path_msg(_opts));
+                        wrap_object_storage_path_msg(_opts),
+                        build_azure_tls_debug_suffix(fmt::format("{} {}", 
e.what(), e.Message),
+                                                     _tls_debug_context));
                 LOG_WARNING(msg);
                 return {.status = convert_to_obj_response(
                                 Status::InternalError<false>(std::move(msg))),
@@ -172,6 +208,7 @@ private:
     BlobContainerClient* _client;
     BlobContainerBatch _batch;
     const ObjectStoragePathOptions& _opts;
+    std::string_view _tls_debug_context;
     std::vector<Azure::Storage::DeferredResponse<Models::DeleteBlobResult>> 
deferred_resps;
 };
 
@@ -194,7 +231,7 @@ ObjectStorageResponse 
AzureObjStorageClient::put_object(const ObjectStoragePathO
                                       stream.size());
                 });
             },
-            opts);
+            opts, _tls_debug_context);
 }
 
 ObjectStorageUploadResponse AzureObjStorageClient::upload_part(const 
ObjectStoragePathOptions& opts,
@@ -211,7 +248,7 @@ ObjectStorageUploadResponse 
AzureObjStorageClient::upload_part(const ObjectStora
                     client.StageBlock(base64_encode_part_num(part_num), 
memory_body);
                 });
             },
-            opts);
+            opts, _tls_debug_context);
     return ObjectStorageUploadResponse {
             .resp = resp,
     };
@@ -232,7 +269,7 @@ ObjectStorageResponse 
AzureObjStorageClient::complete_multipart_upload(
                     client.CommitBlockList(string_block_ids);
                 });
             },
-            opts);
+            opts, _tls_debug_context);
 }
 
 ObjectStorageHeadResponse AzureObjStorageClient::head_object(const 
ObjectStoragePathOptions& opts) {
@@ -244,7 +281,7 @@ ObjectStorageHeadResponse 
AzureObjStorageClient::head_object(const ObjectStorage
                     return 
_client->GetBlockBlobClient(opts.key).GetProperties().Value;
                 });
             },
-            opts);
+            opts, _tls_debug_context);
     if (resp.http_code == 
static_cast<int>(Azure::Core::Http::HttpStatusCode::NotFound)) {
         return ObjectStorageHeadResponse {
                 .resp = {.status = convert_to_obj_response(
@@ -275,7 +312,7 @@ ObjectStorageResponse 
AzureObjStorageClient::get_object(const ObjectStoragePathO
                 });
                 *size_return = resp.Value.ContentRange.Length.Value();
             },
-            opts);
+            opts, _tls_debug_context);
 }
 
 ObjectStorageResponse AzureObjStorageClient::list_objects(const 
ObjectStoragePathOptions& opts,
@@ -304,7 +341,7 @@ ObjectStorageResponse 
AzureObjStorageClient::list_objects(const ObjectStoragePat
                     get_file_file(resp);
                 }
             },
-            opts);
+            opts, _tls_debug_context);
 }
 
 // As Azure's doc said, the batch size is 256
@@ -318,7 +355,7 @@ ObjectStorageResponse 
AzureObjStorageClient::delete_objects(const ObjectStorageP
     auto end = std::end(objs);
 
     while (begin != end) {
-        auto deleter = AzureBatchDeleter(_client.get(), opts);
+        auto deleter = AzureBatchDeleter(_client.get(), opts, 
_tls_debug_context);
         auto chunk_end = begin;
         std::advance(chunk_end, std::min(BlobBatchMaxOperations,
                                          
static_cast<size_t>(std::distance(begin, end))));
@@ -344,7 +381,7 @@ ObjectStorageResponse 
AzureObjStorageClient::delete_object(const ObjectStoragePa
                     throw Exception(Status::IOError<false>("Delete azure blob 
failed"));
                 }
             },
-            opts);
+            opts, _tls_debug_context);
 }
 
 ObjectStorageResponse AzureObjStorageClient::delete_objects_recursively(
@@ -353,7 +390,7 @@ ObjectStorageResponse 
AzureObjStorageClient::delete_objects_recursively(
     list_opts.Prefix = opts.prefix;
     list_opts.PageSizeHint = BlobBatchMaxOperations;
     auto delete_func = [&](const std::vector<Models::BlobItem>& blobs) -> 
ObjectStorageResponse {
-        auto deleter = AzureBatchDeleter(_client.get(), opts);
+        auto deleter = AzureBatchDeleter(_client.get(), opts, 
_tls_debug_context);
         auto batch = _client->CreateBatch();
         for (auto&& blob_item : blobs) {
             deleter.delete_blob(blob_item.Name);
@@ -372,7 +409,7 @@ ObjectStorageResponse 
AzureObjStorageClient::delete_objects_recursively(
                     return _client->ListBlobs(list_opts);
                 });
             },
-            opts);
+            opts, _tls_debug_context);
     if (list_resp.status.code != ErrorCode::OK) {
         return list_resp;
     }
@@ -390,7 +427,7 @@ ObjectStorageResponse 
AzureObjStorageClient::delete_objects_recursively(
                         return _client->ListBlobs(list_opts);
                     });
                 },
-                opts);
+                opts, _tls_debug_context);
         if (list_resp.status.code != ErrorCode::OK) {
             return list_resp;
         }
diff --git a/be/src/io/fs/azure_obj_storage_client.h 
b/be/src/io/fs/azure_obj_storage_client.h
index ec1a75997b0..7d1cecc502e 100644
--- a/be/src/io/fs/azure_obj_storage_client.h
+++ b/be/src/io/fs/azure_obj_storage_client.h
@@ -17,6 +17,9 @@
 
 #pragma once
 
+#include <string>
+#include <string_view>
+
 #include "io/fs/obj_storage_client.h"
 
 namespace Azure::Storage::Blobs {
@@ -27,10 +30,15 @@ namespace doris::io {
 
 class ObjClientHolder;
 
+bool is_azure_tls_ca_error_message(std::string_view message);
+std::string build_azure_tls_debug_suffix(std::string_view error_message,
+                                         std::string_view tls_debug_context);
+
 class AzureObjStorageClient final : public ObjStorageClient {
 public:
-    
AzureObjStorageClient(std::shared_ptr<Azure::Storage::Blobs::BlobContainerClient>
 client)
-            : _client(std::move(client)) {}
+    
AzureObjStorageClient(std::shared_ptr<Azure::Storage::Blobs::BlobContainerClient>
 client,
+                          std::string tls_debug_context = {})
+            : _client(std::move(client)), 
_tls_debug_context(std::move(tls_debug_context)) {}
     ~AzureObjStorageClient() override = default;
     ObjectStorageUploadResponse create_multipart_upload(
             const ObjectStoragePathOptions& opts) override;
@@ -56,6 +64,7 @@ public:
 
 private:
     std::shared_ptr<Azure::Storage::Blobs::BlobContainerClient> _client;
+    std::string _tls_debug_context;
 };
 
-} // namespace doris::io
\ No newline at end of file
+} // namespace doris::io
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index 6e5da0d9a09..3b22ebcbdfa 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -38,10 +38,12 @@
 
 #ifdef USE_AZURE
 #include <azure/core/diagnostics/logger.hpp>
+#include <azure/core/http/curl_transport.hpp>
 #include <azure/storage/blobs/blob_container_client.hpp>
 #endif
 #include <cstdlib>
 #include <filesystem>
+#include <fstream>
 #include <functional>
 #include <memory>
 #include <ostream>
@@ -110,6 +112,33 @@ bool to_int(std::string_view str, int& res) {
     return ec == std::errc {};
 }
 
+#ifdef USE_AZURE
+std::string env_or_empty(const char* env_name) {
+    if (const char* value = std::getenv(env_name); value != nullptr) {
+        return value;
+    }
+    return "";
+}
+
+std::string build_azure_tls_debug_context(const std::string& selected_ca_file) 
{
+    bool selected_ca_exists = false;
+    bool selected_ca_readable = false;
+    if (!selected_ca_file.empty()) {
+        std::error_code ec;
+        selected_ca_exists = std::filesystem::exists(selected_ca_file, ec) && 
!ec;
+        std::ifstream input(selected_ca_file);
+        selected_ca_readable = input.good();
+    }
+
+    return fmt::format(
+            "tls_debug(ca_cert_file_paths='{}', selected_ca_file='{}', 
selected_ca_exists={}, "
+            "selected_ca_readable={}, SSL_CERT_FILE='{}', CURL_CA_BUNDLE='{}', 
SSL_CERT_DIR='{}')",
+            config::ca_cert_file_paths, selected_ca_file, selected_ca_exists, 
selected_ca_readable,
+            env_or_empty("SSL_CERT_FILE"), env_or_empty("CURL_CA_BUNDLE"),
+            env_or_empty("SSL_CERT_DIR"));
+}
+#endif
+
 constexpr char USE_PATH_STYLE[] = "use_path_style";
 
 constexpr char AZURE_PROVIDER_STRING[] = "AZURE";
@@ -328,14 +357,25 @@ std::shared_ptr<io::ObjStorageClient> 
S3ClientFactory::_create_azure_client(
     
options.Retry.StatusCodes.insert(Azure::Core::Http::HttpStatusCode::TooManyRequests);
     options.Retry.MaxRetries = config::max_s3_client_retry;
     
options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>());
+    if (_ca_cert_file_path.empty()) {
+        _ca_cert_file_path = 
get_valid_ca_cert_path(doris::split(config::ca_cert_file_paths, ";"));
+    }
+    if (!_ca_cert_file_path.empty()) {
+        Azure::Core::Http::CurlTransportOptions curl_options;
+        curl_options.CAInfo = _ca_cert_file_path;
+        options.Transport.Transport =
+                
std::make_shared<Azure::Core::Http::CurlTransport>(std::move(curl_options));
+    }
 
     std::string normalized_uri = normalize_http_uri(uri);
     VLOG_DEBUG << "uri:" << uri << ", normalized_uri:" << normalized_uri;
+    std::string tls_debug_context = 
build_azure_tls_debug_context(_ca_cert_file_path);
 
     auto containerClient = 
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(
             uri, cred, std::move(options));
     LOG_INFO("create one azure client with {}", s3_conf.to_string());
-    return 
std::make_shared<io::AzureObjStorageClient>(std::move(containerClient));
+    return 
std::make_shared<io::AzureObjStorageClient>(std::move(containerClient),
+                                                       
std::move(tls_debug_context));
 #else
     LOG_FATAL("BE is not compiled with azure support, export BUILD_AZURE=ON 
before building");
     return nullptr;
diff --git a/be/test/io/fs/azure_obj_storage_client_test.cpp 
b/be/test/io/fs/azure_obj_storage_client_test.cpp
index 02e78e8b499..7591b4bf2ea 100644
--- a/be/test/io/fs/azure_obj_storage_client_test.cpp
+++ b/be/test/io/fs/azure_obj_storage_client_test.cpp
@@ -36,6 +36,27 @@ namespace doris {
 
 using namespace Azure::Storage::Blobs;
 
+TEST(AzureObjStorageClientTlsHelperTest, detects_tls_ca_error) {
+    EXPECT_TRUE(io::is_azure_tls_ca_error_message(
+            "Problem with the SSL CA cert (path? access rights?)"));
+    EXPECT_TRUE(io::is_azure_tls_ca_error_message(
+            "curl error: peer failed verification for cert chain"));
+    EXPECT_TRUE(io::is_azure_tls_ca_error_message("unable to get local issuer 
certificate"));
+    EXPECT_FALSE(io::is_azure_tls_ca_error_message("AuthenticationFailed"));
+}
+
+TEST(AzureObjStorageClientTlsHelperTest, 
appends_debug_suffix_only_for_tls_ca_error) {
+    std::string_view debug_ctx = 
"tls_debug(selected_ca_file='/etc/ssl/certs/ca-bundle.crt')";
+
+    EXPECT_EQ(io::build_azure_tls_debug_suffix(
+                      "Problem with the SSL CA cert (path? access rights?)", 
debug_ctx),
+              ", tls_debug(selected_ca_file='/etc/ssl/certs/ca-bundle.crt')");
+    EXPECT_EQ(io::build_azure_tls_debug_suffix("AuthenticationFailed", 
debug_ctx), "");
+    EXPECT_EQ(io::build_azure_tls_debug_suffix(
+                      "Problem with the SSL CA cert (path? access rights?)", 
""),
+              "");
+}
+
 class AzureObjStorageClientTest : public testing::Test {
 protected:
     static std::shared_ptr<io::ObjStorageClient> obj_storage_client;
@@ -148,4 +169,4 @@ TEST_F(AzureObjStorageClientTest, dummy_test) {
 
 #endif // #ifdef USE_AZURE
 
-} // namespace doris
\ No newline at end of file
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to