This is an automated email from the ASF dual-hosted git repository.
kirs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ebdf4e53edb [feat](be) Enhance CA cert troubleshooting in Azure object
storage client (#61102)
ebdf4e53edb is described below
commit ebdf4e53edb14d40b7890ee772025c17dbec091b
Author: Calvin Kirs <[email protected]>
AuthorDate: Wed Mar 11 14:30:37 2026 +0800
[feat](be) Enhance CA cert troubleshooting in Azure object storage client
(#61102)
### Problem Summary:
1. Azure Blob requests can fail with TLS/CA verification errors, but the
current error messages do not provide enough CA-related debug context.
2. Azure client creation does not explicitly bind the Doris-selected CA file
to the curl transport, which can make the CA source ambiguous across
environments.
### What is changed
1. In Azure client factory, resolve CA file from
`config::ca_cert_file_paths` and set `CurlTransportOptions::CAInfo`.
2. Build a `tls_debug(...)` context at client creation time, including:
- configured CA paths,
- selected CA file,
- whether selected CA exists/readable,
- SSL-related env vars (`SSL_CERT_FILE`, `CURL_CA_BUNDLE`, `SSL_CERT_DIR`).
3. Pass TLS debug context into `AzureObjStorageClient`.
4. Append TLS debug suffix only when error message matches TLS CA
failure patterns.
5. Add unit tests for helper logic in
`azure_obj_storage_client_test.cpp`:
- `detects_tls_ca_error`
- `appends_debug_suffix_only_for_tls_ca_error`
Example log output with the `tls_debug(...)` suffix appended:
```
W20260306 16:38:44.997861 86429 file_reader.cpp:36] [INTERNAL_ERROR]Azure request failed because Fail to get a new connection for: https://<azure-storage-endpoint>.blob.core.windows.net. Problem with the SSL CA cert (path? access rights?), error msg , http code 0, path msg bucket <container-name>, key <object-key>, prefix , path , tls_debug(ca_cert_file_paths='<ca-cert-path>', selected_ca_file='<ca-cert-path>', selected_ca_exists=true, selected_ca_readable=true, SSL_CERT_FILE='', CURL_CA_BUNDLE='', SSL_CERT_DIR='')failed to read
W20260306 16:38:44.997881 86429 internal_service.cpp:888] failed to init reader, errmsg=[INTERNAL_ERROR]Azure request failed because Fail to get a new connection for: https://<azure-storage-endpoint>.blob.core.windows.net. Problem with the SSL CA cert (path? access rights?), error msg , http code 0, path msg bucket <container-name>, key <object-key>, prefix , path , tls_debug(ca_cert_file_paths='<ca-cert-path>', selected_ca_file='<ca-cert-path>', selected_ca_exists=true, selected_ca_readable=true, SSL_CERT_FILE='', CURL_CA_BUNDLE='', SSL_CERT_DIR='')failed to read
W20260306 16:38:51.028247 85476 pipeline_fragment_context.cpp:1813] Failed to send report for query <query-id>, fragment 0: [E-241]
ERROR 1105 (HY000): errCode = 2, detailMessage = Can not build FunctionGenTable 's3'. error: errCode = 2, detailMessage = (<backend-ip>)[INTERNAL_ERROR]Azure request failed because Fail to get a new connection for: https://<azure-storage-endpoint>.blob.core.windows.net. Problem with the SSL CA cert (path? access rights?), error msg , http code 0, path msg bucket <container-name>, key <object-key>, prefix , path , tls_debug(ca_cert_file_paths='<ca-cert-path>', selected_ca_file='<ca-cert-path>', selected_ca_exists=true, selected_ca_readable=true, SSL_CERT_FILE='', CURL_CA_BUNDLE='', SSL_CERT_DIR='')failed to read
```
---
be/src/io/fs/azure_obj_storage_client.cpp | 79 ++++++++++++++++++-------
be/src/io/fs/azure_obj_storage_client.h | 15 ++++-
be/src/util/s3_util.cpp | 42 ++++++++++++-
be/test/io/fs/azure_obj_storage_client_test.cpp | 23 ++++++-
4 files changed, 133 insertions(+), 26 deletions(-)
diff --git a/be/src/io/fs/azure_obj_storage_client.cpp
b/be/src/io/fs/azure_obj_storage_client.cpp
index 026d5b56b93..53ea5837f66 100644
--- a/be/src/io/fs/azure_obj_storage_client.cpp
+++ b/be/src/io/fs/azure_obj_storage_client.cpp
@@ -33,10 +33,12 @@
#include <azure/storage/common/account_sas_builder.hpp>
#include <azure/storage/common/storage_credential.hpp>
#include <azure/storage/common/storage_exception.hpp>
+#include <cctype>
#include <chrono>
#include <exception>
#include <iterator>
#include <ranges>
+#include <string_view>
#include "common/exception.h"
#include "common/logging.h"
@@ -54,6 +56,13 @@ std::string wrap_object_storage_path_msg(const
doris::io::ObjectStoragePathOptio
opts.path.native());
}
+std::string to_lower_ascii(std::string_view input) {
+ std::string lowered(input);
+ std::transform(lowered.begin(), lowered.end(), lowered.begin(),
+ [](unsigned char ch) { return
static_cast<char>(std::tolower(ch)); });
+ return lowered;
+}
+
auto base64_encode_part_num(int part_num) {
uint8_t buf[4];
doris::encode_fixed32_le(buf, static_cast<uint32_t>(part_num));
@@ -93,22 +102,42 @@ namespace doris::io {
// > Each batch request supports a maximum of 256 subrequests.
constexpr size_t BlobBatchMaxOperations = 256;
+bool is_azure_tls_ca_error_message(std::string_view message) {
+ std::string lower = to_lower_ascii(message);
+ return lower.find("ssl ca cert") != std::string::npos ||
+ lower.find("peer failed verification") != std::string::npos ||
+ lower.find("unable to get local issuer certificate") !=
std::string::npos ||
+ lower.find("problem with the ssl ca cert") != std::string::npos;
+}
+
+std::string build_azure_tls_debug_suffix(std::string_view error_message,
+ std::string_view tls_debug_context) {
+ if (tls_debug_context.empty() ||
!is_azure_tls_ca_error_message(error_message)) {
+ return "";
+ }
+ return fmt::format(", {}", tls_debug_context);
+}
+
template <typename Func>
-ObjectStorageResponse do_azure_client_call(Func f, const
ObjectStoragePathOptions& opts) {
+ObjectStorageResponse do_azure_client_call(Func f, const
ObjectStoragePathOptions& opts,
+ std::string_view tls_debug_context)
{
try {
f();
} catch (Azure::Core::RequestFailedException& e) {
+ auto tls_debug_suffix = build_azure_tls_debug_suffix(
+ fmt::format("{} {}", e.what(), e.Message), tls_debug_context);
auto msg = fmt::format(
- "Azure request failed because {}, error msg {}, http code {},
path msg {}",
+ "Azure request failed because {}, error msg {}, http code {},
path msg {}{}",
e.what(), e.Message, static_cast<int>(e.StatusCode),
- wrap_object_storage_path_msg(opts));
+ wrap_object_storage_path_msg(opts), tls_debug_suffix);
LOG_WARNING(msg);
return {.status =
convert_to_obj_response(Status::InternalError<false>(std::move(msg))),
.http_code = static_cast<int>(e.StatusCode),
.request_id = std::move(e.RequestId)};
} catch (std::exception& e) {
- auto msg = fmt::format("Azure request failed because {}, path msg {}",
e.what(),
- wrap_object_storage_path_msg(opts));
+ auto msg = fmt::format("Azure request failed because {}, path msg
{}{}", e.what(),
+ wrap_object_storage_path_msg(opts),
+ build_azure_tls_debug_suffix(e.what(),
tls_debug_context));
LOG_WARNING(msg);
return {.status =
convert_to_obj_response(Status::InternalError<false>(std::move(msg)))};
}
@@ -116,8 +145,12 @@ ObjectStorageResponse do_azure_client_call(Func f, const
ObjectStoragePathOption
}
struct AzureBatchDeleter {
- AzureBatchDeleter(BlobContainerClient* client, const
ObjectStoragePathOptions& opts)
- : _client(client), _batch(client->CreateBatch()), _opts(opts) {}
+ AzureBatchDeleter(BlobContainerClient* client, const
ObjectStoragePathOptions& opts,
+ std::string_view tls_debug_context)
+ : _client(client),
+ _batch(client->CreateBatch()),
+ _opts(opts),
+ _tls_debug_context(tls_debug_context) {}
// Submit one blob to be deleted in `AzureBatchDeleter::execute`
void delete_blob(const std::string& blob_name) {
deferred_resps.emplace_back(_batch.DeleteBlob(blob_name));
@@ -133,7 +166,7 @@ struct AzureBatchDeleter {
_client->SubmitBatch(_batch);
});
},
- _opts);
+ _opts, _tls_debug_context);
if (resp.status.code != ErrorCode::OK) {
return resp;
}
@@ -154,9 +187,12 @@ struct AzureBatchDeleter {
continue;
}
auto msg = fmt::format(
- "Azure request failed because {}, error msg {}, http
code {}, path msg {}",
+ "Azure request failed because {}, error msg {}, http
code {}, path msg "
+ "{}{}",
e.what(), e.Message, static_cast<int>(e.StatusCode),
- wrap_object_storage_path_msg(_opts));
+ wrap_object_storage_path_msg(_opts),
+ build_azure_tls_debug_suffix(fmt::format("{} {}",
e.what(), e.Message),
+ _tls_debug_context));
LOG_WARNING(msg);
return {.status = convert_to_obj_response(
Status::InternalError<false>(std::move(msg))),
@@ -172,6 +208,7 @@ private:
BlobContainerClient* _client;
BlobContainerBatch _batch;
const ObjectStoragePathOptions& _opts;
+ std::string_view _tls_debug_context;
std::vector<Azure::Storage::DeferredResponse<Models::DeleteBlobResult>>
deferred_resps;
};
@@ -194,7 +231,7 @@ ObjectStorageResponse
AzureObjStorageClient::put_object(const ObjectStoragePathO
stream.size());
});
},
- opts);
+ opts, _tls_debug_context);
}
ObjectStorageUploadResponse AzureObjStorageClient::upload_part(const
ObjectStoragePathOptions& opts,
@@ -211,7 +248,7 @@ ObjectStorageUploadResponse
AzureObjStorageClient::upload_part(const ObjectStora
client.StageBlock(base64_encode_part_num(part_num),
memory_body);
});
},
- opts);
+ opts, _tls_debug_context);
return ObjectStorageUploadResponse {
.resp = resp,
};
@@ -232,7 +269,7 @@ ObjectStorageResponse
AzureObjStorageClient::complete_multipart_upload(
client.CommitBlockList(string_block_ids);
});
},
- opts);
+ opts, _tls_debug_context);
}
ObjectStorageHeadResponse AzureObjStorageClient::head_object(const
ObjectStoragePathOptions& opts) {
@@ -244,7 +281,7 @@ ObjectStorageHeadResponse
AzureObjStorageClient::head_object(const ObjectStorage
return
_client->GetBlockBlobClient(opts.key).GetProperties().Value;
});
},
- opts);
+ opts, _tls_debug_context);
if (resp.http_code ==
static_cast<int>(Azure::Core::Http::HttpStatusCode::NotFound)) {
return ObjectStorageHeadResponse {
.resp = {.status = convert_to_obj_response(
@@ -275,7 +312,7 @@ ObjectStorageResponse
AzureObjStorageClient::get_object(const ObjectStoragePathO
});
*size_return = resp.Value.ContentRange.Length.Value();
},
- opts);
+ opts, _tls_debug_context);
}
ObjectStorageResponse AzureObjStorageClient::list_objects(const
ObjectStoragePathOptions& opts,
@@ -304,7 +341,7 @@ ObjectStorageResponse
AzureObjStorageClient::list_objects(const ObjectStoragePat
get_file_file(resp);
}
},
- opts);
+ opts, _tls_debug_context);
}
// As Azure's doc said, the batch size is 256
@@ -318,7 +355,7 @@ ObjectStorageResponse
AzureObjStorageClient::delete_objects(const ObjectStorageP
auto end = std::end(objs);
while (begin != end) {
- auto deleter = AzureBatchDeleter(_client.get(), opts);
+ auto deleter = AzureBatchDeleter(_client.get(), opts,
_tls_debug_context);
auto chunk_end = begin;
std::advance(chunk_end, std::min(BlobBatchMaxOperations,
static_cast<size_t>(std::distance(begin, end))));
@@ -344,7 +381,7 @@ ObjectStorageResponse
AzureObjStorageClient::delete_object(const ObjectStoragePa
throw Exception(Status::IOError<false>("Delete azure blob
failed"));
}
},
- opts);
+ opts, _tls_debug_context);
}
ObjectStorageResponse AzureObjStorageClient::delete_objects_recursively(
@@ -353,7 +390,7 @@ ObjectStorageResponse
AzureObjStorageClient::delete_objects_recursively(
list_opts.Prefix = opts.prefix;
list_opts.PageSizeHint = BlobBatchMaxOperations;
auto delete_func = [&](const std::vector<Models::BlobItem>& blobs) ->
ObjectStorageResponse {
- auto deleter = AzureBatchDeleter(_client.get(), opts);
+ auto deleter = AzureBatchDeleter(_client.get(), opts,
_tls_debug_context);
auto batch = _client->CreateBatch();
for (auto&& blob_item : blobs) {
deleter.delete_blob(blob_item.Name);
@@ -372,7 +409,7 @@ ObjectStorageResponse
AzureObjStorageClient::delete_objects_recursively(
return _client->ListBlobs(list_opts);
});
},
- opts);
+ opts, _tls_debug_context);
if (list_resp.status.code != ErrorCode::OK) {
return list_resp;
}
@@ -390,7 +427,7 @@ ObjectStorageResponse
AzureObjStorageClient::delete_objects_recursively(
return _client->ListBlobs(list_opts);
});
},
- opts);
+ opts, _tls_debug_context);
if (list_resp.status.code != ErrorCode::OK) {
return list_resp;
}
diff --git a/be/src/io/fs/azure_obj_storage_client.h
b/be/src/io/fs/azure_obj_storage_client.h
index ec1a75997b0..7d1cecc502e 100644
--- a/be/src/io/fs/azure_obj_storage_client.h
+++ b/be/src/io/fs/azure_obj_storage_client.h
@@ -17,6 +17,9 @@
#pragma once
+#include <string>
+#include <string_view>
+
#include "io/fs/obj_storage_client.h"
namespace Azure::Storage::Blobs {
@@ -27,10 +30,15 @@ namespace doris::io {
class ObjClientHolder;
+bool is_azure_tls_ca_error_message(std::string_view message);
+std::string build_azure_tls_debug_suffix(std::string_view error_message,
+ std::string_view tls_debug_context);
+
class AzureObjStorageClient final : public ObjStorageClient {
public:
-
AzureObjStorageClient(std::shared_ptr<Azure::Storage::Blobs::BlobContainerClient>
client)
- : _client(std::move(client)) {}
+
AzureObjStorageClient(std::shared_ptr<Azure::Storage::Blobs::BlobContainerClient>
client,
+ std::string tls_debug_context = {})
+ : _client(std::move(client)),
_tls_debug_context(std::move(tls_debug_context)) {}
~AzureObjStorageClient() override = default;
ObjectStorageUploadResponse create_multipart_upload(
const ObjectStoragePathOptions& opts) override;
@@ -56,6 +64,7 @@ public:
private:
std::shared_ptr<Azure::Storage::Blobs::BlobContainerClient> _client;
+ std::string _tls_debug_context;
};
-} // namespace doris::io
\ No newline at end of file
+} // namespace doris::io
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index 6e5da0d9a09..3b22ebcbdfa 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -38,10 +38,12 @@
#ifdef USE_AZURE
#include <azure/core/diagnostics/logger.hpp>
+#include <azure/core/http/curl_transport.hpp>
#include <azure/storage/blobs/blob_container_client.hpp>
#endif
#include <cstdlib>
#include <filesystem>
+#include <fstream>
#include <functional>
#include <memory>
#include <ostream>
@@ -110,6 +112,33 @@ bool to_int(std::string_view str, int& res) {
return ec == std::errc {};
}
+#ifdef USE_AZURE
+std::string env_or_empty(const char* env_name) {
+ if (const char* value = std::getenv(env_name); value != nullptr) {
+ return value;
+ }
+ return "";
+}
+
+std::string build_azure_tls_debug_context(const std::string& selected_ca_file)
{
+ bool selected_ca_exists = false;
+ bool selected_ca_readable = false;
+ if (!selected_ca_file.empty()) {
+ std::error_code ec;
+ selected_ca_exists = std::filesystem::exists(selected_ca_file, ec) &&
!ec;
+ std::ifstream input(selected_ca_file);
+ selected_ca_readable = input.good();
+ }
+
+ return fmt::format(
+ "tls_debug(ca_cert_file_paths='{}', selected_ca_file='{}',
selected_ca_exists={}, "
+ "selected_ca_readable={}, SSL_CERT_FILE='{}', CURL_CA_BUNDLE='{}',
SSL_CERT_DIR='{}')",
+ config::ca_cert_file_paths, selected_ca_file, selected_ca_exists,
selected_ca_readable,
+ env_or_empty("SSL_CERT_FILE"), env_or_empty("CURL_CA_BUNDLE"),
+ env_or_empty("SSL_CERT_DIR"));
+}
+#endif
+
constexpr char USE_PATH_STYLE[] = "use_path_style";
constexpr char AZURE_PROVIDER_STRING[] = "AZURE";
@@ -328,14 +357,25 @@ std::shared_ptr<io::ObjStorageClient>
S3ClientFactory::_create_azure_client(
options.Retry.StatusCodes.insert(Azure::Core::Http::HttpStatusCode::TooManyRequests);
options.Retry.MaxRetries = config::max_s3_client_retry;
options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>());
+ if (_ca_cert_file_path.empty()) {
+ _ca_cert_file_path =
get_valid_ca_cert_path(doris::split(config::ca_cert_file_paths, ";"));
+ }
+ if (!_ca_cert_file_path.empty()) {
+ Azure::Core::Http::CurlTransportOptions curl_options;
+ curl_options.CAInfo = _ca_cert_file_path;
+ options.Transport.Transport =
+
std::make_shared<Azure::Core::Http::CurlTransport>(std::move(curl_options));
+ }
std::string normalized_uri = normalize_http_uri(uri);
VLOG_DEBUG << "uri:" << uri << ", normalized_uri:" << normalized_uri;
+ std::string tls_debug_context =
build_azure_tls_debug_context(_ca_cert_file_path);
auto containerClient =
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(
uri, cred, std::move(options));
LOG_INFO("create one azure client with {}", s3_conf.to_string());
- return
std::make_shared<io::AzureObjStorageClient>(std::move(containerClient));
+ return
std::make_shared<io::AzureObjStorageClient>(std::move(containerClient),
+
std::move(tls_debug_context));
#else
LOG_FATAL("BE is not compiled with azure support, export BUILD_AZURE=ON
before building");
return nullptr;
diff --git a/be/test/io/fs/azure_obj_storage_client_test.cpp
b/be/test/io/fs/azure_obj_storage_client_test.cpp
index 02e78e8b499..7591b4bf2ea 100644
--- a/be/test/io/fs/azure_obj_storage_client_test.cpp
+++ b/be/test/io/fs/azure_obj_storage_client_test.cpp
@@ -36,6 +36,27 @@ namespace doris {
using namespace Azure::Storage::Blobs;
+TEST(AzureObjStorageClientTlsHelperTest, detects_tls_ca_error) {
+ EXPECT_TRUE(io::is_azure_tls_ca_error_message(
+ "Problem with the SSL CA cert (path? access rights?)"));
+ EXPECT_TRUE(io::is_azure_tls_ca_error_message(
+ "curl error: peer failed verification for cert chain"));
+ EXPECT_TRUE(io::is_azure_tls_ca_error_message("unable to get local issuer
certificate"));
+ EXPECT_FALSE(io::is_azure_tls_ca_error_message("AuthenticationFailed"));
+}
+
+TEST(AzureObjStorageClientTlsHelperTest,
appends_debug_suffix_only_for_tls_ca_error) {
+ std::string_view debug_ctx =
"tls_debug(selected_ca_file='/etc/ssl/certs/ca-bundle.crt')";
+
+ EXPECT_EQ(io::build_azure_tls_debug_suffix(
+ "Problem with the SSL CA cert (path? access rights?)",
debug_ctx),
+ ", tls_debug(selected_ca_file='/etc/ssl/certs/ca-bundle.crt')");
+ EXPECT_EQ(io::build_azure_tls_debug_suffix("AuthenticationFailed",
debug_ctx), "");
+ EXPECT_EQ(io::build_azure_tls_debug_suffix(
+ "Problem with the SSL CA cert (path? access rights?)",
""),
+ "");
+}
+
class AzureObjStorageClientTest : public testing::Test {
protected:
static std::shared_ptr<io::ObjStorageClient> obj_storage_client;
@@ -148,4 +169,4 @@ TEST_F(AzureObjStorageClientTest, dummy_test) {
#endif // #ifdef USE_AZURE
-} // namespace doris
\ No newline at end of file
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]