This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new ad551b0e3a6 branch-3.0: [fix](s3) support chinacloudapi endpoint for 
azure #47703 (#48642)
ad551b0e3a6 is described below

commit ad551b0e3a6cb230ba45e0ffd9bc77c7ed7a4257
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Mar 10 20:51:04 2025 +0800

    branch-3.0: [fix](s3) support chinacloudapi endpoint for azure #47703 
(#48642)
    
    Cherry-picked from #47703
    
    Co-authored-by: Kaijie Chen <[email protected]>
---
 be/src/common/config.cpp                           |  2 +
 be/src/common/config.h                             |  2 +
 be/src/io/fs/azure_obj_storage_client.cpp          | 14 ++++--
 be/src/util/s3_util.cpp                            | 11 ++++-
 cloud/src/common/config.h                          |  2 +
 cloud/src/recycler/s3_accessor.cpp                 | 10 +++-
 .../main/java/org/apache/doris/common/Config.java  |  9 ++++
 .../apache/doris/cloud/storage/AzureRemote.java    | 15 +++---
 .../property/constants/AzureProperties.java        | 17 +++++--
 .../org/apache/doris/fs/obj/AzureObjStorage.java   | 15 +++---
 .../doris/tablefunction/S3TableValuedFunction.java | 17 +++----
 .../property/constants/AzurePropertiesTest.java    | 55 ++++++++++++++++++++++
 12 files changed, 134 insertions(+), 35 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index bdcadef7820..0ac96160f62 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1306,6 +1306,8 @@ DEFINE_Int32(spill_io_thread_pool_queue_size, "102400");
 
 DEFINE_mBool(check_segment_when_build_rowset_meta, "false");
 
+DEFINE_mBool(force_azure_blob_global_endpoint, "false");
+
 DEFINE_mInt32(max_s3_client_retry, "10");
 DEFINE_mInt32(s3_read_base_wait_time_ms, "100");
 DEFINE_mInt32(s3_read_max_wait_time_ms, "800");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 2edf21b17d0..79a8db32a8a 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1382,6 +1382,8 @@ DECLARE_mBool(check_segment_when_build_rowset_meta);
 
 DECLARE_Int32(num_query_ctx_map_partitions);
 
+DECLARE_mBool(force_azure_blob_global_endpoint);
+
 DECLARE_mBool(enable_s3_rate_limiter);
 DECLARE_mInt64(s3_get_bucket_tokens);
 DECLARE_mInt64(s3_get_token_per_second);
diff --git a/be/src/io/fs/azure_obj_storage_client.cpp 
b/be/src/io/fs/azure_obj_storage_client.cpp
index bf2e370da6f..ee4b8f7ac89 100644
--- a/be/src/io/fs/azure_obj_storage_client.cpp
+++ b/be/src/io/fs/azure_obj_storage_client.cpp
@@ -80,7 +80,7 @@ auto s3_put_rate_limit(Func callback) -> decltype(callback()) 
{
     return s3_rate_limit(doris::S3RateLimitType::PUT, std::move(callback));
 }
 
-constexpr char SAS_TOKEN_URL_TEMPLATE[] = "https://{}.blob.core.windows.net/{}/{}{}";
+constexpr char SAS_TOKEN_URL_TEMPLATE[] = "{}/{}/{}{}";
 constexpr char BlobNotFound[] = "BlobNotFound";
 } // namespace
 
@@ -416,6 +416,14 @@ std::string 
AzureObjStorageClient::generate_presigned_url(const ObjectStoragePat
     std::string sasToken = sas_builder.GenerateSasToken(
             Azure::Storage::StorageSharedKeyCredential(conf.ak, conf.sk));
 
-    return fmt::format(SAS_TOKEN_URL_TEMPLATE, conf.ak, conf.bucket, opts.key, 
sasToken);
+    std::string endpoint = conf.endpoint;
+    if (doris::config::force_azure_blob_global_endpoint) {
+        endpoint = fmt::format("https://{}.blob.core.windows.net", conf.ak);
+    }
+    auto sasURL = fmt::format(SAS_TOKEN_URL_TEMPLATE, endpoint, conf.bucket, 
opts.key, sasToken);
+    if (sasURL.find("://") == std::string::npos) {
+        sasURL = "https://" + sasURL;
+    }
+    return sasURL;
 }
-} // namespace doris::io
\ No newline at end of file
+} // namespace doris::io
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index dece1074e60..c5937d7659f 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -254,8 +254,15 @@ std::shared_ptr<io::ObjStorageClient> 
S3ClientFactory::_create_azure_client(
             
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(s3_conf.ak, 
s3_conf.sk);
 
     const std::string container_name = s3_conf.bucket;
-    const std::string uri =
-            fmt::format("{}://{}.blob.core.windows.net/{}", "https", 
s3_conf.ak, container_name);
+    std::string uri;
+    if (config::force_azure_blob_global_endpoint) {
+        uri = fmt::format("https://{}.blob.core.windows.net/{}", s3_conf.ak, container_name);
+    } else {
+        uri = fmt::format("{}/{}", s3_conf.endpoint, container_name);
+        if (s3_conf.endpoint.find("://") == std::string::npos) {
+            uri = "https://" + uri;
+        }
+    }
 
     auto containerClient = 
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(uri, cred);
     LOG_INFO("create one azure client with {}", s3_conf.to_string());
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index fbae4dbeddf..fd79bff8e16 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -228,6 +228,8 @@ CONF_Validator(s3_client_http_scheme, [](const std::string& 
config) -> bool {
     return config == "http" || config == "https";
 });
 
+CONF_Bool(force_azure_blob_global_endpoint, "false");
+
 // Max retry times for object storage request
 CONF_mInt64(max_s3_client_retry, "10");
 
diff --git a/cloud/src/recycler/s3_accessor.cpp 
b/cloud/src/recycler/s3_accessor.cpp
index 5356ddf38af..baf69df57e3 100644
--- a/cloud/src/recycler/s3_accessor.cpp
+++ b/cloud/src/recycler/s3_accessor.cpp
@@ -255,8 +255,14 @@ int S3Accessor::init() {
         options.Retry.MaxRetries = config::max_s3_client_retry;
         auto cred =
                 
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(conf_.ak, 
conf_.sk);
-        uri_ = fmt::format("{}://{}.blob.core.windows.net/{}", 
config::s3_client_http_scheme,
-                           conf_.ak, conf_.bucket);
+        if (config::force_azure_blob_global_endpoint) {
+            uri_ = fmt::format("https://{}.blob.core.windows.net/{}", conf_.ak, conf_.bucket);
+        } else {
+            uri_ = fmt::format("{}/{}", conf_.endpoint, conf_.bucket);
+            if (uri_.find("://") == std::string::npos) {
+                uri_ = "https://" + uri_;
+            }
+        }
         // In Azure's HTTP requests, all policies in the vector are called in 
a chained manner following the HTTP pipeline approach.
         // Within the RetryPolicy, the nextPolicy is called multiple times 
inside a loop.
         // All policies in the PerRetryPolicies are downstream of the 
RetryPolicy.
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index cda1b914041..90a1951fc45 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -3138,6 +3138,15 @@ public class Config extends ConfigBase {
             + "for example: s3_load_endpoint_white_list=a,b,c"})
     public static String[] s3_load_endpoint_white_list = {};
 
+    @ConfField(mutable = true, description = {
+            "此参数控制是否强制使用 Azure global endpoint。默认值为 false,系统将使用用户指定的 endpoint。"
+            + "如果设置为 true,系统将强制使用 {account}.blob.core.windows.net。",
+            "This parameter controls whether to force the use of the Azure 
global endpoint. "
+            + "The default is false, meaning the system will use the 
user-specified endpoint. "
+            + "If set to true, the system will force the use of 
{account}.blob.core.windows.net."
+    })
+    public static boolean force_azure_blob_global_endpoint = false;
+
     @ConfField(mutable = true, description = {"指定Jdbc driver url白名单, 举例: 
jdbc_driver_url_white_list=a,b,c",
             "the white list for jdbc driver url, if it is empty, no white list 
will be set"
             + "for example: jdbc_driver_url_white_list=a,b,c"
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/AzureRemote.java 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/AzureRemote.java
index 62026b47b87..86d5610bb0e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/AzureRemote.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/AzureRemote.java
@@ -18,6 +18,7 @@
 package org.apache.doris.cloud.storage;
 
 import org.apache.doris.common.DdlException;
+import org.apache.doris.datasource.property.constants.AzureProperties;
 
 import com.azure.core.credential.AccessToken;
 import com.azure.core.credential.TokenCredential;
@@ -58,8 +59,6 @@ public class AzureRemote extends RemoteBase {
 
     private static final Logger LOG = LogManager.getLogger(AzureRemote.class);
 
-    private static final String URI_TEMPLATE = "https://%s.blob.core.windows.net/%s";
-
     private BlobContainerClient client;
 
     public AzureRemote(ObjectInfo obj) {
@@ -72,8 +71,8 @@ public class AzureRemote extends RemoteBase {
             BlobContainerClientBuilder builder = new 
BlobContainerClientBuilder();
             builder.credential(new StorageSharedKeyCredential(obj.getAk(), 
obj.getSk()));
             String containerName = obj.getBucket();
-            String uri = String.format(URI_TEMPLATE, obj.getAk(),
-                    containerName);
+            String endpoint = 
AzureProperties.formatAzureEndpoint(obj.getEndpoint(), obj.getAk());
+            String uri = endpoint + "/" + containerName;
             builder.endpoint(uri);
             BlobContainerClient containerClient = builder.buildClient();
 
@@ -134,8 +133,8 @@ public class AzureRemote extends RemoteBase {
             BlobContainerClientBuilder builder = new 
BlobContainerClientBuilder();
             builder.credential(new StorageSharedKeyCredential(obj.getAk(), 
obj.getSk()));
             String containerName = obj.getBucket();
-            String uri = String.format(URI_TEMPLATE, obj.getAk(),
-                    containerName);
+            String endpoint = 
AzureProperties.formatAzureEndpoint(obj.getEndpoint(), obj.getAk());
+            String uri = endpoint + "/" + containerName;
             builder.endpoint(uri);
             BlobContainerClient containerClient = builder.buildClient();
             BlobServiceClient blobServiceClient = 
containerClient.getServiceClient();
@@ -229,8 +228,8 @@ public class AzureRemote extends RemoteBase {
                 builder.credential(new StorageSharedKeyCredential(obj.getAk(), 
obj.getSk()));
             }
             String containerName = obj.getBucket();
-            String uri = String.format(URI_TEMPLATE, obj.getAk(),
-                    containerName);
+            String endpoint = 
AzureProperties.formatAzureEndpoint(obj.getEndpoint(), obj.getAk());
+            String uri = endpoint + "/" + containerName;
             builder.endpoint(uri);
             client = builder.buildClient();
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/AzureProperties.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/AzureProperties.java
index daceda0bc35..70ba490ccfd 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/AzureProperties.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/AzureProperties.java
@@ -17,7 +17,7 @@
 
 package org.apache.doris.datasource.property.constants;
 
-
+import org.apache.doris.common.Config;
 import org.apache.doris.common.credentials.CloudCredential;
 
 import java.util.Arrays;
@@ -36,7 +36,7 @@ public class AzureProperties extends BaseProperties {
     public static final String SESSION_TOKEN = "azure.session_token";
     public static final List<String> REQUIRED_FIELDS = Arrays.asList(ENDPOINT, 
ACCESS_KEY, SECRET_KEY);
 
-    public static final String AZURE_ENDPOINT_TEMPLATE = 
"%s.blob.core.windows.net/%s";
+    public static final String AZURE_ENDPOINT_TEMPLATE = "https://%s.blob.core.windows.net";
 
     public static class FS {
         public static final String SESSION_TOKEN = "fs.azure.session.token";
@@ -47,7 +47,7 @@ public class AzureProperties extends BaseProperties {
         return getCloudCredential(props, ACCESS_KEY, SECRET_KEY, 
SESSION_TOKEN);
     }
 
-    public static Boolean checkAzureProviderPropertyExist(Map<String, String> 
properties) {
+    public static boolean checkAzureProviderPropertyExist(Map<String, String> 
properties) {
         for (Map.Entry<String, String> entry : properties.entrySet()) {
             if (entry.getKey().toLowerCase().contains(S3Properties.PROVIDER)
                     && 
entry.getValue().toUpperCase().equals(AzureProperties.AZURE_NAME)) {
@@ -56,4 +56,15 @@ public class AzureProperties extends BaseProperties {
         }
         return false;
     }
+
+    public static String formatAzureEndpoint(String endpoint, String 
accountName) {
+        if (Config.force_azure_blob_global_endpoint) {
+            return String.format(AZURE_ENDPOINT_TEMPLATE, accountName);
+        }
+        if (endpoint.contains("://")) {
+            return endpoint;
+        }
+        return "https://" + endpoint;
+    }
+
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
index cda78ba8773..e59bc6ac52e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
@@ -22,6 +22,7 @@ import org.apache.doris.common.DdlException;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.S3URI;
 import org.apache.doris.datasource.property.PropertyConverter;
+import org.apache.doris.datasource.property.constants.AzureProperties;
 import org.apache.doris.datasource.property.constants.S3Properties;
 import org.apache.doris.fs.remote.RemoteFile;
 
@@ -61,7 +62,6 @@ import java.util.TreeMap;
 
 public class AzureObjStorage implements ObjStorage<BlobServiceClient> {
     private static final Logger LOG = 
LogManager.getLogger(AzureObjStorage.class);
-    private static final String URI_TEMPLATE = "https://%s.blob.core.windows.net";
     protected Map<String, String> properties;
     private BlobServiceClient client;
     private boolean isUsePathStyle = false;
@@ -99,7 +99,7 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
         }
         // Virtual hosted-style is recommended in the s3 protocol.
         // The path-style has been abandoned, but for some unexplainable 
reasons,
-        // the s3 client will determine whether the endpiont starts with `s3`
+        // the s3 client will determine whether the endpoint starts with `s3`
         // when generating a virtual hosted-sytle request.
         // If not, it will not be converted ( 
https://github.com/aws/aws-sdk-java-v2/pull/763),
         // but the endpoints of many cloud service providers for object 
storage do not start with s3,
@@ -116,12 +116,14 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
     @Override
     public BlobServiceClient getClient() throws UserException {
         if (client == null) {
-            String uri = String.format(URI_TEMPLATE, 
properties.get(S3Properties.ACCESS_KEY));
-            StorageSharedKeyCredential cred = new 
StorageSharedKeyCredential(properties.get(S3Properties.ACCESS_KEY),
+            final String accountName = properties.get(S3Properties.ACCESS_KEY);
+            final String endpoint = AzureProperties.formatAzureEndpoint(
+                    properties.get(S3Properties.ENDPOINT), accountName);
+            StorageSharedKeyCredential cred = new 
StorageSharedKeyCredential(accountName,
                     properties.get(S3Properties.SECRET_KEY));
             BlobServiceClientBuilder builder = new BlobServiceClientBuilder();
             builder.credential(cred);
-            builder.endpoint(uri);
+            builder.endpoint(endpoint);
             client = builder.buildClient();
         }
         return client;
@@ -389,7 +391,8 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
                     + " failed because azure error: " + e.getMessage());
         } catch (Exception e) {
             LOG.warn("errors while glob file " + remotePath, e);
-            st = new Status(Status.ErrCode.COMMON_ERROR, "errors while glob 
file " + remotePath + e.getMessage());
+            st = new Status(Status.ErrCode.COMMON_ERROR,
+                    "errors while glob file " + remotePath + ": " + 
e.getMessage());
         } finally {
             long endTime = System.nanoTime();
             long duration = endTime - startTime;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
 
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
index 3defb171a9f..56c438c303e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
@@ -118,21 +118,16 @@ public class S3TableValuedFunction extends 
ExternalFileTableValuedFunction {
     }
 
     private String constructEndpoint(Map<String, String> properties, S3URI 
s3uri) throws AnalysisException {
-        String endpoint;
-        if (!AzureProperties.checkAzureProviderPropertyExist(properties)) {
-            // get endpoint first from properties, if not present, get it from 
s3 uri.
-            // If endpoint is missing, exception will be thrown.
-            endpoint = getOrDefaultAndRemove(properties, 
S3Properties.ENDPOINT, s3uri.getEndpoint().orElse(""));
-            if (Strings.isNullOrEmpty(endpoint)) {
-                throw new AnalysisException(String.format("Properties '%s' is 
required.", S3Properties.ENDPOINT));
-            }
-        } else {
-            String bucket = s3uri.getBucket();
+        // get endpoint first from properties, if not present, get it from s3 
uri.
+        String endpoint = getOrDefaultAndRemove(properties, 
S3Properties.ENDPOINT, s3uri.getEndpoint().orElse(""));
+        if (AzureProperties.checkAzureProviderPropertyExist(properties)) {
             String accountName = 
properties.getOrDefault(S3Properties.ACCESS_KEY, "");
             if (accountName.isEmpty()) {
                 throw new AnalysisException(String.format("Properties '%s' is 
required.", S3Properties.ACCESS_KEY));
             }
-            endpoint = String.format(AzureProperties.AZURE_ENDPOINT_TEMPLATE, 
accountName, bucket);
+            endpoint = AzureProperties.formatAzureEndpoint(endpoint, 
accountName);
+        } else if (Strings.isNullOrEmpty(endpoint)) {
+            throw new AnalysisException(String.format("Properties '%s' is 
required.", S3Properties.ENDPOINT));
         }
         return endpoint;
     }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/constants/AzurePropertiesTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/constants/AzurePropertiesTest.java
new file mode 100644
index 00000000000..e155b5434d8
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/constants/AzurePropertiesTest.java
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.constants;
+
+import org.apache.doris.common.Config;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class AzurePropertiesTest {
+
+    @Test
+    public static void testFormatAzureEndpointGlobal() {
+        Config.force_azure_blob_global_endpoint = true;
+        String endpoint = AzureProperties.formatAzureEndpoint("ANY-ENDPOINT", 
"ak");
+        Assertions.assertEquals("https://ak.blob.core.windows.net", endpoint);
+    }
+
+    @Test
+    public static void testFormatAzureEndpoint() {
+        Config.force_azure_blob_global_endpoint = false;
+        String endpoint = 
AzureProperties.formatAzureEndpoint("ak.blob.core.chinacloudapi.cn", 
"ANY-ACCOUNT");
+        Assertions.assertEquals("https://ak.blob.core.chinacloudapi.cn", endpoint);
+    }
+
+    @Test
+    public static void testFormatAzureEndpointHTTPS() {
+        Config.force_azure_blob_global_endpoint = false;
+        String endpoint = AzureProperties.formatAzureEndpoint("https://ak.blob.core.chinacloudapi.cn", "ANY-ACCOUNT");
+        Assertions.assertEquals("https://ak.blob.core.chinacloudapi.cn", endpoint);
+    }
+
+    @Test
+    public static void testFormatAzureEndpointHTTP() {
+        Config.force_azure_blob_global_endpoint = false;
+        String endpoint = AzureProperties.formatAzureEndpoint("http://ak.blob.core.chinacloudapi.cn", "ANY-ACCOUNT");
+        Assertions.assertEquals("http://ak.blob.core.chinacloudapi.cn", endpoint);
+    }
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org

Reply via email to