This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 98f24c02b5f branch-3.1: [Feat](refactor-param) Abstract Endpoint and
Region Extraction Logic with Azure Support #50220 (#52399)
98f24c02b5f is described below
commit 98f24c02b5f98677c51f27337b51da10da468d1b
Author: Calvin Kirs <[email protected]>
AuthorDate: Fri Jun 27 16:28:00 2025 +0800
branch-3.1: [Feat](refactor-param) Abstract Endpoint and Region Extraction
Logic with Azure Support #50220 (#52399)
cherry pick #50220
---
...es.java => AbstractS3CompatibleProperties.java} | 45 +++--
.../property/storage/AzureProperties.java | 157 +++++++++++++++++
.../datasource/property/storage/COSProperties.java | 39 ++---
.../datasource/property/storage/OBSProperties.java | 43 ++---
.../datasource/property/storage/OSSProperties.java | 63 ++-----
.../property/storage/ObjectStorageProperties.java | 2 +
.../datasource/property/storage/S3Properties.java | 100 ++++-------
.../property/storage/StorageProperties.java | 7 +-
.../property/storage/AzurePropertiesTest.java | 194 +++++++++++++++++++++
.../property/storage/OBSPropertyTest.java | 4 +-
.../property/storage/OSSPropertiesTest.java | 6 +-
.../property/storage/S3PropertiesTest.java | 28 +++
12 files changed, 494 insertions(+), 194 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractObjectStorageProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
similarity index 88%
rename from
fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractObjectStorageProperties.java
rename to
fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
index 26ac10fb32a..5685c278347 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractObjectStorageProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
@@ -24,9 +24,9 @@ import lombok.Getter;
import lombok.Setter;
import org.apache.commons.lang3.StringUtils;
-import java.net.URI;
import java.util.HashMap;
import java.util.Map;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
@@ -38,7 +38,7 @@ import java.util.regex.Pattern;
* The properties include connection settings (e.g., timeouts and maximum
connections) and a flag to
* determine if path-style URLs should be used for the storage system.
*/
-public abstract class AbstractObjectStorageProperties extends
StorageProperties implements ObjectStorageProperties {
+public abstract class AbstractS3CompatibleProperties extends StorageProperties
implements ObjectStorageProperties {
/**
* The maximum number of concurrent connections that can be made to the
object storage system.
@@ -86,7 +86,7 @@ public abstract class AbstractObjectStorageProperties extends
StorageProperties
* @param type the type of object storage system.
* @param origProps the original properties map.
*/
- protected AbstractObjectStorageProperties(Type type, Map<String, String>
origProps) {
+ protected AbstractS3CompatibleProperties(Type type, Map<String, String>
origProps) {
super(type, origProps);
}
@@ -158,32 +158,31 @@ public abstract class AbstractObjectStorageProperties
extends StorageProperties
}
}
- protected abstract Pattern endpointPattern();
-
- private boolean isValidEndpoint(String endpoint) {
+ private void initRegionIfNecessary() {
+ if (StringUtils.isNotBlank(getRegion())) {
+ return;
+ }
+ String endpoint = getEndpoint();
if (endpoint == null || endpoint.isEmpty()) {
- return false;
+ throw new IllegalArgumentException("endpoint is required");
}
-
- String host = extractHost(endpoint);
- if (host == null || host.isEmpty()) {
- return false;
+ Matcher matcher = endpointPattern().matcher(endpoint.toLowerCase());
+ if (matcher.find()) {
+ String region = matcher.group(1);
+ if (StringUtils.isBlank(region)) {
+ throw new IllegalArgumentException("Invalid endpoint format: "
+ endpoint);
+ }
+ setRegion(region);
+ return;
}
- host = host.replaceFirst("\\.internal$", "");
- return endpointPattern().matcher(host).matches();
+ throw new IllegalArgumentException("Not a valid region, and cannot be
parsed from endpoint: " + endpoint);
}
- private String extractHost(String endpoint) {
- try {
- String url = endpoint.matches("^[a-zA-Z][a-zA-Z0-9+.-]*://.*") ?
endpoint : "http://" + endpoint;
- URI uri = new URI(url);
- return uri.getHost();
- } catch (Exception e) {
- throw new IllegalArgumentException("Invalid endpoint format: " +
endpoint, e);
- }
- }
+ protected abstract Pattern endpointPattern();
- protected abstract void initRegionIfNecessary();
+ private boolean isValidEndpoint(String endpoint) {
+ return endpointPattern().matcher(endpoint).matches();
+ }
private void setEndpointIfNotSet() throws UserException {
if (StringUtils.isNotBlank(getEndpoint())) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java
new file mode 100644
index 00000000000..512ed92cca9
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java
@@ -0,0 +1,157 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.storage;
+
+import org.apache.doris.common.UserException;
+import org.apache.doris.datasource.property.ConnectorProperty;
+
+import com.google.common.base.Strings;
+import lombok.Getter;
+import lombok.Setter;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Stream;
+
+/**
+ * AzureProperties is a specialized configuration class for accessing Azure
Blob Storage
+ * using an S3-compatible interface.
+ *
+ * <p>This class extends {@link StorageProperties} and adapts Azure-specific
properties
+ * to a format that is compatible with the backend engine (BE), which expects
configurations
+ * similar to Amazon S3. This is necessary because the backend is designed to
work with
+ * S3-style parameters regardless of the actual cloud provider.
+ *
+ * <p>Although Azure Blob Storage does not use all of the S3 parameters (e.g.,
region),
+ * this class maps and provides dummy or compatible values to satisfy the
expected format.
+ * It also tags the provider as "azure" in the final configuration map.
+ *
+ * <p>The class supports common parameters like access key, secret key,
endpoint, and
+ * path style access, while also ensuring compatibility with existing S3
processing
+ * logic by delegating some functionality to {@code S3PropertyUtils}.
+ *
+ * <p>Typical usage includes validation of required parameters, transformation
to a
+ * backend-compatible configuration map, and conversion of URLs to storage
paths.
+ *
+ * <p>Note: This class may evolve as the backend introduces native Azure
support
+ * or adopts a more flexible configuration model.
+ *
+ * @see StorageProperties
+ * @see S3PropertyUtils
+ */
+public class AzureProperties extends StorageProperties {
+ @Getter
+ @ConnectorProperty(names = {"s3.endpoint", "AWS_ENDPOINT", "endpoint",
"ENDPOINT", "azure.endpoint"},
+ description = "The endpoint of S3.")
+ protected String endpoint = "";
+
+
+ @Getter
+ @ConnectorProperty(names = {"s3.access_key", "AWS_ACCESS_KEY",
"ACCESS_KEY", "access_key", "azure.access_key"},
+ description = "The access key of S3.")
+ protected String accessKey = "";
+
+ @Getter
+ @ConnectorProperty(names = {"s3.secret_key", "AWS_SECRET_KEY",
"secret_key", "azure.secret_key"},
+ description = "The secret key of S3.")
+ protected String secretKey = "";
+
+ @Getter
+ @ConnectorProperty(names = {"s3.bucket"},
+ required = false,
+ description = "The container of Azure blob.")
+ protected String container = "";
+
+ /**
+ * Flag indicating whether to use path-style URLs for the object storage
system.
+ * This value is optional and can be configured by the user.
+ */
+ @Setter
+ @Getter
+ @ConnectorProperty(names = {"use_path_style", "s3.path-style-access"},
required = false,
+ description = "Whether to use path style URL for the storage.")
+ protected String usePathStyle = "false";
+ @ConnectorProperty(names = {"force_parsing_by_standard_uri"}, required =
false,
+ description = "Whether to use path style URL for the storage.")
+ @Getter
+ protected String forceParsingByStandardUrl = "false";
+
+
+ public AzureProperties(Map<String, String> origProps) {
+ super(Type.AZURE, origProps);
+ }
+
+ private static final String AZURE_ENDPOINT_SUFFIX =
".blob.core.windows.net";
+
+ @Override
+ protected void initNormalizeAndCheckProps() throws UserException {
+ super.initNormalizeAndCheckProps();
+ //check endpoint
+ if (!endpoint.endsWith(AZURE_ENDPOINT_SUFFIX)) {
+ throw new IllegalArgumentException(String.format("Endpoint '%s' is
not valid. It should end with '%s'.",
+ endpoint, AZURE_ENDPOINT_SUFFIX));
+ }
+ }
+
+ public static boolean guessIsMe(Map<String, String> origProps) {
+ boolean enable = origProps.containsKey(FS_PROVIDER_KEY)
+ && origProps.get(FS_PROVIDER_KEY).equalsIgnoreCase("azure");
+ if (enable) {
+ return true;
+ }
+ String value = Stream.of("azure.endpoint", "s3.endpoint",
"AWS_ENDPOINT", "endpoint", "ENDPOINT")
+ .map(origProps::get)
+ .filter(Objects::nonNull)
+ .findFirst()
+ .orElse(null);
+ if (!Strings.isNullOrEmpty(value)) {
+ return value.endsWith("blob.core.windows.net");
+ }
+ return false;
+ }
+
+ @Override
+ public Map<String, String> getBackendConfigProperties() {
+ Map<String, String> s3Props = new HashMap<>();
+ s3Props.put("AWS_ENDPOINT", endpoint);
+ s3Props.put("AWS_REGION", "dummy_region");
+ s3Props.put("AWS_ACCESS_KEY", accessKey);
+ s3Props.put("AWS_SECRET_KEY", secretKey);
+ s3Props.put("AWS_NEED_OVERRIDE_ENDPOINT", "true");
+ s3Props.put("provider", "azure");
+ s3Props.put("use_path_style", usePathStyle);
+ return s3Props;
+ }
+
+ @Override
+ public String validateAndNormalizeUri(String url) throws UserException {
+ return S3PropertyUtils.validateAndNormalizeUri(url, usePathStyle,
forceParsingByStandardUrl);
+
+ }
+
+ @Override
+ public String validateAndGetUri(Map<String, String> loadProps) throws
UserException {
+ return S3PropertyUtils.validateAndGetUri(loadProps);
+ }
+
+ @Override
+ public String getStorageName() {
+ return "Azure";
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/COSProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/COSProperties.java
index 7202f49cd96..6dc72e1b27c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/COSProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/COSProperties.java
@@ -25,11 +25,10 @@ import lombok.Setter;
import java.util.Map;
import java.util.Objects;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
-public class COSProperties extends AbstractObjectStorageProperties {
+public class COSProperties extends AbstractS3CompatibleProperties {
@Setter
@Getter
@@ -39,6 +38,7 @@ public class COSProperties extends
AbstractObjectStorageProperties {
protected String endpoint = "";
@Getter
+ @Setter
@ConnectorProperty(names = {"cos.region", "s3.region", "AWS_REGION",
"region", "REGION"},
required = false,
description = "The region of COS.")
@@ -54,8 +54,15 @@ public class COSProperties extends
AbstractObjectStorageProperties {
description = "The secret key of COS.")
protected String secretKey = "";
- private static final Pattern COS_ENDPOINT_PATTERN = Pattern
- .compile("^cos\\.[a-z0-9-]+\\.myqcloud\\.com(\\.internal)?$");
+ /**
+ * Pattern to extract the region from a Tencent Cloud COS endpoint.
+ * <p>
+ * Supported formats:
+ * - cos.ap-guangzhou.myqcloud.com => region = ap-guangzhou*
<p>
+ * Group(1) captures the region name.
+ */
+ private static final Pattern ENDPOINT_PATTERN =
+
Pattern.compile("^(?:https?://)?cos\\.([a-z0-9-]+)\\.myqcloud\\.com$");
protected COSProperties(Map<String, String> origProps) {
super(Type.COS, origProps);
@@ -68,7 +75,7 @@ public class COSProperties extends
AbstractObjectStorageProperties {
.findFirst()
.orElse(null);
if (!Strings.isNullOrEmpty(value)) {
- return value.contains("myqcloud.com");
+ return ENDPOINT_PATTERN.matcher(value).matches();
}
if (!origProps.containsKey("uri")) {
return false;
@@ -78,27 +85,7 @@ public class COSProperties extends
AbstractObjectStorageProperties {
@Override
protected Pattern endpointPattern() {
- return COS_ENDPOINT_PATTERN;
- }
-
- /**
- * Initializes the cosRegion field based on the COS endpoint if it's not
already set.
- * <p>
- * This method extracts the region from Tencent Cloud COS endpoints.
- * It supports typical COS endpoint formats like:
- * <p>
- * Example:
- * - "cos.ap-guangzhou.myqcloud.com" → cosRegion = "ap-guangzhou"
- */
- @Override
- protected void initRegionIfNecessary() {
- if (Strings.isNullOrEmpty(this.region)) {
- Pattern cosPattern =
Pattern.compile("cos\\.([a-z0-9-]+)\\.myqcloud\\.com");
- Matcher matcher = cosPattern.matcher(endpoint);
- if (matcher.find()) {
- this.region = matcher.group(1);
- }
- }
+ return ENDPOINT_PATTERN;
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/OBSProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/OBSProperties.java
index 474acb0f5f2..87ad9b5761c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/OBSProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/OBSProperties.java
@@ -25,11 +25,10 @@ import lombok.Setter;
import java.util.Map;
import java.util.Objects;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
-public class OBSProperties extends AbstractObjectStorageProperties {
+public class OBSProperties extends AbstractS3CompatibleProperties {
@Setter
@Getter
@@ -49,12 +48,24 @@ public class OBSProperties extends
AbstractObjectStorageProperties {
protected String secretKey = "";
@Getter
+ @Setter
@ConnectorProperty(names = {"obs.region", "s3.region", "AWS_REGION",
"region", "REGION"}, required = false,
description = "The region of OBS.")
protected String region;
- private static Pattern ENDPOINT_PATTERN = Pattern
- .compile("^obs\\.[a-z0-9-]+\\.myhuaweicloud\\.com(\\.internal)?$");
+ /**
+ * Pattern to extract the region from a Huawei Cloud OBS endpoint.
+ * <p>
+ * Supported formats:
+ * - obs.cn-hangzhou.myhuaweicloud.com => region = cn-hangzhou
+ * - https://obs.cn-shanghai.myhuaweicloud.com => region = cn-shanghai
+ * <p>
+ * Group(1) captures the region name (e.g., cn-hangzhou).
+ * FYI: https://console-intl.huaweicloud.com/apiexplorer/#/endpoint/OBS
+ */
+ private static final Pattern ENDPOINT_PATTERN = Pattern
+
.compile("^(?:https?://)?obs\\.([a-z0-9-]+)\\.myhuaweicloud\\.com$");
+
public OBSProperties(Map<String, String> origProps) {
super(Type.OBS, origProps);
@@ -62,14 +73,14 @@ public class OBSProperties extends
AbstractObjectStorageProperties {
}
protected static boolean guessIsMe(Map<String, String> origProps) {
- String value = Stream.of("obs.endpoint", "s3.endpoint",
"AWS_ENDPOINT", "endpoint", "ENDPOINT", "uri")
+ String value = Stream.of("obs.endpoint", "s3.endpoint",
"AWS_ENDPOINT", "endpoint", "ENDPOINT")
.map(origProps::get)
.filter(Objects::nonNull)
.findFirst()
.orElse(null);
if (!Strings.isNullOrEmpty(value)) {
- return value.contains("myhuaweicloud.com");
+ return ENDPOINT_PATTERN.matcher(value).matches();
}
if (!origProps.containsKey("uri")) {
return false;
@@ -83,24 +94,4 @@ public class OBSProperties extends
AbstractObjectStorageProperties {
return ENDPOINT_PATTERN;
}
- /**
- * Initializes the region field based on the OBS endpoint if it's not
already set.
- * <p>
- * This method extracts the region from Huawei Cloud OBS endpoints.
- * It supports typical OBS endpoint formats like:
- * <p>
- * Example:
- * - "obs.cn-north-4.myhuaweicloud.com" → region = "cn-north-4"
- */
- @Override
- protected void initRegionIfNecessary() {
- if (Strings.isNullOrEmpty(this.region)) {
- Pattern obsPattern =
Pattern.compile("obs\\.([a-z0-9-]+)\\.myhuaweicloud\\.com");
- Matcher matcher = obsPattern.matcher(endpoint);
- if (matcher.find()) {
- this.region = matcher.group(1);
- }
- }
- }
-
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/OSSProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/OSSProperties.java
index 86fd1e7796a..34982f4b690 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/OSSProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/OSSProperties.java
@@ -17,7 +17,6 @@
package org.apache.doris.datasource.property.storage;
-import org.apache.doris.common.UserException;
import org.apache.doris.datasource.property.ConnectorProperty;
import com.google.common.base.Strings;
@@ -26,11 +25,10 @@ import lombok.Setter;
import java.util.Map;
import java.util.Objects;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
-public class OSSProperties extends AbstractObjectStorageProperties {
+public class OSSProperties extends AbstractS3CompatibleProperties {
@Setter
@Getter
@@ -50,11 +48,24 @@ public class OSSProperties extends
AbstractObjectStorageProperties {
protected String secretKey = "";
@Getter
+ @Setter
@ConnectorProperty(names = {"oss.region", "s3.region", "AWS_REGION",
"region", "REGION"}, required = false,
description = "The region of OSS.")
protected String region;
- private static Pattern ENDPOINT_PATTERN =
Pattern.compile("^oss-[a-z0-9-]+\\.aliyuncs\\.com(\\.internal)?$");
+ /**
+ * Pattern to extract the region from an Alibaba Cloud OSS endpoint.
+ * <p>
+ * Supported formats:
+ * - oss-cn-hangzhou.aliyuncs.com => region = cn-hangzhou
+ * - <a href="https://oss-cn-shanghai.aliyuncs.com">...</a> => region
= cn-shanghai
+ * - oss-cn-beijing-internal.aliyuncs.com => region = cn-beijing
(internal endpoint)
+ * - <a href="http://oss-cn-shenzhen-internal.aliyuncs.com">...</a> =>
region = cn-shenzhen
+ * <p>
+ * Group(1) captures the region name (e.g., cn-hangzhou).
+ */
+ private static final Pattern ENDPOINT_PATTERN = Pattern
+
.compile("^(?:https?://)?oss-([a-z0-9-]+?)(?:-internal)?\\.aliyuncs\\.com$");
protected OSSProperties(Map<String, String> origProps) {
super(Type.OSS, origProps);
@@ -67,7 +78,7 @@ public class OSSProperties extends
AbstractObjectStorageProperties {
.findFirst()
.orElse(null);
if (!Strings.isNullOrEmpty(value)) {
- return value.contains("aliyuncs.com");
+ return ENDPOINT_PATTERN.matcher(value).matches();
}
if (!origProps.containsKey("uri")) {
return false;
@@ -75,51 +86,9 @@ public class OSSProperties extends
AbstractObjectStorageProperties {
return origProps.get("uri").contains("aliyuncs.com");
}
- @Override
- protected void initNormalizeAndCheckProps() throws UserException {
- super.initNormalizeAndCheckProps();
- initRegionIfNecessary();
- }
-
@Override
protected Pattern endpointPattern() {
return ENDPOINT_PATTERN;
}
- /**
- * Initializes the region field based on the endpoint if it's not already
set.
- * <p>
- * This method attempts to extract the region name from the OSS endpoint
string.
- * It supports both external and internal Alibaba Cloud OSS endpoint
formats.
- * <p>
- * Examples:
- * - External endpoint: "oss-cn-hangzhou.aliyuncs.com" → region =
"cn-hangzhou"
- * - Internal endpoint: "oss-cn-shanghai.intranet.aliyuncs.com" → region =
"cn-shanghai"
- */
- public void initRegionIfNecessary() {
- // Return the region if it is already set
- if (!Strings.isNullOrEmpty(this.region)) {
- return;
- }
- // Check for external endpoint and extract region
- if (endpoint.contains("aliyuncs.com")) {
- // Regex pattern for external endpoint (e.g.,
oss-<region>.aliyuncs.com)
- Pattern ossPattern =
Pattern.compile("oss-([a-z0-9-]+)\\.aliyuncs\\.com");
- Matcher matcher = ossPattern.matcher(endpoint);
- if (matcher.find()) {
- this.region = matcher.group(1);
- return;
- }
- }
- // Check for internal endpoint and extract region
- if (endpoint.contains("intranet.aliyuncs.com")) {
- // Regex pattern for internal endpoint (e.g.,
oss-<region>.intranet.aliyuncs.com)
- Pattern ossIntranetPattern =
Pattern.compile("oss-([a-z0-9-]+)\\.intranet\\.aliyuncs\\.com");
- Matcher matcher = ossIntranetPattern.matcher(endpoint);
- if (matcher.find()) {
- this.region = matcher.group(1);
- }
- }
- }
-
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/ObjectStorageProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/ObjectStorageProperties.java
index 1ba241a9444..f36e03e1b25 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/ObjectStorageProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/ObjectStorageProperties.java
@@ -34,4 +34,6 @@ public interface ObjectStorageProperties {
void setEndpoint(String endpoint);
+ void setRegion(String region);
+
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
index e630613904a..478f45ee3d0 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
@@ -21,36 +21,42 @@ import
org.apache.doris.datasource.property.ConnectorProperty;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
-import org.apache.commons.lang3.StringUtils;
+import lombok.Getter;
+import lombok.Setter;
import java.lang.reflect.Field;
import java.util.List;
import java.util.Map;
import java.util.Objects;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
-public class S3Properties extends AbstractObjectStorageProperties {
+public class S3Properties extends AbstractS3CompatibleProperties {
+ @Setter
+ @Getter
@ConnectorProperty(names = {"s3.endpoint", "AWS_ENDPOINT", "endpoint",
"ENDPOINT"},
required = false,
description = "The endpoint of S3.")
- protected String s3Endpoint = "";
+ protected String endpoint = "";
+ @Setter
+ @Getter
@ConnectorProperty(names = {"s3.region", "AWS_REGION", "region", "REGION"},
required = false,
description = "The region of S3.")
- protected String s3Region = "";
+ protected String region = "";
+ @Getter
@ConnectorProperty(names = {"s3.access_key", "AWS_ACCESS_KEY",
"ACCESS_KEY", "access_key"},
description = "The access key of S3.")
- protected String s3AccessKey = "";
+ protected String accessKey = "";
+ @Getter
@ConnectorProperty(names = {"s3.secret_key", "AWS_SECRET_KEY",
"secret_key", "SECRET_KEY"},
description = "The secret key of S3.")
- protected String s3SecretKey = "";
+ protected String secretKey = "";
@ConnectorProperty(names = {"s3.connection.maximum",
@@ -95,12 +101,21 @@ public class S3Properties extends
AbstractObjectStorageProperties {
description = "The external id of S3.")
protected String s3ExternalId = "";
- private static final Pattern REGION_PATTERN = Pattern.compile(
- "s3[.-](?:dualstack[.-])?([a-z0-9-]+)\\.amazonaws\\.com(?:\\.cn)?"
- );
-
- private static Pattern ENDPOINT_PATTERN =
Pattern.compile("^s3(\\.[a-z0-9-]+)?\\.amazonaws\\.com$");
+ /**
+ * Pattern to match various AWS S3 endpoint formats and extract the region
part.
+ * <p>
+ * Supported formats:
+ * - s3.us-west-2.amazonaws.com => region = us-west-2
+ * - s3.dualstack.us-east-1.amazonaws.com => region = us-east-1
+ * - s3-fips.us-east-2.amazonaws.com => region = us-east-2
+ * - s3-fips.dualstack.us-east-2.amazonaws.com => region = us-east-2
+ * <p>
+ * Group(1) in the pattern captures the region part if available.
+ */
+ private static final Pattern ENDPOINT_PATTERN = Pattern.compile(
+
"^(?:https?://)?s3(?:[-.]fips)?(?:[-.]dualstack)?(?:[-.]([a-z0-9-]+))?\\.amazonaws\\.com$"
+ );
public S3Properties(Map<String, String> origProps) {
super(Type.S3, origProps);
@@ -120,15 +135,13 @@ public class S3Properties extends
AbstractObjectStorageProperties {
.findFirst()
.orElse(null);
if (!Strings.isNullOrEmpty(endpoint)) {
- return endpoint.contains("amazonaws.com");
+ return ENDPOINT_PATTERN.matcher(endpoint).matches();
}
if (!origProps.containsKey("uri")) {
return false;
}
String uri = origProps.get("uri");
return uri.contains("amazonaws.com");
-
-
}
@Override
@@ -166,10 +179,10 @@ public class S3Properties extends
AbstractObjectStorageProperties {
public void toIcebergS3FileIOProperties(Map<String, String> catalogProps) {
// See S3FileIOProperties.java
- catalogProps.put("s3.endpoint", s3Endpoint);
- catalogProps.put("s3.access-key-id", s3AccessKey);
- catalogProps.put("s3.secret-access-key", s3SecretKey);
- catalogProps.put("client.region", s3Region);
+ catalogProps.put("s3.endpoint", endpoint);
+ catalogProps.put("s3.access-key-id", accessKey);
+ catalogProps.put("s3.secret-access-key", secretKey);
+ catalogProps.put("client.region", region);
catalogProps.put("s3.path-style-access", usePathStyle);
}
@@ -178,53 +191,4 @@ public class S3Properties extends
AbstractObjectStorageProperties {
return generateBackendS3Configuration(s3ConnectionMaximum,
s3ConnectionRequestTimeoutS, s3ConnectionTimeoutS,
String.valueOf(usePathStyle));
}
-
- /**
- * Initializes the s3Region field based on the S3 endpoint if it's not
already set.
- * <p>
- * This method extracts the region from Amazon S3-compatible endpoints
using a predefined regex pattern.
- * The endpoint is first converted to lowercase before matching.
- * <p>
- * Example:
- * - "s3.us-west-2.amazonaws.com" → s3Region = "us-west-2"
- * - "s3.cn-north-1.amazonaws.com.cn" → s3Region = "cn-north-1"
- * <p>
- * Note: REGION_PATTERN must be defined to capture the region from the S3
endpoint.
- * Example pattern:
- * Pattern.compile("s3[.-]([a-z0-9-]+)\\.")
- */
- @Override
- protected void initRegionIfNecessary() {
- if (StringUtils.isBlank(s3Region) &&
StringUtils.isNotBlank(s3Endpoint)) {
- Matcher matcher = REGION_PATTERN.matcher(s3Endpoint.toLowerCase());
- if (matcher.find()) {
- this.s3Region = matcher.group(1);
- }
- }
- }
-
- @Override
- public String getEndpoint() {
- return s3Endpoint;
- }
-
- @Override
- public String getRegion() {
- return s3Region;
- }
-
- @Override
- public String getAccessKey() {
- return s3AccessKey;
- }
-
- @Override
- public String getSecretKey() {
- return s3SecretKey;
- }
-
- @Override
- public void setEndpoint(String endpoint) {
- this.s3Endpoint = endpoint;
- }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/StorageProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/StorageProperties.java
index 95516c6067a..3fb55d1d8f8 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/StorageProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/StorageProperties.java
@@ -40,12 +40,15 @@ public abstract class StorageProperties extends
ConnectionProperties {
public static final String FS_OBS_SUPPORT = "fs.obs.support";
public static final String FS_COS_SUPPORT = "fs.cos.support";
+ public static final String FS_PROVIDER_KEY = "provider";
+
public enum Type {
HDFS,
S3,
OSS,
OBS,
COS,
+ AZURE,
UNKNOWN
}
@@ -115,7 +118,9 @@ public abstract class StorageProperties extends
ConnectionProperties {
props -> (isFsSupport(props, FS_OBS_SUPPORT)
|| OBSProperties.guessIsMe(props)) ? new
OBSProperties(props) : null,
props -> (isFsSupport(props, FS_COS_SUPPORT)
- || COSProperties.guessIsMe(props)) ? new
COSProperties(props) : null
+ || COSProperties.guessIsMe(props)) ? new
COSProperties(props) : null,
+ props -> (isFsSupport(props, FS_AZURE_SUPPORT)
+ || AzureProperties.guessIsMe(props)) ? new
AzureProperties(props) : null
);
protected StorageProperties(Type type, Map<String, String> origProps) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java
new file mode 100644
index 00000000000..fadefd43f57
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java
@@ -0,0 +1,194 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.storage;
+
+import org.apache.doris.common.UserException;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class AzurePropertiesTest {
+
+ private Map<String, String> origProps;
+
+ // Setup method to initialize the properties map before each test
+ @BeforeEach
+ public void setup() {
+ origProps = new HashMap<>();
+ }
+
+ // Test for valid Azure configuration
+ @Test
+ public void testValidAzureConfiguration() throws UserException {
+ origProps.put("s3.endpoint",
"https://mystorageaccount.blob.core.windows.net");
+ origProps.put("s3.access_key", "myAzureAccessKey");
+ origProps.put("s3.secret_key", "myAzureSecretKey");
+ origProps.put("provider", "azure");
+
+ AzureProperties azureProperties = (AzureProperties)
StorageProperties.createPrimary(origProps);
+
+ // Verify if the properties are correctly parsed
+
Assertions.assertEquals("https://mystorageaccount.blob.core.windows.net",
azureProperties.getEndpoint());
+ Assertions.assertEquals("myAzureAccessKey",
azureProperties.getAccessKey());
+ Assertions.assertEquals("myAzureSecretKey",
azureProperties.getSecretKey());
+ Assertions.assertEquals("false", azureProperties.getUsePathStyle());
+ Assertions.assertEquals("false",
azureProperties.getForceParsingByStandardUrl());
+ Assertions.assertEquals("Azure", azureProperties.getStorageName());
+ }
+
+ // Test for missing access_key configuration, should throw an exception
+ @Test
+ public void testMissingAccessKey() {
+ origProps.put("s3.endpoint",
"https://mystorageaccount.blob.core.windows.net");
+ origProps.put("provider", "azure");
+ origProps.put("s3.secret_key", "myAzureSecretKey");
+
+ // Expect an exception due to missing access_key
+ Assertions.assertThrows(IllegalArgumentException.class, () ->
+ StorageProperties.createAll(origProps), "Property
s3.access_key is required.");
+ origProps.put("s3.access_key", "myAzureAccessKey");
+ Assertions.assertDoesNotThrow(() ->
StorageProperties.createAll(origProps));
+ }
+
+ // Test for missing provider: the storage type is guessed from the endpoint, and a non-Azure endpoint is rejected
+ @Test
+ public void testMissingProvider() throws UserException {
+ origProps.put("s3.endpoint",
"https://mystorageaccount.blob.core.windows.net");
+ origProps.put("s3.access_key", "myAzureAccessKey");
+ origProps.put("s3.secret_key", "myAzureSecretKey");
+ List<StorageProperties> storagePropertiesList =
StorageProperties.createAll(origProps);
+ Assertions.assertEquals(2, storagePropertiesList.size());
+ Assertions.assertEquals(HdfsProperties.class,
storagePropertiesList.get(1).getClass());
+ Assertions.assertEquals(AzureProperties.class,
storagePropertiesList.get(0).getClass());
+ origProps.put("s3.endpoint", "https://mystorageaccount.net");
+ Assertions.assertThrows(RuntimeException.class, () ->
+ StorageProperties.createPrimary(origProps), "No supported
storage type found.");
+ // With the provider set explicitly, the non-Azure endpoint must be rejected as invalid
+ origProps.put("provider", "azure");
+ Assertions.assertThrows(IllegalArgumentException.class, () ->
+ StorageProperties.createPrimary(origProps), "Endpoint
'https://mystorageaccount.net' is not valid. It should end with
'.blob.core.windows.net'.");
+ }
+
+ // Test for empty configuration, should throw an exception
+ @Test
+ public void testEmptyConfiguration() {
+ // Expect an exception due to empty configuration
+ Assertions.assertThrows(RuntimeException.class, () ->
+ StorageProperties.createPrimary(new HashMap<>()), "Empty
configuration is not allowed.");
+
+
+ }
+
+ // Test URI normalization with use_path_style left at its default (false) and then set to true
+ @Test
+ public void testPathStyleCombinations() throws Exception {
+ origProps.put("s3.endpoint",
"https://mystorageaccount.blob.core.windows.net");
+ origProps.put("s3.access_key", "a");
+ origProps.put("s3.secret_key", "b");
+ origProps.put("provider", "azure");
+
+ // By default, use_path_style is false
+ AzureProperties azureProperties = (AzureProperties)
StorageProperties.createPrimary(origProps);
+ Assertions.assertEquals("s3://mystorageaccount/mycontainer/blob.txt",
+
azureProperties.validateAndNormalizeUri("https://mystorageaccount.blob.core.windows.net/mycontainer/blob.txt"));
+
+ // Set use_path_style to true
+ origProps.put("use_path_style", "true");
+ azureProperties = (AzureProperties)
StorageProperties.createPrimary(origProps);
+ Assertions.assertEquals("s3://mycontainer/blob.txt",
+
azureProperties.validateAndNormalizeUri("https://mystorageaccount.blob.core.windows.net/mycontainer/blob.txt"));
+ }
+
+ @Test
+ public void testParsingUri() throws Exception {
+ origProps.put("s3.endpoint",
"https://mystorageaccount.blob.core.windows.net");
+ origProps.put("s3.access_key", "a");
+ origProps.put("s3.secret_key", "b");
+ origProps.put("provider", "azure");
+ origProps.put("use_path_style", "true");
+
+ AzureProperties azureProperties = (AzureProperties)
StorageProperties.createPrimary(origProps);
+ Assertions.assertEquals("s3://mycontainer/blob.txt",
+
azureProperties.validateAndNormalizeUri("https://mystorageaccount.blob.core.windows.net/mycontainer/blob.txt"));
+ Assertions.assertThrowsExactly(UserException.class, () ->
+ azureProperties.validateAndGetUri(origProps),
+ "props must contain uri");
+ origProps.put("uri",
"https://mystorageaccount.blob.core.windows.net/mycontainer/blob.txt");
+
Assertions.assertEquals("https://mystorageaccount.blob.core.windows.net/mycontainer/blob.txt",
+ azureProperties.validateAndGetUri(origProps));
+ azureProperties.setUsePathStyle("false");
+ Assertions.assertEquals("s3://mystorageaccount/mycontainer/blob.txt",
+
azureProperties.validateAndNormalizeUri("https://mystorageaccount.blob.core.windows.net/mycontainer/blob.txt"));
+
+
+ }
+
+ // Test for backend configuration properties in Azure
+ @Test
+ public void testBackendConfigProperties() throws UserException {
+ origProps.put("s3.endpoint",
"https://mystorageaccount.blob.core.windows.net");
+ origProps.put("s3.access_key", "myAzureAccessKey");
+ origProps.put("s3.secret_key", "myAzureSecretKey");
+ origProps.put("provider", "azure");
+
+ AzureProperties azureProperties = (AzureProperties)
StorageProperties.createPrimary(origProps);
+ Map<String, String> nativeProps =
azureProperties.getBackendConfigProperties();
+
+ // Verify if backend properties are set correctly
+
Assertions.assertEquals("https://mystorageaccount.blob.core.windows.net",
nativeProps.get("AWS_ENDPOINT"));
+ Assertions.assertEquals("dummy_region", nativeProps.get("AWS_REGION"));
+ Assertions.assertEquals("myAzureAccessKey",
nativeProps.get("AWS_ACCESS_KEY"));
+ Assertions.assertEquals("myAzureSecretKey",
nativeProps.get("AWS_SECRET_KEY"));
+ Assertions.assertEquals("true",
nativeProps.get("AWS_NEED_OVERRIDE_ENDPOINT"));
+ Assertions.assertEquals("azure", nativeProps.get("provider"));
+ Assertions.assertEquals("false", nativeProps.get("use_path_style"));
+ }
+
+ // Test for force_parsing_by_standard_uri being false
+ @Test
+ public void testForceParsingByStandardUriFalse() {
+ origProps.put("s3.endpoint", "https://s3.amazonaws.com");
+ origProps.put("s3.access_key", "myAWSAccessKey");
+ origProps.put("s3.secret_key", "myAWSSecretKey");
+ origProps.put("provider", "azure");
+ origProps.put("force_parsing_by_standard_uri", "false");
+
+ // Expect an exception since force_parsing_by_standard_uri cannot be false for S3
+ Assertions.assertThrows(IllegalArgumentException.class, () ->
+ StorageProperties.createPrimary(origProps),
"force_parsing_by_standard_uri cannot be false for S3.");
+ }
+
+ // Test for empty path, should throw an exception
+ @Test
+ public void testEmptyPath() throws UserException {
+ origProps.put("s3.endpoint",
"https://mystorageaccount.blob.core.windows.net");
+ origProps.put("s3.access_key", "myAzureAccessKey");
+ origProps.put("s3.secret_key", "myAzureSecretKey");
+ origProps.put("provider", "azure");
+
+ AzureProperties azureProperties = (AzureProperties)
StorageProperties.createPrimary(origProps);
+ // Expect an exception when the path is empty
+ Assertions.assertThrows(UserException.class, () ->
+ azureProperties.validateAndNormalizeUri(""), "Path cannot be
empty.");
+ }
+}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/OBSPropertyTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/OBSPropertyTest.java
index a33edb451a0..03914f76693 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/OBSPropertyTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/OBSPropertyTest.java
@@ -29,12 +29,14 @@ public class OBSPropertyTest {
private Map<String, String> origProps = new HashMap<>();
@Test
- public void testBasicCreateTest() {
+ public void testBasicCreateTest() throws UserException {
//Map<String, String> origProps = new HashMap<>();
origProps.put("obs.endpoint", "https://obs.example.com");
origProps.put("obs.access_key", "myOBSAccessKey");
origProps.put("obs.secret_key", "myOBSSecretKey");
origProps.put(StorageProperties.FS_OBS_SUPPORT, "true");
+ Assertions.assertThrows(IllegalArgumentException.class, () ->
StorageProperties.createAll(origProps), "Invalid endpoint format:
https://obs.example.com");
+
// Test creation without additional properties
origProps = new HashMap<>();
origProps.put("obs.endpoint", "https://obs.example.com");
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/OSSPropertiesTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/OSSPropertiesTest.java
index 1b67ced5720..ad0d89f1db8 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/OSSPropertiesTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/OSSPropertiesTest.java
@@ -46,7 +46,7 @@ public class OSSPropertiesTest {
origProps = new HashMap<>();
origProps.put("oss.endpoint", "https://oss.aliyuncs.com");
Map<String, String> finalOrigProps2 = origProps;
- Assertions.assertThrowsExactly(IllegalArgumentException.class, () ->
StorageProperties.createPrimary(finalOrigProps2));
+ Assertions.assertThrowsExactly(RuntimeException.class, () ->
StorageProperties.createPrimary(finalOrigProps2));
}
@@ -78,7 +78,7 @@ public class OSSPropertiesTest {
s3Props = ossProperties.generateBackendS3Configuration();
Assertions.assertEquals("oss-cn-beijing-internal.aliyuncs.com",
s3Props.get("AWS_ENDPOINT"));
- Assertions.assertEquals("cn-beijing-internal",
s3Props.get("AWS_REGION"));
+ Assertions.assertEquals("cn-beijing", s3Props.get("AWS_REGION"));
Assertions.assertEquals("myOSSAccessKey",
s3Props.get("AWS_ACCESS_KEY"));
Assertions.assertEquals("myOSSSecretKey",
s3Props.get("AWS_SECRET_KEY"));
Assertions.assertEquals("88", s3Props.get("AWS_MAX_CONNECTIONS"));
@@ -102,6 +102,8 @@ public class OSSPropertiesTest {
Assertions.assertEquals("myCOSAccessKey",
ossProperties.getAccessKey());
Assertions.assertEquals("myCOSSecretKey",
ossProperties.getSecretKey());
Assertions.assertEquals("oss-cn-hangzhou.aliyuncs.com",
ossProperties.getEndpoint());
+ origProps.put("oss.endpoint", "oss-cn-hangzhou-internal.aliyuncs.com");
+ Assertions.assertEquals("cn-hangzhou", ((OSSProperties)
StorageProperties.createPrimary(origProps)).getRegion());
}
@Test
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
index b5a8ce0d825..14a449ab91f 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
@@ -57,6 +57,34 @@ public class S3PropertiesTest {
Assertions.assertDoesNotThrow(() ->
StorageProperties.createAll(origProps));
}
+ @Test
+ public void testEndpointPattern() throws UserException {
+ /*
+ * region:
+ * us-east-2
+ * endpoint:
+ * s3.us-east-2.amazonaws.com
+ * s3.dualstack.us-east-2.amazonaws.com
+ * s3-fips.dualstack.us-east-2.amazonaws.com
+ * s3-fips.us-east-2.amazonaws.com
+ * */
+ String endpoint = "s3.us-east-2.amazonaws.com";
+ origProps.put("s3.endpoint", endpoint);
+ origProps.put("s3.access_key", "myS3AccessKey");
+ origProps.put("s3.secret_key", "myS3SecretKey");
+ S3Properties s3Properties = (S3Properties)
StorageProperties.createPrimary(origProps);
+ Assertions.assertEquals("us-east-2", s3Properties.getRegion());
+ origProps.put("s3.endpoint", "s3.dualstack.us-east-2.amazonaws.com");
+ s3Properties = (S3Properties)
StorageProperties.createPrimary(origProps);
+ Assertions.assertEquals("us-east-2", s3Properties.getRegion());
+ origProps.put("s3.endpoint",
"s3-fips.dualstack.us-east-2.amazonaws.com");
+ s3Properties = (S3Properties)
StorageProperties.createPrimary(origProps);
+ Assertions.assertEquals("us-east-2", s3Properties.getRegion());
+ origProps.put("s3.endpoint", "s3-fips.us-east-2.amazonaws.com");
+ s3Properties = (S3Properties)
StorageProperties.createPrimary(origProps);
+ Assertions.assertEquals("us-east-2", s3Properties.getRegion());
+ }
+
@Test
public void testToNativeS3Configuration() throws UserException {
origProps.put("s3.endpoint", "https://cos.example.com");
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]