github-actions[bot] commented on code in PR #63252: URL: https://github.com/apache/doris/pull/63252#discussion_r3245372588
########## fe/fe-filesystem/fe-filesystem-s3/src/main/java/org/apache/doris/filesystem/s3/S3CredentialsProviderFactory.java: ########## @@ -0,0 +1,198 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.filesystem.s3; + +import org.apache.commons.lang3.StringUtils; +import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; +import software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider; +import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; +import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.auth.credentials.SystemPropertyCredentialsProvider; +import software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider; + +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.function.BiFunction; + +/** + * Credential provider factory for S3 filesystem AWS SDK v2 clients. + */ +public final class S3CredentialsProviderFactory { + + private S3CredentialsProviderFactory() { + } + + public static AwsCredentialsProvider createClientProvider(S3FileSystemProperties properties) { + return createClientProvider(properties, S3CredentialsProviderFactory::buildStsClient); + } + + public static AwsCredentialsProvider createClientProvider(S3FileSystemProperties properties, + BiFunction<AwsCredentialsProvider, String, StsClient> stsClientFactory) { + if (properties.hasAssumeRole()) { + return createAssumeRoleProvider(properties, stsClientFactory); + } + return createBaseProvider(properties, true); + } + + public static AwsCredentialsProvider createStsSourceProvider(S3FileSystemProperties properties) { + return createBaseProvider(properties, false); + } + + public static AwsCredentialsProvider create( + S3CredentialsProviderType type, boolean includeAnonymousInDefault) { + switch (type) { + case ENV: + return EnvironmentVariableCredentialsProvider.create(); + case SYSTEM_PROPERTIES: + return SystemPropertyCredentialsProvider.create(); + case WEB_IDENTITY: + return WebIdentityTokenFileCredentialsProvider.create(); + case CONTAINER: + return ContainerCredentialsProvider.create(); + case INSTANCE_PROFILE: + return InstanceProfileCredentialsProvider.create(); + case ANONYMOUS: + return AnonymousCredentialsProvider.create(); + case DEFAULT: + return createDefault(includeAnonymousInDefault); + default: + throw new UnsupportedOperationException( + "AWS SDK V2 does not support credentials provider mode: " + type); + } + } + + public static String hadoopClassName( + S3CredentialsProviderType type, boolean includeAnonymousInDefault) { + switch (type) { + case ENV: + return EnvironmentVariableCredentialsProvider.class.getName(); + case SYSTEM_PROPERTIES: + return SystemPropertyCredentialsProvider.class.getName(); + case WEB_IDENTITY: + return WebIdentityTokenFileCredentialsProvider.class.getName(); + case CONTAINER: + return ContainerCredentialsProvider.class.getName(); + case INSTANCE_PROFILE: + return InstanceProfileCredentialsProvider.class.getName(); + case ANONYMOUS: + return AnonymousCredentialsProvider.class.getName(); + case DEFAULT: + List<String> providers = new ArrayList<>(); + providers.add(EnvironmentVariableCredentialsProvider.class.getName()); + providers.add(SystemPropertyCredentialsProvider.class.getName()); + providers.add(InstanceProfileCredentialsProvider.class.getName()); + if (isWebIdentityConfigured()) { + providers.add(WebIdentityTokenFileCredentialsProvider.class.getName()); + } + if (isContainerCredentialsConfigured()) { + providers.add(ContainerCredentialsProvider.class.getName()); + } + if (includeAnonymousInDefault) { + providers.add(AnonymousCredentialsProvider.class.getName()); + } + return String.join(",", providers); + default: + throw new UnsupportedOperationException( + "AWS SDK V2 does not support credentials provider mode: " + type); + } + } + + private static AwsCredentialsProvider createAssumeRoleProvider(S3FileSystemProperties properties, + BiFunction<AwsCredentialsProvider, String, StsClient> stsClientFactory) { + StsClient stsClient = stsClientFactory.apply(createStsSourceProvider(properties), properties.getRegion()); + return StsAssumeRoleCredentialsProvider.builder() + .stsClient(stsClient) + .refreshRequest(builder -> { + builder.roleArn(properties.getRoleArn()) + .roleSessionName("doris_" + UUID.randomUUID().toString().replace("-", "")); + String externalId = properties.getExternalId(); + if (StringUtils.isNotBlank(externalId)) { + builder.externalId(externalId); + } + }).build(); + } + + private static StsClient buildStsClient(AwsCredentialsProvider credentialsProvider, String region) { + return StsClient.builder() + .credentialsProvider(credentialsProvider) + .region(Region.of(region)) + .build(); + } + + private static AwsCredentialsProvider createBaseProvider(S3FileSystemProperties properties, + boolean includeAnonymousInDefault) { + AwsCredentialsProvider staticProvider = createStaticProvider( + properties.getAccessKey(), + properties.getSecretKey(), + properties.getSessionToken()); + if (staticProvider != null) { + return staticProvider; + } + return create(properties.getCredentialsProviderType(), includeAnonymousInDefault); + } + + private static AwsCredentialsProvider createStaticProvider(String accessKey, String secretKey, + String sessionToken) { + if (StringUtils.isBlank(accessKey) || StringUtils.isBlank(secretKey)) { + return null; + } + if (StringUtils.isNotBlank(sessionToken)) { + return StaticCredentialsProvider.create( + AwsSessionCredentials.create(accessKey, secretKey, sessionToken)); + } + return StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey)); + } + + private static AwsCredentialsProvider createDefault(boolean includeAnonymous) { Review Comment: This changes the meaning of the default credentials mode. The old S3ObjStorage fallback used `DefaultCredentialsProvider.create()` before anonymous fallback, so AWS profile credentials, process credentials, and any other providers in the SDK's default chain worked. This hand-built chain omits those providers, so an existing deployment that relies on `~/.aws/credentials` / `AWS_PROFILE` and does not set AK/SK will now fail to resolve credentials. Please keep the SDK default provider in the DEFAULT path (and only append anonymous when requested) instead of replacing it with a partial chain. ########## fe/fe-filesystem/fe-filesystem-s3/src/main/java/org/apache/doris/filesystem/s3/S3FileSystemProperties.java: ########## @@ -0,0 +1,398 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.filesystem.s3; + +import org.apache.doris.filesystem.FileSystemType; +import org.apache.doris.filesystem.properties.BackendStorageKind; +import org.apache.doris.filesystem.properties.BackendStorageProperties; +import org.apache.doris.filesystem.properties.FileSystemProperties; +import org.apache.doris.filesystem.properties.HadoopStorageProperties; +import org.apache.doris.filesystem.properties.S3CompatibleFileSystemProperties; +import org.apache.doris.filesystem.properties.StorageKind; +import org.apache.doris.foundation.property.ConnectorPropertiesUtils; +import org.apache.doris.foundation.property.ConnectorProperty; +import org.apache.doris.foundation.property.ParamRules; + +import lombok.Getter; +import org.apache.commons.lang3.StringUtils; + +import java.lang.reflect.Field; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Provider-owned typed properties for AWS S3 and S3-compatible object storage. + * Binding uses {@link ConnectorProperty} aliases so legacy key names can continue + * to work while callers migrate to canonical s3.* names. + */ +public final class S3FileSystemProperties + implements FileSystemProperties, BackendStorageProperties, HadoopStorageProperties, + S3CompatibleFileSystemProperties { + + public static final String ENDPOINT = "s3.endpoint"; + public static final String REGION = "s3.region"; + public static final String ACCESS_KEY = "s3.access_key"; + public static final String SECRET_KEY = "s3.secret_key"; + public static final String SESSION_TOKEN = "s3.session_token"; + public static final String ROLE_ARN = "s3.role_arn"; + public static final String EXTERNAL_ID = "s3.external_id"; + public static final String BUCKET = "s3.bucket"; + public static final String ROOT_PATH = "s3.root.path"; + public static final String MAX_CONNECTIONS = "s3.connection.maximum"; + public static final String REQUEST_TIMEOUT_MS = "s3.connection.request.timeout"; + public static final String CONNECTION_TIMEOUT_MS = "s3.connection.timeout"; + public static final String USE_PATH_STYLE = "use_path_style"; + public static final String CREDENTIALS_PROVIDER_TYPE = "s3.credentials_provider_type"; + + public static final String DEFAULT_MAX_CONNECTIONS = "50"; + public static final String DEFAULT_REQUEST_TIMEOUT_MS = "3000"; + public static final String DEFAULT_CONNECTION_TIMEOUT_MS = "1000"; + public static final String DEFAULT_CREDENTIALS_PROVIDER_TYPE = "DEFAULT"; + + private static final Pattern[] ENDPOINT_PATTERNS = new Pattern[] { + Pattern.compile( + "^(?:https?://)?(?:" + + "s3(?:[-.]fips)?(?:[-.]dualstack)?[-.]([a-z0-9-]+)|" + + "s3express-control\\.([a-z0-9-]+)|" + + "s3express-[a-z0-9-]+\\.([a-z0-9-]+)" + + ")\\.amazonaws\\.com(?:/.*)?$", + Pattern.CASE_INSENSITIVE), + Pattern.compile( + "^(?:https?://)?glue(?:-fips)?\\.([a-z0-9-]+)\\.(amazonaws\\.com(?:\\.cn)?|api\\.aws)$", + Pattern.CASE_INSENSITIVE) + }; + + @Getter + @ConnectorProperty(names = {ENDPOINT, "AWS_ENDPOINT", "endpoint", "ENDPOINT", "aws.endpoint", + "glue.endpoint", "aws.glue.endpoint"}, + required = false, + description = "The endpoint of S3.") + private String endpoint = ""; + + @Getter + @ConnectorProperty(names = {REGION, "AWS_REGION", "region", "REGION", "aws.region", "glue.region", + "aws.glue.region", "iceberg.rest.signing-region", "rest.signing-region", "client.region"}, + required = false, + isRegionField = true, + description = "The region of S3.") + private String region = ""; + + @Getter + @ConnectorProperty(names = {ACCESS_KEY, "AWS_ACCESS_KEY", "access_key", "ACCESS_KEY", + "glue.access_key", "aws.glue.access-key", + "client.credentials-provider.glue.access_key", "iceberg.rest.access-key-id", + "s3.access-key-id"}, + required = false, + sensitive = true, + description = "The access key of S3.") + private String accessKey = ""; + + @Getter + @ConnectorProperty(names = {SECRET_KEY, "AWS_SECRET_KEY", "secret_key", "SECRET_KEY", + "glue.secret_key", "aws.glue.secret-key", + "client.credentials-provider.glue.secret_key", "iceberg.rest.secret-access-key", + "s3.secret-access-key"}, + required = false, + sensitive = true, + description = "The secret key of S3.") + private String secretKey = ""; + + @Getter + @ConnectorProperty(names = {SESSION_TOKEN, "AWS_TOKEN", "session_token", + "s3.session-token", "iceberg.rest.session-token"}, + required = false, + description = "The session token of S3.") + private String sessionToken = ""; + + @Getter + @ConnectorProperty(names = {ROLE_ARN, "AWS_ROLE_ARN", "glue.role_arn"}, + required = false, + description = "The IAM role ARN for AssumeRole-based access.") + private String roleArn = ""; + + @Getter + @ConnectorProperty(names = {EXTERNAL_ID, "AWS_EXTERNAL_ID", "glue.external_id"}, + required = false, + description = "The external ID for AssumeRole trust policy.") + private String externalId = ""; + + @Getter + @ConnectorProperty(names = {BUCKET, "AWS_BUCKET"}, + required = false, + description = "The default bucket name.") + private String bucket = ""; + + @Getter + @ConnectorProperty(names = {ROOT_PATH, "AWS_ROOT_PATH"}, + required = false, + description = "The root path prefix inside the bucket.") + private String rootPath = ""; + + @Getter + @ConnectorProperty(names = {MAX_CONNECTIONS, "AWS_MAX_CONNECTIONS"}, + required = false, + description = "The maximum number of connections to S3.") + private String maxConnections = DEFAULT_MAX_CONNECTIONS; + + @Getter + @ConnectorProperty(names = {REQUEST_TIMEOUT_MS, "AWS_REQUEST_TIMEOUT_MS"}, + required = false, + description = "The request timeout of S3 in milliseconds.") + private String requestTimeoutMs = DEFAULT_REQUEST_TIMEOUT_MS; + + @Getter + @ConnectorProperty(names = {CONNECTION_TIMEOUT_MS, "AWS_CONNECTION_TIMEOUT_MS"}, + required = false, + description = "The connection timeout of S3 in milliseconds.") + private String connectionTimeoutMs = DEFAULT_CONNECTION_TIMEOUT_MS; + + @Getter + @ConnectorProperty(names = {USE_PATH_STYLE, "s3.path-style-access"}, + required = false, + description = "Whether to use path-style bucket addressing.") + private String usePathStyle = "false"; + + @ConnectorProperty(names = {CREDENTIALS_PROVIDER_TYPE, "AWS_CREDENTIALS_PROVIDER_TYPE", + "glue.credentials_provider_type", "iceberg.rest.credentials_provider_type"}, + required = false, + description = "The credentials provider type.") + private String credentialsProviderType = DEFAULT_CREDENTIALS_PROVIDER_TYPE; + + private final Map<String, String> rawProperties; + private final Map<String, String> matchedProperties; + + private S3FileSystemProperties(Map<String, String> rawProperties) { + this.rawProperties = Collections.unmodifiableMap(new HashMap<>(rawProperties)); + this.matchedProperties = Collections.unmodifiableMap(collectMatchedProperties(rawProperties)); + ConnectorPropertiesUtils.bindConnectorProperties(this, rawProperties); + normalizeForLegacyS3Compatibility(); + } + + /** Binds and validates raw properties. */ + public static S3FileSystemProperties of(Map<String, String> properties) { + S3FileSystemProperties props = new S3FileSystemProperties(properties); + props.validate(); + return props; + } + + private void validate() { + new ParamRules() + .requireTogether(new String[] {accessKey, secretKey}, + "s3.access_key and s3.secret_key must be set together") + .requireAllIfPresent(sessionToken, new String[] {accessKey, secretKey}, + "s3.session_token requires s3.access_key and s3.secret_key") + .requireAllIfPresent(externalId, new String[] {roleArn}, + "s3.external_id must be used together with s3.role_arn") + .check(this::hasUnsupportedCredentialsProviderType, + "Unsupported s3.credentials_provider_type: " + credentialsProviderType) + .check(() -> StringUtils.isBlank(endpoint) && StringUtils.isBlank(region), + "Either s3.endpoint or s3.region must be set") + .check(() -> StringUtils.isBlank(region), Review Comment: This validation rejects existing custom S3-compatible configurations that set only an endpoint. Before this migration, `S3ObjStorage.buildClient()` explicitly allowed a missing region, logged a warning, and used `us-east-1` as the SigV4 placeholder when an endpoint override was present; the removed `buildClient_missingRegionLogsWarnAndFallsBack` test covered that compatibility path. Now `new S3ObjStorage(Map.of("AWS_ENDPOINT", "https://minio.local"))` fails during binding before the client can be built. Since the PR says there is no behavior change, please preserve the previous endpoint-only fallback for the runtime filesystem path or document and gate this as an intentional incompatible change. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
