Samrat002 commented on code in PR #27788: URL: https://github.com/apache/flink/pull/27788#discussion_r3023505902
########## flink-filesystems/flink-s3-fs-native/src/main/java/org/apache/flink/fs/s3native/BucketConfigProvider.java: ########## @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.fs.s3native; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.IllegalConfigurationException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * Parses bucket-specific S3 configuration using format {@code s3.bucket.<bucket-name>.<property>}. + * + * <p>Enables per-bucket overrides for endpoints, credentials, encryption, and IAM roles. Bucket + * names containing dots are supported; properties are matched by longest suffix first. + * + * <p>Immutable and thread-safe after construction. 
+ */ +@Internal +final class BucketConfigProvider { + + private static final Logger LOG = LoggerFactory.getLogger(BucketConfigProvider.class); + + static final String BUCKET_CONFIG_PREFIX = "s3.bucket."; + + /** + * Known bucket-level properties, sorted by descending length so that the longest match wins. + */ + private static final String[] KNOWN_PROPERTIES = + new String[] { + "assume-role.session-duration", + "assume-role.session-name", + "assume-role.external-id", + "credentials.provider", + "path-style-access", + "sse.kms-key-id", + "assume-role.arn", + "access-key", + "secret-key", + "sse.type", + "endpoint", + "region" + }; + + private final Map<String, S3BucketConfig> bucketConfigs; + + BucketConfigProvider(Configuration flinkConfig) { + this.bucketConfigs = Collections.unmodifiableMap(parseBucketConfigs(flinkConfig)); + } + + @Nullable + S3BucketConfig getBucketConfig(String bucketName) { + return bucketConfigs.get(bucketName); + } + + @VisibleForTesting + boolean hasBucketConfig(String bucketName) { + return bucketConfigs.containsKey(bucketName); + } + + @VisibleForTesting + int size() { + return bucketConfigs.size(); + } + + private static Map<String, S3BucketConfig> parseBucketConfigs(Configuration flinkConfig) { + Map<String, Map<String, String>> rawConfigs = new HashMap<>(); + + for (String key : flinkConfig.keySet()) { + if (!key.startsWith(BUCKET_CONFIG_PREFIX)) { + continue; + } + String suffix = key.substring(BUCKET_CONFIG_PREFIX.length()); + String value = flinkConfig.getString(key, null); + if (value == null) { + continue; + } + + for (String prop : KNOWN_PROPERTIES) { + if (suffix.endsWith("." 
+ prop)) { + String bucketName = suffix.substring(0, suffix.length() - prop.length() - 1); + if (!bucketName.isEmpty()) { + rawConfigs + .computeIfAbsent(bucketName, k -> new HashMap<>()) + .put(prop, value); + } + break; + } + } + } + + Map<String, S3BucketConfig> result = new HashMap<>(); + for (Map.Entry<String, Map<String, String>> entry : rawConfigs.entrySet()) { + String bucketName = entry.getKey(); + Map<String, String> props = entry.getValue(); + + S3BucketConfig bucketConfig = buildBucketConfig(bucketName, props); + if (bucketConfig.hasAnyOverride()) { + result.put(bucketName, bucketConfig); + LOG.info( + "Registered bucket-specific configuration for bucket '{}': {}", + bucketName, + bucketConfig); + } + } + + return result; + } + + private static S3BucketConfig buildBucketConfig(String bucketName, Map<String, String> props) { + S3BucketConfig.Builder builder = S3BucketConfig.builder(bucketName); + + applyIfPresent(props, "region", builder::region); + applyIfPresent(props, "endpoint", builder::endpoint); + applyIfPresent(props, "access-key", builder::accessKey); + applyIfPresent(props, "secret-key", builder::secretKey); + applyIfPresent(props, "sse.type", builder::sseType); + applyIfPresent(props, "sse.kms-key-id", builder::sseKmsKeyId); + applyIfPresent(props, "assume-role.arn", builder::assumeRoleArn); + applyIfPresent(props, "assume-role.external-id", builder::assumeRoleExternalId); + applyIfPresent(props, "assume-role.session-name", builder::assumeRoleSessionName); + applyIfPresent(props, "credentials.provider", builder::credentialsProvider); + + String pathStyleStr = props.get("path-style-access"); + if (pathStyleStr != null) { + builder.pathStyleAccess(Boolean.parseBoolean(pathStyleStr)); + } + + String durationStr = props.get("assume-role.session-duration"); + if (durationStr != null) { + try { + builder.assumeRoleSessionDurationSeconds(Integer.parseInt(durationStr)); Review Comment: We're limited here because bucket-level properties use dynamic 
keys (s3.bucket.<name>.<property>), so they're read as raw strings rather than through typed ConfigOption parsing. Using integer seconds is consistent with: (1) the global s3.assume-role.session-duration option, which is defined as ConfigOption<Integer> (default 3600); (2) the AWS STS AssumeRole API, which takes durationSeconds as an integer; and (3) the convention in other Flink S3 implementations. We could add Duration parsing (via TimeUtils.parseDuration()) as a follow-up for both the global and bucket-level settings together. Changing only the bucket-level format would create an inconsistency in which the global setting takes 3600 but the bucket-level setting takes 1h for the same semantics. Happy to file a JIRA for this if you think it's worth pursuing. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
