[ https://issues.apache.org/jira/browse/HADOOP-18708?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17745450#comment-17745450 ]
ASF GitHub Bot commented on HADOOP-18708: ----------------------------------------- mehakmeet commented on code in PR #5767: URL: https://github.com/apache/hadoop/pull/5767#discussion_r1270255116 ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/GetS3RegionOperation.java: ########## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; +import software.amazon.awssdk.services.s3.model.S3Exception; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; +import org.apache.hadoop.fs.s3a.UnknownStoreException; +import org.apache.hadoop.fs.store.LogExactlyOnce; +import org.apache.hadoop.util.DurationInfo; + +import static org.apache.hadoop.fs.s3a.Constants.BUCKET_REGION_HEADER; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_BUCKET_PREFIX; +import static org.apache.hadoop.fs.s3a.Statistic.STORE_REGION_PROBE; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration; + +/** + * Operation to get the S3 region for a bucket. + * If a region is not specified in {@link org.apache.hadoop.fs.s3a.Constants#AWS_REGION}, this + * operation will warn and then probe S3 to get the region. + */ +public class GetS3RegionOperation extends ExecutingStoreOperation<Region> { + + private final AWSCredentialProviderList credentials; + + private final String region; + + private final String bucket; + + private final StoreContext context; + + private final static Map<String, Region> BUCKET_REGIONS = new HashMap<>(); Review Comment: Interesting, so we aren't caching in the SDK anymore, is that correct? ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/GetS3RegionOperation.java: ########## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; +import software.amazon.awssdk.services.s3.model.S3Exception; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; +import org.apache.hadoop.fs.s3a.UnknownStoreException; +import org.apache.hadoop.fs.store.LogExactlyOnce; +import org.apache.hadoop.util.DurationInfo; + +import static org.apache.hadoop.fs.s3a.Constants.BUCKET_REGION_HEADER; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_BUCKET_PREFIX; +import static org.apache.hadoop.fs.s3a.Statistic.STORE_REGION_PROBE; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration; + +/** + * Operation to get the S3 region for a bucket. + * If a region is not specified in {@link org.apache.hadoop.fs.s3a.Constants#AWS_REGION}, this + * operation will warn and then probe S3 to get the region. + */ +public class GetS3RegionOperation extends ExecutingStoreOperation<Region> { + + private final AWSCredentialProviderList credentials; + + private final String region; + + private final String bucket; + + private final StoreContext context; + + private final static Map<String, Region> BUCKET_REGIONS = new HashMap<>(); + + private static final Logger LOG = LoggerFactory.getLogger( + GetS3RegionOperation.class); + + /** Exactly once log to warn about setting the region in config to avoid probe. */ + private static final LogExactlyOnce SET_REGION_WARNING = new LogExactlyOnce(LOG); + + public GetS3RegionOperation( + final StoreContext context, + String region, + String bucket, + AWSCredentialProviderList credentials) { + super(context); + this.context = context; + this.credentials = credentials; + this.region = region; + this.bucket = bucket; + } + + + @Override + public Region execute() throws IOException { + + if (!StringUtils.isBlank(region)) { + return Region.of(region); + } + + Region cachedRegion = BUCKET_REGIONS.get(bucket); + + if (cachedRegion != null) { + LOG.debug("Got region {} for bucket {} from cache", cachedRegion, bucket); + return cachedRegion; + } + + try (DurationInfo ignore = + new DurationInfo(LOG, false, "Get S3 region")) { + + SET_REGION_WARNING.warn( + "Getting region for bucket {} from S3, this will slow down FS initialisation. " + + "To avoid this, set the region using property {}", bucket, + FS_S3A_BUCKET_PREFIX + bucket + ".endpoint.region"); + + // build a s3 client with region eu-west-1 that can be used to get the region of the + // bucket. Using eu-west-1, as headBucket() doesn't work with us-east-1. This is because + // us-east-1 uses the endpoint s3.amazonaws.com, which resolves bucket.s3.amazonaws.com + // to the actual region the bucket is in. As the request is signed with us-east-1 and + // not the bucket's region, it fails. + S3Client getRegionS3Client = + S3Client.builder().region(Region.EU_WEST_1).credentialsProvider(credentials) + .build(); + + HeadBucketResponse headBucketResponse = trackDuration(context.getInstrumentation(), + STORE_REGION_PROBE.getSymbol(), + () -> getRegionS3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build())); + + Region bucketRegion = Region.of( + headBucketResponse.sdkHttpResponse().headers().get(BUCKET_REGION_HEADER).get(0)); + BUCKET_REGIONS.put(bucket, bucketRegion); + + return bucketRegion; + } catch (S3Exception exception) { + if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) { Review Comment: add a comment here explaining what we are doing here in case of 301 ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java: ########## @@ -47,17 +39,16 @@ import org.apache.hadoop.fs.s3a.audit.AuditFailureException; import org.apache.hadoop.fs.s3a.audit.AuditIntegration; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; -import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider; -import org.apache.hadoop.fs.s3a.impl.NetworkBinding; -import org.apache.hadoop.fs.s3a.impl.V2Migration; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.security.ProviderUtils; -import org.apache.hadoop.util.VersionInfo; +import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.encryption.s3.S3EncryptionClientException; Review Comment: move to the top with the rest of the same imports ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java: ########## @@ -103,9 +103,9 @@ public void test_030_postCreationAssertions() throws Throwable { conf.setInt(MULTIPART_SIZE, MULTIPART_MIN_SIZE); S3ATestUtils.disableFilesystemCaching(conf); - try (FileSystem fs2 = FileSystem.get(fs.getUri(), conf)) { - intercept(UnsupportedRequestException.class, () -> - fs2.rename(hugefile, hugefileRenamed)); - } +// try (FileSystem fs2 = FileSystem.get(fs.getUri(), conf)) { Review Comment: this needs to be changed I assume. ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/GetS3RegionOperation.java: ########## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; +import software.amazon.awssdk.services.s3.model.S3Exception; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; +import org.apache.hadoop.fs.s3a.UnknownStoreException; +import org.apache.hadoop.fs.store.LogExactlyOnce; +import org.apache.hadoop.util.DurationInfo; + +import static org.apache.hadoop.fs.s3a.Constants.BUCKET_REGION_HEADER; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_BUCKET_PREFIX; +import static org.apache.hadoop.fs.s3a.Statistic.STORE_REGION_PROBE; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration; + +/** + * Operation to get the S3 region for a bucket. + * If a region is not specified in {@link org.apache.hadoop.fs.s3a.Constants#AWS_REGION}, this + * operation will warn and then probe S3 to get the region. + */ +public class GetS3RegionOperation extends ExecutingStoreOperation<Region> { + + private final AWSCredentialProviderList credentials; + + private final String region; + + private final String bucket; + + private final StoreContext context; + + private final static Map<String, Region> BUCKET_REGIONS = new HashMap<>(); + + private static final Logger LOG = LoggerFactory.getLogger( + GetS3RegionOperation.class); + + /** Exactly once log to warn about setting the region in config to avoid probe. */ + private static final LogExactlyOnce SET_REGION_WARNING = new LogExactlyOnce(LOG); + + public GetS3RegionOperation( + final StoreContext context, + String region, + String bucket, + AWSCredentialProviderList credentials) { + super(context); + this.context = context; + this.credentials = credentials; + this.region = region; + this.bucket = bucket; + } + + + @Override + public Region execute() throws IOException { + + if (!StringUtils.isBlank(region)) { + return Region.of(region); + } + + Region cachedRegion = BUCKET_REGIONS.get(bucket); + + if (cachedRegion != null) { + LOG.debug("Got region {} for bucket {} from cache", cachedRegion, bucket); + return cachedRegion; + } + + try (DurationInfo ignore = + new DurationInfo(LOG, false, "Get S3 region")) { + + SET_REGION_WARNING.warn( + "Getting region for bucket {} from S3, this will slow down FS initialisation. " + + "To avoid this, set the region using property {}", bucket, + FS_S3A_BUCKET_PREFIX + bucket + ".endpoint.region"); + + // build a s3 client with region eu-west-1 that can be used to get the region of the + // bucket. Using eu-west-1, as headBucket() doesn't work with us-east-1. This is because + // us-east-1 uses the endpoint s3.amazonaws.com, which resolves bucket.s3.amazonaws.com + // to the actual region the bucket is in. As the request is signed with us-east-1 and + // not the bucket's region, it fails. + S3Client getRegionS3Client = + S3Client.builder().region(Region.EU_WEST_1).credentialsProvider(credentials) + .build(); + + HeadBucketResponse headBucketResponse = trackDuration(context.getInstrumentation(), + STORE_REGION_PROBE.getSymbol(), + () -> getRegionS3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build())); + + Region bucketRegion = Region.of( + headBucketResponse.sdkHttpResponse().headers().get(BUCKET_REGION_HEADER).get(0)); + BUCKET_REGIONS.put(bucket, bucketRegion); + + return bucketRegion; + } catch (S3Exception exception) { + if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) { + Region bucketRegion = Region.of( + exception.awsErrorDetails().sdkHttpResponse().headers().get(BUCKET_REGION_HEADER) + .get(0)); + BUCKET_REGIONS.put(bucket, bucketRegion); + Review Comment: Log an error/warn about the exception caught before returning the region. > AWS SDK V2 - Implement CSE > -------------------------- > > Key: HADOOP-18708 > URL: https://issues.apache.org/jira/browse/HADOOP-18708 > Project: Hadoop Common > Issue Type: Sub-task > Components: fs/s3 > Affects Versions: 3.4.0 > Reporter: Ahmar Suhail > Priority: Major > Labels: pull-request-available > > S3 Encryption client for SDK V2 is now available, so add client side > encryption back in. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org