[ 
https://issues.apache.org/jira/browse/HADOOP-18708?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17745450#comment-17745450
 ] 

ASF GitHub Bot commented on HADOOP-18708:
-----------------------------------------

mehakmeet commented on code in PR #5767:
URL: https://github.com/apache/hadoop/pull/5767#discussion_r1270255116


##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/GetS3RegionOperation.java:
##########
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.impl;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
+import software.amazon.awssdk.services.s3.model.HeadBucketResponse;
+import software.amazon.awssdk.services.s3.model.S3Exception;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
+import org.apache.hadoop.fs.s3a.UnknownStoreException;
+import org.apache.hadoop.fs.store.LogExactlyOnce;
+import org.apache.hadoop.util.DurationInfo;
+
+import static org.apache.hadoop.fs.s3a.Constants.BUCKET_REGION_HEADER;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_BUCKET_PREFIX;
+import static org.apache.hadoop.fs.s3a.Statistic.STORE_REGION_PROBE;
+import static 
org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY;
+import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND;
+import static 
org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration;
+
+/**
+ * Operation to get the S3 region for a bucket.
+ * If a region is not specified in {@link 
org.apache.hadoop.fs.s3a.Constants#AWS_REGION}, this
+ * operation will warn and then probe S3 to get the region.
+ */
+public class GetS3RegionOperation extends ExecutingStoreOperation<Region> {
+
+  private final AWSCredentialProviderList credentials;
+
+  private final String region;
+
+  private final String bucket;
+
+  private final StoreContext context;
+
+  private final static Map<String, Region> BUCKET_REGIONS = new HashMap<>();

Review Comment:
   Interesting, so we aren't caching in the SDK anymore, is that correct?



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/GetS3RegionOperation.java:
##########
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.impl;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
+import software.amazon.awssdk.services.s3.model.HeadBucketResponse;
+import software.amazon.awssdk.services.s3.model.S3Exception;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
+import org.apache.hadoop.fs.s3a.UnknownStoreException;
+import org.apache.hadoop.fs.store.LogExactlyOnce;
+import org.apache.hadoop.util.DurationInfo;
+
+import static org.apache.hadoop.fs.s3a.Constants.BUCKET_REGION_HEADER;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_BUCKET_PREFIX;
+import static org.apache.hadoop.fs.s3a.Statistic.STORE_REGION_PROBE;
+import static 
org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY;
+import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND;
+import static 
org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration;
+
+/**
+ * Operation to get the S3 region for a bucket.
+ * If a region is not specified in {@link 
org.apache.hadoop.fs.s3a.Constants#AWS_REGION}, this
+ * operation will warn and then probe S3 to get the region.
+ */
+public class GetS3RegionOperation extends ExecutingStoreOperation<Region> {
+
+  private final AWSCredentialProviderList credentials;
+
+  private final String region;
+
+  private final String bucket;
+
+  private final StoreContext context;
+
+  private final static Map<String, Region> BUCKET_REGIONS = new HashMap<>();
+
+  private static final Logger LOG = LoggerFactory.getLogger(
+      GetS3RegionOperation.class);
+
+  /** Exactly once log to warn about setting the region in config to avoid 
probe. */
+  private static final LogExactlyOnce SET_REGION_WARNING = new 
LogExactlyOnce(LOG);
+
+  public GetS3RegionOperation(
+      final StoreContext context,
+      String region,
+      String bucket,
+      AWSCredentialProviderList credentials) {
+      super(context);
+      this.context = context;
+      this.credentials = credentials;
+      this.region = region;
+      this.bucket = bucket;
+  }
+
+
+  @Override
+  public Region execute() throws IOException {
+    
+    if (!StringUtils.isBlank(region)) {
+      return Region.of(region);
+    }
+
+    Region cachedRegion = BUCKET_REGIONS.get(bucket);
+
+    if (cachedRegion != null) {
+      LOG.debug("Got region {} for bucket {} from cache", cachedRegion, 
bucket);
+      return cachedRegion;
+    }
+
+    try (DurationInfo ignore =
+        new DurationInfo(LOG, false, "Get S3 region")) {
+
+      SET_REGION_WARNING.warn(
+          "Getting region for bucket {} from S3, this will slow down FS 
initialisation. "
+              + "To avoid this, set the region using property {}", bucket,
+          FS_S3A_BUCKET_PREFIX + bucket + ".endpoint.region");
+
+      // build a s3 client with region eu-west-1 that can be used to get the 
region of the
+      // bucket. Using eu-west-1, as headBucket() doesn't work with us-east-1. 
This is because
+      // us-east-1 uses the endpoint s3.amazonaws.com, which resolves 
bucket.s3.amazonaws.com
+      // to the actual region the bucket is in. As the request is signed with 
us-east-1 and
+      // not the bucket's region, it fails.
+      S3Client getRegionS3Client =
+          
S3Client.builder().region(Region.EU_WEST_1).credentialsProvider(credentials)
+              .build();
+
+      HeadBucketResponse headBucketResponse = 
trackDuration(context.getInstrumentation(),
+          STORE_REGION_PROBE.getSymbol(),
+          () -> 
getRegionS3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()));
+
+      Region bucketRegion = Region.of(
+          
headBucketResponse.sdkHttpResponse().headers().get(BUCKET_REGION_HEADER).get(0));
+      BUCKET_REGIONS.put(bucket, bucketRegion);
+
+      return bucketRegion;
+    } catch (S3Exception exception) {
+      if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) {

Review Comment:
   Add a comment here explaining what we do in the case of a 301 response.



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java:
##########
@@ -47,17 +39,16 @@
 import org.apache.hadoop.fs.s3a.audit.AuditFailureException;
 import org.apache.hadoop.fs.s3a.audit.AuditIntegration;
 import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets;
-import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider;
-import org.apache.hadoop.fs.s3a.impl.NetworkBinding;
-import org.apache.hadoop.fs.s3a.impl.V2Migration;
+import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException;
 import org.apache.hadoop.fs.s3native.S3xLoginHelper;
 import org.apache.hadoop.net.ConnectTimeoutException;
 import org.apache.hadoop.security.ProviderUtils;
-import org.apache.hadoop.util.VersionInfo;
+import org.apache.hadoop.util.Preconditions;
 
 import org.apache.hadoop.util.Lists;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import software.amazon.encryption.s3.S3EncryptionClientException;

Review Comment:
   Move this import to the top, alongside the other imports of the same group.



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java:
##########
@@ -103,9 +103,9 @@ public void test_030_postCreationAssertions() throws Throwable {
     conf.setInt(MULTIPART_SIZE, MULTIPART_MIN_SIZE);
     S3ATestUtils.disableFilesystemCaching(conf);
 
-    try (FileSystem fs2 = FileSystem.get(fs.getUri(), conf)) {
-      intercept(UnsupportedRequestException.class, () ->
-          fs2.rename(hugefile, hugefileRenamed));
-    }
+//    try (FileSystem fs2 = FileSystem.get(fs.getUri(), conf)) {

Review Comment:
   I assume this commented-out code needs to be changed.



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/GetS3RegionOperation.java:
##########
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.impl;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
+import software.amazon.awssdk.services.s3.model.HeadBucketResponse;
+import software.amazon.awssdk.services.s3.model.S3Exception;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
+import org.apache.hadoop.fs.s3a.UnknownStoreException;
+import org.apache.hadoop.fs.store.LogExactlyOnce;
+import org.apache.hadoop.util.DurationInfo;
+
+import static org.apache.hadoop.fs.s3a.Constants.BUCKET_REGION_HEADER;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_BUCKET_PREFIX;
+import static org.apache.hadoop.fs.s3a.Statistic.STORE_REGION_PROBE;
+import static 
org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY;
+import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND;
+import static 
org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration;
+
+/**
+ * Operation to get the S3 region for a bucket.
+ * If a region is not specified in {@link 
org.apache.hadoop.fs.s3a.Constants#AWS_REGION}, this
+ * operation will warn and then probe S3 to get the region.
+ */
+public class GetS3RegionOperation extends ExecutingStoreOperation<Region> {
+
+  private final AWSCredentialProviderList credentials;
+
+  private final String region;
+
+  private final String bucket;
+
+  private final StoreContext context;
+
+  private final static Map<String, Region> BUCKET_REGIONS = new HashMap<>();
+
+  private static final Logger LOG = LoggerFactory.getLogger(
+      GetS3RegionOperation.class);
+
+  /** Exactly once log to warn about setting the region in config to avoid 
probe. */
+  private static final LogExactlyOnce SET_REGION_WARNING = new 
LogExactlyOnce(LOG);
+
+  public GetS3RegionOperation(
+      final StoreContext context,
+      String region,
+      String bucket,
+      AWSCredentialProviderList credentials) {
+      super(context);
+      this.context = context;
+      this.credentials = credentials;
+      this.region = region;
+      this.bucket = bucket;
+  }
+
+
+  @Override
+  public Region execute() throws IOException {
+    
+    if (!StringUtils.isBlank(region)) {
+      return Region.of(region);
+    }
+
+    Region cachedRegion = BUCKET_REGIONS.get(bucket);
+
+    if (cachedRegion != null) {
+      LOG.debug("Got region {} for bucket {} from cache", cachedRegion, 
bucket);
+      return cachedRegion;
+    }
+
+    try (DurationInfo ignore =
+        new DurationInfo(LOG, false, "Get S3 region")) {
+
+      SET_REGION_WARNING.warn(
+          "Getting region for bucket {} from S3, this will slow down FS 
initialisation. "
+              + "To avoid this, set the region using property {}", bucket,
+          FS_S3A_BUCKET_PREFIX + bucket + ".endpoint.region");
+
+      // build a s3 client with region eu-west-1 that can be used to get the 
region of the
+      // bucket. Using eu-west-1, as headBucket() doesn't work with us-east-1. 
This is because
+      // us-east-1 uses the endpoint s3.amazonaws.com, which resolves 
bucket.s3.amazonaws.com
+      // to the actual region the bucket is in. As the request is signed with 
us-east-1 and
+      // not the bucket's region, it fails.
+      S3Client getRegionS3Client =
+          
S3Client.builder().region(Region.EU_WEST_1).credentialsProvider(credentials)
+              .build();
+
+      HeadBucketResponse headBucketResponse = 
trackDuration(context.getInstrumentation(),
+          STORE_REGION_PROBE.getSymbol(),
+          () -> 
getRegionS3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()));
+
+      Region bucketRegion = Region.of(
+          
headBucketResponse.sdkHttpResponse().headers().get(BUCKET_REGION_HEADER).get(0));
+      BUCKET_REGIONS.put(bucket, bucketRegion);
+
+      return bucketRegion;
+    } catch (S3Exception exception) {
+      if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) {
+        Region bucketRegion = Region.of(
+            
exception.awsErrorDetails().sdkHttpResponse().headers().get(BUCKET_REGION_HEADER)
+                .get(0));
+        BUCKET_REGIONS.put(bucket, bucketRegion);
+

Review Comment:
   Log an error or a warning about the caught exception before returning the region.





> AWS SDK V2 - Implement CSE
> --------------------------
>
>                 Key: HADOOP-18708
>                 URL: https://issues.apache.org/jira/browse/HADOOP-18708
>             Project: Hadoop Common
>          Issue Type: Sub-task
>          Components: fs/s3
>    Affects Versions: 3.4.0
>            Reporter: Ahmar Suhail
>            Priority: Major
>              Labels: pull-request-available
>
> The S3 Encryption Client for SDK V2 is now available, so add client-side
> encryption back in.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org

Reply via email to