steveloughran commented on code in PR #6544: URL: https://github.com/apache/hadoop/pull/6544#discussion_r1516477473
########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java: ########## @@ -1624,4 +1624,21 @@ private Constants() { * Value: {@value}. */ public static final boolean DEFAULT_AWS_S3_CLASSLOADER_ISOLATION = true; + + /** + * Flag {@value} + * to enable S3 Access Grants to control authorization to S3 data. More information: + * https://aws.amazon.com/s3/features/access-grants/ + * and + * https://github.com/aws/aws-s3-accessgrants-plugin-java-v2/ + */ + public static final String AWS_S3_ACCESS_GRANTS_ENABLED = "fs.s3a.s3accessgrants.enabled"; Review Comment: 1. can you use "fs.s3a.access.grants" as the prefix here and below 2. It'd be good have s3afs .hasPathCapability() return the enabled flag for ease of testing ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java: ########## @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.junit.Assert; +import org.junit.Test; + +import software.amazon.awssdk.awscore.AwsClient; +import software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsIdentityProvider; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_ENABLED; + + +/** + * Test S3 Access Grants configurations. + */ +public class TestS3AccessGrantConfiguration extends AbstractHadoopTestBase { + /** + * This credential provider will be attached to any client + * that has been configured with the S3 Access Grants plugin. + * {@link software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsPlugin}. + */ + public static final String S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS = + S3AccessGrantsIdentityProvider.class.getName(); + + @Test + public void testS3AccessGrantsEnabled() throws IOException, URISyntaxException { + // Feature is explicitly enabled + AwsClient s3AsyncClient = getAwsClient(createConfig(true), true); + Assert.assertEquals( Review Comment: 1. I prefer AssertJ asserts with useful .description() values in new test suites. AssertEquals is not as bad as the others: it does generate a message, but more details are good. 2. the same assert and operation is being used everywhere. Factor it out into a method and call it where needed. ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java: ########## @@ -401,4 +411,20 @@ private static Region getS3RegionFromEndpoint(final String endpoint, return Region.of(AWS_S3_DEFAULT_REGION); } + private static <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void + applyS3AccessGrantsConfigurations(BuilderT builder, Configuration conf) { + if (!conf.getBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, false)){ Review Comment: define and use a constant `AWS_S3_ACCESS_GRANTS_ENABLED` here. makes it easier to see/change what the default is in future. ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java: ########## @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.junit.Assert; +import org.junit.Test; + +import software.amazon.awssdk.awscore.AwsClient; +import software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsIdentityProvider; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_ENABLED; + + +/** + * Test S3 Access Grants configurations. + */ +public class TestS3AccessGrantConfiguration extends AbstractHadoopTestBase { + /** + * This credential provider will be attached to any client + * that has been configured with the S3 Access Grants plugin. + * {@link software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsPlugin}. + */ + public static final String S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS = + S3AccessGrantsIdentityProvider.class.getName(); + + @Test + public void testS3AccessGrantsEnabled() throws IOException, URISyntaxException { + // Feature is explicitly enabled + AwsClient s3AsyncClient = getAwsClient(createConfig(true), true); + Assert.assertEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3AsyncClient)); + + AwsClient s3Client = getAwsClient(createConfig(true), false); + Assert.assertEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3Client)); + } + + @Test + public void testS3AccessGrantsDisabled() throws IOException, URISyntaxException { + // Disabled by default + AwsClient s3AsyncDefaultClient = getAwsClient(new Configuration(), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3AsyncDefaultClient)); + + AwsClient s3DefaultClient = getAwsClient(new Configuration(), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3DefaultClient)); + + // Disabled if explicitly set + AwsClient s3AsyncExplicitlyDisabledClient = getAwsClient(createConfig(false), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3AsyncExplicitlyDisabledClient)); + + AwsClient s3ExplicitlyDisabledClient = getAwsClient(createConfig(false), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3ExplicitlyDisabledClient)); + } + + private Configuration createConfig(boolean s3agEnabled) { + Configuration conf = new Configuration(); Review Comment: FYI, this reads in core-default and core-site xml files, not a problem here but it does mean that site overrides can get picked up. ########## hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md: ########## @@ -614,6 +614,38 @@ If the following property is not set or set to `true`, the following exception w java.io.IOException: From option fs.s3a.aws.credentials.provider java.lang.ClassNotFoundException: Class CustomCredentialsProvider not found ``` +## S3 Authorization Using S3 Access Grants + +[S3 Access Grants](https://aws.amazon.com/s3/features/access-grants/) can be used to grant accesses to S3 data using IAM Principals. +In order to enable S3 Access Grants, S3A utilizes the +[S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on all S3 clients, +which is found within the AWS Java SDK bundle (v2.23.19+). + +S3A supports both cross-region access (by default) and the +[fallback-to-IAM configuration](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2?tab=readme-ov-file#using-the-plugin) +which allows S3A to fallback to using the IAM role (and its permission sets directly) to access S3 data in the case that S3 Access Grants +is unable to authorize the S3 call. + +The following is how this feature can be enabled: Review Comment: To enable this feature ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java: ########## @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.conf.Configuration; Review Comment: nit: import ordering ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java: ########## @@ -401,4 +411,20 @@ private static Region getS3RegionFromEndpoint(final String endpoint, return Region.of(AWS_S3_DEFAULT_REGION); } + private static <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void + applyS3AccessGrantsConfigurations(BuilderT builder, Configuration conf) { + if (!conf.getBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, false)){ + LOG_S3AG_ENABLED.debug("S3 Access Grants plugin is not enabled."); + return; + } + + boolean isFallbackEnabled = + conf.getBoolean(AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED, false); + S3AccessGrantsPlugin accessGrantsPlugin = + S3AccessGrantsPlugin.builder().enableFallback(isFallbackEnabled).build(); Review Comment: nit. put the enable() and build() on their own lines ########## hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md: ########## @@ -614,6 +614,38 @@ If the following property is not set or set to `true`, the following exception w java.io.IOException: From option fs.s3a.aws.credentials.provider java.lang.ClassNotFoundException: Class CustomCredentialsProvider not found ``` +## S3 Authorization Using S3 Access Grants + +[S3 Access Grants](https://aws.amazon.com/s3/features/access-grants/) can be used to grant accesses to S3 data using IAM Principals. Review Comment: this might be a good time to move everything related into authentication into its own markdown file (authentication.md) and link that off index. this entry would be its own heading ## S3 Access grants with a <a name> reference so index generation will create a quick link ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java: ########## @@ -401,4 +411,20 @@ private static Region getS3RegionFromEndpoint(final String endpoint, return Region.of(AWS_S3_DEFAULT_REGION); } + private static <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void + applyS3AccessGrantsConfigurations(BuilderT builder, Configuration conf) { + if (!conf.getBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, false)){ + LOG_S3AG_ENABLED.debug("S3 Access Grants plugin is not enabled."); + return; + } + + boolean isFallbackEnabled = + conf.getBoolean(AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED, false); + S3AccessGrantsPlugin accessGrantsPlugin = + S3AccessGrantsPlugin.builder().enableFallback(isFallbackEnabled).build(); + builder.addPlugin(accessGrantsPlugin); + LOG_S3AG_ENABLED.info( Review Comment: recommend logging at debug to the normal LOG ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java: ########## @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.junit.Assert; +import org.junit.Test; + +import software.amazon.awssdk.awscore.AwsClient; +import software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsIdentityProvider; Review Comment: is there anything to be gained by adding a subclass of this into fs.s3a.auth module? we've found it useful in the past, including for migrating the underlying implementation, and other bug fixes/scale issues. ########## hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md: ########## @@ -614,6 +614,38 @@ If the following property is not set or set to `true`, the following exception w java.io.IOException: From option fs.s3a.aws.credentials.provider java.lang.ClassNotFoundException: Class CustomCredentialsProvider not found ``` +## S3 Authorization Using S3 Access Grants + +[S3 Access Grants](https://aws.amazon.com/s3/features/access-grants/) can be used to grant accesses to S3 data using IAM Principals. +In order to enable S3 Access Grants, S3A utilizes the +[S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on all S3 clients, +which is found within the AWS Java SDK bundle (v2.23.19+). + +S3A supports both cross-region access (by default) and the +[fallback-to-IAM configuration](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2?tab=readme-ov-file#using-the-plugin) +which allows S3A to fallback to using the IAM role (and its permission sets directly) to access S3 data in the case that S3 Access Grants +is unable to authorize the S3 call. + +The following is how this feature can be enabled: + +```xml +<property> + <name>fs.s3a.s3accessgrants.enabled</name> + <value>true</value> +</property> +<property> + <!--Optional: Defaults to False--> + <name>fs.s3a.s3accessgrants.fallback.to.iam</name> + <value>true</value> +</property> +``` + +Please note that S3A only enables the [S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on the S3 clients Review Comment: add a subsection "notes" as more notes are added. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org