This is an automated email from the ASF dual-hosted git repository.

slfan1989 pushed a commit to branch branch-3.4.0
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.4.0 by this push:
     new aa9ba0a39389 HADOOP-18975 S3A: Add option fs.s3a.endpoint.fips to use 
AWS FIPS endpoints (#6277)
aa9ba0a39389 is described below

commit aa9ba0a39389556222c47eb40d767dbe0648a884
Author: Steve Loughran <ste...@cloudera.com>
AuthorDate: Tue Jan 16 14:16:12 2024 +0000

    HADOOP-18975 S3A: Add option fs.s3a.endpoint.fips to use AWS FIPS endpoints 
(#6277)
    
    
    Adds a new option `fs.s3a.endpoint.fips` to switch the SDK client to use
    FIPS endpoints, as an alternative to explicitly declaring them.
    
    
    * The option is available as a path capability for probes.
    * SDK v2 itself doesn't know that some regions don't have FIPS endpoints
    * SDK only fails with endpoint + fips flag as a retried exception; with
      this change the S3A client fails fast.
    * Adds a new "connecting.md" doc; moves existing docs there and 
restructures.
    * New Tests in ITestS3AEndpointRegion
    
    bucket-info command support:
    
    * added to list of path capabilities
    * added -fips flag and test for explicit probe
    * also now prints bucket region
    * and removed some of the obsolete s3guard options
    * updated docs
    
    Contributed by Steve Loughran
---
 .../java/org/apache/hadoop/fs/s3a/Constants.java   |   9 +
 .../hadoop/fs/s3a/DefaultS3ClientFactory.java      |  22 +
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java    |  12 +
 .../org/apache/hadoop/fs/s3a/S3ClientFactory.java  |  23 +
 .../hadoop/fs/s3a/impl/InternalConstants.java      |   2 +
 .../apache/hadoop/fs/s3a/s3guard/S3GuardTool.java  |  22 +-
 .../site/markdown/tools/hadoop-aws/connecting.md   | 477 +++++++++++++++++++++
 .../src/site/markdown/tools/hadoop-aws/index.md    | 261 +----------
 .../site/markdown/tools/hadoop-aws/performance.md  |   1 +
 .../src/site/markdown/tools/hadoop-aws/s3guard.md  |  17 +-
 .../hadoop/fs/s3a/ITestS3ABucketExistence.java     |   2 +
 .../hadoop/fs/s3a/ITestS3AEndpointRegion.java      | 124 ++++--
 .../hadoop/fs/s3a/s3guard/ITestS3GuardTool.java    |  16 +
 .../hadoop-aws/src/test/resources/core-site.xml    |   6 +
 14 files changed, 688 insertions(+), 306 deletions(-)

diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index c1c12b594828..e33f762cdfcf 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -1335,6 +1335,15 @@ public final class Constants {
    */
   public static final String AWS_S3_DEFAULT_REGION = "us-east-2";
 
+  /**
+   * Is the endpoint a FIPS endpoint?
+   * Can be queried as a path capability.
+   * Value {@value}.
+   */
+  public static final String FIPS_ENDPOINT = "fs.s3a.endpoint.fips";
+
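+  /**
+   * Default value of {@link #FIPS_ENDPOINT}: {@value}.
+   */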
+  public static final boolean ENDPOINT_FIPS_DEFAULT = false;
+
   /**
    * Require that all S3 access is made through Access Points.
    */
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
index 05ac5ef921c9..0fde93e6548b 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 
+import org.apache.hadoop.classification.VisibleForTesting;
 import org.apache.hadoop.fs.s3a.impl.AWSClientConfig;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -54,6 +55,7 @@ import org.apache.hadoop.fs.store.LogExactlyOnce;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION;
 import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
+import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME;
 import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED;
 import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED_DEFAULT;
@@ -63,6 +65,7 @@ import static 
org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3;
 import static org.apache.hadoop.fs.s3a.auth.SignerFactory.createHttpSigner;
 import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER;
 import static 
org.apache.hadoop.fs.s3a.impl.InternalConstants.AUTH_SCHEME_AWS_SIGV_4;
+import static org.apache.hadoop.util.Preconditions.checkArgument;
 
 
 /**
@@ -102,6 +105,13 @@ public class DefaultS3ClientFactory extends Configured
   /** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. 
*/
   private static final LogExactlyOnce IGNORE_CSE_WARN = new 
LogExactlyOnce(LOG);
 
+  /**
+   * Error message when an endpoint is set with FIPS enabled: {@value}.
+   */
+  @VisibleForTesting
+  public static final String ERROR_ENDPOINT_WITH_FIPS =
+      "An endpoint cannot set when " + FIPS_ENDPOINT + " is true";
+
   @Override
   public S3Client createS3Client(
       final URI uri,
@@ -248,6 +258,7 @@ public class DefaultS3ClientFactory extends Configured
    * @param conf  conf configuration object
    * @param <BuilderT> S3 client builder type
    * @param <ClientT> S3 client type
+   * @throws IllegalArgumentException if endpoint is set when FIPS is enabled.
    */
   private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> 
void configureEndpointAndRegion(
       BuilderT builder, S3ClientCreationParameters parameters, Configuration 
conf) {
@@ -263,7 +274,18 @@ public class DefaultS3ClientFactory extends Configured
       region = Region.of(configuredRegion);
     }
 
+    // FIPS? Log it, then reject any attempt to set an endpoint
+    final boolean fipsEnabled = parameters.isFipsEnabled();
+    if (fipsEnabled) {
+      LOG.debug("Enabling FIPS mode");
+    }
+    // always setting it guarantees the value is non-null,
+    // which tests expect.
+    builder.fipsEnabled(fipsEnabled);
+
     if (endpoint != null) {
+      checkArgument(!fipsEnabled,
+          "%s : %s", ERROR_ENDPOINT_WITH_FIPS, endpoint);
       builder.endpointOverride(endpoint);
       // No region was configured, try to determine it from the endpoint.
       if (region == null) {
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index df7d3f1fb689..1aad1ad2f858 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -461,6 +461,11 @@ public class S3AFileSystem extends FileSystem implements 
StreamCapabilities,
    */
   private boolean isMultipartCopyEnabled;
 
+  /**
+   * Is FIPS enabled?
+   */
+  private boolean fipsEnabled;
+
   /**
    * A cache of files that should be deleted when the FileSystem is closed
    * or the JVM is exited.
@@ -614,6 +619,8 @@ public class S3AFileSystem extends FileSystem implements 
StreamCapabilities,
           ? conf.getTrimmed(AWS_REGION)
           : accessPoint.getRegion();
 
+      fipsEnabled = conf.getBoolean(FIPS_ENDPOINT, ENDPOINT_FIPS_DEFAULT);
+
       // is this an S3Express store?
       s3ExpressStore = isS3ExpressStore(bucket, endpoint);
 
@@ -1046,6 +1053,7 @@ public class S3AFileSystem extends FileSystem implements 
StreamCapabilities,
         .withMultipartThreshold(multiPartThreshold)
         .withTransferManagerExecutor(unboundedThreadPool)
         .withRegion(configuredRegion)
+        .withFipsEnabled(fipsEnabled)
         .withExpressCreateSession(
             conf.getBoolean(S3EXPRESS_CREATE_SESSION, 
S3EXPRESS_CREATE_SESSION_DEFAULT));
 
@@ -5521,6 +5529,10 @@ public class S3AFileSystem extends FileSystem implements 
StreamCapabilities,
     case OPTIMIZED_COPY_FROM_LOCAL:
       return optimizedCopyFromLocal;
 
+    // probe for a fips endpoint
+    case FIPS_ENDPOINT:
+      return fipsEnabled;
+
     default:
       return super.hasPathCapability(p, cap);
     }
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
index 305bcbb56504..404a255528ff 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
@@ -176,6 +176,11 @@ public interface S3ClientFactory {
      */
     private boolean expressCreateSession = S3EXPRESS_CREATE_SESSION_DEFAULT;
 
+    /**
+     * Is FIPS enabled?
+     */
+    private boolean fipsEnabled;
+
     /**
      * List of execution interceptors to include in the chain
      * of interceptors in the SDK.
@@ -461,5 +466,23 @@ public interface S3ClientFactory {
           ", expressCreateSession=" + expressCreateSession +
           '}';
     }
+
+    /**
+     * Get the FIPS flag.
+     * @return is fips enabled
+     */
+    public boolean isFipsEnabled() {
+      return fipsEnabled;
+    }
+
+    /**
+     * Set builder value.
+     * @param value new value
+     * @return the builder
+     */
+    public S3ClientCreationParameters withFipsEnabled(final boolean value) {
+      fipsEnabled = value;
+      return this;
+    }
   }
 }
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java
index 1148f6fcd483..8ebf8c013d10 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java
@@ -38,6 +38,7 @@ import static 
org.apache.hadoop.fs.CommonPathCapabilities.FS_MULTIPART_UPLOADER;
 import static 
org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS;
 import static 
org.apache.hadoop.fs.s3a.Constants.DIRECTORY_OPERATIONS_PURGE_UPLOADS;
 import static org.apache.hadoop.fs.s3a.Constants.ENABLE_MULTI_DELETE;
+import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
 import static 
org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE_ENABLED;
 import static org.apache.hadoop.fs.s3a.Constants.STORE_CAPABILITY_AWS_V2;
@@ -272,6 +273,7 @@ public final class InternalConstants {
           FS_CHECKSUMS,
           FS_MULTIPART_UPLOADER,
           DIRECTORY_LISTING_INCONSISTENT,
+          FIPS_ENDPOINT,
 
           // s3 specific
           STORE_CAPABILITY_AWS_V2,
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
index 41251d190c44..26b6acda3090 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -357,12 +357,11 @@ public abstract class S3GuardTool extends Configured 
implements Tool,
     public static final String NAME = BUCKET_INFO;
     public static final String GUARDED_FLAG = "guarded";
     public static final String UNGUARDED_FLAG = "unguarded";
-    public static final String AUTH_FLAG = "auth";
-    public static final String NONAUTH_FLAG = "nonauth";
     public static final String ENCRYPTION_FLAG = "encryption";
     public static final String MAGIC_FLAG = "magic";
     public static final String MARKERS_FLAG = "markers";
     public static final String MARKERS_AWARE = "aware";
+    public static final String FIPS_FLAG = "fips";
 
     public static final String PURPOSE = "provide/check information"
         + " about a specific bucket";
@@ -370,8 +369,7 @@ public abstract class S3GuardTool extends Configured 
implements Tool,
     private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n"
         + "\t" + PURPOSE + "\n\n"
         + "Common options:\n"
-        + "  -" + AUTH_FLAG + " - Require the S3Guard mode to be 
\"authoritative\"\n"
-        + "  -" + NONAUTH_FLAG + " - Require the S3Guard mode to be 
\"non-authoritative\"\n"
+        + "  -" + FIPS_FLAG + " - Require the client is using a FIPS 
endpoint\n"
         + "  -" + MAGIC_FLAG +
         " - Require the S3 filesystem to be support the \"magic\" committer\n"
         + "  -" + ENCRYPTION_FLAG
@@ -395,7 +393,7 @@ public abstract class S3GuardTool extends Configured 
implements Tool,
             + " directory markers are not deleted";
 
     public BucketInfo(Configuration conf) {
-      super(conf, GUARDED_FLAG, UNGUARDED_FLAG, AUTH_FLAG, NONAUTH_FLAG, 
MAGIC_FLAG);
+      super(conf, GUARDED_FLAG, UNGUARDED_FLAG, FIPS_FLAG, MAGIC_FLAG);
       CommandFormat format = getCommandFormat();
       format.addOptionWithValue(ENCRYPTION_FLAG);
       format.addOptionWithValue(MARKERS_FLAG);
@@ -462,6 +460,10 @@ public abstract class S3GuardTool extends Configured 
implements Tool,
       println(out, "\tEndpoint: %s=%s",
           ENDPOINT,
           StringUtils.isNotEmpty(endpoint) ? endpoint : "(unset)");
+      String region = conf.getTrimmed(AWS_REGION, "");
+      println(out, "\tRegion: %s=%s", AWS_REGION,
+          StringUtils.isNotEmpty(region) ? region : "(unset)");
+
       String encryption =
           printOption(out, "\tEncryption", Constants.S3_ENCRYPTION_ALGORITHM,
               "none");
@@ -487,12 +489,12 @@ public abstract class S3GuardTool extends Configured 
implements Tool,
           FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE);
       switch (committer) {
       case COMMITTER_NAME_FILE:
-        println(out, "The original 'file' commmitter is active"
+        println(out, "The original 'file' committer is active"
             + " -this is slow and potentially unsafe");
         break;
       case InternalCommitterConstants.COMMITTER_NAME_STAGING:
         println(out, "The 'staging' committer is used "
-            + "-prefer the 'directory' committer");
+            + "-prefer the 'magic' committer");
         // fall through
       case COMMITTER_NAME_DIRECTORY:
         // fall through
@@ -555,13 +557,17 @@ public abstract class S3GuardTool extends Configured 
implements Tool,
       processMarkerOption(out, fs,
           getCommandFormat().getOptValue(MARKERS_FLAG));
 
-      // and check for capabilitities
+      // and check for capabilities
       println(out, "%nStore Capabilities");
       for (String capability : S3A_DYNAMIC_CAPABILITIES) {
         out.printf("\t%s %s%n", capability,
             fs.hasPathCapability(root, capability));
       }
       println(out, "");
+
+      if (commands.getOpt(FIPS_FLAG) && !fs.hasPathCapability(root, 
FIPS_ENDPOINT)) {
+        throw badState("FIPS endpoint was required but the filesystem is not 
using it");
+      }
       // and finally flush the output and report a success.
       out.flush();
       return SUCCESS;
diff --git 
a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md 
b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
new file mode 100644
index 000000000000..600e1e128a2c
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
@@ -0,0 +1,477 @@
+<!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+# Connecting to an Amazon S3 Bucket through the S3A Connector
+
+<!-- MACRO{toc|fromDepth=0|toDepth=2} -->
+
+
+1. This document covers how to connect to and authenticate with S3 stores, 
primarily AWS S3.
+2. There have been changes in this mechanism between the V1 and V2 SDK, in 
particular specifying
+the region is now preferred to specifying the regional S3 endpoint.
+3. For connecting to third-party stores, please read [Working with Third-party 
S3 Stores](third_party_stores.html) *after* reading this document.
+
+## <a name="foundational"></a> Foundational Concepts
+
+### <a name="regions"></a>  AWS Regions and Availability Zones
+
+AWS provides storage, compute and other services around the world, in 
*regions*.
+
+Data in S3 is stored in *buckets*; each bucket is in a single region.
+
+There are some "special" regions: China, AWS GovCloud.
+It is *believed* that the S3A connector works in these places, at least to the 
extent that nobody has complained about it not working.
+
+### <a name="endpoints"></a> Endpoints
+
+The S3A connector connects to Amazon S3 storage over HTTPS connections, either 
directly or through an HTTP proxy.
+HTTP HEAD and GET, PUT, POST and DELETE requests are invoked to perform 
different read/write operations against the store.
+
+There are multiple ways to connect to an S3 bucket:
+
+* To an [S3 Endpoint](https://docs.aws.amazon.com/general/latest/gr/s3.html); 
an HTTPS server hosted by Amazon or a third party.
+* To a FIPS-compliant S3 Endpoint.
+* To an AWS S3 [Access 
Point](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-points.html).
+* Through a VPC connection, [AWS PrivateLink for Amazon 
S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html).
+* AWS [Outposts](https://aws.amazon.com/outposts/).
+
+The S3A connector supports all these; S3 Endpoints are the primary mechanism 
used, either explicitly declared or automatically determined from the declared 
region of the bucket.
+
+Not supported:
+* AWS [Snowball](https://aws.amazon.com/snowball/).
+
+As of December 2023, AWS S3 uses Transport Layer Security (TLS) [version 
1.2](https://aws.amazon.com/blogs/security/tls-1-2-required-for-aws-endpoints/) 
to secure the communications channel; the S3A client does this through
+the Apache [HttpClient library](https://hc.apache.org/index.html).
+
+### <a name="third-party"></a> Third party stores
+
+Third-party stores implementing the S3 API are also supported.
+These often only implement a subset of the S3 API; not all features are 
available.
+If TLS authentication is used, then the HTTPS certificates for the private 
stores
+_MUST_ be installed on the JVMs on hosts within the Hadoop cluster.
+
+See [Working with Third-party S3 Stores](third_party_stores.html) *after* 
reading this document.
+
+
+## <a name="settings"></a> Connection Settings
+
+There are three core settings to connect to an S3 store: endpoint, region, and 
whether or not to use path style access.
+
+
+```xml
+<property>
+  <name>fs.s3a.endpoint</name>
+  <description>AWS S3 endpoint to connect to. An up-to-date list is
+    provided in the AWS Documentation: regions and endpoints. Without this
+    property, the standard region (s3.amazonaws.com) is assumed.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.endpoint.region</name>
+  <value>REGION</value>
+  <description>AWS Region of the data</description>
+</property>
+
+<property>
+  <name>fs.s3a.path.style.access</name>
+  <value>false</value>
+  <description>Enable S3 path style access by disabling the default virtual 
hosting behaviour.
+    Needed for AWS PrivateLink, S3 AccessPoints, and, generally, third party 
stores.
+    Default: false.
+  </description>
+</property>
+```
+
+Historically the S3A connector has preferred the endpoint as defined by the 
option `fs.s3a.endpoint`.
+With the move to the AWS V2 SDK, there is more emphasis on the region, set by 
the `fs.s3a.endpoint.region` option.
+
+Normally, declaring the region in `fs.s3a.endpoint.region` should be 
sufficient to set up the network connection to correctly connect to an 
AWS-hosted S3 store.
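+
+For illustration only (not part of this patch), a minimal Java sketch of
+binding an S3A filesystem to a region, where `example-bucket` and `eu-west-1`
+are placeholder values:
+
+```java
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+
+public class S3ARegionExample {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    // declare the region; no explicit endpoint is needed for AWS-hosted buckets
+    conf.set("fs.s3a.endpoint.region", "eu-west-1");
+    URI bucket = new URI("s3a://example-bucket/");
+    try (FileSystem fs = FileSystem.get(bucket, conf)) {
+      System.out.println("Connected to " + fs.getUri());
+    }
+  }
+}
+```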
+
+### <a name="timeouts"></a> Network timeouts
+
+See [Timeouts](performance.html#timeouts).
+
+### <a name="networking"></a> Low-level Network Options
+
+```xml
+
+<property>
+  <name>fs.s3a.connection.maximum</name>
+  <value>200</value>
+  <description>Controls the maximum number of simultaneous connections to S3.
+    This must be bigger than the value of fs.s3a.threads.max so as to stop
+    threads being blocked waiting for new HTTPS connections.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.connection.ssl.enabled</name>
+  <value>true</value>
+  <description>
+    Enables or disables SSL connections to AWS services.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.ssl.channel.mode</name>
+  <value>Default_JSSE</value>
+  <description>
+    TLS implementation and cipher options.
+    Values: OpenSSL, Default, Default_JSSE, Default_JSSE_with_GCM
+
+    Default_JSSE is not truly the default JSSE implementation because
+    the GCM cipher is disabled when running on Java 8. However, the name
+    was not changed in order to preserve backwards compatibility. Instead,
+    a new mode called Default_JSSE_with_GCM delegates to the default JSSE
+    implementation with no changes to the list of enabled ciphers.
+
+    OpenSSL requires the wildfly JAR on the classpath and a compatible 
installation of the openssl binaries.
+    It is often faster than the JVM libraries, but also trickier to
+    use.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.socket.send.buffer</name>
+  <value>8192</value>
+  <description>
+    Socket send buffer hint to amazon connector. Represented in bytes.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.socket.recv.buffer</name>
+  <value>8192</value>
+  <description>
+    Socket receive buffer hint to amazon connector. Represented in bytes.
+  </description>
+</property>
+```
+
+### <a name="proxies"></a> Proxy Settings
+
+Connections to S3A stores can be made through an HTTP or HTTPS proxy.
+
+```xml
+<property>
+  <name>fs.s3a.proxy.host</name>
+  <description>
+    Hostname of the (optional) proxy server for S3 connections.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.proxy.ssl.enabled</name>
+  <value>false</value>
+  <description>
+    Does the proxy use a TLS connection?
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.proxy.port</name>
+  <description>
+    Proxy server port. If this property is not set
+    but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with
+    the value of fs.s3a.connection.ssl.enabled).
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.proxy.username</name>
+  <description>Username for authenticating with proxy server.</description>
+</property>
+
+<property>
+  <name>fs.s3a.proxy.password</name>
+  <description>Password for authenticating with proxy server.</description>
+</property>
+
+<property>
+  <name>fs.s3a.proxy.domain</name>
+  <description>Domain for authenticating with proxy server.</description>
+</property>
+
+<property>
+  <name>fs.s3a.proxy.workstation</name>
+  <description>Workstation for authenticating with proxy server.</description>
+</property>
+```
+
+Sometimes the proxy can be a source of problems, especially if HTTP connections 
are kept
+in the connection pool for some time.
+Experiment with the values of `fs.s3a.connection.ttl` and 
`fs.s3a.connection.request.timeout`
+if long-lived connections have problems.
+
+
+##  <a name="per_bucket_endpoints"></a>Using Per-Bucket Configuration to 
access data round the world
+
+S3 Buckets are hosted in different "regions", the default being "US-East-1".
+The S3A client talks to this region by default, issuing HTTP requests
+to the server `s3.amazonaws.com`.
+
+S3A can work with buckets from any region. Each region has its own
+S3 endpoint, documented [by 
Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region).
+
+1. Applications running in EC2 infrastructure do not pay for IO to/from
+*local S3 buckets*. They will be billed for access to remote buckets. Always
+use local buckets and local copies of data, wherever possible.
+2. With the V4 signing protocol, AWS requires the explicit region endpoint
+to be used —hence S3A must be configured to use the specific endpoint. This
+is done in the configuration option `fs.s3a.endpoint`.
+3. All endpoints other than the default endpoint only support interaction
+with buckets local to that S3 instance.
+4. Standard S3 buckets support "cross-region" access where use of the original 
`us-east-1`
+   endpoint allows access to the data, but newer storage types, particularly 
S3 Express, are
+   not supported.
+
+
+
+If the wrong endpoint is used, the request will fail. This may be reported as 
a 301/redirect error,
+or as a 400 Bad Request: take these as cues to check the endpoint setting of
+a bucket.
+
+The up-to-date list of regions is [available 
online](https://docs.aws.amazon.com/general/latest/gr/s3.html).
+
+This list can be used to specify the endpoint of individual buckets, for 
example
+for buckets in the US West (Oregon) and EU/Ireland regions.
+
+```xml
+<property>
+  <name>fs.s3a.bucket.landsat-pds.endpoint</name>
+  <value>s3-us-west-2.amazonaws.com</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.eu-dataset.endpoint</name>
+  <value>s3.eu-west-1.amazonaws.com</value>
+</property>
+```
+
+Declaring the region for the data is simpler, as it avoids having to look up 
the full URL and having to worry about historical quirks of regional endpoint 
hostnames.
+
+```xml
+<property>
+  <name>fs.s3a.bucket.landsat-pds.endpoint.region</name>
+  <value>us-west-2</value>
+  <description>The endpoint for s3a://landsat-pds URLs</description>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.eu-dataset.endpoint.region</name>
+  <value>eu-west-1</value>
+</property>
+```
+
+
+## <a name="privatelink"></a> AWS PrivateLink
+
+[AWS PrivateLink for Amazon 
S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html)
 allows for a private connection to a bucket to be defined, with network access 
rules managing how a bucket can be accessed.
+
+
+1. Follow the documentation to create the private link.
+2. Retrieve the DNS name from the console, such as 
`vpce-f264a96c-6d27bfa7c85e.s3.us-west-2.vpce.amazonaws.com`.
+3. Convert this to an endpoint URL by prefixing "https://bucket."
+4. Declare this as the bucket endpoint and switch to path-style access.
+5. Declare the region: there is no automated determination of the region from
+   the `vpce` URL.
+
+```xml
+
+<property>
+  <name>fs.s3a.bucket.example-usw2.endpoint</name>
+  
<value>https://bucket.vpce-f264a96c-6d27bfa7c85e.s3.us-west-2.vpce.amazonaws.com/</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.example-usw2.path.style.access</name>
+  <value>true</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.example-usw2.endpoint.region</name>
+  <value>us-west-2</value>
+</property>
+```
+
+## <a name="fips"></a> Federal Information Processing Standards (FIPS) 
Endpoints
+
+
+It is possible to use 
[FIPS-compliant](https://www.nist.gov/itl/fips-general-information) endpoints 
which
+support a restricted subset of TLS algorithms.
+
+Amazon provide a specific set of [FIPS 
endpoints](https://aws.amazon.com/compliance/fips/)
+to use so callers can be confident that the network communication is compliant 
with the standard:
+non-compliant algorithms are unavailable.
+
+The boolean option `fs.s3a.endpoint.fips` (default `false`) switches the S3A 
connector to using the FIPS endpoint of a region.
+
+```xml
+<property>
+  <name>fs.s3a.endpoint.fips</name>
+  <value>true</value>
+  <description>Use the FIPS endpoint</description>
+</property>
+```
+
+For a single bucket:
+```xml
+<property>
+  <name>fs.s3a.bucket.landsat-pds.endpoint.fips</name>
+  <value>true</value>
+  <description>Use the FIPS endpoint for the landsat dataset</description>
+</property>
+```
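+
+The option is also exposed as a path capability, so applications can probe a
+filesystem for it. A minimal sketch, again not part of this patch and using a
+placeholder bucket name:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class FipsProbe {
+  public static void main(String[] args) throws Exception {
+    Path root = new Path("s3a://example-bucket/");
+    FileSystem fs = root.getFileSystem(new Configuration());
+    // true when the S3A client was created with fs.s3a.endpoint.fips=true
+    boolean fips = fs.hasPathCapability(root, "fs.s3a.endpoint.fips");
+    System.out.println("FIPS endpoint in use: " + fips);
+  }
+}
+```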
+
+If this option is `true`, the endpoint option `fs.s3a.endpoint` MUST NOT be 
set:
+
+```
+A custom endpoint cannot be combined with FIPS: 
https://s3.eu-west-2.amazonaws.com
+```
+
+The SDK calculates the FIPS-specific endpoint without any awareness as to 
whether FIPS is supported by a region. The first attempt to interact with the 
service will fail:
+
+```
+java.net.UnknownHostException: 
software.amazon.awssdk.core.exception.SdkClientException:
+Received an UnknownHostException when attempting to interact with a service.
+    See cause for the exact endpoint that is failing to resolve.
+    If this is happening on an endpoint that previously worked,
+    there may be a network connectivity issue or your DNS cache
+    could be storing endpoints for too long.:
+    example-london-1.s3-fips.eu-west-2.amazonaws.com
+
+```
+
+*Important* OpenSSL and FIPS endpoints
+
+Linux distributions with a FIPS-compliant SSL library may not be compatible 
with wildfly.
+Always use the JDK SSL implementation unless you are confident that the 
library
+is compatible, or wish to experiment with the settings outside of production 
deployments.
+
+```xml
+<property>
+  <name>fs.s3a.ssl.channel.mode</name>
+  <value>Default_JSSE</value>
+</property>
+```
+
+## <a name="accesspoints"></a>Configuring S3 AccessPoints usage with S3A
+
+S3A supports [S3 Access 
Point](https://aws.amazon.com/s3/features/access-points/) usage which
+improves VPC integration with S3 and simplifies your data's permission model 
because different
+policies can now be applied at the Access Point level. For more information 
about why to use and
+how to create them, make sure to read the official documentation.
+
+Accessing data through an access point is done by using its ARN, as opposed 
to just the bucket name.
+You can set the Access Point ARN property using the following per bucket 
configuration property:
+
+```xml
+<property>
+  <name>fs.s3a.bucket.sample-bucket.accesspoint.arn</name>
+  <value> {ACCESSPOINT_ARN_HERE} </value>
+  <description>Configure S3a traffic to use this AccessPoint</description>
+</property>
+```
+
+This configures access to the `sample-bucket` bucket for S3A, to go through the
+new Access Point ARN. So, for example `s3a://sample-bucket/key` will now use 
your
+configured ARN when getting data from S3 instead of your bucket.
+
+_the name of the bucket used in the s3a:// URLs is irrelevant; it is not used 
when connecting with the store_
+
+Example
+
+```xml
+<property>
+  <name>fs.s3a.bucket.example-ap.accesspoint.arn</name>
+  
<value>arn:aws:s3:eu-west-2:152813717728:accesspoint/ap-example-london</value>
+  <description>AccessPoint bound to bucket name example-ap</description>
+</property>
+```
+
+The `fs.s3a.accesspoint.required` property can also require all access to S3 
to go through Access
+Points. This has the advantage of increasing security inside a VPN / VPC as 
you only allow access
+to known sources of data defined through Access Points. In case there is a 
need to access a bucket
+directly (without Access Points) then you can use per bucket overrides to 
disable this setting on a
+bucket by bucket basis i.e. `fs.s3a.bucket.{YOUR-BUCKET}.accesspoint.required`.
+
+```xml
+<!-- Require access point only access -->
+<property>
+  <name>fs.s3a.accesspoint.required</name>
+  <value>true</value>
+</property>
+<!-- Disable it on a per-bucket basis if needed -->
+<property>
+  <name>fs.s3a.bucket.example-bucket.accesspoint.required</name>
+  <value>false</value>
+</property>
+```
+
+Before using Access Points make sure you're not impacted by the following:
+- `ListObjectsV1` is not supported, this is also deprecated on AWS S3 for 
performance reasons;
+- The endpoint for S3 requests will automatically change to use
+`s3-accesspoint.REGION.amazonaws.{com | com.cn}` depending on the Access Point 
ARN. While
+considering endpoints, if you have any custom signers that use the host 
endpoint property make
+sure to update them if needed;
+
+## <a name="debugging"></a> Debugging network problems
+
+The `storediag` command within the utility 
[cloudstore](https://github.com/exampleoughran/cloudstore)
+JAR is recommended as the way to view and print settings.
+
+If `storediag` doesn't connect to your S3 store, *nothing else will*.
+
+## <a name="common-problems"></a> Common Sources of Connection Problems
+
+Based on the experience of people who field support calls, here are
+some of the main connectivity issues which cause problems.
+
+### <a name="inconsistent-config"></a> Inconsistent configuration across a 
cluster
+
+All hosts in the cluster need to have the configuration secrets;
+local environment variables are not enough.
+
+If HTTPS/TLS is used for a private store, the relevant certificates MUST be 
installed everywhere.
+
+For applications such as distcp, the options need to be passed with the job.
+
+### <a name="public-private-mixup"></a> Confusion between public/private S3 
Stores
+
+If your cluster is configured to use a private store, AWS-hosted buckets are 
not visible.
+If you wish to access data in a private store, you need to change the endpoint.
+
+Private S3 stores generally expect path style access.
+
+### <a name="region-misconfigure"></a> Region and endpoints misconfigured
+
+These usually surface rapidly and with meaningful messages.
+
+Region errors generally surface as
+* `UnknownHostException`
+* `AWSRedirectException` "Received permanent redirect response to region"
+
+Endpoint configuration problems can be more varied, as they are just HTTPS 
URLs.
+
+### <a name="wildfly"></a> Wildfly/OpenSSL Brittleness
+
+When it works, it is fast. But it is fussy as to openSSL implementations, TLS 
protocols and more.
+Because it uses the native openssl binaries, operating system updates can 
trigger regressions.
+
+Disabling it should be the first step to troubleshooting any TLS problems.
+
+### <a name="proxy-misconfiguration"></a> Proxy setup
+
+If there is a proxy, set it up correctly.
diff --git 
a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md 
b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index dcf3be2b0831..0f09c7f87315 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -33,6 +33,7 @@ full details.
 
 ## <a name="documents"></a> Documents
 
+* [Connecting](./connecting.html)
 * [Encryption](./encryption.html)
 * [Performance](./performance.html)
 * [The upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html)
@@ -223,6 +224,10 @@ Do not inadvertently share these credentials through means 
such as:
 If you do any of these: change your credentials immediately!
 
 
+## Connecting to Amazon S3 or a third-party store
+
+See [Connecting to an Amazon S3 Bucket through the S3A 
Connector](connecting.html).
+
 ## <a name="authenticating"></a> Authenticating with S3
 
 Except when interacting with public S3 buckets, the S3A client
@@ -835,61 +840,15 @@ Here are some the S3A properties for use in production.
 </property>
 
 <property>
-  <name>fs.s3a.connection.ssl.enabled</name>
-  <value>true</value>
-  <description>Enables or disables SSL connections to AWS services.
-    Also sets the default port to use for the s3a proxy settings,
-    when not explicitly set in fs.s3a.proxy.port.</description>
-</property>
-
-<property>
-  <name>fs.s3a.endpoint</name>
-  <description>AWS S3 endpoint to connect to. An up-to-date list is
-    provided in the AWS Documentation: regions and endpoints. Without this
-    property, the standard region (s3.amazonaws.com) is assumed.
+  <name>fs.s3a.connection.maximum</name>
+  <value>96</value>
+  <description>Controls the maximum number of simultaneous connections to S3.
+    This must be bigger than the value of fs.s3a.threads.max so as to stop
+    threads being blocked waiting for new HTTPS connections.
+    Why not equal? The AWS SDK transfer manager also uses these connections.
   </description>
 </property>
 
-<property>
-  <name>fs.s3a.path.style.access</name>
-  <value>false</value>
-  <description>Enable S3 path style access ie disabling the default virtual 
hosting behaviour.
-    Useful for S3A-compliant storage providers as it removes the need to set 
up DNS for virtual hosting.
-  </description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.host</name>
-  <description>Hostname of the (optional) proxy server for S3 
connections.</description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.port</name>
-  <description>Proxy server port. If this property is not set
-    but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with
-    the value of fs.s3a.connection.ssl.enabled).</description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.username</name>
-  <description>Username for authenticating with proxy server.</description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.password</name>
-  <description>Password for authenticating with proxy server.</description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.domain</name>
-  <description>Domain for authenticating with proxy server.</description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.workstation</name>
-  <description>Workstation for authenticating with proxy server.</description>
-</property>
-
 <property>
   <name>fs.s3a.attempts.maximum</name>
   <value>5</value>
@@ -1005,14 +964,6 @@ Here are some the S3A properties for use in production.
     implementations can still be used</description>
 </property>
 
-<property>
-  <name>fs.s3a.accesspoint.required</name>
-  <value>false</value>
-  <description>Require that all S3 access is made through Access Points and 
not through
-  buckets directly. If enabled, use per-bucket overrides to allow bucket 
access to a specific set
-  of buckets.</description>
-</property>
-
 <property>
   <name>fs.s3a.block.size</name>
   <value>32M</value>
@@ -1218,23 +1169,6 @@ Here are some the S3A properties for use in production.
   </description>
 </property>
 
-<property>
-  <name>fs.s3a.connection.request.timeout</name>
-  <value>0</value>
-  <description>
-    Time out on HTTP requests to the AWS service; 0 means no timeout.
-    Measured in seconds; the usual time suffixes are all supported
-
-    Important: this is the maximum duration of any AWS service call,
-    including upload and copy operations. If non-zero, it must be larger
-    than the time to upload multi-megabyte blocks to S3 from the client,
-    and to rename many-GB files. Use with care.
-
-    Values that are larger than Integer.MAX_VALUE milliseconds are
-    converged to Integer.MAX_VALUE milliseconds
-  </description>
-</property>
-
 <property>
   <name>fs.s3a.etag.checksum.enabled</name>
   <value>false</value>
@@ -1699,179 +1633,6 @@ For a site configuration of:
 The bucket "nightly" will be encrypted with SSE-KMS using the KMS key
 `arn:aws:kms:eu-west-2:1528130000000:key/753778e4-2d0f-42e6-b894-6a3ae4ea4e5f`
 
-###  <a name="per_bucket_endpoints"></a>Using Per-Bucket Configuration to 
access data round the world
-
-S3 Buckets are hosted in different "regions", the default being "US-East".
-The S3A client talks to this region by default, issuing HTTP requests
-to the server `s3.amazonaws.com`.
-
-S3A can work with buckets from any region. Each region has its own
-S3 endpoint, documented [by 
Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region).
-
-1. Applications running in EC2 infrastructure do not pay for IO to/from
-*local S3 buckets*. They will be billed for access to remote buckets. Always
-use local buckets and local copies of data, wherever possible.
-1. The default S3 endpoint can support data IO with any bucket when the V1 
request
-signing protocol is used.
-1. When the V4 signing protocol is used, AWS requires the explicit region 
endpoint
-to be used —hence S3A must be configured to use the specific endpoint. This
-is done in the configuration option `fs.s3a.endpoint`.
-1. All endpoints other than the default endpoint only support interaction
-with buckets local to that S3 instance.
-
-While it is generally simpler to use the default endpoint, working with
-V4-signing-only regions (Frankfurt, Seoul) requires the endpoint to be 
identified.
-Expect better performance from direct connections —traceroute will give you 
some insight.
-
-If the wrong endpoint is used, the request may fail. This may be reported as a 
301/redirect error,
-or as a 400 Bad Request: take these as cues to check the endpoint setting of
-a bucket.
-
-Here is a list of properties defining all AWS S3 regions, current as of June 
2017:
-
-```xml
-<!--
- This is the default endpoint, which can be used to interact
- with any v2 region.
- -->
-<property>
-  <name>central.endpoint</name>
-  <value>s3.amazonaws.com</value>
-</property>
-
-<property>
-  <name>canada.endpoint</name>
-  <value>s3.ca-central-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>frankfurt.endpoint</name>
-  <value>s3.eu-central-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>ireland.endpoint</name>
-  <value>s3-eu-west-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>london.endpoint</name>
-  <value>s3.eu-west-2.amazonaws.com</value>
-</property>
-
-<property>
-  <name>mumbai.endpoint</name>
-  <value>s3.ap-south-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>ohio.endpoint</name>
-  <value>s3.us-east-2.amazonaws.com</value>
-</property>
-
-<property>
-  <name>oregon.endpoint</name>
-  <value>s3-us-west-2.amazonaws.com</value>
-</property>
-
-<property>
-  <name>sao-paolo.endpoint</name>
-  <value>s3-sa-east-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>seoul.endpoint</name>
-  <value>s3.ap-northeast-2.amazonaws.com</value>
-</property>
-
-<property>
-  <name>singapore.endpoint</name>
-  <value>s3-ap-southeast-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>sydney.endpoint</name>
-  <value>s3-ap-southeast-2.amazonaws.com</value>
-</property>
-
-<property>
-  <name>tokyo.endpoint</name>
-  <value>s3-ap-northeast-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>virginia.endpoint</name>
-  <value>${central.endpoint}</value>
-</property>
-```
-
-This list can be used to specify the endpoint of individual buckets, for 
example
-for buckets in the central and EU/Ireland endpoints.
-
-```xml
-<property>
-  <name>fs.s3a.bucket.landsat-pds.endpoint</name>
-  <value>${central.endpoint}</value>
-  <description>The endpoint for s3a://landsat-pds URLs</description>
-</property>
-
-<property>
-  <name>fs.s3a.bucket.eu-dataset.endpoint</name>
-  <value>${ireland.endpoint}</value>
-  <description>The endpoint for s3a://eu-dataset URLs</description>
-</property>
-```
-
-Why explicitly declare a bucket bound to the central endpoint? It ensures
-that if the default endpoint is changed to a new region, data store in
-US-east is still reachable.
-
-## <a name="accesspoints"></a>Configuring S3 AccessPoints usage with S3A
-S3a now supports [S3 Access 
Point](https://aws.amazon.com/s3/features/access-points/) usage which
-improves VPC integration with S3 and simplifies your data's permission model 
because different
-policies can be applied now on the Access Point level. For more information 
about why to use and
-how to create them make sure to read the official documentation.
-
-Accessing data through an access point, is done by using its ARN, as opposed 
to just the bucket name.
-You can set the Access Point ARN property using the following per bucket 
configuration property:
-```xml
-<property>
-    <name>fs.s3a.bucket.sample-bucket.accesspoint.arn</name>
-    <value> {ACCESSPOINT_ARN_HERE} </value>
-    <description>Configure S3a traffic to use this AccessPoint</description>
-</property>
-```
-
-This configures access to the `sample-bucket` bucket for S3A, to go through the
-new Access Point ARN. So, for example `s3a://sample-bucket/key` will now use 
your
-configured ARN when getting data from S3 instead of your bucket.
-
-The `fs.s3a.accesspoint.required` property can also require all access to S3 
to go through Access
-Points. This has the advantage of increasing security inside a VPN / VPC as 
you only allow access
-to known sources of data defined through Access Points. In case there is a 
need to access a bucket
-directly (without Access Points) then you can use per bucket overrides to 
disable this setting on a
-bucket by bucket basis i.e. `fs.s3a.bucket.{YOUR-BUCKET}.accesspoint.required`.
-
-```xml
-<!-- Require access point only access -->
-<property>
-    <name>fs.s3a.accesspoint.required</name>
-    <value>true</value>
-</property>
-<!-- Disable it on a per-bucket basis if needed -->
-<property>
-    <name>fs.s3a.bucket.example-bucket.accesspoint.required</name>
-    <value>false</value>
-</property>
-```
-
-Before using Access Points make sure you're not impacted by the following:
-- `ListObjectsV1` is not supported, this is also deprecated on AWS S3 for 
performance reasons;
-- The endpoint for S3 requests will automatically change from 
`s3.amazonaws.com` to use
-`s3-accesspoint.REGION.amazonaws.{com | com.cn}` depending on the Access Point 
ARN. While
-considering endpoints, if you have any custom signers that use the host 
endpoint property make
-sure to update them if needed;
-
 ## <a name="requester_pays"></a>Requester Pays buckets
 
 S3A supports buckets with
diff --git 
a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md 
b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
index 37cf472277d2..4d506b6bfc49 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
@@ -218,6 +218,7 @@ everything uses the same HTTP connection pool.
 | `fs.s3a.executor.capacity`     | `16`    | Maximum threads for any single 
operation                         |
 | `fs.s3a.max.total.tasks`       | `16`    | Extra tasks which can be queued 
excluding prefetching operations |
 
+### <a name="timeouts"></a> Timeouts.
 
 Network timeout options can be tuned to make the client fail faster *or* retry 
more.
 The choice is yours. Generally recovery is better, but sometimes fail-fast is 
more useful.
diff --git 
a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md 
b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
index c5e807c96413..af4c6a76becb 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
@@ -132,20 +132,17 @@ This auditing information can be used to identify 
opportunities to reduce load.
 Prints and optionally checks the status of a bucket.
 
 ```bash
-hadoop s3guard bucket-info [-guarded] [-unguarded] [-auth] [-nonauth] [-magic] 
[-encryption ENCRYPTION] [-markers MARKER] s3a://BUCKET
+hadoop s3guard bucket-info [-fips] [-magic] [-encryption ENCRYPTION] [-markers 
MARKER] s3a://BUCKET
 ```
 
 Options
 
-| argument | meaning |
-|-----------|-------------|
-| `-guarded` | Require S3Guard to be enabled. This will now always fail |
-| `-unguarded` | Require S3Guard to be disabled. This will now always succeed |
-| `-auth` | Require the S3Guard mode to be "authoritative". This will now 
always fail |
-| `-nonauth` | Require the S3Guard mode to be "non-authoritative". This will 
now always fail |
-| `-magic` | Require the S3 filesystem to be support the "magic" committer |
-| `-markers` | Directory marker status: `aware`, `keep`, `delete`, 
`authoritative` |
-| `-encryption <type>` | Require a specific encryption algorithm  |
+| argument             | meaning                                               
              |
+|----------------------|---------------------------------------------------------------------|
+| `-fips`              | Require FIPS endpoint to be in use                    
              |
+| `-magic`             | Require the S3 filesystem to support the "magic" 
committer       |
+| `-markers`           | Directory marker status: `aware`, `keep`, `delete`, 
`authoritative` |
+| `-encryption <type>` | Require a specific encryption algorithm               
              |
 
 The server side encryption options are not directly related to S3Guard, but
 it is often convenient to check them at the same time.
diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
index ded2f0b88507..ce6d8a7e1ef6 100644
--- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
+++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
@@ -39,6 +39,7 @@ import static 
org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESSPOINT_REQUIRED;
 import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
+import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
 import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS;
 import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
@@ -138,6 +139,7 @@ public class ITestS3ABucketExistence extends 
AbstractS3ATestBase {
     removeBaseAndBucketOverrides(conf,
         S3A_BUCKET_PROBE,
         ENDPOINT,
+        FIPS_ENDPOINT,
         AWS_REGION,
         PATH_STYLE_ACCESS);
     conf.setInt(S3A_BUCKET_PROBE, probe);
diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java
 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java
index 5d10590dfe30..5e6991128b20 100644
--- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java
+++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.fs.s3a;
 
 import java.io.IOException;
 import java.net.URI;
-import java.net.URISyntaxException;
 import java.net.UnknownHostException;
 import java.nio.file.AccessDeniedException;
 import java.util.ArrayList;
@@ -36,16 +35,17 @@ import 
software.amazon.awssdk.core.interceptor.ExecutionAttributes;
 import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
 import software.amazon.awssdk.services.s3.S3Client;
 import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
+import software.amazon.awssdk.services.s3.model.HeadBucketResponse;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext;
 
 import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS;
+import static 
org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.ERROR_ENDPOINT_WITH_FIPS;
 import static 
org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
 import static org.apache.hadoop.io.IOUtils.closeStream;
-import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
-
 import static org.apache.hadoop.test.LambdaTestUtils.intercept;
 
 /**
@@ -82,6 +82,8 @@ public class ITestS3AEndpointRegion extends 
AbstractS3ATestBase {
 
   private static final String VPC_ENDPOINT = 
"vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com";
 
+  public static final String EXCEPTION_THROWN_BY_INTERCEPTOR = "Exception 
thrown by interceptor";
+
   /**
    * New FS instance which will be closed in teardown.
    */
@@ -134,10 +136,9 @@ public class ITestS3AEndpointRegion extends 
AbstractS3ATestBase {
     describe("Create a client with a configured endpoint");
     Configuration conf = getConfiguration();
 
-    S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST, null, US_EAST_2);
+    S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST, null, US_EAST_2, 
false);
 
-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () 
-> client.headBucket(
-        
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }
 
   @Test
@@ -145,10 +146,9 @@ public class ITestS3AEndpointRegion extends 
AbstractS3ATestBase {
     describe("Create a client with the central endpoint");
     Configuration conf = getConfiguration();
 
-    S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_1);
+    S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_1, 
false);
 
-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () 
-> client.headBucket(
-        
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }
 
   @Test
@@ -156,21 +156,40 @@ public class ITestS3AEndpointRegion extends 
AbstractS3ATestBase {
     describe("Create a client with a configured region");
     Configuration conf = getConfiguration();
 
-    S3Client client = createS3Client(conf, null, EU_WEST_2, EU_WEST_2);
+    S3Client client = createS3Client(conf, null, EU_WEST_2, EU_WEST_2, false);
 
-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () 
-> client.headBucket(
-        
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }
 
+  @Test
+  public void testWithFips() throws Throwable {
+    describe("Create a client with fips enabled");
+
+    S3Client client = createS3Client(getConfiguration(),
+        null, EU_WEST_2, EU_WEST_2, true);
+    expectInterceptorException(client);
+  }
+
+  /**
+   * Attempting to create a client with fips enabled and an endpoint specified
+   * fails during client construction.
+   */
+  @Test
+  public void testWithFipsAndEndpoint() throws Throwable {
+    describe("Create a client with fips and an endpoint");
+
+    intercept(IllegalArgumentException.class, ERROR_ENDPOINT_WITH_FIPS, () ->
+        createS3Client(getConfiguration(), CENTRAL_ENDPOINT, null, US_EAST_1, 
true));
+  }
 
+  @Test
   public void testEUWest2Endpoint() throws Throwable {
     describe("Create a client with the eu west 2 endpoint");
     Configuration conf = getConfiguration();
 
-    S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, null, 
EU_WEST_2);
+    S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, null, 
EU_WEST_2, false);
 
-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () 
-> client.headBucket(
-        
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }
 
   @Test
@@ -178,10 +197,9 @@ public class ITestS3AEndpointRegion extends 
AbstractS3ATestBase {
     describe("Test that when both region and endpoint are configured, region 
takes precedence");
     Configuration conf = getConfiguration();
 
-    S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, US_WEST_2, 
US_WEST_2);
+    S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, US_WEST_2, 
US_WEST_2, false);
 
-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () 
-> client.headBucket(
-        
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }
 
   @Test
@@ -189,21 +207,43 @@ public class ITestS3AEndpointRegion extends 
AbstractS3ATestBase {
     describe("Test with a china endpoint");
     Configuration conf = getConfiguration();
 
-    S3Client client = createS3Client(conf, CN_ENDPOINT, null, CN_NORTHWEST_1);
+    S3Client client = createS3Client(conf, CN_ENDPOINT, null, CN_NORTHWEST_1, 
false);
+
+    expectInterceptorException(client);
+  }
+
+  /**
+   * Expect an exception to be thrown by the interceptor with the message
+   * {@link #EXCEPTION_THROWN_BY_INTERCEPTOR}.
+   * @param client client to issue a head request against.
+   * @return the expected exception.
+   * @throws Exception any other exception.
+   */
+  private AwsServiceException expectInterceptorException(final S3Client client)
+      throws Exception {
+
+    return intercept(AwsServiceException.class, EXCEPTION_THROWN_BY_INTERCEPTOR,
+        () -> head(client));
+  }
 
-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket(
-        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+  /**
+   * Issue a head request against the bucket.
+   * @param client client to use
+   * @return the response.
+   */
+  private HeadBucketResponse head(final S3Client client) {
+    return client.headBucket(
+        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build());
   }
 
   @Test
   public void testWithGovCloudEndpoint() throws Throwable {
-    describe("Test with a gov cloud endpoint");
+    describe("Test with a gov cloud endpoint; enable fips");
     Configuration conf = getConfiguration();
 
-    S3Client client = createS3Client(conf, GOV_ENDPOINT, null, US_GOV_EAST_1);
+    S3Client client = createS3Client(conf, GOV_ENDPOINT, null, US_GOV_EAST_1, false);
 
-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket(
-        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }
 
   @Test
@@ -212,19 +252,20 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
     describe("Test with vpc endpoint");
     Configuration conf = getConfiguration();
 
-    S3Client client = createS3Client(conf, VPC_ENDPOINT, null, US_WEST_2);
+    S3Client client = createS3Client(conf, VPC_ENDPOINT, null, US_WEST_2, false);
 
-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket(
-        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }
 
-  class RegionInterceptor implements ExecutionInterceptor {
-    private String endpoint;
-    private String region;
+  private final class RegionInterceptor implements ExecutionInterceptor {
+    private final String endpoint;
+    private final String region;
+    private final boolean isFips;
 
-    RegionInterceptor(String endpoint, String region) {
+    RegionInterceptor(String endpoint, String region, final boolean isFips) {
       this.endpoint = endpoint;
       this.region = region;
+      this.isFips = isFips;
     }
 
     @Override
@@ -249,8 +290,15 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
               executionAttributes.getAttribute(AwsExecutionAttribute.AWS_REGION).toString())
           .describedAs("Incorrect region set").isEqualTo(region);
 
+      // verify the fips state matches expectation.
+      Assertions.assertThat(executionAttributes.getAttribute(
+          AwsExecutionAttribute.FIPS_ENDPOINT_ENABLED))
+          .describedAs("Incorrect FIPS flag set in execution attributes")
+          .isNotNull()
+          .isEqualTo(isFips);
+
       // We don't actually want to make a request, so exit early.
-      throw AwsServiceException.builder().message("Exception thrown by interceptor").build();
+      throw AwsServiceException.builder().message(EXCEPTION_THROWN_BY_INTERCEPTOR).build();
     }
   }
 
@@ -261,17 +309,17 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
    * @param conf configuration to use.
    * @param endpoint endpoint.
    * @param expectedRegion the region that should be set in the client.
+   * @param isFips is this a FIPS endpoint?
    * @return the client.
-   * @throws URISyntaxException parse problems.
    * @throws IOException IO problems
    */
   @SuppressWarnings("deprecation")
   private S3Client createS3Client(Configuration conf,
-      String endpoint, String configuredRegion, String expectedRegion)
+      String endpoint, String configuredRegion, String expectedRegion, boolean isFips)
       throws IOException {
 
     List<ExecutionInterceptor> interceptors = new ArrayList<>();
-    interceptors.add(new RegionInterceptor(endpoint, expectedRegion));
+    interceptors.add(new RegionInterceptor(endpoint, expectedRegion, isFips));
 
     DefaultS3ClientFactory factory
         = new DefaultS3ClientFactory();
@@ -283,8 +331,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
         .withMetrics(new EmptyS3AStatisticsContext()
             .newStatisticsFromAwsSdk())
         .withExecutionInterceptors(interceptors)
-        .withRegion(configuredRegion);
-
+        .withRegion(configuredRegion)
+        .withFipsEnabled(isFips);
 
     S3Client client = factory.createS3Client(
         getFileSystem().getUri(),
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java
index 28bc2a246af1..08696ae62d24 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java
@@ -33,6 +33,8 @@ import org.apache.hadoop.fs.s3a.S3AFileSystem;
 import org.apache.hadoop.test.LambdaTestUtils;
 import org.apache.hadoop.util.StringUtils;
 
+import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
+import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_ALGORITHM;
 import static org.apache.hadoop.fs.s3a.MultipartTestUtils.assertNoUploadsAt;
 import static org.apache.hadoop.fs.s3a.MultipartTestUtils.clearAnyUploads;
@@ -97,6 +99,20 @@ public class ITestS3GuardTool extends AbstractS3GuardToolTestBase {
     LOG.info("Exec output=\n{}", output);
   }
 
+  @Test
+  public void testStoreInfoFips() throws Throwable {
+    final S3AFileSystem fs = getFileSystem();
+    if (!fs.hasPathCapability(new Path("/"), FIPS_ENDPOINT)) {
+      skip("FIPS not enabled");
+    }
+    S3GuardTool.BucketInfo cmd =
+        toClose(new S3GuardTool.BucketInfo(fs.getConf()));
+    String output = exec(cmd, cmd.getName(),
+        "-" + BucketInfo.FIPS_FLAG,
+        fs.getUri().toString());
+    LOG.info("Exec output=\n{}", output);
+  }
+
   private final static String UPLOAD_NAME = "test-upload";
 
   @Test
diff --git a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml
index f871369ed571..c99d7d43134c 100644
--- a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml
+++ b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml
@@ -56,6 +56,12 @@
     <description>Do not add the referrer header to landsat operations</description>
   </property>
 
+  <property>
+    <name>fs.s3a.bucket.landsat-pds.endpoint.fips</name>
+    <value>true</value>
+    <description>Use the fips endpoint</description>
+  </property>
+
   <!-- Per-bucket configurations: usgs-landsat -->
   <property>
     <name>fs.s3a.bucket.usgs-landsat.endpoint.region</name>

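Editor's note: as a rough illustration of how the option and path capability exercised by the new tests above might be used from client code, here is a minimal sketch. It assumes only the standard Hadoop FileSystem/Configuration APIs; the bucket name "example-bucket" is a placeholder, not anything from this patch.

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class FipsProbeExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Ask the S3A connector to use AWS FIPS endpoints for this client.
        // Note: the tests above expect combining this flag with an explicit
        // fs.s3a.endpoint value to fail during client construction.
        conf.setBoolean("fs.s3a.endpoint.fips", true);

        // "example-bucket" is a placeholder bucket name.
        try (FileSystem fs = FileSystem.get(new URI("s3a://example-bucket/"), conf)) {
          // The option is also exposed as a path capability, which is what
          // testStoreInfoFips probes before invoking bucket-info with its FIPS flag.
          boolean fips = fs.hasPathCapability(new Path("/"), "fs.s3a.endpoint.fips");
          System.out.println("FIPS endpoint enabled: " + fips);
        }
      }
    }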

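Similarly, the per-bucket entry added to the test core-site.xml above can be set programmatically. This is only a sketch under the usual fs.s3a.bucket.<bucket>.* override convention, with "example-bucket" again a placeholder name.

    import org.apache.hadoop.conf.Configuration;

    public class PerBucketFipsExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Per-bucket override: only "example-bucket" is directed at the FIPS
        // endpoint, mirroring the fs.s3a.bucket.landsat-pds.endpoint.fips
        // entry added to the test core-site.xml above.
        conf.setBoolean("fs.s3a.bucket.example-bucket.endpoint.fips", true);

        // Buckets without the override keep the global setting (disabled by default).
        System.out.println(conf.getBoolean(
            "fs.s3a.bucket.example-bucket.endpoint.fips", false));
      }
    }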