This is an automated email from the ASF dual-hosted git repository.
stevel pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 203b94414fd HADOOP-19317. S3A: fs.s3a.connection.expect.continue
controls 100 CONTINUE behavior (#7134) (#7178)
203b94414fd is described below
commit 203b94414fdc67d1b77006dc05402d373c4f74d1
Author: Steve Loughran <[email protected]>
AuthorDate: Mon Nov 25 19:02:59 2024 +0000
HADOOP-19317. S3A: fs.s3a.connection.expect.continue controls 100 CONTINUE
behavior (#7134) (#7178)
New option
fs.s3a.connection.expect.continue
This controls whether or not a PUT request to the S3 store
sets the "Expect: 100-continue" header and awaits a 100 CONTINUE
response before uploading any data.
This allows for throttling and other problems to be detected fast.
The default is "true" - the header is sent.
(This is the SDK v1 backport).
Contributed by Steve Loughran
Change-Id: Ic448407268b80d2532c7b7080a21b0fe48694ef5
---
.../java/org/apache/hadoop/fs/s3a/Constants.java | 14 +++++++
.../java/org/apache/hadoop/fs/s3a/S3AUtils.java | 3 ++
.../site/markdown/tools/hadoop-aws/connecting.md | 13 ++++++
.../fs/contract/s3a/ITestS3AContractCreate.java | 46 ++++++++++++++++++++++
.../hadoop/fs/s3a/ITestS3AConfiguration.java | 19 +++++++++
.../fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java | 6 +++
6 files changed, 101 insertions(+)
diff --git
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 796b0752feb..9e7cef02581 100644
---
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -282,6 +282,20 @@ public final class Constants {
"fs.s3a.connection.request.timeout";
public static final int DEFAULT_REQUEST_TIMEOUT = 0;
+ /**
+ * Should PUT requests await a 100 CONTINUE response before uploading
+ * data?
+ * <p>
+ * Value: {@value}.
+ */
+ public static final String CONNECTION_EXPECT_CONTINUE =
+ "fs.s3a.connection.expect.continue";
+
+ /**
+ * Default value for {@link #CONNECTION_EXPECT_CONTINUE}.
+ */
+ public static final boolean CONNECTION_EXPECT_CONTINUE_DEFAULT = true;
+
// socket send buffer to be used in Amazon client
public static final String SOCKET_SEND_BUFFER = "fs.s3a.socket.send.buffer";
public static final int DEFAULT_SOCKET_SEND_BUFFER = 8 * 1024;
diff --git
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
index dc0c211fcab..590b0b55ac4 100644
---
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
+++
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
@@ -1317,6 +1317,9 @@ public final class S3AUtils {
LOG.debug("Signer override = {}", signerOverride);
awsConf.setSignerOverride(signerOverride);
}
+ boolean expectContinueEnabled = conf.getBoolean(CONNECTION_EXPECT_CONTINUE,
+ CONNECTION_EXPECT_CONTINUE_DEFAULT);
+ awsConf.setUseExpectContinue(expectContinueEnabled);
}
/**
diff --git
a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
index f1839a0b203..e41a85aa715 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
@@ -117,6 +117,19 @@ See [Timeouts](performance.html#timeouts).
</description>
</property>
+<property>
+ <name>fs.s3a.connection.expect.continue</name>
+ <value>true</value>
+ <description>
+ Should PUT requests await a 100 CONTINUE response before uploading
+ data?
+ This should normally be left alone unless a third party store which
+ does not support it is encountered, or file uploads over
+ long-distance networks time out.
+ (see HADOOP-19317 as an example)
+ </description>
+</property>
+
<property>
<name>fs.s3a.connection.ssl.enabled</name>
<value>true</value>
diff --git
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
index d2a858f615e..236ebd05285 100644
---
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
+++
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
@@ -18,18 +18,64 @@
package org.apache.hadoop.fs.contract.s3a;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractCreateTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.apache.hadoop.fs.s3a.S3ATestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE;
+import static
org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
/**
* S3A contract tests creating files.
*/
+@RunWith(Parameterized.class)
public class ITestS3AContractCreate extends AbstractContractCreateTest {
+ /**
+ * This test suite is parameterized for the different create file
+ * options.
+ * @return a list of test parameters.
+ */
+ @Parameterized.Parameters
+ public static Collection<Object[]> params() {
+ return Arrays.asList(new Object[][]{
+ {false},
+ {true}
+ });
+ }
+
+ /**
+ * Expect a 100-continue response?
+ */
+ private final boolean expectContinue;
+
+ public ITestS3AContractCreate(final boolean expectContinue) {
+ this.expectContinue = expectContinue;
+ }
+
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
}
+ @Override
+ protected Configuration createConfiguration() {
+ final Configuration conf =
+ super.createConfiguration();
+
+ removeBaseAndBucketOverrides(
+ conf,
+ CONNECTION_EXPECT_CONTINUE);
+ conf.setBoolean(CONNECTION_EXPECT_CONTINUE, expectContinue);
+ S3ATestUtils.disableFilesystemCaching(conf);
+ return conf;
+ }
+
}
diff --git
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
index ff75f6e2613..cf7d40ecfb8 100644
---
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
+++
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
@@ -574,4 +574,23 @@ public class ITestS3AConfiguration {
.assertEquals(signerOverride, clientConfiguration.getSignerOverride());
}
+ @Test(timeout = 10_000L)
+ public void testExpectContinueFalse() throws Exception {
+ Configuration config = new Configuration(false);
+ config.setBoolean(CONNECTION_EXPECT_CONTINUE, false);
+ ClientConfiguration awsConf = new ClientConfiguration();
+ initConnectionSettings(config, awsConf);
+ Assertions.assertThat(awsConf.isUseExpectContinue())
+ .describedAs("expect continue flag")
+ .isFalse();
+ }
+
+ @Test(timeout = 10_000L)
+ public void testExpectContinueDefault() throws Exception {
+ ClientConfiguration awsConf = new ClientConfiguration();
+ initConnectionSettings(new Configuration(false), awsConf);
+ Assertions.assertThat(awsConf.isUseExpectContinue())
+ .describedAs("expect continue flag")
+ .isTrue();
+ }
}
diff --git
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java
index ed300dba01e..99b87dd7cb8 100644
---
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java
+++
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.apache.hadoop.fs.s3a.api.UnsupportedRequestException;
import static
org.apache.hadoop.fs.contract.ContractTestUtils.IO_CHUNK_BUFFER_SIZE;
+import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE;
import static org.apache.hadoop.fs.s3a.Constants.MIN_MULTIPART_THRESHOLD;
import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_MIN_SIZE;
import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE;
@@ -65,17 +66,22 @@ public class ITestS3AHugeFilesNoMultipart extends
AbstractSTestS3AHugeFiles {
* Create a configuration without multipart upload,
* and a long request timeout to allow for a very slow
* PUT in close.
+ * <p>
+ * 100-continue is disabled so as to verify the behavior
+ * on a large PUT.
* @return the configuration to create the test FS with.
*/
@Override
protected Configuration createScaleConfiguration() {
Configuration conf = super.createScaleConfiguration();
removeBaseAndBucketOverrides(conf,
+ CONNECTION_EXPECT_CONTINUE,
IO_CHUNK_BUFFER_SIZE,
MIN_MULTIPART_THRESHOLD,
MULTIPART_UPLOADS_ENABLED,
MULTIPART_SIZE,
REQUEST_TIMEOUT);
+ conf.setBoolean(CONNECTION_EXPECT_CONTINUE, false);
conf.setInt(IO_CHUNK_BUFFER_SIZE, 655360);
conf.set(MIN_MULTIPART_THRESHOLD, S_1T);
conf.set(MULTIPART_SIZE, S_1T);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]