This is an automated email from the ASF dual-hosted git repository. dazhou pushed a commit to branch branch-2 in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-2 by this push: new 27d7d09 HADOOP-16630 : Backport of Hadoop-16548 : Disable Flush() over config 27d7d09 is described below commit 27d7d09fa48afe4cccd766a5567464b772a46300 Author: Sneha Vijayarajan <sneha.vijayara...@gmail.com> AuthorDate: Wed Oct 9 16:50:06 2019 -0700 HADOOP-16630 : Backport of Hadoop-16548 : Disable Flush() over config --- .../hadoop/fs/azurebfs/AbfsConfiguration.java | 14 +++++++++++ .../fs/azurebfs/AzureBlobFileSystemStore.java | 6 +++-- .../fs/azurebfs/constants/ConfigurationKeys.java | 8 +++++++ .../constants/FileSystemConfigurations.java | 1 + .../fs/azurebfs/services/AbfsOutputStream.java | 7 ++++-- .../hadoop-azure/src/site/markdown/abfs.md | 18 +++++++++++++++ .../fs/azurebfs/ITestAzureBlobFileSystemFlush.java | 27 +++++++++++----------- 7 files changed, 64 insertions(+), 17 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index 67055c5..56ff622 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -146,6 +146,10 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_ENABLE_FLUSH) private boolean enableFlush; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_DISABLE_OUTPUTSTREAM_FLUSH, + DefaultValue = DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH) + private boolean disableOutputStreamFlush; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ENABLE_AUTOTHROTTLING, DefaultValue = DEFAULT_ENABLE_AUTOTHROTTLING) private boolean enableAutoThrottling; @@ -427,6 +431,10 @@ public class AbfsConfiguration{ return this.enableFlush; } + public boolean isOutputStreamFlushDisabled() { + return this.disableOutputStreamFlush; + } + public boolean isAutoThrottlingEnabled() { return this.enableAutoThrottling; } @@ -635,4 +643,10 @@ public class AbfsConfiguration{ void setEnableFlush(boolean enableFlush) { this.enableFlush = enableFlush; } + + @VisibleForTesting + void setDisableOutputStreamFlush(boolean disableOutputStreamFlush) { + this.disableOutputStreamFlush = disableOutputStreamFlush; + } + } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 2694565..6710275 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -354,7 +354,8 @@ public class AzureBlobFileSystemStore { AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path), 0, abfsConfiguration.getWriteBufferSize(), - abfsConfiguration.isFlushEnabled()); + abfsConfiguration.isFlushEnabled(), + abfsConfiguration.isOutputStreamFlushDisabled()); } public void createDirectory(final Path path, final FsPermission permission, final FsPermission umask) @@ -426,7 +427,8 @@ public class AzureBlobFileSystemStore { AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path), offset, abfsConfiguration.getWriteBufferSize(), - abfsConfiguration.isFlushEnabled()); + abfsConfiguration.isFlushEnabled(), + abfsConfiguration.isOutputStreamFlushDisabled()); } public void rename(final Path source, final Path destination) throws diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index 8cd86bf..cd86f18 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -51,7 +51,15 @@ public final class ConfigurationKeys { public static final String FS_AZURE_ALWAYS_USE_HTTPS = "fs.azure.always.use.https"; public static final String FS_AZURE_ATOMIC_RENAME_KEY = "fs.azure.atomic.rename.key"; public static final String FS_AZURE_READ_AHEAD_QUEUE_DEPTH = "fs.azure.readaheadqueue.depth"; + /** Provides a config control to enable or disable ABFS Flush operations - + * HFlush and HSync. Default is true. **/ public static final String FS_AZURE_ENABLE_FLUSH = "fs.azure.enable.flush"; + /** Provides a config control to disable or enable OutputStream Flush API + * operations in AbfsOutputStream. Flush() will trigger actions that + * guarantee that buffered data is persistent with a perf cost while the API + * documentation does not have such expectations of data being persisted. + * Default value of this config is true. **/ + public static final String FS_AZURE_DISABLE_OUTPUTSTREAM_FLUSH = "fs.azure.disable.outputstream.flush"; public static final String FS_AZURE_USER_AGENT_PREFIX_KEY = "fs.azure.user.agent.prefix"; public static final String FS_AZURE_SSL_CHANNEL_MODE_KEY = "fs.azure.ssl.channel.mode"; public static final String FS_AZURE_USE_UPN = "fs.azure.use.upn"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index a2a0064..29367eb 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -57,6 +57,7 @@ public final class FileSystemConfigurations { public static final int DEFAULT_READ_AHEAD_QUEUE_DEPTH = -1; public static final boolean DEFAULT_ENABLE_FLUSH = true; + public static final boolean DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH = true; public static final boolean DEFAULT_ENABLE_AUTOTHROTTLING = true; public static final SSLSocketFactoryEx.SSLChannelMode DEFAULT_FS_AZURE_SSL_CHANNEL_MODE diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java index 679f22e..be2ab67 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java @@ -52,6 +52,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, StreamCa private long position; private boolean closed; private boolean supportFlush; + private boolean disableOutputStreamFlush; private volatile IOException lastError; private long lastFlushOffset; @@ -80,12 +81,14 @@ public class AbfsOutputStream extends OutputStream implements Syncable, StreamCa final String path, final long position, final int bufferSize, - final boolean supportFlush) { + final boolean supportFlush, + final boolean disableOutputStreamFlush) { this.client = client; this.path = path; this.position = position; this.closed = false; this.supportFlush = supportFlush; + this.disableOutputStreamFlush = disableOutputStreamFlush; this.lastError = null; this.lastFlushOffset = 0; this.bufferSize = bufferSize; @@ -199,7 +202,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, StreamCa */ @Override public void flush() throws IOException { - if (supportFlush) { + if (!disableOutputStreamFlush) { flushInternalAsync(); } } diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index db55e67..d8452a2 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -72,6 +72,24 @@ or you can configure an identity to be used only for a specific storage account Note that it doesn't make sense to do this with some properties, like shared keys that are inherently account-specific. +### <a name="flushconfigoptions"></a> Flush Options + +#### <a name="abfsflushconfigoptions"></a> 1. Azure Blob File System Flush Options +Config `fs.azure.enable.flush` provides an option to render ABFS flush APIs - + HFlush() and HSync() to be no-op. By default, this +config will be set to true. + +Both the APIs will ensure that data is persisted. + +#### <a name="outputstreamflushconfigoptions"></a> 2. OutputStream Flush Options +Config `fs.azure.disable.outputstream.flush` provides an option to render +OutputStream Flush() API to be a no-op in AbfsOutputStream. By default, this +config will be set to true. + +Hflush() being the only documented API that can provide persistent data +transfer, Flush() also attempting to persist buffered data will lead to +performance issues. + ## Testing ABFS See the relevant section in [Testing Azure](testing_azure.html). diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java index d60cae8..60f7f7d 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java @@ -208,43 +208,44 @@ public class ITestAzureBlobFileSystemFlush extends AbstractAbfsScaleTest { } @Test - public void testFlushWithFlushEnabled() throws Exception { - testFlush(true); + public void testFlushWithOutputStreamFlushEnabled() throws Exception { + testFlush(false); } @Test - public void testFlushWithFlushDisabled() throws Exception { - testFlush(false); + public void testFlushWithOutputStreamFlushDisabled() throws Exception { + testFlush(true); } - private void testFlush(boolean flushEnabled) throws Exception { + private void testFlush(boolean disableOutputStreamFlush) throws Exception { final AzureBlobFileSystem fs = (AzureBlobFileSystem) getFileSystem(); - // Simulate setting "fs.azure.enable.flush" to true or false - fs.getAbfsStore().getAbfsConfiguration().setEnableFlush(flushEnabled); + // Simulate setting "fs.azure.disable.outputstream.flush" to true or false + fs.getAbfsStore().getAbfsConfiguration() + .setDisableOutputStreamFlush(disableOutputStreamFlush); final Path testFilePath = path(methodName.getMethodName()); byte[] buffer = getRandomBytesArray(); // The test case must write "fs.azure.write.request.size" bytes // to the stream in order for the data to be uploaded to storage. - assertEquals( - fs.getAbfsStore().getAbfsConfiguration().getWriteBufferSize(), + assertEquals(fs.getAbfsStore().getAbfsConfiguration().getWriteBufferSize(), buffer.length); try (FSDataOutputStream stream = fs.create(testFilePath)) { stream.write(buffer); // Write asynchronously uploads data, so we must wait for completion - AbfsOutputStream abfsStream = (AbfsOutputStream) stream.getWrappedStream(); + AbfsOutputStream abfsStream = (AbfsOutputStream) stream + .getWrappedStream(); abfsStream.waitForPendingUploads(); // Flush commits the data so it can be read. stream.flush(); - // Verify that the data can be read if flushEnabled is true; and otherwise - // cannot be read. - validate(fs.open(testFilePath), buffer, flushEnabled); + // Verify that the data can be read if disableOutputStreamFlush is + // false; and otherwise cannot be read. + validate(fs.open(testFilePath), buffer, !disableOutputStreamFlush); } } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org