HADOOP-11687. Ignore x-* and response headers when copying an Amazon S3 object. Contributed by Aaron Peterson and Harsh J.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/256c82fe
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/256c82fe
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/256c82fe

Branch: refs/heads/HDFS-7240
Commit: 256c82fe2981748cd0befc5490d8118d139908f9
Parents: 3488c4f
Author: Harsh J <ha...@cloudera.com>
Authored: Fri Apr 1 14:18:10 2016 +0530
Committer: Harsh J <ha...@cloudera.com>
Committed: Fri Apr 1 14:18:10 2016 +0530

----------------------------------------------------------------------
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 70 +++++++++++++++++++-
 .../src/site/markdown/tools/hadoop-aws/index.md |  7 ++
 2 files changed, 76 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/256c82fe/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 7ab6c79..6afb05d 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -26,6 +26,7 @@ import java.net.URI;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 
@@ -1128,7 +1129,7 @@ public class S3AFileSystem extends FileSystem {
     }
 
     ObjectMetadata srcom = s3.getObjectMetadata(bucket, srcKey);
-    final ObjectMetadata dstom = srcom.clone();
+    ObjectMetadata dstom = cloneObjectMetadata(srcom);
     if (StringUtils.isNotBlank(serverSideEncryptionAlgorithm)) {
       dstom.setSSEAlgorithm(serverSideEncryptionAlgorithm);
     }
@@ -1235,6 +1236,73 @@ public class S3AFileSystem extends FileSystem {
   }
 
   /**
+   * Creates a copy of the passed {@link ObjectMetadata}.
+   * Does so without using the {@link ObjectMetadata#clone()} method,
+   * to avoid copying unnecessary headers.
+   * @param source the {@link ObjectMetadata} to copy
+   * @return a copy of {@link ObjectMetadata} with only relevant attributes
+   */
+  private ObjectMetadata cloneObjectMetadata(ObjectMetadata source) {
+    // This approach may be too brittle, especially if
+    // in future there are new attributes added to ObjectMetadata
+    // that we do not explicitly call to set here
+    ObjectMetadata ret = new ObjectMetadata();
+
+    // Non null attributes
+    ret.setContentLength(source.getContentLength());
+
+    // Possibly null attributes
+    // Allowing nulls to pass breaks it during later use
+    if (source.getCacheControl() != null) {
+      ret.setCacheControl(source.getCacheControl());
+    }
+    if (source.getContentDisposition() != null) {
+      ret.setContentDisposition(source.getContentDisposition());
+    }
+    if (source.getContentEncoding() != null) {
+      ret.setContentEncoding(source.getContentEncoding());
+    }
+    if (source.getContentMD5() != null) {
+      ret.setContentMD5(source.getContentMD5());
+    }
+    if (source.getContentType() != null) {
+      ret.setContentType(source.getContentType());
+    }
+    if (source.getExpirationTime() != null) {
+      ret.setExpirationTime(source.getExpirationTime());
+    }
+    if (source.getExpirationTimeRuleId() != null) {
+      ret.setExpirationTimeRuleId(source.getExpirationTimeRuleId());
+    }
+    if (source.getHttpExpiresDate() != null) {
+      ret.setHttpExpiresDate(source.getHttpExpiresDate());
+    }
+    if (source.getLastModified() != null) {
+      ret.setLastModified(source.getLastModified());
+    }
+    if (source.getOngoingRestore() != null) {
+      ret.setOngoingRestore(source.getOngoingRestore());
+    }
+    if (source.getRestoreExpirationTime() != null) {
+      ret.setRestoreExpirationTime(source.getRestoreExpirationTime());
+    }
+    if (source.getSSEAlgorithm() != null) {
+      ret.setSSEAlgorithm(source.getSSEAlgorithm());
+    }
+    if (source.getSSECustomerAlgorithm() != null) {
+      ret.setSSECustomerAlgorithm(source.getSSECustomerAlgorithm());
+    }
+    if (source.getSSECustomerKeyMd5() != null) {
+      ret.setSSECustomerKeyMd5(source.getSSECustomerKeyMd5());
+    }
+
+    for (Map.Entry<String, String> e : source.getUserMetadata().entrySet()) {
+      ret.addUserMetadata(e.getKey(), e.getValue());
+    }
+    return ret;
+  }
+
+  /**
    * Return the number of bytes that large input files should be optimally
    * be split into to minimize i/o time.
    * @deprecated use {@link #getDefaultBlockSize(Path)} instead

http://git-wip-us.apache.org/repos/asf/hadoop/blob/256c82fe/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index af3541f..7382029 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -417,6 +417,13 @@ which pass in authentication details to the test runner
 These are both Hadoop XML configuration files, which must be placed into
 `hadoop-tools/hadoop-aws/src/test/resources`.
 
+### `core-site.xml`
+
+This file pre-exists and sources the configurations created
+under `auth-keys.xml`.
+
+For most purposes you will not need to edit this file unless you
+need to apply a specific, non-default property change during the tests.
 
 ### `auth-keys.xml`
 