Repository: hive
Updated Branches:
  refs/heads/master 92015567c -> 12005fd17
HIVE-16918 : Skip ReplCopyTask distcp for _metadata copying. Also enable -pb for distcp (Sushanth Sowmyan, reviewed by Thejas Nair, Anishek Agarwal) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/12005fd1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/12005fd1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/12005fd1 Branch: refs/heads/master Commit: 12005fd17d12270081b5c047fa9d785267cefba2 Parents: 9201556 Author: Thejas M Nair <the...@hortonworks.com> Authored: Wed Jun 21 17:28:44 2017 -0700 Committer: Thejas M Nair <the...@hortonworks.com> Committed: Wed Jun 21 17:28:44 2017 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/exec/ReplCopyTask.java | 18 ++++++++++++++---- .../apache/hadoop/hive/shims/Hadoop23Shims.java | 1 + .../hadoop/hive/shims/TestHadoop23Shims.java | 9 ++++++--- 3 files changed, 21 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/12005fd1/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java index d0f30bc..285f624 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java @@ -187,15 +187,25 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable { } private boolean doCopy(Path dst, FileSystem dstFs, Path src, FileSystem srcFs) throws IOException { - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST)){ - // regular copy in test env. 
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST) + || isLocalFile(src) || isLocalFile(dst)){ + // regular copy in test env, or when source or destination is a local file + // distcp runs inside a mapper task, and cannot handle file:/// + LOG.debug("Using regular copy for {} -> {}", src.toUri(), dst.toUri()); return FileUtils.copy(srcFs, src, dstFs, dst, false, true, conf); } else { // distcp in actual deployment with privilege escalation + LOG.debug("Using privleged distcp for {} -> {}", src.toUri(), dst.toUri()); return FileUtils.privilegedCopy(srcFs, src, dst, conf); } } + private boolean isLocalFile(Path p) { + String scheme = p.toUri().getScheme(); + boolean isLocalFile = scheme.equalsIgnoreCase("file"); + LOG.debug("{} was a local file? {}, had scheme {}",p.toUri(), isLocalFile, scheme); + return isLocalFile; + } private List<FileStatus> filesInFileListing(FileSystem fs, Path path) throws IOException { @@ -251,7 +261,7 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable { public static Task<?> getLoadCopyTask(ReplicationSpec replicationSpec, Path srcPath, Path dstPath, HiveConf conf) { Task<?> copyTask = null; LOG.debug("ReplCopyTask:getLoadCopyTask: "+srcPath + "=>" + dstPath); - if (replicationSpec.isInReplicationScope()){ + if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){ ReplCopyWork rcwork = new ReplCopyWork(srcPath, dstPath, false); LOG.debug("ReplCopyTask:\trcwork"); if (replicationSpec.isLazy()){ @@ -269,7 +279,7 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable { public static Task<?> getDumpCopyTask(ReplicationSpec replicationSpec, Path srcPath, Path dstPath, HiveConf conf) { Task<?> copyTask = null; LOG.debug("ReplCopyTask:getDumpCopyTask: "+srcPath + "=>" + dstPath); - if (replicationSpec.isInReplicationScope()){ + if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){ ReplCopyWork rcwork = new ReplCopyWork(srcPath, dstPath, false); 
LOG.debug("ReplCopyTask:\trcwork"); if (replicationSpec.isLazy()){ http://git-wip-us.apache.org/repos/asf/hive/blob/12005fd1/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java ---------------------------------------------------------------------- diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index 4319bed..e3d1199 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -1100,6 +1100,7 @@ public class Hadoop23Shims extends HadoopShimsSecure { // if no entries were added via conf, we initiate our defaults params.add("-update"); params.add("-skipcrccheck"); + params.add("-pb"); } params.add(src.toString()); params.add(dst.toString()); http://git-wip-us.apache.org/repos/asf/hive/blob/12005fd1/shims/0.23/src/main/test/org/apache/hadoop/hive/shims/TestHadoop23Shims.java ---------------------------------------------------------------------- diff --git a/shims/0.23/src/main/test/org/apache/hadoop/hive/shims/TestHadoop23Shims.java b/shims/0.23/src/main/test/org/apache/hadoop/hive/shims/TestHadoop23Shims.java index ba1086c..6c93df5 100644 --- a/shims/0.23/src/main/test/org/apache/hadoop/hive/shims/TestHadoop23Shims.java +++ b/shims/0.23/src/main/test/org/apache/hadoop/hive/shims/TestHadoop23Shims.java @@ -48,11 +48,12 @@ public class TestHadoop23Shims { Hadoop23Shims shims = new Hadoop23Shims(); List<String> paramsDefault = shims.constructDistCpParams(copySrc, copyDst, conf); - assertEquals(4, paramsDefault.size()); + assertEquals(5, paramsDefault.size()); assertTrue("Distcp -update set by default", paramsDefault.contains("-update")); assertTrue("Distcp -skipcrccheck set by default", paramsDefault.contains("-skipcrccheck")); - assertEquals(copySrc.toString(), paramsDefault.get(2)); - assertEquals(copyDst.toString(), 
paramsDefault.get(3)); + assertTrue("Distcp -pb set by default", paramsDefault.contains("-pb")); + assertEquals(copySrc.toString(), paramsDefault.get(3)); + assertEquals(copyDst.toString(), paramsDefault.get(4)); conf.set("distcp.options.foo", "bar"); // should set "-foo bar" conf.set("distcp.options.blah", ""); // should set "-blah" @@ -67,6 +68,8 @@ public class TestHadoop23Shims { !paramsWithCustomParamInjection.contains("-update")); assertTrue("Distcp -skipcrccheck not set if not requested", !paramsWithCustomParamInjection.contains("-skipcrccheck")); + assertTrue("Distcp -pb not set if not requested", + !paramsWithCustomParamInjection.contains("-pb")); // the "-foo bar" and "-blah" params order is not guaranteed String firstParam = paramsWithCustomParamInjection.get(0);