Repository: hive
Updated Branches:
  refs/heads/master 92015567c -> 12005fd17


HIVE-16918 : Skip ReplCopyTask distcp for _metadata copying. Also enable -pb 
for distcp (Sushanth Sowmyan, reviewed by Thejas Nair, Anishek Agarwal)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/12005fd1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/12005fd1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/12005fd1

Branch: refs/heads/master
Commit: 12005fd17d12270081b5c047fa9d785267cefba2
Parents: 9201556
Author: Thejas M Nair <the...@hortonworks.com>
Authored: Wed Jun 21 17:28:44 2017 -0700
Committer: Thejas M Nair <the...@hortonworks.com>
Committed: Wed Jun 21 17:28:44 2017 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/exec/ReplCopyTask.java  | 18 ++++++++++++++----
 .../apache/hadoop/hive/shims/Hadoop23Shims.java   |  1 +
 .../hadoop/hive/shims/TestHadoop23Shims.java      |  9 ++++++---
 3 files changed, 21 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/12005fd1/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
index d0f30bc..285f624 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
@@ -187,15 +187,25 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable {
   }
 
   private boolean doCopy(Path dst, FileSystem dstFs, Path src, FileSystem srcFs) throws IOException {
-    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST)){
-      // regular copy in test env.
+    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST)
+        || isLocalFile(src) || isLocalFile(dst)){
+      // regular copy in test env, or when source or destination is a local file
+      // distcp runs inside a mapper task, and cannot handle file:///
+      LOG.debug("Using regular copy for {} -> {}", src.toUri(), dst.toUri());
       return FileUtils.copy(srcFs, src, dstFs, dst, false, true, conf);
     } else {
       // distcp in actual deployment with privilege escalation
+      LOG.debug("Using privleged distcp for {} -> {}", src.toUri(), dst.toUri());
       return FileUtils.privilegedCopy(srcFs, src, dst, conf);
     }
   }
 
+  private boolean isLocalFile(Path p) {
+    String scheme = p.toUri().getScheme();
+    boolean isLocalFile = scheme.equalsIgnoreCase("file");
+    LOG.debug("{} was a local file? {}, had scheme {}",p.toUri(), isLocalFile, scheme);
+    return isLocalFile;
+  }
 
   private List<FileStatus> filesInFileListing(FileSystem fs, Path path)
       throws IOException {
@@ -251,7 +261,7 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable {
   public static Task<?> getLoadCopyTask(ReplicationSpec replicationSpec, Path srcPath, Path dstPath, HiveConf conf) {
     Task<?> copyTask = null;
     LOG.debug("ReplCopyTask:getLoadCopyTask: "+srcPath + "=>" + dstPath);
-    if (replicationSpec.isInReplicationScope()){
+    if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){
       ReplCopyWork rcwork = new ReplCopyWork(srcPath, dstPath, false);
       LOG.debug("ReplCopyTask:\trcwork");
       if (replicationSpec.isLazy()){
@@ -269,7 +279,7 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable {
   public static Task<?> getDumpCopyTask(ReplicationSpec replicationSpec, Path srcPath, Path dstPath, HiveConf conf) {
     Task<?> copyTask = null;
     LOG.debug("ReplCopyTask:getDumpCopyTask: "+srcPath + "=>" + dstPath);
-    if (replicationSpec.isInReplicationScope()){
+    if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){
       ReplCopyWork rcwork = new ReplCopyWork(srcPath, dstPath, false);
       LOG.debug("ReplCopyTask:\trcwork");
       if (replicationSpec.isLazy()){

http://git-wip-us.apache.org/repos/asf/hive/blob/12005fd1/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 4319bed..e3d1199 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -1100,6 +1100,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
       // if no entries were added via conf, we initiate our defaults
       params.add("-update");
       params.add("-skipcrccheck");
+      params.add("-pb");
     }
     params.add(src.toString());
     params.add(dst.toString());

http://git-wip-us.apache.org/repos/asf/hive/blob/12005fd1/shims/0.23/src/main/test/org/apache/hadoop/hive/shims/TestHadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/test/org/apache/hadoop/hive/shims/TestHadoop23Shims.java b/shims/0.23/src/main/test/org/apache/hadoop/hive/shims/TestHadoop23Shims.java
index ba1086c..6c93df5 100644
--- a/shims/0.23/src/main/test/org/apache/hadoop/hive/shims/TestHadoop23Shims.java
+++ b/shims/0.23/src/main/test/org/apache/hadoop/hive/shims/TestHadoop23Shims.java
@@ -48,11 +48,12 @@ public class TestHadoop23Shims {
     Hadoop23Shims shims = new Hadoop23Shims();
     List<String> paramsDefault = shims.constructDistCpParams(copySrc, copyDst, conf);
 
-    assertEquals(4, paramsDefault.size());
+    assertEquals(5, paramsDefault.size());
     assertTrue("Distcp -update set by default", paramsDefault.contains("-update"));
     assertTrue("Distcp -skipcrccheck set by default", paramsDefault.contains("-skipcrccheck"));
-    assertEquals(copySrc.toString(), paramsDefault.get(2));
-    assertEquals(copyDst.toString(), paramsDefault.get(3));
+    assertTrue("Distcp -pb set by default", paramsDefault.contains("-pb"));
+    assertEquals(copySrc.toString(), paramsDefault.get(3));
+    assertEquals(copyDst.toString(), paramsDefault.get(4));
 
     conf.set("distcp.options.foo", "bar"); // should set "-foo bar"
     conf.set("distcp.options.blah", ""); // should set "-blah"
@@ -67,6 +68,8 @@ public class TestHadoop23Shims {
         !paramsWithCustomParamInjection.contains("-update"));
     assertTrue("Distcp -skipcrccheck not set if not requested",
         !paramsWithCustomParamInjection.contains("-skipcrccheck"));
+    assertTrue("Distcp -pb not set if not requested",
+        !paramsWithCustomParamInjection.contains("-pb"));
 
     // the "-foo bar" and "-blah" params order is not guaranteed
     String firstParam = paramsWithCustomParamInjection.get(0);

Reply via email to