Repository: spark
Updated Branches:
  refs/heads/branch-2.2 a2f5ced32 -> 612952251


[SPARK-19812] YARN shuffle service fails to relocate recovery DB across NFS directories

## What changes were proposed in this pull request?

Change from using Java's Files.move to using Hadoop filesystem operations to move 
the directories. Java's Files.move does not work when moving directories 
across NFS mounts, and in fact its documentation also says that if the directory 
has entries you should do a recursive move. We are already using the Hadoop 
filesystem here, so just use its local filesystem, which handles this properly.

Note that the DB here is actually a directory of files and not just a single 
file, hence the rename of the method parameter from dbFileName to dbName.

## How was this patch tested?

Ran the YarnShuffleServiceSuite unit tests. Unfortunately, I couldn't easily add 
a new test here since it involves NFS.
Ran manual tests to verify that the DB directories were properly moved across 
NFS-mounted directories. We have been running this internally for weeks.

Author: Tom Graves <tgra...@apache.org>

Closes #17748 from tgravescs/SPARK-19812.

(cherry picked from commit 7fecf5130163df9c204a2764d121a7011d007f4e)
Signed-off-by: Tom Graves <tgra...@yahoo-inc.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/61295225
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/61295225
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/61295225

Branch: refs/heads/branch-2.2
Commit: 612952251c5ac626e256bc2ab9414faf1662dde9
Parents: a2f5ced
Author: Tom Graves <tgra...@apache.org>
Authored: Wed Apr 26 08:23:31 2017 -0500
Committer: Tom Graves <tgra...@yahoo-inc.com>
Committed: Wed Apr 26 08:24:12 2017 -0500

----------------------------------------------------------------------
 .../spark/network/yarn/YarnShuffleService.java  | 23 +++++++++++---------
 1 file changed, 13 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/61295225/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
----------------------------------------------------------------------
diff --git 
a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
 
b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
index c7620d0..4acc203 100644
--- 
a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
+++ 
b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
@@ -21,7 +21,6 @@ import java.io.File;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.nio.ByteBuffer;
-import java.nio.file.Files;
 import java.util.List;
 import java.util.Map;
 
@@ -340,9 +339,9 @@ public class YarnShuffleService extends AuxiliaryService {
    * when it previously was not. If YARN NM recovery is enabled it uses that 
path, otherwise
    * it will uses a YARN local dir.
    */
-  protected File initRecoveryDb(String dbFileName) {
+  protected File initRecoveryDb(String dbName) {
     if (_recoveryPath != null) {
-        File recoveryFile = new File(_recoveryPath.toUri().getPath(), 
dbFileName);
+        File recoveryFile = new File(_recoveryPath.toUri().getPath(), dbName);
         if (recoveryFile.exists()) {
           return recoveryFile;
         }
@@ -350,7 +349,7 @@ public class YarnShuffleService extends AuxiliaryService {
     // db doesn't exist in recovery path go check local dirs for it
     String[] localDirs = 
_conf.getTrimmedStrings("yarn.nodemanager.local-dirs");
     for (String dir : localDirs) {
-      File f = new File(new Path(dir).toUri().getPath(), dbFileName);
+      File f = new File(new Path(dir).toUri().getPath(), dbName);
       if (f.exists()) {
         if (_recoveryPath == null) {
           // If NM recovery is not enabled, we should specify the recovery 
path using NM local
@@ -363,17 +362,21 @@ public class YarnShuffleService extends AuxiliaryService {
           // make sure to move all DBs to the recovery path from the old NM 
local dirs.
           // If another DB was initialized first just make sure all the DBs 
are in the same
           // location.
-          File newLoc = new File(_recoveryPath.toUri().getPath(), dbFileName);
-          if (!newLoc.equals(f)) {
+          Path newLoc = new Path(_recoveryPath, dbName);
+          Path copyFrom = new Path(f.toURI()); 
+          if (!newLoc.equals(copyFrom)) {
+            logger.info("Moving " + copyFrom + " to: " + newLoc); 
             try {
-              Files.move(f.toPath(), newLoc.toPath());
+              // The move here needs to handle moving non-empty directories 
across NFS mounts
+              FileSystem fs = FileSystem.getLocal(_conf);
+              fs.rename(copyFrom, newLoc);
             } catch (Exception e) {
               // Fail to move recovery file to new path, just continue on with 
new DB location
               logger.error("Failed to move recovery file {} to the path {}",
-                dbFileName, _recoveryPath.toString(), e);
+                dbName, _recoveryPath.toString(), e);
             }
           }
-          return newLoc;
+          return new File(newLoc.toUri().getPath());
         }
       }
     }
@@ -381,7 +384,7 @@ public class YarnShuffleService extends AuxiliaryService {
       _recoveryPath = new Path(localDirs[0]);
     }
 
-    return new File(_recoveryPath.toUri().getPath(), dbFileName);
+    return new File(_recoveryPath.toUri().getPath(), dbName);
   }
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to