Author: jlowe
Date: Fri Apr 19 05:13:49 2013
New Revision: 1469686

URL: http://svn.apache.org/r1469686
Log:
svn merge -c 1460808 FIXES: YARN-71. Fix the NodeManager to clean up local-dirs on restart. Contributed by Xuan Gong

Added: hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java - copied unchanged from r1460808, hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt?rev=1469686&r1=1469685&r2=1469686&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt Fri Apr 19 05:13:49 2013 @@ -15,6 +15,9 @@ Release 0.23.8 - UNRELEASED YARN-476. ProcfsBasedProcessTree info message confuses users. (sandyr via tucu) + YARN-71. Fix the NodeManager to clean up local-dirs on restart. 
+ (Xuan Gong via sseth) + Release 0.23.7 - 2013-04-18 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1469686&r1=1469685&r2=1469686&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Fri Apr 19 05:13:49 2013 @@ -58,6 +58,8 @@ import org.apache.hadoop.yarn.service.Co import org.apache.hadoop.yarn.service.Service; import org.apache.hadoop.yarn.util.Records; +import com.google.common.annotations.VisibleForTesting; + public class NodeManager extends CompositeService implements EventHandler<NodeManagerEvent> { @@ -113,6 +115,10 @@ public class NodeManager extends Composi return new WebServer(nmContext, resourceView, aclsManager, dirsHandler); } + protected DeletionService createDeletionService(ContainerExecutor exec) { + return new DeletionService(exec); + } + protected void doSecureLogin() throws IOException { SecurityUtil.login(getConfig(), YarnConfiguration.NM_KEYTAB, YarnConfiguration.NM_PRINCIPAL); @@ -143,7 +149,7 @@ public class NodeManager extends Composi } catch (IOException e) { throw new YarnException("Failed to initialize container executor", e); } - DeletionService del = new DeletionService(exec); + DeletionService del = 
createDeletionService(exec); addService(del); // NodeManager level dispatcher @@ -351,6 +357,11 @@ public class NodeManager extends Composi return containerManager; } + @VisibleForTesting + Context getNMContext() { + return this.context; + } + public static void main(String[] args) { Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler()); StringUtils.startupShutdownMessage(NodeManager.class, args, LOG); Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java?rev=1469686&r1=1469685&r2=1469686&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java Fri Apr 19 05:13:49 2013 @@ -22,6 +22,7 @@ import static org.apache.hadoop.fs.Creat import java.io.DataOutputStream; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.net.InetSocketAddress; import java.net.URISyntaxException; @@ -53,8 +54,10 @@ import org.apache.commons.logging.LogFac import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import 
org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.security.Credentials; @@ -175,9 +178,11 @@ public class ResourceLocalizationService this.recordFactory = RecordFactoryProvider.getRecordFactory(conf); try { - // TODO queue deletions here, rather than NM init? FileContext lfs = getLocalFileContext(conf); lfs.setUMask(new FsPermission((short)FsPermission.DEFAULT_UMASK)); + + cleanUpLocalDir(lfs,delService); + List<String> localDirs = dirsHandler.getLocalDirs(); for (String localDir : localDirs) { // $local/usercache @@ -926,4 +931,76 @@ public class ResourceLocalizationService } + private void cleanUpLocalDir(FileContext lfs, DeletionService del) { + long currentTimeStamp = System.currentTimeMillis(); + for (String localDir : dirsHandler.getLocalDirs()) { + renameLocalDir(lfs, localDir, ContainerLocalizer.USERCACHE, + currentTimeStamp); + renameLocalDir(lfs, localDir, ContainerLocalizer.FILECACHE, + currentTimeStamp); + renameLocalDir(lfs, localDir, ResourceLocalizationService.NM_PRIVATE_DIR, + currentTimeStamp); + try { + deleteLocalDir(lfs, del, localDir); + } catch (IOException e) { + // Do nothing, just give the warning + LOG.warn("Failed to delete localDir: " + localDir); + } + } + } + + private void renameLocalDir(FileContext lfs, String localDir, + String localSubDir, long currentTimeStamp) { + try { + lfs.rename(new Path(localDir, localSubDir), new Path( + localDir, localSubDir + "_DEL_" + currentTimeStamp)); + } catch (FileNotFoundException ex) { + // No need to handle this exception + // localSubDir may not be exist + } catch (Exception ex) { + // Do nothing, just give the warning + LOG.warn("Failed to rename the local file under " + + localDir + "/" + localSubDir); + } + } + + private void 
deleteLocalDir(FileContext lfs, DeletionService del, + String localDir) throws IOException { + RemoteIterator<FileStatus> fileStatus = lfs.listStatus(new Path(localDir)); + if (fileStatus != null) { + while (fileStatus.hasNext()) { + FileStatus status = fileStatus.next(); + try { + if (status.getPath().getName().matches(".*" + + ContainerLocalizer.USERCACHE + "_DEL_.*")) { + cleanUpFilesFromSubDir(lfs, del, status.getPath()); + } else if (status.getPath().getName() + .matches(".*" + NM_PRIVATE_DIR + "_DEL_.*") + || + status.getPath().getName() + .matches(".*" + ContainerLocalizer.FILECACHE + "_DEL_.*")) { + del.delete(null, status.getPath(), new Path[] {}); + } + } catch (IOException ex) { + // Do nothing, just give the warning + LOG.warn("Failed to delete this local Directory: " + + status.getPath().getName()); + } + } + } + } + + private void cleanUpFilesFromSubDir(FileContext lfs, DeletionService del, + Path dirPath) throws IOException { + RemoteIterator<FileStatus> fileStatus = lfs.listStatus(dirPath); + if (fileStatus != null) { + while (fileStatus.hasNext()) { + FileStatus status = fileStatus.next(); + String owner = status.getOwner(); + del.delete(owner, status.getPath(), new Path[] {}); + } + } + del.delete(null, dirPath, new Path[] {}); + } + }