Repository: hadoop
Updated Branches:
  refs/heads/trunk 7fd00b3db -> 6d84cc16b


MAPREDUCE-6550. archive-logs tool changes log ownership to the Yarn user when 
using DefaultContainerExecutor (rkanter)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6d84cc16
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6d84cc16
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6d84cc16

Branch: refs/heads/trunk
Commit: 6d84cc16b3e0685fef01d0e3526b0f7556ceff51
Parents: 7fd00b3
Author: Robert Kanter <rkan...@apache.org>
Authored: Wed Nov 25 17:12:40 2015 -0800
Committer: Robert Kanter <rkan...@apache.org>
Committed: Wed Nov 25 17:12:40 2015 -0800

----------------------------------------------------------------------
 hadoop-mapreduce-project/CHANGES.txt            |  3 +
 .../apache/hadoop/tools/HadoopArchiveLogs.java  | 18 +++++-
 .../hadoop/tools/HadoopArchiveLogsRunner.java   | 66 ++++++++++++++++----
 .../src/site/markdown/HadoopArchiveLogs.md      | 17 +++++
 .../hadoop/tools/TestHadoopArchiveLogs.java     | 34 +++++++---
 .../tools/TestHadoopArchiveLogsRunner.java      | 21 ++++++-
 6 files changed, 139 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d84cc16/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 503e687..aadb0c6 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -653,6 +653,9 @@ Release 2.8.0 - UNRELEASED
     MAPREDUCE-6549. multibyte delimiters with LineRecordReader cause
     duplicate records (wilfreds via rkanter)
 
+    MAPREDUCE-6550. archive-logs tool changes log ownership to the Yarn
+    user when using DefaultContainerExecutor (rkanter)
+
 Release 2.7.3 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d84cc16/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java b/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java
index 363e287..6b8af97 100644
--- a/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java
+++ b/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java
@@ -77,6 +77,7 @@ public class HadoopArchiveLogs implements Tool {
   private static final String MEMORY_OPTION = "memory";
   private static final String VERBOSE_OPTION = "verbose";
   private static final String FORCE_OPTION = "force";
+  private static final String NO_PROXY_OPTION = "noProxy";
 
   private static final int DEFAULT_MAX_ELIGIBLE = -1;
   private static final int DEFAULT_MIN_NUM_LOG_FILES = 20;
@@ -94,6 +95,8 @@ public class HadoopArchiveLogs implements Tool {
   private boolean verbose = false;
   @VisibleForTesting
   boolean force = false;
+  @VisibleForTesting
+  boolean proxy = true;
 
   @VisibleForTesting
   Set<AppInfo> eligibleApplications;
@@ -208,6 +211,12 @@ public class HadoopArchiveLogs implements Tool {
         "Force recreating the working directory if an existing one is found. " 
+
             "This should only be used if you know that another instance is " +
             "not currently running");
+    Option noProxyOpt = new Option(NO_PROXY_OPTION, false,
+        "When specified, all processing will be done as the user running this" +
+            " command (or the Yarn user if DefaultContainerExecutor is in " +
+            "use). When not specified, all processing will be done as the " +
+            "user who owns that application; if the user running this command" +
+            " is not allowed to impersonate that user, it will fail");
     opts.addOption(helpOpt);
     opts.addOption(maxEligibleOpt);
     opts.addOption(minNumLogFilesOpt);
@@ -215,6 +224,7 @@ public class HadoopArchiveLogs implements Tool {
     opts.addOption(memoryOpt);
     opts.addOption(verboseOpt);
     opts.addOption(forceOpt);
+    opts.addOption(noProxyOpt);
 
     try {
       CommandLineParser parser = new GnuParser();
@@ -252,6 +262,9 @@ public class HadoopArchiveLogs implements Tool {
       if (commandLine.hasOption(FORCE_OPTION)) {
         force = true;
       }
+      if (commandLine.hasOption(NO_PROXY_OPTION)) {
+        proxy = false;
+      }
     } catch (ParseException pe) {
       HelpFormatter formatter = new HelpFormatter();
       formatter.printHelp("mapred archive-logs", opts);
@@ -274,7 +287,7 @@ public class HadoopArchiveLogs implements Tool {
     }
     fs.mkdirs(workingDir);
     fs.setPermission(workingDir,
-        new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE));
+        new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL, true));
     return true;
   }
 
@@ -479,6 +492,9 @@ public class HadoopArchiveLogs implements Tool {
       fw.write(remoteRootLogDir.toString());
       fw.write(" -suffix ");
       fw.write(suffix);
+      if (!proxy) {
+        fw.write(" -noProxy\n");
+      }
       fw.write("\n");
     } finally {
       if (fw != null) {

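A note on the permission change above: the four-argument FsPermission
constructor sets the sticky bit, so the shared working directory becomes
1777 (like /tmp) instead of the previous 770. A minimal sketch, not part of
this patch, showing what that constructor produces:

    import org.apache.hadoop.fs.permission.FsAction;
    import org.apache.hadoop.fs.permission.FsPermission;

    public class StickyBitDemo {
      public static void main(String[] args) {
        // Same constructor call as in the patch: rwx for user, group,
        // and other, plus the sticky bit.
        FsPermission perm =
            new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL, true);
        // Prints "1777"; with the sticky bit set, any user can create
        // entries in the directory, but only an entry's owner (or the
        // superuser) can delete or rename it.
        System.out.println(Integer.toOctalString(perm.toShort()));
      }
    }
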
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d84cc16/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogsRunner.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogsRunner.java b/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogsRunner.java
index 347e5fb..b3c2de6 100644
--- a/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogsRunner.java
+++ b/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogsRunner.java
@@ -31,33 +31,45 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 
 import java.io.File;
+import java.security.PrivilegedExceptionAction;
 
 /**
  * This is a child program designed to be used by the {@link HadoopArchiveLogs}
  * tool via the Distributed Shell.  It's not meant to be run directly.
  */
 public class HadoopArchiveLogsRunner implements Tool {
-  private static final Log LOG = LogFactory.getLog(HadoopArchiveLogsRunner.class);
+  private static final Log LOG =
+      LogFactory.getLog(HadoopArchiveLogsRunner.class);
 
   private static final String APP_ID_OPTION = "appId";
   private static final String USER_OPTION = "user";
   private static final String WORKING_DIR_OPTION = "workingDir";
-  private static final String REMOTE_ROOT_LOG_DIR = "remoteRootLogDir";
+  private static final String REMOTE_ROOT_LOG_DIR_OPTION = "remoteRootLogDir";
   private static final String SUFFIX_OPTION = "suffix";
+  private static final String NO_PROXY_OPTION = "noProxy";
 
   private String appId;
   private String user;
   private String workingDir;
   private String remoteLogDir;
   private String suffix;
+  private boolean proxy;
 
   private JobConf conf;
 
+  private static final FsPermission HAR_DIR_PERM =
+      new FsPermission(FsAction.ALL, FsAction.READ_EXECUTE, FsAction.NONE);
+  private static final FsPermission HAR_INNER_FILES_PERM =
+      new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE);
+
   public HadoopArchiveLogsRunner(Configuration conf) {
     setConf(conf);
   }
@@ -87,13 +99,40 @@ public class HadoopArchiveLogsRunner implements Tool {
   @Override
   public int run(String[] args) throws Exception {
     handleOpts(args);
+
+    Integer exitCode = 1;
+    UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
+    // If we're running as the user, then no need to impersonate
+    // (which might fail if user is not a proxyuser for themselves)
+    // Also if !proxy is set
+    if (!proxy || loginUser.getShortUserName().equals(user)) {
+      LOG.info("Running as " + user);
+      exitCode = runInternal();
+    } else {
+      // Otherwise impersonate user.  If we're not allowed to, then this will
+      // fail with an Exception
+      LOG.info("Running as " + loginUser.getShortUserName() + " but will " +
+          "impersonate " + user);
+      UserGroupInformation proxyUser =
+          UserGroupInformation.createProxyUser(user, loginUser);
+      exitCode = proxyUser.doAs(new PrivilegedExceptionAction<Integer>() {
+        @Override
+        public Integer run() throws Exception {
+          return runInternal();
+        }
+      });
+    }
+    return exitCode;
+  }
+
+  private int runInternal() throws Exception {
     String remoteAppLogDir = remoteLogDir + File.separator + user
         + File.separator + suffix + File.separator + appId;
-
     // Run 'hadoop archives' command in local mode
-    Configuration haConf = new Configuration(getConf());
-    haConf.set("mapreduce.framework.name", "local");
-    HadoopArchives ha = new HadoopArchives(haConf);
+    conf.set("mapreduce.framework.name", "local");
+    // Set the umask so we get 640 files and 750 dirs
+    conf.set("fs.permissions.umask-mode", "027");
+    HadoopArchives ha = new HadoopArchives(conf);
     String[] haArgs = {
         "-archiveName",
         appId + ".har",
@@ -113,9 +152,9 @@ public class HadoopArchiveLogsRunner implements Tool {
     // Move har file to correct location and delete original logs
     try {
       fs = FileSystem.get(conf);
+      Path harDest = new Path(remoteAppLogDir, appId + ".har");
       LOG.info("Moving har to original location");
-      fs.rename(new Path(workingDir, appId + ".har"),
-          new Path(remoteAppLogDir, appId + ".har"));
+      fs.rename(new Path(workingDir, appId + ".har"), harDest);
       LOG.info("Deleting original logs");
       for (FileStatus original : fs.listStatus(new Path(remoteAppLogDir),
           new PathFilter() {
@@ -131,7 +170,6 @@ public class HadoopArchiveLogsRunner implements Tool {
         fs.close();
       }
     }
-
     return 0;
   }
 
@@ -144,24 +182,30 @@ public class HadoopArchiveLogsRunner implements Tool {
     Option workingDirOpt = new Option(WORKING_DIR_OPTION, true,
         "Working Directory");
     workingDirOpt.setRequired(true);
-    Option remoteLogDirOpt = new Option(REMOTE_ROOT_LOG_DIR, true,
+    Option remoteLogDirOpt = new Option(REMOTE_ROOT_LOG_DIR_OPTION, true,
         "Remote Root Log Directory");
     remoteLogDirOpt.setRequired(true);
     Option suffixOpt = new Option(SUFFIX_OPTION, true, "Suffix");
     suffixOpt.setRequired(true);
+    Option useProxyOpt = new Option(NO_PROXY_OPTION, false, "Use Proxy");
     opts.addOption(appIdOpt);
     opts.addOption(userOpt);
     opts.addOption(workingDirOpt);
     opts.addOption(remoteLogDirOpt);
     opts.addOption(suffixOpt);
+    opts.addOption(useProxyOpt);
 
     CommandLineParser parser = new GnuParser();
     CommandLine commandLine = parser.parse(opts, args);
     appId = commandLine.getOptionValue(APP_ID_OPTION);
     user = commandLine.getOptionValue(USER_OPTION);
     workingDir = commandLine.getOptionValue(WORKING_DIR_OPTION);
-    remoteLogDir = commandLine.getOptionValue(REMOTE_ROOT_LOG_DIR);
+    remoteLogDir = commandLine.getOptionValue(REMOTE_ROOT_LOG_DIR_OPTION);
     suffix = commandLine.getOptionValue(SUFFIX_OPTION);
+    proxy = true;
+    if (commandLine.hasOption(NO_PROXY_OPTION)) {
+      proxy = false;
+    }
   }
 
   @Override

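For the "umask so we get 640 files and 750 dirs" comment in runInternal()
above: a umask clears permission bits from the creation defaults of 666 for
files and 777 for directories. A quick sketch of the arithmetic, assuming
plain Java (not part of this patch):

    public class UmaskDemo {
      public static void main(String[] args) {
        int umask = 0027; // the value set via fs.permissions.umask-mode
        // Files start from 666; 666 & ~027 = 640 (rw-r-----),
        // matching HAR_INNER_FILES_PERM above.
        System.out.println(Integer.toOctalString(0666 & ~umask));
        // Directories start from 777; 777 & ~027 = 750 (rwxr-x---),
        // matching HAR_DIR_PERM above.
        System.out.println(Integer.toOctalString(0777 & ~umask));
      }
    }
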
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d84cc16/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md b/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md
index a54c9a9..ce9ceba 100644
--- a/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md
+++ b/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md
@@ -48,6 +48,14 @@ How to Archive Logs
                                    each container (default: 1024)
     -minNumberLogFiles <n>         The minimum number of log files required
                                    to be eligible (default: 20)
+    -noProxy                       When specified, all processing will be
+                                   done as the user running this command (or
+                                   the Yarn user if DefaultContainerExecutor
+                                   is in use). When not specified, all
+                                   processing will be done as the user who
+                                   owns that application; if the user
+                                   running this command is not allowed to
+                                   impersonate that user, it will fail
     -verbose                       Print more details.
 
 The tool only supports running one instance on a cluster at a time in order
@@ -77,6 +85,15 @@ The tool works by performing the following procedure:
     the ``hadoop archives`` command for a single application and replaces
     its aggregated log files with the resulting archive.
 
+The ``-noProxy`` option makes the tool process everything as the user who is
+currently running it, or the Yarn user if DefaultContainerExecutor is in use.
+When not specified, all processing will be done as the user who owns that
+application; if the user running this command is not allowed to impersonate that
+user, it will fail.  This is useful if you want an admin user to handle all
+aggregation without enabling impersonation.  With ``-noProxy`` the resulting
+HAR files will be owned by whoever ran the tool, instead of whoever originally
+owned the logs.
+
 The ``-verbose`` option makes the tool print more details about what it's
 doing.
 
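For readers unfamiliar with the impersonation path the doc describes:
without ``-noProxy`` the runner wraps its work in a
UserGroupInformation.doAs() call, and the NameNode authorizes the real user
against the standard proxy-user settings in core-site.xml
(hadoop.proxyuser.<user>.hosts and hadoop.proxyuser.<user>.groups). A
minimal sketch of that pattern, with an illustrative owner name, not taken
verbatim from the patch:

    import java.security.PrivilegedExceptionAction;
    import org.apache.hadoop.security.UserGroupInformation;

    public class ProxyUserSketch {
      public static void main(String[] args) throws Exception {
        UserGroupInformation login = UserGroupInformation.getLoginUser();
        // "alice" stands in for the application owner.
        UserGroupInformation proxy =
            UserGroupInformation.createProxyUser("alice", login);
        proxy.doAs((PrivilegedExceptionAction<Void>) () -> {
          // FileSystem operations here run as "alice"; they fail with an
          // authorization error if the login user may not impersonate her.
          return null;
        });
      }
    }
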

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d84cc16/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogs.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogs.java b/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogs.java
index 95835d1..7fcb0bf 100644
--- a/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogs.java
+++ b/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogs.java
@@ -163,7 +163,7 @@ public class TestHadoopArchiveLogs {
     Assert.assertTrue(hal.eligibleApplications.contains(app3));
   }
 
-  @Test(timeout = 10000)
+  @Test(timeout = 30000)
   public void testFilterAppsByAggregatedStatus() throws Exception {
     MiniYARNCluster yarnCluster = null;
     try {
@@ -246,6 +246,11 @@ public class TestHadoopArchiveLogs {
 
   @Test(timeout = 10000)
   public void testGenerateScript() throws Exception {
+    _testGenerateScript(false);
+    _testGenerateScript(true);
+  }
+
+  private void _testGenerateScript(boolean proxy) throws Exception {
     Configuration conf = new Configuration();
     HadoopArchiveLogs hal = new HadoopArchiveLogs(conf);
     ApplicationId app1 = ApplicationId.newInstance(CLUSTER_TIMESTAMP, 1);
@@ -254,6 +259,7 @@ public class TestHadoopArchiveLogs {
         USER));
     hal.eligibleApplications.add(new HadoopArchiveLogs.AppInfo(app2.toString(),
         USER));
+    hal.proxy = proxy;
 
     File localScript = new File("target", "script.sh");
     Path workingDir = new Path("/tmp", "working");
@@ -286,10 +292,21 @@ public class TestHadoopArchiveLogs {
     Assert.assertEquals("fi", lines[12]);
     Assert.assertEquals("export HADOOP_CLIENT_OPTS=\"-Xmx1024m\"", lines[13]);
     Assert.assertTrue(lines[14].startsWith("export HADOOP_CLASSPATH="));
-        Assert.assertEquals("\"$HADOOP_PREFIX\"/bin/hadoop org.apache.hadoop.tools." +
-        "HadoopArchiveLogsRunner -appId \"$appId\" -user \"$user\" -workingDir "
-        + workingDir.toString() + " -remoteRootLogDir " +
-        remoteRootLogDir.toString() + " -suffix " + suffix, lines[15]);
+    if (proxy) {
+      Assert.assertEquals(
+          "\"$HADOOP_PREFIX\"/bin/hadoop org.apache.hadoop.tools." +
+              "HadoopArchiveLogsRunner -appId \"$appId\" -user \"$user\" " +
+              "-workingDir " + workingDir.toString() + " -remoteRootLogDir " +
+              remoteRootLogDir.toString() + " -suffix " + suffix,
+          lines[15]);
+    } else {
+      Assert.assertEquals(
+          "\"$HADOOP_PREFIX\"/bin/hadoop org.apache.hadoop.tools." +
+              "HadoopArchiveLogsRunner -appId \"$appId\" -user \"$user\" " +
+              "-workingDir " + workingDir.toString() + " -remoteRootLogDir " +
+              remoteRootLogDir.toString() + " -suffix " + suffix + " -noProxy",
+          lines[15]);
+    }
   }
 
   /**
@@ -325,7 +342,7 @@ public class TestHadoopArchiveLogs {
     Assert.assertTrue(dirPrepared);
     Assert.assertTrue(fs.exists(workingDir));
     Assert.assertEquals(
-        new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE),
+        new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL, true),
         fs.getFileStatus(workingDir).getPermission());
     // Throw a file in the dir
     Path dummyFile = new Path(workingDir, "dummy.txt");
@@ -337,6 +354,9 @@ public class TestHadoopArchiveLogs {
     Assert.assertFalse(dirPrepared);
     Assert.assertTrue(fs.exists(workingDir));
     Assert.assertTrue(fs.exists(dummyFile));
+    Assert.assertEquals(
+        new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL, true),
+        fs.getFileStatus(workingDir).getPermission());
     // -force is true and the dir exists, so it will recreate it and the dummy
     // won't exist anymore
     hal.force = true;
@@ -344,7 +364,7 @@ public class TestHadoopArchiveLogs {
     Assert.assertTrue(dirPrepared);
     Assert.assertTrue(fs.exists(workingDir));
     Assert.assertEquals(
-        new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE),
+        new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL, true),
         fs.getFileStatus(workingDir).getPermission());
     Assert.assertFalse(fs.exists(dummyFile));
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d84cc16/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogsRunner.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogsRunner.java b/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogsRunner.java
index af66f14..098e2fd 100644
--- a/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogsRunner.java
+++ b/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogsRunner.java
@@ -24,7 +24,10 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.HarFs;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -47,7 +50,7 @@ public class TestHadoopArchiveLogsRunner {
     new Random().nextBytes(DUMMY_DATA);
   }
 
-  @Test(timeout = 30000)
+  @Test(timeout = 50000)
   public void testHadoopArchiveLogs() throws Exception {
     MiniYARNCluster yarnCluster = null;
     MiniDFSCluster dfsCluster = null;
@@ -63,6 +66,7 @@ public class TestHadoopArchiveLogsRunner {
       yarnCluster.start();
       conf = yarnCluster.getConfig();
       dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+      conf = new JobConf(conf);
 
       ApplicationId app1 =
           ApplicationId.newInstance(System.currentTimeMillis(), 1);
@@ -108,10 +112,25 @@ public class TestHadoopArchiveLogsRunner {
       });
       Assert.assertEquals("log1", harLogs[0].getPath().getName());
       Assert.assertEquals(3 * FILE_SIZE_INCREMENT, harLogs[0].getLen());
+      Assert.assertEquals(
+          new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE),
+          harLogs[0].getPermission());
+      Assert.assertEquals(System.getProperty("user.name"),
+          harLogs[0].getOwner());
       Assert.assertEquals("log2", harLogs[1].getPath().getName());
       Assert.assertEquals(4 * FILE_SIZE_INCREMENT, harLogs[1].getLen());
+      Assert.assertEquals(
+          new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE),
+          harLogs[1].getPermission());
+      Assert.assertEquals(System.getProperty("user.name"),
+          harLogs[1].getOwner());
       Assert.assertEquals("log3", harLogs[2].getPath().getName());
       Assert.assertEquals(2 * FILE_SIZE_INCREMENT, harLogs[2].getLen());
+      Assert.assertEquals(
+          new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE),
+          harLogs[2].getPermission());
+      Assert.assertEquals(System.getProperty("user.name"),
+          harLogs[2].getOwner());
       Assert.assertEquals(0, fs.listStatus(workingDir).length);
     } finally {
       if (yarnCluster != null) {

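A side note on verifying the result: the assertions above read the archived
files back through the "har" filesystem scheme. A hedged sketch of doing the
same from standalone code, with an illustrative path (not from the patch):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ReadHarSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // A HAR is addressed through the "har" scheme layered over the
        // default filesystem; this path is illustrative only.
        Path har = new Path("har:///app-logs/alice/logs/application_1_0001.har");
        FileSystem harFs = har.getFileSystem(conf);
        for (FileStatus stat : harFs.listStatus(har)) {
          // Each entry keeps the 640 permission set by the runner's umask.
          System.out.println(stat.getPath().getName() + " " + stat.getPermission());
        }
      }
    }
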