Repository: hadoop
Updated Branches:
  refs/heads/trunk 522ddbde7 -> 4aba85875


MAPREDUCE-6741. Add MR support to redact job conf properties. Contributed by 
Haibo Chen


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/4aba8587
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/4aba8587
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/4aba8587

Branch: refs/heads/trunk
Commit: 4aba858750900de25940c16211c21de4addd1926
Parents: 522ddbd
Author: Jason Lowe <jl...@apache.org>
Authored: Tue Aug 9 15:55:35 2016 +0000
Committer: Jason Lowe <jl...@apache.org>
Committed: Tue Aug 9 15:55:35 2016 +0000

----------------------------------------------------------------------
 .../jobhistory/JobHistoryEventHandler.java      | 19 +++---
 .../mapreduce/v2/app/webapp/dao/ConfInfo.java   |  4 +-
 .../jobhistory/TestJobHistoryEventHandler.java  | 71 ++++++++++++++++++++
 .../mapreduce/v2/app/webapp/TestBlocks.java     | 10 ++-
 .../apache/hadoop/mapreduce/MRJobConfig.java    |  5 ++
 .../hadoop/mapreduce/util/MRJobConfUtil.java    | 45 +++++++++++++
 .../src/main/resources/mapred-default.xml       |  8 +++
 7 files changed, 149 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/4aba8587/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
index 817cd14..53d8a36 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
@@ -52,6 +52,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskType;
 import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.util.JobHistoryEventUtils;
+import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.api.records.JobState;
 import org.apache.hadoop.mapreduce.v2.app.AppContext;
@@ -507,16 +508,16 @@ public class JobHistoryEventHandler extends 
AbstractService
       if (conf != null) {
         // TODO Ideally this should be written out to the job dir
         // (.staging/jobid/files - RecoveryService will need to be patched)
-        FSDataOutputStream jobFileOut = null;
-        try {
-          if (logDirConfPath != null) {
-            jobFileOut = stagingDirFS.create(logDirConfPath, true);
-            conf.writeXml(jobFileOut);
-            jobFileOut.close();
+        if (logDirConfPath != null) {
+          Configuration redactedConf = new Configuration(conf);
+          MRJobConfUtil.redact(redactedConf);
+          try (FSDataOutputStream jobFileOut = stagingDirFS
+              .create(logDirConfPath, true)) {
+            redactedConf.writeXml(jobFileOut);
+          } catch (IOException e) {
+            LOG.info("Failed to write the job configuration file", e);
+            throw e;
           }
-        } catch (IOException e) {
-          LOG.info("Failed to write the job configuration file", e);
-          throw e;
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4aba8587/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java
index a05c317..287ab99 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java
@@ -26,8 +26,7 @@ import javax.xml.bind.annotation.XmlAccessorType;
 import javax.xml.bind.annotation.XmlRootElement;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileContext;
-import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
 import org.apache.hadoop.mapreduce.v2.app.job.Job;
 
 @XmlRootElement(name = "conf")
@@ -45,6 +44,7 @@ public class ConfInfo {
     this.property = new ArrayList<ConfEntryInfo>();
     Configuration jobConf = job.loadConfFile();
     this.path = job.getConfFile().toString();
+    MRJobConfUtil.redact(jobConf);
     for (Map.Entry<String, String> entry : jobConf) {
       this.property.add(new ConfEntryInfo(entry.getKey(), entry.getValue(), 
           jobConf.getPropertySources(entry.getKey())));

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4aba8587/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java
index 064f9ec..bfe764d 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java
@@ -29,6 +29,7 @@ import static org.mockito.Mockito.when;
 
 import java.io.File;
 import java.io.FileOutputStream;
+import java.io.InputStream;
 import java.io.IOException;
 import java.util.HashMap;
 
@@ -52,6 +53,7 @@ import org.apache.hadoop.mapreduce.TaskID;
 import org.apache.hadoop.mapreduce.TaskType;
 import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.util.JobHistoryEventUtils;
+import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.app.AppContext;
 import org.apache.hadoop.mapreduce.v2.app.MRAppMaster.RunningAppContext;
@@ -372,6 +374,74 @@ public class TestJobHistoryEventHandler {
     }
   }
 
+  @Test
+  public void testPropertyRedactionForJHS() throws Exception {
+    final Configuration conf = new Configuration();
+
+    String sensitivePropertyName = "aws.fake.credentials.name";
+    String sensitivePropertyValue = "aws.fake.credentials.val";
+    conf.set(sensitivePropertyName, sensitivePropertyValue);
+    conf.set(MRJobConfig.MR_JOB_REDACTED_PROPERTIES,
+        sensitivePropertyName);
+    conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY,
+        dfsCluster.getURI().toString());
+    final TestParams params = new TestParams();
+    conf.set(MRJobConfig.MR_AM_STAGING_DIR, params.dfsWorkDir);
+
+    final JHEvenHandlerForTest jheh =
+        new JHEvenHandlerForTest(params.mockAppContext, 0, false);
+
+    try {
+      jheh.init(conf);
+      jheh.start();
+      handleEvent(jheh, new JobHistoryEvent(params.jobId,
+          new AMStartedEvent(params.appAttemptId, 200, params.containerId,
+              "nmhost", 3000, 4000, -1)));
+      handleEvent(jheh, new JobHistoryEvent(params.jobId,
+          new JobUnsuccessfulCompletionEvent(TypeConverter.fromYarn(
+              params.jobId), 0, 0, 0, JobStateInternal.FAILED.toString())));
+
+      // verify the value of the sensitive property in job.xml is restored.
+      Assert.assertEquals(sensitivePropertyName + " is modified.",
+          conf.get(sensitivePropertyName), sensitivePropertyValue);
+
+      // load the job_conf.xml in JHS directory and verify property redaction.
+      Path jhsJobConfFile = getJobConfInIntermediateDoneDir(conf, 
params.jobId);
+      Assert.assertTrue("The job_conf.xml file is not in the JHS directory",
+          FileContext.getFileContext(conf).util().exists(jhsJobConfFile));
+      Configuration jhsJobConf = new Configuration();
+
+      try (InputStream input = FileSystem.get(conf).open(jhsJobConfFile)) {
+        jhsJobConf.addResource(input);
+        Assert.assertEquals(
+            sensitivePropertyName + " is not redacted in HDFS.",
+            MRJobConfUtil.REDACTION_REPLACEMENT_VAL,
+            jhsJobConf.get(sensitivePropertyName));
+      }
+    } finally {
+      jheh.stop();
+      purgeHdfsHistoryIntermediateDoneDirectory(conf);
+    }
+  }
+
+  private static Path getJobConfInIntermediateDoneDir(Configuration conf,
+      JobId jobId) throws IOException {
+    Path userDoneDir = new Path(
+        JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf));
+    Path doneDirPrefix =
+        FileContext.getFileContext(conf).makeQualified(userDoneDir);
+    return new Path(
+        doneDirPrefix, JobHistoryUtils.getIntermediateConfFileName(jobId));
+  }
+
+  private void purgeHdfsHistoryIntermediateDoneDirectory(Configuration conf)
+      throws IOException {
+    FileSystem fs = FileSystem.get(dfsCluster.getConfiguration(0));
+    String intermDoneDirPrefix =
+        JobHistoryUtils.getConfiguredHistoryIntermediateDoneDirPrefix(conf);
+    fs.delete(new Path(intermDoneDirPrefix), true);
+  }
+
   @Test (timeout=50000)
   public void testDefaultFsIsUsedForHistory() throws Exception {
     // Create default configuration pointing to the minicluster
@@ -413,6 +483,7 @@ public class TestJobHistoryEventHandler {
           localFileSystem.exists(new Path(t.dfsWorkDir)));
     } finally {
       jheh.stop();
+      purgeHdfsHistoryIntermediateDoneDirectory(conf);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4aba8587/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java
index 0e250f4..82b8a37 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java
@@ -23,6 +23,8 @@ import java.io.PrintWriter;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
 import org.apache.hadoop.yarn.webapp.View;
 import org.junit.Test;
 import org.apache.hadoop.conf.Configuration;
@@ -65,6 +67,9 @@ public class TestBlocks {
     Path path = new Path("conf");
     Configuration configuration = new Configuration();
     configuration.set("Key for test", "Value for test");
+    final String redactedProp = "Key for redaction";
+    configuration.set(MRJobConfig.MR_JOB_REDACTED_PROPERTIES,
+        redactedProp);
     when(job.getConfFile()).thenReturn(path);
     when(job.loadConfFile()).thenReturn(configuration);
 
@@ -85,9 +90,10 @@ public class TestBlocks {
     configurationBlock.render(html);
     pWriter.flush();
     assertTrue(data.toString().contains("Key for test"));
-
     assertTrue(data.toString().contains("Value for test"));
-
+    assertTrue(data.toString().contains(redactedProp));
+    assertTrue(data.toString().contains(
+        MRJobConfUtil.REDACTION_REPLACEMENT_VAL));
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4aba8587/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
index be47bfc..53e4b38 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
@@ -977,4 +977,9 @@ public interface MRJobConfig {
   public static final String MR_NUM_OPPORTUNISTIC_MAPS_PER_100 =
       "mapreduce.job.num-opportunistic-maps-per-100";
   public static final int DEFAULT_MR_NUM_OPPORTUNISTIC_MAPS_PER_100 = 0;
+
+  /**
+   * A comma-separated list of properties whose value will be redacted.
+   */
+  String MR_JOB_REDACTED_PROPERTIES = "mapreduce.job.redacted-properties";
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4aba8587/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java
new file mode 100644
index 0000000..11d49a4
--- /dev/null
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapreduce.util;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+
+/**
+ * A class that contains utility methods for MR Job configuration.
+ */
+public final class MRJobConfUtil {
+  public static final String REDACTION_REPLACEMENT_VAL = "*********(redacted)";
+
+  /**
+   * Redact job configuration properties.
+   * @param conf the job configuration to redact
+   */
+  public static void redact(final Configuration conf) {
+    for (String prop : conf.getTrimmedStringCollection(
+        MRJobConfig.MR_JOB_REDACTED_PROPERTIES)) {
+      conf.set(prop, REDACTION_REPLACEMENT_VAL);
+    }
+  }
+
+  /**
+   * There is no reason to instantiate this utility class.
+   */
+  private MRJobConfUtil() {
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4aba8587/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
index 1741d88..8ba0ec9 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
@@ -1932,4 +1932,12 @@
   <name>mapreduce.jobhistory.loadedjob.tasks.max</name>
   <value>-1</value>
 </property>
+
+<property>
+  <description>
+    The list of job configuration properties whose value will be redacted.
+  </description>
+  <name>mapreduce.job.redacted-properties</name>
+  <value></value>
+</property>
 </configuration>


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to