spark git commit: [SPARK-13148][YARN] document zero-keytab Oozie application launch; add diagnostics

2016-05-26 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 9cf34727c -> 0cb69a918


[SPARK-13148][YARN] document zero-keytab Oozie application launch; add diagnostics

This patch provides detail on what to do for keytab-less Oozie launches of Spark apps, and adds some debug-level diagnostics of what credentials have been submitted.
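
For illustration only (the values below are invented), a token rendered by the new tokenToString helper in the diff that follows takes roughly this form: the token's own toString, the decoded identifier, then the renewer and human-readable timestamps:

    Kind: HDFS_DELEGATION_TOKEN, Service: 192.168.1.1:8020, Ident: ...;
    HDFS_DELEGATION_TOKEN token 1 for alice; Renewer: yarn;
    Issued: 5/26/16 1:55 PM; Max Date: 6/2/16 1:55 PM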

Author: Steve Loughran 
Author: Steve Loughran 

Closes #11033 from steveloughran/stevel/feature/SPARK-13148-oozie.

(cherry picked from commit 01b350a4f7c17d6516b27b6cd27ba8390834d40c)
Signed-off-by: Tom Graves 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0cb69a91
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0cb69a91
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0cb69a91

Branch: refs/heads/branch-2.0
Commit: 0cb69a918220304ec46c5c1f4032796fda0d449d
Parents: 9cf3472
Author: Steve Loughran 
Authored: Thu May 26 13:55:22 2016 -0500
Committer: Tom Graves 
Committed: Thu May 26 13:56:04 2016 -0500

----------------------------------------------------------------------
 .../apache/spark/deploy/SparkHadoopUtil.scala   | 51 +++++++++++++++++++++++++-
 docs/running-on-yarn.md                         | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
 .../org/apache/spark/deploy/yarn/Client.scala   |  3 +
 3 files changed, 148 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0cb69a91/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 2e9e45a..7a5fc86 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -17,10 +17,11 @@
 
 package org.apache.spark.deploy
 
-import java.io.{ByteArrayInputStream, DataInputStream}
+import java.io.{ByteArrayInputStream, DataInputStream, IOException}
 import java.lang.reflect.Method
 import java.security.PrivilegedExceptionAction
-import java.util.{Arrays, Comparator}
+import java.text.DateFormat
+import java.util.{Arrays, Comparator, Date}
 
 import scala.collection.JavaConverters._
 import scala.concurrent.duration._
@@ -34,6 +35,8 @@ import org.apache.hadoop.fs.FileSystem.Statistics
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
 import org.apache.hadoop.mapred.JobConf
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
+import org.apache.hadoop.security.token.{Token, TokenIdentifier}
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier
 
 import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.annotation.DeveloperApi
@@ -357,6 +360,50 @@ class SparkHadoopUtil extends Logging {
     newConf.setBoolean(confKey, true)
     newConf
   }
+
+  /**
+   * Dump the credentials' tokens to string values.
+   *
+   * @param credentials credentials
+   * @return an iterator over the string values. If no credentials are passed in: an empty list
+   */
+  private[spark] def dumpTokens(credentials: Credentials): Iterable[String] = {
+    if (credentials != null) {
+      credentials.getAllTokens.asScala.map(tokenToString)
+    } else {
+      Seq()
+    }
+  }
+
+  /**
+   * Convert a token to a string for logging.
+   * If it's an abstract delegation token, attempt to unmarshall it and then
+   * print more details, including timestamps in human-readable form.
+   *
+   * @param token token to convert to a string
+   * @return a printable string value.
+   */
+  private[spark] def tokenToString(token: Token[_ <: TokenIdentifier]): String = {
+    val df = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT)
+    val buffer = new StringBuilder(128)
+    buffer.append(token.toString)
+    try {
+      val ti = token.decodeIdentifier
+      buffer.append("; ").append(ti)
+      ti match {
+        case dt: AbstractDelegationTokenIdentifier =>
+          // include human times and the renewer, which the HDFS tokens' toString omits
+          buffer.append("; Renewer: ").append(dt.getRenewer)
+          buffer.append("; Issued: ").append(df.format(new Date(dt.getIssueDate)))
+          buffer.append("; Max Date: ").append(df.format(new Date(dt.getMaxDate)))
+        case _ =>
+      }
+    } catch {
+      case e: IOException =>
+        logDebug(s"Failed to decode $token: $e", e)
+    }
+    buffer.toString
+  }
 }
 
 object SparkHadoopUtil {
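
The three-line Client.scala hunk is truncated from this mail. As a hedged sketch (not the literal patch), the new helper could be invoked from the YARN client along these lines, assuming a class that mixes in Spark's Logging trait:

    // Hypothetical call site: log, at debug level, every token attached
    // to the credentials being submitted with the application.
    val credentials = UserGroupInformation.getCurrentUser.getCredentials
    logDebug("Launch credentials:")
    SparkHadoopUtil.get.dumpTokens(credentials).foreach(t => logDebug(t))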

http://git-wip-us.apache.org/repos/asf/spark/blob/0cb69a91/docs/running-on-yarn.md
----------------------------------------------------------------------
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 3bd16bf..f2fbe3c 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -476,3 +476,99 @@ If you need a reference to the proper location to put log files in the YARN so t
 - In `clu

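
The running-on-yarn.md hunk is truncated above. As a hedged sketch of the zero-keytab pattern it documents (option names as spelled in this era of Spark; verify against the final running-on-yarn.md): when Oozie has already obtained and attached the delegation tokens, the launched application should be told not to request its own, e.g. in spark-defaults.conf:

    # Assumption: Oozie supplies the Hive and HBase tokens,
    # so Spark must not try to obtain them again itself.
    spark.yarn.security.tokens.hive.enabled   false
    spark.yarn.security.tokens.hbase.enabled  false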