spark git commit: [SPARK-13148][YARN] document zero-keytab Oozie application launch; add diagnostics
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 9cf34727c -> 0cb69a918


[SPARK-13148][YARN] document zero-keytab Oozie application launch; add diagnostics

This patch provides detail on what to do for keytab-less Oozie launches of Spark
applications, and adds some debug-level diagnostics of what credentials have
been submitted.

Author: Steve Loughran

Closes #11033 from steveloughran/stevel/feature/SPARK-13148-oozie.

(cherry picked from commit 01b350a4f7c17d6516b27b6cd27ba8390834d40c)
Signed-off-by: Tom Graves


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0cb69a91
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0cb69a91
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0cb69a91

Branch: refs/heads/branch-2.0
Commit: 0cb69a918220304ec46c5c1f4032796fda0d449d
Parents: 9cf3472
Author: Steve Loughran
Authored: Thu May 26 13:55:22 2016 -0500
Committer: Tom Graves
Committed: Thu May 26 13:56:04 2016 -0500

----------------------------------------------------------------------
 .../apache/spark/deploy/SparkHadoopUtil.scala | 51 ++-
 docs/running-on-yarn.md                       | 96 ++++
 .../org/apache/spark/deploy/yarn/Client.scala |  3 +
 3 files changed, 148 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0cb69a91/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 2e9e45a..7a5fc86 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -17,10 +17,11 @@ package org.apache.spark.deploy

-import java.io.{ByteArrayInputStream, DataInputStream}
+import java.io.{ByteArrayInputStream, DataInputStream, IOException}
 import java.lang.reflect.Method
 import java.security.PrivilegedExceptionAction
-import java.util.{Arrays, Comparator}
+import java.text.DateFormat
+import java.util.{Arrays, Comparator, Date}

 import scala.collection.JavaConverters._
 import scala.concurrent.duration._
@@ -34,6 +35,8 @@ import org.apache.hadoop.fs.FileSystem.Statistics
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
 import org.apache.hadoop.mapred.JobConf
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
+import org.apache.hadoop.security.token.{Token, TokenIdentifier}
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier

 import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.annotation.DeveloperApi
@@ -357,6 +360,50 @@ class SparkHadoopUtil extends Logging {
     newConf.setBoolean(confKey, true)
     newConf
   }
+
+  /**
+   * Dump the credentials' tokens to string values.
+   *
+   * @param credentials credentials
+   * @return an iterable of the string values; empty if no credentials were passed in
+   */
+  private[spark] def dumpTokens(credentials: Credentials): Iterable[String] = {
+    if (credentials != null) {
+      credentials.getAllTokens.asScala.map(tokenToString)
+    } else {
+      Seq()
+    }
+  }
+
+  /**
+   * Convert a token to a string for logging.
+   * If it's an abstract delegation token, attempt to unmarshall it and then
+   * print more details, including timestamps in human-readable form.
+   *
+   * @param token token to convert to a string
+   * @return a printable string value
+   */
+  private[spark] def tokenToString(token: Token[_ <: TokenIdentifier]): String = {
+    val df = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT)
+    val buffer = new StringBuilder(128)
+    buffer.append(token.toString)
+    try {
+      val ti = token.decodeIdentifier
+      buffer.append("; ").append(ti)
+      ti match {
+        case dt: AbstractDelegationTokenIdentifier =>
+          // include human-readable times and the renewer, which the HDFS
+          // token's toString omits
+          buffer.append("; Renewer: ").append(dt.getRenewer)
+          buffer.append("; Issued: ").append(df.format(new Date(dt.getIssueDate)))
+          buffer.append("; Max Date: ").append(df.format(new Date(dt.getMaxDate)))
+        case _ =>
+      }
+    } catch {
+      case e: IOException =>
+        logDebug(s"Failed to decode $token: $e", e)
+    }
+    buffer.toString
+  }
 }

 object SparkHadoopUtil {


http://git-wip-us.apache.org/repos/asf/spark/blob/0cb69a91/docs/running-on-yarn.md
----------------------------------------------------------------------
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 3bd16bf..f2fbe3c 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -476,3 +476,99 @@ If you need a reference to the proper location to put log files in the YARN so t
 - In `clu
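The diffstat above counts three added lines in yarn's Client.scala, but the
truncation cuts them off. As a hedged sketch of how the new helper might be
driven (the object and method names below are illustrative, not from the
patch), each submitted token can be logged at debug level like so:

package org.apache.spark.deploy

import org.apache.hadoop.security.UserGroupInformation

import org.apache.spark.internal.Logging

// Illustrative only: dumpTokens is private[spark], so a caller has to live
// under the org.apache.spark package, as Client.scala itself does.
object CredentialDebug extends Logging {

  /** Log every token in the current user's credentials, one line per token. */
  def logCurrentTokens(): Unit = {
    val credentials = UserGroupInformation.getCurrentUser.getCredentials
    SparkHadoopUtil.get.dumpTokens(credentials).foreach(logDebug(_))
  }
}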
spark git commit: [SPARK-13148][YARN] document zero-keytab Oozie application launch; add diagnostics
Repository: spark
Updated Branches:
  refs/heads/master c76457c8e -> 01b350a4f


[SPARK-13148][YARN] document zero-keytab Oozie application launch; add diagnostics

This patch provides detail on what to do for keytab-less Oozie launches of Spark
applications, and adds some debug-level diagnostics of what credentials have
been submitted.

Author: Steve Loughran

Closes #11033 from steveloughran/stevel/feature/SPARK-13148-oozie.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/01b350a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/01b350a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/01b350a4

Branch: refs/heads/master
Commit: 01b350a4f7c17d6516b27b6cd27ba8390834d40c
Parents: c76457c
Author: Steve Loughran
Authored: Thu May 26 13:55:22 2016 -0500
Committer: Tom Graves
Committed: Thu May 26 13:55:22 2016 -0500

----------------------------------------------------------------------
 .../apache/spark/deploy/SparkHadoopUtil.scala | 51 ++-
 docs/running-on-yarn.md                       | 96 ++++
 .../org/apache/spark/deploy/yarn/Client.scala |  3 +
 3 files changed, 148 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
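The master commit carries the same diff as the branch-2.0 cherry-pick above.
For a quick off-cluster look at what the new diagnostics print, a synthetic
token can be pushed through dumpTokens; this sketch assumes spark-core and
hadoop-client on the classpath, and every value in it (owner, renewer,
service, lifetime) is a placeholder:

package org.apache.spark.deploy

import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.io.Text
import org.apache.hadoop.security.Credentials
import org.apache.hadoop.security.token.Token

// Builds a fake HDFS delegation token, wraps it in a Credentials object, and
// prints the dump. The demo sits in org.apache.spark.deploy only because
// dumpTokens is private[spark].
object TokenDumpDemo {
  def main(args: Array[String]): Unit = {
    val id = new DelegationTokenIdentifier(
      new Text("alice"), new Text("oozie"), new Text("alice"))
    id.setIssueDate(System.currentTimeMillis())
    id.setMaxDate(System.currentTimeMillis() + 7 * 24 * 3600 * 1000L)

    // An empty password is fine here: nothing validates this token, it is
    // only decoded and printed.
    val token = new Token[DelegationTokenIdentifier](
      id.getBytes, Array.empty[Byte],
      DelegationTokenIdentifier.HDFS_DELEGATION_KIND, new Text("ha-hdfs:demo"))

    val credentials = new Credentials()
    credentials.addToken(token.getService, token)

    // One line per token: kind and service from toString, then the decoded
    // renewer, issue date, and max date added by tokenToString.
    SparkHadoopUtil.get.dumpTokens(credentials).foreach(println)
  }
}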