git commit: [SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces.
Repository: spark Updated Branches: refs/heads/master 35afdfd62 - 7fca8f41c [SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces. HA and viewfs use namespaces instead of host names, so you can't resolve them since that will fail. So be smarter to avoid doing unnecessary work. Author: Marcelo Vanzin van...@cloudera.com Closes #2649 from vanzin/SPARK-3788 and squashes the following commits: fedbc73 [Marcelo Vanzin] Update comment. c938845 [Marcelo Vanzin] Use Objects.equal() to avoid issues with ==. 9f7b571 [Marcelo Vanzin] [SPARK-3788] [yarn] Fix compareFs to do the right thing for HA, federation. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7fca8f41 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7fca8f41 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7fca8f41 Branch: refs/heads/master Commit: 7fca8f41c8889a41d9ab05ad0ab39c7639f657ed Parents: 35afdfd Author: Marcelo Vanzin van...@cloudera.com Authored: Wed Oct 8 08:48:55 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Wed Oct 8 08:48:55 2014 -0500 -- .../apache/spark/deploy/yarn/ClientBase.scala | 31 1 file changed, 12 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7fca8f41/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 6ecac6e..14a0386 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -23,6 +23,7 @@ import scala.collection.JavaConversions._ import scala.collection.mutable.{HashMap, ListBuffer, Map} import scala.util.{Try, Success, Failure} +import com.google.common.base.Objects import org.apache.hadoop.conf.Configuration import 
org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission @@ -64,12 +65,12 @@ private[spark] trait ClientBase extends Logging { s"memory capability of the cluster ($maxMem MB per container)") val executorMem = args.executorMemory + executorMemoryOverhead if (executorMem > maxMem) { - throw new IllegalArgumentException(s"Required executor memory (${args.executorMemory}" + + throw new IllegalArgumentException(s"Required executor memory (${args.executorMemory}" + s"+$executorMemoryOverhead MB) is above the max threshold ($maxMem MB) of this cluster!") } val amMem = args.amMemory + amMemoryOverhead if (amMem > maxMem) { - throw new IllegalArgumentException(s"Required AM memory (${args.amMemory}" + + throw new IllegalArgumentException(s"Required AM memory (${args.amMemory}" + s"+$amMemoryOverhead MB) is above the max threshold ($maxMem MB) of this cluster!") } logInfo("Will allocate AM container, with %d MB memory including %d MB overhead".format( @@ -771,15 +772,17 @@ private[spark] object ClientBase extends Logging { private def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = { val srcUri = srcFs.getUri() val dstUri = destFs.getUri() -if (srcUri.getScheme() == null) { - return false -} -if (!srcUri.getScheme().equals(dstUri.getScheme())) { +if (srcUri.getScheme() == null || srcUri.getScheme() != dstUri.getScheme()) { return false } + var srcHost = srcUri.getHost() var dstHost = dstUri.getHost() -if ((srcHost != null) && (dstHost != null)) { + +// In HA or when using viewfs, the host part of the URI may not actually be a host, but the +// name of the HDFS namespace. Those names won't resolve, so avoid even trying if they +// match. 
+if (srcHost != null && dstHost != null && srcHost != dstHost) { try { srcHost = InetAddress.getByName(srcHost).getCanonicalHostName() dstHost = InetAddress.getByName(dstHost).getCanonicalHostName() @@ -787,19 +790,9 @@ private[spark] object ClientBase extends Logging { case e: UnknownHostException => return false } - if (!srcHost.equals(dstHost)) { -return false - } -} else if (srcHost == null && dstHost != null) { - return false -} else if (srcHost != null && dstHost == null) { - return false -} -if (srcUri.getPort() != dstUri.getPort()) { - false -} else { - true } + +Objects.equal(srcHost, dstHost) && srcUri.getPort() == dstUri.getPort() } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands,
git commit: [SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces (1.1 version).
Repository: spark Updated Branches: refs/heads/branch-1.1 a1f833f75 - a44af7302 [SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces (1.1 version). HA and viewfs use namespaces instead of host names, so you can't resolve them since that will fail. So be smarter to avoid doing unnecessary work. Author: Marcelo Vanzin van...@cloudera.com Closes #2650 from vanzin/SPARK-3788-1.1 and squashes the following commits: 174bf71 [Marcelo Vanzin] Update comment. 0e36be7 [Marcelo Vanzin] Use Objects.equal() instead of ==. 772aead [Marcelo Vanzin] [SPARK-3788] [yarn] Fix compareFs to do the right thing for HA, federation (1.1 version). Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a44af730 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a44af730 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a44af730 Branch: refs/heads/branch-1.1 Commit: a44af7302f814204fdbcc7ad620bc6984b376468 Parents: a1f833f Author: Marcelo Vanzin van...@cloudera.com Authored: Wed Oct 8 08:51:17 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Wed Oct 8 08:51:17 2014 -0500 -- .../apache/spark/deploy/yarn/ClientBase.scala | 29 1 file changed, 11 insertions(+), 18 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a44af730/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 6da3b16..27ee04a 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -24,6 +24,7 @@ import scala.collection.JavaConversions._ import scala.collection.mutable.{HashMap, ListBuffer, Map} import scala.util.{Try, Success, Failure} +import com.google.common.base.Objects import 
org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission @@ -122,15 +123,17 @@ trait ClientBase extends Logging { private def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = { val srcUri = srcFs.getUri() val dstUri = destFs.getUri() -if (srcUri.getScheme() == null) { - return false -} -if (!srcUri.getScheme().equals(dstUri.getScheme())) { +if (srcUri.getScheme() == null || srcUri.getScheme() != dstUri.getScheme()) { return false } + var srcHost = srcUri.getHost() var dstHost = dstUri.getHost() -if ((srcHost != null) && (dstHost != null)) { + +// In HA or when using viewfs, the host part of the URI may not actually be a host, but the +// name of the HDFS namespace. Those names won't resolve, so avoid even trying if they +// match. +if (srcHost != null && dstHost != null && srcHost != dstHost) { try { srcHost = InetAddress.getByName(srcHost).getCanonicalHostName() dstHost = InetAddress.getByName(dstHost).getCanonicalHostName() @@ -138,19 +141,9 @@ trait ClientBase extends Logging { case e: UnknownHostException => return false } - if (!srcHost.equals(dstHost)) { -return false - } -} else if (srcHost == null && dstHost != null) { - return false -} else if (srcHost != null && dstHost == null) { - return false -} -if (srcUri.getPort() != dstUri.getPort()) { - false -} else { - true } + +Objects.equal(srcHost, dstHost) && srcUri.getPort() == dstUri.getPort() } /** Copy the file into HDFS if needed. */ @@ -621,7 +614,7 @@ object ClientBase extends Logging { YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, path, File.pathSeparator) - /** + /** * Get the list of namenodes the user may access. */ private[yarn] def getNameNodesToAccess(sparkConf: SparkConf): Set[Path] = { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org