AMBARI-22644 - Node Managers fail to start after Spark2 is patched due to CNF YarnShuffleService (jonathanhurley)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/1d87b21c Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/1d87b21c Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/1d87b21c Branch: refs/heads/trunk Commit: 1d87b21cf34c51bc19f80199e2b8641474fe098a Parents: eab6722 Author: Jonathan Hurley <jhur...@hortonworks.com> Authored: Wed Dec 13 12:33:26 2017 -0500 Committer: Jonathan Hurley <jhur...@hortonworks.com> Committed: Mon Dec 18 15:10:43 2017 -0500 ---------------------------------------------------------------------- .../2.1.0.2.0/package/scripts/params_linux.py | 19 ++++++++++++------- .../3.0.0.3.0/package/scripts/params_linux.py | 20 ++++++++++++-------- .../services/YARN/configuration/yarn-site.xml | 4 ++-- .../stacks/HDP/2.6/upgrades/config-upgrade.xml | 2 -- .../services/YARN/configuration/yarn-site.xml | 4 ++-- 5 files changed, 28 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/1d87b21c/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/params_linux.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/params_linux.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/params_linux.py index 9997779..4a49822 100644 --- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/params_linux.py +++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/params_linux.py @@ -89,16 +89,20 @@ def get_spark_version(service_name, component_name, yarn_version): """ Attempts to calculate the correct version placeholder value for spark or spark2 based on what is installed in the cluster. 
If Spark is not installed, then this value will need to be - that of YARN so it can still find the correct shuffle class. + that of YARN so it can still find the correct spark class. On cluster installs, we have not yet calculated any versions and all known values could be None. This doesn't affect daemons, but it does affect client-only hosts where they will never receive a start command after install. Therefore, this function will attempt to use stack-select as a last resort to get a value. + + ATS needs this since it relies on packages installed by Spark. Some classes, like the shuffle + classes, are not provided by spark, but by a dependent RPM to YARN, so they do not use this + value. :param service_name: the service name (SPARK, SPARK2, etc) :param component_name: the component name (SPARK_CLIENT, etc) :param yarn_version: the default version of Yarn to use if no spark is installed - :return: a value for the version placeholder in shuffle classpath properties + :return: a value for the version placeholder in spark classpath properties """ # start off seeing if we need to populate a default value for YARN if yarn_version is None: @@ -106,18 +110,19 @@ def get_spark_version(service_name, component_name, yarn_version): component_name = "YARN_CLIENT") # now try to get the version of spark/spark2, defaulting to the version of YARN - shuffle_classpath_version = component_version.get_component_repository_version(service_name = service_name, + spark_classpath_version = component_version.get_component_repository_version(service_name = service_name, component_name = component_name, default_value = yarn_version) # even with the default of using YARN's version, on an install this might be None since we haven't # calculated the version of YARN yet - use stack_select as a last ditch effort - if shuffle_classpath_version is None: + if spark_classpath_version is None: try: - shuffle_classpath_version = stack_select.get_role_component_current_stack_version() + 
spark_classpath_version = stack_select.get_role_component_current_stack_version() except: - Logger.exception("Unable to query for the correct shuffle classpath") + Logger.exception("Unable to query for the correct spark version to use when building classpaths") + + return spark_classpath_version - return shuffle_classpath_version # these are used to render the classpath for picking up Spark classes # in the event that spark is not installed, then we must default to the version of YARN installed http://git-wip-us.apache.org/repos/asf/ambari/blob/1d87b21c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/params_linux.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/params_linux.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/params_linux.py index 8e9632c..9afd112 100644 --- a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/params_linux.py +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/params_linux.py @@ -89,16 +89,20 @@ def get_spark_version(service_name, component_name, yarn_version): """ Attempts to calculate the correct version placeholder value for spark or spark2 based on what is installed in the cluster. If Spark is not installed, then this value will need to be - that of YARN so it can still find the correct shuffle class. + that of YARN so it can still find the correct spark class. On cluster installs, we have not yet calculated any versions and all known values could be None. This doesn't affect daemons, but it does affect client-only hosts where they will never receive a start command after install. Therefore, this function will attempt to use stack-select as a last resort to get a value. + + ATS needs this since it relies on packages installed by Spark. 
Some classes, like the shuffle + classes, are not provided by spark, but by a dependent RPM to YARN, so they do not use this + value. :param service_name: the service name (SPARK, SPARK2, etc) :param component_name: the component name (SPARK_CLIENT, etc) :param yarn_version: the default version of Yarn to use if no spark is installed - :return: a value for the version placeholder in shuffle classpath properties + :return: a value for the version placeholder in spark classpath properties """ # start off seeing if we need to populate a default value for YARN if yarn_version is None: @@ -106,18 +110,19 @@ def get_spark_version(service_name, component_name, yarn_version): component_name = "YARN_CLIENT") # now try to get the version of spark/spark2, defaulting to the version of YARN - shuffle_classpath_version = component_version.get_component_repository_version(service_name = service_name, + spark_classpath_version = component_version.get_component_repository_version(service_name = service_name, component_name = component_name, default_value = yarn_version) # even with the default of using YARN's version, on an install this might be None since we haven't # calculated the version of YARN yet - use stack_select as a last ditch effort - if shuffle_classpath_version is None: + if spark_classpath_version is None: try: - shuffle_classpath_version = stack_select.get_role_component_current_stack_version() + spark_classpath_version = stack_select.get_role_component_current_stack_version() except: - Logger.exception("Unable to query for the correct shuffle classpath") + Logger.exception("Unable to query for the correct spark version to use when building classpaths") + + return spark_classpath_version - return shuffle_classpath_version # these are used to render the classpath for picking up Spark classes # in the event that spark is not installed, then we must default to the version of YARN installed @@ -317,7 +322,6 @@ if include_file_path and manage_include_files: ats_host = 
set(default("/clusterHostInfo/app_timeline_server_hosts", [])) has_ats = not len(ats_host) == 0 - # don't using len(nm_hosts) here, because check can take too much time on large clusters number_of_nm = 1 http://git-wip-us.apache.org/repos/asf/ambari/blob/1d87b21c/ambari-server/src/main/resources/stacks/HDP/2.5/services/YARN/configuration/yarn-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/services/YARN/configuration/yarn-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.5/services/YARN/configuration/yarn-site.xml index 29833fb..b6fadcb 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.5/services/YARN/configuration/yarn-site.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.5/services/YARN/configuration/yarn-site.xml @@ -31,13 +31,13 @@ </property> <property> <name>yarn.nodemanager.aux-services.spark_shuffle.classpath</name> - <value>{{stack_root}}/{{spark_version}}/spark/aux/*</value> + <value>{{stack_root}}/${hdp.version}/spark/aux/*</value> <description>The auxiliary service classpath to use for Spark</description> <on-ambari-upgrade add="false"/> </property> <property> <name>yarn.nodemanager.aux-services.spark2_shuffle.classpath</name> - <value>{{stack_root}}/{{spark2_version}}/spark2/aux/*</value> + <value>{{stack_root}}/${hdp.version}/spark2/aux/*</value> <description>The auxiliary service classpath to use for Spark 2</description> <on-ambari-upgrade add="false"/> </property> http://git-wip-us.apache.org/repos/asf/ambari/blob/1d87b21c/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml b/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml index 9c3d0dd..d8f71cd 100644 --- 
a/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml @@ -183,8 +183,6 @@ <changes> <definition xsi:type="configure" id="hdp_2_6_yarn_spark_versioned_classpaths"> <type>yarn-site</type> - <set key="yarn.nodemanager.aux-services.spark_shuffle.classpath" value="{{stack_root}}/{{spark_version}}/spark/aux/*"/> - <set key="yarn.nodemanager.aux-services.spark2_shuffle.classpath" value="{{stack_root}}/{{spark2_version}}/spark2/aux/*"/> <replace key="yarn.timeline-service.entity-group-fs-store.group-id-plugin-classpath" find="${hdp.version}/spark/" replace-with="{{spark_version}}/spark/"/> </definition> </changes> http://git-wip-us.apache.org/repos/asf/ambari/blob/1d87b21c/ambari-server/src/main/resources/stacks/HDP/3.0/services/YARN/configuration/yarn-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/3.0/services/YARN/configuration/yarn-site.xml b/ambari-server/src/main/resources/stacks/HDP/3.0/services/YARN/configuration/yarn-site.xml index d32e2de..01551c4 100644 --- a/ambari-server/src/main/resources/stacks/HDP/3.0/services/YARN/configuration/yarn-site.xml +++ b/ambari-server/src/main/resources/stacks/HDP/3.0/services/YARN/configuration/yarn-site.xml @@ -20,7 +20,7 @@ <property> <name>yarn.nodemanager.aux-services.spark_shuffle.classpath</name> - <value>{{stack_root}}/{{spark_version}}/spark/aux/*</value> + <value>{{stack_root}}/${hdp.version}/spark/aux/*</value> <description>The auxiliary service classpath to use for Spark</description> <on-ambari-upgrade add="false"/> </property> @@ -41,7 +41,7 @@ <!-- These configs were inherited from HDP 2.5 --> <property> <name>yarn.nodemanager.aux-services.spark2_shuffle.classpath</name> - <value>{{stack_root}}/{{spark2_version}}/spark2/aux/*</value> + <value>{{stack_root}}/${hdp.version}/spark2/aux/*</value> <description>The auxiliary 
service classpath to use for Spark 2</description> <on-ambari-upgrade add="false"/> </property>