AMBARI-20233. [Yarn 3.0] "refresh yarn queue" takes 20 minutes to fail (aonishuk)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/d5efb772 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/d5efb772 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/d5efb772 Branch: refs/heads/branch-feature-AMBARI-12556 Commit: d5efb77276443257ef49baaed1994e2204c623bd Parents: 5cf5e31 Author: Andrew Onishuk <aonis...@hortonworks.com> Authored: Tue Feb 28 15:07:11 2017 +0200 Committer: Andrew Onishuk <aonis...@hortonworks.com> Committed: Tue Feb 28 15:07:11 2017 +0200 ---------------------------------------------------------------------- .../YARN/3.0.0.3.0/package/scripts/service.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/d5efb772/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/service.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/service.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/service.py index e0d6475..1c612ef 100644 --- a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/service.py +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/service.py @@ -25,6 +25,7 @@ from resource_management.core.shell import as_user, as_sudo from resource_management.libraries.functions.show_logs import show_logs from resource_management.libraries.functions.format import format from resource_management.core.resources.system import Execute, File +from resource_management.core.signal_utils import TerminateStrategy @OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY) def service(componentName, action='start', serviceName='yarn'): @@ -103,4 +104,10 @@ def service(componentName, action='start', 
serviceName='yarn'): elif action == 'refreshQueues': rm_kinit_cmd = params.rm_kinit_cmd refresh_cmd = format("{rm_kinit_cmd} export HADOOP_LIBEXEC_DIR={hadoop_libexec_dir} && {yarn_container_bin}/yarn rmadmin -refreshQueues") - Execute(refresh_cmd, user=usr) + Execute(refresh_cmd, + user = usr, + timeout = 20, # when Yarn is not started command hangs forever and should be killed + tries = 5, + try_sleep = 5, + timeout_kill_strategy = TerminateStrategy.KILL_PROCESS_GROUP, # the process cannot be simply killed by 'kill -15', so kill the process group instead. + )