This is an automated email from the ASF dual-hosted git repository. vjasani pushed a commit to branch branch-2.7 in repository https://gitbox.apache.org/repos/asf/ambari.git
The following commit(s) were added to refs/heads/branch-2.7 by this push: new b65a433114 AMBARI-25949 regionserver graceful start/stop should be compatible for hbase 2 (#3701) b65a433114 is described below commit b65a4331145072cb3553c0825c98329c1ba69efa Author: Viraj Jasani <vjas...@apache.org> AuthorDate: Mon Jul 31 12:00:06 2023 -0700 AMBARI-25949 regionserver graceful start/stop should be compatible for hbase 2 (#3701) --- .../0.96.0.2.0/package/files/draining_servers.rb | 11 +- .../0.96.0.2.0/package/files/draining_servers2.rb | 159 +++++++++++++++++++++ .../package/scripts/hbase_decommission.py | 85 ++++++++--- .../package/scripts/hbase_regionserver.py | 28 +++- .../0.96.0.2.0/package/scripts/params_linux.py | 3 + .../python/stacks/2.0.6/HBASE/test_hbase_master.py | 18 ++- 6 files changed, 267 insertions(+), 37 deletions(-) diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/files/draining_servers.rb b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/files/draining_servers.rb index 5bcb5b6b60..ab3ed10447 100644 --- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/files/draining_servers.rb +++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/files/draining_servers.rb @@ -100,15 +100,16 @@ end def removeServers(options, hostOrServers) config = HBaseConfiguration.create() - servers = getServerNames(hostOrServers, config) - zkw = org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.new(config, "draining_servers", nil) parentZnode = zkw.drainingZNode - + servers = ZKUtil.listChildrenNoWatch(zkw, parentZnode) + begin for server in servers - node = ZKUtil.joinZNode(parentZnode, server) - ZKUtil.deleteNodeFailSilent(zkw, node) + if hostOrServers.include?(server.split(',')[0]) + node = ZKUtil.joinZNode(parentZnode, server) + ZKUtil.deleteNodeFailSilent(zkw, node) + end end ensure zkw.close() diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/files/draining_servers2.rb b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/files/draining_servers2.rb new file mode 100644 index 0000000000..7e1a4e95c2 --- /dev/null +++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/files/draining_servers2.rb @@ -0,0 +1,159 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Add or remove servers from draining mode via zookeeper +# Deprecated in 2.0, and will be removed in 3.0. Use Admin decommission +# API instead. + +require 'optparse' +include Java + +java_import org.apache.hadoop.hbase.HBaseConfiguration +java_import org.apache.hadoop.hbase.client.ConnectionFactory +java_import org.apache.hadoop.hbase.client.HBaseAdmin +java_import org.apache.hadoop.hbase.zookeeper.ZKUtil +java_import org.apache.hadoop.hbase.zookeeper.ZNodePaths +java_import org.slf4j.LoggerFactory + +# Name of this script +NAME = 'draining_servers'.freeze + +# Do command-line parsing +options = {} +optparse = OptionParser.new do |opts| + opts.banner = "Usage: ./hbase org.jruby.Main #{NAME}.rb [options] add|remove|list <hostname>|<host:port>|<servername> ..." + opts.separator 'Add remove or list servers in draining mode. Can accept either hostname to drain all region servers' \ + 'in that host, a host:port pair or a host,port,startCode triplet. More than one server can be given separated by space' + opts.on('-h', '--help', 'Display usage information') do + puts opts + exit + end +end +optparse.parse! + +# Return array of servernames where servername is hostname+port+startcode +# comma-delimited +def getServers(admin) + serverInfos = admin.getClusterStatus.getServers + servers = [] + serverInfos.each do |server| + servers << server.getServerName + end + servers +end + +# rubocop:disable Metrics/AbcSize +def getServerNames(hostOrServers, config) + ret = [] + connection = ConnectionFactory.createConnection(config) + admin = nil + + hostOrServers.each do |host_or_server| + # check whether it is already serverName. No need to connect to cluster + parts = host_or_server.split(',') + if parts.size == 3 + ret << host_or_server + else + admin ||= connection.getAdmin + servers = getServers(admin) + + host_or_server = host_or_server.tr(':', ',') + servers.each do |server| + ret << server if server.start_with?(host_or_server) + end + end + end + + admin.close if admin + connection.close + ret +end + +def addServers(_options, hostOrServers) + config = HBaseConfiguration.create + servers = getServerNames(hostOrServers, config) + + zkw = org.apache.hadoop.hbase.zookeeper.ZKWatcher.new(config, 'draining_servers', nil) + + begin + parentZnode = zkw.getZNodePaths.drainingZNode + servers.each do |server| + node = ZNodePaths.joinZNode(parentZnode, server) + ZKUtil.createAndFailSilent(zkw, node) + end + ensure + zkw.close + end +end + +def removeServers(_options, hostOrServers) + config = HBaseConfiguration.create + servers = getServerNames(hostOrServers, config) + + zkw = org.apache.hadoop.hbase.zookeeper.ZKWatcher.new(config, 'draining_servers', nil) + + begin + parentZnode = zkw.getZNodePaths.drainingZNode + servers.each do |server| + node = ZNodePaths.joinZNode(parentZnode, server) + ZKUtil.deleteNodeFailSilent(zkw, node) + end + ensure + zkw.close + end +end +# rubocop:enable Metrics/AbcSize + +# list servers in draining mode +def listServers(_options) + config = HBaseConfiguration.create + + zkw = org.apache.hadoop.hbase.zookeeper.ZKWatcher.new(config, 'draining_servers', nil) + + begin + parentZnode = zkw.getZNodePaths.drainingZNode + servers = ZKUtil.listChildrenNoWatch(zkw, parentZnode) + servers.each { |server| puts server } + ensure + zkw.close + end +end + +hostOrServers = ARGV[1..ARGV.size] + +# Create a logger and save it to ruby global +$LOG = LoggerFactory.getLogger(NAME) +case ARGV[0] +when 'add' + if ARGV.length < 2 + puts optparse + exit 1 + end + addServers(options, hostOrServers) +when 'remove' + if ARGV.length < 2 + puts optparse + exit 1 + end + removeServers(options, hostOrServers) +when 'list' + listServers(options) +else + puts optparse + exit 3 +end diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_decommission.py b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_decommission.py index 1ce0b8cb97..df7668728f 100644 --- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_decommission.py +++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_decommission.py @@ -22,6 +22,8 @@ from resource_management.core.source import StaticFile from resource_management.libraries.functions.format import format from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl from ambari_commons import OSConst +from resource_management.core.logger import Logger + @OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY) def hbase_decommission(env): @@ -38,7 +40,7 @@ def hbase_decommission(env): Execute(regiondrainer_cmd, user=params.hbase_user, logoutput=True) else: regiondrainer_cmd = format("cmd /c {hbase_executable} org.jruby.Main {region_drainer} add {host}") - regionmover_cmd = format("cmd /c {hbase_executable} org.jruby.Main {region_mover} unload {host}") + regionmover_cmd = format("cmd /c {hbase_executable} org.jruby.Main {region_mover} -m 24 unload {host}") Execute(regiondrainer_cmd, user=params.hbase_user, logoutput=True) Execute(regionmover_cmd, user=params.hbase_user, logoutput=True) @@ -54,7 +56,11 @@ def hbase_decommission(env): content=StaticFile("draining_servers.rb"), mode=0755 ) - + File(params.region_drainer2, + content=StaticFile("draining_servers2.rb"), + mode=0755 + ) + if params.hbase_excluded_hosts and params.hbase_excluded_hosts.split(","): hosts = params.hbase_excluded_hosts.split(",") elif params.hbase_included_hosts and params.hbase_included_hosts.split(","): @@ -63,32 +69,65 @@ def hbase_decommission(env): if params.hbase_drain_only: for host in hosts: if host: - regiondrainer_cmd = format( - "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_drainer} remove {host}") - Execute(regiondrainer_cmd, - user=params.hbase_user, - logoutput=True - ) - pass + try: + regiondrainer_cmd = format( + "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_drainer} remove {host}") + Execute(regiondrainer_cmd, + user=params.hbase_user, + logoutput=True + ) + pass + except Exception as e: + # Execute HBase 2 scripts if HBase 1 scripts fail. + # If the Exception is genuine, it will fail here because HBase 1 scripts work only for HBase 1 + # and HBase 2 scripts work only for HBase 2 cluster. + Logger.info("HBase 1 RegionMover failed. Will try with HBase 2 RegionMover." + str(e)) + regiondrainer_cmd = format( + "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_drainer2} remove {host}") + Execute(regiondrainer_cmd, + user=params.hbase_user, + logoutput=True + ) + pass pass else: for host in hosts: if host: - regiondrainer_cmd = format( - "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_drainer} add {host}") - regionmover_cmd = format( - "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_mover} unload {host}") - - Execute(regiondrainer_cmd, - user=params.hbase_user, - logoutput=True - ) - - Execute(regionmover_cmd, - user=params.hbase_user, - logoutput=True - ) + try: + regiondrainer_cmd = format( + "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_drainer} add {host}") + regionmover_cmd = format( + "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_mover} -m 24 unload {host}") + + Execute(regiondrainer_cmd, + user=params.hbase_user, + logoutput=True + ) + + Execute(regionmover_cmd, + user=params.hbase_user, + logoutput=True + ) + except Exception as e: + # Execute HBase 2 scripts if HBase 1 scripts fail. + # If the Exception is genuine, it will fail here because HBase 1 scripts work only for HBase 1 + # and HBase 2 scripts work only for HBase 2 cluster. + Logger.info("HBase 1 Region unload failed. Will try with HBase 2." + str(e)) + regiondrainer_cmd = format( + "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_drainer2} add {host}") + regionmover_cmd = format( + "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_mover} -m 24 -o unload -r {host}") + + Execute(regiondrainer_cmd, + user=params.hbase_user, + logoutput=True + ) + + Execute(regionmover_cmd, + user=params.hbase_user, + logoutput=True + ) pass pass pass diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_regionserver.py b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_regionserver.py index 75083acf60..b335d4b24d 100644 --- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_regionserver.py +++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_regionserver.py @@ -38,6 +38,7 @@ from hbase_service import hbase_service import upgrade from setup_ranger_hbase import setup_ranger_hbase from hbase_decommission import hbase_decommission +from resource_management.core.logger import Logger class HbaseRegionServer(Script): @@ -83,12 +84,27 @@ class HbaseRegionServer(Script): host = params.hostname regionmover_cmd = format( - "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_mover} load {host}") - - Execute(regionmover_cmd, - user=params.hbase_user, - logoutput=True - ) + "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_mover} -m 24 load {host}") + + try: + Execute(regionmover_cmd, + user=params.hbase_user, + logoutput=True + ) + except Exception as e: + Logger.info("HBase 1: region_mover failed while loading regions back to source RS." + str(e)) + # Execute HBase 2 scripts if HBase 1 scripts fail. + # If the Exception is genuine, it will fail here because HBase 1 scripts work only for HBase 1 + # and HBase 2 scripts work only for HBase 2 cluster. + try: + regionmover_cmd = format( + "{kinit_cmd} {hbase_cmd} --config {hbase_conf_dir} {client_security_config} org.jruby.Main {region_mover} -m 24 -o load -r {host}") + Execute(regionmover_cmd, + user=params.hbase_user, + logoutput=True + ) + except Exception as ex: + Logger.info("HBase 2: region_mover failed while loading regions back to source RS." + str(ex)) diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py index c702104950..3d32582cef 100644 --- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py +++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py @@ -76,6 +76,7 @@ hadoop_conf_dir = conf_select.get_hadoop_conf_dir() daemon_script = "/usr/lib/hbase/bin/hbase-daemon.sh" region_mover = "/usr/lib/hbase/bin/region_mover.rb" region_drainer = "/usr/lib/hbase/bin/draining_servers.rb" +region_drainer2 = "/usr/lib/hbase/bin/draining_servers2.rb" hbase_replication = "/usr/lib/hbase/bin/hbase_replication.rb" hbase_cmd = "/usr/lib/hbase/bin/hbase" hbase_max_direct_memory_size = None @@ -85,6 +86,7 @@ if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, daemon_script = format('{stack_root}/current/hbase-client/bin/hbase-daemon.sh') region_mover = format('{stack_root}/current/hbase-client/bin/region_mover.rb') region_drainer = format('{stack_root}/current/hbase-client/bin/draining_servers.rb') + region_drainer2 = format('{stack_root}/current/hbase-client/bin/draining_servers2.rb') hbase_cmd = format('{stack_root}/current/hbase-client/bin/hbase') hbase_max_direct_memory_size = default('configurations/hbase-env/hbase_max_direct_memory_size', None) @@ -92,6 +94,7 @@ if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, daemon_script=format("{stack_root}/current/{component_directory}/bin/hbase-daemon.sh") region_mover = format("{stack_root}/current/{component_directory}/bin/region_mover.rb") region_drainer = format("{stack_root}/current/{component_directory}/bin/draining_servers.rb") + region_drainer2 = format("{stack_root}/current/{component_directory}/bin/draining_servers2.rb") hbase_replication = format("{stack_root}/current/{component_directory}/bin/hbase_replication.rb") hbase_cmd = format("{stack_root}/current/{component_directory}/bin/hbase") diff --git a/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_hbase_master.py b/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_hbase_master.py index 39c5b2f208..12a6949cff 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_hbase_master.py +++ b/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_hbase_master.py @@ -192,11 +192,15 @@ class TestHBaseMaster(RMFTestCase): content = StaticFile('draining_servers.rb'), mode = 0755, ) + self.assertResourceCalled('File', '/usr/lib/hbase/bin/draining_servers2.rb', + content = StaticFile('draining_servers2.rb'), + mode = 0755, + ) self.assertResourceCalled('Execute', ' /usr/lib/hbase/bin/hbase --config /etc/hbase/conf org.jruby.Main /usr/lib/hbase/bin/draining_servers.rb add host1', logoutput = True, user = 'hbase', ) - self.assertResourceCalled('Execute', ' /usr/lib/hbase/bin/hbase --config /etc/hbase/conf org.jruby.Main /usr/lib/hbase/bin/region_mover.rb unload host1', + self.assertResourceCalled('Execute', ' /usr/lib/hbase/bin/hbase --config /etc/hbase/conf org.jruby.Main /usr/lib/hbase/bin/region_mover.rb -m 24 unload host1', logoutput = True, user = 'hbase', ) @@ -204,7 +208,7 @@ class TestHBaseMaster(RMFTestCase): logoutput = True, user = 'hbase', ) - self.assertResourceCalled('Execute', ' /usr/lib/hbase/bin/hbase --config /etc/hbase/conf org.jruby.Main /usr/lib/hbase/bin/region_mover.rb unload host2', + self.assertResourceCalled('Execute', ' /usr/lib/hbase/bin/hbase --config /etc/hbase/conf org.jruby.Main /usr/lib/hbase/bin/region_mover.rb -m 24 unload host2', logoutput = True, user = 'hbase', ) @@ -223,6 +227,10 @@ class TestHBaseMaster(RMFTestCase): content = StaticFile('draining_servers.rb'), mode = 0755, ) + self.assertResourceCalled('File', '/usr/lib/hbase/bin/draining_servers2.rb', + content = StaticFile('draining_servers2.rb'), + mode = 0755, + ) self.assertResourceCalled('Execute', ' /usr/lib/hbase/bin/hbase --config /etc/hbase/conf org.jruby.Main /usr/lib/hbase/bin/draining_servers.rb remove host1', logoutput = True, user = 'hbase', @@ -291,11 +299,15 @@ class TestHBaseMaster(RMFTestCase): content = StaticFile('draining_servers.rb'), mode = 0755, ) + self.assertResourceCalled('File', '/usr/lib/hbase/bin/draining_servers2.rb', + content = StaticFile('draining_servers2.rb'), + mode = 0755, + ) self.assertResourceCalled('Execute', '/usr/bin/kinit -kt /etc/security/keytabs/hbase.service.keytab hbase/c6401.ambari.apache....@example.com; /usr/lib/hbase/bin/hbase --config /etc/hbase/conf -Djava.security.auth.login.config=/etc/hbase/conf/hbase_client_jaas.conf org.jruby.Main /usr/lib/hbase/bin/draining_servers.rb add host1', logoutput = True, user = 'hbase', ) - self.assertResourceCalled('Execute', '/usr/bin/kinit -kt /etc/security/keytabs/hbase.service.keytab hbase/c6401.ambari.apache....@example.com; /usr/lib/hbase/bin/hbase --config /etc/hbase/conf -Djava.security.auth.login.config=/etc/hbase/conf/hbase_client_jaas.conf org.jruby.Main /usr/lib/hbase/bin/region_mover.rb unload host1', + self.assertResourceCalled('Execute', '/usr/bin/kinit -kt /etc/security/keytabs/hbase.service.keytab hbase/c6401.ambari.apache....@example.com; /usr/lib/hbase/bin/hbase --config /etc/hbase/conf -Djava.security.auth.login.config=/etc/hbase/conf/hbase_client_jaas.conf org.jruby.Main /usr/lib/hbase/bin/region_mover.rb -m 24 unload host1', logoutput = True, user = 'hbase', ) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@ambari.apache.org For additional commands, e-mail: commits-h...@ambari.apache.org