Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package crmsh for openSUSE:Factory checked in at 2025-04-20 09:35:10
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/crmsh (Old)
 and      /work/SRC/openSUSE:Factory/.crmsh.new.30101 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "crmsh" Sun Apr 20 09:35:10 2025 rev:364 rq:1270864 version:5.0.0+20250418.bb6c26c2 Thu Apr 17 16:10:12 2025 rev:363 rq:1270229 version:5.0.0+20250417.367bbfad Changes: -------- --- /work/SRC/openSUSE:Factory/crmsh/crmsh.changes 2025-04-03 16:51:53.631970717 +0200 +++ /work/SRC/openSUSE:Factory/.crmsh.new.30101/crmsh.changes 2025-04-20 19:51:02.262549551 +0200 @@ -1,0 +2,24 @@ +Fri Apr 18 14:42:07 UTC 2025 - xli...@suse.com + +- Update to version 5.0.0+20250418.bb6c26c2: + * Dev: unittests: Adjust unit test for previous commit + * Dev: ui_sbd: Improve log info when adjust sbd related timeout + * Dev: doc: Update crm.8.adoc for using -F/--force option for leveraging maintenance mode + * Dev: sbd: Leverage maintenance mode when need to restart cluster (jsc#PED-11931) + +------------------------------------------------------------------- +Thu Apr 17 08:27:07 UTC 2025 - xli...@suse.com + +- Update to version 5.0.0+20250417.367bbfad: + * Dev: unittests: Adjust unit test for previous commit + * Dev: behave: Adjust functional test for previous commit + * Dev: ui_sbd: Show fence_sbd parameter in 'sbd configure show' + * Dev: doc: Update doc/crm.8.adoc to add crashdump option + * Dev: ui_sbd: Add 'crashdump' option for 'sbd purge' command + * Dev: ui_sbd: Refactor the condition for configuring crashdump + * Dev: ui_sbd: Compare crashdump watchdog timeout value if configured + * Dev: ui_sbd: Add warning to emphasize that kdump service is required + * Dev: ui_sbd: Update the re pattern to match the possible arguments + * Dev: ui_sbd: Configure crashdump watchdog timeout (jsc#PED-11931) + +------------------------------------------------------------------- Old: ---- crmsh-5.0.0+20250403.1442a04a.tar.bz2 New: ---- crmsh-5.0.0+20250418.bb6c26c2.tar.bz2 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ crmsh.spec ++++++ --- /var/tmp/diff_new_pack.3fAZbx/_old 2025-04-20 19:51:04.222631123 +0200 +++ /var/tmp/diff_new_pack.3fAZbx/_new 2025-04-20 19:51:04.238631789 +0200 @@ -36,7 +36,7 @@ Summary: High Availability cluster command-line interface License: GPL-2.0-or-later Group: %{pkg_group} -Version: 5.0.0+20250403.1442a04a +Version: 5.0.0+20250418.bb6c26c2 Release: 0 URL: http://crmsh.github.io Source0: %{name}-%{version}.tar.bz2 ++++++ _servicedata ++++++ --- /var/tmp/diff_new_pack.3fAZbx/_old 2025-04-20 19:51:04.602646938 +0200 +++ /var/tmp/diff_new_pack.3fAZbx/_new 2025-04-20 19:51:04.650648936 +0200 @@ -9,7 +9,7 @@ </service> <service name="tar_scm"> <param name="url">https://github.com/ClusterLabs/crmsh.git</param> - <param name="changesrevision">1442a04aba88f012c0995982e64ed9589f140a45</param> + <param name="changesrevision">bfb056b1c60929d7b97d9ee1c37d0e3714c60d46</param> </service> </servicedata> (No newline at EOF) ++++++ crmsh-5.0.0+20250403.1442a04a.tar.bz2 -> crmsh-5.0.0+20250418.bb6c26c2.tar.bz2 ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/crmsh-5.0.0+20250403.1442a04a/crmsh/bootstrap.py new/crmsh-5.0.0+20250418.bb6c26c2/crmsh/bootstrap.py --- old/crmsh-5.0.0+20250403.1442a04a/crmsh/bootstrap.py 2025-04-03 10:05:11.000000000 +0200 +++ new/crmsh-5.0.0+20250418.bb6c26c2/crmsh/bootstrap.py 2025-04-18 16:21:27.000000000 +0200 @@ -2725,12 +2725,12 @@ for res in cib_factory.fence_id_list_without_pcmk_delay(): cmd = "crm resource param {} set pcmk_delay_max {}s".format(res, PCMK_DELAY_MAX) shell.get_stdout_or_raise_error(cmd) - logger.debug("Add parameter 
'pcmk_delay_max={}s' for resource '{}'".format(PCMK_DELAY_MAX, res)) + logger.info("Add parameter 'pcmk_delay_max={}s' for resource '{}'".format(PCMK_DELAY_MAX, res)) else: for res in cib_factory.fence_id_list_with_pcmk_delay(): cmd = "crm resource param {} delete pcmk_delay_max".format(res) shell.get_stdout_or_raise_error(cmd) - logger.debug("Delete parameter 'pcmk_delay_max' for resource '{}'".format(res)) + logger.info("Delete parameter 'pcmk_delay_max' for resource '{}'".format(res)) def adjust_stonith_timeout(): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/crmsh-5.0.0+20250403.1442a04a/crmsh/cibquery.py new/crmsh-5.0.0+20250418.bb6c26c2/crmsh/cibquery.py --- old/crmsh-5.0.0+20250403.1442a04a/crmsh/cibquery.py 2025-04-03 10:05:11.000000000 +0200 +++ new/crmsh-5.0.0+20250418.bb6c26c2/crmsh/cibquery.py 2025-04-18 16:21:27.000000000 +0200 @@ -33,6 +33,26 @@ f'/instance_attributes/nvpair[@name="fstype" and @value="{fstype}"]' )) + +def get_primitives_with_ra(cib: lxml.etree.Element, ra: ResourceAgent) -> list[str]: + """ + Given cib and ResourceAgent instance, return id list of primitives that matched + consider provider as optional + """ + provider_condition = f' and @provider="{ra.m_provider}"' if ra.m_provider else "" + return cib.xpath( + f'/cib/configuration/resources//primitive[@class="{ra.m_class}"{provider_condition} and @type="{ra.m_type}"]/@id' + ) + + +def get_parameter_value(cib: lxml.etree.Element, res_id: str, param_name: str) -> typing.Optional[str]: + result = cib.xpath( + f'/cib/configuration/resources//primitive[@id="{res_id}"]' + f'/instance_attributes/nvpair[@name="{param_name}"]/@value' + ) + return result[0] if result else None + + def get_cluster_nodes(cib: lxml.etree.Element) -> list[ClusterNode]: """Return a list of cluster nodes, excluding pacemaker-remote nodes""" result = list() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/crmsh-5.0.0+20250403.1442a04a/crmsh/sbd.py new/crmsh-5.0.0+20250418.bb6c26c2/crmsh/sbd.py --- old/crmsh-5.0.0+20250403.1442a04a/crmsh/sbd.py 2025-04-03 10:05:11.000000000 +0200 +++ new/crmsh-5.0.0+20250418.bb6c26c2/crmsh/sbd.py 2025-04-18 16:21:27.000000000 +0200 @@ -81,6 +81,14 @@ return utils.parse_sysconfig(SBDManager.SYSCONFIG_SBD).get(key) @staticmethod + def get_crashdump_watchdog_timeout() -> typing.Optional[int]: + res = SBDUtils.get_sbd_value_from_config("SBD_OPTS") + if not res: + return None + matched = re.search(r"-C\s+(\d+)", res) + return int(matched.group(1)) if matched else None + + @staticmethod def get_sbd_device_from_config(): ''' Get sbd device list from config @@ -546,11 +554,13 @@ service_manager.enable_service(constants.SBD_SERVICE, node) @staticmethod - def restart_cluster_if_possible(): + def restart_cluster_if_possible(with_maintenance_mode=False): if not ServiceManager().service_is_active(constants.PCMK_SERVICE): return - if xmlutil.CrmMonXmlParser().is_any_resource_running(): + if xmlutil.CrmMonXmlParser().is_any_resource_running() and not with_maintenance_mode: logger.warning("Resource is running, need to restart cluster service manually on each node") + logger.warning("Or, run with `crm -F` or `--force` option, the `sbd` subcommand will leverage maintenance mode for any changes that require restarting sbd.service") + logger.warning("Understand risks that running RA has no cluster protection while the cluster is in maintenance mode and restarting") else: bootstrap.restart_cluster() @@ -559,7 +569,8 @@ Configure fence_sbd 
resource and related properties ''' if self.diskless_sbd: - utils.set_property("stonith-watchdog-timeout", SBDTimeout.STONITH_WATCHDOG_TIMEOUT_DEFAULT) + swt_value = self.timeout_dict.get("stonith-watchdog", SBDTimeout.STONITH_WATCHDOG_TIMEOUT_DEFAULT) + utils.set_property("stonith-watchdog-timeout", swt_value) else: if utils.get_property("stonith-watchdog-timeout", get_default=False): utils.delete_property("stonith-watchdog-timeout") @@ -684,14 +695,15 @@ return self._load_attributes_from_bootstrap() - self.initialize_sbd() - self.update_configuration() - SBDManager.enable_sbd_service() - - if self.cluster_is_running: - SBDManager.restart_cluster_if_possible() - self.configure_sbd() - bootstrap.adjust_properties() + with utils.leverage_maintenance_mode() as enabled: + self.initialize_sbd() + self.update_configuration() + SBDManager.enable_sbd_service() + + if self.cluster_is_running: + SBDManager.restart_cluster_if_possible(with_maintenance_mode=enabled) + self.configure_sbd() + bootstrap.adjust_properties() def join_sbd(self, remote_user, peer_host): ''' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/crmsh-5.0.0+20250403.1442a04a/crmsh/ui_sbd.py new/crmsh-5.0.0+20250418.bb6c26c2/crmsh/ui_sbd.py --- old/crmsh-5.0.0+20250403.1442a04a/crmsh/ui_sbd.py 2025-04-03 10:05:11.000000000 +0200 +++ new/crmsh-5.0.0+20250418.bb6c26c2/crmsh/ui_sbd.py 2025-04-18 16:21:27.000000000 +0200 @@ -12,6 +12,7 @@ from crmsh import sh from crmsh import xmlutil from crmsh import constants +from crmsh import cibquery from crmsh.service_manager import ServiceManager @@ -88,30 +89,37 @@ - sbd purge ''' name = "sbd" - TIMEOUT_TYPES = ("watchdog", "allocate", "loop", "msgwait") - DISKLESS_TIMEOUT_TYPES = ("watchdog",) + TIMEOUT_TYPES = ("watchdog", "allocate", "loop", "msgwait", "crashdump-watchdog") + DISKLESS_TIMEOUT_TYPES = ("watchdog", "crashdump-watchdog") SHOW_TYPES = ("disk_metadata", "sysconfig", "property") DISKLESS_SHOW_TYPES = ("sysconfig", "property") PCMK_ATTRS = ( "have-watchdog", "stonith-timeout", - "stonith-enabled", - "priority-fencing-delay", - "pcmk_delay_max" + "stonith-enabled" ) PCMK_ATTRS_DISKLESS = ('stonith-watchdog-timeout',) PARSE_RE = re.compile( - # Match keys with non-empty values, capturing possible suffix - r'(\w+)(?:-(\w+))?=("[^"]+"|[\w/\d;]+)' + # To extract key, suffix and value from these possible arguments: + # watchdog-timeout=30 + # crashdump-watchdog-timeout=120 + # watchdog-device=/dev/watchdog + r'([\w-]+)-([\w]+)=([\w/]+)' ) + # re pattern to match "-C <number>" or "-C <number> -Z" + SBD_OPTS_RE = r'-C\s+\d+(\s+-Z)?' 
class SyntaxError(Exception): pass + class MissingRequiredException(Exception): + pass + def __init__(self): self.device_list_from_config: list[str] = None self.device_meta_dict_runtime: dict[str, int] = None self.watchdog_timeout_from_config: int = None + self.crashdump_watchdog_timeout_from_config: int = None self.watchdog_device_from_config: str = None self.service_manager: ServiceManager = None self.cluster_shell: sh.cluster_shell = None @@ -130,6 +138,7 @@ except Exception: self.watchdog_timeout_from_config = None self.watchdog_device_from_config = watchdog.Watchdog.get_watchdog_device_from_sbd_config() + self.crashdump_watchdog_timeout_from_config = sbd.SBDUtils.get_crashdump_watchdog_timeout() self.service_manager = ServiceManager() self.cluster_shell = sh.cluster_shell() @@ -217,6 +226,13 @@ for match in matches: print(f"{match[0]}={match[1]}") + cmd = "crm configure show related:fence_sbd" + out = self.cluster_shell.get_stdout_or_raise_error(cmd) + if out: + print() + logger.info('%s', cmd) + print(out) + print() logger.info('%s', sbd.SBDTimeout.SHOW_SBD_START_TIMEOUT_CMD) systemd_start_timeout = sbd.SBDTimeout.get_sbd_systemd_start_timeout() @@ -280,13 +296,100 @@ return timeout_dict if watchdog_timeout and not msgwait_timeout: timeout_dict["msgwait"] = 2*watchdog_timeout - logger.info("No msgwait timeout specified, use 2*watchdog timeout: %s", 2*watchdog_timeout) + logger.info("No 'msgwait-timeout=' specified in the command, use 2*watchdog timeout: %s", 2*watchdog_timeout) return timeout_dict if msgwait_timeout and not watchdog_timeout: watchdog_timeout = msgwait_timeout//2 timeout_dict["watchdog"] = watchdog_timeout - logger.info("No watchdog timeout specified, use msgwait timeout/2: %s", watchdog_timeout) + logger.info("No 'watchdog-timeout=' specified in the command, use msgwait timeout/2: %s", watchdog_timeout) return timeout_dict + return timeout_dict + + def _set_crashdump_option(self, delete=False): + ''' + Set crashdump option for fence_sbd resource + ''' + cib = xmlutil.text2elem(self.cluster_shell.get_stdout_or_raise_error('crm configure show xml')) + ra = cibquery.ResourceAgent("stonith", "", "fence_sbd") + res_id_list = cibquery.get_primitives_with_ra(cib, ra) + if not res_id_list: + if delete: + return + logger.error("No fence_sbd resource found") + raise self.MissingRequiredException + + crashdump_value = cibquery.get_parameter_value(cib, res_id_list[0], "crashdump") + cmd = "" + if utils.is_boolean_false(crashdump_value): + if delete: + return + cmd = f"crm resource param {res_id_list[0]} set crashdump 1" + logger.info("Set crashdump option for fence_sbd resource") + elif delete: + cmd = f"crm resource param {res_id_list[0]} delete crashdump" + logger.info("Delete crashdump option for fence_sbd resource") + if cmd: + self.cluster_shell.get_stdout_or_raise_error(cmd) + + def _set_crashdump_in_sysconfig(self, crashdump_watchdog_timeout=None, restore=False, diskless=False) -> dict: + update_dict = {} + sbd_timeout_action_for_crashdump = "flush,crashdump" + comment_action_line = f"sed -i '/^SBD_TIMEOUT_ACTION/s/^/#__sbd_crashdump_backup__ /' {sbd.SBDManager.SYSCONFIG_SBD}" + add_action_line = f"sed -i '/^#__sbd_crashdump_backup__/a SBD_TIMEOUT_ACTION={sbd_timeout_action_for_crashdump}' {sbd.SBDManager.SYSCONFIG_SBD}" + comment_out_action_line = f"sed -i 's/^#__sbd_crashdump_backup__ SBD_TIMEOUT_ACTION/SBD_TIMEOUT_ACTION/' {sbd.SBDManager.SYSCONFIG_SBD}" + delete_action_line = f"sed -i '/^SBD_TIMEOUT_ACTION/d' {sbd.SBDManager.SYSCONFIG_SBD}" + + 
sbd_timeout_action_configured = sbd.SBDUtils.get_sbd_value_from_config("SBD_TIMEOUT_ACTION") + if restore: + if sbd_timeout_action_configured and sbd_timeout_action_configured == sbd_timeout_action_for_crashdump: + cmd_delete_and_comment_out = f"{delete_action_line} && {comment_out_action_line}" + logger.info("Delete SBD_TIMEOUT_ACTION: %s and restore original value", sbd_timeout_action_for_crashdump) + self.cluster_shell.get_stdout_or_raise_error(cmd_delete_and_comment_out) + + sbd_opts = sbd.SBDUtils.get_sbd_value_from_config("SBD_OPTS") + if sbd_opts and re.search(self.SBD_OPTS_RE, sbd_opts): + sbd_opts = re.sub(self.SBD_OPTS_RE, '', sbd_opts) + update_dict["SBD_OPTS"] = ' '.join(sbd_opts.split()) + + elif crashdump_watchdog_timeout: + if not sbd_timeout_action_configured: + update_dict["SBD_TIMEOUT_ACTION"] = sbd_timeout_action_for_crashdump + elif sbd_timeout_action_configured != sbd_timeout_action_for_crashdump: + cmd_comment_and_add = f"{comment_action_line} && {add_action_line}" + self.cluster_shell.get_stdout_or_raise_error(cmd_comment_and_add) + logger.info("Update SBD_TIMEOUT_ACTION in %s: %s", sbd.SBDManager.SYSCONFIG_SBD, sbd_timeout_action_for_crashdump) + + value_for_diskless = " -Z" if diskless else "" + value_for_sbd_opts = f"-C {crashdump_watchdog_timeout}{value_for_diskless}" + sbd_opts = sbd.SBDUtils.get_sbd_value_from_config("SBD_OPTS") + sbd_opts = re.sub(self.SBD_OPTS_RE, '', sbd_opts) + update_dict["SBD_OPTS"] = f"{' '.join(sbd_opts.split())} {value_for_sbd_opts}" if sbd_opts else value_for_sbd_opts + + return update_dict + + def _check_kdump_service(self): + no_kdump = False + for node in self.cluster_nodes: + if not self.service_manager.service_is_active("kdump.service", node): + logger.warning("Kdump service is not active on %s", node) + no_kdump = True + if no_kdump: + logger.warning("Kdump service is required for crashdump") + + def _should_configure_crashdump( + self, + crashdump_watchdog_timeout, + watchdog_timeout, + diskless=False + ) -> bool: + if not crashdump_watchdog_timeout and not self.crashdump_watchdog_timeout_from_config: + return False + ct_updated = crashdump_watchdog_timeout and \ + crashdump_watchdog_timeout != self.crashdump_watchdog_timeout_from_config + watchdog_timeout_configured = self.watchdog_timeout_from_config if diskless \ + else self.device_meta_dict_runtime.get("watchdog") + wt_updated = watchdog_timeout and watchdog_timeout != watchdog_timeout_configured + return ct_updated or wt_updated def _configure_diskbase(self, parameter_dict: dict): ''' @@ -296,18 +399,28 @@ watchdog_device = parameter_dict.get("watchdog-device") if watchdog_device != self.watchdog_device_from_config: update_dict["SBD_WATCHDOG_DEV"] = watchdog_device - timeout_dict = {k: v for k, v in parameter_dict.items() if k in self.TIMEOUT_TYPES} - is_subdict_timeout = utils.is_subdict(timeout_dict, self.device_meta_dict_runtime) - if is_subdict_timeout and not update_dict: + timeout_dict = { + k: v for k, v in parameter_dict.items() + if k in self.TIMEOUT_TYPES and k != "crashdump-watchdog" + } + timeout_dict = self._adjust_timeout_dict(timeout_dict) + # merge runtime timeout dict into parameter timeout dict without overwriting + timeout_dict = {**self.device_meta_dict_runtime, **timeout_dict} + + crashdump_watchdog_timeout = parameter_dict.get("crashdump-watchdog", self.crashdump_watchdog_timeout_from_config) + if self._should_configure_crashdump(crashdump_watchdog_timeout, timeout_dict.get("watchdog")): + self._check_kdump_service() + self._set_crashdump_option() + 
timeout_dict["msgwait"] = 2*timeout_dict["watchdog"] + crashdump_watchdog_timeout + logger.info("Set msgwait-timeout to 2*watchdog-timeout + crashdump-watchdog-timeout: %s", timeout_dict["msgwait"]) + result_dict = self._set_crashdump_in_sysconfig(crashdump_watchdog_timeout) + update_dict = {**update_dict, **result_dict} + + if timeout_dict == self.device_meta_dict_runtime and not update_dict: logger.info("No change in SBD configuration") return - if not is_subdict_timeout: - timeout_dict = self._adjust_timeout_dict(timeout_dict) - # merge runtime timeout dict into parameter timeout dict without overwriting - timeout_dict = {**self.device_meta_dict_runtime, **timeout_dict} - sbd_manager = sbd.SBDManager( device_list_to_init=self.device_list_from_config, timeout_dict=timeout_dict, @@ -320,17 +433,30 @@ Configure diskless SBD based on input parameters and runtime config ''' update_dict = {} + timeout_dict = {} + watchdog_timeout = parameter_dict.get("watchdog") if watchdog_timeout and watchdog_timeout != self.watchdog_timeout_from_config: update_dict["SBD_WATCHDOG_TIMEOUT"] = str(watchdog_timeout) watchdog_device = parameter_dict.get("watchdog-device") if watchdog_device != self.watchdog_device_from_config: update_dict["SBD_WATCHDOG_DEV"] = watchdog_device + + crashdump_watchdog_timeout = parameter_dict.get("crashdump-watchdog", self.crashdump_watchdog_timeout_from_config) + if self._should_configure_crashdump(crashdump_watchdog_timeout, watchdog_timeout, diskless=True): + self._check_kdump_service() + result_dict = self._set_crashdump_in_sysconfig(crashdump_watchdog_timeout, diskless=True) + update_dict = {**update_dict, **result_dict} + sbd_watchdog_timeout = watchdog_timeout or self.watchdog_timeout_from_config + stonith_watchdog_timeout = sbd_watchdog_timeout + crashdump_watchdog_timeout + logger.info("Set stonith-watchdog-timeout to SBD_WATCHDOG_TIMEOUT + crashdump-watchdog-timeout: %s", stonith_watchdog_timeout) + timeout_dict["stonith-watchdog"] = stonith_watchdog_timeout if not update_dict: logger.info("No change in SBD configuration") return sbd_manager = sbd.SBDManager( + timeout_dict=timeout_dict, update_dict=update_dict, diskless_sbd=True ) @@ -379,6 +505,7 @@ ''' Implement sbd device command ''' + self._load_attributes() if not self.service_is_active(constants.PCMK_SERVICE): return False if not sbd.SBDUtils.is_using_disk_based_sbd(): @@ -417,15 +544,16 @@ Implement sbd configure command ''' try: - for service in (constants.PCMK_SERVICE, constants.SBD_SERVICE): - if not self.service_is_active(service): - return False + self._load_attributes() if not args: raise self.SyntaxError("No argument") - if args[0] == "show": self._configure_show(args) return True + for service in (constants.PCMK_SERVICE, constants.SBD_SERVICE): + if not self.service_is_active(service): + return False + parameter_dict = self._parse_args(args) if sbd.SBDUtils.is_using_disk_based_sbd(): self._configure_diskbase(parameter_dict) @@ -439,13 +567,26 @@ if usage: print(usage) return False + except self.MissingRequiredException: + return False - def do_purge(self, context) -> bool: + @command.completers(completers.choice(['crashdump'])) + def do_purge(self, context, *args) -> bool: ''' Implement sbd purge command ''' + self._load_attributes() if not self.service_is_active(constants.SBD_SERVICE): return False + + if args and args[0] == "crashdump": + self._set_crashdump_option(delete=True) + update_dict = self._set_crashdump_in_sysconfig(restore=True) + if update_dict: + 
sbd.SBDManager.update_sbd_configuration(update_dict) + sbd.SBDManager.restart_cluster_if_possible() + return True + sbd.purge_sbd_from_cluster() sbd.SBDManager.restart_cluster_if_possible() return True @@ -544,6 +685,7 @@ ''' Implement sbd status command ''' + self._load_attributes() self._print_sbd_type() self._print_sbd_status() self._print_sbd_cgroup_status() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/crmsh-5.0.0+20250403.1442a04a/crmsh/utils.py new/crmsh-5.0.0+20250418.bb6c26c2/crmsh/utils.py --- old/crmsh-5.0.0+20250403.1442a04a/crmsh/utils.py 2025-04-03 10:05:11.000000000 +0200 +++ new/crmsh-5.0.0+20250418.bb6c26c2/crmsh/utils.py 2025-04-18 16:21:27.000000000 +0200 @@ -976,6 +976,20 @@ return False +def is_dc_idle(): + dc = get_dc() + if not dc: + return False + cmd = f"crmadmin -S {dc}" + rc, out, err = ShellUtils().get_stdout_stderr(cmd) + if rc != 0 and err: + logger.error("Failed to get DC status: %s", err) + return False + if not out: + return False + return "ok" in out and "S_IDLE" in out + + def get_dc(peer=None): cmd = "crmadmin -D -t 1" _, out, _ = sh.cluster_shell().get_rc_stdout_stderr_without_input(peer, cmd) @@ -2788,6 +2802,25 @@ return False +@contextmanager +def leverage_maintenance_mode() -> bool: + if not config.core.force: + yield False + return + + if is_dc_idle(): + try: + logger.info("Set cluster to maintenance mode") + set_property("maintenance-mode", "true") + yield True + finally: + logger.info("Set cluster from maintenance mode to normal") + delete_property("maintenance-mode") + else: + logger.warning("Pacemaker state transition is in progress. Skip restarting cluster in maintenance mode.") + yield False + + def check_no_quorum_policy_with_dlm(): """ Give warning when no-quorum-policy not freeze while configured DLM @@ -3193,11 +3226,4 @@ """ ansi_escape_pattern = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]') return ansi_escape_pattern.sub('', text) - - -def is_subdict(sub_dict, main_dict): - """ - Check if sub_dict is a sub-dictionary of main_dict - """ - return all(main_dict.get(k) == v for k, v in sub_dict.items()) # vim:ts=4:sw=4:et: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/crmsh-5.0.0+20250403.1442a04a/doc/crm.8.adoc new/crmsh-5.0.0+20250418.bb6c26c2/doc/crm.8.adoc --- old/crmsh-5.0.0+20250403.1442a04a/doc/crm.8.adoc 2025-04-03 10:05:11.000000000 +0200 +++ new/crmsh-5.0.0+20250418.bb6c26c2/doc/crm.8.adoc 2025-04-18 16:21:27.000000000 +0200 @@ -2126,6 +2126,11 @@ as well as the on-disk metadata for the disk-based scenario. Currently, SBD management requires a running cluster. +When run with `crm -F` or `--force` option, the `sbd` subcommand will leverage maintenance mode +for any changes that require restarting sbd.service. +WARNING: Understand risks that running RA has no cluster protection while the cluster is +in maintenance mode and restarting + [[cmdhelp.sbd.configure,configure SBD]] ==== `configure` @@ -2136,6 +2141,7 @@ - Show contents of /etc/sysconfig/sbd - Show SBD related cluster properties - Update the SBD related configuration parameters +- NOTE: sbd crashdump is used for debugging. Understand the risks and run `crm sbd purge crashdump` afterward For more details on SBD and related parameters, please see man sbd(8). @@ -2143,11 +2149,11 @@ ............... 
# For disk-based SBD crm sbd configure show [disk_metadata|sysconfig|property] -crm sbd configure [watchdog-timeout=<integer>] [allocate-timeout=<integer>] [loop-timeout=<integer>] [msgwait-timeout=<integer>] [watchdog-device=<device>] +crm sbd configure [watchdog-timeout=<integer>] [allocate-timeout=<integer>] [loop-timeout=<integer>] [msgwait-timeout=<integer>] [crashdump-watchdog-timeout=<integer>] [watchdog-device=<device>] # For disk-less SBD crm sbd configure show [sysconfig|property] -crm sbd configure [watchdog-timeout=<integer>] [watchdog-device=<device>] +crm sbd configure [watchdog-timeout=<integer>] [crashdump-watchdog-timeout=<integer>] [watchdog-device=<device>] ............... example: @@ -2189,10 +2195,13 @@ Disable the systemd sbd.service on all cluster nodes, move the sbd sysconfig to .bak and adjust SBD related cluster properties. +If `crashdump` is specified, the crashdump related configurations will be +removed. Usage: ............... purge +purge crashdump ............... [[cmdhelp.node,Node management]] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/crmsh-5.0.0+20250403.1442a04a/test/features/sbd_ui.feature new/crmsh-5.0.0+20250418.bb6c26c2/test/features/sbd_ui.feature --- old/crmsh-5.0.0+20250403.1442a04a/test/features/sbd_ui.feature 2025-04-03 10:05:11.000000000 +0200 +++ new/crmsh-5.0.0+20250418.bb6c26c2/test/features/sbd_ui.feature 2025-04-18 16:21:27.000000000 +0200 @@ -38,7 +38,7 @@ When Try "crm sbd configure watchdog-timeout=f" Then Except "ERROR: Invalid timeout value: f" When Try "crm sbd configure name=testing" - Then Except "ERROR: Unknown argument: name=testing" + Then Except "ERROR: Invalid argument: name=testing" When Try "crm sbd device add /dev/sda6 /dev/sda6" Then Expected "Duplicated device path detected" in stderr When Try "crm sbd device add /dev/sda6 /dev/sda7 /dev/sda8" @@ -49,6 +49,26 @@ When Run "crm sbd configure watchdog-timeout=30 msgwait-timeout=60" on "hanode1" Then Run "crm sbd configure show disk_metadata|grep -E "watchdog.*30"" OK Then Run "crm sbd configure show disk_metadata|grep -E "msgwait.*60"" OK + When Run "crm sbd configure watchdog-timeout=30 msgwait-timeout=60" on "hanode1" + Then Expected "No change in SBD configuration" in stdout + + Scenario: sbd configure for crashdump, disk-based sbd case + When Run "crm sbd configure crashdump-watchdog-timeout=60" on "hanode1" + Then Run "crm sbd configure show disk_metadata|grep -E "watchdog.*30"" OK + Then Run "crm sbd configure show disk_metadata|grep -E "msgwait.*120"" OK + Then Run "crm configure show stonith-sbd|grep crashdump=1" OK + Then Run "crm sbd configure show sysconfig |grep SBD_TIMEOUT_ACTION=flush,crashdump" OK + Then Run "crm sbd configure show sysconfig |grep "SBD_OPTS=\"-C 60\""" OK + When Run "crm sbd configure crashdump-watchdog-timeout=60" on "hanode1" + Then Expected "No change in SBD configuration" in stdout + # Purge crashdump + When Run "crm sbd purge crashdump" on "hanode1" + And Try "crm configure show stonith-sbd|grep crashdump" + Then Expected return code is "1" + When Try "crm sbd configure show sysconfig |grep SBD_TIMEOUT_ACTION=flush,crashdump" + Then Expected return code is "1" + When Try "crm sbd configure show sysconfig |grep "SBD_OPTS=\"-C 60\""" + Then Expected return code is "1" Scenario: sbd device add and remove # Add a sbd disk @@ -57,7 +77,7 @@ Then Run "crm sbd configure show sysconfig|grep -E "SBD_DEVICE=\"/dev/sda5;/dev/sda6\""" OK Then Run "crm sbd configure show sysconfig|grep -E 
"SBD_DEVICE=\"/dev/sda5;/dev/sda6\""" OK on "hanode2" And Run "crm sbd configure show disk_metadata |grep -A 8 '/dev/sda6'|grep -E "watchdog.*30"" OK - And Run "crm sbd configure show disk_metadata |grep -A 8 '/dev/sda6'|grep -E "msgwait.*60"" OK + And Run "crm sbd configure show disk_metadata |grep -A 8 '/dev/sda6'|grep -E "msgwait.*120"" OK When Run "crm cluster restart --all" on "hanode1" And Wait for DC # Remove a sbd disk @@ -71,7 +91,7 @@ Then Run "crm sbd configure show sysconfig|grep -E "SBD_DEVICE=\"/dev/sda6;/dev/sda7\""" OK Then Run "crm sbd configure show sysconfig|grep -E "SBD_DEVICE=\"/dev/sda6;/dev/sda7\""" OK on "hanode2" And Run "crm sbd configure show disk_metadata |grep -A 8 '/dev/sda7'|grep -E "watchdog.*30"" OK - And Run "crm sbd configure show disk_metadata |grep -A 8 '/dev/sda7'|grep -E "msgwait.*60"" OK + And Run "crm sbd configure show disk_metadata |grep -A 8 '/dev/sda7'|grep -E "msgwait.*120"" OK When Run "crm cluster restart --all" on "hanode1" And Wait for DC # Purge sbd from cluster @@ -93,6 +113,20 @@ # Shoud not has any sbd device configured When Try "crm sbd configure show sysconfig|grep -E "SBD_DEVICE=.+"" Then Expected return code is "1" + # enable crashdump + Then Run "crm sbd configure show sysconfig |grep SBD_WATCHDOG_TIMEOUT=15" OK + When Run "crm sbd configure crashdump-watchdog-timeout=60" on "hanode1" + Then Run "crm sbd configure show sysconfig |grep SBD_TIMEOUT_ACTION=flush,crashdump" OK + Then Run "crm sbd configure show sysconfig |grep "SBD_OPTS=\"-C 60 -Z\""" OK + Then Run "crm sbd configure show property |grep stonith-watchdog-timeout=75" OK + When Run "crm sbd configure crashdump-watchdog-timeout=60" on "hanode1" + Then Expected "No change in SBD configuration" in stdout + # Purge crashdump + When Run "crm sbd purge crashdump" on "hanode1" + When Try "crm sbd configure show sysconfig |grep SBD_TIMEOUT_ACTION=flush,crashdump" + Then Expected return code is "1" + When Try "crm sbd configure show sysconfig |grep "SBD_OPTS=\"-C 60 -Z\""" + Then Expected return code is "1" # Purge sbd from cluster When Run "crm sbd purge" on "hanode1" And Run "crm cluster restart --all" on "hanode1" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/crmsh-5.0.0+20250403.1442a04a/test/unittests/test_sbd.py new/crmsh-5.0.0+20250418.bb6c26c2/test/unittests/test_sbd.py --- old/crmsh-5.0.0+20250403.1442a04a/test/unittests/test_sbd.py 2025-04-03 10:05:11.000000000 +0200 +++ new/crmsh-5.0.0+20250418.bb6c26c2/test/unittests/test_sbd.py 2025-04-18 16:21:27.000000000 +0200 @@ -97,6 +97,18 @@ result = SBDUtils.get_sbd_device_from_config() self.assertEqual(result, ['/dev/sbd_device', '/dev/another_sbd_device']) + @patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + def test_get_crashdump_watchdog_timeout_none(self, mock_get_sbd_value_from_config): + mock_get_sbd_value_from_config.return_value = None + result = SBDUtils.get_crashdump_watchdog_timeout() + self.assertIsNone(result) + + @patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + def test_get_crashdump_watchdog_timeout(self, mock_get_sbd_value_from_config): + mock_get_sbd_value_from_config.return_value = "-C 60 -Z" + result = SBDUtils.get_crashdump_watchdog_timeout() + self.assertEqual(result, 60) + @patch('crmsh.sbd.SBDUtils.get_sbd_device_from_config') @patch('crmsh.service_manager.ServiceManager.service_is_active') def test_is_using_diskless_sbd(self, mock_service_is_active, mock_get_sbd_device_from_config): @@ -401,7 +413,11 @@ 
mock_CrmMonXmlParser.return_value.is_any_resource_running.return_value = True SBDManager.restart_cluster_if_possible() mock_ServiceManager.return_value.service_is_active.assert_called_once_with(constants.PCMK_SERVICE) - mock_logger_warning.assert_called_once_with("Resource is running, need to restart cluster service manually on each node") + mock_logger_warning.assert_has_calls([ + call("Resource is running, need to restart cluster service manually on each node"), + call("Or, run with `crm -F` or `--force` option, the `sbd` subcommand will leverage maintenance mode for any changes that require restarting sbd.service"), + call("Understand risks that running RA has no cluster protection while the cluster is in maintenance mode and restarting") + ]) @patch('crmsh.bootstrap.restart_cluster') @patch('logging.Logger.warning') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/crmsh-5.0.0+20250403.1442a04a/test/unittests/test_ui_sbd.py new/crmsh-5.0.0+20250418.bb6c26c2/test/unittests/test_ui_sbd.py --- old/crmsh-5.0.0+20250403.1442a04a/test/unittests/test_ui_sbd.py 2025-04-03 10:05:11.000000000 +0200 +++ new/crmsh-5.0.0+20250418.bb6c26c2/test/unittests/test_ui_sbd.py 2025-04-18 16:21:27.000000000 +0200 @@ -90,7 +90,7 @@ mock_ServiceManager.return_value.service_is_active.side_effect = [True, True] mock_is_using_disk_based_sbd.return_value = False mock_is_using_diskless_sbd.return_value = True - self.assertEqual(ui_sbd.sbd_configure_completer(["configure", ""]), ["show", "watchdog-timeout=", "watchdog-device="]) + self.assertEqual(ui_sbd.sbd_configure_completer(["configure", ""]), ["show", "watchdog-timeout=", "crashdump-watchdog-timeout=", "watchdog-device="]) mock_ServiceManager.return_value.service_is_active.assert_has_calls([ mock.call(constants.PCMK_SERVICE), mock.call(constants.SBD_SERVICE) @@ -115,7 +115,7 @@ mock_get_sbd_device_from_config.return_value = ["/dev/sda1"] mock_get_watchdog_device_from_sbd_config.return_value = "/dev/watchdog0" mock_get_sbd_watchdog_timeout.return_value = 10 - mock_get_sbd_device_metadata.return_value = {"watchdog": 10, "msgwait": 20} + mock_get_sbd_device_metadata.return_value = {"watchdog": 10, "allocate": 5, "loop": 5, "msgwait": 20} self.sbd_instance_diskbased = ui_sbd.SBD() self.sbd_instance_diskbased._load_attributes() @@ -198,14 +198,14 @@ def test_do_configure_no_service(self): self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=False) - res = self.sbd_instance_diskbased.do_configure(mock.Mock()) + res = self.sbd_instance_diskbased.do_configure(mock.Mock(), "test") self.assertFalse(res) @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_systemd_start_timeout') @mock.patch('logging.Logger.info') @mock.patch('builtins.print') def test_show_property(self, mock_print, mock_logger_info, mock_get_sbd_systemd_start_timeout): - data = """property cib-bootstrap-options: \ + data1 = """property cib-bootstrap-options: \ have-watchdog=true \ dc-version="2.1.7+20240711.239cba384-1.1-2.1.7+20240711.239cba384" \ cluster-infrastructure=corosync \ @@ -214,18 +214,21 @@ stonith-timeout=83 \ priority-fencing-delay=60 """ - self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error = mock.Mock(return_value=data) + data2 = "fence_sbd parameters" + self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error = mock.Mock(side_effect=[data1, data2]) mock_get_sbd_systemd_start_timeout.return_value = 10 self.sbd_instance_diskbased._show_property() mock_logger_info.assert_has_calls([ mock.call("crm sbd configure 
show property"), + mock.call("%s", "crm configure show related:fence_sbd"), mock.call("%s", sbd.SBDTimeout.SHOW_SBD_START_TIMEOUT_CMD) ]) mock_print.assert_has_calls([ mock.call("have-watchdog=true"), mock.call("stonith-enabled=true"), mock.call("stonith-timeout=83"), - mock.call("priority-fencing-delay=60"), + mock.call(), + mock.call("fence_sbd parameters"), mock.call(), mock.call(f"TimeoutStartUSec=10") ]) @@ -255,6 +258,19 @@ self.sbd_instance_diskbased._configure_show(["show", "property"]) self.sbd_instance_diskbased._show_property.assert_called_once() + def test_parse_re(self): + test_data = [ + ("watchdog-timeout=30", ("watchdog", "timeout", "30")), + ("crashdump-watchdog-timeout=120", ("crashdump-watchdog", "timeout", "120")), + ("watchdog-device=/dev/watchdog", ("watchdog", "device", "/dev/watchdog")), + ("loop-timeout=5", ("loop", "timeout", "5")), + ("msgwait-timeout=10", ("msgwait", "timeout", "10")), + ] + for input_str, expected in test_data: + match = ui_sbd.SBD.PARSE_RE.match(input_str) + self.assertIsNotNone(match) + self.assertEqual(match.groups(), expected) + @mock.patch('crmsh.ui_sbd.SBD._show_sysconfig') @mock.patch('builtins.print') def test_configure_show(self, mock_print, mock_show_sysconfig): @@ -273,11 +289,6 @@ self.sbd_instance_diskbased._parse_args(["watchdog-timeout=xxx"]) self.assertEqual(str(e.exception), "Invalid timeout value: xxx") - def test_parse_args_unknown_arg(self): - with self.assertRaises(ui_sbd.SBD.SyntaxError) as e: - self.sbd_instance_diskbased._parse_args(["name=xin"]) - self.assertEqual(str(e.exception), "Unknown argument: name=xin") - @mock.patch('logging.Logger.debug') @mock.patch('crmsh.watchdog.Watchdog.get_watchdog_device') def test_parse_args(self, mock_get_watchdog_device, mock_logger_debug): @@ -299,60 +310,139 @@ res = ui_sbd.SBD._adjust_timeout_dict(timeout_dict) self.assertEqual(res, {"watchdog": 5, "msgwait": 10}) + @mock.patch('logging.Logger.error') + @mock.patch('crmsh.cibquery.get_primitives_with_ra') + @mock.patch('crmsh.cibquery.ResourceAgent') + @mock.patch('crmsh.xmlutil.text2elem') + def test_set_crashdump_option_exception(self, mock_text2elem, mock_ResourceAgent, mock_get_primitives_with_ra, mock_logger_error): + self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error = mock.Mock(return_value="<dummy></dummy>") + mock_text2elem.return_value = "dummy" + mock_ra_instance = mock.Mock() + mock_ResourceAgent.return_value = mock_ra_instance + mock_get_primitives_with_ra.return_value = [] + + with self.assertRaises(ui_sbd.SBD.MissingRequiredException): + self.sbd_instance_diskbased._set_crashdump_option() + + self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error.assert_called_once_with("crm configure show xml") + mock_logger_error.assert_called_once_with("No fence_sbd resource found") + + @mock.patch('logging.Logger.info') + @mock.patch('crmsh.utils.is_boolean_false') + @mock.patch('crmsh.cibquery.get_parameter_value') + @mock.patch('crmsh.cibquery.get_primitives_with_ra') + @mock.patch('crmsh.cibquery.ResourceAgent') + @mock.patch('crmsh.xmlutil.text2elem') + def test_set_crashdump_option(self, mock_text2elem, mock_ResourceAgent, mock_get_primitives_with_ra, mock_get_parameter_value, mock_is_boolean_false, mock_logger_info): + self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error = mock.Mock(side_effect=["<dummy></dummy>", ""]) + mock_text2elem.return_value = "dummy" + mock_ra_instance = mock.Mock() + mock_ResourceAgent.return_value = mock_ra_instance + 
mock_get_primitives_with_ra.return_value = ["fence_sbd"] + mock_get_parameter_value.return_value = None + mock_is_boolean_false.return_value = True + + self.sbd_instance_diskbased._set_crashdump_option() + mock_logger_info.assert_called_once_with("Set crashdump option for fence_sbd resource") + + @mock.patch('logging.Logger.info') + @mock.patch('crmsh.utils.is_boolean_false') + @mock.patch('crmsh.cibquery.get_parameter_value') + @mock.patch('crmsh.cibquery.get_primitives_with_ra') + @mock.patch('crmsh.cibquery.ResourceAgent') + @mock.patch('crmsh.xmlutil.text2elem') + def test_set_crashdump_option_delete(self, mock_text2elem, mock_ResourceAgent, mock_get_primitives_with_ra, mock_get_parameter_value, mock_is_boolean_false, mock_logger_info): + self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error = mock.Mock(side_effect=["<dummy></dummy>", ""]) + mock_text2elem.return_value = "dummy" + mock_ra_instance = mock.Mock() + mock_ResourceAgent.return_value = mock_ra_instance + mock_get_primitives_with_ra.return_value = ["fence_sbd"] + mock_get_parameter_value.return_value = None + mock_is_boolean_false.return_value = False + + self.sbd_instance_diskbased._set_crashdump_option(delete=True) + mock_logger_info.assert_called_once_with("Delete crashdump option for fence_sbd resource") + + @mock.patch('logging.Logger.warning') + def test_check_kdump_service(self, mock_logger_warning): + self.sbd_instance_diskbased.service_manager.service_is_active = mock.Mock(side_effect=[True, False]) + self.sbd_instance_diskbased._check_kdump_service() + mock_logger_warning.assert_has_calls([ + mock.call("Kdump service is not active on %s", "node2"), + mock.call("Kdump service is required for crashdump") + ]) + + def test_should_configure_crashdump_no_set(self): + self.sbd_instance_diskbased.crashdump_watchdog_timeout_from_config = None + res = self.sbd_instance_diskbased._should_configure_crashdump(None, None) + self.assertFalse(res) + + def test_should_configure_crashdump(self): + self.sbd_instance_diskbased.crashdump_watchdog_timeout_from_config = 1 + res = self.sbd_instance_diskbased._should_configure_crashdump(10, None) + self.assertTrue(res) + @mock.patch("crmsh.ui_sbd.SBD.configure_usage", new_callable=mock.PropertyMock) @mock.patch('builtins.print') @mock.patch('logging.Logger.error') def test_do_configure_no_args(self, mock_logger_error, mock_print, mock_configure_usage): - self.sbd_instance_diskbased.service_is_active = mock.Mock(side_effect=[True, True]) + self.sbd_instance_diskbased._load_attributes = mock.Mock() mock_configure_usage.return_value = "usage data" res = self.sbd_instance_diskbased.do_configure(mock.Mock()) self.assertFalse(res) mock_logger_error.assert_called_once_with('%s', "No argument") mock_print.assert_called_once_with("usage data") + @mock.patch('logging.Logger.info') @mock.patch('crmsh.sbd.SBDManager') - def test_configure_diskbase(self, mock_SBDManager): - parameter_dict = {"watchdog": 12, "watchdog-device": "/dev/watchdog100"} - self.sbd_instance_diskbased._adjust_timeout_dict = mock.Mock(return_value=parameter_dict) + def test_configure_diskbase(self, mock_SBDManager, mock_logger_info): + parameter_dict = {"watchdog": 12, "watchdog-device": "/dev/watchdog100", "crashdump-watchdog": 12} + self.sbd_instance_diskbased._should_configure_crashdump = mock.Mock(return_value=True) + self.sbd_instance_diskbased._check_kdump_service = mock.Mock() + self.sbd_instance_diskbased._set_crashdump_option = mock.Mock() + self.sbd_instance_diskbased._set_crashdump_in_sysconfig = 
mock.Mock(return_value={"SBD_TIMEOUT_ACTION": "flush,crashdump", "SBD_OPTS": "-C 12"}) mock_SBDManager.return_value.init_and_deploy_sbd = mock.Mock() self.sbd_instance_diskbased._configure_diskbase(parameter_dict) mock_SBDManager.assert_called_once_with( device_list_to_init=self.sbd_instance_diskbased.device_list_from_config, - timeout_dict={"watchdog": 12, "msgwait": 20, "watchdog-device": "/dev/watchdog100"}, - update_dict={ - "SBD_WATCHDOG_DEV": "/dev/watchdog100" - } + timeout_dict={'watchdog': 12, 'allocate': 5, 'loop': 5, 'msgwait': 36}, + update_dict={'SBD_TIMEOUT_ACTION': 'flush,crashdump', 'SBD_OPTS': '-C 12', 'SBD_WATCHDOG_DEV': '/dev/watchdog100'} ) mock_SBDManager.return_value.init_and_deploy_sbd.assert_called_once() + self.sbd_instance_diskbased._check_kdump_service.assert_called_once() + self.sbd_instance_diskbased._set_crashdump_option.assert_called_once() @mock.patch('logging.Logger.info') - @mock.patch('crmsh.utils.is_subdict') @mock.patch('crmsh.sbd.SBDManager') - def test_configure_diskbase_no_change(self, mock_SBDManager, mock_is_subdict, mock_logger_info): - parameter_dict = {"watchdog": 10, "watchdog-device": "/dev/watchdog0"} - mock_is_subdict.return_value = True + def test_configure_diskbase_no_change(self, mock_SBDManager, mock_logger_info): + parameter_dict = {"msgwait": 20, "watchdog": 10, "watchdog-device": "/dev/watchdog0"} + self.sbd_instance_diskbased._should_configure_crashdump = mock.Mock(return_value=False) self.sbd_instance_diskbased._configure_diskbase(parameter_dict) mock_logger_info.assert_called_once_with("No change in SBD configuration") + @mock.patch('logging.Logger.info') @mock.patch('crmsh.sbd.SBDManager') - def test_configure_diskless(self, mock_SBDManager): - parameter_dict = {"watchdog": 12, "watchdog-device": "/dev/watchdog100"} - self.sbd_instance_diskless._adjust_timeout_dict = mock.Mock(return_value=parameter_dict) + def test_configure_diskless(self, mock_SBDManager, mock_logger_info): + parameter_dict = {"watchdog": 12, "watchdog-device": "/dev/watchdog100", "crashdump-watchdog": 12} + self.sbd_instance_diskless._should_configure_crashdump = mock.Mock(return_value=True) + self.sbd_instance_diskless._check_kdump_service = mock.Mock() + self.sbd_instance_diskless._check_kdump_service = mock.Mock() + self.sbd_instance_diskless._set_crashdump_in_sysconfig = mock.Mock(return_value={"SBD_TIMEOUT_ACTION": "flush,crashdump", "SBD_OPTS": "-C 12 -Z"}) mock_SBDManager.return_value.init_and_deploy_sbd = mock.Mock() self.sbd_instance_diskless._configure_diskless(parameter_dict) mock_SBDManager.assert_called_once_with( - update_dict={ - "SBD_WATCHDOG_DEV": "/dev/watchdog100", - "SBD_WATCHDOG_TIMEOUT": "12" - }, + timeout_dict={'stonith-watchdog': 24}, + update_dict={'SBD_WATCHDOG_TIMEOUT': '12', 'SBD_WATCHDOG_DEV': '/dev/watchdog100', 'SBD_TIMEOUT_ACTION': 'flush,crashdump', 'SBD_OPTS': '-C 12 -Z'}, diskless_sbd=True ) mock_SBDManager.return_value.init_and_deploy_sbd.assert_called_once() + self.sbd_instance_diskless._check_kdump_service.assert_called_once() @mock.patch('logging.Logger.info') @mock.patch('crmsh.sbd.SBDManager') def test_configure_diskless_no_change(self, mock_SBDManager, mock_logger_info): parameter_dict = {"watchdog": 10, "watchdog-device": "/dev/watchdog0"} + self.sbd_instance_diskless._should_configure_crashdump = mock.Mock(return_value=False) self.sbd_instance_diskless._configure_diskless(parameter_dict) mock_logger_info.assert_called_once_with("No change in SBD configuration") @@ -447,6 +537,7 @@ def test_do_device_add(self, 
mock_is_using_disk_based_sbd, mock_logger_info): mock_is_using_disk_based_sbd.return_value = True self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=True) + self.sbd_instance_diskbased._load_attributes = mock.Mock() self.sbd_instance_diskbased._device_add = mock.Mock() res = self.sbd_instance_diskbased.do_device(mock.Mock(), "add", "/dev/sda2;/dev/sda3") self.assertTrue(res) @@ -458,6 +549,7 @@ def test_do_device_remove(self, mock_is_using_disk_based_sbd, mock_logger_info): mock_is_using_disk_based_sbd.return_value = True self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=True) + self.sbd_instance_diskbased._load_attributes = mock.Mock() self.sbd_instance_diskbased._device_remove = mock.Mock() res = self.sbd_instance_diskbased.do_device(mock.Mock(), "remove", "/dev/sda1") self.assertTrue(res) @@ -471,14 +563,12 @@ self.assertFalse(res) mock_purge_sbd_from_cluster.assert_not_called() - @mock.patch('crmsh.sbd.SBDManager.restart_cluster_if_possible') @mock.patch('crmsh.sbd.purge_sbd_from_cluster') - def test_do_purge(self, mock_purge_sbd_from_cluster, mock_restart_cluster_if_possible): + def test_do_purge(self, mock_purge_sbd_from_cluster): self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=True) res = self.sbd_instance_diskbased.do_purge(mock.Mock()) self.assertTrue(res) mock_purge_sbd_from_cluster.assert_called_once() - mock_restart_cluster_if_possible.assert_called_once() @mock.patch('crmsh.xmlutil.CrmMonXmlParser') def test_print_sbd_agent_status(self, mock_CrmMonXmlParser): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/crmsh-5.0.0+20250403.1442a04a/test/unittests/test_utils.py new/crmsh-5.0.0+20250418.bb6c26c2/test/unittests/test_utils.py --- old/crmsh-5.0.0+20250403.1442a04a/test/unittests/test_utils.py 2025-04-03 10:05:11.000000000 +0200 +++ new/crmsh-5.0.0+20250418.bb6c26c2/test/unittests/test_utils.py 2025-04-18 16:21:27.000000000 +0200 @@ -1342,7 +1342,52 @@ mock_error.assert_called_once_with('Operation is denied. The current user lacks the necessary privilege.') -def test_is_subdict(): - d1 = {"a": 1, "b": 2} - d2 = {"a": 1} - assert utils.is_subdict(d2, d1) is True +@mock.patch('logging.Logger.warning') +@mock.patch('crmsh.utils.is_dc_idle') +def test_leverage_maintenance_mode_skip(mock_idle, mock_warn): + config.core.force = True + mock_idle.return_value = False + with utils.leverage_maintenance_mode() as result: + assert result is False + mock_warn.assert_called_once_with("Pacemaker state transition is in progress. 
Skip restarting cluster in maintenance mode.") + + +@mock.patch('crmsh.utils.delete_property') +@mock.patch('crmsh.utils.set_property') +@mock.patch('logging.Logger.info') +@mock.patch('crmsh.utils.is_dc_idle') +def test_leverage_maintenance_mode(mock_idle, mock_info, mock_set, mock_delete): + config.core.force = True + mock_idle.return_value = True + with utils.leverage_maintenance_mode() as result: + assert result is True + mock_set.assert_called_once_with("maintenance-mode", "true") + mock_delete.assert_called_once_with("maintenance-mode") + + +@mock.patch('crmsh.utils.get_dc') +def test_is_dc_idle_no_dc(mock_dc): + mock_dc.return_value = None + assert utils.is_dc_idle() is False + + +@mock.patch('logging.Logger.error') +@mock.patch('crmsh.utils.ShellUtils') +@mock.patch('crmsh.utils.get_dc') +def test_is_dc_idle_failed_get_dc_status(mock_dc, mock_shell, mock_error): + mock_dc.return_value = "test" + mock_shell_inst = mock.Mock() + mock_shell.return_value = mock_shell_inst + mock_shell_inst.get_stdout_stderr.return_value = (1, None, "error") + assert utils.is_dc_idle() is False + mock_error.assert_called_once_with("Failed to get DC status: %s", "error") + + +@mock.patch('crmsh.utils.ShellUtils') +@mock.patch('crmsh.utils.get_dc') +def test_is_dc_idle(mock_dc, mock_shell): + mock_dc.return_value = "test" + mock_shell_inst = mock.Mock() + mock_shell.return_value = mock_shell_inst + mock_shell_inst.get_stdout_stderr.return_value = (0, "in S_IDLE: ok", None) + assert utils.is_dc_idle() is True
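The crashdump changes above combine two timeout rules that the functional tests exercise: for disk-based SBD, msgwait becomes 2*watchdog-timeout plus crashdump-watchdog-timeout (watchdog 30s and crashdump 60s give msgwait 120s), and for diskless SBD, stonith-watchdog-timeout becomes SBD_WATCHDOG_TIMEOUT plus crashdump-watchdog-timeout (15s + 60s give 75s), with the crashdump timeout itself carried in SBD_OPTS as "-C <seconds>" (plus "-Z" in the diskless case). The short sketch below is a standalone illustration of that arithmetic and of parsing "-C" back out of SBD_OPTS in the spirit of SBDUtils.get_crashdump_watchdog_timeout(); it is not crmsh code, and the helper names are invented for the example.

import re
from typing import Optional


def crashdump_timeout_from_sbd_opts(sbd_opts: Optional[str]) -> Optional[int]:
    """Extract the '-C <seconds>' value from an SBD_OPTS string, if present."""
    if not sbd_opts:
        return None
    matched = re.search(r"-C\s+(\d+)", sbd_opts)
    return int(matched.group(1)) if matched else None


def diskbased_msgwait(watchdog_timeout: int, crashdump_watchdog_timeout: int) -> int:
    """Disk-based SBD: msgwait = 2 * watchdog-timeout + crashdump-watchdog-timeout."""
    return 2 * watchdog_timeout + crashdump_watchdog_timeout


def diskless_stonith_watchdog_timeout(sbd_watchdog_timeout: int,
                                      crashdump_watchdog_timeout: int) -> int:
    """Diskless SBD: stonith-watchdog-timeout = SBD_WATCHDOG_TIMEOUT + crashdump-watchdog-timeout."""
    return sbd_watchdog_timeout + crashdump_watchdog_timeout


if __name__ == "__main__":
    # Values taken from test/features/sbd_ui.feature above:
    # watchdog-timeout=30, crashdump-watchdog-timeout=60 -> msgwait 120
    assert diskbased_msgwait(30, 60) == 120
    # SBD_WATCHDOG_TIMEOUT=15, crashdump-watchdog-timeout=60 -> stonith-watchdog-timeout 75
    assert diskless_stonith_watchdog_timeout(15, 60) == 75
    # SBD_OPTS="-C 60 -Z" carries the crashdump watchdog timeout (-Z marks the diskless case)
    assert crashdump_timeout_from_sbd_opts("-C 60 -Z") == 60
    assert crashdump_timeout_from_sbd_opts(None) is None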