This happens because noded operates only on request data passed in the RPC payload for verify_node. This doesn't include the cluster-level ndparams. To fix this we need to make what's technically a protocol change by passing in the cluster-level ssh_port setting if available.
NB this is a backwards-compatible fix: if an old noded gets the extra data, it just ignores it, and if a new noded doesn't receive the data (either because of an old master node, or because the data isn't needed) it just falls back to the old default behaviour. This is fixed in a different way in 2.16 and later. Signed-off-by: Brian Foley <[email protected]> --- lib/backend.py | 3 +++ lib/cmdlib/cluster/verify.py | 12 +++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/backend.py b/lib/backend.py index 82aa493..cb680b6 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -1180,9 +1180,12 @@ def VerifyNode(what, cluster_name, all_hvparams, node_groups, groups_cfg): # Try to contact all nodes val = {} + cluster_default_ssh_port = what.get(constants.ND_SSH_PORT) for node in nodes: params = groups_cfg.get(node_groups.get(node)) ssh_port = params["ndparams"].get(constants.ND_SSH_PORT) + if ssh_port is None: + ssh_port = cluster_default_ssh_port logging.debug("Ssh port %s (None = default) for node %s", str(ssh_port), node) diff --git a/lib/cmdlib/cluster/verify.py b/lib/cmdlib/cluster/verify.py index 77358dc..9f754f8 100644 --- a/lib/cmdlib/cluster/verify.py +++ b/lib/cmdlib/cluster/verify.py @@ -1881,11 +1881,21 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors): constants.NV_CLIENT_CERT: None, } - if self.cfg.GetClusterInfo().modify_ssh_setup: + ci = self.cfg.GetClusterInfo() + if ci.modify_ssh_setup: node_verify_param[constants.NV_SSH_SETUP] = self._PrepareSshSetupCheck() if self.op.verify_clutter: node_verify_param[constants.NV_SSH_CLUTTER] = True + # This is to work around the fact that noded's VerifyNode doesn't look up a + # node's ndparams using GetNdParams (which fills in the cluster defaults if + # the node params aren't set), but instead uses parameters explicitly passed + # in the RPC call. This is fixed differently on >= 2.16. See #773. 
+ if ci.ndparams: + cluster_default_ssh_port = ci.ndparams.get(constants.ND_SSH_PORT) + if cluster_default_ssh_port is not None: + node_verify_param[constants.ND_SSH_PORT] = cluster_default_ssh_port + if vg_name is not None: node_verify_param[constants.NV_VGLIST] = None node_verify_param[constants.NV_LVLIST] = vg_name -- 2.8.0.rc3.226.g39d4020
