This fixes issue 1131. 'gnt-cluster verify' should stop complaining about broken SSH setups of offline nodes.
Additionally, this fixes a problem when readding nodes. In some cases, Ganeti complains about a possible attack, which is a valid case for readding a node (if a key renew took place between offlining and readding the node). Signed-off-by: Helga Velroyen <[email protected]> --- lib/backend.py | 24 ++++++++++++++++-------- lib/cmdlib/cluster.py | 2 +- lib/cmdlib/node.py | 9 ++++++--- lib/rpc_defs.py | 4 +++- lib/server/noded.py | 5 +++-- test/py/ganeti.backend_unittest.py | 6 +++--- 6 files changed, 32 insertions(+), 18 deletions(-) diff --git a/lib/backend.py b/lib/backend.py index 5454aba..646fc70 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -985,11 +985,12 @@ def _VerifySshSetup(node_status_list, my_name, if node_status_list is None: return ["No node list to check against the pub_key_file received."] - my_status_list = [(my_uuid, name, mc, pot_mc) for (my_uuid, name, mc, pot_mc) + my_status_list = [(my_uuid, name, mc, pot_mc, online) for + (my_uuid, name, mc, pot_mc, online) in node_status_list if name == my_name] if len(my_status_list) == 0: return ["Cannot find node information for node '%s'." % my_name] - (my_uuid, _, _, potential_master_candidate) = \ + (my_uuid, _, _, potential_master_candidate, online) = \ my_status_list[0] result = [] @@ -1000,14 +1001,16 @@ def _VerifySshSetup(node_status_list, my_name, " [--no-ssh-key-check]' to fix this." % pub_key_file) return result - pot_mc_uuids = [uuid for (uuid, _, _, _) in node_status_list] + pot_mc_uuids = [uuid for (uuid, _, _, _, _) in node_status_list] + offline_nodes = [uuid for (uuid, _, _, _, online) in node_status_list + if not online] pub_keys = ssh.QueryPubKeyFile(None) if potential_master_candidate: # Check that the set of potential master candidates matches the # public key file - pub_uuids_set = set(pub_keys.keys()) - pot_mc_uuids_set = set(pot_mc_uuids) + pub_uuids_set = set(pub_keys.keys()) - set(offline_nodes) + pot_mc_uuids_set = set(pot_mc_uuids) - set(offline_nodes) missing_uuids = set([]) if pub_uuids_set != pot_mc_uuids_set: unknown_uuids = pub_uuids_set - pot_mc_uuids_set @@ -1044,7 +1047,9 @@ def _VerifySshSetup(node_status_list, my_name, # Check that all master candidate keys are in the authorized_keys file (auth_key_file, _) = \ ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) - for (uuid, name, mc, _) in node_status_list: + for (uuid, name, mc, _, online) in node_status_list: + if not online: + continue if uuid in missing_uuids: continue if mc: @@ -1574,6 +1579,7 @@ def RemoveNodeSshKey(node_uuid, node_name, pub_key_file=pathutils.SSH_PUB_KEYS, ssconf_store=None, noded_cert_file=pathutils.NODED_CERT_FILE, + readd=False, run_cmd_fn=ssh.RunSshCmdWithStdin): """Removes the node's SSH keys from the key files and distributes those. @@ -1607,6 +1613,8 @@ def RemoveNodeSshKey(node_uuid, node_name, should be cleared on the node whose keys are removed @type clear_public_keys: boolean @param clear_public_keys: whether to clear the node's C{ganeti_pub_key} file + @type readd: boolean + @param readd: whether this is called during a readd operation. @rtype: list of string @returns: list of feedback messages @@ -1630,7 +1638,7 @@ def RemoveNodeSshKey(node_uuid, node_name, keys = keys_to_remove else: keys = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file) - if not keys or node_uuid not in keys: + if (not keys or node_uuid not in keys) and not readd: raise errors.SshUpdateError("Node '%s' not found in the list of public" " SSH keys. It seems someone tries to" " remove a key from outside the cluster!" @@ -1648,7 +1656,7 @@ def RemoveNodeSshKey(node_uuid, node_name, if node_name == master_node and not keys_to_remove: raise errors.SshUpdateError("Cannot remove the master node's keys.") - if keys[node_uuid]: + if node_uuid in keys: base_data = {} _InitSshUpdateData(base_data, noded_cert_file, ssconf_store) cluster_name = base_data[constants.SSHS_CLUSTER_NAME] diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py index dd758ec..cc2fa01 100644 --- a/lib/cmdlib/cluster.py +++ b/lib/cmdlib/cluster.py @@ -3436,7 +3436,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors): potential_master_candidates = self.cfg.GetPotentialMasterCandidates() node_status = [ (uuid, node_info.name, node_info.master_candidate, - node_info.name in potential_master_candidates) + node_info.name in potential_master_candidates, not node_info.offline) for (uuid, node_info) in all_nodes_info.items()] return node_status diff --git a/lib/cmdlib/node.py b/lib/cmdlib/node.py index 9c2ba21..6578741 100644 --- a/lib/cmdlib/node.py +++ b/lib/cmdlib/node.py @@ -359,7 +359,8 @@ class LUNodeAdd(LogicalUnit): True, # from authorized keys True, # from public keys False, # clear authorized keys - True) # clear public keys + True, # clear public keys + True) # it's a readd remove_result[master_node].Raise( "Could not remove SSH keys of node %s before readding," " (UUID: %s)." % (new_node_name, new_node_uuid)) @@ -878,7 +879,8 @@ class LUNodeSetParams(LogicalUnit): True, # remove node's key from all nodes' authorized_keys file False, # currently, all nodes are potential master candidates False, # do not clear node's 'authorized_keys' - False) # do not clear node's 'ganeti_pub_keys' + False, # do not clear node's 'ganeti_pub_keys' + False) # no readd ssh_result[master_node].Raise( "Could not adjust the SSH setup after demoting node '%s'" " (UUID: %s)." % (node.name, node.uuid)) @@ -1586,7 +1588,8 @@ class LUNodeRemove(LogicalUnit): self.node.master_candidate, # from_authorized_keys potential_master_candidate, # from_public_keys True, # clear node's 'authorized_keys' - True) # clear node's 'ganeti_public_keys' + True, # clear node's 'ganeti_public_keys' + False) # no readd result[master_node].Raise( "Could not remove the SSH key of node '%s' (UUID: %s)." % (self.op.node_name, self.node.uuid)) diff --git a/lib/rpc_defs.py b/lib/rpc_defs.py index 40feb0c..7660510 100644 --- a/lib/rpc_defs.py +++ b/lib/rpc_defs.py @@ -556,7 +556,9 @@ _NODE_CALLS = [ ("clear_authorized_keys", None, "If the 'authorized_keys' file of the node should be cleared."), ("clear_public_keys", None, - "If the 'ganeti_pub_keys' file of the node should be cleared.")], + "If the 'ganeti_pub_keys' file of the node should be cleared."), + ("readd", None, + "Whether this is a readd operation.")], None, None, "Remove a node's SSH key from the other nodes' key files."), ("node_ssh_keys_renew", MULTI, None, constants.RPC_TMO_SLOW, [ ("node_uuids", None, "UUIDs of the nodes whose key is renewed"), diff --git a/lib/server/noded.py b/lib/server/noded.py index 27f5128..bcdb000 100644 --- a/lib/server/noded.py +++ b/lib/server/noded.py @@ -950,14 +950,15 @@ class NodeRequestHandler(http.server.HttpServerHandler): (node_uuid, node_name, master_candidate_uuids, potential_master_candidates, from_authorized_keys, from_public_keys, clear_authorized_keys, - clear_public_keys) = params + clear_public_keys, readd) = params return backend.RemoveNodeSshKey(node_uuid, node_name, master_candidate_uuids, potential_master_candidates, from_authorized_keys=from_authorized_keys, from_public_keys=from_public_keys, clear_authorized_keys=clear_authorized_keys, - clear_public_keys=clear_public_keys) + clear_public_keys=clear_public_keys, + readd=readd) # cluster -------------------------- diff --git a/test/py/ganeti.backend_unittest.py b/test/py/ganeti.backend_unittest.py index c75d5b2..373adf0 100755 --- a/test/py/ganeti.backend_unittest.py +++ b/test/py/ganeti.backend_unittest.py @@ -1626,9 +1626,9 @@ class TestVerifySshSetup(testutils.GanetiTestCase): _NODE3_KEYS = ["key31"] _NODE_STATUS_LIST = [ - (_NODE1_UUID, _NODE1_NAME, True, True), - (_NODE2_UUID, _NODE2_NAME, False, True), - (_NODE3_UUID, _NODE3_NAME, False, False), + (_NODE1_UUID, _NODE1_NAME, True, True, True), + (_NODE2_UUID, _NODE2_NAME, False, True, True), + (_NODE3_UUID, _NODE3_NAME, False, False, True), ] _PUB_KEY_RESULT = { -- 2.6.0.rc2.230.g3dd15c0
