On Tue, Nov 24, 2015 at 3:15 PM, 'Helga Velroyen' via ganeti-devel < [email protected]> wrote:
> In order to improve the runtime complexity of > 'renew-crypto', this patch adds a function to > bulk-remove SSH keys of nodes (in contrast to > the function that only removes one key at a time). > > Within this patch, it is only called in a unit > test. Further patches will integrate and test it > further. > > Signed-off-by: Helga Velroyen <[email protected]> > --- > lib/backend.py | 254 > +++++++++++++++++++++++++++++++++++++ > test/py/ganeti.backend_unittest.py | 31 +++++ > 2 files changed, 285 insertions(+) > > diff --git a/lib/backend.py b/lib/backend.py > index d470060..3d0443c 100644 > --- a/lib/backend.py > +++ b/lib/backend.py > @@ -1857,6 +1857,260 @@ def RemoveNodeSshKey(node_uuid, node_name, > return result_msgs > > > +# Node info named tuple specifically for the use with RemoveNodeSshKeyBulk > +SshRemoveNodeInfo = collections.namedtuple( > + "SshRemoveNodeInfo", > + ["uuid", > + "name", > + "from_authorized_keys", > + "from_public_keys", > + "clear_authorized_keys", > + "clear_public_keys"]) > + > + > +def RemoveNodeSshKeyBulk(node_list, > + master_candidate_uuids, > + potential_master_candidates, > + master_uuid=None, > + keys_to_remove=None, > + pub_key_file=pathutils.SSH_PUB_KEYS, > + ssconf_store=None, > + noded_cert_file=pathutils.NODED_CERT_FILE, > + readd=False, > + run_cmd_fn=ssh.RunSshCmdWithStdin): > + """Removes the node's SSH keys from the key files and distributes those. > + > + Note that at least one of the flags C{from_authorized_keys}, > + C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys} > + of at least one node has to be set to C{True} for the function to > perform any > + action at all. Not doing so will trigger an assertion in the function. + > + @type node_list: list of C{SshRemoveNodeInfo}. > + @param node_list: list of information about nodes whose keys are being > removed > + @type master_candidate_uuids: list of str > + @param master_candidate_uuids: list of UUIDs of the current master > candidates > + @type potential_master_candidates: list of str > + @param potential_master_candidates: list of names of potential master > + candidates > + @type keys_to_remove: dict of str to list of str > + @param keys_to_remove: a dictionary mapping node UUIDS to lists of SSH > keys > + to be removed. This list is supposed to be used only if the keys are > not > + in the public keys file. This is for example the case when removing a > + master node's key. > + @type readd: boolean > + @param readd: whether this is called during a readd operation. > + @rtype: list of string > + @returns: list of feedback messages > + > + """ > + # Non-disruptive error messages, list of (node, msg) pairs > + result_msgs = [] > + > + # whether there are any keys to be added or retrieved at all > + from_authorized_keys = any([node_info.from_authorized_keys for > node_info in > + node_list]) > + from_public_keys = any([node_info.from_public_keys for node_info in > + node_list]) > + clear_authorized_keys = any([node_info.clear_authorized_keys for > node_info in > + node_list]) > + clear_public_keys = any([node_info.clear_public_keys for node_info in > + node_list]) > + > + # Make sure at least one of these flags is true. > + if not (from_authorized_keys or from_public_keys or > clear_authorized_keys > + or clear_public_keys): > + raise errors.SshUpdateError("No removal from any key file was > requested.") > + > + if not ssconf_store: > + ssconf_store = ssconf.SimpleStore() > + > + master_node = ssconf_store.GetMasterNode() > + ssh_port_map = ssconf_store.GetSshPortMap() > + > + if from_authorized_keys or from_public_keys: > + all_keys_to_remove = {} > + for node_info in node_list: > + > + if keys_to_remove: > + keys = keys_to_remove > As you only have one dict of keys_to_remove, you could already initialize all_keys_to_remove with that. > + else: > + keys = ssh.QueryPubKeyFile([node_info.uuid], > key_file=pub_key_file) > + if (not keys or node_info.uuid not in keys) and not readd: > + raise errors.SshUpdateError("Node '%s' not found in the list of" > + " public SSH keys. It seems someone" > + " tries to remove a key from > outside" > + " the cluster!" % node_info.uuid) > + # During an upgrade all nodes have the master key. In this case we > + # should not remove it to avoid accidentally shutting down cluster > + # SSH communication > + master_keys = None > + if master_uuid: > + master_keys = ssh.QueryPubKeyFile([master_uuid], > + key_file=pub_key_file) > + for master_key in master_keys: > + if master_key in keys[node_info.uuid]: > + keys[node_info.uuid].remove(master_key) > + > + if node_info.name == master_node and not keys_to_remove: > + raise errors.SshUpdateError("Cannot remove the master node's > keys.") > I would move this at the beginning of the for loop to fail early. > + > + all_keys_to_remove.update(keys) > + > + if all_keys_to_remove: > + base_data = {} > + _InitSshUpdateData(base_data, noded_cert_file, ssconf_store) > + cluster_name = base_data[constants.SSHS_CLUSTER_NAME] > + > + if from_authorized_keys: > + # UUIDs of nodes that are supposed to be removed from the > + # authorized_keys files. > + nodes_remove_from_authorized_keys = [ > + node_info.uuid for node_info in node_list > + if node_info.from_authorized_keys] > + keys_to_remove_from_authorized_keys = dict([ > + (uuid, keys) for (uuid, keys) in all_keys_to_remove.items() > + if uuid in nodes_remove_from_authorized_keys]) > + base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ > + (constants.SSHS_REMOVE, keys_to_remove_from_authorized_keys) > + (auth_key_file, _) = \ > + ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, > + dircheck=False) > + > + for uuid in nodes_remove_from_authorized_keys: > + ssh.RemoveAuthorizedKeys(auth_key_file, > + > keys_to_remove_from_authorized_keys[uuid]) > + > + pot_mc_data = base_data.copy() > + > + if from_public_keys: > + nodes_remove_from_public_keys = [ > + node_info.uuid for node_info in node_list > + if node_info.from_public_keys] > + keys_to_remove_from_public_keys = dict([ > + (uuid, keys) for (uuid, keys) in all_keys_to_remove.items() > + if uuid in nodes_remove_from_public_keys]) > + pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ > + (constants.SSHS_REMOVE, keys_to_remove_from_public_keys) > + > + all_nodes = ssconf_store.GetNodeList() > + online_nodes = ssconf_store.GetOnlineNodeList() > + all_nodes_to_remove = [node_info.name for node_info in node_list] > + logging.debug("Removing keys of nodes '%s' from all nodes but > itself and" > + " master.", ", ".join(all_nodes_to_remove)) > + for node in all_nodes: > + if node == master_node: > + logging.debug("Skipping master node '%s'.", master_node) > + continue > + if node not in online_nodes: > + logging.debug("Skipping offline node '%s'.", node) > + continue > + if node in all_nodes_to_remove: > + logging.debug("Skipping node whose key is removed itself > '%s'.", node) + continue > + ssh_port = ssh_port_map.get(node) > + if not ssh_port: > + raise errors.OpExecError("No SSH port information available for" > + " node '%s', map: %s." % > + (node, ssh_port_map)) > + error_msg_final = ("When removing the key of node '%s', updating > the" > + " SSH key files of node '%s' failed. Last > error" > + " was: %s.") > + if node in potential_master_candidates: > + logging.debug("Updating key setup of potential master candidate > node" > + " %s.", node) > + try: > + utils.RetryByNumberOfTimes( > + constants.SSHS_MAX_RETRIES, > + errors.SshUpdateError, > + run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, > + ssh_port, pot_mc_data, > + debug=False, verbose=False, use_cluster_key=False, > + ask_key=False, strict_host_check=False) > + except errors.SshUpdateError as last_exception: > + error_msg = error_msg_final % ( > + node_info.name, node, last_exception) > + result_msgs.append((node, error_msg)) > + logging.error(error_msg) > + > + else: > + if from_authorized_keys: > + logging.debug("Updating key setup of normal node %s.", node) > + try: > + utils.RetryByNumberOfTimes( > + constants.SSHS_MAX_RETRIES, > + errors.SshUpdateError, > + run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, > + ssh_port, base_data, > + debug=False, verbose=False, use_cluster_key=False, > + ask_key=False, strict_host_check=False) > + except errors.SshUpdateError as last_exception: > + error_msg = error_msg_final % ( > + node_info.name, node, last_exception) > + result_msgs.append((node, error_msg)) > + logging.error(error_msg) > + > + for node_info in node_list: > + if node_info.clear_authorized_keys or node_info.from_public_keys or \ > + node_info.clear_public_keys: > + data = {} > + _InitSshUpdateData(data, noded_cert_file, ssconf_store) > + cluster_name = data[constants.SSHS_CLUSTER_NAME] > + ssh_port = ssh_port_map.get(node_info.name) > + if not ssh_port: > + raise errors.OpExecError("No SSH port information available for" > + " node '%s', which is leaving the > cluster.") > + > + if node_info.clear_authorized_keys: > + # The 'authorized_keys' file is not solely managed by Ganeti. > Therefore, > + # we have to specify exactly which keys to clear to leave keys > untouched > + # that were not added by Ganeti. > + other_master_candidate_uuids = [uuid for uuid in > master_candidate_uuids > + if uuid != node_info.uuid] > + candidate_keys = ssh.QueryPubKeyFile(other_master_candidate_uuids, > + key_file=pub_key_file) > + data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ > + (constants.SSHS_REMOVE, candidate_keys) > + > + if node_info.clear_public_keys: > + data[constants.SSHS_SSH_PUBLIC_KEYS] = \ > + (constants.SSHS_CLEAR, {}) > + elif node_info.from_public_keys: > + # Since clearing the public keys subsumes removing just a single > key, > + # we only do it of clear_public_keys is 'False'. > s/of/if > + > + if all_keys_to_remove: > + data[constants.SSHS_SSH_PUBLIC_KEYS] = \ > + (constants.SSHS_REMOVE, all_keys_to_remove) > + > + # If we have no changes to any keyfile, just return > + if not (constants.SSHS_SSH_PUBLIC_KEYS in data or > + constants.SSHS_SSH_AUTHORIZED_KEYS in data): > + return > + > + logging.debug("Updating SSH key setup of target node '%s'.", > + node_info.name) > + try: > + utils.RetryByNumberOfTimes( > + constants.SSHS_MAX_RETRIES, > + errors.SshUpdateError, > + run_cmd_fn, cluster_name, node_info.name, > pathutils.SSH_UPDATE, > + ssh_port, data, > + debug=False, verbose=False, use_cluster_key=False, > + ask_key=False, strict_host_check=False) > + except errors.SshUpdateError as last_exception: > + result_msgs.append( > + (node_info.name, > + ("Removing SSH keys from node '%s' failed." > + " This can happen when the node is already unreachable." > + " Error: %s" % (node_info.name, last_exception)))) > + > + if all_keys_to_remove and from_public_keys: > + for node_uuid in nodes_remove_from_public_keys: > + ssh.RemovePublicKey(node_uuid, key_file=pub_key_file) > + > + return result_msgs > + > + > def _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map, > pub_key_file=pathutils.SSH_PUB_KEYS, > ssconf_store=None, > diff --git a/test/py/ganeti.backend_unittest.py b/test/py/ > ganeti.backend_unittest.py > index ab3de9e..484348f 100755 > --- a/test/py/ganeti.backend_unittest.py > +++ b/test/py/ganeti.backend_unittest.py > @@ -1367,6 +1367,37 @@ class > TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): > self.assertEqual(1, > len(self._ssh_file_manager.GetAuthorizedKeysOfNode(node_name))) > > + def testRemoveMasterCandidateBulk(self): > + node_list = [] > + key_map = {} > + for node_name, (node_uuid, node_key, _, _, _) in \ > + self._ssh_file_manager.GetAllMasterCandidates()[:3]: > + node_list.append(backend.SshRemoveNodeInfo(uuid=node_uuid, > + name=node_name, > + > from_authorized_keys=True, > + from_public_keys=True, > + > clear_authorized_keys=True, > + clear_public_keys=True)) > + key_map[node_name] = node_key > + > + backend.RemoveNodeSshKeyBulk(node_list, > + self._master_candidate_uuids, > + self._potential_master_candidates, > + pub_key_file=self._pub_key_file, > + ssconf_store=self._ssconf_mock, > + noded_cert_file=self.noded_cert_file, > + run_cmd_fn=self._run_cmd_mock) > + > + for node_info in node_list: > + self._ssh_file_manager.AssertNoNodeHasPublicKey( > + node_info.uuid, key_map[node_info.name]) > + self._ssh_file_manager.AssertNodeSetOnlyHasAuthorizedKey( > + [node_info.name], key_map[node_info.name]) > + self.assertEqual(0, > + len(self._ssh_file_manager.GetPublicKeysOfNode(node_info.name > ))) > + self.assertEqual(1, > + len(self._ssh_file_manager.GetAuthorizedKeysOfNode( > node_info.name))) > + > def testRemovePotentialMasterCandidate(self): > (node_name, node_info) = \ > self._ssh_file_manager.GetAllPurePotentialMasterCandidates()[0] > -- > 2.6.0.rc2.230.g3dd15c0 > > -- Lisa Velden Software Engineer [email protected] Google Germany GmbH Dienerstraße 12 80331 München Geschäftsführer: Matthew Scott Sucherman, Paul Terence Manicle Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg
