LGTM overall; it's your call whether to apply my comment about the error message used when raising master errors.
On Thu, Apr 23, 2015 at 12:49 PM, Helga Velroyen <[email protected]> wrote: > Hi! > > On Wed, 22 Apr 2015 at 18:41 Hrvoje Ribicic <[email protected]> wrote: > >> On Fri, Apr 17, 2015 at 4:37 PM, 'Helga Velroyen' via ganeti-devel < >> [email protected]> wrote: >> >>> This patch introduces retries for updating non-master nodes >>> when a new SSH key is added to the cluster. If an SSH update >>> fails even after the maximum number of retries is exhausted, >>> this does not raise an exception, but an error message is >>> added to the result of the method, which can be displayed >>> in the LU operations so that the admin sees which nodes >>> were the problem and need further attention. >>> >>> Signed-off-by: Helga Velroyen <[email protected]> >>> --- >>> lib/backend.py | 73 +++++++++++++++++++-------- >>> lib/cmdlib/cluster.py | 9 +++- >>> lib/cmdlib/node.py | 14 ++++- >>> test/py/ganeti.backend_unittest.py | 101 >>> ++++++++++++++++++++++++++++++++++++- >>> 4 files changed, 169 insertions(+), 28 deletions(-) >>> >>> diff --git a/lib/backend.py b/lib/backend.py >>> index 818f3fb..3b17306 100644 >>> --- a/lib/backend.py >>> +++ b/lib/backend.py >>> @@ -1566,17 +1566,38 @@ def AddNodeSshKey(node_uuid, node_name, >>> master_node = ssconf_store.GetMasterNode() >>> online_nodes = ssconf_store.GetOnlineNodeList() >>> >>> + node_errors = {} >> >> for node in all_nodes: >>> if node == master_node: >>> + logging.debug("Skipping master node '%s'.", master_node) >>> continue >>> if node not in online_nodes: >>> logging.debug("Skipping offline node '%s'.", node) >>> continue >>> if node in potential_master_candidates: >>> - run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE, >>> - ssh_port_map.get(node), pot_mc_data, >>> - debug=False, verbose=False, use_cluster_key=False, >>> - ask_key=False, strict_host_check=False) >>> + logging.debug("Updating SSH key files of node '%s'.") >>> + for i in range(max_retries): >>> + try: >>> + run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE, 
>>> + ssh_port_map.get(node), pot_mc_data, >>> + debug=False, verbose=False, use_cluster_key=False, >>> + ask_key=False, strict_host_check=False) >>> + break >>> + except errors.OpExecError as e: >>> + logging.error("Updating SSH key files of node '%s' failed" >>> + " at try no %s. Error: %s.", node, i, e) >>> + last_exception = e >>> + else: >>> + if last_exception: >>> + error_msg = ("When adding the key of node '%s', updating SSH >>> key" >>> + " files of node '%s' failed after %s retries." >>> + " Not trying again. Last errors was: %s." % >>> >> >> Nitpick: s/errors/error/ >> > > ACK. > > >> >> >>> + (node, node_name, max_retries, last_exception)) >> >> + node_errors[node] = error_msg >>> + # We only log the error and don't throw an exception, because >>> + # one unreachable node shall not abort the entire procedure. >>> + logging.error(error_msg) >>> + >>> else: >>> if to_authorized_keys: >>> run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE, >>> @@ -1584,6 +1605,8 @@ def AddNodeSshKey(node_uuid, node_name, >>> debug=False, verbose=False, use_cluster_key=False, >>> ask_key=False, strict_host_check=False) >>> >>> + return node_errors >>> + >>> >>> def RemoveNodeSshKey(node_uuid, node_name, >>> master_candidate_uuids, >>> @@ -1925,6 +1948,10 @@ def RenewSshKeys(node_uuids, node_names, >>> ssh_port_map, >>> >>> master_node_name = ssconf_store.GetMasterNode() >>> master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map, >>> master_node_name) >>> + # List of all node errors that happened, but which did not abort the >>> + # procedure as a whole. It is important that this is a list to have a >>> + # somewhat chronological history of events. 
>>> + all_node_errors = [] >>> >>> # process non-master nodes >>> for node_uuid, node_name in node_uuid_name_map: >>> @@ -1989,15 +2016,16 @@ def RenewSshKeys(node_uuids, node_names, >>> ssh_port_map, >>> ssh.AddPublicKey(node_uuid, pub_key, key_file=pub_key_file) >>> >>> logging.debug("Add ssh key of node '%s'.", node_name) >>> - AddNodeSshKey(node_uuid, node_name, >>> - potential_master_candidates, >>> - ssh_port_map, >>> - to_authorized_keys=master_candidate, >>> - to_public_keys=potential_master_candidate, >>> - get_public_keys=True, >>> - pub_key_file=pub_key_file, ssconf_store=ssconf_store, >>> - noded_cert_file=noded_cert_file, >>> - run_cmd_fn=run_cmd_fn) >>> + node_errors = AddNodeSshKey( >>> + node_uuid, node_name, potential_master_candidates, >>> + ssh_port_map, to_authorized_keys=master_candidate, >>> + to_public_keys=potential_master_candidate, >>> + get_public_keys=True, >>> + pub_key_file=pub_key_file, ssconf_store=ssconf_store, >>> + noded_cert_file=noded_cert_file, >>> + run_cmd_fn=run_cmd_fn) >>> + if node_errors: >>> + all_node_errors = all_node_errors + node_errors.items() >>> >>> # Renewing the master node's key >>> >>> @@ -2022,15 +2050,14 @@ def RenewSshKeys(node_uuids, node_names, >>> ssh_port_map, >>> >>> # Add new master key to all node's public and authorized keys >>> logging.debug("Add new master key to all nodes.") >>> - AddNodeSshKey(master_node_uuid, master_node_name, >>> - potential_master_candidates, >>> - ssh_port_map, >>> - to_authorized_keys=True, >>> - to_public_keys=True, >>> - get_public_keys=False, >>> - pub_key_file=pub_key_file, ssconf_store=ssconf_store, >>> - noded_cert_file=noded_cert_file, >>> - run_cmd_fn=run_cmd_fn) >>> + node_errors = AddNodeSshKey( >>> + master_node_uuid, master_node_name, potential_master_candidates, >>> + ssh_port_map, to_authorized_keys=True, to_public_keys=True, >>> + get_public_keys=False, pub_key_file=pub_key_file, >>> + ssconf_store=ssconf_store, noded_cert_file=noded_cert_file, >>> + 
run_cmd_fn=run_cmd_fn) >>> + if node_errors: >>> + all_node_errors = all_node_errors + node_errors.items() >>> >>> # Remove the old key file and rename the new key to the non-temporary >>> filename >>> _ReplaceMasterKeyOnMaster(root_keyfiles) >>> @@ -2053,6 +2080,8 @@ def RenewSshKeys(node_uuids, node_names, >>> ssh_port_map, >>> clear_authorized_keys=False, >>> clear_public_keys=False) >>> >>> + return all_node_errors >>> + >>> >>> def GetBlockDevSizes(devices): >>> """Return the size of the given block devices >>> diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py >>> index c504d60..14c18da 100644 >>> --- a/lib/cmdlib/cluster.py >>> +++ b/lib/cmdlib/cluster.py >>> @@ -210,7 +210,7 @@ class LUClusterRenewCrypto(NoHooksLU): >>> self.cfg.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid) >>> self.cfg.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid) >>> >>> - def _RenewSshKeys(self): >>> + def _RenewSshKeys(self, feedback_fn): >>> """Renew all nodes' SSH keys. >>> >>> """ >>> @@ -230,6 +230,11 @@ class LUClusterRenewCrypto(NoHooksLU): >>> master_candidate_uuids, >>> potential_master_candidates) >>> result[master_uuid].Raise("Could not renew the SSH keys of all >>> nodes") >>> >> >> Just out of curiosity: under which conditions would this actually be >> raised? >> If my observation has merit, maybe you can change the message to reflect >> the unexpectedness of the failure. >> > > Well, that can have several reasons, the most obvious ones are: > - key files on the master not reachable, > - the keys that are supposed to be renewed are not in the public key file > (that is a security mechanism), > - generating a new key of a node worked, but then master was not able to > fetch it. > The core of my objection here was the error message raised here should be different given the changes in the code. It makes it seem like there is a problem with the nodes whereas the master is at fault in one way or another - we should be ignoring node failures. 
That said, additional information should be present, so it's your call whether you modify this or not. > > >> >> Also, a comment and/or an empty line would be useful here. Otherwise it >> looks like a bit of a bug. >> The same is true for the next two entries. >> > > What do you mean by "next two entries"? > In the patch series, of a similar structure. Refactoring took care of that. > > >> >> >>> + node_errors = result[master_uuid].payload >>> + if node_errors: >>> + feedback_fn("Some nodes' SSH key files could not be updated:") >>> + for node_name, error_msg in node_errors: >>> + feedback_fn("%s: %s" % (node_name, error_msg)) >>> >>> def Exec(self, feedback_fn): >>> if self.op.node_certificates: >>> @@ -237,7 +242,7 @@ class LUClusterRenewCrypto(NoHooksLU): >>> self._RenewNodeSslCertificates(feedback_fn) >>> if self.op.ssh_keys and not self._ssh_renewal_suppressed: >>> feedback_fn("Renewing SSH keys") >>> - self._RenewSshKeys() >>> + self._RenewSshKeys(feedback_fn) >>> elif self._ssh_renewal_suppressed: >>> feedback_fn("Cannot renew SSH keys if the cluster is configured >>> to not" >>> " modify the SSH setup.") >>> diff --git a/lib/cmdlib/node.py b/lib/cmdlib/node.py >>> index 19b630a..6a13d8a 100644 >>> --- a/lib/cmdlib/node.py >>> +++ b/lib/cmdlib/node.py >>> @@ -339,7 +339,7 @@ class LUNodeAdd(LogicalUnit): >>> result.Raise("Failed to initialize OpenVSwitch on new node") >>> >>> def _SshUpdate(self, new_node_uuid, new_node_name, >>> is_master_candidate, >>> - is_potential_master_candidate, rpcrunner, readd): >>> + is_potential_master_candidate, rpcrunner, readd, >>> feedback_fn): >>> """Update the SSH setup of all nodes after adding a new node. 
>>> >>> @type readd: boolean >>> @@ -373,6 +373,11 @@ class LUNodeAdd(LogicalUnit): >>> is_master_candidate, is_potential_master_candidate, >>> is_potential_master_candidate) >>> result[master_node].Raise("Could not update the node's SSH setup.") >>> + node_errors = result[master_node].payload >>> + if node_errors: >>> + feedback_fn("Some nodes' SSH key files could not be updated:") >>> + for node_name, error_msg in node_errors.items(): >>> + feedback_fn("%s: %s." % (node_name, error_msg)) >>> >> >> Between SSL and the multiple placements, this really is calling out to be >> a helper function. >> > > Added it. Interdiff see below. This also has an effect on other patches. > Will send interdiffs there too. > > >> >> >>> >>> def Exec(self, feedback_fn): >>> """Adds the new node to the cluster. >>> @@ -481,7 +486,7 @@ class LUNodeAdd(LogicalUnit): >>> # FIXME: so far, all nodes are considered potential master >>> candidates >>> self._SshUpdate(self.new_node.uuid, self.new_node.name, >>> self.new_node.master_candidate, True, >>> - self.rpc, self.op.readd) >>> + self.rpc, self.op.readd, feedback_fn) >>> >>> >>> class LUNodeSetParams(LogicalUnit): >>> @@ -897,6 +902,11 @@ class LUNodeSetParams(LogicalUnit): >>> ssh_result[master_node].Raise( >>> "Could not update the SSH setup of node '%s' after >>> promotion" >>> " (UUID: %s)." % (node.name, node.uuid)) >> >> + node_errors = ssh_result[master_node].payload >>> + if node_errors: >>> + feedback_fn("Some nodes' SSH key files could not be >>> updated:") >>> + for node_name, error_msg in node_errors.items(): >>> + feedback_fn("%s: %s." 
% (node_name, error_msg)) >> >> >>> return result >>> >>> diff --git a/test/py/ganeti.backend_unittest.py b/test/py/ >>> ganeti.backend_unittest.py >>> index cc65501..e05580d 100755 >>> --- a/test/py/ganeti.backend_unittest.py >>> +++ b/test/py/ganeti.backend_unittest.py >>> @@ -1400,7 +1400,14 @@ class >>> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): >>> self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey( >>> node, node_key)) >>> >>> - def testAddKeySuccessfullyWithRetries(self): >>> + def testAddKeySuccessfullyOnNewNodeWithRetries(self): >>> + """Tests adding a new node's key when updating that node takes >>> retries. >>> + >>> + This test checks whether adding a new node's key successfully >>> updates >>> + all node's SSH key files, even if updating the new node's key files >>> itself >>> >> >> Nit: s/node's/nodes'/ for the first one. The second one stays as-is. >> Possible better formulation: >> > > I think you actually meant the second one. However, I picked this > alternative formulation: > We may have differed in lines. > > >> >> .. updates the SSH key files of all nodes, even if updating the new >> node's files ... >> > > See interdiff. > > > >> >> >>> + takes a couple of retries to succeed. >>> + >>> + """ >>> new_node_name = "new_node_name" >>> new_node_uuid = "new_node_uuid" >>> new_node_key = "new_node_key" >>> @@ -1430,7 +1437,14 @@ class >>> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): >>> self.assertTrue(self._ssh_file_manager.AllNodesHaveAuthorizedKey( >>> new_node_key)) >>> >>> - def testAddKeyFailedWithRetries(self): >>> + def testAddKeyFailedOnNewNodeWithRetries(self): >>> + """Tests clean up if updating a new node's SSH setup fails. >>> + >>> + If adding a new node's keys fails, because the SSH key files of that >>> + new node fails, check whether already carried out operations are >>> >> >> Nit: Better formulation: If adding the keys of a new node fails because >> the SSH key files ... >> > > Ack. 
See interdiff. > > >> >> >>> + successfully rolled back and thus the state of the cluster is >>> cleaned up. >>> + >>> + """ >>> new_node_name = "new_node_name" >>> new_node_uuid = "new_node_uuid" >>> new_node_key = "new_node_key" >>> @@ -1466,6 +1480,89 @@ class >>> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): >>> self.assertTrue(self._ssh_file_manager.NoNodeHasPublicKey( >>> new_node_uuid, new_node_key)) >>> >>> + def testAddKeySuccessfullyOnOldNodeWithRetries(self): >>> + """Tests adding a new key even if updating nodes takes retries. >>> + >>> + This tests whether adding a new node's key successfully finishes, >>> + even if one of the other cluster nodes takes a couple of retries >>> + to succeed. >>> + >>> + """ >>> + new_node_name = "new_node_name" >>> + new_node_uuid = "new_node_uuid" >>> + new_node_key = "new_node_key" >>> + is_master_candidate = True >>> + is_potential_master_candidate = True >>> + is_master = False >>> + >>> + other_node_name, _, _, _, _, _ = self._ssh_file_manager \ >>> + .GetAllMasterCandidates()[0] >>> >> >> Afaik, this will be fixed in a follow-up patch, right? >> I second Petr's suggestion of named tuples. >> > > Yes. 
> > >> >> >>> + self._ssh_file_manager.SetMaxRetries(other_node_name, 3) >>> + assert other_node_name != new_node_name >>> + self._AddNewNodeToTestData( >>> + new_node_name, new_node_uuid, new_node_key, >>> + is_potential_master_candidate, is_master_candidate, >>> + is_master) >>> + >>> + backend.AddNodeSshKey(new_node_uuid, new_node_name, >>> + self._potential_master_candidates, >>> + self._ssh_port_map, >>> + to_authorized_keys=is_master_candidate, >>> + to_public_keys=is_potential_master_candidate, >>> + get_public_keys=is_potential_master_candidate, >>> + pub_key_file=self._pub_key_file, >>> + ssconf_store=self._ssconf_mock, >>> + noded_cert_file=self.noded_cert_file, >>> + run_cmd_fn=self._run_cmd_mock) >>> + >>> + self.assertTrue(self._ssh_file_manager.AllNodesHaveAuthorizedKey( >>> + new_node_key)) >>> + >>> + def testAddKeyFailedOnOldNodeWithRetries(self): >>> + """Tests adding keys when updating one node's SSH setup fails. >>> + >>> + This tests, whether when adding a new node's key and one node is >>> >> >> Nit: s/,// >> >> >>> + unreachable (but not marked as offline) the operation still finishes >>> + properly and only that unreachable node's SSH key setup did not get >>> + updated. 
>>> + >>> + """ >>> + new_node_name = "new_node_name" >>> + new_node_uuid = "new_node_uuid" >>> + new_node_key = "new_node_key" >>> + is_master_candidate = True >>> + is_potential_master_candidate = True >>> + is_master = False >>> + >>> + other_node_name, _, _, _, _, _ = self._ssh_file_manager \ >>> + .GetAllMasterCandidates()[0] >>> + self._ssh_file_manager.SetMaxRetries(other_node_name, 4) >>> + assert other_node_name != new_node_name >>> + self._AddNewNodeToTestData( >>> + new_node_name, new_node_uuid, new_node_key, >>> + is_potential_master_candidate, is_master_candidate, >>> + is_master) >>> + >>> + node_errors = backend.AddNodeSshKey( >>> + new_node_uuid, new_node_name, self._potential_master_candidates, >>> + self._ssh_port_map, to_authorized_keys=is_master_candidate, >>> + to_public_keys=is_potential_master_candidate, >>> + get_public_keys=is_potential_master_candidate, >>> + pub_key_file=self._pub_key_file, >>> + ssconf_store=self._ssconf_mock, >>> + noded_cert_file=self.noded_cert_file, >>> + run_cmd_fn=self._run_cmd_mock) >>> + >>> + for node in self._all_nodes: >>> + if node == other_node_name: >>> + self.assertFalse(self._ssh_file_manager.NodeHasAuthorizedKey( >>> + node, new_node_key)) >>> + else: >>> + self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey( >>> + node, new_node_key)) >>> + >>> + self.assertTrue(node_errors[other_node_name]) >>> + >>> >>> class TestVerifySshSetup(testutils.GanetiTestCase): >>> >>> -- >>> 2.2.0.rc0.207.ga3a616c >>> >>> > Interdiff: > > commit 49ced9fa7adefd1865ca9933d0a5bb5252fc1726 > Author: Helga Velroyen <[email protected]> > Date: Thu Apr 23 11:48:40 2015 +0200 > > interdiff > > diff --git a/lib/backend.py b/lib/backend.py > index 5a09efa..6a17edb 100644 > --- a/lib/backend.py > +++ b/lib/backend.py > @@ -1564,7 +1564,7 @@ def AddNodeSshKey(node_uuid, node_name, > master_node = ssconf_store.GetMasterNode() > online_nodes = ssconf_store.GetOnlineNodeList() > > - node_errors = {} > + node_errors = [] > for node 
in all_nodes: > if node == master_node: > logging.debug("Skipping master node '%s'.", master_node) > @@ -1589,10 +1589,10 @@ def AddNodeSshKey(node_uuid, node_name, > if last_exception: > error_msg = ("When adding the key of node '%s', updating SSH > key" > " files of node '%s' failed after %s retries." > - " Not trying again. Last errors was: %s." % > + " Not trying again. Last error was: %s." % > (node, node_name, constants.SSHS_MAX_RETRIES, > last_exception)) > - node_errors[node] = error_msg > + node_errors.append((node, error_msg)) > # We only log the error and don't throw an exception, because > # one unreachable node shall not abort the entire procedure. > logging.error(error_msg) > @@ -2024,7 +2024,7 @@ def RenewSshKeys(node_uuids, node_names, > ssh_port_map, > noded_cert_file=noded_cert_file, > run_cmd_fn=run_cmd_fn) > if node_errors: > - all_node_errors = all_node_errors + node_errors.items() > + all_node_errors = all_node_errors + node_errors > > # Renewing the master node's key > > @@ -2056,7 +2056,7 @@ def RenewSshKeys(node_uuids, node_names, > ssh_port_map, > ssconf_store=ssconf_store, noded_cert_file=noded_cert_file, > run_cmd_fn=run_cmd_fn) > if node_errors: > - all_node_errors = all_node_errors + node_errors.items() > + all_node_errors = all_node_errors + node_errors > > # Remove the old key file and rename the new key to the non-temporary > filename > _ReplaceMasterKeyOnMaster(root_keyfiles) > diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py > index 14c18da..b56ca61 100644 > --- a/lib/cmdlib/cluster.py > +++ b/lib/cmdlib/cluster.py > @@ -68,7 +68,8 @@ from ganeti.cmdlib.common import ShareAll, RunPostHook, \ > CheckDiskAccessModeConsistency, CreateNewClientCert, \ > AddInstanceCommunicationNetworkOp, > ConnectInstanceCommunicationNetworkOp, \ > CheckImageValidity, \ > - CheckDiskAccessModeConsistency, CreateNewClientCert, EnsureKvmdOnNodes > + CheckDiskAccessModeConsistency, CreateNewClientCert, EnsureKvmdOnNodes, > \ > + 
EvaluateSshUpdateRPC > > import ganeti.masterd.instance > > @@ -224,17 +225,19 @@ class LUClusterRenewCrypto(NoHooksLU): > port_map = ssh.GetSshPortMap(node_names, self.cfg) > potential_master_candidates = self.cfg.GetPotentialMasterCandidates() > master_candidate_uuids = self.cfg.GetMasterCandidateUuids() > + > result = self.rpc.call_node_ssh_keys_renew( > [master_uuid], > node_uuids, node_names, port_map, > master_candidate_uuids, > potential_master_candidates) > + > + # Check if there were serious errors (for example master key files not > + # writable). > result[master_uuid].Raise("Could not renew the SSH keys of all nodes") > - node_errors = result[master_uuid].payload > - if node_errors: > - feedback_fn("Some nodes' SSH key files could not be updated:") > - for node_name, error_msg in node_errors: > - feedback_fn("%s: %s" % (node_name, error_msg)) > + > + # Process any non-disruptive errors (a few nodes unreachable etc.) > + EvaluateSshUpdateRPC(result, master_uuid, feedback_fn) > > def Exec(self, feedback_fn): > if self.op.node_certificates: > diff --git a/lib/cmdlib/common.py b/lib/cmdlib/common.py > index 21948af..5e5f573 100644 > --- a/lib/cmdlib/common.py > +++ b/lib/cmdlib/common.py > @@ -1554,3 +1554,11 @@ def EnsureKvmdOnNodes(lu, feedback_fn, nodes=None): > for node_uuid in stop_nodes: > results[node_uuid].Warn("Failed to stop KVM daemon in node '%s'" % > node_uuid, feedback_fn) > + > + > +def EvaluateSshUpdateRPC(result, master_uuid, feedback_fn): > + node_errors = result[master_uuid].payload > + if node_errors: > + feedback_fn("Some nodes' SSH key files could not be updated:") > + for node_name, error_msg in node_errors: > + feedback_fn("%s: %s" % (node_name, error_msg)) > diff --git a/lib/cmdlib/node.py b/lib/cmdlib/node.py > index 6a13d8a..c90965e 100644 > --- a/lib/cmdlib/node.py > +++ b/lib/cmdlib/node.py > @@ -53,7 +53,7 @@ from ganeti.cmdlib.common import CheckParamsNotGlobal, \ > GetWantedNodes, MapInstanceLvsToNodes, RunPostHook, \ > 
FindFaultyInstanceDisks, CheckStorageTypeEnabled, CreateNewClientCert, \ > AddNodeCertToCandidateCerts, RemoveNodeCertFromCandidateCerts, \ > - EnsureKvmdOnNodes > + EnsureKvmdOnNodes, EvaluateSshUpdateRPC > from ganeti.ssh import GetSshPortMap > > > @@ -372,12 +372,9 @@ class LUNodeAdd(LogicalUnit): > potential_master_candidates, port_map, > is_master_candidate, is_potential_master_candidate, > is_potential_master_candidate) > + > result[master_node].Raise("Could not update the node's SSH setup.") > - node_errors = result[master_node].payload > - if node_errors: > - feedback_fn("Some nodes' SSH key files could not be updated:") > - for node_name, error_msg in node_errors.items(): > - feedback_fn("%s: %s." % (node_name, error_msg)) > + EvaluateSshUpdateRPC(result, master_node, feedback_fn) > > def Exec(self, feedback_fn): > """Adds the new node to the cluster. > @@ -902,11 +899,7 @@ class LUNodeSetParams(LogicalUnit): > ssh_result[master_node].Raise( > "Could not update the SSH setup of node '%s' after promotion" > " (UUID: %s)." % (node.name, node.uuid)) > - node_errors = ssh_result[master_node].payload > - if node_errors: > - feedback_fn("Some nodes' SSH key files could not be updated:") > - for node_name, error_msg in node_errors.items(): > - feedback_fn("%s: %s." % (node_name, error_msg)) > + EvaluateSshUpdateRPC(ssh_result, master_node, feedback_fn) > > return result > > diff --git a/test/py/ganeti.backend_unittest.py b/test/py/ > ganeti.backend_unittest.py > index ec61f07..77b4f46 100755 > --- a/test/py/ganeti.backend_unittest.py > +++ b/test/py/ganeti.backend_unittest.py > @@ -1404,8 +1404,8 @@ class > TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): > """Tests adding a new node's key when updating that node takes > retries. > > This test checks whether adding a new node's key successfully updates > - all node's SSH key files, even if updating the new node's key files > itself > - takes a couple of retries to succeed. 
> + the SSH key files of all nodes, even if updating the new node's key > files > + itself takes a couple of retries to succeed. > > """ > new_node_name = "new_node_name" > @@ -1441,8 +1441,8 @@ class > TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): > def testAddKeyFailedOnNewNodeWithRetries(self): > """Tests clean up if updating a new node's SSH setup fails. > > - If adding a new node's keys fails, because the SSH key files of that > - new node fails, check whether already carried out operations are > + If adding the keys of a new node fails, because updating the SSH key > files > + of that new node fails, check whether already carried out operations > are > successfully rolled back and thus the state of the cluster is cleaned > up. > > """ > @@ -1524,7 +1524,7 @@ class > TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): > def testAddKeyFailedOnOldNodeWithRetries(self): > """Tests adding keys when updating one node's SSH setup fails. > > - This tests, whether when adding a new node's key and one node is > + This tests whether when adding a new node's key and one node is > unreachable (but not marked as offline) the operation still finishes > properly and only that unreachable node's SSH key setup did not get > updated. > @@ -1565,7 +1565,8 @@ class > TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): > self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey( > node, new_node_key)) > > - self.assertTrue(node_errors[other_node_name]) > + self.assertTrue([error_msg for (node, error_msg) in node_errors > + if node == other_node_name]) > > > class TestVerifySshSetup(testutils.GanetiTestCase): > > >> >> Hrvoje Ribicic >> Ganeti Engineering >> Google Germany GmbH >> Dienerstr. 
12, 80331, München >> >> Registergericht und -nummer: Hamburg, HRB 86891 >> Sitz der Gesellschaft: Hamburg >> Geschäftsführer: Graham Law, Christine Elizabeth Flores >> Steuernummer: 48/725/00206 >> Umsatzsteueridentifikationsnummer: DE813741370 >> > Hrvoje Ribicic Ganeti Engineering Google Germany GmbH Dienerstr. 12, 80331, München Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg Geschäftsführer: Graham Law, Christine Elizabeth Flores Steuernummer: 48/725/00206 Umsatzsteueridentifikationsnummer: DE813741370
