Hi!

On Wed, 22 Apr 2015 at 18:41 Hrvoje Ribicic <[email protected]> wrote:

> On Fri, Apr 17, 2015 at 4:37 PM, 'Helga Velroyen' via ganeti-devel <
> [email protected]> wrote:
>
>> This patch introduces retries for updating non-master nodes
>> when a new SSH key is added to the cluster. If an SSH update
>> fails even after the maximum number of retries is exhausted,
>> this does not raise an exception, but an error message is
>> added to the result of the method, which can be displayed
>> in the LU operations so that the admin sees which nodes
>> were the problem and need further attention.
>>
>> Signed-off-by: Helga Velroyen <[email protected]>
>> ---
>>  lib/backend.py                     |  73 +++++++++++++++++++--------
>>  lib/cmdlib/cluster.py              |   9 +++-
>>  lib/cmdlib/node.py                 |  14 ++++-
>>  test/py/ganeti.backend_unittest.py | 101
>> ++++++++++++++++++++++++++++++++++++-
>>  4 files changed, 169 insertions(+), 28 deletions(-)
>>
>> diff --git a/lib/backend.py b/lib/backend.py
>> index 818f3fb..3b17306 100644
>> --- a/lib/backend.py
>> +++ b/lib/backend.py
>> @@ -1566,17 +1566,38 @@ def AddNodeSshKey(node_uuid, node_name,
>>    master_node = ssconf_store.GetMasterNode()
>>    online_nodes = ssconf_store.GetOnlineNodeList()
>>
>> +  node_errors = {}
>
>    for node in all_nodes:
>>      if node == master_node:
>> +      logging.debug("Skipping master node '%s'.", master_node)
>>        continue
>>      if node not in online_nodes:
>>        logging.debug("Skipping offline node '%s'.", node)
>>        continue
>>      if node in potential_master_candidates:
>> -      run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
>> -                 ssh_port_map.get(node), pot_mc_data,
>> -                 debug=False, verbose=False, use_cluster_key=False,
>> -                 ask_key=False, strict_host_check=False)
>> +      logging.debug("Updating SSH key files of node '%s'.")
>> +      for i in range(max_retries):
>> +        try:
>> +          run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
>> +                     ssh_port_map.get(node), pot_mc_data,
>> +                     debug=False, verbose=False, use_cluster_key=False,
>> +                     ask_key=False, strict_host_check=False)
>> +          break
>> +        except errors.OpExecError as e:
>> +          logging.error("Updating SSH key files of node '%s' failed"
>> +                        " at try no %s. Error: %s.", node, i, e)
>> +          last_exception = e
>> +      else:
>> +        if last_exception:
>> +          error_msg = ("When adding the key of node '%s', updating SSH
>> key"
>> +                       " files of node '%s' failed after %s retries."
>> +                       " Not trying again. Last errors was: %s." %
>>
>
> Nitpick: s/errors/error/
>

ACK.


>
>
>> +                       (node, node_name, max_retries, last_exception))
>
> +          node_errors[node] = error_msg
>> +          # We only log the error and don't throw an exception, because
>> +          # one unreachable node shall not abort the entire procedure.
>> +          logging.error(error_msg)
>> +
>>      else:
>>        if to_authorized_keys:
>>          run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
>> @@ -1584,6 +1605,8 @@ def AddNodeSshKey(node_uuid, node_name,
>>                     debug=False, verbose=False, use_cluster_key=False,
>>                     ask_key=False, strict_host_check=False)
>>
>> +  return node_errors
>> +
>>
>>  def RemoveNodeSshKey(node_uuid, node_name,
>>                       master_candidate_uuids,
>> @@ -1925,6 +1948,10 @@ def RenewSshKeys(node_uuids, node_names,
>> ssh_port_map,
>>
>>    master_node_name = ssconf_store.GetMasterNode()
>>    master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map,
>> master_node_name)
>> +  # List of all node errors that happened, but which did not abort the
>> +  # procedure as a whole. It is important that this is a list to have a
>> +  # somewhat chronological history of events.
>> +  all_node_errors = []
>>
>>    # process non-master nodes
>>    for node_uuid, node_name in node_uuid_name_map:
>> @@ -1989,15 +2016,16 @@ def RenewSshKeys(node_uuids, node_names,
>> ssh_port_map,
>>        ssh.AddPublicKey(node_uuid, pub_key, key_file=pub_key_file)
>>
>>      logging.debug("Add ssh key of node '%s'.", node_name)
>> -    AddNodeSshKey(node_uuid, node_name,
>> -                  potential_master_candidates,
>> -                  ssh_port_map,
>> -                  to_authorized_keys=master_candidate,
>> -                  to_public_keys=potential_master_candidate,
>> -                  get_public_keys=True,
>> -                  pub_key_file=pub_key_file, ssconf_store=ssconf_store,
>> -                  noded_cert_file=noded_cert_file,
>> -                  run_cmd_fn=run_cmd_fn)
>> +    node_errors = AddNodeSshKey(
>> +        node_uuid, node_name, potential_master_candidates,
>> +        ssh_port_map, to_authorized_keys=master_candidate,
>> +        to_public_keys=potential_master_candidate,
>> +        get_public_keys=True,
>> +        pub_key_file=pub_key_file, ssconf_store=ssconf_store,
>> +        noded_cert_file=noded_cert_file,
>> +        run_cmd_fn=run_cmd_fn)
>> +    if node_errors:
>> +      all_node_errors = all_node_errors + node_errors.items()
>>
>>    # Renewing the master node's key
>>
>> @@ -2022,15 +2050,14 @@ def RenewSshKeys(node_uuids, node_names,
>> ssh_port_map,
>>
>>    # Add new master key to all node's public and authorized keys
>>    logging.debug("Add new master key to all nodes.")
>> -  AddNodeSshKey(master_node_uuid, master_node_name,
>> -                potential_master_candidates,
>> -                ssh_port_map,
>> -                to_authorized_keys=True,
>> -                to_public_keys=True,
>> -                get_public_keys=False,
>> -                pub_key_file=pub_key_file, ssconf_store=ssconf_store,
>> -                noded_cert_file=noded_cert_file,
>> -                run_cmd_fn=run_cmd_fn)
>> +  node_errors = AddNodeSshKey(
>> +      master_node_uuid, master_node_name, potential_master_candidates,
>> +      ssh_port_map, to_authorized_keys=True, to_public_keys=True,
>> +      get_public_keys=False, pub_key_file=pub_key_file,
>> +      ssconf_store=ssconf_store, noded_cert_file=noded_cert_file,
>> +      run_cmd_fn=run_cmd_fn)
>> +  if node_errors:
>> +    all_node_errors = all_node_errors + node_errors.items()
>>
>>    # Remove the old key file and rename the new key to the non-temporary
>> filename
>>    _ReplaceMasterKeyOnMaster(root_keyfiles)
>> @@ -2053,6 +2080,8 @@ def RenewSshKeys(node_uuids, node_names,
>> ssh_port_map,
>>                     clear_authorized_keys=False,
>>                     clear_public_keys=False)
>>
>> +  return all_node_errors
>> +
>>
>>  def GetBlockDevSizes(devices):
>>    """Return the size of the given block devices
>> diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py
>> index c504d60..14c18da 100644
>> --- a/lib/cmdlib/cluster.py
>> +++ b/lib/cmdlib/cluster.py
>> @@ -210,7 +210,7 @@ class LUClusterRenewCrypto(NoHooksLU):
>>      self.cfg.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid)
>>      self.cfg.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid)
>>
>> -  def _RenewSshKeys(self):
>> +  def _RenewSshKeys(self, feedback_fn):
>>      """Renew all nodes' SSH keys.
>>
>>      """
>> @@ -230,6 +230,11 @@ class LUClusterRenewCrypto(NoHooksLU):
>>        master_candidate_uuids,
>>        potential_master_candidates)
>>      result[master_uuid].Raise("Could not renew the SSH keys of all
>> nodes")
>>
>
> Just out of curiosity: under which conditions would this actually be
> raised?
> If my observation has merit, maybe you can change the message to reflect
> the unexpectedness of the failure.
>

Well, that can happen for several reasons; the most obvious ones are:
- the key files on the master are not reachable,
- the keys that are supposed to be renewed are not in the public key file
(that is a security mechanism),
- generating a new key on a node worked, but then the master was not able to
fetch it.


>
> Also, a comment and/or an empty line would be useful here. Otherwise it
> looks like a bit of a bug.
> The same is true for the next two entries.
>

What do you mean by "next two entries"?


>
>
>> +    node_errors = result[master_uuid].payload
>> +    if node_errors:
>> +      feedback_fn("Some nodes' SSH key files could not be updated:")
>> +      for node_name, error_msg in node_errors:
>> +        feedback_fn("%s: %s" % (node_name, error_msg))
>>
>>    def Exec(self, feedback_fn):
>>      if self.op.node_certificates:
>> @@ -237,7 +242,7 @@ class LUClusterRenewCrypto(NoHooksLU):
>>        self._RenewNodeSslCertificates(feedback_fn)
>>      if self.op.ssh_keys and not self._ssh_renewal_suppressed:
>>        feedback_fn("Renewing SSH keys")
>> -      self._RenewSshKeys()
>> +      self._RenewSshKeys(feedback_fn)
>>      elif self._ssh_renewal_suppressed:
>>        feedback_fn("Cannot renew SSH keys if the cluster is configured to
>> not"
>>                    " modify the SSH setup.")
>> diff --git a/lib/cmdlib/node.py b/lib/cmdlib/node.py
>> index 19b630a..6a13d8a 100644
>> --- a/lib/cmdlib/node.py
>> +++ b/lib/cmdlib/node.py
>> @@ -339,7 +339,7 @@ class LUNodeAdd(LogicalUnit):
>>        result.Raise("Failed to initialize OpenVSwitch on new node")
>>
>>    def _SshUpdate(self, new_node_uuid, new_node_name, is_master_candidate,
>> -                 is_potential_master_candidate, rpcrunner, readd):
>> +                 is_potential_master_candidate, rpcrunner, readd,
>> feedback_fn):
>>      """Update the SSH setup of all nodes after adding a new node.
>>
>>      @type readd: boolean
>> @@ -373,6 +373,11 @@ class LUNodeAdd(LogicalUnit):
>>        is_master_candidate, is_potential_master_candidate,
>>        is_potential_master_candidate)
>>      result[master_node].Raise("Could not update the node's SSH setup.")
>> +    node_errors = result[master_node].payload
>> +    if node_errors:
>> +      feedback_fn("Some nodes' SSH key files could not be updated:")
>> +      for node_name, error_msg in node_errors.items():
>> +        feedback_fn("%s: %s." % (node_name, error_msg))
>>
>
> Between SSL and the multiple placements, this really is calling out to be
> a helper function.
>

Added it; see the interdiff below. This also has an effect on other patches.
I will send interdiffs for those too.


>
>
>>
>>    def Exec(self, feedback_fn):
>>      """Adds the new node to the cluster.
>> @@ -481,7 +486,7 @@ class LUNodeAdd(LogicalUnit):
>>        # FIXME: so far, all nodes are considered potential master
>> candidates
>>        self._SshUpdate(self.new_node.uuid, self.new_node.name,
>>                        self.new_node.master_candidate, True,
>> -                      self.rpc, self.op.readd)
>> +                      self.rpc, self.op.readd, feedback_fn)
>>
>>
>>  class LUNodeSetParams(LogicalUnit):
>> @@ -897,6 +902,11 @@ class LUNodeSetParams(LogicalUnit):
>>            ssh_result[master_node].Raise(
>>              "Could not update the SSH setup of node '%s' after promotion"
>>              " (UUID: %s)." % (node.name, node.uuid))
>
> +          node_errors = ssh_result[master_node].payload
>> +          if node_errors:
>> +            feedback_fn("Some nodes' SSH key files could not be
>> updated:")
>> +            for node_name, error_msg in node_errors.items():
>> +              feedback_fn("%s: %s." % (node_name, error_msg))
>
>
>>      return result
>>
>> diff --git a/test/py/ganeti.backend_unittest.py b/test/py/
>> ganeti.backend_unittest.py
>> index cc65501..e05580d 100755
>> --- a/test/py/ganeti.backend_unittest.py
>> +++ b/test/py/ganeti.backend_unittest.py
>> @@ -1400,7 +1400,14 @@ class
>> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
>>          self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey(
>>              node, node_key))
>>
>> -  def testAddKeySuccessfullyWithRetries(self):
>> +  def testAddKeySuccessfullyOnNewNodeWithRetries(self):
>> +    """Tests adding a new node's key when updating that node takes
>> retries.
>> +
>> +    This test checks whether adding a new node's key successfully updates
>> +    all node's SSH key files, even if updating the new node's key files
>> itself
>>
>
> Nit: s/node's/nodes'/ for the first one. The second one stays as-is.
> Possible better formulation:
>

I think you actually meant the second one. However, I picked this
alternative formulation:


>
> .. updates the SSH key files of all nodes, even if updating the new node's
> files ...
>

See interdiff.



>
>
>> +    takes a couple of retries to succeed.
>> +
>> +    """
>>      new_node_name = "new_node_name"
>>      new_node_uuid = "new_node_uuid"
>>      new_node_key = "new_node_key"
>> @@ -1430,7 +1437,14 @@ class
>> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
>>      self.assertTrue(self._ssh_file_manager.AllNodesHaveAuthorizedKey(
>>          new_node_key))
>>
>> -  def testAddKeyFailedWithRetries(self):
>> +  def testAddKeyFailedOnNewNodeWithRetries(self):
>> +    """Tests clean up if updating a new node's SSH setup fails.
>> +
>> +    If adding a new node's keys fails, because the SSH key files of that
>> +    new node fails, check whether already carried out operations are
>>
>
> Nit: Better formulation: If adding the keys of a new node fails because
> the SSH key files ...
>

Ack. See interdiff.


>
>
>> +    successfully rolled back and thus the state of the cluster is
>> cleaned up.
>> +
>> +    """
>>      new_node_name = "new_node_name"
>>      new_node_uuid = "new_node_uuid"
>>      new_node_key = "new_node_key"
>> @@ -1466,6 +1480,89 @@ class
>> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
>>      self.assertTrue(self._ssh_file_manager.NoNodeHasPublicKey(
>>          new_node_uuid, new_node_key))
>>
>> +  def testAddKeySuccessfullyOnOldNodeWithRetries(self):
>> +    """Tests adding a new key even if updating nodes takes retries.
>> +
>> +    This tests whether adding a new node's key successfully finishes,
>> +    even if one of the other cluster nodes takes a couple of retries
>> +    to succeed.
>> +
>> +    """
>> +    new_node_name = "new_node_name"
>> +    new_node_uuid = "new_node_uuid"
>> +    new_node_key = "new_node_key"
>> +    is_master_candidate = True
>> +    is_potential_master_candidate = True
>> +    is_master = False
>> +
>> +    other_node_name, _, _, _, _, _ = self._ssh_file_manager \
>> +        .GetAllMasterCandidates()[0]
>>
>
> Afaik, this will be fixed in a follow-up patch, right?
> I second Petr's suggestion of named tuples.
>

Yes.


>
>
>> +    self._ssh_file_manager.SetMaxRetries(other_node_name, 3)
>> +    assert other_node_name != new_node_name
>> +    self._AddNewNodeToTestData(
>> +        new_node_name, new_node_uuid, new_node_key,
>> +        is_potential_master_candidate, is_master_candidate,
>> +        is_master)
>> +
>> +    backend.AddNodeSshKey(new_node_uuid, new_node_name,
>> +                          self._potential_master_candidates,
>> +                          self._ssh_port_map,
>> +                          to_authorized_keys=is_master_candidate,
>> +                          to_public_keys=is_potential_master_candidate,
>> +                          get_public_keys=is_potential_master_candidate,
>> +                          pub_key_file=self._pub_key_file,
>> +                          ssconf_store=self._ssconf_mock,
>> +                          noded_cert_file=self.noded_cert_file,
>> +                          run_cmd_fn=self._run_cmd_mock)
>> +
>> +    self.assertTrue(self._ssh_file_manager.AllNodesHaveAuthorizedKey(
>> +        new_node_key))
>> +
>> +  def testAddKeyFailedOnOldNodeWithRetries(self):
>> +    """Tests adding keys when updating one node's SSH setup fails.
>> +
>> +    This tests, whether when adding a new node's key and one node is
>>
>
> Nit: s/,//
>
>
>> +    unreachable (but not marked as offline) the operation still finishes
>> +    properly and only that unreachable node's SSH key setup did not get
>> +    updated.
>> +
>> +    """
>> +    new_node_name = "new_node_name"
>> +    new_node_uuid = "new_node_uuid"
>> +    new_node_key = "new_node_key"
>> +    is_master_candidate = True
>> +    is_potential_master_candidate = True
>> +    is_master = False
>> +
>> +    other_node_name, _, _, _, _, _ = self._ssh_file_manager \
>> +        .GetAllMasterCandidates()[0]
>> +    self._ssh_file_manager.SetMaxRetries(other_node_name, 4)
>> +    assert other_node_name != new_node_name
>> +    self._AddNewNodeToTestData(
>> +        new_node_name, new_node_uuid, new_node_key,
>> +        is_potential_master_candidate, is_master_candidate,
>> +        is_master)
>> +
>> +    node_errors = backend.AddNodeSshKey(
>> +        new_node_uuid, new_node_name, self._potential_master_candidates,
>> +        self._ssh_port_map, to_authorized_keys=is_master_candidate,
>> +        to_public_keys=is_potential_master_candidate,
>> +        get_public_keys=is_potential_master_candidate,
>> +        pub_key_file=self._pub_key_file,
>> +        ssconf_store=self._ssconf_mock,
>> +        noded_cert_file=self.noded_cert_file,
>> +        run_cmd_fn=self._run_cmd_mock)
>> +
>> +    for node in self._all_nodes:
>> +      if node == other_node_name:
>> +        self.assertFalse(self._ssh_file_manager.NodeHasAuthorizedKey(
>> +          node, new_node_key))
>> +      else:
>> +        self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey(
>> +          node, new_node_key))
>> +
>> +    self.assertTrue(node_errors[other_node_name])
>> +
>>
>>  class TestVerifySshSetup(testutils.GanetiTestCase):
>>
>> --
>> 2.2.0.rc0.207.ga3a616c
>>
>>
Interdiff:

commit 49ced9fa7adefd1865ca9933d0a5bb5252fc1726
Author: Helga Velroyen <[email protected]>
Date:   Thu Apr 23 11:48:40 2015 +0200

    interdiff

diff --git a/lib/backend.py b/lib/backend.py
index 5a09efa..6a17edb 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -1564,7 +1564,7 @@ def AddNodeSshKey(node_uuid, node_name,
   master_node = ssconf_store.GetMasterNode()
   online_nodes = ssconf_store.GetOnlineNodeList()

-  node_errors = {}
+  node_errors = []
   for node in all_nodes:
     if node == master_node:
       logging.debug("Skipping master node '%s'.", master_node)
@@ -1589,10 +1589,10 @@ def AddNodeSshKey(node_uuid, node_name,
         if last_exception:
           error_msg = ("When adding the key of node '%s', updating SSH key"
                        " files of node '%s' failed after %s retries."
-                       " Not trying again. Last errors was: %s." %
+                       " Not trying again. Last error was: %s." %
                        (node, node_name, constants.SSHS_MAX_RETRIES,
                         last_exception))
-          node_errors[node] = error_msg
+          node_errors.append((node, error_msg))
           # We only log the error and don't throw an exception, because
           # one unreachable node shall not abort the entire procedure.
           logging.error(error_msg)
@@ -2024,7 +2024,7 @@ def RenewSshKeys(node_uuids, node_names, ssh_port_map,
         noded_cert_file=noded_cert_file,
         run_cmd_fn=run_cmd_fn)
     if node_errors:
-      all_node_errors = all_node_errors + node_errors.items()
+      all_node_errors = all_node_errors + node_errors

   # Renewing the master node's key

@@ -2056,7 +2056,7 @@ def RenewSshKeys(node_uuids, node_names, ssh_port_map,
       ssconf_store=ssconf_store, noded_cert_file=noded_cert_file,
       run_cmd_fn=run_cmd_fn)
   if node_errors:
-    all_node_errors = all_node_errors + node_errors.items()
+    all_node_errors = all_node_errors + node_errors

   # Remove the old key file and rename the new key to the non-temporary
filename
   _ReplaceMasterKeyOnMaster(root_keyfiles)
diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py
index 14c18da..b56ca61 100644
--- a/lib/cmdlib/cluster.py
+++ b/lib/cmdlib/cluster.py
@@ -68,7 +68,8 @@ from ganeti.cmdlib.common import ShareAll, RunPostHook, \
   CheckDiskAccessModeConsistency, CreateNewClientCert, \
   AddInstanceCommunicationNetworkOp,
ConnectInstanceCommunicationNetworkOp, \
   CheckImageValidity, \
-  CheckDiskAccessModeConsistency, CreateNewClientCert, EnsureKvmdOnNodes
+  CheckDiskAccessModeConsistency, CreateNewClientCert, EnsureKvmdOnNodes, \
+  EvaluateSshUpdateRPC

 import ganeti.masterd.instance

@@ -224,17 +225,19 @@ class LUClusterRenewCrypto(NoHooksLU):
     port_map = ssh.GetSshPortMap(node_names, self.cfg)
     potential_master_candidates = self.cfg.GetPotentialMasterCandidates()
     master_candidate_uuids = self.cfg.GetMasterCandidateUuids()
+
     result = self.rpc.call_node_ssh_keys_renew(
       [master_uuid],
       node_uuids, node_names, port_map,
       master_candidate_uuids,
       potential_master_candidates)
+
+    # Check if there were serious errors (for example master key files not
+    # writable).
     result[master_uuid].Raise("Could not renew the SSH keys of all nodes")
-    node_errors = result[master_uuid].payload
-    if node_errors:
-      feedback_fn("Some nodes' SSH key files could not be updated:")
-      for node_name, error_msg in node_errors:
-        feedback_fn("%s: %s" % (node_name, error_msg))
+
+    # Process any non-disruptive errors (a few nodes unreachable etc.)
+    EvaluateSshUpdateRPC(result, master_uuid, feedback_fn)

   def Exec(self, feedback_fn):
     if self.op.node_certificates:
diff --git a/lib/cmdlib/common.py b/lib/cmdlib/common.py
index 21948af..5e5f573 100644
--- a/lib/cmdlib/common.py
+++ b/lib/cmdlib/common.py
@@ -1554,3 +1554,11 @@ def EnsureKvmdOnNodes(lu, feedback_fn, nodes=None):
     for node_uuid in stop_nodes:
       results[node_uuid].Warn("Failed to stop KVM daemon in node '%s'" %
                               node_uuid, feedback_fn)
+
+
+def EvaluateSshUpdateRPC(result, master_uuid, feedback_fn):
+  node_errors = result[master_uuid].payload
+  if node_errors:
+    feedback_fn("Some nodes' SSH key files could not be updated:")
+    for node_name, error_msg in node_errors:
+      feedback_fn("%s: %s" % (node_name, error_msg))
diff --git a/lib/cmdlib/node.py b/lib/cmdlib/node.py
index 6a13d8a..c90965e 100644
--- a/lib/cmdlib/node.py
+++ b/lib/cmdlib/node.py
@@ -53,7 +53,7 @@ from ganeti.cmdlib.common import CheckParamsNotGlobal, \
   GetWantedNodes, MapInstanceLvsToNodes, RunPostHook, \
   FindFaultyInstanceDisks, CheckStorageTypeEnabled, CreateNewClientCert, \
   AddNodeCertToCandidateCerts, RemoveNodeCertFromCandidateCerts, \
-  EnsureKvmdOnNodes
+  EnsureKvmdOnNodes, EvaluateSshUpdateRPC
 from ganeti.ssh import GetSshPortMap


@@ -372,12 +372,9 @@ class LUNodeAdd(LogicalUnit):
       potential_master_candidates, port_map,
       is_master_candidate, is_potential_master_candidate,
       is_potential_master_candidate)
+
     result[master_node].Raise("Could not update the node's SSH setup.")
-    node_errors = result[master_node].payload
-    if node_errors:
-      feedback_fn("Some nodes' SSH key files could not be updated:")
-      for node_name, error_msg in node_errors.items():
-        feedback_fn("%s: %s." % (node_name, error_msg))
+    EvaluateSshUpdateRPC(result, master_node, feedback_fn)

   def Exec(self, feedback_fn):
     """Adds the new node to the cluster.
@@ -902,11 +899,7 @@ class LUNodeSetParams(LogicalUnit):
           ssh_result[master_node].Raise(
             "Could not update the SSH setup of node '%s' after promotion"
             " (UUID: %s)." % (node.name, node.uuid))
-          node_errors = ssh_result[master_node].payload
-          if node_errors:
-            feedback_fn("Some nodes' SSH key files could not be updated:")
-            for node_name, error_msg in node_errors.items():
-              feedback_fn("%s: %s." % (node_name, error_msg))
+          EvaluateSshUpdateRPC(ssh_result, master_node, feedback_fn)

     return result

diff --git a/test/py/ganeti.backend_unittest.py b/test/py/
ganeti.backend_unittest.py
index ec61f07..77b4f46 100755
--- a/test/py/ganeti.backend_unittest.py
+++ b/test/py/ganeti.backend_unittest.py
@@ -1404,8 +1404,8 @@ class
TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
     """Tests adding a new node's key when updating that node takes retries.

     This test checks whether adding a new node's key successfully updates
-    all node's SSH key files, even if updating the new node's key files
itself
-    takes a couple of retries to succeed.
+    the SSH key files of all nodes, even if updating the new node's key
files
+    itself takes a couple of retries to succeed.

     """
     new_node_name = "new_node_name"
@@ -1441,8 +1441,8 @@ class
TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
   def testAddKeyFailedOnNewNodeWithRetries(self):
     """Tests clean up if updating a new node's SSH setup fails.

-    If adding a new node's keys fails, because the SSH key files of that
-    new node fails, check whether already carried out operations are
+    If adding the keys of a new node fails, because updating the SSH key
files
+    of that new node fails, check whether already carried out operations
are
     successfully rolled back and thus the state of the cluster is cleaned
up.

     """
@@ -1524,7 +1524,7 @@ class
TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
   def testAddKeyFailedOnOldNodeWithRetries(self):
     """Tests adding keys when updating one node's SSH setup fails.

-    This tests, whether when adding a new node's key and one node is
+    This tests whether when adding a new node's key and one node is
     unreachable (but not marked as offline) the operation still finishes
     properly and only that unreachable node's SSH key setup did not get
     updated.
@@ -1565,7 +1565,8 @@ class
TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
         self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey(
           node, new_node_key))

-    self.assertTrue(node_errors[other_node_name])
+    self.assertTrue([error_msg for (node, error_msg) in node_errors
+                     if node == other_node_name])


 class TestVerifySshSetup(testutils.GanetiTestCase):


>
> Hrvoje Ribicic
> Ganeti Engineering
> Google Germany GmbH
> Dienerstr. 12, 80331, München
>
> Registergericht und -nummer: Hamburg, HRB 86891
> Sitz der Gesellschaft: Hamburg
> Geschäftsführer: Graham Law, Christine Elizabeth Flores
> Steuernummer: 48/725/00206
> Umsatzsteueridentifikationsnummer: DE813741370
>

Reply via email to