Interdiff for the max-retry constant, introduced in patch 8:

diff --git a/lib/backend.py b/lib/backend.py
index 27781e1..66bd147 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -1812,7 +1812,7 @@ def RemoveNodeSshKey(node_uuid, node_name,
       return

     last_exception = None
-    for i in range(max_retries):
+    for i in range(constants.SSHS_MAX_RETRIES):
       try:
         run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
                    ssh_port, data,
@@ -1828,7 +1828,7 @@ def RemoveNodeSshKey(node_uuid, node_name,
         result_msgs[node_name] = \
             ("Removing SSH keys from node '%s' failed after %s retries."
              " This can happen when the node is already unreachable."
-             " Error: %s" % (node_name, max_retries, e))
+             " Error: %s" % (node_name, constants.SSHS_MAX_RETRIES, e))

   return result_msgs

diff --git a/test/py/ganeti.backend_unittest.py b/test/py/
ganeti.backend_unittest.py
index 2cdb613..50bb7dd 100755
--- a/test/py/ganeti.backend_unittest.py
+++ b/test/py/ganeti.backend_unittest.py
@@ -1614,7 +1614,8 @@ class
TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
     all_master_candidates = self._ssh_file_manager.GetAllMasterCandidates()
     node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
     assert node_name != self._master_node
-    self._ssh_file_manager.SetMaxRetries(node_name, 3)
+    self._ssh_file_manager.SetMaxRetries(
+        node_name, constants.SSH_MAX_RETRIES)

     backend.RemoveNodeSshKey(node_uuid, node_name,
                              self._master_candidate_uuids,
@@ -1643,7 +1644,8 @@ class
TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
     all_master_candidates = self._ssh_file_manager.GetAllMasterCandidates()
     node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
     assert node_name != self._master_node
-    self._ssh_file_manager.SetMaxRetries(node_name, 4)
+    self._ssh_file_manager.SetMaxRetries(
+        node_name, constants.SSHS_MAX_RETRIES + 1)

     error_msgs = backend.RemoveNodeSshKey(
         node_uuid, node_name, self._master_candidate_uuids,


On Fri, 17 Apr 2015 at 16:37 Helga Velroyen <[email protected]> wrote:

> This patch adds retries to the part of 'NodeSshRemoveKey',
> where the target node itself is updated. As this is likely
> to fail if the node is for example already offline, we
> do not fail the entire operation, but emit a non-disruptive
> error which can be showing as feedback by the LU.
>
> Signed-off-by: Helga Velroyen <[email protected]>
> ---
>  lib/backend.py                     | 28 +++++++++++-------
>  test/py/ganeti.backend_unittest.py | 59
> ++++++++++++++++++++++++++++++++++++++
>  2 files changed, 77 insertions(+), 10 deletions(-)
>
> diff --git a/lib/backend.py b/lib/backend.py
> index c3190d2..7b7b5e5 100644
> --- a/lib/backend.py
> +++ b/lib/backend.py
> @@ -1814,16 +1814,24 @@ def RemoveNodeSshKey(node_uuid, node_name,
>              constants.SSHS_SSH_AUTHORIZED_KEYS in data):
>        return
>
> -    try:
> -      run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
> -                 ssh_port, data,
> -                 debug=False, verbose=False, use_cluster_key=False,
> -                 ask_key=False, strict_host_check=False)
> -    except errors.OpExecError as e:
> -      result_msgs[node_name] = \
> -          ("Removing SSH keys from node '%s' failed. This can"
> -           " happen when the node is already unreachable."
> -           " Error: %s" % (node_name, e))
> +    last_exception = None
> +    for i in range(max_retries):
> +      try:
> +        run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
> +                   ssh_port, data,
> +                   debug=False, verbose=False, use_cluster_key=False,
> +                   ask_key=False, strict_host_check=False)
> +      except errors.OpExecError as e:
> +        logging.error("Updating the SSH key files of node '%s', whose key"
> +                      " itself is being removed failed with try no %s."
> +                      " Error: %s", node_name, i, e)
> +        last_exception = e
> +    else:
> +      if last_exception:
> +        result_msgs[node_name] = \
> +            ("Removing SSH keys from node '%s' failed after %s retries."
> +             " This can happen when the node is already unreachable."
> +             " Error: %s" % (node_name, max_retries, e))
>
>    return result_msgs
>
> diff --git a/test/py/ganeti.backend_unittest.py b/test/py/
> ganeti.backend_unittest.py
> index 73db0a9..cfeaf93 100755
> --- a/test/py/ganeti.backend_unittest.py
> +++ b/test/py/ganeti.backend_unittest.py
> @@ -1598,6 +1598,65 @@ class
> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
>              node, node_key))
>      self.assertTrue(error_msgs[other_node_name])
>
> +  def testRemoveKeySuccessfullyWithRetriesOnTargetNode(self):
> +    """Test removing keys even if the target nodes needs retries.
> +
> +    This tests checks whether a key can be removed successfully even
> +    when removing the key on the node itself needs retries.
> +
> +    """
> +    all_master_candidates =
> self._ssh_file_manager.GetAllMasterCandidates()
> +    node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
> +    assert node_name != self._master_node
> +    self._ssh_file_manager.SetMaxRetries(node_name, 3)
> +
> +    backend.RemoveNodeSshKey(node_uuid, node_name,
> +                             self._master_candidate_uuids,
> +                             self._potential_master_candidates,
> +                             self._ssh_port_map,
> +                             from_authorized_keys=True,
> +                             from_public_keys=True,
> +                             clear_authorized_keys=True,
> +                             clear_public_keys=True,
> +                             pub_key_file=self._pub_key_file,
> +                             ssconf_store=self._ssconf_mock,
> +                             noded_cert_file=self.noded_cert_file,
> +                             run_cmd_fn=self._run_cmd_mock)
> +
> +    self._ssh_file_manager.AssertNoNodeHasPublicKey(node_uuid, node_key)
> +    self._ssh_file_manager.AssertNoNodeHasAuthorizedKey(node_key)
> +
> +  def testRemoveKeyFailedWithRetriesOnTargetNode(self):
> +    """Test removing keys even if contacting the node fails with retries.
> +
> +    This tests checks whether the removal of a key finishes properly,
> even if
> +    the update of the key files on the node itself fails despite several
> +    retries.
> +
> +    """
> +    all_master_candidates =
> self._ssh_file_manager.GetAllMasterCandidates()
> +    node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
> +    assert node_name != self._master_node
> +    self._ssh_file_manager.SetMaxRetries(node_name, 4)
> +
> +    error_msgs = backend.RemoveNodeSshKey(
> +        node_uuid, node_name, self._master_candidate_uuids,
> +        self._potential_master_candidates, self._ssh_port_map,
> +        from_authorized_keys=True, from_public_keys=True,
> +        clear_authorized_keys=True, clear_public_keys=True,
> +        pub_key_file=self._pub_key_file, ssconf_store=self._ssconf_mock,
> +        noded_cert_file=self.noded_cert_file,
> run_cmd_fn=self._run_cmd_mock)
> +
> +    for node in self._all_nodes:
> +      if node == node_name:
> +        self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey(
> +            node, node_key))
> +      else:
> +        self.assertFalse(self._ssh_file_manager.NodeHasAuthorizedKey(
> +            node, node_key))
> +    self.assertTrue(error_msgs[node_name])
> +
> +
>  class TestVerifySshSetup(testutils.GanetiTestCase):
>
>    _NODE1_UUID = "uuid1"
> --
> 2.2.0.rc0.207.ga3a616c
>
>

Reply via email to