Interdiff for the max-retry constant, introduced in patch 8:
diff --git a/lib/backend.py b/lib/backend.py
index 27781e1..66bd147 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -1812,7 +1812,7 @@ def RemoveNodeSshKey(node_uuid, node_name,
return
last_exception = None
- for i in range(max_retries):
+ for i in range(constants.SSHS_MAX_RETRIES):
try:
run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
ssh_port, data,
@@ -1828,7 +1828,7 @@ def RemoveNodeSshKey(node_uuid, node_name,
result_msgs[node_name] = \
("Removing SSH keys from node '%s' failed after %s retries."
" This can happen when the node is already unreachable."
- " Error: %s" % (node_name, max_retries, e))
+ " Error: %s" % (node_name, constants.SSHS_MAX_RETRIES, e))
return result_msgs
diff --git a/test/py/ganeti.backend_unittest.py b/test/py/
ganeti.backend_unittest.py
index 2cdb613..50bb7dd 100755
--- a/test/py/ganeti.backend_unittest.py
+++ b/test/py/ganeti.backend_unittest.py
@@ -1614,7 +1614,8 @@ class
TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
all_master_candidates = self._ssh_file_manager.GetAllMasterCandidates()
node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
assert node_name != self._master_node
- self._ssh_file_manager.SetMaxRetries(node_name, 3)
+ self._ssh_file_manager.SetMaxRetries(
+ node_name, constants.SSH_MAX_RETRIES)
backend.RemoveNodeSshKey(node_uuid, node_name,
self._master_candidate_uuids,
@@ -1643,7 +1644,8 @@ class
TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
all_master_candidates = self._ssh_file_manager.GetAllMasterCandidates()
node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
assert node_name != self._master_node
- self._ssh_file_manager.SetMaxRetries(node_name, 4)
+ self._ssh_file_manager.SetMaxRetries(
+ node_name, constants.SSHS_MAX_RETRIES + 1)
error_msgs = backend.RemoveNodeSshKey(
node_uuid, node_name, self._master_candidate_uuids,
On Fri, 17 Apr 2015 at 16:37 Helga Velroyen <[email protected]> wrote:
> This patch adds retries to the part of 'NodeSshRemoveKey',
> where the target node itself is updated. As this is likely
> to fail if the node is for example already offline, we
> do not fail the entire operation, but emit a non-disruptive
> error which can be showing as feedback by the LU.
>
> Signed-off-by: Helga Velroyen <[email protected]>
> ---
> lib/backend.py | 28 +++++++++++-------
> test/py/ganeti.backend_unittest.py | 59
> ++++++++++++++++++++++++++++++++++++++
> 2 files changed, 77 insertions(+), 10 deletions(-)
>
> diff --git a/lib/backend.py b/lib/backend.py
> index c3190d2..7b7b5e5 100644
> --- a/lib/backend.py
> +++ b/lib/backend.py
> @@ -1814,16 +1814,24 @@ def RemoveNodeSshKey(node_uuid, node_name,
> constants.SSHS_SSH_AUTHORIZED_KEYS in data):
> return
>
> - try:
> - run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
> - ssh_port, data,
> - debug=False, verbose=False, use_cluster_key=False,
> - ask_key=False, strict_host_check=False)
> - except errors.OpExecError as e:
> - result_msgs[node_name] = \
> - ("Removing SSH keys from node '%s' failed. This can"
> - " happen when the node is already unreachable."
> - " Error: %s" % (node_name, e))
> + last_exception = None
> + for i in range(max_retries):
> + try:
> + run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
> + ssh_port, data,
> + debug=False, verbose=False, use_cluster_key=False,
> + ask_key=False, strict_host_check=False)
> + except errors.OpExecError as e:
> + logging.error("Updating the SSH key files of node '%s', whose key"
> + " itself is being removed failed with try no %s."
> + " Error: %s", node_name, i, e)
> + last_exception = e
> + else:
> + if last_exception:
> + result_msgs[node_name] = \
> + ("Removing SSH keys from node '%s' failed after %s retries."
> + " This can happen when the node is already unreachable."
> + " Error: %s" % (node_name, max_retries, e))
>
> return result_msgs
>
> diff --git a/test/py/ganeti.backend_unittest.py b/test/py/
> ganeti.backend_unittest.py
> index 73db0a9..cfeaf93 100755
> --- a/test/py/ganeti.backend_unittest.py
> +++ b/test/py/ganeti.backend_unittest.py
> @@ -1598,6 +1598,65 @@ class
> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
> node, node_key))
> self.assertTrue(error_msgs[other_node_name])
>
> + def testRemoveKeySuccessfullyWithRetriesOnTargetNode(self):
> + """Test removing keys even if the target nodes needs retries.
> +
> + This tests checks whether a key can be removed successfully even
> + when removing the key on the node itself needs retries.
> +
> + """
> + all_master_candidates =
> self._ssh_file_manager.GetAllMasterCandidates()
> + node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
> + assert node_name != self._master_node
> + self._ssh_file_manager.SetMaxRetries(node_name, 3)
> +
> + backend.RemoveNodeSshKey(node_uuid, node_name,
> + self._master_candidate_uuids,
> + self._potential_master_candidates,
> + self._ssh_port_map,
> + from_authorized_keys=True,
> + from_public_keys=True,
> + clear_authorized_keys=True,
> + clear_public_keys=True,
> + pub_key_file=self._pub_key_file,
> + ssconf_store=self._ssconf_mock,
> + noded_cert_file=self.noded_cert_file,
> + run_cmd_fn=self._run_cmd_mock)
> +
> + self._ssh_file_manager.AssertNoNodeHasPublicKey(node_uuid, node_key)
> + self._ssh_file_manager.AssertNoNodeHasAuthorizedKey(node_key)
> +
> + def testRemoveKeyFailedWithRetriesOnTargetNode(self):
> + """Test removing keys even if contacting the node fails with retries.
> +
> + This tests checks whether the removal of a key finishes properly,
> even if
> + the update of the key files on the node itself fails despite several
> + retries.
> +
> + """
> + all_master_candidates =
> self._ssh_file_manager.GetAllMasterCandidates()
> + node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
> + assert node_name != self._master_node
> + self._ssh_file_manager.SetMaxRetries(node_name, 4)
> +
> + error_msgs = backend.RemoveNodeSshKey(
> + node_uuid, node_name, self._master_candidate_uuids,
> + self._potential_master_candidates, self._ssh_port_map,
> + from_authorized_keys=True, from_public_keys=True,
> + clear_authorized_keys=True, clear_public_keys=True,
> + pub_key_file=self._pub_key_file, ssconf_store=self._ssconf_mock,
> + noded_cert_file=self.noded_cert_file,
> run_cmd_fn=self._run_cmd_mock)
> +
> + for node in self._all_nodes:
> + if node == node_name:
> + self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey(
> + node, node_key))
> + else:
> + self.assertFalse(self._ssh_file_manager.NodeHasAuthorizedKey(
> + node, node_key))
> + self.assertTrue(error_msgs[node_name])
> +
> +
> class TestVerifySshSetup(testutils.GanetiTestCase):
>
> _NODE1_UUID = "uuid1"
> --
> 2.2.0.rc0.207.ga3a616c
>
>