Missed a spot. Small interdiff to that interdiff:
diff --git a/test/py/ganeti.backend_unittest.py b/test/py/
ganeti.backend_unittest.py
index 50bb7dd..1f9021b 100755
--- a/test/py/ganeti.backend_unittest.py
+++ b/test/py/ganeti.backend_unittest.py
@@ -1615,7 +1615,7 @@ class
TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
assert node_name != self._master_node
self._ssh_file_manager.SetMaxRetries(
- node_name, constants.SSH_MAX_RETRIES)
+ node_name, constants.SSHS_MAX_RETRIES)
backend.RemoveNodeSshKey(node_uuid, node_name,
self._master_candidate_uuids,
On Tue, 21 Apr 2015 at 16:49 Helga Velroyen <[email protected]> wrote:
> Interdiff for the max-retry constant, introduced in patch 8:
>
> diff --git a/lib/backend.py b/lib/backend.py
> index 27781e1..66bd147 100644
> --- a/lib/backend.py
> +++ b/lib/backend.py
> @@ -1812,7 +1812,7 @@ def RemoveNodeSshKey(node_uuid, node_name,
> return
>
> last_exception = None
> - for i in range(max_retries):
> + for i in range(constants.SSHS_MAX_RETRIES):
> try:
> run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
> ssh_port, data,
> @@ -1828,7 +1828,7 @@ def RemoveNodeSshKey(node_uuid, node_name,
> result_msgs[node_name] = \
> ("Removing SSH keys from node '%s' failed after %s retries."
> " This can happen when the node is already unreachable."
> - " Error: %s" % (node_name, max_retries, e))
> + " Error: %s" % (node_name, constants.SSHS_MAX_RETRIES, e))
>
> return result_msgs
>
> diff --git a/test/py/ganeti.backend_unittest.py b/test/py/
> ganeti.backend_unittest.py
> index 2cdb613..50bb7dd 100755
> --- a/test/py/ganeti.backend_unittest.py
> +++ b/test/py/ganeti.backend_unittest.py
> @@ -1614,7 +1614,8 @@ class
> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
> all_master_candidates =
> self._ssh_file_manager.GetAllMasterCandidates()
> node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
> assert node_name != self._master_node
> - self._ssh_file_manager.SetMaxRetries(node_name, 3)
> + self._ssh_file_manager.SetMaxRetries(
> + node_name, constants.SSH_MAX_RETRIES)
>
> backend.RemoveNodeSshKey(node_uuid, node_name,
> self._master_candidate_uuids,
> @@ -1643,7 +1644,8 @@ class
> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
> all_master_candidates =
> self._ssh_file_manager.GetAllMasterCandidates()
> node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
> assert node_name != self._master_node
> - self._ssh_file_manager.SetMaxRetries(node_name, 4)
> + self._ssh_file_manager.SetMaxRetries(
> + node_name, constants.SSHS_MAX_RETRIES + 1)
>
> error_msgs = backend.RemoveNodeSshKey(
> node_uuid, node_name, self._master_candidate_uuids,
>
>
> On Fri, 17 Apr 2015 at 16:37 Helga Velroyen <[email protected]> wrote:
>
>> This patch adds retries to the part of 'NodeSshRemoveKey',
>> where the target node itself is updated. As this is likely
>> to fail if the node is for example already offline, we
>> do not fail the entire operation, but emit a non-disruptive
>> error which can be showing as feedback by the LU.
>>
>> Signed-off-by: Helga Velroyen <[email protected]>
>> ---
>> lib/backend.py | 28 +++++++++++-------
>> test/py/ganeti.backend_unittest.py | 59
>> ++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 77 insertions(+), 10 deletions(-)
>>
>> diff --git a/lib/backend.py b/lib/backend.py
>> index c3190d2..7b7b5e5 100644
>> --- a/lib/backend.py
>> +++ b/lib/backend.py
>> @@ -1814,16 +1814,24 @@ def RemoveNodeSshKey(node_uuid, node_name,
>> constants.SSHS_SSH_AUTHORIZED_KEYS in data):
>> return
>>
>> - try:
>> - run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
>> - ssh_port, data,
>> - debug=False, verbose=False, use_cluster_key=False,
>> - ask_key=False, strict_host_check=False)
>> - except errors.OpExecError as e:
>> - result_msgs[node_name] = \
>> - ("Removing SSH keys from node '%s' failed. This can"
>> - " happen when the node is already unreachable."
>> - " Error: %s" % (node_name, e))
>> + last_exception = None
>> + for i in range(max_retries):
>> + try:
>> + run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
>> + ssh_port, data,
>> + debug=False, verbose=False, use_cluster_key=False,
>> + ask_key=False, strict_host_check=False)
>> + except errors.OpExecError as e:
>> + logging.error("Updating the SSH key files of node '%s', whose
>> key"
>> + " itself is being removed failed with try no %s."
>> + " Error: %s", node_name, i, e)
>> + last_exception = e
>> + else:
>> + if last_exception:
>> + result_msgs[node_name] = \
>> + ("Removing SSH keys from node '%s' failed after %s retries."
>> + " This can happen when the node is already unreachable."
>> + " Error: %s" % (node_name, max_retries, e))
>>
>> return result_msgs
>>
>> diff --git a/test/py/ganeti.backend_unittest.py b/test/py/
>> ganeti.backend_unittest.py
>> index 73db0a9..cfeaf93 100755
>> --- a/test/py/ganeti.backend_unittest.py
>> +++ b/test/py/ganeti.backend_unittest.py
>> @@ -1598,6 +1598,65 @@ class
>> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase):
>> node, node_key))
>> self.assertTrue(error_msgs[other_node_name])
>>
>> + def testRemoveKeySuccessfullyWithRetriesOnTargetNode(self):
>> + """Test removing keys even if the target nodes needs retries.
>> +
>> + This tests checks whether a key can be removed successfully even
>> + when removing the key on the node itself needs retries.
>> +
>> + """
>> + all_master_candidates =
>> self._ssh_file_manager.GetAllMasterCandidates()
>> + node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
>> + assert node_name != self._master_node
>> + self._ssh_file_manager.SetMaxRetries(node_name, 3)
>> +
>> + backend.RemoveNodeSshKey(node_uuid, node_name,
>> + self._master_candidate_uuids,
>> + self._potential_master_candidates,
>> + self._ssh_port_map,
>> + from_authorized_keys=True,
>> + from_public_keys=True,
>> + clear_authorized_keys=True,
>> + clear_public_keys=True,
>> + pub_key_file=self._pub_key_file,
>> + ssconf_store=self._ssconf_mock,
>> + noded_cert_file=self.noded_cert_file,
>> + run_cmd_fn=self._run_cmd_mock)
>> +
>> + self._ssh_file_manager.AssertNoNodeHasPublicKey(node_uuid, node_key)
>> + self._ssh_file_manager.AssertNoNodeHasAuthorizedKey(node_key)
>> +
>> + def testRemoveKeyFailedWithRetriesOnTargetNode(self):
>> + """Test removing keys even if contacting the node fails with retries.
>> +
>> + This tests checks whether the removal of a key finishes properly,
>> even if
>> + the update of the key files on the node itself fails despite several
>> + retries.
>> +
>> + """
>> + all_master_candidates =
>> self._ssh_file_manager.GetAllMasterCandidates()
>> + node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0]
>> + assert node_name != self._master_node
>> + self._ssh_file_manager.SetMaxRetries(node_name, 4)
>> +
>> + error_msgs = backend.RemoveNodeSshKey(
>> + node_uuid, node_name, self._master_candidate_uuids,
>> + self._potential_master_candidates, self._ssh_port_map,
>> + from_authorized_keys=True, from_public_keys=True,
>> + clear_authorized_keys=True, clear_public_keys=True,
>> + pub_key_file=self._pub_key_file, ssconf_store=self._ssconf_mock,
>> + noded_cert_file=self.noded_cert_file,
>> run_cmd_fn=self._run_cmd_mock)
>> +
>> + for node in self._all_nodes:
>> + if node == node_name:
>> + self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey(
>> + node, node_key))
>> + else:
>> + self.assertFalse(self._ssh_file_manager.NodeHasAuthorizedKey(
>> + node, node_key))
>> + self.assertTrue(error_msgs[node_name])
>> +
>> +
>> class TestVerifySshSetup(testutils.GanetiTestCase):
>>
>> _NODE1_UUID = "uuid1"
>> --
>> 2.2.0.rc0.207.ga3a616c
>>
>>