On Thu, 23 Apr 2015 at 13:31 Hrvoje Ribicic <[email protected]> wrote: > On Thu, Apr 23, 2015 at 1:18 PM, Helga Velroyen <[email protected]> wrote: > >> Interdiff (because the result type of NodeSshRemoveKey changed to make it >> equal to the type of RenewCrypto.) >> >> diff --git a/lib/backend.py b/lib/backend.py >> index 46b44ee..8bfec14 100644 >> --- a/lib/backend.py >> +++ b/lib/backend.py >> @@ -1824,10 +1824,11 @@ def RemoveNodeSshKey(node_uuid, node_name, >> last_exception = e >> else: >> if last_exception: >> - result_msgs[node_name] = \ >> + result_msgs.append( >> + node_name, >> ("Removing SSH keys from node '%s' failed after %s retries." >> " This can happen when the node is already unreachable." >> - " Error: %s" % (node_name, constants.SSHS_MAX_RETRIES, e)) >> + " Error: %s" % (node_name, constants.SSHS_MAX_RETRIES, e))) >> > > Don't you have to put these two in a tuple when appending? >
True. Consider it fixed. > > >> >> return result_msgs >> >> diff --git a/test/py/ganeti.backend_unittest.py b/test/py/ >> ganeti.backend_unittest.py >> index 706b9e2..51b4dd2 100755 >> --- a/test/py/ganeti.backend_unittest.py >> +++ b/test/py/ganeti.backend_unittest.py >> @@ -1664,7 +1664,8 @@ class >> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): >> else: >> self.assertFalse(self._ssh_file_manager.NodeHasAuthorizedKey( >> node, node_key)) >> - self.assertTrue(error_msgs[node_name]) >> + self.assertTrue([error_msg for (node, error_msg) in error_msgs >> + if node == node_name]) >> >> >> class TestVerifySshSetup(testutils.GanetiTestCase): >> >> >> On Thu, 23 Apr 2015 at 11:42 Hrvoje Ribicic <[email protected]> wrote: >> >>> LGTM, thanks >>> >>> >>> On Wed, Apr 22, 2015 at 1:59 PM, 'Helga Velroyen' via ganeti-devel < >>> [email protected]> wrote: >>> >>>> Missed a spot. Small interdiff to that interdiff: >>>> diff --git a/test/py/ganeti.backend_unittest.py b/test/py/ >>>> ganeti.backend_unittest.py >>>> index 50bb7dd..1f9021b 100755 >>>> --- a/test/py/ganeti.backend_unittest.py >>>> +++ b/test/py/ganeti.backend_unittest.py >>>> @@ -1615,7 +1615,7 @@ class >>>> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): >>>> node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0] >>>> assert node_name != self._master_node >>>> self._ssh_file_manager.SetMaxRetries( >>>> - node_name, constants.SSH_MAX_RETRIES) >>>> + node_name, constants.SSHS_MAX_RETRIES) >>>> >>>> backend.RemoveNodeSshKey(node_uuid, node_name, >>>> self._master_candidate_uuids, >>>> >>>> >>>> On Tue, 21 Apr 2015 at 16:49 Helga Velroyen <[email protected]> wrote: >>>> >>>>> Interdiff for the max-retry constant, introduced in patch 8: >>>>> >>>>> diff --git a/lib/backend.py b/lib/backend.py >>>>> index 27781e1..66bd147 100644 >>>>> --- a/lib/backend.py >>>>> +++ b/lib/backend.py >>>>> @@ -1812,7 +1812,7 @@ def RemoveNodeSshKey(node_uuid, node_name, >>>>> return >>>>> >>>>> last_exception = None >>>>> - for i in range(max_retries): >>>>> + for i in range(constants.SSHS_MAX_RETRIES): >>>>> try: >>>>> run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE, >>>>> ssh_port, data, >>>>> @@ -1828,7 +1828,7 @@ def RemoveNodeSshKey(node_uuid, node_name, >>>>> result_msgs[node_name] = \ >>>>> ("Removing SSH keys from node '%s' failed after %s >>>>> retries." >>>>> " This can happen when the node is already unreachable." >>>>> - " Error: %s" % (node_name, max_retries, e)) >>>>> + " Error: %s" % (node_name, constants.SSHS_MAX_RETRIES, >>>>> e)) >>>>> >>>>> return result_msgs >>>>> >>>>> diff --git a/test/py/ganeti.backend_unittest.py b/test/py/ >>>>> ganeti.backend_unittest.py >>>>> index 2cdb613..50bb7dd 100755 >>>>> --- a/test/py/ganeti.backend_unittest.py >>>>> +++ b/test/py/ganeti.backend_unittest.py >>>>> @@ -1614,7 +1614,8 @@ class >>>>> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): >>>>> all_master_candidates = >>>>> self._ssh_file_manager.GetAllMasterCandidates() >>>>> node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0] >>>>> assert node_name != self._master_node >>>>> - self._ssh_file_manager.SetMaxRetries(node_name, 3) >>>>> + self._ssh_file_manager.SetMaxRetries( >>>>> + node_name, constants.SSH_MAX_RETRIES) >>>>> >>>>> backend.RemoveNodeSshKey(node_uuid, node_name, >>>>> self._master_candidate_uuids, >>>>> @@ -1643,7 +1644,8 @@ class >>>>> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): >>>>> all_master_candidates = >>>>> self._ssh_file_manager.GetAllMasterCandidates() >>>>> node_name, node_uuid, node_key, _, _, _ = all_master_candidates[0] >>>>> assert node_name != self._master_node >>>>> - self._ssh_file_manager.SetMaxRetries(node_name, 4) >>>>> + self._ssh_file_manager.SetMaxRetries( >>>>> + node_name, constants.SSHS_MAX_RETRIES + 1) >>>>> >>>>> error_msgs = backend.RemoveNodeSshKey( >>>>> node_uuid, node_name, self._master_candidate_uuids, >>>>> >>>>> >>>>> On Fri, 17 Apr 2015 at 16:37 Helga Velroyen <[email protected]> wrote: >>>>> >>>>>> This patch adds retries to the part of 'NodeSshRemoveKey', >>>>>> where the target node itself is updated. As this is likely >>>>>> to fail if the node is for example already offline, we >>>>>> do not fail the entire operation, but emit a non-disruptive >>>>>> error which can be showing as feedback by the LU. >>>>>> >>>>>> Signed-off-by: Helga Velroyen <[email protected]> >>>>>> --- >>>>>> lib/backend.py | 28 +++++++++++------- >>>>>> test/py/ganeti.backend_unittest.py | 59 >>>>>> ++++++++++++++++++++++++++++++++++++++ >>>>>> 2 files changed, 77 insertions(+), 10 deletions(-) >>>>>> >>>>>> diff --git a/lib/backend.py b/lib/backend.py >>>>>> index c3190d2..7b7b5e5 100644 >>>>>> --- a/lib/backend.py >>>>>> +++ b/lib/backend.py >>>>>> @@ -1814,16 +1814,24 @@ def RemoveNodeSshKey(node_uuid, node_name, >>>>>> constants.SSHS_SSH_AUTHORIZED_KEYS in data): >>>>>> return >>>>>> >>>>>> - try: >>>>>> - run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE, >>>>>> - ssh_port, data, >>>>>> - debug=False, verbose=False, use_cluster_key=False, >>>>>> - ask_key=False, strict_host_check=False) >>>>>> - except errors.OpExecError as e: >>>>>> - result_msgs[node_name] = \ >>>>>> - ("Removing SSH keys from node '%s' failed. This can" >>>>>> - " happen when the node is already unreachable." >>>>>> - " Error: %s" % (node_name, e)) >>>>>> + last_exception = None >>>>>> + for i in range(max_retries): >>>>>> + try: >>>>>> + run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE, >>>>>> + ssh_port, data, >>>>>> + debug=False, verbose=False, use_cluster_key=False, >>>>>> + ask_key=False, strict_host_check=False) >>>>>> + except errors.OpExecError as e: >>>>>> + logging.error("Updating the SSH key files of node '%s', >>>>>> whose key" >>>>>> + " itself is being removed failed with try no >>>>>> %s." >>>>>> + " Error: %s", node_name, i, e) >>>>>> + last_exception = e >>>>>> + else: >>>>>> + if last_exception: >>>>>> + result_msgs[node_name] = \ >>>>>> + ("Removing SSH keys from node '%s' failed after %s >>>>>> retries." >>>>>> + " This can happen when the node is already unreachable." >>>>>> + " Error: %s" % (node_name, max_retries, e)) >>>>>> >>>>>> return result_msgs >>>>>> >>>>>> diff --git a/test/py/ganeti.backend_unittest.py b/test/py/ >>>>>> ganeti.backend_unittest.py >>>>>> index 73db0a9..cfeaf93 100755 >>>>>> --- a/test/py/ganeti.backend_unittest.py >>>>>> +++ b/test/py/ganeti.backend_unittest.py >>>>>> @@ -1598,6 +1598,65 @@ class >>>>>> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): >>>>>> node, node_key)) >>>>>> self.assertTrue(error_msgs[other_node_name]) >>>>>> >>>>>> + def testRemoveKeySuccessfullyWithRetriesOnTargetNode(self): >>>>>> + """Test removing keys even if the target nodes needs retries. >>>>>> + >>>>>> + This tests checks whether a key can be removed successfully even >>>>>> + when removing the key on the node itself needs retries. >>>>>> + >>>>>> + """ >>>>>> + all_master_candidates = >>>>>> self._ssh_file_manager.GetAllMasterCandidates() >>>>>> + node_name, node_uuid, node_key, _, _, _ = >>>>>> all_master_candidates[0] >>>>>> + assert node_name != self._master_node >>>>>> + self._ssh_file_manager.SetMaxRetries(node_name, 3) >>>>>> + >>>>>> + backend.RemoveNodeSshKey(node_uuid, node_name, >>>>>> + self._master_candidate_uuids, >>>>>> + self._potential_master_candidates, >>>>>> + self._ssh_port_map, >>>>>> + from_authorized_keys=True, >>>>>> + from_public_keys=True, >>>>>> + clear_authorized_keys=True, >>>>>> + clear_public_keys=True, >>>>>> + pub_key_file=self._pub_key_file, >>>>>> + ssconf_store=self._ssconf_mock, >>>>>> + noded_cert_file=self.noded_cert_file, >>>>>> + run_cmd_fn=self._run_cmd_mock) >>>>>> + >>>>>> + self._ssh_file_manager.AssertNoNodeHasPublicKey(node_uuid, >>>>>> node_key) >>>>>> + self._ssh_file_manager.AssertNoNodeHasAuthorizedKey(node_key) >>>>>> + >>>>>> + def testRemoveKeyFailedWithRetriesOnTargetNode(self): >>>>>> + """Test removing keys even if contacting the node fails with >>>>>> retries. >>>>>> + >>>>>> + This tests checks whether the removal of a key finishes >>>>>> properly, even if >>>>>> + the update of the key files on the node itself fails despite >>>>>> several >>>>>> + retries. >>>>>> + >>>>>> + """ >>>>>> + all_master_candidates = >>>>>> self._ssh_file_manager.GetAllMasterCandidates() >>>>>> + node_name, node_uuid, node_key, _, _, _ = >>>>>> all_master_candidates[0] >>>>>> + assert node_name != self._master_node >>>>>> + self._ssh_file_manager.SetMaxRetries(node_name, 4) >>>>>> + >>>>>> + error_msgs = backend.RemoveNodeSshKey( >>>>>> + node_uuid, node_name, self._master_candidate_uuids, >>>>>> + self._potential_master_candidates, self._ssh_port_map, >>>>>> + from_authorized_keys=True, from_public_keys=True, >>>>>> + clear_authorized_keys=True, clear_public_keys=True, >>>>>> + pub_key_file=self._pub_key_file, >>>>>> ssconf_store=self._ssconf_mock, >>>>>> + noded_cert_file=self.noded_cert_file, >>>>>> run_cmd_fn=self._run_cmd_mock) >>>>>> + >>>>>> + for node in self._all_nodes: >>>>>> + if node == node_name: >>>>>> + self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey( >>>>>> + node, node_key)) >>>>>> + else: >>>>>> + self.assertFalse(self._ssh_file_manager.NodeHasAuthorizedKey( >>>>>> + node, node_key)) >>>>>> + self.assertTrue(error_msgs[node_name]) >>>>>> + >>>>>> + >>>>>> class TestVerifySshSetup(testutils.GanetiTestCase): >>>>>> >>>>>> _NODE1_UUID = "uuid1" >>>>>> -- >>>>>> 2.2.0.rc0.207.ga3a616c >>>>>> >>>>>> >>> Hrvoje Ribicic >>> Ganeti Engineering >>> Google Germany GmbH >>> Dienerstr. 12, 80331, München >>> >>> Registergericht und -nummer: Hamburg, HRB 86891 >>> Sitz der Gesellschaft: Hamburg >>> Geschäftsführer: Graham Law, Christine Elizabeth Flores >>> Steuernummer: 48/725/00206 >>> Umsatzsteueridentifikationsnummer: DE813741370 >>> >> Hrvoje Ribicic > Ganeti Engineering > Google Germany GmbH > Dienerstr. 12, 80331, München > > Registergericht und -nummer: Hamburg, HRB 86891 > Sitz der Gesellschaft: Hamburg > Geschäftsführer: Graham Law, Christine Elizabeth Flores > Steuernummer: 48/725/00206 > Umsatzsteueridentifikationsnummer: DE813741370 >
