Hi! On Wed, 22 Apr 2015 at 18:41 Hrvoje Ribicic <[email protected]> wrote:
> On Fri, Apr 17, 2015 at 4:37 PM, 'Helga Velroyen' via ganeti-devel < > [email protected]> wrote: > >> This patch introduces retries for updating non-master nodes >> when a new SSH key is added to the cluster. If an SSH update >> fails even after the maximum number of retries is exhausted, >> this does not raise an exception, but an error message is >> added to the result of the method, which can be displayed >> in the LU operations so that the admin sees which nodes >> were the problem and need further attention. >> >> Signed-off-by: Helga Velroyen <[email protected]> >> --- >> lib/backend.py | 73 +++++++++++++++++++-------- >> lib/cmdlib/cluster.py | 9 +++- >> lib/cmdlib/node.py | 14 ++++- >> test/py/ganeti.backend_unittest.py | 101 >> ++++++++++++++++++++++++++++++++++++- >> 4 files changed, 169 insertions(+), 28 deletions(-) >> >> diff --git a/lib/backend.py b/lib/backend.py >> index 818f3fb..3b17306 100644 >> --- a/lib/backend.py >> +++ b/lib/backend.py >> @@ -1566,17 +1566,38 @@ def AddNodeSshKey(node_uuid, node_name, >> master_node = ssconf_store.GetMasterNode() >> online_nodes = ssconf_store.GetOnlineNodeList() >> >> + node_errors = {} > > for node in all_nodes: >> if node == master_node: >> + logging.debug("Skipping master node '%s'.", master_node) >> continue >> if node not in online_nodes: >> logging.debug("Skipping offline node '%s'.", node) >> continue >> if node in potential_master_candidates: >> - run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE, >> - ssh_port_map.get(node), pot_mc_data, >> - debug=False, verbose=False, use_cluster_key=False, >> - ask_key=False, strict_host_check=False) >> + logging.debug("Updating SSH key files of node '%s'.") >> + for i in range(max_retries): >> + try: >> + run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE, >> + ssh_port_map.get(node), pot_mc_data, >> + debug=False, verbose=False, use_cluster_key=False, >> + ask_key=False, strict_host_check=False) >> + break >> + except errors.OpExecError as e: >> + logging.error("Updating SSH key files of node '%s' failed" >> + " at try no %s. Error: %s.", node, i, e) >> + last_exception = e >> + else: >> + if last_exception: >> + error_msg = ("When adding the key of node '%s', updating SSH >> key" >> + " files of node '%s' failed after %s retries." >> + " Not trying again. Last errors was: %s." % >> > > Nitpick: s/errors/error/ > ACK. > > >> + (node, node_name, max_retries, last_exception)) > > + node_errors[node] = error_msg >> + # We only log the error and don't throw an exception, because >> + # one unreachable node shall not abort the entire procedure. >> + logging.error(error_msg) >> + >> else: >> if to_authorized_keys: >> run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE, >> @@ -1584,6 +1605,8 @@ def AddNodeSshKey(node_uuid, node_name, >> debug=False, verbose=False, use_cluster_key=False, >> ask_key=False, strict_host_check=False) >> >> + return node_errors >> + >> >> def RemoveNodeSshKey(node_uuid, node_name, >> master_candidate_uuids, >> @@ -1925,6 +1948,10 @@ def RenewSshKeys(node_uuids, node_names, >> ssh_port_map, >> >> master_node_name = ssconf_store.GetMasterNode() >> master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map, >> master_node_name) >> + # List of all node errors that happened, but which did not abort the >> + # procedure as a whole. It is important that this is a list to have a >> + # somewhat chronological history of events. >> + all_node_errors = [] >> >> # process non-master nodes >> for node_uuid, node_name in node_uuid_name_map: >> @@ -1989,15 +2016,16 @@ def RenewSshKeys(node_uuids, node_names, >> ssh_port_map, >> ssh.AddPublicKey(node_uuid, pub_key, key_file=pub_key_file) >> >> logging.debug("Add ssh key of node '%s'.", node_name) >> - AddNodeSshKey(node_uuid, node_name, >> - potential_master_candidates, >> - ssh_port_map, >> - to_authorized_keys=master_candidate, >> - to_public_keys=potential_master_candidate, >> - get_public_keys=True, >> - pub_key_file=pub_key_file, ssconf_store=ssconf_store, >> - noded_cert_file=noded_cert_file, >> - run_cmd_fn=run_cmd_fn) >> + node_errors = AddNodeSshKey( >> + node_uuid, node_name, potential_master_candidates, >> + ssh_port_map, to_authorized_keys=master_candidate, >> + to_public_keys=potential_master_candidate, >> + get_public_keys=True, >> + pub_key_file=pub_key_file, ssconf_store=ssconf_store, >> + noded_cert_file=noded_cert_file, >> + run_cmd_fn=run_cmd_fn) >> + if node_errors: >> + all_node_errors = all_node_errors + node_errors.items() >> >> # Renewing the master node's key >> >> @@ -2022,15 +2050,14 @@ def RenewSshKeys(node_uuids, node_names, >> ssh_port_map, >> >> # Add new master key to all node's public and authorized keys >> logging.debug("Add new master key to all nodes.") >> - AddNodeSshKey(master_node_uuid, master_node_name, >> - potential_master_candidates, >> - ssh_port_map, >> - to_authorized_keys=True, >> - to_public_keys=True, >> - get_public_keys=False, >> - pub_key_file=pub_key_file, ssconf_store=ssconf_store, >> - noded_cert_file=noded_cert_file, >> - run_cmd_fn=run_cmd_fn) >> + node_errors = AddNodeSshKey( >> + master_node_uuid, master_node_name, potential_master_candidates, >> + ssh_port_map, to_authorized_keys=True, to_public_keys=True, >> + get_public_keys=False, pub_key_file=pub_key_file, >> + ssconf_store=ssconf_store, noded_cert_file=noded_cert_file, >> + run_cmd_fn=run_cmd_fn) >> + if node_errors: >> + all_node_errors = all_node_errors + node_errors.items() >> >> # Remove the old key file and rename the new key to the non-temporary >> filename >> _ReplaceMasterKeyOnMaster(root_keyfiles) >> @@ -2053,6 +2080,8 @@ def RenewSshKeys(node_uuids, node_names, >> ssh_port_map, >> clear_authorized_keys=False, >> clear_public_keys=False) >> >> + return all_node_errors >> + >> >> def GetBlockDevSizes(devices): >> """Return the size of the given block devices >> diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py >> index c504d60..14c18da 100644 >> --- a/lib/cmdlib/cluster.py >> +++ b/lib/cmdlib/cluster.py >> @@ -210,7 +210,7 @@ class LUClusterRenewCrypto(NoHooksLU): >> self.cfg.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid) >> self.cfg.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid) >> >> - def _RenewSshKeys(self): >> + def _RenewSshKeys(self, feedback_fn): >> """Renew all nodes' SSH keys. >> >> """ >> @@ -230,6 +230,11 @@ class LUClusterRenewCrypto(NoHooksLU): >> master_candidate_uuids, >> potential_master_candidates) >> result[master_uuid].Raise("Could not renew the SSH keys of all >> nodes") >> > > Just out of curiosity: under which conditions would this actually be > raised? > If my observation has merit, maybe you can change the message to reflect > the unexpectedness of the failure. > Well, that can have several reasons, the most obvious ones are: - key files on the master not reachable, - the keys that are supposed to be renewed are not in the public key file (that is a security mechanism), - generating a new key of a node worked, but then master was not able to fetch it. > > Also, a comment and/or an empty line would be useful here. Otherwise it > looks like a bit of a bug. > The same is true for the next two entries. > What do you mean by "next two entries"? > > >> + node_errors = result[master_uuid].payload >> + if node_errors: >> + feedback_fn("Some nodes' SSH key files could not be updated:") >> + for node_name, error_msg in node_errors: >> + feedback_fn("%s: %s" % (node_name, error_msg)) >> >> def Exec(self, feedback_fn): >> if self.op.node_certificates: >> @@ -237,7 +242,7 @@ class LUClusterRenewCrypto(NoHooksLU): >> self._RenewNodeSslCertificates(feedback_fn) >> if self.op.ssh_keys and not self._ssh_renewal_suppressed: >> feedback_fn("Renewing SSH keys") >> - self._RenewSshKeys() >> + self._RenewSshKeys(feedback_fn) >> elif self._ssh_renewal_suppressed: >> feedback_fn("Cannot renew SSH keys if the cluster is configured to >> not" >> " modify the SSH setup.") >> diff --git a/lib/cmdlib/node.py b/lib/cmdlib/node.py >> index 19b630a..6a13d8a 100644 >> --- a/lib/cmdlib/node.py >> +++ b/lib/cmdlib/node.py >> @@ -339,7 +339,7 @@ class LUNodeAdd(LogicalUnit): >> result.Raise("Failed to initialize OpenVSwitch on new node") >> >> def _SshUpdate(self, new_node_uuid, new_node_name, is_master_candidate, >> - is_potential_master_candidate, rpcrunner, readd): >> + is_potential_master_candidate, rpcrunner, readd, >> feedback_fn): >> """Update the SSH setup of all nodes after adding a new node. >> >> @type readd: boolean >> @@ -373,6 +373,11 @@ class LUNodeAdd(LogicalUnit): >> is_master_candidate, is_potential_master_candidate, >> is_potential_master_candidate) >> result[master_node].Raise("Could not update the node's SSH setup.") >> + node_errors = result[master_node].payload >> + if node_errors: >> + feedback_fn("Some nodes' SSH key files could not be updated:") >> + for node_name, error_msg in node_errors.items(): >> + feedback_fn("%s: %s." % (node_name, error_msg)) >> > > Between SSL and the multiple placements, this really is calling out to be > a helper function. > Added it. Interdiff see below. This also has an effect on other patches. Will send interdiffs there too. > > >> >> def Exec(self, feedback_fn): >> """Adds the new node to the cluster. >> @@ -481,7 +486,7 @@ class LUNodeAdd(LogicalUnit): >> # FIXME: so far, all nodes are considered potential master >> candidates >> self._SshUpdate(self.new_node.uuid, self.new_node.name, >> self.new_node.master_candidate, True, >> - self.rpc, self.op.readd) >> + self.rpc, self.op.readd, feedback_fn) >> >> >> class LUNodeSetParams(LogicalUnit): >> @@ -897,6 +902,11 @@ class LUNodeSetParams(LogicalUnit): >> ssh_result[master_node].Raise( >> "Could not update the SSH setup of node '%s' after promotion" >> " (UUID: %s)." % (node.name, node.uuid)) > > + node_errors = ssh_result[master_node].payload >> + if node_errors: >> + feedback_fn("Some nodes' SSH key files could not be >> updated:") >> + for node_name, error_msg in node_errors.items(): >> + feedback_fn("%s: %s." % (node_name, error_msg)) > > >> return result >> >> diff --git a/test/py/ganeti.backend_unittest.py b/test/py/ >> ganeti.backend_unittest.py >> index cc65501..e05580d 100755 >> --- a/test/py/ganeti.backend_unittest.py >> +++ b/test/py/ganeti.backend_unittest.py >> @@ -1400,7 +1400,14 @@ class >> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): >> self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey( >> node, node_key)) >> >> - def testAddKeySuccessfullyWithRetries(self): >> + def testAddKeySuccessfullyOnNewNodeWithRetries(self): >> + """Tests adding a new node's key when updating that node takes >> retries. >> + >> + This test checks whether adding a new node's key successfully updates >> + all node's SSH key files, even if updating the new node's key files >> itself >> > > Nit: s/node's/nodes'/ for the first one. The second one stays as-is. > Possible better formulation: > I think you actually meant the second one. However, I picked this alternative formulation: > > .. updates the SSH key files of all nodes, even if updating the new node's > files ... > See interdiff. > > >> + takes a couple of retries to succeed. >> + >> + """ >> new_node_name = "new_node_name" >> new_node_uuid = "new_node_uuid" >> new_node_key = "new_node_key" >> @@ -1430,7 +1437,14 @@ class >> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): >> self.assertTrue(self._ssh_file_manager.AllNodesHaveAuthorizedKey( >> new_node_key)) >> >> - def testAddKeyFailedWithRetries(self): >> + def testAddKeyFailedOnNewNodeWithRetries(self): >> + """Tests clean up if updating a new node's SSH setup fails. >> + >> + If adding a new node's keys fails, because the SSH key files of that >> + new node fails, check whether already carried out operations are >> > > Nit: Better formulation: If adding the keys of a new node fails because > the SSH key files ... > Ack. See interdiff. > > >> + successfully rolled back and thus the state of the cluster is >> cleaned up. >> + >> + """ >> new_node_name = "new_node_name" >> new_node_uuid = "new_node_uuid" >> new_node_key = "new_node_key" >> @@ -1466,6 +1480,89 @@ class >> TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): >> self.assertTrue(self._ssh_file_manager.NoNodeHasPublicKey( >> new_node_uuid, new_node_key)) >> >> + def testAddKeySuccessfullyOnOldNodeWithRetries(self): >> + """Tests adding a new key even if updating nodes takes retries. >> + >> + This tests whether adding a new node's key successfully finishes, >> + even if one of the other cluster nodes takes a couple of retries >> + to succeed. >> + >> + """ >> + new_node_name = "new_node_name" >> + new_node_uuid = "new_node_uuid" >> + new_node_key = "new_node_key" >> + is_master_candidate = True >> + is_potential_master_candidate = True >> + is_master = False >> + >> + other_node_name, _, _, _, _, _ = self._ssh_file_manager \ >> + .GetAllMasterCandidates()[0] >> > > Afaik, this will be fixed in a follow-up patch, right? > I second Petr's suggestion of named tuples. > Yes. > > >> + self._ssh_file_manager.SetMaxRetries(other_node_name, 3) >> + assert other_node_name != new_node_name >> + self._AddNewNodeToTestData( >> + new_node_name, new_node_uuid, new_node_key, >> + is_potential_master_candidate, is_master_candidate, >> + is_master) >> + >> + backend.AddNodeSshKey(new_node_uuid, new_node_name, >> + self._potential_master_candidates, >> + self._ssh_port_map, >> + to_authorized_keys=is_master_candidate, >> + to_public_keys=is_potential_master_candidate, >> + get_public_keys=is_potential_master_candidate, >> + pub_key_file=self._pub_key_file, >> + ssconf_store=self._ssconf_mock, >> + noded_cert_file=self.noded_cert_file, >> + run_cmd_fn=self._run_cmd_mock) >> + >> + self.assertTrue(self._ssh_file_manager.AllNodesHaveAuthorizedKey( >> + new_node_key)) >> + >> + def testAddKeyFailedOnOldNodeWithRetries(self): >> + """Tests adding keys when updating one node's SSH setup fails. >> + >> + This tests, whether when adding a new node's key and one node is >> > > Nit: s/,// > > >> + unreachable (but not marked as offline) the operation still finishes >> + properly and only that unreachable node's SSH key setup did not get >> + updated. >> + >> + """ >> + new_node_name = "new_node_name" >> + new_node_uuid = "new_node_uuid" >> + new_node_key = "new_node_key" >> + is_master_candidate = True >> + is_potential_master_candidate = True >> + is_master = False >> + >> + other_node_name, _, _, _, _, _ = self._ssh_file_manager \ >> + .GetAllMasterCandidates()[0] >> + self._ssh_file_manager.SetMaxRetries(other_node_name, 4) >> + assert other_node_name != new_node_name >> + self._AddNewNodeToTestData( >> + new_node_name, new_node_uuid, new_node_key, >> + is_potential_master_candidate, is_master_candidate, >> + is_master) >> + >> + node_errors = backend.AddNodeSshKey( >> + new_node_uuid, new_node_name, self._potential_master_candidates, >> + self._ssh_port_map, to_authorized_keys=is_master_candidate, >> + to_public_keys=is_potential_master_candidate, >> + get_public_keys=is_potential_master_candidate, >> + pub_key_file=self._pub_key_file, >> + ssconf_store=self._ssconf_mock, >> + noded_cert_file=self.noded_cert_file, >> + run_cmd_fn=self._run_cmd_mock) >> + >> + for node in self._all_nodes: >> + if node == other_node_name: >> + self.assertFalse(self._ssh_file_manager.NodeHasAuthorizedKey( >> + node, new_node_key)) >> + else: >> + self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey( >> + node, new_node_key)) >> + >> + self.assertTrue(node_errors[other_node_name]) >> + >> >> class TestVerifySshSetup(testutils.GanetiTestCase): >> >> -- >> 2.2.0.rc0.207.ga3a616c >> >> Interdiff: commit 49ced9fa7adefd1865ca9933d0a5bb5252fc1726 Author: Helga Velroyen <[email protected]> Date: Thu Apr 23 11:48:40 2015 +0200 interdiff diff --git a/lib/backend.py b/lib/backend.py index 5a09efa..6a17edb 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -1564,7 +1564,7 @@ def AddNodeSshKey(node_uuid, node_name, master_node = ssconf_store.GetMasterNode() online_nodes = ssconf_store.GetOnlineNodeList() - node_errors = {} + node_errors = [] for node in all_nodes: if node == master_node: logging.debug("Skipping master node '%s'.", master_node) @@ -1589,10 +1589,10 @@ def AddNodeSshKey(node_uuid, node_name, if last_exception: error_msg = ("When adding the key of node '%s', updating SSH key" " files of node '%s' failed after %s retries." - " Not trying again. Last errors was: %s." % + " Not trying again. Last error was: %s." % (node, node_name, constants.SSHS_MAX_RETRIES, last_exception)) - node_errors[node] = error_msg + node_errors.append((node, error_msg)) # We only log the error and don't throw an exception, because # one unreachable node shall not abort the entire procedure. logging.error(error_msg) @@ -2024,7 +2024,7 @@ def RenewSshKeys(node_uuids, node_names, ssh_port_map, noded_cert_file=noded_cert_file, run_cmd_fn=run_cmd_fn) if node_errors: - all_node_errors = all_node_errors + node_errors.items() + all_node_errors = all_node_errors + node_errors # Renewing the master node's key @@ -2056,7 +2056,7 @@ def RenewSshKeys(node_uuids, node_names, ssh_port_map, ssconf_store=ssconf_store, noded_cert_file=noded_cert_file, run_cmd_fn=run_cmd_fn) if node_errors: - all_node_errors = all_node_errors + node_errors.items() + all_node_errors = all_node_errors + node_errors # Remove the old key file and rename the new key to the non-temporary filename _ReplaceMasterKeyOnMaster(root_keyfiles) diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py index 14c18da..b56ca61 100644 --- a/lib/cmdlib/cluster.py +++ b/lib/cmdlib/cluster.py @@ -68,7 +68,8 @@ from ganeti.cmdlib.common import ShareAll, RunPostHook, \ CheckDiskAccessModeConsistency, CreateNewClientCert, \ AddInstanceCommunicationNetworkOp, ConnectInstanceCommunicationNetworkOp, \ CheckImageValidity, \ - CheckDiskAccessModeConsistency, CreateNewClientCert, EnsureKvmdOnNodes + CheckDiskAccessModeConsistency, CreateNewClientCert, EnsureKvmdOnNodes, \ + EvaluateSshUpdateRPC import ganeti.masterd.instance @@ -224,17 +225,19 @@ class LUClusterRenewCrypto(NoHooksLU): port_map = ssh.GetSshPortMap(node_names, self.cfg) potential_master_candidates = self.cfg.GetPotentialMasterCandidates() master_candidate_uuids = self.cfg.GetMasterCandidateUuids() + result = self.rpc.call_node_ssh_keys_renew( [master_uuid], node_uuids, node_names, port_map, master_candidate_uuids, potential_master_candidates) + + # Check if there were serious errors (for example master key files not + # writable). result[master_uuid].Raise("Could not renew the SSH keys of all nodes") - node_errors = result[master_uuid].payload - if node_errors: - feedback_fn("Some nodes' SSH key files could not be updated:") - for node_name, error_msg in node_errors: - feedback_fn("%s: %s" % (node_name, error_msg)) + + # Process any non-disruptive errors (a few nodes unreachable etc.) + EvaluateSshUpdateRPC(result, master_uuid, feedback_fn) def Exec(self, feedback_fn): if self.op.node_certificates: diff --git a/lib/cmdlib/common.py b/lib/cmdlib/common.py index 21948af..5e5f573 100644 --- a/lib/cmdlib/common.py +++ b/lib/cmdlib/common.py @@ -1554,3 +1554,11 @@ def EnsureKvmdOnNodes(lu, feedback_fn, nodes=None): for node_uuid in stop_nodes: results[node_uuid].Warn("Failed to stop KVM daemon in node '%s'" % node_uuid, feedback_fn) + + +def EvaluateSshUpdateRPC(result, master_uuid, feedback_fn): + node_errors = result[master_uuid].payload + if node_errors: + feedback_fn("Some nodes' SSH key files could not be updated:") + for node_name, error_msg in node_errors: + feedback_fn("%s: %s" % (node_name, error_msg)) diff --git a/lib/cmdlib/node.py b/lib/cmdlib/node.py index 6a13d8a..c90965e 100644 --- a/lib/cmdlib/node.py +++ b/lib/cmdlib/node.py @@ -53,7 +53,7 @@ from ganeti.cmdlib.common import CheckParamsNotGlobal, \ GetWantedNodes, MapInstanceLvsToNodes, RunPostHook, \ FindFaultyInstanceDisks, CheckStorageTypeEnabled, CreateNewClientCert, \ AddNodeCertToCandidateCerts, RemoveNodeCertFromCandidateCerts, \ - EnsureKvmdOnNodes + EnsureKvmdOnNodes, EvaluateSshUpdateRPC from ganeti.ssh import GetSshPortMap @@ -372,12 +372,9 @@ class LUNodeAdd(LogicalUnit): potential_master_candidates, port_map, is_master_candidate, is_potential_master_candidate, is_potential_master_candidate) + result[master_node].Raise("Could not update the node's SSH setup.") - node_errors = result[master_node].payload - if node_errors: - feedback_fn("Some nodes' SSH key files could not be updated:") - for node_name, error_msg in node_errors.items(): - feedback_fn("%s: %s." % (node_name, error_msg)) + EvaluateSshUpdateRPC(result, master_node, feedback_fn) def Exec(self, feedback_fn): """Adds the new node to the cluster. @@ -902,11 +899,7 @@ class LUNodeSetParams(LogicalUnit): ssh_result[master_node].Raise( "Could not update the SSH setup of node '%s' after promotion" " (UUID: %s)." % (node.name, node.uuid)) - node_errors = ssh_result[master_node].payload - if node_errors: - feedback_fn("Some nodes' SSH key files could not be updated:") - for node_name, error_msg in node_errors.items(): - feedback_fn("%s: %s." % (node_name, error_msg)) + EvaluateSshUpdateRPC(ssh_result, master_node, feedback_fn) return result diff --git a/test/py/ganeti.backend_unittest.py b/test/py/ ganeti.backend_unittest.py index ec61f07..77b4f46 100755 --- a/test/py/ganeti.backend_unittest.py +++ b/test/py/ganeti.backend_unittest.py @@ -1404,8 +1404,8 @@ class TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): """Tests adding a new node's key when updating that node takes retries. This test checks whether adding a new node's key successfully updates - all node's SSH key files, even if updating the new node's key files itself - takes a couple of retries to succeed. + the SSH key files of all nodes, even if updating the new node's key files + itself takes a couple of retries to succeed. """ new_node_name = "new_node_name" @@ -1441,8 +1441,8 @@ class TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): def testAddKeyFailedOnNewNodeWithRetries(self): """Tests clean up if updating a new node's SSH setup fails. - If adding a new node's keys fails, because the SSH key files of that - new node fails, check whether already carried out operations are + If adding the keys of a new node fails, because updating the SSH key files + of that new node fails, check whether already carried out operations are successfully rolled back and thus the state of the cluster is cleaned up. """ @@ -1524,7 +1524,7 @@ class TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): def testAddKeyFailedOnOldNodeWithRetries(self): """Tests adding keys when updating one node's SSH setup fails. - This tests, whether when adding a new node's key and one node is + This tests whether when adding a new node's key and one node is unreachable (but not marked as offline) the operation still finishes properly and only that unreachable node's SSH key setup did not get updated. @@ -1565,7 +1565,8 @@ class TestAddRemoveGenerateNodeSshKey(testutils.GanetiTestCase): self.assertTrue(self._ssh_file_manager.NodeHasAuthorizedKey( node, new_node_key)) - self.assertTrue(node_errors[other_node_name]) + self.assertTrue([error_msg for (node, error_msg) in node_errors + if node == other_node_name]) class TestVerifySshSetup(testutils.GanetiTestCase): > > Hrvoje Ribicic > Ganeti Engineering > Google Germany GmbH > Dienerstr. 12, 80331, München > > Registergericht und -nummer: Hamburg, HRB 86891 > Sitz der Gesellschaft: Hamburg > Geschäftsführer: Graham Law, Christine Elizabeth Flores > Steuernummer: 48/725/00206 > Umsatzsteueridentifikationsnummer: DE813741370 >
