As the LURenewCrypto is a pain to debug, this patch adds a lot more logging of events to the method. Note: - As renew-crypto is a relatively rarely used operation in a normal production cluster, this won't clutter up real user's log files. - Most of the messages are in debug mode, so they would anyway just show up in log files of clusters run in debug mode (as for example our QA clusters. - A few log messages are in error mode, which is intentional as they log more details about the errors than is given in the feedback functions.
Signed-off-by: Helga Velroyen <[email protected]> --- lib/cmdlib/cluster.py | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py index 6cb1401..e2f9499 100644 --- a/lib/cmdlib/cluster.py +++ b/lib/cmdlib/cluster.py @@ -115,24 +115,37 @@ class LUClusterRenewCrypto(NoHooksLU): def Exec(self, feedback_fn): master_uuid = self.cfg.GetMasterNode() + logging.debug("Renewing the master's SSL node certificate." + "Master's UUID: %s.", master_uuid) cluster = self.cfg.GetClusterInfo() server_digest = utils.GetCertificateDigest( cert_filename=pathutils.NODED_CERT_FILE) + logging.debug("SSL digest of the node certificate: %s.", server_digest) utils.AddNodeToCandidateCerts("%s-SERVER" % master_uuid, server_digest, cluster.candidate_certs) + logging.debug("Added master's digest as *-SERVER entry to configuration." + "Current list of candidate certificates: %s.", + str(cluster.candidate_certs)) + try: old_master_digest = utils.GetCertificateDigest( cert_filename=pathutils.NODED_CLIENT_CERT_FILE) + logging.debug("SSL digest of old master's SSL node certificate: %s.", + old_master_digest) utils.AddNodeToCandidateCerts("%s-OLDMASTER" % master_uuid, old_master_digest, cluster.candidate_certs) + logging.debug("Added old master's node certificate digest to config" + "as *-OLDMASTER. Current list of candidate certificates:" + " %s.", str(cluster.candidate_certs)) + except IOError: - logging.info("No old certificate available.") + logging.info("No old master certificate available.") last_exception = None - for _ in range(self._MAX_NUM_RETRIES): + for i in range(self._MAX_NUM_RETRIES): try: # Technically it should not be necessary to set the cert # paths. However, due to a bug in the mock library, we @@ -141,8 +154,11 @@ class LUClusterRenewCrypto(NoHooksLU): self, master_uuid, cluster, feedback_fn, client_cert=pathutils.NODED_CLIENT_CERT_FILE, client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP) + logging.debug("Successfully renewed the master's node certificate.") break except errors.OpExecError as e: + logging.error("Renewing the master's SSL node certificate failed" + " at attempt no. %s with error '%s'", str(i), e) last_exception = e else: if last_exception: @@ -153,30 +169,46 @@ class LUClusterRenewCrypto(NoHooksLU): cluster.candidate_certs) utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid, cluster.candidate_certs) + logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate from" + " master candidate cert list. Current state of the" + " list: %s.", str(cluster.candidate_certs)) try: utils.RemoveFile(pathutils.NODED_CLIENT_CERT_FILE_TMP) - except IOError: - pass + except IOError as e: + logging.debug("Could not clean up temporary node certificate of the" + " master node. (Possibly because it was already removed" + " properly.) Error: %s.", e) return node_errors = {} nodes = self.cfg.GetAllNodesInfo() + logging.debug("Renewing non-master nodes' node certificates.") for (node_uuid, node_info) in nodes.items(): if node_info.offline: feedback_fn("* Skipping offline node %s" % node_info.name) + logging.debug("Skipping offline node %s (UUID: %s).", + node_info.name, node_uuid) continue if node_uuid != master_uuid: + logging.debug("Renewing node certificate of node '%s'.", node_uuid) last_exception = None - for _ in range(self._MAX_NUM_RETRIES): + for i in range(self._MAX_NUM_RETRIES): try: new_digest = CreateNewClientCert(self, node_uuid) if node_info.master_candidate: utils.AddNodeToCandidateCerts(node_uuid, new_digest, cluster.candidate_certs) + logging.debug("Added the node's certificate to candidate" + " certificate list. Current list: %s.", + str(cluster.candidate_certs)) break except errors.OpExecError as e: last_exception = e + logging.error("Could not renew a non-master node's SSL node" + " certificate at attempt no. %s. The node's UUID" + " is %s, and the error was: %s.", + str(i), node_uuid, e) else: if last_exception: node_errors[node_uuid] = last_exception @@ -193,7 +225,12 @@ class LUClusterRenewCrypto(NoHooksLU): cluster.candidate_certs) utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid, cluster.candidate_certs) + logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate from" + " master candidate cert list. Current state of the" + " list: %s.", cluster.candidate_certs) + # Trigger another update of the config now with the new master cert + logging.debug("Trigger an update of the configuration on all nodes.") self.cfg.Update(cluster, feedback_fn) -- 2.2.0.rc0.207.ga3a616c
