On Mon, Apr 13, 2015 at 11:10:49AM +0200, 'Helga Velroyen' via ganeti-devel 
wrote:
As the LURenewCrypto is a pain to debug, this patch
adds a lot more logging of events to the method.
Note:
- As renew-crypto is a relatively rarely used operation
 in a normal production cluster, this won't clutter up
 real user's log files.
- Most of the messages are in debug mode, so they would
 anyway just show up in log files of clusters run in
 debug mode (as for example our QA clusters.
- A few log messages are in error mode, which is
 intentional as they log more details about the errors
 than is given in the feedback functions.

Signed-off-by: Helga Velroyen <[email protected]>
---
lib/cmdlib/cluster.py | 47 ++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py
index 6cb1401..e2f9499 100644
--- a/lib/cmdlib/cluster.py
+++ b/lib/cmdlib/cluster.py
@@ -115,24 +115,37 @@ class LUClusterRenewCrypto(NoHooksLU):

  def Exec(self, feedback_fn):
    master_uuid = self.cfg.GetMasterNode()
+    logging.debug("Renewing the master's SSL node certificate."
+                  "Master's UUID: %s.", master_uuid)

Just nitpicking: Missing space after the leading ".

    cluster = self.cfg.GetClusterInfo()

    server_digest = utils.GetCertificateDigest(
      cert_filename=pathutils.NODED_CERT_FILE)
+    logging.debug("SSL digest of the node certificate: %s.", server_digest)
    utils.AddNodeToCandidateCerts("%s-SERVER" % master_uuid,
                                  server_digest,
                                  cluster.candidate_certs)
+    logging.debug("Added master's digest as *-SERVER entry to configuration."
+                  "Current list of candidate certificates: %s.",

.. also here

+                  str(cluster.candidate_certs))
+
    try:
      old_master_digest = utils.GetCertificateDigest(
        cert_filename=pathutils.NODED_CLIENT_CERT_FILE)
+      logging.debug("SSL digest of old master's SSL node certificate: %s.",
+                    old_master_digest)
      utils.AddNodeToCandidateCerts("%s-OLDMASTER" % master_uuid,
                                    old_master_digest,
                                    cluster.candidate_certs)
+      logging.debug("Added old master's node certificate digest to config"
+                    "as *-OLDMASTER. Current list of candidate certificates:"

also here.

+                    " %s.", str(cluster.candidate_certs))
+
    except IOError:
-      logging.info("No old certificate available.")
+      logging.info("No old master certificate available.")

    last_exception = None
-    for _ in range(self._MAX_NUM_RETRIES):
+    for i in range(self._MAX_NUM_RETRIES):
      try:
        # Technically it should not be necessary to set the cert
        # paths. However, due to a bug in the mock library, we
@@ -141,8 +154,11 @@ class LUClusterRenewCrypto(NoHooksLU):
            self, master_uuid, cluster, feedback_fn,
            client_cert=pathutils.NODED_CLIENT_CERT_FILE,
            client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP)
+        logging.debug("Successfully renewed the master's node certificate.")
        break
      except errors.OpExecError as e:
+        logging.error("Renewing the master's SSL node certificate failed"
+                      " at attempt no. %s with error '%s'", str(i), e)
        last_exception = e
    else:
      if last_exception:
@@ -153,30 +169,46 @@ class LUClusterRenewCrypto(NoHooksLU):
                                         cluster.candidate_certs)
      utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
                                         cluster.candidate_certs)
+      logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate from"
+                    " master candidate cert list. Current state of the"
+                    " list: %s.", str(cluster.candidate_certs))
      try:
        utils.RemoveFile(pathutils.NODED_CLIENT_CERT_FILE_TMP)
-      except IOError:
-        pass
+      except IOError as e:
+        logging.debug("Could not clean up temporary node certificate of the"
+                      " master node. (Possibly because it was already removed"
+                      " properly.) Error: %s.", e)
      return

    node_errors = {}
    nodes = self.cfg.GetAllNodesInfo()
+    logging.debug("Renewing non-master nodes' node certificates.")
    for (node_uuid, node_info) in nodes.items():
      if node_info.offline:
        feedback_fn("* Skipping offline node %s" % node_info.name)
+        logging.debug("Skipping offline node %s (UUID: %s).",
+                      node_info.name, node_uuid)
        continue
      if node_uuid != master_uuid:
+        logging.debug("Renewing node certificate of node '%s'.", node_uuid)
        last_exception = None
-        for _ in range(self._MAX_NUM_RETRIES):
+        for i in range(self._MAX_NUM_RETRIES):
          try:
            new_digest = CreateNewClientCert(self, node_uuid)
            if node_info.master_candidate:
              utils.AddNodeToCandidateCerts(node_uuid,
                                            new_digest,
                                            cluster.candidate_certs)
+              logging.debug("Added the node's certificate to candidate"
+                            " certificate list. Current list: %s.",
+                            str(cluster.candidate_certs))
            break
          except errors.OpExecError as e:
            last_exception = e
+            logging.error("Could not renew a non-master node's SSL node"
+                          " certificate at attempt no. %s. The node's UUID"
+                          " is %s, and the error was: %s.",
+                          str(i), node_uuid, e)
        else:
          if last_exception:
            node_errors[node_uuid] = last_exception
@@ -193,7 +225,12 @@ class LUClusterRenewCrypto(NoHooksLU):
                                       cluster.candidate_certs)
    utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
                                       cluster.candidate_certs)
+    logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate from"
+                  " master candidate cert list. Current state of the"
+                  " list: %s.", cluster.candidate_certs)
+
    # Trigger another update of the config now with the new master cert
+    logging.debug("Trigger an update of the configuration on all nodes.")
    self.cfg.Update(cluster, feedback_fn)


--
2.2.0.rc0.207.ga3a616c


LGTM, thanks, no need to resend.

Reply via email to