Hi!

thanks for pointing those out. I even found a third one ;) Will fix those
and then submit.

Cheers, Helga

On Mon, 13 Apr 2015 at 11:17 Petr Pudlak <[email protected]> wrote:

> On Mon, Apr 13, 2015 at 11:10:49AM +0200, 'Helga Velroyen' via
> ganeti-devel wrote:
> >As the LURenewCrypto is a pain to debug, this patch
> >adds a lot more logging of events to the method.
> >Note:
> >- As renew-crypto is a relatively rarely used operation
> >  in a normal production cluster, this won't clutter up
> >  real user's log files.
> >- Most of the messages are in debug mode, so they would
> >  anyway just show up in log files of clusters run in
> >  debug mode (as for example our QA clusters.
> >- A few log messages are in error mode, which is
> >  intentional as they log more details about the errors
> >  than is given in the feedback functions.
> >
> >Signed-off-by: Helga Velroyen <[email protected]>
> >---
> > lib/cmdlib/cluster.py | 47
> ++++++++++++++++++++++++++++++++++++++++++-----
> > 1 file changed, 42 insertions(+), 5 deletions(-)
> >
> >diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py
> >index 6cb1401..e2f9499 100644
> >--- a/lib/cmdlib/cluster.py
> >+++ b/lib/cmdlib/cluster.py
> >@@ -115,24 +115,37 @@ class LUClusterRenewCrypto(NoHooksLU):
> >
> >   def Exec(self, feedback_fn):
> >     master_uuid = self.cfg.GetMasterNode()
> >+    logging.debug("Renewing the master's SSL node certificate."
> >+                  "Master's UUID: %s.", master_uuid)
>
> Just nitpicking: Missing space after the leading ".
>
> >     cluster = self.cfg.GetClusterInfo()
> >
> >     server_digest = utils.GetCertificateDigest(
> >       cert_filename=pathutils.NODED_CERT_FILE)
> >+    logging.debug("SSL digest of the node certificate: %s.",
> server_digest)
> >     utils.AddNodeToCandidateCerts("%s-SERVER" % master_uuid,
> >                                   server_digest,
> >                                   cluster.candidate_certs)
> >+    logging.debug("Added master's digest as *-SERVER entry to
> configuration."
> >+                  "Current list of candidate certificates: %s.",
>
> .. also here
>
> >+                  str(cluster.candidate_certs))
> >+
> >     try:
> >       old_master_digest = utils.GetCertificateDigest(
> >         cert_filename=pathutils.NODED_CLIENT_CERT_FILE)
> >+      logging.debug("SSL digest of old master's SSL node certificate:
> %s.",
> >+                    old_master_digest)
> >       utils.AddNodeToCandidateCerts("%s-OLDMASTER" % master_uuid,
> >                                     old_master_digest,
> >                                     cluster.candidate_certs)
> >+      logging.debug("Added old master's node certificate digest to
> config"
> >+                    "as *-OLDMASTER. Current list of candidate
> certificates:"
>
> also here.
>
> >+                    " %s.", str(cluster.candidate_certs))
> >+
> >     except IOError:
> >-      logging.info("No old certificate available.")
> >+      logging.info("No old master certificate available.")
> >
> >     last_exception = None
> >-    for _ in range(self._MAX_NUM_RETRIES):
> >+    for i in range(self._MAX_NUM_RETRIES):
> >       try:
> >         # Technically it should not be necessary to set the cert
> >         # paths. However, due to a bug in the mock library, we
> >@@ -141,8 +154,11 @@ class LUClusterRenewCrypto(NoHooksLU):
> >             self, master_uuid, cluster, feedback_fn,
> >             client_cert=pathutils.NODED_CLIENT_CERT_FILE,
> >             client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP)
> >+        logging.debug("Successfully renewed the master's node
> certificate.")
> >         break
> >       except errors.OpExecError as e:
> >+        logging.error("Renewing the master's SSL node certificate failed"
> >+                      " at attempt no. %s with error '%s'", str(i), e)
> >         last_exception = e
> >     else:
> >       if last_exception:
> >@@ -153,30 +169,46 @@ class LUClusterRenewCrypto(NoHooksLU):
> >                                          cluster.candidate_certs)
> >       utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
> >                                          cluster.candidate_certs)
> >+      logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate
> from"
> >+                    " master candidate cert list. Current state of the"
> >+                    " list: %s.", str(cluster.candidate_certs))
> >       try:
> >         utils.RemoveFile(pathutils.NODED_CLIENT_CERT_FILE_TMP)
> >-      except IOError:
> >-        pass
> >+      except IOError as e:
> >+        logging.debug("Could not clean up temporary node certificate of
> the"
> >+                      " master node. (Possibly because it was already
> removed"
> >+                      " properly.) Error: %s.", e)
> >       return
> >
> >     node_errors = {}
> >     nodes = self.cfg.GetAllNodesInfo()
> >+    logging.debug("Renewing non-master nodes' node certificates.")
> >     for (node_uuid, node_info) in nodes.items():
> >       if node_info.offline:
> >         feedback_fn("* Skipping offline node %s" % node_info.name)
> >+        logging.debug("Skipping offline node %s (UUID: %s).",
> >+                      node_info.name, node_uuid)
> >         continue
> >       if node_uuid != master_uuid:
> >+        logging.debug("Renewing node certificate of node '%s'.",
> node_uuid)
> >         last_exception = None
> >-        for _ in range(self._MAX_NUM_RETRIES):
> >+        for i in range(self._MAX_NUM_RETRIES):
> >           try:
> >             new_digest = CreateNewClientCert(self, node_uuid)
> >             if node_info.master_candidate:
> >               utils.AddNodeToCandidateCerts(node_uuid,
> >                                             new_digest,
> >                                             cluster.candidate_certs)
> >+              logging.debug("Added the node's certificate to candidate"
> >+                            " certificate list. Current list: %s.",
> >+                            str(cluster.candidate_certs))
> >             break
> >           except errors.OpExecError as e:
> >             last_exception = e
> >+            logging.error("Could not renew a non-master node's SSL node"
> >+                          " certificate at attempt no. %s. The node's
> UUID"
> >+                          " is %s, and the error was: %s.",
> >+                          str(i), node_uuid, e)
> >         else:
> >           if last_exception:
> >             node_errors[node_uuid] = last_exception
> >@@ -193,7 +225,12 @@ class LUClusterRenewCrypto(NoHooksLU):
> >                                        cluster.candidate_certs)
> >     utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
> >                                        cluster.candidate_certs)
> >+    logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate from"
> >+                  " master candidate cert list. Current state of the"
> >+                  " list: %s.", cluster.candidate_certs)
> >+
> >     # Trigger another update of the config now with the new master cert
> >+    logging.debug("Trigger an update of the configuration on all nodes.")
> >     self.cfg.Update(cluster, feedback_fn)
> >
> >
> >--
> >2.2.0.rc0.207.ga3a616c
> >
>
> LGTM, thanks, no need to resend.
>

Reply via email to