Ignore this one for now, I just found a mistake. Will resend soon! On Wed, 1 Jul 2015 at 10:42 Helga Velroyen <[email protected]> wrote:
> This patch will handle the downgrade of the SSL setup > from 2.12 to 2.11. Essentially, all client.pem and > ssconf_master_candidates_certs files will be deleted. > This will kick the cluster into a pre-2.11 mode with respect to > SSL and result in a nagging message to re-run > 'gnt-cluster renew-crypto' as output of 'gnt-cluster > verify'. > > Since we don't have downgrade-hooks there is unfortunately > no way to run 'gnt-cluster renew-crypto' automatically > after a downgrade. (Even if we would add them now, they > would need to be added to 2.11, which is unlikely to > get another release). To fix this for our QA, I added > a try-block to catch this and run renew-crypto if necessary. > > Signed-off-by: Helga Velroyen <[email protected]> > --- > lib/client/gnt_cluster.py | 33 +++++++++++++++++++++++++++++++++ > lib/tools/ssl_update.py | 45 > ++++++++++++++++++++++++++++++++++++++++++++- > qa/qa_cluster.py | 7 ++++++- > src/Ganeti/Constants.hs | 14 +++++++++++++- > tools/cfgupgrade | 5 +++++ > 5 files changed, 101 insertions(+), 3 deletions(-) > > diff --git a/lib/client/gnt_cluster.py b/lib/client/gnt_cluster.py > index a25c130..c56e2bb 100644 > --- a/lib/client/gnt_cluster.py > +++ b/lib/client/gnt_cluster.py > @@ -1071,6 +1071,7 @@ def _RenewCrypto(new_cluster_cert, new_rapi_cert, # > pylint: disable=R0911 > constants.NDS_NODE_DAEMON_CERTIFICATE: > utils.ReadFile(pathutils.NODED_CERT_FILE), > constants.NDS_NODE_NAME: node_name, > + constants.NDS_ACTION: constants.CRYPTO_ACTION_CREATE, > } > > bootstrap.RunNodeSetupCmd( > @@ -2068,6 +2069,38 @@ def _VersionSpecificDowngrade(): > @return: True upon success > """ > ToStdout("Performing version-specific downgrade tasks.") > + > + nodes = ssconf.SimpleStore().GetOnlineNodeList() > + cluster_name = ssconf.SimpleStore().GetClusterName() > + ssh_ports = ssconf.SimpleStore().GetSshPortMap() > + > + for node in nodes: > + data = { > + constants.NDS_CLUSTER_NAME: cluster_name, > + constants.NDS_NODE_DAEMON_CERTIFICATE: > + 
utils.ReadFile(pathutils.NODED_CERT_FILE), > + constants.NDS_NODE_NAME: node, > + constants.NDS_ACTION: constants.CRYPTO_ACTION_DELETE, > + } > + > + try: > + bootstrap.RunNodeSetupCmd( > + cluster_name, > + node, > + pathutils.SSL_UPDATE, > + True, # debug > + True, # verbose, > + True, # use cluster key > + False, # ask key > + True, # strict host check > + ssh_ports[node], > + data) > + except Exception as e: # pylint: disable=W0703 > + # As downgrading can fail if a node is temporarily unreachable > + # only output the error, but do not abort the entire operation. > + ToStderr("Downgrading SSL setup of node '%s' failed: %s." % > + (node, e)) > + > return True > > > diff --git a/lib/tools/ssl_update.py b/lib/tools/ssl_update.py > index 4d17d9d..88a24ee 100644 > --- a/lib/tools/ssl_update.py > +++ b/lib/tools/ssl_update.py > @@ -42,6 +42,7 @@ from ganeti import constants > from ganeti import errors > from ganeti import utils > from ganeti import ht > +from ganeti import pathutils > from ganeti.tools import common > > > @@ -49,6 +50,7 @@ _DATA_CHECK = ht.TStrictDict(False, True, { > constants.NDS_CLUSTER_NAME: ht.TNonEmptyString, > constants.NDS_NODE_DAEMON_CERTIFICATE: ht.TNonEmptyString, > constants.NDS_NODE_NAME: ht.TNonEmptyString, > + constants.NDS_ACTION: ht.TNonEmptyString, > }) > > > @@ -75,6 +77,37 @@ def ParseOptions(): > return common.VerifyOptions(parser, opts, args) > > > +def DeleteClientCertificate(): > + """Deleting the client certificate. This is necessary for downgrades.""" > + if os.path.exists(pathutils.NODED_CLIENT_CERT_FILE): > + os.remove(pathutils.NODED_CLIENT_CERT_FILE) > + else: > + logging.debug("Trying to delete the client certificate '%s' which did > not" > + " exist.", pathutils.NODED_CLIENT_CERT_FILE) > + > + > +def ClearMasterCandidateSsconfList(): > + """Clear the ssconf list of master candidate certs. 
> + > + This is necessary when deleting the client certificates for a downgrade, > + because otherwise the master cannot distribute the configuration to the > + nodes via RPC during a downgrade anymore. > + > + """ > + ssconf_file = os.path.join( > + pathutils.DATA_DIR, > + "%s%s" % (constants.SSCONF_FILEPREFIX, > + constants.SS_MASTER_CANDIDATES_CERTS)) > + if os.path.exists: > + os.remove(ssconf_file) > + else: > + logging.debug("Trying to delete the ssconf file '%s' which does not" > + " exist.", ssconf_file) > + > + > +# pylint: disable=E1103 > +# This pyling message complains about 'data' as 'bool' not having a get > +# member, but obviously the type is wrongly inferred. > def Main(): > """Main routine. > > @@ -92,7 +125,17 @@ def Main(): > # is the same as on this node. > common.VerifyCertificate(data, SslSetupError) > > - common.GenerateClientCertificate(data, SslSetupError) > + action = data.get(constants.NDS_ACTION) > + if not action: > + raise SslSetupError("No Action specified.") > + > + if action == constants.CRYPTO_ACTION_CREATE: > + common.GenerateClientCertificate(data, SslSetupError) > + elif action == constants.CRYPTO_ACTION_DELETE: > + DeleteClientCertificate() > + ClearMasterCandidateSsconfList() > + else: > + raise SslSetupError("Unsupported action: %s." % action) > > except Exception, err: # pylint: disable=W0703 > logging.debug("Caught unhandled exception", exc_info=True) > diff --git a/qa/qa_cluster.py b/qa/qa_cluster.py > index 138b5f9..27580a0 100644 > --- a/qa/qa_cluster.py > +++ b/qa/qa_cluster.py > @@ -1381,7 +1381,12 @@ def TestUpgrade(): > nodes = qa_config.AcquireManyNodes(n) > live_instances.append(cf(nodes)) > > - AssertCommand(["gnt-cluster", "upgrade", "--to", this_version]) > + try: > + AssertCommand(["gnt-cluster", "upgrade", "--to", this_version]) > + except qa_error.Error: > + # This can be due to a downgrade to 2.11, in this case see if > + # can be fixed with a renew-crypto. (Issue 1008 could fix this.) 
> + AssertCommand(["gnt-cluster", "renew-crypto", > "--new-node-certificates"]) > AssertCommand(["gnt-cluster", "verify"]) > > for instance in live_instances: > diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs > index 305ffc6..f525d6e 100644 > --- a/src/Ganeti/Constants.hs > +++ b/src/Ganeti/Constants.hs > @@ -4388,8 +4388,17 @@ cryptoTypes = ConstantUtils.mkSet > [cryptoTypeSslDigest] > cryptoActionGet :: String > cryptoActionGet = "get" > > +cryptoActionCreate :: String > +cryptoActionCreate = "create" > + > +cryptoActionDelete :: String > +cryptoActionDelete = "delete" > + > cryptoActions :: FrozenSet String > -cryptoActions = ConstantUtils.mkSet [cryptoActionGet] > +cryptoActions = > + ConstantUtils.mkSet [ cryptoActionCreate > + , cryptoActionGet > + , cryptoActionDelete] > > -- Key word for master candidate cert list for bootstrapping. > > @@ -4479,6 +4488,9 @@ ndsStartNodeDaemon = "start_node_daemon" > ndsNodeName :: String > ndsNodeName = "node_name" > > +ndsAction :: String > +ndsAction = "action" > + > -- * VCluster related constants > > vClusterEtcHosts :: String > diff --git a/tools/cfgupgrade b/tools/cfgupgrade > index 9c2775d..9131fde 100755 > --- a/tools/cfgupgrade > +++ b/tools/cfgupgrade > @@ -524,6 +524,11 @@ def DowngradeCluster(config_data): > if "max_tracked_jobs" in cluster: > del cluster["max_tracked_jobs"] > > + if "candidate_certs" in cluster: > + # Clear the candidate certs to make people run 'gnt-cluster > renew-crypto' > + # after a downgrade from 2.12 to 2.11. > + cluster["candidate_certs"] = {} > + > > def DowngradeGroups(config_data): > for group in config_data["nodegroups"].values(): > -- > 2.4.3.573.g4eafbef > >
