On Wed, Jul 01, 2015 at 10:42:56AM +0200, 'Helga Velroyen' via ganeti-devel 
wrote:
For the downgrading of the SSL setup from 2.12 to 2.11, we
need to be able to SSH into machines while no daemons are
running. Unfortunately currently the only way to obtain
custom-configured SSH ports is by queries. In order to
access this information with daemons being shutdown, this
patch adds the SSH port information to an ssconf file.

This will also be used to simplify some backend calls for
the *SSH* handling in 2.13.

Signed-off-by: Helga Velroyen <[email protected]>
---
lib/config.py               | 19 ++++++++++++++++--
lib/ssconf.py               | 49 ++++++++++++++++++++++++++++++++-------------
src/Ganeti/Constants.hs     |  3 +++
src/Ganeti/Ssconf.hs        |  1 +
src/Ganeti/WConfd/Ssconf.hs |  9 +++++++++
5 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/lib/config.py b/lib/config.py
index 333567d..1f37feb 100644
--- a/lib/config.py
+++ b/lib/config.py
@@ -323,6 +323,10 @@ class ConfigWriter(object):
    """
    return os.path.exists(pathutils.CLUSTER_CONF_FILE)

+  def _UnlockedGetNdParams(self, node):
+    nodegroup = self._UnlockedGetNodeGroup(node.group)
+    return self._ConfigData().cluster.FillND(node, nodegroup)
+
  @_ConfigSync(shared=1)
  def GetNdParams(self, node):
    """Get the node params populated with cluster defaults.
@@ -332,8 +336,7 @@ class ConfigWriter(object):
    @return: A dict with the filled in node params

    """
-    nodegroup = self._UnlockedGetNodeGroup(node.group)
-    return self._ConfigData().cluster.FillND(node, nodegroup)
+    return self._UnlockedGetNdParams(node)

  @_ConfigSync(shared=1)
  def GetNdGroupParams(self, nodegroup):
@@ -2967,6 +2970,13 @@ class ConfigWriter(object):
      ssconf_values[ssconf_key] = all_hvparams[hv]
    return ssconf_values

+  def _UnlockedGetSshPortMap(self, node_infos):
+    node_ports = dict([(node.name,
+                        self._UnlockedGetNdParams(node).get(
+                            constants.ND_SSH_PORT))
+                       for node in node_infos])
+    return node_ports
+
  def _UnlockedGetSsconfValues(self):
    """Return the values needed by ssconf.

@@ -3018,6 +3028,10 @@ class ConfigWriter(object):
                self._ConfigData().networks.values()]
    networks_data = fn(utils.NiceSort(networks))

+    ssh_ports = fn("%s=%s" % (node_name, port)
+                   for node_name, port
+                   in self._UnlockedGetSshPortMap(node_infos).items())
+
    ssconf_values = {
      constants.SS_CLUSTER_NAME: cluster.cluster_name,
      constants.SS_CLUSTER_TAGS: cluster_tags,
@@ -3046,6 +3060,7 @@ class ConfigWriter(object):
      constants.SS_NODEGROUPS: nodegroups_data,
      constants.SS_NETWORKS: networks_data,
      constants.SS_ENABLED_USER_SHUTDOWN: str(cluster.enabled_user_shutdown),
+      constants.SS_SSH_PORTS: ssh_ports,
      }
    ssconf_values = self._ExtendByAllHvparamsStrings(ssconf_values,
                                                     all_hvparams)
diff --git a/lib/ssconf.py b/lib/ssconf.py
index 07ec612..111493e 100644
--- a/lib/ssconf.py
+++ b/lib/ssconf.py
@@ -84,6 +84,7 @@ _VALID_KEYS = compat.UniqueFrozenset([
  constants.SS_HVPARAMS_XEN_CHROOT,
  constants.SS_HVPARAMS_XEN_LXC,
  constants.SS_ENABLED_USER_SHUTDOWN,
+  constants.SS_SSH_PORTS,
  ])

#: Maximum size for ssconf files
@@ -257,6 +258,27 @@ class SimpleStore(object):
    nl = data.splitlines(False)
    return nl

+  def _GetDictOfSsconfMap(self, ss_file_key):
+    """Reads a file with lines like key=value and returns a dict.
+
+    This utility function reads a file containing ssconf values of
+    the form "key=value", splits the lines at "=" and returns a
+    dictionary mapping the keys to the values.
+
+    @type ss_file_key: string
+    @param ss_file_key: the constant referring to an ssconf file
+    @rtype: dict of string to string
+    @return: a dictionary mapping the keys to the values
+
+    """
+    data = self._ReadFile(ss_file_key)
+    lines = data.splitlines(False)
+    mapping = {}
+    for line in lines:
+      (key, value) = line.split("=")
+      mapping[key] = value
+    return mapping

Thanks for adding this function, this was repeated so many times in the module.

+
  def GetMasterCandidatesCertMap(self):
    """Returns the map of master candidate UUIDs to ssl cert.

@@ -265,13 +287,18 @@ class SimpleStore(object):
      to their SSL certificate digests

    """
-    data = self._ReadFile(constants.SS_MASTER_CANDIDATES_CERTS)
-    lines = data.splitlines(False)
-    certs = {}
-    for line in lines:
-      (node_uuid, cert_digest) = line.split("=")
-      certs[node_uuid] = cert_digest
-    return certs
+    return self._GetDictOfSsconfMap(constants.SS_MASTER_CANDIDATES_CERTS)
+
+  def GetSshPortMap(self):
+    """Returns the map of node names to SSH port.
+
+    @rtype: dict of string to string
+    @return: dictionary mapping the node names to their SSH port
+
+    """
+    return dict([(node_name, int(ssh_port)) for
+                  node_name, ssh_port in
+                  self._GetDictOfSsconfMap(constants.SS_SSH_PORTS).items()])

  def GetMasterIP(self):
    """Get the IP of the master node for this cluster.
@@ -389,13 +416,7 @@ class SimpleStore(object):
    @returns: dictionary with hypervisor parameters

    """
-    data = self._ReadFile(constants.SS_HVPARAMS_PREF + hvname)
-    lines = data.splitlines(False)
-    hvparams = {}
-    for line in lines:
-      (key, value) = line.split("=")
-      hvparams[key] = value
-    return hvparams
+    return self._GetDictOfSsconfMap(constants.SS_HVPARAMS_PREF + hvname)

  def GetHvparams(self):
    """Return the hypervisor parameters of all hypervisors.
diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs
index a1249f9..305ffc6 100644
--- a/src/Ganeti/Constants.hs
+++ b/src/Ganeti/Constants.hs
@@ -3768,6 +3768,9 @@ ssFilePerms = 0o444
ssEnabledUserShutdown :: String
ssEnabledUserShutdown = "enabled_user_shutdown"

+ssSshPorts :: String
+ssSshPorts = "ssh_ports"
+
-- | Cluster wide default parameters
defaultEnabledHypervisor :: String
defaultEnabledHypervisor = htXenPvm
diff --git a/src/Ganeti/Ssconf.hs b/src/Ganeti/Ssconf.hs
index ae8283d..bfcf3c8 100644
--- a/src/Ganeti/Ssconf.hs
+++ b/src/Ganeti/Ssconf.hs
@@ -113,6 +113,7 @@ $(declareLADT ''String "SSKey" (
  , ("SSNodegroups",            C.ssNodegroups)
  , ("SSNetworks",              C.ssNetworks)
  , ("SSEnabledUserShutdown",   C.ssEnabledUserShutdown)
+  , ("SSSshPorts",              C.ssSshPorts)
  ] ++
  -- Automatically generate SSHvparamsXxx for each hypervisor type:
  map ((("SSHvparams" ++) . show)
diff --git a/src/Ganeti/WConfd/Ssconf.hs b/src/Ganeti/WConfd/Ssconf.hs
index 6aa7765..c305915 100644
--- a/src/Ganeti/WConfd/Ssconf.hs
+++ b/src/Ganeti/WConfd/Ssconf.hs
@@ -126,6 +126,8 @@ mkSSConf cdata = SSConf . M.fromList $
                   . configNetworks $ cdata)
    , (SSEnabledUserShutdown, return . show . clusterEnabledUserShutdown
                              $ cluster)
+    , (SSSshPorts, mapLines (eqPair . (nodeName
+                                       &&& getSshPort cdata)) nodes)
    ] ++
    map (first hvparamsSSKey) (mkSSConfHvparams cluster)
  where
@@ -139,3 +141,10 @@ mkSSConf cdata = SSConf . M.fromList $
    nodes = niceSortKey nodeName . toList $ configNodes cdata
    (offline, online) = partition nodeOffline nodes
    nodeGroups = niceSortKey groupName . toList $ configNodegroups cdata
+
+    getSshPort :: ConfigData -> Node -> String
+    getSshPort cfg node =
+      let maybe_nd_params = getNodeNdParams cfg node
+      in case maybe_nd_params of
+        Just nd -> show (ndpSshPort nd)
+        Nothing -> ""

I was somewhat concerned by the "", but it turned out that it happens only when the configuration is corrupt and we can't find a node group for a node. So it's ok, perhaps we could add a small comment to explain that. And a minor idea, it could be simplified using maybe as

   maybe "" (show . ndpSshPort) $ getNodeNdParams cfg node

(no need to resend in any case).


As discussed, adding a new SSConf in a stable branch should be safe even for in-place upgrades, unlike changing the format of an existing one.

In the case the in-place upgrades aren't performed synchronously (one node's daemons restarted somewhat later than another's), the older node will reject the ssconf update, but no other harm will be done - the error will get propagated to the RPC layer and logged in WConfd log, the daemons will survive, and once they're all restarted with the new version, another ssconf update will fix the discrepancy (I tried that out).

There might be a subtle race when something that belongs to ssconf is changed just before the command is called, so the change doesn't get distributed in time, but ssconf is already used during up/downgrades, the race is already there, so we're not making things worse.

So since all the discussed issues seem to be safe,

LGTM


--
2.4.3.573.g4eafbef

Reply via email to