2010/2/22 René Nussbaumer <[email protected]>:
> --- /dev/null
> +++ b/tools/cluster-merge
> +"""Tool to merge two or more clusters together.
> +
> +The clusters have to run the same version of Ganeti!
I don't see a check verifying the config file versions.
> +"""
Empty line before """.
> +def Flatten(unflatten_list):
> + """Flattens a list.
> +
> + @param unflatten_list: A list of unflatten list objects.
> + @return: A flatten list
> + """
Empty line before """:
def X():
"""Foo bar baz.
@…
"""
var = …
> +class Merger(object):
> + ROLLBACK_STEPS = (
> + "Remove our key from authorized_keys on nodes: %(nodes)s",
> + "Start all instances again on the merging clusters: %(clusters)s",
> + "Restore %s from another master candidate" %
> constants.CLUSTER_CONF_FILE)
Is this not a list?
ROLLBACK_STEPS = [
"step1",
"step2",
…
]
> + def __init__(self, clusters):
> + """Initialize object with sane defaults and infos required.
> +
> + @param clusters: The list of clusters to merge in
> + """
Empty line before """.
> + self.ssh_map = {}
> + self.rollback_step = -1
> + self.clusters = clusters
> + self.work_dir = tempfile.mkdtemp(suffix="cluster-merger")
> + self.ssh_runner = ssh.SshRunner("faked-cluster-merger-cluster")
> +
> + def Setup(self, remote_key_name="id_dsa"):
> + """Sets up our end so we can do the merger.
> +
> + This method is setting us up as a preparation for the merger.
> + It makes the initial contact and gathers information needed.
> +
> + @param remote_key_name: The name of remote private key to fetch
> + default: id_dsa
> + @raise errors.RemoteError: for errors in communication/grabbing
> + """
> +
> + remote_path = os.path.join("~", ".ssh", remote_key_name)
See above for docstring formatting. No empty line before code.
> +
> + # Fetch remotes private key
> + for cluster in self.clusters:
> + self.ssh_map[cluster] = {}
> +
> + # Define the closure (we need to reference outer variables)
> + def _WriteKey(key_file):
> + """Writes the remote private key.
> +
> + @param key_file: The opened key_file
> + """
> + cmd = self._RunCmd(hostname=cluster, command="cat %s" % remote_path,
> + batch=False, ask_key=False)
cmd is usually the command itself and we use “result” for the result of RunCmd.
> + if cmd.failed:
> + raise errors.RemoteError("There was an error while grabbing key
> from"
> + " %s. Exit: %i" % (cluster,
> cmd.exit_code))
Don't assume users to have psychic powers and log include
RunResult.output, too. E.g.:
"Command '%s' failed (%s); output: %s" % (…, result.fail_reason, result.output)
> +
> + key_file.write(cmd.stdout)
> +
> + key_path = self.ssh_map[cluster]["key"] = os.path.join(self.work_dir,
> + cluster)
Why not populate a local dict and assign it to self.ssh_map at the end
of the body? Less code, shorter lines.
> + utils.WriteFile(file_name=key_path, mode=0600, fn=_WriteKey)
Why not fetch the key and then call WriteFile? Why pass the first argument as
a keyword argument?
> + cmd = self._RunCmd(hostname=cluster, private_key=key_path,
> + command="gnt-node list -o name --no-header")
> + if not cmd.failed:
> + self.ssh_map[cluster]["nodes"] = cmd.stdout.splitlines()
Local dict, less code.
> + else:
> + raise errors.RemoteError("Unable to retrieve list of nodes."
> + " Exit: %i" % cmd.exit_code)
See above.
> + cmd = self._RunCmd(hostname=cluster, private_key=key_path,
> + command="gnt-instance list -o name --no-header")
> + if not cmd.failed:
> + self.ssh_map[cluster]["instances"] = cmd.stdout.splitlines()
Local dict, less code.
> + else:
> + raise errors.RemoteError("Unable to retrieve list of instances."
> + " Exit: %i" % cmd.exit_code)
See above.
> + def _PrepareAuthorizedKeys(self):
> + """Prepare the authorized_keys on every merging node.
> +
> + This method add our public key to remotes authorized_key for further
> + communication.
> + """
See above.
> + pub_key_file = open(ssh.GetUserFiles("root")[1])
Unpack tuple locally, e.g. (_, root_pubkey_file) = ssh.Get…()
> + try:
> + pub_key = pub_key_file.read()
> + finally:
> + pub_key_file.close()
utils.ReadFile
> + for cluster, data in self.ssh_map.iteritems():
> + for node in data["nodes"]:
> + cmd = self._RunCmd(hostname=node, private_key=data["key"],
> + command=("cat >> %s/.ssh/authorized_keys <<
> '!EOF.'"
> + "\n%s!EOF.\n" %
> + (utils.GetHomeDir("root"), pub_key)))
Verify pub_key doesn't contain dangerous things (just to be on the
safe side). See bootstrap.py.
Don't pass non-keyword arguments as arguments (command is not a
keyword argument on SshRunner.BuildCmd).
> + if cmd.failed:
> + raise errors.RemoteError("Unable to add our public key to %s in
> %s."
> + % (node, cluster))
Give more details, see above.
> + def _RunCmd(self, *args, **kwargs):
> + """Wrapping SshRunner.Run with default parameters.
> +
> + For explanation of parameters see L{SshRunner.Run}.
> + """
See above.
> + my_opts = {"user": "root",
> + "use_cluster_key": False,
> + "strict_host_check": False}
my_opts = {
"user": "root",
"use…": …,
}
Less changed lines when the dict is modified in the future.
> + my_opts.update(kwargs)
> + # pylint: disable-msg=W0142
Do such things only per line, if possible and sensible:
return self.ssh_runner.Run(*args, **my_opts) # pylint: …
> + # W0142: Merger._RunCmd: Used * or ** magic
> + return self.ssh_runner.Run(*args, **my_opts)
> + def _StopMergingInstances(self):
> + """Stop instances on merging clusters."""
Ganeti docstrings are always multiline (except in unittests), see
above for format. Here and in many other places.
> + for cluster in self.clusters:
> + self._RunCmd(hostname=cluster,
> + command="gnt-instance shutdown --all --force-multiple")
Hostname and command are not keyword parameters.
> + def _DisableWatcher(self):
> + """Disable watch on all merging clusters, including ourself."""
> + for cluster in ["localhost"] + self.clusters:
> + self._RunCmd(hostname=cluster,
> + command="gnt-cluster watcher pause 1800")
> + # pylint: disable-msg=R0201
> + # R0201: Method could be a function
Per-line, see above.
> + def _EnableWatcher(self):
> + """Reenable watcher (locally)."""
> + utils.RunCmd("gnt-cluster watcher continue")
Why use a shell for such commands? ["gnt-cluster", "watcher", "continue"]
> + def _StopDaemons(self):
> + """Stop all daemons on merging nodes."""
> + for data in self.ssh_map.values():
> + for node in data["nodes"]:
> + self._RunCmd(hostname=node,
> + command="/etc/init.d/ganeti stop")
Hardcoded path. Use daemon-util.
> + def _FetchRemoteConfig(self):
> + """Fetches and stores remote cluster config from the master.
> +
> + This step is needed before we can merge the config.
> + """
> + for cluster in self.clusters:
> + cmd = self._RunCmd(hostname=cluster,
> + command="cat '%s'" % constants.CLUSTER_CONF_FILE)
> +
> + if cmd.failed:
> + raise errors.RemoteError("Unable to retrieve remote config on %s." %
> + cluster)
> +
> + config_path = self.ssh_map[cluster]["config"] = os.path.join(
> + self.work_dir, "%s_config.data" % cluster)
config_path = os.path.join(…)
utils.WriteFile(config_path, cmd.stdout)
self.ssh_map[cluster]["config"] = config_path
> + config_file = open(config_path, "w")
> + try:
> + config_file.write(cmd.stdout)
> + finally:
> + config_file.close()
utils.WriteFile
> + # pylint: disable-msg=R0201
> + # R0201: Method could be a function
Per-line, see above. Also in other places. Otherwise it's for the
whole block (the class in this case).
> + def _KillMasterDaemon(self):
> + """Kills the local master daemon.
> +
> + @raise errors.ProgrammerError: If unable to kill
> + """
> + if utils.RunCmd("%s stop_master" % constants.DAEMON_UTIL).failed:
> + raise errors.ProgrammerError("Unable to kill ganeti-masterd.")
Give more details in error message, see above.
I stop here. Please go through the comments above and apply them to
the whole patch and resend.
Regards,
Michael