Am 18. Februar 2011 14:15 schrieb René Nussbaumer <[email protected]>:
> --- a/lib/client/gnt_cluster.py
> +++ b/lib/client/gnt_cluster.py
> +_PING_INTERVAL = 30 # 30 seconds between pings
> +_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes
Prefix with “_EPO”, please.
> +def _InstanceStart(opts, inst_list, start):
> + if start:
> + opcls = opcodes.OpInstanceStartup
> + text_addition = ("startup", "started", "starting")
> + else:
> + opcls = opcodes.OpInstanceShutdown
> + text_addition = ("shutdown", "stopped", "stopping")
Make these separate variables, please.
> + jex = JobExecutor(opts=opts)
> +
> + for inst in inst_list:
> + ToStdout("Submit %s of instance %s", text_addition[0], inst)
> + op = opcls(instance_name=inst)
> + jex.QueueJob(inst, op)
> +
> + results = jex.GetResults()
> + bad_cnt = len([row for row in results if not row[0]])
> +
> + if bad_cnt == 0:
> + ToStdout("All instances has been %s successfully", text_addition[1])
s/has/have/
> + else:
> + ToStderr("There were errors while %s instances:\n"
> + "%d error(s) out of %d instance(s)", text_addition[2], bad_cnt,
> + len(results))
> + return False
> +
> + return True
> +
> +
> +class _RunWhenNodesReachableHelper:
> + """Helper class to make shared internal state sharing easier.
> +
> + """
> + _PING_TIMEOUT = 1
Please put this together with the other constants. :)
> + def Wait(self, secs):
> + start = time.time()
> + for node in self.down:
> + if self._ping_fn(self.node2ip[node], constants.DEFAULT_NODED_PORT,
You must look up the port using netutils.GetDaemonPort, see other
places where it's used.
> + timeout=self._PING_TIMEOUT, live_port_needed=True):
> + ToStdout("Node %s became available", node)
> + self.up.add(node)
> + self.down -= self.up
Please add a comment here describing why you're doing this inside the
loop (because you're returning right away).
> + return
> +
> + self._sleep_fn(max(0.0, start + secs - time.time()))
Are you sure this calculation is correct?
>>> start = 5; now = 3; secs = 30
>>> start + secs - now
32
> +def _RunWhenNodesReachable(node_list, action_cb, interval):
> + client = GetClient()
> + cluster_info = client.QueryClusterInfo()
> + if cluster_info["primary_ip_version"] == constants.IP4_VERSION:
> + family = netutils.IPAddress.family
> + else:
> + family = netutils.IP6Address.family
> +
> + node2ip = {}
> + for node in node_list:
> + host = netutils.GetHostname(node, family=family)
> + node2ip[node] = host.ip
node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
for node in node_list)
> + helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip)
> +
> + try:
> + utils.Retry(helper, interval, _REACHABLE_TIMEOUT, wait_fn=helper.Wait)
> + return helper.success
Actually you can return success as Retry's return value, so this
becomes just “return utils.Retry(…)”.
> + except utils.RetryTimeout:
> + ToStderr("Time exceeded while waiting for nodes to become reachable"
> + " again:\n - %s", " - ".join(helper.down))
> + return False
Will look at the rest in a second round.
Michael