Am 18. Februar 2011 14:15 schrieb René Nussbaumer <[email protected]>:
> --- a/lib/client/gnt_cluster.py
> +++ b/lib/client/gnt_cluster.py
> +_PING_INTERVAL = 30 # 30 seconds between pings
> +_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes

Prefix with “_EPO”, please.

> +def _InstanceStart(opts, inst_list, start):
> +  if start:
> +    opcls = opcodes.OpInstanceStartup
> +    text_addition = ("startup", "started", "starting")
> +  else:
> +    opcls = opcodes.OpInstanceShutdown
> +    text_addition = ("shutdown", "stopped", "stopping")

Make these separate variables, please.

> +  jex = JobExecutor(opts=opts)
> +
> +  for inst in inst_list:
> +    ToStdout("Submit %s of instance %s", text_addition[0], inst)
> +    op = opcls(instance_name=inst)
> +    jex.QueueJob(inst, op)
> +
> +  results = jex.GetResults()
> +  bad_cnt = len([row for row in results if not row[0]])
> +
> +  if bad_cnt == 0:
> +    ToStdout("All instances has been %s successfully", text_addition[1])

s/has/have/

> +  else:
> +    ToStderr("There were errors while %s instances:\n"
> +             "%d error(s) out of %d instance(s)", text_addition[2], bad_cnt,
> +             len(results))
> +    return False
> +
> +  return True
> +
> +
> +class _RunWhenNodesReachableHelper:
> +  """Helper class to make shared internal state sharing easier.
> +
> +  """
> +  _PING_TIMEOUT = 1

Please put this together with the other constants. :)

> +  def Wait(self, secs):
> +    start = time.time()
> +    for node in self.down:
> +      if self._ping_fn(self.node2ip[node], constants.DEFAULT_NODED_PORT,

You must look up the port using netutils.GetDaemonPort; see the other
places where it's used.

> +                       timeout=self._PING_TIMEOUT, live_port_needed=True):
> +        ToStdout("Node %s became available", node)
> +        self.up.add(node)
> +        self.down -= self.up

Please add a comment here describing why you're doing this inside the
loop (because you're returning right away).

> +        return
> +
> +    self._sleep_fn(max(0.0, start + secs - time.time()))

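Are you sure this calculation is correct? If the clock goes backwards
while the loop runs (now < start), the sleep ends up longer than “secs”: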

>>> start = 5; now = 3; secs = 30
>>> start + secs - now
32

> +def _RunWhenNodesReachable(node_list, action_cb, interval):
> +  client = GetClient()
> +  cluster_info = client.QueryClusterInfo()
> +  if cluster_info["primary_ip_version"] == constants.IP4_VERSION:
> +    family = netutils.IPAddress.family
> +  else:
> +    family = netutils.IP6Address.family
> +
> +  node2ip = {}
> +  for node in node_list:
> +    host = netutils.GetHostname(node, family=family)
> +    node2ip[node] = host.ip

node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
               for node in node_list)

> +  helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip)
> +
> +  try:
> +    utils.Retry(helper, interval, _REACHABLE_TIMEOUT, wait_fn=helper.Wait)
> +    return helper.success

Actually you can return success as Retry's return value, so this
becomes just “return utils.Retry(…)”.

> +  except utils.RetryTimeout:
> +    ToStderr("Time exceeded while waiting for nodes to become reachable"
> +             " again:\n  - %s", "  - ".join(helper.down))
> +    return False

Will look at the rest in a second round.

Michael

Reply via email to