Am 18. Februar 2011 15:14 schrieb René Nussbaumer <[email protected]>:
> This is a convinience command to do an automated EPO in the possible limits of

s/convinience/convenience/

> Ganeti.

> +def _InstanceStart(opts, inst_list, start):
> […]
> +  bad_cnt = len([row for row in results if not row[0]])

What is row[0]? (hint: explicit unpacking)

> +class _RunWhenNodesReachableHelper:
> +  """Helper class to make shared internal state sharing easier.
> +
> +    @ivar success: Indicates if all action_cb calls were successful
> +  """

Wrong indentation for the docstring, and the empty line after the summary is missing.

> +  def Wait(self, secs):
> +    start = time.time()
> +    for node in self.down:
> +      if self._ping_fn(self.node2ip[node], self.port,
> +                       timeout=self._EPO_PING_TIMEOUT, 
> live_port_needed=True):

You didn't test or pylint this code, did you? Hint: there's no self._EPO_*.

> +def _RunWhenNodesReachable(node_list, action_cb, interval):
> +  client = GetClient()
> +  cluster_info = client.QueryClusterInfo()
> +  if cluster_info["primary_ip_version"] == constants.IP4_VERSION:
> +    family = netutils.IPAddress.family
> +  else:
> +    family = netutils.IP6Address.family
> +
> +  node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
> +                 for node in node_list)
> +  for node in node_list:
> +    host = netutils.GetHostname(node, family=family)
> +    node2ip[node] = host.ip

\o/ No, you don't need that loop, the code before it does everything.

> +  helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip)
> +
> +  try:
> +    return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT,
> +                       wait_fn=helper.Wait)
> +  except utils.RetryTimeout:
> +    ToStderr("Time exceeded while waiting for nodes to become reachable"
> +             " again:\n  - %s", "  - ".join(helper.down))
> +    return False

Very nice! One detail though: the join separator is missing a newline —
`"  - ".join(helper.down)` puts all nodes after the first on the same
line. You want `"\n  - ".join(...)` to match the leading "\n  - " in the
format string.

> +def _EpoOn(opts, full_node_list, node_list, inst_map):
> +  """Does the actual power on.
> +
> +  @param opts: The command line options selected by the user
> +  @param full_node_list: All nodes to operate on (includes nodes not 
> supporting
> +                         OOB)
> +  @param node_list: The list of nodes to operate on (all need to support OOB)
> +  @param inst_map: A dict of inst -> nodes mapping
> +  @return: The desired exit status
> +
> +  """
> +  if node_list and not _OobPower(opts, node_list, False):
> +    ToStderr("Not all nodes seems to get back up, investigate and start"

s/seems/seem/

> +             " manually if needed")
> +
> +  # Wait for the nodes to be back up
> +  action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map))
> +
> +  ToStdout("Waiting until all nodes are available again")
> +  if not _RunWhenNodesReachable(full_node_list, action_cb, 
> _EPO_PING_INTERVAL):
> +    ToStderr("Please investigate and start not running instances manually")

s/not running/stopped/

Also, the final `return constants.EXIT_FAILURE` at the end of this
function (after the `if` block) is on the success path — it should be
`constants.EXIT_SUCCESS`.

> +    return constants.EXIT_FAILURE
> +
> +  return constants.EXIT_FAILURE
> +
> +
> +def _EpoOff(opts, node_list, inst_map):
> +  if not _InstanceStart(opts, inst_map.keys(), False):
> +    ToStderr("Please investigate and stop instances manually before 
> continue")

s/continue/continuing/

> +    return constants.EXIT_FAILURE
> +
> +  if node_list and _OobPower(opts, node_list, False):
> +    return constants.EXIT_SUCCESS
> +  else:
> +    return constants.EXIT_FAILURE

> +def Epo(opts, args):
> […]
> +  for (idx, (node, master, pinsts, sinsts, powered,
> +             offline)) in enumerate(result):
> +    # Normalize the node_query_list as well
> +    node_query_list[idx] = node
> +    if not offline:
> +      for inst in (pinsts + sinsts):
> +        if inst not in inst_map:

Please swap the two branches so that you don't need “not” here.

> +          if master:
> +            inst_map[inst] = set()
> +          else:
> +            inst_map[inst] = set([node])
> +        else:
> +          if not master:

elif

> +            inst_map[inst].add(node)
> +
> +    if master and opts.on:
> +      # We ignore the master for turning on the machines, in fact we are
> +      # already operating on the master at this point :)
> +      continue
> +    elif master and not opts.show_all:
> +      ToStderr("%s is the master node, please do a master-failover to 
> another"
> +               " node not affected by the EPO or use --all if you intend to"
> +               " shutdown the whole cluster", node)
> +      return constants.EXIT_FAILURE
> +    elif powered is None:
> +      ToStdout("Node %s does not support out-of-band handling, it can not be"
> +               " handled in a fully automated manner", node)
> +    elif powered == opts.on:
> +      ToStdout("Node %s is already in desired power state, skipping", node)
> +    else:
> +      if not offline or (offline and powered):

elif

> +        node_list.append(node)

> +  "epo": (
> +    Epo, [ArgUnknown()],
> +    [FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT],
> +    "[opts...] [args]",
> +    "Performs an EPO on given args"),

s/EPO/emergency power-off/ (maybe also in other places; don't assume
everyone knows abbreviations)

> --- a/man/gnt-cluster.rst
> +++ b/man/gnt-cluster.rst
> @@ -93,6 +93,23 @@ Remove all configuration files related to the cluster, so 
> that a
>  Since this is a dangerous command, you are required to pass the
>  argument *--yes-do-it.*
>
> +EPO
> +~~~
> +
> +**epo** [--on] [--groups|--all] *arguments*
> +
> +Performans an EPO on nodes given as arguments. If ``--groups`` is given,

Here again.

> +arguments are node groups. If ``--all`` is provided, the whole cluster will 
> be
> +shut down.
> +
> +The ``--on`` flag says, that the cluster should be recovered from a EPO.

“… flag restarts the cluster after an emergency power-off”

> +Please note that the master node will not be turned down or up automatically.
> +It will just be left in a state, where you can manully perform the shutdown 
> of

s/manully/manually/

> +that one node. If the master is in the list of affected nodes and this is not

“is not _a_”

> +complete cluster EPO (e.g. using ``--all``), you're required to do a master

emergency power-off

> +failover to another node not affected.

Michael

Reply via email to