On Thu, May 05, 2016 at 12:41:13PM +0100, 'Federico Morg Pareschi' via ganeti-devel wrote:
> Verify disks should not run on clusters or nodegroups with only ext
> storage, as it requires an unnecessary number of locks for its jobs.
> This commit performs additional checks when submitting a gnt-cluster
> verify-disks job or during a watcher run to prevent this.
>
> Signed-off-by: Federico Morg Pareschi <[email protected]>

Hi Morg,

Apart from a couple of minor nits, this is mostly OK with me. However, there
is a problem: ganeti-extstorage-interface specifies that external storage
providers may implement a verify script.

As it turns out, the current ExtStorageDevice implementation doesn't provide an
API for this (several ES_ACTION_* actions are called by methods in the class,
but ES_ACTION_VERIFY isn't).

Could you please add comments to the "only_ext" checks saying that this
needs to be revisited if ES_ACTION_VERIFY is ever implemented?
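
For example, something along these lines above each check (just a sketch, the
exact wording is up to you):

  # TODO: revisit this shortcut if/when ExtStorageDevice implements
  # ES_ACTION_VERIFY; groups with only ext storage could then be verified too.
  if only_ext:
    logging.info("All instances have ext storage, skipping verify disks.")
    return ResultWithJobs([])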

Thanks,
Brian.

> ---
>  lib/cmdlib/cluster/verify.py       | 14 +++++++++++---
>  lib/watcher/__init__.py            | 15 ++++++++++-----
>  test/py/cmdlib/cluster_unittest.py | 23 +++++++++++++++++++++++
>  test/py/testutils/config_mock.py   |  6 ++++++
>  4 files changed, 50 insertions(+), 8 deletions(-)
> 
> diff --git a/lib/cmdlib/cluster/verify.py b/lib/cmdlib/cluster/verify.py
> index 868f7a6..4e6a031 100644
> --- a/lib/cmdlib/cluster/verify.py
> +++ b/lib/cmdlib/cluster/verify.py
> @@ -239,10 +239,18 @@ class LUClusterVerifyDisks(NoHooksLU):
>  
>    def Exec(self, feedback_fn):
>      group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
> +    instances = self.cfg.GetInstanceList()
>  
> -    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
> -    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
> -                           for group in group_names])
> +    only_ext = not [i for i in instances if
> +                    self.cfg.GetInstanceDiskTemplate(i) != constants.DT_EXT]

Nit: Might be clearer as
only_ext = compat.all(self.cfg.GetInstanceDiskTemplate(i) == constants.DT_EXT
                      for i in instances)
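
(Same result, but the generator form avoids building a throwaway list and
stops at the first non-ext instance.)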

> +    if only_ext:
> +      logging.info("All instances have ext storage, skipping verify disks.")
> +      return ResultWithJobs([])
> +    else:
> +      # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
> +      return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
> +                             for group in group_names])
>  
>  
>  class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
> diff --git a/lib/watcher/__init__.py b/lib/watcher/__init__.py
> index 9c2a272..1cc17db 100644
> --- a/lib/watcher/__init__.py
> +++ b/lib/watcher/__init__.py
> @@ -148,13 +148,14 @@ class Instance(object):
>  
>    """
>    def __init__(self, name, status, config_state, config_state_source,
> -               disks_active, snodes):
> +               disks_active, snodes, disk_template):
>      self.name = name
>      self.status = status
>      self.config_state = config_state
>      self.config_state_source = config_state_source
>      self.disks_active = disks_active
>      self.snodes = snodes
> +    self.disk_template = disk_template
>  
>    def Restart(self, cl):
>      """Encapsulates the start of an instance.
> @@ -753,7 +754,7 @@ def _GetGroupData(qcl, uuid):
>    queries = [
>        (constants.QR_INSTANCE,
>         ["name", "status", "admin_state", "admin_state_source", 
> "disks_active",
> -        "snodes", "pnode.group.uuid", "snodes.group.uuid"],
> +        "snodes", "pnode.group.uuid", "snodes.group.uuid", "disk_template"],
>         [qlang.OP_EQUAL, "pnode.group.uuid", uuid]),
>        (constants.QR_NODE,
>         ["name", "bootid", "offline"],
> @@ -779,14 +780,14 @@ def _GetGroupData(qcl, uuid):
>  
>    # Load all instances
>    for (name, status, config_state, config_state_source, disks_active, snodes,
> -       pnode_group_uuid, snodes_group_uuid) in raw_instances:
> +       pnode_group_uuid, snodes_group_uuid, disk_template) in raw_instances:
>      if snodes and set([pnode_group_uuid]) != set(snodes_group_uuid):
>       logging.error("Ignoring split instance '%s', primary group %s, secondary"
>                      " groups %s", name, pnode_group_uuid,
>                      utils.CommaJoin(snodes_group_uuid))
>      else:
>     instances.append(Instance(name, status, config_state, config_state_source,
> -                                disks_active, snodes))
> +                                disks_active, snodes, disk_template))
>  
>        for node in snodes:
>          secondaries.setdefault(node, set()).add(name)
> @@ -865,7 +866,11 @@ def _GroupWatcher(opts):
>  
>      started = _CheckInstances(client, notepad, instances, locks)
>      _CheckDisks(client, notepad, nodes, instances, started)
> -    if not opts.no_verify_disks:
> +
> +    # Check if the nodegroup only has ext storage type
> +    only_ext = not [i for i in instances.values()
> +                    if i.disk_template != constants.DT_EXT]

As above
only_ext = compat.all(i.disk_template == constants.DT_EXT
                      for i in instances.values())

> +    if not opts.no_verify_disks and not only_ext:
>        _VerifyDisks(client, group_uuid, nodes, instances)
>    except Exception, err:
>      logging.info("Not updating status file due to failure: %s", err)
> diff --git a/test/py/cmdlib/cluster_unittest.py b/test/py/cmdlib/cluster_unittest.py
> index 90099a0..d8f3185 100644
> --- a/test/py/cmdlib/cluster_unittest.py
> +++ b/test/py/cmdlib/cluster_unittest.py
> @@ -2326,7 +2326,30 @@ class TestLUClusterVerifyGroupHooksCallBack(TestLUClusterVerifyGroupMethods):
>  
>  
>  class TestLUClusterVerifyDisks(CmdlibTestCase):
> +
>    def testVerifyDisks(self):
> +    self.cfg.AddNewInstance(uuid="tst1.inst.corp.google.com",
> +                            disk_template=constants.DT_PLAIN)
> +    op = opcodes.OpClusterVerifyDisks()
> +    result = self.ExecOpCode(op)
> +
> +    self.assertEqual(1, len(result["jobs"]))
> +
> +  def testVerifyDisksExt(self):
> +    self.cfg.AddNewInstance(uuid="tst1.inst.corp.google.com",
> +                            disk_template=constants.DT_EXT)
> +    self.cfg.AddNewInstance(uuid="tst2.inst.corp.google.com",
> +                            disk_template=constants.DT_EXT)
> +    op = opcodes.OpClusterVerifyDisks()
> +    result = self.ExecOpCode(op)
> +
> +    self.assertEqual(0, len(result["jobs"]))
> +
> +  def testVerifyDisksMixed(self):
> +    self.cfg.AddNewInstance(uuid="tst1.inst.corp.google.com",
> +                            disk_template=constants.DT_EXT)
> +    self.cfg.AddNewInstance(uuid="tst2.inst.corp.google.com",
> +                            disk_template=constants.DT_PLAIN)
>      op = opcodes.OpClusterVerifyDisks()
>      result = self.ExecOpCode(op)
>  
> diff --git a/test/py/testutils/config_mock.py b/test/py/testutils/config_mock.py
> index 7ae27bc..1d70798 100644
> --- a/test/py/testutils/config_mock.py
> +++ b/test/py/testutils/config_mock.py
> @@ -274,6 +274,12 @@ class ConfigMock(config.ConfigWriter):
>      if disks is None:
>        if disk_template == constants.DT_DISKLESS:
>          disks = []
> +      elif disk_template == constants.DT_EXT:
> +        provider = "mock_provider"
> +        disks = [self.CreateDisk(dev_type=disk_template,
> +                                 primary_node=primary_node,
> +                                 secondary_node=secondary_node,
> +                                 params={constants.IDISK_PROVIDER: provider})]
>        else:
>          disks = [self.CreateDisk(dev_type=disk_template,
>                                   primary_node=primary_node,
> -- 
> 2.8.0.rc3.226.g39d4020
> 
