On Thu, May 05, 2016 at 12:41:13PM +0100, 'Federico Morg Pareschi' via
ganeti-devel wrote:
> Verify disks should not run on clusters or nodegroups with only ext
> storage, as it requires an unnecessary amount of locks for its jobs.
> This commit performs additional checks when submitting a gnt-cluster
> verify-disks job or during a watcher run to prevent this.
>
> Signed-off-by: Federico Morg Pareschi <[email protected]>
Hi Morg,
Apart from a couple of minor nits, this is mostly OK with me. However, there
is a problem: ganeti-extstorage-interface specifies that external storage
providers may implement a verify script.

As it turns out, the current ExtStorageDevice implementation doesn't provide
an API for this (several ES_ACTION_* actions are called by methods in the
class, but ES_ACTION_VERIFY isn't).

Could you please add comments to the only_ext checks saying that this needs
to be revisited if ES_ACTION_VERIFY is implemented?
Thanks,
Brian.
> ---
>  lib/cmdlib/cluster/verify.py       | 14 +++++++++++---
>  lib/watcher/__init__.py            | 15 ++++++++++-----
>  test/py/cmdlib/cluster_unittest.py | 23 +++++++++++++++++++++++
>  test/py/testutils/config_mock.py   |  6 ++++++
>  4 files changed, 50 insertions(+), 8 deletions(-)
>
> diff --git a/lib/cmdlib/cluster/verify.py b/lib/cmdlib/cluster/verify.py
> index 868f7a6..4e6a031 100644
> --- a/lib/cmdlib/cluster/verify.py
> +++ b/lib/cmdlib/cluster/verify.py
> @@ -239,10 +239,18 @@ class LUClusterVerifyDisks(NoHooksLU):
>
>    def Exec(self, feedback_fn):
>      group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
> +    instances = self.cfg.GetInstanceList()
>
> -    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
> -    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
> -                           for group in group_names])
> +    only_ext = not [i for i in instances if
> +                    self.cfg.GetInstanceDiskTemplate(i) != constants.DT_EXT]
Nit: Might be clearer as

  only_ext = compat.all(self.cfg.GetInstanceDiskTemplate(i) == constants.DT_EXT
                        for i in instances)
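
(It also short-circuits at the first non-ext instance instead of building a
throwaway list.)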
> +    if only_ext:
> +      logging.info("All instances have ext storage, skipping verify disks.")
> +      return ResultWithJobs([])
> +    else:
> +      # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
> +      return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
> +                             for group in group_names])
>
>
> class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
> diff --git a/lib/watcher/__init__.py b/lib/watcher/__init__.py
> index 9c2a272..1cc17db 100644
> --- a/lib/watcher/__init__.py
> +++ b/lib/watcher/__init__.py
> @@ -148,13 +148,14 @@ class Instance(object):
>
> """
> def __init__(self, name, status, config_state, config_state_source,
> - disks_active, snodes):
> + disks_active, snodes, disk_template):
> self.name = name
> self.status = status
> self.config_state = config_state
> self.config_state_source = config_state_source
> self.disks_active = disks_active
> self.snodes = snodes
> + self.disk_template = disk_template
>
>    def Restart(self, cl):
>      """Encapsulates the start of an instance.
> @@ -753,7 +754,7 @@ def _GetGroupData(qcl, uuid):
>    queries = [
>      (constants.QR_INSTANCE,
>       ["name", "status", "admin_state", "admin_state_source", "disks_active",
> -      "snodes", "pnode.group.uuid", "snodes.group.uuid"],
> +      "snodes", "pnode.group.uuid", "snodes.group.uuid", "disk_template"],
>       [qlang.OP_EQUAL, "pnode.group.uuid", uuid]),
>      (constants.QR_NODE,
>       ["name", "bootid", "offline"],
> @@ -779,14 +780,14 @@ def _GetGroupData(qcl, uuid):
>
>    # Load all instances
>    for (name, status, config_state, config_state_source, disks_active, snodes,
> -       pnode_group_uuid, snodes_group_uuid) in raw_instances:
> +       pnode_group_uuid, snodes_group_uuid, disk_template) in raw_instances:
>      if snodes and set([pnode_group_uuid]) != set(snodes_group_uuid):
>        logging.error("Ignoring split instance '%s', primary group %s, secondary"
>                      " groups %s", name, pnode_group_uuid,
>                      utils.CommaJoin(snodes_group_uuid))
>      else:
>        instances.append(Instance(name, status, config_state, config_state_source,
> -                                disks_active, snodes))
> +                                disks_active, snodes, disk_template))
>
>        for node in snodes:
>          secondaries.setdefault(node, set()).add(name)
> @@ -865,7 +866,11 @@ def _GroupWatcher(opts):
>
>      started = _CheckInstances(client, notepad, instances, locks)
>      _CheckDisks(client, notepad, nodes, instances, started)
> -    if not opts.no_verify_disks:
> +
> +    # Check if the nodegroup only has ext storage type
> +    only_ext = not [i for i in instances.values()
> +                    if i.disk_template != constants.DT_EXT]
As above:

  only_ext = compat.all(i.disk_template == constants.DT_EXT
                        for i in instances.values())
> +    if not opts.no_verify_disks and not only_ext:
>        _VerifyDisks(client, group_uuid, nodes, instances)
>    except Exception, err:
>      logging.info("Not updating status file due to failure: %s", err)
> diff --git a/test/py/cmdlib/cluster_unittest.py b/test/py/cmdlib/cluster_unittest.py
> index 90099a0..d8f3185 100644
> --- a/test/py/cmdlib/cluster_unittest.py
> +++ b/test/py/cmdlib/cluster_unittest.py
> @@ -2326,7 +2326,30 @@ class TestLUClusterVerifyGroupHooksCallBack(TestLUClusterVerifyGroupMethods):
>
>
>  class TestLUClusterVerifyDisks(CmdlibTestCase):
> +
>    def testVerifyDisks(self):
> +    self.cfg.AddNewInstance(uuid="tst1.inst.corp.google.com",
> +                            disk_template=constants.DT_PLAIN)
> +    op = opcodes.OpClusterVerifyDisks()
> +    result = self.ExecOpCode(op)
> +
> +    self.assertEqual(1, len(result["jobs"]))
> +
> +  def testVerifyDisksExt(self):
> +    self.cfg.AddNewInstance(uuid="tst1.inst.corp.google.com",
> +                            disk_template=constants.DT_EXT)
> +    self.cfg.AddNewInstance(uuid="tst2.inst.corp.google.com",
> +                            disk_template=constants.DT_EXT)
> +    op = opcodes.OpClusterVerifyDisks()
> +    result = self.ExecOpCode(op)
> +
> +    self.assertEqual(0, len(result["jobs"]))
> +
> +  def testVerifyDisksMixed(self):
> +    self.cfg.AddNewInstance(uuid="tst1.inst.corp.google.com",
> +                            disk_template=constants.DT_EXT)
> +    self.cfg.AddNewInstance(uuid="tst2.inst.corp.google.com",
> +                            disk_template=constants.DT_PLAIN)
>      op = opcodes.OpClusterVerifyDisks()
>      result = self.ExecOpCode(op)
>
> diff --git a/test/py/testutils/config_mock.py b/test/py/testutils/config_mock.py
> index 7ae27bc..1d70798 100644
> --- a/test/py/testutils/config_mock.py
> +++ b/test/py/testutils/config_mock.py
> @@ -274,6 +274,12 @@ class ConfigMock(config.ConfigWriter):
>      if disks is None:
>        if disk_template == constants.DT_DISKLESS:
>          disks = []
> +      elif disk_template == constants.DT_EXT:
> +        provider = "mock_provider"
> +        disks = [self.CreateDisk(dev_type=disk_template,
> +                                 primary_node=primary_node,
> +                                 secondary_node=secondary_node,
> +                                 params={constants.IDISK_PROVIDER: provider})]
>        else:
>          disks = [self.CreateDisk(dev_type=disk_template,
>                                   primary_node=primary_node,
> --
> 2.8.0.rc3.226.g39d4020
>