On Fri, Oct 24, 2014 at 05:04:47PM +0300, Dimitris Aragiorgis wrote:
Commit 9b0e86e makes cluster verify report an error if a DRBD disk
is degraded or if its local disk state is not UpToDate.

However, in the case of a newly created instance or a newly attached
disk with --no-wait-for-sync, a cluster verify that runs while the
disks are still syncing will report errors for them.
This patch does the following:
- Adds a new local disk state, LDS_SYNC.
- Treats a DRBD device that is syncing as degraded.
- During cluster verify, if a device is degraded, checks its local
  disk status; if that status is ok or syncing, a warning is emitted
  instead of an error (see the sketch after this list).
- Updates the ordering of local disk states to:
  ok < syncing < unknown < faulty
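To make the new rule concrete, here is a minimal, runnable sketch of
the warning-vs-error decision and of the new ordering. It is
illustrative only: the raw values mirror the patch, but
classify_degraded_disk is a hypothetical helper, not Ganeti code.

# Hypothetical sketch of the new cluster-verify rule; not part of the patch.
LDS_OKAY, LDS_SYNC, LDS_UNKNOWN, LDS_FAULTY = 1, 2, 3, 4
LDS_NAMES = {
  LDS_OKAY: "ok",
  LDS_SYNC: "syncing",
  LDS_UNKNOWN: "unknown",
  LDS_FAULTY: "faulty",
}

def classify_degraded_disk(ldisk_status):
  """A degraded disk whose local disk is ok or syncing only warrants a warning."""
  accepted_lds = (LDS_OKAY, LDS_SYNC)
  return "warning" if ldisk_status in accepted_lds else "error"

assert classify_degraded_disk(LDS_SYNC) == "warning"    # resync in progress
assert classify_degraded_disk(LDS_FAULTY) == "error"    # genuinely broken

# The message mirrors the one built by the patch:
msg = "disk/0 on node1 is degraded; local disk state is '%s'" % LDS_NAMES[LDS_SYNC]

# The new ordering keeps max() meaningful as "take the worst status":
assert max([LDS_OKAY, LDS_SYNC, LDS_UNKNOWN]) == LDS_UNKNOWN

Under the old ordering there was no syncing state at all, so a disk in
resync fell through to LDS_UNKNOWN and was reported as an error.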
Signed-off-by: Dimitris Aragiorgis <[email protected]>
---
 lib/cmdlib/cluster.py              |   21 ++++++++++++---------
 lib/storage/drbd.py                |    2 ++
 src/Ganeti/Constants.hs            |    3 +++
 src/Ganeti/Types.hs                |    8 +++++---
 test/py/cmdlib/cluster_unittest.py |    1 +
 5 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py
index 42c9327..b7a205e 100644
--- a/lib/cmdlib/cluster.py
+++ b/lib/cmdlib/cluster.py
@@ -2520,17 +2520,20 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                     "couldn't retrieve status for disk/%s on %s: %s",
                     idx, self.cfg.GetNodeName(nname), bdev_status)
 
-      if instance.disks_active and success and \
-          (bdev_status.is_degraded or
-           bdev_status.ldisk_status != constants.LDS_OKAY):
-        msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname))
-        if bdev_status.is_degraded:
-          msg += " is degraded"
-        if bdev_status.ldisk_status != constants.LDS_OKAY:
-          msg += "; state is '%s'" % \
+      if instance.disks_active and success and bdev_status.is_degraded:
+        msg = "disk/%s on %s is degraded" % (idx, self.cfg.GetNodeName(nname))
+
+        code = self.ETYPE_ERROR
+        accepted_lds = [constants.LDS_OKAY, constants.LDS_SYNC]
+
+        if bdev_status.ldisk_status in accepted_lds:
+          code = self.ETYPE_WARNING
+
+        msg += "; local disk state is '%s'" % \
               constants.LDS_NAMES[bdev_status.ldisk_status]
 
-        self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg)
+        self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg,
+                    code=code)
 
     self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
                   constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
diff --git a/lib/storage/drbd.py b/lib/storage/drbd.py
index de91704..558508c 100644
--- a/lib/storage/drbd.py
+++ b/lib/storage/drbd.py
@@ -653,6 +653,8 @@ class DRBD8Dev(base.BlockDev):
       ldisk_status = constants.LDS_OKAY
     elif stats.is_diskless:
       ldisk_status = constants.LDS_FAULTY
+    elif stats.is_in_resync:
+      ldisk_status = constants.LDS_SYNC
     else:
       ldisk_status = constants.LDS_UNKNOWN
 
diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs
index fecad84..6c95638 100644
--- a/src/Ganeti/Constants.hs
+++ b/src/Ganeti/Constants.hs
@@ -862,6 +862,9 @@ ldsOkay = Types.localDiskStatusToRaw DiskStatusOk
 ldsUnknown :: Int
 ldsUnknown = Types.localDiskStatusToRaw DiskStatusUnknown
 
+ldsSync :: Int
+ldsSync = Types.localDiskStatusToRaw DiskStatusSync
+
 ldsNames :: Map Int String
 ldsNames =
   Map.fromList [ (Types.localDiskStatusToRaw ds,
diff --git a/src/Ganeti/Types.hs b/src/Ganeti/Types.hs
index 6c99cb0..9ceb8e8 100644
--- a/src/Ganeti/Types.hs
+++ b/src/Ganeti/Types.hs
@@ -866,14 +866,16 @@ $(THH.makeJSONInstance ''DiskAccessMode)
 -- Python code depends on:
 --   DiskStatusOk < DiskStatusUnknown < DiskStatusFaulty
 $(THH.declareILADT "LocalDiskStatus"
-  [ ("DiskStatusFaulty",  3)
-  , ("DiskStatusOk",      1)
-  , ("DiskStatusUnknown", 2)
+  [ ("DiskStatusOk",      1)
+  , ("DiskStatusSync",    2)
+  , ("DiskStatusUnknown", 3)
+  , ("DiskStatusFaulty",  4)
   ])
 
 localDiskStatusName :: LocalDiskStatus -> String
 localDiskStatusName DiskStatusFaulty = "faulty"
 localDiskStatusName DiskStatusOk = "ok"
+localDiskStatusName DiskStatusSync = "syncing"
 localDiskStatusName DiskStatusUnknown = "unknown"
 
 -- | Replace disks type.
-- | Replace disks type.
diff --git a/test/py/cmdlib/cluster_unittest.py b/test/py/cmdlib/cluster_unittest.py
index e91cb30..1e182cd 100644
--- a/test/py/cmdlib/cluster_unittest.py
+++ b/test/py/cmdlib/cluster_unittest.py
@@ -1670,6 +1670,7 @@ class TestLUClusterVerifyGroupVerifyInstance(TestLUClusterVerifyGroupMethods):
   @withLockedLU
   def testNotOkayDiskStatus(self, lu):
+    self.diskstatus[self.master_uuid][0][1].is_degraded = True
     self.diskstatus[self.master_uuid][0][1].ldisk_status = constants.LDS_FAULTY
     lu._VerifyInstance(self.running_inst, self.node_imgs, self.diskstatus)
     self.mcpu.assertLogContainsRegex("instance .* state is 'faulty'")
 
--
1.7.10.4
LGTM, thanks.
Passed the QA tests; I will push it.