Commit 9b0e86e makes cluster verify issue an error if the status of
a DRBD disk is degraded or if its local disk is not UpToDate.

However, in the case of a newly created instance or a newly attached
disk with --no-wait-for-sync, a cluster verify run will report errors
for the instance's disks while they are still syncing.
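
For example (an illustrative command; node, OS, and instance names
are made up), such a situation arises after:

  gnt-instance add -t drbd -n node1:node2 -o debootstrap \
    --no-wait-for-sync -s 10G inst1.example.com

Here the DRBD disks keep syncing in the background after the instance
is created, and a verify run during that window trips the error.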

This patch does the following:

  - Adds a new local disk state, namely LDS_SYNC.

  - Treats a DRBD device that is syncing as degraded.

  - During cluster verify, if a device is degraded, checks its local
    disk status and prints a warning instead of an error if the status
    is ok or syncing.

  - Updates the ordering of the local disk states to:
    ok < syncing < unknown < faulty (see the sketch below).
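
As an aside, here is a minimal sketch (not part of the patch) of why
the new ordering matters: code that needs to combine the local disk
status of several disks can simply take the maximum, so the worst
state wins. The helper and its name are made up; the numeric values
mirror the ones declared in Ganeti/Types.hs below.

  # Hypothetical standalone mirror of the LocalDiskStatus values
  # (1..4), not the real constants module.
  LDS_OKAY, LDS_SYNC, LDS_UNKNOWN, LDS_FAULTY = range(1, 5)

  def _CombineLdiskStatus(statuses):
    """Return the worst local disk status among a set of disks."""
    return max(statuses)

  # ok + syncing -> syncing; syncing + faulty -> faulty
  assert _CombineLdiskStatus([LDS_OKAY, LDS_SYNC]) == LDS_SYNC
  assert _CombineLdiskStatus([LDS_SYNC, LDS_FAULTY]) == LDS_FAULTY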

Signed-off-by: Dimitris Aragiorgis <[email protected]>
---
 lib/cmdlib/cluster.py              |   21 ++++++++++++---------
 lib/storage/drbd.py                |    2 ++
 src/Ganeti/Constants.hs            |    3 +++
 src/Ganeti/Types.hs                |   10 ++++++----
 test/py/cmdlib/cluster_unittest.py |    1 +
 5 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py
index 42c9327..b7a205e 100644
--- a/lib/cmdlib/cluster.py
+++ b/lib/cmdlib/cluster.py
@@ -2520,17 +2520,20 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                     "couldn't retrieve status for disk/%s on %s: %s",
                     idx, self.cfg.GetNodeName(nname), bdev_status)
 
-      if instance.disks_active and success and \
-         (bdev_status.is_degraded or
-          bdev_status.ldisk_status != constants.LDS_OKAY):
-        msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname))
-        if bdev_status.is_degraded:
-          msg += " is degraded"
-        if bdev_status.ldisk_status != constants.LDS_OKAY:
-          msg += "; state is '%s'" % \
+      if instance.disks_active and success and bdev_status.is_degraded:
+        msg = "disk/%s on %s is degraded" % (idx, self.cfg.GetNodeName(nname))
+
+        code = self.ETYPE_ERROR
+        accepted_lds = [constants.LDS_OKAY, constants.LDS_SYNC]
+
+        if bdev_status.ldisk_status in accepted_lds:
+          code = self.ETYPE_WARNING
+
+        msg += "; local disk state is '%s'" % \
                  constants.LDS_NAMES[bdev_status.ldisk_status]
 
-        self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg)
+        self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg,
+                    code=code)
 
     self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
                   constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
diff --git a/lib/storage/drbd.py b/lib/storage/drbd.py
index de91704..558508c 100644
--- a/lib/storage/drbd.py
+++ b/lib/storage/drbd.py
@@ -653,6 +653,8 @@ class DRBD8Dev(base.BlockDev):
       ldisk_status = constants.LDS_OKAY
     elif stats.is_diskless:
       ldisk_status = constants.LDS_FAULTY
+    elif stats.is_in_resync:
+      ldisk_status = constants.LDS_SYNC
     else:
       ldisk_status = constants.LDS_UNKNOWN
 
diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs
index fecad84..6c95638 100644
--- a/src/Ganeti/Constants.hs
+++ b/src/Ganeti/Constants.hs
@@ -862,6 +862,9 @@ ldsOkay = Types.localDiskStatusToRaw DiskStatusOk
 ldsUnknown :: Int
 ldsUnknown = Types.localDiskStatusToRaw DiskStatusUnknown
 
+ldsSync :: Int
+ldsSync = Types.localDiskStatusToRaw DiskStatusSync
+
 ldsNames :: Map Int String
 ldsNames =
   Map.fromList [ (Types.localDiskStatusToRaw ds,
diff --git a/src/Ganeti/Types.hs b/src/Ganeti/Types.hs
index 6c99cb0..9ceb8e8 100644
--- a/src/Ganeti/Types.hs
+++ b/src/Ganeti/Types.hs
@@ -866,14 +866,16 @@ $(THH.makeJSONInstance ''DiskAccessMode)
 -- Python code depends on:
---   DiskStatusOk < DiskStatusUnknown < DiskStatusFaulty
+--   DiskStatusOk < DiskStatusSync < DiskStatusUnknown < DiskStatusFaulty
 $(THH.declareILADT "LocalDiskStatus"
-  [ ("DiskStatusFaulty",  3)
-  , ("DiskStatusOk",      1)
-  , ("DiskStatusUnknown", 2)
+  [ ("DiskStatusOk",      1)
+  , ("DiskStatusSync",    2)
+  , ("DiskStatusUnknown", 3)
+  , ("DiskStatusFaulty",  4)
   ])
 
 localDiskStatusName :: LocalDiskStatus -> String
 localDiskStatusName DiskStatusFaulty = "faulty"
 localDiskStatusName DiskStatusOk = "ok"
+localDiskStatusName DiskStatusSync = "syncing"
 localDiskStatusName DiskStatusUnknown = "unknown"
 
 -- | Replace disks type.
diff --git a/test/py/cmdlib/cluster_unittest.py b/test/py/cmdlib/cluster_unittest.py
index e91cb30..1e182cd 100644
--- a/test/py/cmdlib/cluster_unittest.py
+++ b/test/py/cmdlib/cluster_unittest.py
@@ -1670,6 +1670,7 @@ class TestLUClusterVerifyGroupVerifyInstance(TestLUClusterVerifyGroupMethods):
 
   @withLockedLU
   def testNotOkayDiskStatus(self, lu):
+    self.diskstatus[self.master_uuid][0][1].is_degraded = True
     self.diskstatus[self.master_uuid][0][1].ldisk_status = constants.LDS_FAULTY
     lu._VerifyInstance(self.running_inst, self.node_imgs, self.diskstatus)
     self.mcpu.assertLogContainsRegex("instance .* state is 'faulty'")
-- 
1.7.10.4
