On Fri, Oct 24, 2014 at 05:04:47PM +0300, Dimitris Aragiorgis wrote:
Commit 9b0e86e issues an error during cluster verify if the DRBD
status is degraded or if the local disk is not UpToDate.

Still, in the case of a newly created instance or a newly attached disk
with --no-wait-for-sync, if a cluster verify runs, it will report errors
for the instance's disks that are still syncing.

This patch does the following:

 - Adds a new local disk state namely LDS_SYNC.

 - Treats a DRBD device that is syncing as degraded.

 - During cluster verify, if the device is degraded, the local disk
   status is checked; if it is ok or syncing, a warning is printed
   instead of an error.

 - Updates the ordering of the local disk states to:
   ok < syncing < unknown < faulty

Signed-off-by: Dimitris Aragiorgis <[email protected]>
---
lib/cmdlib/cluster.py              |   21 ++++++++++++---------
lib/storage/drbd.py                |    2 ++
src/Ganeti/Constants.hs            |    3 +++
src/Ganeti/Types.hs                |    8 +++++---
test/py/cmdlib/cluster_unittest.py |    1 +
5 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py
index 42c9327..b7a205e 100644
--- a/lib/cmdlib/cluster.py
+++ b/lib/cmdlib/cluster.py
@@ -2520,17 +2520,20 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                    "couldn't retrieve status for disk/%s on %s: %s",
                    idx, self.cfg.GetNodeName(nname), bdev_status)

-      if instance.disks_active and success and \
-         (bdev_status.is_degraded or
-          bdev_status.ldisk_status != constants.LDS_OKAY):
-        msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname))
-        if bdev_status.is_degraded:
-          msg += " is degraded"
-        if bdev_status.ldisk_status != constants.LDS_OKAY:
-          msg += "; state is '%s'" % \
+      if instance.disks_active and success and bdev_status.is_degraded:
+        msg = "disk/%s on %s is degraded" % (idx, self.cfg.GetNodeName(nname))
+
+        code = self.ETYPE_ERROR
+        accepted_lds = [constants.LDS_OKAY, constants.LDS_SYNC]
+
+        if bdev_status.ldisk_status in accepted_lds:
+          code = self.ETYPE_WARNING
+
+        msg += "; local disk state is '%s'" % \
                 constants.LDS_NAMES[bdev_status.ldisk_status]

-        self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg)
+        self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg,
+                    code=code)

    self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
                  constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
diff --git a/lib/storage/drbd.py b/lib/storage/drbd.py
index de91704..558508c 100644
--- a/lib/storage/drbd.py
+++ b/lib/storage/drbd.py
@@ -653,6 +653,8 @@ class DRBD8Dev(base.BlockDev):
      ldisk_status = constants.LDS_OKAY
    elif stats.is_diskless:
      ldisk_status = constants.LDS_FAULTY
+    elif stats.is_in_resync:
+      ldisk_status = constants.LDS_SYNC
    else:
      ldisk_status = constants.LDS_UNKNOWN

diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs
index fecad84..6c95638 100644
--- a/src/Ganeti/Constants.hs
+++ b/src/Ganeti/Constants.hs
@@ -862,6 +862,9 @@ ldsOkay = Types.localDiskStatusToRaw DiskStatusOk
ldsUnknown :: Int
ldsUnknown = Types.localDiskStatusToRaw DiskStatusUnknown

+ldsSync :: Int
+ldsSync = Types.localDiskStatusToRaw DiskStatusSync
+
ldsNames :: Map Int String
ldsNames =
  Map.fromList [ (Types.localDiskStatusToRaw ds,
diff --git a/src/Ganeti/Types.hs b/src/Ganeti/Types.hs
index 6c99cb0..9ceb8e8 100644
--- a/src/Ganeti/Types.hs
+++ b/src/Ganeti/Types.hs
@@ -866,14 +866,16 @@ $(THH.makeJSONInstance ''DiskAccessMode)
-- Python code depends on:
--   DiskStatusOk < DiskStatusUnknown < DiskStatusFaulty
$(THH.declareILADT "LocalDiskStatus"
-  [ ("DiskStatusFaulty",  3)
-  , ("DiskStatusOk",      1)
-  , ("DiskStatusUnknown", 2)
+  [ ("DiskStatusOk",      1)
+  , ("DiskStatusSync",    2)
+  , ("DiskStatusUnknown", 3)
+  , ("DiskStatusFaulty",  4)
  ])

localDiskStatusName :: LocalDiskStatus -> String
localDiskStatusName DiskStatusFaulty = "faulty"
localDiskStatusName DiskStatusOk = "ok"
+localDiskStatusName DiskStatusSync = "syncing"
localDiskStatusName DiskStatusUnknown = "unknown"

-- | Replace disks type.
diff --git a/test/py/cmdlib/cluster_unittest.py 
b/test/py/cmdlib/cluster_unittest.py
index e91cb30..1e182cd 100644
--- a/test/py/cmdlib/cluster_unittest.py
+++ b/test/py/cmdlib/cluster_unittest.py
@@ -1670,6 +1670,7 @@ class 
TestLUClusterVerifyGroupVerifyInstance(TestLUClusterVerifyGroupMethods):

  @withLockedLU
  def testNotOkayDiskStatus(self, lu):
+    self.diskstatus[self.master_uuid][0][1].is_degraded = True
    self.diskstatus[self.master_uuid][0][1].ldisk_status = constants.LDS_FAULTY
    lu._VerifyInstance(self.running_inst, self.node_imgs, self.diskstatus)
    self.mcpu.assertLogContainsRegex("instance .* state is 'faulty'")
--
1.7.10.4


LGTM, thanks.
Passed the QA tests, I will push it.

Reply via email to