Nir Soffer has uploaded a new change for review.

Change subject: monitor: Separate path status and domain status
......................................................................

monitor: Separate path status and domain status

Introduce two classes:

PathStatus      the status of checking domain monitoring path.
                Collecting this status can block only if the storage is
                not accessible.

DomainStatus    the status collected from the storage domain object.
                Collecting this status may block for long time because
                unrelated threads are holding global locks in vdsm.

The Status class wrap both objects and provides an unified view of the
monitor status, required for satisfying the code reporting storage
domain status, and for emitting status changes events.

We update the status twice for each monitoring cycle; once after
collecting path status, and once after collecting domain status. When
path checking is moved to new path checking infrastructure, this will
allow updating path status regardless of domain status check state.

For status changes, we mostly keep the old behavior:
- If path status check or domain status check fail and last status was
  valid, we emit a INVALID event.
- When both path status check and domain status check succeed and last
  status was invalid, we emit VALID event.

Note that now we perform path status check before domain self test; we
cannot keep the old order since path status must be separated from
operations accessing the storage domain object.

Change-Id: I97e18ebec36b421d0b4ec20e768fe661d6facf98
Signed-off-by: Nir Soffer <nsof...@redhat.com>
---
M tests/storage_monitor_test.py
M vdsm/storage/monitor.py
2 files changed, 237 insertions(+), 78 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/29/57429/1

diff --git a/tests/storage_monitor_test.py b/tests/storage_monitor_test.py
index c711b48..17fa165 100644
--- a/tests/storage_monitor_test.py
+++ b/tests/storage_monitor_test.py
@@ -18,18 +18,106 @@
 # Refer to the README and COPYING files for full details of the license
 #
 
+import time
+
 from storage import monitor
+
 from testlib import VdsmTestCase
+from testlib import permutations, expandPermutations
+from monkeypatch import MonkeyPatch
 
 
+@expandPermutations
 class TestStatus(VdsmTestCase):
 
-    def test_valid(self):
-        s = monitor.Status()
-        self.assertIsNone(s.error)
-        self.assertTrue(s.valid)
+    def test_initial_status(self):
+        # For backward compatibility, we must publish an initial status before
+        # we collect the first samples. The initial status is marked as
+        # actual=False to allow engine to treat it specially.
+        path_status = monitor.PathStatus(actual=False)
+        domain_status = monitor.DomainStatus(actual=False)
+        status = monitor.Status(path_status, domain_status)
+        self.assertFalse(status.actual)
+        self.assertIsNone(status.error)
+        self.assertTrue(status.valid)
 
-    def test_invalid(self):
-        s = monitor.Status()
-        s.error = Exception()
-        self.assertFalse(s.valid)
+    def test_partial_status(self):
+        # We collected path stauts but domain status is not available yet.
+        path_status = monitor.PathStatus()
+        domain_status = monitor.DomainStatus(actual=False)
+        status = monitor.Status(path_status, domain_status)
+        self.assertFalse(status.actual)
+        self.assertIsNone(status.error)
+        self.assertTrue(status.valid)
+
+    def test_full_status(self):
+        # Both path status and domain status are available.
+        path_status = monitor.PathStatus()
+        domain_status = monitor.DomainStatus()
+        status = monitor.Status(path_status, domain_status)
+        self.assertTrue(status.actual)
+        self.assertIsNone(status.error)
+        self.assertTrue(status.valid)
+
+    def test_path_error(self):
+        path_status = monitor.PathStatus(error=Exception("path"))
+        domain_status = monitor.DomainStatus()
+        status = monitor.Status(path_status, domain_status)
+        self.assertTrue(status.actual)
+        self.assertEqual(status.error, path_status.error)
+        self.assertFalse(status.valid)
+
+    def test_path_error_non_actual_domain_status(self):
+        path_status = monitor.PathStatus(error=Exception("path"))
+        domain_status = monitor.DomainStatus(actual=False)
+        status = monitor.Status(path_status, domain_status)
+        self.assertTrue(status.actual)
+        self.assertEqual(status.error, path_status.error)
+        self.assertFalse(status.valid)
+
+    def test_domain_error(self):
+        path_status = monitor.PathStatus()
+        domain_status = monitor.DomainStatus(error=Exception("domain"))
+        status = monitor.Status(path_status, domain_status)
+        self.assertTrue(status.actual)
+        self.assertEqual(status.error, domain_status.error)
+        self.assertFalse(status.valid)
+
+    def test_domain_error_non_actual_path_status(self):
+        path_status = monitor.PathStatus(actual=False)
+        domain_status = monitor.DomainStatus(error=Exception("domain"))
+        status = monitor.Status(path_status, domain_status)
+        self.assertTrue(status.actual)
+        self.assertEqual(status.error, domain_status.error)
+        self.assertFalse(status.valid)
+
+    def test_both_error(self):
+        # For backward compatibility we have to present single error.
+        path_status = monitor.PathStatus(error=Exception("path"))
+        domain_status = monitor.DomainStatus(error=Exception("domain"))
+        status = monitor.Status(path_status, domain_status)
+        self.assertTrue(status.actual)
+        self.assertEqual(status.error, path_status.error)
+        self.assertFalse(status.valid)
+
+    @permutations([
+        ("valid", True),
+        ("error", None),
+        ("actual", True),
+        ("checkTime", 1234567),
+        ("readDelay", 0),
+        ("diskUtilization", (None, None)),
+        ("masterMounted", False),
+        ("masterValid", False),
+        ("hasHostId", False),
+        ("vgMdUtilization", (0, 0)),
+        ("vgMdHasEnoughFreeSpace", True),
+        ("vgMdFreeBelowThreashold", True),
+        ("isoPrefix", None),
+        ("version", -1),
+    ])
+    @MonkeyPatch(time, 'time', lambda: 1234567)
+    def test_readonly_attributes(self, attr, value):
+        status = monitor.Status(monitor.PathStatus(), monitor.DomainStatus())
+        self.assertEqual(value, getattr(status, attr))
+        self.assertRaises(AttributeError, setattr, status, attr, "new")
diff --git a/vdsm/storage/monitor.py b/vdsm/storage/monitor.py
index 6c90faf..d86853c 100644
--- a/vdsm/storage/monitor.py
+++ b/vdsm/storage/monitor.py
@@ -36,31 +36,92 @@
 
 class Status(object):
 
-    def __init__(self, actual=True):
-        self.actual = actual
-        self.error = None
-        self.checkTime = time.time()
-        self.readDelay = 0
-        self.diskUtilization = (None, None)
-        self.masterMounted = False
-        self.masterValid = False
-        self.hasHostId = False
-        # FIXME : Exposing these breaks abstraction and is not
-        #         needed. Keep exposing for BC. Remove and use
-        #         warning mechanism.
-        self.vgMdUtilization = (0, 0)
-        self.vgMdHasEnoughFreeSpace = True
-        self.vgMdFreeBelowThreashold = True
-        # The iso prefix is computed asynchronously because in any
-        # synchronous operation (e.g.: connectStoragePool, getInfo)
-        # we cannot risk to stop and wait for the iso domain to
-        # report its prefix (it might be unreachable).
-        self.isoPrefix = None
-        self.version = -1
+    def __init__(self, path_status, domain_status):
+        self._path_status = path_status
+        self._domain_status = domain_status
+        self._time = time.time()
+
+    @property
+    def actual(self):
+        if not self.valid:
+            return True
+        return self._path_status.actual and self._domain_status.actual
+
+    @property
+    def error(self):
+        return self._path_status.error or self._domain_status.error
 
     @property
     def valid(self):
         return self.error is None
+
+    @property
+    def checkTime(self):
+        return self._time
+
+    @property
+    def readDelay(self):
+        return self._path_status.readDelay
+
+    @property
+    def diskUtilization(self):
+        return self._domain_status.diskUtilization
+
+    @property
+    def masterMounted(self):
+        return self._domain_status.masterMounted
+
+    @property
+    def masterValid(self):
+        return self._domain_status.masterValid
+
+    @property
+    def hasHostId(self):
+        return self._domain_status.hasHostId
+
+    @property
+    def vgMdUtilization(self):
+        return self._domain_status.vgMdUtilization
+
+    @property
+    def vgMdHasEnoughFreeSpace(self):
+        return self._domain_status.vgMdHasEnoughFreeSpace
+
+    @property
+    def vgMdFreeBelowThreashold(self):
+        return self._domain_status.vgMdFreeBelowThreashold
+
+    @property
+    def isoPrefix(self):
+        return self._domain_status.isoPrefix
+
+    @property
+    def version(self):
+        return self._domain_status.version
+
+
+class PathStatus(object):
+
+    def __init__(self, readDelay=0, error=None, actual=True):
+        self.readDelay = readDelay
+        self.error = error
+        self.actual = actual
+
+
+class DomainStatus(object):
+
+    def __init__(self, error=None, actual=True):
+        self.error = error
+        self.actual = actual
+        self.diskUtilization = (None, None)
+        self.masterMounted = False
+        self.masterValid = False
+        self.hasHostId = False
+        self.vgMdUtilization = (0, 0)
+        self.vgMdHasEnoughFreeSpace = True
+        self.vgMdFreeBelowThreashold = True
+        self.isoPrefix = None
+        self.version = -1
 
 
 class DomainMonitor(object):
@@ -185,7 +246,8 @@
         # For backward compatibility, we must present a fake status before
         # collecting the first sample. The fake status is marked as
         # actual=False so engine can handle it correctly.
-        self.status = Status(actual=False)
+        self.status = Status(PathStatus(actual=False),
+                             DomainStatus(actual=False))
         self.isIsoDomain = None
         self.isoPrefix = None
         self.lastRefresh = time.time()
@@ -241,8 +303,8 @@
                 return
             except Exception as e:
                 log.exception("Setting up monitor for %s failed", self.sdUUID)
-                status = Status()
-                status.error = e
+                domain_status = DomainStatus(error=e)
+                status = Status(self.status._path_status, domain_status)
                 self._updateStatus(status)
                 if self.stopEvent.wait(self.interval):
                     raise utils.Canceled
@@ -296,25 +358,65 @@
                 raise utils.Canceled
 
     def _monitorDomain(self):
-        status = Status()
-
         # Pick up changes in the domain, for example, domain upgrade.
         if self._shouldRefreshDomain():
             self._refreshDomain()
 
-        try:
-            self._performDomainSelftest()
-            self._checkReadDelay(status)
-            self._collectStatistics(status)
-        except Exception as e:
-            log.exception("Error monitoring domain %s", self.sdUUID)
-            status.error = e
+        if self._checkPathStatus():
+            self._checkDomainStatus()
 
-        status.checkTime = time.time()
+        if self._shouldAcquireHostId():
+            self._acquireHostId()
+
+    @utils.cancelpoint
+    def _checkPathStatus(self):
+        # This may block for long time if the storage server is not accessible.
+        # On overloaded machines we have seen this take up to 15 seconds.
+        try:
+            stats = misc.readspeed(self.monitoringPath, 4096)
+        except Exception as e:
+            log.exception("Error checking path %s", self.monitoringPath)
+            path_status = PathStatus(error=e)
+        else:
+            path_status = PathStatus(readDelay=stats['seconds'])
+
+        status = Status(path_status, self.status._domain_status)
         self._updateStatus(status)
 
-        if self._shouldAcquireHostId(status):
-            self._acquireHostId()
+        return path_status.error is None
+
+    @utils.cancelpoint
+    def _checkDomainStatus(self):
+        domain_status = DomainStatus()
+        try:
+            # This may trigger a refresh of lvm cache. We have seen this taking
+            # up to 90 seconds on overloaded machines.
+            self.domain.selftest()
+
+            stats = self.domain.getStats()
+            domain_status.diskUtilization = (stats["disktotal"],
+                                             stats["diskfree"])
+
+            domain_status.vgMdUtilization = (stats["mdasize"],
+                                             stats["mdafree"])
+            domain_status.vgMdHasEnoughFreeSpace = stats["mdavalid"]
+            domain_status.vgMdFreeBelowThreashold = stats["mdathreshold"]
+
+            masterStats = self.domain.validateMaster()
+            domain_status.masterValid = masterStats['valid']
+            domain_status.masterMounted = masterStats['mount']
+
+            domain_status.hasHostId = self.domain.hasHostId(self.hostId)
+            domain_status.version = self.domain.getVersion()
+            domain_status.isoPrefix = self.isoPrefix
+        except Exception as e:
+            log.exception("Error checking domain %s", self.sdUUID)
+            domain_status.error = e
+
+        status = Status(self.status._path_status, domain_status)
+        self._updateStatus(status)
+
+        return domain_status.error is None
 
     # Handling status changes
 
@@ -349,44 +451,13 @@
         sdCache.manuallyRemoveDomain(self.sdUUID)
         self.lastRefresh = time.time()
 
-    # Collecting monitoring info
-
-    @utils.cancelpoint
-    def _performDomainSelftest(self):
-        # This may trigger a refresh of lvm cache. We have seen this taking up
-        # to 90 seconds on overloaded machines.
-        self.domain.selftest()
-
-    @utils.cancelpoint
-    def _checkReadDelay(self, status):
-        # This may block for long time if the storage server is not accessible.
-        # On overloaded machines we have seen this take up to 15 seconds.
-        stats = misc.readspeed(self.monitoringPath, 4096)
-        status.readDelay = stats['seconds']
-
-    def _collectStatistics(self, status):
-        stats = self.domain.getStats()
-        status.diskUtilization = (stats["disktotal"], stats["diskfree"])
-
-        status.vgMdUtilization = (stats["mdasize"], stats["mdafree"])
-        status.vgMdHasEnoughFreeSpace = stats["mdavalid"]
-        status.vgMdFreeBelowThreashold = stats["mdathreshold"]
-
-        masterStats = self.domain.validateMaster()
-        status.masterValid = masterStats['valid']
-        status.masterMounted = masterStats['mount']
-
-        status.hasHostId = self.domain.hasHostId(self.hostId)
-        status.isoPrefix = self.isoPrefix
-        status.version = self.domain.getVersion()
-
     # Managing host id
 
-    def _shouldAcquireHostId(self, status):
+    def _shouldAcquireHostId(self):
         # An ISO domain can be shared by multiple pools
         return (not self.isIsoDomain and
-                status.valid and
-                status.hasHostId is False)
+                self.status.valid and
+                self.status.hasHostId is False)
 
     def _shouldReleaseHostId(self):
         # If this is an ISO domain we didn't acquire the host id and releasing


-- 
To view, visit https://gerrit.ovirt.org/57429
To unsubscribe, visit https://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I97e18ebec36b421d0b4ec20e768fe661d6facf98
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: master
Gerrit-Owner: Nir Soffer <nsof...@redhat.com>
_______________________________________________
vdsm-patches mailing list
vdsm-patches@lists.fedorahosted.org
https://lists.fedorahosted.org/mailman/listinfo/vdsm-patches

Reply via email to