Hello Martin Peřina, Federico Simoncelli,
I'd like you to do a code review. Please visit
http://gerrit.ovirt.org/31148
to review the following change.
Change subject: fencing: Introduce getHostLeaseStatus API
......................................................................
fencing: Introduce getHostLeaseStatus API
When fencing another host, we like to check if the other host has access
to storage, to prevent unwanted fencing of a lively host. This patch
adds an internal API for getting another host status on monitored
domains.
The new getHostLeaseStatus API returns a dictionary of monitored domains
UUIDs and host lease status for each domain, as reported by the cluster
lock. The caller will apply a fencing policy using this data to decide
if host is lively enough to prevent fencing.
Here is an example result:
{'04e604d3-71eb-41e0-a8a3-74404bdd9d75': 'live',
'09049dc6-2007-492e-afd9-3ec201775b2a': 'dead',
'20501ea5-cf6b-453a-b23d-a708c28cec1f': 'live',
'2b02c759-2e4a-4cf0-9b8e-37a6ec77f821': 'unavailable',
...
'fc348be8-18f4-42ff-8421-9043203e13a6': 'live'}
See clusterlock.py for the possible statuses and their meaning.
Change-Id: Iccd62e58a194aa0ceb0f5e2503b8ec7e4349971b
Bug-Url: https://bugzilla.redhat.com/1110172
Signed-off-by: Nir Soffer <[email protected]>
Reviewed-on: http://gerrit.ovirt.org/28873
Reviewed-by: Martin Peřina <[email protected]>
Tested-by: Martin Peřina <[email protected]>
Reviewed-by: Federico Simoncelli <[email protected]>
---
M tests/testrunner.py
M vdsm/storage/clusterlock.py
M vdsm/storage/domainMonitor.py
M vdsm/storage/hsm.py
M vdsm/storage/sd.py
5 files changed, 111 insertions(+), 2 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/48/31148/1
diff --git a/tests/testrunner.py b/tests/testrunner.py
index eed743f..37b4c95 100644
--- a/tests/testrunner.py
+++ b/tests/testrunner.py
@@ -52,8 +52,19 @@
PERMUTATION_ATTR = "_permutations_"
-# At the moment of this writing the tests don't need a mock object for sanlock
-sys.modules.update({'sanlock': object()})
+
+class FakeSanlock(object):
+ """
+ Minimal test double exposing what the tests needs at this point.
+ """
+ HOST_UNKNOWN = 1
+ HOST_FREE = 2
+ HOST_LIVE = 3
+ HOST_FAIL = 4
+ HOST_DEAD = 5
+
+
+sys.modules.update({'sanlock': FakeSanlock()})
def dummyTextGenerator(size):
diff --git a/vdsm/storage/clusterlock.py b/vdsm/storage/clusterlock.py
index 6ca3e03..24a5d81 100644
--- a/vdsm/storage/clusterlock.py
+++ b/vdsm/storage/clusterlock.py
@@ -41,6 +41,33 @@
SDM_LEASE_NAME = 'SDM'
SDM_LEASE_OFFSET = 512 * 2048
+# Host status - currently only sanlock supports this, and the documentaion
+# describes the sanlock implementation. For more info see:
+# https://git.fedorahosted.org/cgit/sanlock.git/tree/src/lockspace.c
+
+# Cannot tell because clusterlock does not implement this or call failed.
+HOST_STATUS_UNAVAILABLE = "unavailable"
+
+# Host has a lease on the storage, but the clusterlock cannot tell if the host
+# is live or dead yet. Would typically last for 10-20 seconds, but it's
+# possible that this could persist for up to 80 seconds before host is
+# considered live or fail.
+HOST_STATUS_UNKNOWN = "unknown"
+
+# There is no lease for this host id.
+HOST_STATUS_FREE = "free"
+
+# Host has renewed its lease in the last 80 seconds. It may be renewing its
+# lease now or not, we can tell that only by checking again later.
+HOST_STATUS_LIVE = "live"
+
+# Host has not renewed its lease for 80 seconds. Would last for 60 seconds
+# before host is considered dead.
+HOST_STATUS_FAIL = "fail"
+
+# Host has not renewed its lease for 140 seconds.
+HOST_STATUS_DEAD = "dead"
+
class InquireNotSupportedError(Exception):
"""Raised when the clusterlock class is not supporting inquire"""
@@ -90,6 +117,9 @@
def hasHostId(self, hostId):
return True
+
+ def getHostStatus(self, hostId):
+ return HOST_STATUS_UNAVAILABLE
def acquire(self, hostID):
leaseTimeMs = self._leaseTimeSec * 1000
@@ -153,6 +183,15 @@
class SANLock(object):
+
+ STATUS_NAME = {
+ sanlock.HOST_UNKNOWN: HOST_STATUS_UNKNOWN,
+ sanlock.HOST_FREE: HOST_STATUS_FREE,
+ sanlock.HOST_LIVE: HOST_STATUS_LIVE,
+ sanlock.HOST_FAIL: HOST_STATUS_FAIL,
+ sanlock.HOST_DEAD: HOST_STATUS_DEAD,
+ }
+
log = logging.getLogger("Storage.SANLock")
_sanlock_fd = None
@@ -224,6 +263,28 @@
self.log.debug("Unable to inquire sanlock lockspace "
"status, returning False", exc_info=True)
return False
+
+ def getHostStatus(self, hostId):
+ # Note: get_hosts has off-by-one bug when asking for particular host
+ # id, so get all hosts info and filter.
+ # See https://bugzilla.redhat.com/1111210
+ try:
+ hosts = sanlock.get_hosts(self._sdUUID)
+ except sanlock.SanlockException as e:
+ self.log.debug("Unable to get host %d status in lockspace %s: %s",
+ hostId, self._sdUUID, e)
+ return HOST_STATUS_UNAVAILABLE
+
+ for info in hosts:
+ if info['host_id'] == hostId:
+ status = info['flags']
+ return self.STATUS_NAME[status]
+
+ # get_hosts with host_id=0 returns only hosts with timestamp != 0,
+ # which means that no host is using this host id now. If there a
+ # timestamp, sanlock will return HOST_UNKNOWN and then HOST_LIVE or
+ # HOST_FAIL.
+ return HOST_STATUS_FREE
# The hostId parameter is maintained here only for compatibility with
# ClusterLock. We could consider to remove it in the future but keeping it
@@ -358,6 +419,9 @@
currentHostId, lockFile = self._getLease()
return currentHostId == hostId
+ def getHostStatus(self, hostId):
+ return HOST_STATUS_UNAVAILABLE
+
def acquire(self, hostId):
with self._globalLockMapSync:
self.log.info("Acquiring local lock for domain %s (id: %s)",
diff --git a/vdsm/storage/domainMonitor.py b/vdsm/storage/domainMonitor.py
index 8a50b4f..d9e515c 100644
--- a/vdsm/storage/domainMonitor.py
+++ b/vdsm/storage/domainMonitor.py
@@ -23,6 +23,7 @@
import weakref
import logging
+import clusterlock
import misc
from vdsm import utils
from vdsm.config import config
@@ -121,6 +122,17 @@
def getStatus(self, sdUUID):
return self._domains[sdUUID].getStatus()
+ def getHostStatus(self, domains):
+ status = {}
+ for sdUUID, hostId in domains.iteritems():
+ try:
+ monitor = self._domains[sdUUID]
+ except KeyError:
+ status[sdUUID] = clusterlock.HOST_STATUS_UNAVAILABLE
+ else:
+ status[sdUUID] = monitor.getHostStatus(hostId)
+ return status
+
def close(self):
self.log.info("Stopping domain monitors")
for sdUUID in self._domains.keys():
@@ -160,6 +172,11 @@
def getStatus(self):
return self.status.copy()
+ def getHostStatus(self, hostId):
+ if not self.domain:
+ return clusterlock.HOST_STATUS_UNAVAILABLE
+ return self.domain.getHostStatus(hostId)
+
@utils.traceback(on=log.name)
def _monitorLoop(self):
self.log.debug("Starting domain monitor for %s", self.sdUUID)
diff --git a/vdsm/storage/hsm.py b/vdsm/storage/hsm.py
index cbdf108..d743759 100644
--- a/vdsm/storage/hsm.py
+++ b/vdsm/storage/hsm.py
@@ -3647,3 +3647,17 @@
with rmanager.acquireResource(STORAGE, HSM_DOM_MON_LOCK,
rm.LockType.exclusive):
self.domainMonitor.stopMonitoring(sdUUID)
+
+ @public
+ def getHostLeaseStatus(self, domains):
+ """
+ Returns host lease status for specified domains.
+
+ Warning: Internal use only.
+
+ :param domains: mapping of host id indexed by domain uuid.
+ :returns: mapping of host lease status indexed by domain
+ uuid. See clusterlock.py for possible values and
+ their meaning.
+ """
+ return {'domains': self.domainMonitor.getHostStatus(domains)}
diff --git a/vdsm/storage/sd.py b/vdsm/storage/sd.py
index c65fd03..b34cc79 100644
--- a/vdsm/storage/sd.py
+++ b/vdsm/storage/sd.py
@@ -482,6 +482,9 @@
def hasHostId(self, hostId):
return self._clusterLock.hasHostId(hostId)
+ def getHostStatus(self, hostId):
+ return self._clusterLock.getHostStatus(hostId)
+
def hasVolumeLeases(self):
domVersion = self.getVersion()
try:
--
To view, visit http://gerrit.ovirt.org/31148
To unsubscribe, visit http://gerrit.ovirt.org/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Iccd62e58a194aa0ceb0f5e2503b8ec7e4349971b
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: ovirt-3.5
Gerrit-Owner: Nir Soffer <[email protected]>
Gerrit-Reviewer: Federico Simoncelli <[email protected]>
Gerrit-Reviewer: Martin Peřina <[email protected]>
_______________________________________________
vdsm-patches mailing list
[email protected]
https://lists.fedorahosted.org/mailman/listinfo/vdsm-patches