hello,
Apply the attached patched registry client to
C:\Python23\Lib\site-packages\AccessGrid3\AccessGrid\Registry
That provides a workaround to the current limitations of having only 10 bridges
listed.
Nic
>>> Kaizaad Bilimorya <[email protected]> 03/30 2:44 am >>>
Hello,
We are running a bridge server that seems to drop out from the central
registry http://www.accessgrid.org/registry/peers.txt
Physically, the bridge doesn't seem to loose network connectivity, but
when a client starts up and gets the list of bridges, sometimes our bridge
is not there. After waiting a few minutes and then restarting the client,
the bridge shows up in the list:
Bridge Host Port Type Status Distance PortRange
------ ---- ---- ---- ----- -------- ----------
SHARCNET agbs.sharcnet.ca 20000 QuickBridge Enabled 0.0160 50000-50200
I can actually just run the "RegistryClient.py" command (from the bridge
itself) and sometimes I see it listed but then on the next run it is gone.
name: SHARCNET host: agbs.sharcnet.ca port: 20000 dist:0.0015721321106
name: UofC host: bullet.uchicago.edu port: 20200 dist:0.0685670375824
name: Argonne host: milton.mcs.anl.gov port: 8030 dist:0.0720429420471
name: WestGrid host: venueserver2.westgrid.ca port: 20000 dist:0.172122955322
name: NIEeS host: agiraffe.niees.group.cam.ac.uk port: 29999
dist:0.353329181671
name: AGSC host: sam.ag.manchester.ac.uk port: 1992 dist:0.359209060669
name: onlyfinland host: vy216.ucs.fi port: 20000 dist:0.452105998993
name: APAG host: vv3.ap-accessgrid.org port: 20000 dist:0.751379966736
name: JCU host: hpc-external.jcu.edu.au port: 20000 dist:0.772580862045
name: Sheridan host: oa-ag3-venue.sheridanc.on.ca port: 20000 dist:8.37
I think it is dropping out of the registry because I would expect it to be
first in the list since it is the closest. Even when I increase the
"LookupBridge(self, maxToReturn=20):" it still is not listed some of the
time.
Any insight would be helpful
thanks
-k
--
This message (and any attachments) is for the recipient only. NERC
is subject to the Freedom of Information Act 2000 and the contents
of this email and any reply you make may be disclosed by NERC unless
it is exempt from release under the Act. Any material supplied to
NERC may be stored in an electronic records management system.
#!/usr/bin/env python
#-----------------------------------------------------------------------------
# Name: RegistryClient.py
# Purpose: This is the client side of the (bridge) Registry
# Created: 2006/01/01
# RCS-ID: $Id: RegistryClient.py,v 1.25 2006/08/01 21:54:41 turam Exp $
# Copyright: (c) 2006
# Licence: See COPYING.TXT
#-----------------------------------------------------------------------------
import xmlrpclib
import urllib
import os
import sys
import time
from AccessGrid import Log
from AccessGrid.Platform import IsWindows, IsOSX, IsLinux, IsFreeBSD
from AccessGrid.Descriptions import BridgeDescription
from AccessGrid.BridgeCache import BridgeCache
class RegistryClient:
def __init__(self, url):
self.url = url
self.serverProxy = None
self.registryPeers = None
self.bridges = None
self.bridgeCache = BridgeCache()
self.log = Log.GetLogger('RegistryClient')
def _connectToRegistry(self):
if not self.registryPeers:
self.registryPeers = self._readPeerList(url=self.url)
# Connect to the first reachable register according to ping
foundServer = 0
for r in self.registryPeers:
try:
tmpServerProxy = xmlrpclib.ServerProxy("http://"+r)
if self.PingRegistryPeer(tmpServerProxy) > -1:
self.serverProxy = tmpServerProxy
foundServer = 1
break
except Exception,e:
self.log.exception("Failed to connect to registry %s"%(r))
if not foundServer:
# Throw exception!
self.log.info("No bridge registry peers reachable")
#raise Exception("No bridge registry peers reachable")
def RegisterBridge(self, registeredServerInfo):
self._connectToRegistry()
return self.serverProxy.RegisterBridge(registeredServerInfo)
def PingRegistryPeer(self, serverProxy):
startTime = serverProxy.Ping(time.time())
roundTripTime = time.time() - startTime
#print "RoundTrip:", roundTripTime
return roundTripTime
def PingBridgeService(self, bridgeProxy):
try:
try: # Temporary try/except until all Bridges have the "Ping" method
startTime = bridgeProxy.Ping(time.time())
#print "RoundTrip:", time.time(), startTime
roundTripTime = time.time() - startTime
return roundTripTime
except xmlrpclib.Fault, e: # Temporary until all Bridges have the
"Ping" method
if 'method "Ping" is not supported' in e.faultString:
self.log.info('Using deprecated ping for older bridge
interface to %s',
bridgeProxy._ServerProxy__host)
host = bridgeProxy._ServerProxy__host.split(":")[0]
return self.PingHost(host)
else:
raise
except:
self.log.exception('Exception pinging bridge')
return -1
def PingHost(self, host):
try:
pingVal = self._ping(host)
return pingVal
except:
return -1
def LookupBridge(self, maxToReturn=10):
'''
Query registry for a list of bridges. If user cache exists it
is used instead of a network query.
@keyword maxToReturn: number of bridges to return, default 10
@type maxToReturn: int
@return: command output
@rtype: string
'''
if self.bridges:
# We have bridges, return
return self.bridges[0:maxToReturn]
#else:
# # Get bridges from cache on local file
# self.bridges = self.bridgeCache.GetBridges()
if not self.bridges:
# If the cache does not exist, query registry
self._connectToRegistry()
bridges = self.serverProxy.LookupBridge()
self.bridges = []
# Create real bridge descriptions
for b in bridges:
if 'portMin' not in b.keys():
b['portMin'] = 5000
b['portMax'] = 5100
desc = BridgeDescription(b["guid"], b["name"], b["host"],
b["port"], b["serverType"],
b["description"],
b["portMin"],
b["portMax"])
self.bridges.append(desc)
# Sort the bridges
self.bridges = self._sortBridges(maxToReturn)
# Store bridges in cache
self.bridgeCache.StoreBridges(self.bridges)
return self.bridges
def _sortBridges(self, maxToReturn):
'''
Sort a list of bridges based on ping values. Bridges
that can not be reached will be ignored.
@param maxToReturn number of bridges to return
@type maxToReturn int
@return: list of sorted bridges
@rtype: [AccessGrid.Descriptions.BridgeDescription]
'''
bridgeDescriptions = []
for desc in self.bridges:
try:
pingVal =
self.PingBridgeService(xmlrpclib.ServerProxy("http://%s:%s" % (desc.host,
desc.port)))
if pingVal >= 0.0:
desc.rank = pingVal
else:
desc.rank = 100000
bridgeDescriptions.append(desc)
except:
self.log.exception("Failed to ping bridge %s
(%s:%s)"%(desc.name,desc.host,str(desc.port)))
return bridgeDescriptions
def _readPeerList(self,url):
if url.startswith("file://"):
filename = url[7:]
f = open(filename, "r")
else:
opener = urllib.FancyURLopener({})
f = opener.open(url)
contents = f.read()
f.close()
registryPeers = contents.split()
return registryPeers
def _ping(self, host):
'''
Invoke system ping command to host
@param host: machine to ping
@type host: string
@return: average time for ping command
@rtype: string
'''
if IsOSX() or IsLinux() or IsFreeBSD():
# osx and linux ping command have the
# same output format
# time out after 10 sec
if IsOSX() or IsFreeBSD():
cmd = 'ping -o -t 1 %s'%(host)
else:
cmd = 'ping -c 1 -w 1 %s'%(host)
ret = self._execCmd(cmd)
if ret.find('unknown host')>-1:
self.log.info("Ping: Host %s not found"%(host))
raise Exception, "Ping: Host %s not found"%(host)
if ret.find('100%')>-1:
self.log.info("Ping: Host %s timed out"%(host))
raise Exception, "Ping: Host %s timed out"%(host)
# Find average round trip time
i = ret.find('time')
ret = ret[i:]
ret = ret.split('=')[1]
ret = ret.split()[0]
val = float(ret)
if IsWindows():
cmd = 'ping -n 1 %s'%(host)
ret = self._execCmd(cmd)
if ret.find('could not find')>-1:
self.log.info("Ping: Host %s not found"%(host))
raise Exception, "Ping: Host %s not found"%(host)
# windows times out automatically
if ret.find('timed out')>-1:
self.log.info("Ping: Host %s timed out"%(host))
raise Exception, "Ping: Host %s timed out"%(host)
# Find average round trip time
a = ret.find('Average')
ret = ret[a:]
val = ret.split('=')[1]
val = filter(lambda x: x.isdigit(), val)
val = float(val)
return val
def _execCmd(self, cmd):
'''
Execute a command using popen, returns the output string.
@param cmd: command to execute
@type cmd: string
@return: command output
@rtype: string
'''
ret = ''
try:
f = os.popen(cmd, 'r')
ret = f.read()
finally:
f.close()
return ret
if __name__=="__main__":
rc = RegistryClient(url="http://www.accessgrid.org/registry/peers.txt")
from AccessGrid.GUID import GUID
from AccessGrid.Descriptions import BridgeDescription, QUICKBRIDGE_TYPE
# Disable bridge registration so we can just test the registry client
# Register a bridge using the RegistryClient
#info = BridgeDescription(guid=GUID(), name="defaultName",
host="localhost", port="9999", serverType=QUICKBRIDGE_TYPE, description="")
#rc.RegisterBridge(info)
# Lookup a bridge using the RegistryClient
bridgeDescList = rc.LookupBridge()
bridgeDescList.sort(lambda x,y: cmp(x.rank,y.rank))
for b in bridgeDescList:
print 'name: '+b.name+' host: '+b.host+" port: "+str(b.port) +"
dist:"+str(b.rank)