Mark Wu has uploaded a new change for review. Change subject: configNetwork: Fix a race between dhcp thread and connectivity check ......................................................................
configNetwork: Fix a race between dhcp thread and connectivity check If dhcp is not done before the connectivity check, it can't detect the problem if the host loses connection to engine. This problem is found due to the engine bug(BZ838816): engine wrongly configures dhcp for management network, and then engine can't connect to the vdsm host because the ip address changes. This patch adds an event to synchronize dhcp thread and connectivity check. Change-Id: Id1a1fb8ef4cd88e2144f30cf8dd3f09b8dd8126f Signed-off-by: Mark Wu <[email protected]> --- M vdsm/configNetwork.py 1 file changed, 36 insertions(+), 11 deletions(-) git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/01/7401/1 diff --git a/vdsm/configNetwork.py b/vdsm/configNetwork.py index d55a2cc..0617bbe 100755 --- a/vdsm/configNetwork.py +++ b/vdsm/configNetwork.py @@ -32,11 +32,11 @@ from vdsm import constants from vdsm import utils import neterrors as ne -from vdsm import define from vdsm import netinfo from vdsm import libvirtconnection CONNECTIVITY_TIMEOUT_DEFAULT = 4 +DHCP_TIMEOUT_DEFAULT = 120 MAX_VLAN_ID = 4094 MAX_BRIDGE_NAME_LEN = 15 ILLEGAL_BRIDGE_CHARS = frozenset(':. \t') @@ -59,11 +59,13 @@ if not line.endswith(' does not exist!')])) return p.returncode -def ifup(iface): +def ifup(iface, dhcpDone=None): "Bring up an interface" p = subprocess.Popen([constants.EXT_IFUP, iface], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) out, err = p.communicate() + if dhcpDone != None: + dhcpDone.set() if out.strip(): logging.info(out) if err.strip(): @@ -774,7 +776,8 @@ def addNetwork(network, vlan=None, bonding=None, nics=None, ipaddr=None, netmask=None, mtu=None, gateway=None, force=False, - configWriter=None, bondingOptions=None, bridged=True, **options): + configWriter=None, bondingOptions=None, bridged=True, + dhcpDone=None, **options): nics = nics or () _netinfo = netinfo.NetInfo() bridged = utils.tobool(bridged) @@ -878,7 +881,7 @@ # wait for dhcp in another thread, # so vdsm won't get stuck (BZ#498940) t = threading.Thread(target=ifup, name='ifup-waiting-on-dhcp', - args=(network,)) + args=(network, dhcpDone)) t.daemon = True t.start() else: @@ -1033,17 +1036,28 @@ def editNetwork(oldBridge, newBridge, vlan=None, bonding=None, nics=None, **options): configWriter = ConfigWriter() + dhcpEvt = None + if newBridge == 'ovirtmgmt' and options.get('bootproto') == 'dhcp' and \ + not utils.tobool(options.get('blockingdhcp')): + dhcpEvt = threading.Event() try: delNetwork(oldBridge, configWriter=configWriter, **options) - addNetwork(newBridge, vlan=vlan, bonding=bonding, nics=nics, configWriter=configWriter, **options) + addNetwork(newBridge, vlan=vlan, bonding=bonding, nics=nics, configWriter=configWriter, + dhcpDone=dhcpEvt, **options) + if utils.tobool(options.get('connectivityCheck', False)): + if dhcpEvt != None: + done = dhcpEvt.wait(DHCP_TIMEOUT_DEFAULT) + dhcpEvt = None + if not done: + raise ConfigNetworkError(ne.ERR_BAD_PARAMS, + 'dhcp client got timeout') + if not clientSeen(int(options.get('connectivityTimeout', CONNECTIVITY_TIMEOUT_DEFAULT))): + delNetwork(newBridge, force=True) + raise ConfigNetworkError(ne.ERR_LOST_CONNECTION, + 'connectivity check failed') except: configWriter.restoreBackups() raise - if utils.tobool(options.get('connectivityCheck', False)): - if not clientSeen(int(options.get('connectivityTimeout', CONNECTIVITY_TIMEOUT_DEFAULT))): - delNetwork(newBridge, force=True) - configWriter.restoreBackups() - return define.errCode['noConPeer']['status']['code'] def _validateNetworkSetup(networks={}, bondings={}): _netinfo = netinfo.NetInfo() @@ -1174,6 +1188,7 @@ _netinfo = netinfo.NetInfo() configWriter = ConfigWriter() networksAdded = set() + dhcpEvt = None # keep set netsWithNewBonds to be able remove # a new added network if connectivity check fail. # If a new network needs to be created on top of existing bond, @@ -1235,11 +1250,21 @@ d['nics'] = [d.pop('nic')] d['force'] = force + if network == 'ovirtmgmt' and d.get('bootproto') == 'dhcp' and \ + not utils.tobool(d.get('blockingdhcp')): + dhcpEvt = threading.Event() + logger.debug("Adding network %r" % network) - addNetwork(network, configWriter=configWriter, + addNetwork(network, configWriter=configWriter, dhcpDone=dhcpEvt, implicitBonding=True, **d) if utils.tobool(options.get('connectivityCheck', True)): + if dhcpEvt != None: + done = dhcpEvt.wait(DHCP_TIMEOUT_DEFAULT) + dhcpEvt = None + if not done: + raise ConfigNetworkError(ne.ERR_BAD_PARAMS, + 'dhcp client got timeout') logger.debug('Checking connectivity...') if not clientSeen(int(options.get('connectivityTimeout', CONNECTIVITY_TIMEOUT_DEFAULT))): -- To view, visit http://gerrit.ovirt.org/7401 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id1a1fb8ef4cd88e2144f30cf8dd3f09b8dd8126f Gerrit-PatchSet: 1 Gerrit-Project: vdsm Gerrit-Branch: master Gerrit-Owner: Mark Wu <[email protected]> _______________________________________________ vdsm-patches mailing list [email protected] https://lists.fedorahosted.org/mailman/listinfo/vdsm-patches
