Ido Barkan has uploaded a new change for review.

Change subject: net: wait for restored devices to be up
......................................................................

net: wait for restored devices to be up

If restore-net-config does not wait for all devices to be up, it may
observe a transient kernel state in which, for example, bonding
devices report no slaves. Restoration then needlessly restores the
bond again, because the kernel reports it as different from the
persisted device.

Change-Id: I3cd3de577e5d0bcf5e87c4894e94e03c209ce76a
Signed-off-by: Ido Barkan <ibar...@redhat.com>
Bug-Url: https://bugzilla.redhat.com/1203422
---
M vdsm/vdsm-restore-net-config
1 file changed, 35 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/22/43222/1

diff --git a/vdsm/vdsm-restore-net-config b/vdsm/vdsm-restore-net-config
index 17957c0..7968da3 100755
--- a/vdsm/vdsm-restore-net-config
+++ b/vdsm/vdsm-restore-net-config
@@ -22,8 +22,10 @@
 import logging
 import logging.config
 import os
+import time
 
 from vdsm.config import config
+from vdsm import ipwrapper
 from vdsm import netinfo
 from vdsm.constants import P_VDSM_RUN
 from vdsm.netconfpersistence import KernelConfig, BaseConfig
@@ -38,6 +40,7 @@
 import pkgutil
 
 _NETS_RESTORED_MARK = os.path.join(P_VDSM_RUN, 'nets_restored')
+_ALL_DEVICES_UP_TIMEOUT = 30
 
 
 def ifcfg_restoration():
@@ -68,6 +71,8 @@
 
     persistent_config = PersistentConfig()
     available_config = _filter_available(persistent_config)
+    _wait_for_for_all_devices_up(
+        available_config.networks.keys() + available_config.bonds.keys())
     changed_config = _filter_changed_nets_bonds(available_config)
     nets = changed_config.networks
     bonds = changed_config.bonds
@@ -168,6 +173,36 @@
     return changed_or_missing
 
 
+def _wait_for_for_all_devices_up(links):
+    sleep_time = 1
+    time_left = _ALL_DEVICES_UP_TIMEOUT
+
+    down_links = _get_links_with_state_down(links)
+
+    # TODO: using the netlink monitor here might be more elegant (it is not
+    # available in 3.5)
+    while down_links and time_left > 0:
+        logging.debug("waiting for %s to be up.", down_links)
+        time.sleep(sleep_time)
+        time_left -= sleep_time
+        down_links = _get_links_with_state_down(links)
+
+    if down_links:
+        logging.warning("Not all devices are up. VDSM might restore them "
+                        "although they were not changed since they were "
+                        "persisted.")
+    else:
+        logging.debug("All devices are up.")
+
+
+def _get_links_with_state_down(links):
+    def oper_up(link):
+        return bool(link.flags & 1 << 6)
+
+    return set([l.name for l in ipwrapper.getLinks() if
+                l.name in links and not oper_up(l)])
+
+
 def _get_all_configurators():
     """Returns the class objects of all the configurators in the netconf pkg"""
     prefix = configurators.__name__ + '.'


-- 
To view, visit https://gerrit.ovirt.org/43222
To unsubscribe, visit https://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3cd3de577e5d0bcf5e87c4894e94e03c209ce76a
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: ovirt-3.5
Gerrit-Owner: Ido Barkan <ibar...@redhat.com>
_______________________________________________
vdsm-patches mailing list
vdsm-patches@lists.fedorahosted.org
https://lists.fedorahosted.org/mailman/listinfo/vdsm-patches

Reply via email to