When NETLINK_LISTEN_ALL_NSID is enabled on the RTNL notification
socket, the kernel tags every broadcast RTM event with the sender's
nsid as looked up in the receiver's namespace-id table. Normally,
events originating in the local namespace carry no nsid cmsg (which
OVS interprets as NETNSID_LOCAL).
However, some container runtimes cause the kernel to create a
self-referential nsid mapping (the root namespace gets a real nsid
that points back to itself) as a side effect of cross-namespace
link queries (RTM_GETLINK). When this mapping exists, the kernel
tags locally-originated events with that nsid instead of omitting
the cmsg, causing OVS to silently reject them.
Fix this as follows:
- Query the kernel (RTM_GETNSID on /proc/self/ns/net) to discover
whether a self-referential nsid mapping already exists. The query
runs once, the first time a tagged event has to be checked against
a local device, and its result is cached.
- Subscribe the notification socket to RTNLGRP_NSID so that
RTM_NEWNSID events are received whenever the kernel creates a new
nsid mapping. On receiving such a notification, immediately
re-query the self-nsid. This is race-free: the notification and
the first tagged event arrive on the same socket and are processed
sequentially, so the self-nsid is updated before any tagged event
is evaluated.
- In netdev_linux_update(), treat the self-nsid as equivalent to
NETNSID_LOCAL for local devices. Remote devices retain strict
nsid matching via the vport-queried nsid.
Signed-off-by: Matteo Perin <[email protected]>
---
lib/netdev-linux.c | 150 +++++++++++++++++++++++++++++++++++++++++----
1 file changed, 138 insertions(+), 12 deletions(-)
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index c694dc1c5..c44aa4093 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -586,6 +586,97 @@ is_tap_netdev(const struct netdev *netdev)
return netdev_get_class(netdev) == &netdev_tap_class;
}
+/* Cached nsid that the kernel maps to the local network namespace, or
+ * NETNSID_LOCAL if no self-referential mapping has been found (yet).
+ * Written by netdev_linux_query_self_nsid(), which runs on first use and
+ * again on every RTM_NEWNSID notification. */
+static int netdev_linux_self_nsid = NETNSID_LOCAL;
+
+/* Sends RTM_GETNSID for /proc/self/ns/net and, when the reply carries a
+ * NETNSA_NSID attribute, caches its value in netdev_linux_self_nsid. */
+static void
+netdev_linux_query_self_nsid(void)
+{
+#ifdef HAVE_LINUX_NET_NAMESPACE_H /* Without it this function is a no-op. */
+ static const char ns_path[] = "/proc/self/ns/net";
+ int fd = open(ns_path, O_RDONLY);
+
+ if (fd >= 0) { /* Otherwise the cached value is left as is. */
+ const int rta_offset = NLMSG_ALIGN(sizeof(struct rtgenmsg));
+ struct ofpbuf request;
+ struct ofpbuf *reply = NULL;
+ int error;
+
+ ofpbuf_init(&request, 0);
+ nl_msg_put_nlmsghdr(&request,
+ rta_offset + NL_ATTR_SIZE(sizeof(uint32_t)),
+ RTM_GETNSID, NLM_F_REQUEST);
+ ofpbuf_put_zeros(&request, rta_offset); /* Zeroed rtgenmsg. */
+ nl_msg_put_u32(&request, NETNSA_FD, fd); /* Namespace to resolve. */
+
+ error = nl_transact(NETLINK_ROUTE, &request, &reply);
+ if (!error && reply) {
+ const struct nlattr *a;
+
+ a = nl_attr_find(reply, NLMSG_HDRLEN + rta_offset, NETNSA_NSID);
+ if (a) { /* No attribute: keep the previous cached value. */
+ netdev_linux_self_nsid = nl_attr_get_u32(a);
+ VLOG_DBG("local network namespace has nsid %d",
+ netdev_linux_self_nsid);
+ }
+ }
+
+ ofpbuf_uninit(&request);
+ ofpbuf_delete(reply);
+ close(fd);
+ }
+#endif /* HAVE_LINUX_NET_NAMESPACE_H */
+}
+
+/* Returns the cached nsid that the kernel maps to the local network
+ * namespace, or NETNSID_LOCAL if no such mapping has been found.  The first
+ * call queries the kernel; later calls only read the cached value.
+ *
+ * NETLINK_LISTEN_ALL_NSID workaround: OVS enables this option on its RTNL
+ * notification socket so that it can receive events from remote namespaces.
+ * A side effect is that the kernel tags every broadcast, including
+ * locally-originated RTM events, with the sender's nsid as looked up in the
+ * receiver's nsid table.
+ *
+ * Some container runtimes create a self-referential nsid mapping as a
+ * side effect of cross-namespace link queries: the root namespace ends up
+ * with a real nsid that points back to itself.  When that mapping exists,
+ * local events arrive tagged with that nsid instead of carrying no cmsg
+ * (which OVS reads as NETNSID_LOCAL = -1), and would be silently rejected.
+ *
+ * This function exposes the self-nsid so that netdev_linux_update() can
+ * treat it as equivalent to NETNSID_LOCAL.
+ *
+ * If the mapping is created only after the first query, the cached value
+ * is still NETNSID_LOCAL at that point.  The notification socket is
+ * subscribed to RTNLGRP_NSID, so netdev_linux_run() sees RTM_NEWNSID when
+ * the kernel creates a mapping and re-queries the self-nsid right away,
+ * which refreshes the cache.
+ *
+ * Once created, a self-referential nsid mapping is permanent: the kernel
+ * only removes nsid entries when the peer namespace is destroyed.
+ *
+ * If NETLINK_LISTEN_ALL_NSID is ever deprecated and superseded by a
+ * mechanism that does not tag local events with a numeric nsid, this
+ * workaround (and the check in netdev_linux_update()) can be removed. */
+static int
+netdev_linux_get_self_nsid(void)
+{
+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+ if (ovsthread_once_start(&once)) {
+ netdev_linux_query_self_nsid();
+ ovsthread_once_done(&once);
+ }
+
+ return netdev_linux_self_nsid;
+}
+
static int
netdev_linux_netnsid_update__(struct netdev_linux *netdev)
{
@@ -629,13 +720,6 @@ netdev_linux_netnsid_update(struct netdev_linux *netdev)
return 0;
}
-static bool
-netdev_linux_netnsid_is_eq(struct netdev_linux *netdev, int nsid)
-{
- netdev_linux_netnsid_update(netdev);
- return netnsid_eq(netdev->netnsid, nsid);
-}
-
static bool
netdev_linux_netnsid_is_remote(struct netdev_linux *netdev)
{
@@ -652,15 +736,19 @@ static void netdev_linux_changed(struct netdev_linux
*netdev,
OVS_REQUIRES(netdev->mutex);
/* Returns a NETLINK_ROUTE socket listening for RTNLGRP_LINK,
- * RTNLGRP_IPV4_IFADDR and RTNLGRP_IPV6_IFADDR changes, or NULL
- * if no such socket could be created. */
+ * RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV6_IFADDR, and RTNLGRP_NSID changes,
+ * or NULL if no such socket could be created. */
static struct nl_sock *
netdev_linux_notify_sock(void)
{
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
static struct nl_sock *sock;
unsigned int mcgroups[] = {RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR,
- RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO};
+ RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO,
+#ifdef HAVE_LINUX_NET_NAMESPACE_H
+ RTNLGRP_NSID,
+#endif
+ };
if (ovsthread_once_start(&once)) {
int error;
@@ -791,6 +879,21 @@ netdev_linux_run(const struct netdev_class *netdev_class
OVS_UNUSED)
if (!error) {
struct rtnetlink_change change;
+#ifdef HAVE_LINUX_NET_NAMESPACE_H
+ /* RTM_NEWNSID: a new namespace id mapping was created.
+ * Re-query the self-nsid in case the kernel just created
+ * a self-referential root-namespace mapping. */
+ {
+ const struct nlmsghdr *nlmsg = buf.data;
+
+ if (nlmsg->nlmsg_type == RTM_NEWNSID) {
+ netdev_linux_query_self_nsid();
+ ofpbuf_uninit(&buf);
+ continue;
+ }
+ }
+#endif
+
if (rtnetlink_parse(&buf, &change) && !change.irrelevant) {
struct netdev *netdev_ = NULL;
char dev_name[IFNAMSIZ];
@@ -936,9 +1039,32 @@ netdev_linux_update(struct netdev_linux *dev, int nsid,
const struct rtnetlink_change *change)
OVS_REQUIRES(dev->mutex)
{
- if (netdev_linux_netnsid_is_eq(dev, nsid)) {
- netdev_linux_update__(dev, change);
+ netdev_linux_netnsid_update(dev);
+
+ if (netnsid_is_remote(dev->netnsid)) {
+ /* Remote device: only accept events with exactly matching nsid. */
+ if (!netnsid_eq(dev->netnsid, nsid)) {
+ return;
+ }
+ } else {
+ /* Local (or unresolved) device: only accept events that actually
+ * originated in the local namespace.
+ *
+ * NETLINK_LISTEN_ALL_NSID workaround: the kernel may tag local
+ * events with a real nsid instead of omitting the cmsg. The
+ * self-nsid is queried on first use and refreshed whenever
+ * an RTM_NEWNSID notification arrives.
+ *
+ * Any other nsid means the event came from a genuinely different
+ * namespace and must be rejected to avoid cross-namespace name
+ * collisions. */
+ if (!netnsid_is_local(nsid)
+ && nsid != netdev_linux_get_self_nsid()) {
+ return;
+ }
}
+
+ netdev_linux_update__(dev, change);
}
static struct netdev *
--
2.43.0
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev