On Thu, Apr 16, 2026 at 04:45:13PM -0400, Mike Pattrick wrote:
> On Wed, Mar 11, 2026 at 2:06 AM Adrian Moreno via dev <
> [email protected]> wrote:
>
> > Now that netdev-linux flags are also cached, we need to ensure they are
> > updated correctly.
> >
> > Currently, there are subsystems that rely on link state changes via
> > rtnetlink notifiers (e.g: iface-notifier, route-table). In parallel,
> > netdev-linux creates its own netlink socket to listen for link
> > notifications based on which netdev's states are updated. This creates a
> > potential race: some subsystems might be notified that a link changed
> > but the internal netdev representation is still old.
> >
> > If we want to both cache netdev internal state (e.g: flags, ether_addr)
> > effectively we need to use the same callback chain, i.e: we need to use
> > rtnetlink to update netdev-linux netdevs as well when links change.
> >
> > The order of callbacks cannot be guaranteed so it could still be possible
> > to
> > have another subsystem be notified before netdev-linux has been given a
> > chance
> > to update its state. However, subsystems would likely want to aggregate
> > reconfigurations and will likely have their own "_run()" function to
> > operate.
> >
> > Therefore, it could be reasonalbe to assume the other subsystems would use
> > the
> > callback to set a flag that is then used to trigger reconfigurations in
> > their
> > own periodic "_run()" functions (this is exactly the case of the two
> > existing
> > users: iface-notifier, route-table).
> >
> > This patch makes netdev-linux use rtnetlink notifier for interface updates
> > and
> > independent nln notifiers for address changes.
> >
> > Signed-off-by: Adrian Moreno <[email protected]>
> > ---
> >  lib/netdev-linux.c     | 214 ++++++++++++++++++++---------------------
> >  lib/netdev-linux.h     |   1 +
> >  lib/netlink-notifier.h |   3 +-
> >  lib/rtnetlink.c        |  12 ++-
> >  lib/rtnetlink.h        |   1 +
> >  5 files changed, 118 insertions(+), 113 deletions(-)
> >
> > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> > index e6127240b..97be563f7 100644
> > --- a/lib/netdev-linux.c
> > +++ b/lib/netdev-linux.c
> > @@ -298,6 +298,10 @@ static struct ovs_mutex lag_mutex =
> > OVS_MUTEX_INITIALIZER;
> >  static struct shash lag_shash OVS_GUARDED_BY(lag_mutex)
> >      = SHASH_INITIALIZER(&lag_shash);
> >
> > +static struct rtnetlink_change netlink_change;
> > +static struct nln *nln = NULL;
> > +static struct nln_notifier *notifiers[4] = {NULL, NULL, NULL, NULL};
> > +
> >  /* Traffic control. */
> >
> >  /* An instance of a traffic control class.  Always associated with a
> > particular
> > @@ -648,40 +652,6 @@ static void netdev_linux_changed(struct netdev_linux
> > *netdev,
> >                                   unsigned int ifi_flags, unsigned int
> > mask)
> >      OVS_REQUIRES(netdev->mutex);
> >
> > -/* Returns a NETLINK_ROUTE socket listening for RTNLGRP_LINK,
> > - * RTNLGRP_IPV4_IFADDR and RTNLGRP_IPV6_IFADDR changes, or NULL
> > - * if no such socket could be created. */
> > -static struct nl_sock *
> > -netdev_linux_notify_sock(void)
> > -{
> > -    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
> > -    static struct nl_sock *sock;
> > -    unsigned int mcgroups[] = {RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR,
> > -                                RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO};
> > -
> > -    if (ovsthread_once_start(&once)) {
> > -        int error;
> > -
> > -        error = nl_sock_create(NETLINK_ROUTE, &sock);
> > -        if (!error) {
> > -            size_t i;
> > -
> > -            nl_sock_listen_all_nsid(sock, true);
> > -            for (i = 0; i < ARRAY_SIZE(mcgroups); i++) {
> > -                error = nl_sock_join_mcgroup(sock, mcgroups[i]);
> > -                if (error) {
> > -                    nl_sock_destroy(sock);
> > -                    sock = NULL;
> > -                    break;
> > -                }
> > -            }
> > -        }
> > -        ovsthread_once_done(&once);
> > -    }
> > -
> > -    return sock;
> > -}
> > -
> >  static bool
> >  netdev_linux_miimon_enabled(void)
> >  {
> > @@ -699,7 +669,7 @@ netdev_linux_kind_is_lag(const char *kind)
> >  }
> >
> >  static void
> > -netdev_linux_update_lag(struct rtnetlink_change *change)
> > +netdev_linux_update_lag(const struct rtnetlink_change *change)
> >      OVS_REQUIRES(lag_mutex)
> >  {
> >      struct linux_lag_member *lag;
> > @@ -763,101 +733,128 @@ netdev_linux_update_lag(struct rtnetlink_change
> > *change)
> >      }
> >  }
> >
> > -void
> > -netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
> > +static void
> > +netdev_linux_rtnetlink_update(const struct rtnetlink_change *change,
> > +                              void *aux OVS_UNUSED)
> >  {
> > -    struct nl_sock *sock;
> > -    int error;
> > +    struct netdev *netdev_ = NULL;
> >
> > -    if (netdev_linux_miimon_enabled()) {
> > -        netdev_linux_miimon_run();
> > -    }
> > +    if (change && change->irrelevant) {
> > +        return;
> > +    } else if (!change) {
> > +        struct shash device_shash;
> > +        struct shash_node *node;
> >
> > -    sock = netdev_linux_notify_sock();
> > -    if (!sock) {
> > +        shash_init(&device_shash);
> > +        netdev_get_devices(&netdev_linux_class, &device_shash);
> > +        SHASH_FOR_EACH (node, &device_shash) {
> > +            struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> > +            netdev_ = node->data;
> > +            int error;
> > +
> > +            ovs_mutex_lock(&netdev->mutex);
> > +            error = update_flags_local(netdev);
> > +            netdev_linux_changed(netdev, netdev->ifi_flags,
> > +                                 error ? 0 : VALID_FLAGS);
> > +            ovs_mutex_unlock(&netdev->mutex);
> > +
> > +            netdev_close(netdev_);
> > +        }
> > +        shash_destroy(&device_shash);
> >          return;
> >      }
> >
> > -    do {
> > -        uint64_t buf_stub[4096 / 8];
> > -        int nsid;
> > -        struct ofpbuf buf;
> > +    if (change->ifname) {
> > +        netdev_ = netdev_from_name(change->ifname);
> > +    }
> >
> > -        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
> > -        error = nl_sock_recv(sock, &buf, &nsid, false);
> > -        if (!error) {
> > -            struct rtnetlink_change change;
> > +    if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {
> > +        struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> >
> > -            if (rtnetlink_parse(&buf, &change) && !change.irrelevant) {
> > -                struct netdev *netdev_ = NULL;
> > -                char dev_name[IFNAMSIZ];
> > +        ovs_mutex_lock(&netdev->mutex);
> > +        netdev_linux_update(netdev, change->nsid, change);
> > +        ovs_mutex_unlock(&netdev->mutex);
> > +    }
> >
> > -                if (!change.ifname) {
> > -                     change.ifname = if_indextoname(change.if_index,
> > dev_name);
> > -                }
> > +    if (change->ifname &&
> > +        rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) {
> >
> > -                if (change.ifname) {
> > -                    netdev_ = netdev_from_name(change.ifname);
> > -                }
> > -                if (netdev_ &&
> > is_netdev_linux_class(netdev_->netdev_class)) {
> > -                    struct netdev_linux *netdev =
> > netdev_linux_cast(netdev_);
> > +        /* Need to try updating the LAG information. */
> > +        ovs_mutex_lock(&lag_mutex);
> > +        netdev_linux_update_lag(change);
> > +        ovs_mutex_unlock(&lag_mutex);
> > +    }
> > +    netdev_close(netdev_);
> > +}
> >
> > -                    ovs_mutex_lock(&netdev->mutex);
> > -                    netdev_linux_update(netdev, nsid, &change);
> > -                    ovs_mutex_unlock(&netdev->mutex);
> > -                }
> > +static void
> > +netdev_linux_nln_cb(const void *change, void *aux)
> > +{
> > +    netdev_linux_rtnetlink_update(change, aux);
> > +}
> >
> > -                if (change.ifname &&
> > -                    rtnetlink_type_is_rtnlgrp_link(change.nlmsg_type)) {
> > +static void
> > +netdev_linux_rtnetlink_config(void)
> > +{
> > +    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
> >
> > -                    /* Need to try updating the LAG information. */
> > -                    ovs_mutex_lock(&lag_mutex);
> > -                    netdev_linux_update_lag(&change);
> > -                    ovs_mutex_unlock(&lag_mutex);
> > -                }
> > -                netdev_close(netdev_);
> > -            }
> > -        } else if (error == ENOBUFS) {
> > -            struct shash device_shash;
> > -            struct shash_node *node;
> > +    if (ovsthread_once_start(&once)) {
> > +        unsigned int extra_mcgroups[] = {RTNLGRP_IPV4_IFADDR,
> > +                                RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO};
> > +        struct nln_notifier *notifier;
> > +        int i;
> >
> > -            nl_sock_drain(sock);
> > +        ovs_assert(ARRAY_SIZE(notifiers) == ARRAY_SIZE(extra_mcgroups) +
> > 1);
> >
>
> This should probably be a BUILD_ASSERT_DECL, but why even have the
> notifiers array? It doesn't seem to be used?
>

Right, a build assertion should be enough.

And it's true that they are not being used at the moment. The problem is
that we don't really have a way to "de-initialize" the netdev-linux
subsystem: deinitialization is process destruction.

In theory we should hold on to the pointer returned by
"nln_notifier_create()" and call "nln_notifier_destroy()" on them.
However, I don't think we have the means to do that. This is something
that happens other nln_notifier users as well (see route-table.c). It
feels wrong not deinitializing this but it also feels wrong just
ignoring these pointers altogether. Do you have any suggestion?

>
> >
> > -            shash_init(&device_shash);
> > -            netdev_get_devices(&netdev_linux_class, &device_shash);
> > -            SHASH_FOR_EACH (node, &device_shash) {
> > -                struct netdev *netdev_ = node->data;
> > -                struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> > -
> > -                ovs_mutex_lock(&netdev->mutex);
> > -                update_flags_local(netdev);
> > -                netdev_linux_changed(netdev, netdev->ifi_flags,
> > VALID_FLAGS);
> > -                ovs_mutex_unlock(&netdev->mutex);
> > -
> > -                netdev_close(netdev_);
> > -            }
> > -            shash_destroy(&device_shash);
> > -        } else if (error != EAGAIN) {
> > -            static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1,
> > 5);
> > -            VLOG_WARN_RL(&rll, "error reading or parsing netlink (%s)",
> > -                         ovs_strerror(error));
> > +        nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb,
> > +                         &netlink_change);
> >
>
> netlink_change probably doesn't have to be a global.
>

Hmm... I think it is. It becomes an opaque pointer that is stored in
"struct nln", and passed to the notifier and parsing callbacks. How else
would you ensure it remains valid?

>
> > +        if (!nln) {
> > +            VLOG_ERR("failed to create nln notifier");
> >          }
> > -        ofpbuf_uninit(&buf);
> > -    } while (!error);
> > +
> > +        for (i = 0; i < ARRAY_SIZE(extra_mcgroups); i++) {
> > +            notifier = nln_notifier_create(nln, extra_mcgroups[i],
> > +                                           netdev_linux_nln_cb, NULL);
> > +            if (!notifier) {
> > +                VLOG_ERR("failed to create nln notifier for mcgroup %d",
> > +                         extra_mcgroups[i]);
> > +            }
> > +            notifiers[i] = notifier;
> > +        }
> > +
> > +        notifier =
> > rtnetlink_notifier_create(netdev_linux_rtnetlink_update,
> > +                                             NULL);
> > +        if (!notifier) {
> > +            VLOG_ERR("failed to create rtnetlink notifier");
> > +        }
> > +        notifiers[i] = notifier;
> > +
> > +        ovsthread_once_done(&once);
> > +    }
> > +}
> > +
> > +void
> > +netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
> > +{
> > +    rtnetlink_run();
> > +    if (nln) {
> > +        nln_run(nln);
> > +    }
> > +    if (netdev_linux_miimon_enabled()) {
> > +        netdev_linux_miimon_run();
> >
>
> This patch changes the order of miimon_run to after netlink socket
> operations. Was that intentional?
>

Not really, no. I'll inverse the order.

>
> > +    }
> >  }
> >
> >  static void
> >  netdev_linux_wait(const struct netdev_class *netdev_class OVS_UNUSED)
> >  {
> > -    struct nl_sock *sock;
> > -
> > +    rtnetlink_wait();
> > +    if (nln) {
> > +        nln_wait(nln);
> > +    }
> >      if (netdev_linux_miimon_enabled()) {
> >          netdev_linux_miimon_wait();
> >      }
> > -    sock = netdev_linux_notify_sock();
> > -    if (sock) {
> > -        nl_sock_wait(sock, POLLIN);
> > -    }
> >  }
> >
> >  static void
> > @@ -900,9 +897,6 @@ netdev_linux_update__(struct netdev_linux *dev,
> >                  dev->etheraddr = change->mac;
> >                  dev->cache_valid |= VALID_ETHERADDR;
> >                  dev->ether_addr_error = 0;
> > -
> > -                /* The mac addr has been changed, report it now. */
> > -                rtnetlink_report_link();
> >              }
> >
> >              if (change->primary &&
> > netdev_linux_kind_is_lag(change->primary)) {
> > @@ -968,6 +962,8 @@ netdev_linux_common_construct(struct netdev *netdev_)
> >          return ENAMETOOLONG;
> >      }
> >
> > +    netdev_linux_rtnetlink_config();
> > +
> >      /* The device could be in the same network namespace or in another
> > one. */
> >      netnsid_unset(&netdev->netnsid);
> >      ovs_mutex_init(&netdev->mutex);
> > diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h
> > index ec19b0ded..81a5634f6 100644
> > --- a/lib/netdev-linux.h
> > +++ b/lib/netdev-linux.h
> > @@ -25,6 +25,7 @@
> >   * Linux-specific code. */
> >
> >  struct netdev;
> > +struct rtnetlink_change;
> >
> >  int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag,
> >                                    const char *flag_name, bool enable);
> > diff --git a/lib/netlink-notifier.h b/lib/netlink-notifier.h
> > index 8b6c32048..d52e5f56c 100644
> > --- a/lib/netlink-notifier.h
> > +++ b/lib/netlink-notifier.h
> > @@ -1,5 +1,4 @@
> > -/*
> > - * Copyright (c) 2009 Nicira, Inc.
> > +/* Copyright (c) 2009 Nicira, Inc.
> >   *
> >   * Licensed under the Apache License, Version 2.0 (the "License");
> >   * you may not use this file except in compliance with the License.
> > diff --git a/lib/rtnetlink.c b/lib/rtnetlink.c
> > index 6b63afa06..4ae050ff0 100644
> > --- a/lib/rtnetlink.c
> > +++ b/lib/rtnetlink.c
> > @@ -191,7 +191,7 @@ rtnetlink_parse(struct ofpbuf *buf, struct
> > rtnetlink_change *change)
> >  }
> >
> >  /* Return RTNLGRP_LINK on success, 0 on parse error. */
> > -static int
> > +int
> >  rtnetlink_parse_cb(struct ofpbuf *buf, int nsid, void *change)
> >  {
> >      bool ret = rtnetlink_parse(buf, change);
> > @@ -212,12 +212,20 @@ rtnetlink_parse_cb(struct ofpbuf *buf, int nsid,
> > void *change)
> >   *
> >   * xxx Joins more multicast groups when needed.
> >   *
> > + * Callbacks might find that netdev-linux netdevs still hold outdated
> > cached
> > + * information. If the notification has to trigger some kind of
> > reconfiguration
> > + * that requires up-to-date netdev cache, it should do it asynchronously,
> > for
> > + * instance by setting a flag in the callback and acting on it during the
> > + * normal "*_run()" operation.
> > + *
> > + * Notifications might come from any network namespace.
> > + *
> >   * Returns an initialized nln_notifier if successful, NULL otherwise. */
> >  struct nln_notifier *
> >  rtnetlink_notifier_create(rtnetlink_notify_func *cb, void *aux)
> >  {
> >      if (!nln) {
> > -        nln = nln_create(NETLINK_ROUTE, false, rtnetlink_parse_cb,
> > +        nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb,
> >                           &rtn_change);
> >      }
> >
> > diff --git a/lib/rtnetlink.h b/lib/rtnetlink.h
> > index 9b2397b4e..f0b49bf1a 100644
> > --- a/lib/rtnetlink.h
> > +++ b/lib/rtnetlink.h
> > @@ -76,4 +76,5 @@ void rtnetlink_notifier_destroy(struct nln_notifier *);
> >  void rtnetlink_run(void);
> >  void rtnetlink_wait(void);
> >  void rtnetlink_report_link(void);
> > +int rtnetlink_parse_cb(struct ofpbuf *buf, int nsid, void *change);
> >  #endif /* rtnetlink.h */
> > --
> > 2.53.0
> >
> > _______________________________________________
> > dev mailing list
> > [email protected]
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
> >

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to