On 4/24/26 3:23 PM, Ales Musil wrote:
> Add nexthop_exchange engine node that handles nexthop updates
> keeping the internal nexthop mapping up to date. The noop handler
> for evpn_fdb will be replaced by later commit.
> 
> Signed-off-by: Ales Musil <[email protected]>
> ---

Hi Ales,

>  controller/nexthop-exchange-stub.c | 20 +++++++
>  controller/nexthop-exchange.c      | 82 ++++++++++++++++++++++-----
>  controller/nexthop-exchange.h      |  5 ++
>  controller/ovn-controller.c        | 89 ++++++++++++++++++++++++++++++
>  tests/ovn-inc-proc-graph-dump.at   |  3 +
>  5 files changed, 184 insertions(+), 15 deletions(-)
> 
> diff --git a/controller/nexthop-exchange-stub.c 
> b/controller/nexthop-exchange-stub.c
> index 2742dc7e2..52c1bf028 100644
> --- a/controller/nexthop-exchange-stub.c
> +++ b/controller/nexthop-exchange-stub.c
> @@ -18,6 +18,7 @@
>  #include "lib/netlink.h"
>  #include "openvswitch/hmap.h"
>  #include "openvswitch/ofpbuf.h"
> +#include "vec.h"
>  
>  #include "nexthop-exchange.h"
>  
> @@ -34,9 +35,28 @@ nexthop_entry_format(struct ds *ds OVS_UNUSED,
>  {
>  }
>  
> +struct nexthop_entry *
> +nexthop_entry_find(const struct hmap *nexthops OVS_UNUSED,
> +                   uint32_t id OVS_UNUSED)
> +{
> +    return NULL;
> +}
> +
>  int
>  nh_table_parse(struct ofpbuf *buf OVS_UNUSED,
>                 struct nh_table_msg *change OVS_UNUSED)
>  {
>      return 0;
>  }
> +
> +bool
> +nexthops_handle_changes(struct hmap *nexthops OVS_UNUSED,
> +                        struct vector *msgs OVS_UNUSED)
> +{
> +    return false;
> +}
> +
> +void
> +nexthops_destroy(struct hmap *nexthops OVS_UNUSED)
> +{
> +}
> diff --git a/controller/nexthop-exchange.c b/controller/nexthop-exchange.c
> index a2ad643a6..8718b893f 100644
> --- a/controller/nexthop-exchange.c
> +++ b/controller/nexthop-exchange.c
> @@ -19,9 +19,11 @@
>  
>  #include "lib/netlink.h"
>  #include "lib/netlink-socket.h"
> +#include "hmapx.h"
>  #include "openvswitch/ofpbuf.h"
>  #include "openvswitch/vlog.h"
>  #include "packets.h"
> +#include "vec.h"
>  
>  #include "nexthop-exchange.h"
>  
> @@ -109,6 +111,20 @@ nexthop_entry_format(struct ds *ds, const struct 
> nexthop_entry *nhe)
>      }
>  }
>  
> +struct nexthop_entry *
> +nexthop_entry_find(const struct hmap *nexthops, uint32_t id)
> +{
> +    uint32_t hash = nexthop_entry_hash(id);
> +    struct nexthop_entry *nhe;
> +    HMAP_FOR_EACH_WITH_HASH (nhe, hmap_node, hash, nexthops) {
> +        if (nhe->id == id) {
> +            return nhe;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
>  /* Parse Netlink message in buf, which is expected to contain a UAPI nhmsg
>   * header and associated nexthop attributes. This will allocate
>   * 'struct nexthop_entry' which needs to be freed by the caller.
> @@ -128,6 +144,56 @@ nh_table_parse(struct ofpbuf *buf, struct nh_table_msg 
> *change)
>                              nlmsg, change);
>  }
>  
> +bool
> +nexthops_handle_changes(struct hmap *nexthops, struct vector *msgs)
> +{
> +    if (vector_is_empty(msgs)) {
> +        return false;
> +    }
> +
> +    struct hmapx updated_groups = HMAPX_INITIALIZER(&updated_groups);
> +
> +    struct nh_table_msg *msg;
> +    VECTOR_FOR_EACH_PTR (msgs, msg) {
> +        struct nexthop_entry *nhe = nexthop_entry_find(nexthops, 
> msg->nhe->id);
> +        if (nhe) {
> +            hmap_remove(nexthops, &nhe->hmap_node);

We assume here that if nhe used to be referenced by a different
nexthop_grp_entry->gateway, the same batch of changes will also include
an RTM_DELNEXTHOP for that nexthop too.

It feels a bit fragile but it's probably fine.  The alternative would be
to store backrefs but that seems a bit excessive.

Let's leave it as is.

> +            free(nhe);
> +        }
> +
> +        if (msg->nlmsg_type == RTM_NEWNEXTHOP) {
> +            hmap_insert(nexthops, &msg->nhe->hmap_node,
> +                        nexthop_entry_hash(msg->nhe->id));
> +
> +            if (msg->nhe->n_grps) {
> +                hmapx_add(&updated_groups, msg->nhe);
> +            }
> +
> +            /* The nexthop entry moved into the hmap, prevent double free. */
> +            msg->nhe = NULL;
> +        }
> +    }
> +
> +    struct hmapx_node *hmapx_node;
> +    HMAPX_FOR_EACH (hmapx_node, &updated_groups) {
> +        struct nexthop_entry *nhe = hmapx_node->data;
> +        nh_populate_grp_pointers(nhe, nexthops);
> +    }
> +
> +    hmapx_destroy(&updated_groups);
> +
> +    return true;
> +}
> +
> +void
> +nexthops_destroy(struct hmap *nexthops)
> +{
> +    struct nexthop_entry *entry;
> +    HMAP_FOR_EACH_POP (entry, hmap_node, nexthops) {
> +        free(entry);
> +    }
> +}
> +
>  static int
>  nh_table_parse__(struct ofpbuf *buf, size_t ofs, const struct nlmsghdr 
> *nlmsg,
>                   struct nh_table_msg *change)
> @@ -214,25 +280,11 @@ nexthop_entry_hash(uint32_t id)
>      return hash_int(id, 0);
>  }
>  
> -static struct nexthop_entry *
> -nexthop_find(struct hmap *nexthops, uint32_t id)
> -{
> -    uint32_t hash = nexthop_entry_hash(id);
> -    struct nexthop_entry *nhe;
> -    HMAP_FOR_EACH_WITH_HASH (nhe, hmap_node, hash, nexthops) {
> -        if (nhe->id == id) {
> -            return nhe;
> -        }
> -    }
> -
> -    return NULL;
> -}
> -
>  static void
>  nh_populate_grp_pointers(struct nexthop_entry *nhe, struct hmap *nexthops)
>  {
>      for (size_t i = 0; i < nhe->n_grps; i++) {
>          struct nexthop_grp_entry *grp = &nhe->grps[i];
> -        grp->gateway = nexthop_find(nexthops, grp->id);
> +        grp->gateway = nexthop_entry_find(nexthops, grp->id);
>      }
>  }
> diff --git a/controller/nexthop-exchange.h b/controller/nexthop-exchange.h
> index e94fdc73a..73f08c2fe 100644
> --- a/controller/nexthop-exchange.h
> +++ b/controller/nexthop-exchange.h
> @@ -23,6 +23,7 @@
>  
>  struct ds;
>  struct ofpbuf;
> +struct vector;
>  
>  struct nexthop_grp_entry {
>      /* The id of the nexthop gateway. */
> @@ -56,6 +57,10 @@ struct nh_table_msg {
>  
>  void nexthops_sync(struct hmap *nexthops);
>  void nexthop_entry_format(struct ds *ds, const struct nexthop_entry *nhe);
> +struct nexthop_entry *nexthop_entry_find(const struct hmap *nexthops,
> +                                         uint32_t id);
>  int nh_table_parse(struct ofpbuf *, struct nh_table_msg *change);
> +bool nexthops_handle_changes(struct hmap *nexthops, struct vector *msgs);
> +void nexthops_destroy(struct hmap *nexthops);
>  
>  #endif /* NEXTHOP_EXCHANGE_H */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 35a5cd0b4..c46530f90 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -99,6 +99,7 @@
>  #include "neighbor.h"
>  #include "neighbor-exchange.h"
>  #include "neighbor-exchange-netlink.h"
> +#include "nexthop-exchange.h"
>  #include "evpn-arp.h"
>  #include "evpn-binding.h"
>  #include "evpn-fdb.h"
> @@ -6344,6 +6345,83 @@ en_neighbor_table_notify_run(struct engine_node *node 
> OVS_UNUSED,
>      return state;
>  }
>  
> +/* The nexthop_exchange node is an input node, but is enabled/disabled
> + * based on en_neighbor_exchange node. The reason being that engine
> + * periodically runs input nodes to check if there are updates, so it could
> + * be polled for updates without requiring other nodes to run first. */
> +struct ed_type_nexthop_exchange {
> +    struct hmap nexthops;
> +    bool enabled;
> +    bool recompute;
> +};
> +
> +static void *
> +en_nexthop_exchange_init(struct engine_node *node OVS_UNUSED,
> +                         struct engine_arg *arg OVS_UNUSED)
> +{
> +    struct ed_type_nexthop_exchange *nhe_data = xmalloc(sizeof *nhe_data);
> +    *nhe_data = (struct ed_type_nexthop_exchange) {
> +        .nexthops = HMAP_INITIALIZER(&nhe_data->nexthops),
> +        .enabled = false,
> +        .recompute = true,
> +    };
> +
> +    return nhe_data;
> +}
> +
> +static void
> +en_nexthop_exchange_cleanup(void *data)
> +{
> +    struct ed_type_nexthop_exchange *nhe_data = data;
> +    nexthops_destroy(&nhe_data->nexthops);
> +    hmap_destroy(&nhe_data->nexthops);
> +}
> +
> +static enum engine_node_state
> +en_nexthop_exchange_run(struct engine_node *node OVS_UNUSED, void *data)
> +{
> +    struct ed_type_nexthop_exchange *nhe_data = data;
> +
> +    if (!nhe_data->enabled) {
> +        return EN_UNCHANGED;
> +    }
> +
> +    if (nhe_data->recompute) {
> +        nexthops_destroy(&nhe_data->nexthops);
> +        nexthops_sync(&nhe_data->nexthops);
> +        /* We are doing a full sync, let's clear any data
> +         * that might accumulate in the meantime. */
> +        ovn_netlink_notifier_flush(OVN_NL_NOTIFIER_NEXTHOP);
> +
> +        nhe_data->recompute = false;
> +        return EN_UPDATED;
> +    }
> +
> +    struct vector *msgs = ovn_netlink_get_msgs(OVN_NL_NOTIFIER_NEXTHOP);
> +    bool updated = nexthops_handle_changes(&nhe_data->nexthops, msgs);
> +    ovn_netlink_notifier_flush(OVN_NL_NOTIFIER_NEXTHOP);
> +
> +    return updated ? EN_UPDATED : EN_UNCHANGED;
> +}
> +
> +static void
> +nexthop_exchange_update(struct ed_type_nexthop_exchange *nhe_data,
> +                        bool enabled)
> +{
> +    if (nhe_data->enabled == enabled) {
> +        return;
> +    }
> +
> +    if (nhe_data->enabled && !enabled) {
> +        nexthops_destroy(&nhe_data->nexthops);
> +    } else if (!nhe_data->enabled && enabled) {
> +        nhe_data->recompute = true;
> +    }
> +
> +    nhe_data->enabled = enabled;
> +    ovn_netlink_update_notifier(OVN_NL_NOTIFIER_NEXTHOP, enabled);
> +}
> +
>  struct ed_type_neighbor_exchange {
>      /* Contains 'struct evpn_remote_vtep'. */
>      struct hmap remote_vteps;
> @@ -6389,6 +6467,8 @@ en_neighbor_exchange_run(struct engine_node *node, void 
> *data_)
>          engine_get_input_data("neighbor", node);
>      struct ed_type_neighbor_table_notify *nt_notify =
>          engine_get_input_data("neighbor_table_notify", node);
> +    struct ed_type_nexthop_exchange *nhe_data =
> +        engine_get_input_data("nexthop_exchange", node);
>  
>      evpn_remote_vteps_clear(&data->remote_vteps);
>      evpn_static_entries_clear(&data->static_fdbs);
> @@ -6407,6 +6487,7 @@ en_neighbor_exchange_run(struct engine_node *node, void 
> *data_)
>  
>      neighbor_exchange_run(&n_ctx_in, &n_ctx_out);
>      neighbor_table_notify_update(&nt_notify->watches);
> +    nexthop_exchange_update(nhe_data, !vector_is_empty(&nt_notify->watches));

This reads a bit weird to be honest.  'nhe_data' is the data of the
"nexthop_exchange" node, which is an input of the "neighbor_exchange"
node whose run() function we're executing here.

So we're changing the data owned by an input node inside a different
node's run callback.  That is not really nice.

OTOH, we already broke that pattern with 5ec13ae6eff6 ("controller:
Consolidate the netlink notifiers.") as we're doing the same thing
(changing input node data) for rt_notify.

Let's keep it for now but maybe we should follow up with a finer grain
approach that doesn't require this kind of semantics?

>  
>      return EN_UPDATED;
>  }
> @@ -6864,6 +6945,7 @@ static ENGINE_NODE(neighbor);
>  static ENGINE_NODE(neighbor_table_notify);
>  static ENGINE_NODE(neighbor_exchange);
>  static ENGINE_NODE(neighbor_exchange_status);
> +static ENGINE_NODE(nexthop_exchange);
>  static ENGINE_NODE(evpn_vtep_binding, CLEAR_TRACKED_DATA);
>  static ENGINE_NODE(evpn_fdb, CLEAR_TRACKED_DATA);
>  static ENGINE_NODE(evpn_arp, CLEAR_TRACKED_DATA);
> @@ -7117,6 +7199,10 @@ inc_proc_ovn_controller_init(
>      engine_add_input(&en_neighbor_exchange, &en_neighbor_table_notify, NULL);
>      engine_add_input(&en_neighbor_exchange, &en_neighbor_exchange_status,
>                       NULL);
> +    /* We just need to enable/disable the nexthop exchange based on
> +     * the neighbor status.  */
> +    engine_add_input(&en_neighbor_exchange, &en_nexthop_exchange,
> +                     engine_noop_handler);
>  
>      engine_add_input(&en_evpn_vtep_binding, &en_ovs_open_vswitch, NULL);
>      engine_add_input(&en_evpn_vtep_binding, &en_ovs_bridge, NULL);
> @@ -7133,6 +7219,9 @@ inc_proc_ovn_controller_init(
>      engine_add_input(&en_evpn_fdb, &en_neighbor_exchange, NULL);
>      engine_add_input(&en_evpn_fdb, &en_evpn_vtep_binding,
>                       evpn_fdb_vtep_binding_handler);
> +    /* XXX: This is just a place holder and it will be updated later on. */
> +    engine_add_input(&en_evpn_fdb, &en_nexthop_exchange,
> +                     engine_noop_handler);
>  
>      engine_add_input(&en_evpn_arp, &en_neighbor_exchange, NULL);
>      engine_add_input(&en_evpn_arp, &en_evpn_vtep_binding,
> diff --git a/tests/ovn-inc-proc-graph-dump.at 
> b/tests/ovn-inc-proc-graph-dump.at
> index 178310978..6b4d94835 100644
> --- a/tests/ovn-inc-proc-graph-dump.at
> +++ b/tests/ovn-inc-proc-graph-dump.at
> @@ -401,11 +401,13 @@ digraph "Incremental-Processing-Engine" {
>       host_if_monitor [[style=filled, shape=box, fillcolor=white, 
> label="host_if_monitor"]];
>       neighbor_table_notify [[style=filled, shape=box, fillcolor=white, 
> label="neighbor_table_notify"]];
>       neighbor_exchange_status [[style=filled, shape=box, fillcolor=white, 
> label="neighbor_exchange_status"]];
> +     nexthop_exchange [[style=filled, shape=box, fillcolor=white, 
> label="nexthop_exchange"]];
>       neighbor_exchange [[style=filled, shape=box, fillcolor=white, 
> label="neighbor_exchange"]];
>       neighbor -> neighbor_exchange [[label=""]];
>       host_if_monitor -> neighbor_exchange [[label=""]];
>       neighbor_table_notify -> neighbor_exchange [[label=""]];
>       neighbor_exchange_status -> neighbor_exchange [[label=""]];
> +     nexthop_exchange -> neighbor_exchange [[label="engine_noop_handler"]];
>       evpn_vtep_binding [[style=filled, shape=box, fillcolor=white, 
> label="evpn_vtep_binding"]];
>       OVS_open_vswitch -> evpn_vtep_binding [[label=""]];
>       OVS_bridge -> evpn_vtep_binding [[label=""]];
> @@ -416,6 +418,7 @@ digraph "Incremental-Processing-Engine" {
>       evpn_fdb [[style=filled, shape=box, fillcolor=white, label="evpn_fdb"]];
>       neighbor_exchange -> evpn_fdb [[label=""]];
>       evpn_vtep_binding -> evpn_fdb [[label="evpn_fdb_vtep_binding_handler"]];
> +     nexthop_exchange -> evpn_fdb [[label="engine_noop_handler"]];
>       evpn_arp [[style=filled, shape=box, fillcolor=white, label="evpn_arp"]];
>       neighbor_exchange -> evpn_arp [[label=""]];
>       evpn_vtep_binding -> evpn_arp [[label="evpn_arp_vtep_binding_handler"]];

Applied to main, thanks!

Regards,
Dumitru

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to