On Wed, Sep 10, 2025 at 9:44 AM Dumitru Ceara <[email protected]> wrote:

> On 9/10/25 7:59 AM, Ales Musil via dev wrote:
> > We were removing routes while still iterating over them,
> > while it might work it's not a good practice. Postpone
> > the removal after the dump is done.
> >
> > Reported-at: https://issues.redhat.com/browse/FDP-1594
> > Signed-off-by: Ales Musil <[email protected]>
> > ---
>
> Hi Ales,
>
> Thanks for the fix!
>
> There's one CI test failure in the ovsrobot run of this patch but it's
> not related to the change:
>
> https://github.com/ovsrobot/ovn/actions/runs/17604844865/job/50013775059#step:11:4605


Thank you Dumitru,

it seems that we have a new flake.


>
>
> >  controller/route-exchange-netlink.c | 70 ++++++++++++++++++++---------
> >  1 file changed, 50 insertions(+), 20 deletions(-)
> >
> > diff --git a/controller/route-exchange-netlink.c
> b/controller/route-exchange-netlink.c
> > index cec4b1ec3..91f059492 100644
> > --- a/controller/route-exchange-netlink.c
> > +++ b/controller/route-exchange-netlink.c
> > @@ -200,9 +200,9 @@ struct route_msg_handle_data {
> >      const struct sbrec_datapath_binding *db;
> >      struct hmapx *routes_to_advertise;
> >      struct vector *learned_routes;
> > +    struct vector *stale_routes;
> >      const struct hmap *routes;
> >      uint32_t table_id; /* requested table id. */
> > -    int ret;
> >  };
> >
> >  static void
> > @@ -211,7 +211,6 @@ handle_route_msg(const struct route_table_msg *msg,
> void *data)
> >      struct route_msg_handle_data *handle_data = data;
> >      const struct route_data *rd = &msg->rd;
> >      struct advertise_route_entry *ar;
> > -    int err;
> >
> >      if (handle_data->table_id != rd->rta_table_id) {
> >          /* We do not have the NLM_F_DUMP_FILTERED info here, so check
> if the
> > @@ -261,23 +260,37 @@ handle_route_msg(const struct route_table_msg
> *msg, void *data)
> >              }
> >          }
> >      }
> > -    err = re_nl_delete_route(rd->rta_table_id, &rd->rta_dst,
> > -                             rd->rtm_dst_len, rd->rta_priority);
> > -    if (err) {
> > -        char addr_s[INET6_ADDRSTRLEN + 1];
> > -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> > -        VLOG_WARN_RL(&rl, "Delete route table_id=%"PRIu32" dst=%s
> plen=%d "
> > -                     "failed: %s", rd->rta_table_id,
> > -                     ipv6_string_mapped(
> > -                         addr_s, &rd->rta_dst) ? addr_s : "(invalid)",
> > -                     rd->rtm_dst_len,
> > -                     ovs_strerror(err));
> > -
> > -        if (!handle_data->ret) {
> > -            /* Report the first error value to the caller. */
> > -            handle_data->ret = err;
> > +
> > +    if (handle_data->stale_routes) {
> > +        vector_push(handle_data->stale_routes, rd);
> > +    }
> > +}
> > +
> > +static int
> > +re_nl_delete_stale_routes(const struct vector *stale_routes)
> > +{
> > +    int ret = 0;
> > +
> > +    const struct route_data *rd;
> > +    VECTOR_FOR_EACH_PTR (stale_routes, rd) {
> > +        int err = re_nl_delete_route(rd->rta_table_id, &rd->rta_dst,
> > +                                     rd->rtm_dst_len, rd->rta_priority);
> > +        if (err) {
> > +            char addr_s[INET6_ADDRSTRLEN + 1];
> > +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5,
> 20);
> > +            VLOG_WARN_RL(&rl, "Delete route table_id=%"PRIu32" dst=%s
> plen=%d "
> > +                         "failed: %s", rd->rta_table_id,
> > +                         ipv6_string_mapped(
> > +                             addr_s, &rd->rta_dst) ? addr_s :
> "(invalid)",
> > +                         rd->rtm_dst_len,
> > +                         ovs_strerror(err));
> > +            if (!ret) {
> > +                ret = err;
> > +            }
> >          }
> >      }
> > +
> > +    return ret;
> >  }
> >
> >  int
> > @@ -286,8 +299,9 @@ re_nl_sync_routes(uint32_t table_id, const struct
> hmap *routes,
> >                    const struct sbrec_datapath_binding *db)
> >  {
> >      struct hmapx routes_to_advertise =
> HMAPX_INITIALIZER(&routes_to_advertise);
> > +    struct vector stale_routes = VECTOR_EMPTY_INITIALIZER(struct
> route_data);
> >      struct advertise_route_entry *ar;
> > -    int ret;
> > +    int ret = 0;
> >
> >      HMAP_FOR_EACH (ar, node, routes) {
> >          hmapx_add(&routes_to_advertise, ar);
> > @@ -300,11 +314,11 @@ re_nl_sync_routes(uint32_t table_id, const struct
> hmap *routes,
> >          .routes = routes,
> >          .routes_to_advertise = &routes_to_advertise,
> >          .learned_routes = learned_routes,
> > +        .stale_routes = &stale_routes,
> >          .db = db,
> >          .table_id = table_id,
> >      };
> >      route_table_dump_one_table(table_id, handle_route_msg, &data);
> > -    ret = data.ret;
> >
> >      /* Add any remaining routes in the routes_to_advertise hmapx to the
> >       * system routing table. */
> > @@ -328,7 +342,14 @@ re_nl_sync_routes(uint32_t table_id, const struct
> hmap *routes,
> >              }
> >          }
> >      }
> > +
> > +    int err = re_nl_delete_stale_routes(&stale_routes);
> > +    if (!ret) {
> > +        ret = err;
> > +    }
> > +
> >      hmapx_destroy(&routes_to_advertise);
> > +    vector_destroy(&stale_routes);
> >
> >      return ret;
> >  }
> > @@ -336,15 +357,24 @@ re_nl_sync_routes(uint32_t table_id, const struct
> hmap *routes,
> >  int
> >  re_nl_cleanup_routes(uint32_t table_id)
> >  {
> > +    int ret = 0;
> > +    struct vector stale_routes = VECTOR_EMPTY_INITIALIZER(struct
> route_data);
> >      /* Remove routes from the system that are not in the host_routes
> hmap and
> >       * remove entries from host_routes hmap that match routes already
> installed
> >       * in the system. */
> >      struct route_msg_handle_data data = {
> >          .routes_to_advertise = NULL,
> >          .learned_routes = NULL,
> > +        .stale_routes = &stale_routes,
> >          .table_id = table_id,
> >      };
> >      route_table_dump_one_table(table_id, handle_route_msg, &data);
> >
> > -    return data.ret;
> > +    int err = re_nl_delete_stale_routes(&stale_routes);
>
> Nit: I think I would've just inlined the re_nl_delete_stale_routes()
> code here like we do in ne_nl_sync_neigh().  It saves us the check below
> and the function itself is not that large, IMO, it gives a better
> overview of what's going on.
>

This one is actually called twice once in "re_nl_cleanup_routes()"
and second in "re_nl_sync_routes()". That's why there is separate
function. Which is different from "ne_nl_sync_neigh()" where deletion
happens only once. With that said I'll leave it as is.


> But I'll leave it up to you:
>
> Acked-by: Dumitru Ceara <[email protected]>
>
> > +    if (!ret) {
> > +        ret = err;
> > +    }
> > +    vector_destroy(&stale_routes);
> > +
> > +    return ret;
> >  }
>
> Regards,
> Dumitru
>
>
I went ahead and merged this into main and backported to 25.09. I will have
to create a custom backport patch for 25.03, I'll send it out to the ML.

Regards,
Ales
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to