On Tue, Nov 19, 2024 at 9:11 AM Frode Nordahl <[email protected]> wrote:
>
> Hello, Felix,
>
> This is not a full review of this patch, but a statement below caught
> my attention and I would like to discuss it further with you.
>
> I was also unable to test due to assumptions made about the use of
> distributed gateway ports in previous patches in this series. I have
> also added an in-line note about learning being gated on export.
>
> On Thu, Oct 24, 2024 at 5:54 PM Felix Huettner via dev
> <[email protected]> wrote:
> >
> > We now learn all routes inside the VRFs in which we also advertise routes.
> > The routes are then placed in the southbound database for processing by
> > northd.
> >
> > Routes are only selected if matching the following rules:
> > 1. must not be a route advertised by us
> > 2. must not be a locally connected route (as we do not want to learn
> >    transfer networks)
> > 3. the prefix must not be a link local address
> >
> > However, we cannot reliably determine over which link we learned the
> > route when two LRPs of the same LR are on the same chassis.
> > For now we just assume the routes on both links are identical.
> > Future commits will refine this.
>
> From my perspective, I think it would be worth spending some more time
> on this point, as it is an essential building block for required
> functionality such as supporting ECMP out of the host as a replacement
> for bonds.
>
> Looking at this quickly in a lab environment, I can see that the
> routing protocol daemon in use labels learned and redistributed routes
> with an interface:
>
>     $ sudo ip -6 route show table 10
>     ...
>     default via fe80::216:3eff:fec4:63dc dev eth2-bgp proto ra metric 1024 expires 28sec hoplimit 64 pref medium
>     default via fe80::216:3eff:fe5c:7673 dev eth1-bgp proto ra metric 1024 expires 28sec hoplimit 64 pref medium

Ha, these actually appear to come from router advertisements and not
the routing protocol daemon!

> From your comment above, where does the reliability issue you have
> seen come from? Does the routing protocol daemon not always label
> routes with source interface, or is it just missing in the current
> implementation on the OVS/OVN side?
>
> > Signed-off-by: Felix Huettner <[email protected]>
> > ---
> >  controller/ovn-controller.c         |   8 ++
> >  controller/route-exchange-netlink.c |  41 +++++++-
> >  controller/route-exchange-netlink.h |  13 ++-
> >  controller/route-exchange.c         | 146 +++++++++++++++++++++++++++-
> >  controller/route-exchange.h         |   2 +
> >  lib/ovn-util.c                      |  10 ++
> >  lib/ovn-util.h                      |   1 +
> >  7 files changed, 216 insertions(+), 5 deletions(-)
> >
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 052ad8812..141d50fc0 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -4925,10 +4925,16 @@ route_runtime_data_handler(struct engine_node 
> > *node, void *data)
> >  static void
> >  en_route_exchange_run(struct engine_node *node, void *data OVS_UNUSED)
> >  {
> > +    struct ovsdb_idl_index *sbrec_route_by_datapath =
> > +        engine_ovsdb_node_get_index(
> > +            engine_get_input("SB_route", node), "datapath");
> > +
> >      struct ed_type_route *route_data =
> >          engine_get_input_data("route", node);
> >
> >      struct route_exchange_ctx_in r_ctx_in = {
> > +        .ovnsb_idl_txn = engine_get_context()->ovnsb_idl_txn,
> > +        .sbrec_route_by_datapath = sbrec_route_by_datapath,
> >          .announce_routes = &route_data->announce_routes,
> >      };
> >
> > @@ -5277,6 +5283,8 @@ main(int argc, char *argv[])
> >      engine_add_input(&en_route, &en_sb_route,
> >                       engine_noop_handler);
> >      engine_add_input(&en_route_exchange, &en_route, NULL);
> > +    engine_add_input(&en_route_exchange, &en_sb_route,
> > +                     engine_noop_handler);
> >
> >      engine_add_input(&en_addr_sets, &en_sb_address_set,
> >                       addr_sets_sb_address_set_handler);
> > diff --git a/controller/route-exchange-netlink.c 
> > b/controller/route-exchange-netlink.c
> > index 84c6baa15..ee33d08d2 100644
> > --- a/controller/route-exchange-netlink.c
> > +++ b/controller/route-exchange-netlink.c
> > @@ -26,6 +26,7 @@
> >  #include "openvswitch/ofpbuf.h"
> >  #include "openvswitch/vlog.h"
> >  #include "packets.h"
> > +#include "ovn-util.h"
> >  #include "route-table.h"
> >  #include "route.h"
> >
> > @@ -171,8 +172,27 @@ re_nl_delete_route(uint32_t table_id, const struct 
> > in6_addr *dst,
> >      return modify_route(RTM_DELROUTE, 0, table_id, dst, plen);
> >  }
> >
> > +static uint32_t
> > +route_hash(const struct in6_addr *dst, unsigned int plen)
> > +{
> > +    uint32_t hash = hash_bytes(dst->s6_addr, 16, 0);
> > +    return hash_int(plen, hash);
> > +}
> > +
> > +void
> > +re_nl_received_routes_destroy(struct hmap *host_routes)
> > +{
> > +    struct re_nl_received_route_node *rr;
> > +    HMAP_FOR_EACH_SAFE (rr, hmap_node, host_routes) {
> > +        hmap_remove(host_routes, &rr->hmap_node);
> > +        free(rr);
> > +    }
> > +    hmap_destroy(host_routes);
> > +}
> > +
> >  struct route_msg_handle_data {
> >      const struct hmap *routes;
> > +    struct hmap *learned_routes;
> >  };
> >
> >  static void
> > @@ -184,8 +204,24 @@ handle_route_msg_delete_routes(const struct 
> > route_table_msg *msg, void *data)
> >      struct advertise_route_entry *ar;
> >      int err;
> >
> > -    /* This route is not from us, we should not touch it. */
> > +    /* This route is not from us, so we learn it. */
> >      if (rd->rtm_protocol != RTPROT_OVN) {
> > +        if (prefix_is_link_local(&rd->rta_dst, rd->plen)) {
> > +            return;
> > +        }
> > +        for (int i = 0; i < rd->n_nexthops; i++) {
> > +            if (ipv6_is_zero(&rd->nexthops[i].rta_gw)) {
> > +                /* This is most likely an address on the local link.
> > +                 * As we just want to learn remote routes we do not need 
> > it.*/
> > +                continue;
> > +            }
> > +            struct re_nl_received_route_node *rr = xzalloc(sizeof *rr);
> > +            hmap_insert(handle_data->learned_routes, &rr->hmap_node,
> > +                        route_hash(&rd->rta_dst, rd->plen));
> > +            rr->addr = rd->rta_dst;
> > +            rr->plen = rd->plen;
> > +            rr->nexthop = rd->nexthops[i].rta_gw;
> > +        }
> >          return;
> >      }
> >
> > @@ -212,7 +248,7 @@ handle_route_msg_delete_routes(const struct 
> > route_table_msg *msg, void *data)
> >
> >  void
> >  re_nl_sync_routes(uint32_t table_id,
> > -                  const struct hmap *routes)
> > +                  const struct hmap *routes, struct hmap *learned_routes)
> >  {
> >      struct advertise_route_entry *ar;
> >      HMAP_FOR_EACH (ar, node, routes) {
> > @@ -224,6 +260,7 @@ re_nl_sync_routes(uint32_t table_id,
> >       * in the system. */
> >      struct route_msg_handle_data data = {
> >          .routes = routes,
> > +        .learned_routes = learned_routes,
> >      };
> >      route_table_dump_one_table(NULL, table_id, 
> > handle_route_msg_delete_routes,
> >                                 &data);
> > diff --git a/controller/route-exchange-netlink.h 
> > b/controller/route-exchange-netlink.h
> > index f87ebd75d..566b38fde 100644
> > --- a/controller/route-exchange-netlink.h
> > +++ b/controller/route-exchange-netlink.h
> > @@ -16,6 +16,8 @@
> >  #define ROUTE_EXCHANGE_NETLINK_H 1
> >
> >  #include <stdint.h>
> > +#include "openvswitch/hmap.h"
> > +#include <netinet/in.h>
> >
> >  /* This value is arbitrary but currently unused.
> >   * See 
> > https://github.com/iproute2/iproute2/blob/main/etc/iproute2/rt_protos */
> > @@ -24,6 +26,13 @@
> >  struct in6_addr;
> >  struct hmap;
> >
> > +struct re_nl_received_route_node {
> > +    struct hmap_node hmap_node;
> > +    struct in6_addr addr;
> > +    unsigned int plen;
> > +    struct in6_addr nexthop;
> > +};
> > +
> >  int re_nl_create_vrf(const char *ifname, uint32_t table_id);
> >  int re_nl_delete_vrf(const char *ifname);
> >
> > @@ -34,7 +43,9 @@ int re_nl_delete_route(uint32_t table_id, const struct 
> > in6_addr *dst,
> >
> >  void re_nl_dump(uint32_t table_id);
> >
> > +void re_nl_received_routes_destroy(struct hmap *);
> >  void re_nl_sync_routes(uint32_t table_id,
> > -                       const struct hmap *host_routes);
> > +                       const struct hmap *host_routes,
> > +                       struct hmap *learned_routes);
> >
> >  #endif /* route-exchange-netlink.h */
> > diff --git a/controller/route-exchange.c b/controller/route-exchange.c
> > index 86ccc92cb..41fea6398 100644
> > --- a/controller/route-exchange.c
> > +++ b/controller/route-exchange.c
> > @@ -34,6 +34,139 @@ static struct vlog_rate_limit rl = 
> > VLOG_RATE_LIMIT_INIT(5, 20);
> >
> >  static struct sset _maintained_vrfs = SSET_INITIALIZER(&_maintained_vrfs);
> >
> > +struct route_entry {
> > +    struct hmap_node hmap_node;
> > +
> > +    const struct sbrec_route *sb_route;
> > +
> > +    const struct sbrec_datapath_binding *sb_db;
> > +    char *logical_port;
> > +    char *ip_prefix;
> > +    char *nexthop;
> > +    bool stale;
> > +};
> > +
> > +static struct route_entry *
> > +route_alloc_entry(struct hmap *routes,
> > +                  const struct sbrec_datapath_binding *sb_db,
> > +                  const char *logical_port,
> > +                  const char *ip_prefix, const char *nexthop)
> > +{
> > +    struct route_entry *route_e = xzalloc(sizeof *route_e);
> > +
> > +    route_e->sb_db = sb_db;
> > +    route_e->logical_port = xstrdup(logical_port);
> > +    route_e->ip_prefix = xstrdup(ip_prefix);
> > +    route_e->nexthop = xstrdup(nexthop);
> > +    route_e->stale = false;
> > +    uint32_t hash = uuid_hash(&sb_db->header_.uuid);
> > +    hash = hash_string(logical_port, hash);
> > +    hash = hash_string(ip_prefix, hash);
> > +    hmap_insert(routes, &route_e->hmap_node, hash);
> > +
> > +    return route_e;
> > +}
> > +
> > +static struct route_entry *
> > +route_lookup_or_add(struct hmap *route_map,
> > +                    const struct sbrec_datapath_binding *sb_db,
> > +                    const char *logical_port, const char *ip_prefix,
> > +                    const char *nexthop)
> > +{
> > +    struct route_entry *route_e;
> > +    uint32_t hash;
> > +
> > +    hash = uuid_hash(&sb_db->header_.uuid);
> > +    hash = hash_string(logical_port, hash);
> > +    hash = hash_string(ip_prefix, hash);
> > +    HMAP_FOR_EACH_WITH_HASH (route_e, hmap_node, hash, route_map) {
> > +        if (!strcmp(route_e->nexthop, nexthop)) {
> > +            return route_e;
> > +        }
> > +    }
> > +
> > +    route_e = route_alloc_entry(route_map, sb_db,
> > +                                 logical_port, ip_prefix, nexthop);
> > +    return route_e;
> > +}
> > +
> > +static void
> > +route_erase_entry(struct route_entry *route_e)
> > +{
> > +    free(route_e->logical_port);
> > +    free(route_e->ip_prefix);
> > +    free(route_e->nexthop);
> > +    free(route_e);
> > +}
> > +
> > +static void
> > +sb_sync_learned_routes(const struct sbrec_datapath_binding *datapath,
> > +                       const struct hmap *learned_routes,
> > +                       const struct sset *bound_ports,
> > +                       struct ovsdb_idl_txn *ovnsb_idl_txn,
> > +                       struct ovsdb_idl_index *sbrec_route_by_datapath)
> > +{
> > +    struct hmap sync_routes = HMAP_INITIALIZER(&sync_routes);
> > +    struct route_entry *route_e;
> > +    const struct sbrec_route *sb_route;
> > +
> > +    struct sbrec_route *filter =
> > +            sbrec_route_index_init_row(sbrec_route_by_datapath);
> > +    sbrec_route_index_set_datapath(filter, datapath);
> > +    SBREC_ROUTE_FOR_EACH_EQUAL (sb_route, filter, sbrec_route_by_datapath) 
> > {
> > +        if (strcmp(sb_route->type, "receive")) {
> > +            continue;
> > +        }
> > +        /* If the port is not local we don't care about it.
> > +         * Some other ovn-controller will handle it. */
> > +        if (!sset_contains(bound_ports, sb_route->logical_port)) {
> > +            continue;
> > +        }
> > +        route_e = route_alloc_entry(&sync_routes,
> > +                                    sb_route->datapath,
> > +                                    sb_route->logical_port,
> > +                                    sb_route->ip_prefix,
> > +                                    sb_route->nexthop);
> > +        route_e->stale = true;
> > +        route_e->sb_route = sb_route;
> > +    }
> > +    sbrec_route_index_destroy_row(filter);
> > +
> > +    struct re_nl_received_route_node *learned_route;
> > +    HMAP_FOR_EACH (learned_route, hmap_node, learned_routes) {
> > +        char *ip_prefix = normalize_v46_prefix(&learned_route->addr,
> > +                                               learned_route->plen);
> > +        char *nexthop = normalize_v46(&learned_route->nexthop);
> > +
> > +        const char *logical_port;
> > +        SSET_FOR_EACH (logical_port, bound_ports) {
> > +            route_e = route_lookup_or_add(&sync_routes,
> > +                datapath,
> > +                logical_port, ip_prefix, nexthop);
> > +            route_e->stale = false;
> > +            if (!route_e->sb_route) {
> > +                sb_route = sbrec_route_insert(ovnsb_idl_txn);
> > +                sbrec_route_set_datapath(sb_route, datapath);
> > +                sbrec_route_set_logical_port(sb_route, logical_port);
> > +                sbrec_route_set_ip_prefix(sb_route, ip_prefix);
> > +                sbrec_route_set_nexthop(sb_route, nexthop);
> > +                sbrec_route_set_type(sb_route, "receive");
> > +                route_e->sb_route = sb_route;
> > +            }
> > +        }
> > +        free(ip_prefix);
> > +        free(nexthop);
> > +    }
> > +
> > +    HMAP_FOR_EACH_POP (route_e, hmap_node, &sync_routes) {
> > +        if (route_e->stale) {
> > +            sbrec_route_delete(route_e->sb_route);
> > +        }
> > +        route_erase_entry(route_e);
> > +    }
> > +    hmap_destroy(&sync_routes);
> > +}
> > +
> >  void
> >  route_exchange_run(struct route_exchange_ctx_in *r_ctx_in,
> >                     struct route_exchange_ctx_out *r_ctx_out OVS_UNUSED)
> > @@ -57,12 +190,21 @@ route_exchange_run(struct route_exchange_ctx_in 
> > *r_ctx_in,
> >                               "%"PRId64": %s.",
> >                               vrf_name, ad->key,
> >                               ovs_strerror(error));
> > -                continue;
> > +                goto out;
> >              }
> >              sset_add(&_maintained_vrfs, vrf_name);
> >          }
> >
> > -        re_nl_sync_routes(ad->key, &ad->routes);
> > +        re_nl_sync_routes(ad->key, &ad->routes,
> > +                          &received_routes);
> > +
> > +        sb_sync_learned_routes(ad->db, &received_routes,
> > +                               &ad->bound_ports,
> > +                               r_ctx_in->ovnsb_idl_txn,
> > +                               r_ctx_in->sbrec_route_by_datapath);
>
> Calling these here does not seem right to me: it would cause
> ovn-controller to learn routes only if it has already exported routes,
> regardless of configuration. There are use cases where we would want
> learning to happen even when there is nothing to export yet.
>
> --
> Frode Nordahl
>
>
>
> > +
> > +out:
> > +        re_nl_received_routes_destroy(&received_routes);
> >      }
> >
> >      /* Remove VRFs previously maintained by us not found in the above 
> > loop. */
> > diff --git a/controller/route-exchange.h b/controller/route-exchange.h
> > index 2c2a9ab84..d19e83403 100644
> > --- a/controller/route-exchange.h
> > +++ b/controller/route-exchange.h
> > @@ -18,6 +18,8 @@
> >  #include <stdbool.h>
> >
> >  struct route_exchange_ctx_in {
> > +    struct ovsdb_idl_txn *ovnsb_idl_txn;
> > +    struct ovsdb_idl_index *sbrec_route_by_datapath;
> >      /* Contains struct advertise_datapath_entry */
> >      struct hmap *announce_routes;
> >  };
> > diff --git a/lib/ovn-util.c b/lib/ovn-util.c
> > index 55a081ab1..5d0db1a5a 100644
> > --- a/lib/ovn-util.c
> > +++ b/lib/ovn-util.c
> > @@ -802,6 +802,16 @@ normalize_v46_prefix(const struct in6_addr *prefix, 
> > unsigned int plen)
> >      }
> >  }
> >
> > +char *
> > +normalize_v46(const struct in6_addr *prefix)
> > +{
> > +    if (IN6_IS_ADDR_V4MAPPED(prefix)) {
> > +        return normalize_ipv4_prefix(in6_addr_get_mapped_ipv4(prefix), 32);
> > +    } else {
> > +        return normalize_ipv6_prefix(prefix, 128);
> > +    }
> > +}
> > +
> >  char *
> >  str_tolower(const char *orig)
> >  {
> > diff --git a/lib/ovn-util.h b/lib/ovn-util.h
> > index da6ad88bc..88b0d45f9 100644
> > --- a/lib/ovn-util.h
> > +++ b/lib/ovn-util.h
> > @@ -205,6 +205,7 @@ bool ip46_parse(const char *ip_str, struct in6_addr 
> > *ip);
> >  char *normalize_ipv4_prefix(ovs_be32 ipv4, unsigned int plen);
> >  char *normalize_ipv6_prefix(const struct in6_addr *ipv6, unsigned int 
> > plen);
> >  char *normalize_v46_prefix(const struct in6_addr *prefix, unsigned int 
> > plen);
> > +char *normalize_v46(const struct in6_addr *prefix);
> >
> >  /* Returns a lowercase copy of orig.
> >   * Caller must free the returned string.
> > --
> > 2.47.0
> >
> > _______________________________________________
> > dev mailing list
> > [email protected]
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to