Hello, Felix,

This is not a full review of this patch, but a statement below caught
my attention and I would like to discuss it further with you.

I was also unable to test due to assumptions made about the use of
distributed gateway ports in previous patches in this series, also
added a note in-line on the gating on export before learning.

On Thu, Oct 24, 2024 at 5:54 PM Felix Huettner via dev
<[email protected]> wrote:
>
> we now learn all routes inside the vrfs we also advertise routes on.
> The routes are then placed in the southbound database for processing by
> northd.
>
> Routes are only selected if matching the following rules:
> 1. must not be a route advertised by us
> 2. must not be a local connected route (as we want to not learn transfer
>    networks)
> 3. the prefix must not be a link local address
>
> However we can not reliably determine over which link we learned the
> route in case we have two LRPs of the same LR on the same chassis.
> For now we just assume the routes on both links are identical.
> Future commits will refine this.

From my perspective, I think it would be worth spending some more time
on this point, as it is an essential building block for required
functionality such as supporting ECMP out of the host as a replacement
for bonds.

Looking at this quickly in a lab environment, I can see that the
routing protocol daemon in use labels learned and redistributed routes
with an interface:

    $ sudo ip -6 route show table 10
    ...
    default via fe80::216:3eff:fec4:63dc dev eth2-bgp proto ra metric
1024 expires 28sec hoplimit 64 pref medium
    default via fe80::216:3eff:fe5c:7673 dev eth1-bgp proto ra metric
1024 expires 28sec hoplimit 64 pref medium

From your comment above, where does the reliability issue you have
seen come from? Does the routing protocol daemon not always label
routes with source interface, or is it just missing in the current
implementation on the OVS/OVN side?

> Signed-off-by: Felix Huettner <[email protected]>
> ---
>  controller/ovn-controller.c         |   8 ++
>  controller/route-exchange-netlink.c |  41 +++++++-
>  controller/route-exchange-netlink.h |  13 ++-
>  controller/route-exchange.c         | 146 +++++++++++++++++++++++++++-
>  controller/route-exchange.h         |   2 +
>  lib/ovn-util.c                      |  10 ++
>  lib/ovn-util.h                      |   1 +
>  7 files changed, 216 insertions(+), 5 deletions(-)
>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 052ad8812..141d50fc0 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -4925,10 +4925,16 @@ route_runtime_data_handler(struct engine_node *node, 
> void *data)
>  static void
>  en_route_exchange_run(struct engine_node *node, void *data OVS_UNUSED)
>  {
> +    struct ovsdb_idl_index *sbrec_route_by_datapath =
> +        engine_ovsdb_node_get_index(
> +            engine_get_input("SB_route", node), "datapath");
> +
>      struct ed_type_route *route_data =
>          engine_get_input_data("route", node);
>
>      struct route_exchange_ctx_in r_ctx_in = {
> +        .ovnsb_idl_txn = engine_get_context()->ovnsb_idl_txn,
> +        .sbrec_route_by_datapath = sbrec_route_by_datapath,
>          .announce_routes = &route_data->announce_routes,
>      };
>
> @@ -5277,6 +5283,8 @@ main(int argc, char *argv[])
>      engine_add_input(&en_route, &en_sb_route,
>                       engine_noop_handler);
>      engine_add_input(&en_route_exchange, &en_route, NULL);
> +    engine_add_input(&en_route_exchange, &en_sb_route,
> +                     engine_noop_handler);
>
>      engine_add_input(&en_addr_sets, &en_sb_address_set,
>                       addr_sets_sb_address_set_handler);
> diff --git a/controller/route-exchange-netlink.c 
> b/controller/route-exchange-netlink.c
> index 84c6baa15..ee33d08d2 100644
> --- a/controller/route-exchange-netlink.c
> +++ b/controller/route-exchange-netlink.c
> @@ -26,6 +26,7 @@
>  #include "openvswitch/ofpbuf.h"
>  #include "openvswitch/vlog.h"
>  #include "packets.h"
> +#include "ovn-util.h"
>  #include "route-table.h"
>  #include "route.h"
>
> @@ -171,8 +172,27 @@ re_nl_delete_route(uint32_t table_id, const struct 
> in6_addr *dst,
>      return modify_route(RTM_DELROUTE, 0, table_id, dst, plen);
>  }
>
> +static uint32_t
> +route_hash(const struct in6_addr *dst, unsigned int plen)
> +{
> +    uint32_t hash = hash_bytes(dst->s6_addr, 16, 0);
> +    return hash_int(plen, hash);
> +}
> +
> +void
> +re_nl_received_routes_destroy(struct hmap *host_routes)
> +{
> +    struct re_nl_received_route_node *rr;
> +    HMAP_FOR_EACH_SAFE (rr, hmap_node, host_routes) {
> +        hmap_remove(host_routes, &rr->hmap_node);
> +        free(rr);
> +    }
> +    hmap_destroy(host_routes);
> +}
> +
>  struct route_msg_handle_data {
>      const struct hmap *routes;
> +    struct hmap *learned_routes;
>  };
>
>  static void
> @@ -184,8 +204,24 @@ handle_route_msg_delete_routes(const struct 
> route_table_msg *msg, void *data)
>      struct advertise_route_entry *ar;
>      int err;
>
> -    /* This route is not from us, we should not touch it. */
> +    /* This route is not from us, so we learn it. */
>      if (rd->rtm_protocol != RTPROT_OVN) {
> +        if (prefix_is_link_local(&rd->rta_dst, rd->plen)) {
> +            return;
> +        }
> +        for (int i = 0; i < rd->n_nexthops; i++) {
> +            if (ipv6_is_zero(&rd->nexthops[i].rta_gw)) {
> +                /* This is most likely an address on the local link.
> +                 * As we just want to learn remote routes we do not need 
> it.*/
> +                continue;
> +            }
> +            struct re_nl_received_route_node *rr = xzalloc(sizeof *rr);
> +            hmap_insert(handle_data->learned_routes, &rr->hmap_node,
> +                        route_hash(&rd->rta_dst, rd->plen));
> +            rr->addr = rd->rta_dst;
> +            rr->plen = rd->plen;
> +            rr->nexthop = rd->nexthops[i].rta_gw;
> +        }
>          return;
>      }
>
> @@ -212,7 +248,7 @@ handle_route_msg_delete_routes(const struct 
> route_table_msg *msg, void *data)
>
>  void
>  re_nl_sync_routes(uint32_t table_id,
> -                  const struct hmap *routes)
> +                  const struct hmap *routes, struct hmap *learned_routes)
>  {
>      struct advertise_route_entry *ar;
>      HMAP_FOR_EACH (ar, node, routes) {
> @@ -224,6 +260,7 @@ re_nl_sync_routes(uint32_t table_id,
>       * in the system. */
>      struct route_msg_handle_data data = {
>          .routes = routes,
> +        .learned_routes = learned_routes,
>      };
>      route_table_dump_one_table(NULL, table_id, 
> handle_route_msg_delete_routes,
>                                 &data);
> diff --git a/controller/route-exchange-netlink.h 
> b/controller/route-exchange-netlink.h
> index f87ebd75d..566b38fde 100644
> --- a/controller/route-exchange-netlink.h
> +++ b/controller/route-exchange-netlink.h
> @@ -16,6 +16,8 @@
>  #define ROUTE_EXCHANGE_NETLINK_H 1
>
>  #include <stdint.h>
> +#include "openvswitch/hmap.h"
> +#include <netinet/in.h>
>
>  /* This value is arbitrary but currently unused.
>   * See https://github.com/iproute2/iproute2/blob/main/etc/iproute2/rt_protos 
> */
> @@ -24,6 +26,13 @@
>  struct in6_addr;
>  struct hmap;
>
> +struct re_nl_received_route_node {
> +    struct hmap_node hmap_node;
> +    struct in6_addr addr;
> +    unsigned int plen;
> +    struct in6_addr nexthop;
> +};
> +
>  int re_nl_create_vrf(const char *ifname, uint32_t table_id);
>  int re_nl_delete_vrf(const char *ifname);
>
> @@ -34,7 +43,9 @@ int re_nl_delete_route(uint32_t table_id, const struct 
> in6_addr *dst,
>
>  void re_nl_dump(uint32_t table_id);
>
> +void re_nl_received_routes_destroy(struct hmap *);
>  void re_nl_sync_routes(uint32_t table_id,
> -                       const struct hmap *host_routes);
> +                       const struct hmap *host_routes,
> +                       struct hmap *learned_routes);
>
>  #endif /* route-exchange-netlink.h */
> diff --git a/controller/route-exchange.c b/controller/route-exchange.c
> index 86ccc92cb..41fea6398 100644
> --- a/controller/route-exchange.c
> +++ b/controller/route-exchange.c
> @@ -34,6 +34,139 @@ static struct vlog_rate_limit rl = 
> VLOG_RATE_LIMIT_INIT(5, 20);
>
>  static struct sset _maintained_vrfs = SSET_INITIALIZER(&_maintained_vrfs);
>
> +struct route_entry {
> +    struct hmap_node hmap_node;
> +
> +    const struct sbrec_route *sb_route;
> +
> +    const struct sbrec_datapath_binding *sb_db;
> +    char *logical_port;
> +    char *ip_prefix;
> +    char *nexthop;
> +    bool stale;
> +};
> +
> +static struct route_entry *
> +route_alloc_entry(struct hmap *routes,
> +                  const struct sbrec_datapath_binding *sb_db,
> +                  const char *logical_port,
> +                  const char *ip_prefix, const char *nexthop)
> +{
> +    struct route_entry *route_e = xzalloc(sizeof *route_e);
> +
> +    route_e->sb_db = sb_db;
> +    route_e->logical_port = xstrdup(logical_port);
> +    route_e->ip_prefix = xstrdup(ip_prefix);
> +    route_e->nexthop = xstrdup(nexthop);
> +    route_e->stale = false;
> +    uint32_t hash = uuid_hash(&sb_db->header_.uuid);
> +    hash = hash_string(logical_port, hash);
> +    hash = hash_string(ip_prefix, hash);
> +    hmap_insert(routes, &route_e->hmap_node, hash);
> +
> +    return route_e;
> +}
> +
> +static struct route_entry *
> +route_lookup_or_add(struct hmap *route_map,
> +                    const struct sbrec_datapath_binding *sb_db,
> +                    const char *logical_port, const char *ip_prefix,
> +                    const char *nexthop)
> +{
> +    struct route_entry *route_e;
> +    uint32_t hash;
> +
> +    hash = uuid_hash(&sb_db->header_.uuid);
> +    hash = hash_string(logical_port, hash);
> +    hash = hash_string(ip_prefix, hash);
> +    HMAP_FOR_EACH_WITH_HASH (route_e, hmap_node, hash, route_map) {
> +        if (!strcmp(route_e->nexthop, nexthop)) {
> +            return route_e;
> +        }
> +    }
> +
> +    route_e = route_alloc_entry(route_map, sb_db,
> +                                 logical_port, ip_prefix, nexthop);
> +    return route_e;
> +}
> +
> +static void
> +route_erase_entry(struct route_entry *route_e)
> +{
> +    free(route_e->logical_port);
> +    free(route_e->ip_prefix);
> +    free(route_e->nexthop);
> +    free(route_e);
> +}
> +
> +static void
> +sb_sync_learned_routes(const struct sbrec_datapath_binding *datapath,
> +                       const struct hmap *learned_routes,
> +                       const struct sset *bound_ports,
> +                       struct ovsdb_idl_txn *ovnsb_idl_txn,
> +                       struct ovsdb_idl_index *sbrec_route_by_datapath)
> +{
> +    struct hmap sync_routes = HMAP_INITIALIZER(&sync_routes);
> +    struct route_entry *route_e;
> +    const struct sbrec_route *sb_route;
> +
> +    struct sbrec_route *filter =
> +            sbrec_route_index_init_row(sbrec_route_by_datapath);
> +    sbrec_route_index_set_datapath(filter, datapath);
> +    SBREC_ROUTE_FOR_EACH_EQUAL (sb_route, filter, sbrec_route_by_datapath) {
> +        if (strcmp(sb_route->type, "receive")) {
> +            continue;
> +        }
> +        /* If the port is not local we don't care about it.
> +         * Some other ovn-controller will handle it. */
> +        if (!sset_contains(bound_ports, sb_route->logical_port)) {
> +            continue;
> +        }
> +        route_e = route_alloc_entry(&sync_routes,
> +                                    sb_route->datapath,
> +                                    sb_route->logical_port,
> +                                    sb_route->ip_prefix,
> +                                    sb_route->nexthop);
> +        route_e->stale = true;
> +        route_e->sb_route = sb_route;
> +    }
> +    sbrec_route_index_destroy_row(filter);
> +
> +    struct re_nl_received_route_node *learned_route;
> +    HMAP_FOR_EACH (learned_route, hmap_node, learned_routes) {
> +        char *ip_prefix = normalize_v46_prefix(&learned_route->addr,
> +                                               learned_route->plen);
> +        char *nexthop = normalize_v46(&learned_route->nexthop);
> +
> +        const char *logical_port;
> +        SSET_FOR_EACH (logical_port, bound_ports) {
> +            route_e = route_lookup_or_add(&sync_routes,
> +                datapath,
> +                logical_port, ip_prefix, nexthop);
> +            route_e->stale = false;
> +            if (!route_e->sb_route) {
> +                sb_route = sbrec_route_insert(ovnsb_idl_txn);
> +                sbrec_route_set_datapath(sb_route, datapath);
> +                sbrec_route_set_logical_port(sb_route, logical_port);
> +                sbrec_route_set_ip_prefix(sb_route, ip_prefix);
> +                sbrec_route_set_nexthop(sb_route, nexthop);
> +                sbrec_route_set_type(sb_route, "receive");
> +                route_e->sb_route = sb_route;
> +            }
> +        }
> +        free(ip_prefix);
> +        free(nexthop);
> +    }
> +
> +    HMAP_FOR_EACH_POP (route_e, hmap_node, &sync_routes) {
> +        if (route_e->stale) {
> +            sbrec_route_delete(route_e->sb_route);
> +        }
> +        route_erase_entry(route_e);
> +    }
> +    hmap_destroy(&sync_routes);
> +}
> +
>  void
>  route_exchange_run(struct route_exchange_ctx_in *r_ctx_in,
>                     struct route_exchange_ctx_out *r_ctx_out OVS_UNUSED)
> @@ -57,12 +190,21 @@ route_exchange_run(struct route_exchange_ctx_in 
> *r_ctx_in,
>                               "%"PRId64": %s.",
>                               vrf_name, ad->key,
>                               ovs_strerror(error));
> -                continue;
> +                goto out;
>              }
>              sset_add(&_maintained_vrfs, vrf_name);
>          }
>
> -        re_nl_sync_routes(ad->key, &ad->routes);
> +        re_nl_sync_routes(ad->key, &ad->routes,
> +                          &received_routes);
> +
> +        sb_sync_learned_routes(ad->db, &received_routes,
> +                               &ad->bound_ports,
> +                               r_ctx_in->ovnsb_idl_txn,
> +                               r_ctx_in->sbrec_route_by_datapath);

Calling these here does not seem right to me, it would cause the
ovn-controller to only learn routes if it has already exported routes,
regardless of configuration. There are use cases where we would want
learning to happen even when there is nothing to export yet.

--
Frode Nordahl



> +
> +out:
> +        re_nl_received_routes_destroy(&received_routes);
>      }
>
>      /* Remove VRFs previously maintained by us not found in the above loop. 
> */
> diff --git a/controller/route-exchange.h b/controller/route-exchange.h
> index 2c2a9ab84..d19e83403 100644
> --- a/controller/route-exchange.h
> +++ b/controller/route-exchange.h
> @@ -18,6 +18,8 @@
>  #include <stdbool.h>
>
>  struct route_exchange_ctx_in {
> +    struct ovsdb_idl_txn *ovnsb_idl_txn;
> +    struct ovsdb_idl_index *sbrec_route_by_datapath;
>      /* Contains struct advertise_datapath_entry */
>      struct hmap *announce_routes;
>  };
> diff --git a/lib/ovn-util.c b/lib/ovn-util.c
> index 55a081ab1..5d0db1a5a 100644
> --- a/lib/ovn-util.c
> +++ b/lib/ovn-util.c
> @@ -802,6 +802,16 @@ normalize_v46_prefix(const struct in6_addr *prefix, 
> unsigned int plen)
>      }
>  }
>
> +char *
> +normalize_v46(const struct in6_addr *prefix)
> +{
> +    if (IN6_IS_ADDR_V4MAPPED(prefix)) {
> +        return normalize_ipv4_prefix(in6_addr_get_mapped_ipv4(prefix), 32);
> +    } else {
> +        return normalize_ipv6_prefix(prefix, 128);
> +    }
> +}
> +
>  char *
>  str_tolower(const char *orig)
>  {
> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
> index da6ad88bc..88b0d45f9 100644
> --- a/lib/ovn-util.h
> +++ b/lib/ovn-util.h
> @@ -205,6 +205,7 @@ bool ip46_parse(const char *ip_str, struct in6_addr *ip);
>  char *normalize_ipv4_prefix(ovs_be32 ipv4, unsigned int plen);
>  char *normalize_ipv6_prefix(const struct in6_addr *ipv6, unsigned int plen);
>  char *normalize_v46_prefix(const struct in6_addr *prefix, unsigned int plen);
> +char *normalize_v46(const struct in6_addr *prefix);
>
>  /* Returns a lowercase copy of orig.
>   * Caller must free the returned string.
> --
> 2.47.0
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to