On Tue, Nov 19, 2024 at 9:11 AM Frode Nordahl <[email protected]> wrote: > > Hello, Felix, > > This is not a full review of this patch, but a statement below caught > my attention and I would like to discuss it further with you. > > I was also unable to test the patch due to assumptions made about the use of > distributed gateway ports in previous patches in this series. I have also > added an in-line note about learning being gated on export. > > On Thu, Oct 24, 2024 at 5:54 PM Felix Huettner via dev > <[email protected]> wrote: > > > > We now learn all routes inside the VRFs we also advertise routes on. > > The routes are then placed in the southbound database for processing by > > northd. > > > > Routes are only selected if matching the following rules: > > 1. must not be a route advertised by us > > 2. must not be a locally connected route (as we do not want to learn transfer > > networks) > > 3. the prefix must not be a link-local address > > > > However, we cannot reliably determine over which link we learned the > > route in case we have two LRPs of the same LR on the same chassis. > > For now we just assume the routes on both links are identical. > > Future commits will refine this. > > From my perspective, I think it would be worth spending some more time > on this point, as it is an essential building block for required > functionality such as supporting ECMP out of the host as a replacement > for bonds. > > Looking at this quickly in a lab environment, I can see that the > routing protocol daemon in use labels learned and redistributed routes > with an interface: > > $ sudo ip -6 route show table 10 > ... > default via fe80::216:3eff:fec4:63dc dev eth2-bgp proto ra metric > 1024 expires 28sec hoplimit 64 pref medium > default via fe80::216:3eff:fe5c:7673 dev eth1-bgp proto ra metric > 1024 expires 28sec hoplimit 64 pref medium
Ha, these actually appear to come from router advertisements and not the routing protocol daemon! > From your comment above, where does the reliability issue you have > seen come from? Does the routing protocol daemon not always label > routes with source interface, or is it just missing in the current > implementation on the OVS/OVN side? > > > Signed-off-by: Felix Huettner <[email protected]> > > --- > > controller/ovn-controller.c | 8 ++ > > controller/route-exchange-netlink.c | 41 +++++++- > > controller/route-exchange-netlink.h | 13 ++- > > controller/route-exchange.c | 146 +++++++++++++++++++++++++++- > > controller/route-exchange.h | 2 + > > lib/ovn-util.c | 10 ++ > > lib/ovn-util.h | 1 + > > 7 files changed, 216 insertions(+), 5 deletions(-) > > > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > index 052ad8812..141d50fc0 100644 > > --- a/controller/ovn-controller.c > > +++ b/controller/ovn-controller.c > > @@ -4925,10 +4925,16 @@ route_runtime_data_handler(struct engine_node > > *node, void *data) > > static void > > en_route_exchange_run(struct engine_node *node, void *data OVS_UNUSED) > > { > > + struct ovsdb_idl_index *sbrec_route_by_datapath = > > + engine_ovsdb_node_get_index( > > + engine_get_input("SB_route", node), "datapath"); > > + > > struct ed_type_route *route_data = > > engine_get_input_data("route", node); > > > > struct route_exchange_ctx_in r_ctx_in = { > > + .ovnsb_idl_txn = engine_get_context()->ovnsb_idl_txn, > > + .sbrec_route_by_datapath = sbrec_route_by_datapath, > > .announce_routes = &route_data->announce_routes, > > }; > > > > @@ -5277,6 +5283,8 @@ main(int argc, char *argv[]) > > engine_add_input(&en_route, &en_sb_route, > > engine_noop_handler); > > engine_add_input(&en_route_exchange, &en_route, NULL); > > + engine_add_input(&en_route_exchange, &en_sb_route, > > + engine_noop_handler); > > > > engine_add_input(&en_addr_sets, &en_sb_address_set, > > addr_sets_sb_address_set_handler); > > diff --git 
a/controller/route-exchange-netlink.c > > b/controller/route-exchange-netlink.c > > index 84c6baa15..ee33d08d2 100644 > > --- a/controller/route-exchange-netlink.c > > +++ b/controller/route-exchange-netlink.c > > @@ -26,6 +26,7 @@ > > #include "openvswitch/ofpbuf.h" > > #include "openvswitch/vlog.h" > > #include "packets.h" > > +#include "ovn-util.h" > > #include "route-table.h" > > #include "route.h" > > > > @@ -171,8 +172,27 @@ re_nl_delete_route(uint32_t table_id, const struct > > in6_addr *dst, > > return modify_route(RTM_DELROUTE, 0, table_id, dst, plen); > > } > > > > +static uint32_t > > +route_hash(const struct in6_addr *dst, unsigned int plen) > > +{ > > + uint32_t hash = hash_bytes(dst->s6_addr, 16, 0); > > + return hash_int(plen, hash); > > +} > > + > > +void > > +re_nl_received_routes_destroy(struct hmap *host_routes) > > +{ > > + struct re_nl_received_route_node *rr; > > + HMAP_FOR_EACH_SAFE (rr, hmap_node, host_routes) { > > + hmap_remove(host_routes, &rr->hmap_node); > > + free(rr); > > + } > > + hmap_destroy(host_routes); > > +} > > + > > struct route_msg_handle_data { > > const struct hmap *routes; > > + struct hmap *learned_routes; > > }; > > > > static void > > @@ -184,8 +204,24 @@ handle_route_msg_delete_routes(const struct > > route_table_msg *msg, void *data) > > struct advertise_route_entry *ar; > > int err; > > > > - /* This route is not from us, we should not touch it. */ > > + /* This route is not from us, so we learn it. */ > > if (rd->rtm_protocol != RTPROT_OVN) { > > + if (prefix_is_link_local(&rd->rta_dst, rd->plen)) { > > + return; > > + } > > + for (int i = 0; i < rd->n_nexthops; i++) { > > + if (ipv6_is_zero(&rd->nexthops[i].rta_gw)) { > > + /* This is most likely an address on the local link. 
> > + * As we just want to learn remote routes we do not need > > it.*/ > > + continue; > > + } > > + struct re_nl_received_route_node *rr = xzalloc(sizeof *rr); > > + hmap_insert(handle_data->learned_routes, &rr->hmap_node, > > + route_hash(&rd->rta_dst, rd->plen)); > > + rr->addr = rd->rta_dst; > > + rr->plen = rd->plen; > > + rr->nexthop = rd->nexthops[i].rta_gw; > > + } > > return; > > } > > > > @@ -212,7 +248,7 @@ handle_route_msg_delete_routes(const struct > > route_table_msg *msg, void *data) > > > > void > > re_nl_sync_routes(uint32_t table_id, > > - const struct hmap *routes) > > + const struct hmap *routes, struct hmap *learned_routes) > > { > > struct advertise_route_entry *ar; > > HMAP_FOR_EACH (ar, node, routes) { > > @@ -224,6 +260,7 @@ re_nl_sync_routes(uint32_t table_id, > > * in the system. */ > > struct route_msg_handle_data data = { > > .routes = routes, > > + .learned_routes = learned_routes, > > }; > > route_table_dump_one_table(NULL, table_id, > > handle_route_msg_delete_routes, > > &data); > > diff --git a/controller/route-exchange-netlink.h > > b/controller/route-exchange-netlink.h > > index f87ebd75d..566b38fde 100644 > > --- a/controller/route-exchange-netlink.h > > +++ b/controller/route-exchange-netlink.h > > @@ -16,6 +16,8 @@ > > #define ROUTE_EXCHANGE_NETLINK_H 1 > > > > #include <stdint.h> > > +#include "openvswitch/hmap.h" > > +#include <netinet/in.h> > > > > /* This value is arbitrary but currently unused. 
> > * See > > https://github.com/iproute2/iproute2/blob/main/etc/iproute2/rt_protos */ > > @@ -24,6 +26,13 @@ > > struct in6_addr; > > struct hmap; > > > > +struct re_nl_received_route_node { > > + struct hmap_node hmap_node; > > + struct in6_addr addr; > > + unsigned int plen; > > + struct in6_addr nexthop; > > +}; > > + > > int re_nl_create_vrf(const char *ifname, uint32_t table_id); > > int re_nl_delete_vrf(const char *ifname); > > > > @@ -34,7 +43,9 @@ int re_nl_delete_route(uint32_t table_id, const struct > > in6_addr *dst, > > > > void re_nl_dump(uint32_t table_id); > > > > +void re_nl_received_routes_destroy(struct hmap *); > > void re_nl_sync_routes(uint32_t table_id, > > - const struct hmap *host_routes); > > + const struct hmap *host_routes, > > + struct hmap *learned_routes); > > > > #endif /* route-exchange-netlink.h */ > > diff --git a/controller/route-exchange.c b/controller/route-exchange.c > > index 86ccc92cb..41fea6398 100644 > > --- a/controller/route-exchange.c > > +++ b/controller/route-exchange.c > > @@ -34,6 +34,139 @@ static struct vlog_rate_limit rl = > > VLOG_RATE_LIMIT_INIT(5, 20); > > > > static struct sset _maintained_vrfs = SSET_INITIALIZER(&_maintained_vrfs); > > > > +struct route_entry { > > + struct hmap_node hmap_node; > > + > > + const struct sbrec_route *sb_route; > > + > > + const struct sbrec_datapath_binding *sb_db; > > + char *logical_port; > > + char *ip_prefix; > > + char *nexthop; > > + bool stale; > > +}; > > + > > +static struct route_entry * > > +route_alloc_entry(struct hmap *routes, > > + const struct sbrec_datapath_binding *sb_db, > > + const char *logical_port, > > + const char *ip_prefix, const char *nexthop) > > +{ > > + struct route_entry *route_e = xzalloc(sizeof *route_e); > > + > > + route_e->sb_db = sb_db; > > + route_e->logical_port = xstrdup(logical_port); > > + route_e->ip_prefix = xstrdup(ip_prefix); > > + route_e->nexthop = xstrdup(nexthop); > > + route_e->stale = false; > > + uint32_t hash = 
uuid_hash(&sb_db->header_.uuid); > > + hash = hash_string(logical_port, hash); > > + hash = hash_string(ip_prefix, hash); > > + hmap_insert(routes, &route_e->hmap_node, hash); > > + > > + return route_e; > > +} > > + > > +static struct route_entry * > > +route_lookup_or_add(struct hmap *route_map, > > + const struct sbrec_datapath_binding *sb_db, > > + const char *logical_port, const char *ip_prefix, > > + const char *nexthop) > > +{ > > + struct route_entry *route_e; > > + uint32_t hash; > > + > > + hash = uuid_hash(&sb_db->header_.uuid); > > + hash = hash_string(logical_port, hash); > > + hash = hash_string(ip_prefix, hash); > > + HMAP_FOR_EACH_WITH_HASH (route_e, hmap_node, hash, route_map) { > > + if (!strcmp(route_e->nexthop, nexthop)) { > > + return route_e; > > + } > > + } > > + > > + route_e = route_alloc_entry(route_map, sb_db, > > + logical_port, ip_prefix, nexthop); > > + return route_e; > > +} > > + > > +static void > > +route_erase_entry(struct route_entry *route_e) > > +{ > > + free(route_e->logical_port); > > + free(route_e->ip_prefix); > > + free(route_e->nexthop); > > + free(route_e); > > +} > > + > > +static void > > +sb_sync_learned_routes(const struct sbrec_datapath_binding *datapath, > > + const struct hmap *learned_routes, > > + const struct sset *bound_ports, > > + struct ovsdb_idl_txn *ovnsb_idl_txn, > > + struct ovsdb_idl_index *sbrec_route_by_datapath) > > +{ > > + struct hmap sync_routes = HMAP_INITIALIZER(&sync_routes); > > + struct route_entry *route_e; > > + const struct sbrec_route *sb_route; > > + > > + struct sbrec_route *filter = > > + sbrec_route_index_init_row(sbrec_route_by_datapath); > > + sbrec_route_index_set_datapath(filter, datapath); > > + SBREC_ROUTE_FOR_EACH_EQUAL (sb_route, filter, sbrec_route_by_datapath) > > { > > + if (strcmp(sb_route->type, "receive")) { > > + continue; > > + } > > + /* If the port is not local we don't care about it. > > + * Some other ovn-controller will handle it. 
*/ > > + if (!sset_contains(bound_ports, sb_route->logical_port)) { > > + continue; > > + } > > + route_e = route_alloc_entry(&sync_routes, > > + sb_route->datapath, > > + sb_route->logical_port, > > + sb_route->ip_prefix, > > + sb_route->nexthop); > > + route_e->stale = true; > > + route_e->sb_route = sb_route; > > + } > > + sbrec_route_index_destroy_row(filter); > > + > > + struct re_nl_received_route_node *learned_route; > > + HMAP_FOR_EACH (learned_route, hmap_node, learned_routes) { > > + char *ip_prefix = normalize_v46_prefix(&learned_route->addr, > > + learned_route->plen); > > + char *nexthop = normalize_v46(&learned_route->nexthop); > > + > > + const char *logical_port; > > + SSET_FOR_EACH (logical_port, bound_ports) { > > + route_e = route_lookup_or_add(&sync_routes, > > + datapath, > > + logical_port, ip_prefix, nexthop); > > + route_e->stale = false; > > + if (!route_e->sb_route) { > > + sb_route = sbrec_route_insert(ovnsb_idl_txn); > > + sbrec_route_set_datapath(sb_route, datapath); > > + sbrec_route_set_logical_port(sb_route, logical_port); > > + sbrec_route_set_ip_prefix(sb_route, ip_prefix); > > + sbrec_route_set_nexthop(sb_route, nexthop); > > + sbrec_route_set_type(sb_route, "receive"); > > + route_e->sb_route = sb_route; > > + } > > + } > > + free(ip_prefix); > > + free(nexthop); > > + } > > + > > + HMAP_FOR_EACH_POP (route_e, hmap_node, &sync_routes) { > > + if (route_e->stale) { > > + sbrec_route_delete(route_e->sb_route); > > + } > > + route_erase_entry(route_e); > > + } > > + hmap_destroy(&sync_routes); > > +} > > + > > void > > route_exchange_run(struct route_exchange_ctx_in *r_ctx_in, > > struct route_exchange_ctx_out *r_ctx_out OVS_UNUSED) > > @@ -57,12 +190,21 @@ route_exchange_run(struct route_exchange_ctx_in > > *r_ctx_in, > > "%"PRId64": %s.", > > vrf_name, ad->key, > > ovs_strerror(error)); > > - continue; > > + goto out; > > } > > sset_add(&_maintained_vrfs, vrf_name); > > } > > > > - re_nl_sync_routes(ad->key, &ad->routes); > > + 
re_nl_sync_routes(ad->key, &ad->routes, > > + &received_routes); > > + > > + sb_sync_learned_routes(ad->db, &received_routes, > > + &ad->bound_ports, > > + r_ctx_in->ovnsb_idl_txn, > > + r_ctx_in->sbrec_route_by_datapath); > > Calling these here does not seem right to me, it would cause the > ovn-controller to only learn routes if it has already exported routes, > regardless of configuration. There are use cases where we would want > learning to happen even when there is nothing to export yet. > > -- > Frode Nordahl > > > > > + > > +out: > > + re_nl_received_routes_destroy(&received_routes); > > } > > > > /* Remove VRFs previously maintained by us not found in the above > > loop. */ > > diff --git a/controller/route-exchange.h b/controller/route-exchange.h > > index 2c2a9ab84..d19e83403 100644 > > --- a/controller/route-exchange.h > > +++ b/controller/route-exchange.h > > @@ -18,6 +18,8 @@ > > #include <stdbool.h> > > > > struct route_exchange_ctx_in { > > + struct ovsdb_idl_txn *ovnsb_idl_txn; > > + struct ovsdb_idl_index *sbrec_route_by_datapath; > > /* Contains struct advertise_datapath_entry */ > > struct hmap *announce_routes; > > }; > > diff --git a/lib/ovn-util.c b/lib/ovn-util.c > > index 55a081ab1..5d0db1a5a 100644 > > --- a/lib/ovn-util.c > > +++ b/lib/ovn-util.c > > @@ -802,6 +802,16 @@ normalize_v46_prefix(const struct in6_addr *prefix, > > unsigned int plen) > > } > > } > > > > +char * > > +normalize_v46(const struct in6_addr *prefix) > > +{ > > + if (IN6_IS_ADDR_V4MAPPED(prefix)) { > > + return normalize_ipv4_prefix(in6_addr_get_mapped_ipv4(prefix), 32); > > + } else { > > + return normalize_ipv6_prefix(prefix, 128); > > + } > > +} > > + > > char * > > str_tolower(const char *orig) > > { > > diff --git a/lib/ovn-util.h b/lib/ovn-util.h > > index da6ad88bc..88b0d45f9 100644 > > --- a/lib/ovn-util.h > > +++ b/lib/ovn-util.h > > @@ -205,6 +205,7 @@ bool ip46_parse(const char *ip_str, struct in6_addr > > *ip); > > char *normalize_ipv4_prefix(ovs_be32 
ipv4, unsigned int plen); > > char *normalize_ipv6_prefix(const struct in6_addr *ipv6, unsigned int > > plen); > > char *normalize_v46_prefix(const struct in6_addr *prefix, unsigned int > > plen); > > +char *normalize_v46(const struct in6_addr *prefix); > > > > /* Returns a lowercase copy of orig. > > * Caller must free the returned string. > > -- > > 2.47.0 > > > > _______________________________________________ > > dev mailing list > > [email protected] > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
