On 7/23/25 3:12 PM, Dima Chumak via dev wrote: > Introduce support for route lookup across several routing tables > following a priority from highest to lowest. > > To properly prioritize route lookup order, the single router table in > OVS is now split into three tables: local, main and default. > Corresponding routing rules are created by default, where local table > has the highest priority and main and default tables have two lowest > priorities respectively. All non-standard tables can be added with a > priority in between local and main. > > Non-standard routing tables are created by reading the Routing Policy
Why only the non-standard tables? > Database (RPDB) from the kernel, on systems that support it, and > importing only those tables which are referenced in the RPDB rules with > a table lookup action. The table IDs and rule priority are copied from > the kernel RPDB as is. > > Current implementation only supports RPDB rules with a source address > selector, in form of '[not] from IP' match. > > Signed-off-by: Dima Chumak <dchu...@nvidia.com> > --- > lib/ovs-router.c | 231 +++++++++++++++++++++++++++++------ > lib/ovs-router.h | 7 ++ > lib/packets.c | 20 +++ > lib/packets.h | 2 + > lib/route-table-bsd.c | 4 +- > lib/route-table-stub.c | 4 +- > lib/route-table.c | 225 ++++++++++++++++++++++++++++++++-- > lib/route-table.h | 18 ++- > tests/test-lib-route-table.c | 5 +- > 9 files changed, 465 insertions(+), 51 deletions(-) > > diff --git a/lib/ovs-router.c b/lib/ovs-router.c > index 02dd94fd18c0..2addc7b43d07 100644 > --- a/lib/ovs-router.c > +++ b/lib/ovs-router.c > @@ -42,6 +42,7 @@ > #include "seq.h" > #include "ovs-thread.h" > #include "route-table.h" > +#include "pvector.h" > #include "tnl-ports.h" > #include "unixctl.h" > #include "util.h" > @@ -56,11 +57,22 @@ struct clsmap_node { > uint32_t table; > }; > > +struct router_rule { > + uint32_t prio; > + bool invert; > + uint8_t src_len; > + struct in6_addr from_addr; > + uint32_t lookup_table; > +}; > + > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); > > static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; > static struct hmap clsmap OVS_GUARDED_BY(mutex) = HMAP_INITIALIZER(&clsmap); > +static struct classifier cls_local; > static struct classifier cls_main; > +static struct classifier cls_default; > +static struct pvector rules; > > /* By default, use the system routing table. For system-independent testing, > * the unit tests disable using the system routing table. */ > @@ -87,7 +99,16 @@ cls_find(uint32_t table) > struct clsmap_node *node; > > if (route_table_is_standard_id(table)) { > - return &cls_main; > + switch (table) { > + case CLS_DEFAULT: > + return &cls_default; > + case CLS_MAIN: > + return &cls_main; > + case CLS_LOCAL: > + return &cls_local; > + default: > + OVS_NOT_REACHED(); > + } > } Not sure why we need to special-case these. See the comments for the previous patch. > > ovs_mutex_lock(&mutex); > @@ -131,6 +152,14 @@ cls_destroy(struct classifier *cls, bool flush_all) > classifier_publish(cls); > } > > +bool > +ovs_router_is_empty(uint32_t table) > +{ > + struct classifier *cls = cls_find(table); > + > + return !cls || !cls->n_rules; > +} > + > static struct ovs_router_entry * > ovs_router_entry_cast(const struct cls_rule *cr) > { > @@ -170,26 +199,63 @@ ovs_router_lookup(uint32_t mark, const struct in6_addr > *ip6_dst, > char output_netdev[], > struct in6_addr *src, struct in6_addr *gw) > { > - const struct cls_rule *cr; > struct flow flow = {.ipv6_dst = *ip6_dst, .pkt_mark = mark}; > + const struct in6_addr *from_src = src; > + const struct cls_rule *cr; > + struct router_rule *rule; > > if (src && ipv6_addr_is_set(src)) { > - const struct cls_rule *cr_src; > + struct classifier *src_cls[] = { &cls_local, &cls_main, &cls_default > }; Why would a local route be in a non-local classifier? We should also, likely, drop the 'local' field from the structure, as it doesn't carry a lot of meaning now that tables are separated. > struct flow flow_src = {.ipv6_dst = *src, .pkt_mark = mark}; > + struct ovs_router_entry *p_src; > + const struct cls_rule *cr_src; > + bool local = false; > > - cr_src = classifier_lookup(&cls_main, OVS_VERSION_MAX, &flow_src, > - NULL, NULL); > - if (cr_src) { > - struct ovs_router_entry *p_src = ovs_router_entry_cast(cr_src); > - if (!p_src->local) { > - return false; > + for (int i = 0; i < ARRAY_SIZE(src_cls); i++) { > + cr_src = classifier_lookup(src_cls[i], OVS_VERSION_MAX, > &flow_src, > + NULL, NULL); > + if (!cr_src) { > + continue; > } > - } else { > + p_src = ovs_router_entry_cast(cr_src); > + local = p_src->local; > + if (local) { > + break; > + } > + } > + if (!local) { > return false; > } > } > > - cr = classifier_lookup(&cls_main, OVS_VERSION_MAX, &flow, NULL, NULL); > + if (!from_src) { > + from_src = &in6addr_any; > + } > + > + PVECTOR_FOR_EACH (rule, &rules) { > + bool matched = !!(!rule->src_len || > + ipv6_addr_equals_masked(&rule->from_addr, > + from_src, rule->src_len)); > + > + if (rule->invert) { > + matched = !matched; > + } > + > + if (matched) { > + struct classifier *cls = cls_find(rule->lookup_table); > + > + if (!cls) { > + /* A rule can be added before the table is created */ > + continue; > + } > + cr = classifier_lookup(cls, OVS_VERSION_MAX, &flow, NULL, > + NULL); > + if (cr) { > + break; > + } > + } > + } > + > if (cr) { > struct ovs_router_entry *p = ovs_router_entry_cast(cr); > > @@ -644,37 +710,40 @@ out: > static void > ovs_router_show_text(struct ds *ds) > { > + struct classifier *std_cls[] = { &cls_local, &cls_main, &cls_default }; > struct ovs_router_entry *rt; > > ds_put_format(ds, "Route Table:\n"); > - CLS_FOR_EACH (rt, cr, &cls_main) { > - uint8_t plen; > - if (rt->priority == rt->plen || rt->local) { > - ds_put_format(ds, "Cached: "); > - } else { > - ds_put_format(ds, "User: "); > - } > - ipv6_format_mapped(&rt->nw_addr, ds); > - plen = rt->plen; > - if (IN6_IS_ADDR_V4MAPPED(&rt->nw_addr)) { > - plen -= 96; > - } > - ds_put_format(ds, "/%"PRIu8, plen); > - if (rt->mark) { > - ds_put_format(ds, " MARK %"PRIu32, rt->mark); > - } > + for (int i = 0; i < ARRAY_SIZE(std_cls); i++) { > + CLS_FOR_EACH (rt, cr, std_cls[i]) { > + uint8_t plen; > + if (rt->priority == rt->plen || rt->local) { > + ds_put_format(ds, "Cached: "); > + } else { > + ds_put_format(ds, "User: "); > + } > + ipv6_format_mapped(&rt->nw_addr, ds); > + plen = rt->plen; > + if (IN6_IS_ADDR_V4MAPPED(&rt->nw_addr)) { > + plen -= 96; > + } > + ds_put_format(ds, "/%"PRIu8, plen); > + if (rt->mark) { > + ds_put_format(ds, " MARK %"PRIu32, rt->mark); > + } > > - ds_put_format(ds, " dev %s", rt->output_netdev); > - if (ipv6_addr_is_set(&rt->gw)) { > - ds_put_format(ds, " GW "); > - ipv6_format_mapped(&rt->gw, ds); > - } > - ds_put_format(ds, " SRC "); > - ipv6_format_mapped(&rt->src_addr, ds); > - if (rt->local) { > - ds_put_format(ds, " local"); > + ds_put_format(ds, " dev %s", rt->output_netdev); > + if (ipv6_addr_is_set(&rt->gw)) { > + ds_put_format(ds, " GW "); > + ipv6_format_mapped(&rt->gw, ds); > + } > + ds_put_format(ds, " SRC "); > + ipv6_format_mapped(&rt->src_addr, ds); > + if (rt->local) { > + ds_put_format(ds, " local"); > + } > + ds_put_format(ds, "\n"); > } > - ds_put_format(ds, "\n"); > } > } > > @@ -747,22 +816,106 @@ ovs_router_flush(bool flush_all) > free(node); > } > } > + cls_destroy(&cls_local, flush_all); > cls_destroy(&cls_main, flush_all); > + cls_destroy(&cls_default, flush_all); > ovs_mutex_unlock(&mutex); > seq_change(tnl_conf_seq); > } > > +static void > +init_standard_rules(void) > +{ > + /* Add default rules using same priorities as Linux kernel does. */ > + ovs_router_rule_add(0, false, 0, &in6addr_any, CLS_LOCAL); > + ovs_router_rule_add(0x7FFE, false, 0, &in6addr_any, CLS_MAIN); > + ovs_router_rule_add(0x7FFF, false, 0, &in6addr_any, CLS_DEFAULT); > +} > + > +static void > +rule_destroy_cb(struct router_rule *rule) > +{ > + ovsrcu_postpone(free, rule); > +} > + > +void > +ovs_router_rules_flush(void) A thread safety annotation would be good here. > +{ > + struct router_rule *rule; > + > + PVECTOR_FOR_EACH (rule, &rules) { > + pvector_remove(&rules, rule); > + ovsrcu_postpone(rule_destroy_cb, rule); > + } > + pvector_publish(&rules); > + init_standard_rules(); > +} > + > static void > ovs_router_flush_handler(void *aux OVS_UNUSED) > { > + ovs_router_rules_flush(); > ovs_router_flush(true); > > ovs_mutex_lock(&mutex); > + pvector_destroy(&rules); > hmap_destroy(&clsmap); > hmap_init(&clsmap); > ovs_mutex_unlock(&mutex); > } > > +bool > +ovs_router_is_referenced(uint32_t table) > +{ > + struct router_rule *rule; > + > + if (route_table_is_standard_id(table)) { > + return true; > + } > + > + PVECTOR_FOR_EACH (rule, &rules) { > + if (rule->lookup_table == table) { > + return true; > + } > + } A lookup in the hash map may be better here instead of iteration. > + return false; > +} > + > +static int > +rule_pvec_prio(uint32_t prio) > +{ > + /* Invert the priority of a pvector entry to reverse the default sorting > + * order (descending) to maintain the standard rules semantic where 0 is > + * the highest priority and UINT_MAX is the lowest. The mapping is the > + * following: > + * > + * 0 -> INT_MAX > + * INT_MAX -> 0 > + * UINT_MAX -> INT_MIN > + */ > + if (prio <= INT_MAX) { > + return -(INT_MIN + (int) prio + 1); > + } else { > + return -(INT_MIN + INT_MAX + (int) (prio - INT_MAX - 1) + 1) - 1; The math seems fishy here, especially the INT_MIN + INT_MAX part that is just -1. Unless I'm missing something, this should be exquivalent to -(int) (prio - INT_MAX - 1) - 1, as INT_MIN + INT_MAX + 1 is just 0. This also assumes that int is 4 bytes long, which is not always the case. Alternatively we may just ignore rules above INT_MAX with a WARN_ONCE(), as it is unlikely that we'll have more rules above UNIT16_MAX in most cases, or convert pvector to use int64_t. > + } > +} > + > +void > +ovs_router_rule_add(uint32_t prio, bool invert, uint8_t src_len, > + const struct in6_addr *from, uint32_t lookup_table) > +{ > + struct router_rule *rule = xzalloc(sizeof *rule); > + > + rule->prio = prio; > + rule->invert = invert; > + rule->src_len = src_len; > + rule->from_addr = *from; > + rule->lookup_table = lookup_table; > + > + pvector_insert(&rules, rule, rule_pvec_prio(prio)); > + pvector_publish(&rules); > +} > + > void > ovs_router_init(void) > { > @@ -771,7 +924,11 @@ ovs_router_init(void) > if (ovsthread_once_start(&once)) { > ovs_mutex_lock(&mutex); > hmap_init(&clsmap); > + classifier_init(&cls_local, NULL); > classifier_init(&cls_main, NULL); > + classifier_init(&cls_default, NULL); > + pvector_init(&rules); > + init_standard_rules(); > ovs_mutex_unlock(&mutex); > fatal_signal_add_hook(ovs_router_flush_handler, NULL, NULL, true); > unixctl_command_register("ovs/route/add", > diff --git a/lib/ovs-router.h b/lib/ovs-router.h > index e2d995f3a8dc..a6e100e2f79d 100644 > --- a/lib/ovs-router.h > +++ b/lib/ovs-router.h > @@ -27,7 +27,9 @@ extern "C" { > #endif > > enum { > + CLS_DEFAULT = 253, /* RT_TABLE_DEFAULT */ > CLS_MAIN = 254, /* RT_TABLE_MAIN */ > + CLS_LOCAL = 255, /* RT_TABLE_LOCAL */ We need build assertions for these in route-table.c. And then we can remove these comments. > CLS_ALL = UINT_MAX, UINT32_MAX > }; > > @@ -35,6 +37,8 @@ bool ovs_router_lookup(uint32_t mark, const struct in6_addr > *ip_dst, > char output_netdev[], > struct in6_addr *src, struct in6_addr *gw); > void ovs_router_init(void); > +bool ovs_router_is_empty(uint32_t table); > +bool ovs_router_is_referenced(uint32_t table); > void ovs_router_insert(uint32_t table, uint32_t mark, > const struct in6_addr *ip_dst, > uint8_t plen, bool local, > @@ -46,7 +50,10 @@ void ovs_router_force_insert(uint32_t table, uint32_t mark, > const char output_netdev[], > const struct in6_addr *gw, > const struct in6_addr *prefsrc); > +void ovs_router_rule_add(uint32_t prio, bool invert, uint8_t src_len, > + const struct in6_addr *from, uint32_t lookup_table); > void ovs_router_flush(bool flush_all); > +void ovs_router_rules_flush(void); > > void ovs_router_disable_system_routing_table(void); > > diff --git a/lib/packets.c b/lib/packets.c > index a0bb2ad482f1..1bb1f3f779fa 100644 > --- a/lib/packets.c > +++ b/lib/packets.c > @@ -1081,6 +1081,26 @@ ipv6_is_cidr(const struct in6_addr *netmask) > return true; > } > > +bool > +ipv6_addr_equals_masked(const struct in6_addr *a, const struct in6_addr *b, > + int m) > +{ > + struct in6_addr mask; > + struct in6_addr ma; > + struct in6_addr mb; > + > + if (((IN6_IS_ADDR_V4MAPPED(a) || IN6_IS_ADDR_V4MAPPED(b)) && m == 32) || > + m == 128) { > + return ipv6_addr_equals(a, b); > + } > + > + mask = ipv6_create_mask(m); > + ma = ipv6_addr_bitand(a, &mask); > + mb = ipv6_addr_bitand(b, &mask); > + > + return ipv6_addr_equals(&ma, &mb); > +} > + > /* Populates 'b' with an Ethernet II packet headed with the given 'eth_dst', > * 'eth_src' and 'eth_type' parameters. A payload of 'size' bytes is > allocated > * in 'b' and returned. This payload may be populated with appropriate > diff --git a/lib/packets.h b/lib/packets.h > index 6eba07700a69..6e263d81c6e4 100644 > --- a/lib/packets.h > +++ b/lib/packets.h > @@ -1608,6 +1608,8 @@ bool ipv6_is_zero(const struct in6_addr *a); > struct in6_addr ipv6_create_mask(int mask); > int ipv6_count_cidr_bits(const struct in6_addr *netmask); > bool ipv6_is_cidr(const struct in6_addr *netmask); > +bool ipv6_addr_equals_masked(const struct in6_addr *a, > + const struct in6_addr *b, int m); Should expand the 'm' into something more meaningful, e.g. 'mask' or 'plen'. > > bool ipv6_parse(const char *s, struct in6_addr *ip); > char *ipv6_parse_masked(const char *s, struct in6_addr *ipv6, > diff --git a/lib/route-table-bsd.c b/lib/route-table-bsd.c > index 6488df2637c8..ce6eb8771ac8 100644 > --- a/lib/route-table-bsd.c > +++ b/lib/route-table-bsd.c > @@ -47,7 +47,9 @@ bool > route_table_is_standard_id(uint32_t table_id) > { > return !table_id > - || table_id == CLS_MAIN; > + || table_id == CLS_DEFAULT > + || table_id == CLS_MAIN > + || table_id == CLS_LOCAL; > } > > bool > diff --git a/lib/route-table-stub.c b/lib/route-table-stub.c > index 5bf00e820796..761dea3c4699 100644 > --- a/lib/route-table-stub.c > +++ b/lib/route-table-stub.c > @@ -22,7 +22,9 @@ bool > route_table_is_standard_id(uint32_t table_id) > { > return !table_id > - || table_id == CLS_MAIN; > + || table_id == CLS_DEFAULT > + || table_id == CLS_MAIN > + || table_id == CLS_LOCAL; > } > > bool > diff --git a/lib/route-table.c b/lib/route-table.c > index da7b276544e5..85ded4716d7c 100644 > --- a/lib/route-table.c > +++ b/lib/route-table.c > @@ -23,6 +23,7 @@ > #include <netinet/in.h> > #include <arpa/inet.h> > #include <sys/socket.h> > +#include <linux/fib_rules.h> > #include <linux/rtnetlink.h> > #include <net/if.h> > > @@ -59,13 +60,23 @@ static struct nln *nln = NULL; > static struct route_table_msg nln_rtmsg_change; > static struct nln_notifier *route_notifier = NULL; > static struct nln_notifier *route6_notifier = NULL; > +static struct nln_notifier *rule_notifier = NULL; > +static struct nln_notifier *rule6_notifier = NULL; > static struct nln_notifier *name_notifier = NULL; > > static bool route_table_valid = false; > +static bool rules_valid = false; > + > +static int route_nln_parse(struct ofpbuf *, void *change); > + > +static void rule_handle_msg(const struct route_table_msg *); > +static int rule_parse(struct ofpbuf *, void *change); > > static void route_table_reset(void); > -static void route_table_handle_msg(const struct route_table_msg *, void > *aux); > +static void route_table_handle_msg(const struct route_table_msg *, void *aux, > + uint32_t table); > static void route_table_change(struct route_table_msg *, void *aux); > +static void rules_change(const struct route_table_msg *, void *aux); > static void route_map_clear(void); > > static void name_table_init(void); > @@ -105,9 +116,11 @@ route_table_init(void) > ovs_assert(!nln); > ovs_assert(!route_notifier); > ovs_assert(!route6_notifier); > + ovs_assert(!rule_notifier); > + ovs_assert(!rule6_notifier); > > ovs_router_init(); > - nln = nln_create(NETLINK_ROUTE, route_table_parse, &nln_rtmsg_change); > + nln = nln_create(NETLINK_ROUTE, route_nln_parse, &nln_rtmsg_change); > > route_notifier = > nln_notifier_create(nln, RTNLGRP_IPV4_ROUTE, > @@ -116,6 +129,13 @@ route_table_init(void) > nln_notifier_create(nln, RTNLGRP_IPV6_ROUTE, > (nln_notify_func *) route_table_change, NULL); > > + rule_notifier = > + nln_notifier_create(nln, RTNLGRP_IPV4_RULE, > + (nln_notify_func *) rules_change, NULL); > + rule6_notifier = > + nln_notifier_create(nln, RTNLGRP_IPV6_RULE, > + (nln_notify_func *) rules_change, NULL); > + > route_table_reset(); > name_table_init(); > > @@ -132,7 +152,7 @@ route_table_run(void) > rtnetlink_run(); > nln_run(nln); > > - if (!route_table_valid) { > + if (!route_table_valid || !rules_valid) { > route_table_reset(); > } > } > @@ -191,7 +211,7 @@ route_table_dump_one_table(uint32_t id, > if (!(nlmsghdr->nlmsg_flags & NLM_F_DUMP_FILTERED)) { > filtered = false; > } > - handle_msg_cb(&msg, aux); > + handle_msg_cb(&msg, aux, id); > route_data_destroy(&msg.rd); > } > } > @@ -201,6 +221,36 @@ route_table_dump_one_table(uint32_t id, > return filtered; > } > > +static void > +rules_dump(void) > +{ > + uint64_t reply_stub[NL_DUMP_BUFSIZE / 8]; > + struct ofpbuf request, reply, buf; > + struct fib_rule_hdr *rq_msg; > + struct nl_dump dump; > + > + ofpbuf_init(&request, 0); > + > + nl_msg_put_nlmsghdr(&request, sizeof *rq_msg, RTM_GETRULE, > NLM_F_REQUEST); > + > + rq_msg = ofpbuf_put_zeros(&request, sizeof *rq_msg); > + rq_msg->family = AF_UNSPEC; > + > + nl_dump_start(&dump, NETLINK_ROUTE, &request); > + ofpbuf_uninit(&request); > + > + ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub); > + while (nl_dump_next(&dump, &reply, &buf)) { > + struct route_table_msg msg; > + > + if (rule_parse(&reply, &msg)) { > + rule_handle_msg(&msg); > + } > + } > + ofpbuf_uninit(&buf); > + nl_dump_done(&dump); > +} > + > static void > route_table_reset(void) > { > @@ -213,6 +263,7 @@ route_table_reset(void) > route_map_clear(); > netdev_get_addrs_list_flush(); > route_table_valid = true; > + rules_valid = true; > rt_change_seq++; > > COVERAGE_INC(route_table_dump); > @@ -224,6 +275,156 @@ route_table_reset(void) > break; > } > } > + rules_dump(); > +} > + > +static void > +rule_handle_msg(const struct route_table_msg *change) > +{ > + if (change->relevant) { > + const struct rule_data *rd = &change->rud; > + > + if (!route_table_is_standard_id(rd->lookup_table)) { Not sure why we need this check. > + route_table_dump_one_table(rd->lookup_table, > + route_table_handle_msg, NULL); > + } > + ovs_router_rule_add(rd->prio, rd->invert, rd->src_len, > &rd->from_addr, > + rd->lookup_table); > + } > +} > + > +static int route_nln_parse(struct ofpbuf *buf, void *change_) > +{ > + const struct nlmsghdr *nlmsg = buf->data; > + > + if (nlmsg->nlmsg_type == RTM_NEWROUTE || > + nlmsg->nlmsg_type == RTM_DELROUTE) { > + return route_table_parse(buf, change_); > + } else if (nlmsg->nlmsg_type == RTM_NEWRULE || > + nlmsg->nlmsg_type == RTM_DELRULE) { > + return rule_parse(buf, change_); > + } > + > + VLOG_DBG_RL(&rl, "received unsupported rtnetlink route message"); > + return 0; > +} > + > +/* Return RTNLGRP_IPV4_RULE or RTNLGRP_IPV6_RULE on success, 0 on parse > + * error. */ > +static int > +rule_parse(struct ofpbuf *buf, void *change_) > +{ > + struct route_table_msg *change = change_; > + bool parsed, ipv4 = false; > + > + static const struct nl_policy policy[] = { > + [FRA_PRIORITY] = { .type = NL_A_U32, .optional = false }, > + [FRA_SRC] = { .type = NL_A_U32, .optional = true }, > + [FRA_TABLE] = { .type = NL_A_U32, .optional = true }, > + }; > + > + static const struct nl_policy policy6[] = { > + [FRA_PRIORITY] = { .type = NL_A_U32, .optional = false }, Too many spaces at the end here and above. > + [FRA_SRC] = { .type = NL_A_IPV6, .optional = true }, > + [FRA_TABLE] = { .type = NL_A_U32, .optional = true }, > + }; > + > + struct nlattr *attrs[ARRAY_SIZE(policy)]; > + const struct fib_rule_hdr *frh; > + > + frh = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *frh); > + if (frh->action != FR_ACT_TO_TBL || frh->tos || frh->dst_len) { Should also check that frh is not NULL. > + /* unsupported rule */ Start with a capital, end with a dot. > + return 0; > + } > + > + if (frh->family == AF_INET) { > + parsed = nl_policy_parse(buf, NLMSG_HDRLEN + > + sizeof(struct fib_rule_hdr), policy, attrs, > + ARRAY_SIZE(policy)); > + ipv4 = true; > + } else if (frh->family == AF_INET6) { > + parsed = nl_policy_parse(buf, NLMSG_HDRLEN + > + sizeof(struct fib_rule_hdr), policy6, attrs, > + ARRAY_SIZE(policy6)); > + } else { > + VLOG_DBG_RL(&rl, "received non AF_INET rtnetlink route message"); > + return 0; > + } > + > + if (parsed) { > + const struct nlmsghdr *nlmsg; > + > + nlmsg = buf->data; > + > + memset(change, 0, sizeof *change); > + change->relevant = true; > + change->nlmsg_type = nlmsg->nlmsg_type; > + change->rud.invert = false; > + change->rud.src_len = frh->src_len; > + change->rud.lookup_table = frh->table; > + > + if (frh->flags & FIB_RULE_INVERT) { > + /* Invert the matching of rule selector */ Period at the end of the comment. > + change->rud.invert = true; > + } > + > + if (attrs[FRA_PRIORITY]) { > + change->rud.prio = nl_attr_get_u32(attrs[FRA_PRIORITY]); > + } > + > + if (attrs[FRA_SRC]) { > + if (ipv4) { > + ovs_be32 src; > + src = nl_attr_get_be32(attrs[FRA_SRC]); Either add an empty line after the declatration, or combine these two lines. The line might be good in both cases though. > + in6_addr_set_mapped_ipv4(&change->rud.from_addr, src); > + } else { > + change->rud.from_addr = nl_attr_get_in6_addr(attrs[FRA_SRC]); > + } > + } else if (ipv4) { > + in6_addr_set_mapped_ipv4(&change->rud.from_addr, 0); > + } > + > + if (attrs[FRA_TABLE]) { > + change->rud.lookup_table = nl_attr_get_u32(attrs[FRA_TABLE]); > + if (route_table_is_standard_id(change->rud.lookup_table)) { Not sure why these are not relevant. > + change->relevant = false; > + } > + } else { > + change->relevant = false; > + } > + > + if (change->rud.invert && !change->rud.src_len) { > + change->relevant = false; > + } > + } else { > + VLOG_DBG_RL(&rl, "received unparseable rtnetlink rule message"); > + return 0; > + } > + > + /* Check if there are any additional attributes that aren't supported > + * currently by OVS rule-based route lookup. */ > + if (change->relevant) { > + size_t offset = NLMSG_HDRLEN + sizeof(struct fib_rule_hdr); > + struct nlattr *nla; > + size_t left; > + > + NL_ATTR_FOR_EACH (nla, left, ofpbuf_at(buf, offset, 0), > + buf->size - offset) { > + uint16_t type = nl_attr_type(nla); > + > + if ((type > FRA_SRC && type < FRA_PRIORITY) || > + (type > FRA_PRIORITY && type < FRA_SUPPRESS_PREFIXLEN) || > + (type > FRA_TABLE && type < FRA_PROTOCOL) || > + type > FRA_PROTOCOL || type > FRA_MAX) { > + change->relevant = false; > + break; Some of these values are relatively new. We should have fallback definitions for values above FRA_FLOW, in case they are not defined. The type > FRA_MAX is a little pointless here as well. We should also have a system test for this, i.e. add some rules that OVS doesn't understand and make sure they are ignored. > + } > + } > + } > + > + /* Success. */ > + return ipv4 ? RTNLGRP_IPV4_RULE : RTNLGRP_IPV6_RULE; > } > > /* Returns true if the given route requires nexthop information (output > @@ -533,7 +734,7 @@ route_table_change(struct route_table_msg *change, void > *aux OVS_UNUSED) > { > if (!change > || (change->relevant > - && route_table_is_standard_id(change->rd.rta_table_id))) { > + && ovs_router_is_referenced(change->rd.rta_table_id))) { > route_table_valid = false; > } > if (change) { > @@ -543,7 +744,7 @@ route_table_change(struct route_table_msg *change, void > *aux OVS_UNUSED) > > static void > route_table_handle_msg(const struct route_table_msg *change, > - void *aux OVS_UNUSED) > + void *aux OVS_UNUSED, uint32_t table) > { > if (change->relevant && change->nlmsg_type == RTM_NEWROUTE > && !ovs_list_is_empty(&change->rd.nexthops)) { > @@ -556,7 +757,7 @@ route_table_handle_msg(const struct route_table_msg > *change, > rdnh = CONTAINER_OF(ovs_list_front(&change->rd.nexthops), > const struct route_data_nexthop, nexthop_node); > > - ovs_router_insert(CLS_MAIN, rd->rta_mark, &rd->rta_dst, > + ovs_router_insert(table, rd->rta_mark, &rd->rta_dst, > IN6_IS_ADDR_V4MAPPED(&rd->rta_dst) > ? rd->rtm_dst_len + 96 : rd->rtm_dst_len, > rd->rtn_local, rdnh->ifname, &rdnh->addr, > @@ -564,9 +765,19 @@ route_table_handle_msg(const struct route_table_msg > *change, > } > } > > +static void > +rules_change(const struct route_table_msg *change OVS_UNUSED, > + void *aux OVS_UNUSED) > +{ > + if (!change || change->relevant) { > + rules_valid = false; > + } > +} > + > static void > route_map_clear(void) > { > + ovs_router_rules_flush(); > ovs_router_flush(false); > } > > diff --git a/lib/route-table.h b/lib/route-table.h > index 5e060581fd25..64858a357187 100644 > --- a/lib/route-table.h > +++ b/lib/route-table.h > @@ -143,12 +143,24 @@ struct route_data { > uint32_t rta_priority; /* 0 if missing. */ > }; > > +struct rule_data { > + bool invert; > + uint32_t prio; > + uint8_t src_len; > + struct in6_addr from_addr; > + uint32_t lookup_table; > +}; > + > /* A digested version of a route message sent down by the kernel to indicate > - * that a route has changed. */ > + * that a route or a rule has changed. */ > struct route_table_msg { > bool relevant; /* Should this message be processed? */ > uint16_t nlmsg_type; /* e.g. RTM_NEWROUTE, RTM_DELROUTE. */ Outdated comment. > - struct route_data rd; /* Data parsed from this message. */ > + union { /* Data parsed from this message, depending on > + * nlmsg_type. */ > + struct route_data rd; > + struct rule_data rud; > + }; > }; > > uint64_t route_table_get_change_seq(void); > @@ -160,7 +172,7 @@ bool route_table_fallback_lookup(const struct in6_addr > *ip6_dst, > struct in6_addr *gw6); > > typedef void route_table_handle_msg_callback(const struct route_table_msg *, > - void *aux); > + void *aux, uint32_t table); > > bool route_table_is_standard_id(uint32_t table_id); > bool route_table_dump_one_table(uint32_t id, > diff --git a/tests/test-lib-route-table.c b/tests/test-lib-route-table.c > index 4d4fbe0d35d7..4f9695f07c30 100644 > --- a/tests/test-lib-route-table.c > +++ b/tests/test-lib-route-table.c > @@ -67,7 +67,8 @@ rt_table_name(uint32_t id) > > static void > test_lib_route_table_handle_msg(const struct route_table_msg *change, > - void *data OVS_UNUSED) > + void *data OVS_UNUSED, > + uint32_t table OVS_UNUSED) > { > struct ds nexthop_addr = DS_EMPTY_INITIALIZER; > struct ds rta_prefsrc = DS_EMPTY_INITIALIZER; > @@ -115,7 +116,7 @@ static void > test_lib_route_table_change(struct route_table_msg *change, > void *aux OVS_UNUSED) > { > - test_lib_route_table_handle_msg(change, NULL); > + test_lib_route_table_handle_msg(change, NULL, 0); > route_data_destroy(&change->rd); > } > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev