On 10/27/25 1:42 PM, Dima Chumak via dev wrote:
> Introduce support for route lookup across several routing tables
> following a priority from highest to lowest.
>
> To properly prioritize route lookup order, the single router table in
> OVS is now split into three tables: local, main and default.
> Corresponding routing rules are created by default, where local table
> has the highest priority and main and default tables have two lowest
> priorities respectively. All non-standard tables can be added with a
> priority in between local and main.
>
> All routing tables are created by reading the Routing Policy Database
> (RPDB) from the kernel, on systems that support it, and importing only
> those tables which are referenced in the RPDB rules with a table lookup
> action. The table IDs and rule priority are copied from the kernel RPDB
> as is.
>
> Current implementation only supports RPDB rules with a source address
> selector, in form of '[not] from IP' match.
>
> Signed-off-by: Dima Chumak <[email protected]>
> ---
> lib/netdev-dummy.c | 4 +-
> lib/ovs-router.c | 223 ++++++++++++++++++++++++------
> lib/ovs-router.h | 7 +
> lib/packets.c | 19 +++
> lib/packets.h | 2 +
> lib/route-table.c | 256 ++++++++++++++++++++++++++++++++---
> lib/route-table.h | 22 ++-
> tests/ovs-router.at | 10 --
> tests/test-lib-route-table.c | 5 +-
> 9 files changed, 469 insertions(+), 79 deletions(-)
>
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index bad86d3c4c76..5e75012cd57a 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c
> @@ -2223,7 +2223,7 @@ netdev_dummy_ip4addr(struct unixctl_conn *conn, int
> argc OVS_UNUSED,
>
> in6_addr_set_mapped_ipv4(&ip6, ip.s_addr);
> /* Insert local route entry for the new address. */
> - ovs_router_force_insert(CLS_MAIN, 0, &ip6, 32 + 96, true,
> + ovs_router_force_insert(CLS_LOCAL, 0, &ip6, 32 + 96, true,
> argv[1], &in6addr_any, &ip6);
> /* Insert network route entry for the new address. */
> ovs_router_force_insert(CLS_MAIN, 0, &ip6, plen + 96, false,
> @@ -2260,7 +2260,7 @@ netdev_dummy_ip6addr(struct unixctl_conn *conn, int
> argc OVS_UNUSED,
> netdev_dummy_add_in6(netdev, &ip6, &mask);
>
> /* Insert local route entry for the new address. */
> - ovs_router_force_insert(CLS_MAIN, 0, &ip6, 128, true, argv[1],
> + ovs_router_force_insert(CLS_LOCAL, 0, &ip6, 128, true, argv[1],
> &in6addr_any, &ip6);
> /* Insert network route entry for the new address. */
> ovs_router_force_insert(CLS_MAIN, 0, &ip6, plen, false, argv[1],
> diff --git a/lib/ovs-router.c b/lib/ovs-router.c
> index 70644f7bd4f2..808e8b244c35 100644
> --- a/lib/ovs-router.c
> +++ b/lib/ovs-router.c
> @@ -43,6 +43,7 @@
> #include "seq.h"
> #include "ovs-thread.h"
> #include "route-table.h"
> +#include "pvector.h"
> #include "tnl-ports.h"
> #include "unixctl.h"
> #include "util.h"
> @@ -57,10 +58,20 @@ struct clsmap_node {
> struct classifier cls;
> };
>
> +struct router_rule {
> + uint32_t prio;
> + bool invert;
> + bool ipv4;
> + uint8_t src_prefix;
> + struct in6_addr from_addr;
> + uint32_t lookup_table;
> +};
> +
> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>
> static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
> static struct cmap clsmap = CMAP_INITIALIZER;
> +static struct pvector rules;
>
> /* By default, use the system routing table. For system-independent testing,
> * the unit tests disable using the system routing table. */
> @@ -162,18 +173,20 @@ ovs_router_lookup(uint32_t mark, const struct in6_addr
> *ip6_dst,
> struct in6_addr *src, struct in6_addr *gw)
> {
> struct flow flow = {.ipv6_dst = *ip6_dst, .pkt_mark = mark};
> - struct classifier *cls_main = cls_find(CLS_MAIN);
> - const struct cls_rule *cr;
> -
> - if (!cls_main) {
> - return false;
> - }
> + const struct in6_addr *from_src = src;
> + const struct cls_rule *cr = NULL;
> + struct router_rule *rule;
>
> if (src && ipv6_addr_is_set(src)) {
> - const struct cls_rule *cr_src;
> struct flow flow_src = {.ipv6_dst = *src, .pkt_mark = mark};
> + struct classifier *cls_local = cls_find(CLS_LOCAL);
> + const struct cls_rule *cr_src;
> +
> + if (!cls_local) {
> + return false;
> + }
>
> - cr_src = classifier_lookup(cls_main, OVS_VERSION_MAX, &flow_src,
> + cr_src = classifier_lookup(cls_local, OVS_VERSION_MAX, &flow_src,
> NULL, NULL);
> if (cr_src) {
> struct ovs_router_entry *p_src = ovs_router_entry_cast(cr_src);
> @@ -185,7 +198,44 @@ ovs_router_lookup(uint32_t mark, const struct in6_addr
> *ip6_dst,
> }
> }
>
> - cr = classifier_lookup(cls_main, OVS_VERSION_MAX, &flow, NULL, NULL);
> + if (!from_src) {
> + from_src = &in6addr_any;
> + }
> +
> + PVECTOR_FOR_EACH (rule, &rules) {
> + uint8_t plen = rule->ipv4 ? rule->src_prefix + 96 : rule->src_prefix;
> + bool matched = (IN6_IS_ADDR_V4MAPPED(from_src) ? rule->ipv4 : true)
> &&
> + (!rule->src_prefix ||
> + ipv6_addr_equals_masked(&rule->from_addr, from_src,
> + plen));
> +
> + if (rule->invert) {
> + matched = !matched;
> + }
So, if we have a negative ipv6 rule, e.g. "not from=fc08::/64" and we have
an ipv4 source address, we'll get IN6_IS_ADDR_V4MAPPED(from_src) --> true,
rule->ipv4 --> false, so matched = false && (...), which is false. Then
we check the inversion and suddenly get matched == true and go to lookup
a route in the table. If somehow that table contains ipv4 routes, we can
find them, which should not be happening.
We should not proceed here if the address family doesn't match the rule,
otherwise all inverted rules will match traffic from a wrong family.
There is also a problem with in6addr_any here, which is set when there is
no source address specified. This complicates the check as it's not a
ipv4 mapped address, even if the destination we're looking up is ipv4.
This may also lead to a match for a rule from the wrong family.
I'm guessing, this is done this way because we're also skipping the dump
of the ipv6 default rules and we're not creating ipv6 default rules in
the init_standard_rules() either. So, somehow this code expects that
standard rules are not family specific. This will cause issues if we try
to avoid lookups for the wrong family. One more little problem here is
that there is no standard rule for the "default" table for ipv6 in Linux,
only for local and main. This at least is inconsistent.
To fix that, I think, we actually need to add standard rules for both
families and not skip ones dumped from the kernel. Then we can filter
and avoid matching rules from a wrong family during the lookup.
> +
> + if (matched) {
> + struct classifier *cls = cls_find(rule->lookup_table);
> +
> + if (!cls) {
> + /* A rule can be added before the table is created */
nit: Missing period at the end of the sentence.
> + continue;
> + }
> + cr = classifier_lookup(cls, OVS_VERSION_MAX, &flow, NULL,
> + NULL);
> + if (cr) {
> + struct ovs_router_entry *p = ovs_router_entry_cast(cr);
> + /* Avoid matching mapped IPv4 of a packet against default
> IPv6
> + * route entry. Either packet dst is IPv6 or both packet and
nit: Double spaces between sentences.
> + * route entry dst are mapped IPv4.
> + */
> + if (!IN6_IS_ADDR_V4MAPPED(ip6_dst) ||
> + IN6_IS_ADDR_V4MAPPED(&p->nw_addr)) {
> + break;
> + }
> + }
> + }
> + }
> +
<snip>
> +static int
> +rule_pvec_prio(uint32_t prio)
> +{
> + /* Invert the priority of a pvector entry to reverse the default sorting
> + * order (descending) to maintain the standard rules semantic where 0 is
> + * the highest priority and UINT_MAX is the lowest. The mapping is the
nit: Double spaces.
So, what I'm proposing is something like this:
diff --git a/lib/ovs-router.c b/lib/ovs-router.c
index 99660aa5d..3655451f4 100644
--- a/lib/ovs-router.c
+++ b/lib/ovs-router.c
@@ -200,15 +200,24 @@ ovs_router_lookup(uint32_t mark, const struct in6_addr
*ip6_dst,
}
if (!from_src) {
- from_src = &in6addr_any;
+ if (IN6_IS_ADDR_V4MAPPED(ip6_dst)) {
+ from_src = &in6addr_v4mapped_any;
+ } else {
+ from_src = &in6addr_any;
+ }
}
PVECTOR_FOR_EACH (rule, &rules) {
uint8_t plen = rule->ipv4 ? rule->src_prefix + 96 : rule->src_prefix;
- bool matched = (IN6_IS_ADDR_V4MAPPED(from_src) ? rule->ipv4 : true) &&
- (!rule->src_prefix ||
- ipv6_addr_equals_masked(&rule->from_addr, from_src,
- plen));
+ bool matched;
+
+ if ((IN6_IS_ADDR_V4MAPPED(from_src) && !rule->ipv4) ||
+ (!IN6_IS_ADDR_V4MAPPED(from_src) && rule->ipv4)) {
+ continue;
+ }
+
+ matched = (!rule->src_prefix ||
+ ipv6_addr_equals_masked(&rule->from_addr, from_src, plen));
if (rule->invert) {
matched = !matched;
@@ -838,9 +847,17 @@ static void
init_standard_rules(void)
{
/* Add default rules using same priorities as Linux kernel does. */
- ovs_router_rule_add(0, false, 0, &in6addr_any, CLS_LOCAL, true);
- ovs_router_rule_add(0x7FFE, false, 0, &in6addr_any, CLS_MAIN, true);
- ovs_router_rule_add(0x7FFF, false, 0, &in6addr_any, CLS_DEFAULT, true);
+ ovs_router_rule_add(0, false, 0,
+ &in6addr_v4mapped_any, CLS_LOCAL, true);
+ ovs_router_rule_add(0x7FFE, false, 0,
+ &in6addr_v4mapped_any, CLS_MAIN, true);
+ ovs_router_rule_add(0x7FFF, false, 0,
+ &in6addr_v4mapped_any, CLS_DEFAULT, true);
+
+ ovs_router_rule_add(0, false, 0,
+ &in6addr_any, CLS_LOCAL, false);
+ ovs_router_rule_add(0x7FFE, false, 0,
+ &in6addr_any, CLS_MAIN, false);
}
static void
diff --git a/lib/packets.c b/lib/packets.c
index eb3f7d837..c4a7c45a9 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -38,6 +38,7 @@
const struct in6_addr in6addr_exact = IN6ADDR_EXACT_INIT;
const struct in6_addr in6addr_all_hosts = IN6ADDR_ALL_HOSTS_INIT;
const struct in6_addr in6addr_all_routers = IN6ADDR_ALL_ROUTERS_INIT;
+const struct in6_addr in6addr_v4mapped_any = IN6ADDR_V4MAPPED_ANY_INIT;
struct in6_addr
flow_tnl_dst(const struct flow_tnl *tnl)
diff --git a/lib/packets.h b/lib/packets.h
index 9a974f6f8..adae3b71b 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -1174,6 +1174,11 @@ extern const struct in6_addr in6addr_all_routers;
#define IN6ADDR_ALL_ROUTERS_INIT { { {
0xff,0x02,0x00,0x00,0x00,0x00,0x00,0x00, \
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02
} } }
+extern const struct in6_addr in6addr_v4mapped_any;
+#define IN6ADDR_V4MAPPED_ANY_INIT \
+ { { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 } } }
+
static inline bool ipv6_addr_equals(const struct in6_addr *a,
const struct in6_addr *b)
{
diff --git a/lib/route-table.c b/lib/route-table.c
index 40be2762c..917469642 100644
--- a/lib/route-table.c
+++ b/lib/route-table.c
@@ -95,7 +95,6 @@ static void route_table_handle_msg(const struct
route_table_msg *, void *aux,
static void route_table_change(struct route_table_msg *, void *aux);
static void rules_change(const struct route_table_msg *, void *aux);
static void route_map_clear(void);
-static bool is_standard_table_id(uint32_t table_id);
static void name_table_init(void);
static void name_table_change(const struct rtnetlink_change *, void *);
@@ -316,7 +315,7 @@ static int
rule_parse(struct ofpbuf *buf, void *change_)
{
struct route_table_msg *change = change_;
- bool ipv4 = false, from_addr_any = false;
+ bool ipv4 = false;
bool parsed;
static const struct nl_policy policy[] = {
@@ -389,7 +388,6 @@ rule_parse(struct ofpbuf *buf, void *change_)
} else {
change->rud.from_addr = in6addr_any;
}
- from_addr_any = true;
}
if (attrs[FRA_TABLE]) {
@@ -406,14 +404,6 @@ rule_parse(struct ofpbuf *buf, void *change_)
return 0;
}
- /* Skip standard IPv6 rules for local and main tables to avoid duplicates
- * as they end up being the same as the similar IPv4 rules.
- */
- if (!ipv4 && (change->rud.prio == 0 || change->rud.prio == 0x7FFE) &&
- from_addr_any && is_standard_table_id(change->rud.lookup_table)) {
- change->relevant = false;
- }
-
/* Check if there are any additional attributes that aren't supported
* currently by OVS rule-based route lookup. */
if (change->relevant) {
@@ -732,15 +722,6 @@ route_table_parse(struct ofpbuf *buf, void *change)
nlmsg, rtm, NULL, change);
}
-static bool
-is_standard_table_id(uint32_t table_id)
-{
- return !table_id
- || table_id == RT_TABLE_DEFAULT
- || table_id == RT_TABLE_MAIN
- || table_id == RT_TABLE_LOCAL;
-}
-
static void
route_table_change(struct route_table_msg *change, void *aux OVS_UNUSED)
{
---
This should solve the issue of matching on the rule from a wrong family.
But also makes a code a bit easier to understand, IMO.
And there seems to be no tests covering inverted matches, we should add
some. Including one for the wrong ip family match.
What do you think? Did I miss something?
On side effect of this change though is that we now get both ipv4 and ipv6
standard rules in the rule/show command and it doesn't look particularly
great. The 'Cached' vs 'Cached6' makes it understandable, but doesn't make
it look better. Since we already have the -6 argument for the add/del
commands for the rules, maybe we should add -6 argument to the rules/show?
This will be consistent with the ip command for the kernel as well.
Best regards, Ilya Maximets.
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev