On Wed, Nov 12, 2025 at 10:05:24AM +0100, Ales Musil via dev wrote:
> When ovn-controller tried to learn two routes with the same
> destination and next-hop, but different metric, it would end up
> in a transaction error and loop. Make sure we are also checking
> routes that are learned in the current I-P run to prevent that
> issue.
>
> Fixes: 866a5014ae45 ("controller: Support learning routes.")
> Reported-at: https://bugs.launchpad.net/ubuntu/+source/ovn/+bug/2127934
> Reported-at: https://issues.redhat.com/browse/FDP-2081
> Reported-by: Martin Kalcok <[email protected]>
> Signed-off-by: Ales Musil <[email protected]>
Thanks a lot
Acked-by: Felix Huettner <[email protected]>
> ---
> controller/route-exchange.c | 20 +++++++++++-----
> tests/system-ovn.at | 48 ++++++++++++++++++++++++-------------
> 2 files changed, 46 insertions(+), 22 deletions(-)
>
> diff --git a/controller/route-exchange.c b/controller/route-exchange.c
> index d8deae1da..8fced5f3c 100644
> --- a/controller/route-exchange.c
> +++ b/controller/route-exchange.c
> @@ -51,6 +51,7 @@ struct route_entry {
> struct hmap_node hmap_node;
>
> const struct sbrec_learned_route *sb_route;
> + bool stale;
> };
>
> static uint32_t
> @@ -87,10 +88,14 @@ maintained_route_table_add(uint32_t table_id)
>
> static void
> route_add_entry(struct hmap *routes,
> - const struct sbrec_learned_route *sb_route)
> + const struct sbrec_learned_route *sb_route,
> + bool stale)
> {
> struct route_entry *route_e = xmalloc(sizeof *route_e);
> - route_e->sb_route = sb_route;
> + *route_e = (struct route_entry) {
> + .sb_route = sb_route,
> + .stale = stale,
> + };
>
> uint32_t hash = uuid_hash(&sb_route->datapath->header_.uuid);
> hash = hash_string(sb_route->logical_port->logical_port, hash);
> @@ -157,7 +162,7 @@ sb_sync_learned_routes(const struct vector
> *learned_routes,
> sb_route->logical_port->logical_port)) {
> continue;
> }
> - route_add_entry(&sync_routes, sb_route);
> + route_add_entry(&sync_routes, sb_route, true);
> }
> sbrec_learned_route_index_destroy_row(filter);
>
> @@ -185,8 +190,7 @@ sb_sync_learned_routes(const struct vector
> *learned_routes,
> route_e = route_lookup(&sync_routes, datapath,
> logical_port, ip_prefix, nexthop);
> if (route_e) {
> - hmap_remove(&sync_routes, &route_e->hmap_node);
> - free(route_e);
> + route_e->stale = false;
> } else {
> if (!ovnsb_idl_txn) {
> *sb_changes_pending = true;
> @@ -197,6 +201,8 @@ sb_sync_learned_routes(const struct vector
> *learned_routes,
> sbrec_learned_route_set_logical_port(sb_route, logical_port);
> sbrec_learned_route_set_ip_prefix(sb_route, ip_prefix);
> sbrec_learned_route_set_nexthop(sb_route, nexthop);
> +
> + route_add_entry(&sync_routes, sb_route, false);
> }
> }
> free(ip_prefix);
> @@ -204,7 +210,9 @@ sb_sync_learned_routes(const struct vector
> *learned_routes,
> }
>
> HMAP_FOR_EACH_POP (route_e, hmap_node, &sync_routes) {
> - sbrec_learned_route_delete(route_e->sb_route);
> + if (route_e->stale) {
> + sbrec_learned_route_delete(route_e->sb_route);
> + }
> free(route_e);
> }
> hmap_destroy(&sync_routes);
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index dbf3751ec..76f73d96e 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -16094,7 +16094,8 @@ wait_for_ports_up mylearninglsp
> check ovn-nbctl --wait=hv set Logical_Router_Port internet-phys \
> options:dynamic-routing-port-name=mylearninglsp
>
> -check ip route add 233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink vrf
> ovnvrf1337
> +check ip route add 233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink vrf
> ovnvrf1337 metric 30
> +check ip route add 233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink vrf
> ovnvrf1337 metric 40
> check ovn-nbctl --wait=hv sync
> check_row_count Learned_Route 1 ip_prefix=233.253.0.0/24
> nexthop=192.168.20.20
>
> @@ -16106,7 +16107,8 @@ check ovn-nbctl --wait=hv set Logical_Router_Port
> internet-phys \
> OVN_CLEANUP_CONTROLLER([hv1])
> OVN_ROUTE_EQUAL([ovnvrf1337], [dnl
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # Starting it again will add the routes again.
> start_daemon ovn-controller
> @@ -16119,7 +16121,8 @@ blackhole 192.0.2.10 proto ovn metric 100
> blackhole 192.0.2.20 proto ovn metric 100
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # Changing the vrf name will switch to the new one.
> # The old vrf will be removed.
> @@ -16136,7 +16139,8 @@ blackhole 192.0.2.10 proto ovn metric 100
> blackhole 192.0.2.20 proto ovn metric 100
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # Stopping with --restart will not touch the routes.
> OVN_CONTROLLER_EXIT([],[--restart])
> @@ -16148,7 +16152,8 @@ blackhole 192.0.2.10 proto ovn metric 100
> blackhole 192.0.2.20 proto ovn metric 100
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # When we now stop the ovn-controller it will remove the VRF.
> start_daemon ovn-controller
> @@ -16179,7 +16184,8 @@ blackhole 192.0.2.20 proto ovn metric 100
> blackhole 192.0.2.21 proto ovn metric 100
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # Bind "vip" port locally and check the virtual IP is added in the VRF.
> NS_EXEC([vif4], [arping -U -c 1 -w 2 -I vif4 192.0.2.30])
> @@ -16195,7 +16201,8 @@ blackhole 192.0.2.21 proto ovn metric 100
> blackhole 192.0.2.30 proto ovn metric 100
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> check ovn-sbctl clear Port_Binding vip virtual-parent
> OVN_ROUTE_EQUAL([ovnvrf1338], [dnl
> @@ -16207,7 +16214,8 @@ blackhole 192.0.2.20 proto ovn metric 100
> blackhole 192.0.2.21 proto ovn metric 100
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # Remove the backoff period, so we can bind it right away.
> check ovn-sbctl remove Port_Binding vip options vport-backoff
> @@ -16225,7 +16233,8 @@ blackhole 192.0.2.21 proto ovn metric 100
> blackhole 192.0.2.30 proto ovn metric 100
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # Simulate "vip" bound to a different chassis.
> check ovn-sbctl clear Port_Binding vip virtual-parent
> @@ -16240,7 +16249,8 @@ blackhole 192.0.2.20 proto ovn metric 100
> blackhole 192.0.2.21 proto ovn metric 100
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # Check with dynamic-routing-redistribute-local-only=false.
> check ovn-nbctl --wait=hv set logical_router_port internet-public \
> @@ -16256,7 +16266,8 @@ blackhole 192.0.2.22 proto ovn metric 1000
> blackhole 192.0.2.30 proto ovn metric 1000
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # Remove the backoff period, so we can bind it right away.
> check ovn-sbctl remove Port_Binding vip options vport-backoff
> @@ -16275,7 +16286,8 @@ blackhole 192.0.2.22 proto ovn metric 1000
> blackhole 192.0.2.30 proto ovn metric 100
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> OVN_CLEANUP_CONTROLLER([hv1])
> AT_CHECK([ip vrf | grep -q ovnvrf1338], [1], [])
> @@ -16575,7 +16587,8 @@ wait_for_ports_up mylearninglsp
> check ovn-nbctl --wait=hv set Logical_Router_Port internet-phys \
> options:dynamic-routing-port-name=mylearninglsp
>
> -check ip route add 233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink vrf
> ovnvrf1337
> +check ip route add 233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink vrf
> ovnvrf1337 metric 30
> +check ip route add 233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink vrf
> ovnvrf1337 metric 40
> check ovn-nbctl --wait=hv sync
> check_row_count Learned_Route 1 ip_prefix=233.253.0.0/24
> nexthop=192.168.20.20
>
> @@ -16587,7 +16600,8 @@ check ovn-nbctl --wait=hv set Logical_Router_Port
> internet-phys \
> OVN_CLEANUP_CONTROLLER([hv1])
> OVN_ROUTE_EQUAL([ovnvrf1337], [dnl
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # Starting it again will add the routes again.
> start_daemon ovn-controller
> @@ -16600,7 +16614,8 @@ blackhole 192.0.2.10 proto ovn metric 100
> blackhole 192.0.2.20 proto ovn metric 100
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # Stopping with --restart will not touch the routes.
> OVN_CONTROLLER_EXIT([],[--restart])
> @@ -16612,7 +16627,8 @@ blackhole 192.0.2.10 proto ovn metric 100
> blackhole 192.0.2.20 proto ovn metric 100
> blackhole 198.51.100.0/24 proto ovn metric 1000
> 233.252.0.0/24 via 192.168.10.10 dev lo onlink
> -233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink])
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 30 onlink
> +233.253.0.0/24 via 192.168.20.20 dev hv1-mll metric 40 onlink])
>
> # Now we set maintain-vrf again and stop the ovn-controller.
> # It will then remove the VRF.
> --
> 2.51.1
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev