--- The FIB walks in babel_reconfigure_iface are a bit ugly, I tried to loop over ifa->neigh_list instead but couldn't get it to work. I'm open to suggestions :)
doc/bird.sgml | 8 +++++ proto/babel/babel.c | 70 +++++++++++++++++++++++++++++++++++++------- proto/babel/babel.h | 3 ++ proto/babel/config.Y | 3 ++ 4 files changed, 73 insertions(+), 11 deletions(-) diff --git a/doc/bird.sgml b/doc/bird.sgml index 1580facd..5e85d8ec 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1879,6 +1879,7 @@ protocol babel [<name>] { check link <switch>; next hop ipv4 <address>; next hop ipv6 <address>; + ecmp <switch> [limit <num>]; authentication none|mac [permissive]; password "<text>"; password "<text>" { @@ -1983,6 +1984,13 @@ protocol babel [<name>] { source for Babel packets will be used. In normal operation, it should not be necessary to set this option. + <tag><label id="babel-ecmp">ecmp <m>switch</m> [limit <m>number</m>]</tag> + + Determines whether babel will emit ECMP (equal-cost multipath) + routes, allowing to load-balancing traffic across multiple paths. If + enabled the maximum number of next-hops to allow can be specified, + defaulting to 16. + <tag><label id="babel-authentication">authentication none|mac [permissive]</tag> Selects authentication method to be used. <cf/none/ means that packets are not authenticated at all, <cf/mac/ means MAC authentication is diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 4a7d550f..4d9c657c 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -623,6 +623,34 @@ done: } } +/** + * babel_nexthop_insert - add next_hop of route to nexthop list + * @p: Babel protocol instance + * @r: Babel route + * @nhs: nexthop list head to append onto + * @nh: freshly allocated buffer to fill + */ +static void +babel_nexthop_insert( + struct babel_proto *p, + struct babel_route *r, + struct nexthop **nhs, + struct nexthop *nh) +{ + nh->gw = r->next_hop; + nh->iface = r->neigh->ifa->iface; + + /* + * If we cannot find a reachable neighbour, set the entry to be onlink. This + * makes it possible to, e.g., assign /32 addresses on a mesh interface and + * have routing work. + */ + if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0)) + nh->flags = RNF_ONLINK; + + nexthop_insert(nhs, nh); +} + /** * babel_announce_rte - announce selected route to the core * @p: Babel protocol instance @@ -635,6 +663,7 @@ done: static void babel_announce_rte(struct babel_proto *p, struct babel_entry *e) { + struct babel_config *cf = (void *) p->p.cf; struct babel_route *r = e->selected; struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel; @@ -645,18 +674,24 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) .source = RTS_BABEL, .scope = SCOPE_UNIVERSE, .dest = RTD_UNICAST, - .from = r->neigh->addr, - .nh.gw = r->next_hop, - .nh.iface = r->neigh->ifa->iface, }; - /* - * If we cannot find a reachable neighbour, set the entry to be onlink. This - * makes it possible to, e.g., assign /32 addresses on a mesh interface and - * have routing work. - */ - if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0)) - a0.nh.flags = RNF_ONLINK; + struct nexthop *nhs = NULL; + babel_nexthop_insert(p, r, &nhs, allocz(sizeof(struct nexthop))); + int num_nexthops = 1; + + struct babel_route *cr; + WALK_LIST(cr, e->routes) { + if (num_nexthops++ >= cf->ecmp) + break; + + if (cr == r || !cr->feasible || cr->metric != r->metric) + continue; + + babel_nexthop_insert(p, cr, &nhs, allocz(sizeof(struct nexthop))); + } + + a0.nh = *nhs; rta *a = rta_lookup(&a0); rte *rte = rte_get_temp(a); @@ -736,6 +771,7 @@ babel_announce_retraction(struct babel_proto *p, struct babel_entry *e) static void babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod) { + struct babel_config *cf = (void *) p->p.cf; struct babel_route *r, *best = e->selected; /* Shortcut if only non-best was modified */ @@ -744,8 +780,10 @@ babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_ro /* Either select modified route, or keep old best route */ if ((mod->metric < (best ? best->metric : BABEL_INFINITY)) && mod->feasible) best = mod; - else + else if (cf->ecmp==1) return; + /* With ecmp one of the non-selected but equal metric routes might have + * changed so contine on with the announcement in that case. */ } else { @@ -1879,6 +1917,16 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b if (ifa->up) babel_iface_kick_timer(ifa); + /* Update all routes to refresh ecmp settings. */ + FIB_WALK(&p->ip6_rtable, struct babel_entry, e) { + babel_select_route(p, e, NULL); + } + FIB_WALK_END; + FIB_WALK(&p->ip4_rtable, struct babel_entry, e) { + babel_select_route(p, e, NULL); + } + FIB_WALK_END; + return 1; } diff --git a/proto/babel/babel.h b/proto/babel/babel.h index 84feb085..3868dcb2 100644 --- a/proto/babel/babel.h +++ b/proto/babel/babel.h @@ -62,6 +62,8 @@ #define BABEL_OVERHEAD (IP6_HEADER_LENGTH+UDP_HEADER_LENGTH) #define BABEL_MIN_MTU (512 + BABEL_OVERHEAD) +#define BABEL_DEFAULT_ECMP_LIMIT 16 + #define BABEL_AUTH_NONE 0 #define BABEL_AUTH_MAC 1 @@ -120,6 +122,7 @@ struct babel_config { list iface_list; /* List of iface configs (struct babel_iface_config) */ uint hold_time; /* Time to hold stale entries and unreachable routes */ u8 randomize_router_id; + u8 ecmp; /* Maximum number of nexthops to install */ struct channel_config *ip4_channel; struct channel_config *ip6_channel; diff --git a/proto/babel/config.Y b/proto/babel/config.Y index 05210fa4..8f18c790 100644 --- a/proto/babel/config.Y +++ b/proto/babel/config.Y @@ -36,6 +36,7 @@ babel_proto_start: proto_start BABEL this_proto = proto_config_new(&proto_babel, $1); init_list(&BABEL_CFG->iface_list); BABEL_CFG->hold_time = 1 S_; + BABEL_CFG->ecmp = 1; }; babel_proto_item: @@ -43,6 +44,8 @@ babel_proto_item: | proto_channel | INTERFACE babel_iface | RANDOMIZE ROUTER ID bool { BABEL_CFG->randomize_router_id = $4; } + | ECMP bool { BABEL_CFG->ecmp = $2 ? BABEL_DEFAULT_ECMP_LIMIT : 1; } + | ECMP bool LIMIT expr { BABEL_CFG->ecmp = $2 ? $4 : 1; } ; babel_proto_opts: -- 2.30.2