From: Petr Machata <m...@pmachata.org> From: Ido Schimmel <ido...@nvidia.com>
Add ability to configure resilient nexthop groups and show their current configuration. Example: # ip nexthop add id 10 group 1/2 type resilient buckets 8 # ip nexthop show id 10 id 10 group 1/2 type resilient buckets 8 idle_timer 120 unbalanced_timer 0 # ip -j -p nexthop show id 10 [ { "id": 10, "group": [ { "id": 1 },{ "id": 2 } ], "type": "resilient", "resilient_args": { "buckets": 8, "idle_timer": 120, "unbalanced_timer": 0 }, "flags": [ ] } ] Signed-off-by: Ido Schimmel <ido...@nvidia.com> --- ip/ipnexthop.c | 144 +++++++++++++++++++++++++++++++++++++++++- man/man8/ip-nexthop.8 | 55 +++++++++++++++- 2 files changed, 193 insertions(+), 6 deletions(-) diff --git a/ip/ipnexthop.c b/ip/ipnexthop.c index 5aae32629edd..1d50bf7529c4 100644 --- a/ip/ipnexthop.c +++ b/ip/ipnexthop.c @@ -43,9 +43,12 @@ static void usage(void) " [ groups ] [ fdb ]\n" "NH := { blackhole | [ via ADDRESS ] [ dev DEV ] [ onlink ]\n" " [ encap ENCAPTYPE ENCAPHDR ] |\n" - " group GROUP [ fdb ] [ type TYPE ] }\n" + " group GROUP [ fdb ] [ type TYPE [ TYPE_ARGS ] ] }\n" "GROUP := [ <id[,weight]>/<id[,weight]>/... ]\n" - "TYPE := { mpath }\n" + "TYPE := { mpath | resilient }\n" + "TYPE_ARGS := [ RESILIENT_ARGS ]\n" + "RESILIENT_ARGS := [ buckets BUCKETS ] [ idle_timer IDLE ]\n" + " [ unbalanced_timer UNBALANCED ]\n" "ENCAPTYPE := [ mpls ]\n" "ENCAPHDR := [ MPLSLABEL ]\n"); exit(-1); @@ -203,6 +206,66 @@ static void print_nh_group(FILE *fp, const struct rtattr *grps_attr) close_json_array(PRINT_JSON, NULL); } +static const char *nh_group_type_name(__u16 type) +{ + switch (type) { + case NEXTHOP_GRP_TYPE_MPATH: + return "mpath"; + case NEXTHOP_GRP_TYPE_RES: + return "resilient"; + default: + return "<unknown type>"; + } +} + +static void print_nh_group_type(FILE *fp, const struct rtattr *grp_type_attr) +{ + __u16 type = rta_getattr_u16(grp_type_attr); + + if (type == NEXTHOP_GRP_TYPE_MPATH) + /* Do not print type in order not to break existing output. 
*/ + return; + + print_string(PRINT_ANY, "type", "type %s ", nh_group_type_name(type)); +} + +static void print_nh_res_group(FILE *fp, const struct rtattr *res_grp_attr) +{ + struct rtattr *tb[NHA_RES_GROUP_MAX + 1]; + struct rtattr *rta; + struct timeval tv; + + parse_rtattr_nested(tb, NHA_RES_GROUP_MAX, res_grp_attr); + + open_json_object("resilient_args"); + + if (tb[NHA_RES_GROUP_BUCKETS]) + print_uint(PRINT_ANY, "buckets", "buckets %u ", + rta_getattr_u16(tb[NHA_RES_GROUP_BUCKETS])); + + if (tb[NHA_RES_GROUP_IDLE_TIMER]) { + rta = tb[NHA_RES_GROUP_IDLE_TIMER]; + __jiffies_to_tv(&tv, rta_getattr_u32(rta)); + print_tv(PRINT_ANY, "idle_timer", "idle_timer %g ", &tv); + } + + if (tb[NHA_RES_GROUP_UNBALANCED_TIMER]) { + rta = tb[NHA_RES_GROUP_UNBALANCED_TIMER]; + __jiffies_to_tv(&tv, rta_getattr_u32(rta)); + print_tv(PRINT_ANY, "unbalanced_timer", "unbalanced_timer %g ", + &tv); + } + + if (tb[NHA_RES_GROUP_UNBALANCED_TIME]) { + rta = tb[NHA_RES_GROUP_UNBALANCED_TIME]; + __jiffies_to_tv(&tv, rta_getattr_u32(rta)); + print_tv(PRINT_ANY, "unbalanced_time", "unbalanced_time %g ", + &tv); + } + + close_json_object(); +} + int print_nexthop(struct nlmsghdr *n, void *arg) { struct nhmsg *nhm = NLMSG_DATA(n); @@ -229,7 +292,7 @@ int print_nexthop(struct nlmsghdr *n, void *arg) if (filter.proto && filter.proto != nhm->nh_protocol) return 0; - parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len); + parse_rtattr_flags(tb, NHA_MAX, RTM_NHA(nhm), len, NLA_F_NESTED); open_json_object(NULL); @@ -243,6 +306,12 @@ int print_nexthop(struct nlmsghdr *n, void *arg) if (tb[NHA_GROUP]) print_nh_group(fp, tb[NHA_GROUP]); + if (tb[NHA_GROUP_TYPE]) + print_nh_group_type(fp, tb[NHA_GROUP_TYPE]); + + if (tb[NHA_RES_GROUP]) + print_nh_res_group(fp, tb[NHA_RES_GROUP]); + if (tb[NHA_ENCAP]) lwt_print_encap(fp, tb[NHA_ENCAP_TYPE], tb[NHA_ENCAP]); @@ -333,10 +402,70 @@ static int read_nh_group_type(const char *name) { if (strcmp(name, "mpath") == 0) return NEXTHOP_GRP_TYPE_MPATH; + else if (strcmp(name, 
"resilient") == 0) + return NEXTHOP_GRP_TYPE_RES; return __NEXTHOP_GRP_TYPE_MAX; } +static void parse_nh_group_type_res(struct nlmsghdr *n, int maxlen, int *argcp, + char ***argvp) +{ + char **argv = *argvp; + struct rtattr *nest; + int argc = *argcp; + + if (!NEXT_ARG_OK()) + return; + + nest = addattr_nest(n, maxlen, NHA_RES_GROUP); + nest->rta_type |= NLA_F_NESTED; + + NEXT_ARG_FWD(); + while (argc > 0) { + if (strcmp(*argv, "buckets") == 0) { + __u16 buckets; + + NEXT_ARG(); + if (get_u16(&buckets, *argv, 0)) + invarg("invalid buckets value", *argv); + + addattr16(n, maxlen, NHA_RES_GROUP_BUCKETS, buckets); + } else if (strcmp(*argv, "idle_timer") == 0) { + __u32 idle_timer; + + NEXT_ARG(); + if (get_unsigned(&idle_timer, *argv, 0) || + idle_timer >= ~0UL / 100) + invarg("invalid idle timer value", *argv); + + addattr32(n, maxlen, NHA_RES_GROUP_IDLE_TIMER, + idle_timer * 100); + } else if (strcmp(*argv, "unbalanced_timer") == 0) { + __u32 unbalanced_timer; + + NEXT_ARG(); + if (get_unsigned(&unbalanced_timer, *argv, 0) || + unbalanced_timer >= ~0UL / 100) + invarg("invalid unbalanced timer value", *argv); + + addattr32(n, maxlen, NHA_RES_GROUP_UNBALANCED_TIMER, + unbalanced_timer * 100); + } else { + break; + } + argc--; argv++; + } + + /* argv is currently the first unparsed argument, but ipnh_modify() + * will move to the next, so step back. 
+ */ + *argcp = argc + 1; + *argvp = argv - 1; + + addattr_nest_end(n, nest); +} + static void parse_nh_group_type(struct nlmsghdr *n, int maxlen, int *argcp, char ***argvp) { @@ -349,6 +478,15 @@ static void parse_nh_group_type(struct nlmsghdr *n, int maxlen, int *argcp, if (type > NEXTHOP_GRP_TYPE_MAX) invarg("\"type\" value is invalid\n", *argv); + switch (type) { + case NEXTHOP_GRP_TYPE_MPATH: + /* No additional arguments */ + break; + case NEXTHOP_GRP_TYPE_RES: + parse_nh_group_type_res(n, maxlen, &argc, &argv); + break; + } + *argcp = argc; *argvp = argv; diff --git a/man/man8/ip-nexthop.8 b/man/man8/ip-nexthop.8 index f02e0555a000..c68fcc0f9cf5 100644 --- a/man/man8/ip-nexthop.8 +++ b/man/man8/ip-nexthop.8 @@ -56,7 +56,7 @@ ip-nexthop \- nexthop object management .IR GROUP " [ " .BR fdb " ] [ " .B type -.IR TYPE " ] } " +.IR TYPE " [ " TYPE_ARGS " ] ] }" .ti -8 .IR ENCAP " := [ " @@ -75,7 +75,20 @@ ip-nexthop \- nexthop object management .ti -8 .IR TYPE " := { " -.BR mpath " }" +.BR mpath " | " resilient " }" + +.ti -8 +.IR TYPE_ARGS " := [ " +.IR RESILIENT_ARGS " ] " + +.ti -8 +.IR RESILIENT_ARGS " := " +.RB "[ " buckets +.IR BUCKETS " ] [ " +.B idle_timer +.IR IDLE " ] [ " +.B unbalanced_timer +.IR UNBALANCED " ]" .SH DESCRIPTION .B ip nexthop @@ -128,7 +141,7 @@ is a set of encapsulation attributes specific to the .in -2 .TP -.BI group " GROUP [ " type " TYPE ]" +.BI group " GROUP [ " type " TYPE [ TYPE_ARGS ] ]" create a nexthop group. Group specification is id with an optional weight (id,weight) and a '/' as a separator between entries. .sp @@ -138,6 +151,37 @@ is a string specifying the nexthop group type. Namely: .in +8 .BI mpath - multipath nexthop group +.sp +.BI resilient +- resilient nexthop group. Group is resilient to addition and deletion of +nexthops + +.sp +.in -8 +.I TYPE_ARGS +is a set of attributes specific to the +.I TYPE. + +.in +8 +.B resilient +.in +2 +.B buckets +.I BUCKETS +- Number of nexthop buckets. 
Cannot be changed for an existing group +.sp + +.B idle_timer +.I IDLE +- Time in seconds that a nexthop bucket must go without seeing traffic +before it is considered idle. Default is 120 seconds + +.B unbalanced_timer +.I UNBALANCED +- Time in seconds that a nexthop group may remain unbalanced before the +kernel tries to rebalance it. Rebalancing might result in some flows being +reset. A value of 0 means that no rebalancing will take place. Default is +0 seconds +.in -2 .TP .B blackhole @@ -224,6 +268,11 @@ ip nexthop add id 7 group 5/6 fdb Adds a fdb nexthop group with id 7. A fdb nexthop group can only have fdb nexthops. .RE +.PP +ip nexthop add id 10 group 1/2 type resilient buckets 32 +.RS 4 +Add a resilient nexthop group with id 10 and 32 nexthop buckets. +.RE .SH SEE ALSO .br .BR ip (8) -- 2.26.2