Re: [s...@spacehopper.org: ospf6d fib reload [Re: bgpd fix for possible crash in SE]]
On Tue, Jun 20, 2023 at 05:31:34PM +0100, Stuart Henderson wrote: > This hasn't blown up yet... any interest? Some comments to the kroute.c changes below. Everything else is fine. > - Forwarded message from Stuart Henderson - > > From: Stuart Henderson > Date: Fri, 26 May 2023 14:40:45 +0100 > To: tech@openbsd.org > Subject: ospf6d fib reload [Re: bgpd fix for possible crash in SE] > Mail-Followup-To: tech@openbsd.org > > On 2023/05/26 13:52, Stuart Henderson wrote: > > I think my main issues come around LS_REFRESH_TIME intervals, when > > there's loads of churn and "ospf6d: ospf engine" can be busy for > > minutes at a time (not always, but very often). Don't know if that rings > > any bells for anyone... (I am now reminded that RTM_DESYNC isn't handled > > by ospf6d which probably doesn't help matters). > > Here's a first attempt at porting the fib reload/desync diffs from > ospfd to ospf6d ... Not sure if it's good yet, but it didn't immediately > crash and burn when I ran "ospf6ctl fib reload", at least. > ... > Index: ospf6d/kroute.c > === > RCS file: /cvs/src/usr.sbin/ospf6d/kroute.c,v > retrieving revision 1.67 > diff -u -p -r1.67 kroute.c > --- ospf6d/kroute.c 8 Mar 2023 04:43:14 - 1.67 > +++ ospf6d/kroute.c 26 May 2023 13:37:55 - > @@ -45,16 +45,22 @@ struct { > u_int32_t rtseq; > pid_t pid; > int fib_sync; > + int fib_serial; > u_int8_tfib_prio; > int fd; > - struct eventev; > + struct eventev, reload; Please put reload on its own line. > u_int rdomain; > +#define KR_RELOAD_IDLE 0 > +#define KR_RELOAD_FETCH1 > +#define KR_RELOAD_HOLD 2 > + int reload_state; > } kr_state; > > struct kroute_node { > RB_ENTRY(kroute_node)entry; > struct kroute_node *next; > struct krouter; > + int serial; > }; > > void kr_redist_remove(struct kroute_node *, struct kroute_node *); > @@ -90,7 +96,10 @@ void if_announce(void *); > int send_rtmsg(int, int, struct kroute *); > int dispatch_rtmsg(void); > int fetchtable(void); > -int rtmsg_process(char *, size_t); > +int refetchtable(void); > +int rtmsg_process(char *, size_t); > +void kr_fib_reload_timer(int, short, void *); > +void kr_fib_reload_arm_timer(int); > > RB_HEAD(kroute_tree, kroute_node)krt; > RB_PROTOTYPE(kroute_tree, kroute_node, entry, kroute_compare) > @@ -165,6 +174,9 @@ kr_init(int fs, u_int rdomain, int redis > kr_dispatch_msg, NULL); > event_add(_state.ev, NULL); > > + kr_state.reload_state = KR_RELOAD_IDLE; > + evtimer_set(_state.reload, kr_fib_reload_timer, NULL); > + > return (0); > } > > @@ -374,6 +386,62 @@ kr_fib_decouple(void) > } > > void > +kr_fib_reload_timer(int fd, short event, void *bula) > +{ > + if (kr_state.reload_state == KR_RELOAD_FETCH) { > + kr_fib_reload(); > + kr_state.reload_state = KR_RELOAD_HOLD; > + kr_fib_reload_arm_timer(KR_RELOAD_HOLD_TIMER); > + } else { > + kr_state.reload_state = KR_RELOAD_IDLE; > + } > +} > + > +void > +kr_fib_reload_arm_timer(int delay) > +{ > + struct timeval tv; > + > + timerclear(); > + tv.tv_sec = delay / 1000; > + tv.tv_usec = (delay % 1000) * 1000; > + > + if (evtimer_add(_state.reload, ) == -1) > + fatal("add_reload_timer"); > +} > + > +void > +kr_fib_reload(void) > +{ > + struct kroute_node *krn, *kr, *kn; > + Maybe include the: log_info("reloading interface list and routing table"); line from ospfd/kroute.c here as well. > + kr_state.fib_serial++; > + > + if (fetchifs(0) != 0 || fetchtable() != 0) > + return; > + > + for (kr = RB_MIN(kroute_tree, ); kr != NULL; kr = krn) { > + krn = RB_NEXT(kroute_tree, , kr); > + > + do { > + kn = kr->next; > + > + if (kr->serial != kr_state.fib_serial) { > + > + if (kr->r.priority == RTP_OSPF) { Here you should use kr_state.fib_prio instead of RTP_OSPF. > + kr->serial = kr_state.fib_serial; > +
[s...@spacehopper.org: ospf6d fib reload [Re: bgpd fix for possible crash in SE]]
This hasn't blown up yet... any interest? - Forwarded message from Stuart Henderson - From: Stuart Henderson Date: Fri, 26 May 2023 14:40:45 +0100 To: tech@openbsd.org Subject: ospf6d fib reload [Re: bgpd fix for possible crash in SE] Mail-Followup-To: tech@openbsd.org On 2023/05/26 13:52, Stuart Henderson wrote: > I think my main issues come around LS_REFRESH_TIME intervals, when > there's loads of churn and "ospf6d: ospf engine" can be busy for > minutes at a time (not always, but very often). Don't know if that rings > any bells for anyone... (I am now reminded that RTM_DESYNC isn't handled > by ospf6d which probably doesn't help matters). Here's a first attempt at porting the fib reload/desync diffs from ospfd to ospf6d ... Not sure if it's good yet, but it didn't immediately crash and burn when I ran "ospf6ctl fib reload", at least. Index: ospf6ctl/ospf6ctl.8 === RCS file: /cvs/src/usr.sbin/ospf6ctl/ospf6ctl.8,v retrieving revision 1.13 diff -u -p -r1.13 ospf6ctl.8 --- ospf6ctl/ospf6ctl.8 2 Mar 2023 17:09:53 - 1.13 +++ ospf6ctl/ospf6ctl.8 26 May 2023 13:37:55 - @@ -58,6 +58,9 @@ Remove the learned routes from the FIB. Decoupling the FIB from an OSPF router may create routing loops and could cause major routing issues in the complete OSPF cloud. Only routers with just one link to the OSPF cloud can safely decouple the FIB. +.It Cm fib reload +Refetches and relearns the routes in the Forwarding Information Base +a.k.a. the kernel routing table. .It Cm log brief Disable verbose debug logging. .It Cm log verbose Index: ospf6ctl/ospf6ctl.c === RCS file: /cvs/src/usr.sbin/ospf6ctl/ospf6ctl.c,v retrieving revision 1.53 diff -u -p -r1.53 ospf6ctl.c --- ospf6ctl/ospf6ctl.c 27 Dec 2022 12:11:39 - 1.53 +++ ospf6ctl/ospf6ctl.c 26 May 2023 13:37:55 - @@ -225,6 +225,11 @@ main(int argc, char *argv[]) printf("decouple request sent.\n"); done = 1; break; + case FIB_RELOAD: + imsg_compose(ibuf, IMSG_CTL_FIB_RELOAD, 0, 0, -1, NULL, 0); + printf("reload request sent.\n"); + done = 1; + break; case LOG_VERBOSE: verbose = 1; /* FALLTHROUGH */ @@ -304,6 +309,7 @@ main(int argc, char *argv[]) case FIB: case FIB_COUPLE: case FIB_DECOUPLE: + case FIB_RELOAD: case LOG_VERBOSE: case LOG_BRIEF: case RELOAD: Index: ospf6ctl/parser.c === RCS file: /cvs/src/usr.sbin/ospf6ctl/parser.c,v retrieving revision 1.14 diff -u -p -r1.14 parser.c --- ospf6ctl/parser.c 26 May 2019 09:27:09 - 1.14 +++ ospf6ctl/parser.c 26 May 2023 13:37:55 - @@ -73,6 +73,7 @@ static const struct token t_main[] = { static const struct token t_fib[] = { { KEYWORD, "couple", FIB_COUPLE, NULL}, { KEYWORD, "decouple", FIB_DECOUPLE, NULL}, + { KEYWORD, "reload", FIB_RELOAD, NULL}, { ENDTOKEN, "", NONE, NULL} }; Index: ospf6ctl/parser.h === RCS file: /cvs/src/usr.sbin/ospf6ctl/parser.h,v retrieving revision 1.9 diff -u -p -r1.9 parser.h --- ospf6ctl/parser.h 26 May 2019 09:27:09 - 1.9 +++ ospf6ctl/parser.h 26 May 2023 13:37:55 - @@ -29,6 +29,7 @@ enum actions { FIB, FIB_COUPLE, FIB_DECOUPLE, + FIB_RELOAD, LOG_VERBOSE, LOG_BRIEF, SHOW, Index: ospf6d/control.c === RCS file: /cvs/src/usr.sbin/ospf6d/control.c,v retrieving revision 1.31 diff -u -p -r1.31 control.c --- ospf6d/control.c8 Mar 2023 04:43:14 - 1.31 +++ ospf6d/control.c26 May 2023 13:37:55 - @@ -279,6 +279,7 @@ control_dispatch_imsg(int fd, short even case IMSG_CTL_FIB_DECOUPLE: ospfe_fib_update(imsg.hdr.type); /* FALLTHROUGH */ + case IMSG_CTL_FIB_RELOAD: case IMSG_CTL_RELOAD: c->iev.ibuf.pid = imsg.hdr.pid; ospfe_imsg_compose_parent(imsg.hdr.type, 0, NULL, 0); Index: ospf6d/kroute.c === RCS file: /cvs/src/usr.sbin/ospf6d/kroute.c,v retrieving revision 1.67 diff -u -p -r1.67 kroute.c --- ospf6d/kroute.c 8 Mar 2023 04:43:14 - 1.67 +++ ospf6d/kroute.c 26 May 2023 13:37:55 - @@ -45,16 +45,22 @
ospf6d fib reload [Re: bgpd fix for possible crash in SE]
On 2023/05/26 13:52, Stuart Henderson wrote: > I think my main issues come around LS_REFRESH_TIME intervals, when > there's loads of churn and "ospf6d: ospf engine" can be busy for > minutes at a time (not always, but very often). Don't know if that rings > any bells for anyone... (I am now reminded that RTM_DESYNC isn't handled > by ospf6d which probably doesn't help matters). Here's a first attempt at porting the fib reload/desync diffs from ospfd to ospf6d ... Not sure if it's good yet, but it didn't immediately crash and burn when I ran "ospf6ctl fib reload", at least. Index: ospf6ctl/ospf6ctl.8 === RCS file: /cvs/src/usr.sbin/ospf6ctl/ospf6ctl.8,v retrieving revision 1.13 diff -u -p -r1.13 ospf6ctl.8 --- ospf6ctl/ospf6ctl.8 2 Mar 2023 17:09:53 - 1.13 +++ ospf6ctl/ospf6ctl.8 26 May 2023 13:37:55 - @@ -58,6 +58,9 @@ Remove the learned routes from the FIB. Decoupling the FIB from an OSPF router may create routing loops and could cause major routing issues in the complete OSPF cloud. Only routers with just one link to the OSPF cloud can safely decouple the FIB. +.It Cm fib reload +Refetches and relearns the routes in the Forwarding Information Base +a.k.a. the kernel routing table. .It Cm log brief Disable verbose debug logging. .It Cm log verbose Index: ospf6ctl/ospf6ctl.c === RCS file: /cvs/src/usr.sbin/ospf6ctl/ospf6ctl.c,v retrieving revision 1.53 diff -u -p -r1.53 ospf6ctl.c --- ospf6ctl/ospf6ctl.c 27 Dec 2022 12:11:39 - 1.53 +++ ospf6ctl/ospf6ctl.c 26 May 2023 13:37:55 - @@ -225,6 +225,11 @@ main(int argc, char *argv[]) printf("decouple request sent.\n"); done = 1; break; + case FIB_RELOAD: + imsg_compose(ibuf, IMSG_CTL_FIB_RELOAD, 0, 0, -1, NULL, 0); + printf("reload request sent.\n"); + done = 1; + break; case LOG_VERBOSE: verbose = 1; /* FALLTHROUGH */ @@ -304,6 +309,7 @@ main(int argc, char *argv[]) case FIB: case FIB_COUPLE: case FIB_DECOUPLE: + case FIB_RELOAD: case LOG_VERBOSE: case LOG_BRIEF: case RELOAD: Index: ospf6ctl/parser.c === RCS file: /cvs/src/usr.sbin/ospf6ctl/parser.c,v retrieving revision 1.14 diff -u -p -r1.14 parser.c --- ospf6ctl/parser.c 26 May 2019 09:27:09 - 1.14 +++ ospf6ctl/parser.c 26 May 2023 13:37:55 - @@ -73,6 +73,7 @@ static const struct token t_main[] = { static const struct token t_fib[] = { { KEYWORD, "couple", FIB_COUPLE, NULL}, { KEYWORD, "decouple", FIB_DECOUPLE, NULL}, + { KEYWORD, "reload", FIB_RELOAD, NULL}, { ENDTOKEN, "", NONE, NULL} }; Index: ospf6ctl/parser.h === RCS file: /cvs/src/usr.sbin/ospf6ctl/parser.h,v retrieving revision 1.9 diff -u -p -r1.9 parser.h --- ospf6ctl/parser.h 26 May 2019 09:27:09 - 1.9 +++ ospf6ctl/parser.h 26 May 2023 13:37:55 - @@ -29,6 +29,7 @@ enum actions { FIB, FIB_COUPLE, FIB_DECOUPLE, + FIB_RELOAD, LOG_VERBOSE, LOG_BRIEF, SHOW, Index: ospf6d/control.c === RCS file: /cvs/src/usr.sbin/ospf6d/control.c,v retrieving revision 1.31 diff -u -p -r1.31 control.c --- ospf6d/control.c8 Mar 2023 04:43:14 - 1.31 +++ ospf6d/control.c26 May 2023 13:37:55 - @@ -279,6 +279,7 @@ control_dispatch_imsg(int fd, short even case IMSG_CTL_FIB_DECOUPLE: ospfe_fib_update(imsg.hdr.type); /* FALLTHROUGH */ + case IMSG_CTL_FIB_RELOAD: case IMSG_CTL_RELOAD: c->iev.ibuf.pid = imsg.hdr.pid; ospfe_imsg_compose_parent(imsg.hdr.type, 0, NULL, 0); Index: ospf6d/kroute.c === RCS file: /cvs/src/usr.sbin/ospf6d/kroute.c,v retrieving revision 1.67 diff -u -p -r1.67 kroute.c --- ospf6d/kroute.c 8 Mar 2023 04:43:14 - 1.67 +++ ospf6d/kroute.c 26 May 2023 13:37:55 - @@ -45,16 +45,22 @@ struct { u_int32_t rtseq; pid_t pid; int fib_sync; + int fib_serial; u_int8_tfib_prio; int fd; - struct eventev; + struct eventev, reload; u_int rdomain; +#define KR_RELOAD_IDLE 0