On Tue, Jun 20, 2023 at 05:31:34PM +0100, Stuart Henderson wrote:
> This hasn't blown up yet... any interest?

Some comments to the kroute.c changes below. Everything else is fine.
 
> ----- Forwarded message from Stuart Henderson <s...@spacehopper.org> -----
> 
> From: Stuart Henderson <s...@spacehopper.org>
> Date: Fri, 26 May 2023 14:40:45 +0100
> To: tech@openbsd.org
> Subject: ospf6d fib reload [Re: bgpd fix for possible crash in SE]
> Mail-Followup-To: tech@openbsd.org
> 
> On 2023/05/26 13:52, Stuart Henderson wrote:
> > I think my main issues come around LS_REFRESH_TIME intervals, when
> > there's loads of churn and "ospf6d: ospf engine" can be busy for
> > minutes at a time (not always, but very often). Don't know if that rings
> > any bells for anyone... (I am now reminded that RTM_DESYNC isn't handled
> > by ospf6d which probably doesn't help matters).
> 
> Here's a first attempt at porting the fib reload/desync diffs from
> ospfd to ospf6d ... Not sure if it's good yet, but it didn't immediately
> crash and burn when I ran "ospf6ctl fib reload", at least.
> 

...

> Index: ospf6d/kroute.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/ospf6d/kroute.c,v
> retrieving revision 1.67
> diff -u -p -r1.67 kroute.c
> --- ospf6d/kroute.c   8 Mar 2023 04:43:14 -0000       1.67
> +++ ospf6d/kroute.c   26 May 2023 13:37:55 -0000
> @@ -45,16 +45,22 @@ struct {
>       u_int32_t               rtseq;
>       pid_t                   pid;
>       int                     fib_sync;
> +     int                     fib_serial;
>       u_int8_t                fib_prio;
>       int                     fd;
> -     struct event            ev;
> +     struct event            ev, reload;

Please put reload on its own line.

>       u_int                   rdomain;
> +#define KR_RELOAD_IDLE 0
> +#define KR_RELOAD_FETCH        1
> +#define KR_RELOAD_HOLD 2
> +     int                     reload_state;
>  } kr_state;
>  
>  struct kroute_node {
>       RB_ENTRY(kroute_node)    entry;
>       struct kroute_node      *next;
>       struct kroute            r;
> +     int                      serial;
>  };
>  
>  void kr_redist_remove(struct kroute_node *, struct kroute_node *);
> @@ -90,7 +96,10 @@ void               if_announce(void *);
>  int          send_rtmsg(int, int, struct kroute *);
>  int          dispatch_rtmsg(void);
>  int          fetchtable(void);
> -int          rtmsg_process(char *, size_t); 
> +int          refetchtable(void);
> +int          rtmsg_process(char *, size_t);
> +void         kr_fib_reload_timer(int, short, void *);
> +void         kr_fib_reload_arm_timer(int);
>  
>  RB_HEAD(kroute_tree, kroute_node)    krt;
>  RB_PROTOTYPE(kroute_tree, kroute_node, entry, kroute_compare)
> @@ -165,6 +174,9 @@ kr_init(int fs, u_int rdomain, int redis
>           kr_dispatch_msg, NULL);
>       event_add(&kr_state.ev, NULL);
>  
> +     kr_state.reload_state = KR_RELOAD_IDLE;
> +     evtimer_set(&kr_state.reload, kr_fib_reload_timer, NULL);
> +
>       return (0);
>  }
>  
> @@ -374,6 +386,62 @@ kr_fib_decouple(void)
>  }
>  
>  void
> +kr_fib_reload_timer(int fd, short event, void *bula)
> +{
> +     if (kr_state.reload_state == KR_RELOAD_FETCH) {
> +             kr_fib_reload();
> +             kr_state.reload_state = KR_RELOAD_HOLD;
> +             kr_fib_reload_arm_timer(KR_RELOAD_HOLD_TIMER);
> +     } else {
> +             kr_state.reload_state = KR_RELOAD_IDLE;
> +     }
> +}
> +
> +void
> +kr_fib_reload_arm_timer(int delay)
> +{
> +     struct timeval          tv;
> +
> +     timerclear(&tv);
> +     tv.tv_sec = delay / 1000;
> +     tv.tv_usec = (delay % 1000) * 1000;
> +
> +     if (evtimer_add(&kr_state.reload, &tv) == -1)
> +             fatal("add_reload_timer");
> +}
> +
> +void
> +kr_fib_reload(void)
> +{
> +     struct kroute_node      *krn, *kr, *kn;
> +

Maybe include the:

        log_info("reloading interface list and routing table");

line from ospfd/kroute.c here as well.

> +     kr_state.fib_serial++;
> +
> +     if (fetchifs(0) != 0 || fetchtable() != 0)
> +             return;
> +
> +     for (kr = RB_MIN(kroute_tree, &krt); kr != NULL; kr = krn) {
> +             krn = RB_NEXT(kroute_tree, &krt, kr);
> +
> +             do {
> +                     kn = kr->next;
> +
> +                     if (kr->serial != kr_state.fib_serial) {
> +
> +                             if (kr->r.priority == RTP_OSPF) {

Here you should use kr_state.fib_prio instead of RTP_OSPF.

> +                                     kr->serial = kr_state.fib_serial;
> +                                     if (send_rtmsg(kr_state.fd,
> +                                         RTM_ADD, &kr->r) != 0)
> +                                             break;
> +                             } else
> +                                     kroute_remove(kr);
> +                     }
> +
> +             } while ((kr = kn) != NULL);
> +     }
> +}
> +
> +void
>  kr_fib_update_prio(u_int8_t fib_prio)
>  {
>       struct kroute_node      *kr;
> @@ -664,6 +732,8 @@ kroute_insert(struct kroute_node *kr)
>  {
>       struct kroute_node      *krm, *krh;
>  
> +     kr->serial = kr_state.fib_serial;
> +
>       if ((krh = RB_INSERT(kroute_tree, &krt, kr)) != NULL) {
>               /*
>                * Multipath route, add at end of list.
> @@ -1279,7 +1349,7 @@ rtmsg_process(char *buf, size_t len)
>       int                      flags, mpath;
>       unsigned int             scope;
>       u_short                  ifindex = 0;
> -     int                      rv;
> +     int                      rv, delay;
>       size_t                   offset;
>       char                    *next;
>  
> @@ -1395,13 +1465,10 @@ rtmsg_process(char *buf, size_t len)
>  
>                       if ((okr = kroute_find(&prefix, prefixlen, prio))
>                           != NULL) {
> -                             /* just add new multipath routes */
> -                             if (mpath && rtm->rtm_type == RTM_ADD)
> -                                     goto add;
> -                             /* get the correct route */
>                               kr = okr;
> -                             if (mpath && (kr = kroute_matchgw(okr,
> -                                 &nexthop, scope)) == NULL) {
> +                             if ((mpath || prio == kr_state.fib_prio) &&
> +                                 (kr = kroute_matchgw(okr, &nexthop, scope)) 
> ==
> +                                 NULL) {
>                                       log_warnx("rtmsg_process: mpath route"
>                                           " not found");
>                                       /* add routes we missed out earlier */
> @@ -1432,13 +1499,15 @@ rtmsg_process(char *buf, size_t len)
>                                       kr->r.flags |= F_DOWN;
>  
>                               /* just readd, the RDE will care */
> -                             kr_redistribute(okr);
> +                             okr->serial = kr_state.fib_serial;
> +                             kr_redistribute(kr);

I think this should be:
                                kr->serial = kr_state.fib_serial;
                                kr_redistribute(okr);

kr_redistribute() needs to be called with the head kroute but the serial
needs to be set on the route itself.

>                       } else {
>  add:
>                               if ((kr = calloc(1,
>                                   sizeof(struct kroute_node))) == NULL) {
>                                       log_warn("rtmsg_process calloc");
> -                                     return (-1);
> +                                     rv = -1;
> +                                     break;

This change is wrong. This must remain as return (-1);

>                               }
>                               kr->r.prefix = prefix;
>                               kr->r.prefixlen = prefixlen;
> @@ -1517,6 +1586,23 @@ add:
>                       break;
>               case RTM_IFANNOUNCE:
>                       if_announce(next);
> +                     break;
> +             case RTM_DESYNC:
> +                     /*
> +                      * We lost some routing packets. Schedule a reload
> +                      * of the kernel route/interface information.
> +                      */
> +                     if (kr_state.reload_state == KR_RELOAD_IDLE) {
> +                             delay = KR_RELOAD_TIMER;
> +                             log_info("desync; scheduling fib reload");
> +                     } else {
> +                             delay = KR_RELOAD_HOLD_TIMER;
> +                             log_debug("desync during KR_RELOAD_%s",
> +                                 kr_state.reload_state ==
> +                                 KR_RELOAD_FETCH ? "FETCH" : "HOLD");
> +                     }
> +                     kr_state.reload_state = KR_RELOAD_FETCH;
> +                     kr_fib_reload_arm_timer(delay);
>                       break;
>               default:
>                       /* ignore for now */

As said above everything else is OK.

-- 
:wq Claudio

Reply via email to