On 2023/05/26 13:52, Stuart Henderson wrote:
> I think my main issues come around LS_REFRESH_TIME intervals, when
> there's loads of churn and "ospf6d: ospf engine" can be busy for
> minutes at a time (not always, but very often). Don't know if that rings
> any bells for anyone... (I am now reminded that RTM_DESYNC isn't handled
> by ospf6d which probably doesn't help matters).

Here's a first attempt at porting the fib reload/desync diffs from
ospfd to ospf6d ... Not sure if it's good yet, but it didn't immediately
crash and burn when I ran "ospf6ctl fib reload", at least.

Index: ospf6ctl/ospf6ctl.8
===================================================================
RCS file: /cvs/src/usr.sbin/ospf6ctl/ospf6ctl.8,v
retrieving revision 1.13
diff -u -p -r1.13 ospf6ctl.8
--- ospf6ctl/ospf6ctl.8 2 Mar 2023 17:09:53 -0000       1.13
+++ ospf6ctl/ospf6ctl.8 26 May 2023 13:37:55 -0000
@@ -58,6 +58,9 @@ Remove the learned routes from the FIB.
 Decoupling the FIB from an OSPF router may create routing loops and could cause
 major routing issues in the complete OSPF cloud.
 Only routers with just one link to the OSPF cloud can safely decouple the FIB.
+.It Cm fib reload
+Refetches and relearns the routes in the Forwarding Information Base
+a.k.a. the kernel routing table.
 .It Cm log brief
 Disable verbose debug logging.
 .It Cm log verbose
Index: ospf6ctl/ospf6ctl.c
===================================================================
RCS file: /cvs/src/usr.sbin/ospf6ctl/ospf6ctl.c,v
retrieving revision 1.53
diff -u -p -r1.53 ospf6ctl.c
--- ospf6ctl/ospf6ctl.c 27 Dec 2022 12:11:39 -0000      1.53
+++ ospf6ctl/ospf6ctl.c 26 May 2023 13:37:55 -0000
@@ -225,6 +225,11 @@ main(int argc, char *argv[])
                printf("decouple request sent.\n");
                done = 1;
                break;
+       case FIB_RELOAD:
+               imsg_compose(ibuf, IMSG_CTL_FIB_RELOAD, 0, 0, -1, NULL, 0);
+               printf("reload request sent.\n");
+               done = 1;
+               break;
        case LOG_VERBOSE:
                verbose = 1;
                /* FALLTHROUGH */
@@ -304,6 +309,7 @@ main(int argc, char *argv[])
                        case FIB:
                        case FIB_COUPLE:
                        case FIB_DECOUPLE:
+                       case FIB_RELOAD:
                        case LOG_VERBOSE:
                        case LOG_BRIEF:
                        case RELOAD:
Index: ospf6ctl/parser.c
===================================================================
RCS file: /cvs/src/usr.sbin/ospf6ctl/parser.c,v
retrieving revision 1.14
diff -u -p -r1.14 parser.c
--- ospf6ctl/parser.c   26 May 2019 09:27:09 -0000      1.14
+++ ospf6ctl/parser.c   26 May 2023 13:37:55 -0000
@@ -73,6 +73,7 @@ static const struct token t_main[] = {
 static const struct token t_fib[] = {
        { KEYWORD,      "couple",       FIB_COUPLE,     NULL},
        { KEYWORD,      "decouple",     FIB_DECOUPLE,   NULL},
+       { KEYWORD,      "reload",       FIB_RELOAD,     NULL},
        { ENDTOKEN,     "",             NONE,           NULL}
 };
 
Index: ospf6ctl/parser.h
===================================================================
RCS file: /cvs/src/usr.sbin/ospf6ctl/parser.h,v
retrieving revision 1.9
diff -u -p -r1.9 parser.h
--- ospf6ctl/parser.h   26 May 2019 09:27:09 -0000      1.9
+++ ospf6ctl/parser.h   26 May 2023 13:37:55 -0000
@@ -29,6 +29,7 @@ enum actions {
        FIB,
        FIB_COUPLE,
        FIB_DECOUPLE,
+       FIB_RELOAD,
        LOG_VERBOSE,
        LOG_BRIEF,
        SHOW,
Index: ospf6d/control.c
===================================================================
RCS file: /cvs/src/usr.sbin/ospf6d/control.c,v
retrieving revision 1.31
diff -u -p -r1.31 control.c
--- ospf6d/control.c    8 Mar 2023 04:43:14 -0000       1.31
+++ ospf6d/control.c    26 May 2023 13:37:55 -0000
@@ -279,6 +279,7 @@ control_dispatch_imsg(int fd, short even
                case IMSG_CTL_FIB_DECOUPLE:
                        ospfe_fib_update(imsg.hdr.type);
                        /* FALLTHROUGH */
+               case IMSG_CTL_FIB_RELOAD:
                case IMSG_CTL_RELOAD:
                        c->iev.ibuf.pid = imsg.hdr.pid;
                        ospfe_imsg_compose_parent(imsg.hdr.type, 0, NULL, 0);
Index: ospf6d/kroute.c
===================================================================
RCS file: /cvs/src/usr.sbin/ospf6d/kroute.c,v
retrieving revision 1.67
diff -u -p -r1.67 kroute.c
--- ospf6d/kroute.c     8 Mar 2023 04:43:14 -0000       1.67
+++ ospf6d/kroute.c     26 May 2023 13:37:55 -0000
@@ -45,16 +45,22 @@ struct {
        u_int32_t               rtseq;
        pid_t                   pid;
        int                     fib_sync;
+       int                     fib_serial;
        u_int8_t                fib_prio;
        int                     fd;
-       struct event            ev;
+       struct event            ev, reload;
        u_int                   rdomain;
+#define KR_RELOAD_IDLE 0
+#define KR_RELOAD_FETCH        1
+#define KR_RELOAD_HOLD 2
+       int                     reload_state;
 } kr_state;
 
 struct kroute_node {
        RB_ENTRY(kroute_node)    entry;
        struct kroute_node      *next;
        struct kroute            r;
+       int                      serial;
 };
 
 void   kr_redist_remove(struct kroute_node *, struct kroute_node *);
@@ -90,7 +96,10 @@ void         if_announce(void *);
 int            send_rtmsg(int, int, struct kroute *);
 int            dispatch_rtmsg(void);
 int            fetchtable(void);
-int            rtmsg_process(char *, size_t); 
+int            refetchtable(void);
+int            rtmsg_process(char *, size_t);
+void           kr_fib_reload_timer(int, short, void *);
+void           kr_fib_reload_arm_timer(int);
 
 RB_HEAD(kroute_tree, kroute_node)      krt;
 RB_PROTOTYPE(kroute_tree, kroute_node, entry, kroute_compare)
@@ -165,6 +174,9 @@ kr_init(int fs, u_int rdomain, int redis
            kr_dispatch_msg, NULL);
        event_add(&kr_state.ev, NULL);
 
+       kr_state.reload_state = KR_RELOAD_IDLE;
+       evtimer_set(&kr_state.reload, kr_fib_reload_timer, NULL);
+
        return (0);
 }
 
@@ -374,6 +386,62 @@ kr_fib_decouple(void)
 }
 
 void
+kr_fib_reload_timer(int fd, short event, void *bula)
+{
+       if (kr_state.reload_state == KR_RELOAD_FETCH) {
+               kr_fib_reload();
+               kr_state.reload_state = KR_RELOAD_HOLD;
+               kr_fib_reload_arm_timer(KR_RELOAD_HOLD_TIMER);
+       } else {
+               kr_state.reload_state = KR_RELOAD_IDLE;
+       }
+}
+
+void
+kr_fib_reload_arm_timer(int delay)
+{
+       struct timeval          tv;
+
+       timerclear(&tv);
+       tv.tv_sec = delay / 1000;
+       tv.tv_usec = (delay % 1000) * 1000;
+
+       if (evtimer_add(&kr_state.reload, &tv) == -1)
+               fatal("add_reload_timer");
+}
+
+void
+kr_fib_reload(void)
+{
+       struct kroute_node      *krn, *kr, *kn;
+
+       kr_state.fib_serial++;
+       /* serial bumped: kroute_insert() stamps new entries with it */
+       if (fetchifs(0) != 0 || fetchtable() != 0)
+               return;
+
+       for (kr = RB_MIN(kroute_tree, &krt); kr != NULL; kr = krn) {
+               krn = RB_NEXT(kroute_tree, &krt, kr);
+               /* walk chain; save next, kr may be removed below */
+               do {
+                       kn = kr->next;
+
+                       if (kr->serial != kr_state.fib_serial) {
+                               /* stale: re-add ours, forget the rest */
+                               if (kr->r.priority == kr_state.fib_prio) {
+                                       kr->serial = kr_state.fib_serial;
+                                       if (send_rtmsg(kr_state.fd,
+                                           RTM_ADD, &kr->r) != 0)
+                                               break;
+                               } else
+                                       kroute_remove(kr);
+                       }
+
+               } while ((kr = kn) != NULL);
+       }
+}
+
+void
 kr_fib_update_prio(u_int8_t fib_prio)
 {
        struct kroute_node      *kr;
@@ -664,6 +732,8 @@ kroute_insert(struct kroute_node *kr)
 {
        struct kroute_node      *krm, *krh;
 
+       kr->serial = kr_state.fib_serial;
+
        if ((krh = RB_INSERT(kroute_tree, &krt, kr)) != NULL) {
                /*
                 * Multipath route, add at end of list.
@@ -1279,7 +1349,7 @@ rtmsg_process(char *buf, size_t len)
        int                      flags, mpath;
        unsigned int             scope;
        u_short                  ifindex = 0;
-       int                      rv;
+       int                      rv, delay;
        size_t                   offset;
        char                    *next;
 
@@ -1395,13 +1465,10 @@ rtmsg_process(char *buf, size_t len)
 
                        if ((okr = kroute_find(&prefix, prefixlen, prio))
                            != NULL) {
-                               /* just add new multipath routes */
-                               if (mpath && rtm->rtm_type == RTM_ADD)
-                                       goto add;
-                               /* get the correct route */
                                kr = okr;
-                               if (mpath && (kr = kroute_matchgw(okr,
-                                   &nexthop, scope)) == NULL) {
+                               if ((mpath || prio == kr_state.fib_prio) &&
+                                   (kr = kroute_matchgw(okr, &nexthop, scope)) ==
+                                   NULL) {
                                        log_warnx("rtmsg_process: mpath route"
                                            " not found");
                                        /* add routes we missed out earlier */
@@ -1432,13 +1499,15 @@ rtmsg_process(char *buf, size_t len)
                                        kr->r.flags |= F_DOWN;
 
                                /* just readd, the RDE will care */
-                               kr_redistribute(okr);
+                               okr->serial = kr_state.fib_serial;
+                               kr_redistribute(kr);
                        } else {
 add:
                                if ((kr = calloc(1,
                                    sizeof(struct kroute_node))) == NULL) {
                                        log_warn("rtmsg_process calloc");
-                                       return (-1);
+                                       rv = -1;
+                                       break;
                                }
                                kr->r.prefix = prefix;
                                kr->r.prefixlen = prefixlen;
@@ -1517,6 +1586,23 @@ add:
                        break;
                case RTM_IFANNOUNCE:
                        if_announce(next);
+                       break;
+               case RTM_DESYNC:
+                       /*
+                        * We lost some routing packets. Schedule a reload
+                        * of the kernel route/interface information.
+                        */
+                       if (kr_state.reload_state == KR_RELOAD_IDLE) {
+                               delay = KR_RELOAD_TIMER;
+                               log_info("desync; scheduling fib reload");
+                       } else {
+                               delay = KR_RELOAD_HOLD_TIMER;
+                               log_debug("desync during KR_RELOAD_%s",
+                                   kr_state.reload_state ==
+                                   KR_RELOAD_FETCH ? "FETCH" : "HOLD");
+                       }
+                       kr_state.reload_state = KR_RELOAD_FETCH;
+                       kr_fib_reload_arm_timer(delay);
                        break;
                default:
                        /* ignore for now */
Index: ospf6d/ospf6.h
===================================================================
RCS file: /cvs/src/usr.sbin/ospf6d/ospf6.h,v
retrieving revision 1.21
diff -u -p -r1.21 ospf6.h
--- ospf6d/ospf6.h      25 Mar 2013 14:29:35 -0000      1.21
+++ ospf6d/ospf6.h      26 May 2023 13:37:55 -0000
@@ -69,6 +69,10 @@
 #define MIN_SPF_HOLDTIME       1
 #define MAX_SPF_HOLDTIME       5
 
+/* msec */
+#define KR_RELOAD_TIMER                250
+#define KR_RELOAD_HOLD_TIMER   5000
+
 #define MIN_MD_ID              0
 #define MAX_MD_ID              255
 
Index: ospf6d/ospf6d.c
===================================================================
RCS file: /cvs/src/usr.sbin/ospf6d/ospf6d.c,v
retrieving revision 1.53
diff -u -p -r1.53 ospf6d.c
--- ospf6d/ospf6d.c     8 Mar 2023 04:43:14 -0000       1.53
+++ ospf6d/ospf6d.c     26 May 2023 13:37:55 -0000
@@ -372,6 +372,9 @@ main_dispatch_ospfe(int fd, short event,
                case IMSG_CTL_FIB_DECOUPLE:
                        kr_fib_decouple();
                        break;
+               case IMSG_CTL_FIB_RELOAD:
+                       kr_fib_reload();
+                       break;
                case IMSG_CTL_KROUTE:
                case IMSG_CTL_KROUTE_ADDR:
                        kr_show_route(&imsg);
Index: ospf6d/ospf6d.h
===================================================================
RCS file: /cvs/src/usr.sbin/ospf6d/ospf6d.h,v
retrieving revision 1.50
diff -u -p -r1.50 ospf6d.h
--- ospf6d/ospf6d.h     19 Jan 2021 09:46:51 -0000      1.50
+++ ospf6d/ospf6d.h     26 May 2023 13:37:55 -0000
@@ -93,6 +93,7 @@ enum imsg_type {
        IMSG_CTL_SHOW_SUM_AREA,
        IMSG_CTL_FIB_COUPLE,
        IMSG_CTL_FIB_DECOUPLE,
+       IMSG_CTL_FIB_RELOAD,
        IMSG_CTL_AREA,
        IMSG_CTL_IFACE,
        IMSG_CTL_KROUTE,
@@ -546,6 +547,7 @@ int          kr_delete(struct kroute *);
 void            kr_shutdown(void);
 void            kr_fib_couple(void);
 void            kr_fib_decouple(void);
+void            kr_fib_reload(void);
 void            kr_fib_update_prio(u_int8_t);
 void            kr_dispatch_msg(int, short, void *);
 void            kr_show_route(struct imsg *);

Reply via email to