Next step on my quest to make the RIB code better. This changes the following things: - network_flush is now using rib_dump_new to walk the Adj-RIB-In and remove all dynamically added announcements - peer_flush got generalized and is now used also in peer_down. It also uses a rib_dump_new call to walk the Adj-RIB-In and remove all prefixes from a peer but this is done synchronous for now. - peer_flush is now working correctly for AID_UNSPEC. - Change the rib_valid check to continue instead of break out of the loop. The rib table can have holes so the loop needs to continue.
This removes the last three places that use the peer -> path -> prefix tailqs so the next step is to remove these and make rde_aspath just an object that is referenced by prefixes. After that adding a proper Adj-RIB-Out should be possible. Running with this on production :) -- :wq Claudio Index: rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v retrieving revision 1.440 diff -u -p -r1.440 rde.c --- rde.c 24 Oct 2018 08:26:37 -0000 1.440 +++ rde.c 25 Oct 2018 06:34:32 -0000 @@ -97,7 +97,7 @@ struct rde_peer *peer_add(u_int32_t, str struct rde_peer *peer_get(u_int32_t); void peer_up(u_int32_t, struct session_up *); void peer_down(u_int32_t); -void peer_flush(struct rde_peer *, u_int8_t); +void peer_flush(struct rde_peer *, u_int8_t, time_t); void peer_stale(u_int32_t, u_int8_t); void peer_dump(u_int32_t, u_int8_t); static void peer_recv_eor(struct rde_peer *, u_int8_t); @@ -105,7 +105,8 @@ static void peer_send_eor(struct rde_pe void network_add(struct network_config *, int); void network_delete(struct network_config *, int); -void network_dump_upcall(struct rib_entry *, void *); +static void network_dump_upcall(struct rib_entry *, void *); +static void network_flush_upcall(struct rib_entry *, void *); void rde_shutdown(void); int sa_cmp(struct bgpd_addr *, struct sockaddr *); @@ -418,7 +419,7 @@ rde_dispatch_imsg_session(struct imsgbuf imsg.hdr.peerid); break; } - peer_flush(peer, aid); + peer_flush(peer, aid, peer->staletime[aid]); break; case IMSG_SESSION_RESTARTED: if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { @@ -434,7 +435,7 @@ rde_dispatch_imsg_session(struct imsgbuf break; } if (peer->staletime[aid]) - peer_flush(peer, aid); + peer_flush(peer, aid, peer->staletime[aid]); break; case IMSG_REFRESH: if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { @@ -556,7 +557,10 @@ badnetdel: log_warnx("rde_dispatch: wrong imsg len"); break; } - prefix_network_clean(peerself); + if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC, + RDE_RUNNER_ROUNDS, peerself, network_flush_upcall, + NULL, NULL) == -1) + log_warn("rde_dispatch: IMSG_NETWORK_FLUSH"); break; case IMSG_FILTER_SET: if (imsg.hdr.len - IMSG_HEADER_SIZE != @@ -770,7 +774,7 @@ rde_dispatch_imsg_parent(struct imsgbuf memcpy(nconf, imsg.data, sizeof(struct bgpd_config)); for (rid = 0; rid < rib_size; rid++) { if (!rib_valid(rid)) - break; + continue; ribs[rid].state = RECONF_DELETE; } SIMPLEQ_INIT(&nconf->rde_prefixsets); @@ -1411,7 +1415,7 @@ rde_update_update(struct rde_peer *peer, for (i = RIB_LOC_START; i < rib_size; i++) { if (!rib_valid(i)) - break; + continue; rde_filterstate_prep(&state, &in->aspath, in->nexthop, in->nhflags); /* input filter */ @@ -1443,7 +1447,7 @@ rde_update_withdraw(struct rde_peer *pee for (i = RIB_LOC_START; i < rib_size; i++) { if (!rib_valid(i)) - break; + continue; if (prefix_remove(&ribs[i].rib, peer, prefix, prefixlen)) rde_update_log("withdraw", i, peer, NULL, prefix, prefixlen); @@ -2957,11 +2961,10 @@ rde_reload_done(void) static void rde_softreconfig_in_done(void *arg, u_int8_t aid) { - struct rib_desc *rd = arg; - struct rde_peer *peer; - u_int16_t rid; + struct rde_peer *peer; + u_int16_t rid; - if (rd != NULL) { + if (arg != NULL) { softreconfig--; /* one guy done but other dumps are still running */ if (softreconfig > 0) @@ -3372,10 +3375,7 @@ peer_up(u_int32_t id, struct session_up * There is a race condition when doing PEER_ERR -> PEER_DOWN. * So just do a full reset of the peer here. */ - for (i = 0; i < AID_MAX; i++) { - peer->staletime[i] = 0; - peer_flush(peer, i); - } + peer_flush(peer, AID_UNSPEC, 0); up_down(peer); peer->prefix_cnt = 0; peer->state = PEER_DOWN; @@ -3412,7 +3412,6 @@ void peer_down(u_int32_t id) { struct rde_peer *peer; - struct rde_aspath *asp, *nasp; peer = peer_get(id); if (peer == NULL) { @@ -3425,49 +3424,85 @@ peer_down(u_int32_t id) /* stop all pending dumps which may depend on this peer */ rib_dump_terminate(peer->loc_rib_id, peer, rde_up_dump_upcall); - /* walk through per peer RIB list and remove all prefixes. */ - for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = nasp) { - nasp = TAILQ_NEXT(asp, peer_l); - path_remove(asp); - } - TAILQ_INIT(&peer->path_h); - peer->prefix_cnt = 0; + peer_flush(peer, AID_UNSPEC, 0); - /* Deletions are performed in path_remove() */ - rde_send_pftable_commit(); + peer->prefix_cnt = 0; LIST_REMOVE(peer, hash_l); LIST_REMOVE(peer, peer_l); free(peer); } +struct peer_flush { + struct rde_peer *peer; + time_t staletime; +}; + +static void +peer_flush_upcall(struct rib_entry *re, void *arg) +{ + struct rde_peer *peer = ((struct peer_flush *)arg)->peer; + struct rde_aspath *asp; + struct bgpd_addr addr; + struct prefix *p, *np, *rp; + time_t staletime = ((struct peer_flush *)arg)->staletime; + u_int32_t i; + u_int8_t prefixlen; + + pt_getaddr(re->prefix, &addr); + prefixlen = re->prefix->prefixlen; + LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) { + if (peer != prefix_peer(p)) + continue; + if (staletime && p->lastchange > staletime) + continue; + + for (i = RIB_LOC_START; i < rib_size; i++) { + if (!rib_valid(i)) + continue; + rp = prefix_get(&ribs[i].rib, peer, &addr, prefixlen); + if (rp) { + asp = prefix_aspath(rp); + if (asp->pftableid) + rde_send_pftable(asp->pftableid, &addr, + prefixlen, 1); + + prefix_destroy(rp); + rde_update_log("flush", i, peer, NULL, + &addr, prefixlen); + } + } + + prefix_destroy(p); + peer->prefix_cnt--; + } +} + /* * Flush all routes older then staletime. If staletime is 0 all routes will * be flushed. */ void -peer_flush(struct rde_peer *peer, u_int8_t aid) +peer_flush(struct rde_peer *peer, u_int8_t aid, time_t staletime) { - struct rde_aspath *asp, *nasp; - u_int32_t rprefixes; + struct peer_flush pf = { peer, staletime }; - rprefixes = 0; - /* walk through per peer RIB list and remove all stale prefixes. */ - for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = nasp) { - nasp = TAILQ_NEXT(asp, peer_l); - rprefixes += path_remove_stale(asp, aid, peer->staletime[aid]); - } + /* this dump must run synchronous, too much depends on that right now */ + if (rib_dump_new(RIB_ADJ_IN, aid, 0, &pf, peer_flush_upcall, + NULL, NULL) == -1) + fatal("%s: rib_dump_new", __func__); /* Deletions are performed in path_remove() */ rde_send_pftable_commit(); /* flushed no need to keep staletime */ - peer->staletime[aid] = 0; - - if (peer->prefix_cnt > rprefixes) - peer->prefix_cnt -= rprefixes; - else - peer->prefix_cnt = 0; + if (aid == AID_UNSPEC) { + u_int8_t i; + for (i = 0; i < AID_MAX; i++) + peer->staletime[i] = 0; + } else { + peer->staletime[aid] = 0; + } } void @@ -3484,7 +3519,7 @@ peer_stale(u_int32_t id, u_int8_t aid) /* flush the now even staler routes out */ if (peer->staletime[aid]) - peer_flush(peer, aid); + peer_flush(peer, aid, peer->staletime[aid]); peer->staletime[aid] = now = time(NULL); /* make sure new prefixes start on a higher timestamp */ @@ -3661,7 +3696,7 @@ network_add(struct network_config *nc, i peerself->prefix_cnt++; for (i = RIB_LOC_START; i < rib_size; i++) { if (!rib_valid(i)) - break; + continue; rde_update_log("announce", i, peerself, state.nexthop ? &state.nexthop->exit_nexthop : NULL, &nc->prefix, nc->prefixlen); @@ -3713,19 +3748,18 @@ network_delete(struct network_config *nc for (i = RIB_LOC_START; i < rib_size; i++) { if (!rib_valid(i)) - break; + continue; if (prefix_remove(&ribs[i].rib, peerself, &nc->prefix, nc->prefixlen)) rde_update_log("withdraw announce", i, peerself, NULL, &nc->prefix, nc->prefixlen); - } if (prefix_remove(&ribs[RIB_ADJ_IN].rib, peerself, &nc->prefix, nc->prefixlen)) peerself->prefix_cnt--; } -void +static void network_dump_upcall(struct rib_entry *re, void *ptr) { struct prefix *p; @@ -3759,6 +3793,41 @@ network_dump_upcall(struct rib_entry *re } } +static void +network_flush_upcall(struct rib_entry *re, void *ptr) +{ + struct rde_peer *peer = ptr; + struct rde_aspath *asp; + struct bgpd_addr addr; + struct prefix *p, *np, *rp; + u_int32_t i; + u_int8_t prefixlen; + + pt_getaddr(re->prefix, &addr); + prefixlen = re->prefix->prefixlen; + LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) { + if (prefix_peer(p) != peer) + continue; + asp = prefix_aspath(p); + if ((asp->flags & F_ANN_DYNAMIC) != F_ANN_DYNAMIC) + continue; + + for (i = RIB_LOC_START; i < rib_size; i++) { + if (!rib_valid(i)) + continue; + rp = prefix_get(&ribs[i].rib, peer, &addr, prefixlen); + if (rp) { + prefix_destroy(rp); + rde_update_log("flush announce", i, peer, + NULL, &addr, prefixlen); + } + } + + prefix_destroy(p); + peer->prefix_cnt--; + } +} + /* clean up */ void rde_shutdown(void) @@ -3782,7 +3851,7 @@ rde_shutdown(void) filterlist_free(out_rules); for (i = 0; i < rib_size; i++) { if (!rib_valid(i)) - break; + continue; filterlist_free(ribs[i].in_rules); } Index: rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v retrieving revision 1.198 diff -u -p -r1.198 rde.h --- rde.h 24 Oct 2018 08:26:37 -0000 1.198 +++ rde.h 24 Oct 2018 21:27:25 -0000 @@ -477,7 +477,6 @@ void path_hash_stats(struct rde_hashst int path_update(struct rib *, struct rde_peer *, struct filterstate *, struct bgpd_addr *, int, u_int8_t); int path_compare(struct rde_aspath *, struct rde_aspath *); -void path_remove(struct rde_aspath *); u_int32_t path_remove_stale(struct rde_aspath *, u_int8_t, time_t); void path_destroy(struct rde_aspath *); int path_empty(struct rde_aspath *); @@ -498,7 +497,6 @@ struct prefix *prefix_bypeer(struct rib_ void prefix_updateall(struct prefix *, enum nexthop_state, enum nexthop_state); void prefix_destroy(struct prefix *); -void prefix_network_clean(struct rde_peer *); void prefix_relink(struct prefix *, struct rde_aspath *, int); static inline struct rde_peer * Index: rde_rib.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v retrieving revision 1.180 diff -u -p -r1.180 rde_rib.c --- rde_rib.c 24 Oct 2018 08:26:37 -0000 1.180 +++ rde_rib.c 24 Oct 2018 08:44:54 -0000 @@ -339,8 +339,8 @@ rib_restart(struct rib_context *ctx) static void rib_dump_r(struct rib_context *ctx) { + struct rib_entry *re, *next; struct rib *rib; - struct rib_entry *re; unsigned int i; rib = rib_byid(ctx->ctx_rib_id); @@ -352,7 +352,8 @@ rib_dump_r(struct rib_context *ctx) else re = rib_restart(ctx); - for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) { + for (i = 0; re != NULL; re = next) { + next = RB_NEXT(rib_tree, unused, re); if (re->rib_id != ctx->ctx_rib_id) fatalx("%s: Unexpected RIB %u != %u.", __func__, re->rib_id, ctx->ctx_rib_id); @@ -435,6 +436,10 @@ rib_dump_new(u_int16_t id, u_int8_t aid, LIST_INSERT_HEAD(&rib_dumps, ctx, entry); + /* requested a sync traversal */ + if (count == 0) + rib_dump_r(ctx); + return 0; } @@ -664,65 +669,6 @@ path_lookup(struct rde_aspath *aspath, s return (NULL); } -void -path_remove(struct rde_aspath *asp) -{ - struct prefix *p, *np; - - for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = np) { - np = TAILQ_NEXT(p, path_l); - if (asp->pftableid) { - struct bgpd_addr addr; - - pt_getaddr(p->re->prefix, &addr); - /* Commit is done in peer_down() */ - rde_send_pftable(prefix_aspath(p)->pftableid, &addr, - p->re->prefix->prefixlen, 1); - } - prefix_destroy(p); - } -} - -/* remove all stale routes or if staletime is 0 remove all routes for - a specified AID. */ -u_int32_t -path_remove_stale(struct rde_aspath *asp, u_int8_t aid, time_t staletime) -{ - struct prefix *p, *np; - u_int32_t rprefixes; - - rprefixes=0; - /* - * This is called when a session flapped and during that time - * the pending updates for that peer are getting reset. - */ - for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = np) { - np = TAILQ_NEXT(p, path_l); - if (p->re->prefix->aid != aid) - continue; - - if (staletime && p->lastchange > staletime) - continue; - - if (asp->pftableid) { - struct bgpd_addr addr; - - pt_getaddr(p->re->prefix, &addr); - /* Commit is done in peer_flush() */ - rde_send_pftable(prefix_aspath(p)->pftableid, &addr, - p->re->prefix->prefixlen, 1); - } - - /* only count Adj-RIB-In */ - if (re_rib(p->re) == &ribs[RIB_ADJ_IN].rib) - rprefixes++; - - prefix_destroy(p); - } - return (rprefixes); -} - - /* * This function can only called when all prefix have been removed first. * Normally this happens directly out of the prefix removal functions. @@ -1144,29 +1090,6 @@ prefix_destroy(struct prefix *p) if (path_empty(asp)) path_destroy(asp); -} - -/* - * helper function to clean up the dynamically added networks - */ -void -prefix_network_clean(struct rde_peer *peer) -{ - struct rde_aspath *asp, *xasp; - struct prefix *p, *xp; - - for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = xasp) { - xasp = TAILQ_NEXT(asp, peer_l); - if ((asp->flags & F_ANN_DYNAMIC) != F_ANN_DYNAMIC) - continue; - for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = xp) { - xp = TAILQ_NEXT(p, path_l); - prefix_unlink(p); - prefix_free(p); - } - if (path_empty(asp)) - path_destroy(asp); - } } /*