On Thu, Oct 25, 2018 at 08:51:30AM +0200, Claudio Jeker wrote: > Next step on my quest to make the RIB code better. > This changes the following things: > - network_flush is now using rib_dump_new to walk the Adj-RIB-In and > remove all dynamically added announcements > - peer_flush got generalized and is now used also in peer_down. > It also uses a rib_dump_new call to walk the Adj-RIB-In and remove > all prefixes from a peer but this is done synchronous for now. > - peer_flush is now working correctly for AID_UNSPEC. > - Change the rib_valid check to continue instead of break out of the loop. > The rib table can have holes so the loop needs to continue. > > This removes the last three places that use the peer -> path -> prefix > tailqs so the next step is to remove these and make rde_aspath just an > object that is referenced by prefixes. After that adding a proper > Adj-RIB-Out should be possible. > > Running with this on production :)
I will commit this on Monday unless someone objects or I get some OKs :) -- :wq Claudio Index: rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v retrieving revision 1.440 diff -u -p -r1.440 rde.c --- rde.c 24 Oct 2018 08:26:37 -0000 1.440 +++ rde.c 25 Oct 2018 06:34:32 -0000 @@ -97,7 +97,7 @@ struct rde_peer *peer_add(u_int32_t, str struct rde_peer *peer_get(u_int32_t); void peer_up(u_int32_t, struct session_up *); void peer_down(u_int32_t); -void peer_flush(struct rde_peer *, u_int8_t); +void peer_flush(struct rde_peer *, u_int8_t, time_t); void peer_stale(u_int32_t, u_int8_t); void peer_dump(u_int32_t, u_int8_t); static void peer_recv_eor(struct rde_peer *, u_int8_t); @@ -105,7 +105,8 @@ static void peer_send_eor(struct rde_pe void network_add(struct network_config *, int); void network_delete(struct network_config *, int); -void network_dump_upcall(struct rib_entry *, void *); +static void network_dump_upcall(struct rib_entry *, void *); +static void network_flush_upcall(struct rib_entry *, void *); void rde_shutdown(void); int sa_cmp(struct bgpd_addr *, struct sockaddr *); @@ -418,7 +419,7 @@ rde_dispatch_imsg_session(struct imsgbuf imsg.hdr.peerid); break; } - peer_flush(peer, aid); + peer_flush(peer, aid, peer->staletime[aid]); break; case IMSG_SESSION_RESTARTED: if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { @@ -434,7 +435,7 @@ rde_dispatch_imsg_session(struct imsgbuf break; } if (peer->staletime[aid]) - peer_flush(peer, aid); + peer_flush(peer, aid, peer->staletime[aid]); break; case IMSG_REFRESH: if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { @@ -556,7 +557,10 @@ badnetdel: log_warnx("rde_dispatch: wrong imsg len"); break; } - prefix_network_clean(peerself); + if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC, + RDE_RUNNER_ROUNDS, peerself, network_flush_upcall, + NULL, NULL) == -1) + log_warn("rde_dispatch: IMSG_NETWORK_FLUSH"); break; case IMSG_FILTER_SET: if (imsg.hdr.len - IMSG_HEADER_SIZE != @@ -770,7 +774,7 @@ rde_dispatch_imsg_parent(struct imsgbuf memcpy(nconf, imsg.data, sizeof(struct bgpd_config)); for (rid = 0; rid < rib_size; rid++) { if (!rib_valid(rid)) - break; + continue; ribs[rid].state = RECONF_DELETE; } SIMPLEQ_INIT(&nconf->rde_prefixsets); @@ -1411,7 +1415,7 @@ rde_update_update(struct rde_peer *peer, for (i = RIB_LOC_START; i < rib_size; i++) { if (!rib_valid(i)) - break; + continue; rde_filterstate_prep(&state, &in->aspath, in->nexthop, in->nhflags); /* input filter */ @@ -1443,7 +1447,7 @@ rde_update_withdraw(struct rde_peer *pee for (i = RIB_LOC_START; i < rib_size; i++) { if (!rib_valid(i)) - break; + continue; if (prefix_remove(&ribs[i].rib, peer, prefix, prefixlen)) rde_update_log("withdraw", i, peer, NULL, prefix, prefixlen); @@ -2957,11 +2961,10 @@ rde_reload_done(void) static void rde_softreconfig_in_done(void *arg, u_int8_t aid) { - struct rib_desc *rd = arg; - struct rde_peer *peer; - u_int16_t rid; + struct rde_peer *peer; + u_int16_t rid; - if (rd != NULL) { + if (arg != NULL) { softreconfig--; /* one guy done but other dumps are still running */ if (softreconfig > 0) @@ -3372,10 +3375,7 @@ peer_up(u_int32_t id, struct session_up * There is a race condition when doing PEER_ERR -> PEER_DOWN. * So just do a full reset of the peer here. */ - for (i = 0; i < AID_MAX; i++) { - peer->staletime[i] = 0; - peer_flush(peer, i); - } + peer_flush(peer, AID_UNSPEC, 0); up_down(peer); peer->prefix_cnt = 0; peer->state = PEER_DOWN; @@ -3412,7 +3412,6 @@ void peer_down(u_int32_t id) { struct rde_peer *peer; - struct rde_aspath *asp, *nasp; peer = peer_get(id); if (peer == NULL) { @@ -3425,49 +3424,85 @@ peer_down(u_int32_t id) /* stop all pending dumps which may depend on this peer */ rib_dump_terminate(peer->loc_rib_id, peer, rde_up_dump_upcall); - /* walk through per peer RIB list and remove all prefixes. */ - for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = nasp) { - nasp = TAILQ_NEXT(asp, peer_l); - path_remove(asp); - } - TAILQ_INIT(&peer->path_h); - peer->prefix_cnt = 0; + peer_flush(peer, AID_UNSPEC, 0); - /* Deletions are performed in path_remove() */ - rde_send_pftable_commit(); + peer->prefix_cnt = 0; LIST_REMOVE(peer, hash_l); LIST_REMOVE(peer, peer_l); free(peer); } +struct peer_flush { + struct rde_peer *peer; + time_t staletime; +}; + +static void +peer_flush_upcall(struct rib_entry *re, void *arg) +{ + struct rde_peer *peer = ((struct peer_flush *)arg)->peer; + struct rde_aspath *asp; + struct bgpd_addr addr; + struct prefix *p, *np, *rp; + time_t staletime = ((struct peer_flush *)arg)->staletime; + u_int32_t i; + u_int8_t prefixlen; + + pt_getaddr(re->prefix, &addr); + prefixlen = re->prefix->prefixlen; + LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) { + if (peer != prefix_peer(p)) + continue; + if (staletime && p->lastchange > staletime) + continue; + + for (i = RIB_LOC_START; i < rib_size; i++) { + if (!rib_valid(i)) + continue; + rp = prefix_get(&ribs[i].rib, peer, &addr, prefixlen); + if (rp) { + asp = prefix_aspath(rp); + if (asp->pftableid) + rde_send_pftable(asp->pftableid, &addr, + prefixlen, 1); + + prefix_destroy(rp); + rde_update_log("flush", i, peer, NULL, + &addr, prefixlen); + } + } + + prefix_destroy(p); + peer->prefix_cnt--; + } +} + /* * Flush all routes older then staletime. If staletime is 0 all routes will * be flushed. */ void -peer_flush(struct rde_peer *peer, u_int8_t aid) +peer_flush(struct rde_peer *peer, u_int8_t aid, time_t staletime) { - struct rde_aspath *asp, *nasp; - u_int32_t rprefixes; + struct peer_flush pf = { peer, staletime }; - rprefixes = 0; - /* walk through per peer RIB list and remove all stale prefixes. */ - for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = nasp) { - nasp = TAILQ_NEXT(asp, peer_l); - rprefixes += path_remove_stale(asp, aid, peer->staletime[aid]); - } + /* this dump must run synchronous, too much depends on that right now */ + if (rib_dump_new(RIB_ADJ_IN, aid, 0, &pf, peer_flush_upcall, + NULL, NULL) == -1) + fatal("%s: rib_dump_new", __func__); /* Deletions are performed in path_remove() */ rde_send_pftable_commit(); /* flushed no need to keep staletime */ - peer->staletime[aid] = 0; - - if (peer->prefix_cnt > rprefixes) - peer->prefix_cnt -= rprefixes; - else - peer->prefix_cnt = 0; + if (aid == AID_UNSPEC) { + u_int8_t i; + for (i = 0; i < AID_MAX; i++) + peer->staletime[i] = 0; + } else { + peer->staletime[aid] = 0; + } } void @@ -3484,7 +3519,7 @@ peer_stale(u_int32_t id, u_int8_t aid) /* flush the now even staler routes out */ if (peer->staletime[aid]) - peer_flush(peer, aid); + peer_flush(peer, aid, peer->staletime[aid]); peer->staletime[aid] = now = time(NULL); /* make sure new prefixes start on a higher timestamp */ @@ -3661,7 +3696,7 @@ network_add(struct network_config *nc, i peerself->prefix_cnt++; for (i = RIB_LOC_START; i < rib_size; i++) { if (!rib_valid(i)) - break; + continue; rde_update_log("announce", i, peerself, state.nexthop ? &state.nexthop->exit_nexthop : NULL, &nc->prefix, nc->prefixlen); @@ -3713,19 +3748,18 @@ network_delete(struct network_config *nc for (i = RIB_LOC_START; i < rib_size; i++) { if (!rib_valid(i)) - break; + continue; if (prefix_remove(&ribs[i].rib, peerself, &nc->prefix, nc->prefixlen)) rde_update_log("withdraw announce", i, peerself, NULL, &nc->prefix, nc->prefixlen); - } if (prefix_remove(&ribs[RIB_ADJ_IN].rib, peerself, &nc->prefix, nc->prefixlen)) peerself->prefix_cnt--; } -void +static void network_dump_upcall(struct rib_entry *re, void *ptr) { struct prefix *p; @@ -3759,6 +3793,41 @@ network_dump_upcall(struct rib_entry *re } } +static void +network_flush_upcall(struct rib_entry *re, void *ptr) +{ + struct rde_peer *peer = ptr; + struct rde_aspath *asp; + struct bgpd_addr addr; + struct prefix *p, *np, *rp; + u_int32_t i; + u_int8_t prefixlen; + + pt_getaddr(re->prefix, &addr); + prefixlen = re->prefix->prefixlen; + LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) { + if (prefix_peer(p) != peer) + continue; + asp = prefix_aspath(p); + if ((asp->flags & F_ANN_DYNAMIC) != F_ANN_DYNAMIC) + continue; + + for (i = RIB_LOC_START; i < rib_size; i++) { + if (!rib_valid(i)) + continue; + rp = prefix_get(&ribs[i].rib, peer, &addr, prefixlen); + if (rp) { + prefix_destroy(rp); + rde_update_log("flush announce", i, peer, + NULL, &addr, prefixlen); + } + } + + prefix_destroy(p); + peer->prefix_cnt--; + } +} + /* clean up */ void rde_shutdown(void) @@ -3782,7 +3851,7 @@ rde_shutdown(void) filterlist_free(out_rules); for (i = 0; i < rib_size; i++) { if (!rib_valid(i)) - break; + continue; filterlist_free(ribs[i].in_rules); } Index: rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v retrieving revision 1.198 diff -u -p -r1.198 rde.h --- rde.h 24 Oct 2018 08:26:37 -0000 1.198 +++ rde.h 24 Oct 2018 21:27:25 -0000 @@ -477,7 +477,6 @@ void path_hash_stats(struct rde_hashst int path_update(struct rib *, struct rde_peer *, struct filterstate *, struct bgpd_addr *, int, u_int8_t); int path_compare(struct rde_aspath *, struct rde_aspath *); -void path_remove(struct rde_aspath *); u_int32_t path_remove_stale(struct rde_aspath *, u_int8_t, time_t); void path_destroy(struct rde_aspath *); int path_empty(struct rde_aspath *); @@ -498,7 +497,6 @@ struct prefix *prefix_bypeer(struct rib_ void prefix_updateall(struct prefix *, enum nexthop_state, enum nexthop_state); void prefix_destroy(struct prefix *); -void prefix_network_clean(struct rde_peer *); void prefix_relink(struct prefix *, struct rde_aspath *, int); static inline struct rde_peer * Index: rde_rib.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v retrieving revision 1.180 diff -u -p -r1.180 rde_rib.c --- rde_rib.c 24 Oct 2018 08:26:37 -0000 1.180 +++ rde_rib.c 24 Oct 2018 08:44:54 -0000 @@ -339,8 +339,8 @@ rib_restart(struct rib_context *ctx) static void rib_dump_r(struct rib_context *ctx) { + struct rib_entry *re, *next; struct rib *rib; - struct rib_entry *re; unsigned int i; rib = rib_byid(ctx->ctx_rib_id); @@ -352,7 +352,8 @@ rib_dump_r(struct rib_context *ctx) else re = rib_restart(ctx); - for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) { + for (i = 0; re != NULL; re = next) { + next = RB_NEXT(rib_tree, unused, re); if (re->rib_id != ctx->ctx_rib_id) fatalx("%s: Unexpected RIB %u != %u.", __func__, re->rib_id, ctx->ctx_rib_id); @@ -435,6 +436,10 @@ rib_dump_new(u_int16_t id, u_int8_t aid, LIST_INSERT_HEAD(&rib_dumps, ctx, entry); + /* requested a sync traversal */ + if (count == 0) + rib_dump_r(ctx); + return 0; } @@ -664,65 +669,6 @@ path_lookup(struct rde_aspath *aspath, s return (NULL); } -void -path_remove(struct rde_aspath *asp) -{ - struct prefix *p, *np; - - for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = np) { - np = TAILQ_NEXT(p, path_l); - if (asp->pftableid) { - struct bgpd_addr addr; - - pt_getaddr(p->re->prefix, &addr); - /* Commit is done in peer_down() */ - rde_send_pftable(prefix_aspath(p)->pftableid, &addr, - p->re->prefix->prefixlen, 1); - } - prefix_destroy(p); - } -} - -/* remove all stale routes or if staletime is 0 remove all routes for - a specified AID. */ -u_int32_t -path_remove_stale(struct rde_aspath *asp, u_int8_t aid, time_t staletime) -{ - struct prefix *p, *np; - u_int32_t rprefixes; - - rprefixes=0; - /* - * This is called when a session flapped and during that time - * the pending updates for that peer are getting reset. - */ - for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = np) { - np = TAILQ_NEXT(p, path_l); - if (p->re->prefix->aid != aid) - continue; - - if (staletime && p->lastchange > staletime) - continue; - - if (asp->pftableid) { - struct bgpd_addr addr; - - pt_getaddr(p->re->prefix, &addr); - /* Commit is done in peer_flush() */ - rde_send_pftable(prefix_aspath(p)->pftableid, &addr, - p->re->prefix->prefixlen, 1); - } - - /* only count Adj-RIB-In */ - if (re_rib(p->re) == &ribs[RIB_ADJ_IN].rib) - rprefixes++; - - prefix_destroy(p); - } - return (rprefixes); -} - - /* * This function can only called when all prefix have been removed first. * Normally this happens directly out of the prefix removal functions. @@ -1144,29 +1090,6 @@ prefix_destroy(struct prefix *p) if (path_empty(asp)) path_destroy(asp); -} - -/* - * helper function to clean up the dynamically added networks - */ -void -prefix_network_clean(struct rde_peer *peer) -{ - struct rde_aspath *asp, *xasp; - struct prefix *p, *xp; - - for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = xasp) { - xasp = TAILQ_NEXT(asp, peer_l); - if ((asp->flags & F_ANN_DYNAMIC) != F_ANN_DYNAMIC) - continue; - for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = xp) { - xp = TAILQ_NEXT(p, path_l); - prefix_unlink(p); - prefix_free(p); - } - if (path_empty(asp)) - path_destroy(asp); - } } /*