On Wed, Oct 31, 2018 at 04:24:49PM +0100, Claudio Jeker wrote: > This diff introduces a real Adj-RIB-Out. It is the minimal change to > introduce the new RIB. This removes the update_rib introduced before 6.4 > lock because that is now replaced with a real RIB. > The code used by bgpctl show rib is now a fair amount easier since now one > RIB runner can be used for all cases. > path_update() is now returning if a prefix was not modified which is > needed in the update path to suppress unneeded updates. > The softreconfig out case becomes simpler because there is no more the > need to run with both filters and check results. > Last the shutdown code of the RDE had to be adjusted a bit to still work > with the Adj-RIB-Out. > > This may cause memory consumption to go up but my testing has shown that > the result is actually not significant. Needs the commits from earlier > today to apply.
On my Route Server, it is quite a big change (in percentage). * Before : RDE memory statistics 11561 IPv4 unicast network entries using 452K of memory 131 IPv6 unicast network entries using 7.2K of memory 23370 rib entries using 1.4M of memory 23517 prefix entries using 1.8M of memory 4894 BGP path attribute entries using 344K of memory and holding 23517 references 1720 BGP AS-PATH attribute entries using 76.4K of memory and holding 4894 references 1061 BGP attributes entries using 41.4K of memory and holding 10565 references 1060 BGP attributes using 25.6K of memory 101809 as-set elements in 58041 tables using 2.1M of memory 114429 prefix-set elments using 4.7M of memory RIB using 4.1M of memory Sets using 6.9M of memory RDE hash statistics path hash: size 131072, 4894 entires min 0 max 3 avg/std-dev = 0.037/0.000 aspath hash: size 131072, 1720 entires min 0 max 2 avg/std-dev = 0.013/0.000 attr hash: size 16384, 1061 entires min 0 max 2 avg/std-dev = 0.065/0.000 * After: RDE memory statistics 11560 IPv4 unicast network entries using 452K of memory 145 IPv6 unicast network entries using 7.9K of memory 34991 rib entries using 2.1M of memory 69844 prefix entries using 5.3M of memory 4929 BGP path attribute entries using 347K of memory and holding 69844 references 1727 BGP AS-PATH attribute entries using 76.6K of memory and holding 4929 references 1066 BGP attributes entries using 41.6K of memory and holding 10643 references 1065 BGP attributes using 25.6K of memory 101809 as-set elements in 58041 tables using 2.1M of memory 114429 prefix-set elments using 4.7M of memory RIB using 8.4M of memory Sets using 6.9M of memory RDE hash statistics path hash: size 131072, 4929 entires min 0 max 3 avg/std-dev = 0.038/0.000 aspath hash: size 131072, 1727 entires min 0 max 2 avg/std-dev = 0.013/0.000 attr hash: size 16384, 1066 entires min 0 max 2 avg/std-dev = 0.065/0.000 I need to test it on a router with a fullview. Comments below. > -- > :wq Claudio > > Index: rde.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v > retrieving revision 1.444 > diff -u -p -r1.444 rde.c > --- rde.c 31 Oct 2018 14:50:07 -0000 1.444 > +++ rde.c 31 Oct 2018 15:09:39 -0000 > @@ -1395,7 +1395,7 @@ rde_update_update(struct rde_peer *peer, > > /* add original path to the Adj-RIB-In */ > if (path_update(&ribs[RIB_ADJ_IN].rib, peer, in, prefix, prefixlen, > - vstate)) > + vstate) == 1) > peer->prefix_cnt++; > > /* max prefix checker */ > @@ -2124,16 +2124,17 @@ rde_reflector(struct rde_peer *peer, str > * control specific functions > */ > static void > -rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, > - struct nexthop *nexthop, pid_t pid, int flags) > +rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int > flags) > { > struct ctl_show_rib rib; > struct ibuf *wbuf; > struct attr *a; > + struct nexthop *nexthop; > void *bp; > time_t staletime; > u_int8_t l; > > + nexthop = prefix_nexthop(p); > bzero(&rib, sizeof(rib)); > rib.lastchange = p->lastchange; > rib.local_pref = asp->lpref; > @@ -2208,70 +2209,37 @@ rde_dump_rib_as(struct prefix *p, struct > } > > static void > -rde_dump_filterout(struct rde_peer *peer, struct prefix *p, > - struct ctl_show_rib_request *req) > -{ > - struct filterstate state; > - enum filter_actions a; > - > - if (up_test_update(peer, p) != 1) > - return; > - > - rde_filterstate_prep(&state, prefix_aspath(p), prefix_nexthop(p), > - prefix_nhflags(p)); > - a = rde_filter(out_rules, peer, p, &state); > - > - if (a == ACTION_ALLOW) > - rde_dump_rib_as(p, &state.aspath, state.nexthop, req->pid, > - req->flags); > - > - rde_filterstate_clean(&state); > -} > - > -static void > rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) > { > - struct rde_peer *peer; > struct rde_aspath *asp; > > - if (req->flags & F_CTL_ADJ_OUT) { > - if (p->re->active != p) > - /* only consider active prefix */ > - return; > - if (req->peerid) { > - if ((peer = peer_get(req->peerid)) != NULL) > - rde_dump_filterout(peer, p, req); > - return; > - } > - } else { > - asp = prefix_aspath(p); > - if (req->peerid && req->peerid != prefix_peer(p)->conf.id) > - return; > - if ((req->flags & F_CTL_ACTIVE) && p->re->active != p) > - return; > - if ((req->flags & F_CTL_INVALID) && > - (asp->flags & F_ATTR_PARSE_ERR) == 0) > - return; > - if (req->type == IMSG_CTL_SHOW_RIB_AS && > - !aspath_match(asp->aspath->data, asp->aspath->len, > - &req->as, 0)) > - return; > - if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY && > - !community_match(asp, req->community.as, > - req->community.type)) > - return; > - if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY && > - !community_ext_match(asp, &req->extcommunity, 0)) > - return; > - if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY && > - !community_large_match(asp, req->large_community.as, > - req->large_community.ld1, req->large_community.ld2)) > - return; > - if (!ovs_match(p, req->flags)) > - return; > - rde_dump_rib_as(p, asp, prefix_nexthop(p), req->pid, > - req->flags); > - } > + if (req->peerid && req->peerid != prefix_peer(p)->conf.id) > + return; > + > + asp = prefix_aspath(p); > + if ((req->flags & F_CTL_ACTIVE) && p->re->active != p) > + return; You can move it up one line. It only saves a call to prefix_aspath() in a few case though. > + if ((req->flags & F_CTL_INVALID) && > + (asp->flags & F_ATTR_PARSE_ERR) == 0) > + return; > + if (req->type == IMSG_CTL_SHOW_RIB_AS && > + !aspath_match(asp->aspath->data, asp->aspath->len, > + &req->as, 0)) > + return; > + if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY && > + !community_match(asp, req->community.as, > + req->community.type)) > + return; > + if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY && > + !community_ext_match(asp, &req->extcommunity, 0)) > + return; > + if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY && > + !community_large_match(asp, req->large_community.as, > + req->large_community.ld1, req->large_community.ld2)) > + return; > + if (!ovs_match(p, req->flags)) > + return; > + rde_dump_rib_as(p, asp, req->pid, req->flags); > } > > static void > @@ -2342,6 +2310,8 @@ rde_dump_ctx_new(struct ctl_show_rib_req > } > if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) { > rid = RIB_ADJ_IN; > + } else if (req->flags & F_CTL_ADJ_OUT) { > + rid = RIB_ADJ_OUT; > } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) { > log_warnx("rde_dump_ctx_new: no such rib %s", req->rib); > error = CTL_RES_NOSUCHPEER; > @@ -2591,6 +2561,19 @@ rde_up_dump_upcall(struct rib_entry *re, > } > > static void > +rde_up_flush_upcall(struct rib_entry *re, void *ptr) > +{ > + struct rde_peer *peer = ptr; > + struct prefix *p, *np; > + > + LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) { > + if (peer != prefix_peer(p)) > + continue; > + up_generate_updates(out_rules, peer, NULL, p); > + } > +} > + > +static void > rde_up_dump_done(void *ptr, u_int8_t aid) > { > struct rde_peer *peer = ptr; > @@ -2805,6 +2788,8 @@ rde_reload_done(void) > u_int16_t rid; > int reload = 0; > > + softreconfig = 0; > + > /* first merge the main config */ > if ((conf->flags & BGPD_FLAG_NO_EVALUATE) && > (nconf->flags & BGPD_FLAG_NO_EVALUATE) == 0) { > @@ -2887,11 +2872,16 @@ rde_reload_done(void) > char *p = log_fmt_peer(&peer->conf); > log_debug("rib change: reloading peer %s", p); > free(p); > - up_withdraw_all(peer); > peer->loc_rib_id = rib_find(peer->conf.rib); > if (peer->loc_rib_id == RIB_NOTFOUND) > fatalx("King Bula's peer met an unknown RIB"); > peer->reconf_rib = 1; > + softreconfig++; > + if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, > + RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall, > + rde_softreconfig_in_done, NULL) == -1) > + fatal("%s: rib_dump_new", __func__); > + log_peer_info(&peer->conf, "flushing Adj-RIB-Out"); > continue; > } > if (!rde_filter_equal(out_rules, out_rules_tmp, peer)) { > @@ -2941,14 +2931,13 @@ rde_reload_done(void) > } > log_info("RDE reconfigured"); > > - softreconfig = 0; > if (reload > 0) { > - log_info("running softreconfig in"); > softreconfig++; > if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC, > RDE_RUNNER_ROUNDS, &ribs[RIB_ADJ_IN], rde_softreconfig_in, > rde_softreconfig_in_done, NULL) == -1) > fatal("%s: rib_dump_new", __func__); > + log_info("running softreconfig in"); > } else { > rde_softreconfig_in_done(NULL, AID_UNSPEC); > } > @@ -3113,61 +3102,18 @@ rde_softreconfig_in(struct rib_entry *re > } > > static void > -rde_softreconfig_out_peer(struct rib_entry *re, struct rde_peer *peer) > -{ > - struct filterstate ostate, nstate; > - struct bgpd_addr addr; > - struct prefix *p = re->active; > - struct pt_entry *pt; > - enum filter_actions oa, na; > - > - pt = re->prefix; > - pt_getaddr(pt, &addr); > - > - if (up_test_update(peer, p) != 1) > - return; > - > - rde_filterstate_prep(&ostate, prefix_aspath(p), prefix_nexthop(p), > - prefix_nhflags(p)); > - rde_filterstate_prep(&nstate, prefix_aspath(p), prefix_nexthop(p), > - prefix_nhflags(p)); > - oa = rde_filter(out_rules_tmp, peer, p, &ostate); > - na = rde_filter(out_rules, peer, p, &nstate); > - > - /* go through all 4 possible combinations */ > - /* if (oa == ACTION_DENY && na == ACTION_DENY) */ > - /* nothing todo */ > - if (oa == ACTION_DENY && na == ACTION_ALLOW) { > - /* send update */ > - up_rib_add(peer, re); > - up_generate(peer, &nstate, &addr, pt->prefixlen); > - } else if (oa == ACTION_ALLOW && na == ACTION_DENY) { > - /* send withdraw */ > - up_rib_remove(peer, re); > - up_generate(peer, NULL, &addr, pt->prefixlen); > - } else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) { > - /* send update if anything changed */ > - if (nstate.nhflags != ostate.nhflags || > - nstate.nexthop != ostate.nexthop || > - path_compare(&nstate.aspath, &ostate.aspath) != 0) > - up_generate(peer, &nstate, &addr, pt->prefixlen); > - } > - > - rde_filterstate_clean(&ostate); > - rde_filterstate_clean(&nstate); > -} > - > -static void > rde_softreconfig_out(struct rib_entry *re, void *bula) > { > + struct prefix *new = re->active; > struct rde_peer *peer; > > - if (re->active == NULL) > + if (new == NULL) > return; > > LIST_FOREACH(peer, &peerlist, peer_l) { > if (peer->loc_rib_id == re->rib_id && peer->reconf_out) > - rde_softreconfig_out_peer(re, peer); > + /* Regenerate all updates. */ > + up_generate_updates(out_rules, peer, new, new); > } > } > > @@ -3687,7 +3633,7 @@ network_add(struct network_config *nc, i > vstate = rde_roa_validity(&conf->rde_roa, &nc->prefix, > nc->prefixlen, asp->source_as); > if (path_update(&ribs[RIB_ADJ_IN].rib, peerself, &state, &nc->prefix, > - nc->prefixlen, vstate)) > + nc->prefixlen, vstate) == 1) Isn't there too many tabs here ? > peerself->prefix_cnt++; > for (i = RIB_LOC_START; i < rib_size; i++) { > if (!rib_valid(i)) > @@ -3835,21 +3781,20 @@ rde_shutdown(void) > * rde_shutdown depends on this. > */ > > - /* > - * All peers go down > - */ > + /* Frist all peers go down */ Frist ?! :) > for (i = 0; i <= peertable.peer_hashmask; i++) > while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL) > peer_down(p->conf.id); > > + /* then since decision process is off, kill RIB_ADJ_OUT */ > + rib_free(rib_byid(RIB_ADJ_OUT)); > + > /* free filters */ > filterlist_free(out_rules); > - for (i = 0; i < rib_size; i++) { > - if (!rib_valid(i)) > - continue; > - filterlist_free(ribs[i].in_rules); > - } > + filterlist_free(out_rules_tmp); > > + /* now check everything */ > + rib_shutdown(); > nexthop_shutdown(); > path_shutdown(); > aspath_shutdown(); > Index: rde.h > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v > retrieving revision 1.201 > diff -u -p -r1.201 rde.h > --- rde.h 31 Oct 2018 14:50:07 -0000 1.201 > +++ rde.h 31 Oct 2018 15:09:39 -0000 > @@ -79,7 +79,6 @@ LIST_HEAD(attr_list, attr); > LIST_HEAD(aspath_head, rde_aspath); > RB_HEAD(uptree_prefix, update_prefix); > RB_HEAD(uptree_attr, update_attr); > -RB_HEAD(uptree_rib, update_rib); > > TAILQ_HEAD(uplist_prefix, update_prefix); > TAILQ_HEAD(uplist_attr, update_attr); > @@ -91,7 +90,6 @@ struct rde_peer { > struct bgpd_addr remote_addr; > struct bgpd_addr local_v4_addr; > struct bgpd_addr local_v6_addr; > - struct uptree_rib up_rib; > struct uptree_prefix up_prefix; > struct uptree_attr up_attrs; > struct uplist_attr updates[AID_MAX]; > @@ -440,6 +438,7 @@ struct rib *rib_byid(u_int16_t); > u_int16_t rib_find(char *); > struct rib_desc *rib_desc(struct rib *); > void rib_free(struct rib *); > +void rib_shutdown(void); > struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int); > struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *); > int rib_dump_pending(void); > Index: rde_rib.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v > retrieving revision 1.184 > diff -u -p -r1.184 rde_rib.c > --- rde_rib.c 31 Oct 2018 14:50:07 -0000 1.184 > +++ rde_rib.c 31 Oct 2018 15:09:39 -0000 > @@ -216,6 +216,21 @@ rib_free(struct rib *rib) > bzero(rd, sizeof(struct rib_desc)); > } > > +void > +rib_shutdown(void) > +{ > + u_int16_t id; > + > + for (id = 0; id < rib_size; id++) { > + if (!rib_valid(id)) > + continue; > + if (!RB_EMPTY(rib_tree(&ribs[id].rib))) > + log_warnx("%s: rib %s is not empty", __func__, > + ribs[id].name); > + rib_free(&ribs[id].rib); > + } > +} > + > struct rib_entry * > rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) > { > @@ -553,6 +568,11 @@ path_hash_stats(struct rde_hashstats *hs > } > } > > +/* > + * Update a prefix belonging to a possible new aspath. > + * Return 1 if prefix was newly added, 0 if it was just changed or 2 if no > + * change happened at all. > + */ > int > path_update(struct rib *rib, struct rde_peer *peer, struct filterstate > *state, > struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate) > @@ -575,7 +595,7 @@ path_update(struct rib *rib, struct rde_ > /* no change, update last change */ > p->lastchange = time(NULL); > p->validation_state = vstate; > - return (0); > + return (2); > } > } > > Index: rde_update.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v > retrieving revision 1.102 > diff -u -p -r1.102 rde_update.c > --- rde_update.c 24 Oct 2018 08:26:37 -0000 1.102 > +++ rde_update.c 31 Oct 2018 15:09:39 -0000 > @@ -53,15 +53,9 @@ struct update_attr { > u_int16_t mpattr_len; > }; > > -struct update_rib { > - RB_ENTRY(update_rib) entry; > - struct rib_entry *re; > -}; > - > void up_clear(struct uplist_attr *, struct uplist_prefix *); > int up_prefix_cmp(struct update_prefix *, struct update_prefix *); > int up_attr_cmp(struct update_attr *, struct update_attr *); > -int up_rib_cmp(struct update_rib *, struct update_rib *); > int up_add(struct rde_peer *, struct update_prefix *, struct update_attr *); > > RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp) > @@ -70,9 +64,6 @@ RB_GENERATE(uptree_prefix, update_prefix > RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp) > RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp) > > -RB_PROTOTYPE(uptree_rib, update_rib, entry, up_rib_cmp) > -RB_GENERATE(uptree_rib, update_rib, entry, up_rib_cmp) > - > SIPHASH_KEY uptree_key; > > void > @@ -86,7 +77,6 @@ up_init(struct rde_peer *peer) > } > RB_INIT(&peer->up_prefix); > RB_INIT(&peer->up_attrs); > - RB_INIT(&peer->up_rib); > peer->up_pcnt = 0; > peer->up_acnt = 0; > peer->up_nlricnt = 0; > @@ -120,7 +110,6 @@ up_clear(struct uplist_attr *updates, st > void > up_down(struct rde_peer *peer) > { > - struct update_rib *ur, *nur; > u_int8_t i; > > for (i = 0; i < AID_MAX; i++) > @@ -128,10 +117,6 @@ up_down(struct rde_peer *peer) > > RB_INIT(&peer->up_prefix); > RB_INIT(&peer->up_attrs); > - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) { > - RB_REMOVE(uptree_rib, &peer->up_rib, ur); > - free(ur); > - } > > peer->up_pcnt = 0; > peer->up_acnt = 0; > @@ -210,58 +195,6 @@ up_attr_cmp(struct update_attr *a, struc > } > > int > -up_rib_cmp(struct update_rib *a, struct update_rib *b) > -{ > - if (a->re != b->re) > - return (a->re > b->re ? 1 : -1); > - return 0; > -} > - > -int > -up_rib_remove(struct rde_peer *peer, struct rib_entry *re) > -{ > - struct update_rib *ur, u; > - u.re = re; > - > - ur = RB_FIND(uptree_rib, &peer->up_rib, &u); > - if (ur != NULL) { > - RB_REMOVE(uptree_rib, &peer->up_rib, ur); > - free(ur); > - return 1; > - } else > - return 0; > -} > - > -void > -up_rib_add(struct rde_peer *peer, struct rib_entry *re) > -{ > - struct update_rib *ur; > - > - if ((ur = calloc(1, sizeof(*ur))) == NULL) > - fatal("%s", __func__); > - ur->re = re; > - > - if (RB_INSERT(uptree_rib, &peer->up_rib, ur) != NULL) > - free(ur); > -} > - > -void > -up_withdraw_all(struct rde_peer *peer) > -{ > - struct bgpd_addr addr; > - struct update_rib *ur, *nur; > - > - RB_FOREACH_SAFE(ur, uptree_rib, &peer->up_rib, nur) { > - RB_REMOVE(uptree_rib, &peer->up_rib, ur); > - > - /* withdraw prefix */ > - pt_getaddr(ur->re->prefix, &addr); > - up_generate(peer, NULL, &addr, ur->re->prefix->prefixlen); > - free(ur); > - } > -} > - > -int > up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a) > { > struct update_attr *na = NULL; > @@ -473,14 +406,16 @@ up_generate_updates(struct filter_head * > > if (new == NULL) { > withdraw: > - if (up_test_update(peer, old) != 1) > - return; > - > - if (!up_rib_remove(peer, old->re)) > + if (old == NULL) > return; > > /* withdraw prefix */ > pt_getaddr(old->re->prefix, &addr); > + if (prefix_remove(&ribs[RIB_ADJ_OUT].rib, peer, &addr, > + old->re->prefix->prefixlen) == 0) { > + /* not in table, no need to send withdraw */ > + return; > + } > up_generate(peer, NULL, &addr, old->re->prefix->prefixlen); > } else { > switch (up_test_update(peer, new)) { > @@ -500,8 +435,12 @@ withdraw: > } > > pt_getaddr(new->re->prefix, &addr); > - up_generate(peer, &state, &addr, new->re->prefix->prefixlen); > - up_rib_add(peer, new->re); > + if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr, > + new->re->prefix->prefixlen, prefix_vstate(new)) != 2) { > + /* only send update if path changed */ > + up_generate(peer, &state, &addr, > + new->re->prefix->prefixlen); > + } > > rde_filterstate_clean(&state); > } >