This diff moves the rib_entry pointer re into the union to safe some space. For add-path I need to add a few more u_int32_t and that would blow the size of struct prefix from 128 to 132 bytes. malloc would round that up to 256bytes and that is bad for the struct that is allocted in millions in bgpd.
To make this somewhat save introduce PREFIX_FLAG_ADJOUT to mark prefixes that live in the adj-rib-out. Those prefixes can not access the re pointer also use a wrapper prefix_re() which returns the re pointer or NULL. Also add some assertions to make sure that prefixes don't end up in the wrong tree. This change shrinks the struct back to 120bytes and gives me the space needed for add-path. Please test -- :wq Claudio Index: rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v retrieving revision 1.530 diff -u -p -r1.530 rde.c --- rde.c 25 Jun 2021 09:25:48 -0000 1.530 +++ rde.c 29 Jun 2021 08:28:33 -0000 @@ -2298,6 +2298,7 @@ rde_dump_rib_as(struct prefix *p, struct struct ibuf *wbuf; struct attr *a; struct nexthop *nexthop; + struct rib_entry *re; void *bp; time_t staletime; size_t aslen; @@ -2330,7 +2331,8 @@ rde_dump_rib_as(struct prefix *p, struct rib.origin = asp->origin; rib.validation_state = p->validation_state; rib.flags = 0; - if (p->re != NULL && p->re->active == p) + re = prefix_re(p); + if (re != NULL && re->active == p) rib.flags |= F_PREF_ACTIVE; if (!prefix_peer(p)->conf.ebgp) rib.flags |= F_PREF_INTERNAL; @@ -2412,14 +2414,16 @@ static void rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) { struct rde_aspath *asp; + struct rib_entry *re; if (!rde_match_peer(prefix_peer(p), &req->neighbor)) return; asp = prefix_aspath(p); + re = prefix_re(p); if (asp == NULL) /* skip pending withdraw in Adj-RIB-Out */ return; - if ((req->flags & F_CTL_ACTIVE) && p->re->active != p) + if ((req->flags & F_CTL_ACTIVE) && re != NULL && re->active != p) return; if ((req->flags & F_CTL_INVALID) && (asp->flags & F_ATTR_PARSE_ERR) == 0) Index: rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v retrieving revision 1.240 diff -u -p -r1.240 rde.h --- rde.h 17 Jun 2021 16:05:26 -0000 1.240 +++ rde.h 29 Jun 2021 08:33:18 -0000 @@ -56,10 +56,10 @@ struct rib { struct filter_head *in_rules_tmp; u_int rtableid; u_int rtableid_tmp; + enum reconf_action state, fibstate; + u_int16_t id; u_int16_t flags; u_int16_t flags_tmp; - u_int16_t id; - enum reconf_action state, fibstate; }; #define RIB_ADJ_IN 0 @@ -317,13 +317,13 @@ struct prefix { union { struct { LIST_ENTRY(prefix) rib, nexthop; + struct rib_entry *re; } list; struct { RB_ENTRY(prefix) index, update; } tree; } entry; struct pt_entry *pt; - struct rib_entry *re; struct rde_aspath *aspath; struct rde_community *communities; struct rde_peer *peer; @@ -338,6 +338,7 @@ struct prefix { #define PREFIX_FLAG_DEAD 0x04 /* locked but removed */ #define PREFIX_FLAG_STALE 0x08 /* stale entry (graceful reload) */ #define PREFIX_FLAG_MASK 0x0f /* mask for the prefix types */ +#define PREFIX_FLAG_ADJOUT 0x10 /* prefix is in the adj-out rib */ #define PREFIX_NEXTHOP_LINKED 0x40 /* prefix is linked onto nexthop list */ #define PREFIX_FLAG_LOCKED 0x80 /* locked by rib walker */ }; @@ -637,6 +638,14 @@ static inline u_int8_t prefix_vstate(struct prefix *p) { return (p->validation_state & ROA_MASK); +} + +static inline struct rib_entry * +prefix_re(struct prefix *p) +{ + if (p->flags & PREFIX_FLAG_ADJOUT) + return NULL; + return (p->entry.list.re); } void nexthop_init(u_int32_t); Index: rde_rib.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v retrieving revision 1.222 diff -u -p -r1.222 rde_rib.c --- rde_rib.c 17 Jun 2021 08:16:04 -0000 1.222 +++ rde_rib.c 29 Jun 2021 09:41:21 -0000 @@ -1031,6 +1031,9 @@ prefix_move(struct prefix *p, struct rde { struct prefix *np; + if (p->flags & PREFIX_FLAG_ADJOUT) + fatalx("%s: prefix with PREFIX_FLAG_ADJOUT hit", __func__); + if (peer != prefix_peer(p)) fatalx("prefix_move: cross peer move"); @@ -1040,7 +1043,7 @@ prefix_move(struct prefix *p, struct rde np->aspath = path_ref(asp); np->communities = communities_ref(comm); np->peer = peer; - np->re = p->re; + np->entry.list.re = prefix_re(p); np->pt = p->pt; /* skip refcnt update since ref is moved */ np->validation_state = vstate; np->nhflags = nhflags; @@ -1056,7 +1059,7 @@ prefix_move(struct prefix *p, struct rde * no need to update the peer prefix count because we are only moving * the prefix without changing the peer. */ - prefix_evaluate(np->re, np, p); + prefix_evaluate(prefix_re(np), np, p); /* remove possible pftable reference from old path first */ if (p->aspath && p->aspath->pftableid) @@ -1071,8 +1074,8 @@ prefix_move(struct prefix *p, struct rde p->nexthop = NULL; p->aspath = NULL; p->peer = NULL; - p->re = NULL; p->pt = NULL; + p->entry.list.re = NULL; prefix_free(p); return (0); @@ -1093,6 +1096,8 @@ prefix_withdraw(struct rib *rib, struct if (p == NULL) /* Got a dummy withdrawn request. */ return (0); + if (p->flags & PREFIX_FLAG_ADJOUT) + fatalx("%s: prefix with PREFIX_FLAG_ADJOUT hit", __func__); asp = prefix_aspath(p); if (asp && asp->pftableid) /* only prefixes in the local RIB were pushed into pf */ @@ -1112,8 +1117,8 @@ prefix_add_eor(struct rde_peer *peer, u_ struct prefix *p; p = prefix_alloc(); + p->flags = PREFIX_FLAG_ADJOUT | PREFIX_FLAG_UPDATE; p->eor = 1; - p->flags = PREFIX_FLAG_UPDATE; if (RB_INSERT(prefix_tree, &peer->updates[aid], p) != NULL) /* no need to add if EoR marker already present */ prefix_free(p); @@ -1134,6 +1139,9 @@ prefix_adjout_update(struct rde_peer *pe int created = 0; if ((p = prefix_lookup(peer, prefix, prefixlen)) != NULL) { + if ((p->flags & PREFIX_FLAG_ADJOUT) == 0) + fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", + __func__); /* prefix is already in the Adj-RIB-Out */ if (p->flags & PREFIX_FLAG_WITHDRAW) { created = 1; /* consider this a new entry */ @@ -1171,6 +1179,7 @@ prefix_adjout_update(struct rde_peer *pe /* peer and pt remain */ } else { p = prefix_alloc(); + p->flags |= PREFIX_FLAG_ADJOUT; created = 1; p->pt = pt_get(prefix, prefixlen); @@ -1225,6 +1234,9 @@ prefix_adjout_withdraw(struct rde_peer * if (p == NULL) /* Got a dummy withdrawn request. */ return (0); + if ((p->flags & PREFIX_FLAG_ADJOUT) == 0) + fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__); + /* already a withdraw, shortcut */ if (p->flags & PREFIX_FLAG_WITHDRAW) { p->lastchange = getmonotime(); @@ -1285,6 +1297,9 @@ prefix_adjout_destroy(struct prefix *p) { struct rde_peer *peer = prefix_peer(p); + if ((p->flags & PREFIX_FLAG_ADJOUT) == 0) + fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__); + if (p->eor) { /* EOR marker is not linked in the index */ prefix_free(p); @@ -1486,8 +1501,10 @@ static void prefix_evaluate_all(struct prefix *p, enum nexthop_state state, enum nexthop_state oldstate) { + struct rib_entry *re = prefix_re(p); + /* Skip non local-RIBs or RIBs that are flagged as noeval. */ - if (re_rib(p->re)->flags & F_RIB_NOEVALUATE) { + if (re_rib(re)->flags & F_RIB_NOEVALUATE) { log_warnx("%s: prefix with F_RIB_NOEVALUATE hit", __func__); return; } @@ -1500,15 +1517,15 @@ prefix_evaluate_all(struct prefix *p, en * the routing decision so shortcut here. */ if (state == NEXTHOP_REACH) { - if ((re_rib(p->re)->flags & F_RIB_NOFIB) == 0 && - p == p->re->active) - rde_send_kroute(re_rib(p->re), p, NULL); + if ((re_rib(re)->flags & F_RIB_NOFIB) == 0 && + p == re->active) + rde_send_kroute(re_rib(re), p, NULL); } return; } /* redo the route decision */ - prefix_evaluate(p->re, p, p); + prefix_evaluate(prefix_re(p), p, p); } /* kill a prefix. */ @@ -1516,7 +1533,7 @@ void prefix_destroy(struct prefix *p) { /* make route decision */ - prefix_evaluate(p->re, NULL, p); + prefix_evaluate(prefix_re(p), NULL, p); prefix_unlink(p); prefix_free(p); @@ -1530,11 +1547,14 @@ prefix_link(struct prefix *p, struct rib struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nexthop, u_int8_t nhflags, u_int8_t vstate) { + if (p->flags & PREFIX_FLAG_ADJOUT) + fatalx("%s: prefix with PREFIX_FLAG_ADJOUT hit", __func__); + + p->entry.list.re = re; p->aspath = path_ref(asp); p->communities = communities_ref(comm); p->peer = peer; p->pt = pt_ref(re->prefix); - p->re = re; p->validation_state = vstate; p->nhflags = nhflags; p->nexthop = nexthop_ref(nexthop); @@ -1555,7 +1575,7 @@ prefix_link(struct prefix *p, struct rib static void prefix_unlink(struct prefix *p) { - struct rib_entry *re = p->re; + struct rib_entry *re = prefix_re(p); /* destroy all references to other objects */ nexthop_unlink(p); @@ -1567,7 +1587,6 @@ prefix_unlink(struct prefix *p) p->nexthop = NULL; p->aspath = NULL; p->peer = NULL; - p->re = NULL; p->pt = NULL; if (re && rib_empty(re)) @@ -1795,7 +1814,7 @@ nexthop_link(struct prefix *p) return; /* no need to link prefixes in RIBs that have no decision process */ - if (re_rib(p->re)->flags & F_RIB_NOEVALUATE) + if (re_rib(prefix_re(p))->flags & F_RIB_NOEVALUATE) return; p->flags |= PREFIX_NEXTHOP_LINKED;