On Tue, Jun 27, 2023 at 12:10:21PM +0200, Claudio Jeker wrote:
> Sorry this diff is a monster but it kind of turned into a all or nothing
> situation.
Frankly, it's not that bad.
> For a long time bgpd used a static 4k buffer plus a len argument to build
> updates. The result was ok but was also very fragile.
> With the new ibuf API all of this can be rewritten and the result is IMO
> already a lot cleaner (mainly because a lot of code duplication could be
> removed).
>
> Some bits I'm not super happy with are community_writebuf() and
> pt_writebuf(). Both functions are a bit too complex to my taste but for
> different reasons. pt_writebuf() needs to be transactional (either all or
> nothing) since we call pt_writebuf() until there is no more space.
> community_writebuf() does maybe too much at once.
It is a big step in the right direction. It's great. There's always time for
more passes and cleanup. I agree that both these functions are a bit on the
complex side, but compared to the code they replace...
> As mentioned this is a major rewrite, I did run this through regress and
> also on a few personal systems but I'm unable to test all possible cases.
> Please try this out and report back.
I need to make more passes over this, but here's a first round of feedback.
Some small suggestions and an (already existing) leak in up_generate_attr()
inline.
(There's the usual bit of "return (foo)" vs "return foo" being mixed in
the same function. I refrained from making comments.)
> --
> :wq Claudio
>
> Index: mrt.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
> retrieving revision 1.114
> diff -u -p -r1.114 mrt.c
> --- mrt.c 19 Apr 2023 09:03:00 -0000 1.114
> +++ mrt.c 27 Jun 2023 08:33:54 -0000
> @@ -309,7 +309,9 @@ mrt_attr_dump(struct ibuf *buf, struct r
> return (-1);
>
> /* communities */
> - if (community_writebuf(buf, c) == -1)
> + if (community_writebuf(c, ATTR_COMMUNITIES, 0, buf) == -1 ||
> + community_writebuf(c, ATTR_EXT_COMMUNITIES, 0, buf) == -1 ||
> + community_writebuf(c, ATTR_LARGE_COMMUNITIES, 0, buf) == -1)
> return (-1);
>
> /* dump all other path attributes without modification */
> @@ -502,7 +504,7 @@ mrt_dump_entry_mp(struct mrt *mrt, struc
> goto fail;
> }
>
> - if (pt_writebuf(h2buf, p->pt) == -1) {
> + if (pt_writebuf(h2buf, p->pt, 0, 0, 0) == -1) {
> log_warnx("%s: pt_writebuf error", __func__);
> goto fail;
> }
> @@ -678,7 +680,7 @@ mrt_dump_entry_v2_rib(struct rib_entry *
> }
> len = ibuf_size(tbuf);
> DUMP_SHORT(buf, (uint16_t)len);
I assume the DUMP_* macros will be dealt with in a subsequent step?
> - if (ibuf_add(buf, tbuf->buf, len) == -1) {
> + if (ibuf_add_buf(buf, tbuf) == -1) {
> log_warn("%s: ibuf_add error", __func__);
ibuf_add_buf error
> ibuf_free(tbuf);
> goto fail;
> @@ -731,7 +733,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
> break;
> }
>
> - if (pt_writebuf(pbuf, re->prefix) == -1) {
> + if (pt_writebuf(pbuf, re->prefix, 0, 0, 0) == -1) {
> log_warnx("%s: pt_writebuf error", __func__);
> goto fail;
> }
> @@ -748,7 +750,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
> goto fail;
>
> DUMP_LONG(hbuf, snum);
> - if (ibuf_add(hbuf, pbuf->buf, ibuf_size(pbuf)) == -1) {
> + if (ibuf_add_buf(hbuf, pbuf) == -1) {
> log_warn("%s: ibuf_add error", __func__);
ibuf_add_buf error
> goto fail;
> }
> @@ -767,7 +769,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
> goto fail;
>
> DUMP_LONG(hbuf, snum);
> - if (ibuf_add(hbuf, pbuf->buf, ibuf_size(pbuf)) == -1) {
> + if (ibuf_add_buf(hbuf, pbuf) == -1) {
> log_warn("%s: ibuf_add error", __func__);
ibuf_add_buf error
> goto fail;
> }
> @@ -833,8 +835,8 @@ mrt_dump_v2_hdr(struct mrt *mrt, struct
> }
>
> off = ibuf_size(buf);
> - if (ibuf_reserve(buf, sizeof(nump)) == NULL) {
> - log_warn("%s: ibuf_reserve error", __func__);
> + if (ibuf_add_zero(buf, sizeof(nump)) == -1) {
> + log_warn("%s: ibuf_add_zero error", __func__);
> goto fail;
> }
> arg.nump = 0;
> @@ -843,8 +845,10 @@ mrt_dump_v2_hdr(struct mrt *mrt, struct
> if (arg.nump == -1)
> goto fail;
>
> - nump = htons(arg.nump);
> - memcpy(ibuf_seek(buf, off, sizeof(nump)), &nump, sizeof(nump));
> + if (ibuf_set_n16(buf, off, arg.nump) == -1) {
> + log_warn("%s: ibuf_set_n16 error", __func__);
> + goto fail;
> + }
>
> len = ibuf_size(buf);
> if (mrt_dump_hdr_rde(&hbuf, MSG_TABLE_DUMP_V2,
> @@ -1099,14 +1103,8 @@ mrt_write(struct mrt *mrt)
> void
> mrt_clean(struct mrt *mrt)
> {
> - struct ibuf *b;
> -
> close(mrt->wbuf.fd);
> - while ((b = TAILQ_FIRST(&mrt->wbuf.bufs))) {
> - TAILQ_REMOVE(&mrt->wbuf.bufs, b, entry);
> - ibuf_free(b);
> - }
> - mrt->wbuf.queued = 0;
> + msgbuf_clear(&mrt->wbuf);
> }
>
> static struct imsgbuf *mrt_imsgbuf[2];
> Index: rde.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> retrieving revision 1.606
> diff -u -p -r1.606 rde.c
> --- rde.c 12 Jun 2023 12:48:07 -0000 1.606
> +++ rde.c 27 Jun 2023 08:33:54 -0000
> @@ -86,8 +86,7 @@ static void rde_rpki_reload(void);
> static int rde_roa_reload(void);
> static int rde_aspa_reload(void);
> int rde_update_queue_pending(void);
> -void rde_update_queue_runner(void);
> -void rde_update6_queue_runner(uint8_t);
> +void rde_update_queue_runner(uint8_t);
> struct rde_prefixset *rde_find_prefixset(char *, struct rde_prefixset_head
> *);
> void rde_mark_prefixsets_dirty(struct rde_prefixset_head *,
> struct rde_prefixset_head *);
> @@ -310,9 +309,8 @@ rde_main(int debug, int verbose)
> rib_dump_runner();
> nexthop_runner();
> if (ibuf_se && ibuf_se->w.queued < SESS_MSG_HIGH_MARK) {
> - rde_update_queue_runner();
> - for (aid = AID_INET6; aid < AID_MAX; aid++)
> - rde_update6_queue_runner(aid);
> + for (aid = AID_MIN; aid < AID_MAX; aid++)
> + rde_update_queue_runner(aid);
> }
> /* commit pftable once per poll loop */
> rde_commit_pftable();
> @@ -3456,13 +3454,13 @@ rde_update_queue_pending(void)
> }
>
> void
> -rde_update_queue_runner(void)
> +rde_update_queue_runner(uint8_t aid)
> {
> struct rde_peer *peer;
> - int r, sent, max = RDE_RUNNER_ROUNDS, eor;
> - uint16_t len, wpos;
> + struct ibuf *buf;
> + int sent, max = RDE_RUNNER_ROUNDS;
>
> - len = sizeof(queue_buf) - MSGSIZE_HEADER;
> + /* first withdraws ... */
> do {
> sent = 0;
> RB_FOREACH(peer, peer_tree, &peertable) {
> @@ -3472,80 +3470,26 @@ rde_update_queue_runner(void)
> continue;
> if (peer->throttled)
> continue;
> - eor = 0;
> - wpos = 0;
> - /* first withdraws, save 2 bytes for path attributes */
> - if ((r = up_dump_withdraws(queue_buf, len - 2, peer,
> - AID_INET)) == -1)
> + if (RB_EMPTY(&peer->withdraws[aid]))
> continue;
> - wpos += r;
>
> - /* now bgp path attributes unless it is the EoR mark */
> - if (up_is_eor(peer, AID_INET)) {
> - eor = 1;
> - memset(queue_buf + wpos, 0, 2);
> - wpos += 2;
> - } else {
> - r = up_dump_attrnlri(queue_buf + wpos,
> - len - wpos, peer);
> - wpos += r;
> - }
> -
> - /* finally send message to SE */
> - if (wpos > 4) {
> - if (imsg_compose(ibuf_se, IMSG_UPDATE,
> - peer->conf.id, 0, -1, queue_buf,
> - wpos) == -1)
> - fatal("%s %d imsg_compose error",
> - __func__, __LINE__);
> - sent++;
> - }
> - if (eor) {
> - int sent_eor = peer->sent_eor & (1 << AID_INET);
> - if (peer->capa.grestart.restart && !sent_eor)
> - rde_peer_send_eor(peer, AID_INET);
> - if (peer->capa.enhanced_rr && sent_eor)
> - rde_peer_send_rrefresh(peer, AID_INET,
> - ROUTE_REFRESH_END_RR);
> - }
> - }
> - max -= sent;
> - } while (sent != 0 && max > 0);
> -}
> -
> -void
> -rde_update6_queue_runner(uint8_t aid)
> -{
> - struct rde_peer *peer;
> - int r, sent, max = RDE_RUNNER_ROUNDS / 2;
> - uint16_t len;
> -
> - /* first withdraws ... */
> - do {
> - sent = 0;
> - RB_FOREACH(peer, peer_tree, &peertable) {
> - if (peer->conf.id == 0)
> - continue;
> - if (peer->state != PEER_UP)
> + if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) ==
> + NULL)
> + fatal("%s", __func__);
> + if (up_dump_withdraws(buf, peer, aid) == -1) {
> + ibuf_free(buf);
> continue;
> - if (peer->throttled)
> - continue;
> - len = sizeof(queue_buf) - MSGSIZE_HEADER;
> - r = up_dump_mp_unreach(queue_buf, len, peer, aid);
> - if (r == -1)
> - continue;
> - /* finally send message to SE */
> - if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
> - 0, -1, queue_buf, r) == -1)
> - fatal("%s %d imsg_compose error", __func__,
> - __LINE__);
> + }
> + if (imsg_compose_ibuf(ibuf_se, IMSG_UPDATE,
> + peer->conf.id, 0, buf) == -1)
> + fatal("%s: imsg_create error", __func__);
> sent++;
> }
> max -= sent;
> } while (sent != 0 && max > 0);
>
> /* ... then updates */
> - max = RDE_RUNNER_ROUNDS / 2;
> + max = RDE_RUNNER_ROUNDS;
> do {
> sent = 0;
> RB_FOREACH(peer, peer_tree, &peertable) {
> @@ -3555,7 +3499,9 @@ rde_update6_queue_runner(uint8_t aid)
> continue;
> if (peer->throttled)
> continue;
> - len = sizeof(queue_buf) - MSGSIZE_HEADER;
> + if (RB_EMPTY(&peer->updates[aid]))
> + continue;
> +
> if (up_is_eor(peer, aid)) {
> int sent_eor = peer->sent_eor & (1 << aid);
> if (peer->capa.grestart.restart && !sent_eor)
> @@ -3565,15 +3511,17 @@ rde_update6_queue_runner(uint8_t aid)
> ROUTE_REFRESH_END_RR);
> continue;
> }
> - r = up_dump_mp_reach(queue_buf, len, peer, aid);
> - if (r == 0)
> - continue;
>
> - /* finally send message to SE */
> - if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
> - 0, -1, queue_buf, r) == -1)
> - fatal("%s %d imsg_compose error", __func__,
> - __LINE__);
> + if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) ==
> + NULL)
> + fatal("%s", __func__);
> + if (up_dump_update(buf, peer, aid) == -1) {
> + ibuf_free(buf);
> + continue;
> + }
> + if (imsg_compose_ibuf(ibuf_se, IMSG_UPDATE,
> + peer->conf.id, 0, buf) == -1)
> + fatal("%s: imsg_compose_ibuf error", __func__);
> sent++;
> }
> max -= sent;
> Index: rde.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> retrieving revision 1.294
> diff -u -p -r1.294 rde.h
> --- rde.h 12 Jun 2023 12:48:07 -0000 1.294
> +++ rde.h 27 Jun 2023 08:33:54 -0000
> @@ -447,10 +447,7 @@ int community_add(struct rde_community *
> int community_large_add(struct rde_community *, int, void *, size_t);
> int community_ext_add(struct rde_community *, int, int, void *, size_t);
>
> -int community_write(struct rde_community *, void *, uint16_t);
> -int community_large_write(struct rde_community *, void *, uint16_t);
> -int community_ext_write(struct rde_community *, int, void *, uint16_t);
> -int community_writebuf(struct ibuf *, struct rde_community *);
> +int community_writebuf(struct rde_community *, uint8_t, int, struct ibuf *);
>
> void communities_shutdown(void);
> struct rde_community *communities_lookup(struct rde_community *);
> @@ -519,8 +516,7 @@ struct pt_entry *pt_add_flow(struct flow
> void pt_remove(struct pt_entry *);
> struct pt_entry *pt_lookup(struct bgpd_addr *);
> int pt_prefix_cmp(const struct pt_entry *, const struct pt_entry *);
> -int pt_write(u_char *, int, struct pt_entry *, int);
> -int pt_writebuf(struct ibuf *, struct pt_entry *);
> +int pt_writebuf(struct ibuf *, struct pt_entry *, int, int, uint32_t);
>
> static inline struct pt_entry *
> pt_ref(struct pt_entry *pt)
> @@ -710,10 +706,8 @@ void up_generate_addpath_all(struct rd
> struct prefix *, struct prefix *);
> void up_generate_default(struct rde_peer *, uint8_t);
> int up_is_eor(struct rde_peer *, uint8_t);
> -int up_dump_withdraws(u_char *, int, struct rde_peer *, uint8_t);
> -int up_dump_mp_unreach(u_char *, int, struct rde_peer *, uint8_t);
> -int up_dump_attrnlri(u_char *, int, struct rde_peer *);
> -int up_dump_mp_reach(u_char *, int, struct rde_peer *, uint8_t);
> +int up_dump_withdraws(struct ibuf *, struct rde_peer *, uint8_t);
> +int up_dump_update(struct ibuf *, struct rde_peer *, uint8_t);
>
> /* rde_aspa.c */
> void aspa_validation(struct rde_aspa *, struct aspath *,
> Index: rde_attr.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_attr.c,v
> retrieving revision 1.133
> diff -u -p -r1.133 rde_attr.c
> --- rde_attr.c 12 Jun 2023 12:10:17 -0000 1.133
> +++ rde_attr.c 27 Jun 2023 08:33:54 -0000
> @@ -30,42 +30,6 @@
> #include "log.h"
>
> int
> -attr_write(void *p, uint16_t p_len, uint8_t flags, uint8_t type,
> - void *data, uint16_t data_len)
> -{
> - u_char *b = p;
> - uint16_t tmp, tot_len = 2; /* attribute header (without len) */
> -
> - flags &= ~ATTR_DEFMASK;
> - if (data_len > 255) {
> - tot_len += 2 + data_len;
> - flags |= ATTR_EXTLEN;
> - } else {
> - tot_len += 1 + data_len;
> - }
> -
> - if (tot_len > p_len)
> - return (-1);
> -
> - *b++ = flags;
> - *b++ = type;
> - if (data_len > 255) {
> - tmp = htons(data_len);
> - memcpy(b, &tmp, sizeof(tmp));
> - b += 2;
> - } else
> - *b++ = (u_char)data_len;
> -
> - if (data == NULL)
> - return (tot_len - data_len);
> -
> - if (data_len != 0)
> - memcpy(b, data, data_len);
> -
> - return (tot_len);
> -}
> -
> -int
> attr_writebuf(struct ibuf *buf, uint8_t flags, uint8_t type, void *data,
> uint16_t data_len)
> {
> Index: rde_community.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_community.c,v
> retrieving revision 1.12
> diff -u -p -r1.12 rde_community.c
> --- rde_community.c 17 Jun 2023 08:05:48 -0000 1.12
> +++ rde_community.c 27 Jun 2023 09:37:35 -0000
> @@ -225,10 +225,9 @@ insert_community(struct rde_community *c
> struct community *new;
> int newsize = comm->size + 8;
>
> - if ((new = reallocarray(comm->communities, newsize,
> - sizeof(struct community))) == NULL)
> + if ((new = recallocarray(comm->communities, comm->size,
> + newsize, sizeof(struct community))) == NULL)
> fatal(__func__);
> - memset(new + comm->size, 0, 8 * sizeof(struct community));
Since you fatal on failure, new doesn't really add anything
> comm->communities = new;
> comm->size = newsize;
> }
> @@ -256,6 +255,8 @@ insert_community(struct rde_community *c
> static int
> non_transitive_ext_community(struct community *c)
> {
> + if ((uint8_t)c->flags != COMMUNITY_TYPE_EXT)
> + return 0;
> if ((c->data3 >> 8) & EXT_COMMUNITY_NON_TRANSITIVE)
> return 1;
> return 0;
> @@ -514,228 +515,79 @@ community_ext_add(struct rde_community *
>
> /*
> * Convert communities back to the wireformat.
> - * This function needs to make sure that the attribute buffer is overflowed
> - * while writing out the communities.
> - * - community_write for ATTR_COMMUNITUES
> - * - community_large_write for ATTR_LARGE_COMMUNITIES
> - * - community_ext_write for ATTR_EXT_COMMUNITIES
> - * When writing ATTR_EXT_COMMUNITIES non-transitive communities need to
> - * be skipped if it is sent to an ebgp peer.
> + * When writing ATTR_EXT_COMMUNITIES non-transitive communities need to
> + * be skipped if they are sent to an ebgp peer.
> */
> int
> -community_write(struct rde_community *comm, void *buf, uint16_t len)
> +community_writebuf(struct rde_community *comm, uint8_t type, int ebgp,
> + struct ibuf *buf)
> {
> - uint8_t *b = buf;
> - uint16_t c;
> - size_t n = 0;
> - int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
> -
> - if (comm->flags & PARTIAL_COMMUNITIES)
> - flags |= ATTR_PARTIAL;
> -
> - /* first count how many communities will be written */
> - for (l = 0; l < comm->nentries; l++)
> - if ((uint8_t)comm->communities[l].flags ==
> - COMMUNITY_TYPE_BASIC)
> - n++;
> -
> - if (n == 0)
> - return 0;
> + struct community *cp;
> + uint64_t ext;
> + int l, size, start = -1, end, num = 0;
> + int flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
> + uint8_t t;
>
> - /* write attribute header */
> - r = attr_write(b, len, flags, ATTR_COMMUNITIES, NULL, n * 4);
> - if (r == -1)
> + switch (type) {
> + case ATTR_COMMUNITIES:
> + if (comm->flags & PARTIAL_COMMUNITIES)
> + flags |= ATTR_PARTIAL;
> + size = 4;
> + t = COMMUNITY_TYPE_BASIC;
> + break;
> + case ATTR_EXT_COMMUNITIES:
> + if (comm->flags & PARTIAL_EXT_COMMUNITIES)
> + flags |= ATTR_PARTIAL;
> + size = 8;
> + t = COMMUNITY_TYPE_EXT;
> + break;
> + case ATTR_LARGE_COMMUNITIES:
> + if (comm->flags & PARTIAL_LARGE_COMMUNITIES)
> + flags |= ATTR_PARTIAL;
> + size = 12;
> + t = COMMUNITY_TYPE_LARGE;
> + break;
> + default:
> return -1;
I wonder if it is worth pulling the flags handling out of the switch by adding
int is_partial = PARTIAL_COMMUNITIES | PARTIAL_EXT_COMMUNITIES |
PARTIAL_LARGE_COMMUNITIES;
and doing
flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
if (comm->flags & is_partial)
flags |= ATTR_PARTIAL;
right before the attr_writebuf() call.
> - b += r;
> -
> - /* write out the communities */
> - for (l = 0; l < comm->nentries; l++)
> - if ((uint8_t)comm->communities[l].flags ==
> - COMMUNITY_TYPE_BASIC) {
> - c = htons(comm->communities[l].data1);
> - memcpy(b, &c, sizeof(c));
> - b += sizeof(c);
> - r += sizeof(c);
> -
> - c = htons(comm->communities[l].data2);
> - memcpy(b, &c, sizeof(c));
> - b += sizeof(c);
> - r += sizeof(c);
> - }
> -
> - return r;
> -}
> -
> -int
> -community_large_write(struct rde_community *comm, void *buf, uint16_t len)
> -{
> - uint8_t *b = buf;
> - uint32_t c;
> - size_t n = 0;
> - int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
> -
> - if (comm->flags & PARTIAL_LARGE_COMMUNITIES)
> - flags |= ATTR_PARTIAL;
> + }
>
> /* first count how many communities will be written */
> - for (l = 0; l < comm->nentries; l++)
> - if ((uint8_t)comm->communities[l].flags ==
> - COMMUNITY_TYPE_LARGE)
> - n++;
> -
> - if (n == 0)
> - return 0;
> -
> - /* write attribute header */
> - r = attr_write(b, len, flags, ATTR_LARGE_COMMUNITIES, NULL, n * 12);
> - if (r == -1)
> - return -1;
> - b += r;
As this is a long function, I would probably initialize these right
before the for loop.
num = 0;
start = -1;
> + for (l = 0; l < comm->nentries; l++) {
> + cp = comm->communities + l;
I tend to prefer
cp = &comm->communities[l];
>
> - /* write out the communities */
> - for (l = 0; l < comm->nentries; l++)
> - if ((uint8_t)comm->communities[l].flags ==
> - COMMUNITY_TYPE_LARGE) {
> - c = htonl(comm->communities[l].data1);
> - memcpy(b, &c, sizeof(c));
> - b += sizeof(c);
> - r += sizeof(c);
> -
> - c = htonl(comm->communities[l].data2);
> - memcpy(b, &c, sizeof(c));
> - b += sizeof(c);
> - r += sizeof(c);
> -
> - c = htonl(comm->communities[l].data3);
> - memcpy(b, &c, sizeof(c));
> - b += sizeof(c);
> - r += sizeof(c);
> + if (ebgp && non_transitive_ext_community(cp))
> + continue;
> + if ((uint8_t)cp->flags == t) {
> + num++;
> + if (start == -1)
> + start = l;
> }
> + if ((uint8_t)cp->flags > t)
> + break;
> + }
> + end = l;
>
> - return r;
> -}
> -
> -int
> -community_ext_write(struct rde_community *comm, int ebgp, void *buf,
> - uint16_t len)
> -{
> - struct community *cp;
> - uint8_t *b = buf;
> - uint64_t ext;
> - size_t n = 0;
> - int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
> -
> - if (comm->flags & PARTIAL_EXT_COMMUNITIES)
> - flags |= ATTR_PARTIAL;
> -
> - /* first count how many communities will be written */
> - for (l = 0; l < comm->nentries; l++)
> - if ((uint8_t)comm->communities[l].flags ==
> - COMMUNITY_TYPE_EXT && !(ebgp &&
> - non_transitive_ext_community(&comm->communities[l])))
> - n++;
> -
> - if (n == 0)
> + /* no communities for this type present */
> + if (num == 0)
> return 0;
Should this overflow check before passing num * size to attr_writebuf()?
if (num > INT16_MAX / size)
return -1;
>
> /* write attribute header */
> - r = attr_write(b, len, flags, ATTR_EXT_COMMUNITIES, NULL, n * 8);
> - if (r == -1)
> + if (attr_writebuf(buf, flags, type, NULL, num * size) == -1)
> return -1;
> - b += r;
>
> /* write out the communities */
> - for (l = 0; l < comm->nentries; l++) {
> + for (l = start; l < end; l++) {
> cp = comm->communities + l;
again, i'd prefer
cp = &comm->communities[l];
> - if ((uint8_t)cp->flags == COMMUNITY_TYPE_EXT && !(ebgp &&
> - non_transitive_ext_community(cp))) {
> - ext = (uint64_t)cp->data3 << 48;
> - switch ((cp->data3 >> 8) & EXT_COMMUNITY_VALUE) {
> - case EXT_COMMUNITY_TRANS_TWO_AS:
> - case EXT_COMMUNITY_TRANS_OPAQUE:
> - case EXT_COMMUNITY_TRANS_EVPN:
> - ext |= ((uint64_t)cp->data1 & 0xffff) << 32;
> - ext |= (uint64_t)cp->data2;
> - break;
> - case EXT_COMMUNITY_TRANS_FOUR_AS:
> - case EXT_COMMUNITY_TRANS_IPV4:
> - ext |= (uint64_t)cp->data1 << 16;
> - ext |= (uint64_t)cp->data2 & 0xffff;
> - break;
> - }
> - ext = htobe64(ext);
> - memcpy(b, &ext, sizeof(ext));
> - b += sizeof(ext);
> - r += sizeof(ext);
> - }
> - }
> -
> - return r;
> -}
> -
> -/*
> - * Convert communities back to the wireformat and dump them into the ibuf
> buf.
> - * This function is used by the mrt dump code.
> - */
> -int
> -community_writebuf(struct ibuf *buf, struct rde_community *comm)
> -{
> - size_t basic_n = 0, large_n = 0, ext_n = 0;
> - int l, flags;
> -
> - /* first count how many communities will be written */
> - for (l = 0; l < comm->nentries; l++)
> - if ((uint8_t)comm->communities[l].flags ==
> - COMMUNITY_TYPE_BASIC)
> - basic_n++;
> - else if ((uint8_t)comm->communities[l].flags ==
> - COMMUNITY_TYPE_EXT)
> - ext_n++;
> - else if ((uint8_t)comm->communities[l].flags ==
> - COMMUNITY_TYPE_LARGE)
> - large_n++;
>
> -
> - if (basic_n != 0) {
> - /* write attribute header */
> - flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
> - if (comm->flags & PARTIAL_COMMUNITIES)
> - flags |= ATTR_PARTIAL;
> -
> - if (attr_writebuf(buf, flags, ATTR_COMMUNITIES, NULL,
> - basic_n * 4) == -1)
> - return -1;
> -
> - /* write out the communities */
> - for (l = 0; l < comm->nentries; l++)
> - if ((uint8_t)comm->communities[l].flags ==
> - COMMUNITY_TYPE_BASIC) {
> - uint16_t c;
> - c = htons(comm->communities[l].data1);
> - if (ibuf_add(buf, &c, sizeof(c)) == -1)
> - return (-1);
> - c = htons(comm->communities[l].data2);
> - if (ibuf_add(buf, &c, sizeof(c)) == -1)
> - return (-1);
> - }
> - }
> - if (ext_n != 0) {
> - /* write attribute header */
> - flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
> - if (comm->flags & PARTIAL_COMMUNITIES)
> - flags |= ATTR_PARTIAL;
> -
> - if (attr_writebuf(buf, flags, ATTR_EXT_COMMUNITIES, NULL,
> - ext_n * 8) == -1)
> - return -1;
> -
> - /* write out the communities */
> - for (l = 0; l < comm->nentries; l++) {
> - struct community *cp;
> - uint64_t ext;
> -
> - cp = comm->communities + l;
> - if ((uint8_t)cp->flags != COMMUNITY_TYPE_EXT)
> + switch (type) {
> + case ATTR_COMMUNITIES:
> + if (ibuf_add_n16(buf, cp->data1) == -1)
> + return (-1);
> + if (ibuf_add_n16(buf, cp->data2) == -1)
> + return (-1);
> + break;
> + case ATTR_EXT_COMMUNITIES:
> + if (ebgp && non_transitive_ext_community(cp))
> continue;
>
> ext = (uint64_t)cp->data3 << 48;
> @@ -752,38 +604,19 @@ community_writebuf(struct ibuf *buf, str
> ext |= (uint64_t)cp->data2 & 0xffff;
> break;
> }
> - ext = htobe64(ext);
> - if (ibuf_add(buf, &ext, sizeof(ext)) == -1)
> + if (ibuf_add_n64(buf, ext) == -1)
> + return (-1);
> + break;
> + case ATTR_LARGE_COMMUNITIES:
> + if (ibuf_add_n32(buf, cp->data1) == -1)
> return (-1);
> + if (ibuf_add_n32(buf, cp->data2) == -1)
> + return (-1);
> + if (ibuf_add_n32(buf, cp->data3) == -1)
> + return (-1);
> + break;
> }
> }
> - if (large_n != 0) {
> - /* write attribute header */
> - flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
> - if (comm->flags & PARTIAL_COMMUNITIES)
> - flags |= ATTR_PARTIAL;
> -
> - if (attr_writebuf(buf, flags, ATTR_LARGE_COMMUNITIES, NULL,
> - large_n * 12) == -1)
> - return -1;
> -
> - /* write out the communities */
> - for (l = 0; l < comm->nentries; l++)
> - if ((uint8_t)comm->communities[l].flags ==
> - COMMUNITY_TYPE_LARGE) {
> - uint32_t c;
> - c = htonl(comm->communities[l].data1);
> - if (ibuf_add(buf, &c, sizeof(c)) == -1)
> - return (-1);
> - c = htonl(comm->communities[l].data2);
> - if (ibuf_add(buf, &c, sizeof(c)) == -1)
> - return (-1);
> - c = htonl(comm->communities[l].data3);
> - if (ibuf_add(buf, &c, sizeof(c)) == -1)
> - return (-1);
> - }
> - }
> -
> return 0;
> }
>
> Index: rde_prefix.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_prefix.c,v
> retrieving revision 1.49
> diff -u -p -r1.49 rde_prefix.c
> --- rde_prefix.c 19 Apr 2023 07:09:47 -0000 1.49
> +++ rde_prefix.c 27 Jun 2023 08:33:54 -0000
> @@ -468,143 +468,111 @@ pt_free(struct pt_entry *pte)
>
> /* dump a prefix into specified buffer */
> int
> -pt_write(u_char *buf, int len, struct pt_entry *pte, int withdraw)
> +pt_writebuf(struct ibuf *buf, struct pt_entry *pte, int withdraw,
> + int add_path, uint32_t pathid)
> {
> struct pt_entry_vpn4 *pvpn4 = (struct pt_entry_vpn4 *)pte;
> struct pt_entry_vpn6 *pvpn6 = (struct pt_entry_vpn6 *)pte;
> struct pt_entry_flow *pflow = (struct pt_entry_flow *)pte;
> - int totlen, flowlen, psize;
> + struct ibuf *tmp;
> + int flowlen, psize;
> uint8_t plen;
>
> + if ((tmp = ibuf_dynamic(32, UINT16_MAX)) == NULL)
> + goto fail;
> +
> + if (add_path) {
> + if (ibuf_add_n32(tmp, pathid) == -1)
> + goto fail;
> + }
> +
> switch (pte->aid) {
> case AID_INET:
> case AID_INET6:
> plen = pte->prefixlen;
> - totlen = PREFIX_SIZE(plen);
> -
> - if (totlen > len)
> - return (-1);
> - *buf++ = plen;
> - memcpy(buf, pte->data, totlen - 1);
> - return (totlen);
> + if (ibuf_add_n8(tmp, plen) == -1)
> + goto fail;
> + if (ibuf_add(tmp, pte->data, PREFIX_SIZE(plen) - 1) == -1)
> + goto fail;
> + break;
> case AID_VPN_IPv4:
> plen = pvpn4->prefixlen;
> - totlen = PREFIX_SIZE(plen) + sizeof(pvpn4->rd);
> psize = PREFIX_SIZE(plen) - 1;
> plen += sizeof(pvpn4->rd) * 8;
> if (withdraw) {
> /* withdraw have one compat label as placeholder */
> - totlen += 3;
> plen += 3 * 8;
> } else {
> - totlen += pvpn4->labellen;
> plen += pvpn4->labellen * 8;
> }
>
> - if (totlen > len)
> - return (-1);
> - *buf++ = plen;
> + if (ibuf_add_n8(tmp, plen) == -1)
> + goto fail;
> if (withdraw) {
> /* magic compatibility label as per rfc8277 */
> - *buf++ = 0x80;
> - *buf++ = 0x0;
> - *buf++ = 0x0;
> + if (ibuf_add_n8(tmp, 0x80) == -1 ||
> + ibuf_add_zero(tmp, 2) == -1)
> + goto fail;
> } else {
> - memcpy(buf, &pvpn4->labelstack, pvpn4->labellen);
> - buf += pvpn4->labellen;
> + if (ibuf_add(tmp, &pvpn4->labelstack,
> + pvpn4->labellen) == -1)
> + goto fail;
> }
> - memcpy(buf, &pvpn4->rd, sizeof(pvpn4->rd));
> - buf += sizeof(pvpn4->rd);
> - memcpy(buf, &pvpn4->prefix4, psize);
> - return (totlen);
> + if (ibuf_add(tmp, &pvpn4->rd, sizeof(pvpn4->rd)) == -1 ||
> + ibuf_add(tmp, &pvpn4->prefix4, psize) == -1)
> + goto fail;
> + break;
> case AID_VPN_IPv6:
> plen = pvpn6->prefixlen;
> - totlen = PREFIX_SIZE(plen) + sizeof(pvpn6->rd);
> psize = PREFIX_SIZE(plen) - 1;
> plen += sizeof(pvpn6->rd) * 8;
> if (withdraw) {
> /* withdraw have one compat label as placeholder */
> - totlen += 3;
> plen += 3 * 8;
> } else {
> - totlen += pvpn6->labellen;
> plen += pvpn6->labellen * 8;
> }
>
> - if (totlen > len)
> - return (-1);
> - *buf++ = plen;
> + if (ibuf_add_n8(tmp, plen) == -1)
> + goto fail;
> if (withdraw) {
> /* magic compatibility label as per rfc8277 */
> - *buf++ = 0x80;
> - *buf++ = 0x0;
> - *buf++ = 0x0;
> + if (ibuf_add_n8(tmp, 0x80) == -1 ||
> + ibuf_add_zero(tmp, 2) == -1)
> + goto fail;
> } else {
> - memcpy(buf, &pvpn6->labelstack, pvpn6->labellen);
> - buf += pvpn6->labellen;
> + if (ibuf_add(tmp, &pvpn6->labelstack,
> + pvpn6->labellen) == -1)
> + goto fail;
> }
> - memcpy(buf, &pvpn6->rd, sizeof(pvpn6->rd));
> - buf += sizeof(pvpn6->rd);
> - memcpy(buf, &pvpn6->prefix6, psize);
> - return (totlen);
> + if (ibuf_add(tmp, &pvpn6->rd, sizeof(pvpn6->rd)) == -1 ||
> + ibuf_add(tmp, &pvpn6->prefix6, psize) == -1)
> + goto fail;
> + break;
> case AID_FLOWSPECv4:
> case AID_FLOWSPECv6:
> flowlen = pflow->len - PT_FLOW_SIZE;
> - totlen = flowlen < FLOWSPEC_LEN_LIMIT ? 1 : 2;
> - totlen += flowlen;
> -
> - if (totlen > len)
> - return (-1);
> -
> if (flowlen < FLOWSPEC_LEN_LIMIT) {
> - *buf++ = flowlen;
> + if (ibuf_add_n8(tmp, flowlen) == -1)
> + goto fail;
> } else {
> - *buf++ = 0xf0 | (flowlen >> 8);
> - *buf++ = flowlen & 0xff;
> + if (ibuf_add_n8(tmp, 0xf0 | (flowlen >> 8)) == -1 ||
> + ibuf_add_n8(tmp, flowlen) == -1)
> + goto fail;
> }
> - memcpy(buf, &pflow->flow, flowlen);
> - return (totlen);
> - default:
> - return (-1);
> - }
> -}
> -
> -/* dump a prefix into specified ibuf, allocating space for it if needed */
> -int
> -pt_writebuf(struct ibuf *buf, struct pt_entry *pte)
> -{
> - struct pt_entry_vpn4 *pvpn4 = (struct pt_entry_vpn4 *)pte;
> - struct pt_entry_vpn6 *pvpn6 = (struct pt_entry_vpn6 *)pte;
> - struct pt_entry_flow *pflow = (struct pt_entry_flow *)pte;
> - int totlen, flowlen;
> - void *bptr;
> -
> - switch (pte->aid) {
> - case AID_INET:
> - case AID_INET6:
> - totlen = PREFIX_SIZE(pte->prefixlen);
> - break;
> - case AID_VPN_IPv4:
> - totlen = PREFIX_SIZE(pte->prefixlen) + sizeof(pvpn4->rd) +
> - pvpn4->labellen;
> - break;
> - case AID_VPN_IPv6:
> - totlen = PREFIX_SIZE(pte->prefixlen) + sizeof(pvpn6->rd) +
> - pvpn6->labellen;
> - break;
> - case AID_FLOWSPECv4:
> - case AID_FLOWSPECv6:
> - flowlen = pflow->len - PT_FLOW_SIZE;
> - totlen = flowlen < FLOWSPEC_LEN_LIMIT ? 1 : 2;
> - totlen += flowlen;
> + if (ibuf_add(tmp, &pflow->flow, flowlen) == -1)
> + goto fail;
> break;
> default:
> - return (-1);
> + goto fail;
> }
>
> - if ((bptr = ibuf_reserve(buf, totlen)) == NULL)
> - return (-1);
> - if (pt_write(bptr, totlen, pte, 0) == -1)
> - return (-1);
> - return (0);
> + if (ibuf_add_buf(buf, tmp) == -1)
> + goto fail;
> + ibuf_free(tmp);
> + return 0;
> +
> + fail:
> + ibuf_free(tmp);
> + return -1;
> }
> Index: rde_update.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
> retrieving revision 1.162
> diff -u -p -r1.162 rde_update.c
> --- rde_update.c 19 Apr 2023 08:30:37 -0000 1.162
> +++ rde_update.c 27 Jun 2023 08:33:54 -0000
> @@ -558,17 +558,15 @@ up_prep_adjout(struct rde_peer *peer, st
>
>
> static int
> -up_generate_attr(u_char *buf, int len, struct rde_peer *peer,
> - struct filterstate *state, uint8_t aid)
> +up_generate_attr(struct ibuf *buf, struct rde_peer *peer,
> + struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh,
> + uint8_t aid)
> {
> - struct rde_aspath *asp = &state->aspath;
> - struct rde_community *comm = &state->communities;
> struct attr *oa = NULL, *newaggr = NULL;
> u_char *pdata;
> uint32_t tmp32;
> - struct bgpd_addr *nexthop;
> - int flags, r, neednewpath = 0;
> - uint16_t wlen = 0, plen;
> + int flags, neednewpath = 0;
> + uint16_t plen;
> uint8_t oalen = 0, type;
>
> if (asp->others_len > 0)
> @@ -576,8 +574,6 @@ up_generate_attr(u_char *buf, int len, s
>
> /* dump attributes in ascending order */
> for (type = ATTR_ORIGIN; type < 255; type++) {
> - r = 0;
> -
> while (oa && oa->type < type) {
> if (oalen < asp->others_len)
> oa = asp->others[oalen++];
> @@ -590,9 +586,9 @@ up_generate_attr(u_char *buf, int len, s
> * Attributes stored in rde_aspath
> */
> case ATTR_ORIGIN:
> - if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
> - ATTR_ORIGIN, &asp->origin, 1)) == -1)
> - return (-1);
> + if (attr_writebuf(buf, ATTR_WELL_KNOWN,
> + ATTR_ORIGIN, &asp->origin, 1) == -1)
> + return -1;
> break;
> case ATTR_ASPATH:
> plen = aspath_length(asp->aspath);
> @@ -602,20 +598,19 @@ up_generate_attr(u_char *buf, int len, s
> pdata = aspath_deflate(pdata, &plen,
> &neednewpath);
>
> - if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
> - ATTR_ASPATH, pdata, plen)) == -1)
> - return (-1);
> + if (attr_writebuf(buf, ATTR_WELL_KNOWN,
> + ATTR_ASPATH, pdata, plen) == -1)
Not new, but this return leaks pdata in the !peer_has_as4byte() case.
> + return -1;
> if (!peer_has_as4byte(peer))
> free(pdata);
> break;
> case ATTR_NEXTHOP:
> switch (aid) {
> case AID_INET:
> - nexthop = &state->nexthop->exit_nexthop;
> - if ((r = attr_write(buf + wlen, len,
> - ATTR_WELL_KNOWN, ATTR_NEXTHOP,
> - &nexthop->v4.s_addr, 4)) == -1)
> - return (-1);
> + if (attr_writebuf(buf, ATTR_WELL_KNOWN,
> + ATTR_NEXTHOP, &nh->exit_nexthop.v4,
> + sizeof(nh->exit_nexthop.v4)) == -1)
> + return -1;
> break;
> default:
> break;
> @@ -632,37 +627,29 @@ up_generate_attr(u_char *buf, int len, s
> asp->flags & F_ATTR_MED_ANNOUNCE ||
> peer->flags & PEERFLAG_TRANS_AS)) {
> tmp32 = htonl(asp->med);
> - if ((r = attr_write(buf + wlen, len,
> - ATTR_OPTIONAL, ATTR_MED, &tmp32, 4)) == -1)
> - return (-1);
> + if (attr_writebuf(buf, ATTR_OPTIONAL,
> + ATTR_MED, &tmp32, 4) == -1)
> + return -1;
> }
> break;
> case ATTR_LOCALPREF:
> if (!peer->conf.ebgp) {
> /* local preference, only valid for ibgp */
> tmp32 = htonl(asp->lpref);
> - if ((r = attr_write(buf + wlen, len,
> - ATTR_WELL_KNOWN, ATTR_LOCALPREF, &tmp32,
> - 4)) == -1)
> - return (-1);
> + if (attr_writebuf(buf, ATTR_WELL_KNOWN,
> + ATTR_LOCALPREF, &tmp32, 4) == -1)
> + return -1;
> }
> break;
> /*
> * Communities are stored in struct rde_community
> */
> case ATTR_COMMUNITIES:
> - if ((r = community_write(comm, buf + wlen, len)) == -1)
> - return (-1);
> - break;
> case ATTR_EXT_COMMUNITIES:
> - if ((r = community_ext_write(comm, peer->conf.ebgp,
> - buf + wlen, len)) == -1)
> - return (-1);
> - break;
> case ATTR_LARGE_COMMUNITIES:
> - if ((r = community_large_write(comm, buf + wlen,
> - len)) == -1)
> - return (-1);
> + if (community_writebuf(comm, type, peer->conf.ebgp,
> + buf) == -1)
> + return -1;
> break;
> /*
> * NEW to OLD conversion when sending stuff to a 2byte AS peer
> @@ -675,11 +662,10 @@ up_generate_attr(u_char *buf, int len, s
> flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
> if (!(asp->flags & F_PREFIX_ANNOUNCED))
> flags |= ATTR_PARTIAL;
> - if (plen == 0)
> - r = 0;
> - else if ((r = attr_write(buf + wlen, len, flags,
> - ATTR_AS4_PATH, pdata, plen)) == -1)
> - return (-1);
> + if (plen != 0)
> + if (attr_writebuf(buf, flags,
> + ATTR_AS4_PATH, pdata, plen) == -1)
> + return -1;
> }
> break;
> case ATTR_AS4_AGGREGATOR:
> @@ -687,10 +673,10 @@ up_generate_attr(u_char *buf, int len, s
> flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
> if (!(asp->flags & F_PREFIX_ANNOUNCED))
> flags |= ATTR_PARTIAL;
> - if ((r = attr_write(buf + wlen, len, flags,
> + if (attr_writebuf(buf, flags,
> ATTR_AS4_AGGREGATOR, newaggr->data,
> - newaggr->len)) == -1)
> - return (-1);
> + newaggr->len) == -1)
> + return -1;
> }
> break;
> /*
> @@ -712,24 +698,24 @@ up_generate_attr(u_char *buf, int len, s
> case ATTR_ATOMIC_AGGREGATE:
> if (oa == NULL || oa->type != type)
> break;
> - if ((r = attr_write(buf + wlen, len,
> - ATTR_WELL_KNOWN, ATTR_ATOMIC_AGGREGATE,
> - NULL, 0)) == -1)
> - return (-1);
> + if (attr_writebuf(buf, ATTR_WELL_KNOWN,
> + ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1)
> + return -1;
> break;
> case ATTR_AGGREGATOR:
> if (oa == NULL || oa->type != type)
> break;
> + if ((!(oa->flags & ATTR_TRANSITIVE)) &&
> + peer->conf.ebgp)
> + break;
> if (!peer_has_as4byte(peer)) {
> /* need to deflate the aggregator */
> - uint8_t t[6];
> + uint8_t t[6];
> uint16_t tas;
>
> if ((!(oa->flags & ATTR_TRANSITIVE)) &&
> - peer->conf.ebgp) {
> - r = 0;
> + peer->conf.ebgp)
> break;
> - }
>
> memcpy(&tmp32, oa->data, sizeof(tmp32));
> if (ntohl(tmp32) > USHRT_MAX) {
> @@ -742,30 +728,31 @@ up_generate_attr(u_char *buf, int len, s
> memcpy(t + sizeof(tas),
> oa->data + sizeof(tmp32),
> oa->len - sizeof(tmp32));
> - if ((r = attr_write(buf + wlen, len,
> - oa->flags, oa->type, &t, sizeof(t))) == -1)
> - return (-1);
> - break;
> + if (attr_writebuf(buf, oa->flags,
> + oa->type, &t, sizeof(t)) == -1)
> + return -1;
> + } else {
> + if (attr_writebuf(buf, oa->flags, oa->type,
> + oa->data, oa->len) == -1)
> + return -1;
> }
> - /* FALLTHROUGH */
> + break;
> case ATTR_ORIGINATOR_ID:
> case ATTR_CLUSTER_LIST:
> case ATTR_OTC:
> if (oa == NULL || oa->type != type)
> break;
> if ((!(oa->flags & ATTR_TRANSITIVE)) &&
> - peer->conf.ebgp) {
> - r = 0;
> + peer->conf.ebgp)
> break;
> - }
> - if ((r = attr_write(buf + wlen, len,
> - oa->flags, oa->type, oa->data, oa->len)) == -1)
> - return (-1);
> + if (attr_writebuf(buf, oa->flags, oa->type,
> + oa->data, oa->len) == -1)
> + return -1;
> break;
> default:
> if (oa == NULL && type >= ATTR_FIRST_UNKNOWN)
> /* there is no attribute left to dump */
> - goto done;
> + return (0);
>
> if (oa == NULL || oa->type != type)
> break;
> @@ -779,16 +766,12 @@ up_generate_attr(u_char *buf, int len, s
> */
> break;
> }
> - if ((r = attr_write(buf + wlen, len,
> - oa->flags | ATTR_PARTIAL, oa->type,
> - oa->data, oa->len)) == -1)
> - return (-1);
> + if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL,
> + oa->type, oa->data, oa->len) == -1)
> + return -1;
> }
> - wlen += r;
> - len -= r;
> }
> -done:
> - return (wlen);
> + return 0;
> }
>
> /*
> @@ -817,33 +800,42 @@ up_is_eor(struct rde_peer *peer, uint8_t
> /* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */
> #define MIN_UPDATE_LEN 16
>
> +static void
> +up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p,
> + struct rde_peer *peer, int withdraw)
> +{
> + if (withdraw) {
> + /* prefix no longer needed, remove it */
> + prefix_adjout_destroy(p);
> + peer->stats.prefix_sent_withdraw++;
> + } else {
> + /* prefix still in Adj-RIB-Out, keep it */
> + RB_REMOVE(prefix_tree, prefix_head, p);
> + p->flags &= ~PREFIX_FLAG_UPDATE;
> + peer->stats.pending_update--;
> + peer->stats.prefix_sent_update++;
> + }
> +}
> +
> /*
> * Write prefixes to buffer until either there is no more space or
> * the next prefix has no longer the same ASPATH attributes.
> + * Returns -1 if no prefix was written else 0.
> */
> static int
> -up_dump_prefix(u_char *buf, int len, struct prefix_tree *prefix_head,
> +up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
> struct rde_peer *peer, int withdraw)
> {
> struct prefix *p, *np;
> - uint32_t pathid;
> - int r, wpos = 0, done = 0;
> + int done = 0, has_ap = -1, rv = -1;
>
> RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
> - if (peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND)) {
> - if (len <= wpos + (int)sizeof(pathid))
> - break;
> - pathid = htonl(p->path_id_tx);
> - memcpy(buf + wpos, &pathid, sizeof(pathid));
> - wpos += sizeof(pathid);
> - }
> - if ((r = pt_write(buf + wpos, len - wpos, p->pt,
> - withdraw)) == -1) {
> - if (peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND))
> - wpos -= sizeof(pathid);
> + if (has_ap == -1)
> + has_ap = peer_has_add_path(peer, p->pt->aid,
> + CAPA_AP_SEND);
> + if (pt_writebuf(buf, p->pt, withdraw, has_ap, p->path_id_tx) ==
> + -1)
> break;
> - }
> - wpos += r;
>
> /* make sure we only dump prefixes which belong together */
> if (np == NULL ||
> @@ -854,276 +846,249 @@ up_dump_prefix(u_char *buf, int len, str
> (np->flags & PREFIX_FLAG_EOR))
> done = 1;
>
> - if (withdraw) {
> - /* prefix no longer needed, remove it */
> - prefix_adjout_destroy(p);
> - peer->stats.prefix_sent_withdraw++;
> - } else {
> - /* prefix still in Adj-RIB-Out, keep it */
> - RB_REMOVE(prefix_tree, prefix_head, p);
> - p->flags &= ~PREFIX_FLAG_UPDATE;
> - peer->stats.pending_update--;
> - peer->stats.prefix_sent_update++;
> - }
> -
> + rv = 0;
> + up_prefix_free(prefix_head, p, peer, withdraw);
> if (done)
> break;
> }
> - return (wpos);
> -}
> -
> -int
> -up_dump_withdraws(u_char *buf, int len, struct rde_peer *peer, uint8_t aid)
> -{
> - uint16_t wpos, wd_len;
> - int r;
> -
> - if (len < MIN_UPDATE_LEN)
> - return (-1);
> -
> - /* reserve space for the length field */
> - wpos = 2;
> - r = up_dump_prefix(buf + wpos, len - wpos, &peer->withdraws[aid],
> - peer, 1);
> - wd_len = htons(r);
> - memcpy(buf, &wd_len, 2);
> -
> - return (wpos + r);
> -}
> -
> -int
> -up_dump_mp_unreach(u_char *buf, int len, struct rde_peer *peer, uint8_t aid)
> -{
> - u_char *attrbuf;
> - int wpos, r;
> - uint16_t attr_len, tmp;
> -
> - if (len < MIN_UPDATE_LEN || RB_EMPTY(&peer->withdraws[aid]))
> - return (-1);
> -
> - /* reserve space for withdraw len, attr len */
> - wpos = 2 + 2;
> - attrbuf = buf + wpos;
> -
> - /* attribute header, defaulting to extended length one */
> - attrbuf[0] = ATTR_OPTIONAL | ATTR_EXTLEN;
> - attrbuf[1] = ATTR_MP_UNREACH_NLRI;
> - wpos += 4;
> -
> - /* afi & safi */
> - if (aid2afi(aid, &tmp, buf + wpos + 2))
> - fatalx("up_dump_mp_unreach: bad AID");
> - tmp = htons(tmp);
> - memcpy(buf + wpos, &tmp, sizeof(uint16_t));
> - wpos += 3;
> -
> - r = up_dump_prefix(buf + wpos, len - wpos, &peer->withdraws[aid],
> - peer, 1);
> - if (r == 0)
> - return (-1);
> - wpos += r;
> - attr_len = r + 3; /* prefixes + afi & safi */
> -
> - /* attribute length */
> - attr_len = htons(attr_len);
> - memcpy(attrbuf + 2, &attr_len, sizeof(attr_len));
> -
> - /* write length fields */
> - memset(buf, 0, sizeof(uint16_t)); /* withdrawn routes len */
> - attr_len = htons(wpos - 4);
> - memcpy(buf + 2, &attr_len, sizeof(attr_len));
> -
> - return (wpos);
> -}
> -
> -int
> -up_dump_attrnlri(u_char *buf, int len, struct rde_peer *peer)
> -{
> - struct filterstate state;
> - struct prefix *p;
> - int r, wpos;
> - uint16_t attr_len;
> -
> - if (len < 2)
> - fatalx("up_dump_attrnlri: buffer way too small");
> - if (len < MIN_UPDATE_LEN)
> - goto done;
> -
> - p = RB_MIN(prefix_tree, &peer->updates[AID_INET]);
> - if (p == NULL)
> - goto done;
> -
> - rde_filterstate_prep(&state, p);
> - r = up_generate_attr(buf + 2, len - 2, peer, &state, AID_INET);
> - rde_filterstate_clean(&state);
> - if (r == -1) {
> - /*
> - * either no packet or not enough space.
> - * The length field needs to be set to zero else it would be
> - * an invalid bgp update.
> - */
> -done:
> - memset(buf, 0, 2);
> - return (2);
> - }
> -
> - /* first dump the 2-byte path attribute length */
> - attr_len = htons(r);
> - memcpy(buf, &attr_len, 2);
> - wpos = 2;
> - /* then skip over the already dumped path attributes themselves */
> - wpos += r;
> -
> - /* last but not least dump the nlri */
> - r = up_dump_prefix(buf + wpos, len - wpos, &peer->updates[AID_INET],
> - peer, 0);
> - wpos += r;
> -
> - return (wpos);
> + return rv;
> }
>
> static int
> -up_generate_mp_reach(u_char *buf, int len, struct rde_peer *peer,
> - struct filterstate *state, uint8_t aid)
> +up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
> + struct nexthop *nh, uint8_t aid)
> {
> - struct bgpd_addr *nexthop;
> - u_char *attrbuf;
> - int r, wpos, attrlen;
> - uint16_t tmp;
> + struct bgpd_addr *nexthop;
> + size_t off;
> + uint16_t len, afi;
> + uint8_t safi;
>
> - if (len < 4)
> - return (-1);
> /* attribute header, defaulting to extended length one */
> - buf[0] = ATTR_OPTIONAL | ATTR_EXTLEN;
> - buf[1] = ATTR_MP_REACH_NLRI;
> - wpos = 4;
> - attrbuf = buf + wpos;
> + if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
> + return -1;
> + if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1)
> + return -1;
> + off = ibuf_size(buf);
> + if (ibuf_add_zero(buf, sizeof(len)) == -1)
> + return -1;
> +
> + if (aid2afi(aid, &afi, &safi))
> + fatalx("up_generate_mp_reach: bad AID");
> +
> + /* AFI + SAFI + NH LEN + NH + Reserved */
> + if (ibuf_add_n16(buf, afi) == -1)
> + return -1;
> + if (ibuf_add_n8(buf, safi) == -1)
> + return -1;
>
> switch (aid) {
> case AID_INET6:
> - attrlen = 21; /* AFI + SAFI + NH LEN + NH + Reserved */
> - if (len < wpos + attrlen)
> - return (-1);
> - wpos += attrlen;
> - if (aid2afi(aid, &tmp, &attrbuf[2]))
> - fatalx("up_generate_mp_reach: bad AID");
> - tmp = htons(tmp);
> - memcpy(attrbuf, &tmp, sizeof(tmp));
> - attrbuf[3] = sizeof(struct in6_addr);
> - attrbuf[20] = 0; /* Reserved must be 0 */
> -
> + /* NH LEN */
> + if (ibuf_add_n8(buf, sizeof(struct in6_addr)) == -1)
> + return -1;
> /* write nexthop */
> - attrbuf += 4;
> - nexthop = &state->nexthop->exit_nexthop;
> - memcpy(attrbuf, &nexthop->v6, sizeof(struct in6_addr));
> + nexthop = &nh->exit_nexthop;
> + if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
> + return -1;
> break;
> case AID_VPN_IPv4:
> - attrlen = 17; /* AFI + SAFI + NH LEN + NH + Reserved */
> - if (len < wpos + attrlen)
> - return (-1);
> - wpos += attrlen;
> - if (aid2afi(aid, &tmp, &attrbuf[2]))
> - fatalx("up_generate_mp_reachi: bad AID");
> - tmp = htons(tmp);
> - memcpy(attrbuf, &tmp, sizeof(tmp));
> - attrbuf[3] = sizeof(uint64_t) + sizeof(struct in_addr);
> - memset(attrbuf + 4, 0, sizeof(uint64_t));
> - attrbuf[16] = 0; /* Reserved must be 0 */
> -
> + /* NH LEN */
> + if (ibuf_add_n8(buf,
> + sizeof(uint64_t) + sizeof(struct in_addr)) == -1)
> + return -1;
> + /* write zero rd */
> + if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
> + return -1;
> /* write nexthop */
> - attrbuf += 12;
> - nexthop = &state->nexthop->exit_nexthop;
> - memcpy(attrbuf, &nexthop->v4, sizeof(struct in_addr));
> + nexthop = &nh->exit_nexthop;
> + if (ibuf_add(buf, &nexthop->v4, sizeof(struct in_addr)) == -1)
> + return -1;
> break;
> case AID_VPN_IPv6:
> - attrlen = 29; /* AFI + SAFI + NH LEN + NH + Reserved */
> - if (len < wpos + attrlen)
> - return (-1);
> - wpos += attrlen;
> - if (aid2afi(aid, &tmp, &attrbuf[2]))
> - fatalx("up_generate_mp_reachi: bad AID");
> - tmp = htons(tmp);
> - memcpy(attrbuf, &tmp, sizeof(tmp));
> - attrbuf[3] = sizeof(uint64_t) + sizeof(struct in6_addr);
> - memset(attrbuf + 4, 0, sizeof(uint64_t));
> - attrbuf[28] = 0; /* Reserved must be 0 */
> -
> + /* NH LEN */
> + if (ibuf_add_n8(buf,
> + sizeof(uint64_t) + sizeof(struct in6_addr)) == -1)
> + return -1;
> + /* write zero rd */
> + if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
> + return -1;
> /* write nexthop */
> - attrbuf += 12;
> - nexthop = &state->nexthop->exit_nexthop;
> - memcpy(attrbuf, &nexthop->v6, sizeof(struct in6_addr));
> + nexthop = &nh->exit_nexthop;
> + if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
> + return -1;
> break;
> case AID_FLOWSPECv4:
> case AID_FLOWSPECv6:
> - attrlen = 5; /* AFI + SAFI + NH LEN + NH + Reserved */
> - if (len < wpos + attrlen)
> - return (-1);
> - wpos += attrlen;
> - if (aid2afi(aid, &tmp, &attrbuf[2]))
> - fatalx("up_generate_mp_reachi: bad AID");
> - tmp = htons(tmp);
> - memcpy(attrbuf, &tmp, sizeof(tmp));
> - attrbuf[3] = 0; /* nh len MUST be 0 */
> - attrbuf[4] = 0; /* Reserved must be 0 */
> + if (ibuf_add_zero(buf, 1) == -1) /* NH LEN MUST be 0 */
> + return -1;
> + /* no NH */
> break;
> default:
> fatalx("up_generate_mp_reach: unknown AID");
> }
>
> - r = up_dump_prefix(buf + wpos, len - wpos, &peer->updates[aid],
> - peer, 0);
> - if (r == 0) {
> - /* no prefixes written ... */
> + if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
> + return -1;
> +
> + if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
> + /* no prefixes written, fail update */
> return (-1);
> - }
> - attrlen += r;
> - wpos += r;
> - /* update attribute length field */
> - tmp = htons(attrlen);
> - memcpy(buf + 2, &tmp, sizeof(tmp));
>
> - return (wpos);
> + /* update MP_REACH attribute length field */
> + len = ibuf_size(buf) - off - sizeof(len);
> + if (ibuf_set_n16(buf, off, len) == -1)
> + return -1;
> +
> + return 0;
> }
>
> +/*
> + * Generate UPDATE message containing either just withdraws or updates.
> + * UPDATE messages are contructed like this:
> + *
> + * +-----------------------------------------------------+
> + * | Withdrawn Routes Length (2 octets) |
> + * +-----------------------------------------------------+
> + * | Withdrawn Routes (variable) |
> + * +-----------------------------------------------------+
> + * | Total Path Attribute Length (2 octets) |
> + * +-----------------------------------------------------+
> + * | Path Attributes (variable) |
> + * +-----------------------------------------------------+
> + * | Network Layer Reachability Information (variable) |
> + * +-----------------------------------------------------+
> + *
> + * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI
> + * the latter will be the only path attribute in a message.
> + */
> +
> +/*
> + * Write UPDATE message for withdrawn routes. The size of buf limits
> + * how may routes can be added. Return 0 on success -1 on error which
> + * includes generating an empty withdraw message.
> + */
> int
> -up_dump_mp_reach(u_char *buf, int len, struct rde_peer *peer, uint8_t aid)
> +up_dump_withdraws(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
> {
> - struct filterstate state;
> - struct prefix *p;
> - int r, wpos;
> - uint16_t attr_len;
> + size_t off;
> + uint16_t afi, len;
> + uint8_t safi;
> +
> + /* reserve space for the withdrawn routes length field */
> + off = ibuf_size(buf);
> + if (ibuf_add_zero(buf, sizeof(len)) == -1)
> + return -1;
> +
> + if (aid != AID_INET) {
> + /* reserve space for 2-byte path attribute length */
> + off = ibuf_size(buf);
> + if (ibuf_add_zero(buf, sizeof(len)) == -1)
> + return -1;
> +
> + /* attribute header, defaulting to extended length one */
> + if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
> + return -1;
> + if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
> + return -1;
> + if (ibuf_add_zero(buf, sizeof(len)) == -1)
> + return -1;
> +
> + /* afi & safi */
> + if (aid2afi(aid, &afi, &safi))
> + fatalx("up_dump_mp_unreach: bad AID");
> + if (ibuf_add_n16(buf, afi) == -1)
> + return -1;
> + if (ibuf_add_n8(buf, safi) == -1)
> + return -1;
> + }
> +
> + if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
> + return -1;
>
> - if (len < MIN_UPDATE_LEN)
> - return 0;
> + /* update length field (either withdrawn routes or attribute length) */
> + len = ibuf_size(buf) - off - sizeof(len);
> + if (ibuf_set_n16(buf, off, len) == -1)
> + return -1;
> +
> + if (aid != AID_INET) {
> + /* write MP_UNREACH_NLRI attribute length (always extended) */
> + len -= 4; /* skip attribute header */
> + if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
> + return -1;
> + } else {
> + /* no extra attributes so set attribute len to 0 */
> + if (ibuf_add_zero(buf, sizeof(len)) == -1)
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Write UPDATE message for changed and added routes. The size of buf limits
> + * how may routes can be added. The function first dumps the path attributes
> + * and then tries to add as many prefixes using these attributes.
> + * Return 0 on success -1 on error which includes producing an empty message.
> + */
> +int
> +up_dump_update(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
> +{
> + struct bgpd_addr addr;
> + struct prefix *p;
> + size_t off;
> + uint16_t len;
>
> - /* get starting point */
> p = RB_MIN(prefix_tree, &peer->updates[aid]);
> if (p == NULL)
> - return 0;
> + return -1;
> +
> + /* withdrawn routes length field is 0 */
> + if (ibuf_add_zero(buf, sizeof(len)) == -1)
> + return -1;
> +
> + /* reserve space for 2-byte path attribute length */
> + off = ibuf_size(buf);
> + if (ibuf_add_zero(buf, sizeof(len)) == -1)
> + return -1;
> +
> + if (up_generate_attr(buf, peer, prefix_aspath(p),
> + prefix_communities(p), prefix_nexthop(p), aid) == -1)
> + goto fail;
>
> - wpos = 4; /* reserve space for length fields */
> + if (aid != AID_INET) {
> + /* write mp attribute including nlri */
>
> - rde_filterstate_prep(&state, p);
> + /*
> + * RFC 7606 wants this to be first but then we need
> + * to use multiple buffers with adjusted length to
> + * merge the attributes together in reverse order of
> + * creation.
> + */
> + if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
> + -1)
> + goto fail;
> + }
>
> - /* write regular path attributes */
> - r = up_generate_attr(buf + wpos, len - wpos, peer, &state, aid);
> - if (r == -1) {
> - rde_filterstate_clean(&state);
> - return 0;
> + /* update attribute length field */
> + len = ibuf_size(buf) - off - sizeof(len);
> + if (ibuf_set_n16(buf, off, len) == -1)
> + return -1;
> +
> + if (aid == AID_INET) {
> + /* last but not least dump the IPv4 nlri */
> + if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
> + goto fail;
> }
> - wpos += r;
>
> - /* write mp attribute */
> - r = up_generate_mp_reach(buf + wpos, len - wpos, peer, &state, aid);
> - rde_filterstate_clean(&state);
> - if (r == -1)
> - return 0;
> - wpos += r;
> -
> - /* write length fields */
> - memset(buf, 0, sizeof(uint16_t)); /* withdrawn routes len */
> - attr_len = htons(wpos - 4);
> - memcpy(buf + 2, &attr_len, sizeof(attr_len));
> + return 0;
>
> - return (wpos);
> +fail:
> + /* Not enough space. Drop prefix, it will never fit. */
> + pt_getaddr(p->pt, &addr);
> + log_peer_warnx(&peer->conf, "path attributes to large, "
> + "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);
> +
> + up_prefix_free(&peer->updates[AID_INET], p, peer, 0);
> + /* XXX should probably send a withdraw for this prefix */
> + return -1;
> }
> Index: session.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
> retrieving revision 1.445
> diff -u -p -r1.445 session.c
> --- session.c 25 May 2023 14:20:25 -0000 1.445
> +++ session.c 27 Jun 2023 08:33:54 -0000
> @@ -1383,7 +1383,7 @@ session_sendmsg(struct bgp_msg *msg, str
> if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
> mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
> mrt->group_id == p->conf.groupid))
> - mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p,
> + mrt_dump_bgp_msg(mrt, ibuf_data(msg->buf), msg->len, p,
> msg->type);
> }
>
> @@ -1589,7 +1589,7 @@ session_open(struct peer *p)
> uint8_t op_len = optparamlen;
> errs += ibuf_add(buf->buf, &op_len, 1);
> }
> - errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
> + errs += ibuf_add_buf(buf->buf, opb);
> }
>
> ibuf_free(opb);
>