Sorry this diff is a monster but it kind of turned into a all or nothing
situation.

For a long time bgpd used a static 4k buffer plus a len argument to build
updates. The result was ok but was also very fragile.
With the new ibuf API all of this can be rewritten and the result is IMO
already a lot cleaner (mainly because a lot of code duplication could be
removed).

Some bits I'm not super happy with are community_writebuf() and
pt_writebuf(). Both functions are a bit too complex to my taste but for
different reasons. pt_writebuf() needs to be transactional (either all or
nothing) since we call pt_writebuf() until there is no more space.
community_writebuf() does maybe too much at once.

As mentioned this is a major rewrite, I did run this through regress and
also on a few personal systems but I'm unable to test all possible cases.
Please try this out and report back.
-- 
:wq Claudio

Index: mrt.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
retrieving revision 1.114
diff -u -p -r1.114 mrt.c
--- mrt.c       19 Apr 2023 09:03:00 -0000      1.114
+++ mrt.c       27 Jun 2023 08:33:54 -0000
@@ -309,7 +309,9 @@ mrt_attr_dump(struct ibuf *buf, struct r
                return (-1);
 
        /* communities */
-       if (community_writebuf(buf, c) == -1)
+       if (community_writebuf(c, ATTR_COMMUNITIES, 0, buf) == -1 ||
+           community_writebuf(c, ATTR_EXT_COMMUNITIES, 0, buf) == -1 ||
+           community_writebuf(c, ATTR_LARGE_COMMUNITIES, 0, buf) == -1)
                return (-1);
 
        /* dump all other path attributes without modification */
@@ -502,7 +504,7 @@ mrt_dump_entry_mp(struct mrt *mrt, struc
                goto fail;
        }
 
-       if (pt_writebuf(h2buf, p->pt) == -1) {
+       if (pt_writebuf(h2buf, p->pt, 0, 0, 0) == -1) {
                log_warnx("%s: pt_writebuf error", __func__);
                goto fail;
        }
@@ -678,7 +680,7 @@ mrt_dump_entry_v2_rib(struct rib_entry *
                }
                len = ibuf_size(tbuf);
                DUMP_SHORT(buf, (uint16_t)len);
-               if (ibuf_add(buf, tbuf->buf, len) == -1) {
+               if (ibuf_add_buf(buf, tbuf) == -1) {
                        log_warn("%s: ibuf_add error", __func__);
                        ibuf_free(tbuf);
                        goto fail;
@@ -731,7 +733,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
                break;
        }
 
-       if (pt_writebuf(pbuf, re->prefix) == -1) {
+       if (pt_writebuf(pbuf, re->prefix, 0, 0, 0) == -1) {
                log_warnx("%s: pt_writebuf error", __func__);
                goto fail;
        }
@@ -748,7 +750,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
                        goto fail;
 
                DUMP_LONG(hbuf, snum);
-               if (ibuf_add(hbuf, pbuf->buf, ibuf_size(pbuf)) == -1) {
+               if (ibuf_add_buf(hbuf, pbuf) == -1) {
                        log_warn("%s: ibuf_add error", __func__);
                        goto fail;
                }
@@ -767,7 +769,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
                        goto fail;
 
                DUMP_LONG(hbuf, snum);
-               if (ibuf_add(hbuf, pbuf->buf, ibuf_size(pbuf)) == -1) {
+               if (ibuf_add_buf(hbuf, pbuf) == -1) {
                        log_warn("%s: ibuf_add error", __func__);
                        goto fail;
                }
@@ -833,8 +835,8 @@ mrt_dump_v2_hdr(struct mrt *mrt, struct 
        }
 
        off = ibuf_size(buf);
-       if (ibuf_reserve(buf, sizeof(nump)) == NULL) {
-               log_warn("%s: ibuf_reserve error", __func__);
+       if (ibuf_add_zero(buf, sizeof(nump)) == -1) {
+               log_warn("%s: ibuf_add_zero error", __func__);
                goto fail;
        }
        arg.nump = 0;
@@ -843,8 +845,10 @@ mrt_dump_v2_hdr(struct mrt *mrt, struct 
        if (arg.nump == -1)
                goto fail;
 
-       nump = htons(arg.nump);
-       memcpy(ibuf_seek(buf, off, sizeof(nump)), &nump, sizeof(nump));
+       if (ibuf_set_n16(buf, off, arg.nump) == -1) {
+               log_warn("%s: ibuf_set_n16 error", __func__);
+               goto fail;
+       }
 
        len = ibuf_size(buf);
        if (mrt_dump_hdr_rde(&hbuf, MSG_TABLE_DUMP_V2,
@@ -1099,14 +1103,8 @@ mrt_write(struct mrt *mrt)
 void
 mrt_clean(struct mrt *mrt)
 {
-       struct ibuf     *b;
-
        close(mrt->wbuf.fd);
-       while ((b = TAILQ_FIRST(&mrt->wbuf.bufs))) {
-               TAILQ_REMOVE(&mrt->wbuf.bufs, b, entry);
-               ibuf_free(b);
-       }
-       mrt->wbuf.queued = 0;
+       msgbuf_clear(&mrt->wbuf);
 }
 
 static struct imsgbuf  *mrt_imsgbuf[2];
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.606
diff -u -p -r1.606 rde.c
--- rde.c       12 Jun 2023 12:48:07 -0000      1.606
+++ rde.c       27 Jun 2023 08:33:54 -0000
@@ -86,8 +86,7 @@ static void    rde_rpki_reload(void);
 static int      rde_roa_reload(void);
 static int      rde_aspa_reload(void);
 int             rde_update_queue_pending(void);
-void            rde_update_queue_runner(void);
-void            rde_update6_queue_runner(uint8_t);
+void            rde_update_queue_runner(uint8_t);
 struct rde_prefixset *rde_find_prefixset(char *, struct rde_prefixset_head *);
 void            rde_mark_prefixsets_dirty(struct rde_prefixset_head *,
                    struct rde_prefixset_head *);
@@ -310,9 +309,8 @@ rde_main(int debug, int verbose)
                rib_dump_runner();
                nexthop_runner();
                if (ibuf_se && ibuf_se->w.queued < SESS_MSG_HIGH_MARK) {
-                       rde_update_queue_runner();
-                       for (aid = AID_INET6; aid < AID_MAX; aid++)
-                               rde_update6_queue_runner(aid);
+                       for (aid = AID_MIN; aid < AID_MAX; aid++)
+                               rde_update_queue_runner(aid);
                }
                /* commit pftable once per poll loop */
                rde_commit_pftable();
@@ -3456,13 +3454,13 @@ rde_update_queue_pending(void)
 }
 
 void
-rde_update_queue_runner(void)
+rde_update_queue_runner(uint8_t aid)
 {
        struct rde_peer         *peer;
-       int                      r, sent, max = RDE_RUNNER_ROUNDS, eor;
-       uint16_t                 len, wpos;
+       struct ibuf             *buf;
+       int                      sent, max = RDE_RUNNER_ROUNDS;
 
-       len = sizeof(queue_buf) - MSGSIZE_HEADER;
+       /* first withdraws ... */
        do {
                sent = 0;
                RB_FOREACH(peer, peer_tree, &peertable) {
@@ -3472,80 +3470,26 @@ rde_update_queue_runner(void)
                                continue;
                        if (peer->throttled)
                                continue;
-                       eor = 0;
-                       wpos = 0;
-                       /* first withdraws, save 2 bytes for path attributes */
-                       if ((r = up_dump_withdraws(queue_buf, len - 2, peer,
-                           AID_INET)) == -1)
+                       if (RB_EMPTY(&peer->withdraws[aid]))
                                continue;
-                       wpos += r;
 
-                       /* now bgp path attributes unless it is the EoR mark */
-                       if (up_is_eor(peer, AID_INET)) {
-                               eor = 1;
-                               memset(queue_buf + wpos, 0, 2);
-                               wpos += 2;
-                       } else {
-                               r = up_dump_attrnlri(queue_buf + wpos,
-                                   len - wpos, peer);
-                               wpos += r;
-                       }
-
-                       /* finally send message to SE */
-                       if (wpos > 4) {
-                               if (imsg_compose(ibuf_se, IMSG_UPDATE,
-                                   peer->conf.id, 0, -1, queue_buf,
-                                   wpos) == -1)
-                                       fatal("%s %d imsg_compose error",
-                                           __func__, __LINE__);
-                               sent++;
-                       }
-                       if (eor) {
-                               int sent_eor = peer->sent_eor & (1 << AID_INET);
-                               if (peer->capa.grestart.restart && !sent_eor)
-                                       rde_peer_send_eor(peer, AID_INET);
-                               if (peer->capa.enhanced_rr && sent_eor)
-                                       rde_peer_send_rrefresh(peer, AID_INET,
-                                           ROUTE_REFRESH_END_RR);
-                       }
-               }
-               max -= sent;
-       } while (sent != 0 && max > 0);
-}
-
-void
-rde_update6_queue_runner(uint8_t aid)
-{
-       struct rde_peer         *peer;
-       int                      r, sent, max = RDE_RUNNER_ROUNDS / 2;
-       uint16_t                 len;
-
-       /* first withdraws ... */
-       do {
-               sent = 0;
-               RB_FOREACH(peer, peer_tree, &peertable) {
-                       if (peer->conf.id == 0)
-                               continue;
-                       if (peer->state != PEER_UP)
+                       if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) ==
+                           NULL)
+                               fatal("%s", __func__);
+                       if (up_dump_withdraws(buf, peer, aid) == -1) {
+                               ibuf_free(buf);
                                continue;
-                       if (peer->throttled)
-                               continue;
-                       len = sizeof(queue_buf) - MSGSIZE_HEADER;
-                       r = up_dump_mp_unreach(queue_buf, len, peer, aid);
-                       if (r == -1)
-                               continue;
-                       /* finally send message to SE */
-                       if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
-                           0, -1, queue_buf, r) == -1)
-                               fatal("%s %d imsg_compose error", __func__,
-                                   __LINE__);
+                       }
+                       if (imsg_compose_ibuf(ibuf_se, IMSG_UPDATE,
+                           peer->conf.id, 0, buf) == -1)
+                               fatal("%s: imsg_create error", __func__);
                        sent++;
                }
                max -= sent;
        } while (sent != 0 && max > 0);
 
        /* ... then updates */
-       max = RDE_RUNNER_ROUNDS / 2;
+       max = RDE_RUNNER_ROUNDS;
        do {
                sent = 0;
                RB_FOREACH(peer, peer_tree, &peertable) {
@@ -3555,7 +3499,9 @@ rde_update6_queue_runner(uint8_t aid)
                                continue;
                        if (peer->throttled)
                                continue;
-                       len = sizeof(queue_buf) - MSGSIZE_HEADER;
+                       if (RB_EMPTY(&peer->updates[aid]))
+                               continue;
+
                        if (up_is_eor(peer, aid)) {
                                int sent_eor = peer->sent_eor & (1 << aid);
                                if (peer->capa.grestart.restart && !sent_eor)
@@ -3565,15 +3511,17 @@ rde_update6_queue_runner(uint8_t aid)
                                            ROUTE_REFRESH_END_RR);
                                continue;
                        }
-                       r = up_dump_mp_reach(queue_buf, len, peer, aid);
-                       if (r == 0)
-                               continue;
 
-                       /* finally send message to SE */
-                       if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
-                           0, -1, queue_buf, r) == -1)
-                               fatal("%s %d imsg_compose error", __func__,
-                                   __LINE__);
+                       if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) ==
+                           NULL)
+                               fatal("%s", __func__);
+                       if (up_dump_update(buf, peer, aid) == -1) {
+                               ibuf_free(buf);
+                               continue;
+                       }
+                       if (imsg_compose_ibuf(ibuf_se, IMSG_UPDATE,
+                           peer->conf.id, 0, buf) == -1)
+                               fatal("%s: imsg_compose_ibuf error", __func__);
                        sent++;
                }
                max -= sent;
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.294
diff -u -p -r1.294 rde.h
--- rde.h       12 Jun 2023 12:48:07 -0000      1.294
+++ rde.h       27 Jun 2023 08:33:54 -0000
@@ -447,10 +447,7 @@ int        community_add(struct rde_community *
 int    community_large_add(struct rde_community *, int, void *, size_t);
 int    community_ext_add(struct rde_community *, int, int, void *, size_t);
 
-int    community_write(struct rde_community *, void *, uint16_t);
-int    community_large_write(struct rde_community *, void *, uint16_t);
-int    community_ext_write(struct rde_community *, int, void *, uint16_t);
-int    community_writebuf(struct ibuf *, struct rde_community *);
+int    community_writebuf(struct rde_community *, uint8_t, int, struct ibuf *);
 
 void                    communities_shutdown(void);
 struct rde_community   *communities_lookup(struct rde_community *);
@@ -519,8 +516,7 @@ struct pt_entry     *pt_add_flow(struct flow
 void    pt_remove(struct pt_entry *);
 struct pt_entry        *pt_lookup(struct bgpd_addr *);
 int     pt_prefix_cmp(const struct pt_entry *, const struct pt_entry *);
-int     pt_write(u_char *, int, struct pt_entry *, int);
-int     pt_writebuf(struct ibuf *, struct pt_entry *);
+int     pt_writebuf(struct ibuf *, struct pt_entry *, int, int, uint32_t);
 
 static inline struct pt_entry *
 pt_ref(struct pt_entry *pt)
@@ -710,10 +706,8 @@ void                up_generate_addpath_all(struct rd
                    struct prefix *, struct prefix *);
 void            up_generate_default(struct rde_peer *, uint8_t);
 int             up_is_eor(struct rde_peer *, uint8_t);
-int             up_dump_withdraws(u_char *, int, struct rde_peer *, uint8_t);
-int             up_dump_mp_unreach(u_char *, int, struct rde_peer *, uint8_t);
-int             up_dump_attrnlri(u_char *, int, struct rde_peer *);
-int             up_dump_mp_reach(u_char *, int, struct rde_peer *, uint8_t);
+int             up_dump_withdraws(struct ibuf *, struct rde_peer *, uint8_t);
+int             up_dump_update(struct ibuf *, struct rde_peer *, uint8_t);
 
 /* rde_aspa.c */
 void            aspa_validation(struct rde_aspa *, struct aspath *,
Index: rde_attr.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_attr.c,v
retrieving revision 1.133
diff -u -p -r1.133 rde_attr.c
--- rde_attr.c  12 Jun 2023 12:10:17 -0000      1.133
+++ rde_attr.c  27 Jun 2023 08:33:54 -0000
@@ -30,42 +30,6 @@
 #include "log.h"
 
 int
-attr_write(void *p, uint16_t p_len, uint8_t flags, uint8_t type,
-    void *data, uint16_t data_len)
-{
-       u_char          *b = p;
-       uint16_t         tmp, tot_len = 2; /* attribute header (without len) */
-
-       flags &= ~ATTR_DEFMASK;
-       if (data_len > 255) {
-               tot_len += 2 + data_len;
-               flags |= ATTR_EXTLEN;
-       } else {
-               tot_len += 1 + data_len;
-       }
-
-       if (tot_len > p_len)
-               return (-1);
-
-       *b++ = flags;
-       *b++ = type;
-       if (data_len > 255) {
-               tmp = htons(data_len);
-               memcpy(b, &tmp, sizeof(tmp));
-               b += 2;
-       } else
-               *b++ = (u_char)data_len;
-
-       if (data == NULL)
-               return (tot_len - data_len);
-
-       if (data_len != 0)
-               memcpy(b, data, data_len);
-
-       return (tot_len);
-}
-
-int
 attr_writebuf(struct ibuf *buf, uint8_t flags, uint8_t type, void *data,
     uint16_t data_len)
 {
Index: rde_community.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_community.c,v
retrieving revision 1.12
diff -u -p -r1.12 rde_community.c
--- rde_community.c     17 Jun 2023 08:05:48 -0000      1.12
+++ rde_community.c     27 Jun 2023 09:37:35 -0000
@@ -225,10 +225,9 @@ insert_community(struct rde_community *c
                struct community *new;
                int newsize = comm->size + 8;
 
-               if ((new = reallocarray(comm->communities, newsize,
-                   sizeof(struct community))) == NULL)
+               if ((new = recallocarray(comm->communities, comm->size,
+                   newsize, sizeof(struct community))) == NULL)
                        fatal(__func__);
-               memset(new + comm->size, 0, 8 * sizeof(struct community));
                comm->communities = new;
                comm->size = newsize;
        }
@@ -256,6 +255,8 @@ insert_community(struct rde_community *c
 static int
 non_transitive_ext_community(struct community *c)
 {
+       if ((uint8_t)c->flags != COMMUNITY_TYPE_EXT)
+               return 0;
        if ((c->data3 >> 8) & EXT_COMMUNITY_NON_TRANSITIVE)
                return 1;
        return 0;
@@ -514,228 +515,79 @@ community_ext_add(struct rde_community *
 
 /*
  * Convert communities back to the wireformat.
- * This function needs to make sure that the attribute buffer is overflowed
- * while writing out the communities.
- * - community_write for ATTR_COMMUNITUES
- * - community_large_write for ATTR_LARGE_COMMUNITIES
- * - community_ext_write for ATTR_EXT_COMMUNITIES
- *   When writing ATTR_EXT_COMMUNITIES non-transitive communities need to
- *   be skipped if it is sent to an ebgp peer.
+ * When writing ATTR_EXT_COMMUNITIES non-transitive communities need to
+ * be skipped if they are sent to an ebgp peer.
  */
 int
-community_write(struct rde_community *comm, void *buf, uint16_t len)
+community_writebuf(struct rde_community *comm, uint8_t type, int ebgp,
+    struct ibuf *buf)
 {
-       uint8_t *b = buf;
-       uint16_t c;
-       size_t n = 0;
-       int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
-
-       if (comm->flags & PARTIAL_COMMUNITIES)
-               flags |= ATTR_PARTIAL;
-
-       /* first count how many communities will be written */
-       for (l = 0; l < comm->nentries; l++)
-               if ((uint8_t)comm->communities[l].flags ==
-                   COMMUNITY_TYPE_BASIC)
-                       n++;
-
-       if (n == 0)
-               return 0;
+       struct community *cp;
+       uint64_t ext;
+       int l, size, start = -1, end, num = 0;
+       int flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
+       uint8_t t;
 
-       /* write attribute header */
-       r = attr_write(b, len, flags, ATTR_COMMUNITIES, NULL, n * 4);
-       if (r == -1)
+       switch (type) {
+       case ATTR_COMMUNITIES:
+               if (comm->flags & PARTIAL_COMMUNITIES)
+                       flags |= ATTR_PARTIAL;
+               size = 4;
+               t = COMMUNITY_TYPE_BASIC;
+               break;
+       case ATTR_EXT_COMMUNITIES:
+               if (comm->flags & PARTIAL_EXT_COMMUNITIES)
+                       flags |= ATTR_PARTIAL;
+               size = 8;
+               t = COMMUNITY_TYPE_EXT;
+               break;
+       case ATTR_LARGE_COMMUNITIES:
+               if (comm->flags & PARTIAL_LARGE_COMMUNITIES)
+                       flags |= ATTR_PARTIAL;
+               size = 12;
+               t = COMMUNITY_TYPE_LARGE;
+               break;
+       default:
                return -1;
-       b += r;
-
-       /* write out the communities */
-       for (l = 0; l < comm->nentries; l++)
-               if ((uint8_t)comm->communities[l].flags ==
-                   COMMUNITY_TYPE_BASIC) {
-                       c = htons(comm->communities[l].data1);
-                       memcpy(b, &c, sizeof(c));
-                       b += sizeof(c);
-                       r += sizeof(c);
-
-                       c = htons(comm->communities[l].data2);
-                       memcpy(b, &c, sizeof(c));
-                       b += sizeof(c);
-                       r += sizeof(c);
-               }
-
-       return r;
-}
-
-int
-community_large_write(struct rde_community *comm, void *buf, uint16_t len)
-{
-       uint8_t *b = buf;
-       uint32_t c;
-       size_t n = 0;
-       int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
-
-       if (comm->flags & PARTIAL_LARGE_COMMUNITIES)
-               flags |= ATTR_PARTIAL;
+       }
 
        /* first count how many communities will be written */
-       for (l = 0; l < comm->nentries; l++)
-               if ((uint8_t)comm->communities[l].flags ==
-                   COMMUNITY_TYPE_LARGE)
-                       n++;
-
-       if (n == 0)
-               return 0;
-
-       /* write attribute header */
-       r = attr_write(b, len, flags, ATTR_LARGE_COMMUNITIES, NULL, n * 12);
-       if (r == -1)
-               return -1;
-       b += r;
+       for (l = 0; l < comm->nentries; l++) {
+               cp = comm->communities + l;
 
-       /* write out the communities */
-       for (l = 0; l < comm->nentries; l++)
-               if ((uint8_t)comm->communities[l].flags ==
-                   COMMUNITY_TYPE_LARGE) {
-                       c = htonl(comm->communities[l].data1);
-                       memcpy(b, &c, sizeof(c));
-                       b += sizeof(c);
-                       r += sizeof(c);
-
-                       c = htonl(comm->communities[l].data2);
-                       memcpy(b, &c, sizeof(c));
-                       b += sizeof(c);
-                       r += sizeof(c);
-
-                       c = htonl(comm->communities[l].data3);
-                       memcpy(b, &c, sizeof(c));
-                       b += sizeof(c);
-                       r += sizeof(c);
+               if (ebgp && non_transitive_ext_community(cp))
+                       continue;
+               if ((uint8_t)cp->flags == t) {
+                       num++;
+                       if (start == -1)
+                               start = l;
                }
+               if ((uint8_t)cp->flags > t)
+                       break;
+       }
+       end = l;
 
-       return r;
-}
-
-int
-community_ext_write(struct rde_community *comm, int ebgp, void *buf,
-    uint16_t len)
-{
-       struct community *cp;
-       uint8_t *b = buf;
-       uint64_t ext;
-       size_t n = 0;
-       int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
-
-       if (comm->flags & PARTIAL_EXT_COMMUNITIES)
-               flags |= ATTR_PARTIAL;
-
-       /* first count how many communities will be written */
-       for (l = 0; l < comm->nentries; l++)
-               if ((uint8_t)comm->communities[l].flags ==
-                   COMMUNITY_TYPE_EXT && !(ebgp &&
-                   non_transitive_ext_community(&comm->communities[l])))
-                       n++;
-
-       if (n == 0)
+       /* no communities for this type present */
+       if (num == 0)
                return 0;
 
        /* write attribute header */
-       r = attr_write(b, len, flags, ATTR_EXT_COMMUNITIES, NULL, n * 8);
-       if (r == -1)
+       if (attr_writebuf(buf, flags, type, NULL, num * size) == -1)
                return -1;
-       b += r;
 
        /* write out the communities */
-       for (l = 0; l < comm->nentries; l++) {
+       for (l = start; l < end; l++) {
                cp = comm->communities + l;
-               if ((uint8_t)cp->flags == COMMUNITY_TYPE_EXT && !(ebgp &&
-                   non_transitive_ext_community(cp))) {
-                       ext = (uint64_t)cp->data3 << 48;
-                       switch ((cp->data3 >> 8) & EXT_COMMUNITY_VALUE) {
-                       case EXT_COMMUNITY_TRANS_TWO_AS:
-                       case EXT_COMMUNITY_TRANS_OPAQUE:
-                       case EXT_COMMUNITY_TRANS_EVPN:
-                               ext |= ((uint64_t)cp->data1 & 0xffff) << 32;
-                               ext |= (uint64_t)cp->data2;
-                               break;
-                       case EXT_COMMUNITY_TRANS_FOUR_AS:
-                       case EXT_COMMUNITY_TRANS_IPV4:
-                               ext |= (uint64_t)cp->data1 << 16;
-                               ext |= (uint64_t)cp->data2 & 0xffff;
-                               break;
-                       }
-                       ext = htobe64(ext);
-                       memcpy(b, &ext, sizeof(ext));
-                       b += sizeof(ext);
-                       r += sizeof(ext);
-               }
-       }
-
-       return r;
-}
-
-/*
- * Convert communities back to the wireformat and dump them into the ibuf buf.
- * This function is used by the mrt dump code.
- */
-int
-community_writebuf(struct ibuf *buf, struct rde_community *comm)
-{
-       size_t basic_n = 0, large_n = 0, ext_n = 0;
-       int l, flags;
-
-       /* first count how many communities will be written */
-       for (l = 0; l < comm->nentries; l++)
-               if ((uint8_t)comm->communities[l].flags ==
-                   COMMUNITY_TYPE_BASIC)
-                       basic_n++;
-               else if ((uint8_t)comm->communities[l].flags ==
-                   COMMUNITY_TYPE_EXT)
-                       ext_n++;
-               else if ((uint8_t)comm->communities[l].flags ==
-                   COMMUNITY_TYPE_LARGE)
-                       large_n++;
 
-
-       if (basic_n != 0) {
-               /* write attribute header */
-               flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
-               if (comm->flags & PARTIAL_COMMUNITIES)
-                       flags |= ATTR_PARTIAL;
-
-               if (attr_writebuf(buf, flags, ATTR_COMMUNITIES, NULL,
-                   basic_n * 4) == -1)
-                       return -1;
-
-               /* write out the communities */
-               for (l = 0; l < comm->nentries; l++)
-                       if ((uint8_t)comm->communities[l].flags ==
-                           COMMUNITY_TYPE_BASIC) {
-                               uint16_t c;
-                               c = htons(comm->communities[l].data1);
-                               if (ibuf_add(buf, &c, sizeof(c)) == -1)
-                                       return (-1);
-                               c = htons(comm->communities[l].data2);
-                               if (ibuf_add(buf, &c, sizeof(c)) == -1)
-                                       return (-1);
-                       }
-       }
-       if (ext_n != 0) {
-               /* write attribute header */
-               flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
-               if (comm->flags & PARTIAL_COMMUNITIES)
-                       flags |= ATTR_PARTIAL;
-
-               if (attr_writebuf(buf, flags, ATTR_EXT_COMMUNITIES, NULL,
-                   ext_n * 8) == -1)
-                       return -1;
-
-               /* write out the communities */
-               for (l = 0; l < comm->nentries; l++) {
-                       struct community *cp;
-                       uint64_t ext;
-
-                       cp = comm->communities + l;
-                       if ((uint8_t)cp->flags != COMMUNITY_TYPE_EXT)
+               switch (type) {
+               case ATTR_COMMUNITIES:
+                       if (ibuf_add_n16(buf, cp->data1) == -1)
+                               return (-1);
+                       if (ibuf_add_n16(buf, cp->data2) == -1)
+                               return (-1);
+                       break;
+               case ATTR_EXT_COMMUNITIES:
+                       if (ebgp && non_transitive_ext_community(cp))
                                continue;
 
                        ext = (uint64_t)cp->data3 << 48;
@@ -752,38 +604,19 @@ community_writebuf(struct ibuf *buf, str
                                ext |= (uint64_t)cp->data2 & 0xffff;
                                break;
                        }
-                       ext = htobe64(ext);
-                       if (ibuf_add(buf, &ext, sizeof(ext)) == -1)
+                       if (ibuf_add_n64(buf, ext) == -1)
+                               return (-1);
+                       break;
+               case ATTR_LARGE_COMMUNITIES:
+                       if (ibuf_add_n32(buf, cp->data1) == -1)
                                return (-1);
+                       if (ibuf_add_n32(buf, cp->data2) == -1)
+                               return (-1);
+                       if (ibuf_add_n32(buf, cp->data3) == -1)
+                               return (-1);
+                       break;
                }
        }
-       if (large_n != 0) {
-               /* write attribute header */
-               flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
-               if (comm->flags & PARTIAL_COMMUNITIES)
-                       flags |= ATTR_PARTIAL;
-
-               if (attr_writebuf(buf, flags, ATTR_LARGE_COMMUNITIES, NULL,
-                   large_n * 12) == -1)
-                       return -1;
-
-               /* write out the communities */
-               for (l = 0; l < comm->nentries; l++)
-                       if ((uint8_t)comm->communities[l].flags ==
-                           COMMUNITY_TYPE_LARGE) {
-                               uint32_t c;
-                               c = htonl(comm->communities[l].data1);
-                               if (ibuf_add(buf, &c, sizeof(c)) == -1)
-                                       return (-1);
-                               c = htonl(comm->communities[l].data2);
-                               if (ibuf_add(buf, &c, sizeof(c)) == -1)
-                                       return (-1);
-                               c = htonl(comm->communities[l].data3);
-                               if (ibuf_add(buf, &c, sizeof(c)) == -1)
-                                       return (-1);
-                       }
-       }
-
        return 0;
 }
 
Index: rde_prefix.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_prefix.c,v
retrieving revision 1.49
diff -u -p -r1.49 rde_prefix.c
--- rde_prefix.c        19 Apr 2023 07:09:47 -0000      1.49
+++ rde_prefix.c        27 Jun 2023 08:33:54 -0000
@@ -468,143 +468,111 @@ pt_free(struct pt_entry *pte)
 
 /* dump a prefix into specified buffer */
 int
-pt_write(u_char *buf, int len, struct pt_entry *pte, int withdraw)
+pt_writebuf(struct ibuf *buf, struct pt_entry *pte, int withdraw,
+    int add_path, uint32_t pathid)
 {
        struct pt_entry_vpn4    *pvpn4 = (struct pt_entry_vpn4 *)pte;
        struct pt_entry_vpn6    *pvpn6 = (struct pt_entry_vpn6 *)pte;
        struct pt_entry_flow    *pflow = (struct pt_entry_flow *)pte;
-       int                      totlen, flowlen, psize;
+       struct ibuf             *tmp;
+       int                      flowlen, psize;
        uint8_t                  plen;
 
+       if ((tmp = ibuf_dynamic(32, UINT16_MAX)) == NULL)
+               goto fail;
+
+       if (add_path) {
+               if (ibuf_add_n32(tmp, pathid) == -1)
+                       goto fail;
+       }
+
        switch (pte->aid) {
        case AID_INET:
        case AID_INET6:
                plen = pte->prefixlen;
-               totlen = PREFIX_SIZE(plen);
-
-               if (totlen > len)
-                       return (-1);
-               *buf++ = plen;
-               memcpy(buf, pte->data, totlen - 1);
-               return (totlen);
+               if (ibuf_add_n8(tmp, plen) == -1)
+                       goto fail;
+               if (ibuf_add(tmp, pte->data, PREFIX_SIZE(plen) - 1) == -1)
+                       goto fail;
+               break;
        case AID_VPN_IPv4:
                plen = pvpn4->prefixlen;
-               totlen = PREFIX_SIZE(plen) + sizeof(pvpn4->rd);
                psize = PREFIX_SIZE(plen) - 1;
                plen += sizeof(pvpn4->rd) * 8;
                if (withdraw) {
                        /* withdraw have one compat label as placeholder */
-                       totlen += 3;
                        plen += 3 * 8;
                } else {
-                       totlen += pvpn4->labellen;
                        plen += pvpn4->labellen * 8;
                }
 
-               if (totlen > len)
-                       return (-1);
-               *buf++ = plen;
+               if (ibuf_add_n8(tmp, plen) == -1)
+                       goto fail;
                if (withdraw) {
                        /* magic compatibility label as per rfc8277 */
-                       *buf++ = 0x80;
-                       *buf++ = 0x0;
-                       *buf++ = 0x0;
+                       if (ibuf_add_n8(tmp, 0x80) == -1 ||
+                           ibuf_add_zero(tmp, 2) == -1)
+                               goto fail;
                } else {
-                       memcpy(buf, &pvpn4->labelstack, pvpn4->labellen);
-                       buf += pvpn4->labellen;
+                       if (ibuf_add(tmp, &pvpn4->labelstack,
+                           pvpn4->labellen) == -1)
+                               goto fail;
                }
-               memcpy(buf, &pvpn4->rd, sizeof(pvpn4->rd));
-               buf += sizeof(pvpn4->rd);
-               memcpy(buf, &pvpn4->prefix4, psize);
-               return (totlen);
+               if (ibuf_add(tmp, &pvpn4->rd, sizeof(pvpn4->rd)) == -1 ||
+                   ibuf_add(tmp, &pvpn4->prefix4, psize) == -1)
+                       goto fail;
+               break;
        case AID_VPN_IPv6:
                plen = pvpn6->prefixlen;
-               totlen = PREFIX_SIZE(plen) + sizeof(pvpn6->rd);
                psize = PREFIX_SIZE(plen) - 1;
                plen += sizeof(pvpn6->rd) * 8;
                if (withdraw) {
                        /* withdraw have one compat label as placeholder */
-                       totlen += 3;
                        plen += 3 * 8;
                } else {
-                       totlen += pvpn6->labellen;
                        plen += pvpn6->labellen * 8;
                }
 
-               if (totlen > len)
-                       return (-1);
-               *buf++ = plen;
+               if (ibuf_add_n8(tmp, plen) == -1)
+                       goto fail;
                if (withdraw) {
                        /* magic compatibility label as per rfc8277 */
-                       *buf++ = 0x80;
-                       *buf++ = 0x0;
-                       *buf++ = 0x0;
+                       if (ibuf_add_n8(tmp, 0x80) == -1 ||
+                           ibuf_add_zero(tmp, 2) == -1)
+                               goto fail;
                } else {
-                       memcpy(buf, &pvpn6->labelstack, pvpn6->labellen);
-                       buf += pvpn6->labellen;
+                       if (ibuf_add(tmp, &pvpn6->labelstack,
+                           pvpn6->labellen) == -1)
+                               goto fail;
                }
-               memcpy(buf, &pvpn6->rd, sizeof(pvpn6->rd));
-               buf += sizeof(pvpn6->rd);
-               memcpy(buf, &pvpn6->prefix6, psize);
-               return (totlen);
+               if (ibuf_add(tmp, &pvpn6->rd, sizeof(pvpn6->rd)) == -1 ||
+                   ibuf_add(tmp, &pvpn6->prefix6, psize) == -1)
+                       goto fail;
+               break;
        case AID_FLOWSPECv4:
        case AID_FLOWSPECv6:
                flowlen = pflow->len - PT_FLOW_SIZE;
-               totlen = flowlen < FLOWSPEC_LEN_LIMIT ? 1 : 2;
-               totlen += flowlen;
-
-               if (totlen > len)
-                       return (-1);
-
                if (flowlen < FLOWSPEC_LEN_LIMIT) {
-                       *buf++ = flowlen;
+                       if (ibuf_add_n8(tmp, flowlen) == -1)
+                               goto fail;
                } else {
-                       *buf++ = 0xf0 | (flowlen >> 8);
-                       *buf++ = flowlen & 0xff;
+                       if (ibuf_add_n8(tmp, 0xf0 | (flowlen >> 8)) == -1 ||
+                           ibuf_add_n8(tmp, flowlen) == -1)
+                               goto fail;
                }
-               memcpy(buf, &pflow->flow, flowlen);
-               return (totlen);
-       default:
-               return (-1);
-       }
-}
-
-/* dump a prefix into specified ibuf, allocating space for it if needed */
-int
-pt_writebuf(struct ibuf *buf, struct pt_entry *pte)
-{
-       struct pt_entry_vpn4    *pvpn4 = (struct pt_entry_vpn4 *)pte;
-       struct pt_entry_vpn6    *pvpn6 = (struct pt_entry_vpn6 *)pte;
-       struct pt_entry_flow    *pflow = (struct pt_entry_flow *)pte;
-       int                      totlen, flowlen;
-       void                    *bptr;
-
-       switch (pte->aid) {
-       case AID_INET:
-       case AID_INET6:
-               totlen = PREFIX_SIZE(pte->prefixlen);
-               break;
-       case AID_VPN_IPv4:
-               totlen = PREFIX_SIZE(pte->prefixlen) + sizeof(pvpn4->rd) +
-                   pvpn4->labellen;
-               break;
-       case AID_VPN_IPv6:
-               totlen = PREFIX_SIZE(pte->prefixlen) + sizeof(pvpn6->rd) +
-                   pvpn6->labellen;
-               break;
-       case AID_FLOWSPECv4:
-       case AID_FLOWSPECv6:
-               flowlen = pflow->len - PT_FLOW_SIZE;
-               totlen = flowlen < FLOWSPEC_LEN_LIMIT ? 1 : 2;
-               totlen += flowlen;
+               if (ibuf_add(tmp, &pflow->flow, flowlen) == -1)
+                       goto fail;
                break;
        default:
-               return (-1);
+               goto fail;
        }
 
-       if ((bptr = ibuf_reserve(buf, totlen)) == NULL)
-               return (-1);
-       if (pt_write(bptr, totlen, pte, 0) == -1)
-               return (-1);
-       return (0);
+       if (ibuf_add_buf(buf, tmp) == -1)
+               goto fail;
+       ibuf_free(tmp);
+       return 0;
+
+ fail:
+       ibuf_free(tmp);
+       return -1;
 }
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.162
diff -u -p -r1.162 rde_update.c
--- rde_update.c        19 Apr 2023 08:30:37 -0000      1.162
+++ rde_update.c        27 Jun 2023 08:33:54 -0000
@@ -558,17 +558,15 @@ up_prep_adjout(struct rde_peer *peer, st
 
 
 static int
-up_generate_attr(u_char *buf, int len, struct rde_peer *peer,
-    struct filterstate *state, uint8_t aid)
+up_generate_attr(struct ibuf *buf, struct rde_peer *peer,
+    struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh,
+    uint8_t aid)
 {
-       struct rde_aspath *asp = &state->aspath;
-       struct rde_community *comm = &state->communities;
        struct attr     *oa = NULL, *newaggr = NULL;
        u_char          *pdata;
        uint32_t         tmp32;
-       struct bgpd_addr *nexthop;
-       int              flags, r, neednewpath = 0;
-       uint16_t         wlen = 0, plen;
+       int              flags, neednewpath = 0;
+       uint16_t         plen;
        uint8_t          oalen = 0, type;
 
        if (asp->others_len > 0)
@@ -576,8 +574,6 @@ up_generate_attr(u_char *buf, int len, s
 
        /* dump attributes in ascending order */
        for (type = ATTR_ORIGIN; type < 255; type++) {
-               r = 0;
-
                while (oa && oa->type < type) {
                        if (oalen < asp->others_len)
                                oa = asp->others[oalen++];
@@ -590,9 +586,9 @@ up_generate_attr(u_char *buf, int len, s
                 * Attributes stored in rde_aspath
                 */
                case ATTR_ORIGIN:
-                       if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
-                           ATTR_ORIGIN, &asp->origin, 1)) == -1)
-                               return (-1);
+                       if (attr_writebuf(buf, ATTR_WELL_KNOWN,
+                           ATTR_ORIGIN, &asp->origin, 1) == -1)
+                               return -1;
                        break;
                case ATTR_ASPATH:
                        plen = aspath_length(asp->aspath);
@@ -602,20 +598,19 @@ up_generate_attr(u_char *buf, int len, s
                                pdata = aspath_deflate(pdata, &plen,
                                    &neednewpath);
 
-                       if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
-                           ATTR_ASPATH, pdata, plen)) == -1)
-                               return (-1);
+                       if (attr_writebuf(buf, ATTR_WELL_KNOWN,
+                           ATTR_ASPATH, pdata, plen) == -1)
+                               return -1;
                        if (!peer_has_as4byte(peer))
                                free(pdata);
                        break;
                case ATTR_NEXTHOP:
                        switch (aid) {
                        case AID_INET:
-                               nexthop = &state->nexthop->exit_nexthop;
-                               if ((r = attr_write(buf + wlen, len,
-                                   ATTR_WELL_KNOWN, ATTR_NEXTHOP,
-                                   &nexthop->v4.s_addr, 4)) == -1)
-                                       return (-1);
+                               if (attr_writebuf(buf, ATTR_WELL_KNOWN,
+                                   ATTR_NEXTHOP, &nh->exit_nexthop.v4,
+                                   sizeof(nh->exit_nexthop.v4)) == -1)
+                                       return -1;
                                break;
                        default:
                                break;
@@ -632,37 +627,29 @@ up_generate_attr(u_char *buf, int len, s
                            asp->flags & F_ATTR_MED_ANNOUNCE ||
                            peer->flags & PEERFLAG_TRANS_AS)) {
                                tmp32 = htonl(asp->med);
-                               if ((r = attr_write(buf + wlen, len,
-                                   ATTR_OPTIONAL, ATTR_MED, &tmp32, 4)) == -1)
-                                       return (-1);
+                               if (attr_writebuf(buf, ATTR_OPTIONAL,
+                                   ATTR_MED, &tmp32, 4) == -1)
+                                       return -1;
                        }
                        break;
                case ATTR_LOCALPREF:
                        if (!peer->conf.ebgp) {
                                /* local preference, only valid for ibgp */
                                tmp32 = htonl(asp->lpref);
-                               if ((r = attr_write(buf + wlen, len,
-                                   ATTR_WELL_KNOWN, ATTR_LOCALPREF, &tmp32,
-                                   4)) == -1)
-                                       return (-1);
+                               if (attr_writebuf(buf, ATTR_WELL_KNOWN,
+                                   ATTR_LOCALPREF, &tmp32, 4) == -1)
+                                       return -1;
                        }
                        break;
                /*
                 * Communities are stored in struct rde_community
                 */
                case ATTR_COMMUNITIES:
-                       if ((r = community_write(comm, buf + wlen, len)) == -1)
-                               return (-1);
-                       break;
                case ATTR_EXT_COMMUNITIES:
-                       if ((r = community_ext_write(comm, peer->conf.ebgp,
-                           buf + wlen, len)) == -1)
-                               return (-1);
-                       break;
                case ATTR_LARGE_COMMUNITIES:
-                       if ((r = community_large_write(comm, buf + wlen,
-                           len)) == -1)
-                               return (-1);
+                       if (community_writebuf(comm, type, peer->conf.ebgp,
+                           buf) == -1)
+                               return -1;
                        break;
                /*
                 * NEW to OLD conversion when sending stuff to a 2byte AS peer
@@ -675,11 +662,10 @@ up_generate_attr(u_char *buf, int len, s
                                flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
                                if (!(asp->flags & F_PREFIX_ANNOUNCED))
                                        flags |= ATTR_PARTIAL;
-                               if (plen == 0)
-                                       r = 0;
-                               else if ((r = attr_write(buf + wlen, len, flags,
-                                   ATTR_AS4_PATH, pdata, plen)) == -1)
-                                       return (-1);
+                               if (plen != 0)
+                                       if (attr_writebuf(buf, flags,
+                                           ATTR_AS4_PATH, pdata, plen) == -1)
+                                               return -1;
                        }
                        break;
                case ATTR_AS4_AGGREGATOR:
@@ -687,10 +673,10 @@ up_generate_attr(u_char *buf, int len, s
                                flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
                                if (!(asp->flags & F_PREFIX_ANNOUNCED))
                                        flags |= ATTR_PARTIAL;
-                               if ((r = attr_write(buf + wlen, len, flags,
+                               if (attr_writebuf(buf, flags,
                                    ATTR_AS4_AGGREGATOR, newaggr->data,
-                                   newaggr->len)) == -1)
-                                       return (-1);
+                                   newaggr->len) == -1)
+                                       return -1;
                        }
                        break;
                /*
@@ -712,24 +698,24 @@ up_generate_attr(u_char *buf, int len, s
                case ATTR_ATOMIC_AGGREGATE:
                        if (oa == NULL || oa->type != type)
                                break;
-                       if ((r = attr_write(buf + wlen, len,
-                           ATTR_WELL_KNOWN, ATTR_ATOMIC_AGGREGATE,
-                           NULL, 0)) == -1)
-                               return (-1);
+                       if (attr_writebuf(buf, ATTR_WELL_KNOWN,
+                           ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1)
+                               return -1;
                        break;
                case ATTR_AGGREGATOR:
                        if (oa == NULL || oa->type != type)
                                break;
+                       if ((!(oa->flags & ATTR_TRANSITIVE)) &&
+                           peer->conf.ebgp)
+                               break;
                        if (!peer_has_as4byte(peer)) {
                                /* need to deflate the aggregator */
-                               uint8_t t[6];
+                               uint8_t         t[6];
                                uint16_t        tas;
 
                                if ((!(oa->flags & ATTR_TRANSITIVE)) &&
-                                   peer->conf.ebgp) {
-                                       r = 0;
+                                   peer->conf.ebgp)
                                        break;
-                               }
 
                                memcpy(&tmp32, oa->data, sizeof(tmp32));
                                if (ntohl(tmp32) > USHRT_MAX) {
@@ -742,30 +728,31 @@ up_generate_attr(u_char *buf, int len, s
                                memcpy(t + sizeof(tas),
                                    oa->data + sizeof(tmp32),
                                    oa->len - sizeof(tmp32));
-                               if ((r = attr_write(buf + wlen, len,
-                                   oa->flags, oa->type, &t, sizeof(t))) == -1)
-                                       return (-1);
-                               break;
+                               if (attr_writebuf(buf, oa->flags,
+                                   oa->type, &t, sizeof(t)) == -1)
+                                       return -1;
+                       } else {
+                               if (attr_writebuf(buf, oa->flags, oa->type,
+                                   oa->data, oa->len) == -1)
+                                       return -1;
                        }
-                       /* FALLTHROUGH */
+                       break;
                case ATTR_ORIGINATOR_ID:
                case ATTR_CLUSTER_LIST:
                case ATTR_OTC:
                        if (oa == NULL || oa->type != type)
                                break;
                        if ((!(oa->flags & ATTR_TRANSITIVE)) &&
-                           peer->conf.ebgp) {
-                               r = 0;
+                           peer->conf.ebgp)
                                break;
-                       }
-                       if ((r = attr_write(buf + wlen, len,
-                           oa->flags, oa->type, oa->data, oa->len)) == -1)
-                               return (-1);
+                       if (attr_writebuf(buf, oa->flags, oa->type,
+                           oa->data, oa->len) == -1)
+                               return -1;
                        break;
                default:
                        if (oa == NULL && type >= ATTR_FIRST_UNKNOWN)
                                /* there is no attribute left to dump */
-                               goto done;
+                               return (0);
 
                        if (oa == NULL || oa->type != type)
                                break;
@@ -779,16 +766,12 @@ up_generate_attr(u_char *buf, int len, s
                                 */
                                break;
                        }
-                       if ((r = attr_write(buf + wlen, len,
-                           oa->flags | ATTR_PARTIAL, oa->type,
-                           oa->data, oa->len)) == -1)
-                               return (-1);
+                       if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL,
+                           oa->type, oa->data, oa->len) == -1)
+                               return -1;
                }
-               wlen += r;
-               len -= r;
        }
-done:
-       return (wlen);
+       return 0;
 }
 
 /*
@@ -817,33 +800,42 @@ up_is_eor(struct rde_peer *peer, uint8_t
 /* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */
 #define MIN_UPDATE_LEN 16
 
+static void
+up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p,
+    struct rde_peer *peer, int withdraw)
+{
+       if (withdraw) {
+               /* prefix no longer needed, remove it */
+               prefix_adjout_destroy(p);
+               peer->stats.prefix_sent_withdraw++;
+       } else {
+               /* prefix still in Adj-RIB-Out, keep it */
+               RB_REMOVE(prefix_tree, prefix_head, p);
+               p->flags &= ~PREFIX_FLAG_UPDATE;
+               peer->stats.pending_update--;
+               peer->stats.prefix_sent_update++;
+       }
+}
+
 /*
  * Write prefixes to buffer until either there is no more space or
  * the next prefix has no longer the same ASPATH attributes.
+ * Returns -1 if no prefix was written else 0.
  */
 static int
-up_dump_prefix(u_char *buf, int len, struct prefix_tree *prefix_head,
+up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
     struct rde_peer *peer, int withdraw)
 {
        struct prefix   *p, *np;
-       uint32_t         pathid;
-       int              r, wpos = 0, done = 0;
+       int              done = 0, has_ap = -1, rv = -1;
 
        RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
-               if (peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND)) {
-                       if (len <= wpos + (int)sizeof(pathid))
-                               break;
-                       pathid = htonl(p->path_id_tx);
-                       memcpy(buf + wpos, &pathid, sizeof(pathid));
-                       wpos += sizeof(pathid);
-               }
-               if ((r = pt_write(buf + wpos, len - wpos, p->pt,
-                   withdraw)) == -1) {
-                       if (peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND))
-                               wpos -= sizeof(pathid);
+               if (has_ap == -1)
+                       has_ap = peer_has_add_path(peer, p->pt->aid,
+                           CAPA_AP_SEND);
+               if (pt_writebuf(buf, p->pt, withdraw, has_ap, p->path_id_tx) ==
+                   -1)
                        break;
-               }
-               wpos += r;
 
                /* make sure we only dump prefixes which belong together */
                if (np == NULL ||
@@ -854,276 +846,249 @@ up_dump_prefix(u_char *buf, int len, str
                    (np->flags & PREFIX_FLAG_EOR))
                        done = 1;
 
-               if (withdraw) {
-                       /* prefix no longer needed, remove it */
-                       prefix_adjout_destroy(p);
-                       peer->stats.prefix_sent_withdraw++;
-               } else {
-                       /* prefix still in Adj-RIB-Out, keep it */
-                       RB_REMOVE(prefix_tree, prefix_head, p);
-                       p->flags &= ~PREFIX_FLAG_UPDATE;
-                       peer->stats.pending_update--;
-                       peer->stats.prefix_sent_update++;
-               }
-
+               rv = 0;
+               up_prefix_free(prefix_head, p, peer, withdraw);
                if (done)
                        break;
        }
-       return (wpos);
-}
-
-int
-up_dump_withdraws(u_char *buf, int len, struct rde_peer *peer, uint8_t aid)
-{
-       uint16_t wpos, wd_len;
-       int r;
-
-       if (len < MIN_UPDATE_LEN)
-               return (-1);
-
-       /* reserve space for the length field */
-       wpos = 2;
-       r = up_dump_prefix(buf + wpos, len - wpos, &peer->withdraws[aid],
-           peer, 1);
-       wd_len = htons(r);
-       memcpy(buf, &wd_len, 2);
-
-       return (wpos + r);
-}
-
-int
-up_dump_mp_unreach(u_char *buf, int len, struct rde_peer *peer, uint8_t aid)
-{
-       u_char          *attrbuf;
-       int              wpos, r;
-       uint16_t         attr_len, tmp;
-
-       if (len < MIN_UPDATE_LEN || RB_EMPTY(&peer->withdraws[aid]))
-               return (-1);
-
-       /* reserve space for withdraw len, attr len */
-       wpos = 2 + 2;
-       attrbuf = buf + wpos;
-
-       /* attribute header, defaulting to extended length one */
-       attrbuf[0] = ATTR_OPTIONAL | ATTR_EXTLEN;
-       attrbuf[1] = ATTR_MP_UNREACH_NLRI;
-       wpos += 4;
-
-       /* afi & safi */
-       if (aid2afi(aid, &tmp, buf + wpos + 2))
-               fatalx("up_dump_mp_unreach: bad AID");
-       tmp = htons(tmp);
-       memcpy(buf + wpos, &tmp, sizeof(uint16_t));
-       wpos += 3;
-
-       r = up_dump_prefix(buf + wpos, len - wpos, &peer->withdraws[aid],
-           peer, 1);
-       if (r == 0)
-               return (-1);
-       wpos += r;
-       attr_len = r + 3;       /* prefixes + afi & safi */
-
-       /* attribute length */
-       attr_len = htons(attr_len);
-       memcpy(attrbuf + 2, &attr_len, sizeof(attr_len));
-
-       /* write length fields */
-       memset(buf, 0, sizeof(uint16_t));       /* withdrawn routes len */
-       attr_len = htons(wpos - 4);
-       memcpy(buf + 2, &attr_len, sizeof(attr_len));
-
-       return (wpos);
-}
-
-int
-up_dump_attrnlri(u_char *buf, int len, struct rde_peer *peer)
-{
-       struct filterstate       state;
-       struct prefix           *p;
-       int                      r, wpos;
-       uint16_t                 attr_len;
-
-       if (len < 2)
-               fatalx("up_dump_attrnlri: buffer way too small");
-       if (len < MIN_UPDATE_LEN)
-               goto done;
-
-       p = RB_MIN(prefix_tree, &peer->updates[AID_INET]);
-       if (p == NULL)
-               goto done;
-
-       rde_filterstate_prep(&state, p);
-       r = up_generate_attr(buf + 2, len - 2, peer, &state, AID_INET);
-       rde_filterstate_clean(&state);
-       if (r == -1) {
-               /*
-                * either no packet or not enough space.
-                * The length field needs to be set to zero else it would be
-                * an invalid bgp update.
-                */
-done:
-               memset(buf, 0, 2);
-               return (2);
-       }
-
-       /* first dump the 2-byte path attribute length */
-       attr_len = htons(r);
-       memcpy(buf, &attr_len, 2);
-       wpos = 2;
-       /* then skip over the already dumped path attributes themselves */
-       wpos += r;
-
-       /* last but not least dump the nlri */
-       r = up_dump_prefix(buf + wpos, len - wpos, &peer->updates[AID_INET],
-           peer, 0);
-       wpos += r;
-
-       return (wpos);
+       return rv;
 }
 
 static int
-up_generate_mp_reach(u_char *buf, int len, struct rde_peer *peer,
-    struct filterstate *state, uint8_t aid)
+up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
+    struct nexthop *nh, uint8_t aid)
 {
-       struct bgpd_addr        *nexthop;
-       u_char                  *attrbuf;
-       int                      r, wpos, attrlen;
-       uint16_t                 tmp;
+       struct bgpd_addr *nexthop;
+       size_t off;
+       uint16_t len, afi;
+       uint8_t safi;
 
-       if (len < 4)
-               return (-1);
        /* attribute header, defaulting to extended length one */
-       buf[0] = ATTR_OPTIONAL | ATTR_EXTLEN;
-       buf[1] = ATTR_MP_REACH_NLRI;
-       wpos = 4;
-       attrbuf = buf + wpos;
+       if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
+               return -1;
+       if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1)
+               return -1;
+       off = ibuf_size(buf);
+       if (ibuf_add_zero(buf, sizeof(len)) == -1)
+               return -1;
+
+       if (aid2afi(aid, &afi, &safi))
+               fatalx("up_generate_mp_reach: bad AID");
+
+       /* AFI + SAFI + NH LEN + NH + Reserved */
+       if (ibuf_add_n16(buf, afi) == -1)
+               return -1;
+       if (ibuf_add_n8(buf, safi) == -1)
+               return -1;
 
        switch (aid) {
        case AID_INET6:
-               attrlen = 21; /* AFI + SAFI + NH LEN + NH + Reserved */
-               if (len < wpos + attrlen)
-                       return (-1);
-               wpos += attrlen;
-               if (aid2afi(aid, &tmp, &attrbuf[2]))
-                       fatalx("up_generate_mp_reach: bad AID");
-               tmp = htons(tmp);
-               memcpy(attrbuf, &tmp, sizeof(tmp));
-               attrbuf[3] = sizeof(struct in6_addr);
-               attrbuf[20] = 0; /* Reserved must be 0 */
-
+               /* NH LEN */
+               if (ibuf_add_n8(buf, sizeof(struct in6_addr)) == -1)
+                       return -1;
                /* write nexthop */
-               attrbuf += 4;
-               nexthop = &state->nexthop->exit_nexthop;
-               memcpy(attrbuf, &nexthop->v6, sizeof(struct in6_addr));
+               nexthop = &nh->exit_nexthop;
+               if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
+                       return -1;
                break;
        case AID_VPN_IPv4:
-               attrlen = 17; /* AFI + SAFI + NH LEN + NH + Reserved */
-               if (len < wpos + attrlen)
-                       return (-1);
-               wpos += attrlen;
-               if (aid2afi(aid, &tmp, &attrbuf[2]))
-                       fatalx("up_generate_mp_reachi: bad AID");
-               tmp = htons(tmp);
-               memcpy(attrbuf, &tmp, sizeof(tmp));
-               attrbuf[3] = sizeof(uint64_t) + sizeof(struct in_addr);
-               memset(attrbuf + 4, 0, sizeof(uint64_t));
-               attrbuf[16] = 0; /* Reserved must be 0 */
-
+               /* NH LEN */
+               if (ibuf_add_n8(buf,
+                   sizeof(uint64_t) + sizeof(struct in_addr)) == -1)
+                       return -1;
+               /* write zero rd */
+               if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
+                       return -1;
                /* write nexthop */
-               attrbuf += 12;
-               nexthop = &state->nexthop->exit_nexthop;
-               memcpy(attrbuf, &nexthop->v4, sizeof(struct in_addr));
+               nexthop = &nh->exit_nexthop;
+               if (ibuf_add(buf, &nexthop->v4, sizeof(struct in_addr)) == -1)
+                       return -1;
                break;
        case AID_VPN_IPv6:
-               attrlen = 29; /* AFI + SAFI + NH LEN + NH + Reserved */
-               if (len < wpos + attrlen)
-                       return (-1);
-               wpos += attrlen;
-               if (aid2afi(aid, &tmp, &attrbuf[2]))
-                       fatalx("up_generate_mp_reachi: bad AID");
-               tmp = htons(tmp);
-               memcpy(attrbuf, &tmp, sizeof(tmp));
-               attrbuf[3] = sizeof(uint64_t) + sizeof(struct in6_addr);
-               memset(attrbuf + 4, 0, sizeof(uint64_t));
-               attrbuf[28] = 0; /* Reserved must be 0 */
-
+               /* NH LEN */
+               if (ibuf_add_n8(buf,
+                   sizeof(uint64_t) + sizeof(struct in6_addr)) == -1)
+                       return -1;
+               /* write zero rd */
+               if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
+                       return -1;
                /* write nexthop */
-               attrbuf += 12;
-               nexthop = &state->nexthop->exit_nexthop;
-               memcpy(attrbuf, &nexthop->v6, sizeof(struct in6_addr));
+               nexthop = &nh->exit_nexthop;
+               if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
+                       return -1;
                break;
        case AID_FLOWSPECv4:
        case AID_FLOWSPECv6:
-               attrlen = 5; /* AFI + SAFI + NH LEN + NH + Reserved */
-               if (len < wpos + attrlen)
-                       return (-1);
-               wpos += attrlen;
-               if (aid2afi(aid, &tmp, &attrbuf[2]))
-                       fatalx("up_generate_mp_reachi: bad AID");
-               tmp = htons(tmp);
-               memcpy(attrbuf, &tmp, sizeof(tmp));
-               attrbuf[3] = 0; /* nh len MUST be 0 */
-               attrbuf[4] = 0; /* Reserved must be 0 */
+               if (ibuf_add_zero(buf, 1) == -1) /* NH LEN MUST be 0 */
+                       return -1;
+               /* no NH */
                break;
        default:
                fatalx("up_generate_mp_reach: unknown AID");
        }
 
-       r = up_dump_prefix(buf + wpos, len - wpos, &peer->updates[aid],
-           peer, 0);
-       if (r == 0) {
-               /* no prefixes written ... */
+       if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
+               return -1;
+
+       if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
+               /* no prefixes written, fail update  */
                return (-1);
-       }
-       attrlen += r;
-       wpos += r;
-       /* update attribute length field */
-       tmp = htons(attrlen);
-       memcpy(buf + 2, &tmp, sizeof(tmp));
 
-       return (wpos);
+       /* update MP_REACH attribute length field */
+       len = ibuf_size(buf) - off - sizeof(len);
+       if (ibuf_set_n16(buf, off, len) == -1)
+               return -1;
+
+       return 0;
 }
 
+/*
+ * Generate UPDATE message containing either just withdraws or updates.
+ * UPDATE messages are contructed like this:
+ *
+ *    +-----------------------------------------------------+
+ *    |   Withdrawn Routes Length (2 octets)                |
+ *    +-----------------------------------------------------+
+ *    |   Withdrawn Routes (variable)                       |
+ *    +-----------------------------------------------------+
+ *    |   Total Path Attribute Length (2 octets)            |
+ *    +-----------------------------------------------------+
+ *    |   Path Attributes (variable)                        |
+ *    +-----------------------------------------------------+
+ *    |   Network Layer Reachability Information (variable) |
+ *    +-----------------------------------------------------+
+ *
+ * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI 
+ * the latter will be the only path attribute in a message.
+ */
+
+/*
+ * Write UPDATE message for withdrawn routes. The size of buf limits
+ * how may routes can be added. Return 0 on success -1 on error which
+ * includes generating an empty withdraw message.
+ */
 int
-up_dump_mp_reach(u_char *buf, int len, struct rde_peer *peer, uint8_t aid)
+up_dump_withdraws(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
 {
-       struct filterstate       state;
-       struct prefix           *p;
-       int                     r, wpos;
-       uint16_t                attr_len;
+       size_t off;
+       uint16_t afi, len;
+       uint8_t safi;
+
+       /* reserve space for the withdrawn routes length field */
+       off = ibuf_size(buf);
+       if (ibuf_add_zero(buf, sizeof(len)) == -1)
+               return -1;
+
+       if (aid != AID_INET) {
+               /* reserve space for 2-byte path attribute length */
+               off = ibuf_size(buf);
+               if (ibuf_add_zero(buf, sizeof(len)) == -1)
+                       return -1;
+
+               /* attribute header, defaulting to extended length one */
+               if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
+                       return -1;
+               if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
+                       return -1;
+               if (ibuf_add_zero(buf, sizeof(len)) == -1)
+                       return -1;
+
+               /* afi & safi */
+               if (aid2afi(aid, &afi, &safi))
+                       fatalx("up_dump_mp_unreach: bad AID");
+               if (ibuf_add_n16(buf, afi) == -1)
+                       return -1;
+               if (ibuf_add_n8(buf, safi) == -1)
+                       return -1;
+       }
+
+       if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
+               return -1;
 
-       if (len < MIN_UPDATE_LEN)
-               return 0;
+       /* update length field (either withdrawn routes or attribute length) */
+       len = ibuf_size(buf) - off - sizeof(len);
+       if (ibuf_set_n16(buf, off, len) == -1)
+               return -1;
+
+       if (aid != AID_INET) {
+               /* write MP_UNREACH_NLRI attribute length (always extended) */ 
+               len -= 4; /* skip attribute header */
+               if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
+                       return -1;
+       } else {
+               /* no extra attributes so set attribute len to 0 */
+               if (ibuf_add_zero(buf, sizeof(len)) == -1)
+                       return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Write UPDATE message for changed and added routes. The size of buf limits
+ * how may routes can be added. The function first dumps the path attributes
+ * and then tries to add as many prefixes using these attributes.
+ * Return 0 on success -1 on error which includes producing an empty message.
+ */
+int
+up_dump_update(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
+{
+       struct bgpd_addr addr;
+       struct prefix *p;
+       size_t off;
+       uint16_t len;
 
-       /* get starting point */
        p = RB_MIN(prefix_tree, &peer->updates[aid]);
        if (p == NULL)
-               return 0;
+               return -1;
+
+       /* withdrawn routes length field is 0 */
+       if (ibuf_add_zero(buf, sizeof(len)) == -1)
+               return -1;
+
+       /* reserve space for 2-byte path attribute length */
+       off = ibuf_size(buf);
+       if (ibuf_add_zero(buf, sizeof(len)) == -1)
+               return -1;
+
+       if (up_generate_attr(buf, peer, prefix_aspath(p),
+           prefix_communities(p), prefix_nexthop(p), aid) == -1)
+               goto fail;
 
-       wpos = 4;       /* reserve space for length fields */
+       if (aid != AID_INET) {
+               /* write mp attribute including nlri */
 
-       rde_filterstate_prep(&state, p);
+               /*
+                * RFC 7606 wants this to be first but then we need
+                * to use multiple buffers with adjusted length to
+                * merge the attributes together in reverse order of
+                * creation.
+                */
+               if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
+                   -1)
+                       goto fail;
+       }
 
-       /* write regular path attributes */
-       r = up_generate_attr(buf + wpos, len - wpos, peer, &state, aid);
-       if (r == -1) {
-               rde_filterstate_clean(&state);
-               return 0;
+       /* update attribute length field */
+       len = ibuf_size(buf) - off - sizeof(len);
+       if (ibuf_set_n16(buf, off, len) == -1)
+               return -1;
+
+       if (aid == AID_INET) {
+               /* last but not least dump the IPv4 nlri */
+               if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
+                       goto fail;
        }
-       wpos += r;
 
-       /* write mp attribute */
-       r = up_generate_mp_reach(buf + wpos, len - wpos, peer, &state, aid);
-       rde_filterstate_clean(&state);
-       if (r == -1)
-               return 0;
-       wpos += r;
-
-       /* write length fields */
-       memset(buf, 0, sizeof(uint16_t));       /* withdrawn routes len */
-       attr_len = htons(wpos - 4);
-       memcpy(buf + 2, &attr_len, sizeof(attr_len));
+       return 0;
 
-       return (wpos);
+fail:
+       /* Not enough space. Drop prefix, it will never fit. */
+       pt_getaddr(p->pt, &addr);
+       log_peer_warnx(&peer->conf, "path attributes to large, "
+           "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);
+
+       up_prefix_free(&peer->updates[AID_INET], p, peer, 0);
+       /* XXX should probably send a withdraw for this prefix */
+       return -1;
 }
Index: session.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.445
diff -u -p -r1.445 session.c
--- session.c   25 May 2023 14:20:25 -0000      1.445
+++ session.c   27 Jun 2023 08:33:54 -0000
@@ -1383,7 +1383,7 @@ session_sendmsg(struct bgp_msg *msg, str
                if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
                    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
                    mrt->group_id == p->conf.groupid))
-                       mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p,
+                       mrt_dump_bgp_msg(mrt, ibuf_data(msg->buf), msg->len, p,
                            msg->type);
        }
 
@@ -1589,7 +1589,7 @@ session_open(struct peer *p)
                        uint8_t op_len = optparamlen;
                        errs += ibuf_add(buf->buf, &op_len, 1);
                }
-               errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
+               errs += ibuf_add_buf(buf->buf, opb);
        }
 
        ibuf_free(opb);

Reply via email to