This diff is a bit of a monster. It changes the Adj-RIB-Out into a
peer-specific set of RB trees instead of a rib in the original sense.
The reason for this is that the more peers a system has, the more
elements end up linked into the Adj-RIB-Out, and many operations do
linear searches over it, which does not scale.
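
To illustrate the data structure change, here is a minimal sketch (not
the bgpd code; the names xprefix, xpeer and xadjout are made up): each
peer carries its own RB tree keyed by prefix, so an Adj-RIB-Out lookup
becomes a per-peer O(log n) tree search instead of a linear scan over
one shared rib.

	#include <sys/types.h>
	#include <sys/tree.h>
	#include <netinet/in.h>
	#include <stdlib.h>
	#include <string.h>

	struct xprefix {
		RB_ENTRY(xprefix)	 entry;
		struct in_addr		 addr;		/* simplified key */
		int			 prefixlen;
	};

	RB_HEAD(xadjout, xprefix);

	static int
	xprefix_cmp(struct xprefix *a, struct xprefix *b)
	{
		if (a->addr.s_addr != b->addr.s_addr)
			return (a->addr.s_addr < b->addr.s_addr ? -1 : 1);
		return (a->prefixlen - b->prefixlen);
	}

	RB_GENERATE_STATIC(xadjout, xprefix, entry, xprefix_cmp)

	struct xpeer {
		/* one tree per peer, RB_INIT()ed when the peer is created */
		struct xadjout	 adj_rib_out;
	};

	/* O(log n) lookup in this peer's Adj-RIB-Out */
	static struct xprefix *
	xprefix_lookup(struct xpeer *peer, struct in_addr addr, int prefixlen)
	{
		struct xprefix	 key;

		memset(&key, 0, sizeof(key));
		key.addr = addr;
		key.prefixlen = prefixlen;
		return (RB_FIND(xadjout, &peer->adj_rib_out, &key));
	}

	/* add a prefix to this peer's Adj-RIB-Out, reusing an existing entry */
	static struct xprefix *
	xprefix_insert(struct xpeer *peer, struct in_addr addr, int prefixlen)
	{
		struct xprefix	*p;

		if ((p = xprefix_lookup(peer, addr, prefixlen)) != NULL)
			return (p);
		if ((p = calloc(1, sizeof(*p))) == NULL)
			return (NULL);
		p->addr = addr;
		p->prefixlen = prefixlen;
		RB_INSERT(xadjout, &peer->adj_rib_out, p);
		return (p);
	}

The real code in the diff below keys the per-peer prefix_index tree on
the shared pt_entry instead of a plain address, but the lookup pattern
is the same.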

I did some testing with 4000 peers each sending 1 prefix, which is then
sent back to all peers (resulting in 16 million updates in the
Adj-RIB-Out). Without this diff the system takes about 1h to bring up
all sessions; with the diff it finishes in around 5min.

To avoid increasing the memory footprint, struct prefix now uses a
union holding either the list or the RB tree entries. Additionally, the
rib dump runner was adjusted so that it also works with the
Adj-RIB-Out. bgpctl show rib out changed a bit since it now dumps one
peer after the other; apart from that the behaviour should be the same.
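
In effect the rde.h hunk below changes struct prefix roughly along
these lines (simplified sketch with an illustrative struct name; only
the entry union is shown). A prefix is only ever linked either via the
lists (Adj-RIB-In/Loc-RIB) or via the RB trees (per-peer Adj-RIB-Out),
so the two sets of link fields can share storage and the struct does
not grow:

	#include <sys/queue.h>
	#include <sys/tree.h>

	struct prefix_sketch {
		union {
			struct {
				LIST_ENTRY(prefix_sketch)	rib, nexthop;
			} list;		/* while linked into a rib entry */
			struct {
				RB_ENTRY(prefix_sketch)		index, update;
			} tree;		/* while in a per-peer Adj-RIB-Out */
		} entry;
		/* ... the remaining members of struct prefix are unchanged ... */
	};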

Please test
-- 
:wq Claudio


Index: mrt.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
retrieving revision 1.97
diff -u -p -r1.97 mrt.c
--- mrt.c       25 Jun 2019 21:33:55 -0000      1.97
+++ mrt.c       26 Jun 2019 09:44:12 -0000
@@ -512,15 +512,11 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
                goto fail;
        }
        nump = 0;
-       LIST_FOREACH(p, &re->prefix_h, rib_l) {
+       LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
                struct nexthop          *nexthop;
                struct bgpd_addr        *nh;
                struct ibuf             *tbuf;
 
-               /* skip pending withdraw in Adj-RIB-Out */
-               if (prefix_aspath(p) == NULL)
-                       continue;
-
                nexthop = prefix_nexthop(p);
                if (nexthop == NULL) {
                        bzero(&addr, sizeof(struct bgpd_addr));
@@ -683,10 +679,7 @@ mrt_dump_upcall(struct rib_entry *re, vo
         * dumps the table so we do the same. If only the active route should
         * be dumped p should be set to p = pt->active.
         */
-       LIST_FOREACH(p, &re->prefix_h, rib_l) {
-               /* skip pending withdraw in Adj-RIB-Out */
-               if (prefix_aspath(p) == NULL)
-                       continue;
+       LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
                if (mrtbuf->type == MRT_TABLE_DUMP)
                        mrt_dump_entry(mrtbuf, p, mrtbuf->seqnum++,
                            prefix_peer(p));
Index: parse.y
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
retrieving revision 1.392
diff -u -p -r1.392 parse.y
--- parse.y     22 Jun 2019 05:36:40 -0000      1.392
+++ parse.y     22 Jun 2019 05:44:57 -0000
@@ -3282,8 +3282,6 @@ parse_config(char *filename, struct peer
 
        add_rib("Adj-RIB-In", conf->default_tableid,
            F_RIB_NOFIB | F_RIB_NOEVALUATE);
-       add_rib("Adj-RIB-Out", conf->default_tableid,
-           F_RIB_NOFIB | F_RIB_NOEVALUATE);
        add_rib("Loc-RIB", conf->default_tableid, F_RIB_LOCAL);
 
        if ((file = pushfile(filename, 1)) == NULL)
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.475
diff -u -p -r1.475 rde.c
--- rde.c       1 Jul 2019 07:07:08 -0000       1.475
+++ rde.c       1 Jul 2019 07:37:09 -0000
@@ -94,8 +94,8 @@ u_int8_t       rde_roa_validity(struct rde_pr
 void            peer_init(u_int32_t);
 void            peer_shutdown(void);
 int             peer_localaddrs(struct rde_peer *, struct bgpd_addr *);
+struct rde_peer *peer_match(struct ctl_neighbor *, u_int32_t);
 struct rde_peer        *peer_add(u_int32_t, struct peer_config *);
-struct rde_peer        *peer_get(u_int32_t);
 void            peer_up(u_int32_t, struct session_up *);
 void            peer_down(u_int32_t);
 void            peer_flush(struct rde_peer *, u_int8_t, time_t);
@@ -133,7 +133,7 @@ int                  softreconfig;
 struct rde_dump_ctx {
        LIST_ENTRY(rde_dump_ctx)        entry;
        struct ctl_show_rib_request     req;
-       u_int16_t                       rid;
+       u_int32_t                       peerid;
        u_int8_t                        throttled;
 };
 
@@ -220,7 +220,6 @@ rde_main(int debug, int verbose)
 
        /* make sure the default RIBs are setup */
        rib_new("Adj-RIB-In", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE);
-       rib_new("Adj-RIB-Out", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE);
 
        out_rules = calloc(1, sizeof(struct filter_head));
        if (out_rules == NULL)
@@ -2242,7 +2241,7 @@ rde_dump_rib_as(struct prefix *p, struct
        rib.origin = asp->origin;
        rib.validation_state = p->validation_state;
        rib.flags = 0;
-       if (p->re->active == p)
+       if (p->re != NULL && p->re->active == p)
                rib.flags |= F_PREF_ACTIVE;
        if (!prefix_peer(p)->conf.ebgp)
                rib.flags |= F_PREF_INTERNAL;
@@ -2305,7 +2304,7 @@ rde_dump_rib_as(struct prefix *p, struct
 }
 
 static int
-rde_dump_match_peer(struct rde_peer *p, struct ctl_neighbor *n)
+rde_match_peer(struct rde_peer *p, struct ctl_neighbor *n)
 {
        char *s;
 
@@ -2326,7 +2325,7 @@ rde_dump_filter(struct prefix *p, struct
 {
        struct rde_aspath       *asp;
 
-       if (!rde_dump_match_peer(prefix_peer(p), &req->neighbor))
+       if (!rde_match_peer(prefix_peer(p), &req->neighbor))
                return;
 
        asp = prefix_aspath(p);
@@ -2353,10 +2352,10 @@ rde_dump_filter(struct prefix *p, struct
 static void
 rde_dump_upcall(struct rib_entry *re, void *ptr)
 {
-       struct prefix           *p;
        struct rde_dump_ctx     *ctx = ptr;
+       struct prefix           *p;
 
-       LIST_FOREACH(p, &re->prefix_h, rib_l)
+       LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
                rde_dump_filter(p, &ctx->req);
 }
 
@@ -2375,10 +2374,38 @@ rde_dump_prefix_upcall(struct rib_entry 
        if (ctx->req.prefixlen > pt->prefixlen)
                return;
        if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen))
-               LIST_FOREACH(p, &re->prefix_h, rib_l)
+               LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
                        rde_dump_filter(p, &ctx->req);
 }
 
+static void
+rde_dump_adjout_upcall(struct prefix *p, void *ptr)
+{
+       struct rde_dump_ctx     *ctx = ptr;
+
+       if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
+               return;
+       rde_dump_filter(p, &ctx->req);
+}
+
+static void
+rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr)
+{
+       struct rde_dump_ctx     *ctx = ptr;
+       struct bgpd_addr         addr;
+
+       if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
+               return;
+
+       pt_getaddr(p->pt, &addr);
+       if (addr.aid != ctx->req.prefix.aid)
+               return;
+       if (ctx->req.prefixlen > p->pt->prefixlen)
+               return;
+       if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen))
+               rde_dump_filter(p, &ctx->req);
+}
+
 static int
 rde_dump_throttled(void *arg)
 {
@@ -2391,11 +2418,45 @@ static void
 rde_dump_done(void *arg, u_int8_t aid)
 {
        struct rde_dump_ctx     *ctx = arg;
+       struct rde_peer         *peer;
+       u_int                    error;
 
-       imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
-           -1, NULL, 0);
+       if (ctx->req.flags & F_CTL_ADJ_OUT) {
+               peer = peer_match(&ctx->req.neighbor, ctx->peerid);
+               if (peer == NULL)
+                       goto done;
+               ctx->peerid = peer->conf.id;
+               switch (ctx->req.type) {
+               case IMSG_CTL_SHOW_RIB:
+                       if (prefix_dump_new(peer, ctx->req.aid,
+                           CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
+                           rde_dump_done, rde_dump_throttled) == -1)
+                               goto nomem;
+                       break;
+               case IMSG_CTL_SHOW_RIB_PREFIX:
+                       if (prefix_dump_new(peer, ctx->req.aid,
+                           CTL_MSG_HIGH_MARK, ctx,
+                           rde_dump_adjout_prefix_upcall,
+                           rde_dump_done, rde_dump_throttled) == -1)
+                               goto nomem;
+                       break;
+               default:
+                       fatalx("%s: unsupported imsg type", __func__);
+               }
+               return;
+       }
+done:
+       imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, -1, NULL, 0);
        LIST_REMOVE(ctx, entry);
        free(ctx);
+       return;
+
+nomem:
+       log_warn(__func__);
+       error = CTL_RES_NOMEM;
+       imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, ctx->req.pid, -1, &error,
+           sizeof(error));
+       return;
 }
 
 void
@@ -2404,24 +2465,92 @@ rde_dump_ctx_new(struct ctl_show_rib_req
 {
        struct rde_dump_ctx     *ctx;
        struct rib_entry        *re;
+       struct prefix           *p;
        u_int                    error;
        u_int8_t                 hostplen;
        u_int16_t                rid;
 
        if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
  nomem:
-               log_warn("rde_dump_ctx_new");
+               log_warn(__func__);
                error = CTL_RES_NOMEM;
                imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
                    sizeof(error));
                return;
        }
+
+       memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
+       ctx->req.pid = pid;
+       ctx->req.type = type;
+
        if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) {
                rid = RIB_ADJ_IN;
        } else if (req->flags & F_CTL_ADJ_OUT) {
-               rid = RIB_ADJ_OUT;
+               struct rde_peer *peer;
+
+               peer = peer_match(&req->neighbor, 0);
+               if (peer == NULL) {
+                       log_warnx("%s: no peer found for adj-rib-out",
+                           __func__);
+                       error = CTL_RES_NOSUCHPEER;
+                       imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1,
+                           &error, sizeof(error));
+                       free(ctx);
+                       return;
+               }
+               ctx->peerid = peer->conf.id;
+               switch (ctx->req.type) {
+               case IMSG_CTL_SHOW_RIB:
+                       if (prefix_dump_new(peer, ctx->req.aid,
+                           CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
+                           rde_dump_done, rde_dump_throttled) == -1)
+                               goto nomem;
+                       break;
+               case IMSG_CTL_SHOW_RIB_PREFIX:
+                       if (req->flags & F_LONGER) {
+                               if (prefix_dump_new(peer, ctx->req.aid,
+                                   CTL_MSG_HIGH_MARK, ctx,
+                                   rde_dump_adjout_prefix_upcall,
+                                   rde_dump_done, rde_dump_throttled) == -1)
+                                       goto nomem;
+                               break;
+                       }
+                       switch (req->prefix.aid) {
+                       case AID_INET:
+                       case AID_VPN_IPv4:
+                               hostplen = 32;
+                               break;
+                       case AID_INET6:
+                       case AID_VPN_IPv6:
+                               hostplen = 128;
+                               break;
+                       default:
+                               fatalx("%s: unknown af", __func__);
+                       }
+
+                       do {
+                               if (req->prefixlen == hostplen)
+                                       p = prefix_match(peer, &req->prefix);
+                               else
+                                       p = prefix_lookup(peer, &req->prefix,
+                                           req->prefixlen);
+                               if (p)
+                                       rde_dump_adjout_upcall(p, ctx);
+                       } while ((peer = peer_match(&req->neighbor,
+                           peer->conf.id)));
+
+                       imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
+                           -1, NULL, 0);
+                       free(ctx);
+                       return;
+               default:
+                       fatalx("%s: unsupported imsg type", __func__);
+               }
+       
+               LIST_INSERT_HEAD(&rde_dump_h, ctx, entry);
+               return;
        } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) {
-               log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
+               log_warnx("%s: no such rib %s", __func__, req->rib);
                error = CTL_RES_NOSUCHRIB;
                imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
                    sizeof(error));
@@ -2429,10 +2558,6 @@ rde_dump_ctx_new(struct ctl_show_rib_req
                return;
        }
 
-       memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
-       ctx->req.pid = pid;
-       ctx->req.type = type;
-       ctx->rid = rid;
        switch (ctx->req.type) {
        case IMSG_CTL_SHOW_NETWORK:
                if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx,
@@ -2463,10 +2588,10 @@ rde_dump_ctx_new(struct ctl_show_rib_req
                        hostplen = 128;
                        break;
                default:
-                       fatalx("rde_dump_ctx_new: unknown af");
+                       fatalx("%s: unknown af", __func__);
                }
                if (req->prefixlen == hostplen)
-                       re = rib_lookup(rib_byid(rid), &req->prefix);
+                       re = rib_match(rib_byid(rid), &req->prefix);
                else
                        re = rib_get(rib_byid(rid), &req->prefix,
                            req->prefixlen);
@@ -2502,21 +2627,7 @@ rde_dump_ctx_terminate(pid_t pid)
 
        LIST_FOREACH(ctx, &rde_dump_h, entry) {
                if (ctx->req.pid == pid) {
-                       void (*upcall)(struct rib_entry *, void *);
-                       switch (ctx->req.type) {
-                       case IMSG_CTL_SHOW_NETWORK:
-                               upcall = network_dump_upcall;
-                               break;
-                       case IMSG_CTL_SHOW_RIB:
-                               upcall = rde_dump_upcall;
-                               break;
-                       case IMSG_CTL_SHOW_RIB_PREFIX:
-                               upcall = rde_dump_prefix_upcall;
-                               break;
-                       default:
-                               fatalx("%s: unsupported imsg type", __func__);
-                       }
-                       rib_dump_terminate(ctx->rid, ctx, upcall);
+                       rib_dump_terminate(ctx);
                        return;
                }
        }
@@ -2697,16 +2808,9 @@ rde_up_dump_upcall(struct rib_entry *re,
 }
 
 static void
-rde_up_flush_upcall(struct rib_entry *re, void *ptr)
+rde_up_flush_upcall(struct prefix *p, void *ptr)
 {
-       struct rde_peer *peer = ptr;
-       struct prefix *p, *np;
-
-       LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
-               if (peer != prefix_peer(p))
-                       continue;
-               up_generate_updates(out_rules, peer, NULL, p);
-       }
+       up_generate_updates(out_rules, prefix_peer(p), NULL, p);
 }
 
 static void
@@ -3011,18 +3115,16 @@ rde_reload_done(void)
                peer->reconf_out = 0;
                peer->reconf_rib = 0;
                if (peer->loc_rib_id != rib_find(peer->conf.rib)) {
-                       char *p = log_fmt_peer(&peer->conf);
-                       log_debug("rib change: reloading peer %s", p);
-                       free(p);
+                       log_peer_info(&peer->conf, "rib change, reloading");
                        peer->loc_rib_id = rib_find(peer->conf.rib);
                        if (peer->loc_rib_id == RIB_NOTFOUND)
                                fatalx("King Bula's peer met an unknown RIB");
                        peer->reconf_rib = 1;
                        softreconfig++;
-                       if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC,
-                           RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall,
+                       if (prefix_dump_new(peer, AID_UNSPEC,
+                           RDE_RUNNER_ROUNDS, NULL, rde_up_flush_upcall,
                            rde_softreconfig_in_done, NULL) == -1)
-                               fatal("%s: rib_dump_new", __func__);
+                               fatal("%s: prefix_dump_new", __func__);
                        log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
                        continue;
                }
@@ -3197,7 +3299,7 @@ rde_softreconfig_in(struct rib_entry *re
 
        pt = re->prefix;
        pt_getaddr(pt, &prefix);
-       LIST_FOREACH(p, &re->prefix_h, rib_l) {
+       LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
                asp = prefix_aspath(p);
                peer = prefix_peer(p);
                force_eval = 0;
@@ -3358,6 +3460,35 @@ peer_get(u_int32_t id)
 }
 
 struct rde_peer *
+peer_match(struct ctl_neighbor *n, u_int32_t peerid)
+{
+       struct rde_peer_head    *head;
+       struct rde_peer         *peer;
+       u_int32_t               i = 0;
+
+       if (peerid != 0)
+               i = peerid & peertable.peer_hashmask;
+
+       while (i <= peertable.peer_hashmask) {
+               head = &peertable.peer_hashtbl[i];
+               LIST_FOREACH(peer, head, hash_l) {
+                       /* skip peers until peerid is found */
+                       if (peerid == peer->conf.id) {
+                               peerid = 0;
+                               continue;
+                       }
+                       if (peerid != 0)
+                               continue;
+
+                       if (rde_match_peer(peer, n))
+                               return (peer);
+               }
+               i++;
+       }
+       return (NULL);
+}
+
+struct rde_peer *
 peer_add(u_int32_t id, struct peer_config *p_conf)
 {
        struct rde_peer_head    *head;
@@ -3441,17 +3572,9 @@ peer_localaddrs(struct rde_peer *peer, s
 }
 
 static void
-peer_adjout_flush_upcall(struct rib_entry *re, void *arg)
+peer_adjout_clear_upcall(struct prefix *p, void *arg)
 {
-       struct rde_peer *peer = arg;
-       struct prefix *p, *np;
-
-       LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
-               if (peer != prefix_peer(p))
-                       continue;
-               prefix_destroy(p);
-               break;  /* optimization, only one match per peer possible */
-       }
+       prefix_adjout_destroy(p);
 }
 
 void
@@ -3472,9 +3595,9 @@ peer_up(u_int32_t id, struct session_up 
                 * There is a race condition when doing PEER_ERR -> PEER_DOWN.
                 * So just do a full reset of the peer here.
                 */
-               if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, 0, peer,
-                   peer_adjout_flush_upcall, NULL, NULL) == -1)
-                       fatal("%s: rib_dump_new", __func__);
+               if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
+                   peer_adjout_clear_upcall, NULL, NULL) == -1)
+                       fatal("%s: prefix_dump_new", __func__);
                peer_flush(peer, AID_UNSPEC, 0);
                peer->prefix_cnt = 0;
                peer->state = PEER_DOWN;
@@ -3519,12 +3642,12 @@ peer_down(u_int32_t id)
        peer->remote_bgpid = 0;
        peer->state = PEER_DOWN;
        /* stop all pending dumps which may depend on this peer */
-       rib_dump_terminate(peer->loc_rib_id, peer, rde_up_dump_upcall);
+       rib_dump_terminate(peer);
 
        /* flush Adj-RIB-Out for this peer */
-       if (rib_dump_new(RIB_ADJ_OUT, AID_UNSPEC, 0, peer,
-           peer_adjout_flush_upcall, NULL, NULL) == -1)
-               fatal("%s: rib_dump_new", __func__);
+       if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
+           peer_adjout_clear_upcall, NULL, NULL) == -1)
+               fatal("%s: prefix_dump_new", __func__);
 
        peer_flush(peer, AID_UNSPEC, 0);
 
@@ -3553,7 +3676,7 @@ peer_flush_upcall(struct rib_entry *re, 
 
        pt_getaddr(re->prefix, &addr);
        prefixlen = re->prefix->prefixlen;
-       LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
+       LIST_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) {
                if (peer != prefix_peer(p))
                        continue;
                if (staletime && p->lastchange > staletime)
@@ -3888,7 +4011,7 @@ network_dump_upcall(struct rib_entry *re
        struct bgpd_addr         addr;
        struct rde_dump_ctx     *ctx = ptr;
 
-       LIST_FOREACH(p, &re->prefix_h, rib_l) {
+       LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
                asp = prefix_aspath(p);
                if (!(asp->flags & F_PREFIX_ANNOUNCED))
                        continue;
@@ -3925,7 +4048,7 @@ network_flush_upcall(struct rib_entry *r
 
        pt_getaddr(re->prefix, &addr);
        prefixlen = re->prefix->prefixlen;
-       LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
+       LIST_FOREACH_SAFE(p, &re->prefix_h, entry.list.rib, np) {
                if (prefix_peer(p) != peer)
                        continue;
                asp = prefix_aspath(p);
@@ -3965,9 +4088,6 @@ rde_shutdown(void)
        for (i = 0; i <= peertable.peer_hashmask; i++)
                while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
                        peer_down(p->conf.id);
-
-       /* then since decision process is off, kill RIB_ADJ_OUT */
-       rib_free(rib_byid(RIB_ADJ_OUT));
 
        /* free filters */
        filterlist_free(out_rules);
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.219
diff -u -p -r1.219 rde.h
--- rde.h       1 Jul 2019 07:07:08 -0000       1.219
+++ rde.h       10 Jul 2019 14:42:42 -0000
@@ -57,8 +57,7 @@ struct rib {
 };
 
 #define RIB_ADJ_IN     0
-#define RIB_ADJ_OUT    1
-#define RIB_LOC_START  2
+#define RIB_LOC_START  1
 #define RIB_NOTFOUND   0xffff
 
 struct rib_desc {
@@ -78,6 +77,7 @@ LIST_HEAD(aspath_list, aspath);
 LIST_HEAD(attr_list, attr);
 LIST_HEAD(aspath_head, rde_aspath);
 RB_HEAD(prefix_tree, prefix);
+RB_HEAD(prefix_index, prefix);
 
 struct rde_peer {
        LIST_ENTRY(rde_peer)             hash_l; /* hash list over all peers */
@@ -87,6 +87,7 @@ struct rde_peer {
        struct bgpd_addr                 local_v4_addr;
        struct bgpd_addr                 local_v6_addr;
        struct capabilities              capa;
+       struct prefix_index              adj_rib_out;
        struct prefix_tree               updates[AID_MAX];
        struct prefix_tree               withdraws[AID_MAX];
        time_t                           staletime[AID_MAX];
@@ -306,8 +307,14 @@ struct pt_entry_vpn6 {
 };
 
 struct prefix {
-       LIST_ENTRY(prefix)               rib_l, nexthop_l;
-       RB_ENTRY(prefix)                 entry;
+       union {
+               struct {
+                       LIST_ENTRY(prefix)       rib, nexthop;
+               } list;
+               struct {
+                       RB_ENTRY(prefix)         index, update;
+               } tree;
+       }                                entry;
        struct pt_entry                 *pt;
        struct rib_entry                *re;
        struct rde_aspath               *aspath;
@@ -317,12 +324,17 @@ struct prefix {
        time_t                           lastchange;
        u_int8_t                         validation_state;
        u_int8_t                         nhflags;
-       u_int8_t                         flags;
        u_int8_t                         eor;
-#define        PREFIX_FLAG_WITHDRAW    0x01
-#define        PREFIX_FLAG_UPDATE      0x02
+       u_int8_t                         flags;
+#define        PREFIX_FLAG_WITHDRAW    0x01    /* queued for withdraw */
+#define        PREFIX_FLAG_UPDATE      0x02    /* queued for update */
+#define        PREFIX_FLAG_DEAD        0x04    /* locked but removed */
+#define        PREFIX_FLAG_MASK        0x07    /* mask for the three prefix types */
+#define        PREFIX_NEXTHOP_LINKED   0x40    /* prefix is linked onto nexthop list */
+#define        PREFIX_FLAG_LOCKED      0x80    /* locked by rib walker */
 };
 
+/* possible states for nhflags */
 #define        NEXTHOP_SELF            0x01
 #define        NEXTHOP_REJECT          0x02
 #define        NEXTHOP_BLACKHOLE       0x04
@@ -356,6 +368,7 @@ u_int32_t   rde_local_as(void);
 int            rde_noevaluate(void);
 int            rde_decisionflags(void);
 int            rde_as4byte(struct rde_peer *);
+struct rde_peer        *peer_get(u_int32_t);
 
 /* rde_attr.c */
 int             attr_write(void *, u_int16_t, u_int8_t, u_int8_t, void *,
@@ -395,6 +408,7 @@ u_char              *aspath_override(struct aspath *
                    u_int16_t *);
 int             aspath_lenmatch(struct aspath *, enum aslen_spec, u_int);
 
+/* rde_community.c */
 int    community_match(struct rde_community *, struct community *,
            struct rde_peer *);
 int    community_set(struct rde_community *, struct community *,
@@ -499,15 +513,14 @@ struct rib_desc   *rib_desc(struct rib *);
 void            rib_free(struct rib *);
 void            rib_shutdown(void);
 struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int);
-struct rib_entry *rib_lookup(struct rib *, struct bgpd_addr *);
+struct rib_entry *rib_match(struct rib *, struct bgpd_addr *);
 int             rib_dump_pending(void);
 void            rib_dump_runner(void);
 int             rib_dump_new(u_int16_t, u_int8_t, unsigned int, void *,
                    void (*)(struct rib_entry *, void *),
                    void (*)(void *, u_int8_t),
                    int (*)(void *));
-void            rib_dump_terminate(u_int16_t, void *,
-                   void (*)(struct rib_entry *, void *));
+void            rib_dump_terminate(void *);
 
 static inline struct rib *
 re_rib(struct rib_entry *re)
@@ -540,13 +553,20 @@ void               path_put(struct rde_aspath *);
 #define        PREFIX_SIZE(x)  (((x) + 7) / 8 + 1)
 struct prefix  *prefix_get(struct rib *, struct rde_peer *,
                    struct bgpd_addr *, int);
+struct prefix  *prefix_lookup(struct rde_peer *, struct bgpd_addr *, int);
+struct prefix  *prefix_match(struct rde_peer *, struct bgpd_addr *);
 int             prefix_remove(struct rib *, struct rde_peer *,
                    struct bgpd_addr *, int);
 void            prefix_add_eor(struct rde_peer *, u_int8_t);
-void            prefix_update(struct rib *, struct rde_peer *,
-                   struct bgpd_addr *, int);
-int             prefix_withdraw(struct rib *, struct rde_peer *,
-                   struct bgpd_addr *, int);
+int             prefix_update(struct rde_peer *, struct filterstate *,
+                   struct bgpd_addr *, int, u_int8_t);
+int             prefix_withdraw(struct rde_peer *, struct bgpd_addr *, int);
+void            prefix_adjout_destroy(struct prefix *p);
+void            prefix_adjout_dump(struct rde_peer *, void *,
+                   void (*)(struct prefix *, void *));
+int             prefix_dump_new(struct rde_peer *, u_int8_t, unsigned int,
+                   void *, void (*)(struct prefix *, void *),
+                   void (*)(void *, u_int8_t), int (*)(void *));
 int             prefix_write(u_char *, int, struct bgpd_addr *, u_int8_t, int);
 int             prefix_writebuf(struct ibuf *, struct bgpd_addr *, u_int8_t);
 struct prefix  *prefix_bypeer(struct rib_entry *, struct rde_peer *);
Index: rde_decide.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
retrieving revision 1.74
diff -u -p -r1.74 rde_decide.c
--- rde_decide.c        21 Jan 2019 02:07:56 -0000      1.74
+++ rde_decide.c        20 Jun 2019 19:50:25 -0000
@@ -245,7 +245,7 @@ prefix_evaluate(struct prefix *p, struct
        if (re_rib(re)->flags & F_RIB_NOEVALUATE || rde_noevaluate()) {
                /* decision process is turned off */
                if (p != NULL)
-                       LIST_INSERT_HEAD(&re->prefix_h, p, rib_l);
+                       LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib);
                if (re->active != NULL)
                        re->active = NULL;
                return;
@@ -253,15 +253,18 @@ prefix_evaluate(struct prefix *p, struct
 
        if (p != NULL) {
                if (LIST_EMPTY(&re->prefix_h))
-                       LIST_INSERT_HEAD(&re->prefix_h, p, rib_l);
+                       LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib);
                else {
-                       LIST_FOREACH(xp, &re->prefix_h, rib_l) {
+                       LIST_FOREACH(xp, &re->prefix_h, entry.list.rib) {
                                if (prefix_cmp(p, xp) > 0) {
-                                       LIST_INSERT_BEFORE(xp, p, rib_l);
+                                       LIST_INSERT_BEFORE(xp, p,
+                                           entry.list.rib);
                                        break;
-                               } else if (LIST_NEXT(xp, rib_l) == NULL) {
+                               } else if (LIST_NEXT(xp, entry.list.rib) ==
+                                   NULL) {
                                        /* if xp last element ... */
-                                       LIST_INSERT_AFTER(xp, p, rib_l);
+                                       LIST_INSERT_AFTER(xp, p,
+                                           entry.list.rib);
                                        break;
                                }
                        }
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.198
diff -u -p -r1.198 rde_rib.c
--- rde_rib.c   1 Jul 2019 14:47:56 -0000       1.198
+++ rde_rib.c   10 Jul 2019 14:27:45 -0000
@@ -52,8 +52,10 @@ RB_GENERATE(rib_tree, rib_entry, rib_e, 
 struct rib_context {
        LIST_ENTRY(rib_context)          entry;
        struct rib_entry                *ctx_re;
-       u_int16_t                        ctx_rib_id;
-       void            (*ctx_upcall)(struct rib_entry *, void *);
+       struct prefix                   *ctx_p;
+       u_int32_t                        ctx_id;
+       void            (*ctx_rib_call)(struct rib_entry *, void *);
+       void            (*ctx_prefix_call)(struct prefix *, void *);
        void            (*ctx_done)(void *, u_int8_t);
        int             (*ctx_throttle)(void *);
        void                            *ctx_arg;
@@ -69,6 +71,7 @@ static int    prefix_add(struct bgpd_addr *
 static int     prefix_move(struct prefix *, struct rde_peer *,
                    struct rde_aspath *, struct rde_community *,
                    struct nexthop *, u_int8_t, u_int8_t);
+static void    prefix_dump_r(struct rib_context *);
 
 static inline struct rib_entry *
 re_lock(struct rib_entry *re)
@@ -128,7 +131,7 @@ rib_new(char *name, u_int rtableid, u_in
                rib_size = id + 1;
        }
 
-       bzero(&ribs[id], sizeof(struct rib_desc));
+       memset(&ribs[id], 0, sizeof(struct rib_desc));
        strlcpy(ribs[id].name, name, sizeof(ribs[id].name));
        RB_INIT(rib_tree(&ribs[id].rib));
        ribs[id].state = RECONF_REINIT;
@@ -196,7 +199,7 @@ rib_free(struct rib *rib)
                 */
                while ((p = LIST_FIRST(&re->prefix_h))) {
                        struct rde_aspath *asp = prefix_aspath(p);
-                       np = LIST_NEXT(p, rib_l);
+                       np = LIST_NEXT(p, entry.list.rib);
                        if (asp && asp->pftableid) {
                                struct bgpd_addr addr;
 
@@ -215,7 +218,7 @@ rib_free(struct rib *rib)
        rd = &ribs[rib->id];
        filterlist_free(rd->in_rules_tmp);
        filterlist_free(rd->in_rules);
-       bzero(rd, sizeof(struct rib_desc));
+       memset(rd, 0, sizeof(struct rib_desc));
 }
 
 void
@@ -235,7 +238,7 @@ rib_shutdown(void)
                struct rib_desc *rd = &ribs[id];
                filterlist_free(rd->in_rules_tmp);
                filterlist_free(rd->in_rules);
-               bzero(rd, sizeof(struct rib_desc));
+               memset(rd, 0, sizeof(struct rib_desc));
        }
        free(ribs);
 }
@@ -247,7 +250,7 @@ rib_get(struct rib *rib, struct bgpd_add
        struct pt_entry *pte;
 
        pte = pt_fill(prefix, prefixlen);
-       bzero(&xre, sizeof(xre));
+       memset(&xre, 0, sizeof(xre));
        xre.prefix = pte;
 
        re = RB_FIND(rib_tree, rib_tree(rib), &xre);
@@ -258,7 +261,7 @@ rib_get(struct rib *rib, struct bgpd_add
 }
 
 struct rib_entry *
-rib_lookup(struct rib *rib, struct bgpd_addr *addr)
+rib_match(struct rib *rib, struct bgpd_addr *addr)
 {
        struct rib_entry *re;
        int              i;
@@ -281,7 +284,7 @@ rib_lookup(struct rib *rib, struct bgpd_
                }
                break;
        default:
-               fatalx("rib_lookup: unknown af");
+               fatalx("%s: unknown af", __func__);
        }
        return (NULL);
 }
@@ -367,9 +370,9 @@ rib_dump_r(struct rib_context *ctx)
        struct rib              *rib;
        unsigned int             i;
 
-       rib = rib_byid(ctx->ctx_rib_id);
+       rib = rib_byid(ctx->ctx_id);
        if (rib == NULL)
-               fatalx("%s: rib id %u gone", __func__, ctx->ctx_rib_id);
+               fatalx("%s: rib id %u gone", __func__, ctx->ctx_id);
 
        if (ctx->ctx_re == NULL)
                re = RB_MIN(rib_tree, rib_tree(rib));
@@ -378,9 +381,9 @@ rib_dump_r(struct rib_context *ctx)
 
        for (i = 0; re != NULL; re = next) {
                next = RB_NEXT(rib_tree, unused, re);
-               if (re->rib_id != ctx->ctx_rib_id)
+               if (re->rib_id != ctx->ctx_id)
                        fatalx("%s: Unexpected RIB %u != %u.", __func__,
-                           re->rib_id, ctx->ctx_rib_id);
+                           re->rib_id, ctx->ctx_id);
                if (ctx->ctx_aid != AID_UNSPEC &&
                    ctx->ctx_aid != re->prefix->aid)
                        continue;
@@ -391,7 +394,7 @@ rib_dump_r(struct rib_context *ctx)
                        re_lock(re);
                        return;
                }
-               ctx->ctx_upcall(re, ctx->ctx_arg);
+               ctx->ctx_rib_call(re, ctx->ctx_arg);
        }
 
        if (ctx->ctx_done)
@@ -422,7 +425,10 @@ rib_dump_runner(void)
        LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
                if (ctx->ctx_throttle && ctx->ctx_throttle(ctx->ctx_arg))
                        continue;
-               rib_dump_r(ctx);
+               if (ctx->ctx_rib_call != NULL)
+                       rib_dump_r(ctx);
+               else
+                       prefix_dump_r(ctx);
        }
 }
 
@@ -432,7 +438,7 @@ rib_dump_abort(u_int16_t id)
        struct rib_context *ctx, *next;
 
        LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
-               if (id != ctx->ctx_rib_id)
+               if (id != ctx->ctx_id)
                        continue;
                if (ctx->ctx_done)
                        ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
@@ -450,11 +456,11 @@ rib_dump_new(u_int16_t id, u_int8_t aid,
 
        if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
                return -1;
-       ctx->ctx_rib_id = id;
+       ctx->ctx_id = id;
        ctx->ctx_aid = aid;
        ctx->ctx_count = count;
        ctx->ctx_arg = arg;
-       ctx->ctx_upcall = upcall;
+       ctx->ctx_rib_call = upcall;
        ctx->ctx_done = done;
        ctx->ctx_throttle = throttle;
 
@@ -468,14 +474,12 @@ rib_dump_new(u_int16_t id, u_int8_t aid,
 }
 
 void
-rib_dump_terminate(u_int16_t id, void *arg,
-    void (*upcall)(struct rib_entry *, void *))
+rib_dump_terminate(void *arg)
 {
        struct rib_context *ctx, *next;
 
        LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
-               if (id != ctx->ctx_rib_id || ctx->ctx_arg != arg ||
-                   ctx->ctx_upcall != upcall)
+               if (ctx->ctx_arg != arg)
                        continue;
                if (ctx->ctx_done)
                        ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
@@ -610,15 +614,6 @@ path_update(struct rib *rib, struct rde_
                        p->validation_state = vstate;
                        return (2);
                }
-               if (p->flags) {
-                       struct prefix_tree *prefix_head;
-                       /* prefix is a pending update */
-                       prefix_head = p->flags & PREFIX_FLAG_UPDATE ?
-                           &peer->updates[prefix->aid] :
-                           &peer->withdraws[prefix->aid];
-                       RB_REMOVE(prefix_tree, prefix_head, p);
-                       p->flags = 0;
-               }
        }
 
        /*
@@ -881,7 +876,14 @@ prefix_cmp(struct prefix *a, struct pref
        return pt_prefix_cmp(a->pt, b->pt);
 }
 
-RB_GENERATE(prefix_tree, prefix, entry, prefix_cmp)
+static inline int
+prefix_index_cmp(struct prefix *a, struct prefix *b)
+{
+       return pt_prefix_cmp(a->pt, b->pt);
+}
+
+RB_GENERATE(prefix_tree, prefix, entry.tree.update, prefix_cmp)
+RB_GENERATE_STATIC(prefix_index, prefix, entry.tree.index, prefix_index_cmp)
 
 /*
  * search for specified prefix of a peer. Returns NULL if not found.
@@ -899,6 +901,52 @@ prefix_get(struct rib *rib, struct rde_p
 }
 
 /*
+ * lookup prefix in the peer prefix_index. Returns NULL if not found.
+ */
+struct prefix *
+prefix_lookup(struct rde_peer *peer, struct bgpd_addr *prefix,
+    int prefixlen)
+{
+       struct prefix xp;
+       struct pt_entry *pte;
+
+       memset(&xp, 0, sizeof(xp));
+       pte = pt_fill(prefix, prefixlen);
+       xp.pt = pte;
+
+       return RB_FIND(prefix_index, &peer->adj_rib_out, &xp);
+}
+
+struct prefix *
+prefix_match(struct rde_peer *peer, struct bgpd_addr *addr)
+{
+       struct prefix *p;
+       int i;
+
+       switch (addr->aid) {
+       case AID_INET:
+       case AID_VPN_IPv4:
+               for (i = 32; i >= 0; i--) {
+                       p = prefix_lookup(peer, addr, i);
+                       if (p != NULL)
+                               return p;
+               }
+               break;
+       case AID_INET6:
+       case AID_VPN_IPv6:
+               for (i = 128; i >= 0; i--) {
+                       p = prefix_lookup(peer, addr, i);
+                       if (p != NULL)
+                               return p;
+               }
+               break;
+       default:
+               fatalx("%s: unknown af", __func__);
+       }
+       return NULL;
+}
+
+/*
  * Adds or updates a prefix.
  */
 static int
@@ -936,8 +984,8 @@ prefix_move(struct prefix *p, struct rde
        np->aspath = path_ref(asp);
        np->communities = communities_ref(comm);
        np->peer = peer;
-       np->pt = p->pt; /* skip refcnt update since ref is moved */
        np->re = p->re;
+       np->pt = p->pt; /* skip refcnt update since ref is moved */
        np->validation_state = vstate;
        np->nhflags = nhflags;
        np->nexthop = nexthop_ref(nexthop);
@@ -957,7 +1005,7 @@ prefix_move(struct prefix *p, struct rde
         * This is safe because we create a new prefix and so the change
         * is noticed by prefix_evaluate().
         */
-       LIST_REMOVE(p, rib_l);
+       LIST_REMOVE(p, entry.list.rib);
        prefix_evaluate(np, np->re);
 
        /* remove old prefix node */
@@ -1020,26 +1068,97 @@ prefix_add_eor(struct rde_peer *peer, u_
        if (RB_INSERT(prefix_tree, &peer->updates[aid], p) != NULL)
                /* no need to add if EoR marker already present */
                prefix_free(p);
+       /* EOR marker is not inserted into the adj_rib_out index */
 }
 
 /*
  * Put a prefix from the Adj-RIB-Out onto the update queue.
  */
-void
-prefix_update(struct rib *rib, struct rde_peer *peer,
-    struct bgpd_addr *prefix, int prefixlen)
+int
+prefix_update(struct rde_peer *peer, struct filterstate *state,
+    struct bgpd_addr *prefix, int prefixlen, u_int8_t vstate)
 {
+       struct prefix_tree *prefix_head = NULL;
+       struct rde_aspath *asp;
+       struct rde_community *comm;
        struct prefix *p;
+       int created = 0;
 
-       p = prefix_get(rib, peer, prefix, prefixlen);
-       if (p == NULL)          /* Got a dummy withdrawn request. */
-               return;
+       if ((p = prefix_lookup(peer, prefix, prefixlen)) != NULL) {
+               /* prefix is already in the Adj-RIB-Out */
+               if (p->flags & PREFIX_FLAG_WITHDRAW) {
+                       created = 1;    /* consider this a new entry */
+                       peer->up_wcnt--;
+                       prefix_head = &peer->withdraws[prefix->aid];
+                       RB_REMOVE(prefix_tree, prefix_head, p);
+               } else if (p->flags & PREFIX_FLAG_DEAD) {
+                       created = 1;    /* consider this a new entry */
+               } else {
+                       if (prefix_nhflags(p) == state->nhflags &&
+                           prefix_nexthop(p) == state->nexthop &&
+                           communities_equal(&state->communities,
+                           prefix_communities(p)) &&
+                           path_compare(&state->aspath, prefix_aspath(p)) ==
+                           0) {
+                               /* nothing changed */
+                               p->validation_state = vstate;
+                               p->lastchange = time(NULL);
+                               return 0;
+                       }
+
+                       if (p->flags & PREFIX_FLAG_UPDATE) {
+                               /* created = 0 so up_nlricnt is not increased */
+                               prefix_head = &peer->updates[prefix->aid];
+                               RB_REMOVE(prefix_tree, prefix_head, p);
+                       }
+               }
+               /* unlink from aspath and remove nexthop ref */
+               nexthop_unref(p->nexthop);
+               communities_unref(p->communities);
+               path_unref(p->aspath);
+               p->flags &= ~PREFIX_FLAG_MASK;
+
+               /* peer and pt remain */
+       } else {
+               p = prefix_alloc();
+               created = 1;
+
+               p->pt = pt_get(prefix, prefixlen);
+               if (p->pt == NULL)
+                       fatalx("%s: update for non existing prefix", __func__);
+               pt_ref(p->pt);
+               p->peer = peer;
+
+               if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL)
+                       fatalx("%s: RB index invariant violated", __func__);
+       }
+
+       if ((asp = path_lookup(&state->aspath)) == NULL) {
+               /* Path not available, create and link a new one. */
+               asp = path_copy(path_get(), &state->aspath);
+               path_link(asp);
+       }
+
+       if ((comm = communities_lookup(&state->communities)) == NULL) {
+               /* Communities not available, create and link a new one. */
+               comm = communities_link(&state->communities);
+       }
+
+       p->aspath = path_ref(asp);
+       p->communities = communities_ref(comm);
+       p->nexthop = nexthop_ref(state->nexthop);
+       p->nhflags = state->nhflags;
 
-       if (p->flags != 0)
+       p->validation_state = vstate;
+       p->lastchange = time(NULL);
+
+       if (p->flags & PREFIX_FLAG_MASK)
                fatalx("%s: bad flags %x", __func__, p->flags);
-       p->flags = PREFIX_FLAG_UPDATE;
+       p->flags |= PREFIX_FLAG_UPDATE;
        if (RB_INSERT(prefix_tree, &peer->updates[prefix->aid], p) != NULL)
                fatalx("%s: RB tree invariant violated", __func__);
+
+       return created;
 }
 
 /*
@@ -1047,15 +1166,19 @@ prefix_update(struct rib *rib, struct rd
  * the prefix in the RIB linked to the peer withdraw list.
  */
 int
-prefix_withdraw(struct rib *rib, struct rde_peer *peer,
-    struct bgpd_addr *prefix, int prefixlen)
+prefix_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix, int prefixlen)
 {
        struct prefix           *p;
 
-       p = prefix_get(rib, peer, prefix, prefixlen);
+       p = prefix_lookup(peer, prefix, prefixlen);
        if (p == NULL)          /* Got a dummy withdrawn request. */
                return (0);
 
+       /* remove nexthop ref ... */
+       nexthop_unref(p->nexthop);
+       p->nexthop = NULL;
+       p->nhflags = 0;
+
        /* unlink from aspath ...*/
        path_unref(p->aspath);
        p->aspath = NULL;
@@ -1063,29 +1186,181 @@ prefix_withdraw(struct rib *rib, struct 
        /* ... communities ... */
        communities_unref(p->communities);
        p->communities = NULL;
+       /* and unlink from aspath */
+       path_unref(p->aspath);
+       p->aspath = NULL;
+       /* re already NULL */
 
-       /* ... and nexthop but keep the re link */
-       nexthop_unlink(p);
-       nexthop_unref(p->nexthop);
-       p->nexthop = NULL;
-       p->nhflags = 0;
-       /* re link still exists */
+       p->lastchange = time(NULL);
 
-       if (p->flags) {
+       if (p->flags & PREFIX_FLAG_MASK) {
                struct prefix_tree *prefix_head;
                /* p is a pending update or withdraw, remove first */
                prefix_head = p->flags & PREFIX_FLAG_UPDATE ?
                    &peer->updates[prefix->aid] :
                    &peer->withdraws[prefix->aid];
                RB_REMOVE(prefix_tree, prefix_head, p);
-               p->flags = 0;
+               p->flags &= ~PREFIX_FLAG_MASK;
        }
-       p->flags = PREFIX_FLAG_WITHDRAW;
+       p->flags |= PREFIX_FLAG_WITHDRAW;
        if (RB_INSERT(prefix_tree, &peer->withdraws[prefix->aid], p) != NULL)
                fatalx("%s: RB tree invariant violated", __func__);
        return (1);
 }
 
+static inline void
+prefix_lock(struct prefix *p)
+{
+       if (p->flags & PREFIX_FLAG_LOCKED)
+               fatalx("%s: locking locked prefix", __func__);
+       p->flags |= PREFIX_FLAG_LOCKED;
+}
+
+static inline void
+prefix_unlock(struct prefix *p)
+{
+       if ((p->flags & PREFIX_FLAG_LOCKED) == 0)
+               fatalx("%s: unlocking unlocked prefix", __func__);
+       p->flags &= ~PREFIX_FLAG_LOCKED;
+}
+
+static inline int
+prefix_is_locked(struct prefix *p)
+{
+       return (p->flags & PREFIX_FLAG_LOCKED) != 0;
+}
+
+static inline int
+prefix_is_dead(struct prefix *p)
+{
+       return (p->flags & PREFIX_FLAG_DEAD) != 0;
+}
+
+static struct prefix *
+prefix_restart(struct rib_context *ctx)
+{
+       struct prefix *p;
+
+       p = ctx->ctx_p;
+       prefix_unlock(p);
+
+       if (prefix_is_dead(p)) {
+               struct prefix *next;
+
+               next = RB_NEXT(prefix_index, unused, p);
+               prefix_adjout_destroy(p);
+               p = next;
+       }
+       return p;
+}
+
+void
+prefix_adjout_destroy(struct prefix *p)
+{
+       struct rde_peer *peer = prefix_peer(p);
+
+       if (p->eor) {
+               /* EOR marker is not linked in the index */
+               prefix_free(p);
+               return;
+       }
+
+       if (p->flags & PREFIX_FLAG_WITHDRAW)
+               RB_REMOVE(prefix_tree, &peer->withdraws[p->pt->aid], p);
+       else if (p->flags & PREFIX_FLAG_UPDATE)
+               RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
+       /* nothing needs to be done for PREFIX_FLAG_DEAD */
+       p->flags &= ~PREFIX_FLAG_MASK;
+
+
+       if (prefix_is_locked(p)) {
+               /* remove nexthop ref ... */
+               nexthop_unref(p->nexthop);
+               p->nexthop = NULL;
+               /* ... communities ... */
+               communities_unref(p->communities);
+               p->communities = NULL;
+               /* and unlink from aspath */
+               path_unref(p->aspath);
+               p->aspath = NULL;
+               p->nhflags = 0;
+               /* re already NULL */
+
+               /* finally mark prefix dead */
+               p->flags |= PREFIX_FLAG_DEAD;
+               return;
+       }
+
+       RB_REMOVE(prefix_index, &peer->adj_rib_out, p);
+       
+       prefix_unlink(p);
+       prefix_free(p);
+}
+
+static void
+prefix_dump_r(struct rib_context *ctx)
+{
+       struct prefix *p, *next;
+       struct rde_peer *peer;
+       unsigned int i;
+
+       if ((peer = peer_get(ctx->ctx_id)) == NULL)
+               goto done;
+
+       if (ctx->ctx_p == NULL)
+               p = RB_MIN(prefix_index, &peer->adj_rib_out);
+       else
+               p = prefix_restart(ctx);
+
+       for (i = 0; p != NULL; p = next) {
+               next = RB_NEXT(prefix_index, unused, p);
+               if (prefix_is_dead(p))
+                       continue;
+               if (ctx->ctx_aid != AID_UNSPEC &&
+                   ctx->ctx_aid != p->pt->aid)
+                       continue;
+               if (ctx->ctx_count && i++ >= ctx->ctx_count &&
+                   !prefix_is_locked(p)) {
+                       /* store and lock last element */
+                       ctx->ctx_p = p;
+                       prefix_lock(p);
+                       return;
+               }
+               ctx->ctx_prefix_call(p, ctx->ctx_arg);
+       }
+
+done:
+       if (ctx->ctx_done)
+               ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
+       LIST_REMOVE(ctx, entry);
+       free(ctx);
+}
+
+int
+prefix_dump_new(struct rde_peer *peer, u_int8_t aid, unsigned int count,
+    void *arg, void (*upcall)(struct prefix *, void *),
+    void (*done)(void *, u_int8_t), int (*throttle)(void *))
+{
+       struct rib_context *ctx;
+
+       if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
+               return -1;
+       ctx->ctx_id = peer->conf.id;
+       ctx->ctx_aid = aid;
+       ctx->ctx_count = count;
+       ctx->ctx_arg = arg;
+       ctx->ctx_prefix_call = upcall;
+       ctx->ctx_done = done;
+       ctx->ctx_throttle = throttle;
+
+       LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
+
+       /* requested a sync traversal */
+       if (count == 0)
+               prefix_dump_r(ctx);
+
+       return 0;
+}
 
 /* dump a prefix into specified buffer */
 int
@@ -1205,7 +1480,7 @@ prefix_bypeer(struct rib_entry *re, stru
 {
        struct prefix   *p;
 
-       LIST_FOREACH(p, &re->prefix_h, rib_l)
+       LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
                if (prefix_peer(p) == peer)
                        return (p);
        return (NULL);
@@ -1237,7 +1512,7 @@ prefix_updateall(struct prefix *p, enum 
        }
 
        /* redo the route decision */
-       LIST_REMOVE(p, rib_l);
+       LIST_REMOVE(p, entry.list.rib);
        /*
         * If the prefix is the active one remove it first,
         * this has to be done because we can not detect when
@@ -1255,6 +1530,10 @@ prefix_updateall(struct prefix *p, enum 
 void
 prefix_destroy(struct prefix *p)
 {
+       /* make route decision */
+       LIST_REMOVE(p, entry.list.rib);
+       prefix_evaluate(NULL, p->re);
+
        prefix_unlink(p);
        prefix_free(p);
 }
@@ -1290,13 +1569,6 @@ prefix_unlink(struct prefix *p)
 {
        struct rib_entry        *re = p->re;
 
-       if (p->eor)     /* nothing to unlink for EoR markers */
-               return;
-
-       /* make route decision */
-       LIST_REMOVE(p, rib_l);
-       prefix_evaluate(NULL, re);
-
        /* destroy all references to other objects */
        nexthop_unlink(p);
        nexthop_unref(p->nexthop);
@@ -1310,7 +1582,7 @@ prefix_unlink(struct prefix *p)
        p->re = NULL;
        p->pt = NULL;
 
-       if (rib_empty(re))
+       if (re && rib_empty(re))
                rib_remove(re);
 
        /*
@@ -1319,7 +1591,7 @@ prefix_unlink(struct prefix *p)
         */
 }
 
-/* alloc and bzero new entry. May not fail. */
+/* alloc and zero new entry. May not fail. */
 static struct prefix *
 prefix_alloc(void)
 {
@@ -1430,7 +1702,7 @@ nexthop_runner(void)
        p = nh->next_prefix;
        for (j = 0; p != NULL && j < RDE_RUNNER_ROUNDS; j++) {
                prefix_updateall(p, nh->state, nh->oldstate);
-               p = LIST_NEXT(p, nexthop_l);
+               p = LIST_NEXT(p, entry.list.nexthop);
        }
 
        /* prep for next run, if not finished readd to tail of queue */
@@ -1540,22 +1812,21 @@ nexthop_link(struct prefix *p)
        if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
                return;
 
-       LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, nexthop_l);
+       p->flags |= PREFIX_NEXTHOP_LINKED;
+       LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, entry.list.nexthop);
 }
 
 void
 nexthop_unlink(struct prefix *p)
 {
-       if (p->nexthop == NULL)
-               return;
-
-       if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
+       if (p->nexthop == NULL || (p->flags & PREFIX_NEXTHOP_LINKED) == 0)
                return;
 
        if (p == p->nexthop->next_prefix)
-               p->nexthop->next_prefix = LIST_NEXT(p, nexthop_l);
+               p->nexthop->next_prefix = LIST_NEXT(p, entry.list.nexthop);
 
-       LIST_REMOVE(p, nexthop_l);
+       p->flags &= ~PREFIX_NEXTHOP_LINKED;
+       LIST_REMOVE(p, entry.list.nexthop);
 }
 
 struct nexthop *
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.119
diff -u -p -r1.119 rde_update.c
--- rde_update.c        2 Jul 2019 12:07:00 -0000       1.119
+++ rde_update.c        4 Jul 2019 08:59:13 -0000
@@ -143,8 +143,7 @@ withdraw:
 
                /* withdraw prefix */
                pt_getaddr(old->pt, &addr);
-               if (prefix_withdraw(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
-                   old->pt->prefixlen) == 1)
+               if (prefix_withdraw(peer, &addr, old->pt->prefixlen) == 1)
                        peer->up_wcnt++;
        } else {
                switch (up_test_update(peer, new)) {
@@ -165,13 +164,11 @@ withdraw:
                }
 
                pt_getaddr(new->pt, &addr);
-               if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr,
-                   new->pt->prefixlen, prefix_vstate(new)) != 2) {
-                       /* only send update if path changed */
-                       prefix_update(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
-                           new->pt->prefixlen);
+
+               /* only send update if path changed */
+               if (prefix_update(peer, &state, &addr, new->pt->prefixlen,
+                   prefix_vstate(new)) == 1)
                        peer->up_nlricnt++;
-               }
 
                rde_filterstate_clean(&state);
        }
@@ -229,11 +226,8 @@ up_generate_default(struct filter_head *
                return;
        }
 
-       if (path_update(&ribs[RIB_ADJ_OUT].rib, peer, &state, &addr, 0,
-           ROA_NOTFOUND) != 2) {
-               prefix_update(&ribs[RIB_ADJ_OUT].rib, peer, &addr, 0);
+       if (prefix_update(peer, &state, &addr, 0, ROA_NOTFOUND) == 1)
                peer->up_nlricnt++;
-       }
 
        /* no longer needed */
        rde_filterstate_clean(&state);
@@ -576,8 +570,13 @@ up_is_eor(struct rde_peer *peer, u_int8_
 
        p = RB_MIN(prefix_tree, &peer->updates[aid]);
        if (p != NULL && p->eor) {
+               /*
+                * Need to remove eor from update tree because
+                * prefix_adjout_destroy() can't handle that.
+                */
                RB_REMOVE(prefix_tree, &peer->updates[aid], p);
-               prefix_destroy(p);
+               p->flags &= ~PREFIX_FLAG_MASK;
+               prefix_adjout_destroy(p);
                return 1;
        }
        return 0;
@@ -616,11 +615,11 @@ up_dump_prefix(u_char *buf, int len, str
 
                /* prefix sent, remove from list and clear flag */
                RB_REMOVE(prefix_tree, prefix_head, p);
-               p->flags = 0;
+               p->flags &= ~PREFIX_FLAG_MASK;
 
                if (withdraw) {
                        /* prefix no longer needed, remove it */
-                       prefix_destroy(p);
+                       prefix_adjout_destroy(p);
                        peer->up_wcnt--;
                        peer->prefix_sent_withdraw++;
                } else {
