On Thu, Oct 25, 2018 at 08:51:30AM +0200, Claudio Jeker wrote:
> Next step on my quest to make the RIB code better.
> This changes the following things:
> - network_flush is now using rib_dump_new to walk the Adj-RIB-In and
>   remove all dynamically added announcements
> - peer_flush got generalized and is now used also in peer_down.
>   It also uses a rib_dump_new call to walk the Adj-RIB-In and remove
>   all prefixes from a peer but this is done synchronous for now.
> - peer_flush is now working correctly for AID_UNSPEC.
> - Change the rib_valid check to continue instead of break out of the loop.
>   The rib table can have holes so the loop needs to continue.
> 
> This removes the last three places that use the peer -> path -> prefix
> tailqs so the next step is to remove these and make rde_aspath just an
> object that is referenced by prefixes. After that adding a proper
> Adj-RIB-Out should be possible.
> 
> Running with this on production :)

I will commit this on Monday unless someone objects or I get some OKs :)

-- 
:wq Claudio

Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.440
diff -u -p -r1.440 rde.c
--- rde.c       24 Oct 2018 08:26:37 -0000      1.440
+++ rde.c       25 Oct 2018 06:34:32 -0000
@@ -97,7 +97,7 @@ struct rde_peer       *peer_add(u_int32_t, str
 struct rde_peer        *peer_get(u_int32_t);
 void            peer_up(u_int32_t, struct session_up *);
 void            peer_down(u_int32_t);
-void            peer_flush(struct rde_peer *, u_int8_t);
+void            peer_flush(struct rde_peer *, u_int8_t, time_t);
 void            peer_stale(u_int32_t, u_int8_t);
 void            peer_dump(u_int32_t, u_int8_t);
 static void     peer_recv_eor(struct rde_peer *, u_int8_t);
@@ -105,7 +105,8 @@ static void  peer_send_eor(struct rde_pe
 
 void            network_add(struct network_config *, int);
 void            network_delete(struct network_config *, int);
-void            network_dump_upcall(struct rib_entry *, void *);
+static void     network_dump_upcall(struct rib_entry *, void *);
+static void     network_flush_upcall(struct rib_entry *, void *);
 
 void            rde_shutdown(void);
 int             sa_cmp(struct bgpd_addr *, struct sockaddr *);
@@ -418,7 +419,7 @@ rde_dispatch_imsg_session(struct imsgbuf
                                    imsg.hdr.peerid);
                                break;
                        }
-                       peer_flush(peer, aid);
+                       peer_flush(peer, aid, peer->staletime[aid]);
                        break;
                case IMSG_SESSION_RESTARTED:
                        if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
@@ -434,7 +435,7 @@ rde_dispatch_imsg_session(struct imsgbuf
                                break;
                        }
                        if (peer->staletime[aid])
-                               peer_flush(peer, aid);
+                               peer_flush(peer, aid, peer->staletime[aid]);
                        break;
                case IMSG_REFRESH:
                        if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
@@ -556,7 +557,10 @@ badnetdel:
                                log_warnx("rde_dispatch: wrong imsg len");
                                break;
                        }
-                       prefix_network_clean(peerself);
+                       if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC,
+                           RDE_RUNNER_ROUNDS, peerself, network_flush_upcall,
+                           NULL, NULL) == -1)
+                               log_warn("rde_dispatch: IMSG_NETWORK_FLUSH");
                        break;
                case IMSG_FILTER_SET:
                        if (imsg.hdr.len - IMSG_HEADER_SIZE !=
@@ -770,7 +774,7 @@ rde_dispatch_imsg_parent(struct imsgbuf 
                        memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
                        for (rid = 0; rid < rib_size; rid++) {
                                if (!rib_valid(rid))
-                                       break;
+                                       continue;
                                ribs[rid].state = RECONF_DELETE;
                        }
                        SIMPLEQ_INIT(&nconf->rde_prefixsets);
@@ -1411,7 +1415,7 @@ rde_update_update(struct rde_peer *peer,
 
        for (i = RIB_LOC_START; i < rib_size; i++) {
                if (!rib_valid(i))
-                       break;
+                       continue;
                rde_filterstate_prep(&state, &in->aspath, in->nexthop,
                    in->nhflags);
                /* input filter */
@@ -1443,7 +1447,7 @@ rde_update_withdraw(struct rde_peer *pee
 
        for (i = RIB_LOC_START; i < rib_size; i++) {
                if (!rib_valid(i))
-                       break;
+                       continue;
                if (prefix_remove(&ribs[i].rib, peer, prefix, prefixlen))
                        rde_update_log("withdraw", i, peer, NULL, prefix,
                            prefixlen);
@@ -2957,11 +2961,10 @@ rde_reload_done(void)
 static void
 rde_softreconfig_in_done(void *arg, u_int8_t aid)
 {
-       struct rib_desc         *rd = arg;
-       struct rde_peer         *peer;
-       u_int16_t                rid;
+       struct rde_peer *peer;
+       u_int16_t        rid;
 
-       if (rd != NULL) {
+       if (arg != NULL) {
                softreconfig--;
                /* one guy done but other dumps are still running */
                if (softreconfig > 0)
@@ -3372,10 +3375,7 @@ peer_up(u_int32_t id, struct session_up 
                 * There is a race condition when doing PEER_ERR -> PEER_DOWN.
                 * So just do a full reset of the peer here.
                 */
-               for (i = 0; i < AID_MAX; i++) {
-                       peer->staletime[i] = 0;
-                       peer_flush(peer, i);
-               }
+               peer_flush(peer, AID_UNSPEC, 0);
                up_down(peer);
                peer->prefix_cnt = 0;
                peer->state = PEER_DOWN;
@@ -3412,7 +3412,6 @@ void
 peer_down(u_int32_t id)
 {
        struct rde_peer         *peer;
-       struct rde_aspath       *asp, *nasp;
 
        peer = peer_get(id);
        if (peer == NULL) {
@@ -3425,49 +3424,85 @@ peer_down(u_int32_t id)
        /* stop all pending dumps which may depend on this peer */
        rib_dump_terminate(peer->loc_rib_id, peer, rde_up_dump_upcall);
 
-       /* walk through per peer RIB list and remove all prefixes. */
-       for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = nasp) {
-               nasp = TAILQ_NEXT(asp, peer_l);
-               path_remove(asp);
-       }
-       TAILQ_INIT(&peer->path_h);
-       peer->prefix_cnt = 0;
+       peer_flush(peer, AID_UNSPEC, 0);
 
-       /* Deletions are performed in path_remove() */
-       rde_send_pftable_commit();
+       peer->prefix_cnt = 0;
 
        LIST_REMOVE(peer, hash_l);
        LIST_REMOVE(peer, peer_l);
        free(peer);
 }
 
+struct peer_flush {
+       struct rde_peer *peer;
+       time_t           staletime;
+};
+
+static void
+peer_flush_upcall(struct rib_entry *re, void *arg)
+{
+       struct rde_peer *peer = ((struct peer_flush *)arg)->peer;
+       struct rde_aspath *asp;
+       struct bgpd_addr addr;
+       struct prefix *p, *np, *rp;
+       time_t staletime = ((struct peer_flush *)arg)->staletime;
+       u_int32_t i;
+       u_int8_t prefixlen;
+       
+       pt_getaddr(re->prefix, &addr);
+       prefixlen = re->prefix->prefixlen;
+       LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
+               if (peer != prefix_peer(p))
+                       continue;
+               if (staletime && p->lastchange > staletime)
+                       continue;
+
+               for (i = RIB_LOC_START; i < rib_size; i++) {
+                       if (!rib_valid(i))
+                               continue;
+                       rp = prefix_get(&ribs[i].rib, peer, &addr, prefixlen);
+                       if (rp) {
+                               asp = prefix_aspath(rp);
+                               if (asp->pftableid)
+                                       rde_send_pftable(asp->pftableid, &addr,
+                                           prefixlen, 1);
+
+                               prefix_destroy(rp);
+                               rde_update_log("flush", i, peer, NULL,
+                                   &addr, prefixlen);
+                       }
+               }
+
+               prefix_destroy(p);
+               peer->prefix_cnt--;
+       }
+}
+
 /*
  * Flush all routes older then staletime. If staletime is 0 all routes will
  * be flushed.
  */
 void
-peer_flush(struct rde_peer *peer, u_int8_t aid)
+peer_flush(struct rde_peer *peer, u_int8_t aid, time_t staletime)
 {
-       struct rde_aspath       *asp, *nasp;
-       u_int32_t                rprefixes;
+       struct peer_flush pf = { peer, staletime };
 
-       rprefixes = 0;
-       /* walk through per peer RIB list and remove all stale prefixes. */
-       for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = nasp) {
-               nasp = TAILQ_NEXT(asp, peer_l);
-               rprefixes += path_remove_stale(asp, aid, peer->staletime[aid]);
-       }
+       /* this dump must run synchronous, too much depends on that right now */
+       if (rib_dump_new(RIB_ADJ_IN, aid, 0, &pf, peer_flush_upcall,
+           NULL, NULL) == -1)
+               fatal("%s: rib_dump_new", __func__);
 
        /* Deletions are performed in path_remove() */
        rde_send_pftable_commit();
 
        /* flushed no need to keep staletime */
-       peer->staletime[aid] = 0;
-
-       if (peer->prefix_cnt > rprefixes)
-               peer->prefix_cnt -= rprefixes;
-       else
-               peer->prefix_cnt = 0;
+       if (aid == AID_UNSPEC) {
+               u_int8_t i;
+               for (i = 0; i < AID_MAX; i++)
+                       peer->staletime[i] = 0;
+       } else {
+               peer->staletime[aid] = 0;
+       }
 }
 
 void
@@ -3484,7 +3519,7 @@ peer_stale(u_int32_t id, u_int8_t aid)
 
        /* flush the now even staler routes out */
        if (peer->staletime[aid])
-               peer_flush(peer, aid);
+               peer_flush(peer, aid, peer->staletime[aid]);
        peer->staletime[aid] = now = time(NULL);
 
        /* make sure new prefixes start on a higher timestamp */
@@ -3661,7 +3696,7 @@ network_add(struct network_config *nc, i
                peerself->prefix_cnt++;
        for (i = RIB_LOC_START; i < rib_size; i++) {
                if (!rib_valid(i))
-                       break;
+                       continue;
                rde_update_log("announce", i, peerself,
                    state.nexthop ? &state.nexthop->exit_nexthop : NULL,
                    &nc->prefix, nc->prefixlen);
@@ -3713,19 +3748,18 @@ network_delete(struct network_config *nc
 
        for (i = RIB_LOC_START; i < rib_size; i++) {
                if (!rib_valid(i))
-                       break;
+                       continue;
                if (prefix_remove(&ribs[i].rib, peerself, &nc->prefix,
                    nc->prefixlen))
                        rde_update_log("withdraw announce", i, peerself,
                            NULL, &nc->prefix, nc->prefixlen);
-
        }
        if (prefix_remove(&ribs[RIB_ADJ_IN].rib, peerself, &nc->prefix,
            nc->prefixlen))
                peerself->prefix_cnt--;
 }
 
-void
+static void
 network_dump_upcall(struct rib_entry *re, void *ptr)
 {
        struct prefix           *p;
@@ -3759,6 +3793,41 @@ network_dump_upcall(struct rib_entry *re
        }
 }
 
+static void
+network_flush_upcall(struct rib_entry *re, void *ptr)
+{
+       struct rde_peer *peer = ptr;
+       struct rde_aspath *asp;
+       struct bgpd_addr addr;
+       struct prefix *p, *np, *rp;
+       u_int32_t i;
+       u_int8_t prefixlen;
+       
+       pt_getaddr(re->prefix, &addr);
+       prefixlen = re->prefix->prefixlen;
+       LIST_FOREACH_SAFE(p, &re->prefix_h, rib_l, np) {
+               if (prefix_peer(p) != peer)
+                       continue;
+               asp = prefix_aspath(p);
+               if ((asp->flags & F_ANN_DYNAMIC) != F_ANN_DYNAMIC)
+                       continue;
+
+               for (i = RIB_LOC_START; i < rib_size; i++) {
+                       if (!rib_valid(i))
+                               continue;
+                       rp = prefix_get(&ribs[i].rib, peer, &addr, prefixlen);
+                       if (rp) {
+                               prefix_destroy(rp);
+                               rde_update_log("flush announce", i, peer,
+                                   NULL, &addr, prefixlen);
+                       }
+               }
+
+               prefix_destroy(p);
+               peer->prefix_cnt--;
+       }
+}
+
 /* clean up */
 void
 rde_shutdown(void)
@@ -3782,7 +3851,7 @@ rde_shutdown(void)
        filterlist_free(out_rules);
        for (i = 0; i < rib_size; i++) {
                if (!rib_valid(i))
-                       break;
+                       continue;
                filterlist_free(ribs[i].in_rules);
        }
 
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.198
diff -u -p -r1.198 rde.h
--- rde.h       24 Oct 2018 08:26:37 -0000      1.198
+++ rde.h       24 Oct 2018 21:27:25 -0000
@@ -477,7 +477,6 @@ void                 path_hash_stats(struct rde_hashst
 int             path_update(struct rib *, struct rde_peer *,
                     struct filterstate *, struct bgpd_addr *, int, u_int8_t);
 int             path_compare(struct rde_aspath *, struct rde_aspath *);
-void            path_remove(struct rde_aspath *);
 u_int32_t       path_remove_stale(struct rde_aspath *, u_int8_t, time_t);
 void            path_destroy(struct rde_aspath *);
 int             path_empty(struct rde_aspath *);
@@ -498,7 +497,6 @@ struct prefix       *prefix_bypeer(struct rib_
 void            prefix_updateall(struct prefix *, enum nexthop_state,
                     enum nexthop_state);
 void            prefix_destroy(struct prefix *);
-void            prefix_network_clean(struct rde_peer *);
 void            prefix_relink(struct prefix *, struct rde_aspath *, int);
 
 static inline struct rde_peer *
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.180
diff -u -p -r1.180 rde_rib.c
--- rde_rib.c   24 Oct 2018 08:26:37 -0000      1.180
+++ rde_rib.c   24 Oct 2018 08:44:54 -0000
@@ -339,8 +339,8 @@ rib_restart(struct rib_context *ctx)
 static void
 rib_dump_r(struct rib_context *ctx)
 {
+       struct rib_entry        *re, *next;
        struct rib              *rib;
-       struct rib_entry        *re;
        unsigned int             i;
 
        rib = rib_byid(ctx->ctx_rib_id);
@@ -352,7 +352,8 @@ rib_dump_r(struct rib_context *ctx)
        else
                re = rib_restart(ctx);
 
-       for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) {
+       for (i = 0; re != NULL; re = next) {
+               next = RB_NEXT(rib_tree, unused, re);
                if (re->rib_id != ctx->ctx_rib_id)
                        fatalx("%s: Unexpected RIB %u != %u.", __func__,
                            re->rib_id, ctx->ctx_rib_id);
@@ -435,6 +436,10 @@ rib_dump_new(u_int16_t id, u_int8_t aid,
 
        LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
 
+       /* requested a sync traversal */
+       if (count == 0)
+               rib_dump_r(ctx);
+
        return 0;
 }
 
@@ -664,65 +669,6 @@ path_lookup(struct rde_aspath *aspath, s
        return (NULL);
 }
 
-void
-path_remove(struct rde_aspath *asp)
-{
-       struct prefix   *p, *np;
-
-       for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = np) {
-               np = TAILQ_NEXT(p, path_l);
-               if (asp->pftableid) {
-                       struct bgpd_addr addr;
-
-                       pt_getaddr(p->re->prefix, &addr);
-                       /* Commit is done in peer_down() */
-                       rde_send_pftable(prefix_aspath(p)->pftableid, &addr,
-                           p->re->prefix->prefixlen, 1);
-               }
-               prefix_destroy(p);
-       }
-}
-
-/* remove all stale routes or if staletime is 0 remove all routes for
-   a specified AID. */
-u_int32_t
-path_remove_stale(struct rde_aspath *asp, u_int8_t aid, time_t staletime)
-{
-       struct prefix   *p, *np;
-       u_int32_t        rprefixes;
-
-       rprefixes=0;
-       /*
-        * This is called when a session flapped and during that time
-        * the pending updates for that peer are getting reset.
-        */
-       for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = np) {
-               np = TAILQ_NEXT(p, path_l);
-               if (p->re->prefix->aid != aid)
-                       continue;
-
-               if (staletime && p->lastchange > staletime)
-                       continue;
-
-               if (asp->pftableid) {
-                       struct bgpd_addr addr;
-
-                       pt_getaddr(p->re->prefix, &addr);
-                       /* Commit is done in peer_flush() */
-                       rde_send_pftable(prefix_aspath(p)->pftableid, &addr,
-                           p->re->prefix->prefixlen, 1);
-               }
-
-               /* only count Adj-RIB-In */
-               if (re_rib(p->re) == &ribs[RIB_ADJ_IN].rib)
-                       rprefixes++;
-
-               prefix_destroy(p);
-       }
-       return (rprefixes);
-}
-
-
 /*
  * This function can only called when all prefix have been removed first.
  * Normally this happens directly out of the prefix removal functions.
@@ -1144,29 +1090,6 @@ prefix_destroy(struct prefix *p)
 
        if (path_empty(asp))
                path_destroy(asp);
-}
-
-/*
- * helper function to clean up the dynamically added networks
- */
-void
-prefix_network_clean(struct rde_peer *peer)
-{
-       struct rde_aspath       *asp, *xasp;
-       struct prefix           *p, *xp;
-
-       for (asp = TAILQ_FIRST(&peer->path_h); asp != NULL; asp = xasp) {
-               xasp = TAILQ_NEXT(asp, peer_l);
-               if ((asp->flags & F_ANN_DYNAMIC) != F_ANN_DYNAMIC)
-                       continue;
-               for (p = TAILQ_FIRST(&asp->prefixes); p != NULL; p = xp) {
-                       xp = TAILQ_NEXT(p, path_l);
-                       prefix_unlink(p);
-                       prefix_free(p);
-               }
-               if (path_empty(asp))
-                       path_destroy(asp);
-       }
 }
 
 /*

Reply via email to