On my big IXP RS test setup with 600 peers, the rde_filter call in the
output path is rather expensive. By splitting the output ruleset into
per-peer output rulesets, the runtime is reduced by around 5 minutes
on a 20-minute run.
So, when a peer is added or on config reload, the output ruleset is
compiled into a per-peer output ruleset. The filter code will still
recheck these bits (from/to peer) but will no longer fail on them.
With 600 peers, even with skip steps the filter still touches far too
many output rules, so this change results in a big speedup.
--
:wq Claudio
Index: bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.462
diff -u -p -r1.462 bgpd.h
--- bgpd.h 9 Mar 2023 13:12:19 -0000 1.462
+++ bgpd.h 9 Mar 2023 14:11:27 -0000
@@ -1110,11 +1110,10 @@ struct filter_rule {
struct filter_peers peer;
struct filter_match match;
struct filter_set_head set;
-#define RDE_FILTER_SKIP_DIR 0
+#define RDE_FILTER_SKIP_PEERID 0
#define RDE_FILTER_SKIP_GROUPID 1
#define RDE_FILTER_SKIP_REMOTE_AS 2
-#define RDE_FILTER_SKIP_PEERID 3
-#define RDE_FILTER_SKIP_COUNT 4
+#define RDE_FILTER_SKIP_COUNT 3
struct filter_rule *skip[RDE_FILTER_SKIP_COUNT];
enum filter_actions action;
enum directions dir;
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.594
diff -u -p -r1.594 rde.c
--- rde.c 9 Mar 2023 13:12:19 -0000 1.594
+++ rde.c 9 Mar 2023 14:11:27 -0000
@@ -198,17 +198,16 @@ rde_main(int debug, int verbose)
imsg_init(ibuf_main, 3);
/* initialize the RIB structures */
+ if ((out_rules = calloc(1, sizeof(struct filter_head))) == NULL)
+ fatal(NULL);
+ TAILQ_INIT(out_rules);
+
pt_init();
- peer_init();
+ peer_init(out_rules);
/* make sure the default RIBs are setup */
rib_new("Adj-RIB-In", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE);
- out_rules = calloc(1, sizeof(struct filter_head));
- if (out_rules == NULL)
- fatal(NULL);
- TAILQ_INIT(out_rules);
-
conf = new_config();
log_info("route decision engine ready");
@@ -396,7 +395,7 @@ rde_dispatch_imsg_session(struct imsgbuf
if (imsg.hdr.len - IMSG_HEADER_SIZE !=
sizeof(struct peer_config))
fatalx("incorrect size of session request");
- peer = peer_add(imsg.hdr.peerid, imsg.data);
+ peer = peer_add(imsg.hdr.peerid, imsg.data, out_rules);
/* make sure rde_eval_all is on if needed. */
if (peer->conf.flags & PEERFLAG_EVALUATE_ALL)
rde_eval_all = 1;
@@ -896,6 +895,7 @@ rde_dispatch_imsg_parent(struct imsgbuf
if ((rib = rib_byid(rib_find(r->rib))) == NULL) {
log_warnx("IMSG_RECONF_FILTER: filter rule "
"for nonexistent rib %s", r->rib);
+ filterset_free(&r->set);
free(r);
break;
}
@@ -911,8 +911,9 @@ rde_dispatch_imsg_parent(struct imsgbuf
rib->in_rules_tmp = nr;
}
TAILQ_INSERT_TAIL(nr, r, entry);
- } else
+ } else {
TAILQ_INSERT_TAIL(out_rules_tmp, r, entry);
+ }
break;
case IMSG_RECONF_PREFIX_SET:
case IMSG_RECONF_ORIGIN_SET:
@@ -3559,20 +3560,15 @@ rde_reload_done(void)
rde_mark_prefixsets_dirty(&originsets_old, &conf->rde_originsets);
as_sets_mark_dirty(&as_sets_old, &conf->as_sets);
- /*
- * make the new filter rules the active one but keep the old for
- * softrconfig. This is needed so that changes happening are using
- * the right filters.
- */
- fh = out_rules;
- out_rules = out_rules_tmp;
- out_rules_tmp = fh;
-
- rde_filter_calc_skip_steps(out_rules);
/* make sure that rde_eval_all is correctly set after a config change */
rde_eval_all = 0;
+ /* Make the new outbound filter rules the active one. */
+ filterlist_free(out_rules);
+ out_rules = out_rules_tmp;
+ out_rules_tmp = NULL;
+
/* check if filter changed */
RB_FOREACH(peer, peer_tree, &peertable) {
if (peer->conf.id == 0) /* ignore peerself */
@@ -3641,12 +3637,17 @@ rde_reload_done(void)
softreconfig++; /* account for the running flush */
continue;
}
- if (!rde_filter_equal(out_rules, out_rules_tmp, peer)) {
+
+ /* reapply outbound filters for this peer */
+ fh = peer_apply_out_filter(peer, out_rules);
+
+ if (!rde_filter_equal(peer->out_rules, fh)) {
char *p = log_fmt_peer(&peer->conf);
log_debug("out filter change: reloading peer %s", p);
free(p);
peer->reconf_out = 1;
}
+ filterlist_free(fh);
}
/* bring ribs in sync */
@@ -3696,8 +3697,7 @@ rde_reload_done(void)
rib->state = RECONF_KEEP;
/* FALLTHROUGH */
case RECONF_KEEP:
- if (rde_filter_equal(rib->in_rules,
- rib->in_rules_tmp, NULL))
+ if (rde_filter_equal(rib->in_rules, rib->in_rules_tmp))
/* rib is in sync */
break;
log_debug("in filter change: reloading RIB %s",
@@ -3717,8 +3717,6 @@ rde_reload_done(void)
rib->in_rules_tmp = NULL;
}
- filterlist_free(out_rules_tmp);
- out_rules_tmp = NULL;
/* old filters removed, free all sets */
free_rde_prefixsets(&prefixsets_old);
free_rde_prefixsets(&originsets_old);
@@ -3789,8 +3787,7 @@ rde_softreconfig_in_done(void *arg, uint
/* just resend the default route */
for (aid = 0; aid < AID_MAX; aid++) {
if (peer->capa.mp[aid])
- up_generate_default(out_rules,
- peer, aid);
+ up_generate_default(peer, aid);
}
peer->reconf_out = 0;
} else
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.284
diff -u -p -r1.284 rde.h
--- rde.h 9 Mar 2023 13:12:19 -0000 1.284
+++ rde.h 9 Mar 2023 14:11:27 -0000
@@ -88,6 +88,7 @@ struct rde_peer {
struct prefix_index adj_rib_out;
struct prefix_tree updates[AID_MAX];
struct prefix_tree withdraws[AID_MAX];
+ struct filter_head *out_rules;
time_t staletime[AID_MAX];
uint32_t remote_bgpid; /* host byte order! */
uint32_t path_id_tx;
@@ -401,12 +402,14 @@ int rde_match_peer(struct rde_peer *, s
int peer_has_as4byte(struct rde_peer *);
int peer_has_add_path(struct rde_peer *, uint8_t, int);
int peer_accept_no_as_set(struct rde_peer *);
-void peer_init(void);
+void peer_init(struct filter_head *);
void peer_shutdown(void);
void peer_foreach(void (*)(struct rde_peer *, void *), void *);
struct rde_peer *peer_get(uint32_t);
struct rde_peer *peer_match(struct ctl_neighbor *, uint32_t);
-struct rde_peer *peer_add(uint32_t, struct peer_config *);
+struct rde_peer *peer_add(uint32_t, struct peer_config *, struct
filter_head *);
+struct filter_head *peer_apply_out_filter(struct rde_peer *,
+ struct filter_head *);
void rde_generate_updates(struct rib_entry *, struct prefix *,
struct prefix *, enum eval_mode);
@@ -532,14 +535,14 @@ void prefix_evaluate_nexthop(struct pr
/* rde_filter.c */
void rde_apply_set(struct filter_set_head *, struct rde_peer *,
- struct rde_peer *, struct filterstate *, uint8_t);
+ struct rde_peer *, struct filterstate *, u_int8_t);
void rde_filterstate_init(struct filterstate *);
void rde_filterstate_prep(struct filterstate *, struct prefix *);
void rde_filterstate_copy(struct filterstate *, struct filterstate *);
void rde_filterstate_set_vstate(struct filterstate *, uint8_t, uint8_t);
void rde_filterstate_clean(struct filterstate *);
-int rde_filter_equal(struct filter_head *, struct filter_head *,
- struct rde_peer *);
+int rde_filter_skip_rule(struct rde_peer *, struct filter_rule *);
+int rde_filter_equal(struct filter_head *, struct filter_head *);
void rde_filter_calc_skip_steps(struct filter_head *);
enum filter_actions rde_filter(struct filter_head *, struct rde_peer *,
struct rde_peer *, struct bgpd_addr *, uint8_t,
@@ -727,15 +730,11 @@ int nexthop_compare(struct nexthop *,
/* rde_update.c */
void up_init(struct rde_peer *);
-void up_generate_updates(struct filter_head *, struct rde_peer *,
- struct rib_entry *);
-void up_generate_addpath(struct filter_head *, struct rde_peer *,
- struct rib_entry *);
-void up_generate_addpath_all(struct filter_head *,
- struct rde_peer *, struct rib_entry *, struct prefix *,
- struct prefix *);
-void up_generate_default(struct filter_head *, struct rde_peer *,
- uint8_t);
+void up_generate_updates(struct rde_peer *, struct rib_entry *);
+void up_generate_addpath(struct rde_peer *, struct rib_entry *);
+void up_generate_addpath_all(struct rde_peer *, struct rib_entry *,
+ struct prefix *, struct prefix *);
+void up_generate_default(struct rde_peer *, uint8_t);
int up_is_eor(struct rde_peer *, uint8_t);
int up_dump_withdraws(u_char *, int, struct rde_peer *, uint8_t);
int up_dump_mp_unreach(u_char *, int, struct rde_peer *, uint8_t);
Index: rde_filter.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_filter.c,v
retrieving revision 1.133
diff -u -p -r1.133 rde_filter.c
--- rde_filter.c 24 Jan 2023 14:13:12 -0000 1.133
+++ rde_filter.c 3 Mar 2023 11:05:18 -0000
@@ -319,7 +319,7 @@ rde_filter_match(struct filter_rule *f,
}
/* return true when the rule f can never match for this peer */
-static int
+int
rde_filter_skip_rule(struct rde_peer *peer, struct filter_rule *f)
{
/* if any of the two is unset then rule can't be skipped */
@@ -350,8 +350,7 @@ rde_filter_skip_rule(struct rde_peer *pe
}
int
-rde_filter_equal(struct filter_head *a, struct filter_head *b,
- struct rde_peer *peer)
+rde_filter_equal(struct filter_head *a, struct filter_head *b)
{
struct filter_rule *fa, *fb;
struct rde_prefixset *psa, *psb, *osa, *osb;
@@ -362,16 +361,6 @@ rde_filter_equal(struct filter_head *a,
fb = b ? TAILQ_FIRST(b) : NULL;
while (fa != NULL || fb != NULL) {
- /* skip all rules with wrong peer */
- if (rde_filter_skip_rule(peer, fa)) {
- fa = TAILQ_NEXT(fa, entry);
- continue;
- }
- if (rde_filter_skip_rule(peer, fb)) {
- fb = TAILQ_NEXT(fb, entry);
- continue;
- }
-
/* compare the two rules */
if ((fa == NULL && fb != NULL) || (fa != NULL && fb == NULL))
/* new rule added or removed */
@@ -805,12 +794,12 @@ rde_filter_calc_skip_steps(struct filter
for (i = 0; i < RDE_FILTER_SKIP_COUNT; ++i)
head[i] = cur;
while (cur != NULL) {
+ if (cur->peer.peerid != prev->peer.peerid)
+ RDE_FILTER_SET_SKIP_STEPS(RDE_FILTER_SKIP_PEERID);
if (cur->peer.groupid != prev->peer.groupid)
RDE_FILTER_SET_SKIP_STEPS(RDE_FILTER_SKIP_GROUPID);
if (cur->peer.remote_as != prev->peer.remote_as)
RDE_FILTER_SET_SKIP_STEPS(RDE_FILTER_SKIP_REMOTE_AS);
- if (cur->peer.peerid != prev->peer.peerid)
- RDE_FILTER_SET_SKIP_STEPS(RDE_FILTER_SKIP_PEERID);
prev = cur;
cur = TAILQ_NEXT(cur, entry);
}
@@ -848,6 +837,10 @@ rde_filter(struct filter_head *rules, st
f = TAILQ_FIRST(rules);
while (f != NULL) {
RDE_FILTER_TEST_ATTRIB(
+ (f->peer.peerid &&
+ f->peer.peerid != peer->conf.id),
+ f->skip[RDE_FILTER_SKIP_PEERID]);
+ RDE_FILTER_TEST_ATTRIB(
(f->peer.groupid &&
f->peer.groupid != peer->conf.groupid),
f->skip[RDE_FILTER_SKIP_GROUPID]);
@@ -855,10 +848,6 @@ rde_filter(struct filter_head *rules, st
(f->peer.remote_as &&
f->peer.remote_as != peer->conf.remote_as),
f->skip[RDE_FILTER_SKIP_REMOTE_AS]);
- RDE_FILTER_TEST_ATTRIB(
- (f->peer.peerid &&
- f->peer.peerid != peer->conf.id),
- f->skip[RDE_FILTER_SKIP_PEERID]);
if (rde_filter_match(f, peer, from, state, prefix, plen)) {
rde_apply_set(&f->set, peer, from, state, prefix->aid);
Index: rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
retrieving revision 1.30
diff -u -p -r1.30 rde_peer.c
--- rde_peer.c 9 Mar 2023 13:12:19 -0000 1.30
+++ rde_peer.c 9 Mar 2023 14:12:46 -0000
@@ -38,8 +38,6 @@ struct iq {
struct imsg imsg;
};
-extern struct filter_head *out_rules;
-
int
peer_has_as4byte(struct rde_peer *peer)
{
@@ -68,7 +66,7 @@ peer_accept_no_as_set(struct rde_peer *p
}
void
-peer_init(void)
+peer_init(struct filter_head *rules)
{
struct peer_config pc;
@@ -78,7 +76,7 @@ peer_init(void)
snprintf(pc.descr, sizeof(pc.descr), "LOCAL");
pc.id = PEER_ID_SELF;
- peerself = peer_add(PEER_ID_SELF, &pc);
+ peerself = peer_add(PEER_ID_SELF, &pc, rules);
peerself->state = PEER_UP;
}
@@ -132,13 +130,13 @@ peer_match(struct ctl_neighbor *n, uint3
for (; peer != NULL; peer = RB_NEXT(peer_tree, &peertable, peer)) {
if (rde_match_peer(peer, n))
- return (peer);
+ return peer;
}
- return (NULL);
+ return NULL;
}
struct rde_peer *
-peer_add(uint32_t id, struct peer_config *p_conf)
+peer_add(uint32_t id, struct peer_config *p_conf, struct filter_head *rules)
{
struct rde_peer *peer;
int conflict;
@@ -164,6 +162,8 @@ peer_add(uint32_t id, struct peer_config
peer->flags = peer->conf.flags;
SIMPLEQ_INIT(&peer->imsg_queue);
+ peer_apply_out_filter(peer, rules);
+
/*
* Assign an even random unique transmit path id.
* Odd path_id_tx numbers are for peers using add-path recv.
@@ -187,6 +187,32 @@ peer_add(uint32_t id, struct peer_config
return (peer);
}
+struct filter_head *
+peer_apply_out_filter(struct rde_peer *peer, struct filter_head *rules)
+{
+ struct filter_head *old;
+ struct filter_rule *fr, *new;
+
+ old = peer->out_rules;
+ if ((peer->out_rules = malloc(sizeof(*peer->out_rules))) == NULL)
+ fatal(NULL);
+ TAILQ_INIT(peer->out_rules);
+
+ TAILQ_FOREACH(fr, rules, entry) {
+ if (rde_filter_skip_rule(peer, fr))
+ continue;
+
+ if ((new = malloc(sizeof(*new))) == NULL)
+ fatal(NULL);
+ memcpy(new, fr, sizeof(*new));
+ filterset_copy(&fr->set, &new->set);
+
+ TAILQ_INSERT_TAIL(peer->out_rules, new, entry);
+ }
+
+ return old;
+}
+
static inline int
peer_cmp(struct rde_peer *a, struct rde_peer *b)
{
@@ -231,17 +257,16 @@ peer_generate_update(struct rde_peer *pe
/* handle peers with add-path */
if (peer_has_add_path(peer, aid, CAPA_AP_SEND)) {
if (peer->eval.mode == ADDPATH_EVAL_ALL)
- up_generate_addpath_all(out_rules, peer, re,
- newpath, oldpath);
+ up_generate_addpath_all(peer, re, newpath, oldpath);
else
- up_generate_addpath(out_rules, peer, re);
+ up_generate_addpath(peer, re);
return;
}
/* skip regular peers if the best path didn't change */
if (mode == EVAL_ALL && (peer->flags & PEERFLAG_EVALUATE_ALL) == 0)
return;
- up_generate_updates(out_rules, peer, re);
+ up_generate_updates(peer, re);
}
void
@@ -362,6 +387,7 @@ rde_up_dump_upcall(struct rib_entry *re,
if ((p = prefix_best(re)) == NULL)
/* no eligible prefix, not even for 'evaluate all' */
return;
+
peer_generate_update(peer, re, NULL, NULL, 0);
}
@@ -449,6 +475,9 @@ peer_down(struct rde_peer *peer, void *b
peer->stats.prefix_cnt = 0;
peer->stats.prefix_out_cnt = 0;
+ /* free filters */
+ filterlist_free(peer->out_rules);
+
RB_REMOVE(peer_tree, &peertable, peer);
free(peer);
}
@@ -531,7 +560,7 @@ peer_dump(struct rde_peer *peer, uint8_t
if (peer->capa.grestart.restart)
prefix_add_eor(peer, aid);
} else if (peer->export_type == EXPORT_DEFAULT_ROUTE) {
- up_generate_default(out_rules, peer, aid);
+ up_generate_default(peer, aid);
rde_up_dump_done(peer, aid);
} else {
if (rib_dump_new(peer->loc_rib_id, aid, RDE_RUNNER_ROUNDS, peer,
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.157
diff -u -p -r1.157 rde_update.c
--- rde_update.c 9 Mar 2023 13:12:19 -0000 1.157
+++ rde_update.c 9 Mar 2023 14:11:27 -0000
@@ -150,8 +150,8 @@ up_enforce_open_policy(struct rde_peer *
* - UP_EXCLUDED if prefix was excluded because of up_test_update()
*/
static enum up_state
-up_process_prefix(struct filter_head *rules, struct rde_peer *peer,
- struct prefix *new, struct prefix *p, struct bgpd_addr *addr, uint8_t plen)
+up_process_prefix(struct rde_peer *peer, struct prefix *new, struct prefix *p,
+ struct bgpd_addr *addr, uint8_t plen)
{
struct filterstate state;
int excluded = 0;
@@ -166,8 +166,8 @@ up_process_prefix(struct filter_head *ru
excluded = 1;
rde_filterstate_prep(&state, new);
- if (rde_filter(rules, peer, prefix_peer(new), addr, plen, &state) ==
- ACTION_DENY) {
+ if (rde_filter(peer->out_rules, peer, prefix_peer(new), addr, plen,
+ &state) == ACTION_DENY) {
rde_filterstate_clean(&state);
return UP_FILTERED;
}
@@ -206,8 +206,7 @@ up_process_prefix(struct filter_head *ru
}
void
-up_generate_updates(struct filter_head *rules, struct rde_peer *peer,
- struct rib_entry *re)
+up_generate_updates(struct rde_peer *peer, struct rib_entry *re)
{
struct bgpd_addr addr;
struct prefix *new, *p;
@@ -220,8 +219,7 @@ up_generate_updates(struct filter_head *
new = prefix_best(re);
while (new != NULL) {
- switch (up_process_prefix(rules, peer, new, p,
- &addr, prefixlen)) {
+ switch (up_process_prefix(peer, new, p, &addr, prefixlen)) {
case UP_OK:
case UP_ERR_LIMIT:
return;
@@ -251,8 +249,7 @@ done:
* less churn is needed.
*/
void
-up_generate_addpath(struct filter_head *rules, struct rde_peer *peer,
- struct rib_entry *re)
+up_generate_addpath(struct rde_peer *peer, struct rib_entry *re)
{
struct bgpd_addr addr;
struct prefix *head, *new, *p;
@@ -313,8 +310,8 @@ up_generate_addpath(struct filter_head *
}
}
- switch (up_process_prefix(rules, peer, new, (void *)-1,
- &addr, prefixlen)) {
+ switch (up_process_prefix(peer, new, (void *)-1, &addr,
+ prefixlen)) {
case UP_OK:
maxpaths++;
extrapaths += extra;
@@ -345,8 +342,8 @@ up_generate_addpath(struct filter_head *
* are distributed just remove old and add new.
*/
void
-up_generate_addpath_all(struct filter_head *rules, struct rde_peer *peer,
- struct rib_entry *re, struct prefix *new, struct prefix *old)
+up_generate_addpath_all(struct rde_peer *peer, struct rib_entry *re,
+ struct prefix *new, struct prefix *old)
{
struct bgpd_addr addr;
struct prefix *p, *head = NULL;
@@ -379,8 +376,8 @@ up_generate_addpath_all(struct filter_he
/* add new path (or multiple if all is set) */
while (new != NULL) {
- switch (up_process_prefix(rules, peer, new, (void *)-1,
- &addr, prefixlen)) {
+ switch (up_process_prefix(peer, new, (void *)-1, &addr,
+ prefixlen)) {
case UP_OK:
case UP_FILTERED:
case UP_EXCLUDED:
@@ -414,8 +411,7 @@ int rib_empty(struct rib_entry *);
/* send a default route to the specified peer */
void
-up_generate_default(struct filter_head *rules, struct rde_peer *peer,
- uint8_t aid)
+up_generate_default(struct rde_peer *peer, uint8_t aid)
{
extern struct rde_peer *peerself;
struct filterstate state;
@@ -444,7 +440,7 @@ up_generate_default(struct filter_head *
p = prefix_adjout_lookup(peer, &addr, 0);
/* outbound filter as usual */
- if (rde_filter(rules, peer, peerself, &addr, 0, &state) ==
+ if (rde_filter(peer->out_rules, peer, peerself, &addr, 0, &state) ==
ACTION_DENY) {
rde_filterstate_clean(&state);
return;