On Wed, Feb 07, 2018 at 05:52:09AM +0100, Claudio Jeker wrote:
> This diff changes the way bgpd does updates. Instead of having its own
> special update queue/tree it uses a regular RIB (Adj-RIB-Out) to store
> all updates that need to be sent. Prefixes that have been sent are
> linked back onto the regular prefixes queue, and the peer keeps queues
> for pending updates and withdraws.
> The update code as a whole becomes a lot simpler, but reworking it also
> accounts for the bulk of the diff. Other changes include the bgpctl
> show rib handling (we can now just walk the Adj-RIB-Out). Last but not
> least, EOR records are now a magic rde_aspath (flag F_ATTR_EOR) which
> is added to the update queue.
>
> This diff is still very large and the changes are intrusive, so reviews
> and testing are very welcome.
No news on this? Anyone?
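
To make the new queueing model easier to review, here is a small
standalone sketch (not part of the diff; all structs and names are
simplified stand-ins for the real rde_aspath/prefix objects). Every
queued aspath drags along the prefixes that share its attributes, and an
End-of-RIB marker is just an aspath flagged F_ATTR_EOR with an empty
prefix list, which is roughly how up_dump_attrnlri() treats it in the
diff below.

#include <sys/queue.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

#define TOY_F_UPDATE	0x01	/* stand-in for F_ATTR_UPDATE */
#define TOY_F_EOR	0x02	/* stand-in for F_ATTR_EOR */

struct toy_prefix {
	TAILQ_ENTRY(toy_prefix)	 entry;
	const char		*name;
};

struct toy_aspath {
	TAILQ_ENTRY(toy_aspath)	 update_l;
	TAILQ_HEAD(, toy_prefix) updates;	/* prefixes sharing these attrs */
	const char		*attrs;		/* stand-in for encoded attributes */
	int			 flags;
};

TAILQ_HEAD(toy_aspath_queue, toy_aspath);

static struct toy_aspath *
toy_path_get(const char *attrs, int flags)
{
	struct toy_aspath	*asp;

	if ((asp = calloc(1, sizeof(*asp))) == NULL)
		err(1, NULL);
	TAILQ_INIT(&asp->updates);
	asp->attrs = attrs;
	asp->flags = flags;
	return (asp);
}

static void
toy_queue_prefix(struct toy_aspath *asp, const char *name)
{
	struct toy_prefix	*p;

	if ((p = calloc(1, sizeof(*p))) == NULL)
		err(1, NULL);
	p->name = name;
	TAILQ_INSERT_TAIL(&asp->updates, p, entry);
}

/* roughly what up_dump_attrnlri() does: one UPDATE per queued aspath */
static void
toy_dump(struct toy_aspath_queue *upl)
{
	struct toy_aspath	*asp;
	struct toy_prefix	*p;

	while ((asp = TAILQ_FIRST(upl)) != NULL) {
		TAILQ_REMOVE(upl, asp, update_l);
		if (asp->flags & TOY_F_EOR) {
			printf("send End-of-RIB marker\n");
			free(asp);
			continue;
		}
		printf("send UPDATE, attrs [%s], NLRI:", asp->attrs);
		while ((p = TAILQ_FIRST(&asp->updates)) != NULL) {
			TAILQ_REMOVE(&asp->updates, p, entry);
			printf(" %s", p->name);
			free(p);
		}
		printf("\n");
		free(asp);
	}
}

int
main(void)
{
	struct toy_aspath_queue	 updates = TAILQ_HEAD_INITIALIZER(updates);
	struct toy_aspath	*asp;

	/* two prefixes share one set of path attributes */
	asp = toy_path_get("aspath 65001, nexthop self", TOY_F_UPDATE);
	toy_queue_prefix(asp, "192.0.2.0/24");
	toy_queue_prefix(asp, "198.51.100.0/24");
	TAILQ_INSERT_TAIL(&updates, asp, update_l);

	/* the EoR marker is just another queued aspath, without prefixes */
	asp = toy_path_get("", TOY_F_EOR);
	TAILQ_INSERT_TAIL(&updates, asp, update_l);

	toy_dump(&updates);
	return (0);
}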
--
:wq Claudio
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.377
diff -u -p -r1.377 rde.c
--- rde.c 7 Feb 2018 00:02:02 -0000 1.377
+++ rde.c 7 Feb 2018 00:02:18 -0000
@@ -80,8 +80,6 @@ void rde_dump_rib_as(struct prefix *,
int);
void rde_dump_filter(struct prefix *,
struct ctl_show_rib_request *);
-void rde_dump_filterout(struct rde_peer *, struct prefix *,
- struct ctl_show_rib_request *);
void rde_dump_upcall(struct rib_entry *, void *);
void rde_dump_prefix_upcall(struct rib_entry *, void *);
void rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t,
@@ -2262,71 +2260,33 @@ rde_dump_rib_as(struct prefix *p, struct
}
void
-rde_dump_filterout(struct rde_peer *peer, struct prefix *p,
- struct ctl_show_rib_request *req)
+rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
{
- struct bgpd_addr addr;
- struct rde_aspath *asp, *fasp;
- enum filter_actions a;
+ struct rde_aspath *asp;
- if (up_test_update(peer, p) != 1)
+ if (req->peerid && req->peerid != prefix_peer(p)->conf.id)
return;
+ if (p->flags & F_PREFIX_USE_PEER)
+ return; /* pending withdraw, skip */
- pt_getaddr(p->re->prefix, &addr);
asp = prefix_aspath(p);
- a = rde_filter(out_rules, &fasp, peer, asp, &addr,
- p->re->prefix->prefixlen, asp->peer);
- if (fasp)
- fasp->peer = asp->peer;
- else
- fasp = asp;
-
- if (a == ACTION_ALLOW)
- rde_dump_rib_as(p, fasp, req->pid, req->flags);
-
- if (fasp != asp)
- path_put(fasp);
-}
-
-void
-rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
-{
- struct rde_peer *peer;
- struct rde_aspath *asp;
-
- if (req->flags & F_CTL_ADJ_IN ||
- !(req->flags & (F_CTL_ADJ_IN|F_CTL_ADJ_OUT))) {
- asp = prefix_aspath(p);
- if (req->peerid && req->peerid != asp->peer->conf.id)
- return;
- if (req->type == IMSG_CTL_SHOW_RIB_AS &&
- !aspath_match(asp->aspath->data, asp->aspath->len,
- &req->as, req->as.as))
- return;
- if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
- !community_match(asp, req->community.as,
- req->community.type))
- return;
- if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
- !community_ext_match(asp, &req->extcommunity, 0))
- return;
- if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
- !community_large_match(asp, req->large_community.as,
- req->large_community.ld1, req->large_community.ld2))
- return;
- if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
- return;
- rde_dump_rib_as(p, asp, req->pid, req->flags);
- } else if (req->flags & F_CTL_ADJ_OUT) {
- if (p->re->active != p)
- /* only consider active prefix */
- return;
- if (req->peerid) {
- if ((peer = peer_get(req->peerid)) != NULL)
- rde_dump_filterout(peer, p, req);
- return;
- }
- }
+ if (req->type == IMSG_CTL_SHOW_RIB_AS &&
+ !aspath_match(asp->aspath->data, asp->aspath->len,
+ &req->as, req->as.as))
+ return;
+ if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
+ !community_match(asp, req->community.as, req->community.type))
+ return;
+ if (req->type == IMSG_CTL_SHOW_RIB_EXTCOMMUNITY &&
+ !community_ext_match(asp, &req->extcommunity, 0))
+ return;
+ if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY &&
+ !community_large_match(asp, req->large_community.as,
+ req->large_community.ld1, req->large_community.ld2))
+ return;
+ if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
+ return;
+ rde_dump_rib_as(p, asp, req->pid, req->flags);
}
void
@@ -2375,7 +2335,11 @@ rde_dump_ctx_new(struct ctl_show_rib_req
sizeof(error));
return;
}
- if ((rib = rib_find(req->rib)) == NULL) {
+ if (req->flags & F_CTL_ADJ_IN)
+ rib = &ribs[RIB_ADJ_IN].rib;
+ else if (req->flags & F_CTL_ADJ_OUT)
+ rib = &ribs[RIB_ADJ_OUT].rib;
+ else if ((rib = rib_find(req->rib)) == NULL) {
log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
error = CTL_RES_NOSUCHPEER;
imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
@@ -2749,7 +2713,6 @@ void
rde_update6_queue_runner(u_int8_t aid)
{
struct rde_peer *peer;
- u_char *b;
int r, sent, max = RDE_RUNNER_ROUNDS / 2;
u_int16_t len;
@@ -2762,13 +2725,12 @@ rde_update6_queue_runner(u_int8_t aid)
if (peer->state != PEER_UP)
continue;
len = sizeof(queue_buf) - MSGSIZE_HEADER;
- b = up_dump_mp_unreach(queue_buf, &len, peer, aid);
-
- if (b == NULL)
+ r = up_dump_mp_unreach(queue_buf, len, peer, aid);
+ if (r == -1)
continue;
/* finally send message to SE */
if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
- 0, -1, b, len) == -1)
+ 0, -1, queue_buf, r) == -1)
fatal("%s %d imsg_compose error", __func__,
__LINE__);
sent++;
@@ -2786,7 +2748,7 @@ rde_update6_queue_runner(u_int8_t aid)
if (peer->state != PEER_UP)
continue;
len = sizeof(queue_buf) - MSGSIZE_HEADER;
- r = up_dump_mp_reach(queue_buf, &len, peer, aid);
+ r = up_dump_mp_reach(queue_buf, len, peer, aid);
switch (r) {
case -2:
continue;
@@ -2794,13 +2756,11 @@ rde_update6_queue_runner(u_int8_t aid)
peer_send_eor(peer, aid);
continue;
default:
- b = queue_buf + r;
break;
}
-
/* finally send message to SE */
if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
- 0, -1, b, len) == -1)
+ 0, -1, queue_buf, r) == -1)
fatal("%s %d imsg_compose error", __func__,
__LINE__);
sent++;
@@ -2929,8 +2889,8 @@ rde_reload_done(void)
peer->reconf_out = 0;
peer->reconf_rib = 0;
if (peer->rib != rib_find(peer->conf.rib)) {
- rib_dump(peer->rib, rde_softreconfig_unload_peer, peer,
- AID_UNSPEC);
+ rib_dump(&ribs[RIB_ADJ_OUT].rib,
+ rde_softreconfig_unload_peer, peer, AID_UNSPEC);
peer->rib = rib_find(peer->conf.rib);
if (peer->rib == NULL)
fatalx("King Bula's peer met an unknown RIB");
@@ -3060,83 +3020,34 @@ rde_softreconfig_in(struct rib_entry *re
void
rde_softreconfig_out(struct rib_entry *re, void *ptr)
{
- struct prefix *p = re->active;
- struct pt_entry *pt;
+ struct prefix *new = re->active;
struct rde_peer *peer = ptr;
- struct rde_aspath *oasp, *nasp;
- enum filter_actions oa, na;
- struct bgpd_addr addr;
-
- if (peer->conf.id == 0)
- fatalx("King Bula troubled by bad peer");
- if (p == NULL)
+ if (new == NULL)
return;
- pt = re->prefix;
- pt_getaddr(pt, &addr);
-
- if (up_test_update(peer, p) != 1)
- return;
-
- oa = rde_filter(out_rules_tmp, &oasp, peer, prefix_aspath(p),
- &addr, pt->prefixlen, prefix_peer(p));
- na = rde_filter(out_rules, &nasp, peer, prefix_aspath(p),
- &addr, pt->prefixlen, prefix_peer(p));
- oasp = oasp != NULL ? oasp : prefix_aspath(p);
- nasp = nasp != NULL ? nasp : prefix_aspath(p);
-
- /* go through all 4 possible combinations */
- /* if (oa == ACTION_DENY && na == ACTION_DENY) */
- /* nothing todo */
- if (oa == ACTION_DENY && na == ACTION_ALLOW) {
- /* send update */
- up_generate(peer, nasp, &addr, pt->prefixlen);
- } else if (oa == ACTION_ALLOW && na == ACTION_DENY) {
- /* send withdraw */
- up_generate(peer, NULL, &addr, pt->prefixlen);
- } else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
- /* send update if path attributes changed */
- if (path_compare(nasp, oasp) != 0)
- up_generate(peer, nasp, &addr, pt->prefixlen);
- }
-
- if (oasp != prefix_aspath(p))
- path_put(oasp);
- if (nasp != prefix_aspath(p))
- path_put(nasp);
+ /*
+ * path_update is smart enough to only send out updates to
+ * prefixes that actually changed. So just regenerate all
+ * updates.
+ */
+ up_generate_updates(out_rules, peer, new, new);
}
void
rde_softreconfig_unload_peer(struct rib_entry *re, void *ptr)
{
struct rde_peer *peer = ptr;
- struct prefix *p = re->active;
- struct pt_entry *pt;
- struct rde_aspath *oasp;
- enum filter_actions oa;
+ struct prefix *p;
struct bgpd_addr addr;
- pt = re->prefix;
- pt_getaddr(pt, &addr);
-
- /* check if prefix was announced */
- if (up_test_update(peer, p) != 1)
+ p = prefix_bypeer(re, peer, 0);
+ if (p == NULL)
return;
- oa = rde_filter(out_rules_tmp, &oasp, peer, prefix_aspath(p),
- &addr, pt->prefixlen, prefix_peer(p));
- oasp = oasp != NULL ? oasp : prefix_aspath(p);
-
- if (oa == ACTION_DENY)
- /* nothing todo */
- goto done;
-
- /* send withdraw */
- up_generate(peer, NULL, &addr, pt->prefixlen);
-done:
- if (oasp != prefix_aspath(p))
- path_put(oasp);
+ pt_getaddr(p->re->prefix, &addr);
+ prefix_withdraw(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
+ p->re->prefix->prefixlen);
}
/*
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.166
diff -u -p -r1.166 rde.h
--- rde.h 7 Feb 2018 00:02:02 -0000 1.166
+++ rde.h 7 Feb 2018 00:34:39 -0000
@@ -47,14 +47,10 @@ LIST_HEAD(prefix_list, prefix);
TAILQ_HEAD(prefix_queue, prefix);
LIST_HEAD(aspath_head, rde_aspath);
TAILQ_HEAD(aspath_queue, rde_aspath);
-RB_HEAD(uptree_prefix, update_prefix);
-RB_HEAD(uptree_attr, update_attr);
struct rib_desc;
struct rib;
RB_HEAD(rib_tree, rib_entry);
-TAILQ_HEAD(uplist_prefix, update_prefix);
-TAILQ_HEAD(uplist_attr, update_attr);
struct rde_peer {
LIST_ENTRY(rde_peer) hash_l; /* hash list over all peers */
@@ -64,10 +60,8 @@ struct rde_peer {
struct bgpd_addr remote_addr;
struct bgpd_addr local_v4_addr;
struct bgpd_addr local_v6_addr;
- struct uptree_prefix up_prefix;
- struct uptree_attr up_attrs;
- struct uplist_attr updates[AID_MAX];
- struct uplist_prefix withdraws[AID_MAX];
+ struct aspath_queue updates[AID_MAX];
+ struct prefix_queue withdraws[AID_MAX];
struct capabilities capa;
time_t staletime[AID_MAX];
u_int64_t prefix_rcvd_update;
@@ -178,7 +172,8 @@ struct path_table {
#define F_NEXTHOP_MASK 0x0f000
#define F_ATTR_PARSE_ERR 0x10000 /* parse error, not eligable */
#define F_ATTR_LINKED 0x20000 /* if set path is on various
lists */
-#define F_ATTR_UPDATE 0x20000 /* if set linked on update_l */
+#define F_ATTR_UPDATE 0x40000 /* if set linked on update_l */
+#define F_ATTR_EOR 0x80000 /* magic marker for EOR objects
*/
#define ORIGIN_IGP 0
@@ -204,6 +199,7 @@ struct rde_aspath {
u_int16_t rtlabelid; /* route label id */
u_int16_t pftableid; /* pf table id */
u_int8_t origin;
+ u_int8_t aid;
u_int8_t others_len;
};
@@ -314,12 +310,14 @@ struct prefix {
struct rib_entry *re;
union {
struct rde_aspath *_aspath;
+ struct rde_peer *_peer;
} _p;
time_t lastchange;
int flags;
};
#define F_PREFIX_USE_UPDATES 0x01 /* linked onto the updates list */
+#define F_PREFIX_USE_PEER 0x02 /* use _peer instead of _aspath */
extern struct rde_memstats rdemem;
@@ -479,11 +477,14 @@ void path_destroy(struct rde_aspath *)
int path_empty(struct rde_aspath *);
struct rde_aspath *path_copy(struct rde_aspath *);
struct rde_aspath *path_get(void);
+struct rde_aspath *path_get_eor(struct rde_peer *, u_int8_t);
void path_put(struct rde_aspath *);
#define PREFIX_SIZE(x) (((x) + 7) / 8 + 1)
int prefix_remove(struct rib *, struct rde_peer *,
struct bgpd_addr *, int, u_int32_t);
+void prefix_withdraw(struct rib *, struct rde_peer *,
+ struct bgpd_addr *, int);
int prefix_write(u_char *, int, struct bgpd_addr *, u_int8_t);
int prefix_writebuf(struct ibuf *, struct bgpd_addr *, u_int8_t);
struct prefix *prefix_bypeer(struct rib_entry *, struct rde_peer *,
@@ -497,13 +498,18 @@ void prefix_relink(struct prefix *, st
static inline struct rde_aspath *
prefix_aspath(struct prefix *p)
{
+ if (p->flags & F_PREFIX_USE_PEER)
+ fatalx("prefix_aspath: prefix has no aspath");
return (p->_p._aspath);
}
static inline struct rde_peer *
prefix_peer(struct prefix *p)
{
- return (p->_p._aspath->peer);
+ if (p->flags & F_PREFIX_USE_PEER)
+ return (p->_p._peer);
+ else
+ return (p->_p._aspath->peer);
}
void nexthop_init(u_int32_t);
@@ -521,19 +527,15 @@ int nexthop_compare(struct nexthop *,
void up_init(struct rde_peer *);
void up_down(struct rde_peer *);
int up_test_update(struct rde_peer *, struct prefix *);
-int up_generate(struct rde_peer *, struct rde_aspath *,
- struct bgpd_addr *, u_int8_t);
void up_generate_updates(struct filter_head *, struct rde_peer *,
struct prefix *, struct prefix *);
void up_generate_default(struct filter_head *, struct rde_peer *,
u_int8_t);
int up_generate_marker(struct rde_peer *, u_int8_t);
-int up_dump_prefix(u_char *, int, struct uplist_prefix *,
+int up_dump_prefix(u_char *, int, struct prefix_queue *,
struct rde_peer *);
int up_dump_attrnlri(u_char *, int, struct rde_peer *);
-u_char *up_dump_mp_unreach(u_char *, u_int16_t *, struct rde_peer *,
- u_int8_t);
-int up_dump_mp_reach(u_char *, u_int16_t *, struct rde_peer *,
- u_int8_t);
+int up_dump_mp_unreach(u_char *, int, struct rde_peer *, u_int8_t);
+int up_dump_mp_reach(u_char *, int, struct rde_peer *, u_int8_t);
#endif /* __RDE_H__ */
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.158
diff -u -p -r1.158 rde_rib.c
--- rde_rib.c 7 Feb 2018 00:02:02 -0000 1.158
+++ rde_rib.c 7 Feb 2018 00:04:42 -0000
@@ -405,6 +405,8 @@ path_update(struct rib *rib, struct rde_
struct prefix *p;
int pflag = 0;
+ nasp->aid = prefix->aid;
+
if (nasp->pftableid) {
rde_send_pftable(nasp->pftableid, prefix, prefixlen, 0);
rde_send_pftable_commit();
@@ -432,6 +434,20 @@ path_update(struct rib *rib, struct rde_
path_link(asp, peer);
}
+ if (flag & F_ATTR_UPDATE) {
+ struct aspath_queue *upl = &peer->updates[asp->aid];
+
+ if (asp->flags & F_ATTR_UPDATE) {
+ TAILQ_REMOVE(upl, asp, update_l);
+ peer->up_acnt--;
+ }
+ TAILQ_INSERT_TAIL(upl, asp, update_l);
+ asp->flags |= F_ATTR_UPDATE;
+ peer->up_acnt++;
+
+ pflag = F_PREFIX_USE_UPDATES;
+ }
+
/* If the prefix was found move it else add it to the aspath. */
if (p != NULL)
prefix_move(asp, p, pflag);
@@ -451,6 +467,10 @@ path_compare(struct rde_aspath *a, struc
return (1);
else if (a == NULL)
return (-1);
+ if (a->aid > b->aid)
+ return (1);
+ if (a->aid < b->aid)
+ return (-1);
if ((a->flags & ~(F_ATTR_LINKED | F_ATTR_UPDATE)) >
(b->flags & ~(F_ATTR_LINKED | F_ATTR_UPDATE)))
return (1);
@@ -611,10 +631,12 @@ path_destroy(struct rde_aspath *asp)
nexthop_unlink(asp);
LIST_REMOVE(asp, path_l);
+ if (asp->flags & F_ATTR_UPDATE)
+ TAILQ_REMOVE(&asp->peer->updates[asp->aid], asp, update_l);
TAILQ_REMOVE(&asp->peer->path_h, asp, peer_l);
asp->peer = NULL;
asp->nexthop = NULL;
- asp->flags &= ~F_ATTR_LINKED;
+ asp->flags &= ~(F_ATTR_LINKED | F_ATTR_UPDATE);
path_put(asp);
}
@@ -665,6 +687,7 @@ path_copy(struct rde_aspath *asp)
nasp->lpref = asp->lpref;
nasp->weight = asp->weight;
nasp->origin = asp->origin;
+ nasp->aid = asp->aid;
nasp->rtlabelid = asp->rtlabelid;
rtlabel_ref(nasp->rtlabelid);
nasp->pftableid = asp->pftableid;
@@ -691,6 +714,7 @@ path_get(void)
TAILQ_INIT(&asp->updates);
asp->origin = ORIGIN_INCOMPLETE;
asp->lpref = DEFAULT_LPREF;
+ /* aid = 0 */
/* med = 0 */
/* weight = 0 */
/* rtlabel = 0 */
@@ -698,6 +722,20 @@ path_get(void)
return (asp);
}
+/* create a special rde_aspath representing an EoR record */
+struct rde_aspath *
+path_get_eor(struct rde_peer *peer, u_int8_t aid)
+{
+ struct rde_aspath *asp;
+
+ asp = path_get();
+ asp->flags = F_ATTR_EOR;
+ asp->aid = aid;
+ path_link(asp, peer);
+
+ return (asp);
+}
+
/* free an unlinked element */
void
path_put(struct rde_aspath *asp)
@@ -861,6 +899,36 @@ prefix_remove(struct rib *rib, struct rd
return (1);
}
+/*
+ * Withdraw a prefix from the Adj-RIB-Out, this unlinks the aspath but leaves
+ * the prefix in the RIB linked to the peer withdraw list.
+ */
+void
+prefix_withdraw(struct rib *rib, struct rde_peer *peer,
+ struct bgpd_addr *prefix, int prefixlen)
+{
+ struct prefix *p;
+ struct rib_entry *re;
+ struct rde_aspath *asp;
+
+ re = rib_get(rib, prefix, prefixlen);
+ if (re == NULL) /* Got a dummy withdrawn request */
+ return;
+
+ p = prefix_bypeer(re, peer, 0);
+ if (p == NULL) /* Got a dummy withdrawn request. */
+ return;
+
+ /* unlink aspath ...*/
+ asp = prefix_aspath(p);
+ PREFIX_COUNT(asp, -1);
+ prefix_relink(p, NULL, F_PREFIX_USE_PEER);
+
+ if (path_empty(asp))
+ path_destroy(asp);
+}
+
+
/* dump a prefix into specified buffer */
int
prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, u_int8_t plen)
@@ -934,6 +1002,9 @@ prefix_bypeer(struct rib_entry *re, stru
LIST_FOREACH(p, &re->prefix_h, rib_l) {
if (prefix_peer(p) != peer)
continue;
+ if (p->flags & F_PREFIX_USE_PEER)
+ /* Adj-RIB-Out withdrawn route */
+ continue;
if (prefix_aspath(p)->flags & flags &&
(flags & F_ANN_DYNAMIC) !=
(prefix_aspath(p)->flags & F_ANN_DYNAMIC))
@@ -991,15 +1062,16 @@ prefix_updateall(struct rde_aspath *asp,
void
prefix_destroy(struct prefix *p)
{
- struct rde_aspath *asp;
-
- asp = prefix_aspath(p);
- PREFIX_COUNT(asp, -1);
+ struct rde_aspath *asp = NULL;
+ if ((p->flags & F_PREFIX_USE_PEER) == 0) {
+ asp = prefix_aspath(p);
+ PREFIX_COUNT(asp, -1);
+ }
prefix_unlink(p);
prefix_free(p);
- if (path_empty(asp))
+ if (asp && path_empty(asp))
path_destroy(asp);
}
@@ -1030,6 +1102,46 @@ prefix_network_clean(struct rde_peer *pe
}
/*
+ * Relink a prefix onto the right queue.
+ */
+void
+prefix_relink(struct prefix *p, struct rde_aspath *asp, int flag)
+{
+ struct prefix_queue *pq;
+ struct rde_peer *peer = prefix_peer(p);
+
+ /* unhook prefix */
+ if (p->flags & F_PREFIX_USE_PEER)
+ pq = &peer->withdraws[p->re->prefix->aid];
+ else if (p->flags & F_PREFIX_USE_UPDATES) {
+ if (asp && asp != prefix_aspath(p))
+ fatalx("prefix_relink: move between aspaths");
+ pq = &prefix_aspath(p)->updates;
+ } else {
+ if (asp && asp != prefix_aspath(p))
+ fatalx("prefix_relink: move between aspaths");
+ pq = &prefix_aspath(p)->prefixes;
+ }
+
+ TAILQ_REMOVE(pq, p, path_l);
+ p->flags &= ~(F_PREFIX_USE_PEER | F_PREFIX_USE_UPDATES);
+
+ if (flag & F_PREFIX_USE_PEER) {
+ pq = &peer->withdraws[p->re->prefix->aid];
+ p->_p._peer = peer;
+ } else if (flag & F_PREFIX_USE_UPDATES) {
+ pq = &asp->updates;
+ p->_p._aspath = asp;
+ } else {
+ pq = &asp->prefixes;
+ p->_p._aspath = asp;
+ }
+
+ TAILQ_INSERT_HEAD(pq, p, path_l);
+ p->flags |= flag;
+}
+
+/*
* Link a prefix into the different parent objects.
*/
static void
@@ -1064,7 +1176,9 @@ prefix_unlink(struct prefix *pref)
LIST_REMOVE(pref, rib_l);
prefix_evaluate(NULL, re);
- if (pref->flags & F_PREFIX_USE_UPDATES)
+ if (pref->flags & F_PREFIX_USE_PEER)
+ pq = &prefix_peer(pref)->withdraws[re->prefix->aid];
+ else if (pref->flags & F_PREFIX_USE_UPDATES)
pq = &prefix_aspath(pref)->updates;
else
pq = &prefix_aspath(pref)->prefixes;
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.88
diff -u -p -r1.88 rde_update.c
--- rde_update.c 5 Feb 2018 03:55:54 -0000 1.88
+++ rde_update.c 5 Feb 2018 23:44:56 -0000
@@ -27,45 +27,6 @@
#include "rde.h"
#include "log.h"
-in_addr_t up_get_nexthop(struct rde_peer *, struct rde_aspath *);
-int up_generate_mp_reach(struct rde_peer *, struct update_attr *,
- struct rde_aspath *, u_int8_t);
-int up_generate_attr(struct rde_peer *, struct update_attr *,
- struct rde_aspath *, u_int8_t);
-
-/* update stuff. */
-struct update_prefix {
- TAILQ_ENTRY(update_prefix) prefix_l;
- RB_ENTRY(update_prefix) entry;
- struct uplist_prefix *prefix_h;
- struct bgpd_addr prefix;
- int prefixlen;
-};
-
-struct update_attr {
- TAILQ_ENTRY(update_attr) attr_l;
- RB_ENTRY(update_attr) entry;
- struct uplist_prefix prefix_h;
- u_char *attr;
- u_char *mpattr;
- u_int32_t attr_hash;
- u_int16_t attr_len;
- u_int16_t mpattr_len;
-};
-
-void up_clear(struct uplist_attr *, struct uplist_prefix *);
-int up_prefix_cmp(struct update_prefix *, struct update_prefix *);
-int up_attr_cmp(struct update_attr *, struct update_attr *);
-int up_add(struct rde_peer *, struct update_prefix *, struct update_attr *);
-
-RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp)
-RB_GENERATE(uptree_prefix, update_prefix, entry, up_prefix_cmp)
-
-RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp)
-RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp)
-
-SIPHASH_KEY uptree_key;
-
void
up_init(struct rde_peer *peer)
{
@@ -75,48 +36,21 @@ up_init(struct rde_peer *peer)
TAILQ_INIT(&peer->updates[i]);
TAILQ_INIT(&peer->withdraws[i]);
}
- RB_INIT(&peer->up_prefix);
- RB_INIT(&peer->up_attrs);
peer->up_pcnt = 0;
peer->up_acnt = 0;
peer->up_nlricnt = 0;
peer->up_wcnt = 0;
- arc4random_buf(&uptree_key, sizeof(uptree_key));
-}
-
-void
-up_clear(struct uplist_attr *updates, struct uplist_prefix *withdraws)
-{
- struct update_attr *ua;
- struct update_prefix *up;
-
- while ((ua = TAILQ_FIRST(updates)) != NULL) {
- TAILQ_REMOVE(updates, ua, attr_l);
- while ((up = TAILQ_FIRST(&ua->prefix_h)) != NULL) {
- TAILQ_REMOVE(&ua->prefix_h, up, prefix_l);
- free(up);
- }
- free(ua->attr);
- free(ua->mpattr);
- free(ua);
- }
-
- while ((up = TAILQ_FIRST(withdraws)) != NULL) {
- TAILQ_REMOVE(withdraws, up, prefix_l);
- free(up);
- }
}
void
up_down(struct rde_peer *peer)
{
+ struct prefix *p;
u_int8_t i;
for (i = 0; i < AID_MAX; i++)
- up_clear(&peer->updates[i], &peer->withdraws[i]);
-
- RB_INIT(&peer->up_prefix);
- RB_INIT(&peer->up_attrs);
+ while ((p = TAILQ_FIRST(&peer->withdraws[i])) != NULL)
+ prefix_destroy(p);
peer->up_pcnt = 0;
peer->up_acnt = 0;
@@ -125,153 +59,6 @@ up_down(struct rde_peer *peer)
}
int
-up_prefix_cmp(struct update_prefix *a, struct update_prefix *b)
-{
- int i;
-
- if (a->prefix.aid < b->prefix.aid)
- return (-1);
- if (a->prefix.aid > b->prefix.aid)
- return (1);
-
- switch (a->prefix.aid) {
- case AID_INET:
- if (ntohl(a->prefix.v4.s_addr) < ntohl(b->prefix.v4.s_addr))
- return (-1);
- if (ntohl(a->prefix.v4.s_addr) > ntohl(b->prefix.v4.s_addr))
- return (1);
- break;
- case AID_INET6:
- i = memcmp(&a->prefix.v6, &b->prefix.v6,
- sizeof(struct in6_addr));
- if (i > 0)
- return (1);
- if (i < 0)
- return (-1);
- break;
- case AID_VPN_IPv4:
- if (betoh64(a->prefix.vpn4.rd) < betoh64(b->prefix.vpn4.rd))
- return (-1);
- if (betoh64(a->prefix.vpn4.rd) > betoh64(b->prefix.vpn4.rd))
- return (1);
- if (ntohl(a->prefix.v4.s_addr) < ntohl(b->prefix.v4.s_addr))
- return (-1);
- if (ntohl(a->prefix.v4.s_addr) > ntohl(b->prefix.v4.s_addr))
- return (1);
- if (a->prefixlen < b->prefixlen)
- return (-1);
- if (a->prefixlen > b->prefixlen)
- return (1);
- if (a->prefix.vpn4.labellen < b->prefix.vpn4.labellen)
- return (-1);
- if (a->prefix.vpn4.labellen > b->prefix.vpn4.labellen)
- return (1);
- return (memcmp(a->prefix.vpn4.labelstack,
- b->prefix.vpn4.labelstack, a->prefix.vpn4.labellen));
- default:
- fatalx("pt_prefix_cmp: unknown af");
- }
- if (a->prefixlen < b->prefixlen)
- return (-1);
- if (a->prefixlen > b->prefixlen)
- return (1);
- return (0);
-}
-
-int
-up_attr_cmp(struct update_attr *a, struct update_attr *b)
-{
- int r;
-
- if ((r = a->attr_hash - b->attr_hash) != 0)
- return (r);
- if ((r = a->attr_len - b->attr_len) != 0)
- return (r);
- if ((r = a->mpattr_len - b->mpattr_len) != 0)
- return (r);
- if ((r = memcmp(a->mpattr, b->mpattr, a->mpattr_len)) != 0)
- return (r);
- return (memcmp(a->attr, b->attr, a->attr_len));
-}
-
-int
-up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a)
-{
- struct update_attr *na = NULL;
- struct update_prefix *np;
- struct uplist_attr *upl = NULL;
- struct uplist_prefix *wdl = NULL;
-
- upl = &peer->updates[p->prefix.aid];
- wdl = &peer->withdraws[p->prefix.aid];
-
- /* 1. search for attr */
- if (a != NULL && (na = RB_FIND(uptree_attr, &peer->up_attrs, a)) ==
- NULL) {
- /* 1.1 if not found -> add */
- TAILQ_INIT(&a->prefix_h);
- if (RB_INSERT(uptree_attr, &peer->up_attrs, a) != NULL) {
- log_warnx("uptree_attr insert failed");
- /* cleanup */
- free(a->attr);
- free(a->mpattr);
- free(a);
- free(p);
- return (-1);
- }
- TAILQ_INSERT_TAIL(upl, a, attr_l);
- peer->up_acnt++;
- } else {
- /* 1.2 if found -> use that, free a */
- if (a != NULL) {
- free(a->attr);
- free(a->mpattr);
- free(a);
- a = na;
- /* move to end of update queue */
- TAILQ_REMOVE(upl, a, attr_l);
- TAILQ_INSERT_TAIL(upl, a, attr_l);
- }
- }
-
- /* 2. search for prefix */
- if ((np = RB_FIND(uptree_prefix, &peer->up_prefix, p)) == NULL) {
- /* 2.1 if not found -> add */
- if (RB_INSERT(uptree_prefix, &peer->up_prefix, p) != NULL) {
- log_warnx("uptree_prefix insert failed");
- /*
- * cleanup. But do not free a because it is already
- * linked or NULL. up_dump_attrnlri() will remove and
- * free the empty attribute later.
- */
- free(p);
- return (-1);
- }
- peer->up_pcnt++;
- } else {
- /* 2.2 if found -> use that and free p */
- TAILQ_REMOVE(np->prefix_h, np, prefix_l);
- free(p);
- p = np;
- if (p->prefix_h == wdl)
- peer->up_wcnt--;
- else
- peer->up_nlricnt--;
- }
- /* 3. link prefix to attr */
- if (a == NULL) {
- TAILQ_INSERT_TAIL(wdl, p, prefix_l);
- p->prefix_h = wdl;
- peer->up_wcnt++;
- } else {
- TAILQ_INSERT_TAIL(&a->prefix_h, p, prefix_l);
- p->prefix_h = &a->prefix_h;
- peer->up_nlricnt++;
- }
- return (0);
-}
-
-int
up_test_update(struct rde_peer *peer, struct prefix *p)
{
struct bgpd_addr addr;
@@ -365,52 +152,11 @@ up_test_update(struct rde_peer *peer, st
return (1);
}
-int
-up_generate(struct rde_peer *peer, struct rde_aspath *asp,
- struct bgpd_addr *addr, u_int8_t prefixlen)
-{
- struct update_attr *ua = NULL;
- struct update_prefix *up;
- SIPHASH_CTX ctx;
-
- if (asp) {
- ua = calloc(1, sizeof(struct update_attr));
- if (ua == NULL)
- fatal("up_generate");
-
- if (up_generate_attr(peer, ua, asp, addr->aid) == -1) {
- log_warnx("generation of bgp path attributes failed");
- free(ua);
- return (-1);
- }
- /*
- * use aspath_hash as attr_hash, this may be unoptimal
- * but currently I don't care.
- */
- SipHash24_Init(&ctx, &uptree_key);
- SipHash24_Update(&ctx, ua->attr, ua->attr_len);
- if (ua->mpattr)
- SipHash24_Update(&ctx, ua->mpattr, ua->mpattr_len);
- ua->attr_hash = SipHash24_End(&ctx);
- }
-
- up = calloc(1, sizeof(struct update_prefix));
- if (up == NULL)
- fatal("up_generate");
- up->prefix = *addr;
- up->prefixlen = prefixlen;
-
- if (up_add(peer, up, ua) == -1)
- return (-1);
-
- return (0);
-}
-
void
up_generate_updates(struct filter_head *rules, struct rde_peer *peer,
struct prefix *new, struct prefix *old)
{
- struct rde_aspath *asp, *fasp;
+ struct rde_aspath *fasp;
struct bgpd_addr addr;
if (peer->state != PEER_UP)
@@ -421,14 +167,14 @@ withdraw:
if (up_test_update(peer, old) != 1)
return;
- asp = prefix_aspath(old);
pt_getaddr(old->re->prefix, &addr);
- if (rde_filter(rules, NULL, peer, asp, &addr,
- old->re->prefix->prefixlen, asp->peer) == ACTION_DENY)
+ if (rde_filter(rules, NULL, peer, prefix_aspath(old), &addr,
+ old->re->prefix->prefixlen, prefix_peer(old)) ==
+ ACTION_DENY)
return;
- /* withdraw prefix */
- up_generate(peer, NULL, &addr, old->re->prefix->prefixlen);
+ prefix_withdraw(&ribs[RIB_ADJ_OUT].rib, peer, &addr,
+ old->re->prefix->prefixlen);
} else {
switch (up_test_update(peer, new)) {
case 1:
@@ -439,20 +185,21 @@ withdraw:
return;
}
- asp = prefix_aspath(new);
pt_getaddr(new->re->prefix, &addr);
- if (rde_filter(rules, &fasp, peer, asp, &addr,
- new->re->prefix->prefixlen, asp->peer) == ACTION_DENY) {
+ if (rde_filter(rules, &fasp, peer, prefix_aspath(new), &addr,
+ new->re->prefix->prefixlen, prefix_peer(new)) ==
+ ACTION_DENY) {
path_put(fasp);
goto withdraw;
}
if (fasp == NULL)
- fasp = asp;
+ fasp = prefix_aspath(new);
- up_generate(peer, fasp, &addr, new->re->prefix->prefixlen);
+ path_update(&ribs[RIB_ADJ_OUT].rib, peer, fasp, &addr,
+ new->re->prefix->prefixlen, F_ATTR_UPDATE);
/* free modified aspath */
- if (fasp != asp)
+ if (fasp != prefix_aspath(new))
path_put(fasp);
}
}
@@ -471,6 +218,7 @@ up_generate_default(struct filter_head *
asp = path_get();
asp->aspath = aspath_get(NULL, 0);
asp->origin = ORIGIN_IGP;
+ asp->aid = aid;
/* the other default values are OK, nexthop is once again NULL */
/*
@@ -493,7 +241,8 @@ up_generate_default(struct filter_head *
if (fasp == NULL)
fasp = asp;
- up_generate(peer, fasp, &addr, 0);
+ path_update(&ribs[RIB_ADJ_OUT].rib, peer, fasp, &addr, 0,
+ F_ATTR_UPDATE);
/* no longer needed */
if (fasp != asp)
@@ -501,47 +250,24 @@ up_generate_default(struct filter_head *
path_put(asp);
}
-/* generate a EoR marker in the update list. This is a horrible hack. */
int
up_generate_marker(struct rde_peer *peer, u_int8_t aid)
{
- struct update_attr *ua;
- struct update_attr *na = NULL;
- struct uplist_attr *upl = NULL;
-
- ua = calloc(1, sizeof(struct update_attr));
- if (ua == NULL)
- fatal("up_generate_marker");
+ struct rde_aspath *asp;
+ struct aspath_queue *upl;
+
+ asp = path_get_eor(peer, aid);
upl = &peer->updates[aid];
+ TAILQ_INSERT_TAIL(upl, asp, update_l);
+ asp->flags |= F_ATTR_UPDATE;
+ peer->up_acnt++;
- /* 1. search for attr */
- if ((na = RB_FIND(uptree_attr, &peer->up_attrs, ua)) == NULL) {
- /* 1.1 if not found -> add */
- TAILQ_INIT(&ua->prefix_h);
- if (RB_INSERT(uptree_attr, &peer->up_attrs, ua) != NULL) {
- log_warnx("uptree_attr insert failed");
- /* cleanup */
- free(ua);
- return (-1);
- }
- TAILQ_INSERT_TAIL(upl, ua, attr_l);
- peer->up_acnt++;
- } else {
- /* 1.2 if found -> use that, free ua */
- free(ua);
- ua = na;
- /* move to end of update queue */
- TAILQ_REMOVE(upl, ua, attr_l);
- TAILQ_INSERT_TAIL(upl, ua, attr_l);
- }
return (0);
}
-u_char up_attr_buf[4096];
-
/* only for IPv4 */
-in_addr_t
+static in_addr_t
up_get_nexthop(struct rde_peer *peer, struct rde_aspath *a)
{
in_addr_t mask;
@@ -596,37 +322,47 @@ up_get_nexthop(struct rde_peer *peer, st
return (peer->local_v4_addr.v4.s_addr);
}
-int
-up_generate_mp_reach(struct rde_peer *peer, struct update_attr *upa,
+static int
+up_generate_mp_reach(u_char *buf, int len, struct rde_peer *peer,
struct rde_aspath *a, u_int8_t aid)
{
- u_int16_t tmp;
+ u_char *attrbuf;
+ int r, wpos, attrlen;
+ u_int16_t tmp;
+
+ if (len < 4)
+ return (-1);
+ /* attribute header, defaulting to extended length one */
+ buf[0] = ATTR_OPTIONAL | ATTR_EXTLEN;
+ buf[1] = ATTR_MP_REACH_NLRI;
+ wpos = 4;
+ attrbuf = buf + wpos;
switch (aid) {
case AID_INET6:
- upa->mpattr_len = 21; /* AFI + SAFI + NH LEN + NH + Reserved */
- upa->mpattr = malloc(upa->mpattr_len);
- if (upa->mpattr == NULL)
- fatal("up_generate_mp_reach");
- if (aid2afi(aid, &tmp, &upa->mpattr[2]))
- fatalx("up_generate_mp_reachi: bad AID");
+ attrlen = 21; /* AFI + SAFI + NH LEN + NH + Reserved */
+ if (len < wpos + attrlen)
+ return (-1);
+ wpos += attrlen;
+ if (aid2afi(aid, &tmp, &attrbuf[2]))
+ fatalx("up_generate_mp_reach: bad AID");
tmp = htons(tmp);
- memcpy(upa->mpattr, &tmp, sizeof(tmp));
- upa->mpattr[3] = sizeof(struct in6_addr);
- upa->mpattr[20] = 0; /* Reserved must be 0 */
+ memcpy(attrbuf, &tmp, sizeof(tmp));
+ attrbuf[3] = sizeof(struct in6_addr);
+ attrbuf[20] = 0; /* Reserved must be 0 */
/* nexthop dance see also up_get_nexthop() */
+ attrbuf += 4;
if (a->flags & F_NEXTHOP_NOMODIFY) {
/* no modify flag set */
if (a->nexthop == NULL)
- memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
+ memcpy(attrbuf, &peer->local_v6_addr.v6,
sizeof(struct in6_addr));
else
- memcpy(&upa->mpattr[4],
- &a->nexthop->exit_nexthop.v6,
+ memcpy(attrbuf, &a->nexthop->exit_nexthop.v6,
sizeof(struct in6_addr));
} else if (a->flags & F_NEXTHOP_SELF)
- memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
+ memcpy(attrbuf, &peer->local_v6_addr.v6,
sizeof(struct in6_addr));
else if (!peer->conf.ebgp) {
/* ibgp */
@@ -634,11 +370,10 @@ up_generate_mp_reach(struct rde_peer *pe
(a->nexthop->exit_nexthop.aid == AID_INET6 &&
!memcmp(&a->nexthop->exit_nexthop.v6,
&peer->remote_addr.v6, sizeof(struct in6_addr))))
- memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
+ memcpy(attrbuf, &peer->local_v6_addr.v6,
sizeof(struct in6_addr));
else
- memcpy(&upa->mpattr[4],
- &a->nexthop->exit_nexthop.v6,
+ memcpy(attrbuf, &a->nexthop->exit_nexthop.v6,
sizeof(struct in6_addr));
} else if (peer->conf.distance == 1) {
/* ebgp directly connected */
@@ -651,43 +386,43 @@ up_generate_mp_reach(struct rde_peer *pe
* nexthop and peer are in the same
* subnet
*/
- memcpy(&upa->mpattr[4],
+ memcpy(attrbuf,
&a->nexthop->exit_nexthop.v6,
sizeof(struct in6_addr));
- return (0);
+ break;
}
- memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
+ memcpy(attrbuf, &peer->local_v6_addr.v6,
sizeof(struct in6_addr));
} else
/* ebgp multihop */
- memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
+ memcpy(attrbuf, &peer->local_v6_addr.v6,
sizeof(struct in6_addr));
- return (0);
+ break;
case AID_VPN_IPv4:
- upa->mpattr_len = 17; /* AFI + SAFI + NH LEN + NH + Reserved */
- upa->mpattr = calloc(upa->mpattr_len, 1);
- if (upa->mpattr == NULL)
- fatal("up_generate_mp_reach");
- if (aid2afi(aid, &tmp, &upa->mpattr[2]))
+ attrlen = 17; /* AFI + SAFI + NH LEN + NH + Reserved */
+ if (len < wpos + attrlen)
+ return (-1);
+ wpos += attrlen;
+ if (aid2afi(aid, &tmp, &attrbuf[2]))
fatalx("up_generate_mp_reachi: bad AID");
tmp = htons(tmp);
- memcpy(upa->mpattr, &tmp, sizeof(tmp));
- upa->mpattr[3] = sizeof(u_int64_t) + sizeof(struct in_addr);
+ memcpy(attrbuf, &tmp, sizeof(tmp));
+ attrbuf[3] = sizeof(u_int64_t) + sizeof(struct in_addr);
+ bzero(attrbuf + 4, sizeof(u_int64_t));
/* nexthop dance see also up_get_nexthop() */
+ attrbuf += 12;
if (a->flags & F_NEXTHOP_NOMODIFY) {
/* no modify flag set */
if (a->nexthop == NULL)
- memcpy(&upa->mpattr[12],
- &peer->local_v4_addr.v4,
+ memcpy(attrbuf, &peer->local_v4_addr.v4,
sizeof(struct in_addr));
else
/* nexthops are stored as IPv4 addrs */
- memcpy(&upa->mpattr[12],
- &a->nexthop->exit_nexthop.v4,
+ memcpy(attrbuf, &a->nexthop->exit_nexthop.v4,
sizeof(struct in_addr));
} else if (a->flags & F_NEXTHOP_SELF)
- memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4,
+ memcpy(attrbuf, &peer->local_v4_addr.v4,
sizeof(struct in_addr));
else if (!peer->conf.ebgp) {
/* ibgp */
@@ -695,12 +430,10 @@ up_generate_mp_reach(struct rde_peer *pe
(a->nexthop->exit_nexthop.aid == AID_INET &&
!memcmp(&a->nexthop->exit_nexthop.v4,
&peer->remote_addr.v4, sizeof(struct in_addr))))
- memcpy(&upa->mpattr[12],
- &peer->local_v4_addr.v4,
+ memcpy(attrbuf, &peer->local_v4_addr.v4,
sizeof(struct in_addr));
else
- memcpy(&upa->mpattr[12],
- &a->nexthop->exit_nexthop.v4,
+ memcpy(attrbuf, &a->nexthop->exit_nexthop.v4,
sizeof(struct in_addr));
} else if (peer->conf.distance == 1) {
/* ebgp directly connected */
@@ -713,40 +446,52 @@ up_generate_mp_reach(struct rde_peer *pe
* nexthop and peer are in the same
* subnet
*/
- memcpy(&upa->mpattr[12],
+ memcpy(attrbuf,
&a->nexthop->exit_nexthop.v4,
sizeof(struct in_addr));
- return (0);
+ break;
}
- memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4,
+ memcpy(attrbuf, &peer->local_v4_addr.v4,
sizeof(struct in_addr));
} else
/* ebgp multihop */
- memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4,
+ memcpy(attrbuf, &peer->local_v4_addr.v4,
sizeof(struct in_addr));
- return (0);
- default:
break;
+ default:
+ fatalx("up_generate_mp_reach: unknown AID");
+ }
+
+ r = up_dump_prefix(buf + wpos, len - wpos, &a->updates, peer);
+ if (r == 0) {
+ /* no prefixes written ... */
+ return (-1);
}
- return (-1);
+ attrlen += r;
+ wpos += r;
+ /* update attribute length field */
+ tmp = htons(attrlen);
+ memcpy(buf + 2, &tmp, sizeof(tmp));
+
+ return (wpos);
}
-int
-up_generate_attr(struct rde_peer *peer, struct update_attr *upa,
+static int
+up_generate_attr(u_char *buf, int len, struct rde_peer *peer,
struct rde_aspath *a, u_int8_t aid)
{
struct attr *oa, *newaggr = NULL;
u_char *pdata;
u_int32_t tmp32;
in_addr_t nexthop;
- int flags, r, ismp = 0, neednewpath = 0;
- u_int16_t len = sizeof(up_attr_buf), wlen = 0, plen;
+ int flags, r, neednewpath = 0;
+ u_int16_t wlen = 0, plen;
u_int8_t l;
u_int16_t nlen = 0;
u_char *ndata = NULL;
/* origin */
- if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+ if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
ATTR_ORIGIN, &a->origin, 1)) == -1)
return (-1);
wlen += r; len -= r;
@@ -763,7 +508,7 @@ up_generate_attr(struct rde_peer *peer,
if (!rde_as4byte(peer))
pdata = aspath_deflate(pdata, &plen, &neednewpath);
- if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+ if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
ATTR_ASPATH, pdata, plen)) == -1)
return (-1);
wlen += r; len -= r;
@@ -772,13 +517,12 @@ up_generate_attr(struct rde_peer *peer,
switch (aid) {
case AID_INET:
nexthop = up_get_nexthop(peer, a);
- if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+ if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
ATTR_NEXTHOP, &nexthop, 4)) == -1)
return (-1);
wlen += r; len -= r;
break;
default:
- ismp = 1;
break;
}
@@ -791,7 +535,7 @@ up_generate_attr(struct rde_peer *peer,
a->flags & F_ATTR_MED_ANNOUNCE ||
peer->conf.flags & PEERFLAG_TRANS_AS)) {
tmp32 = htonl(a->med);
- if ((r = attr_write(up_attr_buf + wlen, len, ATTR_OPTIONAL,
+ if ((r = attr_write(buf + wlen, len, ATTR_OPTIONAL,
ATTR_MED, &tmp32, 4)) == -1)
return (-1);
wlen += r; len -= r;
@@ -800,7 +544,7 @@ up_generate_attr(struct rde_peer *peer,
if (!peer->conf.ebgp) {
/* local preference, only valid for ibgp */
tmp32 = htonl(a->lpref);
- if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
+ if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN,
ATTR_LOCALPREF, &tmp32, 4)) == -1)
return (-1);
wlen += r; len -= r;
@@ -820,7 +564,7 @@ up_generate_attr(struct rde_peer *peer,
break;
switch (oa->type) {
case ATTR_ATOMIC_AGGREGATE:
- if ((r = attr_write(up_attr_buf + wlen, len,
+ if ((r = attr_write(buf + wlen, len,
ATTR_WELL_KNOWN, ATTR_ATOMIC_AGGREGATE,
NULL, 0)) == -1)
return (-1);
@@ -848,7 +592,7 @@ up_generate_attr(struct rde_peer *peer,
memcpy(t + sizeof(tas),
oa->data + sizeof(tmp32),
oa->len - sizeof(tmp32));
- if ((r = attr_write(up_attr_buf + wlen, len,
+ if ((r = attr_write(buf + wlen, len,
oa->flags, oa->type, &t, sizeof(t))) == -1)
return (-1);
break;
@@ -863,7 +607,7 @@ up_generate_attr(struct rde_peer *peer,
r = 0;
break;
}
- if ((r = attr_write(up_attr_buf + wlen, len,
+ if ((r = attr_write(buf + wlen, len,
oa->flags, oa->type, oa->data, oa->len)) == -1)
return (-1);
break;
@@ -874,7 +618,7 @@ up_generate_attr(struct rde_peer *peer,
oa->len, &nlen);
if (nlen > 0) {
- if ((r = attr_write(up_attr_buf + wlen,
+ if ((r = attr_write(buf + wlen,
len, oa->flags, oa->type, ndata,
nlen)) == -1) {
free(ndata);
@@ -884,7 +628,7 @@ up_generate_attr(struct rde_peer *peer,
r = 0;
break;
}
- if ((r = attr_write(up_attr_buf + wlen, len,
+ if ((r = attr_write(buf + wlen, len,
oa->flags, oa->type, oa->data, oa->len)) == -1)
return (-1);
break;
@@ -900,7 +644,7 @@ up_generate_attr(struct rde_peer *peer,
r = 0;
break;
}
- if ((r = attr_write(up_attr_buf + wlen, len,
+ if ((r = attr_write(buf + wlen, len,
oa->flags | ATTR_PARTIAL, oa->type,
oa->data, oa->len)) == -1)
return (-1);
@@ -923,7 +667,7 @@ up_generate_attr(struct rde_peer *peer,
flags |= ATTR_PARTIAL;
if (plen == 0)
r = 0;
- else if ((r = attr_write(up_attr_buf + wlen, len, flags,
+ else if ((r = attr_write(buf + wlen, len, flags,
ATTR_AS4_PATH, pdata, plen)) == -1)
return (-1);
wlen += r; len -= r;
@@ -933,54 +677,44 @@ up_generate_attr(struct rde_peer *peer,
flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
if (!(a->flags & F_PREFIX_ANNOUNCED))
flags |= ATTR_PARTIAL;
- if ((r = attr_write(up_attr_buf + wlen, len, flags,
+ if ((r = attr_write(buf + wlen, len, flags,
ATTR_AS4_AGGREGATOR, newaggr->data, newaggr->len)) == -1)
return (-1);
wlen += r; len -= r;
}
- /* write mp attribute to different buffer */
- if (ismp)
- if (up_generate_mp_reach(peer, upa, a, aid) == -1)
- return (-1);
-
- /* the bgp path attributes are now stored in the global buf */
- upa->attr = malloc(wlen);
- if (upa->attr == NULL)
- fatal("up_generate_attr");
- memcpy(upa->attr, up_attr_buf, wlen);
- upa->attr_len = wlen;
return (wlen);
}
-#define MIN_PREFIX_LEN 5 /* 1 byte prefix length + 4 bytes addr */
+/* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */
+#define MIN_UPDATE_LEN 16
+
int
-up_dump_prefix(u_char *buf, int len, struct uplist_prefix *prefix_head,
+up_dump_prefix(u_char *buf, int len, struct prefix_queue *prefix_head,
struct rde_peer *peer)
{
- struct update_prefix *upp;
- int r, wpos = 0;
- u_int8_t i;
+ struct prefix *p;
+ struct bgpd_addr addr;
+ int r, wpos = 0;
- while ((upp = TAILQ_FIRST(prefix_head)) != NULL) {
+ while ((p = TAILQ_FIRST(prefix_head)) != NULL) {
+ pt_getaddr(p->re->prefix, &addr);
if ((r = prefix_write(buf + wpos, len - wpos,
- &upp->prefix, upp->prefixlen)) == -1)
+ &addr, p->re->prefix->prefixlen)) == -1)
break;
wpos += r;
- if (RB_REMOVE(uptree_prefix, &peer->up_prefix, upp) == NULL)
- log_warnx("dequeuing update failed.");
- TAILQ_REMOVE(upp->prefix_h, upp, prefix_l);
+
peer->up_pcnt--;
- for (i = 0; i < AID_MAX; i++) {
- if (upp->prefix_h == &peer->withdraws[i]) {
- peer->up_wcnt--;
- peer->prefix_sent_withdraw++;
- } else {
- peer->up_nlricnt--;
- peer->prefix_sent_update++;
- }
+ if (p->flags & F_PREFIX_USE_PEER) {
+ prefix_destroy(p);
+ peer->up_wcnt--;
+ peer->prefix_sent_withdraw++;
+ } else {
+ /* move prefix from updates to prefixes */
+ prefix_relink(p, prefix_aspath(p), 0);
+ peer->up_nlricnt--;
+ peer->prefix_sent_update++;
}
- free(upp);
}
return (wpos);
}
@@ -988,7 +722,7 @@ up_dump_prefix(u_char *buf, int len, str
int
up_dump_attrnlri(u_char *buf, int len, struct rde_peer *peer)
{
- struct update_attr *upa;
+ struct rde_aspath *asp;
int r, wpos;
u_int16_t attr_len;
@@ -996,221 +730,155 @@ up_dump_attrnlri(u_char *buf, int len, s
* It is possible that a queued path attribute has no nlri prefix.
* Ignore and remove those path attributes.
*/
- while ((upa = TAILQ_FIRST(&peer->updates[AID_INET])) != NULL)
- if (TAILQ_EMPTY(&upa->prefix_h)) {
- attr_len = upa->attr_len;
- if (RB_REMOVE(uptree_attr, &peer->up_attrs,
- upa) == NULL)
- log_warnx("dequeuing update failed.");
- TAILQ_REMOVE(&peer->updates[AID_INET], upa, attr_l);
- free(upa->attr);
- free(upa->mpattr);
- free(upa);
+ while ((asp = TAILQ_FIRST(&peer->updates[AID_INET])) != NULL) {
+ if (TAILQ_EMPTY(&asp->updates)) {
+ TAILQ_REMOVE(&peer->updates[AID_INET], asp, update_l);
+ asp->flags &= ~F_ATTR_UPDATE;
peer->up_acnt--;
- /* XXX horrible hack,
- * if attr_len is 0, it is a EoR marker */
- if (attr_len == 0)
+ /* special return for EoR markers */
+ if (asp->flags & F_ATTR_EOR) {
+ path_destroy(asp);
return (-1);
+ }
} else
break;
+ }
+
+ if (len < 2)
+ fatalx("up_dump_attrnlri: buffer way too small");
- if (upa == NULL || upa->attr_len + MIN_PREFIX_LEN > len) {
+ if (asp == NULL || len < MIN_UPDATE_LEN)
+ goto done;
+ r = up_generate_attr(buf + 2, len - 2, peer, asp, AID_INET);
+ if (r == -1) {
/*
* either no packet or not enough space.
* The length field needs to be set to zero else it would be
* an invalid bgp update.
*/
+done:
bzero(buf, 2);
return (2);
}
/* first dump the 2-byte path attribute length */
- attr_len = htons(upa->attr_len);
+ attr_len = htons(r);
memcpy(buf, &attr_len, 2);
wpos = 2;
-
- /* then the path attributes themselves */
- memcpy(buf + wpos, upa->attr, upa->attr_len);
- wpos += upa->attr_len;
+ /* then skip over the already dumped path attributes themselves */
+ wpos += r;
/* last but not least dump the nlri */
- r = up_dump_prefix(buf + wpos, len - wpos, &upa->prefix_h, peer);
+ r = up_dump_prefix(buf + wpos, len - wpos, &asp->updates, peer);
wpos += r;
/* now check if all prefixes were written */
- if (TAILQ_EMPTY(&upa->prefix_h)) {
- if (RB_REMOVE(uptree_attr, &peer->up_attrs, upa) == NULL)
- log_warnx("dequeuing update failed.");
- TAILQ_REMOVE(&peer->updates[AID_INET], upa, attr_l);
- free(upa->attr);
- free(upa->mpattr);
- free(upa);
+ if (TAILQ_EMPTY(&asp->updates)) {
+ TAILQ_REMOVE(&peer->updates[AID_INET], asp, update_l);
+ asp->flags &= ~F_ATTR_UPDATE;
peer->up_acnt--;
}
return (wpos);
}
-u_char *
-up_dump_mp_unreach(u_char *buf, u_int16_t *len, struct rde_peer *peer,
- u_int8_t aid)
+int
+up_dump_mp_unreach(u_char *buf, int len, struct rde_peer *peer, u_int8_t aid)
{
- int wpos;
- u_int16_t datalen, tmp;
- u_int16_t attrlen = 2; /* attribute header (without len) */
- u_int8_t flags = ATTR_OPTIONAL, safi;
-
- /*
- * reserve space for withdraw len, attr len, the attribute header
- * and the mp attribute header
- */
- wpos = 2 + 2 + 4 + 3;
+ u_char *attrbuf;
+ int wpos, r;
+ u_int16_t attr_len, tmp;
- if (*len < wpos)
- return (NULL);
-
- datalen = up_dump_prefix(buf + wpos, *len - wpos,
- &peer->withdraws[aid], peer);
- if (datalen == 0)
- return (NULL);
+ if (len < MIN_UPDATE_LEN || TAILQ_EMPTY(&peer->withdraws[aid]))
+ return (-1);
- datalen += 3; /* afi + safi */
+ /* reserve space for withdraw len, attr len */
+ wpos = 2 + 2;
+ attrbuf = buf + wpos;
+
+ /* attribute header, defaulting to extended length one */
+ attrbuf[0] = ATTR_OPTIONAL | ATTR_EXTLEN;
+ attrbuf[1] = ATTR_MP_UNREACH_NLRI;
+ wpos += 4;
- /* prepend header, need to do it reverse */
- /* safi & afi */
- if (aid2afi(aid, &tmp, &safi))
+ /* afi & safi */
+ if (aid2afi(aid, &tmp, buf + wpos + 2))
fatalx("up_dump_mp_unreach: bad AID");
- buf[--wpos] = safi;
- wpos -= sizeof(u_int16_t);
tmp = htons(tmp);
memcpy(buf + wpos, &tmp, sizeof(u_int16_t));
+ wpos += 3;
- /* attribute length */
- if (datalen > 255) {
- attrlen += 2 + datalen;
- flags |= ATTR_EXTLEN;
- wpos -= sizeof(u_int16_t);
- tmp = htons(datalen);
- memcpy(buf + wpos, &tmp, sizeof(u_int16_t));
- } else {
- attrlen += 1 + datalen;
- buf[--wpos] = (u_char)datalen;
- }
-
- /* mp attribute */
- buf[--wpos] = (u_char)ATTR_MP_UNREACH_NLRI;
- buf[--wpos] = flags;
+ r = up_dump_prefix(buf + wpos, len - wpos, &peer->withdraws[aid], peer);
+ if (r == 0)
+ return (-1);
+ wpos += r;
+ attr_len = r + 3; /* prefixes + afi & safi */
/* attribute length */
- wpos -= sizeof(u_int16_t);
- tmp = htons(attrlen);
- memcpy(buf + wpos, &tmp, sizeof(u_int16_t));
-
- /* no IPv4 withdraws */
- wpos -= sizeof(u_int16_t);
- bzero(buf + wpos, sizeof(u_int16_t));
-
- if (wpos < 0)
- fatalx("up_dump_mp_unreach: buffer underflow");
+ attr_len = htons(attr_len);
+ memcpy(attrbuf + 2, &attr_len, sizeof(attr_len));
- /* total length includes the two 2-bytes length fields. */
- *len = attrlen + 2 * sizeof(u_int16_t);
+ /* write length fields */
+ bzero(buf, sizeof(u_int16_t)); /* withdrawn routes len */
+ attr_len = htons(wpos - 4);
+ memcpy(buf + 2, &attr_len, sizeof(attr_len));
- return (buf + wpos);
+ return (wpos);
}
int
-up_dump_mp_reach(u_char *buf, u_int16_t *len, struct rde_peer *peer,
- u_int8_t aid)
+up_dump_mp_reach(u_char *buf, int len, struct rde_peer *peer, u_int8_t aid)
{
- struct update_attr *upa;
- int wpos;
- u_int16_t attr_len, datalen, tmp;
- u_int8_t flags = ATTR_OPTIONAL;
+ struct rde_aspath *asp;
+ int r, wpos;
+ u_int16_t attr_len;
/*
* It is possible that a queued path attribute has no nlri prefix.
* Ignore and remove those path attributes.
*/
- while ((upa = TAILQ_FIRST(&peer->updates[aid])) != NULL)
- if (TAILQ_EMPTY(&upa->prefix_h)) {
- attr_len = upa->attr_len;
- if (RB_REMOVE(uptree_attr, &peer->up_attrs,
- upa) == NULL)
- log_warnx("dequeuing update failed.");
- TAILQ_REMOVE(&peer->updates[aid], upa, attr_l);
- free(upa->attr);
- free(upa->mpattr);
- free(upa);
+ while ((asp = TAILQ_FIRST(&peer->updates[aid])) != NULL) {
+ if (TAILQ_EMPTY(&asp->updates)) {
+ TAILQ_REMOVE(&peer->updates[aid], asp, update_l);
+ asp->flags &= ~F_ATTR_UPDATE;
peer->up_acnt--;
- /* XXX horrible hack,
- * if attr_len is 0, it is a EoR marker */
- if (attr_len == 0)
+ /* special return for EoR markers */
+ if (asp->flags & F_ATTR_EOR) {
+ path_destroy(asp);
return (-1);
+ }
} else
break;
+ }
- if (upa == NULL)
+ if (asp == NULL || len < MIN_UPDATE_LEN)
return (-2);
- /*
- * reserve space for attr len, the attributes, the
- * mp attribute and the attribute header
- */
- wpos = 2 + 2 + upa->attr_len + 4 + upa->mpattr_len;
- if (*len < wpos)
- return (-2);
+ wpos = 4; /* reserve space for length fields */
- datalen = up_dump_prefix(buf + wpos, *len - wpos,
- &upa->prefix_h, peer);
- if (datalen == 0)
+ /* write regular path attributes */
+ r = up_generate_attr(buf + wpos, len - wpos, peer, asp, aid);
+ if (r == -1)
return (-2);
+ wpos += r;
- if (upa->mpattr_len == 0 || upa->mpattr == NULL)
- fatalx("mulitprotocol update without MP attrs");
-
- datalen += upa->mpattr_len;
- wpos -= upa->mpattr_len;
- memcpy(buf + wpos, upa->mpattr, upa->mpattr_len);
-
- if (datalen > 255) {
- wpos -= 2;
- tmp = htons(datalen);
- memcpy(buf + wpos, &tmp, sizeof(tmp));
- datalen += 4;
- flags |= ATTR_EXTLEN;
- } else {
- buf[--wpos] = (u_char)datalen;
- datalen += 3;
- }
- buf[--wpos] = (u_char)ATTR_MP_REACH_NLRI;
- buf[--wpos] = flags;
-
- datalen += upa->attr_len;
- wpos -= upa->attr_len;
- memcpy(buf + wpos, upa->attr, upa->attr_len);
-
- if (wpos < 4)
- fatalx("Grrr, mp_reach buffer fucked up");
-
- wpos -= 2;
- tmp = htons(datalen);
- memcpy(buf + wpos, &tmp, sizeof(tmp));
+ /* write mp attribute */
+ r = up_generate_mp_reach(buf + wpos, len - wpos, peer, asp, aid);
+ if (r == -1)
+ return (-2);
+ wpos += r;
- wpos -= 2;
- bzero(buf + wpos, 2);
+ /* write length fields */
+ bzero(buf, sizeof(u_int16_t)); /* withdrawn routes len */
+ attr_len = htons(wpos - 4);
+ memcpy(buf + 2, &attr_len, sizeof(attr_len));
/* now check if all prefixes were written */
- if (TAILQ_EMPTY(&upa->prefix_h)) {
- if (RB_REMOVE(uptree_attr, &peer->up_attrs, upa) == NULL)
- log_warnx("dequeuing update failed.");
- TAILQ_REMOVE(&peer->updates[aid], upa, attr_l);
- free(upa->attr);
- free(upa->mpattr);
- free(upa);
+ if (TAILQ_EMPTY(&asp->updates)) {
+ TAILQ_REMOVE(&peer->updates[aid], asp, update_l);
+ asp->flags &= ~F_ATTR_UPDATE;
peer->up_acnt--;
}
- *len = datalen + 4;
return (wpos);
}