Please test this diff wherever possible and make sure it does not cause you trouble with certain peers.
This is the first bit of support for graceful restart. The diff only implements the procedures for the receiving speaker (aka the box that was not restarted). So bgpd will not preserve its routing table across restarts (that's for later) but will do the handling when a GR-capable neighbor restarts. Note: bgpd will not announce the graceful restart capability by default. Use "announce restart yes" to turn it on. I will probably commit this later this week so you have been warned :) -- :wq Claudio Index: bgpctl/bgpctl.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v retrieving revision 1.161 diff -u -p -r1.161 bgpctl.c --- bgpctl/bgpctl.c 2 Sep 2010 14:03:21 -0000 1.161 +++ bgpctl/bgpctl.c 18 Sep 2011 08:44:00 -0000 @@ -51,6 +51,7 @@ int show_summary_terse_msg(struct imsg int show_neighbor_terse(struct imsg *); int show_neighbor_msg(struct imsg *, enum neighbor_views); void print_neighbor_capa_mp(struct peer *); +void print_neighbor_capa_restart(struct peer *); void print_neighbor_msgstats(struct peer *); void print_timer(const char *, time_t); static char *fmt_timeframe(time_t t); @@ -157,7 +158,7 @@ main(int argc, char *argv[]) case NONE: case IRRFILTER: usage(); - /* not reached */ + /* NOTREACHED */ case SHOW: case SHOW_SUMMARY: imsg_compose(ibuf, IMSG_CTL_SHOW_NEIGHBOR, 0, 0, -1, NULL, 0); @@ -596,7 +597,7 @@ show_neighbor_msg(struct imsg *imsg, enu if (p->capa.peer.mp[i]) hascapamp = 1; if (hascapamp || p->capa.peer.refresh || - p->capa.peer.restart || p->capa.peer.as4byte) { + p->capa.peer.grestart.restart || p->capa.peer.as4byte) { printf(" Neighbor capabilities:\n"); if (hascapamp) { printf(" Multiprotocol extensions: "); @@ -605,8 +606,11 @@ show_neighbor_msg(struct imsg *imsg, enu } if (p->capa.peer.refresh) printf(" Route Refresh\n"); - if (p->capa.peer.restart) - printf(" Graceful Restart\n"); + if (p->capa.peer.grestart.restart) { + printf(" Graceful Restart"); + 
print_neighbor_capa_restart(p); + printf("\n"); + } if (p->capa.peer.as4byte) printf(" 4-byte AS numbers\n"); } @@ -669,6 +673,28 @@ print_neighbor_capa_mp(struct peer *p) for (i = 0, comma = 0; i < AID_MAX; i++) if (p->capa.peer.mp[i]) { printf("%s%s", comma ? ", " : "", aid2str(i)); + comma = 1; + } +} + +void +print_neighbor_capa_restart(struct peer *p) +{ + int comma; + u_int8_t i; + + if (p->capa.peer.grestart.timeout) + printf(": Timeout: %d, ", p->capa.peer.grestart.timeout); + for (i = 0, comma = 0; i < AID_MAX; i++) + if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT) { + if (!comma && + p->capa.peer.grestart.flags[i] & CAPA_GR_RESTART) + printf("restarted, "); + if (comma) + printf(", "); + printf("%s", aid2str(i)); + if (p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) + printf(" (preserved)"); comma = 1; } } Index: bgpd/bgpd.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v retrieving revision 1.264 diff -u -p -r1.264 bgpd.h --- bgpd/bgpd.h 1 May 2011 12:56:04 -0000 1.264 +++ bgpd/bgpd.h 5 May 2011 17:45:50 -0000 @@ -252,12 +252,24 @@ struct peer_auth { }; struct capabilities { - int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */ - int8_t refresh; /* route refresh, RFC 2918 */ - int8_t restart; /* graceful restart, RFC 4724 */ - int8_t as4byte; /* draft-ietf-idr-as4bytes-13 */ + struct { + int16_t timeout; /* graceful restart timeout */ + int8_t flags[AID_MAX]; /* graceful restart per AID flags */ + int8_t restart; /* graceful restart, RFC 4724 */ + } grestart; + int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */ + int8_t refresh; /* route refresh, RFC 2918 */ + int8_t as4byte; /* 4-byte ASnum, RFC 4893 */ }; +#define CAPA_GR_PRESENT 0x01 +#define CAPA_GR_RESTART 0x02 +#define CAPA_GR_FORWARD 0x04 +#define CAPA_GR_RESTARTING 0x80 +#define CAPA_GR_TIMEMASK 0x0fff +#define CAPA_GR_R_FLAG 0x8000 +#define CAPA_GR_F_FLAG 0x80 + struct peer_config { struct bgpd_addr 
remote_addr; struct bgpd_addr local_addr; @@ -368,6 +380,9 @@ enum imsg_type { IMSG_SESSION_ADD, IMSG_SESSION_UP, IMSG_SESSION_DOWN, + IMSG_SESSION_STALE, + IMSG_SESSION_FLUSH, + IMSG_SESSION_RESTARTED, IMSG_MRT_OPEN, IMSG_MRT_REOPEN, IMSG_MRT_CLOSE, Index: bgpd/parse.y =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v retrieving revision 1.260 diff -u -p -r1.260 parse.y --- bgpd/parse.y 17 Sep 2011 16:29:44 -0000 1.260 +++ bgpd/parse.y 17 Sep 2011 17:57:13 -0000 @@ -1038,7 +1038,7 @@ peeropts : REMOTEAS as4number { curpeer->conf.capabilities.refresh = $3; } | ANNOUNCE RESTART yesno { - curpeer->conf.capabilities.restart = $3; + curpeer->conf.capabilities.grestart.restart = $3; } | ANNOUNCE AS4BYTE yesno { curpeer->conf.capabilities.as4byte = $3; @@ -3019,7 +3019,7 @@ alloc_peer(void) for (i = 0; i < AID_MAX; i++) p->conf.capabilities.mp[i] = -1; p->conf.capabilities.refresh = 1; - p->conf.capabilities.restart = 0; + p->conf.capabilities.grestart.restart = 0; p->conf.capabilities.as4byte = 1; p->conf.local_as = conf->as; p->conf.local_short_as = conf->short_as; Index: bgpd/printconf.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/printconf.c,v retrieving revision 1.86 diff -u -p -r1.86 printconf.c --- bgpd/printconf.c 17 Sep 2011 16:29:44 -0000 1.86 +++ bgpd/printconf.c 17 Sep 2011 17:57:13 -0000 @@ -377,7 +377,7 @@ print_peer(struct peer_config *p, struct printf("%s\tannounce capabilities no\n", c); if (p->capabilities.refresh == 0) printf("%s\tannounce refresh no\n", c); - if (p->capabilities.restart == 1) + if (p->capabilities.grestart.restart == 1) printf("%s\tannounce restart yes\n", c); if (p->capabilities.as4byte == 0) printf("%s\tannounce as4byte no\n", c); Index: bgpd/rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v retrieving revision 1.309 diff -u -p -r1.309 rde.c --- 
bgpd/rde.c 17 Sep 2011 16:29:44 -0000 1.309 +++ bgpd/rde.c 17 Sep 2011 17:57:13 -0000 @@ -100,6 +100,9 @@ struct rde_peer *peer_add(u_int32_t, str struct rde_peer *peer_get(u_int32_t); void peer_up(u_int32_t, struct session_up *); void peer_down(u_int32_t); +void peer_flush(struct rde_peer *, u_int8_t); +void peer_stale(u_int32_t, u_int8_t); +void peer_recv_eor(struct rde_peer *, u_int8_t); void peer_dump(u_int32_t, u_int8_t); void peer_send_eor(struct rde_peer *, u_int8_t); @@ -402,6 +405,47 @@ rde_dispatch_imsg_session(struct imsgbuf case IMSG_SESSION_DOWN: peer_down(imsg.hdr.peerid); break; + case IMSG_SESSION_STALE: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_STALE: bad AID"); + peer_stale(imsg.hdr.peerid, aid); + break; + case IMSG_SESSION_FLUSH: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_FLUSH: bad AID"); + if ((peer = peer_get(imsg.hdr.peerid)) == NULL) { + log_warnx("rde_dispatch: unknown peer id %d", + imsg.hdr.peerid); + break; + } + peer_flush(peer, aid); + break; + case IMSG_SESSION_RESTARTED: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_RESTARTED: bad AID"); + if ((peer = peer_get(imsg.hdr.peerid)) == NULL) { + log_warnx("rde_dispatch: unknown peer id %d", + imsg.hdr.peerid); + break; + } + if (peer->staletime[aid]) + peer_flush(peer, aid); + break; case IMSG_REFRESH: if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { log_warnx("rde_dispatch: wrong imsg len"); @@ -971,6 +1015,10 @@ rde_update_dispatch(struct imsg *imsg) ERR_UPD_ATTRLIST, NULL, 0); return (-1); } + if (withdrawn_len == 0) { + 
/* EoR marker */ + peer_recv_eor(peer, AID_INET); + } return (0); } @@ -1001,6 +1049,11 @@ rde_update_dispatch(struct imsg *imsg) goto done; } + if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0 && mplen == 0) { + /* EoR marker */ + peer_recv_eor(peer, aid); + } + switch (aid) { case AID_INET6: while (mplen > 0) { @@ -3136,6 +3189,50 @@ peer_down(u_int32_t id) free(peer); } +/* + * Flush all routes older then staletime. If staletime is 0 all routes will + * be flushed. + */ +void +peer_flush(struct rde_peer *peer, u_int8_t aid) +{ + struct rde_aspath *asp, *nasp; + + /* walk through per peer RIB list and remove all stale prefixes. */ + for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) { + nasp = LIST_NEXT(asp, peer_l); + path_remove_stale(asp, aid); + } + + /* Deletions are performed in path_remove() */ + rde_send_pftable_commit(); + + /* flushed no need to keep staletime */ + peer->staletime[aid] = 0; +} + +void +peer_stale(u_int32_t id, u_int8_t aid) +{ + struct rde_peer *peer; + time_t now; + + peer = peer_get(id); + if (peer == NULL) { + log_warnx("peer_stale: unknown peer id %d", id); + return; + } + + if (peer->staletime[aid]) + peer_flush(peer, aid); + peer->staletime[aid] = now = time(NULL); + + /* make sure new prefixes start on a higher timestamp */ + do { + sleep(1); + } while (now < time(NULL)); +} + void peer_dump(u_int32_t id, u_int8_t aid) { @@ -3151,11 +3248,20 @@ peer_dump(u_int32_t id, u_int8_t aid) up_generate_default(rules_l, peer, aid); else rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, peer, aid); - if (peer->capa.restart) + if (peer->capa.grestart.restart) up_generate_marker(peer, aid); } /* End-of-RIB marker, RFC 4724 */ +void +peer_recv_eor(struct rde_peer *peer, u_int8_t aid) +{ + /* First notify SE to remove possible race with the timeout. 
*/ + if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id, + 0, -1, &aid, sizeof(aid)) == -1) + fatal("imsg_compose error"); +} + void peer_send_eor(struct rde_peer *peer, u_int8_t aid) { Index: bgpd/rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v retrieving revision 1.139 diff -u -p -r1.139 rde.h --- bgpd/rde.h 17 Sep 2011 16:29:44 -0000 1.139 +++ bgpd/rde.h 17 Sep 2011 17:57:13 -0000 @@ -59,6 +59,7 @@ struct rde_peer { struct uplist_attr updates[AID_MAX]; struct uplist_prefix withdraws[AID_MAX]; struct capabilities capa; + time_t staletime[AID_MAX]; u_int64_t prefix_rcvd_update; u_int64_t prefix_rcvd_withdraw; u_int64_t prefix_sent_update; @@ -385,6 +386,7 @@ int path_update(struct rib *, struct r int path_compare(struct rde_aspath *, struct rde_aspath *); struct rde_aspath *path_lookup(struct rde_aspath *, struct rde_peer *); void path_remove(struct rde_aspath *); +void path_remove_stale(struct rde_aspath *, u_int8_t); void path_destroy(struct rde_aspath *); int path_empty(struct rde_aspath *); struct rde_aspath *path_copy(struct rde_aspath *); Index: bgpd/rde_rib.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v retrieving revision 1.129 diff -u -p -r1.129 rde_rib.c --- bgpd/rde_rib.c 17 Sep 2011 16:29:44 -0000 1.129 +++ bgpd/rde_rib.c 17 Sep 2011 17:57:13 -0000 @@ -505,6 +505,36 @@ path_remove(struct rde_aspath *asp) } } +/* remove all stale routes or if staletime is 0 remove all routes for + a specified AID. 
*/ +void +path_remove_stale(struct rde_aspath *asp, u_int8_t aid) +{ + struct prefix *p, *np; + time_t staletime; + + staletime = asp->peer->staletime[aid]; + for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) { + np = LIST_NEXT(p, path_l); + if (p->prefix->aid != aid) + continue; + + if (staletime && p->lastchange > staletime) + continue; + + if (asp->pftableid) { + struct bgpd_addr addr; + + pt_getaddr(p->prefix, &addr); + /* Commit is done in peer_flush() */ + rde_send_pftable(p->aspath->pftableid, &addr, + p->prefix->prefixlen, 1); + } + prefix_destroy(p); + } +} + + /* this function is only called by prefix_remove and path_remove */ void path_destroy(struct rde_aspath *asp) Index: bgpd/session.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/session.c,v retrieving revision 1.319 diff -u -p -r1.319 session.c --- bgpd/session.c 9 Jul 2011 02:51:18 -0000 1.319 +++ bgpd/session.c 16 Sep 2011 11:45:15 -0000 @@ -69,6 +69,7 @@ void session_tcp_established(struct peer void session_capa_ann_none(struct peer *); int session_capa_add(struct ibuf *, u_int8_t, u_int8_t); int session_capa_add_mp(struct ibuf *, u_int8_t); +int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t); struct bgp_msg *session_newmsg(enum msg_type, u_int16_t); int session_sendmsg(struct bgp_msg *, struct peer *); void session_open(struct peer *); @@ -77,6 +78,8 @@ void session_update(u_int32_t, void *, s void session_notification(struct peer *, u_int8_t, u_int8_t, void *, ssize_t); void session_rrefresh(struct peer *, u_int8_t); +int session_graceful_restart(struct peer *); +int session_graceful_stop(struct peer *, const char *); int session_dispatch_msg(struct pollfd *, struct peer *); int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *); int parse_open(struct peer *); @@ -427,6 +430,9 @@ session_main(int pipe_m2s[2], int pipe_s p->state == STATE_ESTABLISHED) session_demote(p, -1); break; + case Timer_RestartTimeout: + 
session_graceful_stop(p, "timed out"); + break; default: fatalx("King Bula lost in time"); } @@ -914,14 +920,24 @@ change_state(struct peer *peer, enum ses free(peer->rbuf); peer->rbuf = NULL; bzero(&peer->capa.peer, sizeof(peer->capa.peer)); - if (peer->state == STATE_ESTABLISHED) - session_down(peer); + if (event != EVNT_STOP) { timer_set(peer, Timer_IdleHold, peer->IdleHoldTime); if (event != EVNT_NONE && peer->IdleHoldTime < MAX_IDLE_HOLD/2) peer->IdleHoldTime *= 2; } + if (peer->state == STATE_ESTABLISHED) { + if (peer->capa.neg.grestart.restart == 2 && + (event == EVNT_CON_CLOSED || + event == EVNT_CON_FATAL)) { + //session_graceful_restart(peer, event); + /* don't punish graceful restart */ + timer_set(peer, Timer_IdleHold, 0); + peer->IdleHoldTime /= 2; + } else + session_down(peer); + } if (peer->state == STATE_NONE || peer->state == STATE_ESTABLISHED) { /* initialize capability negotiation structures */ @@ -1002,6 +1018,7 @@ session_accept(int listenfd) } } +open: if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { log_peer_warnx(&p->conf, "ipsec or md5sig configured but not available"); @@ -1034,6 +1051,16 @@ session_accept(int listenfd) } session_socket_blockmode(connfd, BM_NONBLOCK); bgp_fsm(p, EVNT_CON_OPEN); + return; + } else if (p != NULL && p->state == STATE_ESTABLISHED) { + /* do the graceful restart dance */ + session_close_connection(p); + session_graceful_restart(p); /* signal the RDE, start timers */ + + /* XXX reinit peer */ + change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); + /* do open dance as in the case above */ + goto open; } else { log_conn_attempt(p, (struct sockaddr *)&cliaddr); close(connfd); @@ -1258,6 +1285,30 @@ session_capa_add_mp(struct ibuf *buf, u_ return (errs); } +int +session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid) +{ + u_int errs = 0; + u_int16_t afi; + u_int8_t flags, safi; + + if (aid2afi(aid, &afi, &safi)) { + log_warn("session_capa_add_gr: bad AID"); + return (1); + } + if 
(p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING) + flags = CAPA_GR_F_FLAG; + else + flags = 0; + + afi = htons(afi); + errs += ibuf_add(b, &afi, sizeof(afi)); + errs += ibuf_add(b, &safi, sizeof(safi)); + errs += ibuf_add(b, &flags, sizeof(flags)); + + return (errs); +} + struct bgp_msg * session_newmsg(enum msg_type msgtype, u_int16_t len) { @@ -1318,6 +1369,7 @@ session_open(struct peer *p) u_int16_t len; u_int8_t i, op_type, optparamlen = 0; int errs = 0; + int mpcapa = 0; if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) - @@ -1337,14 +1389,44 @@ session_open(struct peer *p) if (p->capa.ann.refresh) /* no data */ errs += session_capa_add(opb, CAPA_REFRESH, 0); - /* End-of-RIB marker, RFC 4724 */ - if (p->capa.ann.restart) { /* 2 bytes data */ - u_char c[2]; - - c[0] = 0x80; /* we're always restarting */ - c[1] = 0; - errs += session_capa_add(opb, CAPA_RESTART, 2); - errs += ibuf_add(opb, &c, 2); + /* graceful restart and End-of-RIB marker, RFC 4724 */ + if (p->capa.ann.grestart.restart) { + int rst = 0; + u_int16_t hdr; + u_int8_t grlen; + + if (mpcapa) { + grlen = 2 + 4 * mpcapa; + for (i = 0; i < AID_MAX; i++) { + if (p->capa.neg.grestart.flags[i] & + CAPA_GR_RESTARTING) + rst++; + } + } else { /* AID_INET */ + grlen = 2 + 4; + if (p->capa.neg.grestart.flags[AID_INET] & + CAPA_GR_RESTARTING) + rst++; + } + + hdr = p->conf.holdtime; /* default timeout */ + /* if client does graceful restart don't set R flag */ + if (!rst) + hdr |= CAPA_GR_R_FLAG; + hdr = htons(hdr); + + errs += session_capa_add(opb, CAPA_RESTART, grlen); + errs += ibuf_add(opb, &hdr, sizeof(hdr)); + + if (mpcapa) { + for (i = 0; i < AID_MAX; i++) { + if (p->capa.ann.mp[i]) { + errs += session_capa_add_gr(p, opb, i); + } + } + } else { /* AID_INET */ + errs += session_capa_add_gr(p, opb, AID_INET); + } } /* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */ @@ -1551,6 +1633,46 @@ session_rrefresh(struct peer *p, u_int8_ } int +session_graceful_restart(struct peer *p) +{ + u_int8_t i; 
+ + timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout); + + for (i = 0; i < AID_MAX; i++) { + if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { + if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; + } else if (p->capa.neg.mp[i]) { + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + } + } + return (0); +} + +int +session_graceful_stop(struct peer *p, const char *msg) +{ + u_int8_t i; + + for (i = 0; i < AID_MAX; i++) { + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { + log_peer_warnx(&p->conf, + "graceful restart of %s %s", aid2str(i), msg); + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; + } + } + return (0); +} + +int session_dispatch_msg(struct pollfd *pfd, struct peer *p) { ssize_t n, rpos, av, left; @@ -2114,7 +2236,7 @@ parse_notification(struct peer *peer) "disabling route refresh capability"); break; case CAPA_RESTART: - peer->capa.ann.restart = 0; + peer->capa.ann.grestart.restart = 0; log_peer_warnx(&peer->conf, "disabling restart capability"); break; @@ -2152,10 +2274,13 @@ parse_capabilities(struct peer *peer, u_ u_int32_t remote_as; u_int16_t len; u_int16_t afi; + u_int16_t gr_header; u_int8_t safi; u_int8_t aid; + u_int8_t gr_flags; u_int8_t capa_code; u_int8_t capa_len; + u_int8_t i; len = dlen; while (len > 0) { @@ -2207,8 +2332,50 @@ parse_capabilities(struct peer *peer, u_ peer->capa.peer.refresh = 1; break; case CAPA_RESTART: - peer->capa.peer.restart = 1; - /* we don't care about the further restart capas yet */ + if (capa_len == 2) { + /* peer only supports EoR marker */ + peer->capa.peer.grestart.restart = 1; + peer->capa.peer.grestart.timeout = 0; + break; + } else if (capa_len % 4 != 2) { + log_peer_warnx(&peer->conf, + 
"parse_capabilities: " + "expect len 2 + x*4, len is %u", capa_len); + return (-1); + } + + memcpy(&gr_header, capa_val, sizeof(gr_header)); + gr_header = ntohs(gr_header); + peer->capa.peer.grestart.timeout = + gr_header & CAPA_GR_TIMEMASK; + if (peer->capa.peer.grestart.timeout == 0) { + log_peer_warnx(&peer->conf, + "graceful restart timeout is zero"); + return (-1); + } + + for (i = 2; i <= capa_len - 4; i += 4) { + memcpy(&afi, capa_val + i, sizeof(afi)); + afi = ntohs(afi); + memcpy(&safi, capa_val + i + 2, sizeof(safi)); + if (afi2aid(afi, safi, &aid) == -1) { + log_peer_warnx(&peer->conf, + "parse_capabilities: AFI %u, " + "safi %u unknown", afi, safi); + return (-1); + } + memcpy(&gr_flags, capa_val + i + 3, + sizeof(gr_flags)); + peer->capa.peer.grestart.flags[i] |= + CAPA_GR_PRESENT; + if (gr_flags & CAPA_GR_F_FLAG) + peer->capa.peer.grestart.flags[i] |= + CAPA_GR_FORWARD; + if (gr_header & CAPA_GR_R_FLAG) + peer->capa.peer.grestart.flags[i] |= + CAPA_GR_RESTART; + peer->capa.peer.grestart.restart = 2; + } break; case CAPA_AS4BYTE: if (capa_len != 4) { @@ -2255,7 +2422,35 @@ capa_neg_calc(struct peer *p) if (!hasmp) p->capa.neg.mp[AID_INET] = 1; - p->capa.neg.restart = p->capa.peer.restart; + /* + * graceful restart: only the peer capabilities are of interest here. + * It is necessary to compare the new values with the previous ones + * and act acordingly. AFI/SAFI that are not part in the MP capability + * are treated as not being present. 
+ */ + + for (i = 0; i < AID_MAX; i++) { + if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && + p->capa.neg.mp[i] == 0) + p->capa.peer.grestart.flags[i] = 0; /* disable */ + if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) + if (p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) { + p->capa.neg.grestart.flags[i] = + p->capa.peer.grestart.flags[i] | + CAPA_GR_RESTARTING; + } else { + if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH, + p->conf.id, 0, -1, &i, sizeof(i)) == -1) + return (-1); + p->capa.neg.grestart.flags[i] = + p->capa.peer.grestart.flags[i]; + } + else + p->capa.neg.grestart.flags[i] = + p->capa.peer.grestart.flags[i]; + } + p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; + p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; return (0); } @@ -2273,7 +2468,7 @@ session_dispatch_imsg(struct imsgbuf *ib u_char *data; enum reconf_action reconf; int n, depend_ok, restricted; - u_int8_t errcode, subcode; + u_int8_t aid, errcode, subcode; if ((n = imsg_read(ibuf)) == -1) fatal("session_dispatch_imsg: imsg_read error"); @@ -2583,6 +2778,27 @@ session_dispatch_imsg(struct imsgbuf *ib bgp_fsm(p, EVNT_CON_FATAL); break; } + break; + case IMSG_SESSION_RESTARTED: + if (idx != PFD_PIPE_ROUTE) + fatalx("update request not from RDE"); + if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) { + log_warnx("RDE sent invalid restart msg"); + break; + } + if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) { + log_warnx("no such peer: id=%u", + imsg.hdr.peerid); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_RESTARTED: bad AID"); + p->capa.peer.grestart.flags[aid] &= ~CAPA_GR_RESTARTING; + /* signal back to RDE */ + if (imsg_compose(ibuf_rde, IMSG_SESSION_RESTARTED, + imsg.hdr.peerid, 0, -1, &aid, sizeof(aid)) == -1) + fatal("imsg_compose: IMSG_SESSION_RESTARTED"); break; default: break; Index: bgpd/session.h =================================================================== RCS file: 
/cvs/src/usr.sbin/bgpd/session.h,v retrieving revision 1.111 diff -u -p -r1.111 session.h --- bgpd/session.h 9 Dec 2010 13:50:41 -0000 1.111 +++ bgpd/session.h 8 Jan 2011 21:25:30 -0000 @@ -179,6 +179,7 @@ enum Timer { Timer_IdleHold, Timer_IdleHoldReset, Timer_CarpUndemote, + Timer_RestartTimeout, Timer_Max };