On Tue, Mar 28, 2023 at 11:48:07AM +0200, Claudio Jeker wrote:
> When an RTR session updates the data it happens between CACHE_RESPONSE and
> END_OF_DATA PDUs. When an END_OF_DATA PDU is received the various sources
> are merged into one table and sent to the RDE.
> Now since bgpd supports multiple RTR servers it is possible that two
> servers run updates roughly at the same time. In that case the first
> END_OF_DATA PDU results in a table recalculation of intermediate data (at
> least from the point of the other session).
> To prevent this from happening introduce a semaphore that prevents the
> rtr_recalc() from happening if there are other active RTR sessions.
> On top of this add a 60sec timeout that prevents RTR sessions from hogging
> the semaphore for too long.
>
> The static table (roa-set, aspa-set) are also handled by the rtr porcess
> but config reloads happen work on a 2nd table which is switched into place
> at the end of the reload process so there is never a case were intermediate
> data is visible to rtr_recalc(). Therefore there is no need to use the
> semaphore there.
This also makes complete sense. Can't spot anything wrong with the
logic.
ok tb
>
> --
> :wq Claudio
>
> Index: bgpd.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
> retrieving revision 1.465
> diff -u -p -r1.465 bgpd.h
> --- bgpd.h 13 Mar 2023 16:52:41 -0000 1.465
> +++ bgpd.h 20 Mar 2023 09:23:47 -0000
> @@ -1638,6 +1638,7 @@ static const char * const timernames[] =
> "RTR RefreshTimer",
> "RTR RetryTimer",
> "RTR ExpireTimer",
> + "RTR ActiveTimer",
> ""
> };
>
> Index: rtr.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rtr.c,v
> retrieving revision 1.12
> diff -u -p -r1.12 rtr.c
> --- rtr.c 9 Mar 2023 17:21:21 -0000 1.12
> +++ rtr.c 20 Mar 2023 09:41:22 -0000
> @@ -40,6 +40,7 @@ static struct imsgbuf *ibuf_main;
> static struct imsgbuf *ibuf_rde;
> static struct bgpd_config *conf, *nconf;
> static struct timer_head expire_timer;
> +static int rtr_recalc_semaphore;
>
> static void
> rtr_sighdlr(int sig)
> @@ -58,6 +59,20 @@ rtr_sighdlr(int sig)
>
> #define EXPIRE_TIMEOUT 300
>
> +void
> +rtr_sem_acquire(int cnt)
> +{
> + rtr_recalc_semaphore += cnt;
> +}
> +
> +void
> +rtr_sem_release(int cnt)
> +{
> + rtr_recalc_semaphore -= cnt;
> + if (rtr_recalc_semaphore < 0)
> + fatalx("rtr recalc semaphore underflow");
> +}
> +
> /*
> * Every EXPIRE_TIMEOUT seconds traverse the static roa-set table and expire
> * all elements where the expires timestamp is smaller or equal to now.
> @@ -541,6 +556,9 @@ rtr_recalc(void)
> struct roa *roa, *nr;
> struct aspa_set *aspa;
> struct aspa_prep ap = { 0 };
> +
> + if (rtr_recalc_semaphore > 0)
> + return;
>
> RB_INIT(&rt);
> RB_INIT(&at);
> Index: rtr_proto.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rtr_proto.c,v
> retrieving revision 1.15
> diff -u -p -r1.15 rtr_proto.c
> --- rtr_proto.c 17 Mar 2023 11:14:10 -0000 1.15
> +++ rtr_proto.c 20 Mar 2023 09:46:54 -0000
> @@ -40,6 +40,7 @@ struct rtr_header {
> #define RTR_DEFAULT_REFRESH 3600
> #define RTR_DEFAULT_RETRY 600
> #define RTR_DEFAULT_EXPIRE 7200
> +#define RTR_DEFAULT_ACTIVE 60
>
> enum rtr_pdu_type {
> SERIAL_NOTIFY = 0,
> @@ -99,6 +100,7 @@ enum rtr_event {
> RTR_EVNT_TIMER_REFRESH,
> RTR_EVNT_TIMER_RETRY,
> RTR_EVNT_TIMER_EXPIRE,
> + RTR_EVNT_TIMER_ACTIVE,
> RTR_EVNT_SEND_ERROR,
> RTR_EVNT_SERIAL_NOTIFY,
> RTR_EVNT_CACHE_RESPONSE,
> @@ -116,6 +118,7 @@ static const char *rtr_eventnames[] = {
> "refresh timer expired",
> "retry timer expired",
> "expire timer expired",
> + "activity timer expired",
> "sent error",
> "serial notify received",
> "cache response received",
> @@ -157,8 +160,10 @@ struct rtr_session {
> uint32_t refresh;
> uint32_t retry;
> uint32_t expire;
> + uint32_t active;
> int session_id;
> int fd;
> + int active_lock;
> enum rtr_state state;
> enum reconf_action reconf_action;
> enum rtr_error last_sent_error;
> @@ -1033,18 +1038,30 @@ rtr_fsm(struct rtr_session *rs, enum rtr
> rtr_reset_cache(rs);
> rtr_recalc();
> break;
> + case RTR_EVNT_TIMER_ACTIVE:
> + log_warnx("rtr %s: activity timer fired", log_rtr(rs));
> + rtr_sem_release(rs->active_lock);
> + rtr_recalc();
> + rs->active_lock = 0;
> + break;
> case RTR_EVNT_CACHE_RESPONSE:
> rs->state = RTR_STATE_ACTIVE;
> timer_stop(&rs->timers, Timer_Rtr_Refresh);
> timer_stop(&rs->timers, Timer_Rtr_Retry);
> - /* XXX start timer to limit active time */
> + timer_set(&rs->timers, Timer_Rtr_Active, rs->active);
> + /* prevent rtr_recalc from running while active */
> + rs->active_lock = 1;
> + rtr_sem_acquire(rs->active_lock);
> break;
> case RTR_EVNT_END_OF_DATA:
> /* start refresh and expire timers */
> timer_set(&rs->timers, Timer_Rtr_Refresh, rs->refresh);
> timer_set(&rs->timers, Timer_Rtr_Expire, rs->expire);
> + timer_stop(&rs->timers, Timer_Rtr_Active);
> rs->state = RTR_STATE_IDLE;
> + rtr_sem_release(rs->active_lock);
> rtr_recalc();
> + rs->active_lock = 0;
> break;
> case RTR_EVNT_CACHE_RESET:
> rtr_reset_cache(rs);
> @@ -1164,6 +1181,9 @@ rtr_check_events(struct pollfd *pfds, si
> case Timer_Rtr_Expire:
> rtr_fsm(rs, RTR_EVNT_TIMER_EXPIRE);
> break;
> + case Timer_Rtr_Active:
> + rtr_fsm(rs, RTR_EVNT_TIMER_ACTIVE);
> + break;
> default:
> fatalx("King Bula lost in time");
> }
> @@ -1237,6 +1257,7 @@ rtr_new(uint32_t id, char *descr)
> rs->refresh = RTR_DEFAULT_REFRESH;
> rs->retry = RTR_DEFAULT_RETRY;
> rs->expire = RTR_DEFAULT_EXPIRE;
> + rs->active = RTR_DEFAULT_ACTIVE;
> rs->state = RTR_STATE_CLOSED;
> rs->reconf_action = RECONF_REINIT;
> rs->last_recv_error = NO_ERROR;
> Index: session.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
> retrieving revision 1.161
> diff -u -p -r1.161 session.h
> --- session.h 9 Mar 2023 17:21:21 -0000 1.161
> +++ session.h 20 Mar 2023 09:42:04 -0000
> @@ -201,6 +201,7 @@ enum Timer {
> Timer_Rtr_Refresh,
> Timer_Rtr_Retry,
> Timer_Rtr_Expire,
> + Timer_Rtr_Active,
> Timer_Max
> };
>
> @@ -334,6 +335,8 @@ void rtr_shutdown(void);
> void rtr_show(struct rtr_session *, pid_t);
>
> /* rtr.c */
> +void rtr_sem_acquire(int);
> +void rtr_sem_release(int);
> void rtr_roa_insert(struct roa_tree *, struct roa *);
> void rtr_aspa_insert(struct aspa_tree *, struct aspa_set *);
> void rtr_main(int, int);
>