From: Vipin Kumar <[email protected]>
ISSUE:
During startup, BGP update prefix packing wasn't optimal and route installation
in zebra was found to be spread out over time.
SOLUTION:
With this patch, update-delay post processing is serialized to achieve:
a. better peer update packing
(which helps in reducing total number of BGP update packets)
b. installation of the resulting routes in zebra as close to each other
as possible.
(which can help zebra better batch its processing and its updates to the kernel)
Signed-off-by: Vipin Kumar <[email protected]>
---
bgpd/bgp_fsm.c | 31 ++++++++++++++++++++++---------
bgpd/bgp_packet.c | 12 ++++++------
bgpd/bgp_route.c | 36 ++++++++++++++++++++----------------
bgpd/bgp_vty.c | 6 +++++-
bgpd/bgp_zebra.c | 8 ++++++++
bgpd/bgpd.h | 5 +++++
6 files changed, 66 insertions(+), 32 deletions(-)
diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c
index ab9f1e1..0d38bfc 100644
--- a/bgpd/bgp_fsm.c
+++ b/bgpd/bgp_fsm.c
@@ -555,9 +555,6 @@ bgp_update_delay_configured (struct bgp *bgp)
void
bgp_update_delay_end (struct bgp *bgp)
{
- struct listnode *node, *nnode;
- struct peer *peer;
-
THREAD_TIMER_OFF (bgp->t_update_delay);
THREAD_TIMER_OFF (bgp->t_establish_wait);
@@ -573,15 +570,22 @@ bgp_update_delay_end (struct bgp *bgp)
/*
* Add an end-of-initial-update marker to the main process queues so that
- * the route advertisement timer for the peers can be started.
+ * the route advertisement timer for the peers can be started. Also set
+ * the zebra and peer update hold flags. These flags are used to achieve
+ * three stages in the update-delay post processing:
+ * 1. Finish best-path selection for all the prefixes held on the queues.
+ * (routes in BGP are updated, and peers sync queues are populated too)
+ * 2. As the eoiu mark is reached in the bgp process routine, ship all the
+ * routes to zebra. With that zebra should see updates from BGP close
+ * to each other.
+ * 3. Unblock the peer update writes. With that peer update packing with
+ * the prefixes should be at its maximum.
*/
bgp_add_eoiu_mark(bgp, BGP_TABLE_MAIN);
bgp_add_eoiu_mark(bgp, BGP_TABLE_RSCLIENT);
-
- /* Route announcements were postponed for all the peers during read-only
mode,
- send those now. */
- for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer))
- bgp_announce_route_all (peer);
+ bgp->main_zebra_update_hold = 1;
+ bgp->main_peers_update_hold = 1;
+ bgp->rsclient_peers_update_hold = 1;
/* Resume the queue processing. This should trigger the event that would take
care of processing any work that was queued during the read-only mode. */
@@ -598,6 +602,15 @@ bgp_start_routeadv (struct bgp *bgp)
struct listnode *node, *nnode;
struct peer *peer;
+ zlog_info("bgp_start_routeadv(), update hold status - main: %d, rsclient:
%d",
+ bgp->main_peers_update_hold, bgp->rsclient_peers_update_hold);
+
+ if (bgp->main_peers_update_hold || bgp->rsclient_peers_update_hold)
+ return;
+
+ quagga_timestamp(3, bgp->update_delay_peers_resume_time,
+ sizeof(bgp->update_delay_peers_resume_time));
+
for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer))
{
if (peer->status != Established)
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index d222100..b6744f9 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -435,9 +435,6 @@ bgp_default_update_send (struct peer *peer, struct attr
*attr,
if (DISABLE_BGP_ANNOUNCE)
return;
- if (bgp_update_delay_active(peer->bgp))
- return;
-
if (afi == AFI_IP)
str2prefix ("0.0.0.0/0", &p);
#ifdef HAVE_IPV6
@@ -507,9 +504,6 @@ bgp_default_withdraw_send (struct peer *peer, afi_t afi,
safi_t safi)
if (DISABLE_BGP_ANNOUNCE)
return;
- if (bgp_update_delay_active(peer->bgp))
- return;
-
if (afi == AFI_IP)
str2prefix ("0.0.0.0/0", &p);
#ifdef HAVE_IPV6
@@ -587,6 +581,12 @@ bgp_write_packet (struct peer *peer)
if (s)
return s;
+ /* The code beyond this part deals with update packets, check if updates
+ are on hold as part of the update-delay post processing stages. */
+ if (peer->bgp && (peer->bgp->main_peers_update_hold ||
+ peer->bgp->rsclient_peers_update_hold))
+ return NULL;
+
for (afi = AFI_IP; afi < AFI_MAX; afi++)
for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
{
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index 8da267b..fc84ea6 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -1555,6 +1555,12 @@ bgp_process_rsclient (struct work_queue *wq, void *data)
/* Is it end of initial update? (after startup) */
if (!rn)
{
+ /* This is just to keep the display sane in case all the peers are
+ rsclients only */
+ quagga_timestamp(3, bgp->update_delay_zebra_resume_time,
+ sizeof(bgp->update_delay_zebra_resume_time));
+
+ bgp->rsclient_peers_update_hold = 0;
bgp_start_routeadv(bgp);
return WQ_SUCCESS;
}
@@ -1627,6 +1633,17 @@ bgp_process_main (struct work_queue *wq, void *data)
/* Is it end of initial update? (after startup) */
if (!rn)
{
+ quagga_timestamp(3, bgp->update_delay_zebra_resume_time,
+ sizeof(bgp->update_delay_zebra_resume_time));
+
+ bgp->main_zebra_update_hold = 0;
+ for (afi = AFI_IP; afi < AFI_MAX; afi++)
+ for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
+ {
+ bgp_zebra_announce_table(bgp, afi, safi);
+ }
+ bgp->main_peers_update_hold = 0;
+
bgp_start_routeadv(bgp);
return WQ_SUCCESS;
}
@@ -2699,19 +2716,9 @@ bgp_announce_table (struct peer *peer, afi_t afi, safi_t
safi,
if (! table)
table = (rsclient) ? peer->rib[afi][safi] : peer->bgp->rib[afi][safi];
- if (safi != SAFI_MPLS_VPN)
- {
- if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE))
- {
- bgp_default_originate (peer, afi, safi, 0);
- }
- else
- {
- /* Send the withdraw if it was postponed during read-only mode. */
- if (CHECK_FLAG (peer->af_flags[afi][safi],
PEER_STATUS_DEFAULT_ORIGINATE))
- bgp_default_originate (peer, afi, safi, 1);
- }
- }
+ if (safi != SAFI_MPLS_VPN
+ && CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE))
+ bgp_default_originate (peer, afi, safi, 0);
/* It's initialized in bgp_announce_[check|check_rsclient]() */
attr.extra = &extra;
@@ -2763,9 +2770,6 @@ bgp_announce_route_all (struct peer *peer)
afi_t afi;
safi_t safi;
- if (bgp_update_delay_active(peer->bgp))
- return;
-
for (afi = AFI_IP; afi < AFI_MAX; afi++)
for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
bgp_announce_route (peer, afi, safi);
diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c
index eddbd90..3fe363b 100644
--- a/bgpd/bgp_vty.c
+++ b/bgpd/bgp_vty.c
@@ -7352,8 +7352,12 @@ bgp_show_summary (struct vty *vty, struct bgp *bgp, int
afi, int safi)
{
vty_out (vty, " First neighbor established: %s%s",
bgp->update_delay_begin_time, VTY_NEWLINE);
- vty_out (vty, " Best-paths/updates resumed: %s%s",
+ vty_out (vty, " Best-paths resumed: %s%s",
bgp->update_delay_end_time, VTY_NEWLINE);
+ vty_out (vty, " zebra update resumed: %s%s",
+ bgp->update_delay_zebra_resume_time,
VTY_NEWLINE);
+ vty_out (vty, " peers update resumed: %s%s",
+ bgp->update_delay_peers_resume_time,
VTY_NEWLINE);
}
}
}
diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
index cb29d79..6b912fa 100644
--- a/bgpd/bgp_zebra.c
+++ b/bgpd/bgp_zebra.c
@@ -888,6 +888,9 @@ bgp_zebra_announce (struct prefix *p, struct bgp_info
*info, struct bgp *bgp,
if (! vrf_bitmap_check (zclient->redist[ZEBRA_ROUTE_BGP], VRF_DEFAULT))
return;
+ if (bgp->main_zebra_update_hold)
+ return;
+
flags = 0;
peer = info->peer;
@@ -1188,6 +1191,7 @@ bgp_zebra_announce_table (struct bgp *bgp, afi_t afi,
safi_t safi)
struct bgp_info *ri;
table = bgp->rib[afi][safi];
+ if (!table) return;
for (rn = bgp_table_top (table); rn; rn = bgp_route_next (rn))
for (ri = rn->info; ri; ri = ri->next)
@@ -1210,6 +1214,10 @@ bgp_zebra_withdraw (struct prefix *p, struct bgp_info
*info, safi_t safi)
return;
peer = info->peer;
+
+ if (peer->bgp && peer->bgp->main_zebra_update_hold)
+ return;
+
flags = 0;
if (peer->sort == BGP_PEER_IBGP)
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 7c580bd..82b4efc 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -117,10 +117,15 @@ struct bgp
struct thread *t_update_delay;
struct thread *t_establish_wait;
u_char update_delay_over;
+ u_char main_zebra_update_hold;
+ u_char main_peers_update_hold;
+ u_char rsclient_peers_update_hold;
u_int16_t v_update_delay;
u_int16_t v_establish_wait;
char update_delay_begin_time[64];
char update_delay_end_time[64];
+ char update_delay_zebra_resume_time[64];
+ char update_delay_peers_resume_time[64];
u_int32_t established;
u_int32_t restarted_peers;
u_int32_t implicit_eors;
--
1.9.1
_______________________________________________
Quagga-dev mailing list
[email protected]
https://lists.quagga.net/mailman/listinfo/quagga-dev