Problem: ======== During port DOWN->UP of link (slave) in a LACP bond, after receiving the LACPDU with SYNC set for both actor and partner, the bond-slave remains "disabled" until OVS main thread runs LACP state machine and eventually "enables" the bond-slave. With this, we have observed delays in the order of 350ms and packets are dropped in OVS due to bond-admissibility check (packets received on slave in "disabled" state are dropped).
Fix: ==== When a LACPDU is received, evaluate whether LACP slave can be enabled (slave_may_enable()) and set LACP slave's may_enable from the datapath thread itself. When may_enable = TRUE, it means L1 state is UP and LACP-SYNC is done and it is waiting for the main thread to enable the slave. Relax the check in bond_check_admissibility() to check for both "enable" and "may_enable" of the LACP slave. This would avoid dropping of packets until the main thread enables the slave from bundle_run(). Signed-off-by: Nitin Katiyar <nitin.kati...@ericsson.com> Signed-off-by: Manohar Krishnappa Chidambaraswamy <man...@gmail.com> Co-authored-by: Manohar Krishnappa Chidambaraswamy <man...@gmail.com> --- lib/lacp.c | 12 ++++++++++-- lib/lacp.h | 3 ++- ofproto/bond.c | 16 +++++++++++++--- ofproto/ofproto-dpif-xlate.c | 11 ++++++++++- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/lib/lacp.c b/lib/lacp.c index d6b36aa..7ac85f9 100644 --- a/lib/lacp.c +++ b/lib/lacp.c @@ -154,6 +154,7 @@ static struct slave *slave_lookup(const struct lacp *, const void *slave) OVS_REQUIRES(mutex); static bool info_tx_equal(struct lacp_info *, struct lacp_info *) OVS_REQUIRES(mutex); +static bool slave_may_enable__(struct slave *slave) OVS_REQUIRES(mutex); static unixctl_cb_func lacp_unixctl_show; static unixctl_cb_func lacp_unixctl_show_stats; @@ -324,8 +325,8 @@ lacp_is_active(const struct lacp *lacp) OVS_EXCLUDED(mutex) /* Processes 'packet' which was received on 'slave_'. This function should be * called on all packets received on 'slave_' with Ethernet Type ETH_TYPE_LACP. */ -void -lacp_process_packet(struct lacp *lacp, const void *slave_, +bool +lacp_process_packet(struct lacp *lacp, const void *bond, const void *slave_, const struct dp_packet *packet) OVS_EXCLUDED(mutex) { @@ -333,6 +334,7 @@ lacp_process_packet(struct lacp *lacp, const void *slave_, const struct lacp_pdu *pdu; long long int tx_rate; struct slave *slave; + bool lacp_may_enable = false; lacp_lock(); slave = slave_lookup(lacp, slave_); @@ -362,8 +364,14 @@ lacp_process_packet(struct lacp *lacp, const void *slave_, slave->partner = pdu->actor; } + /* Evaluate may_enable here to avoid dropping of packets till main thread + * sets may_enable to true. */ + lacp_may_enable = slave_may_enable__(slave); + out: lacp_unlock(); + + return lacp_may_enable; } /* Returns the lacp_status of the given 'lacp' object (which may be NULL). */ diff --git a/lib/lacp.h b/lib/lacp.h index f35cff5..1505c2c 100644 --- a/lib/lacp.h +++ b/lib/lacp.h @@ -46,7 +46,8 @@ struct lacp *lacp_ref(const struct lacp *); void lacp_configure(struct lacp *, const struct lacp_settings *); bool lacp_is_active(const struct lacp *); -void lacp_process_packet(struct lacp *, const void *slave, +bool lacp_process_packet(struct lacp *, const void *bond, + const void *slave, const struct dp_packet *packet); enum lacp_status lacp_status(const struct lacp *); diff --git a/ofproto/bond.c b/ofproto/bond.c index d2a8b1f..cd6fd33 100644 --- a/ofproto/bond.c +++ b/ofproto/bond.c @@ -794,6 +794,7 @@ bond_check_admissibility(struct bond *bond, const void *slave_, { enum bond_verdict verdict = BV_DROP; struct bond_slave *slave; + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); ovs_rwlock_rdlock(&rwlock); slave = bond_slave_lookup(bond, slave_); @@ -811,7 +812,11 @@ bond_check_admissibility(struct bond *bond, const void *slave_, * drop all incoming traffic except if lacp_fallback_ab is enabled. */ switch (bond->lacp_status) { case LACP_NEGOTIATED: - verdict = slave->enabled ? BV_ACCEPT : BV_DROP; + /* To reduce packet-drops due to delay in enabling of slave (post + * LACP-SYNC), from main thread, check for may_enable as well. + * When may_enable is TRUE, it means LACP is UP and waiting for the + * main thread to run LACP state machine and enable the slave. */ + verdict = (slave->enabled || slave->may_enable) ? BV_ACCEPT : BV_DROP; goto out; case LACP_CONFIGURED: if (!bond->lacp_fallback_ab) { @@ -847,8 +852,6 @@ bond_check_admissibility(struct bond *bond, const void *slave_, /* Drop all packets which arrive on backup slaves. This is similar to * how Linux bonding handles active-backup bonds. */ if (bond->active_slave != slave) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_DBG_RL(&rl, "active-backup bond received packet on backup" " slave (%s) destined for " ETH_ADDR_FMT, slave->name, ETH_ADDR_ARGS(eth_dst)); @@ -870,6 +873,13 @@ bond_check_admissibility(struct bond *bond, const void *slave_, OVS_NOT_REACHED(); out: + if (slave && (verdict != BV_ACCEPT)) { + VLOG_DBG_RL(&rl, "slave= %s actv-slave= %d may_enable %d enable %d " + "LACP %d verdict(A/D/M=0/1/2) %d\n", slave->name, + (bond->active_slave == slave), slave->may_enable, + slave->enabled, bond->lacp_status, verdict); + } + ovs_rwlock_unlock(&rwlock); return verdict; diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index acd4817..a4a3a2b 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -3288,6 +3288,7 @@ process_special(struct xlate_ctx *ctx, const struct xport *xport) const struct xbridge *xbridge = ctx->xbridge; const struct dp_packet *packet = ctx->xin->packet; enum slow_path_reason slow; + bool lacp_may_enable; if (!xport) { slow = 0; @@ -3308,7 +3309,15 @@ process_special(struct xlate_ctx *ctx, const struct xport *xport) } else if (xport->xbundle && xport->xbundle->lacp && flow->dl_type == htons(ETH_TYPE_LACP)) { if (packet) { - lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet); + lacp_may_enable = lacp_process_packet(xport->xbundle->lacp, + xport->xbundle->bond, + xport->ofport, packet); + /* Update LACP status in bond-slave to avoid packet-drops until + * LACP state machine is run by the main thread. */ + if (xport->xbundle->bond && lacp_may_enable) { + bond_slave_set_may_enable(xport->xbundle->bond, xport->ofport, + lacp_may_enable); + } } slow = SLOW_LACP; } else if ((xbridge->stp || xbridge->rstp) && -- 1.9.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev