Diagnosing connectivity issues involving a bond can be complicated by a lack of logging in LACP. It is difficult to determine the health of sending and receving LACP packets. This is further complicated by the tendency of some switches to toggle the carrier on interfaces that experience issues with LACP, which can cause confusion about why an interface is flapping down and up again.
With this patch, OVS will log when LACP packets aren't sent or recieved on time. Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2223306 Signed-off-by: Mike Pattrick <m...@redhat.com> --- lib/lacp.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/lib/lacp.c b/lib/lacp.c index 3252f17eb..fd186347f 100644 --- a/lib/lacp.c +++ b/lib/lacp.c @@ -143,6 +143,8 @@ struct member { uint32_t count_link_expired; /* Num of times link expired */ uint32_t count_link_defaulted; /* Num of times link defaulted */ uint32_t count_carrier_changed; /* Num of times link status changed */ + long long int last_tx; + long long int last_rx; }; static struct ovs_mutex mutex; @@ -387,6 +389,13 @@ lacp_process_packet(struct lacp *lacp, const void *member_, goto out; } + if (member->last_rx && member->status != LACP_CURRENT) { + long long int delay = time_msec() - member->last_rx; + VLOG_DBG("%s: %s recieved PDU after expiry, delayed by %lldms " + "seconds.", lacp->name, member->name, delay); + } + + member->last_rx = time_msec(); member->status = LACP_CURRENT; tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX; timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate); @@ -524,6 +533,11 @@ lacp_member_carrier_changed(const struct lacp *lacp, const void *member_, if (member->status == LACP_CURRENT || member->lacp->active) { member_set_expired(member); + VLOG_DBG("%s: Expiring LACP due to %s carrier change.", + lacp->name, member->name); + /* Do not warn about long LACP RX/TX interval if interface was down */ + member->last_rx = 0; + member->last_tx = 0; } if (member->carrier_up != carrier_up) { @@ -603,6 +617,8 @@ lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex) if (member->status == LACP_CURRENT) { member_set_expired(member); + VLOG_DBG("%s: Expired member %s because LACP PDU was not " + "received on time.", lacp->name, member->name); member->count_link_expired++; } else if (member->status == LACP_EXPIRED) { member_set_defaulted(member); @@ -642,6 +658,13 @@ lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex) ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX); + /* Log if we exceed the tx timer by double the tx rate. */ + if (member->last_tx && + (time_msec() - member->last_tx) > (duration * 2)) { + VLOG_INFO("%s: Member %s failed to send LACP PCU on time.", + lacp->name, member->name); + } + member->last_tx = time_msec(); timer_set_duration(&member->tx, duration); seq_change(connectivity_seq_get()); } @@ -800,6 +823,7 @@ member_set_expired(struct member *member) OVS_REQUIRES(mutex) member->partner.state &= ~LACP_STATE_SYNC; timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * LACP_FAST_TIME_TX); + member->last_tx = 0; } static void -- 2.39.3 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev