On Wed, Sep 17, 2014 at 03:21:53PM +0100, Pawel Wodkowski wrote:
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski at intel.com>
> Signed-off-by: Maciej T Gajdzica <maciejx.t.gajdzica at intel.com>
> Reviewed-by: Declan Doherty <declan.doherty at intel.com>
> ---
>  lib/librte_ether/rte_ether.h               |    1 +
>  lib/librte_pmd_bond/Makefile               |    1 +
>  lib/librte_pmd_bond/rte_eth_bond.h         |    4 +
>  lib/librte_pmd_bond/rte_eth_bond_8023ad.c  | 1064 
> ++++++++++++++++++++++++++++
>  lib/librte_pmd_bond/rte_eth_bond_8023ad.h  |  411 +++++++++++
>  lib/librte_pmd_bond/rte_eth_bond_api.c     |   28 +-
>  lib/librte_pmd_bond/rte_eth_bond_args.c    |    1 +
>  lib/librte_pmd_bond/rte_eth_bond_pmd.c     |  179 ++++-
>  lib/librte_pmd_bond/rte_eth_bond_private.h |    9 +-
>  9 files changed, 1685 insertions(+), 13 deletions(-)
>  create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.c
>  create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.h
> 
> diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
> index 2e08f23..1a3711b 100644
> --- a/lib/librte_ether/rte_ether.h
> +++ b/lib/librte_ether/rte_ether.h
> @@ -293,6 +293,7 @@ struct vlan_hdr {
>  #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
>  #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
>  #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. 
> */
> +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
>  
>  #ifdef __cplusplus
>  }
> diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile
> index 953d75e..c2312c2 100644
> --- a/lib/librte_pmd_bond/Makefile
> +++ b/lib/librte_pmd_bond/Makefile
> @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS)
>  #
>  SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
>  SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
>  SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
>  
>  #
> diff --git a/lib/librte_pmd_bond/rte_eth_bond.h 
> b/lib/librte_pmd_bond/rte_eth_bond.h
> index bd59780..6aac4ec 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond.h
> @@ -75,6 +75,10 @@ extern "C" {
>  /**< Broadcast (Mode 3).
>   * In this mode all transmitted packets will be transmitted on all available
>   * active slaves of the bonded. */
> +#define BONDING_MODE_8023AD                          (4)
> +/**< 802.3AD (Mode 4).
> + * In this mode transmission and reception of packets is managed by LACP
> + * protocol specified in 802.3AD documentation. */
>  
>  /* Balance Mode Transmit Policies */
>  #define BALANCE_XMIT_POLICY_LAYER2           (0)
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c 
> b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
> new file mode 100644
> index 0000000..6ce6efb
> --- /dev/null
> +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
> @@ -0,0 +1,1064 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stddef.h>
> +#include <string.h>
> +
> +#include <rte_alarm.h>
> +#include <rte_malloc.h>
> +#include <rte_errno.h>
> +
> +#include "rte_eth_bond_private.h"
> +#include "rte_eth_bond_8023ad.h"
> +
> +#include <rte_cycles.h>
> +
> +#define RTE_LIBRTE_BOND_DEBUG_8023AX
> +
> +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AX
> +#define BOND_ASSERT(expr) \
> +     ((expr) ? (void) (0) \
> +     : rte_panic("%s(%d): assertion failed" __STRING(expr), __FILE__, 
> __LINE__))
> +#else
> +#define BOND_ASSERT(expr) do { } while (0)
> +#endif
It seems like every library re-invents this wheel.  Maybe merge them into a
single assert macro available from a common header?


> +
> +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AX
> +#define _PORT_ID internals->active_slaves[port_num]
> +#define BOND_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
> +                     bond_dbg_get_time_diff(), _PORT_ID, __FUNCTION__, 
> ##__VA_ARGS__)
> +
> +static unsigned
> +bond_dbg_get_time_diff(void)
> +{
> +     static unsigned ms_start = 0;
> +     struct timespec t;
> +     uint64_t ms;
> +
> +     clock_gettime(CLOCK_MONOTONIC, &t);
> +     ms = (((long)t.tv_sec * 1000000000L) + t.tv_nsec) / 1000000L;
> +
> +     if (ms_start == 0)
> +             ms_start = ms;
> +
> +     return ms - ms_start;
> +}
> +
> +static void
> +bond_print_lacp(struct lacpdu *l)
> +{
> +     char a_state[256] = { 0 };
> +     char p_state[256] = { 0 };
> +     static const char *state_labels[] = {
> +             "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
> +     };
> +
> +     int a_len = 0;
> +     int p_len = 0;
> +     uint8_t i;
> +
> +     for (i = 0; i < 8; i++) {
> +             if ((l->actor.state >> i) & 1) {
> +                     a_len += snprintf(a_state + a_len, sizeof(a_state) - 
> a_len, "%s ",
> +                             state_labels[i]);
> +             }
> +
> +             if ((l->partner.state >> i) & 1) {
> +                     p_len += snprintf(p_state + p_len, sizeof(p_state) - 
> p_len, "%s ",
> +                             state_labels[i]);
> +             }
> +     }
> +
> +     if (a_len && a_state[a_len-1] == ' ')
> +             a_state[a_len-1] = '\0';
> +
> +     if (p_len && p_state[p_len-1] == ' ')
> +                     p_state[p_len-1] = '\0';
> +
> +     RTE_LOG(DEBUG, PMD, "LACP: {\n"\
> +                     "  subtype= %02X\n"\
> +                     "  ver_num=%02X\n"\
> +                     "  actor={ tlv=%02X, len=%02X\n"\
> +                     "    pri=%04X, system=(ADDRESS), key=%04X, p_pri=%04X 
> p_num=%04X\n"\
> +                     "       state={ %s }\n"\
> +                     "  }\n"\
> +                     "  partner={ tlv=%02X, len=%02X\n"\
> +                     "    pri=%04X, system=(ADDRESS), key=%04X, p_pri=%04X 
> p_num=%04X\n"\
> +                     "       state={ %s }\n"\
> +                     "  }\n"\
> +                     "  collector={info=%02X, length=%02X, max_delay=%04X\n, 
> " \
> +                                                     "type_term=%02X, 
> terminator_length = %02X}\n",\
> +                     l->subtype,\
> +                     l->version_number,\
> +                     l->actor.tlv_type_info,\
> +                     l->actor.info_length,\
> +                     l->actor.port_params.system_priority,\
> +                     l->actor.port_params.key,\
> +                     l->actor.port_params.port_priority,\
> +                     l->actor.port_params.port_number,\
> +                     a_state,\
> +                     l->partner.tlv_type_info,\
> +                     l->partner.info_length,\
> +                     l->partner.port_params.system_priority,\
> +                     l->partner.port_params.key,\
> +                     l->partner.port_params.port_priority,\
> +                     l->partner.port_params.port_number,\
> +                     p_state,\
> +                     l->tlv_type_collector_info,\
> +                     l->collector_info_length,\
> +                     l->collector_max_delay,\
> +                     l->tlv_type_terminator,\
> +                     l->terminator_length);
> +
> +}
> +#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
> +
> +#else
> +#define BOND_PRINT_LACP(lacpdu) do { } while (0)
> +#define BOND_DEBUG(fmt, ...) do { } while (0)
> +#endif
> +
> +static const struct ether_addr lacp_mac_addr = {
> +     .addr_bytes = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02}
> +};
> +
> +static void
> +timer_expired_cb(void *arg)
> +{
> +     enum timer_state *timer = arg;
> +
> +     BOND_ASSERT(*timer == TIMER_RUNNING);
> +     *timer = TIMER_EXPIRED;
> +}
> +
> +static void
> +timer_cancel(enum timer_state *timer)
> +{
> +     rte_eal_alarm_cancel(&timer_expired_cb, timer);
> +     *timer = TIMER_NOT_STARTED;
> +}
> +
> +static void
> +timer_set(enum timer_state *timer, uint64_t timeout)
> +{
> +     rte_eal_alarm_cancel(&timer_expired_cb, timer);
> +     rte_eal_alarm_set(timeout * 1000, &timer_expired_cb, timer);
> +     *timer = TIMER_RUNNING;
> +}
> +
> +static bool
> +timer_is_expired(enum timer_state *timer)
> +{
> +     return *timer == TIMER_EXPIRED;
> +}
> +
> +static void
> +record_default(struct port *port)
> +{
> +     /* Record default parametes for partner. Partner admin parameters
> +      * are not implemented so nothing to copy. Only mark actor that parner 
> is
> +      * in defaulted state. */
> +     port->partner_state = STATE_LACP_ACTIVE;
> +     ACTOR_STATE_SET(port, DEFAULTED);
> +}
> +
> +/** Function handles rx state machine.
> + *
> + * This function implements Receive State Machine from point 5.4.12 in
> + * 802.1AX documentation. It should be called periodically.
> + *
> + * @param lacpdu             LACPDU received.
> + * @param port                       Port on which LACPDU was received.
> + */
> +static void
> +rx_machine(struct bond_dev_private *internals, uint8_t port_num,
> +     struct lacpdu *lacp)
> +{
> +     struct port *port = &internals->mode4.port_list[port_num];
> +
> +     if (SM_FLAG(port, BEGIN)) {
> +             /* Initialize stuff */
> +             BOND_DEBUG("-> INITIALIZE\n");
> +             SM_FLAG_CLR(port, MOVED);
> +             port->selected = UNSELECTED;
> +
> +             record_default(port);
> +
> +             ACTOR_STATE_CLR(port, EXPIRED);
> +             timer_cancel(&port->current_while_timer);
> +
> +             /* DISABLED: On initialization partner is out of sync */
> +             PARTNER_STATE_CLR(port, SYNCHRONIZATION);
> +
> +             /* LACP DISABLED stuff if LACP not enabled on this port */
> +             if (!SM_FLAG(port, LACP_ENABLED))
> +                     PARTNER_STATE_CLR(port, AGGREGATION);
> +     }
> +
> +     if (!SM_FLAG(port, LACP_ENABLED)) {
> +             /* Update parameters only if state changed */
> +             if (port->current_while_timer) {
> +                     port->selected = UNSELECTED;
> +                     record_default(port);
> +                     PARTNER_STATE_CLR(port, AGGREGATION);
> +                     ACTOR_STATE_CLR(port, EXPIRED);
> +                     timer_cancel(&port->current_while_timer);
> +             }
> +             return;
> +     }
> +
> +     if (lacp) {
> +             BOND_DEBUG("LACP -> CURRENT\n");
> +             BOND_PRINT_LACP(lacp);
> +             /* Update selected flag. If partner parameters are defaulted 
> assume they
> +              * are match. If not defaulted  compare LACP actor with ports 
> parner
> +              * params. */
> +             if (!(port->actor_state & STATE_DEFAULTED) &&
> +                     (((port->partner_state ^ lacp->actor.state) & 
> STATE_AGGREGATION) ||
> +                             memcmp(&port->partner, &lacp->actor.port_params,
> +                                     sizeof(port->partner)) != 0)) {
> +                     BOND_DEBUG("selected <- UNSELECTED\n");
> +                     port->selected = UNSELECTED;
> +             }
> +
> +             /* Record this PDU actor params as partner params */
> +             memcpy(&port->partner, &lacp->actor.port_params,
> +                     sizeof(struct port_params));
> +             port->partner_state = lacp->actor.state;
> +             ACTOR_STATE_CLR(port, DEFAULTED);
> +
> +             /* Update NTT if partners information are outdated */
> +             uint8_t state_mask = STATE_LACP_ACTIVE | 
> STATE_LACP_SHORT_TIMEOUT |
> +                     STATE_SYNCHRONIZATION | STATE_AGGREGATION;
> +
> +             if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
> +                             memcmp(&port->actor, &lacp->partner.port_params,
> +                                     sizeof(struct port_params)) != 0) {
> +                     port->sm_flags |= SM_FLAGS_NTT;
> +             }
> +
> +             /* If LACP partner params match this port actor params */
> +             if (memcmp(&port->actor, &lacp->partner.port_params,
> +                         sizeof(port->actor)) == 0 &&
> +                     (port->partner_state & STATE_AGGREGATION) == 
> (port->actor_state
> +                         & STATE_AGGREGATION))
> +                     PARTNER_STATE_SET(port, SYNCHRONIZATION);
> +             else if (!(port->partner_state & STATE_AGGREGATION) &&
> +                         (port->actor_state & STATE_AGGREGATION))
> +                     PARTNER_STATE_SET(port, SYNCHRONIZATION);
> +             else
> +                     PARTNER_STATE_CLR(port, SYNCHRONIZATION);
> +
> +             if (port->actor_state & STATE_LACP_SHORT_TIMEOUT)
> +                     timer_set(&port->current_while_timer, 
> BOND_8023AD_SHORT_TIMEOUT_MS);
> +             else
> +                     timer_set(&port->current_while_timer, 
> BOND_8023AD_LONG_TIMEOUT_MS);
> +
> +             ACTOR_STATE_CLR(port, EXPIRED);
> +             return; /* No state change */
> +     }
> +
> +     if (port->current_while_timer != TIMER_RUNNING) {
> +             ACTOR_STATE_SET(port, EXPIRED);
> +             PARTNER_STATE_CLR(port, SYNCHRONIZATION);
> +             PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
> +             timer_set(&port->current_while_timer, 
> BOND_8023AD_SHORT_TIMEOUT_MS);
> +     }
> +}
> +
> +/**
> + * Function handles periodic tx state machine.
> + *
> + * Function implements Periodic Transmission state machine from point 5.4.13
> + * in 802.1AX documentation. It should be called periodically.
> + *
> + * @param port                       Port to handle state machine.
> + */
> +static void
> +periodic_machine(struct bond_dev_private *internals, uint8_t port_num)
> +{
> +     struct port *port = &internals->mode4.port_list[port_num];
> +     /* Calculate if either site is LACP enabled */
> +     uint32_t timeout;
> +     uint16_t sm_flags = port->sm_flags;
> +     uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
> +             PARTNER_STATE(port, LACP_ACTIVE);
> +
> +     uint8_t is_partner_fast, was_partner_fast;
> +     /* No periodic is on BEGIN, LACP DISABLE or when both sides are pasive 
> */
> +     if ((sm_flags & SM_FLAGS_BEGIN) || !(sm_flags & SM_FLAGS_LACP_ENABLED) 
> ||
> +                 active == 0) {
> +             timer_cancel(&port->periodic_timer);
> +             port->tx_machine_timer = TIMER_EXPIRED;
> +             sm_flags &= ~SM_FLAGS_PARTNER_SHORT_TIMEOUT;
> +             port->sm_flags = sm_flags;
> +
> +             BOND_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
> +                     SM_FLAG(port, BEGIN) ? "begind " : "",
> +                     SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
> +                     active ? "LACP active " : "LACP pasive ");
> +             return;
> +     }
> +
> +     is_partner_fast = !!(port->partner_state & STATE_LACP_SHORT_TIMEOUT);
> +     was_partner_fast = !!(port->sm_flags & SM_FLAGS_PARTNER_SHORT_TIMEOUT);
> +
> +     /* If periodic timer is not started, transit from NO PERIODIC to 
> FAST/SLOW.
> +      * Other case: check if timer expire or partners settings changed. */
> +     if (port->periodic_timer != TIMER_NOT_STARTED) {
> +             if (timer_is_expired(&port->periodic_timer))
> +                     sm_flags |= SM_FLAGS_NTT;
> +             else if (is_partner_fast != was_partner_fast) {
> +                     /* Partners timeout  was slow and now it is fast -> 
> send LACP.
> +                      * In other case (was fast and now it is slow) just 
> switch
> +                      * timeout to slow without forcing send of LACP 
> (because standard
> +                      * say so)*/
> +                     if (!is_partner_fast)
> +                             sm_flags |= SM_FLAGS_NTT;
> +             } else
> +                     return; /* Nothing changed */
> +     }
> +
> +     /* Handle state transition to FAST/SLOW LACP timeout */
> +     if (is_partner_fast) {
> +             timeout = BOND_8023AD_FAST_PERIODIC_MS;
> +             sm_flags |= SM_FLAGS_PARTNER_SHORT_TIMEOUT;
> +     } else {
> +             timeout = BOND_8023AD_SLOW_PERIODIC_MS;
> +             sm_flags &= ~SM_FLAGS_PARTNER_SHORT_TIMEOUT;
> +     }
> +
> +     timer_set(&port->periodic_timer, timeout);
> +     port->sm_flags = sm_flags;
> +}
> +
> +/**
> + * Function handles mux state machine.
> + *
> + * Function implements Mux Machine from point 5.4.15 in 802.1AX 
> documentation.
> + * It should be called periodically.
> + *
> + * @param port                       Port to handle state machine.
> + */
> +static int
> +mux_machine(struct bond_dev_private *internals, uint8_t port_num)
> +{
> +     bool ntt = false;
> +     struct port *port = &internals->mode4.port_list[port_num];
> +
> +     /* Save current state for later use */
> +     const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
> +             STATE_COLLECTING;
> +
> +     /* Enter DETACHED state on BEGIN condition or from any other state if
> +      * port was unselected */
> +     if (SM_FLAG(port, BEGIN) ||
> +                     port->selected == UNSELECTED || (port->selected == 
> STANDBY &&
> +                             (port->actor_state & state_mask) != 0)) {
> +             /* detach mux from aggregator not used */
> +             port->actor_state &= ~state_mask;
> +             /* Set ntt to true if BEGIN condition or transition from any 
> other state
> +              * which is indicated that wait_while_timer was started */
> +             if (SM_FLAG(port, BEGIN) ||
> +                             port->wait_while_timer != TIMER_NOT_STARTED) {
> +                     SM_FLAG_SET(port, NTT);
> +                     BOND_DEBUG("-> DETACHED\n");
> +             }
> +             timer_cancel(&port->wait_while_timer);
> +     }
> +
> +     if (port->wait_while_timer == TIMER_NOT_STARTED) {
> +             if (port->selected == SELECTED || port->selected == STANDBY) {
> +                     timer_set(&port->wait_while_timer,
> +                             BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS);
> +
> +                     BOND_DEBUG("DETACHED -> WAITING\n");
> +             }
> +             /* Waiting state entered */
> +             return 0;
> +     }
> +
> +     /* Transit next state if port is ready */
> +     if (!timer_is_expired(&port->wait_while_timer))
> +             return 0;
> +
> +     if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) 
> &&
> +             !PARTNER_STATE(port, SYNCHRONIZATION)) {
> +             /* If in COLLECTING or DISTRIBUTING state and partner becomes 
> out of
> +              * sync transit to ATACHED state.  */
> +             ACTOR_STATE_CLR(port, DISTRIBUTING);
> +             ACTOR_STATE_CLR(port, COLLECTING);
> +             /* Clear actor sync to activate transit ATACHED in condition 
> bellow */
> +             ACTOR_STATE_CLR(port, SYNCHRONIZATION);
> +             BOND_DEBUG("Out of sync -> ATTACHED\n");
> +     } else if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
> +             /* attach mux to aggregator */
> +             BOND_ASSERT((port->actor_state & (STATE_COLLECTING |
> +                     STATE_DISTRIBUTING)) == 0);
> +             ACTOR_STATE_SET(port, SYNCHRONIZATION);
> +             ntt = true;
> +             BOND_DEBUG("ATTACHED Entered\n");
> +     } else if (!ACTOR_STATE(port, COLLECTING)) {
> +             /* Start collecting if in sync */
> +             if (PARTNER_STATE(port, SYNCHRONIZATION)) {
> +                     BOND_DEBUG("ATTACHED -> COLLECTING\n");
> +                     ACTOR_STATE_SET(port, COLLECTING);
> +             }
> +     } else if (ACTOR_STATE(port, COLLECTING)) {
> +             /* Check if partner is in COLLECTING state. If so this port can
> +              * distribute frames to it */
> +             if (!ACTOR_STATE(port, DISTRIBUTING)) {
> +                     if (PARTNER_STATE(port, COLLECTING)) {
> +                             /* Enable  DISTRIBUTING if partner is 
> collecting */
> +                             ACTOR_STATE_SET(port, DISTRIBUTING);
> +                             ntt = true;
> +                             BOND_DEBUG("COLLECTING -> DISTRIBUTING\n");
> +                     }
> +             } else {
> +                     if (!PARTNER_STATE(port, COLLECTING)) {
> +                             /* Disable DISTRIBUTING (enter COLLECTING 
> state) if partner
> +                              * is not collecting */
> +                             ACTOR_STATE_CLR(port, DISTRIBUTING);
> +                             ntt = true;
> +                             BOND_DEBUG("DISTRIBUTING -> COLLECTING\n");
> +                     }
> +             }
> +     }
> +
> +     if (ntt != false)
> +             SM_FLAG_SET(port, NTT);
> +
> +     return ntt;
> +}
> +
> +
> +/**
> + * Function handles transmit state machine.
> + *
> + * Function implements Transmit Machine from point 5.4.16 in 802.1AX
> + * documentation.
> + *
> + * @param port
> + */
> +static void
> +tx_machine(struct rte_eth_dev *bond_dev, uint8_t port_num)
> +{
> +     struct bond_dev_private *internals = bond_dev->data->dev_private;
> +     struct port *port = &internals->mode4.port_list[port_num];
> +     struct mode8023ad_data *data = &internals->mode4;
> +
> +     struct slow_protocol_msg *msg;
> +     struct lacpdu_header *hdr;
> +     struct lacpdu *lacpdu;
> +
> +     /* If periodic timer is not running periodic machine is in NO PERIODIC 
> and
> +      * acording to 802.3ax standard tx machine should not transmit any 
> frames
> +      * and set ntt to false. */
> +     if (port->periodic_timer == TIMER_NOT_STARTED)
> +             SM_FLAG_CLR(port, NTT);
> +
> +     if (!SM_FLAG(port, NTT) || !timer_is_expired(&port->tx_machine_timer))
> +             return;
> +
> +     /* If all conditions are met construct packet to send */
> +     if (rte_ring_dequeue(data->free_ring, (void **)&msg) == -ENOBUFS) {
> +             BOND_DEBUG("tx_machine: no free_lacpdu_ring\n");
> +             return;
> +     }
> +
> +     msg->pkt = rte_pktmbuf_alloc(data->mbuf_pool);
> +     if (msg->pkt == NULL) {
> +             /* FIXME: temporal workaround, when packets are no freed by 
> driver */
> +             struct bond_rx_queue *bd_rx_q;
> +             uint8_t i;
> +
> +             for (i = 0; i < bond_dev->data->nb_rx_queues; i++) {
> +                     bd_rx_q = (struct bond_rx_queue 
> *)bond_dev->data->rx_queues[i];
> +                     BOND_ASSERT(bd_rx_q != NULL && bd_rx_q->mb_pool != 
> NULL);
> +                     /* Do not use SP or SC pools as this is unsafe. */
> +                     if (bd_rx_q->mb_pool->flags & (MEMPOOL_F_SC_GET | 
> MEMPOOL_F_SP_PUT))
> +                             continue;
> +
> +                     msg->pkt = rte_pktmbuf_alloc(bd_rx_q->mb_pool);
> +                     if (msg->pkt) {
> +                             RTE_LOG(WARNING, PMD, "Failed to allocate LACP 
> from mode4 pool."
> +                             "Packet was allocated from pool of rx queue 
> %u\n", i);
> +                             break;
> +                     }
> +             }
> +
> +             if (msg->pkt == NULL) {
> +                     rte_ring_enqueue(data->free_ring, msg);
> +                     RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from 
> pool\n");
> +                     return;
> +             }
> +     }
> +     msg->port_id = internals->active_slaves[port_num];
> +     hdr = rte_pktmbuf_mtod(msg->pkt, struct lacpdu_header *);
> +
> +     msg->pkt->pkt.data_len = sizeof(*hdr);
> +     msg->pkt->pkt.pkt_len = sizeof(*hdr);
> +     /* Source and destination MAC */
> +     ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
> +     mac_address_get(bond_dev, &hdr->eth_hdr.s_addr);
> +     hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
> +
> +     port = &data->port_list[port_num];
> +     lacpdu = &hdr->lacpdu;
> +     memset(lacpdu, 0, sizeof(*lacpdu));
> +
> +     /* Initialize LACP part */
> +     lacpdu->subtype = SUBTYPE_LACP;
> +     lacpdu->version_number = 1;
> +
> +     /* ACTOR */
> +     lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
> +     lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
> +     memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
> +                     sizeof(port->actor));
> +     lacpdu->actor.state = port->actor_state;
> +
> +     /* PARTNER */
> +     lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
> +     lacpdu->partner.info_length = sizeof(struct 
> lacpdu_actor_partner_params);
> +     memcpy(&lacpdu->partner.port_params, &port->partner,
> +                     sizeof(struct port_params));
> +     lacpdu->partner.state = port->partner_state;
> +
> +     /* Other fields */
> +     lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
> +     lacpdu->collector_info_length = 0x10;
> +     lacpdu->collector_max_delay = 0;
> +
> +     lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
> +     lacpdu->terminator_length = 0;
> +
> +     if (rte_ring_enqueue(data->tx_ring, msg) == -ENOBUFS) {
> +             /* If TX ring full, drop packet and free message. Retransmission
> +              * will happen in next function call. */
> +             rte_pktmbuf_free(msg->pkt);
> +             rte_ring_enqueue(data->free_ring, msg);
> +
> +             RTE_LOG(ERR, PMD, "Failed to enqueue LACP packet into tx ring");
> +             return;
> +     }
> +
> +     BOND_DEBUG("sending LACP frame\n");
> +     BOND_PRINT_LACP(lacpdu);
> +
> +     SM_FLAG_CLR(port, NTT);
> +     /* Add 10% random backoff time to better distrube slow packets
> +      * between tx bursts. */
> +     timer_set(&port->tx_machine_timer, BOND_8023AD_TX_PERIOD_MS +
> +             rand() % ((BOND_8023AD_TX_PERIOD_MS * 10) / 100));
> +}
> +
> +/**
> + * Function assigns port to aggregator.
> + *
> + * @param bond_dev_private   Pointer to bond_dev_private structure.
> + * @param port_pos                   Port to assign.
> + */
> +static void
> +selection_logic(struct bond_dev_private *internals, uint8_t port_num)
> +{
> +     struct mode8023ad_data *data = &internals->mode4;
> +     struct port *agg, *port, *port_list;
> +     uint8_t ports_count;
> +     uint8_t i;
> +
> +     ports_count = internals->slave_count;
> +     port_list = data->port_list;
> +     port = &port_list[port_num];
> +
> +     /* Skip port if it is selected */
> +     if (port->selected == SELECTED)
> +             return;
> +
> +     /* Search for aggregator suitable for this port */
> +     for (i = 0; i < ports_count; ++i) {
> +             agg = &port_list[i];
> +             /* Skip ports that are not aggreagators */
> +             if (agg->used_agregator_idx != i && i == port_num)
> +                     continue;
> +
> +             /* Actors system ID is not checked since all slave device have 
> the same
> +              * ID (MAC address). */
> +             if ((agg->actor.key == port->actor.key &&
> +                     agg->partner.system_priority == 
> port->partner.system_priority &&
> +                     is_same_ether_addr(&agg->partner.system, 
> &port->partner.system) == 1
> +                     && (agg->partner.key == port->partner.key)) &&
> +                     is_zero_ether_addr(&port->partner.system) != 1 &&
> +                     (agg->actor.key &
> +                             rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 
> 0) {
> +
> +                     port->used_agregator_idx = i;
> +                     break;
> +             }
> +     }
> +
> +     /* By default, port uses it self as agregator */
> +     if (i == ports_count)
> +             port->used_agregator_idx = port_num;
> +
> +     port->selected = SELECTED;
> +
> +     BOND_DEBUG("-> SELECTED: ID=%3u pos=%3u\n"
> +             "\t%s ID=%3u pos=%3u\n",
> +             internals->active_slaves[port_num], port_num,
> +             port->used_agregator_idx == port_num ?
> +                     "agregator not found, using default" : "agregator 
> found",
> +             port->used_agregator_idx,
> +             internals->active_slaves[port->used_agregator_idx]);
> +}
> +
> +/**
> + * Helper function which updates current port
> + */
> +static void
> +update_mux_slaves(struct bond_dev_private *internals)
> +{
> +     struct mode8023ad_data *data = &internals->mode4;
> +     struct port *port;
> +     uint8_t current[RTE_MAX_ETHPORTS];
> +     uint8_t count = 0;
> +     uint8_t i;
> +
> +     for (i = 0; i < internals->slave_count; i++) {
> +             port = &data->port_list[i];
> +             if (ACTOR_STATE(port, DISTRIBUTING))
> +                     current[count++] = i;
> +     }
> +
> +     memcpy(data->distibuting_slaves, current, sizeof(current[0]) * count);
> +     data->distibuting_slaves_count = count;
> +}
> +
> +/* Function maps DPDK speed to bonding speed stored in key field */
> +static uint16_t
> +link_speed_key(uint16_t speed) {
> +     uint16_t key_speed;
> +
> +     switch (speed) {
> +     case ETH_LINK_SPEED_AUTONEG:
> +             key_speed = 0x00;
> +             break;
> +     case ETH_LINK_SPEED_10:
> +             key_speed = BOND_LINK_SPEED_KEY_10M;
> +             break;
> +     case ETH_LINK_SPEED_100:
> +             key_speed = BOND_LINK_SPEED_KEY_100M;
> +             break;
> +     case ETH_LINK_SPEED_1000:
> +             key_speed = BOND_LINK_SPEED_KEY_1000M;
> +             break;
> +     case ETH_LINK_SPEED_10G:
> +             key_speed = BOND_LINK_SPEED_KEY_10G;
> +             break;
> +     case ETH_LINK_SPEED_20G:
> +             key_speed = BOND_LINK_SPEED_KEY_20G;
> +             break;
> +     case ETH_LINK_SPEED_40G:
> +             key_speed = BOND_LINK_SPEED_KEY_40G;
> +             break;
> +     default:
> +             /* Unknown speed*/
> +             key_speed = 0xFFFF;
> +     }
> +
> +     return key_speed;
> +}
> +
> +static void
> +bond_mode_8023ad_periodic_cb(void *arg)
> +{
> +     struct rte_eth_dev *bond_dev = arg;
> +     struct bond_dev_private *internals = bond_dev->data->dev_private;
> +     struct mode8023ad_data *data = &internals->mode4;
> +
> +     struct slow_protocol_frame *slow_hdr;
> +     struct rte_eth_link link_info;
> +
> +     struct slow_protocol_msg *msgs[BOND_MODE_8023AX_RX_RING_SIZE];
> +     uint16_t port_num, j, nb_msgs;
> +     /* if not 0 collecting/distibuting array need update */
> +     uint16_t slaves_changed = 0;
> +     bool machines_invoked;
> +
> +     /* Update link status on each port */
> +     for (port_num = 0; port_num < internals->active_slave_count; 
> port_num++) {
> +             uint8_t key;
> +
> +             rte_eth_link_get(internals->active_slaves[port_num], 
> &link_info);
> +             if (link_info.link_status != 0) {
> +                     key = link_speed_key(link_info.link_speed) << 1;
> +                     if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
> +                             key |= BOND_LINK_FULL_DUPLEX_KEY;
> +             } else
> +                     key = 0;
> +
> +             data->port_list[port_num].actor.key = rte_cpu_to_be_16(key);
> +     }
> +
> +     nb_msgs = (uint16_t)rte_ring_dequeue_burst(data->rx_ring, (void **) 
> msgs,
> +             BOND_MODE_8023AX_RX_RING_SIZE);
> +
> +     for (port_num = 0; port_num < internals->active_slave_count; 
> port_num++) {
> +             struct port *port = &data->port_list[port_num];
> +             if ((port->actor.key &
> +                             rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 
> 0) {
> +
> +                     SM_FLAG_SET(port, BEGIN);
> +
> +                     /* LACP is disabled on half duples or link is down */
> +                     if (SM_FLAG(port, LACP_ENABLED)) {
> +                             /* If port was enabled set it to BEGIN state */
> +                             SM_FLAG_CLR(port, LACP_ENABLED);
> +                             ACTOR_STATE_CLR(port, DISTRIBUTING);
> +                             ACTOR_STATE_CLR(port, COLLECTING);
> +                             slaves_changed++;
> +                     }
> +
> +                     BOND_DEBUG("Port %u is not LACP capable!\n",
> +                             internals->active_slaves[port_num]);
> +                     /* Skip this port processing */
> +                     continue;
> +             }
> +
> +             SM_FLAG_SET(port, LACP_ENABLED);
> +             machines_invoked = false;
> +             /* Find LACP packet */
> +             for (j = 0; j < nb_msgs; j++) {
> +                     if (msgs[j] == NULL || msgs[j]->port_id !=
> +                                     internals->active_slaves[port_num])
> +                             continue;
> +
> +                     slow_hdr = rte_pktmbuf_mtod(msgs[j]->pkt,
> +                                     struct slow_protocol_frame *);
> +
> +                     if (slow_hdr->slow_protocol.subtype == 
> SLOW_SUBTYPE_LACP) {
> +                             /* This is LACP frame so pass it to rx_machine 
> */
> +                             struct lacpdu *lacp = (struct lacpdu 
> *)&slow_hdr->slow_protocol;
> +                             /* Invoke state machines on every active slave 
> port */
> +                             rx_machine(internals, port_num, lacp);
> +                             periodic_machine(internals, port_num);
> +                             slaves_changed += mux_machine(internals, 
> port_num);
> +                             tx_machine(bond_dev, port_num);
> +                             selection_logic(internals, port_num);
> +
> +                             machines_invoked = true;
> +                     } else if (slow_hdr->slow_protocol.subtype == 
> SLOW_SUBTYPE_MARKER) {
> +                             struct marker *marker;
> +
> +                             marker = (struct marker *) 
> &slow_hdr->slow_protocol;
> +                             if (marker->tlv_type_marker == 
> MARKER_TLV_TYPE_MARKER_INFO) {
> +                                     /* Reuse received packet to send frame 
> to Marker Responder
> +                                      */
> +                                     marker->tlv_type_marker = 
> MARKER_TLV_TYPE_MARKER_RESP;
> +
> +                                     /* Update source MAC, destination MAC 
> is multicast so we
> +                                      * don't update it */
> +                                     mac_address_get(bond_dev, 
> &slow_hdr->eth_hdr.s_addr);
> +
> +                                     if (rte_ring_enqueue(data->tx_ring, 
> msgs[j]) == -ENOBUFS) {
> +                                             RTE_LOG(ERR, PMD,
> +                                             "Failed to enqueue packet into 
> tx ring");
> +                                             rte_pktmbuf_free(msgs[j]->pkt);
> +                                             
> rte_ring_enqueue(data->free_ring, msgs[j]);
> +                                     }
> +
> +                                     msgs[j] = NULL;
> +                             }
> +                     }
> +             }
> +
> +             if (machines_invoked == false) {
> +                     rx_machine(internals, port_num, NULL);
> +                     periodic_machine(internals, port_num);
> +                     slaves_changed += mux_machine(internals, port_num);
> +                     tx_machine(bond_dev, port_num);
> +                     selection_logic(internals, port_num);
> +                     machines_invoked = true;
> +             }
> +
> +             SM_FLAG_CLR(port, BEGIN);
> +     }
> +
> +     /* Update mux if something changed */
> +     if (slaves_changed > 0) {
> +             update_mux_slaves(internals);
> +             BOND_DEBUG("mux count %u [%2u%s%2u%s%2u%s%2u%s%s]\n",
> +                     data->distibuting_slaves_count,
> +                     data->distibuting_slaves[0],
> +                     data->distibuting_slaves_count > 0 ? " " : "\b\b",
> +                     data->distibuting_slaves[1],
> +                     data->distibuting_slaves_count > 1 ? " " : "\b\b",
> +                     data->distibuting_slaves[2],
> +                     data->distibuting_slaves_count > 2 ? " " : "\b\b",
> +                     data->distibuting_slaves[3],
> +                     data->distibuting_slaves_count > 3 ? " " : "\b\b",
> +                     data->distibuting_slaves_count > 4 ? "..." : "");
> +     }
> +
> +     /* Free packets that was not reused */
> +     for (port_num = 0; port_num < nb_msgs; port_num++) {
> +             if (msgs[port_num] != NULL) {
> +                     rte_pktmbuf_free(msgs[port_num]->pkt);
> +                     rte_ring_enqueue(data->free_ring, msgs[port_num]);
> +             }
> +     }
> +
> +     rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
> +                     bond_mode_8023ad_periodic_cb, arg);
> +}
> +
> +static void
> +bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t num)
> +{
> +     struct bond_dev_private *internals = bond_dev->data->dev_private;
> +     struct mode8023ad_data *data = &internals->mode4;
> +
> +     struct port *port = &data->port_list[internals->active_slave_count];
> +     struct port_params initial = {
> +                     .system = { { 0 } },
> +                     .system_priority = rte_cpu_to_be_16(0xFFFF),
> +                     .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
> +                     .port_priority = rte_cpu_to_be_16(0x00FF),
> +                     .port_number = 0,
> +     };
> +
> +     memcpy(&port->actor, &initial, sizeof(struct port_params));
> +     mac_address_get(bond_dev, &port->actor.system);
> +     port->actor.port_number =
> +             slave_id_to_port_number(internals->active_slaves[num]);
> +
> +     memcpy(&port->partner, &initial, sizeof(struct port_params));
> +
> +     /* default states */
> +     port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | 
> STATE_DEFAULTED;
> +     port->partner_state = STATE_LACP_ACTIVE;
> +     port->sm_flags = SM_FLAGS_BEGIN;
> +
> +     /* use this port as agregator */
> +     port->used_agregator_idx = num;
> +}
> +
> +void
> +bond_mode_8023ad_slave_append(struct rte_eth_dev *bond_dev)
> +{
> +     struct bond_dev_private *internals = bond_dev->data->dev_private;
> +
> +     bond_mode_8023ad_activate_slave(bond_dev, 
> internals->active_slave_count);
> +}
> +
> +int
> +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
> +             uint8_t slave_pos)
> +{
> +     struct bond_dev_private *internals = bond_dev->data->dev_private;
> +     struct mode8023ad_data *data = &internals->mode4;
> +     struct port *port;
> +     uint8_t i;
> +
> +     bond_mode_8023ad_stop(bond_dev);
> +
> +     /* Exclude slave from transmit policy. If this slave is an aggregator
> +      * make all aggregated slaves unselected to force sellection logic
> +      * to select suitable aggregator for this port   */
> +     for (i = 0; i < internals->active_slave_count; i++) {
> +             port = &data->port_list[slave_pos];
> +             if (port->used_agregator_idx == slave_pos) {
> +                     port->selected = UNSELECTED;
> +                     port->actor_state &= ~(STATE_SYNCHRONIZATION | 
> STATE_DISTRIBUTING |
> +                             STATE_COLLECTING);
> +
> +                     /* Use default aggregator */
> +                     port->used_agregator_idx = i;
> +             }
> +     }
> +
> +     port = &data->port_list[slave_pos];
> +     timer_cancel(&port->current_while_timer);
> +     timer_cancel(&port->periodic_timer);
> +     timer_cancel(&port->wait_while_timer);
> +     timer_cancel(&port->tx_machine_timer);
> +
These all seem rather racy.  Alarm callbacks are executed with the alarm list
locks not held.  So there is every possibility that you could execute these (or
any timer_cancel calls in this PMD in parallel with the internal state machine
timer callback, and leave either with a corrupted timer list (resulting from a
double free between here, and the actual callback site), or a timer that is
actually still pending when a slave is removed.


Reply via email to