From: Declan Doherty <declan.dohe...@intel.com>

Link Bonding Library (lib/librte_bond) initial release with support for
     Mode 0 - Round Robin
     Mode 1 - Active Backup
     Mode 2 - Balance -> Supports 3 transmit policies (layer 2, layer 2+3, layer 3+4)
     Mode 3 - Broadcast

Signed-off-by: Declan Doherty <declan.doherty at intel.com>
---
 config/common_bsdapp       |    5 +
 config/common_linuxapp     |    5 +
 lib/Makefile               |    1 +
 lib/librte_bond/Makefile   |   28 +
 lib/librte_bond/rte_bond.c | 1679 ++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_bond/rte_bond.h |  228 ++++++
 mk/rte.app.mk              |    5 +
 7 files changed, 1951 insertions(+)
 create mode 100644 lib/librte_bond/Makefile
 create mode 100644 lib/librte_bond/rte_bond.c
 create mode 100644 lib/librte_bond/rte_bond.h

diff --git a/config/common_bsdapp b/config/common_bsdapp
index 2cc7b80..53ed8b9 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -187,6 +187,11 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16
 CONFIG_RTE_LIBRTE_PMD_PCAP=y

 #
+# Compile link bonding library
+#
+CONFIG_RTE_LIBRTE_BOND=y
+
+#
 # Do prefetch of packet data within PMD driver receive function
 #
 CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 62619c6..35b525a 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -211,6 +211,11 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16
 CONFIG_RTE_LIBRTE_PMD_PCAP=n


+#
+# Compile link bonding library
+#
+CONFIG_RTE_LIBRTE_BOND=y
+
 CONFIG_RTE_LIBRTE_PMD_XENVIRT=n

 #
diff --git a/lib/Makefile b/lib/Makefile
index b92b392..9995ba8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,6 +47,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_BOND) += librte_bond
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
 DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
diff --git a/lib/librte_bond/Makefile b/lib/librte_bond/Makefile
new file mode 100644
index 0000000..7514378
--- /dev/null
+++ b/lib/librte_bond/Makefile
@@ -0,0 +1,28 @@
+# <COPYRIGHT_TAG>
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_bond.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_BOND) += rte_bond.c
+
+
+#
+# Export include files
+#
+SYMLINK-y-include += rte_bond.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_BOND) += lib/librte_mbuf lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_BOND) += lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_bond/rte_bond.c b/lib/librte_bond/rte_bond.c
new file mode 100644
index 0000000..35dff25
--- /dev/null
+++ b/lib/librte_bond/rte_bond.c
@@ -0,0 +1,1679 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+#include <linux/binfmts.h>
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_memory.h>
+#include <rte_string_fns.h>
+#include <rte_cycles.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+
+#include <cmdline_parse.h>
+#include <cmdline_parse_etheraddr.h>
+
+#include "rte_bond.h"
+
+static const char *driver_name = "Link Bonding PMD";
+
+/** Port Queue Mapping Structure */
+struct bond_rx_queue {
+       int queue_id;                           /**< Queue Id */
+       struct bond_dev_private *dev_private;   /**< Reference to eth_dev private
+                                                 *   structure */
+
+       uint16_t nb_rx_desc;                    /**< Number of RX descriptors
+                                                 *   available for the queue */
+       struct rte_eth_rxconf rx_conf;          /**< Copy of RX configuration
+                                                 *   structure for queue */
+       struct rte_mempool *mb_pool;            /**< Reference to mbuf pool to use
+                                                 *   for RX queue */
+};
+
+struct bond_tx_queue {
+       int queue_id;                           /**< Queue Id */
+       struct bond_dev_private *dev_private;   /**< Reference to dev private
+                                                 *   structure */
+       uint16_t nb_tx_desc;                    /**< Number of TX descriptors
+                                                 *   available for the queue */
+       struct rte_eth_txconf tx_conf;          /**< Copy of TX configuration
+                                                 *   structure for queue */
+};
+
+
+/** Persisted Slave Configuration Structure */
+struct slave_conf {
+       uint8_t port_id;                /**< Port Id of slave eth_dev */
+       struct ether_addr mac_addr;     /**< Slave eth_dev original MAC address */
+};
+
+/** Link Bonding PMD device private configuration Structure */
+struct bond_dev_private {
+       uint8_t mode;                   /**< Link Bonding Mode */
+       uint8_t primary_port;           /**< Primary Slave Port */
+       uint8_t balance_xmit_policy;    /**< Transmit policy - l2 / l23 / l34
+                                         *   for operation in balance mode */
+       uint8_t user_defined_mac;       /**< Flag for whether MAC address is
+                                         *   user defined or not */
+       uint8_t promiscuous_en;         /**< Enable/disable promiscuous mode on
+                                         *   slave devices */
+       uint8_t link_props_set;         /**< Bonded eth_dev link properties set */
+
+       uint16_t nb_rx_queues;          /**< Total number of rx queues */
+       uint16_t nb_tx_queues;          /**< Total number of tx queues */
+
+       uint8_t slave_count;            /**< Number of slaves, i.e. entries in
+                                         *   use in slaves[] and in
+                                         *   presisted_slaves_conf[] */
+       uint8_t active_slave_count;     /**< Number of active slaves, i.e.
+                                         *   entries in use in active_slaves[] */
+
+       uint8_t active_slaves[RTE_MAX_ETHPORTS];        /**< Active slave list */
+       uint8_t slaves[RTE_MAX_ETHPORTS];               /**< Slave list */
+
+       /** Persisted configuration of slaves */
+       struct slave_conf presisted_slaves_conf[RTE_MAX_ETHPORTS];
+};
+
+static struct slave_conf *
+slave_config_get(struct bond_dev_private *internals, uint8_t slave_port_id);
+
+/**
+ * Test whether an ethdev's PCI driver name is exactly the bonding PMD's
+ * driver_name.
+ *
+ * @param eth_dev      Device to inspect.
+ * @return 0 when the names are identical; non-zero otherwise (-1 when either
+ *         name is NULL or the lengths differ, else the strncmp() result).
+ */
+static int
+valid_bonded_ethdev(struct rte_eth_dev *eth_dev)
+{
+       const char *dev_drv_name = eth_dev->driver->pci_drv.name;
+       size_t expected_len;
+
+       /* A missing name on either side can never match */
+       if (dev_drv_name == NULL || driver_name == NULL)
+               return -1;
+
+       /* Different lengths rule out a match before comparing contents */
+       expected_len = strlen(driver_name);
+       if (strlen(dev_drv_name) != expected_len)
+               return -1;
+
+       return strncmp(dev_drv_name, driver_name, expected_len);
+}
+
+/**
+ * Check that a port id is below the value reported by rte_eth_dev_count().
+ *
+ * @param port_id      Port id to check.
+ * @return 0 when in range, -1 (after logging an error) otherwise.
+ */
+static int
+valid_port_id(uint8_t port_id)
+{
+       const int nb_ports = rte_eth_dev_count();
+
+       if (port_id < nb_ports)
+               return 0;
+
+       RTE_LOG(ERR, PMD,
+                       "%s: port Id %d is greater than rte_eth_dev_count %d\n",
+                       __func__, port_id, nb_ports);
+       return -1;
+}
+
+/**
+ * Check that a port id is in range and refers to a bonded ethdev.
+ *
+ * @param port_id      Port id to check.
+ * @return 0 when the port is a bonded device, -1 otherwise.
+ */
+static int
+valid_bonded_port_id(uint8_t port_id)
+{
+       /* Range check first; valid_port_id() logs its own error */
+       if (valid_port_id(port_id) != 0)
+               return -1;
+
+       if (valid_bonded_ethdev(&rte_eth_devices[port_id]) == 0)
+               return 0;
+
+       RTE_LOG(ERR, PMD,
+                       "%s: Specified port Id %d is not a bonded eth_dev device\n",
+                       __func__, port_id);
+       return -1;
+}
+
+/**
+ * Check that a port id is in range and does NOT refer to a bonded ethdev.
+ *
+ * @param port_id      Port id to check.
+ * @return 0 when the port is a valid non-bonded port, -1 otherwise.
+ */
+static int
+valid_slave_port_id(uint8_t port_id)
+{
+       if (valid_port_id(port_id) != 0)
+               return -1;
+
+       /* valid_bonded_ethdev() yields 0 for a bonded device, which is not
+        * acceptable here */
+       return (valid_bonded_ethdev(&rte_eth_devices[port_id]) == 0) ? -1 : 0;
+}
+
+
+/**
+ * RX burst handler of the bonded device.
+ *
+ * In round robin, broadcast and balance modes every active slave is polled
+ * in list order; in active backup mode only the primary port is polled.
+ *
+ * @param queue        Pointer to the struct bond_rx_queue being polled.
+ * @param bufs         Array receiving the mbuf pointers; holds nb_pkts entries.
+ * @param nb_pkts      Maximum number of packets to retrieve.
+ * @return Number of packets written to bufs, never more than nb_pkts.
+ */
+static uint16_t
+bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+       /* Cast to structure, containing bonded device's port id and queue id */
+       struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+       struct bond_dev_private *internals = bd_rx_q->dev_private;
+
+       uint16_t num_rx_total = 0;
+       int i;
+
+       switch (internals->mode) {
+       case BONDING_MODE_ROUND_ROBIN:
+       case BONDING_MODE_BROADCAST:
+       case BONDING_MODE_BALANCE:
+               /* Stop as soon as the caller's array is full: each slave is only
+                * offered the space that is still free, so the writes can never
+                * run past bufs[nb_pkts - 1] */
+               for (i = 0; i < internals->active_slave_count &&
+                               num_rx_total < nb_pkts; i++) {
+                       /* Offset of pointer to *bufs increases as packets are
+                        * received from other slaves */
+                       num_rx_total += rte_eth_rx_burst(internals->active_slaves[i],
+                                       bd_rx_q->queue_id, bufs + num_rx_total,
+                                       (uint16_t)(nb_pkts - num_rx_total));
+               }
+               break;
+       case BONDING_MODE_ACTIVE_BACKUP:
+               num_rx_total = rte_eth_rx_burst(internals->primary_port,
+                               bd_rx_q->queue_id, bufs, nb_pkts);
+               break;
+       default:
+               /* Unknown mode: nothing is received */
+               break;
+       }
+       return num_rx_total;
+}
+
+
+/**
+ * TX burst handler for round robin mode.
+ *
+ * Packets are dealt out one at a time to consecutive active slaves. The slave
+ * that receives the first packet of a burst is the one following the slave
+ * that received the last packet of the previous burst.
+ *
+ * @param queue        Pointer to the struct bond_tx_queue to transmit on.
+ * @param bufs         Packets to transmit.
+ * @param nb_pkts      Number of packets in bufs.
+ * @return Total number of packets accepted by the slaves.
+ */
+static uint16_t
+bond_ethdev_tx_round_robin(void *queue, struct rte_mbuf **bufs,
+               uint16_t nb_pkts)
+{
+       struct bond_dev_private *dev_private;
+       struct bond_tx_queue *bd_tx_q;
+
+       struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
+       uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+
+       uint8_t num_of_slaves;
+       uint8_t slaves[RTE_MAX_ETHPORTS];
+
+       uint16_t num_tx_total = 0;
+
+       /* Index (into the active slave list) of the slave that gets the first
+        * packet of the next burst.
+        * NOTE(review): function-static, hence shared by every bonded device
+        * and every caller of this function, without synchronisation. */
+       static int next_slave_idx;
+       int i, start_idx, slave_idx;
+
+       bd_tx_q = (struct bond_tx_queue *)queue;
+       dev_private = bd_tx_q->dev_private;
+
+       num_of_slaves = dev_private->active_slave_count;
+       if (num_of_slaves < 1)
+               return num_tx_total;
+
+       /* Copy slave list to protect against slave up/down changes during tx
+        * bursting */
+       memcpy(slaves, dev_private->active_slaves,
+                       sizeof(dev_private->active_slaves[0]) * num_of_slaves);
+
+       /* The modulo keeps the index valid if the slave list shrank since the
+        * previous burst */
+       start_idx = next_slave_idx % num_of_slaves;
+
+       /* Deal the packets out to the per-slave arrays, one slave after the
+        * other, starting at start_idx */
+       for (i = 0; i < nb_pkts; i++) {
+               slave_idx = (start_idx + i) % num_of_slaves;
+               slave_bufs[slave_idx][(slave_nb_pkts[slave_idx])++] = bufs[i];
+       }
+
+       /* Remember where the next burst has to continue */
+       next_slave_idx = (start_idx + nb_pkts) % num_of_slaves;
+
+       /* Send packet burst on each slave device */
+       for (i = 0; i < num_of_slaves; i++) {
+               if (slave_nb_pkts[i] > 0) {
+                       num_tx_total += rte_eth_tx_burst(slaves[i],
+                                       bd_tx_q->queue_id, slave_bufs[i],
+                                       slave_nb_pkts[i]);
+               }
+       }
+
+       return num_tx_total;
+}
+
+/**
+ * TX burst handler for active backup mode: the whole burst is handed to the
+ * primary port.
+ *
+ * @param queue        Pointer to the struct bond_tx_queue to transmit on.
+ * @param bufs         Packets to transmit.
+ * @param nb_pkts      Number of packets in bufs.
+ * @return Number of packets accepted by the primary port; 0 when there is no
+ *         active slave.
+ */
+static uint16_t
+bond_ethdev_tx_active_backup(void *queue,
+               struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+       struct bond_tx_queue *txq = (struct bond_tx_queue *)queue;
+       struct bond_dev_private *priv = txq->dev_private;
+
+       if (priv->active_slave_count >= 1)
+               return rte_eth_tx_burst(priv->primary_port, txq->queue_id,
+                               bufs, nb_pkts);
+
+       return 0;
+}
+
+
+/**
+ * XOR-fold the source and destination MAC addresses of an Ethernet header,
+ * read as three 16-bit words each, into a single 16-bit value.
+ */
+static inline uint16_t
+ether_hash(struct ether_hdr *eth_hdr)
+{
+       uint16_t *src_words = (uint16_t *)eth_hdr->s_addr.addr_bytes;
+       uint16_t *dst_words = (uint16_t *)eth_hdr->d_addr.addr_bytes;
+       uint16_t folded = 0;
+       int w;
+
+       for (w = 0; w < 3; w++)
+               folded ^= (uint16_t)(src_words[w] ^ dst_words[w]);
+
+       return folded;
+}
+
+/**
+ * XOR the IPv4 source and destination addresses and keep the low 16 bits.
+ */
+static inline uint32_t
+ipv4_hash(struct ipv4_hdr *ipv4_hdr)
+{
+       const uint32_t mixed_addrs = ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
+
+       return mixed_addrs & 0xFFFF;
+}
+
+/**
+ * XOR-fold the IPv6 source and destination addresses, read as four 32-bit
+ * words each, into a single 32-bit value.
+ */
+static inline uint32_t
+ipv6_hash(struct ipv6_hdr *ipv6_hdr)
+{
+       uint32_t *src_words = (uint32_t *)&(ipv6_hdr->src_addr[0]);
+       uint32_t *dst_words = (uint32_t *)&(ipv6_hdr->dst_addr[0]);
+       uint32_t folded = 0;
+       int w;
+
+       for (w = 0; w < 4; w++)
+               folded ^= src_words[w] ^ dst_words[w];
+
+       return folded;
+}
+
+/**
+ * XOR the UDP source and destination ports.
+ */
+static uint32_t
+udp_hash(struct udp_hdr *hdr)
+{
+       const uint32_t mixed_ports = hdr->src_port ^ hdr->dst_port;
+
+       return mixed_ports;
+}
+
+/**
+ * Offset from the start of the frame to the header that follows the Ethernet
+ * header; one VLAN header is skipped when PKT_RX_VLAN_PKT is set in ol_flags.
+ */
+static inline size_t
+xmit_l3_offset(const struct rte_mbuf *buf)
+{
+       if (buf->ol_flags & PKT_RX_VLAN_PKT)
+               return sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
+
+       return sizeof(struct ether_hdr);
+}
+
+/**
+ * Select the index of the slave a packet is transmitted on in balance mode.
+ *
+ * @param buf          Packet to hash.
+ * @param slave_count  Number of slaves to choose from.
+ * @param policy       One of BALANCE_XMIT_POLICY_LAYER2 / LAYER23 / LAYER34;
+ *                     any other value hashes to 0.
+ * @return Index in the range [0, slave_count), or 0 when slave_count is 0
+ *         or 1.
+ *
+ * NOTE(review): a packet without PKT_RX_IPV4_HDR set is parsed as IPv6
+ * without checking that it really is IPv6; the UDP header is located
+ * directly after a fixed-size struct ipv4_hdr / struct ipv6_hdr; only UDP
+ * ports take part in the layer 3+4 hash.
+ */
+static inline uint16_t
+xmit_slave_hash(const struct rte_mbuf *buf, uint8_t slave_count, uint8_t policy)
+{
+       struct ether_hdr *eth_hdr;
+       struct udp_hdr *udp_hdr;
+       unsigned char *l3_hdr;
+       uint32_t hash = 0;
+
+       /* With fewer than two slaves index 0 is the only possible answer; this
+        * also keeps "hash % slave_count" away from a division by zero */
+       if (slave_count < 2)
+               return 0;
+
+       switch (policy) {
+       case BALANCE_XMIT_POLICY_LAYER2:
+               eth_hdr = (struct ether_hdr *)buf->pkt.data;
+
+               /* 16-bit value: the ">> 16" fold below is a no-op for it */
+               hash = ether_hash(eth_hdr);
+               break;
+
+       case BALANCE_XMIT_POLICY_LAYER23:
+               eth_hdr = (struct ether_hdr *)buf->pkt.data;
+               l3_hdr = rte_pktmbuf_mtod(buf, unsigned char *) +
+                               xmit_l3_offset(buf);
+
+               if (buf->ol_flags & PKT_RX_IPV4_HDR)
+                       hash = ether_hash(eth_hdr) ^
+                                       ipv4_hash((struct ipv4_hdr *)l3_hdr);
+               else
+                       hash = ether_hash(eth_hdr) ^
+                                       ipv6_hash((struct ipv6_hdr *)l3_hdr);
+               break;
+
+       case BALANCE_XMIT_POLICY_LAYER34:
+               l3_hdr = rte_pktmbuf_mtod(buf, unsigned char *) +
+                               xmit_l3_offset(buf);
+
+               if (buf->ol_flags & PKT_RX_IPV4_HDR) {
+                       struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+
+                       hash = ipv4_hash(ipv4_hdr);
+                       if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
+                               udp_hdr = (struct udp_hdr *)
+                                               (l3_hdr + sizeof(struct ipv4_hdr));
+                               hash ^= udp_hash(udp_hdr);
+                       }
+               } else {
+                       struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+
+                       hash = ipv6_hash(ipv6_hdr);
+                       if (ipv6_hdr->proto == IPPROTO_UDP) {
+                               udp_hdr = (struct udp_hdr *)
+                                               (l3_hdr + sizeof(struct ipv6_hdr));
+                               hash ^= udp_hash(udp_hdr);
+                       }
+               }
+               break;
+
+       default:
+               /* Unknown policy: hash stays 0, i.e. the first slave is used */
+               break;
+       }
+
+       /* Fold the upper bits down before reducing to a slave index */
+       hash ^= hash >> 16;
+       hash ^= hash >> 8;
+
+       return hash % slave_count;
+}
+
+/**
+ * TX burst handler for balance mode: every packet is sent on the active slave
+ * selected by xmit_slave_hash() for the configured transmit policy.
+ *
+ * @param queue        Pointer to the struct bond_tx_queue to transmit on.
+ * @param bufs         Packets to transmit.
+ * @param nb_pkts      Number of packets in bufs.
+ * @return Total number of packets accepted by the slaves.
+ */
+static uint16_t
+bond_ethdev_tx_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+       struct bond_tx_queue *txq = (struct bond_tx_queue *)queue;
+       struct bond_dev_private *priv = txq->dev_private;
+
+       /* One staging array and one fill counter per possible slave */
+       struct rte_mbuf *staged[RTE_MAX_ETHPORTS][nb_pkts];
+       uint16_t staged_cnt[RTE_MAX_ETHPORTS] = { 0 };
+
+       uint8_t slave_ports[RTE_MAX_ETHPORTS];
+       uint8_t nb_slaves = priv->active_slave_count;
+
+       uint16_t sent = 0;
+       int pkt, s, target;
+
+       /* Work on a private snapshot of the active slave list so that slave
+        * up/down changes cannot interfere with this burst */
+       memcpy(slave_ports, priv->active_slaves,
+                       sizeof(priv->active_slaves[0]) * nb_slaves);
+
+       if (nb_slaves < 1)
+               return sent;
+
+       /* Sort the packets by the slave chosen by the transmit policy hash */
+       for (pkt = 0; pkt < nb_pkts; pkt++) {
+               target = xmit_slave_hash(bufs[pkt], nb_slaves,
+                               priv->balance_xmit_policy);
+               staged[target][staged_cnt[target]++] = bufs[pkt];
+       }
+
+       /* Hand each non-empty staging array to its slave */
+       for (s = 0; s < nb_slaves; s++) {
+               if (staged_cnt[s] == 0)
+                       continue;
+
+               sent += rte_eth_tx_burst(slave_ports[s], txq->queue_id,
+                               staged[s], staged_cnt[s]);
+       }
+
+       return sent;
+}
+
+/**
+ * TX burst handler for broadcast mode: the whole burst is transmitted on
+ * every active slave.
+ *
+ * Every slave consumes one reference per transmitted mbuf, so the reference
+ * count of each mbuf is raised by (number of slaves - 1) before transmitting.
+ * For every slave except the most successful one, the references belonging to
+ * packets it did not accept are released here, because the caller only knows
+ * about its own single reference.
+ *
+ * @param queue        Pointer to the struct bond_tx_queue to transmit on.
+ * @param bufs         Packets to transmit.
+ * @param nb_pkts      Number of packets in bufs.
+ * @return Number of packets accepted by the most successful slave (never more
+ *         than nb_pkts). Packets at index >= the return value are still owned
+ *         by the caller.
+ */
+static uint16_t
+bond_ethdev_tx_broadcast(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+       struct bond_dev_private *internals;
+       struct bond_tx_queue *bd_tx_q;
+
+       uint8_t num_of_slaves;
+       uint8_t slaves[RTE_MAX_ETHPORTS];
+
+       uint16_t slave_tx_total[RTE_MAX_ETHPORTS];
+       uint16_t max_nb_of_tx_pkts = 0;
+       uint16_t pkt;
+
+       /* Defaults to slave 0 so that, if no slave accepts anything, exactly
+        * one reference per mbuf is left for the caller */
+       int most_successful_tx_slave = 0;
+       int tx_failed = 0;
+
+       int i;
+
+       bd_tx_q = (struct bond_tx_queue *)queue;
+       internals = bd_tx_q->dev_private;
+
+       num_of_slaves = internals->active_slave_count;
+       if (num_of_slaves < 1)
+               return 0;
+
+       /* Copy slave list to protect against slave up/down changes during tx
+        * bursting */
+       memcpy(slaves, internals->active_slaves,
+                       sizeof(internals->active_slaves[0]) * num_of_slaves);
+
+       /* One reference per slave is needed; the caller supplied the first */
+       if (num_of_slaves > 1) {
+               for (pkt = 0; pkt < nb_pkts; pkt++)
+                       rte_mbuf_refcnt_update(bufs[pkt], num_of_slaves - 1);
+       }
+
+       /* Transmit the burst on each active slave */
+       for (i = 0; i < num_of_slaves; i++) {
+               slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+                               bufs, nb_pkts);
+
+               if (slave_tx_total[i] < nb_pkts)
+                       tx_failed = 1;
+
+               if (slave_tx_total[i] > max_nb_of_tx_pkts) {
+                       max_nb_of_tx_pkts = slave_tx_total[i];
+                       most_successful_tx_slave = i;
+               }
+       }
+
+       /* Drop the references of the packets the less successful slaves did
+        * not accept; the most successful slave's leftovers stay with the
+        * caller */
+       if (tx_failed) {
+               for (i = 0; i < num_of_slaves; i++) {
+                       if (i == most_successful_tx_slave)
+                               continue;
+
+                       for (pkt = slave_tx_total[i]; pkt < nb_pkts; pkt++)
+                               rte_pktmbuf_free(bufs[pkt]);
+               }
+       }
+
+       return max_nb_of_tx_pkts;
+}
+
+/**
+ * Copy link duplex and speed from a slave link to the bonded device and mark
+ * the bonded device's link properties as set. Nothing happens unless the
+ * slave link is up and the bonded device is started.
+ *
+ * @param bonded_eth_dev       Bonded device to update.
+ * @param slave_dev_link       Link information of the slave.
+ */
+static void
+link_properties_set(struct rte_eth_dev *bonded_eth_dev,
+               struct rte_eth_link *slave_dev_link)
+{
+       struct bond_dev_private *priv = bonded_eth_dev->data->dev_private;
+       struct rte_eth_link *bond_link = &bonded_eth_dev->data->dev_link;
+
+       if (!slave_dev_link->link_status || !bonded_eth_dev->data->dev_started)
+               return;
+
+       bond_link->link_duplex = slave_dev_link->link_duplex;
+       bond_link->link_speed = slave_dev_link->link_speed;
+
+       priv->link_props_set = 1;
+}
+
+/**
+ * Zero the bonded device's link information and clear its
+ * "link properties set" flag.
+ *
+ * @param bonded_eth_dev       Bonded device to reset.
+ */
+static void
+link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
+{
+       struct rte_eth_link *bond_link = &bonded_eth_dev->data->dev_link;
+       struct bond_dev_private *priv = bonded_eth_dev->data->dev_private;
+
+       memset(bond_link, 0, sizeof(*bond_link));
+       priv->link_props_set = 0;
+}
+
+/**
+ * Compare the duplex and speed of a slave link with those of the bonded
+ * device.
+ *
+ * @param bonded_dev_link      Link information of the bonded device.
+ * @param slave_dev_link       Link information of the slave.
+ * @return 0 when duplex and speed both match, -1 otherwise.
+ */
+static int
+link_properties_valid(struct rte_eth_link *bonded_dev_link,
+               struct rte_eth_link *slave_dev_link)
+{
+       const int same_duplex =
+                       bonded_dev_link->link_duplex == slave_dev_link->link_duplex;
+       const int same_speed =
+                       bonded_dev_link->link_speed == slave_dev_link->link_speed;
+
+       return (same_duplex && same_speed) ? 0 : -1;
+}
+
+/**
+ * Write a MAC address into the first entry of an ethdev's mac_addrs array.
+ *
+ * Only the in-memory address held in eth_dev->data is changed by this
+ * function.
+ *
+ * @param eth_dev      Device whose address is updated.
+ * @param new_mac_addr Address to store.
+ * @return 0 on success, -1 when either argument is NULL.
+ */
+static int
+mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
+{
+       struct ether_addr *mac_addr;
+
+       if (eth_dev == NULL) {
+               RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n",
+                               __func__);
+               return -1;
+       }
+
+       if (new_mac_addr == NULL) {
+               RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
+               return -1;
+       }
+
+       /* Only dereference eth_dev once it is known to be non-NULL */
+       mac_addr = eth_dev->data->mac_addrs;
+
+       /* if new MAC is different to current MAC then update */
+       if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)))
+               memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
+
+       return 0;
+}
+
+/**
+ * Bring the MAC address of every slave in line with the bonding mode.
+ *
+ * In round robin, balance and broadcast modes every slave gets the bonded
+ * device's MAC address. In active backup mode (and for any other mode value)
+ * only the primary port gets the bonded device's address; every other slave
+ * gets back the address stored in its persisted configuration.
+ *
+ * @param bonded_eth_dev       Bonded device whose slaves are updated.
+ * @return 0 on success; -1 when there are no slaves, when a slave has no
+ *         persisted configuration, or when an address could not be set.
+ */
+static int
+mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
+{
+       struct bond_dev_private *internals;
+       struct ether_addr *new_mac_addr;
+       struct slave_conf *conf;
+       uint8_t slave_port_id;
+       int all_slaves_share_mac;
+       int i;
+
+       internals = bonded_eth_dev->data->dev_private;
+
+       /* Update slave devices MAC addresses */
+       if (internals->slave_count < 1)
+               return -1;
+
+       switch (internals->mode) {
+       case BONDING_MODE_ROUND_ROBIN:
+       case BONDING_MODE_BALANCE:
+       case BONDING_MODE_BROADCAST:
+               all_slaves_share_mac = 1;
+               break;
+       case BONDING_MODE_ACTIVE_BACKUP:
+       default:
+               all_slaves_share_mac = 0;
+               break;
+       }
+
+       for (i = 0; i < internals->slave_count; i++) {
+               slave_port_id = internals->slaves[i];
+
+               if (all_slaves_share_mac ||
+                               slave_port_id == internals->primary_port) {
+                       new_mac_addr = bonded_eth_dev->data->mac_addrs;
+               } else {
+                       /* Non-primary slave: restore its original address */
+                       conf = slave_config_get(internals, slave_port_id);
+                       if (conf == NULL) {
+                               RTE_LOG(ERR, PMD,
+                                               "%s: No persisted configuration for "
+                                               "port Id %d\n",
+                                               __func__, slave_port_id);
+                               return -1;
+                       }
+                       new_mac_addr = &conf->mac_addr;
+               }
+
+               if (mac_address_set(&rte_eth_devices[slave_port_id],
+                               new_mac_addr)) {
+                       RTE_LOG(ERR, PMD,
+                                       "%s: Failed to update port Id %d MAC address\n",
+                                       __func__, slave_port_id);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+
+/**
+ * Select the bonding mode of a bonded device by installing the matching TX
+ * burst handler and recording the mode in the device's private data.
+ *
+ * @param eth_dev      Bonded device.
+ * @param mode         One of the BONDING_MODE_* values.
+ * @return 0 on success, -1 (nothing changed) for an unknown mode.
+ */
+static int
+bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
+{
+       struct bond_dev_private *priv = eth_dev->data->dev_private;
+
+       if (mode == BONDING_MODE_ROUND_ROBIN)
+               eth_dev->tx_pkt_burst = bond_ethdev_tx_round_robin;
+       else if (mode == BONDING_MODE_ACTIVE_BACKUP)
+               eth_dev->tx_pkt_burst = bond_ethdev_tx_active_backup;
+       else if (mode == BONDING_MODE_BALANCE)
+               eth_dev->tx_pkt_burst = bond_ethdev_tx_balance;
+       else if (mode == BONDING_MODE_BROADCAST)
+               eth_dev->tx_pkt_burst = bond_ethdev_tx_broadcast;
+       else
+               return -1;
+
+       priv->mode = mode;
+
+       return 0;
+}
+
+/**
+ * Stop a slave, reconfigure it with the bonded device's queue counts and
+ * per-queue settings, enable its link status change interrupt and start it
+ * again.
+ *
+ * @param bonded_eth_dev       Bonded device providing the queue configuration.
+ * @param slave_eth_dev        Slave device to reconfigure.
+ * @return 0 on success; -1 when the slave has no persisted configuration or
+ *         any configure / queue setup / start call fails.
+ */
+static int
+slave_configure(struct rte_eth_dev *bonded_eth_dev,
+               struct rte_eth_dev *slave_eth_dev)
+{
+       const uint8_t slave_port = slave_eth_dev->data->port_id;
+       struct bond_dev_private *priv = bonded_eth_dev->data->dev_private;
+
+       struct bond_rx_queue *rxq;
+       struct bond_tx_queue *txq;
+
+       int q;
+
+       /* Only ports with a persisted slave configuration are handled */
+       if (slave_config_get(priv, slave_port) == NULL)
+               return -1;
+
+       /* Stop slave */
+       rte_eth_dev_stop(slave_port);
+
+       /* Enable interrupts on slave device */
+       slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
+
+       if (rte_eth_dev_configure(slave_port,
+                       bonded_eth_dev->data->nb_rx_queues,
+                       bonded_eth_dev->data->nb_tx_queues,
+                       &(slave_eth_dev->data->dev_conf)) != 0) {
+               RTE_LOG(ERR, PMD, "Cannot configure slave device: port=%u\n",
+                               slave_port);
+               return -1;
+       }
+
+       /* Mirror every RX queue of the bonded device onto the slave */
+       for (q = 0; q < bonded_eth_dev->data->nb_rx_queues; q++) {
+               rxq = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q];
+
+               if (rte_eth_rx_queue_setup(slave_port, q, rxq->nb_rx_desc,
+                               rte_eth_dev_socket_id(slave_port),
+                               &(rxq->rx_conf), rxq->mb_pool) != 0) {
+                       RTE_LOG(ERR, PMD,
+                                       "rte_eth_rx_queue_setup: port=%d queue_id %d\n",
+                                       slave_port, q);
+                       return -1;
+               }
+       }
+
+       /* Mirror every TX queue of the bonded device onto the slave */
+       for (q = 0; q < bonded_eth_dev->data->nb_tx_queues; q++) {
+               txq = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q];
+
+               if (rte_eth_tx_queue_setup(slave_port, q, txq->nb_tx_desc,
+                               rte_eth_dev_socket_id(slave_port),
+                               &txq->tx_conf) != 0) {
+                       RTE_LOG(ERR, PMD,
+                                       "rte_eth_tx_queue_setup: port=%d queue_id %d\n",
+                                       slave_port, q);
+                       return -1;
+               }
+       }
+
+       /* Start device */
+       if (rte_eth_dev_start(slave_port) != 0) {
+               RTE_LOG(ERR, PMD, "rte_eth_dev_start: port=%u\n", slave_port);
+               return -1;
+       }
+
+       return 0;
+}
+
+/**
+ * Look up the persisted configuration of a slave by port id.
+ *
+ * @param internals            Private data of the bonded device.
+ * @param slave_port_id        Port id of the slave to look up.
+ * @return Pointer to the matching entry among the first slave_count persisted
+ *         entries, or NULL when there is none.
+ */
+static struct slave_conf *
+slave_config_get(struct bond_dev_private *internals, uint8_t slave_port_id)
+{
+       struct slave_conf *conf = internals->presisted_slaves_conf;
+       struct slave_conf *const end = conf + internals->slave_count;
+
+       for (; conf != end; conf++) {
+               if (conf->port_id == slave_port_id)
+                       return conf;
+       }
+
+       return NULL;
+}
+
+/**
+ * Remove a slave's persisted configuration and close the gap by moving every
+ * later entry one slot towards the front.
+ *
+ * slave_count itself is not modified here.
+ *
+ * @param internals            Private data of the bonded device.
+ * @param slave_eth_dev        Slave whose persisted configuration is removed.
+ */
+static void
+slave_config_clear(struct bond_dev_private *internals,
+               struct rte_eth_dev *slave_eth_dev)
+{
+       struct slave_conf *entry;
+       int idx, shifting = 0;
+
+       for (idx = 0; idx < internals->slave_count; idx++) {
+               entry = &internals->presisted_slaves_conf[idx];
+
+               if (entry->port_id == slave_eth_dev->data->port_id) {
+                       shifting = 1;
+                       memset(entry, 0, sizeof(*entry));
+               }
+
+               /* Nothing to move before the match, or at the last entry */
+               if (!shifting || idx >= (internals->slave_count - 1))
+                       continue;
+
+               memcpy(entry, entry + 1, sizeof(*entry));
+       }
+}
+
+/**
+ * Record a slave's port id and current MAC address in the persisted
+ * configuration slot at index slave_count.
+ *
+ * slave_count itself is not modified here.
+ *
+ * @param internals            Private data of the bonded device.
+ * @param slave_eth_dev        Slave whose configuration is recorded.
+ */
+static void
+slave_config_store(struct bond_dev_private *internals,
+               struct rte_eth_dev *slave_eth_dev)
+{
+       struct slave_conf *slot =
+                       internals->presisted_slaves_conf + internals->slave_count;
+
+       slot->port_id = slave_eth_dev->data->port_id;
+
+       memcpy(&(slot->mac_addr), slave_eth_dev->data->mac_addrs,
+                       sizeof(slot->mac_addr));
+}
+
+static void
+bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
+
+/**
+ * Start a bonded device: mark it started with link up, settle the MAC
+ * addresses of the bonded device and its slaves, re-apply promiscuous mode
+ * if enabled, and reconfigure (stop / configure / start) every slave.
+ *
+ * NOTE(review): link_status and dev_started are set before the remaining
+ * checks and are not cleared again when a later step fails.
+ *
+ * @param eth_dev      Bonded device to start.
+ * @return 0 on success; -1 when eth_dev is not a bonded device, has no slaves,
+ *         the primary port has no persisted configuration, or a MAC update or
+ *         slave reconfiguration fails.
+ */
+static int
+bond_ethdev_start(struct rte_eth_dev *eth_dev)
+{
+       struct bond_dev_private *internals;
+       int i;
+
+       /* slave eth dev will be started by bonded device */
+       if (valid_bonded_ethdev(eth_dev)) {
+               RTE_LOG(ERR, PMD,
+                               "%s: user tried to explicitly start a slave eth_dev (%d) "
+                               "of the bonded eth_dev\n",
+                               __func__, eth_dev->data->port_id);
+               return -1;
+       }
+
+       eth_dev->data->dev_link.link_status = 1;
+       eth_dev->data->dev_started = 1;
+
+       internals = eth_dev->data->dev_private;
+
+       if (internals->slave_count == 0) {
+               RTE_LOG(ERR, PMD,
+                               "%s: Cannot start port since there are no slave devices\n",
+                               __func__);
+               return -1;
+       }
+
+       /* Without a user defined MAC the bonded device takes over the original
+        * MAC address of its primary slave */
+       if (!internals->user_defined_mac) {
+               struct slave_conf *conf = slave_config_get(internals,
+                               internals->primary_port);
+
+               if (conf == NULL) {
+                       RTE_LOG(ERR, PMD,
+                                       "%s: No persisted configuration for primary "
+                                       "port Id %d\n",
+                                       __func__, internals->primary_port);
+                       return -1;
+               }
+
+               if (mac_address_set(eth_dev, &conf->mac_addr) != 0)
+                       return -1;
+       }
+
+       /* Update all slave devices MACs*/
+       if (mac_address_slaves_update(eth_dev) != 0)
+               return -1;
+
+       /* If bonded device is configure in promiscuous mode then re-apply
+        * config */
+       if (internals->promiscuous_en)
+               bond_ethdev_promiscuous_enable(eth_dev);
+
+       /* Reconfigure each slave device if starting bonded device */
+       for (i = 0; i < internals->slave_count; i++) {
+               if (slave_configure(eth_dev,
+                               &(rte_eth_devices[internals->slaves[i]])) != 0) {
+                       RTE_LOG(ERR, PMD,
+                                       "bonded port (%d) failed to reconfigure "
+                                       "slave device (%d)\n",
+                                       eth_dev->data->port_id, internals->slaves[i]);
+                       return -1;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Stop a bonded ethdev: mark it as not started with link down and forget
+ * all active slaves. Slave devices themselves are not touched here.
+ */
+static void
+bond_ethdev_stop(struct rte_eth_dev *eth_dev)
+{
+       struct bond_dev_private *internals;
+
+       eth_dev->data->dev_started = 0;
+       eth_dev->data->dev_link.link_status = 0;
+
+       internals = eth_dev->data->dev_private;
+       internals->active_slave_count = 0;
+}
+
+/* dev_close handler: intentionally does nothing for a bonded device. */
+static void
+bond_ethdev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+/* dev_configure handler: performs no work and always reports success (0). */
+static int
+bond_ethdev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+       return 0;
+}
+
+/*
+ * dev_infos_get handler: fills dev_info with the fixed limits advertised by
+ * the bonded device together with its driver name and pci_dev pointer.
+ */
+static void
+bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+       dev_info->pci_dev = dev->pci_dev;
+       dev_info->driver_name = driver_name;
+
+       dev_info->max_mac_addrs = 1;
+
+       /* fixed packet size and queue limits */
+       dev_info->min_rx_bufsize = 0;
+       dev_info->max_rx_pktlen = 2048;
+       dev_info->max_rx_queues = 128;
+       dev_info->max_tx_queues = 512;
+}
+
+/*
+ * rx_queue_setup handler: allocates a bond_rx_queue on the bonded device's
+ * NUMA node, records the requested queue parameters in it and installs it in
+ * dev->data->rx_queues[rx_queue_id].
+ *
+ * Returns 0 on success, -1 if the queue structure cannot be allocated.
+ */
+static int
+bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+               uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
+               const struct rte_eth_rxconf *rx_conf,
+               struct rte_mempool *mb_pool)
+{
+       struct bond_rx_queue *rxq;
+
+       rxq = rte_zmalloc_socket(NULL, sizeof(*rxq), 0,
+                       dev->pci_dev->numa_node);
+       if (rxq == NULL)
+               return -1;
+
+       rxq->dev_private = dev->data->dev_private;
+       rxq->queue_id = rx_queue_id;
+       rxq->nb_rx_desc = nb_rx_desc;
+       rxq->mb_pool = mb_pool;
+       memcpy(&rxq->rx_conf, rx_conf, sizeof(struct rte_eth_rxconf));
+
+       dev->data->rx_queues[rx_queue_id] = rxq;
+
+       return 0;
+}
+
+/*
+ * tx_queue_setup handler: allocates a bond_tx_queue on the bonded device's
+ * NUMA node, records the requested queue parameters in it and installs it in
+ * dev->data->tx_queues[tx_queue_id].
+ *
+ * Returns 0 on success, -1 if the queue structure cannot be allocated.
+ */
+static int
+bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+               uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
+               const struct rte_eth_txconf *tx_conf)
+{
+       struct bond_tx_queue *bd_tx_q;
+
+       bd_tx_q = rte_zmalloc_socket(NULL, sizeof(*bd_tx_q), 0,
+                       dev->pci_dev->numa_node);
+       /* allocation can fail; never dereference a NULL queue */
+       if (bd_tx_q == NULL)
+               return -1;
+
+       bd_tx_q->queue_id = tx_queue_id;
+       bd_tx_q->dev_private = dev->data->dev_private;
+
+       bd_tx_q->nb_tx_desc = nb_tx_desc;
+       memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
+
+       dev->data->tx_queues[tx_queue_id] = bd_tx_q;
+
+       return 0;
+}
+
+/* rx_queue_release handler: frees the queue structure if one was set up. */
+static void
+bond_ethdev_rx_queue_release(void *queue)
+{
+       if (queue != NULL)
+               rte_free(queue);
+}
+
+/* tx_queue_release handler: frees the queue structure if one was set up. */
+static void
+bond_ethdev_tx_queue_release(void *queue)
+{
+       if (queue != NULL)
+               rte_free(queue);
+}
+
+
+/*
+ * link_update handler for the bonded device.
+ *
+ * The bonded link is down when the device is not started or has no active
+ * slaves. Otherwise each active slave's link_update op is invoked in turn
+ * until one reports link up; the bonded link is up if any did.
+ *
+ * Always returns 0.
+ */
+static int
+bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
+               int wait_to_complete)
+{
+       struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
+       struct rte_eth_dev *slave;
+       int any_up = 0;
+       int idx;
+
+       if (!bonded_eth_dev->data->dev_started ||
+                       internals->active_slave_count == 0) {
+               bonded_eth_dev->data->dev_link.link_status = 0;
+               return 0;
+       }
+
+       /* stop polling as soon as one active slave reports link up */
+       for (idx = 0; !any_up && idx < internals->active_slave_count; idx++) {
+               slave = &rte_eth_devices[internals->active_slaves[idx]];
+
+               (*slave->dev_ops->link_update)(slave, wait_to_complete);
+               if (slave->data->dev_link.link_status == 1)
+                       any_up = 1;
+       }
+
+       bonded_eth_dev->data->dev_link.link_status = any_up;
+
+       return 0;
+}
+
+/*
+ * stats_get handler: reports the sum of the statistics of every slave
+ * attached to the bonded device (not only the active ones).
+ */
+static void
+bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+       struct rte_eth_stats per_slave;
+       const struct rte_eth_stats *s = &per_slave;
+       int idx;
+
+       /* start from zero so the result only reflects the slaves */
+       memset(stats, 0, sizeof(*stats));
+
+       for (idx = 0; idx < internals->slave_count; idx++) {
+               rte_eth_stats_get(internals->slaves[idx], &per_slave);
+
+               /* packet and byte counters */
+               stats->ipackets += s->ipackets;
+               stats->opackets += s->opackets;
+               stats->ibytes += s->ibytes;
+               stats->obytes += s->obytes;
+
+               /* error and miscellaneous counters */
+               stats->ierrors += s->ierrors;
+               stats->oerrors += s->oerrors;
+               stats->imcasts += s->imcasts;
+               stats->rx_nombuf += s->rx_nombuf;
+               stats->fdirmatch += s->fdirmatch;
+               stats->fdirmiss += s->fdirmiss;
+
+               /* flow control counters */
+               stats->tx_pause_xon += s->tx_pause_xon;
+               stats->rx_pause_xon += s->rx_pause_xon;
+               stats->tx_pause_xoff += s->tx_pause_xoff;
+               stats->rx_pause_xoff += s->rx_pause_xoff;
+       }
+}
+
+/* stats_reset handler: resets the statistics of every attached slave. */
+static void
+bond_ethdev_stats_reset(struct rte_eth_dev *dev)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+       int idx = 0;
+
+       while (idx < internals->slave_count) {
+               rte_eth_stats_reset(internals->slaves[idx]);
+               idx++;
+       }
+}
+
+/*
+ * promiscuous_enable handler.
+ *
+ * Records that promiscuous mode was requested and propagates it: to every
+ * slave in round robin, balance and broadcast modes, and only to the primary
+ * slave in active backup (or any other) mode.
+ */
+static void
+bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
+{
+       struct bond_dev_private *internals = eth_dev->data->dev_private;
+       int all_slaves;
+       int idx;
+
+       internals->promiscuous_en = 1;
+
+       all_slaves = internals->mode == BONDING_MODE_ROUND_ROBIN ||
+                       internals->mode == BONDING_MODE_BALANCE ||
+                       internals->mode == BONDING_MODE_BROADCAST;
+
+       if (!all_slaves) {
+               /* active backup and unknown modes: primary slave only */
+               rte_eth_promiscuous_enable(internals->primary_port);
+               return;
+       }
+
+       for (idx = 0; idx < internals->slave_count; idx++)
+               rte_eth_promiscuous_enable(internals->slaves[idx]);
+}
+
+/*
+ * promiscuous_disable handler.
+ *
+ * Records that promiscuous mode is off and propagates it: to every slave in
+ * round robin, balance and broadcast modes, and only to the primary slave in
+ * active backup (or any other) mode.
+ */
+static void
+bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
+{
+       struct bond_dev_private *internals = dev->data->dev_private;
+       int all_slaves;
+       int idx;
+
+       internals->promiscuous_en = 0;
+
+       all_slaves = internals->mode == BONDING_MODE_ROUND_ROBIN ||
+                       internals->mode == BONDING_MODE_BALANCE ||
+                       internals->mode == BONDING_MODE_BROADCAST;
+
+       if (!all_slaves) {
+               /* active backup and unknown modes: primary slave only */
+               rte_eth_promiscuous_disable(internals->primary_port);
+               return;
+       }
+
+       for (idx = 0; idx < internals->slave_count; idx++)
+               rte_eth_promiscuous_disable(internals->slaves[idx]);
+}
+
+
+/*
+ * Link status change callback registered on each slave port.
+ *
+ * port_id is the slave whose link changed; param points at the port id of
+ * the bonded device the slave belongs to. Events are ignored unless they are
+ * LSC events for a started bonded device that really owns port_id.
+ *
+ * On link up the slave is appended to the active slave list (becoming the
+ * primary if it is the first active slave). On link down it is removed from
+ * the active list, and the primary port is re-selected if it was the one
+ * that went down.
+ */
+static void
+bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
+               void *param)
+{
+       struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
+       struct bond_dev_private *internals;
+       struct rte_eth_link link;
+
+       /* valid_slave must start at 0: it is only ever set when a match is
+        * found, and is tested afterwards */
+       int i, bonded_port_id, valid_slave = 0, active_pos = -1;
+
+       if (type != RTE_ETH_EVENT_INTR_LSC)
+               return;
+
+       if (param == NULL)
+               return;
+
+       bonded_port_id = *(uint8_t *)param;
+
+       bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+       slave_eth_dev = &rte_eth_devices[port_id];
+
+       if (valid_bonded_ethdev(bonded_eth_dev))
+               return;
+
+       internals = bonded_eth_dev->data->dev_private;
+
+       /* If the device isn't started don't handle interrupts */
+       if (!bonded_eth_dev->data->dev_started)
+               return;
+
+       /* verify that port_id is a valid slave of bonded port */
+       for (i = 0; i < internals->slave_count; i++) {
+               if (internals->slaves[i] == port_id) {
+                       valid_slave = 1;
+                       break;
+               }
+       }
+
+       if (!valid_slave)
+               return;
+
+       /* Search for port in active port list */
+       for (i = 0; i < internals->active_slave_count; i++) {
+               if (port_id == internals->active_slaves[i]) {
+                       active_pos = i;
+                       break;
+               }
+       }
+
+       rte_eth_link_get_nowait(port_id, &link);
+       if (link.link_status) {
+               if (active_pos == -1) {
+                       /* if no active slave ports then set this port to be
+                        * primary port */
+                       if (internals->active_slave_count == 0) {
+                               /* If first active slave, then change link
+                                * status */
+                               bonded_eth_dev->data->dev_link.link_status = 1;
+                               internals->primary_port = port_id;
+
+                               /* Inherit eth dev link properties from first
+                                * active slave */
+                               link_properties_set(bonded_eth_dev,
+                                               &(slave_eth_dev->data->dev_link));
+
+                       }
+                       internals->active_slaves[internals->active_slave_count++] =
+                                       port_id;
+               }
+       } else {
+               if (active_pos != -1) {
+                       /* Remove from active slave list */
+                       for (i = active_pos;
+                                       i < (internals->active_slave_count - 1);
+                                       i++)
+                               internals->active_slaves[i] =
+                                               internals->active_slaves[i+1];
+
+                       internals->active_slave_count--;
+
+                       /* No active slaves, change link status to down and
+                        * reset other link properties */
+                       if (internals->active_slave_count == 0)
+                               link_properties_reset(bonded_eth_dev);
+
+                       /* Update primary id, take first active slave from
+                        * list or if none available fall back to the first
+                        * slave in the slave list */
+                       if (port_id == internals->primary_port) {
+                               if (internals->active_slave_count > 0)
+                                       internals->primary_port =
+                                                       internals->active_slaves[0];
+                               else
+                                       internals->primary_port =
+                                                       internals->slaves[0];
+                       }
+               }
+       }
+}
+
+/* ethdev operations table installed on every bonded device at creation. */
+static struct eth_dev_ops default_dev_ops = {
+       /* device life cycle */
+       .dev_start = bond_ethdev_start,
+       .dev_stop = bond_ethdev_stop,
+       .dev_close = bond_ethdev_close,
+       .dev_configure = bond_ethdev_configure,
+       .dev_infos_get = bond_ethdev_info,
+       /* queue management */
+       .rx_queue_setup = bond_ethdev_rx_queue_setup,
+       .tx_queue_setup = bond_ethdev_tx_queue_setup,
+       .rx_queue_release = bond_ethdev_rx_queue_release,
+       .tx_queue_release = bond_ethdev_tx_queue_release,
+       /* link, statistics and promiscuous mode */
+       .link_update = bond_ethdev_link_update,
+       .stats_get = bond_ethdev_stats_get,
+       .stats_reset = bond_ethdev_stats_reset,
+       .promiscuous_enable = bond_ethdev_promiscuous_enable,
+       .promiscuous_disable = bond_ethdev_promiscuous_disable,
+};
+
+/*
+ * Derive the number of sockets from the physical memory layout: the
+ * highest socket_id found among the populated memory segments, plus one.
+ */
+static uint8_t
+number_of_sockets(void)
+{
+       const struct rte_memseg *seg = rte_eal_get_physmem_layout();
+       int highest_id = 0;
+       int idx;
+
+       for (idx = 0; idx < RTE_MAX_MEMSEG; idx++) {
+               /* the first segment without an address ends the list */
+               if (seg[idx].addr == NULL)
+                       break;
+
+               if (seg[idx].socket_id > highest_id)
+                       highest_id = seg[idx].socket_id;
+       }
+
+       return highest_id + 1;
+}
+
+/*
+ * Create a new bonded ethdev.
+ *
+ * name      - name used to tag the memory allocations; must not be NULL
+ * mode      - bonding mode, applied through bond_ethdev_mode_set()
+ * socket_id - socket on which the device structures are allocated; must be
+ *             lower than number_of_sockets()
+ *
+ * Returns the port id of the new bonded device, or -1 on failure.
+ *
+ * NOTE(review): when a failure occurs after rte_eth_dev_allocate() has
+ * succeeded, the reserved ethdev entry itself is not handed back here; only
+ * the pointers this function stored in it are cleared.
+ */
+int
+rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
+{
+       struct rte_pci_device *pci_dev = NULL;
+       struct bond_dev_private *internals = NULL;
+       struct rte_eth_dev *eth_dev = NULL;
+       struct eth_driver *eth_drv = NULL;
+
+       /* now do all data allocation - for eth_dev structure, dummy pci
+        * driver and internal (private) data
+        */
+
+       if (name == NULL) {
+               RTE_LOG(ERR, PMD, "Invalid name specified\n");
+               goto err;
+       }
+
+       if (socket_id >= number_of_sockets()) {
+               RTE_LOG(ERR, PMD,
+                               "%s: invalid socket id specified to create bonded device on.\n",
+                               __func__);
+               goto err;
+       }
+
+       pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, socket_id);
+       if (pci_dev == NULL) {
+               RTE_LOG(ERR, PMD, "Unable to malloc pci dev on socket\n");
+               goto err;
+       }
+       eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, socket_id);
+       if (eth_drv == NULL) {
+               RTE_LOG(ERR, PMD, "Unable to malloc eth_drv on socket\n");
+               goto err;
+       }
+
+       internals = rte_zmalloc_socket(name, sizeof(*internals), 0, socket_id);
+       if (internals == NULL) {
+               RTE_LOG(ERR, PMD, "Unable to malloc internals on socket\n");
+               goto err;
+       }
+
+       /* reserve an ethdev entry */
+       eth_dev = rte_eth_dev_allocate();
+       if (eth_dev == NULL) {
+               RTE_LOG(ERR, PMD, "Unable to allocate rte_eth_dev\n");
+               goto err;
+       }
+
+       pci_dev->numa_node = socket_id;
+
+       /* eth_drv is zero-filled, so naming its embedded pci driver gives
+        * the same result as copying a zeroed temporary with the name set,
+        * without a heap allocation that would never be released */
+       eth_drv->pci_drv.name = driver_name;
+       eth_dev->driver = eth_drv;
+
+       eth_dev->data->dev_private = internals;
+       eth_dev->data->nb_rx_queues = (uint16_t)1;
+       eth_dev->data->nb_tx_queues = (uint16_t)1;
+
+       eth_dev->data->dev_link.link_status = 0;
+
+       eth_dev->data->dev_started = 0;
+       eth_dev->data->promiscuous = 0;
+       eth_dev->data->scattered_rx = 0;
+       eth_dev->data->all_multicast = 0;
+
+       eth_dev->dev_ops = &default_dev_ops;
+       eth_dev->pci_dev = pci_dev;
+
+       eth_dev->data->mac_addrs = rte_zmalloc(name, ETHER_ADDR_LEN, 0);
+       if (eth_dev->data->mac_addrs == NULL) {
+               RTE_LOG(ERR, PMD, "Unable to malloc mac_addrs\n");
+               goto err;
+       }
+
+       eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
+       if (bond_ethdev_mode_set(eth_dev, mode)) {
+               RTE_LOG(ERR, PMD,
+                               "%s: failed to set bonded device %d mode to %d\n",
+                               __func__, eth_dev->data->port_id, mode);
+               goto err;
+       }
+
+       internals->primary_port = 0;
+       internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
+       internals->user_defined_mac = 0;
+       internals->link_props_set = 0;
+       internals->slave_count = 0;
+       internals->active_slave_count = 0;
+
+       memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
+       memset(internals->slaves, 0, sizeof(internals->slaves));
+
+       memset(internals->presisted_slaves_conf, 0,
+                       sizeof(internals->presisted_slaves_conf));
+
+       return eth_dev->data->port_id;
+
+err:
+       if (eth_dev != NULL) {
+               /* do not leave the ethdev entry pointing at freed memory */
+               rte_free(eth_dev->data->mac_addrs);
+               eth_dev->data->mac_addrs = NULL;
+               eth_dev->data->dev_private = NULL;
+               eth_dev->driver = NULL;
+               eth_dev->pci_dev = NULL;
+       }
+       /* rte_free() ignores NULL pointers */
+       rte_free(pci_dev);
+       rte_free(eth_drv);
+       rte_free(internals);
+       return -1;
+}
+
+
+
+/*
+ * Attach slave_port_id to the bonded device bonded_port_id.
+ *
+ * The slave must not already belong to any bonded device and must match the
+ * PCI device id of the slaves already attached. The first slave becomes the
+ * primary port and, unless the user defined a MAC address, provides the
+ * bonded device's MAC and link properties.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+       struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
+       struct bond_dev_private *internals;
+       struct bond_dev_private *temp_internals;
+       struct rte_eth_link link_props;
+
+       int i, j;
+
+       /* Verify that port id's are valid bonded and slave ports */
+       if (valid_bonded_port_id(bonded_port_id) != 0)
+               goto err_add;
+
+       if (valid_slave_port_id(slave_port_id) != 0)
+               goto err_add;
+
+       /*
+        * Verify that new slave device is not already a slave of another
+        * bonded device */
+       for (i = rte_eth_dev_count()-1; i >= 0; i--) {
+               if (valid_bonded_ethdev(&rte_eth_devices[i]) == 0) {
+                       temp_internals = rte_eth_devices[i].data->dev_private;
+                       for (j = 0; j < temp_internals->slave_count; j++) {
+                               /* Device already a slave of a bonded device */
+                               if (temp_internals->slaves[j] == slave_port_id)
+                                       goto err_add;
+                       }
+               }
+       }
+
+       bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+       internals = bonded_eth_dev->data->dev_private;
+
+       slave_eth_dev = &rte_eth_devices[slave_port_id];
+
+       /* Never write past the end of the slave table */
+       if (internals->slave_count >=
+                       sizeof(internals->slaves) / sizeof(internals->slaves[0]))
+               goto err_add;
+
+       /* Check that new slave device is the same type as the other slaves
+        * and not repetitive */
+       for (i = 0; i < internals->slave_count; i++) {
+               if (slave_eth_dev->pci_dev->driver->id_table->device_id !=
+                               rte_eth_devices[internals->slaves[i]].pci_dev->
+                                       driver->id_table->device_id ||
+                               internals->slaves[i] == slave_port_id)
+                       goto err_add;
+       }
+
+       /* Add slave details to bonded device */
+       internals->slaves[internals->slave_count] = slave_port_id;
+
+       slave_config_store(internals, slave_eth_dev);
+
+       if (internals->slave_count == 0) {
+               /* if MAC is not user defined then use MAC of first slave
+                * added to bonded device */
+               if (!internals->user_defined_mac)
+                       mac_address_set(bonded_eth_dev,
+                                       slave_eth_dev->data->mac_addrs);
+
+               /* Inherit eth dev link properties from first slave */
+               link_properties_set(bonded_eth_dev,
+                               &(slave_eth_dev->data->dev_link));
+
+               /* Make primary slave. Assigned directly because
+                * rte_eth_bond_primary_set() only accepts ports found within
+                * the first slave_count entries, and slave_count has not been
+                * incremented yet. */
+               internals->primary_port = slave_port_id;
+       } else {
+               /* Check slave link properties are supported if props are
+                * set, all slaves must be the same */
+               if (internals->link_props_set) {
+                       if (link_properties_valid(
+                                       &(bonded_eth_dev->data->dev_link),
+                                       &(slave_eth_dev->data->dev_link))) {
+                               RTE_LOG(ERR, PMD,
+                                               "%s: Slave port %d link speed/duplex not supported\n",
+                                               __func__, slave_port_id);
+                               goto err_add;
+                       }
+               } else {
+                       link_properties_set(bonded_eth_dev,
+                                       &(slave_eth_dev->data->dev_link));
+               }
+       }
+
+       internals->slave_count++;
+
+       /* Update all slave devices MACs*/
+       mac_address_slaves_update(bonded_eth_dev);
+
+       if (bonded_eth_dev->data->dev_started) {
+               if (slave_configure(bonded_eth_dev, slave_eth_dev) != 0) {
+                       struct slave_conf *conf;
+
+                       RTE_LOG(ERR, PMD, "rte_bond_slaves_configure: port=%d\n",
+                                       slave_port_id);
+
+                       /* Undo the partial add: give the slave its stored
+                        * MAC address back and drop it from the slave
+                        * count */
+                       conf = slave_config_get(internals, slave_port_id);
+                       if (conf != NULL)
+                               mac_address_set(slave_eth_dev,
+                                               &(conf->mac_addr));
+                       internals->slave_count--;
+                       goto err_add;
+               }
+       }
+
+       /* Register link status change callback with bonded device pointer as
+        * argument*/
+       rte_eth_dev_callback_register(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
+                       bond_ethdev_lsc_event_callback,
+                       &bonded_eth_dev->data->port_id);
+
+       /* If bonded device is started then we can add the slave to our active
+        * slave array */
+       if (bonded_eth_dev->data->dev_started) {
+               rte_eth_link_get_nowait(slave_port_id, &link_props);
+
+               if (link_props.link_status == 1) {
+                       internals->active_slaves[internals->active_slave_count++] =
+                                       slave_port_id;
+               }
+       }
+
+       return 0;
+
+err_add:
+       RTE_LOG(ERR, PMD, "Failed to add port %d as slave\n", slave_port_id);
+       return -1;
+
+}
+
+/*
+ * Detach slave_port_id from the bonded device bonded_port_id.
+ *
+ * The slave is removed from the active and full slave lists, its link status
+ * callback is unregistered, its stored MAC address is restored and its
+ * persisted configuration is dropped. If it was the primary port a new
+ * primary is chosen from the remaining slaves.
+ *
+ * Returns 0 on success, -1 if either port id is invalid or the port is not
+ * a slave of this bonded device (in which case nothing is modified).
+ */
+int
+rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+       struct bond_dev_private *internals;
+       struct slave_conf *slave_conf;
+
+       int i;
+       int slave_pos = -1;
+       int active_pos = -1;
+
+       /* Verify that port id's are valid bonded and slave ports */
+       if (valid_bonded_port_id(bonded_port_id) != 0)
+               goto err_del;
+
+       if (valid_slave_port_id(slave_port_id) != 0)
+               goto err_del;
+
+       internals = rte_eth_devices[bonded_port_id].data->dev_private;
+
+       /* Confirm membership before any state is touched */
+       for (i = 0; i < internals->slave_count; i++) {
+               if (internals->slaves[i] == slave_port_id) {
+                       slave_pos = i;
+                       break;
+               }
+       }
+
+       if (slave_pos < 0)
+               goto err_del;
+
+       /* first remove from active slave list */
+       for (i = 0; i < internals->active_slave_count; i++) {
+               if (internals->active_slaves[i] == slave_port_id) {
+                       active_pos = i;
+                       break;
+               }
+       }
+
+       if (active_pos >= 0) {
+               /* shift active slaves up active array list */
+               for (i = active_pos; i < (internals->active_slave_count - 1);
+                               i++)
+                       internals->active_slaves[i] =
+                                       internals->active_slaves[i+1];
+
+               internals->active_slave_count--;
+       }
+
+       /* UnRegister link status change callback with bonded device pointer as
+        * argument*/
+       rte_eth_dev_callback_unregister(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
+                       bond_ethdev_lsc_event_callback,
+                       &rte_eth_devices[bonded_port_id].data->port_id);
+
+       /* Restore original MAC address of slave device */
+       slave_conf = slave_config_get(internals, slave_port_id);
+       if (slave_conf != NULL)
+               mac_address_set(&rte_eth_devices[slave_port_id],
+                               &(slave_conf->mac_addr));
+
+       /* now remove from slave list, shifting later slaves up */
+       for (i = slave_pos; i < (internals->slave_count - 1); i++)
+               internals->slaves[i] = internals->slaves[i+1];
+
+       /* must run before slave_count is decremented: it walks the first
+        * slave_count persisted entries */
+       slave_config_clear(internals, &rte_eth_devices[slave_port_id]);
+
+       internals->slave_count--;
+
+       /* first slave in the active list will be the primary by default,
+        * otherwise use first device in list */
+       if (internals->primary_port == slave_port_id) {
+               if (internals->active_slave_count > 0)
+                       internals->primary_port = internals->active_slaves[0];
+               else if (internals->slave_count > 0)
+                       internals->primary_port = internals->slaves[0];
+       }
+
+       if (internals->active_slave_count < 1) {
+               /* reset device link properties as no slaves are active */
+               link_properties_reset(&rte_eth_devices[bonded_port_id]);
+
+               /* if no slaves are any longer attached to bonded device and
+                * MAC is not user defined then clear MAC of bonded device as
+                * it will be reset when a new slave is added */
+               if (internals->slave_count < 1 && !internals->user_defined_mac)
+                       memset(rte_eth_devices[bonded_port_id].data->mac_addrs,
+                                       0,
+                                       sizeof(*(rte_eth_devices[bonded_port_id].
+                                               data->mac_addrs)));
+       }
+
+       return 0;
+
+err_del:
+       RTE_LOG(ERR, PMD,
+                       "Cannot remove slave device (not present in bonded device)\n");
+       return -1;
+
+}
+
+/*
+ * Set the bonding mode of a bonded device.
+ * Returns -1 for an invalid bonded port id, otherwise the result of
+ * bond_ethdev_mode_set().
+ */
+int
+rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode)
+{
+       struct rte_eth_dev *bonded_eth_dev;
+
+       if (valid_bonded_port_id(bonded_port_id) != 0)
+               return -1;
+
+       bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+
+       return bond_ethdev_mode_set(bonded_eth_dev, mode);
+}
+
+
+/*
+ * Return the bonding mode of a bonded device, or -1 for an invalid bonded
+ * port id.
+ */
+int
+rte_eth_bond_mode_get(uint8_t bonded_port_id)
+{
+       const struct bond_dev_private *priv;
+
+       if (valid_bonded_port_id(bonded_port_id) != 0)
+               return -1;
+
+       priv = rte_eth_devices[bonded_port_id].data->dev_private;
+       return priv->mode;
+}
+
+/*
+ * Make slave_port_id the primary port of the bonded device.
+ * Returns 0 on success, -1 if either port id is invalid or the port is not
+ * in the bonded device's slave list.
+ */
+int
+rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+       struct bond_dev_private *internals;
+       int idx = 0;
+
+       if (valid_bonded_port_id(bonded_port_id) != 0)
+               return -1;
+
+       if (valid_slave_port_id(slave_port_id) != 0)
+               return -1;
+
+       internals = rte_eth_devices[bonded_port_id].data->dev_private;
+
+       /* Look for the proposed primary among the attached slaves */
+       while (idx < internals->slave_count &&
+                       internals->slaves[idx] != slave_port_id)
+               idx++;
+
+       /* Slave is not bound to this master device */
+       if (idx >= internals->slave_count)
+               return -1;
+
+       internals->primary_port = slave_port_id;
+       return 0;
+}
+
+/*
+ * Return the primary port id of a bonded device, or -1 if the bonded port id
+ * is invalid or no slaves are attached.
+ */
+int
+rte_eth_bond_primary_get(uint8_t bonded_port_id)
+{
+       const struct bond_dev_private *priv;
+
+       if (valid_bonded_port_id(bonded_port_id) != 0)
+               return -1;
+
+       priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+       return (priv->slave_count < 1) ? -1 : priv->primary_port;
+}
+
+/*
+ * Expose the bonded device's slave port list.
+ * On success *slaves points at the device's internal slave table (not a
+ * copy) and the number of slaves is returned. Returns -1 if the bonded port
+ * id is invalid or slaves is NULL.
+ */
+int
+rte_eth_bond_slaves_get(uint8_t bonded_port_id, const uint8_t **slaves)
+{
+       struct bond_dev_private *priv;
+
+       if (valid_bonded_port_id(bonded_port_id) != 0 || slaves == NULL)
+               return -1;
+
+       priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+       *slaves = (const uint8_t *)(&priv->slaves);
+       return priv->slave_count;
+}
+
+/*
+ * Expose the bonded device's active slave port list.
+ * On success *slaves points at the device's internal active slave table (not
+ * a copy) and the number of active slaves is returned. Returns -1 if the
+ * bonded port id is invalid or slaves is NULL.
+ */
+int
+rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, const uint8_t **slaves)
+{
+       struct bond_dev_private *priv;
+
+       if (valid_bonded_port_id(bonded_port_id) != 0 || slaves == NULL)
+               return -1;
+
+       priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+       *slaves = (const uint8_t *)(&priv->active_slaves);
+       return priv->active_slave_count;
+}
+
+
+/*
+ * Assign a user defined MAC address to the bonded device and, when slaves
+ * are attached, propagate MAC addresses to them.
+ * Returns 0 on success, -1 on an invalid bonded port id or if setting the
+ * address fails; otherwise the result of mac_address_slaves_update().
+ */
+int
+rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
+               struct ether_addr *mac_addr)
+{
+       struct rte_eth_dev *bonded_eth_dev;
+       struct bond_dev_private *internals;
+
+       if (valid_bonded_port_id(bonded_port_id) != 0)
+               return -1;
+
+       bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+
+       /* Set MAC Address of Bonded Device */
+       if (mac_address_set(bonded_eth_dev, mac_addr))
+               return -1;
+
+       internals = bonded_eth_dev->data->dev_private;
+       internals->user_defined_mac = 1;
+
+       /* Nothing to propagate when no slaves are attached */
+       if (internals->slave_count <= 0)
+               return 0;
+
+       /* Update all slave devices MACs */
+       return mac_address_slaves_update(bonded_eth_dev);
+}
+
+
+/*
+ * Discard a user defined MAC address: the bonded device reverts to the MAC
+ * address stored for its primary slave and the slaves' MAC addresses are
+ * updated accordingly.
+ *
+ * Returns 0 on success (including when no slaves are attached), -1 if the
+ * bonded port id is invalid, the primary slave has no stored configuration,
+ * or a MAC update fails.
+ */
+int
+rte_eth_bond_mac_address_reset(uint8_t bonded_port_id)
+{
+       struct rte_eth_dev *bonded_eth_dev;
+       struct bond_dev_private *internals;
+
+
+       if (valid_bonded_port_id(bonded_port_id) != 0)
+               return -1;
+
+       bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+       internals = bonded_eth_dev->data->dev_private;
+
+       internals->user_defined_mac = 0;
+
+       if (internals->slave_count > 0) {
+               struct slave_conf *conf;
+               conf = slave_config_get(internals, internals->primary_port);
+
+               /* slave_config_get() returns NULL when the port has no
+                * stored entry; do not dereference it */
+               if (conf == NULL)
+                       return -1;
+
+               /* Set MAC Address of Bonded Device */
+               if (mac_address_set(bonded_eth_dev, &conf->mac_addr) != 0)
+                       return -1;
+
+               /* Update all slave devices MAC addresses */
+               return mac_address_slaves_update(bonded_eth_dev);
+       }
+       /* No need to update anything as no slaves present */
+       return 0;
+}
+
+/*
+ * Select the transmit policy used in balance mode.
+ * Only the layer 2, layer 2+3 and layer 3+4 policies are accepted.
+ * Returns 0 on success, -1 for an invalid bonded port id or unknown policy.
+ */
+int
+rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy)
+{
+       struct bond_dev_private *internals;
+
+       if (valid_bonded_port_id(bonded_port_id) != 0)
+               return -1;
+
+       if (policy != BALANCE_XMIT_POLICY_LAYER2 &&
+                       policy != BALANCE_XMIT_POLICY_LAYER23 &&
+                       policy != BALANCE_XMIT_POLICY_LAYER34)
+               return -1;
+
+       internals = rte_eth_devices[bonded_port_id].data->dev_private;
+       internals->balance_xmit_policy = policy;
+
+       return 0;
+}
+
+
+/*
+ * Return the balance mode transmit policy of a bonded device, or -1 for an
+ * invalid bonded port id.
+ */
+int
+rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
+{
+       const struct bond_dev_private *priv;
+
+       if (valid_bonded_port_id(bonded_port_id) != 0)
+               return -1;
+
+       priv = rte_eth_devices[bonded_port_id].data->dev_private;
+       return priv->balance_xmit_policy;
+}
diff --git a/lib/librte_bond/rte_bond.h b/lib/librte_bond/rte_bond.h
new file mode 100644
index 0000000..97b6d5e
--- /dev/null
+++ b/lib/librte_bond/rte_bond.h
@@ -0,0 +1,228 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETH_BOND_H_
+#define _RTE_ETH_BOND_H_
+
+/**
+ * @file
+ * RTE Link Bonding Ethernet Device
+ * Link Bonding for 1GbE and 10GbE ports to allow the aggregation of multiple
+ * (slave) NICs into a single logical interface. The bonded device processes
+ * these interfaces based on the mode of operation specified and supported.
+ * This implementation supports 4 modes of operation: round robin, active
+ * backup, balance and broadcast. These modes provide redundant links, fault
+ * tolerance and/or load balancing of network ports.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_ether.h>
+
+/** Link Bonding Mode Definitions */
+#define BONDING_MODE_ROUND_ROBIN               (0)
+#define BONDING_MODE_ACTIVE_BACKUP             (1)
+#define BONDING_MODE_BALANCE                   (2)
+#define BONDING_MODE_BROADCAST                 (3)
+
+/** Balance Mode Transmit Policy Types */
+#define BALANCE_XMIT_POLICY_LAYER2             (0)
+#define BALANCE_XMIT_POLICY_LAYER23            (1)
+#define BALANCE_XMIT_POLICY_LAYER34            (2)
+
+/**
+ * Create a bonded rte_eth_dev device
+ *
+ * @param name
+ * @param mode
+ * @param socket_id
+ *
+ * @return
+ *     Port Id of created rte_eth_dev on success, negative value otherwise
+ */
+int
+rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id);
+
+/**
+ * Add a rte_eth_dev device as a slave to the bonded device
+ *
+ * @param bonded_port_id
+ * @param slave_port_id
+ *
+ * @return
+ *     0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id);
+
+/**
+ * Remove a slave rte_eth_dev device from the bonded device
+ *
+ * @param bonded_port_id
+ * @param slave_port_id
+ *
+ * @return
+ *     0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id);
+
+/**
+ * Set link bonding mode of bonded device
+ *
+ * @param bonded_port_id
+ * @param mode
+ *
+ * @return
+ *     0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode);
+
+/**
+ * Get link bonding mode of bonded device
+ *
+ * @param bonded_port_id
+ *
+ * @return
+ *     link bonding mode on success, negative value otherwise
+ */
+int
+rte_eth_bond_mode_get(uint8_t bonded_port_id);
+
+/**
+ * Set slave rte_eth_dev as primary slave of bonded device
+ *
+ * @param bonded_port_id
+ * @param slave_port_id
+ *
+ * @return
+ *     0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id);
+
+/**
+ * Get primary slave of bonded device
+ *
+ * @param bonded_port_id
+ *
+ * @return
+ *     Port Id of primary slave on success, -1 on failure
+ */
+int
+rte_eth_bond_primary_get(uint8_t bonded_port_id);
+
+/**
+ * Populate an array with the list of slave port ids of the bonded device
+ *
+ * @param bonded_port_id
+ * @param slaves
+ *
+ * @return
+ *     number of slaves associated with bonded device on success,
+ *     negative value otherwise
+ */
+int
+rte_eth_bond_slaves_get(uint8_t bonded_port_id, const uint8_t **slaves);
+
+/**
+ * Populate an array with the list of active slave port ids of the bonded
+ * device.
+ *
+ * @param bonded_port_id
+ * @param slaves
+ *
+ * @return
+ *     number of active slaves associated with bonded device on success,
+ *     negative value otherwise
+ */
+int
+rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, const uint8_t **slaves);
+
+/**
+ * Set explicit MAC address to use on bonded device and its slaves.
+ *
+ * @param bonded_port_id
+ * @param mac_addr
+ *
+ * @return
+ *     0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
+               struct ether_addr *mac_addr);
+
+/**
+ * Reset bonded device to use MAC from primary slave on bonded device and its
+ * slaves.
+ *
+ * @param bonded_port_id
+ *
+ * @return
+ *     0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_mac_address_reset(uint8_t bonded_port_id);
+
+/**
+ * Set the transmit policy for bonded device to use when it is operating in
+ * balance mode
+ *
+ * @param bonded_port_id
+ * @param policy
+ *
+ * @return
+ *     0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy);
+
+/**
+ * Get the transmit policy set on bonded device for balance mode operation
+ *
+ * @param bonded_port_id
+ *
+ * @return
+ *     balance transmit policy on success, negative value otherwise
+ */
+int
+rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index a836577..a803a5c 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -177,8 +177,13 @@ ifeq ($(CONFIG_RTE_LIBRTE_PMD_PCAP),y)
 LDLIBS += -lrte_pmd_pcap -lpcap
 endif

+ifeq ($(CONFIG_RTE_LIBRTE_BOND),y)
+LDLIBS += -lrte_bond
 endif

+endif
+
+
 LDLIBS += $(EXECENV_LDLIBS)

 LDLIBS += --end-group
-- 
1.8.5.3

Reply via email to