[dpdk-dev] [PATCH 1/4] Link Bonding Library

2014-05-28 Thread declan.dohe...@intel.com
From: Declan Doherty 

Link Bonding Library (lib/librte_bond) initial release with support for
 Mode 0 - Round Robin
 Mode 1 - Active Backup
 Mode 2 - Balance -> Supports 3 transmit polices (layer 2, layer 2+3, layer
 Mode 3 - Broadcast

Signed-off-by: Declan Doherty 
---
 config/common_bsdapp   |5 +
 config/common_linuxapp |5 +
 lib/Makefile   |1 +
 lib/librte_bond/Makefile   |   28 +
 lib/librte_bond/rte_bond.c | 1679 
 lib/librte_bond/rte_bond.h |  228 ++
 mk/rte.app.mk  |5 +
 7 files changed, 1951 insertions(+)
 create mode 100644 lib/librte_bond/Makefile
 create mode 100644 lib/librte_bond/rte_bond.c
 create mode 100644 lib/librte_bond/rte_bond.h

diff --git a/config/common_bsdapp b/config/common_bsdapp
index 2cc7b80..53ed8b9 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -187,6 +187,11 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16
 CONFIG_RTE_LIBRTE_PMD_PCAP=y

 #
+# Compile link bonding library
+#
+CONFIG_RTE_LIBRTE_BOND=y
+
+#
 # Do prefetch of packet data within PMD driver receive function
 #
 CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 62619c6..35b525a 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -211,6 +211,11 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16
 CONFIG_RTE_LIBRTE_PMD_PCAP=n


+#
+# Compile link bonding library
+#
+CONFIG_RTE_LIBRTE_BOND=y
+
 CONFIG_RTE_LIBRTE_PMD_XENVIRT=n

 #
diff --git a/lib/Makefile b/lib/Makefile
index b92b392..9995ba8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,6 +47,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_BOND) += librte_bond
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
 DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
diff --git a/lib/librte_bond/Makefile b/lib/librte_bond/Makefile
new file mode 100644
index 000..7514378
--- /dev/null
+++ b/lib/librte_bond/Makefile
@@ -0,0 +1,28 @@
+# 
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_bond.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_BOND) += rte_bond.c
+
+
+#
+# Export include files
+#
+SYMLINK-y-include += rte_bond.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_BOND) += lib/librte_mbuf lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_BOND) += lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_bond/rte_bond.c b/lib/librte_bond/rte_bond.c
new file mode 100644
index 000..35dff25
--- /dev/null
+++ b/lib/librte_bond/rte_bond.c
@@ -0,0 +1,1679 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "rte_bond.h"
+
+static const char *driver_name = "Link Bonding PMD";
+
+/** Port Queue Mapping Structure */
+struct bond_rx_queue {
+   int queue_id;   /**< 
Queue Id */
+   

[dpdk-dev] [PATCH 1/4] Link Bonding Library

2014-05-28 Thread Shaw, Jeffrey B
Hi Declan,
I'm worried about one thing in "bond_ethdev_tx_broadcast()" related to freeing 
of the broadcasted packets.

> +static uint16_t
> +bond_ethdev_tx_broadcast(void *queue, struct rte_mbuf **bufs, uint16_t 
> nb_pkts)
> +{
> + struct bond_dev_private *internals;
> + struct bond_tx_queue *bd_tx_q;
> +
> + uint8_t num_of_slaves;
> + uint8_t slaves[RTE_MAX_ETHPORTS];
> +
> + uint16_t num_tx_total = 0;
> +
> + int i;
> +
> + bd_tx_q = (struct bond_tx_queue *)queue;
> + internals = bd_tx_q->dev_private;
> +
> + /* Copy slave list to protect against slave up/down changes during tx
> +  * bursting */
> + num_of_slaves = internals->active_slave_count;
> + memcpy(slaves, internals->active_slaves,
> + sizeof(internals->active_slaves[0]) * num_of_slaves);
> +
> + if (num_of_slaves < 1)
> + return 0;
> +
> +
> + for (i = 0; i < num_of_slaves; i++) {
> + num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
> + bufs, nb_pkts);
> + }
> +
> + return num_tx_total;
> +}
> +

Transmitting the same buffers on all slaves will cause problems when the PMD 
frees the mbufs for previously transmitted packets.
So if you broadcast 1 packet on 4 slaves, each of the 4 slaves will 
(eventually) have to free the same mbuf.  Without updating the refcnt, you will 
end up incorrectly freeing the same mbuf 3 extra times.

Your test application does not catch this case since the max packets that are 
tested is 320, which is less than the size of the Tx descriptor rings (512).  
Try testing with more packets and you should see some latent segmentation 
faults.  You should also see this with testpmd, if you try broadcasting enough 
packets.


Thanks,
Jeff


[dpdk-dev] [PATCH 1/4] Link Bonding Library

2014-05-29 Thread Doherty, Declan


> -Original Message-
> From: Shaw, Jeffrey B
> Sent: Wednesday, May 28, 2014 5:55 PM
> To: Doherty, Declan; dev at dpdk.org
> Subject: RE: [dpdk-dev] [PATCH 1/4] Link Bonding Library
> 
> Hi Declan,
> I'm worried about one thing in "bond_ethdev_tx_broadcast()" related to
> freeing of the broadcasted packets.
> 
> > +static uint16_t
> > +bond_ethdev_tx_broadcast(void *queue, struct rte_mbuf **bufs,
> > +uint16_t nb_pkts) {
> > +   struct bond_dev_private *internals;
> > +   struct bond_tx_queue *bd_tx_q;
> > +
> > +   uint8_t num_of_slaves;
> > +   uint8_t slaves[RTE_MAX_ETHPORTS];
> > +
> > +   uint16_t num_tx_total = 0;
> > +
> > +   int i;
> > +
> > +   bd_tx_q = (struct bond_tx_queue *)queue;
> > +   internals = bd_tx_q->dev_private;
> > +
> > +   /* Copy slave list to protect against slave up/down changes during tx
> > +* bursting */
> > +   num_of_slaves = internals->active_slave_count;
> > +   memcpy(slaves, internals->active_slaves,
> > +   sizeof(internals->active_slaves[0]) * num_of_slaves);
> > +
> > +   if (num_of_slaves < 1)
> > +   return 0;
> > +
> > +
> > +   for (i = 0; i < num_of_slaves; i++) {
> > +   num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q-
> >queue_id,
> > +   bufs, nb_pkts);
> > +   }
> > +
> > +   return num_tx_total;
> > +}
> > +
> 
> Transmitting the same buffers on all slaves will cause problems when the PMD
> frees the mbufs for previously transmitted packets.
> So if you broadcast 1 packet on 4 slaves, each of the 4 slaves will 
> (eventually)
> have to free the same mbuf.  Without updating the refcnt, you will end up
> incorrectly freeing the same mbuf 3 extra times.
> 
> Your test application does not catch this case since the max packets that are
> tested is 320, which is less than the size of the Tx descriptor rings (512).  
> Try
> testing with more packets and you should see some latent segmentation
> faults.  You should also see this with testpmd, if you try broadcasting enough
> packets.
> 
> 
> Thanks,
> Jeff

Hey Jeff, I'm not sure why I didn't see this in testing using on the test-pmd 
app, but it's obvious that this will cause issues. I'll add code to increment 
the refcnt in each mbuf by (num_of_slaves -1) in the bond_ethdev_tx_broadcast() 
function and also add a unit test to validate this in the next version of the 
patch.

Thanks,
Declan
--
Intel Shannon Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263
Business address: Dromore House, East Park, Shannon, Co. Clare

This e-mail and any attachments may contain confidential material for the sole 
use of the intended recipient(s). Any review or distribution by others is 
strictly prohibited. If you are not the intended recipient, please contact the 
sender and delete all copies.