[dpdk-dev] [PATCH 0/2] examples/vhost: support new VMDQ api and new nic i40e in vhost example

2014-11-13 Thread Huawei Xie
I40E has several different types of VSI, and queues are allocated among them.
Because of this, the VMDQ queue base and pool base don't start from zero, and
VMDQ doesn't own all queues.
The rte_eth_dev_info structure is extended to provide the VMDQ queue base, pool
base, and queue number information needed to properly set up VMDQ, i.e., to add
mac/vlan filters.
This patchset enables the vhost example to use this information to set up VMDQ.

Huawei Xie (2):
  support new VMDQ API and new nic i40e
  use factorized default Rx/Tx configuration

 examples/vhost/main.c | 103 --
 1 file changed, 41 insertions(+), 62 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH 2/2] examples/vhost: use factorized default Rx/Tx configuration

2014-11-13 Thread Huawei Xie
Refer to Pablo's commit:
"use factorized default Rx/Tx configuration

For apps that were using default rte_eth_rxconf and rte_eth_txconf
structures, these have been removed and now they are obtained by
calling rte_eth_dev_info_get, just before setting up RX/TX queues."

Also move zero copy's deferred start setup earlier in port_init.

Signed-off-by: Huawei Xie 
---
 examples/vhost/main.c | 78 +++
 1 file changed, 22 insertions(+), 56 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 2b1bf02..fa36913 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -79,25 +79,6 @@
+ RTE_PKTMBUF_HEADROOM)
 #define MBUF_CACHE_SIZE_ZCP 0

-/*
- * RX and TX Prefetch, Host, and Write-back threshold values should be
- * carefully set for optimal performance. Consult the network
- * controller's datasheet and supporting DPDK documentation for guidance
- * on how these parameters should be set.
- */
-#define RX_PTHRESH 8 /* Default values of RX prefetch threshold reg. */
-#define RX_HTHRESH 8 /* Default values of RX host threshold reg. */
-#define RX_WTHRESH 4 /* Default values of RX write-back threshold reg. */
-
-/*
- * These default values are optimized for use with the Intel(R) 82599 10 GbE
- * Controller and the DPDK ixgbe PMD. Consider using other values for other
- * network controllers and/or network drivers.
- */
-#define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */
-#define TX_HTHRESH 0  /* Default values of TX host threshold reg. */
-#define TX_WTHRESH 0  /* Default values of TX write-back threshold reg. */
-
 #define MAX_PKT_BURST 32   /* Max burst size for RX/TX */
 #define BURST_TX_DRAIN_US 100  /* TX drain every ~100us */

@@ -217,32 +198,6 @@ static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
 /* Character device basename. Can be set by user. */
 static char dev_basename[MAX_BASENAME_SZ] = "vhost-net";

-
-/* Default configuration for rx and tx thresholds etc. */
-static struct rte_eth_rxconf rx_conf_default = {
-   .rx_thresh = {
-   .pthresh = RX_PTHRESH,
-   .hthresh = RX_HTHRESH,
-   .wthresh = RX_WTHRESH,
-   },
-   .rx_drop_en = 1,
-};
-
-/*
- * These default values are optimized for use with the Intel(R) 82599 10 GbE
- * Controller and the DPDK ixgbe/igb PMD. Consider using other values for other
- * network controllers and/or network drivers.
- */
-static struct rte_eth_txconf tx_conf_default = {
-   .tx_thresh = {
-   .pthresh = TX_PTHRESH,
-   .hthresh = TX_HTHRESH,
-   .wthresh = TX_WTHRESH,
-   },
-   .tx_free_thresh = 0, /* Use PMD default values */
-   .tx_rs_thresh = 0, /* Use PMD default values */
-};
-
 /* empty vmdq configuration structure. Filled in programatically */
 static struct rte_eth_conf vmdq_conf_default = {
.rxmode = {
@@ -410,7 +365,9 @@ port_init(uint8_t port)
 {
struct rte_eth_dev_info dev_info;
struct rte_eth_conf port_conf;
-   uint16_t rx_rings, tx_rings;
+   struct rte_eth_rxconf *rxconf;
+   struct rte_eth_txconf *txconf;
+   int16_t rx_rings, tx_rings;
uint16_t rx_ring_size, tx_ring_size;
int retval;
uint16_t q;
@@ -418,6 +375,21 @@ port_init(uint8_t port)
/* The max pool number from dev_info will be used to validate the pool 
number specified in cmd line */
rte_eth_dev_info_get (port, &dev_info);

+   rxconf = &dev_info.default_rxconf;
+   txconf = &dev_info.default_txconf;
+   rxconf->rx_drop_en = 1;
+
+   /*
+* Zero copy defers queue RX/TX start to the time when guest
+* finishes its startup and packet buffers from that guest are
+* available.
+*/
+   if (zero_copy) {
+   rxconf->rx_deferred_start = 1;
+   rxconf->rx_drop_en = 0;
+   txconf->tx_deferred_start = 1;
+   }
+
/*configure the number of supported virtio devices based on VMDQ limits 
*/
num_devices = dev_info.max_vmdq_pools;

@@ -460,14 +432,16 @@ port_init(uint8_t port)
/* Setup the queues. */
for (q = 0; q < rx_rings; q ++) {
retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
-   rte_eth_dev_socket_id(port), 
&rx_conf_default,
+   rte_eth_dev_socket_id(port),
+   rxconf,
vpool_array[q].pool);
if (retval < 0)
return retval;
}
for (q = 0; q < tx_rings; q ++) {
retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
-   rte_eth_dev_socket_id(port), 
&tx_conf_default);
+   rte_eth_dev_socket_id(port),
+ 

[dpdk-dev] [PATCH 1/2] examples/vhost: support new VMDQ API and new nic i40e

2014-11-13 Thread Huawei Xie
In Niantic, if VMDQ mode is set, all queues are allocated to VMDQ in DPDK.
In I40E, only a configured, contiguous range of queues is allocated to VMDQ.
The rte_eth_dev_info structure is extended to provide the VMDQ queue base, queue
number, and VMDQ pool base information.
This patch supports the new VMDQ API in the vhost example.

FIXME in PMD:
 * added mac address will be flushed at rte_eth_dev_start.
 * we don't support selectively setting up queues well. 

Signed-off-by: Huawei Xie 
---
 examples/vhost/main.c | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index a93f7a0..2b1bf02 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -53,7 +53,7 @@

 #include "main.h"

-#define MAX_QUEUES 128
+#define MAX_QUEUES 256

 /* the maximum number of external ports supported */
 #define MAX_SUP_PORTS 1
@@ -282,6 +282,9 @@ static struct rte_eth_conf vmdq_conf_default = {
 static unsigned lcore_ids[RTE_MAX_LCORE];
 static uint8_t ports[RTE_MAX_ETHPORTS];
 static unsigned num_ports = 0; /**< The number of ports specified in command 
line */
+static uint16_t num_pf_queues, num_vmdq_queues;
+static uint16_t vmdq_pool_base, vmdq_queue_base;
+static uint16_t queues_per_pool;

 static const uint16_t external_pkt_default_vlan_tag = 2000;
 const uint16_t vlan_tags[] = {
@@ -417,7 +420,6 @@ port_init(uint8_t port)

/*configure the number of supported virtio devices based on VMDQ limits 
*/
num_devices = dev_info.max_vmdq_pools;
-   num_queues = dev_info.max_rx_queues;

if (zero_copy) {
rx_ring_size = num_rx_descriptor;
@@ -437,10 +439,19 @@ port_init(uint8_t port)
retval = get_eth_conf(&port_conf, num_devices);
if (retval < 0)
return retval;
+   /* NIC queues are divided into pf queues and vmdq queues.  */
+   num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
+   queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
+   num_vmdq_queues = num_devices * queues_per_pool;
+   num_queues = num_pf_queues + num_vmdq_queues;
+   vmdq_queue_base = dev_info.vmdq_queue_base;
+   vmdq_pool_base  = dev_info.vmdq_pool_base;
+   printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool 
has %u queues\n",
+   num_pf_queues, num_devices, queues_per_pool);

if (port >= rte_eth_dev_count()) return -1;

-   rx_rings = (uint16_t)num_queues,
+   rx_rings = (uint16_t)dev_info.max_rx_queues;
/* Configure ethernet device. */
retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
if (retval != 0)
@@ -931,7 +942,8 @@ link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
vdev->vlan_tag);

/* Register the MAC address. */
-   ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address, 
(uint32_t)dev->device_fh);
+   ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
+   (uint32_t)dev->device_fh + vmdq_pool_base);
if (ret)
RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add device MAC 
address to VMDQ\n",
dev->device_fh);
@@ -2602,7 +2614,7 @@ new_device (struct virtio_net *dev)
ll_dev->vdev = vdev;
add_data_ll_entry(&ll_root_used, ll_dev);
vdev->vmdq_rx_q
-   = dev->device_fh * (num_queues / num_devices);
+   = dev->device_fh * queues_per_pool + vmdq_queue_base;

if (zero_copy) {
uint32_t index = vdev->vmdq_rx_q;
@@ -2837,7 +2849,8 @@ MAIN(int argc, char *argv[])
unsigned lcore_id, core_id = 0;
unsigned nb_ports, valid_num_ports;
int ret;
-   uint8_t portid, queue_id = 0;
+   uint8_t portid;
+   uint16_t queue_id;
static pthread_t tid;

/* init EAL */
-- 
1.8.1.4



[dpdk-dev] [PATCH v7 0/7] app/test: unit test to measure cycles per packet

2014-11-13 Thread Thomas Monjalon
2014-11-12 14:24, Cunming Liang:
> v7 update:
> # patch split and re-orginize
> 
> v6 update:
> # leave FUNC_PTR_OR_*_RET unmodified
> 
> v5 update:
> # fix the confusing of retval in some API of rte_ethdev
> 
> v4 ignore
> 
> v3 update:
> # Codes refine according to the feedback.
>   1. add ether_format_addr to rte_ether.h
>   2. fix typo in code comments.
>   3. %lu to %PRIu64, fixing 32-bit targets compilation err
> # merge 2 small incremental patches to the first one.
>   The whole unit test as a single patch in [PATCH v3 2/2]
> # rebase code to the latest master
> 
> v2 update:
> Rebase code to the latest master branch.
> 
> It provides unit test to measure cycles/packet in NIC loopback mode.
> It simply gives the average cycles of IO used per packet without test 
> equipment.
> When doing the test, make sure the link is UP.
> 
> There are two stream control modes supported: one is continuous, the other is burst.
> The former continues to forward the injected packets until a certain
> number of packets is reached.
> The latter stops when all the injected packets have been received.
> In burst stream mode, two situations are now measured: with or without desc. cache
> conflict.
> By default, it runs in continuous stream mode to measure the whole rxtx.
> 
> Usage Example:
> 1. Run unit test app in interactive mode
> app/test -c f -n 4 -- -i
> 2. Set stream control mode, by default is continuous
> set_rxtx_sc [continuous|poll_before_xmit|poll_after_xmit]
> 3. If choose continuous stream, there are another two options can configure
> 3.1 choose rx/tx pair, default is vector
> set_rxtx_mode [vector|scalar|full|hybrid]
> Note: To get acurate scalar fast, plz choose 'vector' or 'hybrid' 
> without INC_VEC=y in config 
> 3.2 choose the area of masurement, default is rxtx
> set_rxtx_anchor [rxtx|rxonly|txonly]
> 4. Run and wait for the result
> pmd_perf_autotest
> 
> For who simply just want to see how much cycles cost per packet.
> Compile DPDK, Run 'app/test', and type 'pmd_perf_autotest', that's it.
> Nothing else needs to configure. 
> Using other options when you understand and what to measures more. 

Applied

Thanks
-- 
Thomas


[dpdk-dev] [PATCH v3] Add in_flight_bitmask so as to use full 32 bits of tag.

2014-11-13 Thread Thomas Monjalon
Hi,

2014-11-10 16:44, Qinglai Xiao:
> With introduction of in_flight_bitmask, the whole 32 bits of tag can be
> used. Further more, this patch fixed the integer overflow when finding
> the matched tags.
> The maximum number workers is now defined as 64, which is length of
> double-word. The link between number of workers and RTE_MAX_LCORE is
> now removed. Compile time check is added to ensure the
> RTE_DISTRIB_MAX_WORKERS is less than or equal to size of double-word.
> 
> Signed-off-by: Qinglai Xiao 

The patch doesn't apply cleanly and fails to compile:
lib/librte_distributor/rte_distributor.c:310:27: error: 'union <anonymous>' has
no member named 'usr'

Do you have another commit before this one in your tree?

-- 
Thomas


[dpdk-dev] [PATCH 1/2] examples/vhost: support new VMDQ API and new nic i40e

2014-11-13 Thread Ouyang, Changchun


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Huawei Xie
> Sent: Thursday, November 13, 2014 6:34 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH 1/2] examples/vhost: support new VMDQ API
> and new nic i40e
> 
> In Niantic, if VMDQ mode is set, all queues are allocated to VMDQ in DPDK.
> In I40E, only configured part of continous queues are allocated to VMDQ.
> The rte_eth_dev_info structure is extened to provide VMDQ queue base,
> queue number, and VMDQ pool base information.
> This patch support the new VMDQ API in vhost example.
> 
> FIXME in PMD:
>  * added mac address will be flushed at rte_eth_dev_start.
>  * we don't support selectively setting up queues well.
> 
> Signed-off-by: Huawei Xie 
> ---
>  examples/vhost/main.c | 25 +++--
>  1 file changed, 19 insertions(+), 6 deletions(-)
> 
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c index
> a93f7a0..2b1bf02 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -53,7 +53,7 @@
> 
>  #include "main.h"
> 
> -#define MAX_QUEUES 128
> +#define MAX_QUEUES 256
> 
>  /* the maximum number of external ports supported */  #define
> MAX_SUP_PORTS 1 @@ -282,6 +282,9 @@ static struct rte_eth_conf
> vmdq_conf_default = {  static unsigned lcore_ids[RTE_MAX_LCORE];  static
> uint8_t ports[RTE_MAX_ETHPORTS];  static unsigned num_ports = 0; /**<
> The number of ports specified in command line */
> +static uint16_t num_pf_queues, num_vmdq_queues; static uint16_t
> +vmdq_pool_base, vmdq_queue_base; static uint16_t queues_per_pool;
> 
>  static const uint16_t external_pkt_default_vlan_tag = 2000;  const uint16_t
> vlan_tags[] = { @@ -417,7 +420,6 @@ port_init(uint8_t port)
> 
>   /*configure the number of supported virtio devices based on VMDQ
> limits */
>   num_devices = dev_info.max_vmdq_pools;
> - num_queues = dev_info.max_rx_queues;
> 
>   if (zero_copy) {
>   rx_ring_size = num_rx_descriptor;
> @@ -437,10 +439,19 @@ port_init(uint8_t port)
>   retval = get_eth_conf(&port_conf, num_devices);
>   if (retval < 0)
>   return retval;
> + /* NIC queues are divided into pf queues and vmdq queues.  */
> + num_pf_queues = dev_info.max_rx_queues -
> dev_info.vmdq_queue_num;
> + queues_per_pool = dev_info.vmdq_queue_num /
> dev_info.max_vmdq_pools;
> + num_vmdq_queues = num_devices * queues_per_pool;
> + num_queues = num_pf_queues + num_vmdq_queues;
> + vmdq_queue_base = dev_info.vmdq_queue_base;
> + vmdq_pool_base  = dev_info.vmdq_pool_base;
> + printf("pf queue num: %u, configured vmdq pool num: %u, each
> vmdq pool has %u queues\n",
> + num_pf_queues, num_devices, queues_per_pool);
>

Better to use RTE_LOG to replace printf.

>   if (port >= rte_eth_dev_count()) return -1;
> 
> - rx_rings = (uint16_t)num_queues,
> + rx_rings = (uint16_t)dev_info.max_rx_queues;
>   /* Configure ethernet device. */
>   retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
>   if (retval != 0)
> @@ -931,7 +942,8 @@ link_vmdq(struct vhost_dev *vdev, struct rte_mbuf
> *m)
>   vdev->vlan_tag);
> 
>   /* Register the MAC address. */
> - ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
> (uint32_t)dev->device_fh);
> + ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
> + (uint32_t)dev->device_fh +
> vmdq_pool_base);
>   if (ret)
>   RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add
> device MAC address to VMDQ\n",
>   dev->device_fh);
> @@ -2602,7 +2614,7 @@ new_device (struct virtio_net *dev)
>   ll_dev->vdev = vdev;
>   add_data_ll_entry(&ll_root_used, ll_dev);
>   vdev->vmdq_rx_q
> - = dev->device_fh * (num_queues / num_devices);
> + = dev->device_fh * queues_per_pool + vmdq_queue_base;
> 
>   if (zero_copy) {
>   uint32_t index = vdev->vmdq_rx_q;
> @@ -2837,7 +2849,8 @@ MAIN(int argc, char *argv[])
>   unsigned lcore_id, core_id = 0;
>   unsigned nb_ports, valid_num_ports;
>   int ret;
> - uint8_t portid, queue_id = 0;
> + uint8_t portid;
> + uint16_t queue_id;

If the max queue number is 256 and queue_id varies from 0 to 255, then uint8_t
is enough to represent it.
Is there any other consideration for changing it to uint16_t?

>   static pthread_t tid;
> 
>   /* init EAL */
> --
> 1.8.1.4



[dpdk-dev] [PATCH 1/2] examples/vhost: support new VMDQ API and new nic i40e

2014-11-13 Thread Xie, Huawei


> -Original Message-
> From: Ouyang, Changchun
> Sent: Wednesday, November 12, 2014 5:50 PM
> To: Xie, Huawei; dev at dpdk.org
> Cc: Ouyang, Changchun
> Subject: RE: [dpdk-dev] [PATCH 1/2] examples/vhost: support new VMDQ API
> and new nic i40e
> 
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Huawei Xie
> > Sent: Thursday, November 13, 2014 6:34 AM
> > To: dev at dpdk.org
> > Subject: [dpdk-dev] [PATCH 1/2] examples/vhost: support new VMDQ API
> > and new nic i40e
> >
> > In Niantic, if VMDQ mode is set, all queues are allocated to VMDQ in DPDK.
> > In I40E, only configured part of continous queues are allocated to VMDQ.
> > The rte_eth_dev_info structure is extened to provide VMDQ queue base,
> > queue number, and VMDQ pool base information.
> > This patch support the new VMDQ API in vhost example.
> >
> > FIXME in PMD:
> >  * added mac address will be flushed at rte_eth_dev_start.
> >  * we don't support selectively setting up queues well.
> >
> > Signed-off-by: Huawei Xie 
> > ---
> >  examples/vhost/main.c | 25 +++--
> >  1 file changed, 19 insertions(+), 6 deletions(-)
> >
> > diff --git a/examples/vhost/main.c b/examples/vhost/main.c index
> > a93f7a0..2b1bf02 100644
> > --- a/examples/vhost/main.c
> > +++ b/examples/vhost/main.c
> > @@ -53,7 +53,7 @@
> >
> >  #include "main.h"
> >
> > -#define MAX_QUEUES 128
> > +#define MAX_QUEUES 256
> >
> >  /* the maximum number of external ports supported */  #define
> > MAX_SUP_PORTS 1 @@ -282,6 +282,9 @@ static struct rte_eth_conf
> > vmdq_conf_default = {  static unsigned lcore_ids[RTE_MAX_LCORE];  static
> > uint8_t ports[RTE_MAX_ETHPORTS];  static unsigned num_ports = 0; /**<
> > The number of ports specified in command line */
> > +static uint16_t num_pf_queues, num_vmdq_queues; static uint16_t
> > +vmdq_pool_base, vmdq_queue_base; static uint16_t queues_per_pool;
> >
> >  static const uint16_t external_pkt_default_vlan_tag = 2000;  const uint16_t
> > vlan_tags[] = { @@ -417,7 +420,6 @@ port_init(uint8_t port)
> >
> > /*configure the number of supported virtio devices based on VMDQ
> > limits */
> > num_devices = dev_info.max_vmdq_pools;
> > -   num_queues = dev_info.max_rx_queues;
> >
> > if (zero_copy) {
> > rx_ring_size = num_rx_descriptor;
> > @@ -437,10 +439,19 @@ port_init(uint8_t port)
> > retval = get_eth_conf(&port_conf, num_devices);
> > if (retval < 0)
> > return retval;
> > +   /* NIC queues are divided into pf queues and vmdq queues.  */
> > +   num_pf_queues = dev_info.max_rx_queues -
> > dev_info.vmdq_queue_num;
> > +   queues_per_pool = dev_info.vmdq_queue_num /
> > dev_info.max_vmdq_pools;
> > +   num_vmdq_queues = num_devices * queues_per_pool;
> > +   num_queues = num_pf_queues + num_vmdq_queues;
> > +   vmdq_queue_base = dev_info.vmdq_queue_base;
> > +   vmdq_pool_base  = dev_info.vmdq_pool_base;
> > +   printf("pf queue num: %u, configured vmdq pool num: %u, each
> > vmdq pool has %u queues\n",
> > +   num_pf_queues, num_devices, queues_per_pool);
> >
> 
> Better to use RTE_LOG to replace printf.
> 
> > if (port >= rte_eth_dev_count()) return -1;
> >
> > -   rx_rings = (uint16_t)num_queues,
> > +   rx_rings = (uint16_t)dev_info.max_rx_queues;
> > /* Configure ethernet device. */
> > retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
> > if (retval != 0)
> > @@ -931,7 +942,8 @@ link_vmdq(struct vhost_dev *vdev, struct rte_mbuf
> > *m)
> > vdev->vlan_tag);
> >
> > /* Register the MAC address. */
> > -   ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
> > (uint32_t)dev->device_fh);
> > +   ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
> > +   (uint32_t)dev->device_fh +
> > vmdq_pool_base);
> > if (ret)
> > RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add
> > device MAC address to VMDQ\n",
> > dev->device_fh);
> > @@ -2602,7 +2614,7 @@ new_device (struct virtio_net *dev)
> > ll_dev->vdev = vdev;
> > add_data_ll_entry(&ll_root_used, ll_dev);
> > vdev->vmdq_rx_q
> > -   = dev->device_fh * (num_queues / num_devices);
> > +   = dev->device_fh * queues_per_pool + vmdq_queue_base;
> >
> > if (zero_copy) {
> > uint32_t index = vdev->vmdq_rx_q;
> > @@ -2837,7 +2849,8 @@ MAIN(int argc, char *argv[])
> > unsigned lcore_id, core_id = 0;
> > unsigned nb_ports, valid_num_ports;
> > int ret;
> > -   uint8_t portid, queue_id = 0;
> > +   uint8_t portid;
> > +   uint16_t queue_id;
> 
> If max queue is 256, and queue_id vary from 0 to 255, then uint8_t is enough 
> to
> denote it.
> Any other consideration here to change it to uint16_t?
queue_id is compared with MAX_QUEUES + 1; with a uint8_t queue_id that comparison
would always be false, which is why it is changed to uint16_t.
Please check the patch; I couldn't copy the code here.
> 
> > static pthread_t tid;
> >
> > /* init EAL */
> 

[dpdk-dev] [PATCH v6 1/9] librte_mbuf:the rte_mbuf structure changes

2014-11-13 Thread Liu, Jijiang


> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Wednesday, November 12, 2014 9:26 PM
> To: Liu, Jijiang
> Cc: Zhang, Helin; dev at dpdk.org; Richardson, Bruce
> Subject: Re: [dpdk-dev] [PATCH v6 1/9] librte_mbuf:the rte_mbuf structure
> changes
> 
> Hi guys,
> 
> We still have some problems with the mbuf changes introduced for VXLAN.
> I want to raise the packet type issue here.
> 
> 2014-10-23 02:23, Zhang, Helin:
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Thomas Monjalon
> > > 2014-10-21 14:14, Liu, Jijiang:
> > > > From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> > > > > 2014-10-21 16:46, Jijiang Liu:
> > > > > > -   uint16_t reserved2;   /**< Unused field. Required for 
> > > > > > padding
> */
> > > > > > +
> > > > > > +   /**
> > > > > > +* Packet type, which is used to indicate ordinary L2 packet
> format and
> > > > > > +* also tunneled packet format such as IP in IP, IP in GRE, MAC 
> > > > > > in
> GRE
> > > > > > +* and MAC in UDP.
> > > > > > +*/
> > > > > > +   uint16_t packet_type;
> > > > >
> > > > > Why not name it "l2_type"?
> >
> > 'packet_type' is for storing the hardware identified packet type upon
> > different layers of protocols (l2, l3, l4, ...).
> > It is quite useful for user application or middle layer software
> > stacks, it can know what the packet type is without checking the packet too
> much by software.
> > Actually ixgbe already has packet types (less than 10), which is transcoded 
> > into
> 'ol_flags'.
> > For i40e, the packet type can represent about 256 types of packet,
> > 'ol_flags' does not have enough bits for it anymore. So put the i40e packet 
> > types
> into mbuf would be better.
> > Also this field can be used for NON-Intel NICs, I think there must be
> > the similar concepts of other NICs. And 16 bits 'packet_type' has severl
> reserved bits for future and NON-Intel NICs.
> 
> Thanks Helin, that's the best description of packet_type I've seen so far.
> It's not so clear in the commit log:
>   http://dpdk.org/browse/dpdk/commit/?id=73b7d59cf4f6faf
> 
> > > > In datasheet, this term is called packet type(s).
> > >
> > > That's exactly the point I want you really understand!
> > > This is a field in generic mbuf structure, so your datasheet has no value 
> > > here.
> > >
> > > > Personally , I think packet type is  more clear what meaning of this 
> > > > field is .
> > >
> > > You cannot add an API field without knowing what will be its generic 
> > > meaning.
> > > Please think about it and describe its scope.
> 
> I integrated this patch with the VXLAN patchset in the hope that you'll 
> improve
> the situation afterwards.
> This is the answer you recently gave to Olivier:
>   http://dpdk.org/ml/archives/dev/2014-November/007599.html
> "
>   Regarding adding a packet_type in mbuf, we ever had a lot of discussions
> as follows:
>   http://dpdk.org/ml/archives/dev/2014-October/007027.html
>   http://dpdk.org/ml/archives/dev/2014-September/005240.html
>   http://dpdk.org/ml/archives/dev/2014-September/005241.html
>   http://dpdk.org/ml/archives/dev/2014-September/005274.html
> "
> 
> To sum up the situation:
> - We don't know what are the possible values of packet_type
> - It's only filled by i40e, while other drivers use ol_flags
> - There is no special value "unknown" which should be set by drivers
>   not supporting this feature.
> - Its only usage is to print a decimal value in app/test-pmd/rxonly.c
> 
> It's now clear that nobody cares about this part of the API.
> So I'm going to remove packet_type from mbuf.
> I don't want to keep something that we don't know how to use, that is not
> consistent across drivers, and that overlap another API part (ol_flags).

The packet type in i40e is very important for users: using the packet type can
help to speed up packet analysis/identification in their applications,
especially for tunneling packet formats.
I'm now working on implementing the packet type definition in the rte_ethdev.h
file and the translation table in i40e, which is almost done.
The packet type definition in the rte_ethdev.h file looks like the following.
/*
 * Ethernet packet type
 */
enum rte_eth_ptype {
/* undefined packet type, means HW can't recognise it */
RTE_PTYPE_UNDEF = 0,
...

/* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */
RTE_PTYPE_IPv4_GRENAT_MAC_IPv4FRAG_PAY3,
RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_PAY3,
RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_UDP_PAY4,
RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_TCP_PAY4,
RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_SCTP_PAY4,
RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_ICMP_PAY4,

/* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */
RTE_PTYPE_IPv4_GRENAT_MAC_IPv6FRAG_PAY3
RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_PAY3,
RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_UDP_PAY4,
RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_TCP_PAY4,
RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_SCTP_PAY4,
RTE_PT

[dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload

2014-11-13 Thread Liu, Jijiang


> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Thursday, November 13, 2014 1:26 AM
> To: Liu, Jijiang
> Cc: dev at dpdk.org; Olivier MATZ
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum
> offload
> 
> 2014-11-11 05:29, Liu, Jijiang:
> > From: Olivier MATZ
> > > On 11/10/2014 07:03 AM, Liu, Jijiang wrote:
> > > > > - if PKT_TX_VXLAN_CKSUM is not set (legacy use case), then the
> > > > >driver use l2_len and l3_len to offload inner IP/UDP/TCP checksums.
> > > >
> > > > If the flag is not set, and imply that it is not VXLAN packet,
> > > > and do TX checksum offload as regular packet.
> > > >
> > > > > - if PKT_TX_VXLAN_CKSUM is set, then the driver has to use
> > > > >inner_l{23}_len instead of l{23}_len for the same operation.
> > > >
> > > > Your understanding is not fully correct.
> > > > The l{23}_len is still used for TX checksum offload, please refer
> > > > to
> > > > i40e_txd_enable_checksum()  implementation.
> > >
> > > This fields are part of public mbuf API. You cannot say to refer to
> > > i40e PMD code to understand how to use it.
> > >
> > > > > Adding PKT_TX_VXLAN_CKSUM changes the semantic of l2_len and
> l3_len.
> > > > > To fix this, I suggest to remove the new fields inner_l{23}_len
> > > > > then add outer_l{23}_len instead. Therefore, the semantic of
> > > > > l2_len and l3_len would not change, and a driver would always
> > > > > use the same field for a specific offload.
> > > >
> > > > Oh...
> > >
> > > Does it mean you agree?
> >
> > I don't agree to change inner_l{23}_len the name.
> > The reason is that using the "inner" word means incoming packet is tunneling
> packet or encapsulation packet.
> > if we add "outer"{2,3}_len, which will cause confusion when processing non-
> tunneling packet.
> 
> Sorry Jijiang, maybe I don't understand what you are saying, but I think you
> missed something. Let me explain the problem.
> 
> For PKT_TX_IP_CKSUM, we must set l{2,3}_len.
> When PKT_TX_VXLAN_CKSUM is set, PKT_TX_IP_CKSUM is related to inner IP,
> right?
First of all, I want to explain what PKT_TX_VXLAN_CKSUM means: when the flag is
set, the driver knows that it needs to set the TX checksum for the whole packet,
not only for the inner part.

So when PKT_TX_VXLAN_CKSUM is set, PKT_TX_IP_CKSUM is related to the inner IP, right?



> So we must set inner_l{2,3}_len.
> It means that PKT_TX_IP_CKSUM requires different fields to be set, depending 
> of
> PKT_TX_VXLAN_CKSUM. That's what Olivier calls a semantic change.
> It's not acceptable for an API.
> 
> PKT_TX_IP_CKSUM should always be related to l{2,3}_len.
> When PKT_TX_VXLAN_CKSUM is set, we should add outer_l{2,3}_len.
> 
> Please, correct me if I'm wrong or fix the API.
> 
> Thanks
> --
> Thomas


[dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload

2014-11-13 Thread Liu, Jijiang
Please Ignore this mail.


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Liu, Jijiang
> Sent: Thursday, November 13, 2014 1:35 PM
> To: Thomas Monjalon
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum
> offload
> 
> 
> 
> > -Original Message-
> > From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> > Sent: Thursday, November 13, 2014 1:26 AM
> > To: Liu, Jijiang
> > Cc: dev at dpdk.org; Olivier MATZ
> > Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx
> > checksum offload
> >
> > 2014-11-11 05:29, Liu, Jijiang:
> > > From: Olivier MATZ
> > > > On 11/10/2014 07:03 AM, Liu, Jijiang wrote:
> > > > > > - if PKT_TX_VXLAN_CKSUM is not set (legacy use case), then the
> > > > > >driver use l2_len and l3_len to offload inner IP/UDP/TCP 
> > > > > > checksums.
> > > > >
> > > > > If the flag is not set, and imply that it is not VXLAN packet,
> > > > > and do TX checksum offload as regular packet.
> > > > >
> > > > > > - if PKT_TX_VXLAN_CKSUM is set, then the driver has to use
> > > > > >inner_l{23}_len instead of l{23}_len for the same operation.
> > > > >
> > > > > Your understanding is not fully correct.
> > > > > The l{23}_len is still used for TX checksum offload, please
> > > > > refer to
> > > > > i40e_txd_enable_checksum()  implementation.
> > > >
> > > > This fields are part of public mbuf API. You cannot say to refer
> > > > to i40e PMD code to understand how to use it.
> > > >
> > > > > > Adding PKT_TX_VXLAN_CKSUM changes the semantic of l2_len and
> > l3_len.
> > > > > > To fix this, I suggest to remove the new fields
> > > > > > inner_l{23}_len then add outer_l{23}_len instead. Therefore,
> > > > > > the semantic of l2_len and l3_len would not change, and a
> > > > > > driver would always use the same field for a specific offload.
> > > > >
> > > > > Oh...
> > > >
> > > > Does it mean you agree?
> > >
> > > I don't agree to change inner_l{23}_len the name.
> > > The reason is that using the "inner" word means incoming packet is
> > > tunneling
> > packet or encapsulation packet.
> > > if we add "outer"{2,3}_len, which will cause confusion when
> > > processing non-
> > tunneling packet.
> >
> > Sorry Jijiang, maybe I don't understand what you are saying, but I
> > think you missed something. Let me explain the problem.
> >
> > For PKT_TX_IP_CKSUM, we must set l{2,3}_len.
> > When PKT_TX_VXLAN_CKSUM is set, PKT_TX_IP_CKSUM is related to inner
> > IP, right?
> First of all, I want to explain that what PKT_TX_VXLAN_CKSUM meaning is,  when
> the flag is set, driver know that it need set TX checksum for whole packet, 
> not
> only for inner part.
> 
> So When PKT_TX_VXLAN_CKSUM is set, PKT_TX_IP_CKSUM is related to inner
> IP,right?
> 
> 
> 
> > So we must set inner_l{2,3}_len.
> > It means that PKT_TX_IP_CKSUM requires different fields to be set,
> > depending of PKT_TX_VXLAN_CKSUM. That's what Olivier calls a semantic
> change.
> > It's not acceptable for an API.
> >
> > PKT_TX_IP_CKSUM should always be related to l{2,3}_len.
> > When PKT_TX_VXLAN_CKSUM is set, we should add outer_l{2,3}_len.
> >
> > Please, correct me if I'm wrong or fix the API.
> >
> > Thanks
> > --
> > Thomas


[dpdk-dev] [PATCH v2 1/3] ethdev: define ctrl_pkt filter type and its structure

2014-11-13 Thread Wu, Jingjing
Hi, Thomas

The input set of the control packet filter is the dst_mac and the ethertype in
the Ethernet header.
To be clear, I think it's better to use the name "ethertype filter".

Note that an ethertype filter already exists in the igb and ixgbe drivers. I
will rename the patchset to "ethertype filter" and also integrate igb's and
ixgbe's ethertype filters into the filter_ctrl API.

What do you think?

Jingjing

> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Friday, October 31, 2014 4:45 PM
> To: Wu, Jingjing
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2 1/3] ethdev: define ctrl_pkt filter type
> and its structure
> 
> Hi Jingjing,
> 
> I'm sorry, but your explanations are not sufficient.
> Please keep in mind that the user of the API don't know i40e internals.
> 
> 2014-10-31 07:05, Wu, Jingjing:
> > From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> > > 2014-10-22 16:19, Jingjing Wu:
> > > > +/**
> > > > + * Define all structures for Control Packet Filter type
> > > > +corresponding with
> > > specific operations.
> > > > + */
> > >
> > > Please explain what is a control packet.
> >
> > A control element in Fortville can be used to receive control packets and
> control other switching elements. Control packet filter can filter control
> packet (such as LLDP) to different queues in receive and identify the switch
> element that should process the packets in transmit.
> > At the same time, we also can use this filter to route non-control packets 
> > to
> queue or other engine by filtering mac and ether_type. The name "control
> packet filter" comes from Fortville.
> 
> I still don't know what is a control packet.
> 
> > > > +#define RTE_CONTROL_PACKET_FLAGS_IGNORE_MAC    0x0001
> > > > +#define RTE_CONTROL_PACKET_FLAGS_DROP          0x0002
> > > > +#define RTE_CONTROL_PACKET_FLAGS_TO_QUEUE      0x0004
> > > > +#define RTE_CONTROL_PACKET_FLAGS_TX            0x0008
> > > > +#define RTE_CONTROL_PACKET_FLAGS_RX            0x0000
> > >
> > > Flag RX is 0?
> >
> > Yes, RX is default value. Maybe it need to be removed.
> 
> No, it doesn't need to be removed. But a flag must not be 0.
> 0 means none.
> It's impossible to disable this flag.
> 
> Moreover, you should comment each flag.
> 
> > > > +/**
> > > > + * A structure used to define the control packet filter entry
> > > > + * to support RTE_ETH_FILTER_CTRL_PKT with RTE_ETH_FILTER_ADD
> > > > + * and RTE_ETH_FILTER_DELETE operations.
> > > > + */
> > > > +struct rte_ctrl_pkt_filter {
> > > > +   struct ether_addr mac_addr;   /**< mac address to match. */
> > > > +   uint16_t ether_type;  /**< ether type to match */
> > > > +   uint16_t flags;   /**< options for filter's 
> > > > behavior*/
> > > > +   uint16_t dest_id; /**< destination vsi id or pool 
> > > > id*/
> > >
> > > vsi id and pool id cannot be understood in a generic context.
> > > Please explain what you mean and why queue is not enough.
> >
> > If queue is not specified, dest_id defines which element (vsi) will get the
> packet.
> > If queue is specified, the queue need belong to the destination element.
> 
> You really need to define what is a vsi id and pool id. These notions are not
> known in the API layer.
> 
> > > > +   uint16_t queue;   /**< queue assign to if TO QUEUE 
> > > > flag is set
> > > */
> > >
> > > TO QUEUE is not defined. Is it really needed?
> > >
> > TO QUEUE is just the flag RTE_CONTROL_PACKET_FLAGS_TO_QUEUE
> above.
> 
> OK, please use the same wording or users will get lost.
> 
> --
> Thomas


[dpdk-dev] [PATCH 1/2] examples/vhost: support new VMDQ API and new nic i40e

2014-11-13 Thread Chen, Jing D
Hi,

> -Original Message-
> From: Xie, Huawei
> Sent: Thursday, November 13, 2014 6:34 AM
> To: dev at dpdk.org
> Cc: Chen, Jing D; Xie, Huawei
> Subject: [PATCH 1/2] examples/vhost: support new VMDQ API and new nic
> i40e
> 
> In Niantic, if VMDQ mode is set, all queues are allocated to VMDQ in DPDK.
> In I40E, only a configured part of continuous queues is allocated to VMDQ.
> The rte_eth_dev_info structure is extended to provide VMDQ queue base,
> queue number, and VMDQ pool base information.
> This patch supports the new VMDQ API in the vhost example.
> 
> FIXME in PMD:
>  * added mac address will be flushed at rte_eth_dev_start.
>  * we don't support selectively setting up queues well.
> 
> Signed-off-by: Huawei Xie 
> ---
>  examples/vhost/main.c | 25 +++--
>  1 file changed, 19 insertions(+), 6 deletions(-)
> 
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index a93f7a0..2b1bf02 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -53,7 +53,7 @@
> 
>  #include "main.h"
> 
> -#define MAX_QUEUES 128
> +#define MAX_QUEUES 256
> 
>  /* the maximum number of external ports supported */
>  #define MAX_SUP_PORTS 1
> @@ -282,6 +282,9 @@ static struct rte_eth_conf vmdq_conf_default = {
>  static unsigned lcore_ids[RTE_MAX_LCORE];
>  static uint8_t ports[RTE_MAX_ETHPORTS];
>  static unsigned num_ports = 0; /**< The number of ports specified in
> command line */
> +static uint16_t num_pf_queues, num_vmdq_queues;
> +static uint16_t vmdq_pool_base, vmdq_queue_base;
> +static uint16_t queues_per_pool;
> 
>  static const uint16_t external_pkt_default_vlan_tag = 2000;
>  const uint16_t vlan_tags[] = {
> @@ -417,7 +420,6 @@ port_init(uint8_t port)
> 
>   /*configure the number of supported virtio devices based on VMDQ
> limits */
>   num_devices = dev_info.max_vmdq_pools;
> - num_queues = dev_info.max_rx_queues;
> 
>   if (zero_copy) {
>   rx_ring_size = num_rx_descriptor;
> @@ -437,10 +439,19 @@ port_init(uint8_t port)
>   retval = get_eth_conf(&port_conf, num_devices);
>   if (retval < 0)
>   return retval;
> + /* NIC queues are divided into pf queues and vmdq queues.  */
> + num_pf_queues = dev_info.max_rx_queues -
> dev_info.vmdq_queue_num;
> + queues_per_pool = dev_info.vmdq_queue_num /
> dev_info.max_vmdq_pools;
> + num_vmdq_queues = num_devices * queues_per_pool;
> + num_queues = num_pf_queues + num_vmdq_queues;
> + vmdq_queue_base = dev_info.vmdq_queue_base;
> + vmdq_pool_base  = dev_info.vmdq_pool_base;
> + printf("pf queue num: %u, configured vmdq pool num: %u, each
> vmdq pool has %u queues\n",
> + num_pf_queues, num_devices, queues_per_pool);
> 
>   if (port >= rte_eth_dev_count()) return -1;
> 
> - rx_rings = (uint16_t)num_queues,
> + rx_rings = (uint16_t)dev_info.max_rx_queues;

You removed the line 'num_queues = dev_info.max_rx_queues' and now calculate
'num_queues' with a different equation, so I assume you think the two values
may not be equal.
So why do you assign dev_info.max_rx_queues to rx_rings again? Wouldn't it be
better to use 'num_queues'?

>   /* Configure ethernet device. */
>   retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
>   if (retval != 0)
> @@ -931,7 +942,8 @@ link_vmdq(struct vhost_dev *vdev, struct rte_mbuf
> *m)
>   vdev->vlan_tag);
> 
>   /* Register the MAC address. */
> - ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
> (uint32_t)dev->device_fh);
> + ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
> + (uint32_t)dev->device_fh +
> vmdq_pool_base);
>   if (ret)
>   RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add
> device MAC address to VMDQ\n",
>   dev->device_fh);
> @@ -2602,7 +2614,7 @@ new_device (struct virtio_net *dev)
>   ll_dev->vdev = vdev;
>   add_data_ll_entry(&ll_root_used, ll_dev);
>   vdev->vmdq_rx_q
> - = dev->device_fh * (num_queues / num_devices);
> + = dev->device_fh * queues_per_pool + vmdq_queue_base;
> 
>   if (zero_copy) {
>   uint32_t index = vdev->vmdq_rx_q;
> @@ -2837,7 +2849,8 @@ MAIN(int argc, char *argv[])
>   unsigned lcore_id, core_id = 0;
>   unsigned nb_ports, valid_num_ports;
>   int ret;
> - uint8_t portid, queue_id = 0;
> + uint8_t portid;
> + uint16_t queue_id;
>   static pthread_t tid;
> 
>   /* init EAL */
> --
> 1.8.1.4



[dpdk-dev] [PATCH 2/2] examples/vhost: use factorized default Rx/Tx configuration

2014-11-13 Thread Chen, Jing D
Hi,

> -Original Message-
> From: Xie, Huawei
> Sent: Thursday, November 13, 2014 6:34 AM
> To: dev at dpdk.org
> Cc: Chen, Jing D; Xie, Huawei
> Subject: [PATCH 2/2] examples/vhost: use factorized default Rx/Tx
> configuration
> 
> Refer to Pablo's commit:
> "use factorized default Rx/Tx configuration
> 
> For apps that were using default rte_eth_rxconf and rte_eth_txconf
> structures, these have been removed and now they are obtained by
> calling rte_eth_dev_info_get, just before setting up RX/TX queues."
> 
> move zero copy's deferred start set up ahead.
> 
> Signed-off-by: Huawei Xie 
> ---
>  examples/vhost/main.c | 78 +++--
> --
>  1 file changed, 22 insertions(+), 56 deletions(-)
> 
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index 2b1bf02..fa36913 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -79,25 +79,6 @@
>   + RTE_PKTMBUF_HEADROOM)
>  #define MBUF_CACHE_SIZE_ZCP 0
> 
> -/*
> - * RX and TX Prefetch, Host, and Write-back threshold values should be
> - * carefully set for optimal performance. Consult the network
> - * controller's datasheet and supporting DPDK documentation for guidance
> - * on how these parameters should be set.
> - */
> -#define RX_PTHRESH 8 /* Default values of RX prefetch threshold reg. */
> -#define RX_HTHRESH 8 /* Default values of RX host threshold reg. */
> -#define RX_WTHRESH 4 /* Default values of RX write-back threshold reg. */
> -
> -/*
> - * These default values are optimized for use with the Intel(R) 82599 10 GbE
> - * Controller and the DPDK ixgbe PMD. Consider using other values for other
> - * network controllers and/or network drivers.
> - */
> -#define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */
> -#define TX_HTHRESH 0  /* Default values of TX host threshold reg. */
> -#define TX_WTHRESH 0  /* Default values of TX write-back threshold reg. */
> -
>  #define MAX_PKT_BURST 32 /* Max burst size for RX/TX */
>  #define BURST_TX_DRAIN_US 100/* TX drain every ~100us */
> 
> @@ -217,32 +198,6 @@ static uint32_t burst_rx_retry_num =
> BURST_RX_RETRIES;
>  /* Character device basename. Can be set by user. */
>  static char dev_basename[MAX_BASENAME_SZ] = "vhost-net";
> 
> -
> -/* Default configuration for rx and tx thresholds etc. */
> -static struct rte_eth_rxconf rx_conf_default = {
> - .rx_thresh = {
> - .pthresh = RX_PTHRESH,
> - .hthresh = RX_HTHRESH,
> - .wthresh = RX_WTHRESH,
> - },
> - .rx_drop_en = 1,
> -};
> -
> -/*
> - * These default values are optimized for use with the Intel(R) 82599 10 GbE
> - * Controller and the DPDK ixgbe/igb PMD. Consider using other values for
> other
> - * network controllers and/or network drivers.
> - */
> -static struct rte_eth_txconf tx_conf_default = {
> - .tx_thresh = {
> - .pthresh = TX_PTHRESH,
> - .hthresh = TX_HTHRESH,
> - .wthresh = TX_WTHRESH,
> - },
> - .tx_free_thresh = 0, /* Use PMD default values */
> - .tx_rs_thresh = 0, /* Use PMD default values */
> -};
> -
>  /* empty vmdq configuration structure. Filled in programatically */
>  static struct rte_eth_conf vmdq_conf_default = {
>   .rxmode = {
> @@ -410,7 +365,9 @@ port_init(uint8_t port)
>  {
>   struct rte_eth_dev_info dev_info;
>   struct rte_eth_conf port_conf;
> - uint16_t rx_rings, tx_rings;
> + struct rte_eth_rxconf *rxconf;
> + struct rte_eth_txconf *txconf;
> + int16_t rx_rings, tx_rings;
>   uint16_t rx_ring_size, tx_ring_size;
>   int retval;
>   uint16_t q;
> @@ -418,6 +375,21 @@ port_init(uint8_t port)
>   /* The max pool number from dev_info will be used to validate the
> pool number specified in cmd line */
>   rte_eth_dev_info_get (port, &dev_info);
> 
> + rxconf = &dev_info.default_rxconf;
> + txconf = &dev_info.default_txconf;
> + rxconf->rx_drop_en = 1;
> +
> + /*
> +  * Zero copy defers queue RX/TX start to the time when guest
> +  * finishes its startup and packet buffers from that guest are
> +  * available.
> +  */
> + if (zero_copy) {
> + rxconf->rx_deferred_start = 1;
> + rxconf->rx_drop_en = 0;
> + txconf->tx_deferred_start = 1;
> + }
> +

May I know why 'rx_drop_en' is cleared when 'zero_copy' is set?

>   /*configure the number of supported virtio devices based on VMDQ
> limits */
>   num_devices = dev_info.max_vmdq_pools;
> 
> @@ -460,14 +432,16 @@ port_init(uint8_t port)
>   /* Setup the queues. */
>   for (q = 0; q < rx_rings; q ++) {
>   retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
> - rte_eth_dev_socket_id(port),
> &rx_conf_default,
> + rte_eth_dev_socket_id(port),
> + rxconf,
>  

[dpdk-dev] vhost-user technical issues

2014-11-13 Thread Linhaifeng


On 2014/11/12 5:37, Xie, Huawei wrote:
> Hi Tetsuya:
> There are two major technical issues in my mind for vhost-user implementation.
> 
> 1) memory region map
> Vhost-user passes us file fd and offset for each memory region. Unfortunately 
> the mmap offset is "very" wrong. I discovered this issue long time ago, and 
> also found
> that I couldn't mmap the huge page file even with correct offset(need double 
> check).
> Just now I find that people reported this issue on Nov 3.
> [Qemu-devel] [PULL 27/29] vhost-user: fix mmap offset calculation
> Anyway, I turned to the same idea used in our DPDK vhost-cuse: only use the 
> fd for region(0) to map the  whole file.
> I think we should use this way temporarily to support qemu-2.1 as it has that 
> bug.
> 

This bug is not in DPDK's vhost-user; it is only in QEMU's vhost-user backend.
> 2) what message is the indicator for vhost start/release?
> Previously  for vhost-cuse, it has SET_BACKEND message.
> What we should do for vhost-user?
> SET_VRING_KICK for start?
> What about for release?
> Unlike the kernel virtio, the DPDK virtio in guest could be restarted. 
> 
> Thoughts?
> 
> -huawei
> 
> 

-- 
Regards,
Haifeng



[dpdk-dev] vhost-user technical issues

2014-11-13 Thread Linhaifeng


On 2014/11/12 5:37, Xie, Huawei wrote:
> Hi Tetsuya:
> There are two major technical issues in my mind for vhost-user implementation.
> 
> 1) memory region map
> Vhost-user passes us file fd and offset for each memory region. Unfortunately 
> the mmap offset is "very" wrong. I discovered this issue long time ago, and 
> also found
> that I couldn't mmap the huge page file even with correct offset(need double 
> check).
> Just now I find that people reported this issue on Nov 3.
> [Qemu-devel] [PULL 27/29] vhost-user: fix mmap offset calculation
> Anyway, I turned to the same idea used in our DPDK vhost-cuse: only use the 
> fd for region(0) to map the  whole file.
> I think we should use this way temporarily to support qemu-2.1 as it has that 
> bug.
> 

The size of region 0 is not the same as the file size. Maybe you should mmap
the other region instead.

region 0:
gpa = 0x0
size = 655360
ua = 0x2ac0
offset = 0

region 1: // Use this region to mmap. BTW, how can we avoid mmapping twice when
there are two devices?
gpa = 0xC
size = 2146697216
ua = 0x2acc
offset = 786432



> 2) what message is the indicator for vhost start/release?
> Previously  for vhost-cuse, it has SET_BACKEND message.
> What we should do for vhost-user?
> SET_VRING_KICK for start?
> What about for release?
> Unlike the kernel virtio, the DPDK virtio in guest could be restarted. 
> 
> Thoughts?
> 
> -huawei
> 
> 

-- 
Regards,
Haifeng



[dpdk-dev] vhost-user technical issues

2014-11-13 Thread Linhaifeng


On 2014/11/12 12:12, Tetsuya Mukawa wrote:
> Hi Xie,
> 
> (2014/11/12 6:37), Xie, Huawei wrote:
>> Hi Tetsuya:
>> There are two major technical issues in my mind for vhost-user 
>> implementation.
>>
>> 1) memory region map
>> Vhost-user passes us file fd and offset for each memory region. 
>> Unfortunately the mmap offset is "very" wrong. I discovered this issue long 
>> time ago, and also found
>> that I couldn't mmap the huge page file even with correct offset(need double 
>> check).
>> Just now I find that people reported this issue on Nov 3.
>> [Qemu-devel] [PULL 27/29] vhost-user: fix mmap offset calculation
>> Anyway, I turned to the same idea used in our DPDK vhost-cuse: only use the 
>> fd for region(0) to map the  whole file.
>> I think we should use this way temporarily to support qemu-2.1 as it has 
>> that bug.
> I agree with you.
> Also we may have an issue about un-mapping file on hugetlbfs of linux.
> When I check munmap(), it seems 'size' need to be aligned by hugepage size.
> (I guess it may be a kernel bug. Might be fixed already.)
> Please add return value checking code for munmap().
> Still munmap() might be failed.
> 
Are you munmapping region 0? Region 0 does not need to be mmapped, so it does
not need to be munmapped either.

I can munmap the other regions successfully.

>>
>> 2) what message is the indicator for vhost start/release?
>> Previously  for vhost-cuse, it has SET_BACKEND message.
>> What we should do for vhost-user?
>> SET_VRING_KICK for start?
> I think so.
> 
>> What about for release?
>> Unlike the kernel virtio, the DPDK virtio in guest could be restarted. 
>>
>> Thoughts?
> I guess we need to consider 2 types of restarting.
> One is virtio-net driver restarting, the other is vhost-user backend
> restarting.
> But, so far, it's nice to start to think about virtio-net driver
> restarting first.
> 
> Probably we need to implement a way to let vhost-user backend know
> virtio-net driver is restarted.
> I am not sure what is good way to let vhost-user backend know it.
> But how about followings RFC?
> 
> - When unix domain socket is closed, vhost-user backend should treat it
> as "release".
>  It is useful when QEMU itself is gone suddenly.
> 
> - Also, implementing new ioctl command like VHOST_RESET_BACKEND.
>  This command should be sent from virtio-net device of QEMU when
>  VIRTIO_CONFIG_STATUS_RESET register of virtio-net device is set by
> vrtio-net driver.
>  (Usually this register is set when virtio-net driver is initialized or
> stopped.)
>  It means we need to change QEMU. ;)
>  It seems virtio-net PMD already sets this register when PMD is
> initialized or stopped.
>  So this framework should work, and can let vhost-user backend know
> driver resetting.
>  (And I guess we can say same things for virtio-net kernel driver.)
>  It might be enough to close an unix domain socket, instead of
> implementing new command.
>  But in the case, we may need auto reconnection mechanism.
> 
> - We also need to consider DPDK application is gone suddenly without
> setting reset register.
>  In the case, vhost-user backend cannot know it. Only user (or some kind
> of watchdog
>  applications on guest) knows it.
>  Because of this, user(or app.) should have responsibility to solve this
> situation.
>  To be more precise, user should let vhost-user backend know device
> releasing.
>  If user starts an other DPDK application without solving the issue, the
> new DPDK application may
>  access memory that vhost-user backend is also accessing.
>  I guess user can solve the issue using "dpdk_nic_bind.py".
>  The script can move virtio-net device to kernel virtio-net driver, and
> return it to igb_uio.
>  While those steps, virtio-net device is initialized by virtio-net
> kernel driver.
>  So vhost-user backend can know device releasing.
> 
> Tetsuya
> 
>>
>> -huawei
> 
> 
> 
> 

-- 
Regards,
Haifeng



[dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload

2014-11-13 Thread Liu, Jijiang


> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Thursday, November 13, 2014 1:26 AM
> To: Liu, Jijiang
> Cc: dev at dpdk.org; Olivier MATZ
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum
> offload
>
> 2014-11-11 05:29, Liu, Jijiang:
> > From: Olivier MATZ
> > > On 11/10/2014 07:03 AM, Liu, Jijiang wrote:
> > > > > - if PKT_TX_VXLAN_CKSUM is not set (legacy use case), then the
> > > > >driver use l2_len and l3_len to offload inner IP/UDP/TCP checksums.
> > > >
> > > > If the flag is not set, and imply that it is not VXLAN packet,
> > > > and do TX checksum offload as regular packet.
> > > >
> > > > > - if PKT_TX_VXLAN_CKSUM is set, then the driver has to use
> > > > >inner_l{23}_len instead of l{23}_len for the same operation.
> > > >
> > > > Your understanding is not fully correct.
> > > > The l{23}_len is still used for TX checksum offload, please refer
> > > > to
> > > > i40e_txd_enable_checksum()  implementation.
> > >
> > > This fields are part of public mbuf API. You cannot say to refer to
> > > i40e PMD code to understand how to use it.
> > >
> > > > > Adding PKT_TX_VXLAN_CKSUM changes the semantic of l2_len and
> l3_len.
> > > > > To fix this, I suggest to remove the new fields inner_l{23}_len
> > > > > then add outer_l{23}_len instead. Therefore, the semantic of
> > > > > l2_len and l3_len would not change, and a driver would always
> > > > > use the same field for a specific offload.
> > > >
> > > > Oh...
> > >
> > > Does it mean you agree?
> >
> > I don't agree to change inner_l{23}_len the name.
> > The reason is that using the "inner" word means incoming packet is tunneling
> packet or encapsulation packet.
> > if we add "outer"{2,3}_len, which will cause confusion when processing non-
> tunneling packet.
>
> Sorry Jijiang, maybe I don't understand what you are saying, but I think you
> missed something. Let me explain the problem.
>
> For PKT_TX_IP_CKSUM, we must set l{2,3}_len.
> When PKT_TX_VXLAN_CKSUM is set, PKT_TX_IP_CKSUM is related to inner IP,
> right?
> So we must set inner_l{2,3}_len.
> It means that PKT_TX_IP_CKSUM requires different fields to be set, depending 
> of
> PKT_TX_VXLAN_CKSUM. That's what Olivier calls a semantic change.
> It's not acceptable for an API.

I'd like to explain what PKT_TX_VXLAN_CKSUM means: it tells the driver that it
should set the TX checksums of the whole VXLAN packet according to the L3/L4
flag settings.
The IP checksum of a VXLAN packet covers not only the inner IP header but also
the outer IP header, so when PKT_TX_VXLAN_CKSUM is set, PKT_TX_IP_CKSUM relates
not only to the inner IP but also to the outer IP. In other words, we use this
one flag to enable inner and outer IP checksum offload at the same time in the
driver, because it is not necessary to add another flag to stand for the inner
IP.

The L4 flag is used in the same way as the L3 flag.

> PKT_TX_IP_CKSUM should always be related to l{2,3}_len.

> When PKT_TX_VXLAN_CKSUM is set, we should add outer_l{2,3}_len.

> Please, correct me if I'm wrong or fix the API.

Probably we can refer to struct sk_buff in Linux kernel .
Just as a reference!!
struct sk_buff {
...
*   @inner_protocol: Protocol (encapsulation)
 *  @inner_transport_header: Inner transport layer header (encapsulation)
 *  @inner_network_header: Network layer header (encapsulation)
 *  @inner_mac_header: Link layer header (encapsulation)

__u16   inner_transport_header;
__u16   inner_network_header;
__u16   inner_mac_header;
__u16   transport_header;
__u16   network_header;
__u16   mac_header;


> Thanks

> Thomas



[dpdk-dev] [PATCH v3] Add in_flight_bitmask so as to use full 32 bits of tag.

2014-11-13 Thread jigsaw
Hi Thomas,

>>Do you have another commit before this one in your tree?

Yes this patch relies on this one:
http://dpdk.org/ml/archives/dev/2014-November/007943.html

Sorry I didn't make it clear. The new field usr in rte_mbuf was under the same
cover letter as v2 of the in_flight_bitmask patch.
Then in_flight_bitmask got a v3 patch, but I didn't include the rte_mbuf patch
under the same cover letter, because the usr patch had already been ACKed.

thx &
rgds,
-ql

On Thu, Nov 13, 2014 at 2:50 AM, Thomas Monjalon 
wrote:

> Hi,
>
> 2014-11-10 16:44, Qinglai Xiao:
> > With introduction of in_flight_bitmask, the whole 32 bits of tag can be
> > used. Further more, this patch fixed the integer overflow when finding
> > the matched tags.
> > The maximum number workers is now defined as 64, which is length of
> > double-word. The link between number of workers and RTE_MAX_LCORE is
> > now removed. Compile time check is added to ensure the
> > RTE_DISTRIB_MAX_WORKERS is less than or equal to size of double-word.
> >
> > Signed-off-by: Qinglai Xiao 
>
> The patch doesn't apply cleanly and fails to compile:
> lib/librte_distributor/rte_distributor.c:310:27: error: 'union
> <anonymous>' has no member named 'usr'
>
> Do you have another commit before this one in your tree?
>
> --
> Thomas
>


[dpdk-dev] [PATCH] eal: modification of the list of i40e supported device IDs

2014-11-13 Thread Helin Zhang
According to the changes of the i40e base driver, two device
IDs (0x1573, 0x1582) are not supported anymore, and one new
device ID (0x1586) is supported. The list of i40e device IDs
supported by DPDK should be modified accordingly.

Signed-off-by: Helin Zhang 
---
 lib/librte_eal/common/include/rte_pci_dev_ids.h | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h 
b/lib/librte_eal/common/include/rte_pci_dev_ids.h
index dea620f..feaeda1 100644
--- a/lib/librte_eal/common/include/rte_pci_dev_ids.h
+++ b/lib/librte_eal/common/include/rte_pci_dev_ids.h
@@ -447,26 +447,24 @@ RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, 
IXGBE_DEV_ID_82599_BYPASS)
 /*** Physical I40E devices from i40e_type.h */

 #define I40E_DEV_ID_SFP_XL710   0x1572
-#define I40E_DEV_ID_SFP_X710    0x1573
 #define I40E_DEV_ID_QEMU        0x1574
 #define I40E_DEV_ID_KX_A        0x157F
 #define I40E_DEV_ID_KX_B        0x1580
 #define I40E_DEV_ID_KX_C        0x1581
-#define I40E_DEV_ID_KX_D        0x1582
 #define I40E_DEV_ID_QSFP_A  0x1583
 #define I40E_DEV_ID_QSFP_B  0x1584
 #define I40E_DEV_ID_QSFP_C  0x1585
+#define I40E_DEV_ID_10G_BASE_T  0x1586

 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_XL710)
-RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_X710)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QEMU)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_A)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_B)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_C)
-RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_D)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_A)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_B)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_C)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T)

 /** Virtual IGB devices from e1000_hw.h **/

-- 
1.8.1.4



[dpdk-dev] [PATCH v2 1/3] ethdev: define ctrl_pkt filter type and its structure

2014-11-13 Thread Thomas Monjalon
Hi Jingjing,

2014-11-13 05:44, Wu, Jingjing:
> The input set of control packet filter are dst_mac and ethertype in Ethernet 
> head.
> To be clear, I think it's better to use the name ethertype filter.
> 
> While there is already ethertype filter existing in igb and ixgbe driver. I 
> will rename
> The patchset to ethertype filter and also integrate igb and ixgbe's ethertype 
> filter
> To the filter_ctrl API.
> 
> What do you think?

OK, good.
If I understand well, this feature is now planned for release 2.0?

-- 
Thomas


[dpdk-dev] [PATCH v6 1/9] librte_mbuf:the rte_mbuf structure changes

2014-11-13 Thread Thomas Monjalon
2014-11-13 03:17, Liu, Jijiang:
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> > 2014-10-23 02:23, Zhang, Helin:
> > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Thomas Monjalon
> > > > 2014-10-21 14:14, Liu, Jijiang:
> > > > > From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> > > > > > 2014-10-21 16:46, Jijiang Liu:
> > > > > > > + uint16_t packet_type;
> > > > > >
> > > > > > Why not name it "l2_type"?
> > >
> > > 'packet_type' is for storing the hardware identified packet type upon
> > > different layers of protocols (l2, l3, l4, ...).
> > > It is quite useful for user application or middle layer software
> > > stacks, it can know what the packet type is without checking the packet 
> > > too
> > much by software.
> > > Actually ixgbe already has packet types (less than 10), which is 
> > > transcoded into
> > 'ol_flags'.
> > > For i40e, the packet type can represent about 256 types of packet,
> > > 'ol_flags' does not have enough bits for it anymore. So put the i40e 
> > > packet types
> > into mbuf would be better.
> > > Also this field can be used for NON-Intel NICs, I think there must be
> > > the similar concepts of other NICs. And 16 bits 'packet_type' has several
> > reserved bits for future and NON-Intel NICs.
> > 
> > Thanks Helin, that's the best description of packet_type I've seen so far.
> > It's not so clear in the commit log:
> > http://dpdk.org/browse/dpdk/commit/?id=73b7d59cf4f6faf
> > 
> > > > > In datasheet, this term is called packet type(s).
> > > >
> > > > That's exactly the point I want you really understand!
> > > > This is a field in generic mbuf structure, so your datasheet has no 
> > > > value here.
> > > >
> > > > > Personally , I think packet type is  more clear what meaning of this 
> > > > > field is .
> > > >
> > > > You cannot add an API field without knowing what will be its generic 
> > > > meaning.
> > > > Please think about it and describe its scope.
> > 
> > I integrated this patch with the VXLAN patchset in the hope that you'll 
> > improve
> > the situation afterwards.
> > This is the answer you recently gave to Olivier:
> > http://dpdk.org/ml/archives/dev/2014-November/007599.html
> > "
> > Regarding adding a packet_type in mbuf, we ever had a lot of discussions
> > as follows:
> > http://dpdk.org/ml/archives/dev/2014-October/007027.html
> > http://dpdk.org/ml/archives/dev/2014-September/005240.html
> > http://dpdk.org/ml/archives/dev/2014-September/005241.html
> > http://dpdk.org/ml/archives/dev/2014-September/005274.html
> > "
> > 
> > To sum up the situation:
> > - We don't know what are the possible values of packet_type
> > - It's only filled by i40e, while other drivers use ol_flags
> > - There is no special value "unknown" which should be set by drivers
> >   not supporting this feature.
> > - Its only usage is to print a decimal value in app/test-pmd/rxonly.c
> > 
> > It's now clear that nobody cares about this part of the API.
> > So I'm going to remove packet_type from mbuf.
> > I don't want to keep something that we don't know how to use, that is not
> > consistent across drivers, and that overlap another API part (ol_flags).
> 
> The packet type in i40e is very important for users; using the packet type can
> help to speed up packet analysis/identification in their application,
> especially for tunneling packet formats.
> Now I'm working on implementing the packet type definition in the rte_ethdev.h
> file and the translation table in i40e, which is almost done.
> The packet type definition in the rte_ethdev.h file looks like below.
> /*
>  * Ethernet packet type
>  */
> enum rte_eth_ptype {
> /* undefined packet type, means HW can't recognise it */
> RTE_PTYPE_UNDEF = 0,
> ...
> 
> /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv4FRAG_PAY3,
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_PAY3,
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_UDP_PAY4,
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_TCP_PAY4,
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_SCTP_PAY4,
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_ICMP_PAY4,
>  
> /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv6FRAG_PAY3,
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_PAY3,
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_UDP_PAY4,
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_TCP_PAY4,
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_SCTP_PAY4,
> RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_ICMP_PAY4,
>  
> /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN */
> RTE_PTYPE_IPv4_GRENAT_MACVLAN_PAY3,
> ... 
> }

OK, it seems well abstracted.
I think the last part of these names (PAY3/PAY4) is useless.

When will this patch for API and i40e be ready?
I'd prefer fixing the API instead of removing it.

> Yes, we don't use packet type in many places now, which doesn't mean
> we don't use it  in the future (when supporting another tunneling packet).
> 
> It is ok for me if you want to remove the pac

[dpdk-dev] [PATCH v2 1/3] ethdev: define ctrl_pkt filter type and its structure

2014-11-13 Thread Wu, Jingjing
Hi, Thomas

Nop.

We are targeting r1.8 for it. The whole patch set is almost ready, but it still
lacks internal review.

I have two proposals here:
1. Send a patch set including all the ethdev, i40e, ixgbe, igb and testpmd
changes, just as in my previous mail.

2. Send a patch set including only the ethdev and i40e parts, without the testpmd
changes needed to test it on Fortville, and send the remaining changes later.
Maybe in r2.0?

The second proposal splits the whole task into smaller patch sets, which are
easier to review.

Which one do you prefer?

Look forward to your reply!
Thanks

Jingjing 


> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Thursday, November 13, 2014 4:41 PM
> To: Wu, Jingjing
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2 1/3] ethdev: define ctrl_pkt filter type 
> and its structure
> 
> Hi Jingjing,
> 
> 2014-11-13 05:44, Wu, Jingjing:
> > The input set of control packet filter are dst_mac and ethertype in 
> > Ethernet head.
> > To be clear, I think it's better to use the name ethertype filter.
> >
> > While there is already ethertype filter existing in igb and ixgbe driver. I 
> > will rename
> > The patchset to ethertype filter and also integrate igb and ixgbe's 
> > ethertype filter
> > To the filter_ctrl API.
> >
> > What do you think?
> 
> OK, good.
> If I understand well, this feature is now planned for release 2.0?
> 
> --
> Thomas


[dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload

2014-11-13 Thread Thomas Monjalon
2014-11-13 06:51, Liu, Jijiang:
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> > 2014-11-11 05:29, Liu, Jijiang:
> > > From: Olivier MATZ
> > > > On 11/10/2014 07:03 AM, Liu, Jijiang wrote:
> > > > > > - if PKT_TX_VXLAN_CKSUM is not set (legacy use case), then the
> > > > > >driver use l2_len and l3_len to offload inner IP/UDP/TCP 
> > > > > > checksums.
> > > > >
> > > > > If the flag is not set, and imply that it is not VXLAN packet,
> > > > > and do TX checksum offload as regular packet.
> > > > >
> > > > > > - if PKT_TX_VXLAN_CKSUM is set, then the driver has to use
> > > > > >inner_l{23}_len instead of l{23}_len for the same operation.
> > > > >
> > > > > Your understanding is not fully correct.
> > > > > The l{23}_len is still used for TX checksum offload, please refer
> > > > > to i40e_txd_enable_checksum() implementation.
> > > >
> > > > This fields are part of public mbuf API. You cannot say to refer to
> > > > i40e PMD code to understand how to use it.
> > > >
> > > > > > Adding PKT_TX_VXLAN_CKSUM changes the semantic of l2_len and
> > > > > > l3_len.
> > > > > > To fix this, I suggest to remove the new fields inner_l{23}_len
> > > > > > then add outer_l{23}_len instead. Therefore, the semantic of
> > > > > > l2_len and l3_len would not change, and a driver would always
> > > > > > use the same field for a specific offload.
> > > > >
> > > > > Oh...
> > > >
> > > > Does it mean you agree?
> > >
> > > I don't agree to change inner_l{23}_len the name.
> > > The reason is that using the "inner" word means incoming packet is 
> > > tunneling
> > > packet or encapsulation packet.
> > > if we add "outer"{2,3}_len, which will cause confusion when processing 
> > > non-
> > > tunneling packet.
> >
> > Sorry Jijiang, maybe I don't understand what you are saying, but I think you
> > missed something. Let me explain the problem.
> >
> > For PKT_TX_IP_CKSUM, we must set l{2,3}_len.
> > When PKT_TX_VXLAN_CKSUM is set, PKT_TX_IP_CKSUM is related to inner IP,
> > right?
> > So we must set inner_l{2,3}_len.
> > It means that PKT_TX_IP_CKSUM requires different fields to be set, 
> > depending of
> > PKT_TX_VXLAN_CKSUM. That's what Olivier calls a semantic change.
> > It's not acceptable for an API.
> 
> I'd like to explain what PKT_TX_VXLAN_CKSUM means, it is to tell driver
> should set whole VXLAN packet TX checksum according to  L3/L4 flag setting.
> VXLAN packet IP checksum  not only include inner IP, but also include outer
> IP, so when PKT_TX_VXLAN_CKSUM is set, the  PKT_TX_IP_CKSUM is not only
> related to inner IP, but also IP.   In other words, we use this one flag to
> set inner IP and outer IP checksum offload at the same time in driver,
> because it is not necessary to add other flag to stand for inner IP flag

You mean that PKT_TX_VXLAN_CKSUM requests hardware checksumming of outer L3,
outer L4, inner L3 and inner L4?
So maybe the name and comments are not clear enough.

> L4 flag usage is the same the L3 flag as well.

What do you mean?

> > PKT_TX_IP_CKSUM should always be related to l{2,3}_len.
> > When PKT_TX_VXLAN_CKSUM is set, we should add outer_l{2,3}_len.
> > Please, correct me if I'm wrong or fix the API.
> 
> Probably we can refer to struct sk_buff in Linux kernel .
> Just as a reference!!
> struct sk_buff {
> ...
> *   @inner_protocol: Protocol (encapsulation)
>  *  @inner_transport_header: Inner transport layer header (encapsulation)
>  *  @inner_network_header: Network layer header (encapsulation)
>  *  @inner_mac_header: Link layer header (encapsulation)
> 
> __u16   inner_transport_header;
> __u16   inner_network_header;
> __u16   inner_mac_header;
> __u16   transport_header;
> __u16   network_header;
> __u16   mac_header;

Yes it's a reference. But some things are made differently in DPDK.
Is there a flag PKT_TX_VXLAN_CKSUM in Linux?

I'm not sure what the checksumming API would be.
But I'm sure the VXLAN API is not commented enough.
Olivier is improving documentation of the legacy checksum API:
http://dpdk.org/ml/archives/dev/2014-November/007956.html
I'd like you to do the same thing for VXLAN checksum.

Thanks
-- 
Thomas


[dpdk-dev] [PATCH v3] Add in_flight_bitmask so as to use full 32 bits of tag.

2014-11-13 Thread Thomas Monjalon
2014-11-13 08:56, jigsaw:
> >>Do you have another commit before this one in your tree?
> 
> Yes this patch relies on this one:
> http://dpdk.org/ml/archives/dev/2014-November/007943.html
> 
> Sorry I didn't make it clear. The new field usr in rte_mbuf was under same
> cover letter in v2 of the in_flight_bitmask patch.
> Then in_flight_bitmask has a v3 patch, but I didn't include the rte_mbuf in
> the same cover letter, coz the usr patch has been ACKed.

OK. In this case, you should re-submit the whole series and add the acked-by
line in the patch, and/or use the --in-reply-to option to place the v3 patch in
the original thread.

No need to resend it, I can manage to get the whole patchset now.

Thanks
-- 
Thomas


[dpdk-dev] [PATCH v3] Add in_flight_bitmask so as to use full 32 bits of tag.

2014-11-13 Thread jigsaw
OK thanks. Sorry my bad. -ql

On Thu, Nov 13, 2014 at 11:18 AM, Thomas Monjalon  wrote:

> 2014-11-13 08:56, jigsaw:
> > >>Do you have another commit before this one in your tree?
> >
> > Yes this patch relies on this one:
> > http://dpdk.org/ml/archives/dev/2014-November/007943.html
> >
> > Sorry I didn't make it clear. The new field usr in rte_mbuf was under
> same
> > cover letter in v2 of the in_flight_bitmask patch.
> > Then in_flight_bitmask has a v3 patch, but I didn't include the rte_mbuf
> in
> > the same cover letter, coz the usr patch has been ACKed.
>
> OK. In this case, you should re-submit the whole series and add the acked-by
> line in the patch, and/or use the --in-reply-to option to place the v3
> patch in
> the original thread.
>
> No need to resend it, I can manage to get the whole patchset now.
>
> Thanks
> --
> Thomas
>


[dpdk-dev] [PATCH] eal: modification of the list of i40e supported device IDs

2014-11-13 Thread Thomas Monjalon
> According to the changes of the i40e base driver, two device
> IDs (0x1573, 0x1582) are not supported anymore, and one new
> device ID (0x1586) is supported. The list of i40e device IDs
> DPDK supported should be modified accordingly.
> 
> Signed-off-by: Helin Zhang 

Acked-by: Thomas Monjalon 

Applied

Thanks
-- 
Thomas


[dpdk-dev] [PATCH] i40e: support of link flow control

2014-11-13 Thread Thomas Monjalon
Hi Zhida,

> Add support of link flow control.
> 
> Signed-off-by: Zhida Zang 
> Acked-by: Helin Zhang 

I think you are reworking this patch, right?

I won't review it myself because I don't know i40e well enough.
I just have some minor comments:

> +static int i40e_10g_fc_enable(struct i40e_hw *hw, uint16_t pause_time)

"static int" should be on a separate line.

> + PMD_INIT_LOG(ERR, "i40e_set_fc = %d\n", err);

Carriage return is already included in PMD_INIT_LOG now.

Thanks for keeping us informed of the status of this patch.
-- 
Thomas


[dpdk-dev] [PATCH v5 05/21] i40e: implement operations to add/delete flow director

2014-11-13 Thread Thomas Monjalon
Hi Jingjing,

You didn't reply to Pablo's comment.
Any news of this patchset? Could it be reviewed?

Thanks
-- 
Thomas

2014-11-05 21:18, De Lara Guarch, Pablo:
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Jingjing Wu
> > Deal with two operations for flow director
> >  - RTE_ETH_FILTER_ADD
> >  - RTE_ETH_FILTER_DELETE
> > Encode the flow inputs to programming packet.
> > Sent the packet to filter programming queue and check status
> > on the status report queue.
> > 
> > Signed-off-by: Jingjing Wu 
> > ---
> >  lib/librte_pmd_i40e/i40e_ethdev.c |   3 +
> >  lib/librte_pmd_i40e/i40e_ethdev.h |   3 +
> >  lib/librte_pmd_i40e/i40e_fdir.c   | 622
> > ++
> >  3 files changed, 628 insertions(+)
> > 
> > diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c
> > b/lib/librte_pmd_i40e/i40e_ethdev.c
> > index 8195e8a..fb43efb 100644
> > --- a/lib/librte_pmd_i40e/i40e_ethdev.c
> > +++ b/lib/librte_pmd_i40e/i40e_ethdev.c
> > @@ -4577,6 +4577,7 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
> >  enum rte_filter_op filter_op,
> >  void *arg)
> >  {
> > +   struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-
> > >dev_private);
> > int ret = 0;
> > 
> > if (dev == NULL)
> > @@ -4585,6 +4586,8 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
> > switch (filter_type) {
> > case RTE_ETH_FILTER_TUNNEL:
> > ret = i40e_tunnel_filter_handle(dev, filter_op, arg);
> 
> Missing break here?
> 
> > +   case RTE_ETH_FILTER_FDIR:
> > +   ret = i40e_fdir_ctrl_func(pf, filter_op, arg);
> > break;
> > default:
> > PMD_DRV_LOG(WARNING, "Filter type (%d) not
> > supported",



[dpdk-dev] [PATCH v2] librte_pmd_packet: add PMD for AF_PACKET-based virtual devices

2014-11-13 Thread Thomas Monjalon
Hi Neil and John,

I would like to wake up this very old thread.

2014-10-08 15:14, Neil Horman:
> On Wed, Oct 08, 2014 at 05:57:46PM +0200, Thomas Monjalon wrote:
> > 2014-09-29 11:05, Bruce Richardson:
> > > On Fri, Sep 26, 2014 at 10:08:55AM -0400, Neil Horman wrote:
> > > > On Fri, Sep 26, 2014 at 11:28:05AM +0200, Thomas Monjalon wrote:
> > > > > 3) There is no test associated with this PMD.
> > > > That would have been a great comment to make a few months back, though 
> > > > whats
> > > > wrong with testpmd here?  That seems to be the same test that every 
> > > > other pmd
> > > > uses. What exactly are you looking for?
> > 
> > I was thinking of testing behaviour with different kernel configurations and
> > unit tests for --vdev options. But it's not a major blocker.
> > 
> Thats fine with me.  If theres a set of unit tests that you have documentation
> for, I'm sure we would be happy to run them.  I presume you just want all the
> pmd vdev option exercised?  Any specific sets of kernel configurations?

I don't really know which tests are needed. It could be a mix of unit tests
and functional tests described in a test plan.
The goal is to be able to validate the behaviour and check there is no
regression. Ideally some corner cases could be described.
I'm OK to integrate it as is. But future maintenance will probably need
such inputs for validation tests.

> > If RedHat is committed to its maintenance, it could be integrated in release 
> > 1.8.
> > But I'd like it to be renamed as pmd_af_packet (or a better name) instead of
> > pmd_packet.
> > 
> John L. is on his way to plumbers at the moment, so is unable to comment, but
> I'll try to get a few cycles to change the name of the PMD around.  And yes, I
> thought that maintenance was implicit.  He's the author, of course he'll take
> care of it :).  And I'll be glad to help

Do you have time in coming days to rebase and rename this PMD for inclusion
in 1.8.0 release?

Thanks
-- 
Thomas


[dpdk-dev] [PATCH 00/12] Patches for DPDK to support Power architecture

2014-11-13 Thread Thomas Monjalon
Hi Chao,

2014-09-26 05:36, Chao Zhu:
> This set of patches adds IBM Power architecture support to the DPDK. It adds the 
> required support to the
> EAL library. This set of patches doesn't support full functionality on Power 
> processors. Many functions
> are turned off in the configuration. More patches will be added continuously.
> 
> Chao Zhu (12):
>   Add compiling definations for IBM Power architecture
>   Add atomic operations for IBM Power architecture
>   Add byte order operations for IBM Power architecture
>   Add CPU cycle operations for IBM Power architecture
>   Add prefetch operation for IBM Power architecture
>   Add spinlock operation for IBM Power architecture
>   Add vector memcpy for IBM Power architecture
>   Add CPU flag checking for IBM Power architecture
>   Remove iopl operation for IBM Power architecture
>   Add cache size define for IBM Power Architecture
>   Add huge page sizes for IBM Power architecture
>   Add memory support for IBM Power Architecture

Could you share the status of the rework of these patches?
Maybe that some parts could enter in 1.8 as a preview.

-- 
Thomas


[dpdk-dev] [PATCH 00/12] Patches for DPDK to support Power architecture

2014-11-13 Thread Chao Zhu
Thomas,

Sorry for the delay. I'll push the updated patches to the mail list 
before next Monday.
Thanks a lot!

Best Regards!
--
Chao Zhu

On 2014/11/13 18:24, Thomas Monjalon wrote:
> Hi Chao,
>
> 2014-09-26 05:36, Chao Zhu:
>> This set of patches adds IBM Power architecture support to the DPDK. It adds the 
>> required support to the
>> EAL library. This set of patches doesn't support full functionality on Power 
>> processors. Many functions
>> are turned off in the configuration. More patches will be added continuously.
>>
>> Chao Zhu (12):
>>Add compiling definations for IBM Power architecture
>>Add atomic operations for IBM Power architecture
>>Add byte order operations for IBM Power architecture
>>Add CPU cycle operations for IBM Power architecture
>>Add prefetch operation for IBM Power architecture
>>Add spinlock operation for IBM Power architecture
>>Add vector memcpy for IBM Power architecture
>>Add CPU flag checking for IBM Power architecture
>>Remove iopl operation for IBM Power architecture
>>Add cache size define for IBM Power Architecture
>>Add huge page sizes for IBM Power architecture
>>Add memory support for IBM Power Architecture
> Could you share the status of the rework of these patches?
> Maybe that some parts could enter in 1.8 as a preview.
>




[dpdk-dev] [PATCH v2] librte_pmd_packet: add PMD for AF_PACKET-based virtual devices

2014-11-13 Thread Neil Horman
On Thu, Nov 13, 2014 at 02:03:18AM -0800, Thomas Monjalon wrote:
> Hi Neil and John,
> 
> I would like to wake up this very old thread.
> 
> 2014-10-08 15:14, Neil Horman:
> > On Wed, Oct 08, 2014 at 05:57:46PM +0200, Thomas Monjalon wrote:
> > > 2014-09-29 11:05, Bruce Richardson:
> > > > On Fri, Sep 26, 2014 at 10:08:55AM -0400, Neil Horman wrote:
> > > > > On Fri, Sep 26, 2014 at 11:28:05AM +0200, Thomas Monjalon wrote:
> > > > > > 3) There is no test associated with this PMD.
> > > > > That would have been a great comment to make a few months back, 
> > > > > though whats
> > > > > wrong with testpmd here?  That seems to be the same test that every 
> > > > > other pmd
> > > > > uses. What exactly are you looking for?
> > > 
> > > I was thinking of testing behaviour with different kernel configurations 
> > > and
> > > unit tests for --vdev options. But it's not a major blocker.
> > > 
> > Thats fine with me.  If theres a set of unit tests that you have 
> > documentation
> > for, I'm sure we would be happy to run them.  I presume you just want all 
> > the
> > pmd vdev option exercised?  Any specific sets of kernel configurations?
> 
> I don't really know which tests are needed. It could be a mix of unit tests
> and functional tests described in a test plan.
> The goal is to be able to validate the behaviour and check there is no
> regression. Ideally some corner cases could be described.
> I'm OK to integrate it as is. But future maintenance will probably need
> such inputs for validation tests.
> 
Do you have an example set of tests that the other pmd's have followed for this?

> > > If RedHat is committed to its maintenance, it could be integrated in 
> > > release 1.8.
> > > But I'd like it to be renamed as pmd_af_packet (or a better name) instead 
> > > of
> > > pmd_packet.
> > > 
> > John L. is on his way to plumbers at the moment, so is unable to comment, 
> > but
> > I'll try to get a few cycles to change the name of the PMD around.  And 
> > yes, I
> > thought that maintenance was implicit.  He's the author, of course he'll 
> > take
> > care of it :).  And I'll be glad to help
> 
> Do you have time in coming days to rebase and rename this PMD for inclusion
> in 1.8.0 release?
> 
> Thanks
> -- 
> Thomas
> 


[dpdk-dev] [PATCH v6 1/9] librte_mbuf:the rte_mbuf structure changes

2014-11-13 Thread Liu, Jijiang
Hi, 

> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Thursday, November 13, 2014 4:53 PM
> To: Liu, Jijiang
> Cc: Zhang, Helin; dev at dpdk.org; Richardson, Bruce
> Subject: Re: [dpdk-dev] [PATCH v6 1/9] librte_mbuf:the rte_mbuf structure
> changes
> 
> 2014-11-13 03:17, Liu, Jijiang:
> > From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> > > 2014-10-23 02:23, Zhang, Helin:
> > > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Thomas
> > > > Monjalon
> > > > > 2014-10-21 14:14, Liu, Jijiang:
> > > > > > From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> > > > > > > 2014-10-21 16:46, Jijiang Liu:
> > > > > > > > +   uint16_t packet_type;
> > > > > > >
> > > > > > > Why not name it "l2_type"?
> > > >
> > > > 'packet_type' is for storing the hardware identified packet type
> > > > upon different layers of protocols (l2, l3, l4, ...).
> > > > It is quite useful for user application or middle layer software
> > > > stacks, it can know what the packet type is without checking the
> > > > packet too
> > > much by software.
> > > > Actually ixgbe already has packet types (less than 10), which is
> > > > transcoded into
> > > 'ol_flags'.
> > > > For i40e, the packet type can represent about 256 types of packet,
> > > > 'ol_flags' does not have enough bits for it anymore. So put the
> > > > i40e packet types
> > > into mbuf would be better.
> > > > Also this field can be used for NON-Intel NICs, I think there must
> > > > be the similar concepts of other NICs. And 16 bits 'packet_type'
> > > > has several
> > > reserved bits for future and NON-Intel NICs.
> > >
> > > Thanks Helin, that's the best description of packet_type I've seen so far.
> > > It's not so clear in the commit log:
> > >   http://dpdk.org/browse/dpdk/commit/?id=73b7d59cf4f6faf
> > >
> > > > > > In datasheet, this term is called packet type(s).
> > > > >
> > > > > That's exactly the point I want you really understand!
> > > > > This is a field in generic mbuf structure, so your datasheet has no 
> > > > > value
> here.
> > > > >
> > > > > > Personally , I think packet type is  more clear what meaning of 
> > > > > > this field
> is .
> > > > >
> > > > > You cannot add an API field without knowing what will be its generic
> meaning.
> > > > > Please think about it and describe its scope.
> > >
> > > I integrated this patch with the VXLAN patchset in the hope that
> > > you'll improve the situation afterwards.
> > > This is the answer you recently gave to Olivier:
> > >   http://dpdk.org/ml/archives/dev/2014-November/007599.html
> > > "
> > >   Regarding adding a packet_type in mbuf, we ever had a lot of
> > > discussions as follows:
> > >   http://dpdk.org/ml/archives/dev/2014-October/007027.html
> > >   http://dpdk.org/ml/archives/dev/2014-September/005240.html
> > >   http://dpdk.org/ml/archives/dev/2014-September/005241.html
> > >   http://dpdk.org/ml/archives/dev/2014-September/005274.html
> > > "
> > >
> > > To sum up the situation:
> > > - We don't know what are the possible values of packet_type
> > > - It's only filled by i40e, while other drivers use ol_flags
> > > - There is no special value "unknown" which should be set by drivers
> > >   not supporting this feature.
> > > - Its only usage is to print a decimal value in
> > > app/test-pmd/rxonly.c
> > >
> > > It's now clear that nobody cares about this part of the API.
> > > So I'm going to remove packet_type from mbuf.
> > > I don't want to keep something that we don't know how to use, that
> > > is not consistent across drivers, and that overlap another API part 
> > > (ol_flags).
> >
> > The packet type in i40e is very important for users; using the packet type
> > can help to speed up packet analysis/identification in their
> > application, especially for tunneling packet formats.
> > Now I'm working on implementing the packet type definition in the rte_ethdev.h
> > file and the translation table in i40e, which is almost done.
> > The packet type definition in the rte_ethdev.h file looks like below.
> > /*
> >  * Ethernet packet type
> >  */
> > enum rte_eth_ptype {
> > /* undefined packet type, means HW can't recognise it */
> > RTE_PTYPE_UNDEF = 0,
> > ...
> >
> > /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4FRAG_PAY3,
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_PAY3,
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_UDP_PAY4,
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_TCP_PAY4,
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_SCTP_PAY4,
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_ICMP_PAY4,
> >
> > /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv6FRAG_PAY3,
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_PAY3,
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_UDP_PAY4,
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_TCP_PAY4,
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_SCTP_PAY4,
> > RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_ICMP_PAY4,
> >
> >

[dpdk-dev] [PATCH v2 1/2] Add new union field usr in mbuf->hash.

2014-11-13 Thread Thomas Monjalon
> > This field is added for librte_distributor. User of librte_distributor
> > is advocated to set value of mbuf->hash.usr before calling
> > rte_distributor_process. The value of usr is the tag which stands as
> > identifier of flow.
> > 
> > Signed-off-by: Qinglai Xiao 
> Acked-by: Bruce Richardson 

Applied

Thanks
-- 
Thomas


[dpdk-dev] [PATCH v3] Add in_flight_bitmask so as to use full 32 bits of tag.

2014-11-13 Thread Thomas Monjalon
2014-11-12 15:51, Bruce Richardson:
> On Tue, Nov 11, 2014 at 10:53:40AM +0200, jigsaw wrote:
> > This patch has little, if any, performance impact.
> > See the perf stat -d for original and patched version
> > of test_distributor_perf.
> 
> Thanks for running the test. Results look ok to me. I confirm my previous ack.

Applied

Thanks
-- 
Thomas


[dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages

2014-11-13 Thread Burakov, Anatoly
Hi Thomas and all

Are there any objections to this patch? If there are no objections to it, could 
someone perhaps ack it?

Thanks,
Anatoly

-Original Message-
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Anatoly Burakov
Sent: Tuesday, November 11, 2014 10:09 AM
To: dev at dpdk.org
Subject: [dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages

Multi-process DPDK application must mmap hugepages and PCI resources into the 
same virtual address space. By default the virtual addresses are chosen by the 
primary process automatically when calling the mmap.
But sometimes the chosen virtual addresses aren't usable in secondary process - 
for example, secondary process is linked with more libraries than primary 
process, and the library occupies the same address space that the primary 
process has requested for PCI mappings.

This patch makes EAL try and map PCI BARs right after the hugepages (instead of 
location chosen by mmap) in virtual memory, so that PCI BARs have less chance 
of ending up in random places in virtual memory.

Signed-off-by: Liang Xu 
Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c  | 30 --
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c  | 13 --
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 19 +++---
 lib/librte_eal/linuxapp/eal/include/eal_pci_init.h |  6 +
 4 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 5fe3961..79fbbb8 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -97,6 +97,25 @@ error:
return -1;
 }

+void *
+pci_find_max_end_va(void)
+{
+   const struct rte_memseg *seg = rte_eal_get_physmem_layout();
+   const struct rte_memseg *last = seg;
+   unsigned i = 0;
+
+   for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
+   if (seg->addr == NULL)
+   break;
+
+   if (seg->addr > last->addr)
+   last = seg;
+
+   }
+   return RTE_PTR_ADD(last->addr, last->len); }
+
+
 /* map a particular resource from a file */  void *  pci_map_resource(void 
*requested_addr, int fd, off_t offset, size_t size) @@ -106,21 +125,16 @@ 
pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
/* Map the PCI memory resource of device */
mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, offset);
-   if (mapaddr == MAP_FAILED ||
-   (requested_addr != NULL && mapaddr != requested_addr)) {
+   if (mapaddr == MAP_FAILED) {
RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s 
(%p)\n",
__func__, fd, requested_addr,
(unsigned long)size, (unsigned long)offset,
strerror(errno), mapaddr);
-   goto fail;
+   } else {
+   RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
}

-   RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
-
return mapaddr;
-
-fail:
-   return NULL;
 }

 /* parse the "resource" sysfs file */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c 
b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
index 7e62266..e53f06b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 

 #include 
 #include 
@@ -48,6 +49,8 @@

 static int pci_parse_sysfs_value(const char *filename, uint64_t *val);

+void *pci_map_addr = NULL;
+

 #define OFF_MAX  ((uint64_t)(off_t)-1)
 static int
@@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev)
if (maps[j].addr != NULL)
fail = 1;
else {
-   mapaddr = pci_map_resource(NULL, fd, 
(off_t)offset,
+   /* try mapping somewhere close to the end of 
hugepages */
+   if (pci_map_addr == NULL)
+   pci_map_addr = pci_find_max_end_va();
+
+   mapaddr = pci_map_resource(pci_map_addr, fd, 
(off_t)offset,
(size_t)maps[j].size);
-   if (mapaddr == NULL)
+   if (mapaddr == MAP_FAILED)
fail = 1;
+
+   pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) 
maps[j].size);
}

if (fail) {
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 
b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index c776ddc..c1246e8 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/

[dpdk-dev] [PATCH v6 1/9] librte_mbuf:the rte_mbuf structure changes

2014-11-13 Thread Thomas Monjalon
2014-11-13 11:24, Liu, Jijiang:
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> > 2014-11-13 03:17, Liu, Jijiang:
> > > From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> > > > 2014-10-23 02:23, Zhang, Helin:
> > > > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Thomas
> > > > > Monjalon
> > > > > > 2014-10-21 14:14, Liu, Jijiang:
> > > > > > > From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> > > > > > > > 2014-10-21 16:46, Jijiang Liu:
> > > > > > > > > + uint16_t packet_type;
> > > > > > > >
> > > > > > > > Why not name it "l2_type"?
> > > > >
> > > > > 'packet_type' is for storing the hardware identified packet type
> > > > > upon different layers of protocols (l2, l3, l4, ...).
> > > > > It is quite useful for user application or middle layer software
> > > > > stacks, it can know what the packet type is without checking the
> > > > > packet too
> > > > much by software.
> > > > > Actually ixgbe already has packet types (less than 10), which is
> > > > > transcoded into
> > > > 'ol_flags'.
> > > > > For i40e, the packet type can represent about 256 types of packet,
> > > > > 'ol_flags' does not have enough bits for it anymore. So put the
> > > > > i40e packet types
> > > > into mbuf would be better.
> > > > > Also this field can be used for NON-Intel NICs, I think there must
> > > > > be the similar concepts of other NICs. And 16 bits 'packet_type'
> > > > > has severl
> > > > reserved bits for future and NON-Intel NICs.
> > > >
> > > > Thanks Helin, that's the best description of packet_type I've seen so 
> > > > far.
> > > > It's not so clear in the commit log:
> > > > http://dpdk.org/browse/dpdk/commit/?id=73b7d59cf4f6faf
> > > >
> > > > > > > In datasheet, this term is called packet type(s).
> > > > > >
> > > > > > That's exactly the point I want you really understand!
> > > > > > This is a field in generic mbuf structure, so your datasheet has no 
> > > > > > value here.
> > > > > >
> > > > > > > Personally , I think packet type is  more clear what meaning of 
> > > > > > > this field is .
> > > > > >
> > > > > > You cannot add an API field without knowing what will be its 
> > > > > > generic meaning.
> > > > > > Please think about it and describe its scope.
> > > >
> > > > I integrated this patch with the VXLAN patchset in the hope that
> > > > you'll improve the situation afterwards.
> > > > This is the answer you recently gave to Olivier:
> > > > http://dpdk.org/ml/archives/dev/2014-November/007599.html
> > > > "
> > > > Regarding adding a packet_type in mbuf, we ever had a lot of
> > > > discussions as follows:
> > > > http://dpdk.org/ml/archives/dev/2014-October/007027.html
> > > > http://dpdk.org/ml/archives/dev/2014-September/005240.html
> > > > http://dpdk.org/ml/archives/dev/2014-September/005241.html
> > > > http://dpdk.org/ml/archives/dev/2014-September/005274.html
> > > > "
> > > >
> > > > To sum up the situation:
> > > > - We don't know what are the possible values of packet_type
> > > > - It's only filled by i40e, while other drivers use ol_flags
> > > > - There is no special value "unknown" which should be set by drivers
> > > >   not supporting this feature.
> > > > - Its only usage is to print a decimal value in
> > > > app/test-pmd/rxonly.c
> > > >
> > > > It's now clear that nobody cares about this part of the API.
> > > > So I'm going to remove packet_type from mbuf.
> > > > I don't want to keep something that we don't know how to use, that
> > > > is not consistent across drivers, and that overlap another API part 
> > > > (ol_flags).
> > >
> > > The packet type in 40e is very important for user, using packet type
> > > can help to speed up packet analysis/identification in their
> > > application, especially tunneling packet format.
> > > Now I'm working on implementing packet type definition in rte_ethdev.h
> > > file and  translation table in i40e, which is almost done.
> > > The packet type  definition in in rte_ethdev.h file like below.
> > > /*
> > >  * Ethernet packet type
> > >  */
> > > enum rte_eth_ptype {
> > > /* undefined packet type, means HW can't recognise it */
> > > RTE_PTYPE_UNDEF = 0,
> > > ...
> > >
> > > /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */
> > > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4FRAG_PAY3,
> > > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_PAY3,
> > > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_UDP_PAY4,
> > > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_TCP_PAY4,
> > > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_SCTP_PAY4,
> > > RTE_PTYPE_IPv4_GRENAT_MAC_IPv4_ICMP_PAY4,
> > >
> > > /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */
> > > RTE_PTYPE_IPv4_GRENAT_MAC_IPv6FRAG_PAY3
> > > RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_PAY3,
> > > RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_UDP_PAY4,
> > > RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_TCP_PAY4,
> > > RTE_PTYPE_IPv4_GRENAT_MAC_IPv6_SCTP_PAY4,
> > >

[dpdk-dev] [PATCH v5 05/21] i40e: implement operations to add/delete flow director

2014-11-13 Thread Wu, Jingjing
Hi, Pablo & Thomas

You are correct. This is a merge mistake.

Besides that, there are some comments from Konstantin, so I'm reworking this 
patchset.

The new patchset is coming soon.

Thanks for the reminder.

Jingjing

> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Thursday, November 13, 2014 5:50 PM
> To: Wu, Jingjing
> Cc: dev at dpdk.org; De Lara Guarch, Pablo
> Subject: Re: [dpdk-dev] [PATCH v5 05/21] i40e: implement operations to 
> add/delete flow
> director
> 
> Hi Jingjing,
> 
> You didn't reply to Pablo's comment.
> Any news of this patchset? Could it be reviewed?
> 
> Thanks
> --
> Thomas
> 
> 2014-11-05 21:18, De Lara Guarch, Pablo:
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Jingjing Wu
> > > Deal with two operations for flow director
> > >  - RTE_ETH_FILTER_ADD
> > >  - RTE_ETH_FILTER_DELETE
> > > Encode the flow inputs to programming packet.
> > > Sent the packet to filter programming queue and check status
> > > on the status report queue.
> > >
> > > Signed-off-by: Jingjing Wu 
> > > ---
> > >  lib/librte_pmd_i40e/i40e_ethdev.c |   3 +
> > >  lib/librte_pmd_i40e/i40e_ethdev.h |   3 +
> > >  lib/librte_pmd_i40e/i40e_fdir.c   | 622
> > > ++
> > >  3 files changed, 628 insertions(+)
> > >
> > > diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c
> > > b/lib/librte_pmd_i40e/i40e_ethdev.c
> > > index 8195e8a..fb43efb 100644
> > > --- a/lib/librte_pmd_i40e/i40e_ethdev.c
> > > +++ b/lib/librte_pmd_i40e/i40e_ethdev.c
> > > @@ -4577,6 +4577,7 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
> > >enum rte_filter_op filter_op,
> > >void *arg)
> > >  {
> > > + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-
> > > >dev_private);
> > >   int ret = 0;
> > >
> > >   if (dev == NULL)
> > > @@ -4585,6 +4586,8 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
> > >   switch (filter_type) {
> > >   case RTE_ETH_FILTER_TUNNEL:
> > >   ret = i40e_tunnel_filter_handle(dev, filter_op, arg);
> >
> > Missing break here?
> >
> > > + case RTE_ETH_FILTER_FDIR:
> > > + ret = i40e_fdir_ctrl_func(pf, filter_op, arg);
> > >   break;
> > >   default:
> > >   PMD_DRV_LOG(WARNING, "Filter type (%d) not
> > > supported",



[dpdk-dev] [PATCH v2] librte_pmd_packet: add PMD for AF_PACKET-based virtual devices

2014-11-13 Thread Thomas Monjalon
2014-11-13 06:14, Neil Horman:
> On Thu, Nov 13, 2014 at 02:03:18AM -0800, Thomas Monjalon wrote:
> > 2014-10-08 15:14, Neil Horman:
> > > On Wed, Oct 08, 2014 at 05:57:46PM +0200, Thomas Monjalon wrote:
> > > > 2014-09-29 11:05, Bruce Richardson:
> > > > > On Fri, Sep 26, 2014 at 10:08:55AM -0400, Neil Horman wrote:
> > > > > > On Fri, Sep 26, 2014 at 11:28:05AM +0200, Thomas Monjalon wrote:
> > > > > > > 3) There is no test associated with this PMD.
> > > > > > That would have been a great comment to make a few months back, 
> > > > > > though whats
> > > > > > wrong with testpmd here?  That seems to be the same test that every 
> > > > > > other pmd
> > > > > > uses. What exactly are you looking for?
> > > > 
> > > > I was thinking of testing behaviour with different kernel 
> > > > configurations and
> > > > unit tests for --vdev options. But it's not a major blocker.
> > > > 
> > > Thats fine with me.  If theres a set of unit tests that you have 
> > > documentation
> > > for, I'm sure we would be happy to run them.  I presume you just want all 
> > > the
> > > pmd vdev option exercised?  Any specific sets of kernel configurations?
> > 
> > I don't really know which tests are needed. It could be a mix of unit tests
> > and functionnal tests described in a test plan.
> > The goal is to be able to validate the behaviour and check there is no
> > regression. Ideally some corner cases could be described.
> > I'm OK to integrate it as is. But future maintenance will probably need
> > such inputs for validation tests.
> > 
> Do you have an example set of tests that the other pmd's have followed for 
> this?

You can check this:
http://dpdk.org/browse/tools/dts/tree/test_plans/pmd_test_plan.rst

http://dpdk.org/browse/tools/dts/tree/test_plans/pmd_bonded_test_plan.rst

As I said, we can integrate the AF_PACKET PMD without such a test plan.
But we are going to improve testing of many areas in DPDK.

> > > > If RedHat is committed for its maintenance, it could integrated in 
> > > > release 1.8.
> > > > But I'd like it to be renamed as pmd_af_packet (or a better name) 
> > > > instead of
> > > > pmd_packet.
> > > > 
> > > John L. is on his way to plumbers at the moment, so is unable to comment, 
> > > but
> > > I'll try to get a few cycles to change the name of the PMD around.  And 
> > > yes, I
> > > thought that maintenance was implicit.  He's the author, of course he'll 
> > > take
> > > care of it :).  And I'll be glad to help
> > 
> > Do you have time in coming days to rebase and rename this PMD for inclusion
> > in 1.8.0 release?

Do you think a sub-tree with a pull-request model would help you with
the maintenance of this PMD?

-- 
Thomas


[dpdk-dev] [PATCH] x32 ABI support, first iteration

2014-11-13 Thread Daniel Mrzyglod
Signed-off-by: Konstantin Ananyev 
Signed-off-by: Daniel Mrzyglod 
---
 config/defconfig_x86_x32-native-linuxapp-gcc | 46 
 mk/arch/x86_x32/rte.vars.mk  | 63 
 2 files changed, 109 insertions(+)
 create mode 100644 config/defconfig_x86_x32-native-linuxapp-gcc
 create mode 100644 mk/arch/x86_x32/rte.vars.mk

diff --git a/config/defconfig_x86_x32-native-linuxapp-gcc 
b/config/defconfig_x86_x32-native-linuxapp-gcc
new file mode 100644
index 000..fb0afc4
--- /dev/null
+++ b/config/defconfig_x86_x32-native-linuxapp-gcc
@@ -0,0 +1,46 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+#include "common_linuxapp"
+
+CONFIG_RTE_MACHINE="native"
+
+CONFIG_RTE_ARCH="x86_x32"
+CONFIG_RTE_ARCH_X86_X32=y
+
+CONFIG_RTE_TOOLCHAIN="gcc"
+CONFIG_RTE_TOOLCHAIN_GCC=y
+
+#
+# KNI is not supported on 32-bit
+#
+CONFIG_RTE_LIBRTE_KNI=n
diff --git a/mk/arch/x86_x32/rte.vars.mk b/mk/arch/x86_x32/rte.vars.mk
new file mode 100644
index 000..9507af7
--- /dev/null
+++ b/mk/arch/x86_x32/rte.vars.mk
@@ -0,0 +1,63 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#
+# arch:
+#
+#   - define ARCH variable (overriden by cmdline or by previous
+# optional define in machine .mk)
+#   - define CROSS variable (overriden by cmdline or previous define
+# in machine .mk)
+#   - define CPU_CFLAGS variable (overriden by cmdline or previous
+# define in machine .mk)
+#   - define CPU_LDFLAGS variable (overriden by cmdline or previous
+# define in machine .mk)
+#   - define CPU_ASFLAGS variable (overriden by cmdline or previous
+# define in machine .mk)
+#   - may override any previously defined variable
+#
+# examples for CONFIG_RTE_ARCH: i

[dpdk-dev] [PATCH v2] librte_pmd_null: Add null PMD

2014-11-13 Thread Thomas Monjalon
Hi,

2014-10-01 13:57, mukawa at igel.co.jp:
> 'null PMD' is a driver of the virtual device particulary designed to measure
> performance of DPDK PMDs. When an application call rx, null PMD just allocates
> mbufs and returns those. Also tx, the PMD just frees mbufs.
> 
> The PMD has following options.
> - size: specify packe size allocated by RX. Default packet size is 64.
> - copy: specify 1 or 0 to enable or disable copy while RX and TX.
>   Default value is 0(disbaled).
>   This option is used for emulating more realistic data transfer.
>   Copy size is equal to packet size.
> 
> To use the PMD, enable CONFIG_RTE_BUILD_SHARED_LIB in config file. Then
> compile the PMD as shared library. The library can be linked using '-d'
> option when an application invokes.
> 
> Here is an example.
> $ sudo ./testpmd -c f -n 4 -d librte_pmd_null.so \
>   --vdev 'eth_null0' --vdev 'eth_null1' -- -i
> 
> If testpmd is compiled with CONFIG_RTE_BUILD_SHARED_LIB, it may need to
> specify more libraries using '-d' option.
> 
> Signed-off-by: Tetsuya Mukawa 

This patch is still pending because nobody reviewed it.

-- 
Thomas


[dpdk-dev] [PATCH] x32 ABI support, first iteration

2014-11-13 Thread Mrzyglod, DanielX T

This patch provides support for x32 ABI.
x32 ABI provides benefits of x86-64 while using 32-bit pointers and avoiding 
overhead of 64-bit pointers.

Daniel Mrzyglod (1):
Konstantin Ananyev(1):
  x32 ABI support, first iteration

 config/defconfig_x86_x32-native-linuxapp-gcc | 46 
 mk/arch/x86_x32/rte.vars.mk  | 63 
 2 files changed, 109 insertions(+)
 create mode 100644 config/defconfig_x86_x32-native-linuxapp-gcc
 create mode 100644 mk/arch/x86_x32/rte.vars.mk

--
2.1.0



[dpdk-dev] [PATCH] skeleton app: Very simple code for l2fwding

2014-11-13 Thread Thomas Monjalon
Hi Bruce,

2014-06-26 21:22, Bruce Richardson:
> This is a very simple example app for doing packet forwarding with the
> Intel DPDK. It's designed to serve as a start point for people new to
> the Intel DPDK and who want to develop a new app.
> 
> Therefore it's meant to:
> * have as good a performance out-of-the-box as possible, using the
>   best-known settings for configuring the PMDs, so that any new apps can
>   be based off it.
> * be kept as short as possible to make it easy to understand it and get
>   started with it.
> 
> Signed-off-by: Bruce Richardson 

What about rebasing this patch now that the default Rx/Tx configuration is
available in the API?

-- 
Thomas


[dpdk-dev] [PATCH v3 1/2] ethdev: new structure of Ethertype Filter for filter_ctrl api

2014-11-13 Thread Jingjing Wu
From: "jingjing.wu" 

A new structure of ethertype filter is defined in rte_eth_ctrl.h
for filter_ctrl api

Signed-off-by: jingjing.wu 
---
 lib/librte_ether/rte_eth_ctrl.h | 20 
 1 file changed, 20 insertions(+)

diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h
index 8dd384d..73bc296 100644
--- a/lib/librte_ether/rte_eth_ctrl.h
+++ b/lib/librte_ether/rte_eth_ctrl.h
@@ -53,6 +53,7 @@ enum rte_filter_type {
RTE_ETH_FILTER_NONE = 0,
RTE_ETH_FILTER_MACVLAN,
RTE_ETH_FILTER_TUNNEL,
+   RTE_ETH_FILTER_ETHERTYPE,
RTE_ETH_FILTER_MAX
 };

@@ -155,6 +156,25 @@ struct rte_eth_tunnel_filter_conf {
uint16_t queue_id;  /** < queue number. */
 };

+/**
+ * Define all structures for Ethertype Filter type.
+ */
+
+#define RTE_ETHTYPE_FLAGS_MAC0x0001 /**< If set, compare mac */
+#define RTE_ETHTYPE_FLAGS_DROP   0x0002 /**< If set, drop packet when match */
+
+/**
+ * A structure used to define the ethertype filter entry
+ * to support RTE_ETH_FILTER_ETHERTYPE with RTE_ETH_FILTER_ADD,
+ * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations.
+ */
+struct rte_eth_ethertype_filter {
+   struct ether_addr mac_addr;   /**< Mac address to match. */
+   uint16_t ether_type;  /**< Ether type to match */
+   uint16_t flags;   /**< Flags from RTE_ETHTYPE_FLAGS_* */
+   uint16_t queue;   /**< Queue assigned to when match*/
+};
+
 #ifdef __cplusplus
 }
 #endif
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 2/2] i40e: implement operation to add/delete an ethertype filter

2014-11-13 Thread Jingjing Wu
From: "jingjing.wu" 

Handle the RTE_ETH_FILTER_ADD and RTE_ETH_FILTER_DELETE operations
on ethertype filter.

Signed-off-by: jingjing.wu 
---
 lib/librte_pmd_i40e/i40e_ethdev.c | 99 +++
 1 file changed, 99 insertions(+)

diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c 
b/lib/librte_pmd_i40e/i40e_ethdev.c
index 5074262..499270e 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -205,6 +205,12 @@ static int i40e_dev_udp_tunnel_add(struct rte_eth_dev *dev,
struct rte_eth_udp_tunnel *udp_tunnel);
 static int i40e_dev_udp_tunnel_del(struct rte_eth_dev *dev,
struct rte_eth_udp_tunnel *udp_tunnel);
+static int i40e_ethertype_filter_set(struct i40e_pf *pf,
+   struct rte_eth_ethertype_filter *filter,
+   bool add);
+static int i40e_ethertype_filter_handle(struct rte_eth_dev *dev,
+   enum rte_filter_op filter_op,
+   void *arg);
 static int i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
enum rte_filter_type filter_type,
enum rte_filter_op filter_op,
@@ -4990,6 +4996,96 @@ i40e_pf_config_mq_rx(struct i40e_pf *pf)
return ret;
 }

+/*
+ * Configure ethertype filter, which can director packet by filtering
+ * with mac address and ether_type or only ether_type
+ */
+static int
+i40e_ethertype_filter_set(struct i40e_pf *pf,
+   struct rte_eth_ethertype_filter *filter,
+   bool add)
+{
+   struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+   struct i40e_control_filter_stats stats;
+   uint16_t flags = 0;
+   int ret;
+
+   if (filter->queue >= pf->dev_data->nb_rx_queues) {
+   PMD_DRV_LOG(ERR, "Invalid queue ID");
+   return -EINVAL;
+   }
+   if (filter->ether_type == ETHER_TYPE_IPv4 ||
+   filter->ether_type == ETHER_TYPE_IPv6) {
+   PMD_DRV_LOG(ERR, "unsupported ether_type(0x%04x) in"
+   " control packet filter.", filter->ether_type);
+   return -EINVAL;
+   }
+   if (filter->ether_type == ETHER_TYPE_VLAN)
+   PMD_DRV_LOG(WARNING, "filter vlan ether_type in first tag is"
+   " not supported.");
+
+   if (!(filter->flags & RTE_ETHTYPE_FLAGS_MAC))
+   flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC;
+   if (filter->flags & RTE_ETHTYPE_FLAGS_DROP)
+   flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP;
+   flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TO_QUEUE;
+
+   memset(&stats, 0, sizeof(stats));
+   ret = i40e_aq_add_rem_control_packet_filter(hw,
+   filter->mac_addr.addr_bytes,
+   filter->ether_type, flags,
+   pf->main_vsi->seid,
+   filter->queue, add, &stats, NULL);
+
+   PMD_DRV_LOG(INFO, "add/rem control packet filter, return %d,"
+" mac_etype_used = %u, etype_used = %u,"
+" mac_etype_free = %u, etype_free = %u\n",
+ret, stats.mac_etype_used, stats.etype_used,
+stats.mac_etype_free, stats.etype_free);
+   if (ret < 0)
+   return -ENOSYS;
+   return 0;
+}
+
+/*
+ * Handle operations for ethertype filter.
+ */
+static int
+i40e_ethertype_filter_handle(struct rte_eth_dev *dev,
+   enum rte_filter_op filter_op,
+   void *arg)
+{
+   struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+   int ret = 0;
+
+   if (filter_op == RTE_ETH_FILTER_NOP)
+   return ret;
+
+   if (arg == NULL) {
+   PMD_DRV_LOG(ERR, "arg shouldn't be NULL for operation %u",
+   filter_op);
+   return -EINVAL;
+   }
+
+   switch (filter_op) {
+   case RTE_ETH_FILTER_ADD:
+   ret = i40e_ethertype_filter_set(pf,
+   (struct rte_eth_ethertype_filter *)arg,
+   TRUE);
+   break;
+   case RTE_ETH_FILTER_DELETE:
+   ret = i40e_ethertype_filter_set(pf,
+   (struct rte_eth_ethertype_filter *)arg,
+   FALSE);
+   break;
+   default:
+   PMD_DRV_LOG(ERR, "unsupported operation %u\n", filter_op);
+   ret = -ENOSYS;
+   break;
+   }
+   return ret;
+}
+
 static int
 i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
 enum rte_filter_type filter_type,
@@ -5008,6 +5104,9 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
case RTE_ETH_FILTER_TUNNEL:
ret = i40e_tunnel_filter_handle(dev, filter_op, arg);
break;
+   case RTE_ETH_FILTER_ETHERTYPE:
+   

[dpdk-dev] [PATCH v3 0/2] support ethertype filter on fortville

2014-11-13 Thread Jingjing Wu
From: "jingjing.wu" 

The patch set supports ethertype filter on fortville.

v3 changes:
 - redefine the control packet filter to ethertype filter

v2 changes:
 - strip the filter APIs definitions from this patch set

jingjing.wu (2):
  ethdev: new structure of Ethertype Filter for filter_ctrl api
  i40e: implement operation to add/delete an ethertype filter

 lib/librte_ether/rte_eth_ctrl.h   | 20 
 lib/librte_pmd_i40e/i40e_ethdev.c | 99 +++
 2 files changed, 119 insertions(+)

-- 
1.8.1.4



[dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages

2014-11-13 Thread Bruce Richardson
On Thu, Nov 13, 2014 at 11:34:22AM +, Burakov, Anatoly wrote:
> Hi Thomas and all
> 
> Are there any objections to this patch? If there are no objections to it, 
> could someone perhaps ack it?
> 

No objections, just a quick request for clarification below.

/Bruce

> Thanks,
> Anatoly
> 
> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Anatoly Burakov
> Sent: Tuesday, November 11, 2014 10:09 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages
> 
> Multi-process DPDK application must mmap hugepages and PCI resources into the 
> same virtual address space. By default the virtual addresses are chosen by 
> the primary process automatically when calling the mmap.
> But sometimes the chosen virtual addresses aren't usable in secondary process 
> - for example, secondary process is linked with more libraries than primary 
> process, and the library occupies the same address space that the primary 
> process has requested for PCI mappings.
> 
> This patch makes EAL try and map PCI BARs right after the hugepages (instead 
> of location chosen by mmap) in virtual memory, so that PCI BARs have less 
> chance of ending up in random places in virtual memory.
> 
> Signed-off-by: Liang Xu 
> Signed-off-by: Anatoly Burakov 
> ---
>  lib/librte_eal/linuxapp/eal/eal_pci.c  | 30 
> --
>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c  | 13 --
>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 19 +++---
>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h |  6 +
>  4 files changed, 55 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
> b/lib/librte_eal/linuxapp/eal/eal_pci.c
> index 5fe3961..79fbbb8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> @@ -97,6 +97,25 @@ error:
>   return -1;
>  }
>  
> +void *
> +pci_find_max_end_va(void)
> +{
> + const struct rte_memseg *seg = rte_eal_get_physmem_layout();
> + const struct rte_memseg *last = seg;
> + unsigned i = 0;
> +
> + for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
> + if (seg->addr == NULL)
> + break;
> +
> + if (seg->addr > last->addr)
> + last = seg;
> +
> + }
> + return RTE_PTR_ADD(last->addr, last->len); }
> +
> +
>  /* map a particular resource from a file */  void *  pci_map_resource(void 
> *requested_addr, int fd, off_t offset, size_t size) @@ -106,21 +125,16 @@ 
> pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
>   /* Map the PCI memory resource of device */
>   mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
>   MAP_SHARED, fd, offset);
> - if (mapaddr == MAP_FAILED ||
> - (requested_addr != NULL && mapaddr != requested_addr)) {

Why has this check been removed from here? I assume it is replaced by a new
check in secondary processes that I see added below, but perhaps you could
explain the reason for the change?

> + if (mapaddr == MAP_FAILED) {
>   RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s 
> (%p)\n",
>   __func__, fd, requested_addr,
>   (unsigned long)size, (unsigned long)offset,
>   strerror(errno), mapaddr);
> - goto fail;
> + } else {
> + RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
>   }
>  
> - RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
> -
>   return mapaddr;
> -
> -fail:
> - return NULL;
>  }
>  
>  /* parse the "resource" sysfs file */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c 
> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> index 7e62266..e53f06b 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> @@ -35,6 +35,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -48,6 +49,8 @@
>  
>  static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
>  
> +void *pci_map_addr = NULL;
> +
>  
>  #define OFF_MAX  ((uint64_t)(off_t)-1)
>  static int
> @@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>   if (maps[j].addr != NULL)
>   fail = 1;
>   else {
> - mapaddr = pci_map_resource(NULL, fd, 
> (off_t)offset,
> + /* try mapping somewhere close to the end of 
> hugepages */
> + if (pci_map_addr == NULL)
> + pci_map_addr = pci_find_max_end_va();
> +
> + mapaddr = pci_map_resource(pci_map_addr, fd, 
> (off_t)offset,
>   (size_t)maps[j].size);
> -   

[dpdk-dev] Panic in rte MEMPOOL__mempool_check_cookies()

2014-11-13 Thread Bruce Richardson
On Wed, Nov 12, 2014 at 09:50:50PM -0500, Kamraan Nasim wrote:
> Hi Bruce,
> 
> The issue seems to be triggered by a combination of using the
> CONFIG_RTE_LIBRTE_MEMPOOL_DEBUG  option and contention in the mempool(small
> mempool size).
> 
> I disabled that and the header panics went away.
> 
> Thank you for your help!

No problem, though it does mean that we have a bug in the mempool debug
functions that will have to be investigated and fixed in the future. :-(

> 
> --Kam
> 
> On Fri, Nov 7, 2014 at 4:48 AM, Bruce Richardson  intel.com
> > wrote:
> 
> > On Thu, Nov 06, 2014 at 04:28:18PM -0500, Kamraan Nasim wrote:
> > > Greetings,
> > >
> > > I have been hitting this issue fairly consistently for the ixgbe driver
> > >
> > > MEMPOOL: obj=0x7ffeed1f5d00, mempool=0x7ffeecb69bc0,
> > cookie=badbadbadadd2e55
> > > PANIC in __mempool_check_cookies():
> > > MEMPOOL: bad header cookie (get)
> > >
> > > It seems to be a corruption in the mempool bound to my ixgbe port. What I
> > > have observed is that this ONLY happens if I initialize dpdk(i.e. start
> > > dpdk application) AFTER traffic is already flowing in through the port.
> > If
> > > I initialize dpdk and bind BEFORE I start traffic then things seem to
> > work
> > > fine.
> > >
> > > Any clues on why this might be happening?
> > >
> > > A bit stumped, so would really appreciate all the help I can get on this
> > > one.
> > >
> >
> > Hi.
> > Can you confirm if your application works ok with the mempool debug options
> > disabled? The mempool debug options are not often used because they slow
> > things
> > down so it's possible the bug could be in the cookie checking itself.
> >
> > /Bruce
> >
> > > Thanks,
> > > Kam
> > >
> > >
> > > (bt for your reference)
> > >
> > > #2  0x00408cc6 in __rte_panic (funcname=0x571100
> > > "__mempool_check_cookies", format=
> > > 0x568fb0 "MEMPOOL: bad header cookie (get)\n%.0s")
> > > at
> > >
> > /b/knasim/bandwagon/sbn/src/share/dpdk/lib/librte_eal/linuxapp/eal/eal_debug.c:83
> > > #3  0x004af027 in __mempool_check_cookies (rxq= > > out>)
> > > at
> > >
> > /b/knasim/bandwagon/sbn/src/share/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:357
> > > #4  rte_mempool_get_bulk (rxq=)
> > > at
> > >
> > /b/knasim/bandwagon/sbn/src/share/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:1094
> > > #5  ixgbe_rx_alloc_bufs (rxq=)
> > > at
> > >
> > /b/knasim/bandwagon/sbn/src/share/dpdk/lib/librte_pmd_ixgbe/ixgbe_rxtx.c:997
> > > #6  0x004afce9 in rx_recv_pkts (rx_queue=0x7ffeec8edbc0,
> > > rx_pkts=0x900410,
> > > nb_pkts=)
> > > at
> > >
> > /b/knasim/bandwagon/sbn/src/share/dpdk/lib/librte_pmd_ixgbe/ixgbe_rxtx.c:1074
> > > #7  ixgbe_recv_pkts_bulk_alloc (rx_queue=0x7ffeec8edbc0,
> > rx_pkts=0x900410,
> > > nb_pkts=)
> > > at
> > >
> > /b/knasim/bandwagon/sbn/src/share/dpdk/lib/librte_pmd_ixgbe/ixgbe_rxtx.c:1124
> > > #8  0x00520d36 in rte_eth_rx_burst (lp=0x900340, n_workers=14,
> > > bsz_rd=, bsz_wr=
> > > 144, pos_lb=0 '\000') at /usr/lib/dpdk/include/rte_ethdev.h:2368
> >


[dpdk-dev] [PATCH] skeleton app: Very simple code for l2fwding

2014-11-13 Thread Bruce Richardson
On Thu, Nov 13, 2014 at 01:20:46PM +0100, Thomas Monjalon wrote:
> Hi Bruce,
> 
> 2014-06-26 21:22, Bruce Richardson:
> > This is a very simple example app for doing packet forwarding with the
> > Intel DPDK. It's designed to serve as a start point for people new to
> > the Intel DPDK and who want to develop a new app.
> > 
> > Therefore it's meant to:
> > * have as good a performance out-of-the-box as possible, using the
> >   best-known settings for configuring the PMDs, so that any new apps can
> >   be based off it.
> > * be kept as short as possible to make it easy to understand it and get
> >   started with it.
> > 
> > Signed-off-by: Bruce Richardson 
> 
> What about a rebase of this patch now that Rx/Tx default conf is
> available in API?
>

Yes, I've done some work on doing so; I just need to finish it up and I can
resubmit it soon, I hope.

/Bruce


[dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages

2014-11-13 Thread Burakov, Anatoly
> Why has this check been removed from here. I assume it is replaced by a
> new check in secondary processes that I see added below, but perhaps you
> could explain the reason for the change?

Sure. The reason behind that change is that we can't expect to get a mapping
at the exact same address (for whatever reason, e.g. something else is mapped
there, alignment, etc.), and in the primary process that is not an error. In
other words, removing this check makes it a "best-effort" mechanism, rather
than mandating that PCI resources be mapped exactly after the hugepages,
exactly one after another. A "wrong" mapping will still result in failure in
secondary processes, and we still risk mapping something somewhere the
secondary process can't map, but that probability is decreased because we're
now asking EAL to map PCI resources closer to where we most likely have some
free virtual space (as evidenced by the tests done by the original submitter
of the patch).

Hope that makes sense.

Thanks,
Anatoly


[dpdk-dev] [PATCH v8] eal: map PCI memory resources after hugepages

2014-11-13 Thread Bruce Richardson
On Tue, Nov 11, 2014 at 10:09:25AM +0000, Anatoly Burakov wrote:
> Multi-process DPDK application must mmap hugepages and PCI resources
> into the same virtual address space. By default the virtual addresses
> are chosen by the primary process automatically when calling the mmap.
> But sometimes the chosen virtual addresses aren't usable in secondary
> process - for example, secondary process is linked with more libraries
> than primary process, and the library occupies the same address space
> that the primary process has requested for PCI mappings.
> 
> This patch makes EAL try and map PCI BARs right after the hugepages
> (instead of location chosen by mmap) in virtual memory, so that PCI BARs
> have less chance of ending up in random places in virtual memory.
> 
> Signed-off-by: Liang Xu 
> Signed-off-by: Anatoly Burakov 

Acked-by: Bruce Richardson 

> ---
>  lib/librte_eal/linuxapp/eal/eal_pci.c  | 30 
> --
>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c  | 13 --
>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 19 +++---
>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h |  6 +
>  4 files changed, 55 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
> b/lib/librte_eal/linuxapp/eal/eal_pci.c
> index 5fe3961..79fbbb8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> @@ -97,6 +97,25 @@ error:
>   return -1;
>  }
>  
> +void *
> +pci_find_max_end_va(void)
> +{
> + const struct rte_memseg *seg = rte_eal_get_physmem_layout();
> + const struct rte_memseg *last = seg;
> + unsigned i = 0;
> +
> + for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
> + if (seg->addr == NULL)
> + break;
> +
> + if (seg->addr > last->addr)
> + last = seg;
> +
> + }
> + return RTE_PTR_ADD(last->addr, last->len);
> +}
> +
> +
>  /* map a particular resource from a file */
>  void *
>  pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
> @@ -106,21 +125,16 @@ pci_map_resource(void *requested_addr, int fd, off_t 
> offset, size_t size)
>   /* Map the PCI memory resource of device */
>   mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
>   MAP_SHARED, fd, offset);
> - if (mapaddr == MAP_FAILED ||
> - (requested_addr != NULL && mapaddr != requested_addr)) {
> + if (mapaddr == MAP_FAILED) {
>   RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s 
> (%p)\n",
>   __func__, fd, requested_addr,
>   (unsigned long)size, (unsigned long)offset,
>   strerror(errno), mapaddr);
> - goto fail;
> + } else {
> + RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
>   }
>  
> - RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
> -
>   return mapaddr;
> -
> -fail:
> - return NULL;
>  }
>  
>  /* parse the "resource" sysfs file */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c 
> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> index 7e62266..e53f06b 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> @@ -35,6 +35,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -48,6 +49,8 @@
>  
>  static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
>  
> +void *pci_map_addr = NULL;
> +
>  
>  #define OFF_MAX  ((uint64_t)(off_t)-1)
>  static int
> @@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>   if (maps[j].addr != NULL)
>   fail = 1;
>   else {
> - mapaddr = pci_map_resource(NULL, fd, 
> (off_t)offset,
> + /* try mapping somewhere close to the end of 
> hugepages */
> + if (pci_map_addr == NULL)
> + pci_map_addr = pci_find_max_end_va();
> +
> + mapaddr = pci_map_resource(pci_map_addr, fd, 
> (off_t)offset,
>   (size_t)maps[j].size);
> - if (mapaddr == NULL)
> + if (mapaddr == MAP_FAILED)
>   fail = 1;
> +
> + pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) 
> maps[j].size);
>   }
>  
>   if (fail) {
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 
> b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index c776ddc..c1246e8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -37,6 +37,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  

[dpdk-dev] collect2: error: ld returned 1 exit status -->> DPDK 1.7.1

2014-11-13 Thread sothy shan
Hi!

I run the command below and got an error with Collect2. May I know why?

Best regards
Sothy

Command:>>>
$export RTE_TARGET="x86_64-ivshmem-linuxapp-gcc"
$make CONFIG_RTE_BUILD_COMBINE_LIBS=y CONFIG_RTE_BUILD_SHARED_LIB=y install
T="$RTE_TARGET"
Error:>>>


== Build app/test-acl
  CC main.o
  LD testacl
/home/cubiq/sothy/dpdk-1.7.1/x86_64-ivshmem-linuxapp-gcc/lib/libintel_dpdk.so:
référence indéfinie vers « log2 »
/home/cubiq/sothy/dpdk-1.7.1/x86_64-ivshmem-linuxapp-gcc/lib/libintel_dpdk.so:
référence indéfinie vers « ceil »
/home/cubiq/sothy/dpdk-1.7.1/x86_64-ivshmem-linuxapp-gcc/lib/libintel_dpdk.so:
référence indéfinie vers « pow »
/home/cubiq/sothy/dpdk-1.7.1/x86_64-ivshmem-linuxapp-gcc/lib/libintel_dpdk.so:
référence indéfinie vers « round »
collect2: error: ld returned 1 exit status
make[5]: *** [testacl] Erreur 1
make[4]: *** [test-acl] Erreur 2
make[3]: *** [app] Erreur 2
make[2]: *** [all] Erreur 2
make[1]: *** [x86_64-ivshmem-linuxapp-gcc_install] Erreur 2
make: *** [install] Erreur 2


[dpdk-dev] LLC miss in librte_distributor

2014-11-13 Thread jigsaw
Hi,

Well, I am giving up on the idea of optimizing away the LLC misses caused by
QPI traffic.
The queue-based messaging has even worse performance than polling the same
buf from both cores.
That is the nature of the busy-polling model.
I guess we have to accept it as a fact, unless the programming model can be
changed to a biased locking model,
which favors one lock-owner core. But unfortunately the biased locking
model doesn't seem to be applicable to the distributor.

thx &
rgds,
-ql

On Wed, Nov 12, 2014 at 7:11 PM, jigsaw  wrote:

> Hi Bruce,
>
> Thanks for your reply.
>
> I agree that to logically divide the distributor functionality is the best
> solution.
>
> In the meantime I tried some tricks and the result looks good: for the same
> amount of pkts (1M), the LLC stores and loads decrease by 90%, and the miss
> rates for both decrease to 25%.
> The L1 miss rate increases a bit, though.
> The combined result is that the time spent decreases by 50%.
> The main change I made is to use a FIFO to transfer the pkts from
> distributor to worker, while the current buf is only used as a signalling
> channel. This change has a very obvious effect on saving LLC access.
>
> However, the test is based on the simple test program, rather on DPDK
> application. So I will try same tricks on DPDK and see if it has same
> effect.
> Besides, I need more time to read a few more papers to get it right.
>
> I will try to propose a patch if I manage to get a positive result. It
> will take several days coz I'm not fully dedicated to this issue.
>
> I will come back with more details.
>
> BTW, I have another user story: a worker can ask the distributor to
> schedule a pkt.
> It arises in the following condition: after processing a pkt with tag value
> 1, the worker changes its tag to 2, so the distributor has to be
> asked to deliver the pkt with the new tag value to the proper worker.
> I already have the patch ready but I will hold it back until previous
> patch is committed.
> I need also your comments on this user story.
>
> thx &
> rgds,
> -ql
>
> On Wed, Nov 12, 2014 at 6:07 PM, Bruce Richardson <
> bruce.richardson at intel.com> wrote:
>
>> On Wed, Nov 12, 2014 at 10:37:33AM +0200, jigsaw wrote:
>> > Hi,
>> >
>> > OK it is now very clear it is due to memory transactions between
>> different
>> > nodes.
>> >
>> > The test program is here:
>> > https://gist.github.com/jigsawecho/6a2e78d65f0fe67adf1b
>> >
>> > The test machine topology is:
>> >
>> > NUMA node0 CPU(s): 0-7,16-23
>> > NUMA node1 CPU(s): 8-15,24-31
>> >
>> > Change the 3rd param from 0 to 1 at line 135, and the LLC cache load
>> miss
>> > boost from  0.09% to 33.45%.
>> > The LLC cache store miss boost from 0.027% to 50.695%.
>> >
>> > Clearly the root cause is transaction crossing the node boundary.
>> >
>> > But then how to resolve this problem is another topic...
>> >
>> > thx &
>> > rgds,
>> > -ql
>> >
>> >
>>
>> Having traffic cross QPI is always a problem, and there could be a number
>> of ways
>> to solve it. Probably the best solution is to have multiple NICs with some
>> directly connected to each socket, with the packets from each NIC
>> processed locally
>> on the socket that NIC is connected to.
>>
>> If that is not possible, then other solutions need to be looked at. E.g.
>> For an app
>> wanting to use a distributor, I would suggest investigating if two
>> distributors
>> could be used - one on each socket. Then use a ring to burst-transfer
>> large
>> groups of packets from one socket to another and then use the distributor
>> locally.
>> This would involve far less QPI traffic than using a distributor with
>> remote workers.
>>
>> Regards,
>> /Bruce
>>
>> >
>> > On Tue, Nov 11, 2014 at 5:37 PM, jigsaw  wrote:
>> >
>> > > Hi Bruce,
>> > >
>> > > I noticed that librte_distributor has a quite severe LLC miss problem
>> > > when running on 16 cores.
>> > > While on 8 cores, there's no such problem.
>> > > The test runs on a Intel(R) Xeon(R) CPU E5-2670, a SandyBridge with 32
>> > > cores on 2 sockets.
>> > >
>> > > The test case is the distributor_perf_autotest, i.e.
>> > > in app/test/test_distributor_perf.c.
>> > > The test result is collected by command:
>> > >
>> > > perf stat -e LLC-load-misses,LLC-loads,LLC-store-misses,LLC-stores
>> ./test
>> > > -cff -n2 --no-huge
>> > >
>> > > Note that test results show that with or without hugepages, the LLC
>> > > miss rate remains the same. So I will just show the --no-huge config.
>> > >
>> > > With 8 cores, the LLC miss rate is OK:
>> > >
>> > > LLC-load-misses  26750
>> > > LLC-loads  93979233
>> > > LLC-store-misses  432263
>> > > LLC-stores  69954746
>> > >
>> > > That is 0.028% of load miss and 0.62% of store miss.
>> > >
>> > > With 16 cores, the LLC miss rate is very high:
>> > >
>> > > LLC-load-misses  70263520
>> > > LLC-loads  143807657
>> > > LLC-store-misses  23115990
>> > > LLC-stores  63692854
>> > >
>> > > That is 48.9% load miss and 36.3% store miss.
>> > >
>> > > Most of the load miss happens at first line of
>> rte_distributor_poll_pk

[dpdk-dev] [PATCH 0/2] rewritten rte_hash_crc() call

2014-11-13 Thread Thomas Monjalon
Any comment on these patches?

2014-09-03 12:05, Yerden Zhumabekov:
> As SSE4.2 provides CRC32 instructions with either 32 and 64 bit operands,
> new rte_hash_crc_8byte() call assisted with _mm_crc32_u64 intrinsic may be
> useful.
> 
> Then, rte_hash_crc() function is redesigned to take advantage of both 32
> and 64 bit operands. This improves the function's performance significantly.
> 
> Results of my test run on a single CPU core are below.
> 
> CPU: Intel(R) Xeon(R) CPU E5-2620 0 @ 2.00GHz
> Number of iterations/chunks: 52428800
> Chunk size: 24
>   rte_hash_crc:0.379 sec, hash: 0x14c64e11
>   rte_hash_crc_new:0.253 sec, hash: 0x14c64e11
> Chunk size: 25
>   rte_hash_crc:0.442 sec, hash: 0xa9afc779
>   rte_hash_crc_new:0.316 sec, hash: 0xa9afc779
> Chunk size: 26
>   rte_hash_crc:0.442 sec, hash: 0x92f2284b
>   rte_hash_crc_new:0.316 sec, hash: 0x92f2284b
> Chunk size: 27
>   rte_hash_crc:0.442 sec, hash: 0x7c4655ff
>   rte_hash_crc_new:0.316 sec, hash: 0x7c4655ff
> Chunk size: 28
>   rte_hash_crc:0.442 sec, hash: 0xf577c6b4
>   rte_hash_crc_new:0.316 sec, hash: 0xf577c6b4
> Chunk size: 29
>   rte_hash_crc:0.505 sec, hash: 0x6e18ba55
>   rte_hash_crc_new:0.337 sec, hash: 0x6e18ba55
> Chunk size: 30
>   rte_hash_crc:0.505 sec, hash: 0x35f07dbb
>   rte_hash_crc_new:0.337 sec, hash: 0x35f07dbb
> Chunk size: 31
>   rte_hash_crc:0.505 sec, hash: 0x1bf2ee8c
>   rte_hash_crc_new:0.337 sec, hash: 0x1bf2ee8c
> 
> Yerden Zhumabekov (2):
>   hash: add new rte_hash_crc_8byte call
>   hash: rte_hash_crc uses 8- and 4-byte CRC32 intrinsics
> 
>  lib/librte_hash/rte_hash_crc.h |   47 
> +---
>  1 file changed, 39 insertions(+), 8 deletions(-)



[dpdk-dev] Load-balancing position field in DPDK load_balancer sample app vs. Hash table

2014-11-13 Thread Kamraan Nasim
Hello,

So I've borrowed some code from the DPDK load_balancer sample application,
specifically the load-balancing position (the 29th byte), to determine which
worker lcore to forward the packet to.

The idea is that flow affinity should be maintained and all packets from
the same flow would have the same checksum/5-tuple value

worker_id = packet[load_balancing_field] % n_workers

The question is: how reliable is this load-balancing position? I am tempted
to use hash tables, but I think this position-based mechanism may be faster.

What has people's experience with this been in general?


--Kam


[dpdk-dev] one lightwight rte_eal_init() for SECONDARY processes which only use sharedmemory

2014-11-13 Thread Igor Ryzhov
This is really useful, thank you!

Best regards,
Igor Ryzhov

> 12 . 2014 ?., ? 6:22, Chi, Xiaobo (NSN - CN/Hangzhou)  nsn.com> ???(?):
> 
> Hi,
> Background:
> What we are doing now is porting a telecom network element to be cloud-based.
> For one of our products, DPDK is applied not only for fastpath/dataplane
> processing, but also for Distributed Message eXchange (DMX) between different
> processes/applications, which may be located in different VMs or even on
> different hosts. For such a DMX system, in one VM, we have one DPDK-based
> dmxdemo (which acts as the PRIMARY) that is in charge of distributing
> messages between different applications, and dozens of applications (acting
> as SECONDARY) that use the DPDK-based
> rte_tx_ring/rte_rx_ring/mempool/memzone to send messages to and receive
> messages from dmxdemo.
> 
> Problem:
> Here, these DPDK-based SECONDARY processes need only DPDK's hugepage-based
> shared-memory mechanism and its upper libs (such as ring, mempool, etc.);
> they do not need CPU core pinning, iopl privilege changing, PCI devices,
> timers, alarms, interrupts, shared_driver_list, core_info, per-core threads,
> etc. For this kind of SECONDARY process, the current rte_eal_init() is too
> heavy.
> I have seen that some others have also met similar troubles.
> 
> Solution:
> I wrote a lightweight rte_eal_init(), called rte_eal_secondary_mem_init(),
> as follows.  It only initializes shared memory and mandatory resources. I
> look forward to your review and hope this code can be merged into the DPDK
> main branch.
> 
> static void eal_secondary_mem_parse_args(int argc, char **argv)
> {
>static struct option lgopts[] = {
>{OPT_HUGE_DIR, 1, 0, 0},
>{OPT_FILE_PREFIX, 1, 0, 0},
>{0, 0, 0, 0}
>};
> 
>int opt;
>int option_index;
> 
>while ((opt = getopt_long(argc, argv, "", lgopts, &option_index)) != 
> EOF) {
> 
>if (!opt ) {
> if (!strcmp(lgopts[option_index].name, OPT_HUGE_DIR)) {
>internal_config.hugepage_dir = optarg;
> }
> else if (!strcmp(lgopts[option_index].name, OPT_FILE_PREFIX)) {
>internal_config.hugefile_prefix = optarg;
> }
>  }
>   }
> }
> 
> int rte_eal_secondary_mem_init( int argc, char **argv )
> {
>static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
>const char *logid;
> 
>if (!rte_atomic32_test_and_set(&run_once))
>return -1;
> 
>logid = strrchr(argv[0], '/');
>logid = strdup(logid ? logid + 1: argv[0]);
> 
>if (rte_eal_log_early_init() < 0)
>rte_panic("Cannot init early logs\n");
> 
>   memset( &internal_config, 0, sizeof( struct internal_config ) );
>   /*this is only for secondary PRBs */
>   internal_config.process_type = RTE_PROC_SECONDARY;
>internal_config.hugefile_prefix = HUGEFILE_PREFIX_DEFAULT;
>internal_config.hugepage_dir = NULL;
>   /* user can freely define the hugefile_prefix and hugepage_dir */
>   eal_secondary_mem_parse_args( argc, argv );
> 
>   RTE_LOG(INFO, EAL, "prefix=%s, dir=%s.\n",internal_config.hugefile_prefix, 
> internal_config.hugepage_dir );
> 
>   /* To share memory config with PRIMARY process */
>   internal_config.no_shconf = 0;
>   rte_config_init();
> 
>if (rte_eal_memory_init() < 0)
>rte_panic("Cannot init memory\n");
> 
>if (rte_eal_memzone_init() < 0)
>rte_panic("Cannot init memzone\n");
> 
>if (rte_eal_log_init(logid, LOG_DAEMON ) < 0)
>rte_panic("Cannot init logs\n");
> 
>return 0;
> }
> 
> brgs,
> chi xiaobo
> 
> 
> 



[dpdk-dev] [PATCH v7] distributor_app: new sample app

2014-11-13 Thread Thomas Monjalon
Hi Reshma,

> > From: Reshma Pattan 
> > 
> > A new sample app that shows the usage of the distributor library. This
> > app works as follows:
> > 
> > * An RX thread runs which pulls packets from each ethernet port in turn
> >   and passes those packets to worker using a distributor component.
> > * The workers take the packets in turn, and determine the output port
> >   for those packets using basic l2forwarding doing an xor on the source
> >   port id.
> > * The RX thread takes the returned packets from the workers and enqueue
> >   those packets into an rte_ring structure.
> > * A TX thread pulls the packets off the rte_ring structure and then
> >   sends each packet out the output port specified previously by the worker
> > * Command-line option support provided only for portmask.
> > 
> > v5 change:
> > * Handled graceful shutdown of rx and tx threads upon SIGINT.
> > 
> > v6 change:
> > * modified graceful shutdown logic to handle all threads upon SIGINT
> > * removed call to rte_eal_pci_probe()
> > * added separate print_stats function
> > 
> > v7 change:
> > * passing NULL as rxconf/txconf parameter for rx/tx queue setup
> > * removed zero-ed fields from port conf structure
> > * fixed style related comments
> > 
> > 
> > Signed-off-by: Bruce Richardson 
> > Signed-off-by: Reshma Pattan 
> 
> Acked-by: Pablo de Lara 

Before applying this patch, I have 2 comments:
- Other examples directories don't have _app suffix.
- checkpatch reports "switch and case should be at the same indent"

If you agree I will fix these 2 minor issues before applying.

Thanks
-- 
Thomas


[dpdk-dev] [PATCH v2 0/6] vmxnet3 pmd fixes/improvement

2014-11-13 Thread Thomas Monjalon
Hi Waterman,

You wanted to update your regression tests:
http://dpdk.org/ml/archives/dev/2014-November/007598.html
Should I wait a test report before integrating these patches?

Is there someone else reviewing these patches?

-- 
Thomas


2014-11-04 17:49, Yong Wang:
> This patch series include various fixes and improvement to the
> vmxnet3 pmd driver.
> 
> V2:
> - Add more commit descriptions
> - Add a new patch that improve tx performance for small packet
> 
> Yong Wang (6):
>   vmxnet3: Fix VLAN Rx stripping
>   vmxnet3: Add VLAN Tx offload
>   vmxnet3: Fix dev stop/restart bug
>   vmxnet3: Add rx pkt check offloads
>   vmxnet3: Perf improvement on the rx path
>   vmxnet3: Leverage data_ring on tx path



[dpdk-dev] [PATCH v2] librte_pmd_packet: add PMD for AF_PACKET-based virtual devices

2014-11-13 Thread Neil Horman
On Thu, Nov 13, 2014 at 12:57:25PM +0100, Thomas Monjalon wrote:
> 2014-11-13 06:14, Neil Horman:
> > On Thu, Nov 13, 2014 at 02:03:18AM -0800, Thomas Monjalon wrote:
> > > 2014-10-08 15:14, Neil Horman:
> > > > On Wed, Oct 08, 2014 at 05:57:46PM +0200, Thomas Monjalon wrote:
> > > > > 2014-09-29 11:05, Bruce Richardson:
> > > > > > On Fri, Sep 26, 2014 at 10:08:55AM -0400, Neil Horman wrote:
> > > > > > > On Fri, Sep 26, 2014 at 11:28:05AM +0200, Thomas Monjalon wrote:
> > > > > > > > 3) There is no test associated with this PMD.
> > > > > > > That would have been a great comment to make a few months back, 
> > > > > > > though whats
> > > > > > > wrong with testpmd here?  That seems to be the same test that 
> > > > > > > every other pmd
> > > > > > > uses. What exactly are you looking for?
> > > > > 
> > > > > I was thinking of testing behaviour with different kernel 
> > > > > configurations and
> > > > > unit tests for --vdev options. But it's not a major blocker.
> > > > > 
> > > > Thats fine with me.  If theres a set of unit tests that you have 
> > > > documentation
> > > > for, I'm sure we would be happy to run them.  I presume you just want 
> > > > all the
> > > > pmd vdev option exercised?  Any specific sets of kernel configurations?
> > > 
> > > I don't really know which tests are needed. It could be a mix of unit 
> > > tests
> > > and functionnal tests described in a test plan.
> > > The goal is to be able to validate the behaviour and check there is no
> > > regression. Ideally some corner cases could be described.
> > > I'm OK to integrate it as is. But future maintenance will probably need
> > > such inputs for validation tests.
> > > 
> > Do you have an example set of tests that the other pmd's have followed for 
> > this?
> 
> You can check this:
>   http://dpdk.org/browse/tools/dts/tree/test_plans/pmd_test_plan.rst
>   
> http://dpdk.org/browse/tools/dts/tree/test_plans/pmd_bonded_test_plan.rst
> 
> As I said, we can integrate AF_PACKET PMD without such test plan.
> But we are going to improve testing of many areas in DPDK.
> 
Thank you, I'll take a look in the AM

> > > > > If RedHat is committed for its maintenance, it could integrated in 
> > > > > release 1.8.
> > > > > But I'd like it to be renamed as pmd_af_packet (or a better name) 
> > > > > instead of
> > > > > pmd_packet.
> > > > > 
> > > > John L. is on his way to plumbers at the moment, so is unable to 
> > > > comment, but
> > > > I'll try to get a few cycles to change the name of the PMD around.  And 
> > > > yes, I
> > > > thought that maintenance was implicit.  He's the author, of course 
> > > > he'll take
> > > > care of it :).  And I'll be glad to help
> > > 
> > > Do you have time in coming days to rebase and rename this PMD for 
> > > inclusion
> > > in 1.8.0 release?
> 
> Do you think a sub-tree with pull request model would help you for
> maintenance of this PMD?
> 
I think that's a question for John to answer, but IMHO, I don't think the pmd
will have such patch volume that subtrees will be needed.

Neil

> -- 
> Thomas
> 


[dpdk-dev] [PATCH] x32 ABI support, first iteration

2014-11-13 Thread Neil Horman
On Thu, Nov 13, 2014 at 12:01:31PM +0000, Daniel Mrzyglod wrote:
> Signed-off-by: Konstantin Ananyev 
> Signed-off-by: Daniel Mrzyglod 
> ---
>  config/defconfig_x86_x32-native-linuxapp-gcc | 46 
>  mk/arch/x86_x32/rte.vars.mk  | 63 
> 
>  2 files changed, 109 insertions(+)
>  create mode 100644 config/defconfig_x86_x32-native-linuxapp-gcc
>  create mode 100644 mk/arch/x86_x32/rte.vars.mk
> 
> diff --git a/config/defconfig_x86_x32-native-linuxapp-gcc 
> b/config/defconfig_x86_x32-native-linuxapp-gcc
> new file mode 100644
> index 000..fb0afc4
> --- /dev/null
> +++ b/config/defconfig_x86_x32-native-linuxapp-gcc
> @@ -0,0 +1,46 @@
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> +#   All rights reserved.
> +#
> +#   Redistribution and use in source and binary forms, with or without
> +#   modification, are permitted provided that the following conditions
> +#   are met:
> +#
> +# * Redistributions of source code must retain the above copyright
> +#   notice, this list of conditions and the following disclaimer.
> +# * Redistributions in binary form must reproduce the above copyright
> +#   notice, this list of conditions and the following disclaimer in
> +#   the documentation and/or other materials provided with the
> +#   distribution.
> +# * Neither the name of Intel Corporation nor the names of its
> +#   contributors may be used to endorse or promote products derived
> +#   from this software without specific prior written permission.
> +#
> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +#
> +
> +#include "common_linuxapp"
> +
> +CONFIG_RTE_MACHINE="native"
> +
> +CONFIG_RTE_ARCH="x86_x32"
> +CONFIG_RTE_ARCH_X86_X32=y
> +
> +CONFIG_RTE_TOOLCHAIN="gcc"
> +CONFIG_RTE_TOOLCHAIN_GCC=y
> +
> +#
> +# KNI is not supported on 32-bit
> +#
> +CONFIG_RTE_LIBRTE_KNI=n
> diff --git a/mk/arch/x86_x32/rte.vars.mk b/mk/arch/x86_x32/rte.vars.mk
> new file mode 100644
> index 000..9507af7
> --- /dev/null
> +++ b/mk/arch/x86_x32/rte.vars.mk
> @@ -0,0 +1,63 @@
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> +#   All rights reserved.
> +#
> +#   Redistribution and use in source and binary forms, with or without
> +#   modification, are permitted provided that the following conditions
> +#   are met:
> +#
> +# * Redistributions of source code must retain the above copyright
> +#   notice, this list of conditions and the following disclaimer.
> +# * Redistributions in binary form must reproduce the above copyright
> +#   notice, this list of conditions and the following disclaimer in
> +#   the documentation and/or other materials provided with the
> +#   distribution.
> +# * Neither the name of Intel Corporation nor the names of its
> +#   contributors may be used to endorse or promote products derived
> +#   from this software without specific prior written permission.
> +#
> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +
> +#
> +# arch:
> +#
> +#   - define ARCH variable (overriden by cmdline or by previous
> +# optional define in machine .mk)
> +#   - define CROSS variable (overriden by cmdline or previous define
> +# in machine .mk)
> +#   - define CPU_CFLAGS variable (overriden by cmdline or previous
> +# define in machine .mk)
> +

[dpdk-dev] [PATCH 0/2] rewritten rte_hash_crc() call

2014-11-13 Thread Neil Horman
On Thu, Nov 13, 2014 at 06:33:14PM +0100, Thomas Monjalon wrote:
> Any comment on these patches?
> 
> 2014-09-03 12:05, Yerden Zhumabekov:
> > As SSE4.2 provides CRC32 instructions with either 32 and 64 bit operands,
> > new rte_hash_crc_8byte() call assisted with _mm_crc32_u64 intrinsic may be
> > useful.
> > 
> > Then, rte_hash_crc() function is redesigned to take advantage of both 32
> > and 64 bit operands. This improves the function's performance significantly.
> > 
> > Results of my test run on a single CPU core are below.
> > 
> > CPU: Intel(R) Xeon(R) CPU E5-2620 0 @ 2.00GHz
> > Number of iterations/chunks: 52428800
> > Chunk size: 24
> >   rte_hash_crc:0.379 sec, hash: 0x14c64e11
> >   rte_hash_crc_new:0.253 sec, hash: 0x14c64e11
> > Chunk size: 25
> >   rte_hash_crc:0.442 sec, hash: 0xa9afc779
> >   rte_hash_crc_new:0.316 sec, hash: 0xa9afc779
> > Chunk size: 26
> >   rte_hash_crc:0.442 sec, hash: 0x92f2284b
> >   rte_hash_crc_new:0.316 sec, hash: 0x92f2284b
> > Chunk size: 27
> >   rte_hash_crc:0.442 sec, hash: 0x7c4655ff
> >   rte_hash_crc_new:0.316 sec, hash: 0x7c4655ff
> > Chunk size: 28
> >   rte_hash_crc:0.442 sec, hash: 0xf577c6b4
> >   rte_hash_crc_new:0.316 sec, hash: 0xf577c6b4
> > Chunk size: 29
> >   rte_hash_crc:0.505 sec, hash: 0x6e18ba55
> >   rte_hash_crc_new:0.337 sec, hash: 0x6e18ba55
> > Chunk size: 30
> >   rte_hash_crc:0.505 sec, hash: 0x35f07dbb
> >   rte_hash_crc_new:0.337 sec, hash: 0x35f07dbb
> > Chunk size: 31
> >   rte_hash_crc:0.505 sec, hash: 0x1bf2ee8c
> >   rte_hash_crc_new:0.337 sec, hash: 0x1bf2ee8c
> > 
> > Yerden Zhumabekov (2):
> >   hash: add new rte_hash_crc_8byte call
> >   hash: rte_hash_crc uses 8- and 4-byte CRC32 intrinsics
> > 
> >  lib/librte_hash/rte_hash_crc.h |   47 
> > +---
> >  1 file changed, 39 insertions(+), 8 deletions(-)
> 
> 
Yeah, sorry I didn't speak up earlier.  I meant to ask if the _mm_crc32_u64
intrinsic will emit software-emulated versions of the SSE4.2 instruction in the
event that you build with a config that doesn't enable SSE4.2?  If not, then
NAK, since this will break on the default build.  In that event you'll have to
modify the new function to do a runtime cpu flags check to either just use the
instruction inlined with some asm, or emulate it in software.

Neil