date:20160421

[dpdk-dev] PKT_RX_VLAN_PKT when VLAN stripping is disabled

2016-04-21 Thread John Daley (johndale)

Hi,

When VLAN stripping is disabled, X710 and 82599ES act differently for me in 
this case when receiving VLAN tagged packets. On 82599ES the flag is set but on 
X710 the flag not set.

Do I maybe have old X710 firmware? Or is it not set for X710 on purpose in this 
case and instead the flag is used to indicate if vlan_tci is valid? Right now 
the enic pmd does what my X710 does, which I think is incorrect and I want to 
fix it.

Thanks,
John

[dpdk-dev] [PATCH] examples/l2fwd-jobstats: Fix a typo.

2016-04-21 Thread Rami Rosen

Signed-off-by: Rami Rosen 
---
 examples/l2fwd-jobstats/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/l2fwd-jobstats/main.c b/examples/l2fwd-jobstats/main.c
index 9f3a77d..dc17f28 100644
--- a/examples/l2fwd-jobstats/main.c
+++ b/examples/l2fwd-jobstats/main.c
@@ -990,7 +990,7 @@ main(int argc, char **argv)
struct rte_jobstats *job = &qconf->port_fwd_jobs[i];

portid = qconf->rx_port_list[i];
-   printf("Setting forward jon for port %u\n", portid);
+   printf("Setting forward job for port %u\n", portid);

snprintf(name, RTE_DIM(name), "port %u fwd", portid);
/* Setup forward job.
-- 
2.5.5

[dpdk-dev] [PATCH 1/1] add writing registers

2016-04-21 Thread Rami Rosen

Change-Id: Iaeeefcbc29d109fef17797d6a888cf2448499646
Signed-off-by: Rami Rosen 
---
 fvlproto/build.sh   |4 +
 fvlproto/config.c   |7 +
 fvlproto/myMakefile |   29 +
 fvlproto/mybuild.sh |5 +
 fvlproto/regs.txt   |   78 ++
 fvlproto/regs.txt.org   | 1998 +++
 fvlproto/run.sh |   20 +
 fvlproto/test/Makefile  |   29 +
 fvlproto/test/readreg.c |  167 
 fvlproto/test/run.sh|3 +
 10 files changed, 2340 insertions(+)
 create mode 100755 fvlproto/build.sh
 create mode 100644 fvlproto/myMakefile
 create mode 100755 fvlproto/mybuild.sh
 create mode 100644 fvlproto/regs.txt
 create mode 100644 fvlproto/regs.txt.org
 create mode 100644 fvlproto/run.sh
 create mode 100644 fvlproto/test/Makefile
 create mode 100644 fvlproto/test/readreg.c
 create mode 100644 fvlproto/test/run.sh

diff --git a/fvlproto/build.sh b/fvlproto/build.sh
new file mode 100755
index 000..bfffb6b
--- /dev/null
+++ b/fvlproto/build.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+export RTE_SDK=/work/src/dpdk-16.04
+make
diff --git a/fvlproto/config.c b/fvlproto/config.c
index f7db457..1659154 100644
--- a/fvlproto/config.c
+++ b/fvlproto/config.c
@@ -148,6 +148,9 @@ static void parse_print_regs_input(const char *arg)
char input_line[256];
char *address, *val, *curr;
const char delimiters[] = " ";
+   uint32_t reg;
+   uint32_t value;
+   char *eptr;

if (arg == NULL)
return;
@@ -166,6 +169,10 @@ static void parse_print_regs_input(const char *arg)
address = strtok(NULL, delimiters);
val = strtok(NULL, delimiters);
printf("addess=%s val=%s\n", address, val);
+   reg = strtol(address, &eptr, 16);
+   value = strtol(val, &eptr, 16);
+   printf("Writing reg=%x value=%x\n", reg, value);
+   i40e_dev_reg_write(app.nic_rx_port, reg, value, 0);
}

fclose(file);
diff --git a/fvlproto/myMakefile b/fvlproto/myMakefile
new file mode 100644
index 000..00b878b
--- /dev/null
+++ b/fvlproto/myMakefile
@@ -0,0 +1,29 @@
+#   BSD LICENSE
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overriden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = readreg
+
+# all source are stored in SRCS-y
+SRCS-y := readreg.c
+
+CFLAGS += -O3 -g
+CFLAGS += $(WERROR_FLAGS)
+#CFLAGS_config.o := -D_GNU_SOURCE
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+#CFLAGS_main.o += -Wno-return-type
+CFLAGS_readreg.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/fvlproto/mybuild.sh b/fvlproto/mybuild.sh
new file mode 100755
index 000..55b89f1
--- /dev/null
+++ b/fvlproto/mybuild.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+export RTE_SDK=/work/src/dpdk-16.04
+make -f myMakefile
+
diff --git a/fvlproto/regs.txt b/fvlproto/regs.txt
new file mode 100644
index 000..68f9ef3
--- /dev/null
+++ b/fvlproto/regs.txt
@@ -0,0 +1,78 @@
+ write  0x1c0a90   0x5  
+ write  0x1c0a9c   0x3180c  
+ write  0x1c0aa0   0x2096  
+ write  0x1c0aa4   0x5  
+ write  0x1c0aa8   0x3180c  
+ write  0x1c0aac   0x2096  
+ write  0x1c0a90   0x305  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0xb  
+ write  0x1c0aa4   0x305  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0xb  
+ write  0x1c0a90   0x110  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x28000cb  
+ write  0x1c0aa4   0x110  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x28000cb  
+ write  0x1c0a90   0x111  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x28b  
+ write  0x1c0aa4   0x111  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x28b  
+ write  0x1c0a90   0x113  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x2c4a7d5  
+ write  0x1c0aa4   0x113  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x2c4a7d5  
+ write  0x1c0a90   0x114  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x2800219  
+ write  0x1c0aa4   0x114  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x2800219  
+ write  0x1c0a90   0x115  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x280031b  
+ write  0x1c0aa4   0x115  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x280031b  
+ write  0x1c0a90   0x138  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x2b2ac41  
+ write  0x1c0aa4   0x138  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x2b2ac41  
+ write  0x1c0a90   0x139  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x2840019  
+ write  0x1c0aa4   0x139  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x2840019  
+ write  0x1c0a90   0x13a  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x2c36e9b  
+ write  0x1c0aa4   0x13a  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x2c36e9b  
+ write  0x1c0a90   0x13b  
+ write  0x1c0a9c   0x0  
+ write  0x1c

[dpdk-dev] [PATCH v2] kni: don't reassign ethernet address every time an interface goes up

2016-04-21 Thread Igor Ryzhov

Currently every time a KNI interface goes up, its ethernet address is 
reassigned.
After this patch ethernet address is assigned only once, at initialization time.

Suggested-by: Sergey Balabanov 
Signed-off-by: Igor Ryzhov 
---
v2:
- change variable name from dev to net_dev
- slightly rewrite commit message header and body

 lib/librte_eal/linuxapp/kni/kni_misc.c | 10 ++
 lib/librte_eal/linuxapp/kni/kni_net.c  |  9 -
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
b/lib/librte_eal/linuxapp/kni/kni_misc.c
index ae8133f..871437f 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -542,6 +543,15 @@ kni_ioctl_create(struct net *net,
if (pci)
pci_dev_put(pci);

+   if (kni->lad_dev)
+   memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
+   else
+   /*
+* Generate random mac address. eth_random_addr() is the newer
+* version of generating mac address in linux kernel.
+*/
+   random_ether_addr(net_dev->dev_addr);
+
ret = register_netdev(net_dev);
if (ret) {
KNI_ERR("error %i registering device \"%s\"\n",
diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c 
b/lib/librte_eal/linuxapp/kni/kni_net.c
index cfa8339..3d2d246 100644
--- a/lib/librte_eal/linuxapp/kni/kni_net.c
+++ b/lib/librte_eal/linuxapp/kni/kni_net.c
@@ -69,15 +69,6 @@ kni_net_open(struct net_device *dev)
struct rte_kni_request req;
struct kni_dev *kni = netdev_priv(dev);

-   if (kni->lad_dev)
-   memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
-   else
-   /*
-* Generate random mac address. eth_random_addr() is the newer
-* version of generating mac address in linux kernel.
-*/
-   random_ether_addr(dev->dev_addr);
-
netif_start_queue(dev);

memset(&req, 0, sizeof(req));
-- 
2.6.4

[dpdk-dev] [PATCH] kni: don't rewrite ethernet address every time an interface goes up

2016-04-21 Thread Igor Ryzhov

Self nack. Forgot to change variable name from dev to net_dev. Will send v2.

> 21 ???. 2016 ?., ? 18:12, Igor Ryzhov  ???(?):
> 
> Now every time a KNI interface goes up, its ethernet address is overwritten.
> After this patch ethernet address is assigned only once, at initialization
> time.
> 
> Suggested-by: Sergey Balabanov 
> Signed-off-by: Igor Ryzhov 
> ---
> lib/librte_eal/linuxapp/kni/kni_misc.c | 10 ++
> lib/librte_eal/linuxapp/kni/kni_net.c  |  9 -
> 2 files changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
> b/lib/librte_eal/linuxapp/kni/kni_misc.c
> index ae8133f..45bcf32 100644
> --- a/lib/librte_eal/linuxapp/kni/kni_misc.c
> +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
> @@ -26,6 +26,7 @@
> #include 
> #include 
> #include 
> +#include 
> #include 
> #include 
> #include 
> @@ -542,6 +543,15 @@ kni_ioctl_create(struct net *net,
>   if (pci)
>   pci_dev_put(pci);
> 
> + if (kni->lad_dev)
> + memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
> + else
> + /*
> +  * Generate random mac address. eth_random_addr() is the newer
> +  * version of generating mac address in linux kernel.
> +  */
> + random_ether_addr(dev->dev_addr);
> +
>   ret = register_netdev(net_dev);
>   if (ret) {
>   KNI_ERR("error %i registering device \"%s\"\n",
> diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c 
> b/lib/librte_eal/linuxapp/kni/kni_net.c
> index cfa8339..3d2d246 100644
> --- a/lib/librte_eal/linuxapp/kni/kni_net.c
> +++ b/lib/librte_eal/linuxapp/kni/kni_net.c
> @@ -69,15 +69,6 @@ kni_net_open(struct net_device *dev)
>   struct rte_kni_request req;
>   struct kni_dev *kni = netdev_priv(dev);
> 
> - if (kni->lad_dev)
> - memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
> - else
> - /*
> -  * Generate random mac address. eth_random_addr() is the newer
> -  * version of generating mac address in linux kernel.
> -  */
> - random_ether_addr(dev->dev_addr);
> -
>   netif_start_queue(dev);
> 
>   memset(&req, 0, sizeof(req));
> -- 
> 2.6.4
>

[dpdk-dev] [PATCH] kni: set kni mac on ioctl_create

2016-04-21 Thread Igor Ryzhov

Hello.

I rebased a patch and added Suggested-by string.
Check it, please: http://dpdk.org/dev/patchwork/patch/12188/ 
.

Best regards,
Igor

> 18 ? 2016 ?., ? 5:14, Zhang, Helin  ???(?):
> 
> Hi Sergey
> 
>> -Original Message-
>> From: dev [mailto:dev-bounces at dpdk.org ] 
>> On Behalf Of Sergey Balabanov
>> Sent: Friday, August 28, 2015 9:06 PM
>> To: dev at dpdk.org 
>> Subject: [dpdk-dev] [PATCH] kni: set kni mac on ioctl_create
>> 
>> There is a situation when ioctl returns zero mac address (00:00:00:00:00:00)
>> for just created kni. The situation happens because kni mac is set on 
>> 'ipconfig
>> up' event (kni_net_open callback) not on kni creation (kni_ioctl_create).
> Could you help to clarify a bit of the real issue? What's wrong there?
> 
>> 
>> Signed-off-by: Sergey Balabanov 
>> ---
>> lib/librte_eal/linuxapp/kni/kni_misc.c | 10 ++
>> lib/librte_eal/linuxapp/kni/kni_net.c  |  9 -
>> 2 files changed, 10 insertions(+), 9 deletions(-)
>> 
>> diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c
>> b/lib/librte_eal/linuxapp/kni/kni_misc.c
>> index 2e9fa89..61f83a0 100644
>> --- a/lib/librte_eal/linuxapp/kni/kni_misc.c
>> +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
>> @@ -28,6 +28,7 @@
>> #include 
>> #include 
>> #include 
>> +#include  /* eth_type_trans */
>> 
>> #include 
>> #include "kni_dev.h"
>> @@ -465,6 +466,15 @@ kni_ioctl_create(unsigned int ioctl_num, unsigned
>> long ioctl_param)
>>  if (pci)
>>  pci_dev_put(pci);
>> 
>> +if (kni->lad_dev)
>> +memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr,
>> ETH_ALEN);
>> +else
>> +/*
>> + * Generate random mac address. eth_random_addr() is the
>> newer
>> + * version of generating mac address in linux kernel.
>> + */
>> +random_ether_addr(net_dev->dev_addr);
>> +
> A rebase is needed, as a lot of changes after that. Thanks!
> 
> Helin
>>  ret = register_netdev(net_dev);
>>  if (ret) {
>>  KNI_ERR("error %i registering device \"%s\"\n", diff --git
>> a/lib/librte_eal/linuxapp/kni/kni_net.c 
>> b/lib/librte_eal/linuxapp/kni/kni_net.c
>> index ab5add4..b50b4cf 100644
>> --- a/lib/librte_eal/linuxapp/kni/kni_net.c
>> +++ b/lib/librte_eal/linuxapp/kni/kni_net.c
>> @@ -70,15 +70,6 @@ kni_net_open(struct net_device *dev)
>>  struct rte_kni_request req;
>>  struct kni_dev *kni = netdev_priv(dev);
>> 
>> -if (kni->lad_dev)
>> -memcpy(dev->dev_addr, kni->lad_dev->dev_addr,
>> ETH_ALEN);
>> -else
>> -/*
>> - * Generate random mac address. eth_random_addr() is the
>> newer
>> - * version of generating mac address in linux kernel.
>> - */
>> -random_ether_addr(dev->dev_addr);
>> -
>>  netif_start_queue(dev);
>> 
>>  memset(&req, 0, sizeof(req));
>> --
>> 2.1.4

[dpdk-dev] [PATCH] kni: don't rewrite ethernet address every time an interface goes up

2016-04-21 Thread Igor Ryzhov

Now every time a KNI interface goes up, its ethernet address is overwritten.
After this patch ethernet address is assigned only once, at initialization
time.

Suggested-by: Sergey Balabanov 
Signed-off-by: Igor Ryzhov 
---
 lib/librte_eal/linuxapp/kni/kni_misc.c | 10 ++
 lib/librte_eal/linuxapp/kni/kni_net.c  |  9 -
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
b/lib/librte_eal/linuxapp/kni/kni_misc.c
index ae8133f..45bcf32 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -542,6 +543,15 @@ kni_ioctl_create(struct net *net,
if (pci)
pci_dev_put(pci);

+   if (kni->lad_dev)
+   memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
+   else
+   /*
+* Generate random mac address. eth_random_addr() is the newer
+* version of generating mac address in linux kernel.
+*/
+   random_ether_addr(dev->dev_addr);
+
ret = register_netdev(net_dev);
if (ret) {
KNI_ERR("error %i registering device \"%s\"\n",
diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c 
b/lib/librte_eal/linuxapp/kni/kni_net.c
index cfa8339..3d2d246 100644
--- a/lib/librte_eal/linuxapp/kni/kni_net.c
+++ b/lib/librte_eal/linuxapp/kni/kni_net.c
@@ -69,15 +69,6 @@ kni_net_open(struct net_device *dev)
struct rte_kni_request req;
struct kni_dev *kni = netdev_priv(dev);

-   if (kni->lad_dev)
-   memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
-   else
-   /*
-* Generate random mac address. eth_random_addr() is the newer
-* version of generating mac address in linux kernel.
-*/
-   random_ether_addr(dev->dev_addr);
-
netif_start_queue(dev);

memset(&req, 0, sizeof(req));
-- 
2.6.4

[dpdk-dev] Pktgen rate issue

2016-04-21 Thread Iñaki Murillo

Hello Keith,

I sent the patch as it is described in http://dpdk.org/dev but I got 
confused and I did not put that it was a patch for Pktgen. I hope it 
does not make anyone get confused.

Thank you,

 I?aki Murillo

El 21/04/16 a las 16:28, Wiles, Keith escribi?:
>> Hello Keith,
>>
>> I have done a patch to the pktgen, but I not sure how to submit it. I
>> would be very grateful if you give me some advice on how to contribute.
> Please have a look at the DPDK.org page on development : http://dpdk.org/dev
>
> You could send me the patch on the current Pktgen and I can review the 
> changes.
>
>> Thank you,
>>
>>  I?aki Murillo
>>
>> El 12/04/16 a las 14:55, Wiles, Keith escribi?:
 Hello

 I have been using pktgen for a while and I released that there is no
 possibility to set a rate between  two whole numbers.

I looked up the source code and I found out that the rate is stored in
 a uint8_t. So, I made a quick-and-dirty change just to check if it is
 possible to get a speed between two hole numbers. It seemed to work
 properly.

Is there a reason to use an uint8_t instead of a float, for example?
>>> The uint8_t was easier then using floats :-)
>>>
>>> If you have a patch for this change please send it along and I will look at 
>>> adding the support.
>>>
 Thank you,

 I?aki Murillo

>>> Regards,
>>> Keith
>>>
>>>
>>>
>>>
>>
>
> Regards,
> Keith
>
>
>
>

[dpdk-dev] [PATCH] Rate as a decimal number

2016-04-21 Thread inaki.murillo

Currently Pktgen does not accept a decimal number for the rate. This patch 
makes possible to set a decimal number as a rate.

Signed-off-by: I?aki Murillo Arroyo 
---
 app/cmd-functions.c|   71 +-
 app/cmd-functions.h|   38 +
 app/lpktgenlib.c   |2 +-
 app/pktgen-cmds.c  |   10 +-
 app/pktgen-cmds.c.orig | 2622 
 app/pktgen-cmds.h  |2 +-
 app/pktgen-port-cfg.h  |2 +-
 app/pktgen.c   |2 +-
 8 files changed, 2737 insertions(+), 12 deletions(-)
 create mode 100644 app/pktgen-cmds.c.orig

diff --git a/app/cmd-functions.c b/app/cmd-functions.c
index b2fda7c..a009907 100644
--- a/app/cmd-functions.c
+++ b/app/cmd-functions.c
@@ -1805,7 +1805,7 @@ struct cmd_set_result {
cmdline_fixed_string_t set;
cmdline_portlist_t portlist;
cmdline_fixed_string_t what;
-   uint32_t value;
+   float value;
 };

 /**//**
@@ -1865,8 +1865,8 @@ cmdline_parse_token_string_t cmd_set_what =
 TOKEN_STRING_INITIALIZER(struct cmd_set_result,
  what,
  
"count#size#rate#burst#tx_cycles#sport#dport#seqCnt#prime#dump#vlanid");
-cmdline_parse_token_num_t cmd_set_value =
-TOKEN_NUM_INITIALIZER(struct cmd_set_result, value, UINT32);
+cmdline_parse_token_float_t cmd_set_value =
+TOKEN_FLOAT_INITIALIZER(struct cmd_set_result, value, FLOAT);

 cmdline_parse_inst_t cmd_set = {
.f = cmd_set_parsed,
@@ -4691,3 +4691,68 @@ pktgen_load_cmds(char *filename)
}
return 0;
 }
+
+struct cmdline_token_ops cmdline_token_float_ops = {
+   .parse = cmdline_parse_float,
+   .complete_get_nb = NULL,
+   .complete_get_elt = NULL,
+   .get_help = cmdline_get_help_float,
+};
+
+/* parse a float */
+int
+cmdline_parse_float(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void 
*res, unsigned ressize)
+{
+   unsigned int token_len;
+   float tmp;
+
+   if (res && ressize < STR_TOKEN_SIZE)
+   return -1;
+
+   if (!tk || !srcbuf || ! *srcbuf)
+   return -1;
+
+   token_len = 0;
+   while(!cmdline_isendoftoken(srcbuf[token_len]) && token_len < 
(STR_TOKEN_SIZE-1))
+   {
+   token_len++;
+   }
+
+   /* return if token too long */
+   if (token_len >= STR_TOKEN_SIZE - 1) {
+   return -1;
+   }
+
+   tmp =  atof(srcbuf);
+   if ((tmp == 0 && strcmp(srcbuf, "0") == 0) || (tmp != 0)) {
+   /* we are sure that token_len is < STR_TOKEN_SIZE-1 */
+   *(float *)res = tmp;
+   return token_len;
+   }
+
+   return -1;
+
+}
+
+/* parse a float */
+int
+cmdline_get_help_float(cmdline_parse_token_hdr_t *tk, char *dstbuf, unsigned 
int size)
+{
+   struct cmdline_token_float_data nd;
+   int ret;
+
+   if (!tk)
+   return -1;
+
+   memcpy(&nd, &((struct cmdline_token_float *)tk)->float_data, 
sizeof(nd));
+
+   /* should not happen don't so this test */
+   /* if (nd.type >= (sizeof(num_help)/sizeof(const char *))) */
+   /* return -1; */
+
+   ret = snprintf(dstbuf, size, "FLOAT");
+   if (ret < 0)
+   return -1;
+   dstbuf[size-1] = '\0';
+   return 0;
+}
diff --git a/app/cmd-functions.h b/app/cmd-functions.h
index 0ccef6d..4b8dc4c 100644
--- a/app/cmd-functions.h
+++ b/app/cmd-functions.h
@@ -77,4 +77,42 @@ extern void pktgen_cmdline_start(void);

 extern int pktgen_load_cmds(char *filename);

+/* size of a parsed string */
+#define STR_TOKEN_SIZE 128
+
+enum cmdline_floattype {
+   FLOAT
+};
+
+struct cmdline_token_float_data {
+   const float *float_data;
+};
+
+struct cmdline_token_float {
+   struct cmdline_token_hdr hdr;
+   struct cmdline_token_float_data float_data;
+};
+typedef struct cmdline_token_float cmdline_parse_token_float_t;
+
+extern struct cmdline_token_ops cmdline_token_float_ops;
+
+int cmdline_parse_float(cmdline_parse_token_hdr_t *tk,
+   const char *srcbuf, void *res, unsigned ressize);
+int cmdline_get_help_float(cmdline_parse_token_hdr_t *tk,
+   char *dstbuf, unsigned int size);
+
+#define TOKEN_FLOAT_INITIALIZER(structure, field, float_type)\
+{   \
+   /* hdr */   \
+   {   \
+   &cmdline_token_float_ops, /* ops */   \
+   offsetof(structure, field), /* offset */\
+   },  \
+   /* num_data */  \
+   {   \
+   float_type,/* type */  \
+   },

[dpdk-dev] [PATCH 1/1] add writing registers

2016-04-21 Thread Rosen, Rami

Please ignore, this is a wrong patch 

-Original Message-
From: Rosen, Rami 
Sent: Thursday, April 21, 2016 8:24 PM
To: dev at dpdk.org
Cc: Rosen, Rami 
Subject: [PATCH 1/1] add writing registers

Change-Id: Iaeeefcbc29d109fef17797d6a888cf2448499646
Signed-off-by: Rami Rosen 
---
 fvlproto/build.sh   |4 +
 fvlproto/config.c   |7 +
 fvlproto/myMakefile |   29 +
 fvlproto/mybuild.sh |5 +
 fvlproto/regs.txt   |   78 ++
 fvlproto/regs.txt.org   | 1998 +++
 fvlproto/run.sh |   20 +
 fvlproto/test/Makefile  |   29 +
 fvlproto/test/readreg.c |  167 
 fvlproto/test/run.sh|3 +
 10 files changed, 2340 insertions(+)
 create mode 100755 fvlproto/build.sh
 create mode 100644 fvlproto/myMakefile
 create mode 100755 fvlproto/mybuild.sh
 create mode 100644 fvlproto/regs.txt
 create mode 100644 fvlproto/regs.txt.org
 create mode 100644 fvlproto/run.sh
 create mode 100644 fvlproto/test/Makefile
 create mode 100644 fvlproto/test/readreg.c
 create mode 100644 fvlproto/test/run.sh

diff --git a/fvlproto/build.sh b/fvlproto/build.sh
new file mode 100755
index 000..bfffb6b
--- /dev/null
+++ b/fvlproto/build.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+export RTE_SDK=/work/src/dpdk-16.04
+make
diff --git a/fvlproto/config.c b/fvlproto/config.c
index f7db457..1659154 100644
--- a/fvlproto/config.c
+++ b/fvlproto/config.c
@@ -148,6 +148,9 @@ static void parse_print_regs_input(const char *arg)
char input_line[256];
char *address, *val, *curr;
const char delimiters[] = " ";
+   uint32_t reg;
+   uint32_t value;
+   char *eptr;

if (arg == NULL)
return;
@@ -166,6 +169,10 @@ static void parse_print_regs_input(const char *arg)
address = strtok(NULL, delimiters);
val = strtok(NULL, delimiters);
printf("addess=%s val=%s\n", address, val);
+   reg = strtol(address, &eptr, 16);
+   value = strtol(val, &eptr, 16);
+   printf("Writing reg=%x value=%x\n", reg, value);
+   i40e_dev_reg_write(app.nic_rx_port, reg, value, 0);
}

fclose(file);
diff --git a/fvlproto/myMakefile b/fvlproto/myMakefile
new file mode 100644
index 000..00b878b
--- /dev/null
+++ b/fvlproto/myMakefile
@@ -0,0 +1,29 @@
+#   BSD LICENSE
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overriden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = readreg
+
+# all source are stored in SRCS-y
+SRCS-y := readreg.c
+
+CFLAGS += -O3 -g
+CFLAGS += $(WERROR_FLAGS)
+#CFLAGS_config.o := -D_GNU_SOURCE
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+#CFLAGS_main.o += -Wno-return-type
+CFLAGS_readreg.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/fvlproto/mybuild.sh b/fvlproto/mybuild.sh
new file mode 100755
index 000..55b89f1
--- /dev/null
+++ b/fvlproto/mybuild.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+export RTE_SDK=/work/src/dpdk-16.04
+make -f myMakefile
+
diff --git a/fvlproto/regs.txt b/fvlproto/regs.txt
new file mode 100644
index 000..68f9ef3
--- /dev/null
+++ b/fvlproto/regs.txt
@@ -0,0 +1,78 @@
+ write  0x1c0a90   0x5  
+ write  0x1c0a9c   0x3180c  
+ write  0x1c0aa0   0x2096  
+ write  0x1c0aa4   0x5  
+ write  0x1c0aa8   0x3180c  
+ write  0x1c0aac   0x2096  
+ write  0x1c0a90   0x305  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0xb  
+ write  0x1c0aa4   0x305  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0xb  
+ write  0x1c0a90   0x110  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x28000cb  
+ write  0x1c0aa4   0x110  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x28000cb  
+ write  0x1c0a90   0x111  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x28b  
+ write  0x1c0aa4   0x111  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x28b  
+ write  0x1c0a90   0x113  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x2c4a7d5  
+ write  0x1c0aa4   0x113  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x2c4a7d5  
+ write  0x1c0a90   0x114  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x2800219  
+ write  0x1c0aa4   0x114  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x2800219  
+ write  0x1c0a90   0x115  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x280031b  
+ write  0x1c0aa4   0x115  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x280031b  
+ write  0x1c0a90   0x138  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x2b2ac41  
+ write  0x1c0aa4   0x138  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x2b2ac41  
+ write  0x1c0a90   0x139  
+ write  0x1c0a9c   0x0  
+ write  0x1c0aa0   0x2840019  
+ write  0x1c0aa4   0x139  
+ write  0x1c0aa8   0x0  
+ write  0x1c0aac   0x2840019  
+ write  0x1c0a90   0x13a  
+ writ

[dpdk-dev] [dpdk-users] DPDK 16.04 link changes cause PMD drivers to not be loaded

2016-04-21 Thread Thomas Monjalon

2016-04-21 08:01, Aurojit Panda:
> Panu Matilainen wrote:
[...]
> > Again, PMDs are *plugins* that are *meant* to be loaded at runtime.
> > That allows for all sorts of flexibility especially
> > for packaging and shipping, at some extra cost in setup complexity.
> 
> I am all for a plugin architecture, I was merely suggesting that you
> embed some path infromation at the beginning. Also please note:
> (a) This behavior changed recently.

What changed recently?

> (b) This change is entirely undocumented, which is why I was reporting
> it in the first place.
> (c) It is actually quite unintutive, because previously ensuring
> LD_LIBRARY_PATH was correct was all that was required 
> to get any DPDK application to interact with ports.

?
Are you talking about combined library?

> > For your own purposes, you can of course tweak the linking settings
> > as much as you like. Look for "plugins" in mk/rte.app.mk and change
> > the shared lib condition on the line above to "y" and there you have it.
> > But that's not the way plugins are meant to be used.
> 
> That is not a reasonable solution given that it makes it very hard to
> track future changes to DPDK without merges.
> My alternatives neither break people's abilities to use plugins,
> nor do they impact behavior.

Please do not hesitate to send some patch to show your solution.
Thanks

[dpdk-dev] [PATCH v2] i40evf: Report error if HW CRC strip is disabled for Linux PF hosts

2016-04-21 Thread Björn Töpel

On Linux PF hosts, the VF has no means of changing the HW CRC strip
setting for a RX queue. It's implicitly enabled.

This patch checks if the host is running a Linux PF kernel driver, and
returns an error, if HW CRC stripping was disabled.

Signed-off-by: Bj?rn T?pel 
---
 drivers/net/i40e/i40e_ethdev_vf.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/i40e/i40e_ethdev_vf.c 
b/drivers/net/i40e/i40e_ethdev_vf.c
index 2bce69b..0057ed6 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -1567,6 +1567,8 @@ i40evf_dev_configure(struct rte_eth_dev *dev)
 {
struct i40e_adapter *ad =
I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+   struct rte_eth_conf *conf = &dev->data->dev_conf;
+   struct i40e_vf *vf;

/* Initialize to TRUE. If any of Rx queues doesn't meet the bulk
 * allocation or vector Rx preconditions we will reset it.
@@ -1576,6 +1578,19 @@ i40evf_dev_configure(struct rte_eth_dev *dev)
ad->tx_simple_allowed = true;
ad->tx_vec_allowed = true;

+   /* For Linux PF hosts, VF has no ability to disable HW CRC strip,
+* and is implicitly enabled by the PF.
+*/
+   if (!conf->rxmode.hw_strip_crc) {
+   vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+   if ((vf->version_major == I40E_VIRTCHNL_VERSION_MAJOR) &&
+   (vf->version_minor <= I40E_VIRTCHNL_VERSION_MINOR)) {
+   /* Peer is Linux PF host. */
+   PMD_INIT_LOG(ERR, "VF can't disable HW CRC Strip");
+   return -EINVAL;
+   }
+   }
+
return i40evf_init_vlan(dev);
 }

-- 
2.7.4

[dpdk-dev] [PATCH] Rate as a decimal number

2016-04-21 Thread Wiles, Keith

>Currently Pktgen does not accept a decimal number for the rate. This patch 
>makes possible to set a decimal number as a rate.
>
>Signed-off-by: I?aki Murillo Arroyo 
>---
> app/cmd-functions.c|   71 +-
> app/cmd-functions.h|   38 +
> app/lpktgenlib.c   |2 +-
> app/pktgen-cmds.c  |   10 +-
> app/pktgen-cmds.c.orig | 2622 
> app/pktgen-cmds.h  |2 +-
> app/pktgen-port-cfg.h  |2 +-
> app/pktgen.c   |2 +-
> 8 files changed, 2737 insertions(+), 12 deletions(-)
> create mode 100644 app/pktgen-cmds.c.orig
>
>diff --git a/app/cmd-functions.c b/app/cmd-functions.c
>index b2fda7c..a009907 100644
>--- a/app/cmd-functions.c
>+++ b/app/cmd-functions.c
>@@ -1805,7 +1805,7 @@ struct cmd_set_result {
>   cmdline_fixed_string_t set;
>   cmdline_portlist_t portlist;
>   cmdline_fixed_string_t what;
>-  uint32_t value;
>+  float value;
> };
> 



Looks like you checked in the pktgen-cmds.c.orig file and it shows up here are 
the complete file. Can you remove it and resend the patch again with Pktgen in 
the subject and called v2 version.

Thanks

>   rate = 1;
>diff --git a/app/pktgen-cmds.c.orig b/app/pktgen-cmds.c.orig
>new file mode 100644
>index 000..83ba230
>--- /dev/null
>+++ b/app/pktgen-cmds.c.orig
>@@ -0,0 +1,2622 @@
>+/*-
>+ * Copyright (c) <2010>, Intel Corporation
>+ * All rights reserved.
>+ *
>+ * Redistribution and use in source and binary forms, with or without
>+ * modification, are permitted provided that the following conditions
>+ * are met:
>+ *
>+ * - Redistributions of source code must retain the above copyright
>+ *   notice, this list of conditions and the following disclaimer.
>+ *
>+ * - Redistributions in binary form must reproduce the above copyright
>+ *   notice, this list of conditions and the following disclaimer in
>+ *   the documentation and/or other materials provided with the
>+ *   distribution.
>+ *
>+ * - Neither the name of Intel Corporation nor the names of its
>+ *   contributors may be used to endorse or promote products derived
>+ *   from this software without specific prior written permission.
>+ *
>+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
>+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
>+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
>+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
>+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
>+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
>+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
>+ * OF THE POSSIBILITY OF SUCH DAMAGE.
>+ */


>
>diff --git a/app/pktgen-cmds.h b/app/pktgen-cmds.h
>index 683abc3..1c50e59 100644
>--- a/app/pktgen-cmds.h
>+++ b/app/pktgen-cmds.h
>@@ -135,7 +135,7 @@ extern void pktgen_set_pkt_size(port_info_t *info, 
>uint32_t size);
> extern void pktgen_set_port_value(port_info_t *info,
>   char type,
>   uint32_t portValue);
>-extern void pktgen_set_tx_rate(port_info_t *info, uint32_t rate);
>+extern void pktgen_set_tx_rate(port_info_t *info, float rate);
> extern void pktgen_set_ipaddr(port_info_t *info, char type,
>   cmdline_ipaddr_t *ip);
> extern void pktgen_set_dst_mac(port_info_t *info, cmdline_etheraddr_t *mac);
>diff --git a/app/pktgen-port-cfg.h b/app/pktgen-port-cfg.h
>index bd5876c..a20286d 100644
>--- a/app/pktgen-port-cfg.h
>+++ b/app/pktgen-port-cfg.h
>@@ -193,7 +193,7 @@ typedef struct port_info_s {
>   uint16_t pid;   /**< Port ID value */
>   uint16_t tx_burst;  /**< Number of TX burst packets */
>   uint8_t pad0;
>-  uint8_t tx_rate;/**< Percentage rate for tx packets */
>+  float tx_rate;  /**< Percentage rate for tx packets */
>   rte_atomic32_t port_flags;  /**< Special send flags for ARP and 
> other */
> 
>   rte_atomic64_t transmit_count;  /**< Packets to transmit loaded into 
> current_tx_count */
>diff --git a/app/pktgen.c b/app/pktgen.c
>index a0705ab..a64e0ca 100644
>--- a/app/pktgen.c
>+++ b/app/pktgen.c
>@@ -143,7 +143,7 @@ pktgen_packet_rate(port_info_t *info)
>   info->tx_pps= pps;
>   info->tx_cycles = ((cpp * info->tx_burst) /
>  wr_get_port_txcnt(pktgen.l2p, info->pid));
>-  info->tx_cycles -= ff[info->tx_rate / 10];
>+  info->tx_cycles -= ff[(int)info->tx_rate / 10];
> }
> 
> /**//**
>-- 
>1.9.1
>
>


Regards,
Keith

[dpdk-dev] [PATCH] ixgbe: configure VLAN TPID

2016-04-21 Thread Beilei Xing

The patch enables configuring the ether types of both inner and
outer VLANs.

Signed-off-by: Beilei Xing 
---
 drivers/net/ixgbe/ixgbe_ethdev.c | 29 +++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 3f1ebc1..c2c4154 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -157,6 +157,8 @@ enum ixgbevf_xcast_modes {
IXGBEVF_XCAST_MODE_ALLMULTI,
 };

+#define IXGBE_EXVET_VET_EXT_SHIFT  16
+
 static int eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev);
 static int  ixgbe_dev_configure(struct rte_eth_dev *dev);
@@ -1575,11 +1577,34 @@ ixgbe_vlan_tpid_set(struct rte_eth_dev *dev,
struct ixgbe_hw *hw =
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
int ret = 0;
+   uint32_t reg;
+   uint32_t qinq;
+
+   qinq = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
+   qinq &= IXGBE_DMATXCTL_GDV;

switch (vlan_type) {
case ETH_VLAN_TYPE_INNER:
-   /* Only the high 16-bits is valid */
-   IXGBE_WRITE_REG(hw, IXGBE_EXVET, tpid << 16);
+   if (qinq) {
+   reg = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+   reg = (reg & (~IXGBE_VLNCTRL_VET)) | (uint32_t)tpid;
+   IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, reg);
+   } else {
+   PMD_DRV_LOG(ERR, "not set QinQ on yet\n");
+   ret = -EIO;
+   }
+   break;
+   case ETH_VLAN_TYPE_OUTER:
+   if (qinq) {
+   /* Only the high 16-bits is valid */
+   IXGBE_WRITE_REG(hw, IXGBE_EXVET, (uint32_t)tpid <<
+   IXGBE_EXVET_VET_EXT_SHIFT);
+   } else {
+   reg = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+   reg = (reg & (~IXGBE_VLNCTRL_VET)) | (uint32_t)tpid;
+   IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, reg);
+   }
+
break;
default:
ret = -EINVAL;
-- 
2.5.0

[dpdk-dev] [PATCH] e1000: configure VLAN TPID

2016-04-21 Thread Beilei Xing

This patch enables configuring the ether types of both inner and
outer VLANs. Note that TPID of single or inner VLAN is read only.

Signed-off-by: Beilei Xing 
---
 drivers/net/e1000/igb_ethdev.c | 34 +++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index e0053fe..c957fbe 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -86,6 +86,14 @@
 #define E1000_INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT)
 #define E1000_TSAUXC_DISABLE_SYSTIME 0x8000

+/* CTRL_EXT bit mask*/
+#define E1000_CTRL_EXT_EXT_VLAN  (1 << 26)
+
+/* VLAN Ether Type bit mask */
+#define E1000_VET_VET_EXT0x
+
+#define E1000_VET_VET_EXT_SHIFT  16
+
 static int  eth_igb_configure(struct rte_eth_dev *dev);
 static int  eth_igb_start(struct rte_eth_dev *dev);
 static void eth_igb_stop(struct rte_eth_dev *dev);
@@ -2242,13 +2250,33 @@ eth_igb_vlan_tpid_set(struct rte_eth_dev *dev,
 {
struct e1000_hw *hw =
E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-   uint32_t reg = ETHER_TYPE_VLAN;
+   uint32_t reg;
+   uint32_t qinq;
int ret = 0;

+   qinq = E1000_READ_REG(hw, E1000_CTRL_EXT);
+   qinq &= E1000_CTRL_EXT_EXT_VLAN;
+
switch (vlan_type) {
case ETH_VLAN_TYPE_INNER:
-   reg |= (tpid << 16);
-   E1000_WRITE_REG(hw, E1000_VET, reg);
+   if (qinq)
+   PMD_DRV_LOG(WARNING,
+   "inner vlan ether type is read-only\n");
+   else {
+   PMD_DRV_LOG(ERR, "not set QinQ on yet\n");
+   ret = -EIO;
+   }
+   break;
+   case ETH_VLAN_TYPE_OUTER:
+   if (qinq) {
+   reg = E1000_READ_REG(hw, E1000_VET);
+   reg = (reg & (~E1000_VET_VET_EXT)) |
+   ((uint32_t)tpid << E1000_VET_VET_EXT_SHIFT);
+   E1000_WRITE_REG(hw, E1000_VET, reg);
+   } else
+   PMD_DRV_LOG(WARNING,
+   "single vlan ether type is read-only\n");
+
break;
default:
ret = -EINVAL;
-- 
2.5.0

[dpdk-dev] [PATCH] ixgbe: fix bad shift operation in ixgbe_set_pool_rx

2016-04-21 Thread Bruce Richardson

On Thu, Apr 21, 2016 at 03:44:03PM +0100, Kulasek, TomaszX wrote:
> 
> 
> > -Original Message-
> > From: Richardson, Bruce
> > Sent: Thursday, April 21, 2016 15:52
> > To: Kulasek, TomaszX 
> > Cc: dev at dpdk.org; Zhang, Helin ; Ananyev,
> > Konstantin 
> > Subject: Re: [dpdk-dev] [PATCH] ixgbe: fix bad shift operation in
> > ixgbe_set_pool_rx
> > 
> > On Fri, Apr 15, 2016 at 03:39:09PM +0200, Tomasz Kulasek wrote:
> > > CID 13193 (#1 of 1): Bad bit shift operation (BAD_SHIFT)
> > > large_shift: In expression 1 << pool, left shifting by more than 31
> > > bits has undefined behavior. The shift amount, pool, is at least 32.
> > >
> > > This patch limits mask shift to be in range of 32 bit PFVFRE[1]
> > > register, for pool > 31.
> > >
> > > Fixes: fe3a45fd4104 ("ixgbe: add VMDq support")
> > >
> > > Signed-off-by: Tomasz Kulasek 
> > > ---
> > >  drivers/net/ixgbe/ixgbe_ethdev.c |2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c
> > > b/drivers/net/ixgbe/ixgbe_ethdev.c
> > > index 3f1ebc1..f676a64 100644
> > > --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> > > +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> > > @@ -4401,7 +4401,7 @@ ixgbe_set_pool_rx(struct rte_eth_dev *dev,
> > > uint16_t pool, uint8_t on)
> > >
> > >   addr = IXGBE_VFRE(pool >= ETH_64_POOLS/2);
> 
> For pool in 0..31 PFVFRE[0] is used, for pool in 32..63, PFVFRE[1], but for 
> second case, we set/unset (pool-32) bit in the register. Invalid value if 
> pool > 63, but catching it doesn't solve a problem of possible overflow for 
> pool > 31.
> 
> > >   reg = IXGBE_READ_REG(hw, addr);
> > > - val = bit1 << pool;
> 
> Previous implementation expects that for shift operation will be used rol on 
> 32 bit value, and the bits that slide off the end of the register are fed 
> back into the spaces, eg. (bit1 << 33) == (bit1 << 1).
> Pool value can be bigger than 31, and this is not an error while pool is 
> smaller than 64.
> 
> Truncating pool value is clearer for me, than relay on obscure shift 
> operation.
>
Thanks for the explanation, that indeed does make it clearer.

However, all that detail is completely unclear to the reader of the function, so
perhaps we can clean up the code to make it more explicit what is happening.
For example:

/* for pool >= 32, set bit in PFVFRE[1], otherwise PFVFRE[0] */
if (pool >= ETH_64_POOLS)
 return -EINVAL;
else if (pool >= ETH_64_POOLS/2) {
 addr = IXGBE_VFRE(1);
 val = bit1 << (pool - 32);
} else {
addr = IXGBE_VFRE(0);
val = bit1 << pool;
}

reg = IXGBE_READ_REG(hw, addr);

This should fix the issue and make the resulting code clearer, I think.

/Bruce

[dpdk-dev] Pktgen rate issue

2016-04-21 Thread Iñaki Murillo

Hello Keith,

I have done a patch to the pktgen, but I not sure how to submit it. I 
would be very grateful if you give me some advice on how to contribute.

Thank you,

 I?aki Murillo

El 12/04/16 a las 14:55, Wiles, Keith escribi?:
>> Hello
>>
>> I have been using pktgen for a while and I released that there is no
>> possibility to set a rate between  two whole numbers.
>>
>>   I looked up the source code and I found out that the rate is stored in
>> a uint8_t. So, I made a quick-and-dirty change just to check if it is
>> possible to get a speed between two hole numbers. It seemed to work
>> properly.
>>
>>   Is there a reason to use an uint8_t instead of a float, for example?
> The uint8_t was easier then using floats :-)
>
> If you have a patch for this change please send it along and I will look at 
> adding the support.
>
>> Thank you,
>>
>> I?aki Murillo
>>
>
> Regards,
> Keith
>
>
>
>

[dpdk-dev] Memory leak when adding/removing vhost_user ports

2016-04-21 Thread Christian Ehrhardt

Thanks Ilya,
yeah we usually wait for the point releases as they undergo some extra
testing and verification.
.1 shouldn't be too much into the future I guess.
Thanks a lot for identifying.

That said, I'd still go on with Yuanhan to finalize the dpdk side leak fix
we identified, so we eventually get it committed.
So Yuanhan, what do you think of my last revised version of your patch for
upstream DPDK (there with the vhost_destroy_device then)?
I mean it is essentially your patch plus a bit of polishing, not mine so I
don't feel entitled to submit it as mine :-)

Kind Regards,
Christian


Christian Ehrhardt
Software Engineer, Ubuntu Server
Canonical Ltd

On Thu, Apr 21, 2016 at 1:01 PM, Ilya Maximets 
wrote:

> Hi, Christian.
> You're, likely, using tar archive with openvswitch from openvswitch.org.
> It doesn't contain many bug fixes from git/branch-2.5 unfortunately.
>
> The problem that you are facing has been solved in branch-2.5 by
>
> commit d9df7b9206831631ddbd90f9cbeef1b4fc5a8e89
> Author: Ilya Maximets 
> Date:   Thu Mar 3 11:30:06 2016 +0300
>
> netdev-dpdk: Fix memory leak in netdev_dpdk_vhost_destruct().
>
> Fixes: 4573fbd38fa1 ("netdev-dpdk: Add vhost-user multiqueue support")
> Signed-off-by: Ilya Maximets 
> Acked-by: Flavio Leitner 
> Acked-by: Daniele Di Proietto 
>
> Best regards, Ilya Maximets.
>
> > I assume there is a leak somewhere on adding/removing vhost_user ports.
> > Although it could also be "only" a fragmentation issue.
> >
> > Reproduction is easy:
> > I set up a pair of nicely working OVS-DPDK connected KVM Guests.
> > Then in a loop I
> >- add up to more 512 ports
> >- test connectivity between the two guests
> >- remove up to 512 ports
> >
> > Depending on memory and the amount of multiqueue/rxq I use it seems to
> > slightly change when exactly it breaks. But for my default setup of 4
> > queues and 5G Hugepages initialized by DPDK it always breaks at the sixth
> > iteration.
> > Here a link to the stack trace indicating a memory shortage (TBC):
> > https://launchpadlibrarian.net/253916410/apport-retrace.log
> >
> > Known Todos:
> > - I want to track it down more, and will try to come up with a non
> > openvswitch based looping testcase that might show it as well to simplify
> > debugging.
> > - in use were Openvswitch-dpdk 2.5 and DPDK 2.2; Retest with DPDK 16.04
> and
> > Openvswitch master is planned.
> >
> > I will go on debugging this and let you know, but I wanted to give a
> heads
> > up to everyone.
> > In case this is a known issue for some of you please let me know.
> >
> > Kind Regards,
> > Christian Ehrhardt
> > Software Engineer, Ubuntu Server
> > Canonical Ltd
> >
> > P.S. I think it is a dpdk issue, but adding Daniele on CC to represent
> > ovs-dpdk as well.
>

[dpdk-dev] [PATCH] virtio: fix segfault when transmit pkts

2016-04-21 Thread Yuanhan Liu

On Thu, Apr 21, 2016 at 12:36:10PM +, Jianfeng Tan wrote:
> Issue: when using virtio nic to transmit pkts, it causes segment fault.

Jianfeng,

It's good to describe the issues, steps to reproduce it and how to fix
it. But you don't have to tell it like filling a form. Instead, you
could try to describe in plain English.

> How to reproduce:
> a. start testpmd with vhost.
> $testpmd -c 0x3 -n 4 --socket-mem 1024,0 --no-pci \
>   --vdev 'eth_vhost0,iface=/tmp/sock0,queues=1' -- -i --nb-cores=1

I personally would suggest to add some indentations (and some whitespace
lines), like

a. start testpmd with vhost.
   $ testpmd -c 0x3 -n 4 ... \
 --vdev '...' ...

b. something else ...
   some more explanations.

And, do not quote a command line like "$testpmd ...", it's like a var
in base in this way. Instead, add a space after "$ ", like what I did
above.

> b. start a qemu with a virtio nic connected with the vhost-user port.

This should be enough. I didn't find any special options below,
therefore, above words is enough. However, if it's some specific
option introduces a bug, you could claim it aloud then.

> $qemu -smp cores=2,sockets=1 -cpu host -enable-kvm vm-0.img -vnc :5 -m 4G \
>   -object memory-backend-file,id=mem,size=4096M,mem-path=,share=on \
>   -numa node,memdev=mem -mem-prealloc \
>   -chardev socket,id=char1,path=$sock_vhost \
>   -netdev type=vhost-user,id=net1,chardev=char1 \
>   -device virtio-net-pci,netdev=net1,mac=00:01:02:03:04:05
> c. enable testpmd on the host.
> testpmd> set fwd io
> testpmd> start
> d. start testpmd in VM.
> $testpmd -c 0x3 -n 4 -m 1024 -- -i --disable-hw-vlan-filter --txqflags=0xf01
> testpmd> set fwd txonly
> testpmd> start
> 
> How to fix: this bug is because inside virtqueue_enqueue_xmit(), the flag of
> desc has been updated inside the do {} while (); and after the loop, all descs
> could have run out, so idx is VQ_RING_DESC_CHAIN_END (32768), use this idx to
> reference the start_dp array will lead to segment fault.

You missed a fix line here.

> Signed-off-by: Jianfeng Tan 
> ---
>  drivers/net/virtio/virtio_rxtx.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_rxtx.c 
> b/drivers/net/virtio/virtio_rxtx.c
> index ef21d8e..432aeab 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -271,8 +271,6 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct 
> rte_mbuf *cookie,
>   idx = start_dp[idx].next;
>   } while ((cookie = cookie->next) != NULL);
>  
> - start_dp[idx].flags &= ~VRING_DESC_F_NEXT;

This looks a good fix to me. I'm just wondering why we never met it
before, and on which specific case do we meet it? Talking about that,
seems like "set fwd txonly" is with high suspicious.

--yliu

[dpdk-dev] [PATCH] i40e: fix packet stats getting

2016-04-21 Thread Bruce Richardson

On Tue, Apr 19, 2016 at 07:21:16AM +0100, Zhang, Helin wrote:
> 
> 
> > -Original Message-
> > From: Wu, Jingjing
> > Sent: Tuesday, April 19, 2016 2:11 PM
> > To: Richardson, Bruce 
> > Cc: dev at dpdk.org; Wu, Jingjing ; Zhang, Helin
> > 
> > Subject: [PATCH] i40e: fix packet stats getting
> > 
> > The statistics queried by calling rte_eth_stats_get are zero when the API 
> > is first
> > called on the port. The root cause is because of offset_loaded flag is not 
> > set
> > correctly after device started.
> > This patch fixes this issue by resetting statistics at initialization time, 
> > at the
> > meanwhile the resetting process will set offset_loaded flag.
> > 
> > Fixes: 4861cde46116 ("i40e: new poll mode driver")
> > Signed-off-by: Jingjing Wu 
> Acked-by: Helin Zhang 
> 
Applied to dpdk-next-net/rel_16_07

/Bruce

[dpdk-dev] [PATCH v3 2/2] virtio/vdev: add a new vdev named eth_cvio

2016-04-21 Thread Yuanhan Liu

On Thu, Apr 21, 2016 at 02:56:36AM +, Jianfeng Tan wrote:
> Add a new virtual device named eth_cvio, it can be used just like
> eth_ring, eth_null, etc.
> 
> Configured parameters include:
>   - rx (optional, 1 by default), number of rx, not used for now.
>   - tx (optional, 1 by default), number of tx, not used for now.
>   - cq (optional, 0 by default), not supported for now.
>   - mac (optional), random value will be given if not specified.
>   - queue_size (optional, 256 by default), size of virtqueue.
>   - path (madatory), path of vhost, depends on the file type, vhost
> user if the given path points to a unix socket; vhost-net if the
> given path points to a char device.
>   - ifname (optional), specify the name of backend tap device; only
> valid when backend is vhost-net.
> 
> The major difference with original virtio for vm is that, here we use
> virtual addr instead of physical addr for vhost to calculate relative
> address.
> 
> When enable CONFIG_RTE_VIRTIO_VDEV (enabled by default), the compiled
> library can be used in both VM and container environment.
> 
> Examples:
> path_vhost=/dev/vhost-net # use vhost-net as a backend
> path_vhost= # use vhost-user as a backend
> 
> sudo ./examples/l2fwd/build/l2fwd -c 0x10 -n 4 \
> --socket-mem 0,1024 --no-pci --file-prefix=l2fwd \
> --vdev=eth_cvio0,mac=00:01:02:03:04:05,path=$path_vhost -- -p 0x1
> 
> Known issues:
>  - Control queue and multi-queue are not supported yet.
>  - Cannot work with --huge-unlink.
>  - Cannot work with no-huge.
>  - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8)
>hugepages.
>  - Root privilege is a must (mainly becase of sorting hugepages according
>to physical address).
>  - Applications should not use file name like HUGEFILE_FMT ("%smap_%d").
> 
> Signed-off-by: Huawei Xie 
> Signed-off-by: Jianfeng Tan 
> Acked-By: Neil Horman 
> ---
>  doc/guides/nics/overview.rst |  58 +-
>  doc/guides/rel_notes/release_16_07.rst   |   4 +
>  drivers/net/virtio/rte_eth_virtio_vdev.c | 188 
> ++-

Why prefixing it with "rte_eth_..."?

> -    = = = = = = = = = = = = = = = = = = = = = = = = = = 
> = = = = = = = = =
> -   Feature  a b b b c e e e i i i i i i i i i i f f f f m m m n 
> n p r s v v v v x
> -f n n o x 1 n n 4 4 4 4 g g x x x x m m m m l l p f 
> u c i z h i i m e
> -p x x n g 0 a i 0 0 0 0 b b g g g g 1 1 1 1 x x i p 
> l a n e o r r x n
> -a 2 2 d b 0   c e e e e   v b b b b 0 0 0 0 4 5 p   
> l p g d s t t n v
> -c x x i e 0   . v v   f e e e e k k k k e
>  a t i i e i
> -k   v n   . f f   . v v   . v v  
>  t   o o t r
> -e   f g   .   .   . f f   . f f  
>  a . 3 t
> +    = = = = = = = = = = = = = = = = = = = = = = = = = = 
> = = = = = = = = = =
> +   Feature  a b b b c e e e i i i i i i i i i i f f f f m m m n 
> n p r s v v v v x c
> +f n n o x 1 n n 4 4 4 4 g g x x x x m m m m l l p f 
> u c i z h i i m e v
> +p x x n g 0 a i 0 0 0 0 b b g g g g 1 1 1 1 x x i p 
> l a n e o r r x n i
> +a 2 2 d b 0   c e e e e   v b b b b 0 0 0 0 4 5 p   
> l p g d s t t n v r
> +c x x i e 0   . v v   f e e e e k k k k e
>  a t i i e i t
> +k   v n   . f f   . v v   . v v  
>  t   o o t r i
> +e   f g   .   .   . f f   . f f  
>  a . 3 t o
>  t v   v   v   v   v   v  
>  2 v


I would wish we have a diff that could do compare by columns but not by
rows :)


> diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
> index 68097e6..3b47332 100644
> --- a/drivers/net/virtio/virtio_pci.h
> +++ b/drivers/net/virtio/virtio_pci.h
> @@ -264,7 +264,7 @@ struct virtio_hw {
>  #define VHOST_KERNEL 0
>  #define VHOST_USER   1
>   int type; /* type of backend */
> - uint32_tqueue_num;
> + uint32_tqueue_size;

Hmm, this kind of change should not be squeezed here, stealthily. I
would agree that the rename in decreases the stealthily, which is a
good thing. You should submit a standalone patch instead.

--yliu

[dpdk-dev] [PATCH v3] examples/qos_sched: fix bad bit shift operation

2016-04-21 Thread Michal Jastrzebski

From: Slawomir Mrozowicz 

Fix issue reported by Coverity.

Coverity ID 30690: Bad bit shift operation
large_shift: In expression 1ULL << i, left shifting by more than 63 bits
has undefined behavior. The shift amount, i, is as much as 127.

Fixes: de3cfa2c9823 ("sched: initial import")

Signed-off-by: Slawomir Mrozowicz 
---
 examples/qos_sched/args.c | 84 +--
 1 file changed, 52 insertions(+), 32 deletions(-)

diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c
index 3e7fd08..cd077ba 100644
--- a/examples/qos_sched/args.c
+++ b/examples/qos_sched/args.c
@@ -53,7 +53,7 @@

 static uint32_t app_master_core = 1;
 static uint32_t app_numa_mask;
-static uint64_t app_used_core_mask = 0;
+static int app_used_core_mask[RTE_MAX_LCORE];
 static uint64_t app_used_port_mask = 0;
 static uint64_t app_used_rx_port_mask = 0;
 static uint64_t app_used_tx_port_mask = 0;
@@ -115,22 +115,23 @@ static inline int str_is(const char *str, const char *is)
return strcmp(str, is) == 0;
 }

-/* returns core mask used by DPDK */
-static uint64_t
-app_eal_core_mask(void)
+/* compare used core with eal configuration,
+   returns:
+   1 if equal
+   0 if differ */
+static int
+app_eal_core_check(void)
 {
-   uint32_t i;
-   uint64_t cm = 0;
+   uint16_t i;
+   int ret = 1;
struct rte_config *cfg = rte_eal_get_configuration();

-   for (i = 0; i < RTE_MAX_LCORE; i++) {
-   if (cfg->lcore_role[i] == ROLE_RTE)
-   cm |= (1ULL << i);
+   for (i = 0; i < RTE_MAX_LCORE && ret; i++) {
+   if ((cfg->lcore_role[i] == ROLE_RTE) != app_used_core_mask[i])
+   ret = 0;
}

-   cm |= (1ULL << cfg->master_lcore);
-
-   return cm;
+   return ret;
 }


@@ -292,14 +293,9 @@ app_parse_flow_conf(const char *conf_str)
app_used_tx_port_mask |= mask;
app_used_port_mask |= mask;

-   mask = 1lu << pconf->rx_core;
-   app_used_core_mask |= mask;
-
-   mask = 1lu << pconf->wt_core;
-   app_used_core_mask |= mask;
-
-   mask = 1lu << pconf->tx_core;
-   app_used_core_mask |= mask;
+   app_used_core_mask[pconf->rx_core] = 1;
+   app_used_core_mask[pconf->wt_core] = 1;
+   app_used_core_mask[pconf->tx_core] = 1;

nb_pfc++;

@@ -335,7 +331,7 @@ app_parse_args(int argc, char **argv)
int option_index;
const char *optname;
char *prgname = argv[0];
-   uint32_t i, nb_lcores;
+   uint16_t i, j, k, nb_lcores;

static struct option lgopts[] = {
{ "pfc", 1, 0, 0 },
@@ -349,6 +345,9 @@ app_parse_args(int argc, char **argv)
{ NULL,  0, 0, 0 }
};

+   for (i = 0; i < RTE_MAX_LCORE; i++)
+   app_used_core_mask[i] = 0;
+
/* initialize EAL first */
ret = rte_eal_init(argc, argv);
if (ret < 0)
@@ -436,19 +435,40 @@ app_parse_args(int argc, char **argv)
}

/* check master core index validity */
-   for(i = 0; i <= app_master_core; i++) {
-   if (app_used_core_mask & (1u << app_master_core)) {
-   RTE_LOG(ERR, APP, "Master core index is not configured 
properly\n");
-   app_usage(prgname);
-   return -1;
-   }
+   if (app_used_core_mask[app_master_core] == 1) {
+   RTE_LOG(ERR, APP,
+   "Master core index is not configured properly\n");
+   app_usage(prgname);
+   return -1;
}
-   app_used_core_mask |= 1u << app_master_core;
+   app_used_core_mask[app_master_core] = 1;
+
+   if ((app_eal_core_check() == 0) ||
+   (app_master_core != rte_get_master_lcore())) {
+
+   char used_hexstr[RTE_MAX_LCORE/4+1];
+   char conf_hexstr[RTE_MAX_LCORE/4+1];
+   int used_byte, conf_byte;
+   struct rte_config *cfg = rte_eal_get_configuration();
+
+   for (i = 0; i < RTE_MAX_LCORE/4; i++) {
+   used_byte = 0;
+   conf_byte = 0;
+   for (j = 0; j < 3; j++) {
+   k = 4 * (RTE_MAX_LCORE/4 - i - 1) + j;
+   used_byte += app_used_core_mask[k] << j;
+   conf_byte +=
+   ((cfg->lcore_role[k] ==
+   ROLE_RTE)?1:0) << j;
+   }
+   sprintf(&used_hexstr[i], "%1x", used_byte);
+   sprintf(&conf_hexstr[i], "%1x", used_byte);
+   }
+
+   RTE_LOG(ERR, APP, "EAL core mask not configured properly\n");
+   RTE_LOG(ERR, APP, "  must be   : %s\n", used_hexstr);
+   RTE_LOG(ERR, APP, "  instead of: %s\n", conf_hexstr);

-   if ((app_used_core_mask != app_eal_core_m

[dpdk-dev] [PATCH v3] examples/qos_sched: fix copy-paste error

2016-04-21 Thread Michal Jastrzebski

From: Slawomir Mrozowicz 

Fix issue reported by Coverity.

Coverity ID 30699: Copy-paste error;
rx_port in pconf->rx_port looks like a copy-paste error.

Fixes: de3cfa2c9823 ("sched: initial import")

Signed-off-by: Slawomir Mrozowicz 
---
 examples/qos_sched/args.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c
index 3e7fd08..1916790 100644
--- a/examples/qos_sched/args.c
+++ b/examples/qos_sched/args.c
@@ -270,7 +270,7 @@ app_parse_flow_conf(const char *conf_str)
}
if (pconf->tx_port >= RTE_MAX_ETHPORTS) {
RTE_LOG(ERR, APP, "pfc %u: invalid tx port %"PRIu8" index\n",
-   nb_pfc, pconf->rx_port);
+   nb_pfc, pconf->tx_port);
return -1;
}

-- 
1.9.1

[dpdk-dev] [PATCH v3] examples/qos_sched: fix negative loop bound

2016-04-21 Thread Michal Jastrzebski

From: Slawomir Mrozowicz 

Fix issue reported by Coverity.

Coverity ID 30704: Negative loop bound
negative_returns: Using unsigned variable n_tokens in a loop exit condition.

Fixes: de3cfa2c9823 ("sched: initial import")

Signed-off-by: Slawomir Mrozowicz 
---
 examples/qos_sched/args.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c
index 3e7fd08..7a98e5c 100644
--- a/examples/qos_sched/args.c
+++ b/examples/qos_sched/args.c
@@ -162,7 +162,7 @@ static int
 app_parse_opt_vals(const char *conf_str, char separator, uint32_t n_vals, 
uint32_t *opt_vals)
 {
char *string;
-   uint32_t i, n_tokens;
+   int i, n_tokens;
char *tokens[MAX_OPT_VALUES];

if (conf_str == NULL || opt_vals == NULL || n_vals == 0 || n_vals > 
MAX_OPT_VALUES)
-- 
1.9.1

[dpdk-dev] [PATCH v1 1/1] ixgbe: fix queue stop

2016-04-21 Thread Bruce Richardson

On Fri, Apr 15, 2016 at 01:31:03AM +, Lu, Wenzhuo wrote:
> Hi,
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Piotr Azarewicz
> > Sent: Thursday, April 14, 2016 6:00 PM
> > To: Zhang, Helin; Ananyev, Konstantin
> > Cc: dev at dpdk.org; Azarewicz, PiotrX T
> > Subject: [dpdk-dev] [PATCH v1 1/1] ixgbe: fix queue stop
> > 
> > It should be checked if queue enable bit is clear.
> > 
> > CID 13215 : Wrong operator used (CONSTANT_EXPRESSION_RESULT)
> > operator_confusion: txdctl | 33554432 is always 1/true regardless of the 
> > values
> > of its operand. This occurs as the logical second operand of '&&'.
> > 
> > CID 13216 : Wrong operator used (CONSTANT_EXPRESSION_RESULT)
> > operator_confusion: rxdctl | 33554432 is always 1/true regardless of the 
> > values
> > of its operand. This occurs as the logical second operand of '&&'.
> > 
> > Coverity issue: 13215
> > Coverity issue: 13216
> > Fixes: 029fd06d40fa ("ixgbe: queue start and stop")
> > 
> > Signed-off-by: Piotr Azarewicz 
> Acked-by: Wenzhuo Lu 
> Sometimes machine is smarter than human :)  Thanks Piotr for handling this.

Applied to dpdk-next-net/rel_16_07

/Bruce

[dpdk-dev] [PATCH v2] fm10k: set packet type for multi-segment packets

2016-04-21 Thread Bruce Richardson

On Mon, Apr 18, 2016 at 02:05:23PM +, Chen, Jing D wrote:
> Hi,
> 
> > -Original Message-
> > From: Michael Frasca [mailto:michael.frasca at oracle.com]
> > Sent: Monday, April 18, 2016 8:52 PM
> > To: Chen, Jing D 
> > Cc: dev at dpdk.org; Michael Frasca 
> > Subject: [PATCH v2] fm10k: set packet type for multi-segment packets
> > 
> > When building a chain of mbufs for a multi-segment packet, the packet_type
> > field resides at the end of the chain. It should be copied forward to the 
> > head
> > of the list.
> > 
> > Also, uses RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE to guard packet-type
> > computation. The mbuf fields are not copied when this define is not set.
> > 
> > Fixes: fe65e1e1ce61 ("fm10k: add vector scatter Rx")
> > 
> > Signed-off-by: Michael Frasca 
> Acked-by : Jing Chen 
> 
Applied to dpdk-next-net/rel_16_07

/Bruce

[dpdk-dev] [PATCH v3 1/2] virtio/vdev: add embeded device emulation

2016-04-21 Thread Yuanhan Liu

On Thu, Apr 21, 2016 at 02:56:35AM +, Jianfeng Tan wrote:
> Background: Previously, we usually use a virtio device in QEMU/VM's
> context as below pic shows. Virtio nic is emulated in QEMU, and usually
> presented in VM as a PCI device.
> 
> |---|
> | vm|
> |---| (over PCI bus or MMIO or Channel I/O)
> |QEMU   | -> device emulation
> |---|
>   |
>   | (vhost-user protocol or vhost-net ioctls)
>   |
> |---|
> |   vhost   |
> |---|
> 
> Then we come to the topic that how to present a virtio device in an app
> or container, which uses virtio device to do inter process communication
> with vhost backend process. To achieve that, first of all, we need way
> in DPDK to interract with vhost backend. And then emulate a virtual
> virtio device in DPDK (which is addressed in following patch).
> 
> |---|
> |  DPDK app |
> |---|
> |  DPDK lib | -> device emulation (addressed by following patch)
> |---|
>   |
>   | (vhost-user protocol or vhost-net ioctls), addressed by this patch
>   |
> |---|
> |   vhost   |
> |---|
> 
> How: we implement another instance of struct virtio_pci_ops to intercept
> the communications between VM and QEMU. Instead of rd/wr ioport or PCI
> configuration space, here we directly talk with backend through the vhost
> file.

Nope, that's wrong, and here becomes a bit subtle. I will try to make
some explanation here.

Let's talk about the normal case (with QEMU) first. Where, virtio PMD
is a driver, and virito device is emulated inside QEMU, and exposed by
PCI. So, virtio PMD talks to the device with ioport rd/wr (or MMIO for
virtio 1.0).

Till now, you are right.

However, vhost-user socket is for establishing a connection, providing
HOST with enough information, so that host can directly manipulate the
vring, to dequeue/enqueue buffers.

So, what you were saying about "directly talk with backend (the virtual
virtio device you created) through vhost file" is not right. Instead,
in your case, the (virtual) virtio device and the PMD driver is in
the same process space, therefore, you could actually access or the
device info simply by normal read/write.

As you can see, It's a bit messy to mix all of them (virtio PMD driver,
virtio device emulation, and vhost-uesr frontend) in one single directory
(or even in one single file as you did). Therefore, I'd suggest you to
make a new dir, say "virtio-user" (a good name from Thomas), and put all
files related to virtio device emulation and vhost-user frontend there.

Further more, I'd suggest to divide the code into following files:

- virtio-user/virtio.c

  All virtio device emulation goes here.

- virtio-user/vhost-user.c

  The vhost-user frontend implementation

- virtio-user/vhost-kernel.c

  vhost kernel hanldings, including setting the tap device.

- And, __maybe__ another standalone file for handling the talk
  between the driver and the device. (See more for the comments
  about virtio_pci_ops below).

That would make it much clearer, IMO.

Besides that, I came up with few minor nits below. You might want to
fix them all in the next version.

> +static int
> +vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num)
> +{
> + int r;
> + struct msghdr msgh;
> + struct iovec iov;
> + size_t fd_size = fd_num * sizeof(int);
> + char control[CMSG_SPACE(fd_size)];
> + struct cmsghdr *cmsg;
> +
> + bzero(&msgh, sizeof(msgh));
> + bzero(control, sizeof(control));

bzero is marked as deprecated (see the man page), use memset instead.

> +
> +static struct vhost_user_msg m __rte_unused;

Hmm, if it's not used, why define it. If it's used, why decorate it
with __rte_unused?

> +
> +static void
> +prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
> +{
> + int i, num;
> + struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS];
> + struct vhost_memory_region *mr;
> +
> + num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS);
> + if (num < 0)
> + rte_panic("Failed to prepare memory for vhost-user\n");

Do not use rte_panic, unless it's really needed. I see no good reason
to use it in a driver. If something we need is out of order, just
return and print some log and tell the user that this driver will not
work. This would keep other components work. You may then argue that
we have only one driver in container usage, but still, it's not a
good habit.

> +static void
> +vdev_reset(struct virtio_hw *hw __rte_unused)
> +{
> + /* do nothing according to qemu vhost user spec */

That's not the right way to quote spec, it barely tells us anything
useful. So, you should quote the content here.

> +
> +static const struct virtio_pci_ops vdev_ops = {
> + .read_dev_cfg   = vdev_read_dev_config,
> + .write_dev_cfg  = vdev_write_dev_config,
> + .reset  = vdev_reset,
> + .get_status = vdev_get_status,
> + .set_status

[dpdk-dev] [PATCH] ixgbe: fix bad shift operation in ixgbe_set_pool_tx

2016-04-21 Thread Bruce Richardson

On Mon, Apr 18, 2016 at 01:58:02AM +, Lu, Wenzhuo wrote:
> Hi,
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Tomasz Kulasek
> > Sent: Friday, April 15, 2016 10:33 PM
> > To: dev at dpdk.org
> > Cc: Zhang, Helin; Ananyev, Konstantin
> > Subject: [dpdk-dev] [PATCH] ixgbe: fix bad shift operation in 
> > ixgbe_set_pool_tx
> > 
> > CID 13190 (#1 of 1): Bad bit shift operation (BAD_SHIFT)
> > large_shift: In expression 1 << pool, left shifting by more than 31 bits has
> > undefined behavior. The shift amount, pool, is at least 32.
> > 
> > This patch limits mask shift to be in range of 32 bit PFVFTE[1] register, 
> > for pool >
> > 31.
> > 
> > Fixes: fe3a45fd4104 ("ixgbe: add VMDq support")
> > 
> > Signed-off-by: Tomasz Kulasek 
> Acked-by: Wenzhuo Lu 
> 
As with the rx function, I feel that this isn't the best fix, but that parameter
checking and returning an error might be better.

Also, since this is the same issue with the same fix repeated for both RX and TX
both fixes can probably be included in the same patch for a V2.

/Bruce

[dpdk-dev] [PATCH] ixgbe: fix bad shift operation in ixgbe_set_pool_rx

2016-04-21 Thread Bruce Richardson

On Fri, Apr 15, 2016 at 03:39:09PM +0200, Tomasz Kulasek wrote:
> CID 13193 (#1 of 1): Bad bit shift operation (BAD_SHIFT)
> large_shift: In expression 1 << pool, left shifting by more than 31 bits
> has undefined behavior. The shift amount, pool, is at least 32.
> 
> This patch limits mask shift to be in range of 32 bit PFVFRE[1] register,
> for pool > 31.
> 
> Fixes: fe3a45fd4104 ("ixgbe: add VMDq support")
> 
> Signed-off-by: Tomasz Kulasek 
> ---
>  drivers/net/ixgbe/ixgbe_ethdev.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c 
> b/drivers/net/ixgbe/ixgbe_ethdev.c
> index 3f1ebc1..f676a64 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -4401,7 +4401,7 @@ ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t 
> pool, uint8_t on)
>  
>   addr = IXGBE_VFRE(pool >= ETH_64_POOLS/2);
>   reg = IXGBE_READ_REG(hw, addr);
> - val = bit1 << pool;
> + val = bit1 << (pool & 0x01F);
>  
Are we sure this is the correct way to fix this. Rather than silently truncating
the pool value, are we not better to check our input parameters and return
EINVAL to the caller if pool overflows?

/Bruce

[dpdk-dev] [PATCH 0/6] fix i40e problematic dereference

2016-04-21 Thread Thomas Monjalon

> > 2016-04-21 11:42, Helin Zhang:
> > > It fixes several problematic dereferences in i40e driver, reported by
> > > Coverity.
> > >
> > > Helin Zhang (6):
> > >   i40e: fix problematic dereference
> > >   i40e: fix problematic dereference
> > >   i40e: fix problematic dereference
> > >   i40e: fix problematic dereference
> > >   i40e: fix problematic dereference
> > >   i40e: fix problematic dereference
> > 
> > One patch is enough to fix all the occurences of the same issue.
> 
> But their Fixes line are different. How to handle that?

You can have several Fixes lines.
Thanks

[dpdk-dev] [PATCH] ixgbe: fix bad shift operation in ixgbe_set_pool_rx

2016-04-21 Thread Kulasek, TomaszX



> -Original Message-
> From: Richardson, Bruce
> Sent: Thursday, April 21, 2016 15:52
> To: Kulasek, TomaszX 
> Cc: dev at dpdk.org; Zhang, Helin ; Ananyev,
> Konstantin 
> Subject: Re: [dpdk-dev] [PATCH] ixgbe: fix bad shift operation in
> ixgbe_set_pool_rx
> 
> On Fri, Apr 15, 2016 at 03:39:09PM +0200, Tomasz Kulasek wrote:
> > CID 13193 (#1 of 1): Bad bit shift operation (BAD_SHIFT)
> > large_shift: In expression 1 << pool, left shifting by more than 31
> > bits has undefined behavior. The shift amount, pool, is at least 32.
> >
> > This patch limits mask shift to be in range of 32 bit PFVFRE[1]
> > register, for pool > 31.
> >
> > Fixes: fe3a45fd4104 ("ixgbe: add VMDq support")
> >
> > Signed-off-by: Tomasz Kulasek 
> > ---
> >  drivers/net/ixgbe/ixgbe_ethdev.c |2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c
> > b/drivers/net/ixgbe/ixgbe_ethdev.c
> > index 3f1ebc1..f676a64 100644
> > --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> > +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> > @@ -4401,7 +4401,7 @@ ixgbe_set_pool_rx(struct rte_eth_dev *dev,
> > uint16_t pool, uint8_t on)
> >
> > addr = IXGBE_VFRE(pool >= ETH_64_POOLS/2);

For pool in 0..31 PFVFRE[0] is used, for pool in 32..63, PFVFRE[1], but for 
second case, we set/unset (pool-32) bit in the register. Invalid value if pool 
> 63, but catching it doesn't solve a problem of possible overflow for pool > 
31.

> > reg = IXGBE_READ_REG(hw, addr);
> > -   val = bit1 << pool;

Previous implementation expects that for shift operation will be used rol on 32 
bit value, and the bits that slide off the end of the register are fed back 
into the spaces, eg. (bit1 << 33) == (bit1 << 1).
Pool value can be bigger than 31, and this is not an error while pool is 
smaller than 64.

Truncating pool value is clearer for me, than relay on obscure shift operation.

> > +   val = bit1 << (pool & 0x01F);
> >
> Are we sure this is the correct way to fix this. Rather than silently
> truncating the pool value, are we not better to check our input parameters
> and return EINVAL to the caller if pool overflows?
> 
> /Bruce

Tomasz

[dpdk-dev] [PATCH v3 01/13] e1000: move pci device ids to driver

2016-04-21 Thread Thomas Monjalon

2016-04-21 08:08, Neil Horman:
> On Thu, Apr 21, 2016 at 09:27:18AM +0200, David Marchand wrote:
> > I don't mind doing trivial changes, but I don't have time for more on
> > this series.
> > 
> Um, I'm not sure what to say here.  The whole point of review is to help 
> improve
> the code.  If you don't have time to do anything non-trivial, Why are we
> reviewing it?

Neil, thanks for reviewing.
If David has no time to improve this series, maybe someone else can take over.
I don't see any problem to have several authors on the same series.

[dpdk-dev] Pktgen rate issue

2016-04-21 Thread Wiles, Keith

>Hello Keith,
>
>I have done a patch to the pktgen, but I not sure how to submit it. I 
>would be very grateful if you give me some advice on how to contribute.

Please have a look at the DPDK.org page on development : http://dpdk.org/dev

You could send me the patch on the current Pktgen and I can review the changes.

>
>Thank you,
>
> I?aki Murillo
>
>El 12/04/16 a las 14:55, Wiles, Keith escribi?:
>>> Hello
>>>
>>> I have been using pktgen for a while and I released that there is no
>>> possibility to set a rate between  two whole numbers.
>>>
>>>   I looked up the source code and I found out that the rate is stored in
>>> a uint8_t. So, I made a quick-and-dirty change just to check if it is
>>> possible to get a speed between two hole numbers. It seemed to work
>>> properly.
>>>
>>>   Is there a reason to use an uint8_t instead of a float, for example?
>> The uint8_t was easier then using floats :-)
>>
>> If you have a patch for this change please send it along and I will look at 
>> adding the support.
>>
>>> Thank you,
>>>
>>> I?aki Murillo
>>>
>>
>> Regards,
>> Keith
>>
>>
>>
>>
>
>


Regards,
Keith

[dpdk-dev] Memory leak when adding/removing vhost_user ports

2016-04-21 Thread Ilya Maximets

Hi, Christian.
You're, likely, using tar archive with openvswitch from openvswitch.org.
It doesn't contain many bug fixes from git/branch-2.5 unfortunately.

The problem that you are facing has been solved in branch-2.5 by

commit d9df7b9206831631ddbd90f9cbeef1b4fc5a8e89
Author: Ilya Maximets 
Date:   Thu Mar 3 11:30:06 2016 +0300

netdev-dpdk: Fix memory leak in netdev_dpdk_vhost_destruct().

Fixes: 4573fbd38fa1 ("netdev-dpdk: Add vhost-user multiqueue support")
Signed-off-by: Ilya Maximets 
Acked-by: Flavio Leitner 
Acked-by: Daniele Di Proietto 

Best regards, Ilya Maximets.

> I assume there is a leak somewhere on adding/removing vhost_user ports.
> Although it could also be "only" a fragmentation issue.
> 
> Reproduction is easy:
> I set up a pair of nicely working OVS-DPDK connected KVM Guests.
> Then in a loop I
>- add up to more 512 ports
>- test connectivity between the two guests
>- remove up to 512 ports
> 
> Depending on memory and the amount of multiqueue/rxq I use it seems to
> slightly change when exactly it breaks. But for my default setup of 4
> queues and 5G Hugepages initialized by DPDK it always breaks at the sixth
> iteration.
> Here a link to the stack trace indicating a memory shortage (TBC):
> https://launchpadlibrarian.net/253916410/apport-retrace.log
> 
> Known Todos:
> - I want to track it down more, and will try to come up with a non
> openvswitch based looping testcase that might show it as well to simplify
> debugging.
> - in use were Openvswitch-dpdk 2.5 and DPDK 2.2; Retest with DPDK 16.04 and
> Openvswitch master is planned.
> 
> I will go on debugging this and let you know, but I wanted to give a heads
> up to everyone.
> In case this is a known issue for some of you please let me know.
> 
> Kind Regards,
> Christian Ehrhardt
> Software Engineer, Ubuntu Server
> Canonical Ltd
> 
> P.S. I think it is a dpdk issue, but adding Daniele on CC to represent
> ovs-dpdk as well.

[dpdk-dev] [PATCH] eal: fix unchecked return value from library

2016-04-21 Thread Daniel Mrzyglod

Fix issue reported by Coverity.
Coverity ID 13194

The function returns a value that indicates an error condition. If this
 is not checked, the error condition may not be handled correctly.

In pci_vfio_mp_sync_thread: Value returned from a library function is not
checked for errors before being used. This value may indicate an error 
condition.

Fixes: 2f4adfad0a69 ("vfio: add multiprocess support")

Signed-off-by: Daniel Mrzyglod 
---
 lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c 
b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
index d9188fd..2b136fc 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
@@ -287,7 +287,10 @@ pci_vfio_mp_sync_thread(void __rte_unused * arg)
struct linger l;
l.l_onoff = 1;
l.l_linger = 60;
-   setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
+
+   if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) 
< 0)
+   RTE_LOG(ERR, EAL, "Cannot set SO_LINGER option "
+   "on listen socket (%s)\n", 
strerror(errno));

ret = vfio_mp_sync_receive_request(conn_sock);

-- 
2.5.5

[dpdk-dev] [PATCH v2] examples/qos_sched: fix out-of-bounds read

2016-04-21 Thread Michal Jastrzebski

From: Slawomir Mrozowicz 

Fix issue reported by Coverity.

Coverity ID 30708: Out-of-bounds read
overrun-local: Overrunning array tokens of 8 8-byte elements
at element index 4294967294 (byte offset 34359738352)
using index i (which evaluates to 4294967294).

Fixes: de3cfa2c9823 ("sched: initial import")

Signed-off-by: Slawomir Mrozowicz 
---
 examples/qos_sched/args.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c
index 3e7fd08..d819269 100644
--- a/examples/qos_sched/args.c
+++ b/examples/qos_sched/args.c
@@ -175,9 +175,11 @@ app_parse_opt_vals(const char *conf_str, char separator, 
uint32_t n_vals, uint32

n_tokens = rte_strsplit(string, strnlen(string, 32), tokens, n_vals, 
separator);

-   for(i = 0; i < n_tokens; i++) {
+   if (n_tokens > MAX_OPT_VALUES)
+   return -1;
+
+   for (i = 0; i < n_tokens; i++)
opt_vals[i] = (uint32_t)atol(tokens[i]);
-   }

free(string);

-- 
1.9.1

[dpdk-dev] [PATCH v2] examples/qos_meter: fix unchecked return value

2016-04-21 Thread Michal Jastrzebski

From: Slawomir Mrozowicz 

Fix issue reported by Coverity.

Coverity ID 30693: Unchecked return value
check_return: Calling rte_meter_srtcm_config without checking return value.

Fixes: e6541fdec8b2 ("meter: initial import")

Signed-off-by: Slawomir Mrozowicz 
---
 examples/qos_meter/main.c | 15 ++-
 examples/qos_meter/main.h |  2 +-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/examples/qos_meter/main.c b/examples/qos_meter/main.c
index b968b00..16b0b87 100644
--- a/examples/qos_meter/main.c
+++ b/examples/qos_meter/main.c
@@ -133,14 +133,17 @@ struct rte_meter_trtcm_params app_trtcm_params[] = {

 FLOW_METER app_flows[APP_FLOWS_MAX];

-static void
+static int
 app_configure_flow_table(void)
 {
uint32_t i, j;
+   int ret = 0;

-   for (i = 0, j = 0; i < APP_FLOWS_MAX; i ++, j = (j + 1) % 
RTE_DIM(PARAMS)){
-   FUNC_CONFIG(&app_flows[i], &PARAMS[j]);
-   }
+   for (i = 0, j = 0; i < APP_FLOWS_MAX;
+   i ++, j = (j + 1) % RTE_DIM(PARAMS))
+   ret |= FUNC_CONFIG(&app_flows[i], &PARAMS[j]);
+
+   return ret;
 }

 static inline void
@@ -381,7 +384,9 @@ main(int argc, char **argv)
rte_eth_promiscuous_enable(port_tx);

/* App configuration */
-   app_configure_flow_table();
+   ret = app_configure_flow_table();
+   if (ret < 0)
+   rte_exit(EXIT_FAILURE, "Invalid configure flow table\n");

/* Launch per-lcore init on every lcore */
rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
diff --git a/examples/qos_meter/main.h b/examples/qos_meter/main.h
index 530bf69..54867dc 100644
--- a/examples/qos_meter/main.h
+++ b/examples/qos_meter/main.h
@@ -51,7 +51,7 @@ enum policer_action 
policer_table[e_RTE_METER_COLORS][e_RTE_METER_COLORS] =
 #if APP_MODE == APP_MODE_FWD

 #define FUNC_METER(a,b,c,d) color, flow_id=flow_id, pkt_len=pkt_len, time=time
-#define FUNC_CONFIG(a,b)
+#define FUNC_CONFIG(a, b) 0
 #define PARAMS app_srtcm_params
 #define FLOW_METER int

-- 
1.9.1

[dpdk-dev] [PATCH 0/6] fix i40e problematic dereference

2016-04-21 Thread Zhang, Helin



> -Original Message-
> From: Richardson, Bruce
> Sent: Thursday, April 21, 2016 7:02 PM
> To: Zhang, Helin 
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 0/6] fix i40e problematic dereference
> 
> On Thu, Apr 21, 2016 at 11:42:51AM +0800, Helin Zhang wrote:
> > It fixes several problematic dereferences in i40e driver, reported by
> > Coverity.
> >
> > Helin Zhang (6):
> >   i40e: fix problematic dereference
> >   i40e: fix problematic dereference
> >   i40e: fix problematic dereference
> >   i40e: fix problematic dereference
> >   i40e: fix problematic dereference
> >   i40e: fix problematic dereference
> >
> Since this is the same issue fixed multiple times, I think it might be best 
> to just
> merge these patches into just one patch to fix the dereferences. You can 
> include
> all the fixes lines and coverity defect references in the one commit message.
Yes, that's good idea!
I will send v2 soon later.

Thanks,
Helin

> 
> /Bruce

[dpdk-dev] [PATCH 0/6] fix i40e problematic dereference

2016-04-21 Thread Zhang, Helin



> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Thursday, April 21, 2016 6:07 PM
> To: Zhang, Helin 
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 0/6] fix i40e problematic dereference
> 
> 2016-04-21 11:42, Helin Zhang:
> > It fixes several problematic dereferences in i40e driver, reported by
> > Coverity.
> >
> > Helin Zhang (6):
> >   i40e: fix problematic dereference
> >   i40e: fix problematic dereference
> >   i40e: fix problematic dereference
> >   i40e: fix problematic dereference
> >   i40e: fix problematic dereference
> >   i40e: fix problematic dereference
> 
> One patch is enough to fix all the occurences of the same issue.

But their Fixes line are different. How to handle that?

Thanks,
Helin

[dpdk-dev] [PATCH] virtio: fix segfault when transmit pkts

2016-04-21 Thread Jianfeng Tan

Issue: when using virtio nic to transmit pkts, it causes segment fault.

How to reproduce:
a. start testpmd with vhost.
$testpmd -c 0x3 -n 4 --socket-mem 1024,0 --no-pci \
  --vdev 'eth_vhost0,iface=/tmp/sock0,queues=1' -- -i --nb-cores=1
b. start a qemu with a virtio nic connected with the vhost-user port.
$qemu -smp cores=2,sockets=1 -cpu host -enable-kvm vm-0.img -vnc :5 -m 4G \
  -object memory-backend-file,id=mem,size=4096M,mem-path=,share=on \
  -numa node,memdev=mem -mem-prealloc \
  -chardev socket,id=char1,path=$sock_vhost \
  -netdev type=vhost-user,id=net1,chardev=char1 \
  -device virtio-net-pci,netdev=net1,mac=00:01:02:03:04:05
c. enable testpmd on the host.
testpmd> set fwd io
testpmd> start
d. start testpmd in VM.
$testpmd -c 0x3 -n 4 -m 1024 -- -i --disable-hw-vlan-filter --txqflags=0xf01
testpmd> set fwd txonly
testpmd> start

How to fix: this bug is because inside virtqueue_enqueue_xmit(), the flag of
desc has been updated inside the do {} while (); and after the loop, all descs
could have run out, so idx is VQ_RING_DESC_CHAIN_END (32768), use this idx to
reference the start_dp array will lead to segment fault.

Signed-off-by: Jianfeng Tan 
---
 drivers/net/virtio/virtio_rxtx.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index ef21d8e..432aeab 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -271,8 +271,6 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct 
rte_mbuf *cookie,
idx = start_dp[idx].next;
} while ((cookie = cookie->next) != NULL);

-   start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
-
if (use_indirect)
idx = txvq->vq_ring.desc[head_idx].next;

-- 
2.1.4

[dpdk-dev] [PATCH v2] mem: fix freeing of memzone used by ivshmem

2016-04-21 Thread Sergio Gonzalez Monroy

On 15/04/2016 09:29, Mauricio Vasquez B wrote:
> although previous implementation returned an error when trying to release a
> memzone assigned to an ivshmem device, it stills freed it.
>
> Fixes: cd10c42eb5bc ("mem: fix ivshmem freeing")
>
> Signed-off-by: Mauricio Vasquez B  studenti.polito.it>
> ---
> v2:
> solved compilation problem when ivshmem is disabled
>   lib/librte_eal/common/eal_common_memzone.c | 10 +++---
>   1 file changed, 7 insertions(+), 3 deletions(-)

This time I have waited to see the test-report (which I should have done 
for the v1).

Acked-by: Sergio Gonzalez Monroy

[dpdk-dev] [PATCH 1/1] lib/librte_eal: fix resource leak

2016-04-21 Thread Sergio Gonzalez Monroy

On 20/04/2016 10:15, David Marchand wrote:
> On Tue, Apr 19, 2016 at 6:27 PM, Marcin Kerlin  
> wrote:
>> Fix issue reported by Coverity.
>>
>> Coverity ID 13295, 13296, 13303:
>> Resource leak: The system resource will not be reclaimed
>> and reused, reducing the future availability of the resource.
>> In rte_eal_hugepage_attach: Leak of memory or pointers to system
>> resources.
>>
>> Fixes: af75078fece3 ("first public release")
>>
>> Signed-off-by: Marcin Kerlin 
>> ---
>>   lib/librte_eal/linuxapp/eal/eal_memory.c | 12 +++-
>>   1 file changed, 11 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c 
>> b/lib/librte_eal/linuxapp/eal/eal_memory.c
>> index 5b9132c..6320aa0 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_memory.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
>> @@ -1475,13 +1475,17 @@ rte_eal_hugepage_attach(void)
>>  "and retry running both primary "
>>  "and secondary processes\n");
>>  }
>> +
>> +   if (base_addr != MAP_FAILED)
>> +   munmap((void *)(uintptr_t)base_addr, 
>> mcfg->memseg[s].len);
>> +
> What is the point of this casting ?
> Idem for the rest of the patch.

I don't see the point either.
Marcin?

>
> I can't see cleanup for previously mapped segments when mapping one fails.
> Do we want this cleanup as well ?

Good point.

We are not really leaking resources because we panic if we fail to 
initialize eal memory.

Now, if we are going to do the cleanup, I think that as David points out 
we should be
cleaning up all previous mappings too.

Sergio
> CC Sergio.
>
>

[dpdk-dev] [PATCH v3 3/7] drivers/net/bnxt new driver for Broadcom bnxt

2016-04-21 Thread Thomas Monjalon

2016-04-21 11:00, Bruce Richardson:
> On Wed, Apr 20, 2016 at 02:32:18PM -0700, Stephen Hurd wrote:
> > > It's not for testing, more for code review and to help understand the code
> > > [though
> > > as you say, we do need to ensure that each commit doesn't actually break
> > > the
> > > build].
> > > Right now, the driver code goes in as a single commit - which makes it a
> > > hard
> > > enough task to review and see what is in there. One suggestion that
> > > hopefully
> > > wouldn't be too much work might be to split the code up into: basic device
> > > init code, RX and TX functions, and then any additional features based on
> > > top
> > > of that [ideally one patch per added feature].
> > >
> > 
> > The current driver doesn't have much beyond basic TX/RX, but we can give it
> > a shot.  "Too much work" is relative of course, but splitting it into
> > self-contained easily understood chunks will absolutely be a lot of work.
> > 
> > Adding Ajit who will also be working on this.  Since he's coming up to
> > speed on the driver code, this could be a good way for him to fully
> > familiarize himself with it.
> > 
> Sure, thanks. Any splitting you can do at all would be appreciated and help 
> with
> reviews.

I'd add that it helps newcomers to understand the driver by reading the
git history.
Thanks

[dpdk-dev] [PATCH 0/6] fix i40e problematic dereference

2016-04-21 Thread Thomas Monjalon

2016-04-21 11:42, Helin Zhang:
> It fixes several problematic dereferences in i40e driver,
> reported by Coverity.
> 
> Helin Zhang (6):
>   i40e: fix problematic dereference
>   i40e: fix problematic dereference
>   i40e: fix problematic dereference
>   i40e: fix problematic dereference
>   i40e: fix problematic dereference
>   i40e: fix problematic dereference

One patch is enough to fix all the occurences of the same issue.

[dpdk-dev] [PATCH v3 2/2] virtio/vdev: add a new vdev named eth_cvio

2016-04-21 Thread Thomas Monjalon

2016-04-21 02:56, Jianfeng Tan:
> Add a new virtual device named eth_cvio, it can be used just like
> eth_ring, eth_null, etc.

Why this name eth_cvio?
Why the prefix eth_?
The virtio-net driver uses a kernel device. Here it is a userland device.
Why not virtio-user?

>  .. table:: Features availability in networking drivers
>  
> -    = = = = = = = = = = = = = = = = = = = = = = = = = = 
> = = = = = = = = =
> -   Feature  a b b b c e e e i i i i i i i i i i f f f f m m m n 
> n p r s v v v v x
> -f n n o x 1 n n 4 4 4 4 g g x x x x m m m m l l p f 
> u c i z h i i m e
> -p x x n g 0 a i 0 0 0 0 b b g g g g 1 1 1 1 x x i p 
> l a n e o r r x n
> -a 2 2 d b 0   c e e e e   v b b b b 0 0 0 0 4 5 p   
> l p g d s t t n v
> -c x x i e 0   . v v   f e e e e k k k k e
>  a t i i e i
> -k   v n   . f f   . v v   . v v  
>  t   o o t r
> -e   f g   .   .   . f f   . f f  
>  a . 3 t
> +    = = = = = = = = = = = = = = = = = = = = = = = = = = 
> = = = = = = = = = =
> +   Feature  a b b b c e e e i i i i i i i i i i f f f f m m m n 
> n p r s v v v v x c
> +f n n o x 1 n n 4 4 4 4 g g x x x x m m m m l l p f 
> u c i z h i i m e v
> +p x x n g 0 a i 0 0 0 0 b b g g g g 1 1 1 1 x x i p 
> l a n e o r r x n i
> +a 2 2 d b 0   c e e e e   v b b b b 0 0 0 0 4 5 p   
> l p g d s t t n v r
> +c x x i e 0   . v v   f e e e e k k k k e
>  a t i i e i t
> +k   v n   . f f   . v v   . v v  
>  t   o o t r i
> +e   f g   .   .   . f f   . f f  
>  a . 3 t o
>  t v   v   v   v   v   v  
>  2 v
>e   e   e   e   e   e  
>e
>c   c   c   c   c   c  
>c
> -    = = = = = = = = = = = = = = = = = = = = = = = = = = 
> = = = = = = = = =

Please keep the alphabetical order.

[dpdk-dev] [PATCH 0/6] fix i40e problematic dereference

2016-04-21 Thread Bruce Richardson

On Thu, Apr 21, 2016 at 11:42:51AM +0800, Helin Zhang wrote:
> It fixes several problematic dereferences in i40e driver,
> reported by Coverity.
> 
> Helin Zhang (6):
>   i40e: fix problematic dereference
>   i40e: fix problematic dereference
>   i40e: fix problematic dereference
>   i40e: fix problematic dereference
>   i40e: fix problematic dereference
>   i40e: fix problematic dereference
> 
Since this is the same issue fixed multiple times, I think it might be best to
just merge these patches into just one patch to fix the dereferences. You can
include all the fixes lines and coverity defect references in the one commit
message.

/Bruce

[dpdk-dev] [RFC PATCH 0/2] performance utility in testpmd

2016-04-21 Thread Bruce Richardson

On Thu, Apr 21, 2016 at 11:54:12AM +0200, Thomas Monjalon wrote:
> 2016-04-20 18:43, Zhihong Wang:
> > This RFC patch proposes a general purpose forwarding engine in testpmd
> > namely "portfwd", to enable performance analysis and tuning for poll mode
> > drivers in vSwitching scenarios.
> > 
> > 
> > Problem statement
> > -
> > 
> > vSwitching is more I/O bound in a lot of cases since there are a lot of
> > LLC/cross-core memory accesses.
> > 
> > In order to reveal memory/cache behavior in real usage scenarios and enable
> > efficient performance analysis and tuning for vSwitching, DPDK needs a
> > sample application that supports traffic flow close to real deployment,
> > e.g. multi-tenancy, service chaining.
> > 
> > There is a vhost sample application currently to enable simple vSwitching
> > scenarios, it comes with several limitations:
> > 
> >1) Traffic flow is too simple and not flexible
> > 
> >2) Switching based on MAC/VLAN only
> > 
> >3) Not enough performance metrics
> > 
> > 
> > Proposed solution
> > -
> > 
> > The testpmd sample application is a good choice, it's a powerful poll mode
> > driver management framework hosts various forwarding engine.
> 
> Not sure it is a good choice.
> The goal of testpmd is to test every PMD features.
> How far can we go in adding some stack processing while keeping it
> easily maintainable?

I was thinking the exact same thing. Would it not be better to enhance the
existing vhost example application to remove the limitations you call out above?
I don't particularly like the idea of introducing protocol awareness into 
testpmd
for IP forwarding, for instance.

Regards,
/Bruce

[dpdk-dev] [RFC PATCH 0/2] performance utility in testpmd

2016-04-21 Thread Thomas Monjalon

2016-04-20 18:43, Zhihong Wang:
> This RFC patch proposes a general purpose forwarding engine in testpmd
> namely "portfwd", to enable performance analysis and tuning for poll mode
> drivers in vSwitching scenarios.
> 
> 
> Problem statement
> -
> 
> vSwitching is more I/O bound in a lot of cases since there are a lot of
> LLC/cross-core memory accesses.
> 
> In order to reveal memory/cache behavior in real usage scenarios and enable
> efficient performance analysis and tuning for vSwitching, DPDK needs a
> sample application that supports traffic flow close to real deployment,
> e.g. multi-tenancy, service chaining.
> 
> There is a vhost sample application currently to enable simple vSwitching
> scenarios, it comes with several limitations:
> 
>1) Traffic flow is too simple and not flexible
> 
>2) Switching based on MAC/VLAN only
> 
>3) Not enough performance metrics
> 
> 
> Proposed solution
> -
> 
> The testpmd sample application is a good choice, it's a powerful poll mode
> driver management framework hosts various forwarding engine.

Not sure it is a good choice.
The goal of testpmd is to test every PMD features.
How far can we go in adding some stack processing while keeping it
easily maintainable?

> Now with the vhost pmd feature, it can also handle vhost devices, only a
> new forwarding engine is needed to make use of it.

Why a new forwarding engine is needed for vhost?

> portfwd is implemented to this end.
> 
> Features of portfwd:
> 
>1) Build up traffic from simple rx/tx to complex scenarios easily
> 
>2) Rich performance statistics for all ports

Have you checked CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES and
CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS?

>3) Core affinity manipulation
> 
>4) Commands for run time configuration
> 
> Notice that portfwd has fair performance, but it's not for getting the
> "maximum" numbers:
> 
>1) It buffers packets for burst send efficiency analysis, which increase
>   latency
> 
>2) It touches the packet header and collect performance statistics which
>   adds overheads
> 
> These "extra" overheads are actually what happens in real applications.
[...]
> Implementation details
> --
> 
> To enable flexible traffic flow setup, each port has 2 ways to forward
> packets in portfwd:

Should not it be 2 forward engines?
Please first describe the existing engines to help making a decision.

>1) Forward based on dst ip
[...]
>2) Forward to a fixed port
[...]

[dpdk-dev] Couple of PMD questions

2016-04-21 Thread Bruce Richardson

On Wed, Apr 20, 2016 at 10:47:59AM -0500, Jay Rolette wrote:
> On Wed, Apr 20, 2016 at 9:05 AM, Bruce Richardson <
> bruce.richardson at intel.com> wrote:
> 
> > On Wed, Apr 20, 2016 at 07:22:57AM -0500, Jay Rolette wrote:
> > > On Wed, Apr 20, 2016 at 4:10 AM, Bruce Richardson <
> > > bruce.richardson at intel.com> wrote:
> > >
> > > > On Tue, Apr 19, 2016 at 03:16:37PM -0500, Jay Rolette wrote:
> > > > > In ixgbe_dev_rx_init(), there is this bit of code:
> > > > >
> > > > >   /*
> > > > >* Configure the RX buffer size in the BSIZEPACKET field of
> > > > >* the SRRCTL register of the queue.
> > > > >* The value is in 1 KB resolution. Valid values can be from
> > > > >* 1 KB to 16 KB.
> > > > >*/
> > > > >   buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool)
> > -
> > > > >   RTE_PKTMBUF_HEADROOM);
> > > > >   srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
> > > > >  IXGBE_SRRCTL_BSIZEPKT_MASK);
> > > > >
> > > > >   IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
> > > > >
> > > > >   buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
> > > > >  IXGBE_SRRCTL_BSIZEPKT_SHIFT);
> > > > >
> > > > >   /* It adds dual VLAN length for supporting dual VLAN */
> > > > >   if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
> > > > >   2 * IXGBE_VLAN_TAG_SIZE > buf_size)
> > > > >   dev->data->scattered_rx = 1;
> > > > >
> > > > >
> > > > > Couple of questions I have about it:
> > > > >
> > > > > 1) If the caller configured the MBUF memory pool data room size to be
> > > > > something other than a multiple of 1K (+ RTE_PKTMBUF_HEADROOM), then
> > the
> > > > > driver ends up silently programming the NIC to use a smaller max RX
> > size
> > > > > than the caller specified.
> > > > >
> > > > > Should the driver error out in that case instead of only "sort of"
> > > > working?
> > > > > If not, it seems like it should be logging an error message.
> > > >
> > > > Well, it does log at the end of the whole rx init process what RX
> > function
> > > > is
> > > > being used, so there is that.
> > > > However, looking at it now, given that there is an explicit setting in
> > the
> > > > config
> > > > to request scattered mode, I think you may be right and that we should
> > > > error out
> > > > here if scattered mode is needed and not set. It could help prevent
> > > > unexpected
> > > > problems with these drivers.
> > > >
> > >
> > > +1. A hard fail at init if I've misconfigured things is much preferred to
> > > something that mostly works for typical test cases and only fails on
> > > corner/limit testing.
> > >
> > >
> > > > > 2) Second question is about the "/* It adds dual VLAN length for
> > > > supporting
> > > > > dual VLAN */" bit...
> > > > >
> > > > > As I understand it, dev_conf.rxmode.max_rx_pkt_len is supposed to be
> > set
> > > > to
> > > > > the max frame size you support (although it says it's only used if
> > jumbo
> > > > > frame is enabled). That same value is generally used when
> > calculating the
> > > > > size that mbuf elements should be created at.
> > > > >
> > > > > The description for the data_room_size parameter of
> > > > > rte_pktmbuf_pool_create():
> > > > >
> > > > > "Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM."
> > > > >
> > > > >
> > > > > If I support a max frame size of 9216 bytes (exactly a 1K multiple to
> > > > make
> > > > > the NIC happy), then max_rx_pkt_len is going to be 9216 and
> > > > data_room_size
> > > > > will be 9216 + RTE_PKTMBUF_HEADROOM.
> > > > >
> > > > > ixgbe_dev_rx_init() will calculate normalize that back to 9216, which
> > > > will
> > > > > fail the dual VLAN length check. The really nasty part about that is
> > it
> > > > has
> > > > > a side-effect of enabling scattered RX regardless of the fact that I
> > > > didn't
> > > > > enable scattered RX in dev_conf.rxmode.
> > > > >
> > > > > The code in the e1000 PMD is similar, so nothing unique to ixgbe.
> > > > >
> > > > > Is that check correct? It seems wrong to be adding space for q-in-q
> > on
> > > > top
> > > > > of your specified max frame size...
> > > >
> > > > The issue here is what the correct behaviour needs to be. If we have
> > the
> > > > user
> > > > specify the maximum frame size including all vlan tags, then we hit the
> > > > problem
> > > > where we have to subtract the VLAN tag sizes when writing the value to
> > the
> > > > NIC.
> > > > In that case, we will hit a problem where we get a e.g. 9210 byte
> > frame -
> > > > to
> > > > reuse your example - without any VLAN tags, which will be rejected by
> > the
> > > > hardware as being oversized. If we don't do the subtraction, and we
> > get the
> > > > same 9210 byte packet with 2 VLAN tags on it, the hardware will accept
> > it
> > > > and
> > > > then split it across multiple descriptors because the actual DMA

[dpdk-dev] [PATCH 1/1] lib/librte_eal: fix resource leak

2016-04-21 Thread Kerlin, MarcinX

> -Original Message-
> From: Gonzalez Monroy, Sergio
> Sent: Thursday, April 21, 2016 1:19 PM
> To: David Marchand 
> Cc: dev at dpdk.org; Kerlin, MarcinX 
> Subject: Re: [PATCH 1/1] lib/librte_eal: fix resource leak
> 
> On 20/04/2016 10:15, David Marchand wrote:
> > On Tue, Apr 19, 2016 at 6:27 PM, Marcin Kerlin 
> wrote:
> >> Fix issue reported by Coverity.
> >>
> >> Coverity ID 13295, 13296, 13303:
> >> Resource leak: The system resource will not be reclaimed and reused,
> >> reducing the future availability of the resource.
> >> In rte_eal_hugepage_attach: Leak of memory or pointers to system
> >> resources.
> >>
> >> Fixes: af75078fece3 ("first public release")
> >>
> >> Signed-off-by: Marcin Kerlin 
> >> ---
> >>   lib/librte_eal/linuxapp/eal/eal_memory.c | 12 +++-
> >>   1 file changed, 11 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c
> >> b/lib/librte_eal/linuxapp/eal/eal_memory.c
> >> index 5b9132c..6320aa0 100644
> >> --- a/lib/librte_eal/linuxapp/eal/eal_memory.c
> >> +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
> >> @@ -1475,13 +1475,17 @@ rte_eal_hugepage_attach(void)
> >>  "and retry running both primary "
> >>  "and secondary processes\n");
> >>  }
> >> +
> >> +   if (base_addr != MAP_FAILED)
> >> +   munmap((void *)(uintptr_t)base_addr,
> >> + mcfg->memseg[s].len);
> >> +
> > What is the point of this casting ?
> > Idem for the rest of the patch.
> 
> I don't see the point either.
> Marcin?

Oh sorry, right, an oversight with the redundant casting.

> 
> >
> > I can't see cleanup for previously mapped segments when mapping one fails.
> > Do we want this cleanup as well ?
> 
> Good point.
> 
> We are not really leaking resources because we panic if we fail to initialize 
> eal
> memory.
> 
> Now, if we are going to do the cleanup, I think that as David points out we
> should be cleaning up all previous mappings too.

Exactly app panic after fail so do we need to worry about these warnings from 
Coverity and try to improve or leave it without affecting?

> 
> Sergio
> > CC Sergio.
> >
> >

[dpdk-dev] [PATCH 6/6] i40e: fix problematic dereference

2016-04-21 Thread Helin Zhang

Fix issue reported by Coverity.

Coverity ID 13265: Missing break in switch.

Fixes: 4861cde46116 ("i40e: new poll mode driver")

Signed-off-by: Helin Zhang 
---
 drivers/net/i40e/i40e_pf.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/i40e/i40e_pf.c b/drivers/net/i40e/i40e_pf.c
index 1bd599b..1ea7b0a 100644
--- a/drivers/net/i40e/i40e_pf.c
+++ b/drivers/net/i40e/i40e_pf.c
@@ -1000,8 +1000,6 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
 /* Don't add command supported below, which will
 *  return an error code.
 */
-   case I40E_VIRTCHNL_OP_FCOE:
-   PMD_DRV_LOG(ERR, "OP_FCOE received, not supported");
default:
PMD_DRV_LOG(ERR, "%u received, not supported", opcode);
i40e_pf_host_send_msg_to_vf(vf, opcode, I40E_ERR_PARAM,
-- 
2.5.0

[dpdk-dev] [PATCH 5/6] i40e: fix problematic dereference

2016-04-21 Thread Helin Zhang

Fix issue reported by Coverity.

Coverity ID 13298: Dereference before null check.

Fixes: 4861cde46116 ("i40e: new poll mode driver")

Signed-off-by: Helin Zhang 
---
 drivers/net/i40e/i40e_pf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_pf.c b/drivers/net/i40e/i40e_pf.c
index 3ec8f7c..1bd599b 100644
--- a/drivers/net/i40e/i40e_pf.c
+++ b/drivers/net/i40e/i40e_pf.c
@@ -123,7 +123,7 @@ int
 i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool do_hw_reset)
 {
uint32_t val, i;
-   struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf);
+   struct i40e_hw *hw;
uint16_t vf_id, abs_vf_id, vf_msix_num;
int ret;
struct i40e_virtchnl_queue_select qsel;
@@ -131,6 +131,7 @@ i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool 
do_hw_reset)
if (vf == NULL)
return -EINVAL;

+   hw = I40E_PF_TO_HW(vf->pf);
vf_id = vf->vf_idx;
abs_vf_id = vf_id + hw->func_caps.vf_base_id;

-- 
2.5.0

[dpdk-dev] [PATCH 4/6] i40e: fix problematic dereference

2016-04-21 Thread Helin Zhang

Fix issue reported by Coverity.

Coverity ID 13299: Dereference before null check.

Fixes: 4861cde46116 ("i40e: new poll mode driver")

Signed-off-by: Helin Zhang 
---
 drivers/net/i40e/i40e_pf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_pf.c b/drivers/net/i40e/i40e_pf.c
index 5afd61a..3ec8f7c 100644
--- a/drivers/net/i40e/i40e_pf.c
+++ b/drivers/net/i40e/i40e_pf.c
@@ -913,7 +913,7 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
/* AdminQ will pass absolute VF id, transfer to internal vf id */
uint16_t vf_id = abs_vf_id - hw->func_caps.vf_base_id;

-   if (!dev || vf_id > pf->vf_num - 1 || !pf->vfs) {
+   if (vf_id > pf->vf_num - 1 || !pf->vfs) {
PMD_DRV_LOG(ERR, "invalid argument");
return;
}
-- 
2.5.0

[dpdk-dev] [PATCH 3/6] i40e: fix problematic dereference

2016-04-21 Thread Helin Zhang

Fix issue reported by Coverity.

Coverity ID 13294: Dereference before null check.

Fixes: a778a1fa2e4e ("i40e: set up and initialize flow director")

Signed-off-by: Helin Zhang 
---
 drivers/net/i40e/i40e_rxtx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 31bfc44..4fd96fe 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -2981,13 +2981,15 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
struct i40e_tx_queue *txq;
const struct rte_memzone *tz = NULL;
uint32_t ring_size;
-   struct rte_eth_dev *dev = pf->adapter->eth_dev;
+   struct rte_eth_dev *dev;

if (!pf) {
PMD_DRV_LOG(ERR, "PF is not available");
return I40E_ERR_BAD_PTR;
}

+   dev = pf->adapter->eth_dev;
+
/* Allocate the TX queue data structure. */
txq = rte_zmalloc_socket("i40e fdir tx queue",
  sizeof(struct i40e_tx_queue),
-- 
2.5.0

[dpdk-dev] [PATCH 2/6] i40e: fix problematic dereference

2016-04-21 Thread Helin Zhang

Fix issue reported by Coverity.

Coverity ID 13301: Dereference before null check.

Fixes: a778a1fa2e4e ("i40e: set up and initialize flow director")

Signed-off-by: Helin Zhang 
---
 drivers/net/i40e/i40e_rxtx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 9c126a3..31bfc44 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3035,13 +3035,15 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
struct i40e_rx_queue *rxq;
const struct rte_memzone *rz = NULL;
uint32_t ring_size;
-   struct rte_eth_dev *dev = pf->adapter->eth_dev;
+   struct rte_eth_dev *dev;

if (!pf) {
PMD_DRV_LOG(ERR, "PF is not available");
return I40E_ERR_BAD_PTR;
}

+   dev = pf->adapter->eth_dev;
+
/* Allocate the RX queue data structure. */
rxq = rte_zmalloc_socket("i40e fdir rx queue",
  sizeof(struct i40e_rx_queue),
-- 
2.5.0

[dpdk-dev] [PATCH 1/6] i40e: fix problematic dereference

2016-04-21 Thread Helin Zhang

Fix issue reported by Coverity.

Coverity ID 119267: Dereference before null check.

Fixes: 8e109464c022 ("i40e: allow vector Rx and Tx usage")

Signed-off-by: Helin Zhang 
---
 drivers/net/i40e/i40e_rxtx.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 4d35d83..9c126a3 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -2592,14 +2592,14 @@ i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
 {
uint16_t i;

-   /* SSE Vector driver has a different way of releasing mbufs. */
-   if (rxq->rx_using_sse) {
-   i40e_rx_queue_release_mbufs_vec(rxq);
+   if (!rxq || !rxq->sw_ring) {
+   PMD_DRV_LOG(DEBUG, "Pointer to rxq or sw_ring is NULL");
return;
}

-   if (!rxq || !rxq->sw_ring) {
-   PMD_DRV_LOG(DEBUG, "Pointer to rxq or sw_ring is NULL");
+   /* SSE Vector driver has a different way of releasing mbufs. */
+   if (rxq->rx_using_sse) {
+   i40e_rx_queue_release_mbufs_vec(rxq);
return;
}

-- 
2.5.0

[dpdk-dev] [PATCH 0/6] fix i40e problematic dereference

2016-04-21 Thread Helin Zhang

It fixes several problematic dereferences in i40e driver,
reported by Coverity.

Helin Zhang (6):
  i40e: fix problematic dereference
  i40e: fix problematic dereference
  i40e: fix problematic dereference
  i40e: fix problematic dereference
  i40e: fix problematic dereference
  i40e: fix problematic dereference

 drivers/net/i40e/i40e_pf.c   |  7 +++
 drivers/net/i40e/i40e_rxtx.c | 18 +++---
 2 files changed, 14 insertions(+), 11 deletions(-)

-- 
2.5.0

[dpdk-dev] ixgbe : query regarding your code changes for VF mac add

2016-04-21 Thread santosh

Hi Ivan and team,

Please respond to my last mail and  let me know if there is any
alternate way to handle this.
Our release is in pending due to this issue.


Thanks & Regards
Santosh

On Wed, Apr 20, 2016 at 2:35 PM, santosh  wrote:
> Hi Ivan,
>
> Thanks for your response.
>
> Let me explain you the issue that we are facing on our virtual router
> in VMware environment.
>
> 1. We are using ixgbe driver , SRIOV enabled .
> root at localhost:~# lspci
>    "Intel Corporation 82599 Ethernet Controller Virtual Function"
>
> 2.  "mx86-bgl-1-r1"  is our router under testing and  R2 is a standard router.
>
>   mx86-bgl-1-r1 is connected to R2
>   mx86-bgl-1-r1 (10.1.1.103)-->R2(10.1.1.101)
>
> 2. This time ping test passes
>
> root at mx86-bgl-1-r1# show interfaces
> ge-0/0/0 {
> unit 0 {
> family inet {
> address 10.1.1.103/24;
> }
> }
> }
>
> [edit]
> root at mx86-bgl-1-r1#
> root at mx86-bgl-1-r1# run ping 10.1.1.101
> PING 10.1.1.101 (10.1.1.101): 56 data bytes
> 64 bytes from 10.1.1.101: icmp_seq=0 ttl=64 time=0.980 ms
> 64 bytes from 10.1.1.101: icmp_seq=1 ttl=64 time=1.433 ms
>
>
> 3.  Default MAC address of interface ge-0/0/0  is as shown below:
>
> root at mx86-bgl-1-r1# run show interfaces ge-0/0/0 extensive | grep current
>   Current address: 02:06:0a:0e:ff:f0, Hardware address: 02:06:0a:0e:ff:f0
>
> [edit]
> root at mx86-bgl-1-r1#
>
>
> 4.  Now I am changing MAC. Ping works this time also
>
> root at mx86-bgl-1-r1# set interfaces ge-0/0/0 mac 02:06:0a:0a:ff:f0
>
> [edit]
> root at mx86-bgl-1-r1# commit
> commit complete
>
> [edit]
> root at mx86-bgl-1-r1# run show interfaces ge-0/0/0 extensive | grep current
>   Current address: 02:06:0a:0a:ff:f0, Hardware address: 02:06:0a:0e:ff:f0
>
> [edit]
> root at mx86-bgl-1-r1# show interfaces
> ge-0/0/0 {
> mac 02:06:0a:0a:ff:f0;
> unit 0 {
> family inet {
> address 10.1.1.103/24;
> }
> }
> }
>
> [edit]
> root at mx86-bgl-1-r1#
> root at mx86-bgl-1-r1# run ping 10.1.1.101
> PING 10.1.1.101 (10.1.1.101): 56 data bytes
> 64 bytes from 10.1.1.101: icmp_seq=0 ttl=64 time=1.338 ms
> 64 bytes from 10.1.1.101: icmp_seq=1 ttl=64 time=8.832 ms
> 64 bytes from 10.1.1.101: icmp_seq=2 ttl=64 time=1.292 ms
>
>
> 5.  I am deleting the above configured MAC.
>  In this case newly configured MAC as shown above will be deleted
> and Default MAC (02:06:0a:0e:ff:f0)
>   will be applied.  Ping fails at this step.  "return statement
> added by you is not allowing to configure default MAC.
>
>
> [edit]
> root at mx86-bgl-1-r1# delete interfaces ge-0/0/0 mac
>
> [edit]
> root at mx86-bgl-1-r1# commit
> commit complete
>
> [edit]
> root at mx86-bgl-1-r1# show interfaces
> ge-0/0/0 {
> unit 0 {
> family inet {
> address 10.1.1.103/24;
> }
> }
> }
>
> [edit]
>
> root at mx86-bgl-1-r1# run show interfaces ge-0/0/0 extensive | grep
> current // Displays value from router's local database not from
> NIC.
>   Current address: 02:06:0a:0e:ff:f0, Hardware address: 02:06:0a:0e:ff:f0
>
> [edit]
> root at mx86-bgl-1-r1# run ping 10.1.1.101
> PING 10.1.1.101 (10.1.1.101): 56 data bytes
> ^C
> 2 packets transmitted, 0 packets received, 100% packet loss
>
> [edit]
> root at mx86-bgl-1-r1#
>
>
> 7. LOGs:(I have added a debug log just before the return statement
> that you place)
>
> Log o/p in failure case
> 
> Santosh ixgbevf_add_mac_addr returning
> 
>
> code changes:
> -
> ixgbevf_add_mac_addr()  {
> ...
> if (memcmp(hw->mac.perm_addr, mac_addr, sizeof(struct ether_addr)) == 0) {
> PMD_DRV_LOG(DEBUG, "Existing MAC \n");
> printf("Santosh %s returning \n",__FUNCTION__);
> return;
> }
>
>
> 8.  If I remove the above "return" statement, build the driver, loaded
> in router and repeat the steps-2 to steps-5 of MAC add and MAC delete
> on our router then ping passes.
>
> root at mx86-bgl-1-r1# run ping 10.1.1.101
> PING 10.1.1.101 (10.1.1.101): 56 data bytes
> 64 bytes from 10.1.1.101: icmp_seq=0 ttl=64 time=2.356 ms
> 64 bytes from 10.1.1.101: icmp_seq=1 ttl=64 time=1.415 ms
> 64 bytes from 10.1.1.101: icmp_seq=2 ttl=64 time=1.226 ms
> ^C
> --- 10.1.1.101 ping statistics ---
> 3 packets transmitted, 3 packets received, 0% packet loss
> round-trip min/avg/max/stddev = 1.226/1.666/2.356/0.494 ms
>
>
> 9.  Please let me know whether is it  fine to remove that return
> statement added by you in  "ixgbevf_add_mac_addr"  ?
>
>
>
> Thanks & Regards,
> Santosh
>
> On Wed, Apr 20, 2016 at 3:31 AM, Ivan Boule  wrote:
>> Hi Santosh,
>>
>> I do not get exactly what you attempt to do on a VF.
>> Are you first deleting the so-called permanent MAC address by a call to the
>> function ixgbevf_remove_mac_addr() ? This operation is not allowed.
>> Can you explain exactly the sequence of operations that are done, so that I
>> can understand how the test (memcmp(hw->mac.perm_addr, mac_ad

[dpdk-dev] Couple of PMD questions

2016-04-21 Thread Andriy Berestovskyy

Hi Jay,
Max RX frame size and buffer size are independent bits. You may set
MAXFRS to 9K and feed 512B buffers or vice versa. The issue is that
you are trying to tune both of the options to avoid buf chains.

Here is another way to enable jumbo frames and avoid buf chains:
1. Disable scattering and jumbo frames at rte_eth_dev_configure().

2. Make sure the rte_pktmbuf_data_room_size(mpool)
-RTE_PKTMBUF_HEADROOM is aligned to 1K.

3. Use rte_eth_dev_set_mtu() to enable jumbo frames. Since the
scattering is disabled, set_mtu() will make sure the new packet size
fits into a single mbuf.

Andriy

On Wed, Apr 20, 2016 at 5:47 PM, Jay Rolette  wrote:
>
> On Wed, Apr 20, 2016 at 9:05 AM, Bruce Richardson
>  wrote:
>>
>> On Wed, Apr 20, 2016 at 07:22:57AM -0500, Jay Rolette wrote:
>> > On Wed, Apr 20, 2016 at 4:10 AM, Bruce Richardson <
>> > bruce.richardson at intel.com> wrote:
>> >
>> > > On Tue, Apr 19, 2016 at 03:16:37PM -0500, Jay Rolette wrote:
>> > > > In ixgbe_dev_rx_init(), there is this bit of code:
>> > > >
>> > > >   /*
>> > > >* Configure the RX buffer size in the BSIZEPACKET field of
>> > > >* the SRRCTL register of the queue.
>> > > >* The value is in 1 KB resolution. Valid values can be from
>> > > >* 1 KB to 16 KB.
>> > > >*/
>> > > >   buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool)
>> > > > -
>> > > >   RTE_PKTMBUF_HEADROOM);
>> > > >   srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
>> > > >  IXGBE_SRRCTL_BSIZEPKT_MASK);
>> > > >
>> > > >   IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
>> > > >
>> > > >   buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK)
>> > > > <<
>> > > >  IXGBE_SRRCTL_BSIZEPKT_SHIFT);
>> > > >
>> > > >   /* It adds dual VLAN length for supporting dual VLAN */
>> > > >   if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
>> > > >   2 * IXGBE_VLAN_TAG_SIZE >
>> > > > buf_size)
>> > > >   dev->data->scattered_rx = 1;
>> > > >
>> > > >
>> > > > Couple of questions I have about it:
>> > > >
>> > > > 1) If the caller configured the MBUF memory pool data room size to
>> > > > be
>> > > > something other than a multiple of 1K (+ RTE_PKTMBUF_HEADROOM), then
>> > > > the
>> > > > driver ends up silently programming the NIC to use a smaller max RX
>> > > > size
>> > > > than the caller specified.
>> > > >
>> > > > Should the driver error out in that case instead of only "sort of"
>> > > working?
>> > > > If not, it seems like it should be logging an error message.
>> > >
>> > > Well, it does log at the end of the whole rx init process what RX
>> > > function
>> > > is
>> > > being used, so there is that.
>> > > However, looking at it now, given that there is an explicit setting in
>> > > the
>> > > config
>> > > to request scattered mode, I think you may be right and that we should
>> > > error out
>> > > here if scattered mode is needed and not set. It could help prevent
>> > > unexpected
>> > > problems with these drivers.
>> > >
>> >
>> > +1. A hard fail at init if I've misconfigured things is much preferred
>> > to
>> > something that mostly works for typical test cases and only fails on
>> > corner/limit testing.
>> >
>> >
>> > > > 2) Second question is about the "/* It adds dual VLAN length for
>> > > supporting
>> > > > dual VLAN */" bit...
>> > > >
>> > > > As I understand it, dev_conf.rxmode.max_rx_pkt_len is supposed to be
>> > > > set
>> > > to
>> > > > the max frame size you support (although it says it's only used if
>> > > > jumbo
>> > > > frame is enabled). That same value is generally used when
>> > > > calculating the
>> > > > size that mbuf elements should be created at.
>> > > >
>> > > > The description for the data_room_size parameter of
>> > > > rte_pktmbuf_pool_create():
>> > > >
>> > > > "Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM."
>> > > >
>> > > >
>> > > > If I support a max frame size of 9216 bytes (exactly a 1K multiple
>> > > > to
>> > > make
>> > > > the NIC happy), then max_rx_pkt_len is going to be 9216 and
>> > > data_room_size
>> > > > will be 9216 + RTE_PKTMBUF_HEADROOM.
>> > > >
>> > > > ixgbe_dev_rx_init() will calculate normalize that back to 9216,
>> > > > which
>> > > will
>> > > > fail the dual VLAN length check. The really nasty part about that is
>> > > > it
>> > > has
>> > > > a side-effect of enabling scattered RX regardless of the fact that I
>> > > didn't
>> > > > enable scattered RX in dev_conf.rxmode.
>> > > >
>> > > > The code in the e1000 PMD is similar, so nothing unique to ixgbe.
>> > > >
>> > > > Is that check correct? It seems wrong to be adding space for q-in-q
>> > > > on
>> > > top
>> > > > of your specified max frame size...
>> > >
>> > > The issue here is what the correct behaviour needs to be. If we have
>> > > the
>> > > user
>> > > specify the maximum frame size including all vlan tags, then we h

[dpdk-dev] Memory leak when adding/removing vhost_user ports

2016-04-21 Thread Christian Ehrhardt

Hi,
I can follow your argument that - and agree that in this case the leak
can't be solved your patch.
Still I found it useful to revise it along our discussion as eventually it
will still be a good patch to have.
I followed your suggestion and found:
- rte_vhost_driver_register callocs vserver (implies fh=0)
- later when on init when getting the callback to vserver_new_vq_conn it
would get set by ops->new_device(vdev_ctx);
- but as you pointed out that could be fh = 0 for the first
- so I initialized vserver->fh with -1 in rte_vhost_driver_register - that
won't ever be a real fh.
- later on get_config_ll_entry won't find a device with that then on the
call by destroy_device.
- so the revised patch currently in use (still for DPDK 2.2) can be found
here http://paste.ubuntu.com/15961394/

Also as you requested I tried with no guest attached at all - that way I
can still reproduce it.
Here is a new stacktrace, but to me it looks the same
http://paste.ubuntu.com/15961185/
Also as you asked before a log of the vswitch, but it is 895MB since a lot
of messages repeat on port add/remove.
Even compressed still 27MB - I need to do something about verbosity there.
Also the system journal of the same time.
Therefore I only added links to bz2 files.
The crash is at "2016-04-21T07:54:47.782Z" in the logs.
=>
http://people.canonical.com/~paelzer/ovs-dpdk-vhost-add-remove-leak/mem-leak-addremove.journal.bz2
=>
http://people.canonical.com/~paelzer/ovs-dpdk-vhost-add-remove-leak/ovs-vswitchd.log.bz2

Kind Regards,
Christian Ehrhardt


Christian Ehrhardt
Software Engineer, Ubuntu Server
Canonical Ltd

On Thu, Apr 21, 2016 at 7:54 AM, Yuanhan Liu 
wrote:

> On Wed, Apr 20, 2016 at 08:18:49AM +0200, Christian Ehrhardt wrote:
> > On Wed, Apr 20, 2016 at 7:04 AM, Yuanhan Liu <
> yuanhan.liu at linux.intel.com>
> > wrote:
> >
> > On Tue, Apr 19, 2016 at 06:33:50PM +0200, Christian Ehrhardt wrote:
> >
> > [...]
> >
> > > With that applied one (and only one) of my two guests looses
> connectivity
> > after
> > > removing the ports the first time.
> >
> > Yeah, that's should be because I invoked the "->destroy_device()"
> > callback.
> >
> >
> > Shouldn't that not only destroy the particular vhost_user device I
> remove?
>
> I assume the "not" is typed wrong here, then yes. Well, it turned
> out that I accidentally destroyed the first guest (with id 0) with
> following code:
>
> ctx.fh = g_vhost_server.server[i]->fh;
> vhost_destroy_device(ctx);
>
> server[i]->fh is initialized with 0 when no connection is established
> (check below for more info), and the first id is started with 0. Anyway,
> this could be fixed easily.
>
> > See below for some better details on the test to clarify that.
> >
> >
> > BTW, I'm curious how do you do the test? I saw you added 256 ports,
> but
> > with 2 guests only? So, 254 of them are idle, just for testing the
> > memory leak bug?
> >
> >
> > Maybe I should describe it better:
> > 1. Spawn some vhost-user ports (40 in my case)
> > 2. Spawn a pair of guests that connect via four of those ports per guest
> > 3. Guests only intialize one of that vhost_user based NICs
> > 4. check connectivity between guests via the vhost_user based connection
> > (working at this stage)
> > LOOP 5-7:
> >5. add ports 41-512
> >6. remove  ports 41-512
> >7. check connectivity between guests via the vhost_user based
> connection
>
> Yes, it's much clearer now. Thanks.
>
> I then don't see it's a leak from DPDK vhost-user, at least not the leak
> on "struct virtio_net" I have mentioned before. "struct virito_net" will
> not even be allocated for those ports never used (ports 41-512 in your
> case),
> as it will be allocated only when there is a connection established, aka,
> a guest is connected.
>
> BTW, will you be able to reproduce it without any connections? Say, all
> 512 ports are added, and then deleted.
>
> Thanks.
>
> --yliu
>
> >
> > So the vhost_user ports the guests are using are never deleted.
> > Only some extra (not even used) ports are added&removed in the loop to
> search
> > for potential leaks over a longer lifetime of an openvswitch-dpdk based
> > solution.
> >
>

[dpdk-dev] [PATCH] eal: fix unchecked return value from library

2016-04-21 Thread Burakov, Anatoly

> Fix issue reported by Coverity.
> Coverity ID 13194
> 
> The function returns a value that indicates an error condition. If this  is 
> not
> checked, the error condition may not be handled correctly.
> 
> In pci_vfio_mp_sync_thread: Value returned from a library function is not
> checked for errors before being used. This value may indicate an error
> condition.
> 
> Fixes: 2f4adfad0a69 ("vfio: add multiprocess support")
> 
> Signed-off-by: Daniel Mrzyglod 
> ---
>  lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
> b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
> index d9188fd..2b136fc 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
> @@ -287,7 +287,10 @@ pci_vfio_mp_sync_thread(void __rte_unused * arg)
>   struct linger l;
>   l.l_onoff = 1;
>   l.l_linger = 60;
> - setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l,
> sizeof(l));
> +
> + if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l,
> sizeof(l)) < 0)
> + RTE_LOG(ERR, EAL, "Cannot set SO_LINGER option "
> + "on listen socket (%s)\n",
> strerror(errno));
> 
>   ret = vfio_mp_sync_receive_request(conn_sock);
> 
> --
> 2.5.5

Acked-by: Anatoly  Burakov

[dpdk-dev] [PATCH v5 10/10] qede: Enable PMD build

2016-04-21 Thread Bruce Richardson

On Wed, Apr 20, 2016 at 04:43:44PM +, Harish Patil wrote:
> 
> >On Wed, Apr 20, 2016 at 10:51:06AM +0200, Thomas Monjalon wrote:
> >> 2016-04-20 00:14, Harish Patil:
> >> > >2016-03-31 19:15, Rasesh Mody:
> >> > >> --- a/config/common_base
> >> > >> +++ b/config/common_base
> >> > >> +CONFIG_RTE_LIBRTE_QEDE_RX_COAL_US=24
> >> > >> +CONFIG_RTE_LIBRTE_QEDE_TX_COAL_US=48
> >> > >
> >> > >It looks to be some tuning which could be done at runtime. Isn't it?
> >> >  
> >> > That?s right. Can you please suggest if there is any better option to
> >>make
> >> > it a runtime? There is no PMD API for that.
> >> 
> >> There are some devargs for that.
> >> For PCI dev, it can be passed in the whitelist option.
> >> We should remove this limitation by having a devargs API (and command
> >>line
> >> options) independent of whitelisting.
> >
> >But back to the original setting. Are these likely to be values that are
> >tunable
> >or need to be tunable by the user?
> 
> > 
> This is a tunable which is equivalent of ethtool -c (in linux) which
> controls the rate at which status block is updated.
> 
> >If not, I see little reason to make them
> >run-time configurable - they could be defines inside the driver itself.
> 
> There are defines already which set them to the defaults. The reason to
> expose it as config option is that the user don?t have to change the
> driver and rather just control it via config file. For now, I can remove
> it till we find a better alternative to make those run time. Please
> confirm and I can have it removed in next patch submission.
> 
Removing config options is always a good thing. As you say we can always find a
way to make them tunable in a later patchset.

/Bruce

[dpdk-dev] [PATCH v3 3/7] drivers/net/bnxt new driver for Broadcom bnxt

2016-04-21 Thread Bruce Richardson

On Wed, Apr 20, 2016 at 02:32:18PM -0700, Stephen Hurd wrote:
> > It's not for testing, more for code review and to help understand the code
> > [though
> > as you say, we do need to ensure that each commit doesn't actually break
> > the
> > build].
> > Right now, the driver code goes in as a single commit - which makes it a
> > hard
> > enough task to review and see what is in there. One suggestion that
> > hopefully
> > wouldn't be too much work might be to split the code up into: basic device
> > init code, RX and TX functions, and then any additional features based on
> > top
> > of that [ideally one patch per added feature].
> >
> 
> The current driver doesn't have much beyond basic TX/RX, but we can give it
> a shot.  "Too much work" is relative of course, but splitting it into
> self-contained easily understood chunks will absolutely be a lot of work.
> 
> Adding Ajit who will also be working on this.  Since he's coming up to
> speed on the driver code, this could be a good way for him to fully
> familiarize himself with it.
> 
Sure, thanks. Any splitting you can do at all would be appreciated and help with
reviews.

/Bruce

[dpdk-dev] [PATCH v3 2/2] virtio/vdev: add a new vdev named eth_cvio

2016-04-21 Thread David Marchand

Hello,

On Thu, Apr 21, 2016 at 4:56 AM, Jianfeng Tan  wrote:
> Add a new virtual device named eth_cvio, it can be used just like
> eth_ring, eth_null, etc.
>
> Configured parameters include:
>   - rx (optional, 1 by default), number of rx, not used for now.
>   - tx (optional, 1 by default), number of tx, not used for now.
>   - cq (optional, 0 by default), not supported for now.
>   - mac (optional), random value will be given if not specified.
>   - queue_size (optional, 256 by default), size of virtqueue.
>   - path (madatory), path of vhost, depends on the file type, vhost
> user if the given path points to a unix socket; vhost-net if the
> given path points to a char device.
>   - ifname (optional), specify the name of backend tap device; only
> valid when backend is vhost-net.
>
> The major difference with original virtio for vm is that, here we use
> virtual addr instead of physical addr for vhost to calculate relative
> address.
>
> When enable CONFIG_RTE_VIRTIO_VDEV (enabled by default), the compiled
> library can be used in both VM and container environment.

This implementation heavily relies on dev_type to keep as much code
shared between pci / vdev path as possible.

virtio code relies on drv_flags (even updating it while this should be
per-device).
So first, virtio should rely on dev_flags.

The rest needs to be astracted in some virtio ops ?


Thanks.

-- 
David Marchand

[dpdk-dev] [PATCH v3 00/13] kill global pci device id list

2016-04-21 Thread David Marchand

On Wed, Apr 20, 2016 at 2:43 PM, David Marchand
 wrote:
> - not storing the pci ids in a dedicated section anymore, pci drivers are
>   exported and parsed by a quickly written (and naive) tool

Rethinking about this, this part won't do.
Stripping symbols breaks it, and I had to go some gymnastics to get
the symbols, while having exported information sanitised as strings in
a dedicated section constructed at build time would be saner.
Yes, this really sounds like modinfo ...

Volunteers ?

-- 
David Marchand

[dpdk-dev] Memory leak when adding/removing vhost_user ports

2016-04-21 Thread Yuanhan Liu

On Thu, Apr 21, 2016 at 04:04:03PM +0200, Christian Ehrhardt wrote:
> Thanks Ilya,
> yeah we usually wait for the point releases as they undergo some extra testing
> and verification.
> .1 shouldn't be too much into the future I guess.
> Thanks a lot for identifying.
> 
> That said, I'd still go on with Yuanhan to finalize the dpdk side leak fix we
> identified, so we eventually get it committed.
> So Yuanhan, what do you think of my last revised version of your patch for
> upstream DPDK (there with the?vhost_destroy_device then)?

That's good.

> I mean it is essentially your patch plus a bit of polishing, not mine so I
> don't feel entitled to submit it as mine :-)

Thanks. I will make and send out a formal patch later.

--yliu

[dpdk-dev] Memory leak when adding/removing vhost_user ports

2016-04-21 Thread Yuanhan Liu

On Thu, Apr 21, 2016 at 02:01:26PM +0300, Ilya Maximets wrote:
> Hi, Christian.
> You're, likely, using tar archive with openvswitch from openvswitch.org.
> It doesn't contain many bug fixes from git/branch-2.5 unfortunately.
> 
> The problem that you are facing has been solved in branch-2.5 by
> 
> commit d9df7b9206831631ddbd90f9cbeef1b4fc5a8e89
> Author: Ilya Maximets 
> Date:   Thu Mar 3 11:30:06 2016 +0300
> 
> netdev-dpdk: Fix memory leak in netdev_dpdk_vhost_destruct().
> 
> Fixes: 4573fbd38fa1 ("netdev-dpdk: Add vhost-user multiqueue support")
> Signed-off-by: Ilya Maximets 
> Acked-by: Flavio Leitner 
> Acked-by: Daniele Di Proietto 
Hi Ilya,

Thanks for the info. And, I actually checked this peice of code. I was
using new code, so, I didn't find anything wrong.

--yliu

[dpdk-dev] [PATCH v3 01/13] e1000: move pci device ids to driver

2016-04-21 Thread David Marchand

On Wed, Apr 20, 2016 at 8:15 PM, Neil Horman  wrote:
> On Wed, Apr 20, 2016 at 03:39:59PM +0200, David Marchand wrote:
>> On Wed, Apr 20, 2016 at 3:29 PM, Neil Horman  
>> wrote:
>> >> +#ifndef RTE_PCI_DEV_ID_DECL_EM
>> >> +#define RTE_PCI_DEV_ID_DECL_EM(vend, dev)
>> >> +#endif
>> >> +
>> >> +#ifndef PCI_VENDOR_ID_INTEL
>> >> +/** Vendor ID used by Intel devices */
>> >> +#define PCI_VENDOR_ID_INTEL 0x8086
>> >> +#endif
>> >> +
>> > This is broken, PCI_VENDOR_ID_INTEL should be defined in a central 
>> > location for
>> > all pci drivers, not redefined in every compilation unit.  And you can 
>> > likely
>>
>> Well we can keep the vendors in a common header, but I don't see the benefit.
>>
> ?
> The fact that you won't have to do this
> #ifndef PCI_VENDOR_ID_INTEL
> #define PCI_VENDOR_ID_INTEL ...
> #endif
> in every pmd

Ok, so you would keep the rte_pci_dev_ids.h with just the vendors in it ?

Or, we could rely on linux kernel headers (pci_ids.h), rather than
maintain a header in dpdk.
But this would add a dependency build on dpdk and I am not sure there
is something equivalent on freebsd (afaics all drivers seem to
duplicate the pci vendor id).
Any freebsd gourou reading this ?


>> > just remvoe the RTE_PCI_DEV_ID_DECL_* macros from each patch and use the
>> > RTE_PCI_DEV macro, as all the DECL macros just evaluate to that anyway.
>>
>> app/test/test_pci.c:#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev)
>> {RTE_PCI_DEVICE(vend, dev)},
>> lib/librte_eal/linuxapp/kni/kni_misc.c: #define
>> RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
>>
>> All this stuff is because of pci tests and kni code.
>>
> You're going to have to explain a bit further than that.  Why does the kni 
> code
> and pci testing code require that each driver have their own macro that just
> maps to the same macro underneath?  Looking at the test_pci code, it appears 
> its
> done because we used to contain all our pci device ids in a single file, and 
> the
> device specific macros were used to selectively enable devices that were there
> for testing.  But the point of this series is in part to separate out the 
> device
> ids to their own locations, so it seems like you will have to fix up the pci
> tests anyway (and additionally it would be nice to include more than just EM,
> IGB, and IXGBE in thsoe tests anyway, though I understand thats outside the
> scope of this series)

- test_pci.c should be about testing pci infrastructure.
Relying on igb / ixgbe (or whatever pci device if we extend the list
to all pci ids) being present on the system to run successfully is
flawed but I have no better idea.


- kni implements specific ethtool stuff based on pci ids.
kni contains duplicated code from the kernel and it uses those ids to
drive to the right ops.

The solutions I have imagined so far :
* use a shared header for the devices that it supports
* drop the use of pci ids between kni library and kni kernel driver,
instead use the pmd name that would be resolved internally by the kni
library at RTE_KNI_IOCTL_CREATE time, and use this in the kni kernel
driver
* drop kni :-)

I don't mind doing trivial changes, but I don't have time for more on
this series.


-- 
David Marchand

[dpdk-dev] [PATCH 1/6] i40e: fix problematic dereference

2016-04-21 Thread Stephen Hemminger

On Thu, 21 Apr 2016 11:42:52 +0800
Helin Zhang  wrote:

> Fix issue reported by Coverity.
> 
> Coverity ID 119267: Dereference before null check.
> 
> Fixes: 8e109464c022 ("i40e: allow vector Rx and Tx usage")
> 
> Signed-off-by: Helin Zhang 
> ---
>  drivers/net/i40e/i40e_rxtx.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> index 4d35d83..9c126a3 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -2592,14 +2592,14 @@ i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
>  {
>   uint16_t i;
>  
> - /* SSE Vector driver has a different way of releasing mbufs. */
> - if (rxq->rx_using_sse) {
> - i40e_rx_queue_release_mbufs_vec(rxq);
> + if (!rxq || !rxq->sw_ring) {
> + PMD_DRV_LOG(DEBUG, "Pointer to rxq or sw_ring is NULL");
>   return;
>   }
>  

I can't see how you could ever trigger this.
 i40e_rx_queue_release_mbufs() is called only called from places where rxq
is guraanteed non NULL.

Are you sure Coverity isn't tell you that?

[dpdk-dev] [PATCH] eal: fix unchecked return value from library

2016-04-21 Thread Stephen Hemminger

On Thu, 21 Apr 2016 13:56:57 +0200
Daniel Mrzyglod  wrote:

> + if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) 
> < 0)
> + RTE_LOG(ERR, EAL, "Cannot set SO_LINGER option "
> + "on listen socket (%s)\n", 
> strerror(errno));

Priority should be WARNING not ERR because it has no real impact on the 
application
usability

   level
   This  determines  the  importance  of  the message.  The levels are, in
   order of decreasing importance:

   LOG_EMERG  system is unusable

   LOG_ALERT  action must be taken immediately

   LOG_CRIT   critical conditions

   LOG_ERRerror conditions

   LOG_WARNINGwarning conditions

   LOG_NOTICE normal, but significant, condition

   LOG_INFO   informational message

   LOG_DEBUG  debug-level message

[dpdk-dev] [dpdk-users] DPDK 16.04 link changes cause PMD drivers to not be loaded

2016-04-21 Thread Aurojit Panda

[The original report is included below for your convenience]

Thomas Monjalon wrote:
> 2016-04-21 08:01, Aurojit Panda:
>> Panu Matilainen wrote:
> [...]
>>> Again, PMDs are *plugins* that are *meant* to be loaded at runtime.
>>> That allows for all sorts of flexibility especially
>>> for packaging and shipping, at some extra cost in setup complexity.
>> I am all for a plugin architecture, I was merely suggesting that you
>> embed some path infromation at the beginning. Also please note:
>> (a) This behavior changed recently.
>
> What changed recently?
>
>> (b) This change is entirely undocumented, which is why I was reporting
>> it in the first place.
>> (c) It is actually quite unintutive, because previously ensuring
>> LD_LIBRARY_PATH was correct was all that was required
>> to get any DPDK application to interact with ports.
>
> ?
> Are you talking about combined library?
>
>>> For your own purposes, you can of course tweak the linking settings
>>> as much as you like. Look for "plugins" in mk/rte.app.mk and change
>>> the shared lib condition on the line above to "y" and there you have it.
>>> But that's not the way plugins are meant to be used.
>> That is not a reasonable solution given that it makes it very hard to
>> track future changes to DPDK without merges.
>> My alternatives neither break people's abilities to use plugins,
>> nor do they impact behavior.
>
> Please do not hesitate to send some patch to show your solution.
> Thanks

I was trying to run testpmd from DPDK 16.04 on Linux with kernel version 
4.4.0-1 (ld version 2.26). My machine has two
XL710QDA2 NICs, and I built DPDK as a shared, combined library (i.e., 
CONFIG_RTE_BUILD_SHARED_LIB=y and
CONFIG_RTE_BUILD_COMBINE_LIBS=y in config/common_linuxapp). I found that the 
issue is due to ld not linking against all
libraries with the new linker script (introduced in 948fd64befc3726) but am not 
sure how to fix this. As evidence for
this being caused by this change:

$ LD_TRACE_LOADED_OBJECTS=y ./testpmd -c 0x1c00 -n 4 -w 82:00.0 -w 82:00.1 
--file-prefix "send" -- -i
   linux-vdso.so.1 (0x7ffe11b3e000)
   librte_distributor.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_distributor.so.1.1
(0x7fa55eb85000)
   librte_reorder.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_reorder.so.1.1 
(0x7fa55e982000)
   librte_pipeline.so.3.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_pipeline.so.3.1 
(0x7fa55e77d000)
   librte_table.so.2.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_table.so.2.1 
(0x7fa55e55e000)
   librte_port.so.2.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_port.so.2.1 
(0x7fa55e34e000)
   librte_timer.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_timer.so.1.1 
(0x7fa55e145000)
   librte_hash.so.2.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_hash.so.2.1 
(0x7fa55df37000)
   librte_jobstats.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_jobstats.so.1.1 
(0x7fa55dd35000)
   librte_lpm.so.2.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_lpm.so.2.1 (0x7fa55db2e000)
   librte_power.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_power.so.1.1 
(0x7fa55d91f000)
   librte_acl.so.2.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_acl.so.2.1 (0x7fa55d705000)
   librte_meter.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_meter.so.1.1 
(0x7fa55d504000)
   librte_sched.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_sched.so.1.1 
(0x7fa55d2fd000)
   librte_vhost.so.2.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_vhost.so.2.1 
(0x7fa55d0e6000)
   librte_kvargs.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_kvargs.so.1.1 
(0x7fa55cee4000)
   librte_mbuf.so.2.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_mbuf.so.2.1 
(0x7fa55cce2000)
   librte_ip_frag.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_ip_frag.so.1.1 
(0x7fa55cada000)
   libethdev.so.3.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/libethdev.so.3.1 (0x7fa55c84f000)
   librte_cryptodev.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_cryptodev.so.1.1 
(0x7fa55c647000)
   librte_mempool.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_mempool.so.1.1 
(0x7fa55c444000)
   librte_ring.so.1.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_ring.so.1.1 
(0x7fa55c242000)
   librte_eal.so.2.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_eal.so.2.1 (0x7fa55bfe1000)
   librte_cmdline.so.2.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_cmdline.so.2.1 
(0x7fa55bdd8000)
   librte_cfgfile.so.2.1 => 
/home/apanda/e2d2/3rdparty/dpdk/build/lib/librte_cfgfile.so.2.1 
(0x7fa55bbd6000)

[dpdk-dev] [dpdk-users] DPDK 16.04 link changes cause PMD drivers to not be loaded

2016-04-21 Thread Aurojit Panda



Thomas Monjalon wrote:
> 2016-04-21 08:01, Aurojit Panda:
>> Panu Matilainen wrote:
> [...]
>>> Again, PMDs are *plugins* that are *meant* to be loaded at runtime.
>>> That allows for all sorts of flexibility especially
>>> for packaging and shipping, at some extra cost in setup complexity.
>> I am all for a plugin architecture, I was merely suggesting that you
>> embed some path infromation at the beginning. Also please note:
>> (a) This behavior changed recently.
>
> What changed recently?
>
Change 948fd64befc3726 moved DPDK from building a shared library to using LD 
linker scripts.

>> (b) This change is entirely undocumented, which is why I was reporting
>> it in the first place.
>> (c) It is actually quite unintutive, because previously ensuring
>> LD_LIBRARY_PATH was correct was all that was required
>> to get any DPDK application to interact with ports.
>
> ?
> Are you talking about combined library?
Yes, if you build a shared combined library, PMD drivers are not automatically 
loaded anymore. This implies that 
programs do not enumerate the set of ports on the machine. (Unless 
CONFIG_EAL_PMD_PATH is correctly set to the absolute 
directory where the built DPDK will live).
>
>>> For your own purposes, you can of course tweak the linking settings
>>> as much as you like. Look for "plugins" in mk/rte.app.mk and change
>>> the shared lib condition on the line above to "y" and there you have it.
>>> But that's not the way plugins are meant to be used.
>> That is not a reasonable solution given that it makes it very hard to
>> track future changes to DPDK without merges.
>> My alternatives neither break people's abilities to use plugins,
>> nor do they impact behavior.
>
> Please do not hesitate to send some patch to show your solution.

My initial e-mail was sent because I don't entirely understand LD linker 
scripts (the PMD libraries are listed within 
the built libdpdk.so file), so the only patch I can suggest is undoing 
948fd64befc3726 in which case the behavior as I 
describe.

> Thanks


Thanks

Panda

[dpdk-dev] [PATCH v3 00/13] kill global pci device id list

2016-04-21 Thread Neil Horman

On Thu, Apr 21, 2016 at 10:07:51AM +0200, David Marchand wrote:
> On Wed, Apr 20, 2016 at 2:43 PM, David Marchand
>  wrote:
> > - not storing the pci ids in a dedicated section anymore, pci drivers are
> >   exported and parsed by a quickly written (and naive) tool
> 
> Rethinking about this, this part won't do.
> Stripping symbols breaks it, and I had to go some gymnastics to get
> the symbols, while having exported information sanitised as strings in
> a dedicated section constructed at build time would be saner.
> Yes, this really sounds like modinfo ...
> 
> Volunteers ?
modinfo (or a tool likes it), requires more gymnastics in the actual code than
what we have.  It requires the auto-generation of extra c code that gets linked
into its own section (which implies the need for a linker script).

Not saying its a bad idea, its a pretty good one, just that its alot of work.  

We might be able to do something a bit more simple, perhaps convert the macros
to strings that we can extract with a tool?  not sure

Neil

> 
> 
> -- 
> David Marchand
>

[dpdk-dev] [PATCH v3 01/13] e1000: move pci device ids to driver

2016-04-21 Thread Neil Horman

On Thu, Apr 21, 2016 at 09:27:18AM +0200, David Marchand wrote:
> On Wed, Apr 20, 2016 at 8:15 PM, Neil Horman  wrote:
> > On Wed, Apr 20, 2016 at 03:39:59PM +0200, David Marchand wrote:
> >> On Wed, Apr 20, 2016 at 3:29 PM, Neil Horman  
> >> wrote:
> >> >> +#ifndef RTE_PCI_DEV_ID_DECL_EM
> >> >> +#define RTE_PCI_DEV_ID_DECL_EM(vend, dev)
> >> >> +#endif
> >> >> +
> >> >> +#ifndef PCI_VENDOR_ID_INTEL
> >> >> +/** Vendor ID used by Intel devices */
> >> >> +#define PCI_VENDOR_ID_INTEL 0x8086
> >> >> +#endif
> >> >> +
> >> > This is broken, PCI_VENDOR_ID_INTEL should be defined in a central 
> >> > location for
> >> > all pci drivers, not redefined in every compilation unit.  And you can 
> >> > likely
> >>
> >> Well we can keep the vendors in a common header, but I don't see the 
> >> benefit.
> >>
> > ?
> > The fact that you won't have to do this
> > #ifndef PCI_VENDOR_ID_INTEL
> > #define PCI_VENDOR_ID_INTEL ...
> > #endif
> > in every pmd
> 
> Ok, so you would keep the rte_pci_dev_ids.h with just the vendors in it ?
> 
Thats an option, yes, I'd rename it rte_pci_vendor_ids.h perhaps, but you get 
the idea.

> Or, we could rely on linux kernel headers (pci_ids.h), rather than
> maintain a header in dpdk.
Thats also a good idea.

> But this would add a dependency build on dpdk and I am not sure there
> is something equivalent on freebsd (afaics all drivers seem to
> duplicate the pci vendor id).
> Any freebsd gourou reading this ?
> 
If the dependency is unpalitable, I suppose its fine to duplicate the header
file.  My more direct point was really more just that you didn't need to
duplicate the vendor id macro multiple times within the dpdk project.  You can
still define the device ids internally to each pmd if you don't need it in other
locations, and the duplication against other projects there is ok by me.

> 
> >> > just remvoe the RTE_PCI_DEV_ID_DECL_* macros from each patch and use the
> >> > RTE_PCI_DEV macro, as all the DECL macros just evaluate to that anyway.
> >>
> >> app/test/test_pci.c:#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev)
> >> {RTE_PCI_DEVICE(vend, dev)},
> >> lib/librte_eal/linuxapp/kni/kni_misc.c: #define
> >> RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
> >>
> >> All this stuff is because of pci tests and kni code.
> >>
> > You're going to have to explain a bit further than that.  Why does the kni 
> > code
> > and pci testing code require that each driver have their own macro that just
> > maps to the same macro underneath?  Looking at the test_pci code, it 
> > appears its
> > done because we used to contain all our pci device ids in a single file, 
> > and the
> > device specific macros were used to selectively enable devices that were 
> > there
> > for testing.  But the point of this series is in part to separate out the 
> > device
> > ids to their own locations, so it seems like you will have to fix up the pci
> > tests anyway (and additionally it would be nice to include more than just 
> > EM,
> > IGB, and IXGBE in thsoe tests anyway, though I understand thats outside the
> > scope of this series)
> 
> - test_pci.c should be about testing pci infrastructure.
> Relying on igb / ixgbe (or whatever pci device if we extend the list
> to all pci ids) being present on the system to run successfully is
> flawed but I have no better idea.
> 
Ok, so if I'm reading you correctly, you're indicating that test_pci is just
there to test the pci common infrastructure code, and happens to use a few well
known and supported pci id's (igb and ixgbe) to do that.  i.e. theres no need
for exhaustive pci device enumeration there, right?  If thats the case then you
can just create a data structure with the RTE_PCI_DEV macro and stop aliasing it
to all the device specific DECL macros, no?  My main concern here is really just
the needless aliasing of those macros.


> 
> - kni implements specific ethtool stuff based on pci ids.
> kni contains duplicated code from the kernel and it uses those ids to
> drive to the right ops.
> 
Ok, but how does that relate to the use of device specific PCI ennumeration
values?  Looking at the KNI code I see no reason that they are required any
longer (or previously were), and if you're going to move them around you may as
well clean up the usage at the same time.

> The solutions I have imagined so far :
> * use a shared header for the devices that it supports
> * drop the use of pci ids between kni library and kni kernel driver,
> instead use the pmd name that would be resolved internally by the kni
> library at RTE_KNI_IOCTL_CREATE time, and use this in the kni kernel
> driver
> * drop kni :-)
> 
These all sound good :)

> I don't mind doing trivial changes, but I don't have time for more on
> this series.
> 
Um, I'm not sure what to say here.  The whole point of review is to help improve
the code.  If you don't have time to do anything non-trivial, Why are we
reviewing it?

Neil
> 
> -- 
> David Marchand
>

[dpdk-dev] [dpdk-users] DPDK 16.04 link changes cause PMD drivers to not be loaded

2016-04-21 Thread Aurojit Panda

[Cross-posting to dev]

Panu Matilainen wrote:
> On 04/20/2016 05:26 PM, Aurojit Panda wrote:
>> I am sorry that is a bit unintuitive considering:
>>
>> (a) This behavior differs between static and shared builds of DPDK.
>> - In fact this behavior was identical in 2.2, and even in mainline
>> before 948fd64befc3726 went in.
>> (b) You already know the EAL_PMD_PATH at build time, and it makes it
>> quite hard to ship scripts or anything to build DPDK, since now the
>> configuration file becomes dependent on location.
>
> The expectation is that shared libraries are installed to a shared, known 
> location. Otherwise there's not that much
> point in using shared libraries.
>
> Note that you can also use the EAL option -d to load either single PMDs or 
> entire directories at runtime (regardless of
> EAL_PMD_PATH) so you dont have to know the path at build time if that's what 
> bothers you most.
>

I am actually bothered by needing to supply the path at all. You seem to assume 
that knowing an install path is 
essential for shared libraries, while you can in reality embed this in the 
rpath. Shared libraries are used for many 
reasons, including interop with some runtime, in which case they are not 
installed at a known place. I would prefer for 
those PMDs that are included with DPDK that either they be loaded from the 
"known" build path for them.

>>
>> I wonder if you would consider changing this, as it stands just building
>> DPDK after setting CONFIG_RTE_SHARED_LIB=y results in a testpmd that
>> cannot run.
>
> In shared library configuration, testpmd is not directly runnable regardless 
> of the drivers since its missing all the
> other libraries too:
>
> [pmatilai at sopuli dpdk]$ build/app/testpmd
> build/app/testpmd: error while loading shared libraries: 
> librte_distributor.so.1.1: cannot open shared object file: No
> such file or directory
> [pmatilai at sopuli dpdk]$
>
> You'll need to get those libraries into linkers path anyway, either by 
> installing them to a common location or by
> extending LD_LIBRARY_PATH. In either case, you need to know where the 
> libraries are anyway.
>
> All of which is not to say there might not be room for improvement, but the 
> linking behavior is not going to change.
> Again, PMDs are *plugins* that are *meant* to be loaded at runtime. That 
> allows for all sorts of flexibility especially
> for packaging and shipping, at some extra cost in setup complexity.

I am all for a plugin architecture, I was merely suggesting that you embed some 
path infromation at the beginning. Also 
please note:
(a) This behavior changed recently.
(b) This change is entirely undocumented, which is why I was reporting it in 
the first place.
(c) It is actually quite unintutive, because previously ensuring 
LD_LIBRARY_PATH was correct was all that was required 
to get any DPDK application to interact with ports.

>
> For your own purposes, you can of course tweak the linking settings as much 
> as you like. Look for "plugins" in
> mk/rte.app.mk and change the shared lib condition on the line above to "y" 
> and there you have it. But that's not the way
> plugins are meant to be used.

That is not a reasonable solution given that it makes it very hard to track 
future changes to DPDK without merges. My 
alternatives neither break people's abilities to use plugins, nor do they 
impact behavior.

>
> Oh and BTW, please don't top-post.
>
> - Panu -
>

[dpdk-dev] [PATCH v2] examples/qos_sched: fix negative loop bound

2016-04-21 Thread Michal Jastrzebski

Fix issue reported by Coverity.
Date: Thu, 21 Apr 2016 13:47:35 +0200
Message-Id: <1461239256-8104-4-git-send-email-michalx.k.jastrzebski at 
intel.com>
X-Mailer: git-send-email 2.7.0
In-Reply-To: <1461239256-8104-1-git-send-email-michalx.k.jastrzebski at 
intel.com>
References: <1461239256-8104-1-git-send-email-michalx.k.jastrzebski at 
intel.com>

From: Slawomir Mrozowicz 

Coverity ID 30704: Negative loop bound
negative_returns: Using unsigned variable n_tokens in a loop exit condition.

Fixes: de3cfa2c9823 ("sched: initial import")

Signed-off-by: Slawomir Mrozowicz 
---
 examples/qos_sched/args.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c
index 3e7fd08..7a98e5c 100644
--- a/examples/qos_sched/args.c
+++ b/examples/qos_sched/args.c
@@ -162,7 +162,7 @@ static int
 app_parse_opt_vals(const char *conf_str, char separator, uint32_t n_vals, 
uint32_t *opt_vals)
 {
char *string;
-   uint32_t i, n_tokens;
+   int i, n_tokens;
char *tokens[MAX_OPT_VALUES];

if (conf_str == NULL || opt_vals == NULL || n_vals == 0 || n_vals > 
MAX_OPT_VALUES)
-- 
1.9.1

[dpdk-dev] [PATCH v2] examples/qos_sched: fix copy-paste error

2016-04-21 Thread Michal Jastrzebski

Fix issue reported by Coverity.
Date: Thu, 21 Apr 2016 13:47:34 +0200
Message-Id: <1461239256-8104-3-git-send-email-michalx.k.jastrzebski at 
intel.com>
X-Mailer: git-send-email 2.7.0
In-Reply-To: <1461239256-8104-1-git-send-email-michalx.k.jastrzebski at 
intel.com>
References: <1461239256-8104-1-git-send-email-michalx.k.jastrzebski at 
intel.com>

From: Slawomir Mrozowicz 

Coverity ID 30699: Copy-paste error;
rx_port in pconf->rx_port looks like a copy-paste error.

Fixes: de3cfa2c9823 ("sched: initial import")

Signed-off-by: Slawomir Mrozowicz 
---
 examples/qos_sched/args.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c
index 3e7fd08..1916790 100644
--- a/examples/qos_sched/args.c
+++ b/examples/qos_sched/args.c
@@ -270,7 +270,7 @@ app_parse_flow_conf(const char *conf_str)
}
if (pconf->tx_port >= RTE_MAX_ETHPORTS) {
RTE_LOG(ERR, APP, "pfc %u: invalid tx port %"PRIu8" index\n",
-   nb_pfc, pconf->rx_port);
+   nb_pfc, pconf->tx_port);
return -1;
}

-- 
1.9.1

[dpdk-dev] [PATCH v2] examples/qos_sched: fix bad bit shift operation

2016-04-21 Thread Michal Jastrzebski

Fix issue reported by Coverity.
Date: Thu, 21 Apr 2016 13:47:33 +0200
Message-Id: <1461239256-8104-2-git-send-email-michalx.k.jastrzebski at 
intel.com>
X-Mailer: git-send-email 2.7.0
In-Reply-To: <1461239256-8104-1-git-send-email-michalx.k.jastrzebski at 
intel.com>
References: <1461239256-8104-1-git-send-email-michalx.k.jastrzebski at 
intel.com>

From: Slawomir Mrozowicz 

Coverity ID 30690: Bad bit shift operation
large_shift: In expression 1ULL << i, left shifting by more than 63 bits
has undefined behavior. The shift amount, i, is as much as 127.

Fixes: de3cfa2c9823 ("sched: initial import")

Signed-off-by: Slawomir Mrozowicz 
---
 examples/qos_sched/args.c | 84 +--
 1 file changed, 52 insertions(+), 32 deletions(-)

diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c
index 3e7fd08..cd077ba 100644
--- a/examples/qos_sched/args.c
+++ b/examples/qos_sched/args.c
@@ -53,7 +53,7 @@

 static uint32_t app_master_core = 1;
 static uint32_t app_numa_mask;
-static uint64_t app_used_core_mask = 0;
+static int app_used_core_mask[RTE_MAX_LCORE];
 static uint64_t app_used_port_mask = 0;
 static uint64_t app_used_rx_port_mask = 0;
 static uint64_t app_used_tx_port_mask = 0;
@@ -115,22 +115,23 @@ static inline int str_is(const char *str, const char *is)
return strcmp(str, is) == 0;
 }

-/* returns core mask used by DPDK */
-static uint64_t
-app_eal_core_mask(void)
+/* compare used core with eal configuration,
+   returns:
+   1 if equal
+   0 if differ */
+static int
+app_eal_core_check(void)
 {
-   uint32_t i;
-   uint64_t cm = 0;
+   uint16_t i;
+   int ret = 1;
struct rte_config *cfg = rte_eal_get_configuration();

-   for (i = 0; i < RTE_MAX_LCORE; i++) {
-   if (cfg->lcore_role[i] == ROLE_RTE)
-   cm |= (1ULL << i);
+   for (i = 0; i < RTE_MAX_LCORE && ret; i++) {
+   if ((cfg->lcore_role[i] == ROLE_RTE) != app_used_core_mask[i])
+   ret = 0;
}

-   cm |= (1ULL << cfg->master_lcore);
-
-   return cm;
+   return ret;
 }


@@ -292,14 +293,9 @@ app_parse_flow_conf(const char *conf_str)
app_used_tx_port_mask |= mask;
app_used_port_mask |= mask;

-   mask = 1lu << pconf->rx_core;
-   app_used_core_mask |= mask;
-
-   mask = 1lu << pconf->wt_core;
-   app_used_core_mask |= mask;
-
-   mask = 1lu << pconf->tx_core;
-   app_used_core_mask |= mask;
+   app_used_core_mask[pconf->rx_core] = 1;
+   app_used_core_mask[pconf->wt_core] = 1;
+   app_used_core_mask[pconf->tx_core] = 1;

nb_pfc++;

@@ -335,7 +331,7 @@ app_parse_args(int argc, char **argv)
int option_index;
const char *optname;
char *prgname = argv[0];
-   uint32_t i, nb_lcores;
+   uint16_t i, j, k, nb_lcores;

static struct option lgopts[] = {
{ "pfc", 1, 0, 0 },
@@ -349,6 +345,9 @@ app_parse_args(int argc, char **argv)
{ NULL,  0, 0, 0 }
};

+   for (i = 0; i < RTE_MAX_LCORE; i++)
+   app_used_core_mask[i] = 0;
+
/* initialize EAL first */
ret = rte_eal_init(argc, argv);
if (ret < 0)
@@ -436,19 +435,40 @@ app_parse_args(int argc, char **argv)
}

/* check master core index validity */
-   for(i = 0; i <= app_master_core; i++) {
-   if (app_used_core_mask & (1u << app_master_core)) {
-   RTE_LOG(ERR, APP, "Master core index is not configured 
properly\n");
-   app_usage(prgname);
-   return -1;
-   }
+   if (app_used_core_mask[app_master_core] == 1) {
+   RTE_LOG(ERR, APP,
+   "Master core index is not configured properly\n");
+   app_usage(prgname);
+   return -1;
}
-   app_used_core_mask |= 1u << app_master_core;
+   app_used_core_mask[app_master_core] = 1;
+
+   if ((app_eal_core_check() == 0) ||
+   (app_master_core != rte_get_master_lcore())) {
+
+   char used_hexstr[RTE_MAX_LCORE/4+1];
+   char conf_hexstr[RTE_MAX_LCORE/4+1];
+   int used_byte, conf_byte;
+   struct rte_config *cfg = rte_eal_get_configuration();
+
+   for (i = 0; i < RTE_MAX_LCORE/4; i++) {
+   used_byte = 0;
+   conf_byte = 0;
+   for (j = 0; j < 3; j++) {
+   k = 4 * (RTE_MAX_LCORE/4 - i - 1) + j;
+   used_byte += app_used_core_mask[k] << j;
+   conf_byte +=
+   ((cfg->lcore_role[k] ==
+   ROLE_RTE)?1:0) << j;
+   }
+   sprintf(&used_hexstr[i], "%1x", used_byte);
+   sprintf(&conf

[dpdk-dev] [PATCH v3 2/2] virtio/vdev: add a new vdev named eth_cvio

2016-04-21 Thread Jianfeng Tan

Add a new virtual device named eth_cvio, it can be used just like
eth_ring, eth_null, etc.

Configured parameters include:
  - rx (optional, 1 by default), number of rx, not used for now.
  - tx (optional, 1 by default), number of tx, not used for now.
  - cq (optional, 0 by default), not supported for now.
  - mac (optional), random value will be given if not specified.
  - queue_size (optional, 256 by default), size of virtqueue.
  - path (madatory), path of vhost, depends on the file type, vhost
user if the given path points to a unix socket; vhost-net if the
given path points to a char device.
  - ifname (optional), specify the name of backend tap device; only
valid when backend is vhost-net.

The major difference with original virtio for vm is that, here we use
virtual addr instead of physical addr for vhost to calculate relative
address.

When enable CONFIG_RTE_VIRTIO_VDEV (enabled by default), the compiled
library can be used in both VM and container environment.

Examples:
path_vhost=/dev/vhost-net # use vhost-net as a backend
path_vhost= # use vhost-user as a backend

sudo ./examples/l2fwd/build/l2fwd -c 0x10 -n 4 \
--socket-mem 0,1024 --no-pci --file-prefix=l2fwd \
--vdev=eth_cvio0,mac=00:01:02:03:04:05,path=$path_vhost -- -p 0x1

Known issues:
 - Control queue and multi-queue are not supported yet.
 - Cannot work with --huge-unlink.
 - Cannot work with no-huge.
 - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8)
   hugepages.
 - Root privilege is a must (mainly becase of sorting hugepages according
   to physical address).
 - Applications should not use file name like HUGEFILE_FMT ("%smap_%d").

Signed-off-by: Huawei Xie 
Signed-off-by: Jianfeng Tan 
Acked-By: Neil Horman 
---
 doc/guides/nics/overview.rst |  58 +-
 doc/guides/rel_notes/release_16_07.rst   |   4 +
 drivers/net/virtio/rte_eth_virtio_vdev.c | 188 ++-
 drivers/net/virtio/virtio_ethdev.c   | 134 ++
 drivers/net/virtio/virtio_ethdev.h   |   3 +
 drivers/net/virtio/virtio_pci.h  |   2 +-
 drivers/net/virtio/virtio_rxtx.c |   5 +-
 drivers/net/virtio/virtio_rxtx_simple.c  |  13 ++-
 drivers/net/virtio/virtqueue.h   |  10 ++
 9 files changed, 326 insertions(+), 91 deletions(-)

diff --git a/doc/guides/nics/overview.rst b/doc/guides/nics/overview.rst
index ed116e3..1ff72fb 100644
--- a/doc/guides/nics/overview.rst
+++ b/doc/guides/nics/overview.rst
@@ -74,40 +74,40 @@ Most of these differences are summarized below.

 .. table:: Features availability in networking drivers

-    = = = = = = = = = = = = = = = = = = = = = = = = = = = 
= = = = = = = =
-   Feature  a b b b c e e e i i i i i i i i i i f f f f m m m n n 
p r s v v v v x
-f n n o x 1 n n 4 4 4 4 g g x x x x m m m m l l p f u 
c i z h i i m e
-p x x n g 0 a i 0 0 0 0 b b g g g g 1 1 1 1 x x i p l 
a n e o r r x n
-a 2 2 d b 0   c e e e e   v b b b b 0 0 0 0 4 5 p   l 
p g d s t t n v
-c x x i e 0   . v v   f e e e e k k k k e  
   a t i i e i
-k   v n   . f f   . v v   . v v
   t   o o t r
-e   f g   .   .   . f f   . f f
   a . 3 t
+    = = = = = = = = = = = = = = = = = = = = = = = = = = = 
= = = = = = = = =
+   Feature  a b b b c e e e i i i i i i i i i i f f f f m m m n n 
p r s v v v v x c
+f n n o x 1 n n 4 4 4 4 g g x x x x m m m m l l p f u 
c i z h i i m e v
+p x x n g 0 a i 0 0 0 0 b b g g g g 1 1 1 1 x x i p l 
a n e o r r x n i
+a 2 2 d b 0   c e e e e   v b b b b 0 0 0 0 4 5 p   l 
p g d s t t n v r
+c x x i e 0   . v v   f e e e e k k k k e  
   a t i i e i t
+k   v n   . f f   . v v   . v v
   t   o o t r i
+e   f g   .   .   . f f   . f f
   a . 3 t o
 t v   v   v   v   v   v
   2 v
   e   e   e   e   e   e
 e
   c   c   c   c   c   c
 c
-    = = = = = = = = = = = = = = = = = = = = = = = = = = = 
= = = = = = = =
+    = = = = = = = = = = = = = = = = = = = = = = = = = = = 
= = = = = = = = =
speed capabilities
-   link statusX X   X X   X X X X   X X X X X X
   X X X X
+   link statusX X   X X   X X X X   X X X X X X
   X X X X X
link status event  X X X X X X   X X X X
 X
queue status event

[dpdk-dev] [PATCH v3 1/2] virtio/vdev: add embeded device emulation

2016-04-21 Thread Jianfeng Tan

Background: Previously, we usually use a virtio device in QEMU/VM's
context as below pic shows. Virtio nic is emulated in QEMU, and usually
presented in VM as a PCI device.

|---|
| vm|
|---| (over PCI bus or MMIO or Channel I/O)
|QEMU   | -> device emulation
|---|
  |
  | (vhost-user protocol or vhost-net ioctls)
  |
|---|
|   vhost   |
|---|

Then we come to the topic that how to present a virtio device in an app
or container, which uses virtio device to do inter process communication
with vhost backend process. To achieve that, first of all, we need way
in DPDK to interract with vhost backend. And then emulate a virtual
virtio device in DPDK (which is addressed in following patch).

|---|
|  DPDK app |
|---|
|  DPDK lib | -> device emulation (addressed by following patch)
|---|
  |
  | (vhost-user protocol or vhost-net ioctls), addressed by this patch
  |
|---|
|   vhost   |
|---|

How: we implement another instance of struct virtio_pci_ops to intercept
the communications between VM and QEMU. Instead of rd/wr ioport or PCI
configuration space, here we directly talk with backend through the vhost
file. Depending on the type of vhost file,
   - vhost-user is used if the given path points to a unix socket;
   - vhost-net is used if the given path points to a char device.

Signed-off-by: Huawei Xie 
Signed-off-by: Jianfeng Tan 
Acked-By: Neil Horman 
---
 config/common_linuxapp   |   3 +
 drivers/net/virtio/Makefile  |   4 +
 drivers/net/virtio/rte_eth_virtio_vdev.c | 897 +++
 drivers/net/virtio/vhost.h   | 194 +++
 drivers/net/virtio/virtio_ethdev.h   |   1 -
 drivers/net/virtio/virtio_pci.h  |  14 +
 6 files changed, 1112 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/virtio/rte_eth_virtio_vdev.c
 create mode 100644 drivers/net/virtio/vhost.h

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 7e698e2..e69f71c 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -43,3 +43,6 @@ CONFIG_RTE_LIBRTE_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
+
+# Enable virtio support for containers
+CONFIG_RTE_VIRTIO_VDEV=y
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index ef84f60..4b5ba32 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -55,6 +55,10 @@ ifeq ($(findstring 
RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
 endif

+ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y)
+   SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += rte_eth_virtio_vdev.c
+endif
+
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
diff --git a/drivers/net/virtio/rte_eth_virtio_vdev.c 
b/drivers/net/virtio/rte_eth_virtio_vdev.c
new file mode 100644
index 000..419acef
--- /dev/null
+++ b/drivers/net/virtio/rte_eth_virtio_vdev.c
@@ -0,0 +1,897 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#incl

[dpdk-dev] [PATCH v3 0/2] virtio support for container

2016-04-21 Thread Jianfeng Tan

v3:
 - Remove --single-file option; do no change at EAL memory.
 - Remove the added API rte_eal_get_backfile_info(), instead we check all
   opened files with HUGEFILE_FMT to find hugepage files owned by DPDK.
 - Accordingly, add more restrictions at "Known issue" section.
 - Rename parameter from queue_num to queue_size for confusion.
 - Rename vhost_embedded.c to rte_eth_virtio_vdev.c.
 - Move code related to the newly added vdev to rte_eth_virtio_vdev.c, to
   reuse eth_virtio_dev_init(), remove its static declaration.
 - Implement dev_uninit() for rte_eth_dev_detach().
 - WARN -> ERR, in vhost_embedded.c
 - Add more commit message for clarify the model.

v2:
 - Rebase on the patchset of virtio 1.0 support.
 - Fix cannot create non-hugepage memory.
 - Fix wrong size of memory region when "single-file" is used.
 - Fix setting of offset in virtqueue to use virtual address.
 - Fix setting TUNSETVNETHDRSZ in vhost-user's branch.
 - Add mac option to specify the mac address of this virtual device.
 - Update doc.

This patchset is to provide high performance networking interface (virtio)
for container-based DPDK applications. The way of starting DPDK apps in
containers with ownership of NIC devices exclusively is beyond the scope.
The basic idea here is to present a new virtual device (named eth_cvio),
which can be discovered and initialized in container-based DPDK apps using
rte_eal_init(). To minimize the change, we reuse already-existing virtio
frontend driver code (driver/net/virtio/).

Compared to QEMU/VM case, virtio device framework (translates I/O port r/w
operations into unix socket/cuse protocol, which is originally provided in
QEMU), is integrated in virtio frontend driver. So this converged driver
actually plays the role of original frontend driver and the role of QEMU
device framework.

The major difference lies in how to calculate relative address for vhost.
The principle of virtio is that: based on one or multiple shared memory
segments, vhost maintains a reference system with the base addresses and
length for each segment so that an address from VM comes (usually GPA,
Guest Physical Address) can be translated into vhost-recognizable address
(named VVA, Vhost Virtual Address). To decrease the overhead of address
translation, we should maintain as few segments as possible. In VM's case,
GPA is always locally continuous. In container's case, CVA (Container
Virtual Address) can be used. Specifically:
a. when set_base_addr, CVA address is used;
b. when preparing RX's descriptors, CVA address is used;
c. when transmitting packets, CVA is filled in TX's descriptors;
d. in TX and CQ's header, CVA is used.

How to share memory? In VM's case, qemu always shares all physical layout
to backend. But it's not feasible for a container, as a process, to share
all virtual memory regions to backend. So only specified virtual memory
regions (with type of shared) are sent to backend. It's a limitation that
only addresses in these areas can be used to transmit or receive packets.

Known issues:
 - Control queue and multi-queue are not supported yet.
 - Cannot work with --huge-unlink.
 - Cannot work with no-huge.
 - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8)
   hugepages.
 - Root privilege is a must (mainly becase of sorting hugepages according
   to physical address).
 - Applications should not use file name like HUGEFILE_FMT ("%smap_%d").

How to use?

a. Apply this patchset.

b. To compile container apps:
$: make config RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc
$: make install RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc
$: make -C examples/l2fwd RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc
$: make -C examples/vhost RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc

c. To build a docker image using Dockerfile below.
$: cat ./Dockerfile
FROM ubuntu:latest
WORKDIR /usr/src/dpdk
COPY . /usr/src/dpdk
ENV PATH "$PATH:/usr/src/dpdk/examples/l2fwd/build/"
$: docker build -t dpdk-app-l2fwd .

d. Used with vhost-user
$: ./examples/vhost/build/vhost-switch -c 3 -n 4 \
--socket-mem 1024,1024 -- -p 0x1 --stats 1
$: docker run -i -t -v :/var/run/usvhost \
-v /dev/hugepages:/dev/hugepages \
dpdk-app-l2fwd l2fwd -c 0x4 -n 4 -m 1024 --no-pci \
--vdev=eth_cvio0,path=/var/run/usvhost -- -p 0x1

f. Used with vhost-net
$: modprobe vhost
$: modprobe vhost-net
$: docker run -i -t --privileged \
-v /dev/vhost-net:/dev/vhost-net \
-v /dev/net/tun:/dev/net/tun \
-v /dev/hugepages:/dev/hugepages \
dpdk-app-l2fwd l2fwd -c 0x4 -n 4 -m 1024 --no-pci \
--vdev=eth_cvio0,path=/dev/vhost-net -- -p 0x1

By the way, it's not necessary to run in a container.

Signed-off-by: Huawei Xie 
Signed-off-by: Jianfeng Tan 
Acked-By: Neil Horman 

Jianfeng Tan (2):
  virtio/vdev: add embeded device emulation
  virtio/vdev: add a new vdev named eth_cvio

 config/common_linuxapp   |3 +
 doc/guides/nics/overview.rst |   58 +-
 doc/guides/rel_not

81 matches

Mail list logo