[dpdk-dev] DPDK 2.2 roadmap

2015-09-15 Thread Olga Shern
Hi, 

Mellanox will submit a new PMD for ConnectX-4 and ConnectX-4 LX cards.

Best Regards,
Olga


-Original Message-
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Thomas Monjalon
Sent: Monday, September 14, 2015 11:57 AM
To: Zhang, Helin; O'Driscoll, Tim
Cc: dev at dpdk.org
Subject: Re: [dpdk-dev] DPDK 2.2 roadmap

Thanks for the details.
The roadmap is updated:
http://dpdk.org/dev/roadmap

Maybe the keep-alive feature needs some design discussion.

Does anyone else want to share their plans?



[dpdk-dev] [PATCH] cryptodev: changes to crypto operation APIs to support non prescriptive chaining of crypto transforms in a crypto operation. app/test: updates to cryptodev unit tests to support new

2015-09-15 Thread Declan Doherty
Proposed changes to the cryptodev API for comment, based on Neil's comments
on the initial RFC. I have included the updates to the cryptodev unit test
suite and the AESNI multi buffer PMD for illustrative purposes. I will include
the changes for the QAT in a V1 patchset if the proposed changes to the API are
acceptable.

Signed-off-by: Declan Doherty 
---
 app/test/test_cryptodev.c  | 276 +++--
 drivers/crypto/aesni_mb/aesni_mb_ops.h |   2 +-
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 188 +-
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c |  10 +-
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h |  28 ++-
 lib/librte_cryptodev/rte_crypto.h  | 141 ---
 lib/librte_cryptodev/rte_cryptodev.c   |  54 ++--
 lib/librte_cryptodev/rte_cryptodev.h   |  26 +-
 lib/librte_cryptodev/rte_cryptodev_pmd.h   |  10 +-
 9 files changed, 506 insertions(+), 229 deletions(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index 68cc0bf..93b7e0a 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -62,6 +62,8 @@
 #define DIGEST_BYTE_LENGTH_SHA1    (BYTE_LENGTH(160))
 #define DIGEST_BYTE_LENGTH_SHA256  (BYTE_LENGTH(256))
 #define DIGEST_BYTE_LENGTH_SHA512  (BYTE_LENGTH(512))
+#define DIGEST_BYTE_LENGTH_AES_XCBC (BYTE_LENGTH(96))
+#define AES_XCBC_MAC_KEY_SZ (16)

 #define TRUNCATED_DIGEST_BYTE_LENGTH_SHA1  (12)
 #define TRUNCATED_DIGEST_BYTE_LENGTH_SHA256    (16)
@@ -75,13 +77,13 @@ struct crypto_testsuite_params {
struct rte_cryptodev_config conf;
struct rte_cryptodev_qp_conf qp_conf;

-   uint8_t valid_devs[RTE_MAX_CRYPTODEVS];
+   uint8_t valid_devs[RTE_CRYPTO_MAX_DEVS];
uint8_t valid_dev_count;
 };

 struct crypto_unittest_params {
-   struct rte_crypto_cipher_params cipher_params;
-   struct rte_crypto_hash_params hash_params;
+   struct rte_crypto_xform cipher_xform;
+   struct rte_crypto_xform auth_xform;

struct rte_cryptodev_session *sess;

@@ -92,6 +94,17 @@ struct crypto_unittest_params {
uint8_t *digest;
 };

+/*
+ * Forward declarations.
+ */
+static int
+test_AES_CBC_HMAC_SHA512_decrypt_create_session_params(struct 
crypto_unittest_params *ut_params);
+
+static int
+test_AES_CBC_HMAC_SHA512_decrypt_perform(struct rte_cryptodev_session *sess,
+   struct crypto_unittest_params *ut_params,
+   struct crypto_testsuite_params *ts_param);
+
 static struct rte_mbuf *
 setup_test_string(struct rte_mempool *mpool,
const char *string, size_t len, uint8_t blocksize)
@@ -184,7 +197,7 @@ testsuite_setup(void)
}

ts_params->crypto_op_pool = rte_crypto_op_pool_create("CRYPTO_OP_POOL",
-   NUM_MBUFS, MBUF_CACHE_SIZE, rte_socket_id());
+   NUM_MBUFS, MBUF_CACHE_SIZE, 2, rte_socket_id());
if (ts_params->crypto_op_pool == NULL) {
RTE_LOG(ERR, USER1, "Can't create CRYPTO_OP_POOL\n");
return TEST_FAILED;
@@ -436,6 +449,11 @@ static const uint8_t 
catch_22_quote_2_512_bytes_AES_CBC_ciphertext[] = {
0X95, 0XBB, 0X26, 0X74, 0X69, 0X12, 0X7F, 0XF1, 0XBB, 0XFF, 0XAE, 0XB5, 
0X99, 0X6E, 0XCB, 0X0C
 };

+static const uint8_t catch_22_quote_2_512_bytes_AES_CBC_HMAC_SHA1_digest[] = {
+   0x9a, 0X4f, 0X88, 0X1b, 0Xb6, 0X8f, 0Xd8, 0X60,
+   0X42, 0X1a, 0X7d, 0X3d, 0Xf5, 0X82, 0X80, 0Xf1,
+   0X18, 0X8c, 0X1d, 0X32 };
+

 static int
 test_AES_CBC_HMAC_SHA1_encrypt_digest(void)
@@ -452,22 +470,28 @@ test_AES_CBC_HMAC_SHA1_encrypt_digest(void)
TEST_ASSERT_NOT_NULL(ut_params->digest, "no room to append digest");

/* Setup Cipher Parameters */
-   ut_params->cipher_params.algo = RTE_CRYPTO_SYM_CIPHER_AES_CBC;
-   ut_params->cipher_params.op = RTE_CRYPTO_SYM_CIPHER_OP_ENCRYPT;
-   ut_params->cipher_params.key.data = aes_cbc_key;
-   ut_params->cipher_params.key.length = CIPHER_KEY_LENGTH_AES_CBC;
+   ut_params->cipher_xform.type = RTE_CRYPTO_XFORM_CIPHER;
+   ut_params->cipher_xform.next = &ut_params->auth_xform;
+
+   ut_params->cipher_xform.cipher.algo = RTE_CRYPTO_SYM_CIPHER_AES_CBC;
+   ut_params->cipher_xform.cipher.op = RTE_CRYPTO_SYM_CIPHER_OP_ENCRYPT;
+   ut_params->cipher_xform.cipher.key.data = aes_cbc_key;
+   ut_params->cipher_xform.cipher.key.length = CIPHER_KEY_LENGTH_AES_CBC;

/* Setup HMAC Parameters */
-   ut_params->hash_params.op = RTE_CRYPTO_SYM_HASH_OP_DIGEST_GENERATE;
-   ut_params->hash_params.algo = RTE_CRYPTO_SYM_HASH_SHA1_HMAC;
-   ut_params->hash_params.auth_key.length = HMAC_KEY_LENGTH_SHA1;
-   ut_params->hash_params.auth_key.data = hmac_sha1_key;
-   ut_params->hash_params.digest_length = DIGEST_BYTE_LENGTH_SHA1;
+
+   ut_params->auth_xform.type = RTE_CRYPTO_XFORM_AUTH;
+   

[dpdk-dev] No egressing packet

2015-09-15 Thread Wu, Yiwen
Hi all,

I am new to dpdk. I am running a single forwarding program based on dpdk 
2.1.0. The program runs on a VM and is bound to two interfaces. All it 
does is forward packets from one interface to the other. All ingress 
packets are fine, but there seem to be no egress packets. I used 
rte_eth_add_tx_callback to register a tx callback. The callback is able 
to print the right egress packet, but the destination is just not 
receiving it (checked via tcpdump). Has anybody had a similar experience? 
Any solutions or hints would be great.

Thanks,

Yiwen


[dpdk-dev] vhost-net stops sending to virtio pmd -- already fixed?

2015-09-15 Thread Kyle Larose
On Sun, Sep 13, 2015 at 5:43 PM, Thomas Monjalon
 wrote:
>
> Hi,
>
> 2015-09-11 12:32, Kyle Larose:
> > Looking through the version tree for virtio_rxtx.c, I saw the following
> > commit:
> >
> > http://dpdk.org/browse/dpdk/commit/lib/librte_pmd_virtio?id=8c09c20fb4cde76e53d87bd50acf2b441ecf6eb8
> >
> > Does anybody know offhand if the issue fixed by that commit could be the
> > root cause of what I am seeing?
>
> I won't have the definitive answer but I would like to use your question
> to highlight a common issue in git messages:
>
> PLEASE, authors of fixes, explain the bug you are fixing and how it can
> be reproduced. Good commit messages are REALLY read and useful.
>
> Thanks
>

I've figured out what happened. It has nothing to do with the fix I
pasted above. Instead, the issue has to do with running low on mbufs.

Here's the general logic:

1. If packets are not queued, return
2. Fetch each queued packet, as an mbuf, into the provided array. This
may involve some merging/etc
3. Try to fill the virtio receive ring with new mbufs
  3.a. If we fail to allocate an mbuf, break out of the refill loop
4. Update the receive ring information and kick the host

This is obviously a simplification, but the key point is 3.a. If we
hit this logic when the virtio receive ring is completely used up, we
essentially lock up. The host will have no buffers with which to queue
packets, so the next time we poll, we will hit case 1. However, since
we hit case 1, we will not allocate mbufs to the virtio receive ring,
regardless of how many are now free. Rinse and repeat; we are stuck
until the pmd is restarted or the link is restarted.

This is very easy to reproduce when the mbuf pool is fairly small, and
packets are being passed to worker threads/processes which may
increase the length of the pipeline.

I took a quick look at the ixgbe driver, and it looks like it checks
if it needs to allocate mbufs to the ring before trying to pull
packets off the nic. Should we not be doing something similar for
virtio? Rather than breaking out early if no packets are queued, we
should first make sure there are resources with which to queue
packets!

One solution here is to increase the mbuf pool to a size where such
exhaustion is impossible, but that doesn't seem like a graceful
solution. For example, it may be desirable to drop packets rather than
have a large memory pool, and becoming stuck under such a situation is
not good. Further, it isn't easy to know the exact size required. You
may end up wasting a bunch of resources allocating far more than
necessary, or you may unknowingly under allocate, only to find out
once your application has been deployed into production, and it's
dropping everything on the floor.

Does anyone have thoughts on this? I took a look at virtio_rxtx at
head and I didn't see anything resembling my suggestion.

Comments would be appreciated. Thanks,

Kyle


[dpdk-dev] [RFC PATCH] vhost: Add VHOST PMD

2015-09-15 Thread Loftus, Ciara
> +
> +static int
> +rte_pmd_vhost_devinit(const char *name, const char *params)
> +{
> + struct rte_kvargs *kvlist = NULL;
> + int ret = 0;
> + int index;
> + char *iface_name;
> +
> + RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n", name);
> +
> + kvlist = rte_kvargs_parse(params, valid_arguments);
> + if (kvlist == NULL)
> + return -1;
> +
> + if (strlen(name) < strlen("eth_vhost"))
> + return -1;
> +
> + index = strtol(name + strlen("eth_vhost"), NULL, 0);
> + if (errno == ERANGE)
> + return -1;
> +
> + if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
> + ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
> + &open_iface, &iface_name);
> + if (ret < 0)
> + goto out_free;
> +
> + eth_dev_vhost_create(name, index, iface_name,
> rte_socket_id());
> + }
> +
> +out_free:
> + rte_kvargs_free(kvlist);
> + return ret;
> +}
> +

This suggests to me that vHost ports will only be available/created if one 
supplies the " --vdev 'eth_vhost0,iface=...' " options when launching the 
application. There seems to be no option available to add vHost ports 
on-the-fly after the init process. One would have to restart the application 
with different parameters in order to modify the vHost port configuration. Is 
this correct?

If so, this pmd implementation will not work with Open vSwitch. OVS relies on 
the ability to call the rte_vhost_driver_register function at any point in the 
lifetime of the application, in order to create new vHost ports and 
subsequently register/create the sockets. Being bound to the selection chosen 
on the command line when launching the application is not suitable for OVS.

Thanks,
Ciara


[dpdk-dev] Fwd: [PATCH] PPC: Fix NUMA node numbering on IBM POWER8 LE machine

2015-09-15 Thread Chao Zhu

Any response to this patch?

 Forwarded Message 
Subject:[dpdk-dev] [PATCH] PPC: Fix NUMA node numbering on IBM POWER8 
LE machine
Date:   Fri, 14 Aug 2015 20:19:48 +0800
From:   Chao Zhu 
To: dev at dpdk.org



When Linux is running on bare metal, it gets the raw hardware
information. On a POWER8 little endian bare metal machine, the node numbers
are not contiguous. They jump from 0 to other values; for example, they
can be 0, 1, 16, 17. This patch modifies the CONFIG_RTE_MAX_NUMA_NODES
value to make dpdk work on a POWER8 bare metal little endian machine.

Signed-off-by: Chao Zhu 
---
  config/common_linuxapp |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0de43d5..82a027e 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -98,7 +98,7 @@ CONFIG_RTE_NEXT_ABI=y
  #
  CONFIG_RTE_LIBRTE_EAL=y
  CONFIG_RTE_MAX_LCORE=128
-CONFIG_RTE_MAX_NUMA_NODES=8
+CONFIG_RTE_MAX_NUMA_NODES=32
  CONFIG_RTE_MAX_MEMSEG=256
  CONFIG_RTE_MAX_MEMZONE=2560
  CONFIG_RTE_MAX_TAILQ=32
-- 
1.7.1





[dpdk-dev] [PATCH] doc: update the dpdk 2.2 release notes

2015-09-15 Thread Thomas Monjalon
2015-09-15 14:07, John McNamara:
> +* **i40e: Fixed base driver allocation when not using first numa node.**
> +
> +Fixed i40e issue that occurred when a DPDK application didn't initialize 
> ports
> +if memory wasn't available on socket 0.
> +
> +
> +* **ixgbe: Removed burst size restriction of vector RX.**
> +
> +Fixed issue where a burst size less than 32 didn't receive anything.
> +
> +
> +* **ixgbe: Fixed issue with X550 DCB.**
> +
> +Fixed a DCB issue with x550 where for 8 TCs (Traffic Classes), if a packet
> +with user priority 6 or 7 was injected to the NIC, then the NIC would only 
> put
> +3 packets into the queue. There was also a similar issue for 4 TCs.
> +
> +
> +* **hash: Fixed memory allocation of Cuckoo Hash key table.**
> +
> +Fixed issue where an incorrect Cuckoo Hash key table size could be calculated
> +limiting the size to 4GB.
> +
> +Fixes: 48a399119619 ("hash: replace with cuckoo hash implementation")
> +
> +
> +* **eal/linux: Fixed epoll timeout.**
> +
> +Fixed issue where the ``rte_epoll_wait()`` function didn't return when the
> +underlying call to ``epoll_wait()`` timed out.


To make the release notes easy to read, I suggest ordering items logically,
starting with API (ethdev, cryptodev), EAL, drivers, libs, tools, examples.
Maybe adding some subtitles would help.
Opinions?


[dpdk-dev] [RFC PATCH v1] rte: LCore heartbeat example

2015-09-15 Thread Thomas Monjalon
Hi,

2015-09-15 13:16, Remy Horton:
> Provides a basic framework for detecting and reporting live-ness of 
> LCores, the primary requirement of which is minimal overheads for the 
> core(s) being checked. Core failures are notified via an application 
> defined callback. As an example l2fwd with random failures is used.

No it's not a framework, it's a sample application.
If the feature is interesting, it must be integrated in a library.
Then you need some doc, unit tests and an example to demonstrate its usage.
Please try to use an existing example instead of creating a new one.

Thanks


[dpdk-dev] [PATCH v1 3/3] examples/ip_pipeline: add mp/mc and frag/ras swq

2015-09-15 Thread Piotr Azarewicz
Add integrated MP/MC and fragmentation/reassembly support to SWQs

Signed-off-by: Piotr Azarewicz 
---
 examples/ip_pipeline/app.h  |   14 +++
 examples/ip_pipeline/config_check.c |   45 +++-
 examples/ip_pipeline/config_parse.c |  195 +--
 examples/ip_pipeline/init.c |  165 -
 examples/ip_pipeline/main.c |4 +-
 examples/ip_pipeline/pipeline_be.h  |   18 
 6 files changed, 402 insertions(+), 39 deletions(-)

diff --git a/examples/ip_pipeline/app.h b/examples/ip_pipeline/app.h
index 521e3a0..943466e 100644
--- a/examples/ip_pipeline/app.h
+++ b/examples/ip_pipeline/app.h
@@ -107,6 +107,14 @@ struct app_pktq_swq_params {
uint32_t dropless;
uint64_t n_retries;
uint32_t cpu_socket_id;
+   uint32_t ipv4_frag;
+   uint32_t ipv6_frag;
+   uint32_t ipv4_ras;
+   uint32_t ipv6_ras;
+   uint32_t mtu;
+   uint32_t metadata_size;
+   uint32_t mempool_direct_id;
+   uint32_t mempool_indirect_id;
 };

 #ifndef APP_FILE_NAME_SIZE
@@ -405,6 +413,10 @@ struct app_params {
char app_name[APP_APPNAME_SIZE];
const char *config_file;
const char *script_file;
+   const char *parser_file;
+   const char *output_file;
+   const char *preproc;
+   const char *preproc_args;
uint64_t port_mask;
uint32_t log_level;

@@ -880,6 +892,8 @@ int app_config_init(struct app_params *app);
 int app_config_args(struct app_params *app,
int argc, char **argv);

+int app_config_preproc(struct app_params *app);
+
 int app_config_parse(struct app_params *app,
const char *file_name);

diff --git a/examples/ip_pipeline/config_check.c 
b/examples/ip_pipeline/config_check.c
index 07f4c8b..8052bc4 100644
--- a/examples/ip_pipeline/config_check.c
+++ b/examples/ip_pipeline/config_check.c
@@ -33,6 +33,8 @@

 #include 

+#include 
+
 #include "app.h"

 static void
@@ -193,6 +195,7 @@ check_swqs(struct app_params *app)
struct app_pktq_swq_params *p = &app->swq_params[i];
uint32_t n_readers = app_swq_get_readers(app, p);
uint32_t n_writers = app_swq_get_writers(app, p);
+   uint32_t n_flags;

APP_CHECK((p->size > 0),
"%s size is 0\n", p->name);
@@ -217,14 +220,48 @@ check_swqs(struct app_params *app)
APP_CHECK((n_readers != 0),
"%s has no reader\n", p->name);

-   APP_CHECK((n_readers == 1),
-   "%s has more than one reader\n", p->name);
+   if (n_readers > 1)
+   APP_LOG(app, LOW, "%s has more than one reader", 
p->name);

APP_CHECK((n_writers != 0),
"%s has no writer\n", p->name);

-   APP_CHECK((n_writers == 1),
-   "%s has more than one writer\n", p->name);
+   if (n_writers > 1)
+   APP_LOG(app, LOW, "%s has more than one writer", 
p->name);
+
+   n_flags = p->ipv4_frag + p->ipv6_frag + p->ipv4_ras + 
p->ipv6_ras;
+
+   APP_CHECK((n_flags < 2),
+   "%s has more than one fragmentation or reassembly mode 
enabled\n",
+   p->name);
+
+   APP_CHECK((!((n_readers > 1) && (n_flags == 1))),
+   "%s has more than one reader when fragmentation or 
reassembly"
+   " mode enabled\n",
+   p->name);
+
+   APP_CHECK((!((n_writers > 1) && (n_flags == 1))),
+   "%s has more than one writer when fragmentation or 
reassembly"
+   " mode enabled\n",
+   p->name);
+
+   n_flags = p->ipv4_ras + p->ipv6_ras;
+
+   APP_CHECK((!((p->dropless == 1) && (n_flags == 1))),
+   "%s has dropless when reassembly mode enabled\n", 
p->name);
+
+   n_flags = p->ipv4_frag + p->ipv6_frag;
+
+   if (n_flags == 1) {
+   uint16_t ip_hdr_size = (p->ipv4_frag) ? sizeof(struct 
ipv4_hdr) :
+   sizeof(struct ipv6_hdr);
+
+   APP_CHECK((p->mtu > ip_hdr_size),
+   "%s mtu size is smaller than ip header\n", 
p->name);
+
+   APP_CHECK((!((p->mtu - ip_hdr_size) % 8)),
+   "%s mtu size is incorrect\n", p->name);
+   }
}
 }

diff --git a/examples/ip_pipeline/config_parse.c 
b/examples/ip_pipeline/config_parse.c
index c9b78f9..a35bd3e 100644
--- a/examples/ip_pipeline/config_parse.c
+++ b/examples/ip_pipeline/config_parse.c
@@ -156,6 +156,14 @@ static const struct app_pktq_swq_params default_swq_params 
= {
.dropless = 0,
.n_retries = 0,
.cpu_socket_id = 0,
+   .ipv4_frag = 0,
+   .ipv6_frag = 0,
+   .ipv4_ras = 0,
+  

[dpdk-dev] [PATCH v1 2/3] port: fix ras ring ports

2015-09-15 Thread Piotr Azarewicz
Bug fixes for ring ports with IPv4/IPv6 reassembly support.
The previous implementation could not work properly because the wrong
process function was chosen.
Also, since the IP header is known when processing an IP packet, we can
set the l3_len parameter here.

Signed-off-by: Piotr Azarewicz 
---
 lib/librte_port/rte_port_ras.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/librte_port/rte_port_ras.c b/lib/librte_port/rte_port_ras.c
index 6bd0f8c..e45d450 100644
--- a/lib/librte_port/rte_port_ras.c
+++ b/lib/librte_port/rte_port_ras.c
@@ -144,7 +144,7 @@ rte_port_ring_writer_ras_create(void *params, int 
socket_id, int is_ipv4)
port->tx_burst_sz = conf->tx_burst_sz;
port->tx_buf_count = 0;

-   port->f_ras = (is_ipv4 == 0) ? process_ipv4 : process_ipv6;
+   port->f_ras = (is_ipv4 == 1) ? process_ipv4 : process_ipv6;

return port;
 }
@@ -182,7 +182,7 @@ process_ipv4(struct rte_port_ring_writer_ras *p, struct 
rte_mbuf *pkt)
/* Assume there is no ethernet header */
struct ipv4_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv4_hdr *);

-   /* Get "Do not fragment" flag and fragment offset */
+   /* Get "More fragments" flag and fragment offset */
uint16_t frag_field = rte_be_to_cpu_16(pkt_hdr->fragment_offset);
uint16_t frag_offset = (uint16_t)(frag_field & IPV4_HDR_OFFSET_MASK);
uint16_t frag_flag = (uint16_t)(frag_field & IPV4_HDR_MF_FLAG);
@@ -195,6 +195,8 @@ process_ipv4(struct rte_port_ring_writer_ras *p, struct 
rte_mbuf *pkt)
struct rte_ip_frag_tbl *tbl = p->frag_tbl;
struct rte_ip_frag_death_row *dr = &p->death_row;

+   pkt->l3_len = sizeof(*pkt_hdr);
+
/* Process this fragment */
mo = rte_ipv4_frag_reassemble_packet(tbl, dr, pkt, rte_rdtsc(),
pkt_hdr);
@@ -224,6 +226,8 @@ process_ipv6(struct rte_port_ring_writer_ras *p, struct 
rte_mbuf *pkt)
struct rte_ip_frag_tbl *tbl = p->frag_tbl;
struct rte_ip_frag_death_row *dr = &p->death_row;

+   pkt->l3_len = sizeof(*pkt_hdr) + sizeof(*frag_hdr);
+
/* Process this fragment */
mo = rte_ipv6_frag_reassemble_packet(tbl, dr, pkt, rte_rdtsc(), 
pkt_hdr,
frag_hdr);
-- 
1.7.9.5



[dpdk-dev] [PATCH v1 1/3] port: add mp/mc ring ports

2015-09-15 Thread Piotr Azarewicz
ring_multi_reader input port (on top of multi consumer rte_ring)
ring_multi_writer output port (on top of multi producer rte_ring)

Signed-off-by: Piotr Azarewicz 
---
 lib/librte_port/rte_port_ring.c |  399 ++-
 lib/librte_port/rte_port_ring.h |   34 +++-
 2 files changed, 424 insertions(+), 9 deletions(-)

diff --git a/lib/librte_port/rte_port_ring.c b/lib/librte_port/rte_port_ring.c
index 9461c05..6b06466 100644
--- a/lib/librte_port/rte_port_ring.c
+++ b/lib/librte_port/rte_port_ring.c
@@ -70,8 +70,10 @@ rte_port_ring_reader_create(void *params, int socket_id)
struct rte_port_ring_reader *port;

/* Check input parameters */
-   if (conf == NULL) {
-   RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__);
+   if ((conf == NULL) ||
+   (conf->ring == NULL) ||
+   (conf->ring->cons.sc_dequeue != 1)) {
+   RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__);
return NULL;
}

@@ -166,7 +168,8 @@ rte_port_ring_writer_create(void *params, int socket_id)

/* Check input parameters */
if ((conf == NULL) ||
-   (conf->ring == NULL) ||
+   (conf->ring == NULL) ||
+   (conf->ring->prod.sp_enqueue != 1) ||
(conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) {
RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__);
return NULL;
@@ -343,7 +346,8 @@ rte_port_ring_writer_nodrop_create(void *params, int 
socket_id)

/* Check input parameters */
if ((conf == NULL) ||
-   (conf->ring == NULL) ||
+   (conf->ring == NULL) ||
+   (conf->ring->prod.sp_enqueue != 1) ||
(conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) {
RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__);
return NULL;
@@ -448,6 +452,7 @@ rte_port_ring_writer_nodrop_tx_bulk(void *port,
 */
for (; n_pkts_ok < n_pkts; n_pkts_ok++) {
struct rte_mbuf *pkt = pkts[n_pkts_ok];
+
p->tx_buf[p->tx_buf_count++] = pkt;
}
send_burst_nodrop(p);
@@ -513,6 +518,367 @@ rte_port_ring_writer_nodrop_stats_read(void *port,
 }

 /*
+ * Port RING Multi Reader
+ */
+static void *
+rte_port_ring_multi_reader_create(void *params, int socket_id)
+{
+   struct rte_port_ring_multi_reader_params *conf =
+   (struct rte_port_ring_multi_reader_params *) params;
+   struct rte_port_ring_reader *port;
+
+   /* Check input parameters */
+   if ((conf == NULL) ||
+   (conf->ring == NULL) ||
+   (conf->ring->cons.sc_dequeue != 0)) {
+   RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__);
+   return NULL;
+   }
+
+   /* Memory allocation */
+   port = rte_zmalloc_socket("PORT", sizeof(*port),
+   RTE_CACHE_LINE_SIZE, socket_id);
+   if (port == NULL) {
+   RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+   return NULL;
+   }
+
+   /* Initialization */
+   port->ring = conf->ring;
+
+   return port;
+}
+
+static int
+rte_port_ring_multi_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t 
n_pkts)
+{
+   struct rte_port_ring_reader *p = (struct rte_port_ring_reader *) port;
+   uint32_t nb_rx;
+
+   nb_rx = rte_ring_mc_dequeue_burst(p->ring, (void **) pkts, n_pkts);
+   RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(p, nb_rx);
+
+   return nb_rx;
+}
+
+static int
+rte_port_ring_multi_reader_free(void *port)
+{
+   if (port == NULL) {
+   RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__);
+   return -EINVAL;
+   }
+
+   rte_free(port);
+
+   return 0;
+}
+
+/*
+ * Port RING Multi Writer
+ */
+static void *
+rte_port_ring_multi_writer_create(void *params, int socket_id)
+{
+   struct rte_port_ring_multi_writer_params *conf =
+   (struct rte_port_ring_multi_writer_params *) params;
+   struct rte_port_ring_writer *port;
+
+   /* Check input parameters */
+   if ((conf == NULL) ||
+   (conf->ring == NULL) ||
+   (conf->ring->prod.sp_enqueue != 0) ||
+   (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) {
+   RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__);
+   return NULL;
+   }
+
+   /* Memory allocation */
+   port = rte_zmalloc_socket("PORT", sizeof(*port),
+   RTE_CACHE_LINE_SIZE, socket_id);
+   if (port == NULL) {
+   RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+   return NULL;
+   }
+
+   /* Initialization */
+   port->ring = conf->ring;
+   port->tx_burst_sz = conf->tx_burst_sz;
+   port->tx_buf_count = 0;
+   

[dpdk-dev] [PATCH v1 0/3] ip_pipeline: add MP/MC and frag/ras support to SWQs

2015-09-15 Thread Piotr Azarewicz
This patch set enhances the ip_pipeline application:
- librte_port: add support for multi-producer/multi-consumer ring ports
- librte_port: bug fixes for ring ports with IPv4/IPv6 reassembly support
- ip_pipeline application: integrate MP/MC and fragmentation/reassembly support
 to SWQs

Piotr Azarewicz (3):
  port: add mp/mc ring ports
  port: fix ras ring ports
  examples/ip_pipeline: add mp/mc and frag/ras swq

 examples/ip_pipeline/app.h  |   14 ++
 examples/ip_pipeline/config_check.c |   45 +++-
 examples/ip_pipeline/config_parse.c |  195 -
 examples/ip_pipeline/init.c |  165 ---
 examples/ip_pipeline/main.c |4 +-
 examples/ip_pipeline/pipeline_be.h  |   18 ++
 lib/librte_port/rte_port_ras.c  |8 +-
 lib/librte_port/rte_port_ring.c |  399 ++-
 lib/librte_port/rte_port_ring.h |   34 ++-
 9 files changed, 832 insertions(+), 50 deletions(-)

-- 
1.7.9.5



[dpdk-dev] [PATCH] doc: update the dpdk 2.2 release notes

2015-09-15 Thread John McNamara
Update the DPDK 2.2 release notes with recent fixes:

  7e01e3 i40e: fix base driver allocation when not using first numa node
  5e73f4 ixgbe: remove burst size restriction of vector Rx
  7fcd13 ixgbe: fix X550 DCB
  d49e0f hash: fix memory allocation of cuckoo key table
  9db649 eal/linux: fix epoll timeout

Signed-off-by: John McNamara 
---

Authors of the above commits can you please check the associated release note 
below.


 doc/guides/rel_notes/release_2_2.rst | 31 +++
 1 file changed, 31 insertions(+)

diff --git a/doc/guides/rel_notes/release_2_2.rst 
b/doc/guides/rel_notes/release_2_2.rst
index 682f468..9604316 100644
--- a/doc/guides/rel_notes/release_2_2.rst
+++ b/doc/guides/rel_notes/release_2_2.rst
@@ -8,6 +8,37 @@ New Features
 Resolved Issues
 ---

+* **i40e: Fixed base driver allocation when not using first numa node.**
+
+Fixed i40e issue that occurred when a DPDK application didn't initialize ports
+if memory wasn't available on socket 0.
+
+
+* **ixgbe: Removed burst size restriction of vector RX.**
+
+Fixed issue where a burst size less than 32 didn't receive anything.
+
+
+* **ixgbe: Fixed issue with X550 DCB.**
+
+Fixed a DCB issue with x550 where for 8 TCs (Traffic Classes), if a packet
+with user priority 6 or 7 was injected to the NIC, then the NIC would only put
+3 packets into the queue. There was also a similar issue for 4 TCs.
+
+
+* **hash: Fixed memory allocation of Cuckoo Hash key table.**
+
+Fixed issue where an incorrect Cuckoo Hash key table size could be calculated
+limiting the size to 4GB.
+
+Fixes: 48a399119619 ("hash: replace with cuckoo hash implementation")
+
+
+* **eal/linux: Fixed epoll timeout.**
+
+Fixed issue where the ``rte_epoll_wait()`` function didn't return when the
+underlying call to ``epoll_wait()`` timed out.
+

 Known Issues
 
-- 
1.8.1.4



[dpdk-dev] [PATCH v1 0/3] ip_pipeline: add MP/MC and frag/ras support to SWQs

2015-09-15 Thread Dumitrescu, Cristian


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Piotr Azarewicz
> Sent: Tuesday, September 15, 2015 4:07 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v1 0/3] ip_pipeline: add MP/MC and frag/ras
> support to SWQs
> 
> This patch set enhancement ip_pipeline application:
> - librte_port: add support for multi-producer/multi-consumer ring ports
> - librte_port: bug fixes for ring ports with IPv4/IPv6 reassembly support
> - ip_pipeline application: integrate MP/MC and fragmentation/reassembly
> support
>  to SWQs
> 

Acked-by: Cristian Dumitrescu 



[dpdk-dev] [RFC PATCH v1] rte: LCore heartbeat example

2015-09-15 Thread Remy Horton
Provides a basic framework for detecting and reporting live-ness of 
LCores, the primary requirement of which is minimal overheads for the 
core(s) being checked. Core failures are notified via an application 
defined callback. As an example l2fwd with random failures is used.

Signed-off-by: Remy Horton 
---
 examples/l2fwd-heartbeat/Makefile |  50 +++
 examples/l2fwd-heartbeat/hbeat.c  | 121 ++
 examples/l2fwd-heartbeat/hbeat.h  | 143 +++
 examples/l2fwd-heartbeat/main.c   | 809 ++
 4 files changed, 1123 insertions(+)
 create mode 100644 examples/l2fwd-heartbeat/Makefile
 create mode 100644 examples/l2fwd-heartbeat/hbeat.c
 create mode 100644 examples/l2fwd-heartbeat/hbeat.h
 create mode 100644 examples/l2fwd-heartbeat/main.c

diff --git a/examples/l2fwd-heartbeat/Makefile 
b/examples/l2fwd-heartbeat/Makefile
new file mode 100644
index 000..8b89476
--- /dev/null
+++ b/examples/l2fwd-heartbeat/Makefile
@@ -0,0 +1,50 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l2fwd-heartbeat
+
+# all source are stored in SRCS-y
+SRCS-y := main.c hbeat.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l2fwd-heartbeat/hbeat.c b/examples/l2fwd-heartbeat/hbeat.c
new file mode 100644
index 000..755f3e8
--- /dev/null
+++ b/examples/l2fwd-heartbeat/hbeat.c
@@ -0,0 +1,121 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+
+#include 

[dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

2015-09-15 Thread Morten Brørup
Adrien Mazarguil [mailto:adrien.mazarguil at 6wind.com] on 15. september 2015 
12:05:

> A given link cannot be simultaneously at 10 Gbps and 1 Gbps right? Using a 
> bit-field for the current link speed is confusing at best. Output values do 
> not need to be included in the unified API, they are never converted back 
> into enum values.

A given link can only use one speed at any given time, correct. However, other 
link properties must be output too, such as link status, duplex status, auto 
negotiation status, flow control status, media status, etc..  Using a bitfield 
provides all this information in an atomic operation.

> I'm stressing again the fact that doing so would require changes in all 
> applications that use the current speed and in PMDs for no good reason.

Correct, it requires a lot of changes in all the PMDs. I think a good PHY API 
with the appropriate features is a good reason. If there is no consensus to put 
in the work to update the PHY API and PMDs accordingly, just leave it at its 
former state.


I recently saw an interesting presentation about TCP optimization, and the last 
slide has a very good point. It says (slightly paraphrased): "You can't sell 
just the traffic handling, you need the support infrastructure for O&M, such as 
CLI, Web UI, Historical counter database, SNMP, RADIUS, Analytics and so on." 
So related to our PHY API discussion, I would add that proper handling of the 
physical interfaces is one of those things that just need to be in order when 
you sell network appliances.


Med venlig hilsen / kind regards
- Morten Brørup


[dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

2015-09-15 Thread Adrien Mazarguil
Hi Marc,

Adding my thoughts to the discussion, see below.

On Tue, Sep 15, 2015 at 10:48:03AM +0200, Marc Sune wrote:
> I will answer Morten in another mail, because I got his point on the
> AUTONEG as a separate bit, and it _makes_ sense to me.
> 
> But Nelio,
> 
> 2015-09-15 10:25 GMT+02:00 Nélio Laranjeiro :
> 
> > On Tue, Sep 15, 2015 at 12:50:11AM +0200, Morten Brørup wrote:
> > > Comments inline, marked MB>.
> > >
> > > Med venlig hilsen / kind regards
> > > - Morten Brørup
> > >
> > > Marc Sune  on 14. september 2015 23:34 wrote:
> > >
> > > 2015-09-14 12:52 GMT+02:00 Morten Brørup :
> > > > It is important to consider that a multipath link (bonding etc.) is
> > not a physical link, but a logical link (built on top of multiple physical
> > links). Regardless whether it is a Layer2 link aggregate (IEEE 802.1ad,
> > Ethernet bonding, EtherChannel, DSL pair bonding, etc.) or a Layer3
> > multipath link (e.g. simultaneously using Wi-Fi and mobile networks). So it
> > doesn't make sense trying to impose physical link properties on a purely
> > logical link. Likewise, it doesn't make sense to impose logical link
> > properties on physical links. In other words: Don't consider bonding or any
> > other logical link types when designing the PHY API.
> > >
> > > +1
> >
> > +1.

I agree with the fact that physical link properties do not make sense for
logical links, however in the case of the bonding PMD, the aggregated link
speed can be actually useful for applications (assuming it is kept up to
date, I think it's the case). The current API certainly allows this.

> > > > I think there is consensus that 1/ (PHY capabilities) and 2/ (PHY
> > advertisements) should use the same definitions, specifically a bitmap
> > field. And when you disregard bonding, I don't see any reason to use
> > different definitions for 3/ (PHY negotiation result). This makes it one
> > unified API for all three purposes.
> > >
> > > Agree.
> >
> > I don't agree with this one, some PMDs don't use the advertise of
> > autoneg result to get the speed or the duplex.  You make a
> > generality from your case above all PMDs.
> >
> 
> can you please explain how a particular PMD is recovering the actual link
> speed and the duplex has to do with the design of the (general) API?

It's not so much about the way PMDs recover link information, rather about
the amount of changes required to switch to a bit-field API for the current
link speed with no clear advantage. All PMDs must be modified, the initial
set of patches isn't complete in this regard.

> > Mellanox get the speed, duplex and status information from IOCTLs
> > which are not related to your bitmap.  So at least for this PMD, there
> > is already a conversion from 3 fields to a bitmap, knowing that it will
> > use the speed as an integer after.  What is the benefit of your solution?
> >
> 
> I said already I don't have a strong preference for 3/. But steering the
> design of an API through a "minimum common denominator" principle is not a
> good idea, specially since we are talking about a super simple mapping
> issue for this specific PMD.

I think Nelio was using mlx4 as an example, all PMDs have their own
particular method to recover it and several must perform calculations to get
the final value. Using integers for this task is certainly easier than going
through bit-field conversions.

> > > > Nelio suggested adding a support function to convert the bitmap field
> > to a speed value as an integer. I strongly support this, because you cannot
> > expect the bitmap to be ordered by speed.
> > >
> > > Agree with Nelio This is useful.
> >
> > It was exactly the extreme opposite, a function which takes a
> > rte_eth_link to a bitmap i.e. speed_to_bm (rte_eth_link link) because,
> > the speed is mostly used as an integer and not some kind of bitmap.
> >
> > > > This support function will be able to determine which speed is higher
> > when exotic speeds are added to the bitmap. Please extend this conversion
> > function to give three output parameters: speed, full/half duplex, auto
> > negotiation/non-auto negotiation, or add two separate functions to get the
> > duplex and auto-negotiation.
> > >
> > > Since, Full/Half duplex is for legacy 10/100Mbps only (afaik), I have my
> > doubts on using a bit for all speeds. I would suggest to define (unroll)
> > 100M (or 100M_FD) and 100M_HD, and the same 10Mbps/1gbps, as Thomas was
> > suggesting some mails ago.
> > >
> > > This was done in v4 (implicitly 100M == 100M_FD). See below.
> > >
> > > MB> I didn't intend two bits to be allocated in the bitmap for all
> > speeds to support full/half duplex, only for the relevant speeds. Since
> > full duplex is dominant, I agree with the previous decision (originally
> > suggested by Thomas, I think) to make full duplex implicit unless half
> > duplex is explicitly specified. E.g. 10M_HD, 10M (alias 10M_FD), 100M_HD,
> > 100M (alias 100M_FD), 1000M (or 1G), 2500M, 10G, 40G, 100G, etc.
> > >
> > >
> 

[dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

2015-09-15 Thread Morten Brørup
Nelio, Marc,



I think Marc is on the right track here: You should design the API based on 
requirements and usefulness, not based on what is already implemented in some 
drivers. And as Marc pointed out, this is control plane stuff, so performance 
should not be an issue.



Let me throw in another detail. Read clause 22.2.4.2.13 in the IEEE 802.3 
standard very carefully. It says that the Link Status register in the PHY 
latches a link failure until the Link Status bit has been read. The purpose of 
this is to make it possible for slowly polling software to detect that the link 
has been temporarily down, even though it has come up again. Why would the 
application need to know that the link has been temporarily down, when it is 
already up again? Because the link might have come up in another speed/duplex, 
auto negotiation or flow control mode than it was before.



And while I'm at it: Remember the flow control (PAUSE) operations. It is 
important to be able to enable/disable flow control – even cheap "web managed" 
switches have the ability to disable flow control.





Med venlig hilsen / kind regards

- Morten Brørup



From: Marc Sune [mailto:marcde...@gmail.com] 
Sent: 15. september 2015 10:48
To: Nélio Laranjeiro
Cc: Morten Brørup; Thomas Monjalon; dev at dpdk.org; Olga Shern; Adrien 
Mazarguil
Subject: Re: [dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap



I will answer Morten in another mail, because I got his point on the AUTONEG as 
a separate bit, and it _makes_ sense to me.



But Neilo,



2015-09-15 10:25 GMT+02:00 Nélio Laranjeiro :

On Tue, Sep 15, 2015 at 12:50:11AM +0200, Morten Brørup wrote:
> Comments inline, marked MB>.
>
> Med venlig hilsen / kind regards
> - Morten Brørup
>
> Marc Sune  on 14. september 2015 23:34 wrote:
>
> 2015-09-14 12:52 GMT+02:00 Morten Brørup :
> > It is important to consider that a multipath link (bonding etc.) is not a 
> > physical link, but a logical link (built on top of multiple physical 
> > links). Regardless whether it is a Layer2 link aggregate (IEEE 802.1ad, 
> > Ethernet bonding, EtherChannel, DSL pair bonding, etc.) or a Layer3 
> > multipath link (e.g. simultaneously using Wi-Fi and mobile networks). So it 
> > doesn't make sense trying to impose physical link properties on a purely 
> > logical link. Likewise, it doesn't make sense to impose logical link 
> > properties on physical links. In other words: Don't consider bonding or any 
> > other logical link types when designing the PHY API.
>
> +1

+1.

>  
>
> > I think there is consensus that 1/ (PHY capabilities) and 2/ (PHY 
> > advertisements) should use the same definitions, specifically a bitmap 
> > field. And when you disregard bonding, I don't see any reason to use 
> > different definitions for 3/ (PHY negotiation result). This makes it one 
> > unified API for all three purposes.
>
> Agree.

I don't agree with this one, some PMDs don't use the advertise of
autoneg result to get the speed or the duplex.  You make a
generality from your case above all PMDs.



can you please explain how a particular PMD is recovering the actual link speed 
and the duplex has to do with the design of the (general) API?




Mellanox get the speed, duplex and status information from IOCTLs
which are not related to your bitmap.  So at least for this PMD, there
is already a conversion from 3 fields to a bitmap, knowing that it will
use the speed as an integer after.  What is the benefit of your 
solution?



I said already I don't have a strong preference for 3/. But steering the design 
of an API through a "minimum common denominator" principle is not a good idea, 
specially since we are talking about a super simple mapping issue for this 
specific PMD.




> > Nelio suggested adding a support function to convert the bitmap 
field to a speed value as an integer. I strongly support this, because you 
cannot expect the bitmap to be ordered by speed.
>
> Agree with Nelio This is useful.

It was exactly the extreme opposite, a function which takes a
rte_eth_link to a bitmap i.e. speed_to_bm (rte_eth_link link) because,
the speed is mostly used as an integer and not some kind of bitmap.

> > This support function will be able to determine which speed is 
higher when exotic speeds are added to the bitmap. Please extend this 
conversion function to give three output parameters: speed, full/half duplex, 
auto negotiation/non-auto negotiation, or add two separate functions to get the 
duplex and auto-negotiation.
>
> Since, Full/Half duplex is for legacy 10/100Mbps only (afaik), I have 
my doubts on using a bit for all speeds. I would suggest to define (unroll) 
100M (or 100M_FD) and 100M_HD, and the same 10Mbps/1gbps, as Thomas was 
suggesting some mails ago.
>
> This was done in v4 (implicitly 100M == 100M_FD). See below.
>  
> MB> I didn't intend two 

[dpdk-dev] DPDK 2.2 roadmap

2015-09-15 Thread David Marchand
Hello all,

My turn.

As far as the 2.2 is concerned, I have some fixes/changes waiting for going
upstream :
- allow default mac removal (to be discussed)
- kvargs api updates / cleanup (no change on abi, I would say)
- vlan filtering api fixes and ixgbevf/igbvf associated fixes (might have
an impact on abi)
- ethdev fixes wrt hotplug framework
- minor fixes in testpmd

After this, depending on the schedule (so will most likely be for 2.3 or
later), I have some ideas on :
- cleanup for hotplug and maybe discussions on pci bind/unbind operations
- provide a little tool to have information/capabilities on drivers (à la
modinfo)
- continue work on hotplug


By the way, I have some questions to the community :

- I noticed that with hotplug support, testpmd has become *really* hungry
on mbufs and memory.
The problem comes from the "basic" assumption that we must have enough
memory/mbufs for the maximum number of ports that might be available but
are not in the most common tests setup.
One solution might be to rework the way mbufs are reserved :
* either we let testpmd start with limited mbufs count the way it was
working before edab33b1 ("app/testpmd: support port hotplug"), then when
trying to start a port, this operation can fail if not enough mbufs are
available for it
* or we can try to create one mempool per port. The mempools would be
populated at the port init / close (?).
Anyone volunteers to rework this ?
Other ideas ?


- looking at a patch from Chao (
http://dpdk.org/ml/archives/dev/2015-August/022819.html), I think we need
to rework the way the numa nodes are handled in the dpdk.
The problem is that we rely on static arrays for some resources per socket.
I suppose this was designed with the idea that socket "physical" indexes
are contiguous, but this is not true on systems running power8 bare metal
(where numa indexes can be 0, 1, 16, 17 on quad nodes servers).
I suppose we can go with a mapping array (populated at the same time cpus
are discovered), then use this mapping array and preserve all apis, but
this might not be that trivial.
Volunteers ?
Ideas ?


- finally, looking at the eal, there are still some cleanups to do.
More specifically, are there any users of the ivshmem feature in dpdk ?
I can see little value in keeping the ivshmem feature in the eal (well
maybe because I don't use it) as it relies on hacks.
So I can see two options:
* someone still wants it to work, then we need a good rework to get rid of
those hacks under #ifdef in eal and the special configuration files can
disappear
* or if nobody complains, we can schedule its deprecation then removal.


Thanks.

-- 
David Marchand


[dpdk-dev] [PATCH] doc: add guideline for updating release notes

2015-09-15 Thread Thomas Monjalon
2015-09-11 12:04, John McNamara:
> From version 2.2 of DPDK onwards patchsets should include
> updates to the Release Notes for additions, fixes and
> changes.
> 
> Add guideline on what to update in the Release Notes to the
> Documentation Contribution guidelines.
> 
> Signed-off-by: John McNamara 

Applied, thanks.

It is now expected to be an acceptance criterion during reviews.

Any volunteer to fix the release notes for the previous commits?
17e01e3 i40e: fix base driver allocation when not using first numa node
45e73f4 ixgbe: remove burst size restriction of vector Rx
b7fcd13 ixgbe: fix X550 DCB
7d49e0f hash: fix memory allocation of cuckoo key table
79db649 eal/linux: fix epoll timeout



[dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

2015-09-15 Thread Morten Brørup
Marc, here's a couple of details that I missed in my email below:

Correction: 1000BASE-T and 1000BASE-X also have half duplex, so full/half 
duplex is relevant for 10, 100 and 1000 Mbit/s speeds.

The bitmap in 3/ (result) probably also needs a bit ("NO_MEDIA" or whatever) to 
indicate if a media module (e.g. an SFP+ module) is present or missing (i.e. 
the SFP+ cage is empty); e.g. refer to the "Media Available" row in table 3-55 
in Intel's XL710 Ethernet Controller datasheet 
(xl710-10-40-controller-datasheet.pdf). Alternatively, this can be indicated by 
having 1/ (capabilities) returning an empty set of capabilities when no media 
module has been installed.

Furthermore, the 1/ (capabilities) or 3/ (result) also needs a means to 
indicate which physical port of a dual-personality port is being used. And by 
dual-personality ports, I mean a PHY with both an RJ45 copper port and an SFP 
cage, where only one of them can be active at any time.


Med venlig hilsen / kind regards
- Morten Brørup


-Original Message-
From: Morten Brørup 
Sent: 15. september 2015 00:50
To: 'Marc Sune'
Cc: Thomas Monjalon; Nélio Laranjeiro; dev at dpdk.org; Olga Shern; Adrien 
Mazarguil
Subject: RE: [dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

Comments inline, marked MB>.

Med venlig hilsen / kind regards
- Morten Brørup

Marc Sune  on 14. september 2015 23:34 wrote:

2015-09-14 12:52 GMT+02:00 Morten Brørup :
> It is important to consider that a multipath link (bonding etc.) is not a 
> physical link, but a logical link (built on top of multiple physical links). 
> Regardless whether it is a Layer2 link aggregate (IEEE 802.1ad, Ethernet 
> bonding, EtherChannel, DSL pair bonding, etc.) or a Layer3 multipath link 
> (e.g. simultaneously using Wi-Fi and mobile networks). So it doesn't make 
> sense trying to impose physical link properties on a purely logical link. 
> Likewise, it doesn't make sense to impose logical link properties on physical 
> links. In other words: Don't consider bonding or any other logical link types 
> when designing the PHY API.

+1
?

> I think there is consensus that 1/ (PHY capabilities) and 2/ (PHY 
> advertisements) should use the same definitions, specifically a bitmap field. 
> And when you disregard bonding, I don't see any reason to use different 
> definitions for 3/ (PHY negotiation result). This makes it one unified API 
> for all three purposes.

Agree.
?

> Nelio suggested adding a support function to convert the bitmap field to a 
> speed value as an integer. I strongly support this, because you cannot expect 
> the bitmap to be ordered by speed. 

Agree with Nelio This is useful.
?
> This support function will be able to determine which speed is higher when 
> exotic speeds are added to the bitmap. Please extend this conversion function 
> to give three output parameters: speed, full/half duplex, auto 
> negotiation/non-auto negotiation, or add two separate functions to get the 
> duplex and auto-negotiation.

Since, Full/Half duplex is for legacy 10/100Mbps only (afaik), I have my doubts 
on using a bit for all speeds. I would suggest to define (unroll) 100M (or 
100M_FD) and 100M_HD, and the same 10Mbps/1gbps, as Thomas was suggesting some 
mails ago.

This was done in v4 (implicitly 100M == 100M_FD). See below.
?
MB> I didn't intend two bits to be allocated in the bitmap for all speeds to 
support full/half duplex, only for the relevant speeds. Since full duplex is 
dominant, I agree with the previous decision (originally suggested by Thomas, I 
think) to make full duplex implicit unless half duplex is explicitly specified. 
E.g. 10M_HD, 10M (alias 10M_FD), 100M_HD, 100M (alias 100M_FD), 1000M (or 1G), 
2500M, 10G, 40G, 100G, etc.


> I haven't read the suggested code, but there should be some means in 2/ 
> (advertisements) to disable auto negotiation, e.g. a single bit in the bitmap 
> to indicate if the speed/duplex-indicating bits in the bitmap means forced 
> speed/duplex (in which case only a single speed/duplex-bit should be set) or 
> auto negotiation advertised speed/duplex (in which case multiple 
> speed/duplex-bits can be set). 

Agree.

v3/4 of this patch adds the bitmap in the advertised, as per discussed, to 
select a group of speeds. This is not implemented by drivers yet (!).

So, as of v4 of this patch, there could be: a) autoneg any supported speed (=> 
bitmap == 0) b) autoneg over group of speeds (=> more than one bit set in the 
bitmap) c) forced speed (one and only one set in the bitmap).

I think this is precisely what you meant + b) as a bonus

MB> This was not what I meant, but it wasn't very clearly written, so I'll try 
again: Add an additional single bit "NO_AUTONEG" (or whatever you want to name 
it) to the 2/ (advertisements) bitmap that explicitly turns off auto 
negotiation and tries to force the selected speed/duplex (i.e. only one other 
bit can be set in the bitmap when the NO_AUTONEG bit is set). Your c) makes it 

[dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

2015-09-15 Thread Marc Sune
I will answer Morten in another mail, because I got his point on the
AUTONEG as a separate bit, and it _makes_ sense to me.

But Nelio,

2015-09-15 10:25 GMT+02:00 Nélio Laranjeiro :

> On Tue, Sep 15, 2015 at 12:50:11AM +0200, Morten Brørup wrote:
> > Comments inline, marked MB>.
> >
> > Med venlig hilsen / kind regards
> > - Morten Brørup
> >
> > Marc Sune  on 14. september 2015 23:34 wrote:
> >
> > 2015-09-14 12:52 GMT+02:00 Morten Brørup :
> > > It is important to consider that a multipath link (bonding etc.) is
> not a physical link, but a logical link (built on top of multiple physical
> links). Regardless whether it is a Layer2 link aggregate (IEEE 802.1ad,
> Ethernet bonding, EtherChannel, DSL pair bonding, etc.) or a Layer3
> multipath link (e.g. simultaneously using Wi-Fi and mobile networks). So it
> doesn't make sense trying to impose physical link properties on a purely
> logical link. Likewise, it doesn't make sense to impose logical link
> properties on physical links. In other words: Don't consider bonding or any
> other logical link types when designing the PHY API.
> >
> > +1
>
> +1.
>
> >
> >
> > > I think there is consensus that 1/ (PHY capabilities) and 2/ (PHY
> advertisements) should use the same definitions, specifically a bitmap
> field. And when you disregard bonding, I don't see any reason to use
> different definitions for 3/ (PHY negotiation result). This makes it one
> unified API for all three purposes.
> >
> > Agree.
>
> I don't agree with this one, some PMDs don't use the advertise of
> autoneg result to get the speed or the duplex.  You make a
> generality from your case above all PMDs.
>

Can you please explain what the way a particular PMD recovers the actual link
speed and duplex has to do with the design of the (general) API?


>
> Mellanox get the speed, duplex and status information from IOCTLs
> which are not related to your bitmap.  So at least for this PMD, there
> is already a conversion from 3 fields to a bitmap, knowing that it will
> use the speed as an integer after.  What is the benefit of your solution?
>

I said already I don't have a strong preference for 3/. But steering the
design of an API through a "minimum common denominator" principle is not a
good idea, especially since we are talking about a super simple mapping
issue for this specific PMD.


>
> > > Nelio suggested adding a support function to convert the bitmap field
> to a speed value as an integer. I strongly support this, because you cannot
> expect the bitmap to be ordered by speed.
> >
> > Agree with Nelio This is useful.
>
> It was exactly the extreme opposite, a function which takes a
> rte_eth_link to a bitmap i.e. speed_to_bm (rte_eth_link link) because,
> the speed is mostly used as an integer and not some kind of bitmap.
>
> > > This support function will be able to determine which speed is higher
> when exotic speeds are added to the bitmap. Please extend this conversion
> function to give three output parameters: speed, full/half duplex, auto
> negotiation/non-auto negotiation, or add two separate functions to get the
> duplex and auto-negotiation.
> >
> > Since, Full/Half duplex is for legacy 10/100Mbps only (afaik), I have my
> doubts on using a bit for all speeds. I would suggest to define (unroll)
> 100M (or 100M_FD) and 100M_HD, and the same 10Mbps/1gbps, as Thomas was
> suggesting some mails ago.
> >
> > This was done in v4 (implicitly 100M == 100M_FD). See below.
> >
> > MB> I didn't intend two bits to be allocated in the bitmap for all
> speeds to support full/half duplex, only for the relevant speeds. Since
> full duplex is dominant, I agree with the previous decision (originally
> suggested by Thomas, I think) to make full duplex implicit unless half
> duplex is explicitly specified. E.g. 10M_HD, 10M (alias 10M_FD), 100M_HD,
> 100M (alias 100M_FD), 1000M (or 1G), 2500M, 10G, 40G, 100G, etc.
> >
> >
> > > I haven't read the suggested code, but there should be some means in
> 2/ (advertisements) to disable auto negotiation, e.g. a single bit in the
> bitmap to indicate if the speed/duplex-indicating bits in the bitmap means
> forced speed/duplex (in which case only a single speed/duplex-bit should be
> set) or auto negotiation advertised speed/duplex (in which case multiple
> speed/duplex-bits can be set).
> >
> > Agree.
> >
> > v3/4 of this patch adds the bitmap in the advertised, as per discussed,
> to select a group of speeds This is not implemented by drivers yet (!).
> >
> > So, as of v4 of this patch, there could be: a) autoneg any supported
> speed (=> bitmap == 0) b) autoneg over group of speeds (=> more than one
> bit set in the bitmap) c) forced speed (one and only one set in the bitmap).
> >
> > I think this is precisely what you meant + b) as a bonus
> >
> > MB> This was not what I meant, but it wasn't very clearly written, so
> I'll try again: Add an additional single bit "NO_AUTONEG" (or whatever you
> want to name it) to the 2/ (advertisements) bitmap that 

[dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

2015-09-15 Thread Nélio Laranjeiro
On Tue, Sep 15, 2015 at 12:50:11AM +0200, Morten Brørup wrote:
> Comments inline, marked MB>.
> 
> Med venlig hilsen / kind regards
> - Morten Brørup
> 
> Marc Sune  on 14. september 2015 23:34 wrote:
> 
> 2015-09-14 12:52 GMT+02:00 Morten Brørup :
> > It is important to consider that a multipath link (bonding etc.) is not a 
> > physical link, but a logical link (built on top of multiple physical 
> > links). Regardless whether it is a Layer2 link aggregate (IEEE 802.1ad, 
> > Ethernet bonding, EtherChannel, DSL pair bonding, etc.) or a Layer3 
> > multipath link (e.g. simultaneously using Wi-Fi and mobile networks). So it 
> > doesn't make sense trying to impose physical link properties on a purely 
> > logical link. Likewise, it doesn't make sense to impose logical link 
> > properties on physical links. In other words: Don't consider bonding or any 
> > other logical link types when designing the PHY API.
> 
> +1

+1.

> ?
> 
> > I think there is consensus that 1/ (PHY capabilities) and 2/ (PHY 
> > advertisements) should use the same definitions, specifically a bitmap 
> > field. And when you disregard bonding, I don't see any reason to use 
> > different definitions for 3/ (PHY negotiation result). This makes it one 
> > unified API for all three purposes.
> 
> Agree.

I don't agree with this one, some PMDs don't use the advertise of
autoneg result to get the speed or the duplex.  You make a
generality from your case above all PMDs.

Mellanox get the speed, duplex and status information from IOCTLs
which are not related to your bitmap.  So at least for this PMD, there
is already a conversion from 3 fields to a bitmap, knowing that it will
use the speed as an integer after.  What is the benefit of your solution?

> > Nelio suggested adding a support function to convert the bitmap field to a 
> > speed value as an integer. I strongly support this, because you cannot 
> > expect the bitmap to be ordered by speed. 
> 
> Agree with Nelio. This is useful.

It was exactly the extreme opposite, a function which takes a
rte_eth_link to a bitmap i.e. speed_to_bm (rte_eth_link link) because,
the speed is mostly used as an integer and not some kind of bitmap.

> > This support function will be able to determine which speed is higher when 
> > exotic speeds are added to the bitmap. Please extend this conversion 
> > function to give three output parameters: speed, full/half duplex, auto 
> > negotiation/non-auto negotiation, or add two separate functions to get the 
> > duplex and auto-negotiation.
> 
> Since, Full/Half duplex is for legacy 10/100Mbps only (afaik), I have my 
> doubts on using a bit for all speeds. I would suggest to define (unroll) 100M 
> (or 100M_FD) and 100M_HD, and the same 10Mbps/1gbps, as Thomas was suggesting 
> some mails ago.
> 
> This was done in v4 (implicitly 100M == 100M_FD). See below.
> ?
> MB> I didn't intend two bits to be allocated in the bitmap for all speeds to 
> support full/half duplex, only for the relevant speeds. Since full duplex is 
> dominant, I agree with the previous decision (originally suggested by Thomas, 
> I think) to make full duplex implicit unless half duplex is explicitly 
> specified. E.g. 10M_HD, 10M (alias 10M_FD), 100M_HD, 100M (alias 100M_FD), 
> 1000M (or 1G), 2500M, 10G, 40G, 100G, etc.
> 
> 
> > I haven't read the suggested code, but there should be some means in 2/ 
> > (advertisements) to disable auto negotiation, e.g. a single bit in the 
> > bitmap to indicate if the speed/duplex-indicating bits in the bitmap means 
> > forced speed/duplex (in which case only a single speed/duplex-bit should be 
> > set) or auto negotiation advertised speed/duplex (in which case multiple 
> > speed/duplex-bits can be set). 
> 
> Agree.
> 
> v3/4 of this patch adds the bitmap in the advertised, as per discussed, to 
> select a group of speeds. This is not implemented by drivers yet (!).
> 
> So, as of v4 of this patch, there could be: a) autoneg any supported speed 
> (=> bitmap == 0) b) autoneg over group of speeds (=> more than one bit set in 
> the bitmap) c) forced speed (one and only one set in the bitmap).
> 
> I think this is precisely what you meant + b) as a bonus
> 
> MB> This was not what I meant, but it wasn't very clearly written, so I'll 
> try again: Add an additional single bit "NO_AUTONEG" (or whatever you want to 
> name it) to the 2/ (advertisements) bitmap that explicitly turns off auto 
> negotiation and tries to force the selected speed/duplex (i.e. only one other 
> bit can be set in the bitmap when the NO_AUTONEG bit is set). Your c) makes 
> it impossible to use auto negotiation to advertise a specific speed/duplex, 
> e.g. 100M_FD. My suggested NO_AUTONEG bit can also be used in 3/ (result) to 
> indicate that the speed was a result of Parallel Detection, i.e. that auto 
> negotiation failed or was disabled in either end of the link.
> 
> MB> However, I like your suggestion a).
> 
> ?
> > And some means in 3/ (result) and maybe 2/ 

[dpdk-dev] Fwd: [PATCH] PPC: Fix NUMA node numbering on IBM POWER8 LE machine

2015-09-15 Thread Bruce Richardson
On Tue, Sep 15, 2015 at 03:46:49PM +0800, Chao Zhu wrote:
> 
> Any response of this patch?

Looks ok to me - pretty trivial change.

/Bruce
> 
>  Forwarded Message 
> Subject:  [dpdk-dev] [PATCH] PPC: Fix NUMA node numbering on IBM POWER8 LE
> machine
> Date: Fri, 14 Aug 2015 20:19:48 +0800
> From: Chao Zhu 
> To:   dev at dpdk.org
> 
> 
> 
> When Linux is running on bare metal, it gets the raw hardware
> information. On POWER8 little endian bare metal machine, the node number
> is not continuous. It will jump from 0 to other values, for example, it
> can be 0, 1, 16, 17. This patch modified the CONFIG_RTE_MAX_NUMA_NODES
> value to make dpdk work on POWER8 bare metal little endian machine.
> 
> Signed-off-by: Chao Zhu 
> ---
>  config/common_linuxapp |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 0de43d5..82a027e 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -98,7 +98,7 @@ CONFIG_RTE_NEXT_ABI=y
>  #
>  CONFIG_RTE_LIBRTE_EAL=y
>  CONFIG_RTE_MAX_LCORE=128
> -CONFIG_RTE_MAX_NUMA_NODES=8
> +CONFIG_RTE_MAX_NUMA_NODES=32
>  CONFIG_RTE_MAX_MEMSEG=256
>  CONFIG_RTE_MAX_MEMZONE=2560
>  CONFIG_RTE_MAX_TAILQ=32
> -- 
> 1.7.1
> 
> 
> 


[dpdk-dev] [PATCH] doc: add guideline for updating release notes

2015-09-15 Thread Mcnamara, John
> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Tuesday, September 15, 2015 10:13 AM
> To: Mcnamara, John
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] doc: add guideline for updating release
> notes
> 
> 2015-09-11 12:04, John McNamara:
> > From version 2.2 of DPDK onwards patchsets should include updates to
> > the Release Notes for additions, fixes and changes.
> >
> > Add guideline on what to update in the Release Notes to the
> > Documentation Contribution guidelines.
> >
> > Signed-off-by: John McNamara 
> 
> Applied, thanks.
> 
> It is now expected to be an acceptance criteria during reviews.
> 
> Any volunteer to fix the release notes for the previous commits?
>   17e01e3 i40e: fix base driver allocation when not using first numa node
>   45e73f4 ixgbe: remove burst size restriction of vector Rx
>   b7fcd13 ixgbe: fix X550 DCB
>   7d49e0f hash: fix memory allocation of cuckoo key table
>   79db649 eal/linux: fix epoll timeout

Hi,

I'll fix the Release Notes based on the commit logs and CC the authors to 
ensure it is correct.

John





[dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

2015-09-15 Thread Morten Brørup
Very valid question, Thomas. It's always a good idea to take a step back and 
look at the bigger picture!

Unfortunately, I can mention at least one company that has network appliances 
in production using such low speeds, and even half duplex: ours (SmartShare 
Systems). Basing our appliances on DPDK does not change this situation.

When you ship a lot of network appliances to a variety of customers, some of 
the appliances will eventually end up at customers who connect one of the 
ports to some equipment which is either very old, or which is configured for 
forced speed/duplex due to their old IT policy which hasn't been updated for 
decades. With a sufficient number of customers, you are going to see everything 
possible! Reality surpasses imagination.


Med venlig hilsen / kind regards
- Morten Brørup

-Original Message-
From: Thomas Monjalon [mailto:thomas.monja...@6wind.com] 
Sent: 15. september 2015 09:05
To: Marc Sune
Cc: Morten Brørup; Nélio Laranjeiro; dev at dpdk.org; Olga Shern; Adrien 
Mazarguil
Subject: Re: [dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

2015-09-14 23:33, Marc Sune:
> 2015-09-14 12:52 GMT+02:00 Morten Brørup :
> > This support function will be able to determine which speed is 
> > higher when exotic speeds are added to the bitmap. Please extend 
> > this conversion function to give three output parameters: speed, 
> > full/half duplex, auto negotiation/non-auto negotiation, or add two 
> > separate functions to get the duplex and auto-negotiation.
> 
> Since, Full/Half duplex is for legacy 10/100Mbps only (afaik), I have 
> my doubts on using a bit for all speeds. I would suggest to define 
> (unroll) 100M (or 100M_FD) and 100M_HD, and the same 10Mbps/1gbps, as 
> Thomas was suggesting some mails ago.
> 
> This was done in v4 (implicitly 100M == 100M_FD). See below.

Are we going to use DPDK for such low speeds?
Maybe we can remove half duplex modes?

[...]
> So, as of v4 of this patch, there could be: a) autoneg any supported 
> speed (=> bitmap == 0) b) autoneg over group of speeds (=> more than 
> one bit set in the bitmap) c) forced speed (one and only one set in the 
> bitmap).

+1

[...]
> * encode the link speed and duplex as of now, separating duplex and 
> numeric speed. I would suggest to add the encoded speed+duplex bitmap 
> flag for consistency (although redundant).
> * or you return a single value, the bitmap with a single flag set of 
> the unrolled speeds, and then have the helpers int 
> rte_eth_speed_from_bm(int
> val_bm) and bool rte_eth_duplex_from_bm(int val_bm).

Who has already used half duplex mode with DPDK?



[dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

2015-09-15 Thread Thomas Monjalon
2015-09-14 23:33, Marc Sune:
> 2015-09-14 12:52 GMT+02:00 Morten Brørup :
> > This support function will be able to determine which speed is higher when
> > exotic speeds are added to the bitmap. Please extend this conversion
> > function to give three output parameters: speed, full/half duplex, auto
> > negotiation/non-auto negotiation, or add two separate functions to get the
> > duplex and auto-negotiation.
> 
> Since, Full/Half duplex is for legacy 10/100Mbps only (afaik), I have my
> doubts on using a bit for all speeds. I would suggest to define (unroll)
> 100M (or 100M_FD) and 100M_HD, and the same 10Mbps/1gbps, as Thomas was
> suggesting some mails ago.
> 
> This was done in v4 (implicitly 100M == 100M_FD). See below.

Are we going to use DPDK for such low speeds?
Maybe we can remove half duplex modes?

[...]
> So, as of v4 of this patch, there could be: a) autoneg any supported speed
> (=> bitmap == 0) b) autoneg over group of speeds (=> more than one bit set
> in the bitmap) c) forced speed (one and only one set in the bitmap).

+1

[...]
> * encode the link speed and duplex as of now, separating duplex and numeric
> speed. I would suggest to add the encoded speed+duplex bitmap flag for
> consistency (although redundant).
> * or you return a single value, the bitmap with a single flag set of the
> unrolled speeds, and then have the helpers int rte_eth_speed_from_bm(int
> val_bm) and bool rte_eth_duplex_from_bm(int val_bm).

Who has already used half duplex mode with DPDK?


[dpdk-dev] [PATCH v2] Change rte_eal_vdev_init to update port_id

2015-09-15 Thread Ravi Kerur
Hi David,


On Thu, Sep 3, 2015 at 7:04 AM, David Marchand 
wrote:

> Hello Ravi, Tetsuya,
>
> On Tue, Aug 25, 2015 at 7:59 PM, Ravi Kerur  wrote:
>
>> Let us know how you want us to fix this? To fix rte_eal_vdev_init and
>> rte_eal_pci_probe_one to return allocated port_id we had 2 approaches
>> mentioned in earlier discussion. In addition to those we have another
>> approach with changes isolated only to rte_ether component. I am attaching
>> diffs (preliminary) with this email. Please let us know your inputs since
>> it involves EAL component.
>>
>
> - This patch looks like a good ethdev cleanup (even if it really lacks
> some context / commit log).
>
> I wonder just why you only take the first part of the name in
> rte_eth_dev_get_port_by_name().
> Would not this match, let's say, both toto and toto0 vdevs ?
> Is this intended ?
>
> It was not intended, i will look into it.

>
> - In the end, with this patch, do we still need to update eal ?
> Looking at the code, I am not sure anymore.
>

Approach 3 (preliminary diffs sent as an attachment) doesn't involve EAL
but the other two solutions do. So please let us know which one you prefer.
I will send updated patch.

Thanks,
Ravi


>
>
>
> --
> David Marchand
>


[dpdk-dev] dpdk 1.8.0 disable burst problem

2015-09-15 Thread Liu, Yong
Hi Wei,
We can't reproduce your issue in our environment. Below is our steps.

1. disable vector and change burst size
CONFIG_RTE_IXGBE_INC_VECTOR=n
Define PACKET_READ_SIZE to 1 in client_server_mp/mp_server/main.c.
Define PKT_READ_SIZE to 1 in client_server_mp/mp_client/client.c.

2. Run mp_server with one client.
mp_server -n 4 -c 0xf -- -p 0x3 -n 1
Rx function: Rx Burst Bulk Alloc Tx function: simple tx code path

mp_client -n 4  -c 0xf0 --proc-type=secondary -- -n 0

3. Send packets to port0 and port1. Packets normally forwarded by client.

PORTS
-
Port 0: '90:e2:ba:36:99:3c' Port 1: '90:e2:ba:36:99:3d'

Port 0 - rx:  5000  tx:  1000
Port 1 - rx:  1000  tx:  5000

CLIENTS
---
Client  0 - rx:  6000, rx_drop: 0
tx:  6000, tx_drop: 0

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of ??
> Sent: Thursday, September 10, 2015 9:54 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] dpdk 1.8.0 disable burst problem
> 
> Hi all,
> 
> 
> I am using the dpdk example dpdk-
> 1.8.0/examples/multi_process/client_server_mp on ubuntu 14.04.  I need to
> disable the batch. At first, I just change the macro  in mp_server/main.c
> and mp_client/client.c
> #define PACKET_READ_SIZE 32 to 1
> The server and the client can not receive any packets.  Almost of the
> packets are counted to err from the port stat.
> 
> Port:0, rx:511, rx_err:33011882, rx_nombuf:0, tx:0, tx_err:0
> 
> Port:0, rx_rate:0, rx_err_rate:782253,rx_nombuf_rate:0, tx_rate:0,
> tx_err_rate:0
> 
> However, DPDK 1.4.1 works only changing the batch size from 32 to 1 in
> server and client.
> 
> What I did in the next step is
> 
> disable the vector PMD burst on DPDK 1.8.0 version.
> 
> disable the macro from config file
> 
> CONFIG_RTE_IXGBE_INC_VECTOR=n
> 
> However, nothing is changed. Port still reports packets errs.
> 
> Can anyone help to look at this problem? I will be very appreciated.
> 
> BTW, why DPDK 1.4.1 can not be compiled on ubuntu 14.04?


[dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

2015-09-15 Thread Morten Brørup
Comments inline, marked MB>.

Med venlig hilsen / kind regards
- Morten Brørup

Marc Sune  on 14. september 2015 23:34 wrote:

2015-09-14 12:52 GMT+02:00 Morten Brørup :
> It is important to consider that a multipath link (bonding etc.) is not a 
> physical link, but a logical link (built on top of multiple physical links). 
> Regardless whether it is a Layer2 link aggregate (IEEE 802.1ad, Ethernet 
> bonding, EtherChannel, DSL pair bonding, etc.) or a Layer3 multipath link 
> (e.g. simultaneously using Wi-Fi and mobile networks). So it doesn't make 
> sense trying to impose physical link properties on a purely logical link. 
> Likewise, it doesn't make sense to impose logical link properties on physical 
> links. In other words: Don't consider bonding or any other logical link types 
> when designing the PHY API.

+1
?

> I think there is consensus that 1/ (PHY capabilities) and 2/ (PHY 
> advertisements) should use the same definitions, specifically a bitmap field. 
> And when you disregard bonding, I don't see any reason to use different 
> definitions for 3/ (PHY negotiation result). This makes it one unified API 
> for all three purposes.

Agree.
?

> Nelio suggested adding a support function to convert the bitmap field to a 
> speed value as an integer. I strongly support this, because you cannot expect 
> the bitmap to be ordered by speed. 

Agree with Nelio. This is useful.
?
> This support function will be able to determine which speed is higher when 
> exotic speeds are added to the bitmap. Please extend this conversion function 
> to give three output parameters: speed, full/half duplex, auto 
> negotiation/non-auto negotiation, or add two separate functions to get the 
> duplex and auto-negotiation.

Since, Full/Half duplex is for legacy 10/100Mbps only (afaik), I have my doubts 
on using a bit for all speeds. I would suggest to define (unroll) 100M (or 
100M_FD) and 100M_HD, and the same 10Mbps/1gbps, as Thomas was suggesting some 
mails ago.

This was done in v4 (implicitly 100M == 100M_FD). See below.
?
MB> I didn't intend two bits to be allocated in the bitmap for all speeds to 
support full/half duplex, only for the relevant speeds. Since full duplex is 
dominant, I agree with the previous decision (originally suggested by Thomas, I 
think) to make full duplex implicit unless half duplex is explicitly specified. 
E.g. 10M_HD, 10M (alias 10M_FD), 100M_HD, 100M (alias 100M_FD), 1000M (or 1G), 
2500M, 10G, 40G, 100G, etc.


> I haven't read the suggested code, but there should be some means in 2/ 
> (advertisements) to disable auto negotiation, e.g. a single bit in the bitmap 
> to indicate if the speed/duplex-indicating bits in the bitmap means forced 
> speed/duplex (in which case only a single speed/duplex-bit should be set) or 
> auto negotiation advertised speed/duplex (in which case multiple 
> speed/duplex-bits can be set). 

Agree.

v3/4 of this patch adds the bitmap in the advertised, as per discussed, to 
select a group of speeds. This is not implemented by drivers yet (!).

So, as of v4 of this patch, there could be: a) autoneg any supported speed (=> 
bitmap == 0) b) autoneg over group of speeds (=> more than one bit set in the 
bitmap) c) forced speed (one and only one set in the bitmap).

I think this is precisely what you meant + b) as a bonus

MB> This was not what I meant, but it wasn't very clearly written, so I'll try 
again: Add an additional single bit "NO_AUTONEG" (or whatever you want to name 
it) to the 2/ (advertisements) bitmap that explicitly turns off auto 
negotiation and tries to force the selected speed/duplex (i.e. only one other 
bit can be set in the bitmap when the NO_AUTONEG bit is set). Your c) makes it 
impossible to use auto negotiation to advertise a specific speed/duplex, e.g. 
100M_FD. My suggested NO_AUTONEG bit can also be used in 3/ (result) to 
indicate that the speed was a result of Parallel Detection, i.e. that auto 
negotiation failed or was disabled in either end of the link.

MB> However, I like your suggestion a).

?
> And some means in 3/ (result) and maybe 2/ (advertisements) to select and/or 
> indicate physical interface in dual-personality ports (e.g. ports where the 
> PHY has both an SFP and a RJ45 connector, but only one of the two can be used 
> at any time).

For rte_eth_link_get() I don't have such a strong opinion. You either

* encode the link speed and duplex as of now, separating duplex and numeric 
speed. I would suggest to add the encoded speed+duplex bitmap flag for 
consistency (although redundant).
* or you return a single value, the bitmap with a single flag set of the 
unrolled speeds, and then have the helpers int rte_eth_speed_from_bm(int 
val_bm) and bool rte_eth_duplex_from_bm(int val_bm).

MB> I prefer the latter of the two, only because it makes 3/ (result) 
consistent with 1/ (capabilities) and 2/ (advertisements).


Marc



[dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability bitmap

2015-09-15 Thread Marc Sune
2015-09-14 12:52 GMT+02:00 Morten Brørup :

> It is important to consider that a multipath link (bonding etc.) is not a
> physical link, but a logical link (built on top of multiple physical
> links). Regardless whether it is a Layer2 link aggregate (IEEE 802.1ad,
> Ethernet bonding, EtherChannel, DSL pair bonding, etc.) or a Layer3
> multipath link (e.g. simultaneously using Wi-Fi and mobile networks). So it
> doesn't make sense trying to impose physical link properties on a purely
> logical link. Likewise, it doesn't make sense to impose logical link
> properties on physical links. In other words: Don't consider bonding or any
> other logical link types when designing the PHY API.
>

+1


>
> I think there is consensus that 1/ (PHY capabilities) and 2/ (PHY
> advertisements) should use the same definitions, specifically a bitmap
> field. And when you disregard bonding, I don't see any reason to use
> different definitions for 3/ (PHY negotiation result). This makes it one
> unified API for all three purposes.
>

Agree.


>
> Nelio suggested adding a support function to convert the bitmap field to a
> speed value as an integer. I strongly support this, because you cannot
> expect the bitmap to be ordered by speed.


Agree with Nelio. This is useful.


> This support function will be able to determine which speed is higher when
> exotic speeds are added to the bitmap. Please extend this conversion
> function to give three output parameters: speed, full/half duplex, auto
> negotiation/non-auto negotiation, or add two separate functions to get the
> duplex and auto-negotiation.
>

Since, Full/Half duplex is for legacy 10/100Mbps only (afaik), I have my
doubts on using a bit for all speeds. I would suggest to define (unroll)
100M (or 100M_FD) and 100M_HD, and the same 10Mbps/1gbps, as Thomas was
suggesting some mails ago.

This was done in v4 (implicitly 100M == 100M_FD). See below.


>
> I haven't read the suggested code, but there should be some means in 2/
> (advertisements) to disable auto negotiation, e.g. a single bit in the
> bitmap to indicate if the speed/duplex-indicating bits in the bitmap means
> forced speed/duplex (in which case only a single speed/duplex-bit should be
> set) or auto negotiation advertised speed/duplex (in which case multiple
> speed/duplex-bits can be set).


Agree.

v3/4 of this patch adds the bitmap in the advertised, as per discussed, to
select a group of speeds. This is not implemented by drivers yet (!).

So, as of v4 of this patch, there could be: a) autoneg any supported speed
(=> bitmap == 0) b) autoneg over group of speeds (=> more than one bit set
in the bitmap) c) forced speed (one and only one set in the bitmap).

I think this is precisely what you meant + b) as a bonus


> And some means in 3/ (result) and maybe 2/ (advertisements) to select
> and/or indicate physical interface in dual-personality ports (e.g. ports
> where the PHY has both an SFP and a RJ45 connector, but only one of the two
> can be used at any time).
>
>
For rte_eth_link_get() I don't have such a strong opinion. You either

* encode the link speed and duplex as of now, separating duplex and numeric
speed. I would suggest to add the encoded speed+duplex bitmap flag for
consistency (although redundant).
* or you return a single value, the bitmap with a single flag set of the
unrolled speeds, and then have the helpers int rte_eth_speed_from_bm(int
val_bm) and bool rte_eth_duplex_from_bm(int val_bm).


Marc


>
> Med venlig hilsen / kind regards
> - Morten Brørup
>
> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: 13. september 2015 23:19
> To: Marc Sune
> Cc: Nélio Laranjeiro; dev at dpdk.org; Olga Shern; Adrien Mazarguil; Morten
> Brørup
> Subject: Re: [dpdk-dev] [PATCH v4 0/2] ethdev: add port speed capability
> bitmap
>
> 2015-09-13 21:14, Marc Sune:
> > 2015-09-09 15:33 GMT+02:00 Thomas Monjalon :
> > > 2015-09-09 15:10, Nélio Laranjeiro:
> > > > I think V2 is better, maybe you can add a function to convert a
> > > > single bitmap value to the equivalent integer and get rid of
> > > > ETH_SPEED_XXX
> > > macros.
> > > >
> > > > Thomas what is your opinion?
> > >
> > > Your proposal looks good Nelio.
> >
> > I am confused, specially since you were the one advocating for having
> > a unified set of constants for speeds (discussion in v2).
>
> Yes, my first thought was advocating an unification between capabilities
> and negotiated link properties.
> After I was convinced by Nelio's arguments: bitmap is good for
> capabilities (especially to describe every capabilities in one field) but
> integer is better for negotiated speed (especially for aggregated links).
> Converting bitmap speed into integer should be easy to implement in a
> function.
>
> > In any case, as I see it, if we want to address the comments of  M.
> Brorup:
> >
> > http://comments.gmane.org/gmane.comp.networking.dpdk.devel/19664
> >
> > we need bitmaps for rte_eth_conf link_speed