[dpdk-dev] [PATCH v2] virtio: fix idx in used ring retrieved only once

2016-06-20 Thread Huawei Xie
In the following loop:
while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
...
}
There is no external function call or explicit memory barrier
in the loop, so the compiler is free to optimize away the re-read of
used->idx and retrieve the value only once.

Use of volatile should normally be prohibited; ACCESS_ONCE is the
Linux kernel's idiom for handling this issue. Once we have such a
macro in DPDK, we could switch to that style.
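
For illustration, a minimal ACCESS_ONCE-style macro (the classic kernel
definition, shown here for reference; it is not part of this patch)
forces a fresh load on every evaluation:

#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

while (vq->vq_used_cons_idx != ACCESS_ONCE(vq->vq_ring.used->idx)) {
...
}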

virtio_recv_mergeable_pkts might have the same issue; it is fixed as well.

Fixes: 823ad647950a ("virtio: support multiple queues")
Fixes: 13ce5e7eb94f ("virtio: mergeable buffers")

Signed-off-by: Huawei Xie 
---
v2: use VIRTQUEUE_NUSED
---
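
For reference, VIRTQUEUE_NUSED (defined in
drivers/net/virtio/virtqueue.h; reproduced here from the upstream tree
of that era) computes the number of used entries not yet consumed. With
used->idx made volatile by this patch, every invocation re-reads the
index from memory:

#define VIRTQUEUE_NUSED(vq) \
	((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx))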
 drivers/net/virtio/virtio_ethdev.c  | 4 ++--
 drivers/net/virtio/virtio_ring.h| 2 +-
 drivers/net/virtio/virtio_rxtx_simple.c | 3 +--
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index ea7a48e..6a51948 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -219,12 +219,12 @@ virtio_send_command(struct virtnet_ctl *cvq, struct 
virtio_pmd_ctrl *ctrl,
virtqueue_notify(vq);

rte_rmb();
-   while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
+   while (VIRTQUEUE_NUSED(vq) == 0) {
rte_rmb();
usleep(100);
}

-   while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
+   while (VIRTQUEUE_NUSED(vq)) {
uint32_t idx, desc_idx, used_idx;
struct vring_used_elem *uep;

diff --git a/drivers/net/virtio/virtio_ring.h b/drivers/net/virtio/virtio_ring.h
index 447760a..fcecc16 100644
--- a/drivers/net/virtio/virtio_ring.h
+++ b/drivers/net/virtio/virtio_ring.h
@@ -79,7 +79,7 @@ struct vring_used_elem {

 struct vring_used {
uint16_t flags;
-   uint16_t idx;
+   volatile uint16_t idx;
struct vring_used_elem ring[0];
 };

diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
index 7b50119..242ad90 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -184,8 +184,7 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf 
**rx_pkts,
if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
return 0;

-   nb_used = *(volatile uint16_t *)&vq->vq_ring.used->idx -
-   vq->vq_used_cons_idx;
+   nb_used = VIRTQUEUE_NUSED(vq);

rte_compiler_barrier();

-- 
1.8.1.4



[dpdk-dev] [PATCH] virtio: fix idx in used ring retrieved only once

2016-06-16 Thread Huawei Xie
In the following loop:
while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
...
}
There is no external function call or explicit memory barrier
in the loop, so the compiler is free to optimize away the re-read of
used->idx and retrieve the value only once.

Use of volatile should normally be prohibited; ACCESS_ONCE is the
Linux kernel's idiom for handling this issue. Once we have such a
macro in DPDK, we could switch to that style.

virtio_recv_mergeable_pkts might have the same issue; it is fixed as well.

Fixes: 823ad647950a ("virtio: support multiple queues")
Fixes: 13ce5e7eb94f ("virtio: mergeable buffers")

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c  | 4 ++--
 drivers/net/virtio/virtio_ring.h| 2 +-
 drivers/net/virtio/virtio_rxtx_simple.c | 3 +--
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index ea7a48e..6a51948 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -219,12 +219,12 @@ virtio_send_command(struct virtnet_ctl *cvq, struct 
virtio_pmd_ctrl *ctrl,
virtqueue_notify(vq);

rte_rmb();
-   while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
+   while (VIRTQUEUE_NUSED(vq) == 0) {
rte_rmb();
usleep(100);
}

-   while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
+   while (VIRTQUEUE_NUSED(vq)) {
uint32_t idx, desc_idx, used_idx;
struct vring_used_elem *uep;

diff --git a/drivers/net/virtio/virtio_ring.h b/drivers/net/virtio/virtio_ring.h
index 447760a..fcecc16 100644
--- a/drivers/net/virtio/virtio_ring.h
+++ b/drivers/net/virtio/virtio_ring.h
@@ -79,7 +79,7 @@ struct vring_used_elem {

 struct vring_used {
uint16_t flags;
-   uint16_t idx;
+   volatile uint16_t idx;
struct vring_used_elem ring[0];
 };

diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
index 7b50119..a0ef8d2 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -184,8 +184,7 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf 
**rx_pkts,
if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
return 0;

-   nb_used = *(volatile uint16_t *)&vq->vq_ring.used->idx -
-   vq->vq_used_cons_idx;
+   nb_used = vq->vq_ring.used->idx - vq->vq_used_cons_idx;

rte_compiler_barrier();

-- 
1.8.1.4



[dpdk-dev] [PATCH] vhost: fix dereference null return value

2016-06-15 Thread Huawei Xie
Fixes the following Coverity issues:
CID 107118 (#1 of 1): Dereference null return value (NULL_RETURNS)
CID 119262 (#1 of 1): Dereference null return value (NULL_RETURNS)

Fixes: 8f972312b8f4 ("vhost: support vhost-user")
Fixes: 77d20126b4c2 ("vhost-user: handle message to enable vring")

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/virtio-net-user.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 5803182..5844a42 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -283,6 +283,9 @@ user_set_vring_kick(struct vhost_device_ctx ctx, struct 
VhostUserMsg *pmsg)
struct vhost_vring_file file;
struct virtio_net *dev = get_device(ctx);

+   if (!dev)
+   return;
+
file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
file.fd = VIRTIO_INVALID_EVENTFD;
@@ -306,8 +309,9 @@ user_get_vring_base(struct vhost_device_ctx ctx,
 {
struct virtio_net *dev = get_device(ctx);

-   if (dev == NULL)
+   if (!dev)
return -1;
+
/* We have to stop the queue (virtio) if it is running. */
if (dev->flags & VIRTIO_DEV_RUNNING)
notify_ops->destroy_device(dev);
@@ -341,6 +345,9 @@ user_set_vring_enable(struct vhost_device_ctx ctx,
struct virtio_net *dev = get_device(ctx);
int enable = (int)state->num;

+   if (!dev)
+   return -1;
+
RTE_LOG(INFO, VHOST_CONFIG,
"set queue enable: %d to qp idx: %d\n",
enable, state->index);
@@ -361,7 +368,7 @@ user_set_protocol_features(struct vhost_device_ctx ctx,
struct virtio_net *dev;

dev = get_device(ctx);
-   if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
+   if (!dev || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
return;

dev->protocol_features = protocol_features;
-- 
1.8.1.4



[dpdk-dev] [PATCH] vhost: fix unchecked return value of fstat

2016-06-15 Thread Huawei Xie
The value returned from fstat is not checked for errors before being
used. This fixes the following Coverity issue:

static uint64_t
get_blk_size(int fd)
{
struct stat stat;

fstat(fd, &stat);
return (uint64_t)stat.st_blksize;
>>>  CID 107103 (#1 of 1): Unchecked return value from library (CHECKED_RETURN)
>>>  check_return: Calling fstat(fd, &stat) without checking return value.
>>>  This library function may fail and return an error code.

Fixes: 8f972312b8f4 ("vhost: support vhost-user")

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/virtio-net-user.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index f5248bc..5803182 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -64,9 +64,10 @@ static uint64_t
 get_blk_size(int fd)
 {
struct stat stat;
+   int ret;

-   fstat(fd, &stat);
-   return (uint64_t)stat.st_blksize;
+   ret = fstat(fd, &stat);
+   return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
 }

 static void
@@ -162,6 +163,11 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct 
VhostUserMsg *pmsg)
 * aligned.
 */
alignment = get_blk_size(pmsg->fds[idx]);
+   if (alignment == (uint64_t)-1) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "couldn't get hugepage size through fstat\n");
+   goto err_mmap;
+   }
mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);

mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
-- 
1.8.1.4



[dpdk-dev] [PATCH v2] virtio: fix crash loading virtio driver when devargs isn't specified

2016-06-13 Thread Huawei Xie
We skip kernel-managed virtio devices if they aren't whitelisted.
Before checking whether the virtio device is whitelisted, check whether
devargs is specified.

Fixes: ac5e1d838dc1 ("virtio: skip error when probing kernel managed device")

Signed-off-by: Huawei Xie 
Reported-by: Vincent Li 
---
v2: - add reported-by
- reword subject
- reword commit message
---
 drivers/net/virtio/virtio_pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index d0f2428..60ec4da 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -626,6 +626,7 @@ next:
  * Return -1:
  *   if there is error mapping with VFIO/UIO.
  *   if port map error when driver type is KDRV_NONE.
+ *   if whitelisted but driver type is KDRV_UNKNOWN.
  * Return 1 if kernel driver is managing the device.
  * Return 0 on success.
  */
@@ -651,7 +652,8 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) {
if (dev->kdrv == RTE_KDRV_UNKNOWN &&
-   dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) {
+   (!dev->devargs ||
+dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)) {
PMD_INIT_LOG(INFO,
"skip kernel managed virtio device.");
return 1;
-- 
1.8.1.4



[dpdk-dev] [PATCH] vhost: remove internal lockless enqueue

2016-06-13 Thread Huawei Xie
All other DPDK PMDs don't support concurrent receiving or sending
packets on the same queue. The upper-layer application should deal with
this, normally through queue and core bindings.

For historical reasons, vhost internally supports concurrent lockless
enqueuing of packets to the same virtio queue through a costly cmpset
operation. This patch removes that internal lockless implementation and
should improve performance a bit.

Luckily DPDK OVS doesn't rely on this behavior.
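
A sketch of the contract this change places on applications (enqueue
signature as in rte_virtio_net.h of this release; the core-to-queue
binding itself is a hypothetical application policy):

/* each virtio queue has exactly one enqueuing lcore, so no two cores
 * ever call rte_vhost_enqueue_burst() on the same (dev, queue) pair */
uint16_t queue_id = VIRTIO_RXQ;  /* the queue this lcore owns */
uint16_t sent = rte_vhost_enqueue_burst(dev, queue_id, pkts, nb_pkts);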

Signed-off-by: Huawei Xie 
---
 doc/guides/rel_notes/release_16_07.rst |   3 +
 lib/librte_vhost/rte_virtio_net.h  |   3 +-
 lib/librte_vhost/vhost_rxtx.c  | 106 +++--
 lib/librte_vhost/virtio-net.c  |   1 -
 4 files changed, 24 insertions(+), 89 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_07.rst 
b/doc/guides/rel_notes/release_16_07.rst
index 30e78d4..e96250f 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -112,6 +112,9 @@ API Changes
* Add a short 1-2 sentence description of the API change. Use fixed width
  quotes for ``rte_function_names`` or ``rte_struct_names``. Use the past 
tense.

+* The function ``rte_vhost_enqueue_burst`` no longer supports concurrent enqueuing
+  packets to the same queue.
+
 * The following counters are removed from ``rte_eth_stats`` structure:
   ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
   tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 600b20b..dbba24e 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -88,7 +88,6 @@ struct vhost_virtqueue {
uint32_tbackend;/**< Backend value to 
determine if device should started/stopped. */
uint16_tvhost_hlen; /**< Vhost header 
length (varies depending on RX merge buffers. */
volatile uint16_t   last_used_idx;  /**< Last index used on 
the available ring */
-   volatile uint16_t   last_used_idx_res;  /**< Used for multiple 
devices reserving buffers. */
 #define VIRTIO_INVALID_EVENTFD (-1)
 #define VIRTIO_UNINITIALIZED_EVENTFD   (-2)
int callfd; /**< Used to notify the 
guest (trigger interrupt). */
@@ -192,7 +191,7 @@ rte_vring_available_entries(struct virtio_net *dev, 
uint16_t queue_id)
if (!vq->enabled)
return 0;

-   return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
+   return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
 }

 /**
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 750821a..3fa75cb 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -205,49 +205,6 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
return 0;
 }

-/*
- * As many data cores may want to access available buffers
- * they need to be reserved.
- */
-static inline uint32_t
-reserve_avail_buf(struct vhost_virtqueue *vq, uint32_t count,
- uint16_t *start, uint16_t *end)
-{
-   uint16_t res_start_idx;
-   uint16_t res_end_idx;
-   uint16_t avail_idx;
-   uint16_t free_entries;
-   int success;
-
-   count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
-
-again:
-   res_start_idx = vq->last_used_idx_res;
-   avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-
-   free_entries = avail_idx - res_start_idx;
-   count = RTE_MIN(count, free_entries);
-   if (count == 0)
-   return 0;
-
-   res_end_idx = res_start_idx + count;
-
-   /*
-* update vq->last_used_idx_res atomically; try again if failed.
-*
-* TODO: Allow to disable cmpset if no concurrency in application.
-*/
-   success = rte_atomic16_cmpset(&vq->last_used_idx_res,
- res_start_idx, res_end_idx);
-   if (unlikely(!success))
-   goto again;
-
-   *start = res_start_idx;
-   *end   = res_end_idx;
-
-   return count;
-}
-
 /**
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
@@ -260,7 +217,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
  struct rte_mbuf **pkts, uint32_t count)
 {
struct vhost_virtqueue *vq;
-   uint16_t res_start_idx, res_end_idx;
+   uint16_t avail_idx, free_entries, res_start_idx;
uint16_t desc_indexes[MAX_PKT_BURST];
uint32_t i;

@@ -276,13 +233,19 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
if (unlikely(vq->enabled == 0))
return 0;

-   count = reserve_avail_buf(vq, count, &res_start_idx, &res_end_idx);
+
+   count = RTE_

[dpdk-dev] [PATCH v3] virtio: split virtio rx/tx queue

2016-06-02 Thread Huawei Xie
We keep a common vq structure, containing only vq-related fields,
and then split the others into RX, TX and control queues respectively.

Signed-off-by: Huawei Xie 
---
v2:
- don't split virtio_dev_rx/tx_queue_setup
v3:
- fix some 80 char warnings
- fix other newer version checkpatch warnings

- remove hdr zone allocation for RX queue
v4:
- remove '\n' in PMD_RX_LOG
- fix some conversions between vq and rx/txvq in virtio_dev_free_mbufs

 drivers/net/virtio/virtio_ethdev.c  | 374 ++--
 drivers/net/virtio/virtio_ethdev.h  |   2 +-
 drivers/net/virtio/virtio_pci.c |   4 +-
 drivers/net/virtio/virtio_pci.h |   3 +-
 drivers/net/virtio/virtio_rxtx.c| 282 +---
 drivers/net/virtio/virtio_rxtx.h|  56 -
 drivers/net/virtio/virtio_rxtx_simple.c |  83 +++
 drivers/net/virtio/virtqueue.h  |  70 +++---
 8 files changed, 496 insertions(+), 378 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index c3fb628..cba01d1 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -114,40 +114,61 @@ struct rte_virtio_xstats_name_off {
 };

 /* [rt]x_qX_ is prepended to the name string here */
-static const struct rte_virtio_xstats_name_off rte_virtio_q_stat_strings[] = {
-   {"good_packets",   offsetof(struct virtqueue, packets)},
-   {"good_bytes", offsetof(struct virtqueue, bytes)},
-   {"errors", offsetof(struct virtqueue, errors)},
-   {"multicast_packets",  offsetof(struct virtqueue, multicast)},
-   {"broadcast_packets",  offsetof(struct virtqueue, broadcast)},
-   {"undersize_packets",  offsetof(struct virtqueue, size_bins[0])},
-   {"size_64_packets",offsetof(struct virtqueue, size_bins[1])},
-   {"size_65_127_packets",offsetof(struct virtqueue, size_bins[2])},
-   {"size_128_255_packets",   offsetof(struct virtqueue, size_bins[3])},
-   {"size_256_511_packets",   offsetof(struct virtqueue, size_bins[4])},
-   {"size_512_1023_packets",  offsetof(struct virtqueue, size_bins[5])},
-   {"size_1024_1517_packets", offsetof(struct virtqueue, size_bins[6])},
-   {"size_1518_max_packets",  offsetof(struct virtqueue, size_bins[7])},
+static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = 
{
+   {"good_packets",   offsetof(struct virtnet_rx, stats.packets)},
+   {"good_bytes", offsetof(struct virtnet_rx, stats.bytes)},
+   {"errors", offsetof(struct virtnet_rx, stats.errors)},
+   {"multicast_packets",  offsetof(struct virtnet_rx, 
stats.multicast)},
+   {"broadcast_packets",  offsetof(struct virtnet_rx, 
stats.broadcast)},
+   {"undersize_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[0])},
+   {"size_64_packets",offsetof(struct virtnet_rx, 
stats.size_bins[1])},
+   {"size_65_127_packets",offsetof(struct virtnet_rx, 
stats.size_bins[2])},
+   {"size_128_255_packets",   offsetof(struct virtnet_rx, 
stats.size_bins[3])},
+   {"size_256_511_packets",   offsetof(struct virtnet_rx, 
stats.size_bins[4])},
+   {"size_512_1023_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[5])},
+   {"size_1024_1517_packets", offsetof(struct virtnet_rx, 
stats.size_bins[6])},
+   {"size_1518_max_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[7])},
 };

-#define VIRTIO_NB_Q_XSTATS (sizeof(rte_virtio_q_stat_strings) / \
-   sizeof(rte_virtio_q_stat_strings[0]))
+/* [rt]x_qX_ is prepended to the name string here */
+static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = 
{
+   {"good_packets",   offsetof(struct virtnet_tx, stats.packets)},
+   {"good_bytes", offsetof(struct virtnet_tx, stats.bytes)},
+   {"errors", offsetof(struct virtnet_tx, stats.errors)},
+   {"multicast_packets",  offsetof(struct virtnet_tx, 
stats.multicast)},
+   {"broadcast_packets",  offsetof(struct virtnet_tx, 
stats.broadcast)},
+   {"undersize_packets",  offsetof(struct virtnet_tx, 
stats.size_bins[0])},
+   {"size_64_packets",offsetof(struct virtnet_tx, 
stats.size_bins[1])},
+   {"size_65_127_packets",offsetof(struct virtnet_tx, 
stats.size_bins[2])},
+   {"size_128_255_packets",   offsetof(struct virtnet_tx, 
stats.size_bins[3])},
+   {"size_256_511_packets",   offsetof(struct virtnet_tx, 
stats.size_bins[4])},
+   {

[dpdk-dev] [PATCH v3] virtio: split virtio rx/tx queue

2016-05-30 Thread Huawei Xie
We keep a common vq structure, containing only vq-related fields,
and then split the others into RX, TX and control queues respectively.

Signed-off-by: Huawei Xie 
---
v2:
- don't split virtio_dev_rx/tx_queue_setup
v3:
- fix some 80 char warnings
- fix other newer version checkpatch warnings
- remove '\n' in PMD_RX_LOG
- remove hdr zone allocation for RX queue

 drivers/net/virtio/virtio_ethdev.c  | 352 ++--
 drivers/net/virtio/virtio_ethdev.h  |   2 +-
 drivers/net/virtio/virtio_pci.c |   4 +-
 drivers/net/virtio/virtio_pci.h |   3 +-
 drivers/net/virtio/virtio_rxtx.c| 294 ++
 drivers/net/virtio/virtio_rxtx.h|  56 -
 drivers/net/virtio/virtio_rxtx_simple.c |  83 
 drivers/net/virtio/virtqueue.h  |  70 +++
 8 files changed, 491 insertions(+), 373 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index c3fb628..256888a 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -114,40 +114,61 @@ struct rte_virtio_xstats_name_off {
 };

 /* [rt]x_qX_ is prepended to the name string here */
-static const struct rte_virtio_xstats_name_off rte_virtio_q_stat_strings[] = {
-   {"good_packets",   offsetof(struct virtqueue, packets)},
-   {"good_bytes", offsetof(struct virtqueue, bytes)},
-   {"errors", offsetof(struct virtqueue, errors)},
-   {"multicast_packets",  offsetof(struct virtqueue, multicast)},
-   {"broadcast_packets",  offsetof(struct virtqueue, broadcast)},
-   {"undersize_packets",  offsetof(struct virtqueue, size_bins[0])},
-   {"size_64_packets",offsetof(struct virtqueue, size_bins[1])},
-   {"size_65_127_packets",offsetof(struct virtqueue, size_bins[2])},
-   {"size_128_255_packets",   offsetof(struct virtqueue, size_bins[3])},
-   {"size_256_511_packets",   offsetof(struct virtqueue, size_bins[4])},
-   {"size_512_1023_packets",  offsetof(struct virtqueue, size_bins[5])},
-   {"size_1024_1517_packets", offsetof(struct virtqueue, size_bins[6])},
-   {"size_1518_max_packets",  offsetof(struct virtqueue, size_bins[7])},
+static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = 
{
+   {"good_packets",   offsetof(struct virtnet_rx, stats.packets)},
+   {"good_bytes", offsetof(struct virtnet_rx, stats.bytes)},
+   {"errors", offsetof(struct virtnet_rx, stats.errors)},
+   {"multicast_packets",  offsetof(struct virtnet_rx, 
stats.multicast)},
+   {"broadcast_packets",  offsetof(struct virtnet_rx, 
stats.broadcast)},
+   {"undersize_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[0])},
+   {"size_64_packets",offsetof(struct virtnet_rx, 
stats.size_bins[1])},
+   {"size_65_127_packets",offsetof(struct virtnet_rx, 
stats.size_bins[2])},
+   {"size_128_255_packets",   offsetof(struct virtnet_rx, 
stats.size_bins[3])},
+   {"size_256_511_packets",   offsetof(struct virtnet_rx, 
stats.size_bins[4])},
+   {"size_512_1023_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[5])},
+   {"size_1024_1517_packets", offsetof(struct virtnet_rx, 
stats.size_bins[6])},
+   {"size_1518_max_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[7])},
 };

-#define VIRTIO_NB_Q_XSTATS (sizeof(rte_virtio_q_stat_strings) / \
-   sizeof(rte_virtio_q_stat_strings[0]))
+/* [rt]x_qX_ is prepended to the name string here */
+static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = 
{
+   {"good_packets",   offsetof(struct virtnet_tx, stats.packets)},
+   {"good_bytes", offsetof(struct virtnet_tx, stats.bytes)},
+   {"errors", offsetof(struct virtnet_tx, stats.errors)},
+   {"multicast_packets",  offsetof(struct virtnet_tx, 
stats.multicast)},
+   {"broadcast_packets",  offsetof(struct virtnet_tx, 
stats.broadcast)},
+   {"undersize_packets",  offsetof(struct virtnet_tx, 
stats.size_bins[0])},
+   {"size_64_packets",offsetof(struct virtnet_tx, 
stats.size_bins[1])},
+   {"size_65_127_packets",offsetof(struct virtnet_tx, 
stats.size_bins[2])},
+   {"size_128_255_packets",   offsetof(struct virtnet_tx, 
stats.size_bins[3])},
+   {"size_256_511_packets",   offsetof(struct virtnet_tx, 
stats.size_bins[4])},
+   {"size_512_1023_packets",  offsetof(struct virtnet_tx, 
stats.

[dpdk-dev] [PATCH] virtio: use volatile to get used->idx in the loop

2016-05-25 Thread Huawei Xie
There is no external function call or any barrier in the loop, so
used->idx would only be retrieved once.
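
To see the hazard, this is the transformation the optimizer may legally
apply to the original loop (illustration only):

/* without volatile, the load can be hoisted out of the loop: */
uint16_t idx = vq->vq_ring.used->idx;   /* read exactly once */
while (vq->vq_used_cons_idx != idx) {   /* spins on a stale value */
...
}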

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index c3fb628..f6d6305 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -204,7 +204,8 @@ virtio_send_command(struct virtqueue *vq, struct 
virtio_pmd_ctrl *ctrl,
usleep(100);
}

-   while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
+   while (vq->vq_used_cons_idx !=
+  *((volatile uint16_t *)(&vq->vq_ring.used->idx))) {
uint32_t idx, desc_idx, used_idx;
struct vring_used_elem *uep;

-- 
1.8.1.4



[dpdk-dev] [PATCH] virtio: check if devargs is NULL before checking its value

2016-05-24 Thread Huawei Xie
Fixes: ac5e1d838dc1 ("virtio: skip error when probing kernel managed device")

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 9cdca06..0e76db9 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -626,6 +626,7 @@ next:
  * Return -1:
  *   if there is error mapping with VFIO/UIO.
  *   if port map error when driver type is KDRV_NONE.
+ *   if whitelisted but driver type is KDRV_UNKNOWN.
  * Return 1 if kernel driver is managing the device.
  * Return 0 on success.
  */
@@ -651,7 +652,8 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) {
if (dev->kdrv == RTE_KDRV_UNKNOWN &&
-   dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) {
+   (!dev->devargs ||
+dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)) {
PMD_INIT_LOG(INFO,
"skip kernel managed virtio device.");
return 1;
-- 
1.8.1.4



[dpdk-dev] [PATCH] virtio: split virtio rx/tx queue

2016-05-24 Thread Huawei Xie
We keep a common vq structure, containing only vq-related fields,
and then split the others into RX, TX and control queues respectively.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c  | 352 ++--
 drivers/net/virtio/virtio_ethdev.h  |   2 +-
 drivers/net/virtio/virtio_pci.c |   4 +-
 drivers/net/virtio/virtio_pci.h |   3 +-
 drivers/net/virtio/virtio_rxtx.c| 294 ++
 drivers/net/virtio/virtio_rxtx.h|  56 -
 drivers/net/virtio/virtio_rxtx_simple.c |  83 
 drivers/net/virtio/virtqueue.h  |  70 +++
 8 files changed, 488 insertions(+), 376 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index c3fb628..26fc489 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -114,40 +114,61 @@ struct rte_virtio_xstats_name_off {
 };

 /* [rt]x_qX_ is prepended to the name string here */
-static const struct rte_virtio_xstats_name_off rte_virtio_q_stat_strings[] = {
-   {"good_packets",   offsetof(struct virtqueue, packets)},
-   {"good_bytes", offsetof(struct virtqueue, bytes)},
-   {"errors", offsetof(struct virtqueue, errors)},
-   {"multicast_packets",  offsetof(struct virtqueue, multicast)},
-   {"broadcast_packets",  offsetof(struct virtqueue, broadcast)},
-   {"undersize_packets",  offsetof(struct virtqueue, size_bins[0])},
-   {"size_64_packets",offsetof(struct virtqueue, size_bins[1])},
-   {"size_65_127_packets",offsetof(struct virtqueue, size_bins[2])},
-   {"size_128_255_packets",   offsetof(struct virtqueue, size_bins[3])},
-   {"size_256_511_packets",   offsetof(struct virtqueue, size_bins[4])},
-   {"size_512_1023_packets",  offsetof(struct virtqueue, size_bins[5])},
-   {"size_1024_1517_packets", offsetof(struct virtqueue, size_bins[6])},
-   {"size_1518_max_packets",  offsetof(struct virtqueue, size_bins[7])},
+static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = 
{
+   {"good_packets",   offsetof(struct virtnet_rx, stats.packets)},
+   {"good_bytes", offsetof(struct virtnet_rx, stats.bytes)},
+   {"errors", offsetof(struct virtnet_rx, stats.errors)},
+   {"multicast_packets",  offsetof(struct virtnet_rx, 
stats.multicast)},
+   {"broadcast_packets",  offsetof(struct virtnet_rx, 
stats.broadcast)},
+   {"undersize_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[0])},
+   {"size_64_packets",offsetof(struct virtnet_rx, 
stats.size_bins[1])},
+   {"size_65_127_packets",offsetof(struct virtnet_rx, 
stats.size_bins[2])},
+   {"size_128_255_packets",   offsetof(struct virtnet_rx, 
stats.size_bins[3])},
+   {"size_256_511_packets",   offsetof(struct virtnet_rx, 
stats.size_bins[4])},
+   {"size_512_1023_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[5])},
+   {"size_1024_1517_packets", offsetof(struct virtnet_rx, 
stats.size_bins[6])},
+   {"size_1518_max_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[7])},
 };

-#define VIRTIO_NB_Q_XSTATS (sizeof(rte_virtio_q_stat_strings) / \
-   sizeof(rte_virtio_q_stat_strings[0]))
+/* [rt]x_qX_ is prepended to the name string here */
+static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = 
{
+   {"good_packets",   offsetof(struct virtnet_tx, stats.packets)},
+   {"good_bytes", offsetof(struct virtnet_tx, stats.bytes)},
+   {"errors", offsetof(struct virtnet_tx, stats.errors)},
+   {"multicast_packets",  offsetof(struct virtnet_tx, 
stats.multicast)},
+   {"broadcast_packets",  offsetof(struct virtnet_tx, 
stats.broadcast)},
+   {"undersize_packets",  offsetof(struct virtnet_tx, 
stats.size_bins[0])},
+   {"size_64_packets",offsetof(struct virtnet_tx, 
stats.size_bins[1])},
+   {"size_65_127_packets",offsetof(struct virtnet_tx, 
stats.size_bins[2])},
+   {"size_128_255_packets",   offsetof(struct virtnet_tx, 
stats.size_bins[3])},
+   {"size_256_511_packets",   offsetof(struct virtnet_tx, 
stats.size_bins[4])},
+   {"size_512_1023_packets",  offsetof(struct virtnet_tx, 
stats.size_bins[5])},
+   {"size_1024_1517_packets", offsetof(struct virtnet_tx, 
stats.size_bins[6])},
+   {"size_1518_max_packets",  offsetof(struct virtnet_tx, 

[dpdk-dev] [PATCH] virtio: split virtio rx/tx queue

2016-05-04 Thread Huawei Xie
Currently the virtio RX/TX paths use a common vq structure.
The initial idea was to split the virtio RX and TX queues completely, as
they have different memory requirements and we could arrange the data to
suit future optimizations of the different paths.

With this patch, we keep a common vq structure, as we have too many
common vq operations, and split the remaining fields into virtnet_rx
and virtnet_tx respectively.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c  | 333 +---
 drivers/net/virtio/virtio_pci.c |   4 +-
 drivers/net/virtio/virtio_pci.h |   3 +-
 drivers/net/virtio/virtio_rxtx.c| 531 +++-
 drivers/net/virtio/virtio_rxtx.h|  54 +++-
 drivers/net/virtio/virtio_rxtx_simple.c |  85 ++---
 drivers/net/virtio/virtqueue.h  |  67 ++--
 7 files changed, 655 insertions(+), 422 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 63a368a..4d4e59e 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -115,40 +115,62 @@ struct rte_virtio_xstats_name_off {
 };

 /* [rt]x_qX_ is prepended to the name string here */
-static const struct rte_virtio_xstats_name_off rte_virtio_q_stat_strings[] = {
-   {"good_packets",   offsetof(struct virtqueue, packets)},
-   {"good_bytes", offsetof(struct virtqueue, bytes)},
-   {"errors", offsetof(struct virtqueue, errors)},
-   {"multicast_packets",  offsetof(struct virtqueue, multicast)},
-   {"broadcast_packets",  offsetof(struct virtqueue, broadcast)},
-   {"undersize_packets",  offsetof(struct virtqueue, size_bins[0])},
-   {"size_64_packets",offsetof(struct virtqueue, size_bins[1])},
-   {"size_65_127_packets",offsetof(struct virtqueue, size_bins[2])},
-   {"size_128_255_packets",   offsetof(struct virtqueue, size_bins[3])},
-   {"size_256_511_packets",   offsetof(struct virtqueue, size_bins[4])},
-   {"size_512_1023_packets",  offsetof(struct virtqueue, size_bins[5])},
-   {"size_1024_1517_packets", offsetof(struct virtqueue, size_bins[6])},
-   {"size_1518_max_packets",  offsetof(struct virtqueue, size_bins[7])},
+static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = 
{
+   {"good_packets",   offsetof(struct virtnet_rx, stats.packets)},
+   {"good_bytes", offsetof(struct virtnet_rx, stats.bytes)},
+   {"errors", offsetof(struct virtnet_rx, stats.errors)},
+   {"multicast_packets",  offsetof(struct virtnet_rx, 
stats.multicast)},
+   {"broadcast_packets",  offsetof(struct virtnet_rx, 
stats.broadcast)},
+   {"undersize_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[0])},
+   {"size_64_packets",offsetof(struct virtnet_rx, 
stats.size_bins[1])},
+   {"size_65_127_packets",offsetof(struct virtnet_rx, 
stats.size_bins[2])},
+   {"size_128_255_packets",   offsetof(struct virtnet_rx, 
stats.size_bins[3])},
+   {"size_256_511_packets",   offsetof(struct virtnet_rx, 
stats.size_bins[4])},
+   {"size_512_1023_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[5])},
+   {"size_1024_1517_packets", offsetof(struct virtnet_rx, 
stats.size_bins[6])},
+   {"size_1518_max_packets",  offsetof(struct virtnet_rx, 
stats.size_bins[7])},
 };

-#define VIRTIO_NB_Q_XSTATS (sizeof(rte_virtio_q_stat_strings) / \
-   sizeof(rte_virtio_q_stat_strings[0]))
+/* [rt]x_qX_ is prepended to the name string here */
+static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = 
{
+   {"good_packets",   offsetof(struct virtnet_tx, stats.packets)},
+   {"good_bytes", offsetof(struct virtnet_tx, stats.bytes)},
+   {"errors", offsetof(struct virtnet_tx, stats.errors)},
+   {"multicast_packets",  offsetof(struct virtnet_tx, 
stats.multicast)},
+   {"broadcast_packets",  offsetof(struct virtnet_tx, 
stats.broadcast)},
+   {"undersize_packets",  offsetof(struct virtnet_tx, 
stats.size_bins[0])},
+   {"size_64_packets",offsetof(struct virtnet_tx, 
stats.size_bins[1])},
+   {"size_65_127_packets",offsetof(struct virtnet_tx, 
stats.size_bins[2])},
+   {"size_128_255_packets",   offsetof(struct virtnet_tx, 
stats.size_bins[3])},
+   {"size_256_511_packets",   offsetof(struct virtnet_tx, 
stats.size_bins[4])},
+   {"size_512_1023_packets",  offsetof(struct virtnet_tx, 
stats.

[dpdk-dev] [PATCH v2] virtio: check if virtio net header could fit in mbuf headroom

2016-04-27 Thread Huawei Xie
Check the mergeable header, as the mergeable feature is now supported.
Previously we didn't support the mergeable feature, so the
non-mergeable header was checked.
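
For context, the two header layouts from the virtio specification
(sizes noted for illustration):

struct virtio_net_hdr {			/* 10 bytes */
	uint8_t  flags;
	uint8_t  gso_type;
	uint16_t hdr_len;
	uint16_t gso_size;
	uint16_t csum_start;
	uint16_t csum_offset;
};

struct virtio_net_hdr_mrg_rxbuf {	/* 12 bytes: the larger of the two */
	struct virtio_net_hdr hdr;
	uint16_t num_buffers;
};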

v2:
 add missing signoff

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 63a368a..20ff03e 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1036,7 +1036,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
struct rte_pci_device *pci_dev;
int ret;

-   RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
+   RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct 
virtio_net_hdr_mrg_rxbuf));

eth_dev->dev_ops = &virtio_eth_dev_ops;
eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
-- 
1.8.1.4



[dpdk-dev] [PATCH] virtio: avoid avail ring entry index update if equal

2016-04-27 Thread Huawei Xie
The avail ring is updated by the frontend and consumed by the backend.
There are frequent core-to-core cache transfers for the avail ring.

This optimization avoids updating an avail ring entry if the entry
already holds the same value.
As the DPDK virtio PMD implements a FIFO free descriptor list (also for
cache performance reasons), in which descriptors are allocated from the
head and freed to the tail, with this patch the avail ring will in most
cases remain the same and thus stay valid in the caches of both
frontend and backend.
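
A short walk-through of why an entry rarely changes (assuming a 4-entry
ring and the FIFO free list described above):

/* descriptors are taken from the head and returned to the tail, so
 * slot i sees the same desc_idx on every lap around the ring:
 *
 *   lap 1: avail->ring[0..3] = 0, 1, 2, 3
 *   lap 2: avail->ring[0..3] = 0, 1, 2, 3  -> store skipped, cache
 *                                             line stays clean
 */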

Signed-off-by: Huawei Xie 
Suggested-by: Michael S. Tsirkin 
---
 drivers/net/virtio/virtqueue.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 4e9239e..8c46a83 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -302,7 +302,8 @@ vq_update_avail_ring(struct virtqueue *vq, uint16_t 
desc_idx)
 * descriptor.
 */
avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
-   vq->vq_ring.avail->ring[avail_idx] = desc_idx;
+   if (unlikely(vq->vq_ring.avail->ring[avail_idx] != desc_idx))
+   vq->vq_ring.avail->ring[avail_idx] = desc_idx;
vq->vq_avail_idx++;
 }

-- 
2.4.3



[dpdk-dev] [PATCH] virtio: check if virtio net header could fit in mbuf headroom

2016-04-25 Thread Huawei Xie
Check the mergeable header, as the mergeable feature is now supported.
Previously we didn't support the mergeable feature, so the
non-mergeable header was checked.
---
 drivers/net/virtio/virtio_ethdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 63a368a..20ff03e 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1036,7 +1036,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
struct rte_pci_device *pci_dev;
int ret;

-   RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
+   RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct 
virtio_net_hdr_mrg_rxbuf));

eth_dev->dev_ops = &virtio_eth_dev_ops;
eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
-- 
1.8.1.4



[dpdk-dev] [RFC PATCH] avail idx update optimizations

2016-04-22 Thread Huawei Xie
Eliminate unnecessary cache-to-cache transfers between the virtio and
vhost cores.

---
 drivers/net/virtio/virtqueue.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 4e9239e..8c46a83 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -302,7 +302,8 @@ vq_update_avail_ring(struct virtqueue *vq, uint16_t 
desc_idx)
 * descriptor.
 */
avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
-   vq->vq_ring.avail->ring[avail_idx] = desc_idx;
+   if (unlikely(vq->vq_ring.avail->ring[avail_idx] != desc_idx))
+   vq->vq_ring.avail->ring[avail_idx] = desc_idx;
vq->vq_avail_idx++;
 }

-- 
2.4.3



[dpdk-dev] [PATCH v2] virtio: fix rx ring descriptor starvation

2016-03-23 Thread Huawei Xie
Acked-by: Huawei Xie 


[dpdk-dev] [PATCH v5 6/6] virtio: return 1 to tell the upper layer we don't take over this device

2016-03-08 Thread Huawei Xie
virtio PMD could use IO port to configure the virtio device without
using UIO/VFIO driver in legacy mode.

There are two issues with previous implementation:
1) virtio PMD will take over the virtio device(s) blindly even if not
intended for DPDK.
2) driver conflict between virtio PMD and virtio-net kernel driver.

This patch checks if there is kernel driver other than UIO/VFIO managing
the virtio device before using port IO.

If legacy_virtio_resource_init fails and a kernel driver other than
VFIO/UIO is managing the device, return 1 to tell the upper layer we
don't take over this device.
For all other IO port mapping errors, return -1.

Note that if VFIO/UIO fails, we no longer fall back to port IO.

Fixes: da978dfdc43b ("virtio: use port IO to get PCI resource")

Signed-off-by: Huawei Xie 
Acked-by: Yuanhan Liu 
Acked-by: David Marchand 
---
 drivers/net/virtio/virtio_ethdev.c |  6 --
 drivers/net/virtio/virtio_pci.c| 16 +++-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index caa970c..06bddd7 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1015,6 +1015,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
struct virtio_net_config *config;
struct virtio_net_config local_config;
struct rte_pci_device *pci_dev;
+   int ret;

RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));

@@ -1037,8 +1038,9 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)

pci_dev = eth_dev->pci_dev;

-   if (vtpci_init(pci_dev, hw) < 0)
-   return -1;
+   ret = vtpci_init(pci_dev, hw);
+   if (ret)
+   return ret;

/* Reset the device although not necessary at startup */
vtpci_reset(hw);
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 85fbe88..98fc370 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -622,6 +622,13 @@ next:
return 0;
 }

+/*
+ * Return -1:
+ *   if there is error mapping with VFIO/UIO.
+ *   if port map error when driver type is KDRV_NONE.
+ * Return 1 if kernel driver is managing the device.
+ * Return 0 on success.
+ */
 int
 vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
 {
@@ -641,8 +648,15 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw 
*hw)
}

PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
-   if (legacy_virtio_resource_init(dev, hw) < 0)
+   if (legacy_virtio_resource_init(dev, hw) < 0) {
+   if (dev->kdrv == RTE_KDRV_UNKNOWN &&
+   dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) {
+   PMD_INIT_LOG(INFO,
+   "skip kernel managed virtio device.");
+   return 1;
+   }
return -1;
+   }

hw->vtpci_ops = &legacy_ops;
hw->use_msix = legacy_virtio_has_msix(&dev->addr);
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 5/6] eal: map IO port when kernel driver isn't managing the device

2016-03-08 Thread Huawei Xie
call rte_eal_pci_ioport_map (on x86) only if the pci device is not bound
to a kernel driver.

Signed-off-by: Huawei Xie 
Acked-by: Yuanhan Liu 
Acked-by: David Marchand 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 4ede4cb..833529f 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -698,11 +698,13 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int 
bar,
case RTE_KDRV_UIO_GENERIC:
ret = pci_uio_ioport_map(dev, bar, p);
break;
-   default:
+   case RTE_KDRV_NONE:
 #if defined(RTE_ARCH_X86)
ret = pci_ioport_map(dev, bar, p);
 #endif
break;
+   default:
+   break;
}

if (!ret)
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 4/6] eal: simple code rework

2016-03-08 Thread Huawei Xie

Signed-off-by: Huawei Xie 
Acked-by: Yuanhan Liu 
Acked-by: David Marchand 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index dc0aa37..4ede4cb 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -685,12 +685,11 @@ int
 rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
   struct rte_pci_ioport *p)
 {
-   int ret;
+   int ret = -1;

switch (dev->kdrv) {
 #ifdef VFIO_PRESENT
case RTE_KDRV_VFIO:
-   ret = -1;
if (pci_vfio_is_enabled())
ret = pci_vfio_ioport_map(dev, bar, p);
break;
@@ -701,10 +700,7 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
break;
default:
 #if defined(RTE_ARCH_X86)
-   /* special case for x86 ... */
ret = pci_ioport_map(dev, bar, p);
-#else
-   ret = -1;
 #endif
break;
}
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 3/6] eal: use new RTE_ARCH_X86 for x86 arch

2016-03-08 Thread Huawei Xie

Signed-off-by: Huawei Xie 
Acked-by: Yuanhan Liu 
Acked-by: David Marchand 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index b44fa32..dc0aa37 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -621,7 +621,7 @@ int rte_eal_pci_write_config(const struct rte_pci_device 
*device,
}
 }

-#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
+#if defined(RTE_ARCH_X86)
 static int
 pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
   struct rte_pci_ioport *p)
@@ -700,7 +700,7 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
ret = pci_uio_ioport_map(dev, bar, p);
break;
default:
-#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
+#if defined(RTE_ARCH_X86)
/* special case for x86 ... */
ret = pci_ioport_map(dev, bar, p);
 #else
@@ -730,7 +730,7 @@ rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
pci_uio_ioport_read(p, data, len, offset);
break;
default:
-#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
+#if defined(RTE_ARCH_X86)
/* special case for x86 ... */
pci_uio_ioport_read(p, data, len, offset);
 #endif
@@ -753,7 +753,7 @@ rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
pci_uio_ioport_write(p, data, len, offset);
break;
default:
-#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
+#if defined(RTE_ARCH_X86)
/* special case for x86 ... */
pci_uio_ioport_write(p, data, len, offset);
 #endif
@@ -779,7 +779,7 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
ret = pci_uio_ioport_unmap(p);
break;
default:
-#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
+#if defined(RTE_ARCH_X86)
/* special case for x86 ... nothing to do */
ret = 0;
 #else
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 2/6] eal: RTE_KDRV_NONE means kernel driver isn't managing the device

2016-03-08 Thread Huawei Xie
Use RTE_KDRV_NONE to indicate that kernel driver (other than VFIO/UIO) isn't
managing the device.
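
For reference, the set of kernel driver states being distinguished
(sketch of the rte_pci.h enum of that era; members listed as best
recalled, for illustration only):

enum rte_kernel_driver {
	RTE_KDRV_UNKNOWN = 0,  /* bound to a kernel driver DPDK doesn't know */
	RTE_KDRV_IGB_UIO,
	RTE_KDRV_VFIO,
	RTE_KDRV_UIO_GENERIC,
	RTE_KDRV_NIC_UIO,
	RTE_KDRV_NONE,         /* no kernel driver is managing the device */
};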

Signed-off-by: Huawei Xie 
Acked-by: Yuanhan Liu 
Acked-by: David Marchand 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 4346973..b44fa32 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -362,7 +362,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t 
bus,
else
dev->kdrv = RTE_KDRV_UNKNOWN;
} else
-   dev->kdrv = RTE_KDRV_UNKNOWN;
+   dev->kdrv = RTE_KDRV_NONE;

/* device is valid, add in list (sorted) */
if (TAILQ_EMPTY(&pci_device_list)) {
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 1/6] eal: make the comment more accurate

2016-03-08 Thread Huawei Xie
A positive return value from a PCI driver's devinit means the driver
doesn't support this device.

Signed-off-by: Huawei Xie 
Acked-by: Yuanhan Liu 
Acked-by: David Marchand 
---
 lib/librte_eal/common/eal_common_pci.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index 96d5113..797e7e3 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -204,7 +204,7 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, 
struct rte_pci_device *d
/* call the driver devinit() function */
return dr->devinit(dr, dev);
}
-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -259,7 +259,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
return 0;
}

-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -283,7 +283,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
@@ -310,7 +310,7 @@ pci_detach_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 0/6] fix the issue that DPDK takes over virtio device blindly

2016-03-08 Thread Huawei Xie
v5 changes:
 Split patches
 Remove free of mac addr when vtpci_init fails. Will send the fix in
a separate patch.
 Fail if the virtio device is whitelisted but bound to a kernel driver.

v4 changes:
 Rebase as IO port map is moved to EAL.
 Reword some commit messages.
 Don't fall back to PORT IO if VFIO/UIO fails.

v3 changes:
 Change the log message to tell the user that the virtio device is
skipped because it is managed by a kernel driver, instead of asking the
user to unbind it from the kernel driver.

v2 changes:
 Remove unnecessary assignment of NULL to dev->data->mac_addrs
 Adjust one comment's position
 change LOG level from ERR to INFO

Huawei Xie (6):
  eal: make the comment more accurate
  eal: set kdrv to RTE_KDRV_NONE if kernel driver isn't managing the device.
  eal: use new RTE_ARCH_X86 macro for x86 arch
  eal: simple code rework
  eal: map IO port only when kernel driver isn't managing the device
  virtio: return 1 to tell the upper layer we don't take over this device

 drivers/net/virtio/virtio_ethdev.c |  6 --
 drivers/net/virtio/virtio_pci.c| 16 +++-
 lib/librte_eal/common/eal_common_pci.c |  8 
 lib/librte_eal/linuxapp/eal/eal_pci.c  | 22 ++
 4 files changed, 33 insertions(+), 19 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v7] mbuf: provide rte_pktmbuf_alloc_bulk API

2016-02-28 Thread Huawei Xie
v7 changes:
 rte_pktmbuf_alloc_bulk isn't exported as an API symbol, so it
shouldn't be listed in the version map

v6 changes:
 reflect the changes in release notes and library version map file
 revise our Duff's device code style a bit to make it more readable

v5 changes:
 add comment about Duff's device and our variant implementation

v3 changes:
 move while after case 0
 add context about Duff's device and why we use a while loop in the
commit message

v2 changes:
 unroll the loop a bit to help the performance

rte_pktmbuf_alloc_bulk allocates a bulk of packet mbufs.
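
A usage sketch (burst size and mempool variable are arbitrary):

	struct rte_mbuf *burst[32];

	if (rte_pktmbuf_alloc_bulk(mp, burst, 32) != 0)
		return; /* bulk get failed; no mbufs were taken from the pool */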

There is a related thread about this bulk API.
http://dpdk.org/dev/patchwork/patch/4718/
Thanks to Konstantin's loop unrolling.

Attached is the wiki page about Duff's device. It explains the
performance optimization through loop unwinding, and also the most
dramatic use of case-label fall-through.
https://en.wikipedia.org/wiki/Duff%27s_device

In this implementation, a while() loop is used because we cannot assume
count is strictly positive; using a while() loop saves one line of checking.
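
A self-contained toy version of the same while()-based Duff's device
(illustration only, not DPDK code):

#include <stdio.h>

static void
duff_print(unsigned int count)
{
	unsigned int idx = 0;

	/* count == 0 enters "case 0", immediately fails the while test,
	 * and prints nothing -- no separate zero check is needed */
	switch (count % 4) {
	case 0:
		while (idx != count) {
			printf("%u\n", idx++);
	case 3:
			printf("%u\n", idx++);
	case 2:
			printf("%u\n", idx++);
	case 1:
			printf("%u\n", idx++);
		}
	}
}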

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Olivier Matz 
---
 doc/guides/rel_notes/release_16_04.rst |  3 ++
 lib/librte_mbuf/rte_mbuf.h | 55 ++
 2 files changed, 58 insertions(+)

diff --git a/doc/guides/rel_notes/release_16_04.rst 
b/doc/guides/rel_notes/release_16_04.rst
index e2219d0..b10a11b 100644
--- a/doc/guides/rel_notes/release_16_04.rst
+++ b/doc/guides/rel_notes/release_16_04.rst
@@ -46,6 +46,9 @@ This section should contain new features added in this 
release. Sample format:

 * **Added vhost-user live migration support.**

+* **Enable bulk allocation of mbufs.**
+  A new function ``rte_pktmbuf_alloc_bulk()`` has been added to allow the user
+  to allocate a bulk of mbufs.

 Resolved Issues
 ---
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index c973e9b..c1f6bc4 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1336,6 +1336,61 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
 }

 /**
+ * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
+ * values.
+ *
+ *  @param pool
+ *The mempool from which mbufs are allocated.
+ *  @param mbufs
+ *Array of pointers to mbufs
+ *  @param count
+ *Array size
+ *  @return
+ *   - 0: Success
+ */
+static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
+struct rte_mbuf **mbufs, unsigned count)
+{
+   unsigned idx = 0;
+   int rc;
+
+   rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
+   if (unlikely(rc))
+   return rc;
+
+   /* To understand duff's device on loop unwinding optimization, see
+* https://en.wikipedia.org/wiki/Duff's_device.
+* Here while() loop is used rather than do() while{} to avoid extra
+* check if count is zero.
+*/
+   switch (count % 4) {
+   case 0:
+   while (idx != count) {
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 3:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 2:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 1:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   }
+   }
+   return 0;
+}
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
  * After attachment we refer the mbuf we attached as 'indirect',
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 2/4] eal: set kdrv to RTE_KDRV_NONE if kernel driver isn't managing the device

2016-02-26 Thread Huawei Xie
v4 changes:
 Reword the commit message. When we mention "kernel driver", emphasize
that it includes UIO/VFIO.

Use RTE_KDRV_NONE to indicate that no kernel driver (including
UIO/VFIO) is manipulating the device.

Signed-off-by: Huawei Xie 
Acked-by: Yuanhan Liu 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 4346973..b44fa32 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -362,7 +362,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t 
bus,
else
dev->kdrv = RTE_KDRV_UNKNOWN;
} else
-   dev->kdrv = RTE_KDRV_UNKNOWN;
+   dev->kdrv = RTE_KDRV_NONE;

/* device is valid, add in list (sorted) */
if (TAILQ_EMPTY(&pci_device_list)) {
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 1/4] eal: make the comment more accurate

2016-02-26 Thread Huawei Xie
A positive return value from rte_eal_pci_probe_one_driver means the
driver doesn't support the device.

Signed-off-by: Huawei Xie 
Acked-by: Yuanhan Liu 
---
 lib/librte_eal/common/eal_common_pci.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index 96d5113..797e7e3 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -204,7 +204,7 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, 
struct rte_pci_device *d
/* call the driver devinit() function */
return dr->devinit(dr, dev);
}
-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -259,7 +259,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
return 0;
}

-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -283,7 +283,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
@@ -310,7 +310,7 @@ pci_detach_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 4/4] virtio: check if kernel driver is manipulating the virtio device

2016-01-27 Thread Huawei Xie
v3 changes:
 Change the log message to tell the user that the virtio device is
skipped because it is managed by a kernel driver, instead of asking the
user to unbind it from the kernel driver.

v2 changes:
 change LOG level from ERR to INFO

The virtio PMD can use IO ports to configure the virtio device without
using a UIO driver (vfio-noiommu mode should work as well).

There are two issues with previous implementation:
1) virtio PMD will take over each virtio device blindly even if some
are not intended for DPDK.
2) driver conflict between virtio PMD and virtio-net kernel driver.

This patch checks if there is any kernel driver manipulating the virtio
device before virtio PMD uses IO port to configure the device.

Fixes: da978dfdc43b ("virtio: use port IO to get PCI resource")

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index e815acd..ea1874a 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1138,6 +1138,11 @@ static int virtio_resource_init_by_ioports(struct 
rte_pci_device *pci_dev)
int found = 0;
size_t linesz;

+   if (pci_dev->kdrv != RTE_KDRV_NONE) {
+   PMD_INIT_LOG(INFO, "skip kernel managed virtio device.");
+   return -1;
+   }
+
snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
 pci_dev->addr.domain,
 pci_dev->addr.bus,
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 3/4] virtio: return 1 to tell the upper layer we don't take over this device

2016-01-27 Thread Huawei Xie
v2 changes:
 Remove unnecessary assignment of NULL to dev->data->mac_addrs
 Adjust one comment's position

If virtio_resource_init fails, clean up the resources and return 1 to
tell the upper layer we don't take over this device. -1 means an error,
which will cause DPDK to exit.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index d928339..e815acd 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1287,8 +1287,13 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)

pci_dev = eth_dev->pci_dev;

-   if (virtio_resource_init(pci_dev) < 0)
-   return -1;
+   if (virtio_resource_init(pci_dev) < 0) {
+   rte_free(eth_dev->data->mac_addrs);
+   /* Return 1 to tell the upper layer we don't take over
+* this device.
+*/
+   return 1;
+   }

hw->use_msix = virtio_has_msix(&pci_dev->addr);
hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 2/4] eal: set kdrv to RTE_KDRV_NONE if kernel driver isn't manipulating the device.

2016-01-27 Thread Huawei Xie
Use RTE_KDRV_NONE to indicate that kernel driver isn't manipulating
the device.

Signed-off-by: Huawei Xie 
Acked-by: David Marchand 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index bc5b5be..640b190 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -362,7 +362,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t 
bus,
else
dev->kdrv = RTE_KDRV_UNKNOWN;
} else
-   dev->kdrv = RTE_KDRV_UNKNOWN;
+   dev->kdrv = RTE_KDRV_NONE;

/* device is valid, add in list (sorted) */
if (TAILQ_EMPTY(&pci_device_list)) {
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 1/4] eal: make the comment more accurate

2016-01-27 Thread Huawei Xie
positive return of rte_eal_pci_probe_one_driver means the driver doesn't 
support the device.

Signed-off-by: Huawei Xie 
---
 lib/librte_eal/common/eal_common_pci.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index dcfe947..bbcdb2b 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -204,7 +204,7 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, 
struct rte_pci_device *d
/* call the driver devinit() function */
return dr->devinit(dr, dev);
}
-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -259,7 +259,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
return 0;
}

-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -283,7 +283,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
@@ -310,7 +310,7 @@ pci_detach_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 0/4] fix the issue that DPDK takes over virtio device blindly

2016-01-27 Thread Huawei Xie
v3 changes:
 change the log message to tell the user that the virtio device is
skipped because it is managed by a kernel driver, instead of asking
the user to unbind it from the kernel driver.

v2 changes:
 Remove unnecessary assignment of NULL to dev->data->mac_addrs
 Adjust one comment's position
 change LOG level from ERR to INFO

virtio PMD doesn't set RTE_PCI_DRV_NEED_MAPPING in drv_flags of its
eth_driver. It will try igb_uio and PORT IO in turn to configure the
virtio device. Even if the user in the guest VM doesn't want to use
virtio for DPDK, virtio PMD will take over the device blindly.

The more serious problem is that the kernel driver is still
manipulating the device, which causes a driver conflict.

This patch checks if there is any kernel driver manipulating the
virtio device before virtio PMD uses port IO to configure the device.

Huawei Xie (4):
  eal: make the comment more accurate
  eal: set kdrv to RTE_KDRV_NONE if kernel driver isn't manipulating the device.
  virtio: return 1 to tell the upper layer we don't take over this device
  virtio: check if kernel driver is manipulating the virtio device

 drivers/net/virtio/virtio_ethdev.c | 14 --
 lib/librte_eal/common/eal_common_pci.c |  8 
 lib/librte_eal/linuxapp/eal/eal_pci.c  |  2 +-
 3 files changed, 17 insertions(+), 7 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v3] remove extra parentheses in return statement

2016-01-27 Thread Huawei Xie
v3 changes:
 remove other extra parentheses in 'return (logical expressions)'
which checkpatch doesn't report as an error
 remove extra parentheses in return statements which cross
multiple lines
 fix the documentation

v2 changes:
 add missed commit message in v1

fix the error reported by checkpatch:
  "ERROR: return is not a function, parentheses are not required"

remove parentheses in return like:
  "return (logical expressions)"

remove parentheses in return a function like:
  "return (rte_mempool_lookup(...))"

Fixes: 6307b909b8e0 ("lib: remove extra parenthesis after return")

Signed-off-by: Huawei Xie 
---
 app/test-pmd/cmdline.c | 12 ++--
 app/test-pmd/config.c  |  2 +-
 app/test-pmd/flowgen.c |  2 +-
 app/test-pmd/mempool_anon.c| 12 ++--
 app/test-pmd/testpmd.h |  2 +-
 app/test-pmd/txonly.c  |  2 +-
 app/test/test_kni.c|  2 +-
 app/test/test_mbuf.c   | 12 ++--
 app/test/test_memcpy_perf.c|  4 +-
 app/test/test_mempool.c|  4 +-
 app/test/test_memzone.c| 24 +++
 app/test/test_red.c| 42 ++--
 app/test/test_ring.c   |  4 +-
 doc/guides/sample_app_ug/ipv4_multicast.rst|  8 +--
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c |  2 +-
 drivers/crypto/qat/qat_crypto.c|  4 +-
 drivers/crypto/qat/qat_qp.c| 22 +++---
 drivers/net/bnx2x/bnx2x.c  | 34 -
 drivers/net/bnx2x/bnx2x.h  |  4 +-
 drivers/net/bnx2x/bnx2x_rxtx.c | 16 ++---
 drivers/net/bnx2x/debug.c  |  6 +-
 drivers/net/bnx2x/elink.c  |  2 +-
 drivers/net/bonding/rte_eth_bond_pmd.c |  2 +-
 drivers/net/cxgbe/cxgbe_main.c |  2 +-
 drivers/net/e1000/em_ethdev.c  | 40 +--
 drivers/net/e1000/em_rxtx.c| 46 ++---
 drivers/net/e1000/igb_ethdev.c | 22 +++---
 drivers/net/e1000/igb_rxtx.c   | 30 
 drivers/net/enic/enic_clsf.c   |  2 +-
 drivers/net/fm10k/fm10k_ethdev.c   | 40 +--
 drivers/net/i40e/i40e_ethdev.c |  2 +-
 drivers/net/i40e/i40e_ethdev.h |  2 +-
 drivers/net/i40e/i40e_ethdev_vf.c  |  2 +-
 drivers/net/i40e/i40e_rxtx.c   | 14 ++--
 drivers/net/ixgbe/ixgbe_82599_bypass.c |  4 +-
 drivers/net/ixgbe/ixgbe_bypass.c   |  2 +-
 drivers/net/ixgbe/ixgbe_ethdev.c   | 34 -
 drivers/net/ixgbe/ixgbe_rxtx.c | 36 +-
 drivers/net/mlx5/mlx5_rxq.c|  2 +-
 drivers/net/mlx5/mlx5_utils.h  |  2 +-
 drivers/net/mpipe/mpipe_tilegx.c   |  4 +-
 drivers/net/nfp/nfp_net.c  | 16 ++---
 drivers/net/virtio/virtio_ethdev.c |  6 +-
 drivers/net/vmxnet3/vmxnet3_ring.h |  2 +-
 drivers/net/xenvirt/virtqueue.h|  2 +-
 examples/ip_pipeline/cpu_core_map.c|  2 +-
 .../pipeline/pipeline_flow_actions_be.c|  2 +-
 examples/ip_reassembly/main.c  | 22 +++---
 examples/ipv4_multicast/main.c | 14 ++--
 examples/l3fwd/main.c  |  4 +-
 .../client_server_mp/mp_server/init.c  |  2 +-
 examples/multi_process/symmetric_mp/main.c |  2 +-
 examples/netmap_compat/bridge/bridge.c |  8 +--
 examples/netmap_compat/lib/compat_netmap.c | 80 +++---
 examples/performance-thread/common/lthread_queue.h |  2 +-
 examples/performance-thread/common/lthread_sched.c |  4 +-
 examples/qos_sched/args.c  |  2 +-
 examples/quota_watermark/qw/main.h |  2 +-
 examples/vhost/main.c  |  4 +-
 examples/vhost_xen/main.c  |  4 +-
 examples/vhost_xen/vhost_monitor.c |  6 +-
 lib/librte_acl/acl_bld.c   |  4 +-
 lib/librte_acl/acl_run_neon.h  |  2 +-
 lib/librte_cfgfile/rte_cfgfile.c   |  4 +-
 lib/librte_cryptodev/rte_cryptodev.c   | 24 +++
 lib/librte_eal/bsdapp/eal/eal_lcore.c  |  2 +-
 lib/librte_eal/common/eal_common_memzone.c |  2 +-
 .../common/include/arch/ppc_64/rte_atomic.h| 12 ++--
 .../common/include/arch/ppc_64/rte_byteorder.h | 10 +--
 .../common/include/arch/ppc_64/rte_spinlock.h  |  2 +-
 

[dpdk-dev] [PATCH v2] fix checkpatch errors

2016-01-27 Thread Huawei Xie
v2 changes:
 add missed commit message in v1

fix the error reported by checkpatch:
 "ERROR: return is not a function, parentheses are not required"

also removed other extra parentheses like:
 "return val == 0"
 "return (rte_mempool_lookup(...))"

Signed-off-by: Huawei Xie 
---
 app/test-pmd/cmdline.c | 12 ++--
 app/test-pmd/config.c  |  2 +-
 app/test-pmd/flowgen.c |  2 +-
 app/test-pmd/mempool_anon.c| 12 ++--
 app/test-pmd/testpmd.h |  2 +-
 app/test-pmd/txonly.c  |  2 +-
 app/test/test_mbuf.c   | 12 ++--
 app/test/test_memcpy_perf.c|  4 +-
 app/test/test_mempool.c|  4 +-
 app/test/test_memzone.c| 24 +++
 app/test/test_red.c| 42 ++--
 app/test/test_ring.c   |  4 +-
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c |  2 +-
 drivers/crypto/qat/qat_qp.c| 22 +++---
 drivers/net/bnx2x/bnx2x.c  | 34 -
 drivers/net/bnx2x/bnx2x.h  |  4 +-
 drivers/net/bnx2x/bnx2x_rxtx.c | 16 ++---
 drivers/net/bnx2x/debug.c  |  6 +-
 drivers/net/bonding/rte_eth_bond_pmd.c |  2 +-
 drivers/net/e1000/em_ethdev.c  | 40 +--
 drivers/net/e1000/em_rxtx.c| 46 ++---
 drivers/net/e1000/igb_ethdev.c | 18 ++---
 drivers/net/e1000/igb_rxtx.c   | 30 
 drivers/net/fm10k/fm10k_ethdev.c   | 40 +--
 drivers/net/i40e/i40e_ethdev.c |  2 +-
 drivers/net/i40e/i40e_ethdev.h |  2 +-
 drivers/net/i40e/i40e_ethdev_vf.c  |  2 +-
 drivers/net/i40e/i40e_rxtx.c   | 14 ++--
 drivers/net/ixgbe/ixgbe_82599_bypass.c |  4 +-
 drivers/net/ixgbe/ixgbe_bypass.c   |  2 +-
 drivers/net/ixgbe/ixgbe_ethdev.c   | 34 -
 drivers/net/ixgbe/ixgbe_rxtx.c | 36 +-
 drivers/net/mlx5/mlx5_utils.h  |  2 +-
 drivers/net/mpipe/mpipe_tilegx.c   |  4 +-
 drivers/net/nfp/nfp_net.c  | 16 ++---
 drivers/net/virtio/virtio_ethdev.c |  6 +-
 examples/ip_pipeline/cpu_core_map.c|  2 +-
 .../pipeline/pipeline_flow_actions_be.c|  2 +-
 examples/ip_reassembly/main.c  | 22 +++---
 examples/ipv4_multicast/main.c | 14 ++--
 examples/l3fwd/main.c  |  4 +-
 examples/multi_process/symmetric_mp/main.c |  2 +-
 examples/netmap_compat/bridge/bridge.c |  8 +--
 examples/netmap_compat/lib/compat_netmap.c | 80 +++---
 examples/qos_sched/args.c  |  2 +-
 examples/quota_watermark/qw/main.h |  2 +-
 examples/vhost/main.c  |  4 +-
 examples/vhost_xen/main.c  |  2 +-
 examples/vhost_xen/vhost_monitor.c |  6 +-
 lib/librte_acl/acl_run_neon.h  |  2 +-
 lib/librte_cryptodev/rte_cryptodev.c   | 22 +++---
 lib/librte_eal/common/eal_common_memzone.c |  2 +-
 .../common/include/arch/ppc_64/rte_byteorder.h |  2 +-
 lib/librte_eal/common/malloc_heap.c|  2 +-
 lib/librte_eal/linuxapp/eal/eal_xen_memory.c   |  2 +-
 lib/librte_eal/linuxapp/kni/kni_vhost.c|  2 +-
 lib/librte_ether/rte_ether.h   | 10 +--
 lib/librte_hash/rte_cuckoo_hash.c  | 18 ++---
 lib/librte_ip_frag/ip_frag_internal.c  |  4 +-
 lib/librte_lpm/rte_lpm.c   |  2 +-
 lib/librte_mempool/rte_mempool.h   |  2 +-
 lib/librte_ring/rte_ring.h |  6 +-
 lib/librte_sched/rte_bitmap.h  |  6 +-
 lib/librte_sched/rte_red.h |  2 +-
 lib/librte_sched/rte_sched.c   |  4 +-
 65 files changed, 372 insertions(+), 372 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 73298c9..a82682d 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -2418,11 +2418,11 @@ parse_item_list(char* str, const char* item_name, 
unsigned int max_items,
}
if (c != ',') {
printf("character %c is not a decimal digit\n", c);
-   return (0);
+   return 0;
}
if (! value_ok) {
printf("No valid value before comma\n");
- 

[dpdk-dev] [PATCH v6 2/2] vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

2016-01-27 Thread Huawei Xie
v4 changes:
 fix a silly typo in error handling when rte_pktmbuf_alloc fails,
as reported by haifeng

pre-allocate a bulk of mbufs instead of allocating one mbuf at a time on demand

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Yuanhan Liu 
Tested-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..f10d534 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -576,6 +576,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t i;
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;
+   uint8_t alloc_err = 0;
+   uint8_t seg_num;

if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA,
@@ -609,6 +611,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
dev->device_fh, free_entries);
+
+   if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool,
+   pkts, free_entries) < 0)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to bulk allocate %d mbufs\n", free_entries);
+   return 0;
+   }
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
1)];
@@ -621,9 +631,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t vb_avail, vb_offset;
uint32_t seg_avail, seg_offset;
uint32_t cpy_len;
-   uint32_t seg_num = 0;
+   seg_num = 0;
struct rte_mbuf *cur;
-   uint8_t alloc_err = 0;
+

desc = &vq->desc[head[entry_success]];

@@ -654,13 +664,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->ring[used_idx].id = head[entry_success];
vq->used->ring[used_idx].len = 0;

-   /* Allocate an mbuf and populate the structure. */
-   m = rte_pktmbuf_alloc(mbuf_pool);
-   if (unlikely(m == NULL)) {
-   RTE_LOG(ERR, VHOST_DATA,
-   "Failed to allocate memory for mbuf.\n");
-   break;
-   }
+   prev = cur = m = pkts[entry_success];
seg_offset = 0;
seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -668,8 +672,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

seg_num++;
-   cur = m;
-   prev = m;
while (cpy_len != 0) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
seg_offset),
(void *)((uintptr_t)(vb_addr + vb_offset)),
@@ -761,16 +763,23 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
cpy_len = RTE_MIN(vb_avail, seg_avail);
}

-   if (unlikely(alloc_err == 1))
+   if (unlikely(alloc_err))
break;

m->nb_segs = seg_num;

-   pkts[entry_success] = m;
vq->last_used_idx++;
entry_success++;
}

+   if (unlikely(alloc_err)) {
+   uint16_t i = entry_success;
+
+   m->nb_segs = seg_num;
+   for (; i < free_entries; i++)
+   rte_pktmbuf_free(pkts[i]);
+   }
+
rte_compiler_barrier();
vq->used->idx += entry_success;
/* Kick guest if required. */
-- 
1.8.1.4



[dpdk-dev] [PATCH v6 1/2] mbuf: provide rte_pktmbuf_alloc_bulk API

2016-01-27 Thread Huawei Xie
v6 changes:
 reflect the changes in release notes and library version map file
 revise the duff's device code style a bit to make it more readable

v5 changes:
 add comment about duff's device and our variant implementation

v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop a bit to help the performance

rte_pktmbuf_alloc_bulk allocates a bulk of packet mbufs.

There is related thread about this bulk API.
http://dpdk.org/dev/patchwork/patch/4718/
Thanks to Konstantin's loop unrolling.

Attached the wiki page about duff's device. It explains the performance
optimization through loop unwinding, and also the most dramatic use of
case label fall-through.
https://en.wikipedia.org/wiki/Duff%27s_device

In our implementation, we use a while() loop rather than a do {} while()
loop because we cannot assume count is strictly positive. Using a while()
loop saves one extra check for count being zero.
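
For illustration, a minimal usage sketch of the proposed API (the
helper name and burst size below are made up for the example):

#include <rte_mbuf.h>

/* Hypothetical example: refill a burst of 32 mbufs in one call
 * instead of calling rte_pktmbuf_alloc() in a loop. "mp" is an
 * already-created mbuf mempool. */
static int
refill_burst_example(struct rte_mempool *mp)
{
        struct rte_mbuf *burst[32];
        unsigned i;

        /* either all 32 mbufs are allocated and reset, or none are */
        if (rte_pktmbuf_alloc_bulk(mp, burst, 32) != 0)
                return -1;

        for (i = 0; i < 32; i++)
                rte_pktmbuf_free(burst[i]);
        return 0;
}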

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 doc/guides/rel_notes/release_2_3.rst |  3 ++
 lib/librte_mbuf/rte_mbuf.h   | 55 
 lib/librte_mbuf/rte_mbuf_version.map |  7 +
 3 files changed, 65 insertions(+)

diff --git a/doc/guides/rel_notes/release_2_3.rst 
b/doc/guides/rel_notes/release_2_3.rst
index 99de186..a52cba3 100644
--- a/doc/guides/rel_notes/release_2_3.rst
+++ b/doc/guides/rel_notes/release_2_3.rst
@@ -4,6 +4,9 @@ DPDK Release 2.3
 New Features
 

+* **Enable bulk allocation of mbufs.**
+  A new function ``rte_pktmbuf_alloc_bulk()`` has been added to allow the user
+  to allocate a bulk of mbufs.

 Resolved Issues
 ---
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f234ac9..b2ed479 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1336,6 +1336,61 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
 }

 /**
+ * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
+ * values.
+ *
+ *  @param pool
+ *The mempool from which mbufs are allocated.
+ *  @param mbufs
+ *Array of pointers to mbufs
+ *  @param count
+ *Array size
+ *  @return
+ *   - 0: Success
+ */
+static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
+struct rte_mbuf **mbufs, unsigned count)
+{
+   unsigned idx = 0;
+   int rc;
+
+   rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
+   if (unlikely(rc))
+   return rc;
+
+   /* To understand duff's device on loop unwinding optimization, see
+* https://en.wikipedia.org/wiki/Duff's_device.
+* Here a while() loop is used rather than do {} while() to avoid an
+* extra check if count is zero.
+*/
+   switch (count % 4) {
+   case 0:
+   while (idx != count) {
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 3:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 2:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 1:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   }
+   }
+   return 0;
+}
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
  * After attachment we refer the mbuf we attached as 'indirect',
diff --git a/lib/librte_mbuf/rte_mbuf_version.map 
b/lib/librte_mbuf/rte_mbuf_version.map
index e10f6bd..257c65a 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -18,3 +18,10 @@ DPDK_2.1 {
rte_pktmbuf_pool_create;

 } DPDK_2.0;
+
+DPDK_2.3 {
+   global:
+
+   rte_pktmbuf_alloc_bulk;
+
+} DPDK_2.1;
-- 
1.8.1.4



[dpdk-dev] [PATCH v6 0/2] provide rte_pktmbuf_alloc_bulk API and call it in vhost dequeue

2016-01-27 Thread Huawei Xie
v6 changes:
 reflect the changes in release notes and library version map file
 revise the duff's device code style a bit to make it more readable

v5 changes:
 add comment about duff's device and our variant implementation

v4 changes:
 fix a silly typo in error handling when rte_pktmbuf_alloc fails

v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop in rte_pktmbuf_alloc_bulk to help the performance

For a symmetric rte_pktmbuf_free_bulk, if the app knows that in its
scenarios the mbufs are all simple mbufs, i.e. they meet the following
requirements:
 * no multiple segments
 * not an indirect mbuf
 * refcnt is 1
 * all belong to the same mbuf memory pool,
it could directly call rte_mempool_put to free the bulk of mbufs;
otherwise rte_pktmbuf_free_bulk has to call rte_pktmbuf_free to free
the mbufs one by one.
This patchset will not provide this symmetric implementation.
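
For illustration only (this is not part of the patchset), the "simple
mbuf" fast path described above could look like the sketch below,
using rte_mempool_put_bulk, the bulk variant of rte_mempool_put. It
assumes mbufs resting in the pool keep refcnt == 0, as the assertions
in rte_pktmbuf_alloc_bulk() suggest:

#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Hypothetical sketch: free a bulk of mbufs that the caller knows are
 * simple (single segment, direct, refcnt == 1, all from one pool). */
static inline void
pktmbuf_free_bulk_simple(struct rte_mempool *pool,
                struct rte_mbuf **mbufs, unsigned count)
{
        unsigned i;

        /* restore the in-pool refcnt convention, then return the
         * mbufs to the mempool in a single bulk operation */
        for (i = 0; i < count; i++)
                rte_mbuf_refcnt_set(mbufs[i], 0);
        rte_mempool_put_bulk(pool, (void **)mbufs, count);
}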

Huawei Xie (2):
  mbuf: provide rte_pktmbuf_alloc_bulk API
  vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

 doc/guides/rel_notes/release_2_3.rst |  3 ++
 lib/librte_mbuf/rte_mbuf.h   | 55 
 lib/librte_mbuf/rte_mbuf_version.map |  7 +
 lib/librte_vhost/vhost_rxtx.c| 35 ++-
 4 files changed, 87 insertions(+), 13 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH] vhost: remove lockless enqueue to the virtio ring

2016-01-04 Thread Huawei Xie
This patch removes the internal lockless enqueue implementation.
DPDK doesn't support receiving/transmitting packets from/to the same
queue concurrently. Vhost PMD wraps the vhost device as a normal DPDK
port. DPDK applications normally have their own lock implementation
when they enqueue packets to the same queue of a port.
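
For illustration, the kind of application-level locking referred to
above could look like the sketch below (the function and lock names
are made up for the example):

#include <rte_mbuf.h>
#include <rte_spinlock.h>
#include <rte_virtio_net.h>

/* Hypothetical example: serialize enqueues from multiple cores to
 * the same virtio queue at the application level. */
static rte_spinlock_t vq_lock = RTE_SPINLOCK_INITIALIZER;

static uint16_t
enqueue_locked(struct virtio_net *dev, uint16_t queue_id,
                struct rte_mbuf **pkts, uint16_t count)
{
        uint16_t n;

        rte_spinlock_lock(&vq_lock);
        n = rte_vhost_enqueue_burst(dev, queue_id, pkts, count);
        rte_spinlock_unlock(&vq_lock);
        return n;
}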

The atomic cmpset is a costly operation. This patch should help
performance a bit.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_rxtx.c | 86 +--
 1 file changed, 25 insertions(+), 61 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..26a1b9c 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -69,10 +69,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
uint64_t buff_hdr_addr = 0;
uint32_t head[MAX_PKT_BURST];
uint32_t head_idx, packet_success = 0;
-   uint16_t avail_idx, res_cur_idx;
-   uint16_t res_base_idx, res_end_idx;
+   uint16_t avail_idx, res_cur_idx, res_end_idx;
uint16_t free_entries;
-   uint8_t success = 0;

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
@@ -88,29 +86,18 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,

count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

-   /*
-* As many data cores may want access to available buffers,
-* they need to be reserved.
-*/
-   do {
-   res_base_idx = vq->last_used_idx_res;
-   avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-
-   free_entries = (avail_idx - res_base_idx);
-   /*check that we have enough buffers*/
-   if (unlikely(count > free_entries))
-   count = free_entries;
-
-   if (count == 0)
-   return 0;
-
-   res_end_idx = res_base_idx + count;
-   /* vq->last_used_idx_res is atomically updated. */
-   /* TODO: Allow to disable cmpset if no concurrency in 
application. */
-   success = rte_atomic16_cmpset(&vq->last_used_idx_res,
-   res_base_idx, res_end_idx);
-   } while (unlikely(success == 0));
-   res_cur_idx = res_base_idx;
+   avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+   free_entries = (avail_idx - vq->last_used_idx_res);
+   /*check that we have enough buffers*/
+   if (unlikely(count > free_entries))
+   count = free_entries;
+   if (count == 0)
+   return 0;
+
+   res_cur_idx = vq->last_used_idx_res;
+   res_end_idx = res_cur_idx + count;
+   vq->last_used_idx_res = res_end_idx;
+
LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
dev->device_fh, res_cur_idx, res_end_idx);

@@ -230,10 +217,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,

rte_compiler_barrier();

-   /* Wait until it's our turn to add our buffer to the used ring. */
-   while (unlikely(vq->last_used_idx != res_base_idx))
-   rte_pause();
-
*(volatile uint16_t *)&vq->used->idx += count;
vq->last_used_idx = res_end_idx;

@@ -474,7 +457,6 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t pkt_idx = 0, entry_success = 0;
uint16_t avail_idx;
uint16_t res_base_idx, res_cur_idx;
-   uint8_t success = 0;

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
dev->device_fh);
@@ -496,46 +478,28 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,

for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
+   uint32_t secure_len = 0;
+   uint32_t vec_idx = 0;

-   do {
-   /*
-* As many data cores may want access to available
-* buffers, they need to be reserved.
-*/
-   uint32_t secure_len = 0;
-   uint32_t vec_idx = 0;
-
-   res_base_idx = vq->last_used_idx_res;
-   res_cur_idx = res_base_idx;
+   res_base_idx = res_cur_idx = vq->last_used_idx_res;

-   do {
-   avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-   if (unlikely(res_cur_idx == avail_idx))
-   goto merge_rx_exit;
+   do {
+   avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+   if (unli

[dpdk-dev] [PATCH] fix checkpatch errors

2016-01-04 Thread Huawei Xie
Signed-off-by: Huawei Xie 
---
 app/test-pmd/cmdline.c | 12 ++--
 app/test-pmd/config.c  |  2 +-
 app/test-pmd/flowgen.c |  2 +-
 app/test-pmd/mempool_anon.c| 12 ++--
 app/test-pmd/testpmd.h |  2 +-
 app/test-pmd/txonly.c  |  2 +-
 app/test/test_mbuf.c   | 12 ++--
 app/test/test_memcpy_perf.c|  4 +-
 app/test/test_mempool.c|  4 +-
 app/test/test_memzone.c| 24 +++
 app/test/test_red.c| 42 ++--
 app/test/test_ring.c   |  4 +-
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c |  2 +-
 drivers/crypto/qat/qat_qp.c| 22 +++---
 drivers/net/bnx2x/bnx2x.c  | 34 -
 drivers/net/bnx2x/bnx2x.h  |  4 +-
 drivers/net/bnx2x/bnx2x_rxtx.c | 16 ++---
 drivers/net/bnx2x/debug.c  |  6 +-
 drivers/net/bonding/rte_eth_bond_pmd.c |  2 +-
 drivers/net/e1000/em_ethdev.c  | 40 +--
 drivers/net/e1000/em_rxtx.c| 46 ++---
 drivers/net/e1000/igb_ethdev.c | 18 ++---
 drivers/net/e1000/igb_rxtx.c   | 30 
 drivers/net/fm10k/fm10k_ethdev.c   | 40 +--
 drivers/net/i40e/i40e_ethdev.c |  2 +-
 drivers/net/i40e/i40e_ethdev.h |  2 +-
 drivers/net/i40e/i40e_ethdev_vf.c  |  2 +-
 drivers/net/i40e/i40e_rxtx.c   | 14 ++--
 drivers/net/ixgbe/ixgbe_82599_bypass.c |  4 +-
 drivers/net/ixgbe/ixgbe_bypass.c   |  2 +-
 drivers/net/ixgbe/ixgbe_ethdev.c   | 34 -
 drivers/net/ixgbe/ixgbe_rxtx.c | 36 +-
 drivers/net/mlx5/mlx5_utils.h  |  2 +-
 drivers/net/mpipe/mpipe_tilegx.c   |  4 +-
 drivers/net/nfp/nfp_net.c  | 16 ++---
 drivers/net/virtio/virtio_ethdev.c |  6 +-
 examples/ip_pipeline/cpu_core_map.c|  2 +-
 .../pipeline/pipeline_flow_actions_be.c|  2 +-
 examples/ip_reassembly/main.c  | 22 +++---
 examples/ipv4_multicast/main.c | 14 ++--
 examples/l3fwd/main.c  |  4 +-
 examples/multi_process/symmetric_mp/main.c |  2 +-
 examples/netmap_compat/bridge/bridge.c |  8 +--
 examples/netmap_compat/lib/compat_netmap.c | 80 +++---
 examples/qos_sched/args.c  |  2 +-
 examples/quota_watermark/qw/main.h |  2 +-
 examples/vhost/main.c  |  4 +-
 examples/vhost_xen/main.c  |  2 +-
 examples/vhost_xen/vhost_monitor.c |  6 +-
 lib/librte_acl/acl_run_neon.h  |  2 +-
 lib/librte_cryptodev/rte_cryptodev.c   | 22 +++---
 lib/librte_eal/common/eal_common_memzone.c |  2 +-
 .../common/include/arch/ppc_64/rte_byteorder.h |  2 +-
 lib/librte_eal/common/malloc_heap.c|  2 +-
 lib/librte_eal/linuxapp/eal/eal_xen_memory.c   |  2 +-
 lib/librte_eal/linuxapp/kni/kni_vhost.c|  2 +-
 lib/librte_ether/rte_ether.h   | 10 +--
 lib/librte_hash/rte_cuckoo_hash.c  | 18 ++---
 lib/librte_ip_frag/ip_frag_internal.c  |  4 +-
 lib/librte_lpm/rte_lpm.c   |  2 +-
 lib/librte_mempool/rte_mempool.h   |  2 +-
 lib/librte_ring/rte_ring.h |  6 +-
 lib/librte_sched/rte_bitmap.h  |  6 +-
 lib/librte_sched/rte_red.h |  2 +-
 lib/librte_sched/rte_sched.c   |  4 +-
 65 files changed, 372 insertions(+), 372 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 73298c9..a82682d 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -2418,11 +2418,11 @@ parse_item_list(char* str, const char* item_name, 
unsigned int max_items,
}
if (c != ',') {
printf("character %c is not a decimal digit\n", c);
-   return (0);
+   return 0;
}
if (! value_ok) {
printf("No valid value before comma\n");
-   return (0);
+   return 0;
}
if (nb_item < max_items) {
parsed_items[nb_item] = value;
@@ -2434,11 +2434,11 @@ parse_item_list(char* str, const char* item_name, 
unsigned int max_items,
i

[dpdk-dev] [PATCH v2 4/4] virtio: check if any kernel driver is manipulating the virtio device

2016-01-04 Thread Huawei Xie
v2 changes:
 change LOG level from ERR to INFO

virtio PMD could use IO port to configure the virtio device without
using a uio driver.

There are two issues with the previous implementation:
1) virtio PMD will take over each virtio device blindly even if some
are not intended for DPDK.
2) driver conflict between virtio PMD and virtio-net kernel driver.

This patch checks if there is any kernel driver manipulating the virtio
device before virtio PMD uses IO port to configure the device.

Fixes: da978dfdc43b ("virtio: use port IO to get PCI resource")

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index e815acd..7a50dac 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1138,6 +1138,13 @@ static int virtio_resource_init_by_ioports(struct 
rte_pci_device *pci_dev)
int found = 0;
size_t linesz;

+   if (pci_dev->kdrv != RTE_KDRV_NONE) {
+   PMD_INIT_LOG(INFO,
+   "kernel driver is manipulating this device." \
+   " Please unbind the kernel driver.");
+   return -1;
+   }
+
snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
 pci_dev->addr.domain,
 pci_dev->addr.bus,
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 3/4] virtio: return 1 to tell the upper layer we don't take over this device

2016-01-04 Thread Huawei Xie
v2 changes:
 Remove unnecessary assignment of NULL to dev->data->mac_addrs
 Adjust one comment's position

if virtio_resource_init fails, clean up the resources and return 1 to
tell the upper layer we don't take over this device.
Returning -1 means an error and DPDK will exit.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index d928339..e815acd 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1287,8 +1287,13 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)

pci_dev = eth_dev->pci_dev;

-   if (virtio_resource_init(pci_dev) < 0)
-   return -1;
+   if (virtio_resource_init(pci_dev) < 0) {
+   rte_free(eth_dev->data->mac_addrs);
+   /* Return 1 to tell the upper layer we don't take over
+* this device.
+*/
+   return 1;
+   }

hw->use_msix = virtio_has_msix(&pci_dev->addr);
hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 2/4] eal: set kdrv to RTE_KDRV_NONE if kernel driver isn't manipulating the device.

2016-01-04 Thread Huawei Xie
Use RTE_KDRV_NONE to indicate that kernel driver isn't manipulating
the device.

Signed-off-by: Huawei Xie 
Acked-by: David Marchand 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index bc5b5be..640b190 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -362,7 +362,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t 
bus,
else
dev->kdrv = RTE_KDRV_UNKNOWN;
} else
-   dev->kdrv = RTE_KDRV_UNKNOWN;
+   dev->kdrv = RTE_KDRV_NONE;

/* device is valid, add in list (sorted) */
if (TAILQ_EMPTY(&pci_device_list)) {
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 1/4] eal: make the comment more accurate

2016-01-04 Thread Huawei Xie
Signed-off-by: Huawei Xie 
---
 lib/librte_eal/common/eal_common_pci.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index dcfe947..bbcdb2b 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -204,7 +204,7 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, 
struct rte_pci_device *d
/* call the driver devinit() function */
return dr->devinit(dr, dev);
}
-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -259,7 +259,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
return 0;
}

-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -283,7 +283,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
@@ -310,7 +310,7 @@ pci_detach_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 0/4] fix the issue that DPDK takes over virtio device blindly

2016-01-04 Thread Huawei Xie
v2 changes:
 Remove unnecessary assignment of NULL to dev->data->mac_addrs
 Adjust one comment's position
 change LOG level from ERR to INFO

virtio PMD doesn't set RTE_PCI_DRV_NEED_MAPPING in drv_flags of its
eth_driver. It will try igb_uio and PORT IO in turn to configure the
virtio device. Even if the user in the guest VM doesn't want to use
virtio for DPDK, virtio PMD will take over the device blindly.

The more serious problem is that the kernel driver is still
manipulating the device, which causes a driver conflict.

This patch checks if there is any kernel driver manipulating the
virtio device before virtio PMD uses port IO to configure the device.

Huawei Xie (4):
  eal: make the comment more accurate
  eal: set kdrv to RTE_KDRV_NONE if kernel driver isn't manipulating the device.
  virtio: return 1 to tell the upper layer we don't take over this device
  virtio: check if any kernel driver is manipulating the virtio device

 drivers/net/virtio/virtio_ethdev.c | 16 ++--
 lib/librte_eal/common/eal_common_pci.c |  8 
 lib/librte_eal/linuxapp/eal/eal_pci.c  |  2 +-
 3 files changed, 19 insertions(+), 7 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH] remove redundant __func__ in PMD_INIT_LOG and PMD_RX_LOG

2015-12-28 Thread Huawei Xie

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c   | 12 +---
 drivers/net/vmxnet3/vmxnet3_ethdev.c |  6 +++---
 drivers/net/vmxnet3/vmxnet3_rxtx.c   |  2 +-
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index d928339..f19306f 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -150,9 +150,7 @@ virtio_send_command(struct virtqueue *vq, struct 
virtio_pmd_ctrl *ctrl,
ctrl->status = status;

if (!(vq && vq->hw->cvq)) {
-   PMD_INIT_LOG(ERR,
-"%s(): Control queue is not supported.",
-__func__);
+   PMD_INIT_LOG(ERR, "Control queue is not supported.");
return -1;
}
head = vq->vq_desc_head_idx;
@@ -306,12 +304,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
vq_size = VIRTIO_READ_REG_2(hw, VIRTIO_PCI_QUEUE_NUM);
PMD_INIT_LOG(DEBUG, "vq_size: %u nb_desc:%u", vq_size, nb_desc);
if (vq_size == 0) {
-   PMD_INIT_LOG(ERR, "%s: virtqueue does not exist", __func__);
+   PMD_INIT_LOG(ERR, "virtqueue does not exist");
return -EINVAL;
}

if (!rte_is_power_of_2(vq_size)) {
-   PMD_INIT_LOG(ERR, "%s: virtqueue size is not powerof 2", 
__func__);
+   PMD_INIT_LOG(ERR, "virtqueue size is not powerof 2");
return -EINVAL;
}

@@ -336,7 +334,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
RTE_CACHE_LINE_SIZE);
}
if (vq == NULL) {
-   PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
+   PMD_INIT_LOG(ERR, "Can not allocate virtqueue");
return (-ENOMEM);
}
if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
@@ -1146,7 +1144,7 @@ static int virtio_resource_init_by_ioports(struct 
rte_pci_device *pci_dev)

fp = fopen("/proc/ioports", "r");
if (fp == NULL) {
-   PMD_INIT_LOG(ERR, "%s(): can't open ioports", __func__);
+   PMD_INIT_LOG(ERR, "can't open ioports");
return -1;
}

diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c 
b/drivers/net/vmxnet3/vmxnet3_ethdev.c
index c363bf6..f5834d6 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
@@ -564,7 +564,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
status = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);

if (status != 0) {
-   PMD_INIT_LOG(ERR, "Device activation in %s(): UNSUCCESSFUL", 
__func__);
+   PMD_INIT_LOG(ERR, "Device activation: UNSUCCESSFUL");
return -1;
}

@@ -577,7 +577,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
 */
ret = vmxnet3_dev_rxtx_init(dev);
if (ret != VMXNET3_SUCCESS) {
-   PMD_INIT_LOG(ERR, "Device receive init in %s: UNSUCCESSFUL", 
__func__);
+   PMD_INIT_LOG(ERR, "Device receive init: UNSUCCESSFUL");
return ret;
}

@@ -882,7 +882,7 @@ vmxnet3_process_events(struct vmxnet3_hw *hw)
uint32_t events = hw->shared->ecr;

if (!events) {
-   PMD_INIT_LOG(ERR, "No events to process in %s()", __func__);
+   PMD_INIT_LOG(ERR, "No events to process");
return;
}

diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c 
b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 4de5d89..e592010 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -462,7 +462,7 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t 
ring_id)
/* Allocate blank mbuf for the current Rx Descriptor */
mbuf = rte_rxmbuf_alloc(rxq->mp);
if (unlikely(mbuf == NULL)) {
-   PMD_RX_LOG(ERR, "Error allocating mbuf in %s", 
__func__);
+   PMD_RX_LOG(ERR, "Error allocating mbuf");
rxq->stats.rx_buf_alloc_failure++;
err = ENOMEM;
break;
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 2/2] vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

2015-12-28 Thread Huawei Xie
v4 changes:
 fix a silly typo in error handling when rte_pktmbuf_alloc fails,
as reported by haifeng

pre-allocate a bulk of mbufs instead of allocating one mbuf at a time on demand

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Yuanhan Liu 
Tested-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..f10d534 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -576,6 +576,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t i;
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;
+   uint8_t alloc_err = 0;
+   uint8_t seg_num;

if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA,
@@ -609,6 +611,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
dev->device_fh, free_entries);
+
+   if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool,
+   pkts, free_entries) < 0)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to bulk allocate %d mbufs\n", free_entries);
+   return 0;
+   }
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
1)];
@@ -621,9 +631,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t vb_avail, vb_offset;
uint32_t seg_avail, seg_offset;
uint32_t cpy_len;
-   uint32_t seg_num = 0;
+   seg_num = 0;
struct rte_mbuf *cur;
-   uint8_t alloc_err = 0;
+

desc = &vq->desc[head[entry_success]];

@@ -654,13 +664,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->ring[used_idx].id = head[entry_success];
vq->used->ring[used_idx].len = 0;

-   /* Allocate an mbuf and populate the structure. */
-   m = rte_pktmbuf_alloc(mbuf_pool);
-   if (unlikely(m == NULL)) {
-   RTE_LOG(ERR, VHOST_DATA,
-   "Failed to allocate memory for mbuf.\n");
-   break;
-   }
+   prev = cur = m = pkts[entry_success];
seg_offset = 0;
seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -668,8 +672,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

seg_num++;
-   cur = m;
-   prev = m;
while (cpy_len != 0) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
seg_offset),
(void *)((uintptr_t)(vb_addr + vb_offset)),
@@ -761,16 +763,23 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
cpy_len = RTE_MIN(vb_avail, seg_avail);
}

-   if (unlikely(alloc_err == 1))
+   if (unlikely(alloc_err))
break;

m->nb_segs = seg_num;

-   pkts[entry_success] = m;
vq->last_used_idx++;
entry_success++;
}

+   if (unlikely(alloc_err)) {
+   uint16_t i = entry_success;
+
+   m->nb_segs = seg_num;
+   for (; i < free_entries; i++)
+   rte_pktmbuf_free(pkts[i]);
+   }
+
rte_compiler_barrier();
vq->used->idx += entry_success;
/* Kick guest if required. */
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 1/2] mbuf: provide rte_pktmbuf_alloc_bulk API

2015-12-28 Thread Huawei Xie
v5 changes:
 add comment about duff's device and our variant implementation
 revise the code style a bit

v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop a bit to help the performance

rte_pktmbuf_alloc_bulk allocates a bulk of packet mbufs.

There is related thread about this bulk API.
http://dpdk.org/dev/patchwork/patch/4718/
Thanks to Konstantin's loop unrolling.

Attached the wiki page about duff's device. It explains the performance
optimization through loop unwinding, and also the most dramatic use of
case label fall-through.
https://en.wikipedia.org/wiki/Duff%27s_device

In our implementation, we use a while() loop rather than a do {} while()
loop because we cannot assume count is strictly positive. Using a while()
loop saves one extra check for count being zero.

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_mbuf/rte_mbuf.h | 55 ++
 1 file changed, 55 insertions(+)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f234ac9..b2ed479 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1336,6 +1336,61 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
 }

 /**
+ * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
+ * values.
+ *
+ *  @param pool
+ *The mempool from which mbufs are allocated.
+ *  @param mbufs
+ *Array of pointers to mbufs
+ *  @param count
+ *Array size
+ *  @return
+ *   - 0: Success
+ */
+static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
+struct rte_mbuf **mbufs, unsigned count)
+{
+   unsigned idx = 0;
+   int rc;
+
+   rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
+   if (unlikely(rc))
+   return rc;
+
+   /* To understand duff's device on loop unwinding optimization, see
+* https://en.wikipedia.org/wiki/Duff's_device.
+* Here a while() loop is used rather than do {} while() to avoid an
+* extra check if count is zero.
+*/
+   switch (count % 4) {
+   case 0:
+   while (idx != count) {
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 3:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 2:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 1:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   }
+   }
+   return 0;
+}
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
  * After attachment we refer the mbuf we attached as 'indirect',
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 0/2] provide rte_pktmbuf_alloc_bulk API and call it in vhost dequeue

2015-12-28 Thread Huawei Xie
v5 changes:
 add comment about duff's device and our variant implementation

v4 changes:
 fix a silly typo in error handling when rte_pktmbuf_alloc fails

v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop in rte_pktmbuf_alloc_bulk to help the performance

For a symmetric rte_pktmbuf_free_bulk, if the app knows that in its
scenarios the mbufs are all simple mbufs, i.e. they meet the following
requirements:
 * no multiple segments
 * not an indirect mbuf
 * refcnt is 1
 * all belong to the same mbuf memory pool,
it could directly call rte_mempool_put to free the bulk of mbufs;
otherwise rte_pktmbuf_free_bulk has to call rte_pktmbuf_free to free
the mbufs one by one.
This patchset will not provide this symmetric implementation.

Huawei Xie (2):
  mbuf: provide rte_pktmbuf_alloc_bulk API
  vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

 lib/librte_mbuf/rte_mbuf.h| 55 +++
 lib/librte_vhost/vhost_rxtx.c | 35 +--
 2 files changed, 77 insertions(+), 13 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH 4/4] virtio: check if any kernel driver is manipulating the device

2015-12-25 Thread Huawei Xie
virtio PMD could use IO port to configure the virtio device without
using a uio driver.

There are two issues with the previous implementation:
1) virtio PMD will take over each virtio device blindly even if some
are not intended for DPDK.
2) driver conflict between virtio PMD and virtio-net kernel driver.

This patch checks if there is any kernel driver manipulating the virtio
device before virtio PMD uses IO port to configure the device.

Fixes: da978dfdc43b ("virtio: use port IO to get PCI resource")

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 00015ef..504346a 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1138,6 +1138,13 @@ static int virtio_resource_init_by_ioports(struct 
rte_pci_device *pci_dev)
int found = 0;
size_t linesz;

+   if (pci_dev->kdrv != RTE_KDRV_NONE) {
+   PMD_INIT_LOG(ERR,
+   "%s(): kernel driver is manipulating this device." \
+   " Please unbind the kernel driver.", __func__);
+   return -1;
+   }
+
snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
 pci_dev->addr.domain,
 pci_dev->addr.bus,
-- 
1.8.1.4



[dpdk-dev] [PATCH 2/4] eal: set kdrv to RTE_KDRV_NONE if kernel driver isn't manipulating the device.

2015-12-25 Thread Huawei Xie
Use RTE_KDRV_NONE to indicate that kernel driver isn't manipulating the
device.

Signed-off-by: Huawei Xie 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index bc5b5be..640b190 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -362,7 +362,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t 
bus,
else
dev->kdrv = RTE_KDRV_UNKNOWN;
} else
-   dev->kdrv = RTE_KDRV_UNKNOWN;
+   dev->kdrv = RTE_KDRV_NONE;

/* device is valid, add in list (sorted) */
if (TAILQ_EMPTY(&pci_device_list)) {
-- 
1.8.1.4



[dpdk-dev] [PATCH 1/4] eal: make the comment more accurate

2015-12-25 Thread Huawei Xie

Signed-off-by: Huawei Xie 
---
 lib/librte_eal/common/eal_common_pci.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index dcfe947..bbcdb2b 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -204,7 +204,7 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, 
struct rte_pci_device *d
/* call the driver devinit() function */
return dr->devinit(dr, dev);
}
-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -259,7 +259,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
return 0;
}

-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -283,7 +283,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
@@ -310,7 +310,7 @@ pci_detach_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
-- 
1.8.1.4



[dpdk-dev] [PATCH 0/4] check if any kernel driver is manipulating the virtio device

2015-12-25 Thread Huawei Xie
virtio PMD doesn't set RTE_PCI_DRV_NEED_MAPPING in drv_flags of its
eth_driver. It will try igb_uio and PORT IO in turn to configure the
virtio device. Even if the user in the guest VM doesn't want to use
virtio for DPDK, virtio PMD will take over the device blindly.

The more serious problem is that the kernel driver is still
manipulating the device, which causes a driver conflict.

This patch checks if there is any kernel driver manipulating the
virtio device before virtio PMD uses port IO to configure the device.

Huawei Xie (4):
  eal: make the comment more accurate
  eal: set kdrv to RTE_KDRV_NONE if kernel driver isn't manipulating the device.
  virtio: return 1 to tell the upper layer we don't take over this device
  virtio: check if any kernel driver is manipulating the virtio device

 drivers/net/virtio/virtio_ethdev.c | 15 +--
 lib/librte_eal/common/eal_common_pci.c |  8 
 lib/librte_eal/linuxapp/eal/eal_pci.c  |  2 +-
 3 files changed, 18 insertions(+), 7 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v4 2/2] vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

2015-12-23 Thread Huawei Xie
v4 changes:
 fix a silly typo in error handling when rte_pktmbuf_alloc fails,
as reported by haifeng

pre-allocate a bulk of mbufs instead of allocating one mbuf at a time on demand

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Yuanhan Liu 
Tested-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..f10d534 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -576,6 +576,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t i;
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;
+   uint8_t alloc_err = 0;
+   uint8_t seg_num;

if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA,
@@ -609,6 +611,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
dev->device_fh, free_entries);
+
+   if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool,
+   pkts, free_entries) < 0)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to bulk allocate %d mbufs\n", free_entries);
+   return 0;
+   }
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
1)];
@@ -621,9 +631,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t vb_avail, vb_offset;
uint32_t seg_avail, seg_offset;
uint32_t cpy_len;
-   uint32_t seg_num = 0;
+   seg_num = 0;
struct rte_mbuf *cur;
-   uint8_t alloc_err = 0;
+

desc = &vq->desc[head[entry_success]];

@@ -654,13 +664,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->ring[used_idx].id = head[entry_success];
vq->used->ring[used_idx].len = 0;

-   /* Allocate an mbuf and populate the structure. */
-   m = rte_pktmbuf_alloc(mbuf_pool);
-   if (unlikely(m == NULL)) {
-   RTE_LOG(ERR, VHOST_DATA,
-   "Failed to allocate memory for mbuf.\n");
-   break;
-   }
+   prev = cur = m = pkts[entry_success];
seg_offset = 0;
seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -668,8 +672,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

seg_num++;
-   cur = m;
-   prev = m;
while (cpy_len != 0) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
seg_offset),
(void *)((uintptr_t)(vb_addr + vb_offset)),
@@ -761,16 +763,23 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
cpy_len = RTE_MIN(vb_avail, seg_avail);
}

-   if (unlikely(alloc_err == 1))
+   if (unlikely(alloc_err))
break;

m->nb_segs = seg_num;

-   pkts[entry_success] = m;
vq->last_used_idx++;
entry_success++;
}

+   if (unlikely(alloc_err)) {
+   uint16_t i = entry_success;
+
+   m->nb_segs = seg_num;
+   for (; i < free_entries; i++)
+   rte_pktmbuf_free(pkts[i]);
+   }
+
rte_compiler_barrier();
vq->used->idx += entry_success;
/* Kick guest if required. */
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 1/2] mbuf: provide rte_pktmbuf_alloc_bulk API

2015-12-23 Thread Huawei Xie
v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop a bit to help the performance

rte_pktmbuf_alloc_bulk allocates a bulk of packet mbufs.

There is related thread about this bulk API.
http://dpdk.org/dev/patchwork/patch/4718/
Thanks to Konstantin's loop unrolling.

Attached the wiki page about duff's device. It explains the performance
optimization through loop unwinding, and also the most dramatic use of
case label fall-through.
https://en.wikipedia.org/wiki/Duff%27s_device

In our implementation, we use a while() loop rather than a do {} while() loop
because we cannot assume count is strictly positive. Using a while()
loop saves an explicit check for count being zero.
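
For reference, here is a minimal sketch of the classic Duff's device (not from
this patch; it assumes count > 0, which is exactly what rte_pktmbuf_alloc_bulk
cannot assume, hence its while() placed right after case 0):

#include <stddef.h>

static void
copy_duff(char *to, const char *from, size_t count)
{
	size_t n = (count + 7) / 8;

	switch (count % 8) {
	case 0: do { *to++ = *from++;
	case 7:      *to++ = *from++;
	case 6:      *to++ = *from++;
	case 5:      *to++ = *from++;
	case 4:      *to++ = *from++;
	case 3:      *to++ = *from++;
	case 2:      *to++ = *from++;
	case 1:      *to++ = *from++;
		} while (--n > 0);
	}
}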

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_mbuf/rte_mbuf.h | 49 ++
 1 file changed, 49 insertions(+)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f234ac9..3381c28 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1336,6 +1336,55 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
 }

 /**
+ * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
+ * values.
+ *
+ *  @param pool
+ *The mempool from which mbufs are allocated.
+ *  @param mbufs
+ *Array of pointers to mbufs
+ *  @param count
+ *Array size
+ *  @return
+ *   - 0: Success
+ *   - <0: Failure; no mbufs are allocated
+ */
+static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
+struct rte_mbuf **mbufs, unsigned count)
+{
+   unsigned idx = 0;
+   int rc;
+
+   rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
+   if (unlikely(rc))
+   return rc;
+
+   switch (count % 4) {
+   case 0: while (idx != count) {
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 3:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 2:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 1:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   }
+   }
+   return 0;
+}
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
  * After attachment we refer the mbuf we attached as 'indirect',
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 0/2] provide rte_pktmbuf_alloc_bulk API and call it in vhost dequeue

2015-12-23 Thread Huawei Xie
v4 changes:
 fix a silly typo in error handling when rte_pktmbuf_alloc fails

v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop in rte_pktmbuf_alloc_bulk to help the performance

For a symmetric rte_pktmbuf_free_bulk: if the app knows that in its scenarios
the mbufs are all simple mbufs, i.e. they meet the following requirements:
 * no multiple segments
 * not an indirect mbuf
 * refcnt is 1
 * belong to the same mbuf memory pool,
it could directly call rte_mempool_put to free the bulk of mbufs; otherwise
rte_pktmbuf_free_bulk would have to call rte_pktmbuf_free to free the mbufs
one by one.
This patchset will not provide this symmetric implementation.
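
A hypothetical sketch of that symmetric helper (no such API exists in this
patchset; the `simple` flag stands for the caller's guarantee that every mbuf
is direct, single-segment, refcnt == 1 and comes from `pool`):

#include <rte_mbuf.h>
#include <rte_mempool.h>

static inline void
pktmbuf_free_bulk_sketch(struct rte_mempool *pool,
		struct rte_mbuf **mbufs, unsigned count, int simple)
{
	unsigned i;

	if (simple) {
		/* all guarantees met: hand the raw buffers back in one call */
		rte_mempool_put_bulk(pool, (void **)mbufs, count);
		return;
	}
	/* otherwise fall back to the generic per-mbuf free */
	for (i = 0; i < count; i++)
		rte_pktmbuf_free(mbufs[i]);
}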

Huawei Xie (2):
  mbuf: provide rte_pktmbuf_alloc_bulk API
  vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

 lib/librte_mbuf/rte_mbuf.h| 49 +++
 lib/librte_vhost/vhost_rxtx.c | 35 +++
 2 files changed, 71 insertions(+), 13 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v3 2/2] vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

2015-12-23 Thread Huawei Xie
pre-allocate a bulk of mbufs instead of allocating one mbuf at a time on demand

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..0faae58 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -576,6 +576,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t i;
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;
+   uint8_t alloc_err = 0;
+   uint8_t seg_num;

if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA,
@@ -609,6 +611,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
dev->device_fh, free_entries);
+
+   if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool,
+   pkts, free_entries) < 0)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to bulk allocate %d mbufs\n", free_entries);
+   return 0;
+   }
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
1)];
@@ -621,9 +631,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t vb_avail, vb_offset;
uint32_t seg_avail, seg_offset;
uint32_t cpy_len;
-   uint32_t seg_num = 0;
+   seg_num = 0;
struct rte_mbuf *cur;
-   uint8_t alloc_err = 0;
+

desc = &vq->desc[head[entry_success]];

@@ -654,13 +664,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->ring[used_idx].id = head[entry_success];
vq->used->ring[used_idx].len = 0;

-   /* Allocate an mbuf and populate the structure. */
-   m = rte_pktmbuf_alloc(mbuf_pool);
-   if (unlikely(m == NULL)) {
-   RTE_LOG(ERR, VHOST_DATA,
-   "Failed to allocate memory for mbuf.\n");
-   break;
-   }
+   prev = cur = m = pkts[entry_success];
seg_offset = 0;
seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -668,8 +672,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

seg_num++;
-   cur = m;
-   prev = m;
while (cpy_len != 0) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
seg_offset),
(void *)((uintptr_t)(vb_addr + vb_offset)),
@@ -761,16 +763,23 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
cpy_len = RTE_MIN(vb_avail, seg_avail);
}

-   if (unlikely(alloc_err == 1))
+   if (unlikely(alloc_err))
break;

m->nb_segs = seg_num;

-   pkts[entry_success] = m;
vq->last_used_idx++;
entry_success++;
}

+   if (unlikely(alloc_err)) {
+   uint16_t i = entry_success;
+
+   m->nb_segs = seg_num;
+   for (; i < free_entries; i++)
+   rte_pktmbuf_free(pkts[entry_success]);
+   }
+
rte_compiler_barrier();
vq->used->idx += entry_success;
/* Kick guest if required. */
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 1/2] mbuf: provide rte_pktmbuf_alloc_bulk API

2015-12-23 Thread Huawei Xie
v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop a bit to help the performance

rte_pktmbuf_alloc_bulk allocates a bulk of packet mbufs.

There is a related thread about this bulk API.
http://dpdk.org/dev/patchwork/patch/4718/
Thanks to Konstantin's loop unrolling.

Attached is the wiki page about Duff's device. It explains the performance
optimization through loop unwinding, and also shows the most dramatic use of
case-label fall-through.
https://en.wikipedia.org/wiki/Duff%27s_device

In our implementation, we use a while() loop rather than a do {} while() loop
because we cannot assume count is strictly positive. Using a while()
loop saves an explicit check for count being zero.

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_mbuf/rte_mbuf.h | 49 ++
 1 file changed, 49 insertions(+)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f234ac9..3381c28 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1336,6 +1336,55 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
 }

 /**
+ * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
+ * values.
+ *
+ *  @param pool
+ *The mempool from which mbufs are allocated.
+ *  @param mbufs
+ *Array of pointers to mbufs
+ *  @param count
+ *Array size
+ *  @return
+ *   - 0: Success
+ *   - <0: Failure; no mbufs are allocated
+ */
+static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
+struct rte_mbuf **mbufs, unsigned count)
+{
+   unsigned idx = 0;
+   int rc;
+
+   rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
+   if (unlikely(rc))
+   return rc;
+
+   switch (count % 4) {
+   case 0: while (idx != count) {
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 3:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 2:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 1:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   }
+   }
+   return 0;
+}
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
  * After attachment we refer the mbuf we attached as 'indirect',
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 2/2] vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

2015-12-14 Thread Huawei Xie
pre-allocate a bulk of mbufs instead of allocating one mbuf at a time on demand

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..0faae58 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -576,6 +576,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t i;
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;
+   uint8_t alloc_err = 0;
+   uint8_t seg_num;

if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA,
@@ -609,6 +611,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
dev->device_fh, free_entries);
+
+   if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool,
+   pkts, free_entries) < 0)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to bulk allocate %d mbufs\n", free_entries);
+   return 0;
+   }
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
1)];
@@ -621,9 +631,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t vb_avail, vb_offset;
uint32_t seg_avail, seg_offset;
uint32_t cpy_len;
-   uint32_t seg_num = 0;
+   seg_num = 0;
struct rte_mbuf *cur;
-   uint8_t alloc_err = 0;
+

desc = &vq->desc[head[entry_success]];

@@ -654,13 +664,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->ring[used_idx].id = head[entry_success];
vq->used->ring[used_idx].len = 0;

-   /* Allocate an mbuf and populate the structure. */
-   m = rte_pktmbuf_alloc(mbuf_pool);
-   if (unlikely(m == NULL)) {
-   RTE_LOG(ERR, VHOST_DATA,
-   "Failed to allocate memory for mbuf.\n");
-   break;
-   }
+   prev = cur = m = pkts[entry_success];
seg_offset = 0;
seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -668,8 +672,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

seg_num++;
-   cur = m;
-   prev = m;
while (cpy_len != 0) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
seg_offset),
(void *)((uintptr_t)(vb_addr + vb_offset)),
@@ -761,16 +763,23 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
cpy_len = RTE_MIN(vb_avail, seg_avail);
}

-   if (unlikely(alloc_err == 1))
+   if (unlikely(alloc_err))
break;

m->nb_segs = seg_num;

-   pkts[entry_success] = m;
vq->last_used_idx++;
entry_success++;
}

+   if (unlikely(alloc_err)) {
+   uint16_t i = entry_success;
+
+   m->nb_segs = seg_num;
+   for (; i < free_entries; i++)
+   rte_pktmbuf_free(pkts[entry_success]);
+   }
+
rte_compiler_barrier();
vq->used->idx += entry_success;
/* Kick guest if required. */
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 1/2] mbuf: provide rte_pktmbuf_alloc_bulk API

2015-12-14 Thread Huawei Xie
v2 changes:
 unroll the loop a bit to help the performance

rte_pktmbuf_alloc_bulk allocates a bulk of packet mbufs.

There is a related thread about this bulk API.
http://dpdk.org/dev/patchwork/patch/4718/
Thanks to Konstantin's loop unrolling.

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_mbuf/rte_mbuf.h | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f234ac9..4e209e0 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1336,6 +1336,56 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
 }

 /**
+ * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
+ * values.
+ *
+ *  @param pool
+ *The mempool from which mbufs are allocated.
+ *  @param mbufs
+ *Array of pointers to mbufs
+ *  @param count
+ *Array size
+ *  @return
+ *   - 0: Success
+ *   - <0: Failure; no mbufs are allocated
+ */
+static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
+struct rte_mbuf **mbufs, unsigned count)
+{
+   unsigned idx = 0;
+   int rc;
+
+   rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
+   if (unlikely(rc))
+   return rc;
+
+   switch (count % 4) {
+   while (idx != count) {
+   case 0:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 3:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 2:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 1:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   }
+   }
+   return 0;
+}
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
  * After attachment we refer the mbuf we attached as 'indirect',
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 0/2] provide rte_pktmbuf_alloc_bulk API and call it in vhost dequeue

2015-12-14 Thread Huawei Xie
v2 changes:
 unroll the loop in rte_pktmbuf_alloc_bulk to help the performance

For a symmetric rte_pktmbuf_free_bulk: if the app knows that in its scenarios
the mbufs are all simple mbufs, i.e. they meet the following requirements:
 * no multiple segments
 * not an indirect mbuf
 * refcnt is 1
 * belong to the same mbuf memory pool,
it could directly call rte_mempool_put to free the bulk of mbufs; otherwise
rte_pktmbuf_free_bulk would have to call rte_pktmbuf_free to free the mbufs
one by one.
This patchset will not provide this symmetric implementation.

Huawei Xie (2):
  mbuf: provide rte_pktmbuf_alloc_bulk API
  vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

 lib/librte_mbuf/rte_mbuf.h| 50 +++
 lib/librte_vhost/vhost_rxtx.c | 35 +++---
 2 files changed, 72 insertions(+), 13 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH 2/2] vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

2015-12-14 Thread Huawei Xie
pre-allocate a bulk of mbufs instead of allocating one mbuf at a time on demand

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..0faae58 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -576,6 +576,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t i;
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;
+   uint8_t alloc_err = 0;
+   uint8_t seg_num;

if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA,
@@ -609,6 +611,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
dev->device_fh, free_entries);
+
+   if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool,
+   pkts, free_entries) < 0)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to bulk allocate %d mbufs\n", free_entries);
+   return 0;
+   }
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
1)];
@@ -621,9 +631,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t vb_avail, vb_offset;
uint32_t seg_avail, seg_offset;
uint32_t cpy_len;
-   uint32_t seg_num = 0;
+   seg_num = 0;
struct rte_mbuf *cur;
-   uint8_t alloc_err = 0;
+

desc = &vq->desc[head[entry_success]];

@@ -654,13 +664,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->ring[used_idx].id = head[entry_success];
vq->used->ring[used_idx].len = 0;

-   /* Allocate an mbuf and populate the structure. */
-   m = rte_pktmbuf_alloc(mbuf_pool);
-   if (unlikely(m == NULL)) {
-   RTE_LOG(ERR, VHOST_DATA,
-   "Failed to allocate memory for mbuf.\n");
-   break;
-   }
+   prev = cur = m = pkts[entry_success];
seg_offset = 0;
seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -668,8 +672,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

seg_num++;
-   cur = m;
-   prev = m;
while (cpy_len != 0) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
seg_offset),
(void *)((uintptr_t)(vb_addr + vb_offset)),
@@ -761,16 +763,23 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
cpy_len = RTE_MIN(vb_avail, seg_avail);
}

-   if (unlikely(alloc_err == 1))
+   if (unlikely(alloc_err))
break;

m->nb_segs = seg_num;

-   pkts[entry_success] = m;
vq->last_used_idx++;
entry_success++;
}

+   if (unlikely(alloc_err)) {
+   uint16_t i = entry_success;
+
+   m->nb_segs = seg_num;
+   for (; i < free_entries; i++)
+   rte_pktmbuf_free(pkts[entry_success]);
+   }
+
rte_compiler_barrier();
vq->used->idx += entry_success;
/* Kick guest if required. */
-- 
1.8.1.4



[dpdk-dev] [PATCH 0/2] provide rte_pktmbuf_alloc_bulk API and call it in vhost dequeue

2015-12-14 Thread Huawei Xie
For a symmetric rte_pktmbuf_free_bulk: if the app knows that in its scenarios
the mbufs are all simple mbufs, i.e. they meet the following requirements:
 * no multiple segments
 * not an indirect mbuf
 * refcnt is 1
 * belong to the same mbuf memory pool,
it could directly call rte_mempool_put to free the bulk of mbufs; otherwise
rte_pktmbuf_free_bulk would have to call rte_pktmbuf_free to free the mbufs
one by one.
This patchset will not provide this symmetric implementation.

Huawei Xie (2):
  mbuf: provide rte_pktmbuf_alloc_bulk API
  vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

 lib/librte_mbuf/rte_mbuf.h| 31 +++
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 2 files changed, 53 insertions(+), 13 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v6 7/8] virtio: pick simple rx/tx func

2015-10-29 Thread Huawei Xie
Changes in v4:
Check the mergeable feature when selecting the simple rx/tx functions.

The simple rx/tx functions are chosen when mergeable RX is disabled and the
user specifies single-segment and no-offload support.
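
A hedged usage sketch with DPDK 2.2-era APIs (the port id, queue id and
descriptor count below are illustrative placeholders, not from this patch):

#include <rte_ethdev.h>

static int
setup_simple_txq(uint8_t port_id)
{
	/* single segment + no offloads: the PMD may pick the simple TX
	 * path, provided mergeable RX is not negotiated */
	struct rte_eth_txconf txconf = {
		.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
			     ETH_TXQ_FLAGS_NOOFFLOADS,
	};

	return rte_eth_tx_queue_setup(port_id, 0, 256,
			rte_eth_dev_socket_id(port_id), &txconf);
}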

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_rxtx.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 947fc46..0f1daf2 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -53,6 +53,7 @@

 #include "virtio_logs.h"
 #include "virtio_ethdev.h"
+#include "virtio_pci.h"
 #include "virtqueue.h"
 #include "virtio_rxtx.h"

@@ -62,6 +63,10 @@
 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
 #endif

+
+#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
+   ETH_TXQ_FLAGS_NOOFFLOADS)
+
 static int use_simple_rxtx;

 static void
@@ -459,6 +464,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
const struct rte_eth_txconf *tx_conf)
 {
uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
+   struct virtio_hw *hw = dev->data->dev_private;
struct virtqueue *vq;
uint16_t tx_free_thresh;
int ret;
@@ -471,6 +477,15 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -EINVAL;
}

+   /* Use simple rx/tx func if single segment and no offloads */
+   if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
+!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+   PMD_INIT_LOG(INFO, "Using simple rx/tx path");
+   dev->tx_pkt_burst = virtio_xmit_pkts_simple;
+   dev->rx_pkt_burst = virtio_recv_pkts_vec;
+   use_simple_rxtx = 1;
+   }
+
ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
nb_desc, socket_id, &vq);
if (ret < 0) {
-- 
1.8.1.4



[dpdk-dev] [PATCH v6 6/8] virtio: simple tx routine

2015-10-29 Thread Huawei Xie
Changes in v5:
- call __rte_pktmbuf_prefree_seg to check refcnt when freeing mbufs

Changes in v4:
- move virtio_xmit_cleanup ahead to free descriptors earlier

Changes in v3:
- Remove return at the end of void function
- Remove always_inline attribute for virtio_xmit_cleanup

Bulk free of mbufs when cleaning the used ring.
The shift operation on idx could be saved if vq_free_cnt meant
free slots rather than free descriptors.

TODO: rearrange vq data structure, pack the stats var together so that we
could use one vec instruction to update all of them.
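
A small sketch of the slot/descriptor arithmetic behind the earlier remark
(names are illustrative): in the fixed TX layout every packet occupies two
chained descriptors (virtio_net_hdr + data), hence the >> 1 and << 1 pairs in
the code below.

#include <stdint.h>

static inline uint16_t
tx_free_slots(uint16_t vq_free_cnt)
{
	return vq_free_cnt >> 1;		/* descriptors -> packet slots */
}

static inline uint16_t
descs_reclaimed(uint16_t slots_freed)
{
	return (uint16_t)(slots_freed << 1);	/* slots -> descriptors */
}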

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.h  |   3 +
 drivers/net/virtio/virtio_rxtx_simple.c | 106 
 2 files changed, 109 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index d7797ab..ae2d47d 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -111,6 +111,9 @@ uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

+uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
  * frames larger than 1514 bytes. We do not yet support software LRO
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
index ef17562..624e789 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -288,6 +288,112 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf 
**rx_pkts,
return nb_pkts_received;
 }

+#define VIRTIO_TX_FREE_THRESH 32
+#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
+#define VIRTIO_TX_FREE_NR 32
+/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift 
*/
+static inline void
+virtio_xmit_cleanup(struct virtqueue *vq)
+{
+   uint16_t i, desc_idx;
+   int nb_free = 0;
+   struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
+
+   desc_idx = (uint16_t)(vq->vq_used_cons_idx &
+  ((vq->vq_nentries >> 1) - 1));
+   m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+   m = __rte_pktmbuf_prefree_seg(m);
+   if (likely(m != NULL)) {
+   free[0] = m;
+   nb_free = 1;
+   for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+   m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+   m = __rte_pktmbuf_prefree_seg(m);
+   if (likely(m != NULL)) {
+   if (likely(m->pool == free[0]->pool))
+   free[nb_free++] = m;
+   else {
+   rte_mempool_put_bulk(free[0]->pool,
+   (void **)free, nb_free);
+   free[0] = m;
+   nb_free = 1;
+   }
+   }
+   }
+   rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+   } else {
+   for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+   m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+   m = __rte_pktmbuf_prefree_seg(m);
+   if (m != NULL)
+   rte_mempool_put(m->pool, m);
+   }
+   }
+
+   vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
+   vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
+}
+
+uint16_t
+virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   struct virtqueue *txvq = tx_queue;
+   uint16_t nb_used;
+   uint16_t desc_idx;
+   struct vring_desc *start_dp;
+   uint16_t nb_tail, nb_commit;
+   int i;
+   uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1;
+
+   nb_used = VIRTQUEUE_NUSED(txvq);
+   rte_compiler_barrier();
+
+   if (nb_used >= VIRTIO_TX_FREE_THRESH)
+   virtio_xmit_cleanup(tx_queue);
+
+   nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), nb_pkts);
+   desc_idx = (uint16_t) (txvq->vq_avail_idx & desc_idx_max);
+   start_dp = txvq->vq_ring.desc;
+   nb_tail = (uint16_t) (desc_idx_max + 1 - desc_idx);
+
+   if (nb_commit >= nb_tail) {
+   for (i = 0; i < nb_tail; i++)
+   txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+   for (i = 0; i < nb_tail; i++) {
+   start_dp[desc_idx].addr =
+   RTE_MBUF_DATA_DMA_ADDR(*tx_pkts);
+   start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
+   tx_pkts++;
+

[dpdk-dev] [PATCH v6 5/8] virtio: virtio vec rx

2015-10-29 Thread Huawei Xie
With a fixed avail ring, we don't need to get the desc idx from the avail ring;
the virtio driver only has to deal with the desc ring.
This patch uses vector instructions to accelerate processing of the desc ring.
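
As a rough illustration of the byte-shuffle technique (an invented mask and
layout, not the patch's exact code), a single PSHUFB can pull the 32-bit len
out of a vring_used_elem {uint32_t id; uint32_t len;}; mask bytes with the
high bit set zero the corresponding output lane:

#include <stdint.h>
#include <tmmintrin.h>	/* SSSE3 _mm_shuffle_epi8 */

static inline __m128i
extract_used_len(const void *used_elem)
{
	const __m128i shuf = _mm_set_epi8(
		(char)0x80, (char)0x80, (char)0x80, (char)0x80,
		(char)0x80, (char)0x80, (char)0x80, (char)0x80,
		(char)0x80, (char)0x80, (char)0x80, (char)0x80,
		7, 6, 5, 4);	/* move bytes 4..7 (len) to bytes 0..3 */
	__m128i raw = _mm_loadu_si128((const __m128i *)used_elem);

	return _mm_shuffle_epi8(raw, shuf);
}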

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.h  |   2 +
 drivers/net/virtio/virtio_rxtx.c|   3 +
 drivers/net/virtio/virtio_rxtx.h|   2 +
 drivers/net/virtio/virtio_rxtx_simple.c | 224 
 drivers/net/virtio/virtqueue.h  |   1 +
 5 files changed, 232 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index 9026d42..d7797ab 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -108,6 +108,8 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct 
rte_mbuf **rx_pkts,
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+   uint16_t nb_pkts);

 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 5162ce6..947fc46 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -432,6 +432,9 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
vq->mpool = mp;

dev->data->rx_queues[queue_idx] = vq;
+
+   virtio_rxq_vec_setup(vq);
+
return 0;
 }

diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 7d2d8fe..831e492 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -33,5 +33,7 @@

 #define RTE_PMD_VIRTIO_RX_MAX_BURST 64

+int virtio_rxq_vec_setup(struct virtqueue *rxq);
+
 int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *m);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
index cac5b9f..ef17562 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -58,6 +58,10 @@
 #include "virtqueue.h"
 #include "virtio_rxtx.h"

+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
 int __attribute__((cold))
 virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *cookie)
@@ -82,3 +86,223 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,

return 0;
 }
+
+static inline void
+virtio_rxq_rearm_vec(struct virtqueue *rxvq)
+{
+   int i;
+   uint16_t desc_idx;
+   struct rte_mbuf **sw_ring;
+   struct vring_desc *start_dp;
+   int ret;
+
+   desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1);
+   sw_ring = &rxvq->sw_ring[desc_idx];
+   start_dp = &rxvq->vq_ring.desc[desc_idx];
+
+   ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
+   RTE_VIRTIO_VPMD_RX_REARM_THRESH);
+   if (unlikely(ret)) {
+   rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
+   RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   return;
+   }
+
+   for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
+   uintptr_t p;
+
+   p = (uintptr_t)&sw_ring[i]->rearm_data;
+   *(uint64_t *)p = rxvq->mbuf_initializer;
+
+   start_dp[i].addr =
+   (uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr +
+   RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
+   start_dp[i].len = sw_ring[i]->buf_len -
+   RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+   }
+
+   rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   vq_update_avail_idx(rxvq);
+}
+
+/* virtio vPMD receive routine, only accept(nb_pkts >= 
RTE_VIRTIO_DESC_PER_LOOP)
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+   uint16_t nb_pkts)
+{
+   struct virtqueue *rxvq = rx_queue;
+   uint16_t nb_used;
+   uint16_t desc_idx;
+   struct vring_used_elem *rused;
+   struct rte_mbuf **sw_ring;
+   struct rte_mbuf **sw_ring_end;
+   uint16_t nb_pkts_received;
+   __m128i shuf_msk1, shuf_msk2, len_adjust;
+
+   shuf_msk1 = _mm_set_epi8(
+   0xFF, 0xFF, 0xFF, 0xFF,
+   0xFF, 0xFF, /* vlan tci */
+   5, 4,  

[dpdk-dev] [PATCH v6 4/8] virtio: fill RX avail ring with blank mbufs

2015-10-29 Thread Huawei Xie
fill avail ring with blank mbufs in virtio_dev_vring_start

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/Makefile |  2 +-
 drivers/net/virtio/virtio_rxtx.c|  6 ++-
 drivers/net/virtio/virtio_rxtx.h|  3 ++
 drivers/net/virtio/virtio_rxtx_simple.c | 84 +
 4 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 930b60f..43835ba 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,7 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c

 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 7c82a6a..5162ce6 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -320,8 +320,10 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
/**
* Enqueue allocated buffers*
***/
-   error = virtqueue_enqueue_recv_refill(vq, m);
-
+   if (use_simple_rxtx)
+   error = 
virtqueue_enqueue_recv_refill_simple(vq, m);
+   else
+   error = virtqueue_enqueue_recv_refill(vq, m);
if (error) {
rte_pktmbuf_free(m);
break;
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index a10aa69..7d2d8fe 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -32,3 +32,6 @@
  */

 #define RTE_PMD_VIRTIO_RX_MAX_BURST 64
+
+int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+   struct rte_mbuf *m);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
new file mode 100644
index 000..cac5b9f
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -0,0 +1,84 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+int __attribute__((cold))
+virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+   struct rte_mbuf *cookie)
+{
+   struct vq_desc_extra *dxp;
+   struct vring_desc *start_dp;
+   uint16_t desc_idx;
+
+   desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+   dxp = &vq->vq_descx[desc_idx];
+   dxp->cookie = (void *)cookie;
+   vq->sw_ring[desc_idx] = cookie;

[dpdk-dev] [PATCH v6 3/8] virtio: rx/tx ring layout optimization

2015-10-29 Thread Huawei Xie
Changes in V4:
- fix the error in tx ring layout chart in this commit message.

In a DPDK-based switching environment, vhost mostly runs on a dedicated core
while virtio processing in guest VMs runs on different cores.
Take RX for example; with the generic implementation, for each guest buffer,
a) the virtio driver allocates a descriptor from the free descriptor list
b) it modifies the entry of the avail ring to point to the allocated descriptor
c) after the packet is received, it frees the descriptor

When vhost fetches the avail ring, it needs to fetch the modified cache line
from the virtio core's L1 cache, which is a heavy cost on current CPU
implementations.

The idea of this optimization is:
allocate a fixed descriptor for each entry of the avail ring, so the avail
ring will always stay the same during the run.
This removes the L1 cache-line transfer from the virtio core to the vhost core
for the avail ring. (Note we couldn't avoid the cache transfer for the
descriptors.)
Besides, descriptor allocation and free operations are eliminated.
This also makes vector processing possible, which further accelerates the
processing.

This is the layout for the avail ring (take 256 ring entries for example), with
each entry pointing to the descriptor with the same index.
  avail idx
      |
      v
 +-----+-----+-----+-----+-----+-----+
 |  0  |  1  |  2  | ... | 254 | 255 |   avail ring
 +--+--+--+--+--+--+-----+--+--+--+--+
    |     |     |           |     |
    v     v     v           v     v
 +--+--+--+--+--+--+-----+--+--+--+--+
 |  0  |  1  |  2  | ... | 254 | 255 |   desc ring
 +-----+-----+-----+-----+-----+-----+

 +-----+-----+-----+-----+-----+-----+
 |  0  |  1  |  2  | ... | 254 | 255 |   used ring
 +-----+-----+-----+-----+-----+-----+

This is the ring layout for TX.
As we need one virtio header for each xmit packet, we have 128 slots available.

 +-----+-----+-----+-----+  +-----+-----+-----+-----+
 |  0  |  1  | ... | 127 |  | 128 | 129 | ... | 255 |   avail ring
 +--+--+--+--+-----+--+--+  +--+--+--+--+-----+--+--+
    |     |           |        |     |           |
    v     v           v        v     v           v
 +--+--+--+--+-----+--+--+  +--+--+--+--+-----+--+--+
 | 128 | 129 | ... | 255 |  | 128 | 129 | ... | 255 |   desc ring for virtio_net_hdr
 +--+--+--+--+-----+--+--+  +--+--+--+--+-----+--+--+
    |     |           |        |     |           |
    v     v           v        v     v           v
 +--+--+--+--+-----+--+--+  +--+--+--+--+-----+--+--+
 |  0  |  1  | ... | 127 |  |  0  |  1  | ... | 127 |   desc ring for tx data
 +-----+-----+-----+-----+  +-----+-----+-----+-----+

(Both halves of the avail ring map onto the same 128 header descriptors,
128..255; each header descriptor chains to its data descriptor, 0..127.)

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_rxtx.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 5c00e9d..7c82a6a 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -302,6 +302,12 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
nbufs = 0;
error = ENOSPC;

+   if (use_simple_rxtx)
+   for (i = 0; i < vq->vq_nentries; i++) {
+   vq->vq_ring.avail->ring[i] = i;
+   vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+   }
+
memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;
@@ -332,6 +338,24 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
} else if (queue_type == VTNET_TQ) {
+   if (use_simple_rxtx) {
+   int mid_idx  = vq->vq_nentries >> 1;
+   for (i = 0; i < mid_idx; i++) {
+   vq->vq_ring.avail->ring[i] = i + mid_idx;
+   vq->vq_ring.desc[i + mid_idx].next = i;
+   vq->vq_ring.desc[i + mid_idx].addr =
+   vq->virtio_net_hdr_mem +
+   mid_idx * 
vq->hw->vtnet_hdr_size;
+   vq->vq_ring.desc[i + mid_idx].len =
+   vq->hw->vtnet_hdr_size;
+   vq->vq_ring.desc[i + mid_idx].flags =
+  

[dpdk-dev] [PATCH v6 2/8] virtio: add software rx ring, fake_buf into virtqueue

2015-10-29 Thread Huawei Xie
Changes in v3:
- Remove unnecessary NULL test for rte_free
- Remove unnecessary assign of local var vq after free

Add a software RX ring in the virtqueue.
Add a fake_mbuf in the virtqueue for wraparound processing.
Use the global use_simple_rxtx to indicate whether simple rx/tx is enabled.
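
An illustrative sketch (not from the patch) of why the tail of sw_ring is
padded with pointers to fake_mbuf: a vectorized RX loop copies mbuf pointers
in fixed-size chunks, so the last chunk may read past vq_nentries; with the
padding those reads stay in-bounds and harmless:

struct rte_mbuf;	/* opaque here; only pointers are copied */

#define BURST 8
static void
copy_mbuf_burst(struct rte_mbuf **dst, struct rte_mbuf **sw_ring, int pos)
{
	int i;

	/* may touch sw_ring[pos .. pos + BURST - 1], i.e. the padded tail */
	for (i = 0; i < BURST; i++)
		dst[i] = sw_ring[pos + i];
}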

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 11 ++-
 drivers/net/virtio/virtio_rxtx.c   |  7 +++
 drivers/net/virtio/virtqueue.h |  4 
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 79a3640..82676d3 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -247,8 +247,8 @@ virtio_dev_queue_release(struct virtqueue *vq) {
VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->queue_id);
VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN, 0);

+   rte_free(vq->sw_ring);
rte_free(vq);
-   vq = NULL;
}
 }

@@ -292,6 +292,9 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
dev->data->port_id, queue_idx);
vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
vq_size * sizeof(struct vq_desc_extra), 
RTE_CACHE_LINE_SIZE);
+   vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
+   (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
+   sizeof(vq->sw_ring[0]), RTE_CACHE_LINE_SIZE, socket_id);
} else if (queue_type == VTNET_TQ) {
snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
dev->data->port_id, queue_idx);
@@ -308,6 +311,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
return (-ENOMEM);
}
+   if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
+   PMD_INIT_LOG(ERR, "%s: Can not allocate RX soft ring",
+   __func__);
+   rte_free(vq);
+   return -ENOMEM;
+   }

vq->hw = hw;
vq->port_id = dev->data->port_id;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 9324f7f..5c00e9d 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -62,6 +62,8 @@
 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
 #endif

+static int use_simple_rxtx;
+
 static void
 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 {
@@ -299,6 +301,11 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
/* Allocate blank mbufs for the each rx descriptor */
nbufs = 0;
error = ENOSPC;
+
+   memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
+   for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
+   vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;
+
while (!virtqueue_full(vq)) {
m = rte_rxmbuf_alloc(vq->mpool);
if (m == NULL)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 7789411..6a1ec48 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -190,6 +190,10 @@ struct virtqueue {
uint16_t vq_avail_idx;
phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */

+   struct rte_mbuf **sw_ring; /**< RX software ring. */
+   /* dummy mbuf, for wraparound when processing RX ring. */
+   struct rte_mbuf fake_mbuf;
+
/* Statistics */
uint64_tpackets;
uint64_tbytes;
-- 
1.8.1.4



[dpdk-dev] [PATCH v6 1/8] virtio: add virtio_rxtx.h header file

2015-10-29 Thread Huawei Xie
All rx/tx related declarations will be moved into this header file in the future.
Add RTE_PMD_VIRTIO_RX_MAX_BURST.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c |  1 +
 drivers/net/virtio/virtio_rxtx.c   |  1 +
 drivers/net/virtio/virtio_rxtx.h   | 34 ++
 3 files changed, 36 insertions(+)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 465d3cd..79a3640 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -61,6 +61,7 @@
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"


 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index c5b53bb..9324f7f 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -54,6 +54,7 @@
 #include "virtio_logs.h"
 #include "virtio_ethdev.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"

 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
new file mode 100644
index 000..a10aa69
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -0,0 +1,34 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define RTE_PMD_VIRTIO_RX_MAX_BURST 64
-- 
1.8.1.4



[dpdk-dev] [PATCH v6 0/8] virtio ring layout optimization and simple rx/tx processing

2015-10-29 Thread Huawei Xie
Changes in v6:
- Update release notes
- Fix the error in virtio tx ring layout ascii chart in the cover-letter

Changes in v5:
- Call __rte_pktmbuf_prefree_seg to check refcnt when freeing mbufs

Changes in v4:
- Fix the error in virtio tx ring layout ascii chart in the commit message
- Move virtio_xmit_cleanup ahead to free descriptors earlier
- Test the mergeable feature when selecting the simple rx/tx functions

Changes in v3:
- Remove unnecessary NULL test for rte_free
- Remove unnecessary assign of local var after free
- Remove return at the end of void function
- Remove always_inline attribute for virtio_xmit_cleanup
- Reword some commit messages
- Add TODO in the commit message of simple tx patch

Changes in v2:
- Remove the configure macro
- Enable simple R/TX processing when user specifies simple txq flags
- Reword some comments and commit messages

In a DPDK-based switching environment, vhost mostly runs on a dedicated core
while virtio processing in guest VMs runs on other cores.
Take RX for example; with the generic implementation, for each guest buffer,
a) the virtio driver allocates a descriptor from the free descriptor list
b) it modifies the entry of the avail ring to point to the allocated descriptor
c) after the packet is received, it frees the descriptor

When vhost fetches the avail ring, it needs to fetch the modified cache line
from the virtio core's L1 cache, which is a heavy cost on current CPU
implementations.

The idea of this optimization is:
allocate a fixed descriptor for each entry of the avail ring, so the avail
ring will always stay the same during the run.
This removes the L1 cache-line transfer from the virtio core to the vhost core
for the avail ring. (Note we couldn't avoid the cache transfer for the
descriptors.)
Besides, descriptor allocation and free operations are eliminated.
This also makes vector processing possible, which further accelerates the
processing.

This is the layout for the avail ring (take 256 ring entries for example), with
each entry pointing to the descriptor with the same index.
  avail idx
      |
      v
 +-----+-----+-----+-----+-----+-----+
 |  0  |  1  |  2  | ... | 254 | 255 |   avail ring
 +--+--+--+--+--+--+-----+--+--+--+--+
    |     |     |           |     |
    v     v     v           v     v
 +--+--+--+--+--+--+-----+--+--+--+--+
 |  0  |  1  |  2  | ... | 254 | 255 |   desc ring
 +-----+-----+-----+-----+-----+-----+

 +-----+-----+-----+-----+-----+-----+
 |  0  |  1  |  2  | ... | 254 | 255 |   used ring
 +-----+-----+-----+-----+-----+-----+

This is the ring layout for TX.
As we need one virtio header for each xmit packet, we have 128 slots available.

 +-----+-----+-----+-----+  +-----+-----+-----+-----+
 |  0  |  1  | ... | 127 |  | 128 | 129 | ... | 255 |   avail ring
 +--+--+--+--+-----+--+--+  +--+--+--+--+-----+--+--+
    |     |           |        |     |           |
    v     v           v        v     v           v
 +--+--+--+--+-----+--+--+  +--+--+--+--+-----+--+--+
 | 128 | 129 | ... | 255 |  | 128 | 129 | ... | 255 |   desc ring for virtio_net_hdr
 +--+--+--+--+-----+--+--+  +--+--+--+--+-----+--+--+
    |     |           |        |     |           |
    v     v           v        v     v           v
 +--+--+--+--+-----+--+--+  +--+--+--+--+-----+--+--+
 |  0  |  1  | ... | 127 |  |  0  |  1  | ... | 127 |   desc ring for tx data
 +-----+-----+-----+-----+  +-----+-----+-----+-----+

(Both halves of the avail ring map onto the same 128 header descriptors,
128..255; each header descriptor chains to its data descriptor, 0..127.)


A performance boost can be observed only if the virtio backend isn't the
bottleneck, or in the VM2VM case.
There are also several vhost optimization patches to be submitted later.


Huawei Xie (8):
  virtio: add virtio_rxtx.h header file
  virtio: add software rx ring, fake_buf into virtqueue
  virtio: rx/tx ring layout optimization
  virtio: fill RX avail ring with blank mbufs
  virtio: virtio vec rx
  virtio: simple tx routine
  virtio: pick simple rx/tx func
  doc: update release notes 2.2 about virtio performance optimization

 doc/guides/rel_notes/release_2_2.rst|   3 +
 drivers/net/virtio/Makefile |   2 +-
 drivers/net/virtio/virtio_ethdev.c  |  12 +-
 drivers/net/virtio/virtio_ethdev.h  |   5 +
 drivers/net/virtio/virtio_rxtx.c|  56 -
 drivers/net/virtio/virtio_rxtx.h|  39 +++
 drivers/net/virtio/virtio_rxtx_simple.c | 414 
 drivers/net/virtio/virtqueue.h  |   5 +
 8 files changed, 532 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

-- 
1.8.1.4



[dpdk-dev] [PATCH v5 6/7] virtio: simple tx routine

2015-10-26 Thread Huawei Xie
Changes in v5:
- call __rte_pktmbuf_prefree_seg to check refcnt when freeing mbufs

Changes in v4:
- move virtio_xmit_cleanup ahead to free descriptors earlier

Changes in v3:
- Remove return at the end of void function
- Remove always_inline attribute for virtio_xmit_cleanup

Bulk free of mbufs when cleaning the used ring.
The shift operation on idx could be saved if vq_free_cnt meant
free slots rather than free descriptors.

TODO: rearrange vq data structure, pack the stats var together so that we
could use one vec instruction to update all of them.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.h  |   3 +
 drivers/net/virtio/virtio_rxtx_simple.c | 106 
 2 files changed, 109 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index d7797ab..ae2d47d 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -111,6 +111,9 @@ uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

+uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
  * frames larger than 1514 bytes. We do not yet support software LRO
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
index ef17562..624e789 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -288,6 +288,112 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf 
**rx_pkts,
return nb_pkts_received;
 }

+#define VIRTIO_TX_FREE_THRESH 32
+#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
+#define VIRTIO_TX_FREE_NR 32
+/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift 
*/
+static inline void
+virtio_xmit_cleanup(struct virtqueue *vq)
+{
+   uint16_t i, desc_idx;
+   int nb_free = 0;
+   struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
+
+   desc_idx = (uint16_t)(vq->vq_used_cons_idx &
+  ((vq->vq_nentries >> 1) - 1));
+   m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+   m = __rte_pktmbuf_prefree_seg(m);
+   if (likely(m != NULL)) {
+   free[0] = m;
+   nb_free = 1;
+   for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+   m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+   m = __rte_pktmbuf_prefree_seg(m);
+   if (likely(m != NULL)) {
+   if (likely(m->pool == free[0]->pool))
+   free[nb_free++] = m;
+   else {
+   rte_mempool_put_bulk(free[0]->pool,
+   (void **)free, nb_free);
+   free[0] = m;
+   nb_free = 1;
+   }
+   }
+   }
+   rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+   } else {
+   for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+   m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+   m = __rte_pktmbuf_prefree_seg(m);
+   if (m != NULL)
+   rte_mempool_put(m->pool, m);
+   }
+   }
+
+   vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
+   vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
+}
+
+uint16_t
+virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   struct virtqueue *txvq = tx_queue;
+   uint16_t nb_used;
+   uint16_t desc_idx;
+   struct vring_desc *start_dp;
+   uint16_t nb_tail, nb_commit;
+   int i;
+   uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1;
+
+   nb_used = VIRTQUEUE_NUSED(txvq);
+   rte_compiler_barrier();
+
+   if (nb_used >= VIRTIO_TX_FREE_THRESH)
+   virtio_xmit_cleanup(tx_queue);
+
+   nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), nb_pkts);
+   desc_idx = (uint16_t) (txvq->vq_avail_idx & desc_idx_max);
+   start_dp = txvq->vq_ring.desc;
+   nb_tail = (uint16_t) (desc_idx_max + 1 - desc_idx);
+
+   if (nb_commit >= nb_tail) {
+   for (i = 0; i < nb_tail; i++)
+   txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+   for (i = 0; i < nb_tail; i++) {
+   start_dp[desc_idx].addr =
+   RTE_MBUF_DATA_DMA_ADDR(*tx_pkts);
+   start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
+   tx_pkts++;
+

[dpdk-dev] [PATCH v5 5/7] virtio: virtio vec rx

2015-10-26 Thread Huawei Xie
With a fixed avail ring, we don't need to get the desc idx from the avail ring;
the virtio driver only has to deal with the desc ring.
This patch uses vector instructions to accelerate processing of the desc ring.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.h  |   2 +
 drivers/net/virtio/virtio_rxtx.c|   3 +
 drivers/net/virtio/virtio_rxtx.h|   2 +
 drivers/net/virtio/virtio_rxtx_simple.c | 224 
 drivers/net/virtio/virtqueue.h  |   1 +
 5 files changed, 232 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index 9026d42..d7797ab 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -108,6 +108,8 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct 
rte_mbuf **rx_pkts,
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+   uint16_t nb_pkts);

 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 5162ce6..947fc46 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -432,6 +432,9 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
vq->mpool = mp;

dev->data->rx_queues[queue_idx] = vq;
+
+   virtio_rxq_vec_setup(vq);
+
return 0;
 }

diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 7d2d8fe..831e492 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -33,5 +33,7 @@

 #define RTE_PMD_VIRTIO_RX_MAX_BURST 64

+int virtio_rxq_vec_setup(struct virtqueue *rxq);
+
 int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *m);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
index cac5b9f..ef17562 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -58,6 +58,10 @@
 #include "virtqueue.h"
 #include "virtio_rxtx.h"

+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
 int __attribute__((cold))
 virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *cookie)
@@ -82,3 +86,223 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,

return 0;
 }
+
+static inline void
+virtio_rxq_rearm_vec(struct virtqueue *rxvq)
+{
+   int i;
+   uint16_t desc_idx;
+   struct rte_mbuf **sw_ring;
+   struct vring_desc *start_dp;
+   int ret;
+
+   desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1);
+   sw_ring = &rxvq->sw_ring[desc_idx];
+   start_dp = &rxvq->vq_ring.desc[desc_idx];
+
+   ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
+   RTE_VIRTIO_VPMD_RX_REARM_THRESH);
+   if (unlikely(ret)) {
+   rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
+   RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   return;
+   }
+
+   for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
+   uintptr_t p;
+
+   p = (uintptr_t)&sw_ring[i]->rearm_data;
+   *(uint64_t *)p = rxvq->mbuf_initializer;
+
+   start_dp[i].addr =
+   (uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr +
+   RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
+   start_dp[i].len = sw_ring[i]->buf_len -
+   RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+   }
+
+   rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   vq_update_avail_idx(rxvq);
+}
+
+/* virtio vPMD receive routine, only accept(nb_pkts >= 
RTE_VIRTIO_DESC_PER_LOOP)
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+   uint16_t nb_pkts)
+{
+   struct virtqueue *rxvq = rx_queue;
+   uint16_t nb_used;
+   uint16_t desc_idx;
+   struct vring_used_elem *rused;
+   struct rte_mbuf **sw_ring;
+   struct rte_mbuf **sw_ring_end;
+   uint16_t nb_pkts_received;
+   __m128i shuf_msk1, shuf_msk2, len_adjust;
+
+   shuf_msk1 = _mm_set_epi8(
+   0xFF, 0xFF, 0xFF, 0xFF,
+   0xFF, 0xFF, /* vlan tci */
+   5, 4,  

[dpdk-dev] [PATCH v5 4/7] virtio: fill RX avail ring with blank mbufs

2015-10-26 Thread Huawei Xie
fill avail ring with blank mbufs in virtio_dev_vring_start

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/Makefile |  2 +-
 drivers/net/virtio/virtio_rxtx.c|  6 ++-
 drivers/net/virtio/virtio_rxtx.h|  3 ++
 drivers/net/virtio/virtio_rxtx_simple.c | 84 +
 4 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 930b60f..43835ba 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,7 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c

 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 7c82a6a..5162ce6 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -320,8 +320,10 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
/*******************************************
 *        Enqueue allocated buffers        *
 *******************************************/
-   error = virtqueue_enqueue_recv_refill(vq, m);
-
+   if (use_simple_rxtx)
+   error = virtqueue_enqueue_recv_refill_simple(vq, m);
+   else
+   error = virtqueue_enqueue_recv_refill(vq, m);
if (error) {
rte_pktmbuf_free(m);
break;
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index a10aa69..7d2d8fe 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -32,3 +32,6 @@
  */

 #define RTE_PMD_VIRTIO_RX_MAX_BURST 64
+
+int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+   struct rte_mbuf *m);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
new file mode 100644
index 000..cac5b9f
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -0,0 +1,84 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+int __attribute__((cold))
+virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+   struct rte_mbuf *cookie)
+{
+   struct vq_desc_extra *dxp;
+   struct vring_desc *start_dp;
+   uint16_t desc_idx;
+
+   desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+   dxp = >vq_descx[desc_idx];
+   dxp->cookie = (void *)cookie;
vq->sw_ring[desc_idx] = cookie;

[dpdk-dev] [PATCH v5 3/7] virtio: rx/tx ring layout optimization

2015-10-26 Thread Huawei Xie
Changes in V4:
- fix the error in tx ring layout chart in this commit message.

In a DPDK-based switching environment, vhost mostly runs on a dedicated core
while virtio processing in guest VMs runs on different cores.
Take RX for example: with the generic implementation, for each guest buffer,
a) the virtio driver allocates a descriptor from the free descriptor list
b) it modifies the entry of the avail ring to point to the allocated descriptor
c) after the packet is received, it frees the descriptor

When vhost fetches the avail ring, it needs to fetch the modified L1 cache
line from the virtio core, which is a heavy cost on current CPU
implementations.

The idea of this optimization is:
allocate a fixed descriptor for each entry of the avail ring, so the avail
ring always stays the same during the run.
This removes the L1 cache transfer from the virtio core to the vhost core for
the avail ring.
(Note we couldn't avoid the cache transfer for descriptors.)
Besides, descriptor allocation and free operations are eliminated.
This also makes vector processing possible, to further accelerate the
processing.

This is the layout for the avail ring (take 256 ring entries for example),
with each entry pointing to the descriptor with the same index.

 avail
  idx
    +
    |
+----+----+----+-----+-------+------+
| 0  | 1  | 2  | ... |  254  | 255  |  avail ring
+-+--+-+--+-+--+-----+---+---+--+---+
  |    |    |            |      |
  |    |    |            |      |
  v    v    v            v      v
+-+--+-+--+-+--+-----+---+---+--+---+
| 0  | 1  | 2  | ... |  254  | 255  |  desc ring
+----+----+----+-----+-------+------+
                         |
                         |
+----+----+----+-----+---+---+------+
| 0  | 1  | 2  | ... |  254  | 255  |  used ring
+----+----+----+-----+-------+------+

This is the ring layout for TX.
As we need one virtio header for each xmit packet, we have 128 slots available.

+-----+-----+-----+------++------+------+------+------+
|  0  |  1  | ... |  127 || 128  | 129  | ...  | 255  |   avail ring
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |      |      |             |
   v     v            v      v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
| 128 | 129 | ... |  255 || 128  | 129  | ...  | 255  |   desc ring for virtio_net_hdr
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |      |      |             |
   v     v            v      v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
|  0  |  1  | ... |  127 ||  0   |  1   | ...  | 127  |   desc ring for tx data
+-----+-----+-----+------++------+------+------+------+
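
As a minimal sketch of the RX half of this idea (names taken from this
patch; illustration only, the authoritative change is the hunk below):

    /* Pin avail ring entry i to descriptor i once at setup time;
     * the entry is never rewritten afterwards. */
    for (i = 0; i < vq->vq_nentries; i++)
        vq->vq_ring.avail->ring[i] = i;

    /* A refill then only rewrites descriptor i in place and bumps
     * avail->idx, so the avail ring cache lines stay clean for the
     * vhost core and no descriptor is allocated or freed on the hot
     * path. */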

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_rxtx.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 5c00e9d..7c82a6a 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -302,6 +302,12 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
nbufs = 0;
error = ENOSPC;

+   if (use_simple_rxtx)
+   for (i = 0; i < vq->vq_nentries; i++) {
+   vq->vq_ring.avail->ring[i] = i;
+   vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+   }
+
memset(>fake_mbuf, 0, sizeof(vq->fake_mbuf));
for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
vq->sw_ring[vq->vq_nentries + i] = >fake_mbuf;
@@ -332,6 +338,24 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
} else if (queue_type == VTNET_TQ) {
+   if (use_simple_rxtx) {
+   int mid_idx  = vq->vq_nentries >> 1;
+   for (i = 0; i < mid_idx; i++) {
+   vq->vq_ring.avail->ring[i] = i + mid_idx;
+   vq->vq_ring.desc[i + mid_idx].next = i;
+   vq->vq_ring.desc[i + mid_idx].addr =
+   vq->virtio_net_hdr_mem +
+   mid_idx * vq->hw->vtnet_hdr_size;
+   vq->vq_ring.desc[i + mid_idx].len =
+   vq->hw->vtnet_hdr_size;
+   vq->vq_ring.desc[i + mid_idx].flags =
+   VRING_DESC_F_NEXT;

[dpdk-dev] [PATCH v5 2/7] virtio: add software rx ring, fake_buf into virtqueue

2015-10-26 Thread Huawei Xie
Changes in v3:
- Remove unnecessary NULL test for rte_free
- Remove unnecessary assign of local var vq after free

Add software RX ring in virtqueue.
Add fake_mbuf in virtqueue for wraparound processing.
Use the global use_simple_rxtx flag to indicate whether simple rx/tx is enabled.
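
For reference, the wraparound trick this padding enables looks roughly like
this (a hedged sketch, not part of this patch; RTE_VIRTIO_DESC_PER_LOOP
comes from the later vec RX patch in this series):

    /* The vectorized RX loop loads mbuf pointers in fixed-size groups,
     * so near the end of the ring it may touch slots beyond
     * vq_nentries; those trailing slots all point at &vq->fake_mbuf,
     * which keeps the speculative loads and stores harmless. */
    struct rte_mbuf **sw = &vq->sw_ring[desc_idx];

    for (i = 0; i < RTE_VIRTIO_DESC_PER_LOOP; i++)
        rx_pkts[i] = sw[i];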

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 11 ++-
 drivers/net/virtio/virtio_rxtx.c   |  7 +++
 drivers/net/virtio/virtqueue.h |  4 
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 79a3640..82676d3 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -247,8 +247,8 @@ virtio_dev_queue_release(struct virtqueue *vq) {
VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->queue_id);
VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN, 0);

+   rte_free(vq->sw_ring);
rte_free(vq);
-   vq = NULL;
}
 }

@@ -292,6 +292,9 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
dev->data->port_id, queue_idx);
vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
+   vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
+   (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
+   sizeof(vq->sw_ring[0]), RTE_CACHE_LINE_SIZE, socket_id);
} else if (queue_type == VTNET_TQ) {
snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
dev->data->port_id, queue_idx);
@@ -308,6 +311,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
return (-ENOMEM);
}
+   if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
+   PMD_INIT_LOG(ERR, "%s: Can not allocate RX soft ring",
+   __func__);
+   rte_free(vq);
+   return -ENOMEM;
+   }

vq->hw = hw;
vq->port_id = dev->data->port_id;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 9324f7f..5c00e9d 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -62,6 +62,8 @@
 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
 #endif

+static int use_simple_rxtx;
+
 static void
 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 {
@@ -299,6 +301,11 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
/* Allocate blank mbufs for the each rx descriptor */
nbufs = 0;
error = ENOSPC;
+
+   memset(>fake_mbuf, 0, sizeof(vq->fake_mbuf));
+   for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
+   vq->sw_ring[vq->vq_nentries + i] = >fake_mbuf;
+
while (!virtqueue_full(vq)) {
m = rte_rxmbuf_alloc(vq->mpool);
if (m == NULL)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 7789411..6a1ec48 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -190,6 +190,10 @@ struct virtqueue {
uint16_t vq_avail_idx;
phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */

+   struct rte_mbuf **sw_ring; /**< RX software ring. */
+   /* dummy mbuf, for wraparound when processing RX ring. */
+   struct rte_mbuf fake_mbuf;
+
/* Statistics */
uint64_tpackets;
uint64_tbytes;
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 1/7] virtio: add virtio_rxtx.h header file

2015-10-26 Thread Huawei Xie
Would move all rx/tx related declarations into this header file in the future.
Add RTE_PMD_VIRTIO_RX_MAX_BURST.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c |  1 +
 drivers/net/virtio/virtio_rxtx.c   |  1 +
 drivers/net/virtio/virtio_rxtx.h   | 34 ++
 3 files changed, 36 insertions(+)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 465d3cd..79a3640 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -61,6 +61,7 @@
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"


 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index c5b53bb..9324f7f 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -54,6 +54,7 @@
 #include "virtio_logs.h"
 #include "virtio_ethdev.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"

 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
new file mode 100644
index 000..a10aa69
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -0,0 +1,34 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define RTE_PMD_VIRTIO_RX_MAX_BURST 64
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 0/7] virtio ring layout optimization and simple rx/tx processing

2015-10-26 Thread Huawei Xie
Changes in v5:
- Call __rte_pktmbuf_prefree_seg to check refcnt when freeing mbufs

Changes in v4:
- Fix the error in virtio tx ring layout ascii chart in the commit message
- Move virtio_xmit_cleanup ahead to free descriptors earlier
- Test the mergeable feature when selecting simple rx/tx functions

Changes in v3:
- Remove unnecessary NULL test for rte_free
- Remove unnecessary assign of local var after free
- Remove return at the end of void function
- Remove always_inline attribute for virtio_xmit_cleanup
- Reword some commit messages
- Add TODO in the commit message of simple tx patch

Changes in v2:
- Remove the configure macro
- Enable simple RX/TX processing when the user specifies simple txq flags
- Reword some comments and commit messages

In a DPDK-based switching environment, vhost mostly runs on a dedicated core
while virtio processing in guest VMs runs on other cores.
Take RX for example: with the generic implementation, for each guest buffer,
a) the virtio driver allocates a descriptor from the free descriptor list
b) it modifies the entry of the avail ring to point to the allocated descriptor
c) after the packet is received, it frees the descriptor

When vhost fetches the avail ring, it needs to fetch the modified L1 cache
line from the virtio core, which is a heavy cost on current CPU
implementations.

The idea of this optimization is:
allocate a fixed descriptor for each entry of the avail ring, so the avail
ring always stays the same during the run.
This removes the L1 cache transfer from the virtio core to the vhost core for
the avail ring.
(Note we couldn't avoid the cache transfer for descriptors.)
Besides, descriptor allocation and free operations are eliminated.
This also makes vector processing possible, to further accelerate the
processing.

This is the layout for the avail ring (take 256 ring entries for example),
with each entry pointing to the descriptor with the same index.

 avail
  idx
    +
    |
+----+----+----+-----+-------+------+
| 0  | 1  | 2  | ... |  254  | 255  |  avail ring
+-+--+-+--+-+--+-----+---+---+--+---+
  |    |    |            |      |
  |    |    |            |      |
  v    v    v            v      v
+-+--+-+--+-+--+-----+---+---+--+---+
| 0  | 1  | 2  | ... |  254  | 255  |  desc ring
+----+----+----+-----+-------+------+
                         |
                         |
+----+----+----+-----+---+---+------+
| 0  | 1  | 2  | ... |  254  | 255  |  used ring
+----+----+----+-----+-------+------+

This is the ring layout for TX.
As we need one virtio header for each xmit packet, we have 128 slots available.

+-----+-----+-----+------++------+------+------+------+
|  0  |  1  | ... |  127 || 128  | 129  | ...  | 255  |   avail ring
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |      |      |             |
   v     v            v      v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
| 128 | 129 | ... |  255 || 128  | 129  | ...  | 255  |   desc ring for virtio_net_hdr
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |      |      |             |
   v     v            v      v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
|  0  |  1  | ... |  127 ||  0   |  1   | ...  | 127  |   desc ring for tx data
+-----+-----+-----+------++------+------+------+------+


A performance boost can be observed only if the virtio backend isn't the
bottleneck, or in the VM2VM case.
There are also several vhost optimization patches to be submitted later.


Huawei Xie (7):
  virtio: add virtio_rxtx.h header file
  virtio: add software rx ring, fake_buf into virtqueue
  virtio: rx/tx ring layout optimization
  virtio: fill RX avail ring with blank mbufs
  virtio: virtio vec rx
  virtio: simple tx routine
  virtio: pick simple rx/tx func

 drivers/net/virtio/Makefile |   2 +-
 drivers/net/virtio/virtio_ethdev.c  |  12 +-
 drivers/net/virtio/virtio_ethdev.h  |   5 +
 drivers/net/virtio/virtio_rxtx.c|  56 -
 drivers/net/virtio/virtio_rxtx.h|  39 +++
 drivers/net/virtio/virtio_rxtx_simple.c | 414 
 drivers/net/virtio/virtqueue.h  |   5 +
 7 files changed, 529 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

-- 
1.8.1.4



[dpdk-dev] [PATCH v4 7/7] virtio: pick simple rx/tx func

2015-10-22 Thread Huawei Xie
Changes in v4:
Check the mergeable feature when selecting the simple rx/tx functions.

The simple rx/tx functions are chosen when mergeable RX is disabled and the
user specifies single-segment and no-offload support.
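
For context, an application opts into this path roughly as follows (a
hypothetical snippet against the standard ethdev API of this era; port_id
and nb_txd are placeholders):

    struct rte_eth_txconf txconf = {
        /* single segment, no offloads: lets this PMD pick the simple
         * rx/tx paths, provided VIRTIO_NET_F_MRG_RXBUF was not
         * negotiated */
        .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS,
    };

    rte_eth_tx_queue_setup(port_id, 0, nb_txd, rte_socket_id(), &txconf);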

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_rxtx.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 947fc46..0f1daf2 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -53,6 +53,7 @@

 #include "virtio_logs.h"
 #include "virtio_ethdev.h"
+#include "virtio_pci.h"
 #include "virtqueue.h"
 #include "virtio_rxtx.h"

@@ -62,6 +63,10 @@
 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
 #endif

+
+#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
+   ETH_TXQ_FLAGS_NOOFFLOADS)
+
 static int use_simple_rxtx;

 static void
@@ -459,6 +464,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
const struct rte_eth_txconf *tx_conf)
 {
uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
+   struct virtio_hw *hw = dev->data->dev_private;
struct virtqueue *vq;
uint16_t tx_free_thresh;
int ret;
@@ -471,6 +477,15 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -EINVAL;
}

+   /* Use simple rx/tx func if single segment and no offloads */
+   if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
+!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+   PMD_INIT_LOG(INFO, "Using simple rx/tx path");
+   dev->tx_pkt_burst = virtio_xmit_pkts_simple;
+   dev->rx_pkt_burst = virtio_recv_pkts_vec;
+   use_simple_rxtx = 1;
+   }
+
ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
nb_desc, socket_id, );
if (ret < 0) {
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 6/7] virtio: simple tx routine

2015-10-22 Thread Huawei Xie
Changes in v4:
- move virtio_xmit_cleanup ahead to free descriptors earlier

Changes in v3:
- Remove return at the end of void function
- Remove always_inline attribute for virtio_xmit_cleanup
Bulk free mbufs when cleaning the used ring.
The shift operations on idx could be saved if vq_free_cnt meant
free slots rather than free descriptors.

TODO: rearrange vq data structure, pack the stats var together so that we
could use one vec instruction to update all of them.
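
The shifts mentioned above exist because each TX packet consumes two
descriptors (one virtio_net_hdr descriptor chained to one data descriptor)
while vq_free_cnt counts descriptors. A worked sketch of the arithmetic in
virtio_xmit_pkts_simple() below:

    /* free descriptors -> packets we may place */
    nb_commit = RTE_MIN((uint16_t)(txvq->vq_free_cnt >> 1), nb_pkts);

    /* committed packets -> descriptors consumed */
    txvq->vq_free_cnt -= (uint16_t)(nb_commit << 1);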

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.h  |  3 ++
 drivers/net/virtio/virtio_rxtx_simple.c | 93 +
 2 files changed, 96 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index d7797ab..ae2d47d 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -111,6 +111,9 @@ uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

+uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
  * frames larger than 1514 bytes. We do not yet support software LRO
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
index ef17562..79b4f7f 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -288,6 +288,99 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf 
**rx_pkts,
return nb_pkts_received;
 }

+#define VIRTIO_TX_FREE_THRESH 32
+#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
+#define VIRTIO_TX_FREE_NR 32
+/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift 
*/
+static inline void
+virtio_xmit_cleanup(struct virtqueue *vq)
+{
+   uint16_t i, desc_idx;
+   int nb_free = 0;
+   struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
+
+   desc_idx = (uint16_t)(vq->vq_used_cons_idx &
+   ((vq->vq_nentries >> 1) - 1));
+   free[0] = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+   nb_free = 1;
+
+   for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+   m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+   if (likely(m->pool == free[0]->pool))
+   free[nb_free++] = m;
+   else {
+   rte_mempool_put_bulk(free[0]->pool, (void **)free,
+   nb_free);
+   free[0] = m;
+   nb_free = 1;
+   }
+   }
+
+   rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+   vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
+   vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
+}
+
+uint16_t
+virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   struct virtqueue *txvq = tx_queue;
+   uint16_t nb_used;
+   uint16_t desc_idx;
+   struct vring_desc *start_dp;
+   uint16_t nb_tail, nb_commit;
+   int i;
+   uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1;
+
+   nb_used = VIRTQUEUE_NUSED(txvq);
+   rte_compiler_barrier();
+
+   if (nb_used >= VIRTIO_TX_FREE_THRESH)
+   virtio_xmit_cleanup(tx_queue);
+
+   nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), nb_pkts);
+   desc_idx = (uint16_t) (txvq->vq_avail_idx & desc_idx_max);
+   start_dp = txvq->vq_ring.desc;
+   nb_tail = (uint16_t) (desc_idx_max + 1 - desc_idx);
+
+   if (nb_commit >= nb_tail) {
+   for (i = 0; i < nb_tail; i++)
+   txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+   for (i = 0; i < nb_tail; i++) {
+   start_dp[desc_idx].addr =
+   RTE_MBUF_DATA_DMA_ADDR(*tx_pkts);
+   start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
+   tx_pkts++;
+   desc_idx++;
+   }
+   nb_commit -= nb_tail;
+   desc_idx = 0;
+   }
+   for (i = 0; i < nb_commit; i++)
+   txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+   for (i = 0; i < nb_commit; i++) {
+   start_dp[desc_idx].addr = RTE_MBUF_DATA_DMA_ADDR(*tx_pkts);
+   start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
+   tx_pkts++;
+   desc_idx++;
+   }
+
+   rte_compiler_barrier();
+
+   txvq->vq_free_cnt -= (uint16_t)(nb_pkts << 1);
+   txvq->vq_avail_idx += nb_pkts;
+   txvq->vq_ring.avail->idx = txvq->vq_avail_idx;
+   txvq->packets += nb_pkts;
+
+   if (likely(nb_pkts)) {
+   if (unlikely(virtqueue_kick_prepare(txvq)))
+   virtqueue_notify(txvq);
+   }
+
+   return nb_pkts;
+}

[dpdk-dev] [PATCH v4 5/7] virtio: virtio vec rx

2015-10-22 Thread Huawei Xie
With the fixed avail ring, we don't need to get the desc idx from the avail
ring; the virtio driver only has to deal with the desc ring.
This patch uses vector instructions to accelerate processing of the desc ring.
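
In scalar form the simplification looks like this (a sketch only; the
vectorized routine in this patch processes RTE_VIRTIO_DESC_PER_LOOP used
elements per iteration with SSE shuffles):

    /* With the fixed layout, used->ring[i].id always equals the slot
     * index, so the mbuf comes straight from sw_ring with no avail
     * ring indirection. */
    desc_idx = rxvq->vq_used_cons_idx & (rxvq->vq_nentries - 1);
    m = rxvq->sw_ring[desc_idx];
    m->pkt_len = rxvq->vq_ring.used->ring[desc_idx].len -
        sizeof(struct virtio_net_hdr);
    rx_pkts[nb_rx++] = m;
    rxvq->vq_used_cons_idx++;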

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.h  |   2 +
 drivers/net/virtio/virtio_rxtx.c|   3 +
 drivers/net/virtio/virtio_rxtx.h|   2 +
 drivers/net/virtio/virtio_rxtx_simple.c | 224 
 drivers/net/virtio/virtqueue.h  |   1 +
 5 files changed, 232 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index 9026d42..d7797ab 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -108,6 +108,8 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct 
rte_mbuf **rx_pkts,
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+   uint16_t nb_pkts);

 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 5162ce6..947fc46 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -432,6 +432,9 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
vq->mpool = mp;

dev->data->rx_queues[queue_idx] = vq;
+
+   virtio_rxq_vec_setup(vq);
+
return 0;
 }

diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 7d2d8fe..831e492 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -33,5 +33,7 @@

 #define RTE_PMD_VIRTIO_RX_MAX_BURST 64

+int virtio_rxq_vec_setup(struct virtqueue *rxq);
+
 int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *m);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
index cac5b9f..ef17562 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -58,6 +58,10 @@
 #include "virtqueue.h"
 #include "virtio_rxtx.h"

+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
 int __attribute__((cold))
 virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *cookie)
@@ -82,3 +86,223 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,

return 0;
 }
+
+static inline void
+virtio_rxq_rearm_vec(struct virtqueue *rxvq)
+{
+   int i;
+   uint16_t desc_idx;
+   struct rte_mbuf **sw_ring;
+   struct vring_desc *start_dp;
+   int ret;
+
+   desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1);
+   sw_ring = >sw_ring[desc_idx];
+   start_dp = >vq_ring.desc[desc_idx];
+
+   ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
+   RTE_VIRTIO_VPMD_RX_REARM_THRESH);
+   if (unlikely(ret)) {
+   rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
+   RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   return;
+   }
+
+   for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
+   uintptr_t p;
+
+   p = (uintptr_t)_ring[i]->rearm_data;
+   *(uint64_t *)p = rxvq->mbuf_initializer;
+
+   start_dp[i].addr =
+   (uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr +
+   RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
+   start_dp[i].len = sw_ring[i]->buf_len -
+   RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+   }
+
+   rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   vq_update_avail_idx(rxvq);
+}
+
+/* virtio vPMD receive routine, only accepts nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - if nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+   uint16_t nb_pkts)
+{
+   struct virtqueue *rxvq = rx_queue;
+   uint16_t nb_used;
+   uint16_t desc_idx;
+   struct vring_used_elem *rused;
+   struct rte_mbuf **sw_ring;
+   struct rte_mbuf **sw_ring_end;
+   uint16_t nb_pkts_received;
+   __m128i shuf_msk1, shuf_msk2, len_adjust;
+
+   shuf_msk1 = _mm_set_epi8(
+   0xFF, 0xFF, 0xFF, 0xFF,
+   0xFF, 0xFF, /* vlan tci */
+   5, 4,  

[dpdk-dev] [PATCH v4 4/7] virtio: fill RX avail ring with blank mbufs

2015-10-22 Thread Huawei Xie
fill avail ring with blank mbufs in virtio_dev_vring_start

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/Makefile |  2 +-
 drivers/net/virtio/virtio_rxtx.c|  6 ++-
 drivers/net/virtio/virtio_rxtx.h|  3 ++
 drivers/net/virtio/virtio_rxtx_simple.c | 84 +
 4 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 930b60f..43835ba 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,7 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c

 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 7c82a6a..5162ce6 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -320,8 +320,10 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
/*******************************************
 *        Enqueue allocated buffers        *
 *******************************************/
-   error = virtqueue_enqueue_recv_refill(vq, m);
-
+   if (use_simple_rxtx)
+   error = virtqueue_enqueue_recv_refill_simple(vq, m);
+   else
+   error = virtqueue_enqueue_recv_refill(vq, m);
if (error) {
rte_pktmbuf_free(m);
break;
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index a10aa69..7d2d8fe 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -32,3 +32,6 @@
  */

 #define RTE_PMD_VIRTIO_RX_MAX_BURST 64
+
+int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+   struct rte_mbuf *m);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
new file mode 100644
index 000..cac5b9f
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -0,0 +1,84 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+int __attribute__((cold))
+virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+   struct rte_mbuf *cookie)
+{
+   struct vq_desc_extra *dxp;
+   struct vring_desc *start_dp;
+   uint16_t desc_idx;
+
+   desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+   dxp = >vq_descx[desc_idx];
+   dxp->cookie = (void *)cookie;
vq->sw_ring[desc_idx] = cookie;

[dpdk-dev] [PATCH v4 3/7] virtio: rx/tx ring layout optimization

2015-10-22 Thread Huawei Xie
Changes in V4:
- fix the error in tx ring layout chart in this commit message.

In a DPDK-based switching environment, vhost mostly runs on a dedicated core
while virtio processing in guest VMs runs on different cores.
Take RX for example: with the generic implementation, for each guest buffer,
a) the virtio driver allocates a descriptor from the free descriptor list
b) it modifies the entry of the avail ring to point to the allocated descriptor
c) after the packet is received, it frees the descriptor

When vhost fetches the avail ring, it needs to fetch the modified L1 cache
line from the virtio core, which is a heavy cost on current CPU
implementations.

The idea of this optimization is:
allocate a fixed descriptor for each entry of the avail ring, so the avail
ring always stays the same during the run.
This removes the L1 cache transfer from the virtio core to the vhost core for
the avail ring.
(Note we couldn't avoid the cache transfer for descriptors.)
Besides, descriptor allocation and free operations are eliminated.
This also makes vector processing possible, to further accelerate the
processing.

This is the layout for the avail ring (take 256 ring entries for example),
with each entry pointing to the descriptor with the same index.

 avail
  idx
    +
    |
+----+----+----+-----+-------+------+
| 0  | 1  | 2  | ... |  254  | 255  |  avail ring
+-+--+-+--+-+--+-----+---+---+--+---+
  |    |    |            |      |
  |    |    |            |      |
  v    v    v            v      v
+-+--+-+--+-+--+-----+---+---+--+---+
| 0  | 1  | 2  | ... |  254  | 255  |  desc ring
+----+----+----+-----+-------+------+
                         |
                         |
+----+----+----+-----+---+---+------+
| 0  | 1  | 2  | ... |  254  | 255  |  used ring
+----+----+----+-----+-------+------+

This is the ring layout for TX.
As we need one virtio header for each xmit packet, we have 128 slots available.

+-----+-----+-----+------++------+------+------+------+
|  0  |  1  | ... |  127 || 128  | 129  | ...  | 255  |   avail ring
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |      |      |             |
   v     v            v      v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
| 128 | 129 | ... |  255 || 128  | 129  | ...  | 255  |   desc ring for virtio_net_hdr
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |      |      |             |
   v     v            v      v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
|  0  |  1  | ... |  127 ||  0   |  1   | ...  | 127  |   desc ring for tx data
+-----+-----+-----+------++------+------+------+------+

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_rxtx.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 5c00e9d..7c82a6a 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -302,6 +302,12 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
nbufs = 0;
error = ENOSPC;

+   if (use_simple_rxtx)
+   for (i = 0; i < vq->vq_nentries; i++) {
+   vq->vq_ring.avail->ring[i] = i;
+   vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+   }
+
memset(>fake_mbuf, 0, sizeof(vq->fake_mbuf));
for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
vq->sw_ring[vq->vq_nentries + i] = >fake_mbuf;
@@ -332,6 +338,24 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
} else if (queue_type == VTNET_TQ) {
+   if (use_simple_rxtx) {
+   int mid_idx  = vq->vq_nentries >> 1;
+   for (i = 0; i < mid_idx; i++) {
+   vq->vq_ring.avail->ring[i] = i + mid_idx;
+   vq->vq_ring.desc[i + mid_idx].next = i;
+   vq->vq_ring.desc[i + mid_idx].addr =
+   vq->virtio_net_hdr_mem +
+   mid_idx * vq->hw->vtnet_hdr_size;
+   vq->vq_ring.desc[i + mid_idx].len =
+   vq->hw->vtnet_hdr_size;
+   vq->vq_ring.desc[i + mid_idx].flags =
+   VRING_DESC_F_NEXT;

[dpdk-dev] [PATCH v4 2/7] virtio: add software rx ring, fake_buf into virtqueue

2015-10-22 Thread Huawei Xie
Changes in v3:
- Remove unnecessary NULL test for rte_free
- Remove unnecessary assign of local var vq after free

Add software RX ring in virtqueue.
Add fake_mbuf in virtqueue for wraparound processing.
Use the global use_simple_rxtx flag to indicate whether simple rx/tx is enabled.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 11 ++-
 drivers/net/virtio/virtio_rxtx.c   |  7 +++
 drivers/net/virtio/virtqueue.h |  4 
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 79a3640..82676d3 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -247,8 +247,8 @@ virtio_dev_queue_release(struct virtqueue *vq) {
VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->queue_id);
VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN, 0);

+   rte_free(vq->sw_ring);
rte_free(vq);
-   vq = NULL;
}
 }

@@ -292,6 +292,9 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
dev->data->port_id, queue_idx);
vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
+   vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
+   (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
+   sizeof(vq->sw_ring[0]), RTE_CACHE_LINE_SIZE, socket_id);
} else if (queue_type == VTNET_TQ) {
snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
dev->data->port_id, queue_idx);
@@ -308,6 +311,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
return (-ENOMEM);
}
+   if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
+   PMD_INIT_LOG(ERR, "%s: Can not allocate RX soft ring",
+   __func__);
+   rte_free(vq);
+   return -ENOMEM;
+   }

vq->hw = hw;
vq->port_id = dev->data->port_id;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 9324f7f..5c00e9d 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -62,6 +62,8 @@
 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
 #endif

+static int use_simple_rxtx;
+
 static void
 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 {
@@ -299,6 +301,11 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
/* Allocate blank mbufs for the each rx descriptor */
nbufs = 0;
error = ENOSPC;
+
+   memset(>fake_mbuf, 0, sizeof(vq->fake_mbuf));
+   for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
+   vq->sw_ring[vq->vq_nentries + i] = >fake_mbuf;
+
while (!virtqueue_full(vq)) {
m = rte_rxmbuf_alloc(vq->mpool);
if (m == NULL)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 7789411..6a1ec48 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -190,6 +190,10 @@ struct virtqueue {
uint16_t vq_avail_idx;
phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */

+   struct rte_mbuf **sw_ring; /**< RX software ring. */
+   /* dummy mbuf, for wraparound when processing RX ring. */
+   struct rte_mbuf fake_mbuf;
+
/* Statistics */
uint64_tpackets;
uint64_tbytes;
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 1/7] virtio: add virtio_rxtx.h header file

2015-10-22 Thread Huawei Xie
Would move all rx/tx related declarations into this header file in the future.
Add RTE_PMD_VIRTIO_RX_MAX_BURST.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c |  1 +
 drivers/net/virtio/virtio_rxtx.c   |  1 +
 drivers/net/virtio/virtio_rxtx.h   | 34 ++
 3 files changed, 36 insertions(+)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 465d3cd..79a3640 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -61,6 +61,7 @@
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"


 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index c5b53bb..9324f7f 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -54,6 +54,7 @@
 #include "virtio_logs.h"
 #include "virtio_ethdev.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"

 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
new file mode 100644
index 000..a10aa69
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -0,0 +1,34 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define RTE_PMD_VIRTIO_RX_MAX_BURST 64
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 0/7] virtio ring layout optimization and simple rx/tx processing

2015-10-22 Thread Huawei Xie
Changes in v2:
- Remove the configure macro
- Enable simple RX/TX processing when the user specifies simple txq flags
- Reword some comments and commit messages

Changes in v3:
- Remove unnecessary NULL test for rte_free
- Remove unnecessary assign of local var after free
- Remove return at the end of void function
- Remove always_inline attribute for virtio_xmit_cleanup
- Reword some commit messages
- Add TODO in the commit message of simple tx patch

Changes in v4:
- Fix the error in virtio tx ring layout ascii chart in the commit message
- Move virtio_xmit_cleanup ahead to free descriptors earlier
- Test the mergeable feature when selecting simple rx/tx functions

In a DPDK-based switching environment, vhost mostly runs on a dedicated core
while virtio processing in guest VMs runs on other cores.
Take RX for example: with the generic implementation, for each guest buffer,
a) the virtio driver allocates a descriptor from the free descriptor list
b) it modifies the entry of the avail ring to point to the allocated descriptor
c) after the packet is received, it frees the descriptor

When vhost fetches the avail ring, it needs to fetch the modified L1 cache
line from the virtio core, which is a heavy cost on current CPU
implementations.

The idea of this optimization is:
allocate a fixed descriptor for each entry of the avail ring, so the avail
ring always stays the same during the run.
This removes the L1 cache transfer from the virtio core to the vhost core for
the avail ring.
(Note we couldn't avoid the cache transfer for descriptors.)
Besides, descriptor allocation and free operations are eliminated.
This also makes vector processing possible, to further accelerate the
processing.

This is the layout for the avail ring (take 256 ring entries for example),
with each entry pointing to the descriptor with the same index.

 avail
  idx
    +
    |
+----+----+----+-----+-------+------+
| 0  | 1  | 2  | ... |  254  | 255  |  avail ring
+-+--+-+--+-+--+-----+---+---+--+---+
  |    |    |            |      |
  |    |    |            |      |
  v    v    v            v      v
+-+--+-+--+-+--+-----+---+---+--+---+
| 0  | 1  | 2  | ... |  254  | 255  |  desc ring
+----+----+----+-----+-------+------+
                         |
                         |
+----+----+----+-----+---+---+------+
| 0  | 1  | 2  | ... |  254  | 255  |  used ring
+----+----+----+-----+-------+------+

This is the ring layout for TX.
As we need one virtio header for each xmit packet, we have 128 slots available.

+-----+-----+-----+------++------+------+------+------+
|  0  |  1  | ... |  127 || 128  | 129  | ...  | 255  |   avail ring
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |      |      |             |
   v     v            v      v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
| 128 | 129 | ... |  255 || 128  | 129  | ...  | 255  |   desc ring for virtio_net_hdr
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |      |      |             |
   v     v            v      v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
|  0  |  1  | ... |  127 ||  0   |  1   | ...  | 127  |   desc ring for tx data
+-----+-----+-----+------++------+------+------+------+


A performance boost can be observed only if the virtio backend isn't the
bottleneck, or in the VM2VM case.
There are also several vhost optimization patches to be submitted later.

Huawei Xie (7):
  virtio: add virtio_rxtx.h header file
  virtio: add software rx ring, fake_buf into virtqueue
  virtio: rx/tx ring layout optimization
  virtio: fill RX avail ring with blank mbufs
  virtio: virtio vec rx
  virtio: simple tx routine
  virtio: choose simple rx/tx func

 drivers/net/virtio/Makefile |   2 +-
 drivers/net/virtio/virtio_ethdev.c  |  12 +-
 drivers/net/virtio/virtio_ethdev.h  |   5 +
 drivers/net/virtio/virtio_rxtx.c|  56 -
 drivers/net/virtio/virtio_rxtx.h|  39 
 drivers/net/virtio/virtio_rxtx_simple.c | 401 
 drivers/net/virtio/virtqueue.h  |   5 +
 7 files changed, 516 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

-- 
1.8.1.4



[dpdk-dev] [PATCH v3 7/7] virtio: pick simple rx/tx func

2015-10-21 Thread Huawei Xie
The simple rx/tx functions are enabled when the user specifies single-segment
and no-offload support.
Mergeable RX must be disabled to use the simple rx/tx path.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_rxtx.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 947fc46..71f8cd4 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -62,6 +62,10 @@
 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
 #endif

+
+#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
+   ETH_TXQ_FLAGS_NOOFFLOADS)
+
 static int use_simple_rxtx;

 static void
@@ -471,6 +475,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -EINVAL;
}

+   /* Use simple rx/tx func if single segment and no offloads */
+   if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS) {
+   PMD_INIT_LOG(INFO, "Using simple rx/tx path");
+   dev->tx_pkt_burst = virtio_xmit_pkts_simple;
+   dev->rx_pkt_burst = virtio_recv_pkts_vec;
+   use_simple_rxtx = 1;
+   }
+
ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
nb_desc, socket_id, );
if (ret < 0) {
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 6/7] virtio: simple tx routine

2015-10-21 Thread Huawei Xie
Changes in v3:
- Remove return at the end of void function
- Remove always_inline attribute for virtio_xmit_cleanup

Bulk free mbufs when cleaning the used ring.
The shift operations on idx could be saved if vq_free_cnt meant
free slots rather than free descriptors.

TODO: rearrange vq data structure, pack the stats var together so that we
could use one vec instruction to update all of them.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.h  |  3 ++
 drivers/net/virtio/virtio_rxtx_simple.c | 93 +
 2 files changed, 96 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index d7797ab..ae2d47d 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -111,6 +111,9 @@ uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

+uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
  * frames larger than 1514 bytes. We do not yet support software LRO
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
index ef17562..a53d462 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -288,6 +288,99 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf 
**rx_pkts,
return nb_pkts_received;
 }

+#define VIRTIO_TX_FREE_THRESH 32
+#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
+#define VIRTIO_TX_FREE_NR 32
+/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift 
*/
+static inline void
+virtio_xmit_cleanup(struct virtqueue *vq)
+{
+   uint16_t i, desc_idx;
+   int nb_free = 0;
+   struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
+
+   desc_idx = (uint16_t)(vq->vq_used_cons_idx &
+   ((vq->vq_nentries >> 1) - 1));
+   free[0] = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+   nb_free = 1;
+
+   for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+   m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+   if (likely(m->pool == free[0]->pool))
+   free[nb_free++] = m;
+   else {
+   rte_mempool_put_bulk(free[0]->pool, (void **)free,
+   nb_free);
+   free[0] = m;
+   nb_free = 1;
+   }
+   }
+
+   rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+   vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
+   vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
+}
+
+uint16_t
+virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   struct virtqueue *txvq = tx_queue;
+   uint16_t nb_used;
+   uint16_t desc_idx;
+   struct vring_desc *start_dp;
+   uint16_t nb_tail, nb_commit;
+   int i;
+   uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1;
+
+   nb_used = VIRTQUEUE_NUSED(txvq);
+   rte_compiler_barrier();
+
+   nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), nb_pkts);
+   desc_idx = (uint16_t) (txvq->vq_avail_idx & desc_idx_max);
+   start_dp = txvq->vq_ring.desc;
+   nb_tail = (uint16_t) (desc_idx_max + 1 - desc_idx);
+
+   if (nb_used >= VIRTIO_TX_FREE_THRESH)
+   virtio_xmit_cleanup(tx_queue);
+
+   if (nb_commit >= nb_tail) {
+   for (i = 0; i < nb_tail; i++)
+   txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+   for (i = 0; i < nb_tail; i++) {
+   start_dp[desc_idx].addr =
+   RTE_MBUF_DATA_DMA_ADDR(*tx_pkts);
+   start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
+   tx_pkts++;
+   desc_idx++;
+   }
+   nb_commit -= nb_tail;
+   desc_idx = 0;
+   }
+   for (i = 0; i < nb_commit; i++)
+   txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+   for (i = 0; i < nb_commit; i++) {
+   start_dp[desc_idx].addr = RTE_MBUF_DATA_DMA_ADDR(*tx_pkts);
+   start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
+   tx_pkts++;
+   desc_idx++;
+   }
+
+   rte_compiler_barrier();
+
+   txvq->vq_free_cnt -= (uint16_t)(nb_pkts << 1);
+   txvq->vq_avail_idx += nb_pkts;
+   txvq->vq_ring.avail->idx = txvq->vq_avail_idx;
+   txvq->packets += nb_pkts;
+
+   if (likely(nb_pkts)) {
+   if (unlikely(virtqueue_kick_prepare(txvq)))
+   virtqueue_notify(txvq);
+   }
+
+   return nb_pkts;
+}
+
 int __attribute__((cold))
 virtio_rxq_vec_setup(struct virtqueue *rxq)
 {
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 5/7] virtio: virtio vec rx

2015-10-21 Thread Huawei Xie
With the fixed avail ring, we don't need to get the desc idx from the avail
ring; the virtio driver only has to deal with the desc ring.
This patch uses vector instructions to accelerate processing of the desc ring.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.h  |   2 +
 drivers/net/virtio/virtio_rxtx.c|   3 +
 drivers/net/virtio/virtio_rxtx.h|   2 +
 drivers/net/virtio/virtio_rxtx_simple.c | 224 
 drivers/net/virtio/virtqueue.h  |   1 +
 5 files changed, 232 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index 9026d42..d7797ab 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -108,6 +108,8 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct 
rte_mbuf **rx_pkts,
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+   uint16_t nb_pkts);

 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 5162ce6..947fc46 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -432,6 +432,9 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
vq->mpool = mp;

dev->data->rx_queues[queue_idx] = vq;
+
+   virtio_rxq_vec_setup(vq);
+
return 0;
 }

diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 7d2d8fe..831e492 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -33,5 +33,7 @@

 #define RTE_PMD_VIRTIO_RX_MAX_BURST 64

+int virtio_rxq_vec_setup(struct virtqueue *rxq);
+
 int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *m);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
index cac5b9f..ef17562 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -58,6 +58,10 @@
 #include "virtqueue.h"
 #include "virtio_rxtx.h"

+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
 int __attribute__((cold))
 virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *cookie)
@@ -82,3 +86,223 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,

return 0;
 }
+
+static inline void
+virtio_rxq_rearm_vec(struct virtqueue *rxvq)
+{
+   int i;
+   uint16_t desc_idx;
+   struct rte_mbuf **sw_ring;
+   struct vring_desc *start_dp;
+   int ret;
+
+   desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1);
+   sw_ring = >sw_ring[desc_idx];
+   start_dp = >vq_ring.desc[desc_idx];
+
+   ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
+   RTE_VIRTIO_VPMD_RX_REARM_THRESH);
+   if (unlikely(ret)) {
+   rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
+   RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   return;
+   }
+
+   for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
+   uintptr_t p;
+
+   p = (uintptr_t)_ring[i]->rearm_data;
+   *(uint64_t *)p = rxvq->mbuf_initializer;
+
+   start_dp[i].addr =
+   (uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr +
+   RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
+   start_dp[i].len = sw_ring[i]->buf_len -
+   RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+   }
+
+   rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   vq_update_avail_idx(rxvq);
+}
+
+/* virtio vPMD receive routine, only accepts nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - if nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+   uint16_t nb_pkts)
+{
+   struct virtqueue *rxvq = rx_queue;
+   uint16_t nb_used;
+   uint16_t desc_idx;
+   struct vring_used_elem *rused;
+   struct rte_mbuf **sw_ring;
+   struct rte_mbuf **sw_ring_end;
+   uint16_t nb_pkts_received;
+   __m128i shuf_msk1, shuf_msk2, len_adjust;
+
+   shuf_msk1 = _mm_set_epi8(
+   0xFF, 0xFF, 0xFF, 0xFF,
+   0xFF, 0xFF, /* vlan tci */
+   5, 4,  

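For illustration, a minimal standalone sketch of the shuffle technique
virtio_recv_pkts_vec builds on: SSSE3 _mm_shuffle_epi8 scatters the 'len'
field of a used-ring element into the packet-length slots of an mbuf-like
metadata block in one instruction. The struct layout and byte offsets below
are demo assumptions, not the real rte_mbuf layout (build with gcc -mssse3):

#include <stdint.h>
#include <stdio.h>
#include <tmmintrin.h>  /* SSSE3: _mm_shuffle_epi8 */

/* Illustrative RX metadata block; NOT the real rte_mbuf layout. */
struct rx_meta {
	uint16_t data_len;  /* bytes 0-1 */
	uint16_t vlan_tci;  /* bytes 2-3 */
	uint32_t pkt_len;   /* bytes 4-7 */
	uint64_t pad;       /* bytes 8-15 */
};

int main(void)
{
	/* One used-ring element: 32-bit id, 32-bit len, padded to 16 bytes. */
	uint32_t used_elem[4] = { 0 /* id */, 128 /* len */, 0, 0 };

	/* Each mask byte selects a source byte; 0xFF writes zero. Bytes 4-5
	 * of the element (the low half of 'len') land in both data_len and
	 * pkt_len; everything else is zeroed. */
	__m128i shuf = _mm_set_epi8(
		0xFF, 0xFF, 0xFF, 0xFF,  /* pad (hi) */
		0xFF, 0xFF, 0xFF, 0xFF,  /* pad (lo) */
		0xFF, 0xFF, 5, 4,        /* pkt_len, upper half zeroed */
		0xFF, 0xFF, 5, 4);       /* vlan_tci = 0, data_len */
	__m128i elem = _mm_loadu_si128((const __m128i *)used_elem);
	__m128i meta = _mm_shuffle_epi8(elem, shuf);

	struct rx_meta m;
	_mm_storeu_si128((__m128i *)&m, meta);
	printf("data_len=%u pkt_len=%u\n", m.data_len, m.pkt_len); /* 128 128 */
	return 0;
}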
[dpdk-dev] [PATCH v3 4/7] virtio: fill RX avail ring with blank mbufs

2015-10-21 Thread Huawei Xie
Fill the avail ring with blank mbufs in virtio_dev_vring_start.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/Makefile |  2 +-
 drivers/net/virtio/virtio_rxtx.c|  6 ++-
 drivers/net/virtio/virtio_rxtx.h|  3 ++
 drivers/net/virtio/virtio_rxtx_simple.c | 84 +
 4 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 930b60f..43835ba 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,7 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c

 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 7c82a6a..5162ce6 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -320,8 +320,10 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
/******************************************
*         Enqueue allocated buffers       *
*******************************************/
-   error = virtqueue_enqueue_recv_refill(vq, m);
-
+   if (use_simple_rxtx)
+   error = virtqueue_enqueue_recv_refill_simple(vq, m);
+   else
+   error = virtqueue_enqueue_recv_refill(vq, m);
if (error) {
rte_pktmbuf_free(m);
break;
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index a10aa69..7d2d8fe 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -32,3 +32,6 @@
  */

 #define RTE_PMD_VIRTIO_RX_MAX_BURST 64
+
+int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+   struct rte_mbuf *m);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
new file mode 100644
index 000..cac5b9f
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -0,0 +1,84 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+int __attribute__((cold))
+virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+   struct rte_mbuf *cookie)
+{
+   struct vq_desc_extra *dxp;
+   struct vring_desc *start_dp;
+   uint16_t desc_idx;
+
+   desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+   dxp = &vq->vq_descx[desc_idx];
+   dxp->cookie = (void *)cookie;
+   vq->sw_ring[desc_idx] = cookie;
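+
+   /* The archived message is truncated here; the remainder below is a
+    * hedged reconstruction (not the verbatim patch text), based on the
+    * identical address math in the vector rearm path of patch 5/7:
+    * point the fixed-index descriptor at the new mbuf's buffer, leaving
+    * room for the virtio_net_hdr in front of the headroom, then publish
+    * exactly one slot. */
+   start_dp = vq->vq_ring.desc;
+   start_dp[desc_idx].addr =
+   (uint64_t)((uintptr_t)cookie->buf_physaddr +
+   RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
+   start_dp[desc_idx].len = cookie->buf_len -
+   RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+
+   vq->vq_free_cnt--;
+   vq->vq_avail_idx++;
+
+   return 0;
+}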

[dpdk-dev] [PATCH v3 3/7] virtio: rx/tx ring layout optimization

2015-10-21 Thread Huawei Xie
In a DPDK based switching environment, vhost mostly runs on a dedicated core
while virtio processing in guest VMs runs on other cores.
Take RX for example. With the generic implementation, for each guest buffer,
a) the virtio driver allocates a descriptor from the free descriptor list
b) it modifies the entry of the avail ring to point to the allocated descriptor
c) after the packet is received, it frees the descriptor

When vhost fetches the avail ring, it needs to fetch the modified L1 cache line
from the virtio core, which is a heavy cost on current CPU implementations.

The idea of this optimization is:
allocate a fixed descriptor for each entry of the avail ring, so the avail ring
will always be the same during the run.
This removes the L1 cache line transfer from the virtio core to the vhost core
for the avail ring.
(Note we couldn't avoid the cache transfer for the descriptors.)
Besides, descriptor allocation and free operations are eliminated.
This also makes vector processing possible, to further accelerate the
processing.

This is the layout for the avail ring (taking 256 ring entries for example),
with each entry pointing to the descriptor with the same index.

                    avail
                    idx
                      |
+----+----+----+---------+-----+-----+
| 0  | 1  | 2  |   ...   | 254 | 255 |   avail ring
+-+--+-+--+-+--+---------+-+---+-+---+
  |    |    |              |     |
  v    v    v              v     v
+-+--+-+--+-+--+---------+-+---+-+---+
| 0  | 1  | 2  |   ...   | 254 | 255 |   desc ring
+----+----+----+---------+-----+-----+
                    |
                    |
+----+----+----+---------+-----+-----+
| 0  | 1  | 2  |   ...   | 254 | 255 |   used ring
+----+----+----+---------+-----+-----+

This is the ring layout for TX.
As we need one virtio header for each xmit packet, we have 128 slots available.

                            ||
                            ||
+-----+-----+-----+------++------+------+------+------+
|  0  |  1  | ... |  127 || 128  | 129  | ...  | 255  |   avail ring
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |  ||  |      |             |
   v     v            v  ||  v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
| 128 | 129 | ... | 255  || 128  | 129  | ...  | 255  |   desc ring for virtio_net_hdr
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |  ||  |      |             |
   v     v            v  ||  v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
|  0  |  1  | ... |  127 ||  0   |  1   | ...  | 127  |   desc ring for tx data
+-----+-----+-----+------++------+------+------+------+
                            ||
                            ||
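Going one step further, here is a standalone sketch of what this fixed pairing
buys at transmit time: per packet, the driver only fills the data descriptor
and bumps the avail index, since the header descriptors and the avail ring
entries never change. The actual simple TX routine lands in patch 6/7; the
code below is an illustration under the assumptions of 256 ring entries and
VRING_DESC_F_NEXT == 1, not the patch's verbatim code:

#include <stdint.h>
#include <stdio.h>

#define NENTRIES 256
#define MID (NENTRIES >> 1)  /* 128 */

struct vring_desc { uint64_t addr; uint32_t len; uint16_t flags, next; };

int main(void)
{
	static struct vring_desc desc[NENTRIES];
	static uint16_t avail_ring[NENTRIES];
	uint16_t i, avail_idx = 0;

	/* One-time init, mirroring the patch: avail[i] points to the fixed
	 * header desc i + 128, which chains to data desc i. (The second half
	 * of the avail ring maps the same header descs; omitted here.) */
	for (i = 0; i < MID; i++) {
		avail_ring[i] = i + MID;
		desc[i + MID].next = i;
		desc[i + MID].len = 10;   /* vtnet_hdr_size, say */
		desc[i + MID].flags = 1;  /* VRING_DESC_F_NEXT */
	}

	/* Per-packet work afterwards: fill the data desc, bump the index.
	 * No descriptor alloc/free, no avail-ring entry writes. */
	uint16_t slot = avail_idx & (MID - 1);
	desc[slot].addr = 0x100000;  /* packet buffer physical address */
	desc[slot].len = 64;         /* packet length */
	avail_idx++;

	printf("avail[%u] -> hdr desc %u -> data desc %u (len %u)\n",
	       slot, avail_ring[slot], desc[avail_ring[slot]].next,
	       desc[desc[avail_ring[slot]].next].len);
	return 0;
}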

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_rxtx.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 5c00e9d..7c82a6a 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -302,6 +302,12 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
nbufs = 0;
error = ENOSPC;

+   if (use_simple_rxtx)
+   for (i = 0; i < vq->vq_nentries; i++) {
+   vq->vq_ring.avail->ring[i] = i;
+   vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+   }
+
memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;
@@ -332,6 +338,24 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
} else if (queue_type == VTNET_TQ) {
+   if (use_simple_rxtx) {
+   int mid_idx  = vq->vq_nentries >> 1;
+   for (i = 0; i < mid_idx; i++) {
+   vq->vq_ring.avail->ring[i] = i + mid_idx;
+   vq->vq_ring.desc[i + mid_idx].next = i;
+   vq->vq_ring.desc[i + mid_idx].addr =
+   vq->virtio_net_hdr_mem +
+   mid_idx * vq->hw->vtnet_hdr_size;
+   vq->vq_ring.desc[i + mid_idx].len =
+   vq->hw->vtnet_hdr_size;
+   vq->vq_ring.desc[i + mid_idx].flags =
+   VRING_DESC_F_NEXT;
+   vq->vq_ring.desc[i].flags = 0;

[dpdk-dev] [PATCH v3 2/7] virtio: add software rx ring, fake_buf into virtqueue

2015-10-21 Thread Huawei Xie
Changes in v3:
- Remove unnecessary NULL test for rte_free
- Remove unnecessary assign of local var vq after free

Add a software RX ring in virtqueue.
Add a fake_mbuf in virtqueue for wraparound processing.
Use the global use_simple_rxtx flag to indicate whether simple rx/tx is enabled.
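The fake_mbuf matters because the later vector RX path reads sw_ring entries
in fixed-size groups and can run past the ring's end before the burst loop
stops. A minimal standalone sketch of the padding idea (sizes shrunk for the
demo; not the driver's exact code):

#include <stdio.h>

#define RING_SIZE 8  /* ring entries (power of two) */
#define MAX_BURST 4  /* tail pad, like RTE_PMD_VIRTIO_RX_MAX_BURST */

struct mbuf { int id; };

int main(void)
{
	static struct mbuf fake;  /* dummy sink, like vq->fake_mbuf */
	struct mbuf real[RING_SIZE];
	struct mbuf *sw_ring[RING_SIZE + MAX_BURST];
	int i;

	for (i = 0; i < RING_SIZE; i++) {
		real[i].id = i;
		sw_ring[i] = &real[i];
	}
	/* Point the tail pad at the dummy mbuf so a burst starting near the
	 * end of the ring can over-read harmlessly before wrapping. */
	for (i = 0; i < MAX_BURST; i++)
		sw_ring[RING_SIZE + i] = &fake;

	/* A burst starting at index 6 touches slots 6..9 without bounds
	 * checks; slots 8 and 9 hit the dummy instead of stray memory. */
	for (i = 6; i < 6 + MAX_BURST; i++)
		printf("slot %d -> %s\n", i,
		       sw_ring[i] == &fake ? "fake" : "real");
	return 0;
}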

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 11 ++-
 drivers/net/virtio/virtio_rxtx.c   |  7 +++
 drivers/net/virtio/virtqueue.h |  4 
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 79a3640..82676d3 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -247,8 +247,8 @@ virtio_dev_queue_release(struct virtqueue *vq) {
VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->queue_id);
VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN, 0);

+   rte_free(vq->sw_ring);
rte_free(vq);
-   vq = NULL;
}
 }

@@ -292,6 +292,9 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
dev->data->port_id, queue_idx);
vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
+   vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
+   (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
+   sizeof(vq->sw_ring[0]), RTE_CACHE_LINE_SIZE, socket_id);
} else if (queue_type == VTNET_TQ) {
snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
dev->data->port_id, queue_idx);
@@ -308,6 +311,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
return (-ENOMEM);
}
+   if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
+   PMD_INIT_LOG(ERR, "%s: Can not allocate RX soft ring",
+   __func__);
+   rte_free(vq);
+   return -ENOMEM;
+   }

vq->hw = hw;
vq->port_id = dev->data->port_id;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 9324f7f..5c00e9d 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -62,6 +62,8 @@
 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
 #endif

+static int use_simple_rxtx;
+
 static void
 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 {
@@ -299,6 +301,11 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
/* Allocate blank mbufs for the each rx descriptor */
nbufs = 0;
error = ENOSPC;
+
+   memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
+   for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
+   vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;
+
while (!virtqueue_full(vq)) {
m = rte_rxmbuf_alloc(vq->mpool);
if (m == NULL)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 7789411..6a1ec48 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -190,6 +190,10 @@ struct virtqueue {
uint16_t vq_avail_idx;
phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */

+   struct rte_mbuf **sw_ring; /**< RX software ring. */
+   /* dummy mbuf, for wraparound when processing RX ring. */
+   struct rte_mbuf fake_mbuf;
+
/* Statistics */
uint64_tpackets;
uint64_tbytes;
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 1/7] virtio: add virtio_rxtx.h header file

2015-10-21 Thread Huawei Xie
All rx/tx related declarations will be moved into this header file in the future.
Add RTE_PMD_VIRTIO_RX_MAX_BURST.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c |  1 +
 drivers/net/virtio/virtio_rxtx.c   |  1 +
 drivers/net/virtio/virtio_rxtx.h   | 34 ++
 3 files changed, 36 insertions(+)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 465d3cd..79a3640 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -61,6 +61,7 @@
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"


 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index c5b53bb..9324f7f 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -54,6 +54,7 @@
 #include "virtio_logs.h"
 #include "virtio_ethdev.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"

 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
new file mode 100644
index 000..a10aa69
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -0,0 +1,34 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define RTE_PMD_VIRTIO_RX_MAX_BURST 64
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 0/7] virtio ring layout optimization and simple rx/tx processing

2015-10-21 Thread Huawei Xie
Changes in v2:
- Remove the configure macro
- Enable simple R/TX processing when user specifies simple txq flags
- Reword some comments and commit messages

Changes in v3:
- Remove unnecessary NULL test for rte_free
- Remove unnecessary assign of local var after free
- Remove return at the end of void function
- Remove always_inline attribute for virtio_xmit_cleanup
- Reword some commit messages
- Add TODO in the commit message of simple tx patch

In a DPDK based switching environment, vhost mostly runs on a dedicated core
while virtio processing in guest VMs runs on other cores.
Take RX for example. With the generic implementation, for each guest buffer,
a) the virtio driver allocates a descriptor from the free descriptor list
b) it modifies the entry of the avail ring to point to the allocated descriptor
c) after the packet is received, it frees the descriptor

When vhost fetches the avail ring, it needs to fetch the modified L1 cache line
from the virtio core, which is a heavy cost on current CPU implementations.

The idea of this optimization is:
allocate a fixed descriptor for each entry of the avail ring, so the avail ring
will always be the same during the run.
This removes the L1 cache line transfer from the virtio core to the vhost core
for the avail ring.
(Note we couldn't avoid the cache transfer for the descriptors.)
Besides, descriptor allocation and free operations are eliminated.
This also makes vector processing possible, to further accelerate the
processing.

This is the layout for the avail ring (taking 256 ring entries for example),
with each entry pointing to the descriptor with the same index.

                    avail
                    idx
                      |
+----+----+----+---------+-----+-----+
| 0  | 1  | 2  |   ...   | 254 | 255 |   avail ring
+-+--+-+--+-+--+---------+-+---+-+---+
  |    |    |              |     |
  v    v    v              v     v
+-+--+-+--+-+--+---------+-+---+-+---+
| 0  | 1  | 2  |   ...   | 254 | 255 |   desc ring
+----+----+----+---------+-----+-----+
                    |
                    |
+----+----+----+---------+-----+-----+
| 0  | 1  | 2  |   ...   | 254 | 255 |   used ring
+----+----+----+---------+-----+-----+

This is the ring layout for TX.
As we need one virtio header for each xmit packet, we have 128 slots available.

                            ||
                            ||
+-----+-----+-----+------++------+------+------+------+
|  0  |  1  | ... |  127 || 128  | 129  | ...  | 255  |   avail ring
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |  ||  |      |             |
   v     v            v  ||  v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
| 128 | 129 | ... | 255  || 128  | 129  | ...  | 255  |   desc ring for virtio_net_hdr
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |  ||  |      |             |
   v     v            v  ||  v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
|  0  |  1  | ... |  127 ||  0   |  1   | ...  | 127  |   desc ring for tx data
+-----+-----+-----+------++------+------+------+------+
                            ||
                            ||


A performance boost can be observed only if the virtio backend isn't the
bottleneck, or in the VM2VM case.
There are also several vhost optimization patches to be submitted later.

Huawei Xie (7):
  virtio: add virtio_rxtx.h header file
  virtio: add software rx ring, fake_buf into virtqueue
  virtio: rx/tx ring layout optimization
  virtio: fill RX avail ring with blank mbufs
  virtio: virtio vec rx
  virtio: simple tx routine
  virtio: choose simple rx/tx func

 drivers/net/virtio/Makefile |   2 +-
 drivers/net/virtio/virtio_ethdev.c  |  12 +-
 drivers/net/virtio/virtio_ethdev.h  |   5 +
 drivers/net/virtio/virtio_rxtx.c|  53 -
 drivers/net/virtio/virtio_rxtx.h|  39 
 drivers/net/virtio/virtio_rxtx_simple.c | 401 
 drivers/net/virtio/virtqueue.h  |   5 +
 7 files changed, 513 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

-- 
1.8.1.4



[dpdk-dev] [PATCH v2 7/7] virtio: pick simple rx/tx func

2015-10-18 Thread Huawei Xie
The simple rx/tx functions are enabled when the user specifies single-segment,
no-offload TX queue flags.
Mergeable RX buffers must be disabled to use the simple rx/tx path.
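For context, this is roughly how an application opts in from the ethdev side
(a sketch assuming the DPDK 2.x API; the port, queue and descriptor numbers
are arbitrary):

#include <rte_ethdev.h>
#include <rte_lcore.h>

/* Request single-segment, no-offload TX so the virtio PMD picks
 * virtio_xmit_pkts_simple / virtio_recv_pkts_vec at queue setup. */
static int
setup_simple_txq(uint8_t port_id)
{
	struct rte_eth_txconf txconf = {
		.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
			     ETH_TXQ_FLAGS_NOOFFLOADS,
	};

	return rte_eth_tx_queue_setup(port_id, 0 /* queue */, 512 /* descs */,
	                              rte_socket_id(), &txconf);
}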

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_rxtx.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 947fc46..71f8cd4 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -62,6 +62,10 @@
 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
 #endif

+
+#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
+   ETH_TXQ_FLAGS_NOOFFLOADS)
+
 static int use_simple_rxtx;

 static void
@@ -471,6 +475,14 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -EINVAL;
}

+   /* Use simple rx/tx func if single segment and no offloads */
+   if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS) {
+   PMD_INIT_LOG(INFO, "Using simple rx/tx path");
+   dev->tx_pkt_burst = virtio_xmit_pkts_simple;
+   dev->rx_pkt_burst = virtio_recv_pkts_vec;
+   use_simple_rxtx = 1;
+   }
+
ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
nb_desc, socket_id, &vq);
if (ret < 0) {
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 5/7] virtio: virtio vec rx

2015-10-18 Thread Huawei Xie
With a fixed avail ring, we don't need to fetch the desc idx from the avail
ring; the virtio driver only has to deal with the desc ring.
This patch uses vector instructions to accelerate processing of the desc ring.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.h  |   2 +
 drivers/net/virtio/virtio_rxtx.c|   3 +
 drivers/net/virtio/virtio_rxtx.h|   2 +
 drivers/net/virtio/virtio_rxtx_simple.c | 224 
 drivers/net/virtio/virtqueue.h  |   1 +
 5 files changed, 232 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index 9026d42..d7797ab 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -108,6 +108,8 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct 
rte_mbuf **rx_pkts,
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+   uint16_t nb_pkts);

 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 5162ce6..947fc46 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -432,6 +432,9 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
vq->mpool = mp;

dev->data->rx_queues[queue_idx] = vq;
+
+   virtio_rxq_vec_setup(vq);
+
return 0;
 }

diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 7d2d8fe..831e492 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -33,5 +33,7 @@

 #define RTE_PMD_VIRTIO_RX_MAX_BURST 64

+int virtio_rxq_vec_setup(struct virtqueue *rxq);
+
 int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *m);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
index cac5b9f..ef17562 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -58,6 +58,10 @@
 #include "virtqueue.h"
 #include "virtio_rxtx.h"

+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
 int __attribute__((cold))
 virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *cookie)
@@ -82,3 +86,223 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,

return 0;
 }
+
+static inline void
+virtio_rxq_rearm_vec(struct virtqueue *rxvq)
+{
+   int i;
+   uint16_t desc_idx;
+   struct rte_mbuf **sw_ring;
+   struct vring_desc *start_dp;
+   int ret;
+
+   desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1);
+   sw_ring = &rxvq->sw_ring[desc_idx];
+   start_dp = &rxvq->vq_ring.desc[desc_idx];
+
+   ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
+   RTE_VIRTIO_VPMD_RX_REARM_THRESH);
+   if (unlikely(ret)) {
+   rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
+   RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   return;
+   }
+
+   for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
+   uintptr_t p;
+
+   p = (uintptr_t)&sw_ring[i]->rearm_data;
+   *(uint64_t *)p = rxvq->mbuf_initializer;
+
+   start_dp[i].addr =
+   (uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr +
+   RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
+   start_dp[i].len = sw_ring[i]->buf_len -
+   RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+   }
+
+   rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+   vq_update_avail_idx(rxvq);
+}
+
+/* virtio vPMD receive routine, only accepts nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+   uint16_t nb_pkts)
+{
+   struct virtqueue *rxvq = rx_queue;
+   uint16_t nb_used;
+   uint16_t desc_idx;
+   struct vring_used_elem *rused;
+   struct rte_mbuf **sw_ring;
+   struct rte_mbuf **sw_ring_end;
+   uint16_t nb_pkts_received;
+   __m128i shuf_msk1, shuf_msk2, len_adjust;
+
+   shuf_msk1 = _mm_set_epi8(
+   0xFF, 0xFF, 0xFF, 0xFF,
+   0xFF, 0xFF, /* vlan tci */
+   5, 4,  

[dpdk-dev] [PATCH v2 4/7] virtio: fill RX avail ring with blank mbufs

2015-10-18 Thread Huawei Xie
Fill the avail ring with blank mbufs in virtio_dev_vring_start.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/Makefile |  2 +-
 drivers/net/virtio/virtio_rxtx.c|  6 ++-
 drivers/net/virtio/virtio_rxtx.h|  3 ++
 drivers/net/virtio/virtio_rxtx_simple.c | 84 +
 4 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 930b60f..43835ba 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,7 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c

 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 7c82a6a..5162ce6 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -320,8 +320,10 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
/******************************************
*         Enqueue allocated buffers       *
*******************************************/
-   error = virtqueue_enqueue_recv_refill(vq, m);
-
+   if (use_simple_rxtx)
+   error = virtqueue_enqueue_recv_refill_simple(vq, m);
+   else
+   error = virtqueue_enqueue_recv_refill(vq, m);
if (error) {
rte_pktmbuf_free(m);
break;
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index a10aa69..7d2d8fe 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -32,3 +32,6 @@
  */

 #define RTE_PMD_VIRTIO_RX_MAX_BURST 64
+
+int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+   struct rte_mbuf *m);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c 
b/drivers/net/virtio/virtio_rxtx_simple.c
new file mode 100644
index 000..cac5b9f
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -0,0 +1,84 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+int __attribute__((cold))
+virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+   struct rte_mbuf *cookie)
+{
+   struct vq_desc_extra *dxp;
+   struct vring_desc *start_dp;
+   uint16_t desc_idx;
+
+   desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+   dxp = &vq->vq_descx[desc_idx];
+   dxp->cookie = (void *)cookie;
+   vq->sw_ring[desc_idx] = cookie;

[dpdk-dev] [PATCH v2 3/7] virtio: rx/tx ring layout optimization

2015-10-18 Thread Huawei Xie
In a DPDK based switching environment, vhost mostly runs on a dedicated core
while virtio processing in guest VMs runs on different cores.
Take RX for example. With the generic implementation, for each guest buffer,
a) the virtio driver allocates a descriptor from the free descriptor list
b) it modifies the entry of the avail ring to point to the allocated descriptor
c) after the packet is received, it frees the descriptor

When vhost fetches the avail ring, it needs to fetch the modified L1 cache line
from the virtio core, which is a heavy cost on current CPU implementations.

The idea of this optimization is:
allocate a fixed descriptor for each entry of the avail ring,
so the avail ring will always be the same during the run.
This removes the L1 cache line transfer from the virtio core to the vhost core
for the avail ring.
Besides, no descriptor free and allocation is needed.
This also makes vector processing possible, to further accelerate the
processing.

This is the layout for the avail ring (taking 256 ring entries for example),
with each entry pointing to the descriptor with the same index.

                    avail
                    idx
                      |
+----+----+----+---------+-----+-----+
| 0  | 1  | 2  |   ...   | 254 | 255 |   avail ring
+-+--+-+--+-+--+---------+-+---+-+---+
  |    |    |              |     |
  v    v    v              v     v
+-+--+-+--+-+--+---------+-+---+-+---+
| 0  | 1  | 2  |   ...   | 254 | 255 |   desc ring
+----+----+----+---------+-----+-----+
                    |
                    |
+----+----+----+---------+-----+-----+
| 0  | 1  | 2  |   ...   | 254 | 255 |   used ring
+----+----+----+---------+-----+-----+

This is the ring layout for TX.
As we need one virtio header for each xmit packet, we have 128 slots available.

                            ||
                            ||
+-----+-----+-----+------++------+------+------+------+
|  0  |  1  | ... |  127 || 128  | 129  | ...  | 255  |   avail ring
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |  ||  |      |             |
   v     v            v  ||  v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
| 128 | 129 | ... | 255  || 128  | 129  | ...  | 255  |   desc ring for virtio_net_hdr
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
   |     |            |  ||  |      |             |
   v     v            v  ||  v      v             v
+--+--+--+--+-----+---+--++--+---+--+---+------+--+---+
|  0  |  1  | ... |  127 ||  0   |  1   | ...  | 127  |   desc ring for tx data
+-----+-----+-----+------++------+------+------+------+
                            ||
                            ||

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_rxtx.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 5c00e9d..7c82a6a 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -302,6 +302,12 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
nbufs = 0;
error = ENOSPC;

+   if (use_simple_rxtx)
+   for (i = 0; i < vq->vq_nentries; i++) {
+   vq->vq_ring.avail->ring[i] = i;
+   vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+   }
+
memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;
@@ -332,6 +338,24 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
} else if (queue_type == VTNET_TQ) {
+   if (use_simple_rxtx) {
+   int mid_idx  = vq->vq_nentries >> 1;
+   for (i = 0; i < mid_idx; i++) {
+   vq->vq_ring.avail->ring[i] = i + mid_idx;
+   vq->vq_ring.desc[i + mid_idx].next = i;
+   vq->vq_ring.desc[i + mid_idx].addr =
+   vq->virtio_net_hdr_mem +
+   mid_idx * vq->hw->vtnet_hdr_size;
+   vq->vq_ring.desc[i + mid_idx].len =
+   vq->hw->vtnet_hdr_size;
+   vq->vq_ring.desc[i + mid_idx].flags =
+   VRING_DESC_F_NEXT;
+   vq->vq_ring.desc[i].flags = 0;
+   }
+   for (i = m

[dpdk-dev] [PATCH v2 2/7] virtio: add software rx ring, fake_buf into virtqueue

2015-10-18 Thread Huawei Xie
Add a software RX ring in virtqueue.
Add a fake_mbuf in virtqueue for wraparound processing.
Use the global use_simple_rxtx flag to indicate whether simple rx/tx is enabled.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 12 
 drivers/net/virtio/virtio_rxtx.c   |  7 +++
 drivers/net/virtio/virtqueue.h |  4 
 3 files changed, 23 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 79a3640..3b7b841 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -247,6 +247,9 @@ virtio_dev_queue_release(struct virtqueue *vq) {
VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->queue_id);
VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN, 0);

+   if (vq->sw_ring)
+   rte_free(vq->sw_ring);
+
rte_free(vq);
vq = NULL;
}
@@ -292,6 +295,9 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
dev->data->port_id, queue_idx);
vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
+   vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
+   (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
+   sizeof(vq->sw_ring[0]), RTE_CACHE_LINE_SIZE, socket_id);
} else if (queue_type == VTNET_TQ) {
snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
dev->data->port_id, queue_idx);
@@ -308,6 +314,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
return (-ENOMEM);
}
+   if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
+   PMD_INIT_LOG(ERR, "%s: Can not allocate RX soft ring",
+   __func__);
+   rte_free(vq);
+   return -ENOMEM;
+   }

vq->hw = hw;
vq->port_id = dev->data->port_id;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 9324f7f..5c00e9d 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -62,6 +62,8 @@
 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
 #endif

+static int use_simple_rxtx;
+
 static void
 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 {
@@ -299,6 +301,11 @@ virtio_dev_vring_start(struct virtqueue *vq, int 
queue_type)
/* Allocate blank mbufs for the each rx descriptor */
nbufs = 0;
error = ENOSPC;
+
+   memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
+   for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
+   vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;
+
while (!virtqueue_full(vq)) {
m = rte_rxmbuf_alloc(vq->mpool);
if (m == NULL)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 7789411..6a1ec48 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -190,6 +190,10 @@ struct virtqueue {
uint16_t vq_avail_idx;
phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */

+   struct rte_mbuf **sw_ring; /**< RX software ring. */
+   /* dummy mbuf, for wraparound when processing RX ring. */
+   struct rte_mbuf fake_mbuf;
+
/* Statistics */
uint64_tpackets;
uint64_tbytes;
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 1/7] virtio: add virtio_rxtx.h header file

2015-10-18 Thread Huawei Xie
All rx/tx related code will be moved into this header file in the future.
Add RTE_PMD_VIRTIO_RX_MAX_BURST.

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c |  1 +
 drivers/net/virtio/virtio_rxtx.c   |  1 +
 drivers/net/virtio/virtio_rxtx.h   | 34 ++
 3 files changed, 36 insertions(+)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 465d3cd..79a3640 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -61,6 +61,7 @@
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"


 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index c5b53bb..9324f7f 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -54,6 +54,7 @@
 #include "virtio_logs.h"
 #include "virtio_ethdev.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"

 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
new file mode 100644
index 000..a10aa69
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -0,0 +1,34 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define RTE_PMD_VIRTIO_RX_MAX_BURST 64
-- 
1.8.1.4


