[dpdk-dev] [PATCH v2 00/11] qemu vhost-user support

2015-02-12 Thread Huawei Xie
vhost-user supports passing vring information to a separate vhost-enabled
user space process, normally a user space vSwitch, through a unix domain socket.

In previous DPDK versions, we implemented a user space character device driver,
vhost-cuse, in the user space DPDK process. vring information is passed to the
cuse driver through ioctl calls, including eventfds for interrupt injection and
host notification. A kernel module was developed to copy these fds from the
qemu process into our process. We also need some tricks to map guest memory.
(TODO: kickfd/callfd is reversed which causes confusion)

known issue in vhost-user implementation in QEMU, reported by haifeng.lin at 
huawei.com
* QEMU doesn't send correct memory region information with multiple numa node 
configuration
http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg01454.html

Thanks Tetsuya for reporting the issue that "FD_ISSET would crash when receive 
-1
as fd on Ubuntu 14.04".

Huawei Xie (11):
 enable VIRTIO_NET_F_CTRL_RX
 create vhost_cuse directory and move vhost-net-cdev.c into vhost_cuse
 rename vhost-net-cdev.h to vhost-net.h
 move fd copying(from qemu process into vhost process) to eventfd_copy.c
 copy host_memory_map from virtio-net.c to a new file virtio-net-cdev.c
 make host_memory_map a more generic function.
 implement cuse_set_memory_table in virtio-net-cdev.c
 add select based event driven processing
 vhost user support
 support dev->ifname
 support calling rte_vhost_driver_register after rte_vhost_driver_session_start

 lib/librte_vhost/Makefile |   8 +-
 lib/librte_vhost/rte_virtio_net.h |   5 +-
 lib/librte_vhost/vhost-net-cdev.c | 389 
 lib/librte_vhost/vhost-net-cdev.h | 113 --
 lib/librte_vhost/vhost-net.h  | 118 +++
 lib/librte_vhost/vhost_cuse/eventfd_copy.c|  88 +
 lib/librte_vhost/vhost_cuse/eventfd_copy.h|  39 ++
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c  | 417 ++
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 423 ++
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h |  48 +++
 lib/librte_vhost/vhost_rxtx.c |   2 +-
 lib/librte_vhost/vhost_user/fd_man.c  | 258 ++
 lib/librte_vhost/vhost_user/fd_man.h  |  67 
 lib/librte_vhost/vhost_user/vhost-net-user.c  | 472 +
 lib/librte_vhost/vhost_user/vhost-net-user.h  | 106 ++
 lib/librte_vhost/vhost_user/virtio-net-user.c | 314 
 lib/librte_vhost/vhost_user/virtio-net-user.h |  49 +++
 lib/librte_vhost/virtio-net.c | 491 ++
 lib/librte_vhost/virtio-net.h |  43 +++
 19 files changed, 2491 insertions(+), 959 deletions(-)
 delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
 delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost-net.h
 create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.c
 create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.h
 create mode 100644 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost_user/fd_man.c
 create mode 100644 lib/librte_vhost/vhost_user/fd_man.h
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
 create mode 100644 lib/librte_vhost/virtio-net.h

-- 
1.8.1.4



[dpdk-dev] [PATCH v2 03/11] lib/librte_vhost: rename vhost-net-cdev.h to vhost-net.h

2015-02-12 Thread Huawei Xie
This file defines common operations provided by virtio-net(.c).

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost-net-cdev.h| 113 ---
 lib/librte_vhost/vhost-net.h | 113 +++
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c |   2 +-
 lib/librte_vhost/vhost_rxtx.c|   2 +-
 lib/librte_vhost/virtio-net.c|   2 +-
 5 files changed, 116 insertions(+), 116 deletions(-)
 delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost-net.h

diff --git a/lib/librte_vhost/vhost-net-cdev.h 
b/lib/librte_vhost/vhost-net-cdev.h
deleted file mode 100644
index 03a5c57..000
--- a/lib/librte_vhost/vhost-net-cdev.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VHOST_NET_CDEV_H_
-#define _VHOST_NET_CDEV_H_
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-
-/* Macros for printing using RTE_LOG */
-#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
-#define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
-
-#ifdef RTE_LIBRTE_VHOST_DEBUG
-#define VHOST_MAX_PRINT_BUFF 6072
-#define LOG_LEVEL RTE_LOG_DEBUG
-#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
-#define PRINT_PACKET(device, addr, size, header) do { \
-   char *pkt_addr = (char *)(addr); \
-   unsigned int index; \
-   char packet[VHOST_MAX_PRINT_BUFF]; \
-   \
-   if ((header)) \
-   snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size 
%d: ", (device->device_fh), (size)); \
-   else \
-   snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size 
%d: ", (device->device_fh), (size)); \
-   for (index = 0; index < (size); index++) { \
-   snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), 
VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
-   "%02hhx ", pkt_addr[index]); \
-   } \
-   snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), 
VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
-   \
-   LOG_DEBUG(VHOST_DATA, "%s", packet); \
-} while (0)
-#else
-#define LOG_LEVEL RTE_LOG_INFO
-#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
-#define PRINT_PACKET(device, addr, size, header) do {} while (0)
-#endif
-
-
-/*
- * Structure used to identify device context.
- */
-struct vhost_device_ctx {
-   pid_t   pid;/* PID of process calling the IOCTL. */
-   uint64_tfh; /* Populated with fi->fh to track the device 
index. */
-};
-
-/*
- * Structure contains function pointers to be defined in virtio-net.c. These
- * functions are called in CUSE context and are used to configure devices.
- */
-struct vhost_net_device_ops {
-   int (*new_device)(struct vhost_device_ctx);
-   void (*destroy_device)(struct vhost_device_ctx);
-
-   int (*get_features)(struct vhost_device_ctx, uint64_t *);
-   int (*set_features)(struct vhost_device_ctx, uint64_t *);
-
-   int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
-
-   int (*set_vring_num)(struct vhost_device_ctx,

[dpdk-dev] [PATCH v2 04/11] lib/librte_vhost: move fd copying(from qemu process into vhost process) to eventfd_copy.c

2015-02-12 Thread Huawei Xie
 vhost-user doesn't need eventfd kernel module to copy fds between processes.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/Makefile|  2 +-
 lib/librte_vhost/vhost_cuse/eventfd_copy.c   | 88 
 lib/librte_vhost/vhost_cuse/eventfd_copy.h   | 39 
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 41 +
 lib/librte_vhost/virtio-net.c| 57 +-
 5 files changed, 161 insertions(+), 66 deletions(-)
 create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.c
 create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 49ae7ae..88d1295 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -41,7 +41,7 @@ LIBABIVER := 1
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 
-lfuse
 LDFLAGS += -lfuse
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c virtio-net.c 
vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c 
vhost_cuse/eventfd_copy.c virtio-net.c vhost_rxtx.c

 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.c 
b/lib/librte_vhost/vhost_cuse/eventfd_copy.c
new file mode 100644
index 000..4d697a2
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/eventfd_copy.c
@@ -0,0 +1,88 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "eventfd_link/eventfd_link.h"
+#include "eventfd_copy.h"
+#include "vhost-net.h"
+
+static const char eventfd_cdev[] = "/dev/eventfd-link";
+
+/*
+ * This function uses the eventfd_link kernel module to copy an eventfd file
+ * descriptor provided by QEMU in to our process space.
+ */
+int
+eventfd_copy(int target_fd, int target_pid)
+{
+   int eventfd_link, ret;
+   struct eventfd_copy eventfd_copy;
+   int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+
+   if (fd == -1)
+   return -1;
+
+   /* Open the character device to the kernel module. */
+   /* TODO: check this earlier rather than fail until VM boots! */
+   eventfd_link = open(eventfd_cdev, O_RDWR);
+   if (eventfd_link < 0) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "eventfd_link module is not loaded\n");
+   close(fd);
+   return -1;
+   }
+
+   eventfd_copy.source_fd = fd;
+   eventfd_copy.target_fd = target_fd;
+   eventfd_copy.target_pid = target_pid;
+   /* Call the IOCTL to copy the eventfd. */
+   ret = ioctl(eventfd_link, EVENTFD_COPY, &eventfd_copy);
+   close(eventfd_link);
+
+   if (ret < 0) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "EVENTFD_COPY ioctl failed\n");
+   close(fd);
+   return -1;
+   }
+
+   return fd;
+}
diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.h 
b/lib/librte_vhost/vhost_cuse/eventfd_copy.h
new file mode 100644
index 000..19ae30d
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/eventfd_copy.h
@@ -0,0 +1,39 @@
+/*-
+ * 

[dpdk-dev] [PATCH v2 07/11] lib/librte_vhost: implement cuse_set_memory_table

2015-02-12 Thread Huawei Xie
remove set_memory_table ops

vhost-cuse and vhost-user will each implement their own set_memory_region 
handler.

In current vhost-cuse implementation, guest numa memory isn't supported.
Assume that guest memory is backed by only one file.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/Makefile |   2 +-
 lib/librte_vhost/vhost-net.h  |   4 +-
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c  |   7 +-
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 115 +
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h |  45 
 lib/librte_vhost/virtio-net.c | 348 --
 lib/librte_vhost/virtio-net.h |  43 
 7 files changed, 210 insertions(+), 354 deletions(-)
 create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
 create mode 100644 lib/librte_vhost/virtio-net.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 88d1295..797a806 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -41,7 +41,7 @@ LIBABIVER := 1
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 
-lfuse
 LDFLAGS += -lfuse
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c 
vhost_cuse/eventfd_copy.c virtio-net.c vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c 
vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c virtio-net.c vhost_rxtx.c

 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index 03a5c57..86b38a5 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -41,6 +41,8 @@

 #include 

+#define VHOST_MEMORY_MAX_NREGIONS 8
+
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
 #define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
@@ -92,8 +94,6 @@ struct vhost_net_device_ops {
int (*get_features)(struct vhost_device_ctx, uint64_t *);
int (*set_features)(struct vhost_device_ctx, uint64_t *);

-   int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
-
int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state 
*);
int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr 
*);
int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state 
*);
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index e7794b0..72609a3 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -44,6 +44,7 @@
 #include 
 #include 

+#include "virtio-net-cdev.h"
 #include "vhost-net.h"
 #include "eventfd_copy.h"

@@ -57,7 +58,7 @@ static const char cuse_device_name[] = "/dev/cuse";
 static const char default_cdev[] = "vhost-net";

 static struct fuse_session *session;
-static struct vhost_net_device_ops const *ops;
+struct vhost_net_device_ops const *ops;

 /*
  * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
@@ -247,8 +248,8 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
break;

default:
-   result = ops->set_mem_table(ctx,
-   in_buf, mem_temp.nregions);
+   result = cuse_set_mem_table(ctx, in_buf,
+   mem_temp.nregions);
if (result)
fuse_reply_err(req, EINVAL);
else
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index 58ac3dd..adebb54 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -47,7 +47,10 @@

 #include 

+#include "rte_virtio_net.h"
 #include "vhost-net.h"
+#include "virtio-net-cdev.h"
+#include "virtio-net.h"

 /* Line size for reading maps file. */
 static const uint32_t BUFSIZE = PATH_MAX;
@@ -253,3 +256,115 @@ host_memory_map(pid_t pid, uint64_t addr,

return 0;
 }
+
+int
+cuse_set_mem_table(struct vhost_device_ctx ctx,
+   const struct vhost_memory *mem_regions_addr, uint32_t nregions)
+{
+   uint64_t size = offsetof(struct vhost_memory, regions);
+   uint32_t idx, valid_regions;
+   struct virtio_memory_regions *pregion;
+   struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
+   ((uint64_t)(uintptr_t)mem_regions_addr + size);
+   uint64_t base_address = 0, mapped_address, mapped_size;
+   struct virtio_net *dev;
+
+   dev = get_device(ctx);
+   if (dev == NULL)
+   return -1;
+
+   if (dev->mem && dev->mem->mapped_address) {
+   munmap((void *)(uintpt

[dpdk-dev] [PATCH v2 09/11] lib/librte_vhost: vhost user support

2015-02-12 Thread Huawei Xie
In rte_vhost_driver_register(), vhost unix domain socket listener fd is created
and added to polled(based on select) fdset.

In rte_vhost_driver_session_start(), fds in the fdset are checked for
processing. If there is new connection from qemu, connection fd accepted is
added to polled fdset. The listener and connection fds in the fdset are
then both checked. When there is message on the connection fd, its
callback vserver_message_handler is called to process vhost-user messages.

To support identifying which virtio is from which guest VM, we could call
rte_vhost_driver_register with different socket path. Virtio devices from
same VM will connect to VM specific socket. The socket path information is
stored in the virtio_net structure.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/Makefile |   8 +-
 lib/librte_vhost/rte_virtio_net.h |   2 +
 lib/librte_vhost/vhost-net.h  |   4 +-
 lib/librte_vhost/vhost_user/vhost-net-user.c  | 457 ++
 lib/librte_vhost/vhost_user/vhost-net-user.h  | 106 ++
 lib/librte_vhost/vhost_user/virtio-net-user.c | 314 ++
 lib/librte_vhost/vhost_user/virtio-net-user.h |  49 +++
 lib/librte_vhost/virtio-net.c |  20 +-
 8 files changed, 951 insertions(+), 9 deletions(-)
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 797a806..52f6575 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -38,10 +38,14 @@ EXPORT_MAP := rte_vhost_version.map

 LIBABIVER := 1

-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 
-lfuse
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64
+CFLAGS += -I vhost_cuse -lfuse
+CFLAGS += -I vhost_user
 LDFLAGS += -lfuse
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c 
vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c virtio-net.c vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
+#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_cuse/vhost-net-cdev.c 
vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/vhost-net-user.c 
vhost_user/virtio-net-user.c vhost_user/fd_man.c

 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 0bf07c7..46c2072 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -50,6 +50,8 @@
 #include 
 #include 

+#define VHOST_MEMORY_MAX_NREGIONS 8
+
 /* Used to indicate that the device is running on a data core */
 #define VIRTIO_DEV_RUNNING 1

diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index 86b38a5..a56e405 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -41,7 +41,9 @@

 #include 

-#define VHOST_MEMORY_MAX_NREGIONS 8
+#include "rte_virtio_net.h"
+
+extern struct vhost_net_device_ops const *ops;

 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
new file mode 100644
index 000..712a82f
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -0,0 +1,457 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, B

[dpdk-dev] [PATCH v2 11/11] lib/librte_vhost: support dynamically registering vhost server

2015-02-12 Thread Huawei Xie
* support calling rte_vhost_driver_register after rte_vhost_driver_session_start
* add mutex to protect fdset from concurrent access
* add busy flag in fdentry. this flag is set before cb and cleared after cb is 
finished.

mutex lock scenario in vhost:

* event_dispatch(in rte_vhost_driver_session_start) runs in a separate thread, 
infinitely
processing vhost messages through cb(callback).
* event_dispatch acquires the lock, get the cb and its context, mark the busy 
flag,
and releases the mutex.
* vserver_new_vq_conn cb calls fdset_add, which acquires the mutex and add new 
fd into fdset.
* vserver_message_handler cb frees data context, marks remove flag to request 
to delete
connfd(connection fd) from fdset.
* after cb returns, event_dispatch
  1. clears busy flag.
  2. if there is remove request, call fdset_del, which acquires mutex, checks 
busy flag, and
removes connfd from fdset.
* rte_vhost_driver_unregister(not implemented) runs in another thread, acquires 
the mutex,
calls fdset_del to remove fd(listenerfd) from fdset. Then it could free data 
context.

The above steps ensure the fd data context isn't freed while a cb is using it.

VM(s) should have been shut down before rte_vhost_driver_unregister.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/fd_man.c | 63 +---
 lib/librte_vhost/vhost_user/fd_man.h |  5 ++-
 lib/librte_vhost/vhost_user/vhost-net-user.c | 34 +--
 3 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/fd_man.c 
b/lib/librte_vhost/vhost_user/fd_man.c
index 929fbc3..63ac4df 100644
--- a/lib/librte_vhost/vhost_user/fd_man.c
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -40,6 +40,7 @@
 #include 
 #include 

+#include 
 #include 

 #include "fd_man.h"
@@ -145,6 +146,8 @@ fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb 
wcb, void *dat)
if (pfdset == NULL || fd == -1)
return -1;

+   pthread_mutex_lock(&pfdset->fd_mutex);
+
/* Find a free slot in the list. */
i = fdset_find_free_slot(pfdset);
if (i == -1)
@@ -153,6 +156,8 @@ fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb 
wcb, void *dat)
fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
pfdset->num++;

+   pthread_mutex_unlock(&pfdset->fd_mutex);
+
return 0;
 }

@@ -164,17 +169,36 @@ fdset_del(struct fdset *pfdset, int fd)
 {
int i;

+   if (pfdset == NULL || fd == -1)
+   return;
+
+again:
+   pthread_mutex_lock(&pfdset->fd_mutex);
+
i = fdset_find_fd(pfdset, fd);
if (i != -1 && fd != -1) {
+   /* busy indicates r/wcb is executing! */
+   if (pfdset->fd[i].busy == 1) {
+   pthread_mutex_unlock(&pfdset->fd_mutex);
+   goto again;
+   }
+
pfdset->fd[i].fd = -1;
pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
pfdset->num--;
}
+
+   pthread_mutex_unlock(&pfdset->fd_mutex);
 }

 /**
  * This functions runs in infinite blocking loop until there is no fd in
  * pfdset. It calls corresponding r/w handler if there is event on the fd.
+ *
+ * Before the callback is called, we set the flag to busy status; If other
+ * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
+ * will wait until the flag is reset to zero(which indicates the callback is
+ * finished), then it could free the context after fdset_del.
  */
 void
 fdset_event_dispatch(struct fdset *pfdset)
@@ -183,6 +207,10 @@ fdset_event_dispatch(struct fdset *pfdset)
int i, maxfds;
struct fdentry *pfdentry;
int num = MAX_FDS;
+   fd_cb rcb, wcb;
+   void *dat;
+   int fd;
+   int remove1, remove2;

if (pfdset == NULL)
return;
@@ -190,18 +218,41 @@ fdset_event_dispatch(struct fdset *pfdset)
while (1) {
FD_ZERO();
FD_ZERO();
+   pthread_mutex_lock(&pfdset->fd_mutex);
+
maxfds = fdset_fill(, , pfdset);
-   if (maxfds == -1)
-   return;
+   if (maxfds == -1) {
+   pthread_mutex_unlock(&pfdset->fd_mutex);
+   sleep(1);
+   continue;
+   }
+
+   pthread_mutex_unlock(&pfdset->fd_mutex);

select(maxfds + 1, , , NULL, NULL);

for (i = 0; i < num; i++) {
+   remove1 = remove2 = 0;
+   pthread_mutex_lock(&pfdset->fd_mutex);
pfdentry = &pfdset->fd[i];
-   if (pfdentry->fd >= 0 && FD_ISSET(pfdentry->fd, ) 
&& pfdentry->rcb)
-   pfdentry->rcb(pfdentry->fd, pfdentry->dat);
-   if (pfdentry->fd >= 0 && FD_ISSET(pfdentry->fd, ) 
&& p

[dpdk-dev] [PATCH v2 08/11] lib/librte_vhost: add select based event driven processing

2015-02-12 Thread Huawei Xie
for more generic event driven processing, refer to:
http://libevent.org/


Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/fd_man.c | 207 +++
 lib/librte_vhost/vhost_user/fd_man.h |  64 +++
 2 files changed, 271 insertions(+)
 create mode 100644 lib/librte_vhost/vhost_user/fd_man.c
 create mode 100644 lib/librte_vhost/vhost_user/fd_man.h

diff --git a/lib/librte_vhost/vhost_user/fd_man.c 
b/lib/librte_vhost/vhost_user/fd_man.c
new file mode 100644
index 000..929fbc3
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -0,0 +1,207 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "fd_man.h"
+
+/**
+ * Returns the index in the fdset for a given fd.
+ * If fd is -1, it means to search for a free entry.
+ * @return
+ *   index for the fd, or -1 if fd isn't in the fdset.
+ */
+static int
+fdset_find_fd(struct fdset *pfdset, int fd)
+{
+   int i;
+
+   if (pfdset == NULL)
+   return -1;
+
+   for (i = 0; i < MAX_FDS && pfdset->fd[i].fd != fd; i++)
+   ;
+
+   return i ==  MAX_FDS ? -1 : i;
+}
+
+static int
+fdset_find_free_slot(struct fdset *pfdset)
+{
+   return fdset_find_fd(pfdset, -1);
+}
+
+static void
+fdset_add_fd(struct fdset  *pfdset, int idx, int fd,
+   fd_cb rcb, fd_cb wcb, void *dat)
+{
+   struct fdentry *pfdentry;
+
+   if (pfdset == NULL || idx >= MAX_FDS)
+   return;
+
+   pfdentry = &pfdset->fd[idx];
+   pfdentry->fd = fd;
+   pfdentry->rcb = rcb;
+   pfdentry->wcb = wcb;
+   pfdentry->dat = dat;
+}
+
+/**
+ * Fill the read/write fd_set with the fds in the fdset.
+ * @return
+ *  the maximum fds filled in the read/write fd_set.
+ */
+static int
+fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
+{
+   struct fdentry *pfdentry;
+   int i, maxfds = -1;
+   int num = MAX_FDS;
+
+   if (pfdset == NULL)
+   return -1;
+
+   for (i = 0; i < num; i++) {
+   pfdentry = &pfdset->fd[i];
+   if (pfdentry->fd != -1) {
+   int added = 0;
+   if (pfdentry->rcb && rfset) {
+   FD_SET(pfdentry->fd, rfset);
+   added = 1;
+   }
+   if (pfdentry->wcb && wfset) {
+   FD_SET(pfdentry->fd, wfset);
+   added = 1;
+   }
+   if (added)
+   maxfds = pfdentry->fd < maxfds ?
+   maxfds : pfdentry->fd;
+   }
+   }
+   return maxfds;
+}
+
+void
+fdset_init(struct fdset *pfdset)
+{
+   int i;
+
+   if (pfdset == NULL)
+   return;
+
+   for (i = 0; i < MAX_FDS; i++)
+   pfdset->fd[i].fd = -1;
+   pfdset->num = 0;
+}
+
+/**
+ * Register the fd in the fdset with read/write handler and context.
+ */
+int
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
+{
+   int i;
+
+

[dpdk-dev] [PATCH v2 02/11] lib/librte_vhost: create vhost_cuse directory and move vhost-net-cdev.c into vhost_cuse

2015-02-12 Thread Huawei Xie
vhost-cuse driver will be divided into two parts: cuse driver specific message
handling(in cuse directory) and common message handling(in virtio-net.c).

vhost ioctl message is pre-processed in cuse and then sent to virtio-net
if it is not terminated.

virtio-net.c provides common message handling for both vhost-cuse and 
vhost-user.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/Makefile|   4 +-
 lib/librte_vhost/vhost-net-cdev.c| 389 ---
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 389 +++
 3 files changed, 391 insertions(+), 391 deletions(-)
 delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 369c25a..49ae7ae 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -38,10 +38,10 @@ EXPORT_MAP := rte_vhost_version.map

 LIBABIVER := 1

-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 
-lfuse
 LDFLAGS += -lfuse
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c virtio-net.c 
vhost_rxtx.c

 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/vhost-net-cdev.c 
b/lib/librte_vhost/vhost-net-cdev.c
deleted file mode 100644
index 57c76cb..000
--- a/lib/librte_vhost/vhost-net-cdev.c
+++ /dev/null
@@ -1,389 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-#include 
-#include 
-
-#include "vhost-net-cdev.h"
-
-#define FUSE_OPT_DUMMY "\0\0"
-#define FUSE_OPT_FORE  "-f\0\0"
-#define FUSE_OPT_NOMULTI "-s\0\0"
-
-static const uint32_t default_major = 231;
-static const uint32_t default_minor = 1;
-static const char cuse_device_name[] = "/dev/cuse";
-static const char default_cdev[] = "vhost-net";
-
-static struct fuse_session *session;
-static struct vhost_net_device_ops const *ops;
-
-/*
- * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
- * when the device is added to the device linked list.
- */
-static struct vhost_device_ctx
-fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
-{
-   struct vhost_device_ctx ctx;
-   struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
-
-   ctx.pid = req_ctx->pid;
-   ctx.fh = fi->fh;
-
-   return ctx;
-}
-
-/*
- * When the device is created in QEMU it gets initialised here and
- * added to the device linked list.
- */
-static void
-vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
-{
-   struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
-   int err = 0;
-
-   err = ops->new_device(ctx);
-   if (err == -1) {
-   fuse_reply_err(req, EPERM);
-   return;
-   }
-
-   fi->fh = err;
-
-   RTE_LOG(INFO, VHOST_CONFIG,
-   "(%"PRIu64") Device configuration st

[dpdk-dev] [PATCH v2 10/11] lib/librte_vhost: support dev->ifname for vhost-user

2015-02-12 Thread Huawei Xie
for vhost-cuse, ifname is the name of the tap device
for vhost-user, ifname is the name of the unix domain socket path

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/rte_virtio_net.h |  3 +-
 lib/librte_vhost/vhost-net.h  |  3 ++
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c  |  8 +++-
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 53 ++
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h |  3 ++
 lib/librte_vhost/vhost_user/vhost-net-user.c  |  7 +++
 lib/librte_vhost/virtio-net.c | 63 +--
 7 files changed, 95 insertions(+), 45 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 46c2072..611a3d4 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -100,7 +100,8 @@ struct virtio_net {
uint64_tfeatures;   /**< Negotiated feature set. */
uint64_tdevice_fh;  /**< device identifier. */
uint32_tflags;  /**< Device flags. Only used to 
check if device is running on data core. */
-   charifname[IFNAMSIZ];   /**< Name of the tap 
device. */
+#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
+   charifname[IF_NAME_SZ]; /**< Name of the tap 
device or socket path. */
void*priv;  /**< private context */
 } __rte_cache_aligned;

diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index a56e405..0f3f8dc 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -93,6 +93,9 @@ struct vhost_net_device_ops {
int (*new_device)(struct vhost_device_ctx);
void (*destroy_device)(struct vhost_device_ctx);

+   void (*set_ifname)(struct vhost_device_ctx,
+   const char *if_name, unsigned int if_len);
+
int (*get_features)(struct vhost_device_ctx, uint64_t *);
int (*set_features)(struct vhost_device_ctx, uint64_t *);

diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index 72609a3..6b68abf 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -196,7 +196,13 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
case VHOST_NET_SET_BACKEND:
LOG_DEBUG(VHOST_CONFIG,
"(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
-   VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
+   if (!in_buf) {
+   VHOST_IOCTL_RETRY(sizeof(file), 0);
+   break;
+   }
+   file = *(const struct vhost_vring_file *)in_buf;
+   result = cuse_set_backend(ctx, &file);
+   fuse_reply_ioctl(req, result, NULL, 0);
break;

case VHOST_GET_FEATURES:
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index adebb54..ae2c3fa 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -43,6 +43,10 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
 #include 

 #include 
@@ -51,6 +55,7 @@
 #include "vhost-net.h"
 #include "virtio-net-cdev.h"
 #include "virtio-net.h"
+#include "eventfd_copy.h"

 /* Line size for reading maps file. */
 static const uint32_t BUFSIZE = PATH_MAX;
@@ -368,3 +373,51 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,

return 0;
 }
+
+/*
+ * Function to get the tap device name from the provided file descriptor and
+ * save it in the device structure.
+ */
+static int
+get_ifname(struct vhost_device_ctx ctx, struct virtio_net *dev, int tap_fd, 
int pid)
+{
+   int fd_tap;
+   struct ifreq ifr;
+   uint32_t ifr_size;
+   int ret;
+
+   fd_tap = eventfd_copy(tap_fd, pid);
+   if (fd_tap < 0)
+   return -1;
+
+   ret = ioctl(fd_tap, TUNGETIFF, &ifr);
+
+   if (close(fd_tap) < 0)
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "(%"PRIu64") fd close failed\n",
+   dev->device_fh);
+
+   if (ret >= 0) {
+   ifr_size = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name));
+   ops->set_ifname(ctx, ifr.ifr_name, ifr_size);
+   } else
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "(%"PRIu64") TUNGETIFF ioctl failed\n",
+   dev->device_fh);
+
+   return 0;
+}
+
+int cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file 
*file)
+{
+   struct virtio_net *dev;
+
+   dev = get_device(ctx);
+   if (dev == NULL)
+   return -1;
+
+   if (!(

[dpdk-dev] [PATCH v2 01/11] lib/librte_vhost: enable VIRTIO_NET_F_CTRL_RX VIRTIO_NET_F_CTRL_RX is dependent on VIRTIO_NET_F_CTRL_VQ. Observed that virtio-net driver in guest would crash with only CTRL

2015-02-12 Thread Huawei Xie
In virtnet_send_command:

/* Caller should know better */
BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ||
(out + in > VIRTNET_SEND_COMMAND_SG_MAX));

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/virtio-net.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index b041849..52b4957 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -73,7 +73,8 @@ static struct virtio_net_config_ll *ll_root;

 /* Features supported by this lib. */
 #define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
- (1ULL << VIRTIO_NET_F_CTRL_RX))
+   (1ULL << VIRTIO_NET_F_CTRL_VQ) | \
+   (1ULL << VIRTIO_NET_F_CTRL_RX))
 static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;

 /* Line size for reading maps file. */
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 06/11] lib/librte_vhost: make host_memory_map a more generic function.

2015-02-12 Thread Huawei Xie
This function accepts a virtual address and a pid (qemu), and maps the memory
at that address into the current (vhost) process's address space.

The memory behind the virtual address should be backed by a file,
and virtual address should be the starting address.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 42 +--
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index baca379..58ac3dd 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -75,8 +75,8 @@ struct procmap {
  * map it to our address space.
  */
 static int
-host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
-   pid_t pid, uint64_t addr)
+host_memory_map(pid_t pid, uint64_t addr,
+   uint64_t *mapped_address, uint64_t *mapped_size)
 {
struct dirent *dptr = NULL;
struct procmap procmap;
@@ -104,8 +104,8 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,
fmap = fopen(mapfile, "r");
if (fmap == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Failed to open maps file for pid %d\n",
-   dev->device_fh, pid);
+   "Failed to open maps file for pid %d\n",
+   pid);
return -1;
}

@@ -179,8 +179,8 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,

if (!found) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Failed to find memory file in pid %d maps 
file\n",
-   dev->device_fh, pid);
+   "Failed to find memory file in pid %d maps file\n",
+   pid);
return -1;
}

@@ -188,8 +188,8 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,
dp = opendir(procdir);
if (dp == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Cannot open pid %d process directory\n",
-   dev->device_fh, pid);
+   "Cannot open pid %d process directory\n",
+   pid);
return -1;
}

@@ -202,8 +202,7 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,
path = realpath(memfile, resolved_path);
if ((path == NULL) && (strlen(resolved_path) == 0)) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Failed to resolve fd directory\n",
-   dev->device_fh);
+   "Failed to resolve fd directory\n");
closedir(dp);
return -1;
}
@@ -218,8 +217,8 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,

if (found == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Failed to find memory file for pid %d\n",
-   dev->device_fh, pid);
+   "Failed to find memory file for pid %d\n",
+   pid);
return -1;
}
/* Open the shared memory file and map the memory into this process. */
@@ -227,31 +226,30 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,

if (fd == -1) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Failed to open %s for pid %d\n",
-   dev->device_fh, memfile, pid);
+   "Failed to open %s for pid %d\n",
+   memfile, pid);
return -1;
}

map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
-   MAP_POPULATE|MAP_SHARED, fd, 0);
+   MAP_POPULATE|MAP_SHARED, fd, 0);
close(fd);

if (map == MAP_FAILED) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Error mapping the file %s for pid %d\n",
-   dev->device_fh, memfile, pid);
+   "Error mapping the file %s for pid %d\n",
+   memfile, pid);
return -1;
}

/* Store the memory address and size in the device data structure */
-   mem->mapped_address = (uint64_t)(uintptr_t)map;
-   mem->mapped_size = procmap.len;
+   *mapped_address = (uint64_t)(uintptr_t)map;
+   *mapped_size = procmap.len;

LOG_DEBUG(VHOST_CONFIG,
-   "(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
-   dev->device_fh,
+   "Mem File: %s->%s - Size: %llu - VA: %p\n",
memfile, resolved_path,
-   (unsigned long long)mem->mapped_size, map);
+   (unsigned long long)*mapped_size, map);

return 0;
 }
-- 
1.8.1.4



[dpdk-dev] [PATCH v2] MAINTAINERS: claim responsibility for virtio PMD and vhost library

2015-07-13 Thread Huawei Xie
As original author of the virtio PMD (co-author with Rashmin) and vhost user, claim 
responsibility for the virtio PMD, vhost lib and vhost example.

v2 changes:
would claim xenvirt responsibility in another patch

Signed-off-by: Huawei Xie 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e6de6f0..9a4e18d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -247,6 +247,7 @@ F: drivers/net/mlx4/
 F: doc/guides/nics/mlx4.rst

 RedHat virtio
+M: Huawei Xie 
 M: Changchun Ouyang 
 F: drivers/net/virtio/
 F: doc/guides/nics/virtio.rst
-- 
1.8.1.4



[dpdk-dev] [PATCH] vhost: provide vhost API to unregister vhost unix domain socket

2015-06-02 Thread Huawei Xie
rte_vhost_driver_unregister will remove the listenfd from event list, and then 
close it.

Signed-off-by: Huawei Xie 
Signed-off-by: Peng Sun 
---
 lib/librte_vhost/rte_virtio_net.h|  3 ++
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c |  9 
 lib/librte_vhost/vhost_user/vhost-net-user.c | 70 +++-
 lib/librte_vhost/vhost_user/vhost-net-user.h |  2 +-
 4 files changed, 71 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 5d38185..5630fbc 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -188,6 +188,9 @@ int rte_vhost_enable_guest_notification(struct virtio_net 
*dev, uint16_t queue_i
 /* Register vhost driver. dev_name could be different for multiple instance 
support. */
 int rte_vhost_driver_register(const char *dev_name);

+/* Unregister vhost driver. This is only meaningful to vhost user. */
+int rte_vhost_driver_unregister(const char *dev_name);
+
 /* Register callbacks. */
 int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * 
const);
 /* Start vhost driver session blocking loop. */
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index 6b68abf..1ae7c49 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -405,6 +405,15 @@ rte_vhost_driver_register(const char *dev_name)
 }

 /**
+ * An empty function for unregister
+ */
+int
+rte_vhost_driver_unregister(const char *dev_name __rte_unused)
+{
+   return 0;
+}
+
+/**
  * The CUSE session is launched allowing the application to receive open,
  * release and ioctl calls.
  */
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 31f1215..dff46ee 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -66,6 +66,8 @@ struct connfd_ctx {
 struct _vhost_server {
struct vhost_server *server[MAX_VHOST_SERVER];
struct fdset fdset;
+   int vserver_cnt;
+   pthread_mutex_t server_mutex;
 };

 static struct _vhost_server g_vhost_server = {
@@ -74,10 +76,10 @@ static struct _vhost_server g_vhost_server = {
.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
.num = 0
},
+   .vserver_cnt = 0,
+   .server_mutex = PTHREAD_MUTEX_INITIALIZER,
 };

-static int vserver_idx;
-
 static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_NONE] = "VHOST_USER_NONE",
[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
@@ -427,7 +429,6 @@ vserver_message_handler(int connfd, void *dat, int *remove)
}
 }

-
 /**
  * Creates and initialise the vhost server.
  */
@@ -436,34 +437,79 @@ rte_vhost_driver_register(const char *path)
 {
struct vhost_server *vserver;

-   if (vserver_idx == 0)
+   pthread_mutex_lock(&g_vhost_server.server_mutex);
+   if (ops == NULL)
ops = get_virtio_net_callbacks();
-   if (vserver_idx == MAX_VHOST_SERVER)
+
+   if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "error: the number of servers reaches maximum\n");
+   pthread_mutex_unlock(&g_vhost_server.server_mutex);
return -1;
+   }

vserver = calloc(sizeof(struct vhost_server), 1);
-   if (vserver == NULL)
+   if (vserver == NULL) {
+   pthread_mutex_unlock(&g_vhost_server.server_mutex);
return -1;
-
-   unlink(path);
+   }

vserver->listenfd = uds_socket(path);
if (vserver->listenfd < 0) {
free(vserver);
+   pthread_mutex_unlock(&g_vhost_server.server_mutex);
return -1;
}
-   vserver->path = path;
+
+   vserver->path = strdup(path);

fdset_add(&g_vhost_server.fdset, vserver->listenfd,
-   vserver_new_vq_conn, NULL,
-   vserver);
+   vserver_new_vq_conn, NULL, vserver);

-   g_vhost_server.server[vserver_idx++] = vserver;
+   g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
+   pthread_mutex_unlock(&g_vhost_server.server_mutex);

return 0;
 }


+/**
+ * Unregister the specified vhost server
+ */
+int
+rte_vhost_driver_unregister(const char *path)
+{
+   int i;
+   int count;
+
+   pthread_mutex_lock(&g_vhost_server.server_mutex);
+
+   for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
+   if (!strcmp(g_vhost_server.server[i]->path, path)) {
+   fdset_del(&g_vhost_server.fdset,
+   g_vhost_server.server[i]->listenfd);
+
+   close(g_vhost_server.server[i]->listenfd);
+   free(g_vhost_server.server[i]-

[dpdk-dev] [PATCH v2] vhost: provide vhost API to unregister vhost unix domain socket

2015-06-05 Thread Huawei Xie
rte_vhost_driver_unregister will remove the listenfd from event list, and then 
close it.

Signed-off-by: Huawei Xie 
Signed-off-by: Peng Sun 
---
 lib/librte_vhost/rte_virtio_net.h|  3 ++
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c |  9 
 lib/librte_vhost/vhost_user/vhost-net-user.c | 68 +++-
 lib/librte_vhost/vhost_user/vhost-net-user.h |  2 +-
 4 files changed, 69 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 5d38185..5630fbc 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -188,6 +188,9 @@ int rte_vhost_enable_guest_notification(struct virtio_net 
*dev, uint16_t queue_i
 /* Register vhost driver. dev_name could be different for multiple instance 
support. */
 int rte_vhost_driver_register(const char *dev_name);

+/* Unregister vhost driver. This is only meaningful to vhost user. */
+int rte_vhost_driver_unregister(const char *dev_name);
+
 /* Register callbacks. */
 int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * 
const);
 /* Start vhost driver session blocking loop. */
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index 6b68abf..1ae7c49 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -405,6 +405,15 @@ rte_vhost_driver_register(const char *dev_name)
 }

 /**
+ * An empty function for unregister
+ */
+int
+rte_vhost_driver_unregister(const char *dev_name __rte_unused)
+{
+   return 0;
+}
+
+/**
  * The CUSE session is launched allowing the application to receive open,
  * release and ioctl calls.
  */
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 31f1215..87a4711 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -66,6 +66,8 @@ struct connfd_ctx {
 struct _vhost_server {
struct vhost_server *server[MAX_VHOST_SERVER];
struct fdset fdset;
+   int vserver_cnt;
+   pthread_mutex_t server_mutex;
 };

 static struct _vhost_server g_vhost_server = {
@@ -74,10 +76,10 @@ static struct _vhost_server g_vhost_server = {
.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
.num = 0
},
+   .vserver_cnt = 0,
+   .server_mutex = PTHREAD_MUTEX_INITIALIZER,
 };

-static int vserver_idx;
-
 static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_NONE] = "VHOST_USER_NONE",
[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
@@ -427,7 +429,6 @@ vserver_message_handler(int connfd, void *dat, int *remove)
}
 }

-
 /**
  * Creates and initialise the vhost server.
  */
@@ -436,34 +437,77 @@ rte_vhost_driver_register(const char *path)
 {
struct vhost_server *vserver;

-   if (vserver_idx == 0)
+   pthread_mutex_lock(&g_vhost_server.server_mutex);
+   if (ops == NULL)
ops = get_virtio_net_callbacks();
-   if (vserver_idx == MAX_VHOST_SERVER)
+
+   if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "error: the number of servers reaches maximum\n");
+   pthread_mutex_unlock(&g_vhost_server.server_mutex);
return -1;
+   }

vserver = calloc(sizeof(struct vhost_server), 1);
-   if (vserver == NULL)
+   if (vserver == NULL) {
+   pthread_mutex_unlock(&g_vhost_server.server_mutex);
return -1;
-
-   unlink(path);
+   }

vserver->listenfd = uds_socket(path);
if (vserver->listenfd < 0) {
free(vserver);
+   pthread_mutex_unlock(&g_vhost_server.server_mutex);
return -1;
}
-   vserver->path = path;
+
+   vserver->path = strdup(path);

fdset_add(&g_vhost_server.fdset, vserver->listenfd,
-   vserver_new_vq_conn, NULL,
-   vserver);
+   vserver_new_vq_conn, NULL, vserver);

-   g_vhost_server.server[vserver_idx++] = vserver;
+   g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
+   pthread_mutex_unlock(&g_vhost_server.server_mutex);

return 0;
 }


+/**
+ * Unregister the specified vhost server
+ */
+int
+rte_vhost_driver_unregister(const char *path)
+{
+   int i;
+   int count;
+
+   pthread_mutex_lock(&g_vhost_server.server_mutex);
+
+   for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
+   if (!strcmp(g_vhost_server.server[i]->path, path)) {
+   fdset_del(&g_vhost_server.fdset,
+   g_vhost_server.server[i]->listenfd);
+
+   close(g_vhost_server.server[i]->listenfd);
+   free(g_vhost_server.server[i]-

[dpdk-dev] [PATCH 0/2] vhost: numa aware allocation of virtio_net device and vhost virt queue

2015-06-05 Thread Huawei Xie
The virtio_net device and vhost virt queue should be allocated on the same numa 
node as vring descriptors.
When we first allocate the virtio_net device and vhost virt queue, we don't 
know the numa node of vring descriptors.
When we receive the VHOST_SET_VRING_ADDR message, we get the numa node of vring 
descriptors, so we will try to reallocate virtio_net and vhost virt queue to 
the same numa node.

Huawei Xie (2):
  use rte_malloc/free for virtio_net and virt_queue memory data allocation/free
  When we get the address of vring descriptor table, will try to reallocate 
virtio_net device and virtqueue to the same numa node.

 config/common_linuxapp|   1 +
 lib/librte_vhost/Makefile |   4 ++
 lib/librte_vhost/virtio-net.c | 112 ++
 mk/rte.app.mk |   3 ++
 4 files changed, 111 insertions(+), 9 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH 1/2] vhost: malloc -> rte_malloc for virtio_net and virt queue allocation

2015-06-05 Thread Huawei Xie
use rte_malloc/free for virtio_net and virt queue allocation/free

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/virtio-net.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 4672e67..19b74d6 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 #include "vhost-net.h"
@@ -202,9 +203,9 @@ static void
 free_device(struct virtio_net_config_ll *ll_dev)
 {
/* Free any malloc'd memory */
-   free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
-   free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
-   free(ll_dev);
+   rte_free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
+   rte_free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
+   rte_free(ll_dev);
 }

 /*
@@ -278,7 +279,7 @@ new_device(struct vhost_device_ctx ctx)
struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;

/* Setup device and virtqueues. */
-   new_ll_dev = malloc(sizeof(struct virtio_net_config_ll));
+   new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll), 0);
if (new_ll_dev == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%"PRIu64") Failed to allocate memory for dev.\n",
@@ -286,19 +287,19 @@ new_device(struct vhost_device_ctx ctx)
return -1;
}

-   virtqueue_rx = malloc(sizeof(struct vhost_virtqueue));
+   virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
if (virtqueue_rx == NULL) {
-   free(new_ll_dev);
+   rte_free(new_ll_dev);
RTE_LOG(ERR, VHOST_CONFIG,
"(%"PRIu64") Failed to allocate memory for rxq.\n",
ctx.fh);
return -1;
}

-   virtqueue_tx = malloc(sizeof(struct vhost_virtqueue));
+   virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
if (virtqueue_tx == NULL) {
-   free(virtqueue_rx);
-   free(new_ll_dev);
+   rte_free(virtqueue_rx);
+   rte_free(new_ll_dev);
RTE_LOG(ERR, VHOST_CONFIG,
"(%"PRIu64") Failed to allocate memory for txq.\n",
ctx.fh);
-- 
1.8.1.4



[dpdk-dev] [PATCH 2/2] vhost: realloc virtio_net and virtqueue to the same node of vring desc table

2015-06-05 Thread Huawei Xie
When we get the address of the vring descriptor table in the VHOST_SET_VRING_ADDR 
message, we will try to reallocate the virtio_net device and virtqueue to the
same numa node.

Signed-off-by: Huawei Xie 
---
 config/common_linuxapp|  1 +
 lib/librte_vhost/Makefile |  4 ++
 lib/librte_vhost/virtio-net.c | 93 +++
 mk/rte.app.mk |  3 ++
 4 files changed, 101 insertions(+)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0078dc9..4ace24e 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -421,6 +421,7 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
 #
 CONFIG_RTE_LIBRTE_VHOST=n
 CONFIG_RTE_LIBRTE_VHOST_USER=y
+CONFIG_RTE_LIBRTE_VHOST_NUMA=n
 CONFIG_RTE_LIBRTE_VHOST_DEBUG=n

 #
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index a8645a6..6681f22 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -46,6 +46,10 @@ CFLAGS += -I vhost_cuse -lfuse
 LDFLAGS += -lfuse
 endif

+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
+LDFLAGS += -lnuma
+endif
+
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 19b74d6..8a80f5e 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -38,6 +38,9 @@
 #include 
 #include 
 #include 
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include 
+#endif

 #include 

@@ -481,6 +484,93 @@ set_vring_num(struct vhost_device_ctx ctx, struct 
vhost_vring_state *state)
 }

 /*
+ * Reallocate virtio_net and vhost_virtqueue data structure to make them on the
+ * same numa node as the memory of vring descriptor.
+ */
+#ifdef RTE_LIBRTE_VHOST_NUMA
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index)
+{
+   int oldnode, newnode;
+   struct virtio_net_config_ll *old_ll_dev, *new_ll_dev;
+   struct vhost_virtqueue *old_vq, *new_vq;
+   int ret;
+   int realloc_dev = 0, realloc_vq = 0;
+
+   old_ll_dev = (struct virtio_net_config_ll *)dev;
+   old_vq = dev->virtqueue[index];
+
+   ret  = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
+   MPOL_F_NODE | MPOL_F_ADDR);
+   ret = ret | get_mempolicy(&oldnode, NULL, 0, old_ll_dev,
+   MPOL_F_NODE | MPOL_F_ADDR);
+   if (ret) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "Unable to get vring desc or dev numa information.\n");
+   return dev;
+   }
+   if (oldnode != newnode)
+   realloc_dev = 1;
+
+   ret = get_mempolicy(&oldnode, NULL, 0, old_vq,
+   MPOL_F_NODE | MPOL_F_ADDR);
+   if (ret) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "Unable to get vq numa information.\n");
+   return dev;
+   }
+   if (oldnode != newnode)
+   realloc_vq = 1;
+
+   if (realloc_dev == 0 && realloc_vq == 0)
+   return dev;
+
+   if (realloc_dev)
+   new_ll_dev = rte_malloc_socket(NULL,
+   sizeof(struct virtio_net_config_ll), 0, newnode);
+   if (realloc_vq)
+   new_vq = rte_malloc_socket(NULL,
+   sizeof(struct vhost_virtqueue), 0, newnode);
+   if (!new_ll_dev || !new_vq) {
+   if (new_ll_dev)
+   rte_free(new_ll_dev);
+   if (new_vq)
+   rte_free(new_vq);
+   return dev;
+   }
+
+   if (realloc_vq)
+   memcpy(new_vq, old_vq, sizeof(*new_vq));
+   if (realloc_dev)
+   memcpy(new_ll_dev, old_ll_dev, sizeof(*new_ll_dev));
+   (new_ll_dev ? new_ll_dev : old_ll_dev)->dev.virtqueue[index] =
+   new_vq ? new_vq : old_vq;
+   if (realloc_vq)
+   rte_free(old_vq);
+   if (realloc_dev) {
+   if (ll_root == old_ll_dev)
+   ll_root = new_ll_dev;
+   else {
+   struct virtio_net_config_ll *prev = ll_root;
+   while (prev->next != old_ll_dev)
+   prev = prev->next;
+   prev->next = new_ll_dev;
+   new_ll_dev->next = old_ll_dev->next;
+   }
+   rte_free(old_ll_dev);
+   }
+
+   return &new_ll_dev->dev;
+}
+#else
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index __rte_unused)
+{
+   return dev;
+}
+#endif
+
+/*
  * Called from CUSE IOCTL: VHOST_SET_VRING_ADDR
  * The virtio device sends us the desc, used and avail ring addresses.
  * This function then converts these to our address space.
@@ -508,6 +598,9 @@ set_vring_addr(struct vhost_device_ctx ctx, struct 
vhost_vring_addr *addr)
return -1;
}

+   dev = numa_realloc(dev, addr->index);
+   vq = dev->virt

[dpdk-dev] [PATCH v2] vhost: provide vhost API to unregister vhost unix domain socket

2015-06-05 Thread Huawei Xie
rte_vhost_driver_unregister will remove the listenfd from event list, and then 
close it.

Signed-off-by: Huawei Xie 
Signed-off-by: Peng Sun 
---
 lib/librte_vhost/rte_virtio_net.h|  3 ++
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c |  9 
 lib/librte_vhost/vhost_user/vhost-net-user.c | 68 +++-
 lib/librte_vhost/vhost_user/vhost-net-user.h |  2 +-
 4 files changed, 69 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 5d38185..5630fbc 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -188,6 +188,9 @@ int rte_vhost_enable_guest_notification(struct virtio_net 
*dev, uint16_t queue_i
 /* Register vhost driver. dev_name could be different for multiple instance 
support. */
 int rte_vhost_driver_register(const char *dev_name);

+/* Unregister vhost driver. This is only meaningful to vhost user. */
+int rte_vhost_driver_unregister(const char *dev_name);
+
 /* Register callbacks. */
 int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * 
const);
 /* Start vhost driver session blocking loop. */
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index 6b68abf..1ae7c49 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -405,6 +405,15 @@ rte_vhost_driver_register(const char *dev_name)
 }

 /**
+ * An empty function for unregister
+ */
+int
+rte_vhost_driver_unregister(const char *dev_name __rte_unused)
+{
+   return 0;
+}
+
+/**
  * The CUSE session is launched allowing the application to receive open,
  * release and ioctl calls.
  */
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 31f1215..87a4711 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -66,6 +66,8 @@ struct connfd_ctx {
 struct _vhost_server {
struct vhost_server *server[MAX_VHOST_SERVER];
struct fdset fdset;
+   int vserver_cnt;
+   pthread_mutex_t server_mutex;
 };

 static struct _vhost_server g_vhost_server = {
@@ -74,10 +76,10 @@ static struct _vhost_server g_vhost_server = {
.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
.num = 0
},
+   .vserver_cnt = 0,
+   .server_mutex = PTHREAD_MUTEX_INITIALIZER,
 };

-static int vserver_idx;
-
 static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_NONE] = "VHOST_USER_NONE",
[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
@@ -427,7 +429,6 @@ vserver_message_handler(int connfd, void *dat, int *remove)
}
 }

-
 /**
  * Creates and initialise the vhost server.
  */
@@ -436,34 +437,77 @@ rte_vhost_driver_register(const char *path)
 {
struct vhost_server *vserver;

-   if (vserver_idx == 0)
+   pthread_mutex_lock(_vhost_server.server_mutex);
+   if (ops == NULL)
ops = get_virtio_net_callbacks();
-   if (vserver_idx == MAX_VHOST_SERVER)
+
+   if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "error: the number of servers reaches maximum\n");
+   pthread_mutex_unlock(_vhost_server.server_mutex);
return -1;
+   }

vserver = calloc(sizeof(struct vhost_server), 1);
-   if (vserver == NULL)
+   if (vserver == NULL) {
+   pthread_mutex_unlock(_vhost_server.server_mutex);
return -1;
-
-   unlink(path);
+   }

vserver->listenfd = uds_socket(path);
if (vserver->listenfd < 0) {
free(vserver);
+   pthread_mutex_unlock(_vhost_server.server_mutex);
return -1;
}
-   vserver->path = path;
+
+   vserver->path = strdup(path);

fdset_add(_vhost_server.fdset, vserver->listenfd,
-   vserver_new_vq_conn, NULL,
-   vserver);
+   vserver_new_vq_conn, NULL, vserver);

-   g_vhost_server.server[vserver_idx++] = vserver;
+   g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
+   pthread_mutex_unlock(_vhost_server.server_mutex);

return 0;
 }


+/**
+ * Unregister the specified vhost server
+ */
+int
+rte_vhost_driver_unregister(const char *path)
+{
+   int i;
+   int count;
+
+   pthread_mutex_lock(_vhost_server.server_mutex);
+
+   for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
+   if (!strcmp(g_vhost_server.server[i]->path, path)) {
+   fdset_del(_vhost_server.fdset,
+   g_vhost_server.server[i]->listenfd);
+
+   close(g_vhost_server.server[i]->listenfd);
+   free(g_vhost_server.server[i]-

[dpdk-dev] [PATCH v2 0/2] vhost: numa aware allocation of vhost device and queues

2015-06-19 Thread Huawei Xie
The vhost device and queues should be allocated on the same NUMA node as the 
vring descriptor table.
When we first allocate the vhost device and queues, we don't know the NUMA 
node of the vring descriptor table.
When we receive the VHOST_SET_VRING_ADDR message, we learn the NUMA node of the 
vring descriptor table and try to reallocate the vhost device and queues on 
that same NUMA node.


Huawei Xie (2):
  use rte_malloc to allocate vhost device and queues
  reallocate vhost device and queues when we get the address of vring 
descriptor table

 config/common_linuxapp|   1 +
 lib/librte_vhost/Makefile |   4 ++
 lib/librte_vhost/virtio-net.c | 112 ++
 mk/rte.app.mk |   4 ++
 4 files changed, 112 insertions(+), 9 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v2 1/2] vhost: use rte_malloc to allocate device and queues

2015-06-19 Thread Huawei Xie
use rte_malloc to allocate vhost device and queues

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/virtio-net.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 4672e67..19b74d6 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 #include "vhost-net.h"
@@ -202,9 +203,9 @@ static void
 free_device(struct virtio_net_config_ll *ll_dev)
 {
/* Free any malloc'd memory */
-   free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
-   free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
-   free(ll_dev);
+   rte_free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
+   rte_free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
+   rte_free(ll_dev);
 }

 /*
@@ -278,7 +279,7 @@ new_device(struct vhost_device_ctx ctx)
struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;

/* Setup device and virtqueues. */
-   new_ll_dev = malloc(sizeof(struct virtio_net_config_ll));
+   new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll), 0);
if (new_ll_dev == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%"PRIu64") Failed to allocate memory for dev.\n",
@@ -286,19 +287,19 @@ new_device(struct vhost_device_ctx ctx)
return -1;
}

-   virtqueue_rx = malloc(sizeof(struct vhost_virtqueue));
+   virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
if (virtqueue_rx == NULL) {
-   free(new_ll_dev);
+   rte_free(new_ll_dev);
RTE_LOG(ERR, VHOST_CONFIG,
"(%"PRIu64") Failed to allocate memory for rxq.\n",
ctx.fh);
return -1;
}

-   virtqueue_tx = malloc(sizeof(struct vhost_virtqueue));
+   virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
if (virtqueue_tx == NULL) {
-   free(virtqueue_rx);
-   free(new_ll_dev);
+   rte_free(virtqueue_rx);
+   rte_free(new_ll_dev);
RTE_LOG(ERR, VHOST_CONFIG,
"(%"PRIu64") Failed to allocate memory for txq.\n",
ctx.fh);
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 2/2] vhost: realloc vhost device and queues to the same numa node of vring desc table

2015-06-19 Thread Huawei Xie
When we get the address of the vring descriptor table in the VHOST_SET_VRING_ADDR 
message, we try to reallocate the vhost device and queues on the same NUMA node.

v2 changes:
- fix uninitialised new_vq and new_ll_device
- fix missed endif in rte.app.mk
- fix new_ll_dev and new_vq allocation failure issue
- return old virtio device if new_ll_dev isn't allocated

Signed-off-by: Huawei Xie 
---
 config/common_linuxapp|  1 +
 lib/librte_vhost/Makefile |  4 ++
 lib/librte_vhost/virtio-net.c | 93 +++
 mk/rte.app.mk |  4 ++
 4 files changed, 102 insertions(+)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0078dc9..4ace24e 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -421,6 +421,7 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
 #
 CONFIG_RTE_LIBRTE_VHOST=n
 CONFIG_RTE_LIBRTE_VHOST_USER=y
+CONFIG_RTE_LIBRTE_VHOST_NUMA=n
 CONFIG_RTE_LIBRTE_VHOST_DEBUG=n

 #
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index a8645a6..6681f22 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -46,6 +46,10 @@ CFLAGS += -I vhost_cuse -lfuse
 LDFLAGS += -lfuse
 endif

+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
+LDFLAGS += -lnuma
+endif
+
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 19b74d6..0a065af 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -38,6 +38,9 @@
 #include 
 #include 
 #include 
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include 
+#endif

 #include 

@@ -481,6 +484,93 @@ set_vring_num(struct vhost_device_ctx ctx, struct 
vhost_vring_state *state)
 }

 /*
+ * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the
+ * same numa node as the memory of vring descriptor.
+ */
+#ifdef RTE_LIBRTE_VHOST_NUMA
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index)
+{
+   int oldnode, newnode;
+   struct virtio_net_config_ll *old_ll_dev, *new_ll_dev = NULL;
+   struct vhost_virtqueue *old_vq, *new_vq = NULL;
+   int ret;
+   int realloc_dev = 0, realloc_vq = 0;
+
+   old_ll_dev = (struct virtio_net_config_ll *)dev;
+   old_vq = dev->virtqueue[index];
+
+   ret  = get_mempolicy(, NULL, 0, old_vq->desc,
+   MPOL_F_NODE | MPOL_F_ADDR);
+   ret = ret | get_mempolicy(, NULL, 0, old_ll_dev,
+   MPOL_F_NODE | MPOL_F_ADDR);
+   if (ret) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "Unable to get vring desc or dev numa information.\n");
+   return dev;
+   }
+   if (oldnode != newnode)
+   realloc_dev = 1;
+
+   ret = get_mempolicy(, NULL, 0, old_vq,
+   MPOL_F_NODE | MPOL_F_ADDR);
+   if (ret) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "Unable to get vq numa information.\n");
+   return dev;
+   }
+   if (oldnode != newnode)
+   realloc_vq = 1;
+
+   if (realloc_dev == 0 && realloc_vq == 0)
+   return dev;
+
+   if (realloc_dev)
+   new_ll_dev = rte_malloc_socket(NULL,
+   sizeof(struct virtio_net_config_ll), 0, newnode);
+   if (realloc_vq)
+   new_vq = rte_malloc_socket(NULL,
+   sizeof(struct vhost_virtqueue), 0, newnode);
+   if (!new_ll_dev && !new_vq) {
+   if (new_ll_dev)
+   rte_free(new_ll_dev);
+   if (new_vq)
+   rte_free(new_vq);
+   return dev;
+   }
+
+   if (realloc_vq)
+   memcpy(new_vq, old_vq, sizeof(*new_vq));
+   if (realloc_dev)
+   memcpy(new_ll_dev, old_ll_dev, sizeof(*new_ll_dev));
+   (new_ll_dev ? new_ll_dev : old_ll_dev)->dev.virtqueue[index] =
+   new_vq ? new_vq : old_vq;
+   if (realloc_vq)
+   rte_free(old_vq);
+   if (realloc_dev) {
+   if (ll_root == old_ll_dev)
+   ll_root = new_ll_dev;
+   else {
+   struct virtio_net_config_ll *prev = ll_root;
+   while (prev->next != old_ll_dev)
+   prev = prev->next;
+   prev->next = new_ll_dev;
+   new_ll_dev->next = old_ll_dev->next;
+   }
+   rte_free(old_ll_dev);
+   }
+
+   return realloc_dev ? _ll_dev->dev : dev;
+}
+#else
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index __rte_unused)
+{
+   return dev;
+}
+#endif
+
+/*
  * Called from CUSE IOCTL: VHOST_SET_VRING_ADDR
  * The virtio device sends us the desc, used and avail ring addresses.
  * This function then converts

[dpdk-dev] [PATCH v3 2/2] vhost: version map file update

2015-06-19 Thread Huawei Xie
update version map file for rte_vhost_driver_unregister API

v3 changes:
update version map file

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/rte_vhost_version.map | 8 
 1 file changed, 8 insertions(+)

diff --git a/lib/librte_vhost/rte_vhost_version.map 
b/lib/librte_vhost/rte_vhost_version.map
index 163dde0..fb6bb9e 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -13,3 +13,11 @@ DPDK_2.0 {

local: *;
 };
+
+DPDK_2.1 {
+   global:
+
+   rte_vhost_driver_unregister;
+
+   local: *;
+} DPDK_2.0;
-- 
1.8.1.4



[dpdk-dev] [PATCH] use tab rather than space in mbuf version map file

2015-06-19 Thread Huawei Xie
Signed-off-by: Huawei Xie 
---
 lib/librte_mbuf/rte_mbuf_version.map | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf_version.map 
b/lib/librte_mbuf/rte_mbuf_version.map
index 7ae2244..543dc4c 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -13,9 +13,9 @@ DPDK_2.0 {
 };

 DPDK_2.1 {
-   global:
+   global:

-   rte_pktmbuf_pool_create;
+   rte_pktmbuf_pool_create;

-   local: *;
+   local: *;
 } DPDK_2.0;
-- 
1.8.1.4



[dpdk-dev] [PATCH] vhost: flush used->idx update before reading avail->flags

2015-04-23 Thread Huawei Xie
The update of used->idx and the read of avail->flags could be reordered.
A memory fence should be used to ensure the order; otherwise the guest could see a 
stale used->idx value after it toggles the interrupt suppression flag.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_rxtx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 510ffe8..6afba35 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -178,6 +178,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
*(volatile uint16_t *)>used->idx += count;
vq->last_used_idx = res_end_idx;

+   /* flush used->idx update before we read avail->flags. */
+   rte_mb();
+
/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
eventfd_write((int)vq->callfd, 1);
-- 
1.8.1.4



[dpdk-dev] [PATCH v2] vhost: flush used->idx update before reading avail->flags

2015-04-29 Thread Huawei Xie
The update of used->idx and the read of avail->flags could be reordered.
A memory fence should be used to ensure the order; otherwise the guest could see a 
stale used->idx value after it toggles the interrupt suppression flag.
After the guest sets the interrupt suppression flag, it will check through 
used->idx whether there are more buffers to process. If it sees a stale value, it 
will exit the processing while the host won't send an interrupt to the guest.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_rxtx.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 510ffe8..4809d32 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -178,6 +178,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
*(volatile uint16_t *)>used->idx += count;
vq->last_used_idx = res_end_idx;

+   /* flush used->idx update before we read avail->flags. */
+   rte_mb();
+
/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
eventfd_write((int)vq->callfd, 1);
@@ -505,6 +508,9 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
*(volatile uint16_t *)>used->idx += entry_success;
vq->last_used_idx = res_end_idx;

+   /* flush used->idx update before we read avail->flags. */
+   rte_mb();
+
/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
eventfd_write((int)vq->callfd, 1);
-- 
1.8.1.4



[dpdk-dev] [PATCH] vhost: make vhost lockless enqueue configurable

2015-04-29 Thread Huawei Xie
A vhost-enabled vSwitch could have its own thread-safe vring enqueue policy.
Add the RTE_LIBRTE_VHOST_LOCKLESS_ENQ macro for vhost lockless enqueue.
Turn it off by default.

Signed-off-by: Huawei Xie 
---
 config/common_linuxapp|  1 +
 lib/librte_vhost/vhost_rxtx.c | 24 +++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0078dc9..7f59499 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -421,6 +421,7 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
 #
 CONFIG_RTE_LIBRTE_VHOST=n
 CONFIG_RTE_LIBRTE_VHOST_USER=y
+CONFIG_RTE_LIBRTE_VHOST_LOCKLESS_ENQ=n
 CONFIG_RTE_LIBRTE_VHOST_DEBUG=n

 #
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 510ffe8..475be6e 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -80,7 +80,11 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 * they need to be reserved.
 */
do {
+#ifdef RTE_LIBRTE_VHOST_LOCKESS_ENQ
res_base_idx = vq->last_used_idx_res;
+#else
+   res_base_idx = vq->last_used_idx;
+#endif
avail_idx = *((volatile uint16_t *)>avail->idx);

free_entries = (avail_idx - res_base_idx);
@@ -92,10 +96,15 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
return 0;

res_end_idx = res_base_idx + count;
+
+#ifdef RTE_LIBRTE_VHOST_LOCKLESS_ENQ
/* vq->last_used_idx_res is atomically updated. */
-   /* TODO: Allow to disable cmpset if no concurrency in 
application. */
success = rte_atomic16_cmpset(>last_used_idx_res,
res_base_idx, res_end_idx);
+#else
+   /* last_used_idx_res isn't used. */
+   success = 1;
+#endif
} while (unlikely(success == 0));
res_cur_idx = res_base_idx;
LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
@@ -171,9 +180,11 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,

rte_compiler_barrier();

+#ifdef RTE_LIBRTE_VHOST_LOCKLESS_ENQ
/* Wait until it's our turn to add our buffer to the used ring. */
while (unlikely(vq->last_used_idx != res_base_idx))
rte_pause();
+#endif

*(volatile uint16_t *)>used->idx += count;
vq->last_used_idx = res_end_idx;
@@ -422,11 +433,15 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
uint16_t i, id;

do {
+#ifdef RTE_LIBRTE_VHOST_LOCKLESS_ENQ
/*
 * As many data cores may want access to available
 * buffers, they need to be reserved.
 */
res_base_idx = vq->last_used_idx_res;
+#else
+   res_base_idx = vq->last_used_idx;
+#endif
res_cur_idx = res_base_idx;

do {
@@ -459,10 +474,15 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
}
} while (pkt_len > secure_len);

+#ifdef RTE_LIBRTE_VHOST_LOCKLESS_ENQ
/* vq->last_used_idx_res is atomically updated. */
success = rte_atomic16_cmpset(>last_used_idx_res,
res_base_idx,
res_cur_idx);
+#else
+   /* last_used_idx_res isn't used. */
+   success = 1;
+#endif
} while (success == 0);

id = res_base_idx;
@@ -495,12 +515,14 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,

rte_compiler_barrier();

+#ifdef RTE_LIBRTE_VHOST_LOCKLESS_ENQ
/*
 * Wait until it's our turn to add our buffer
 * to the used ring.
 */
while (unlikely(vq->last_used_idx != res_base_idx))
rte_pause();
+#endif

*(volatile uint16_t *)>used->idx += entry_success;
vq->last_used_idx = res_end_idx;
-- 
1.8.1.4



[dpdk-dev] [PATCH 0/2] provide rte_pktmbuf_alloc_bulk API and call it in vhost dequeue

2015-12-14 Thread Huawei Xie
As for a symmetric rte_pktmbuf_free_bulk: if the app knows that in its scenarios
the mbufs are all simple mbufs, i.e. they meet the following requirements:
 * no multiple segments
 * not indirect mbuf
 * refcnt is 1
 * belong to the same mbuf memory pool,
it could directly call rte_mempool_put to free the bulk of mbufs;
otherwise rte_pktmbuf_free_bulk has to call rte_pktmbuf_free to free
the mbufs one by one.
This patchset will not provide this symmetric implementation.

Huawei Xie (2):
  mbuf: provide rte_pktmbuf_alloc_bulk API
  vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

 lib/librte_mbuf/rte_mbuf.h| 31 +++
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 2 files changed, 53 insertions(+), 13 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH 2/2] vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

2015-12-14 Thread Huawei Xie
pre-allocate a bulk of mbufs instead of allocating one mbuf at a time on demand

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..0faae58 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -576,6 +576,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t i;
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;
+   uint8_t alloc_err = 0;
+   uint8_t seg_num;

if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA,
@@ -609,6 +611,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
dev->device_fh, free_entries);
+
+   if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool,
+   pkts, free_entries)) < 0) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to bulk allocating %d mbufs\n", free_entries);
+   return 0;
+   }
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
1)];
@@ -621,9 +631,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t vb_avail, vb_offset;
uint32_t seg_avail, seg_offset;
uint32_t cpy_len;
-   uint32_t seg_num = 0;
+   seg_num = 0;
struct rte_mbuf *cur;
-   uint8_t alloc_err = 0;
+

desc = >desc[head[entry_success]];

@@ -654,13 +664,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->ring[used_idx].id = head[entry_success];
vq->used->ring[used_idx].len = 0;

-   /* Allocate an mbuf and populate the structure. */
-   m = rte_pktmbuf_alloc(mbuf_pool);
-   if (unlikely(m == NULL)) {
-   RTE_LOG(ERR, VHOST_DATA,
-   "Failed to allocate memory for mbuf.\n");
-   break;
-   }
+   prev = cur = m = pkts[entry_success];
seg_offset = 0;
seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -668,8 +672,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

seg_num++;
-   cur = m;
-   prev = m;
while (cpy_len != 0) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
seg_offset),
(void *)((uintptr_t)(vb_addr + vb_offset)),
@@ -761,16 +763,23 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
cpy_len = RTE_MIN(vb_avail, seg_avail);
}

-   if (unlikely(alloc_err == 1))
+   if (unlikely(alloc_err))
break;

m->nb_segs = seg_num;

-   pkts[entry_success] = m;
vq->last_used_idx++;
entry_success++;
}

+   if (unlikely(alloc_err)) {
+   uint16_t i = entry_success;
+
+   m->nb_segs = seg_num;
+   for (; i < free_entries; i++)
+   rte_pktmbuf_free(pkts[entry_success]);
+   }
+
rte_compiler_barrier();
vq->used->idx += entry_success;
/* Kick guest if required. */
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 0/2] provide rte_pktmbuf_alloc_bulk API and call it in vhost dequeue

2015-12-14 Thread Huawei Xie
v2 changes:
 unroll the loop in rte_pktmbuf_alloc_bulk to help the performance

As for a symmetric rte_pktmbuf_free_bulk: if the app knows that in its scenarios
the mbufs are all simple mbufs, i.e. they meet the following requirements:
 * no multiple segments
 * not indirect mbuf
 * refcnt is 1
 * belong to the same mbuf memory pool,
it could directly call rte_mempool_put to free the bulk of mbufs;
otherwise rte_pktmbuf_free_bulk has to call rte_pktmbuf_free to free
the mbufs one by one.
This patchset will not provide this symmetric implementation.

Huawei Xie (2):
  mbuf: provide rte_pktmbuf_alloc_bulk API
  vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

 lib/librte_mbuf/rte_mbuf.h| 50 +++
 lib/librte_vhost/vhost_rxtx.c | 35 +++---
 2 files changed, 72 insertions(+), 13 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v2 1/2] mbuf: provide rte_pktmbuf_alloc_bulk API

2015-12-14 Thread Huawei Xie
v2 changes:
 unroll the loop a bit to help the performance

rte_pktmbuf_alloc_bulk allocates a bulk of packet mbufs.

There is a related thread about this bulk API.
http://dpdk.org/dev/patchwork/patch/4718/
Thanks to Konstantin's loop unrolling.

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_mbuf/rte_mbuf.h | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f234ac9..4e209e0 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1336,6 +1336,56 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
 }

 /**
+ * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
+ * values.
+ *
+ *  @param pool
+ *The mempool from which mbufs are allocated.
+ *  @param mbufs
+ *Array of pointers to mbufs
+ *  @param count
+ *Array size
+ *  @return
+ *   - 0: Success
+ */
+static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
+struct rte_mbuf **mbufs, unsigned count)
+{
+   unsigned idx = 0;
+   int rc;
+
+   rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
+   if (unlikely(rc))
+   return rc;
+
+   switch (count % 4) {
+   while (idx != count) {
+   case 0:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 3:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 2:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 1:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   }
+   }
+   return 0;
+}
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
  * After attachment we refer the mbuf we attached as 'indirect',
-- 
1.8.1.4



[dpdk-dev] [PATCH v2 2/2] vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

2015-12-14 Thread Huawei Xie
pre-allocate a bulk of mbufs instead of allocating one mbuf at a time on demand

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..0faae58 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -576,6 +576,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t i;
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;
+   uint8_t alloc_err = 0;
+   uint8_t seg_num;

if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA,
@@ -609,6 +611,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
dev->device_fh, free_entries);
+
+   if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool,
+   pkts, free_entries)) < 0) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to bulk allocating %d mbufs\n", free_entries);
+   return 0;
+   }
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
1)];
@@ -621,9 +631,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t vb_avail, vb_offset;
uint32_t seg_avail, seg_offset;
uint32_t cpy_len;
-   uint32_t seg_num = 0;
+   seg_num = 0;
struct rte_mbuf *cur;
-   uint8_t alloc_err = 0;
+

desc = >desc[head[entry_success]];

@@ -654,13 +664,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->ring[used_idx].id = head[entry_success];
vq->used->ring[used_idx].len = 0;

-   /* Allocate an mbuf and populate the structure. */
-   m = rte_pktmbuf_alloc(mbuf_pool);
-   if (unlikely(m == NULL)) {
-   RTE_LOG(ERR, VHOST_DATA,
-   "Failed to allocate memory for mbuf.\n");
-   break;
-   }
+   prev = cur = m = pkts[entry_success];
seg_offset = 0;
seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -668,8 +672,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

seg_num++;
-   cur = m;
-   prev = m;
while (cpy_len != 0) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
seg_offset),
(void *)((uintptr_t)(vb_addr + vb_offset)),
@@ -761,16 +763,23 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
cpy_len = RTE_MIN(vb_avail, seg_avail);
}

-   if (unlikely(alloc_err == 1))
+   if (unlikely(alloc_err))
break;

m->nb_segs = seg_num;

-   pkts[entry_success] = m;
vq->last_used_idx++;
entry_success++;
}

+   if (unlikely(alloc_err)) {
+   uint16_t i = entry_success;
+
+   m->nb_segs = seg_num;
+   for (; i < free_entries; i++)
+   rte_pktmbuf_free(pkts[entry_success]);
+   }
+
rte_compiler_barrier();
vq->used->idx += entry_success;
/* Kick guest if required. */
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 1/2] mbuf: provide rte_pktmbuf_alloc_bulk API

2015-12-23 Thread Huawei Xie
v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop a bit to help the performance

rte_pktmbuf_alloc_bulk allocates a bulk of packet mbufs.

There is a related thread about this bulk API.
http://dpdk.org/dev/patchwork/patch/4718/
Thanks to Konstantin's loop unrolling.

Attached the wiki page about duff's device. It explains the performance
optimization through loop unwinding, and also the most dramatic use of
case label fall-through.
https://en.wikipedia.org/wiki/Duff%27s_device

In our implementation, we use while() loop rather than do{} while() loop
because we could not assume count is strictly positive. Using while()
loop saves one line of check if count is zero.

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_mbuf/rte_mbuf.h | 49 ++
 1 file changed, 49 insertions(+)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f234ac9..3381c28 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1336,6 +1336,55 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
 }

 /**
+ * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
+ * values.
+ *
+ *  @param pool
+ *The mempool from which mbufs are allocated.
+ *  @param mbufs
+ *Array of pointers to mbufs
+ *  @param count
+ *Array size
+ *  @return
+ *   - 0: Success
+ */
+static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
+struct rte_mbuf **mbufs, unsigned count)
+{
+   unsigned idx = 0;
+   int rc;
+
+   rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
+   if (unlikely(rc))
+   return rc;
+
+   switch (count % 4) {
+   case 0: while (idx != count) {
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 3:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 2:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 1:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   }
+   }
+   return 0;
+}
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
  * After attachment we refer the mbuf we attached as 'indirect',
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 2/2] vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

2015-12-23 Thread Huawei Xie
pre-allocate a bulk of mbufs instead of allocating one mbuf at a time on demand

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..0faae58 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -576,6 +576,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t i;
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;
+   uint8_t alloc_err = 0;
+   uint8_t seg_num;

if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA,
@@ -609,6 +611,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
dev->device_fh, free_entries);
+
+   if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool,
+   pkts, free_entries)) < 0) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to bulk allocating %d mbufs\n", free_entries);
+   return 0;
+   }
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
1)];
@@ -621,9 +631,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t vb_avail, vb_offset;
uint32_t seg_avail, seg_offset;
uint32_t cpy_len;
-   uint32_t seg_num = 0;
+   seg_num = 0;
struct rte_mbuf *cur;
-   uint8_t alloc_err = 0;
+

desc = >desc[head[entry_success]];

@@ -654,13 +664,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->ring[used_idx].id = head[entry_success];
vq->used->ring[used_idx].len = 0;

-   /* Allocate an mbuf and populate the structure. */
-   m = rte_pktmbuf_alloc(mbuf_pool);
-   if (unlikely(m == NULL)) {
-   RTE_LOG(ERR, VHOST_DATA,
-   "Failed to allocate memory for mbuf.\n");
-   break;
-   }
+   prev = cur = m = pkts[entry_success];
seg_offset = 0;
seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -668,8 +672,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

seg_num++;
-   cur = m;
-   prev = m;
while (cpy_len != 0) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
seg_offset),
(void *)((uintptr_t)(vb_addr + vb_offset)),
@@ -761,16 +763,23 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
cpy_len = RTE_MIN(vb_avail, seg_avail);
}

-   if (unlikely(alloc_err == 1))
+   if (unlikely(alloc_err))
break;

m->nb_segs = seg_num;

-   pkts[entry_success] = m;
vq->last_used_idx++;
entry_success++;
}

+   if (unlikely(alloc_err)) {
+   uint16_t i = entry_success;
+
+   m->nb_segs = seg_num;
+   for (; i < free_entries; i++)
+   rte_pktmbuf_free(pkts[entry_success]);
+   }
+
rte_compiler_barrier();
vq->used->idx += entry_success;
/* Kick guest if required. */
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 0/2] provide rte_pktmbuf_alloc_bulk API and call it in vhost dequeue

2015-12-23 Thread Huawei Xie
v4 changes:
 fix a silly typo in error handling when rte_pktmbuf_alloc fails

v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop in rte_pktmbuf_alloc_bulk to help the performance

Regarding a symmetric rte_pktmbuf_free_bulk: if the app knows that, in its
scenario, its mbufs are all simple mbufs, i.e. they meet the following
requirements:
 * no multiple segments
 * not indirect mbuf
 * refcnt is 1
 * belong to the same mbuf memory pool,
it could directly call rte_mempool_put to free the bulk of mbufs;
otherwise rte_pktmbuf_free_bulk has to call rte_pktmbuf_free to free
the mbufs one by one.
This patchset does not provide this symmetric implementation.

Huawei Xie (2):
  mbuf: provide rte_pktmbuf_alloc_bulk API
  vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

 lib/librte_mbuf/rte_mbuf.h| 49 +++
 lib/librte_vhost/vhost_rxtx.c | 35 +++
 2 files changed, 71 insertions(+), 13 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v4 1/2] mbuf: provide rte_pktmbuf_alloc_bulk API

2015-12-23 Thread Huawei Xie
v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop a bit to help the performance

rte_pktmbuf_alloc_bulk allocates a bulk of packet mbufs.

There is a related thread about this bulk API.
http://dpdk.org/dev/patchwork/patch/4718/
Thanks to Konstantin's loop unrolling.

Attached the wiki page about duff's device. It explains the performance
optimization through loop unwinding, and also the most dramatic use of
case label fall-through.
https://en.wikipedia.org/wiki/Duff%27s_device

In our implementation, we use a while() loop rather than a do {} while() loop
because we cannot assume count is strictly positive. Using a while()
loop saves a separate check for count being zero.

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_mbuf/rte_mbuf.h | 49 ++
 1 file changed, 49 insertions(+)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f234ac9..3381c28 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1336,6 +1336,55 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
 }

 /**
+ * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
+ * values.
+ *
+ *  @param pool
+ *The mempool from which mbufs are allocated.
+ *  @param mbufs
+ *Array of pointers to mbufs
+ *  @param count
+ *Array size
+ *  @return
+ *   - 0: Success
+ */
+static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
+struct rte_mbuf **mbufs, unsigned count)
+{
+   unsigned idx = 0;
+   int rc;
+
+   rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
+   if (unlikely(rc))
+   return rc;
+
+   switch (count % 4) {
+   case 0: while (idx != count) {
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 3:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 2:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 1:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   }
+   }
+   return 0;
+}
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
  * After attachment we refer the mbuf we attached as 'indirect',
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 2/2] vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

2015-12-23 Thread Huawei Xie
v4 changes:
 fix a silly typo in error handling when rte_pktmbuf_alloc fails
reported by haifeng

pre-allocate a bulk of mbufs instead of allocating one mbuf at a time on demand

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Yuanhan Liu 
Tested-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..f10d534 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -576,6 +576,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t i;
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;
+   uint8_t alloc_err = 0;
+   uint8_t seg_num;

if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA,
@@ -609,6 +611,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
dev->device_fh, free_entries);
+
+   if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool,
+   pkts, free_entries)) < 0) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to bulk allocating %d mbufs\n", free_entries);
+   return 0;
+   }
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
1)];
@@ -621,9 +631,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t vb_avail, vb_offset;
uint32_t seg_avail, seg_offset;
uint32_t cpy_len;
-   uint32_t seg_num = 0;
+   seg_num = 0;
struct rte_mbuf *cur;
-   uint8_t alloc_err = 0;
+

desc = &vq->desc[head[entry_success]];

@@ -654,13 +664,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->ring[used_idx].id = head[entry_success];
vq->used->ring[used_idx].len = 0;

-   /* Allocate an mbuf and populate the structure. */
-   m = rte_pktmbuf_alloc(mbuf_pool);
-   if (unlikely(m == NULL)) {
-   RTE_LOG(ERR, VHOST_DATA,
-   "Failed to allocate memory for mbuf.\n");
-   break;
-   }
+   prev = cur = m = pkts[entry_success];
seg_offset = 0;
seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -668,8 +672,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

seg_num++;
-   cur = m;
-   prev = m;
while (cpy_len != 0) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
seg_offset),
(void *)((uintptr_t)(vb_addr + vb_offset)),
@@ -761,16 +763,23 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
cpy_len = RTE_MIN(vb_avail, seg_avail);
}

-   if (unlikely(alloc_err == 1))
+   if (unlikely(alloc_err))
break;

m->nb_segs = seg_num;

-   pkts[entry_success] = m;
vq->last_used_idx++;
entry_success++;
}

+   if (unlikely(alloc_err)) {
+   uint16_t i = entry_success;
+
+   m->nb_segs = seg_num;
+   for (; i < free_entries; i++)
+   rte_pktmbuf_free(pkts[i]);
+   }
+
rte_compiler_barrier();
vq->used->idx += entry_success;
/* Kick guest if required. */
-- 
1.8.1.4



[dpdk-dev] [PATCH 0/4] check if any kernel driver is manipulating the virtio device

2015-12-25 Thread Huawei Xie
The virtio PMD doesn't set RTE_PCI_DRV_NEED_MAPPING in the drv_flags of its
eth_driver. It tries igb_uio and port IO in turn to configure the
virtio device. Even if the user in the guest VM doesn't want to use virtio
for DPDK, the virtio PMD will take over the device blindly.

The more serious problem is that the kernel driver may still be manipulating
the device, which causes a driver conflict.

This patch checks if there is any kernel driver manipulating the
virtio device before virtio PMD uses port IO to configure the device.

Huawei Xie (4):
  eal: make the comment more accurate
  eal: set kdrv to RTE_KDRV_NONE if kernel driver isn't manipulating the device.
  virtio: return 1 to tell the kernel we don't take over this device
  virtio: check if any kernel driver is manipulating the virtio device

 drivers/net/virtio/virtio_ethdev.c | 15 +--
 lib/librte_eal/common/eal_common_pci.c |  8 
 lib/librte_eal/linuxapp/eal/eal_pci.c  |  2 +-
 3 files changed, 18 insertions(+), 7 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH 1/4] eal: make the comment more accurate

2015-12-25 Thread Huawei Xie

Signed-off-by: Huawei Xie 
---
 lib/librte_eal/common/eal_common_pci.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index dcfe947..bbcdb2b 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -204,7 +204,7 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, 
struct rte_pci_device *d
/* call the driver devinit() function */
return dr->devinit(dr, dev);
}
-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -259,7 +259,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
return 0;
}

-   /* return positive value if driver is not found */
+   /* return positive value if driver doesn't support this device */
return 1;
 }

@@ -283,7 +283,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
@@ -310,7 +310,7 @@ pci_detach_all_drivers(struct rte_pci_device *dev)
/* negative value is an error */
return -1;
if (rc > 0)
-   /* positive value means driver not found */
+   /* positive value means driver doesn't support it */
continue;
return 0;
}
-- 
1.8.1.4



[dpdk-dev] [PATCH 2/4] eal: set kdrv to RTE_KDRV_NONE if kernel driver isn't manipulating the device.

2015-12-25 Thread Huawei Xie
Use RTE_KDRV_NONE to indicate that kernel driver isn't manipulating the
device.

Signed-off-by: Huawei Xie 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index bc5b5be..640b190 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -362,7 +362,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t 
bus,
else
dev->kdrv = RTE_KDRV_UNKNOWN;
} else
-   dev->kdrv = RTE_KDRV_UNKNOWN;
+   dev->kdrv = RTE_KDRV_NONE;

/* device is valid, add in list (sorted) */
if (TAILQ_EMPTY(_device_list)) {
-- 
1.8.1.4



[dpdk-dev] [PATCH 4/4] virtio: check if any kernel driver is manipulating the device

2015-12-25 Thread Huawei Xie
virtio PMD could use IO port to configure the virtio device without
using uio driver.

There are two issues with previous implementation:
1) virtio PMD will take over each virtio device blindly even if some
are not intended for DPDK.
2) driver conflict between virtio PMD and virtio-net kernel driver.

This patch checks if there is any kernel driver manipulating the virtio
device before virtio PMD uses IO port to configure the device.

Fixes: da978dfdc43b ("virtio: use port IO to get PCI resource")

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 00015ef..504346a 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1138,6 +1138,13 @@ static int virtio_resource_init_by_ioports(struct 
rte_pci_device *pci_dev)
int found = 0;
size_t linesz;

+   if (pci_dev->kdrv != RTE_KDRV_NONE) {
+   PMD_INIT_LOG(ERR,
+   "%s(): kernel driver is manipulating this device." \
+   " Please unbind the kernel driver.", __func__);
+   return -1;
+   }
+
snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
 pci_dev->addr.domain,
 pci_dev->addr.bus,
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 0/2] provide rte_pktmbuf_alloc_bulk API and call it in vhost dequeue

2015-12-28 Thread Huawei Xie
v5 changes:
 add comment about duff's device and our variant implementation

v4 changes:
 fix a silly typo in error handling when rte_pktmbuf_alloc fails

v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop in rte_pktmbuf_alloc_bulk to help the performance

Regarding a symmetric rte_pktmbuf_free_bulk: if the app knows that, in its
scenario, its mbufs are all simple mbufs, i.e. they meet the following
requirements:
 * no multiple segments
 * not indirect mbuf
 * refcnt is 1
 * belong to the same mbuf memory pool,
it could directly call rte_mempool_put to free the bulk of mbufs;
otherwise rte_pktmbuf_free_bulk has to call rte_pktmbuf_free to free
the mbufs one by one.
This patchset does not provide this symmetric implementation.

Huawei Xie (2):
  mbuf: provide rte_pktmbuf_alloc_bulk API
  vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

 lib/librte_mbuf/rte_mbuf.h| 55 +++
 lib/librte_vhost/vhost_rxtx.c | 35 +--
 2 files changed, 77 insertions(+), 13 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v5 1/2] mbuf: provide rte_pktmbuf_alloc_bulk API

2015-12-28 Thread Huawei Xie
v5 changes:
 add comment about duff's device and our variant implementation
 revise the code style a bit

v3 changes:
 move while after case 0
 add context about duff's device and why we use while loop in the commit
message

v2 changes:
 unroll the loop a bit to help the performance

rte_pktmbuf_alloc_bulk allocates a bulk of packet mbufs.

There is a related thread about this bulk API.
http://dpdk.org/dev/patchwork/patch/4718/
Thanks to Konstantin's loop unrolling.

Attached the wiki page about duff's device. It explains the performance
optimization through loop unwinding, and also the most dramatic use of
case label fall-through.
https://en.wikipedia.org/wiki/Duff%27s_device

In our implementation, we use a while() loop rather than a do {} while() loop
because we cannot assume count is strictly positive. Using a while()
loop saves a separate check for count being zero.

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 lib/librte_mbuf/rte_mbuf.h | 55 ++
 1 file changed, 55 insertions(+)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f234ac9..b2ed479 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1336,6 +1336,61 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
 }

 /**
+ * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
+ * values.
+ *
+ *  @param pool
+ *The mempool from which mbufs are allocated.
+ *  @param mbufs
+ *Array of pointers to mbufs
+ *  @param count
+ *Array size
+ *  @return
+ *   - 0: Success
+ */
+static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
+struct rte_mbuf **mbufs, unsigned count)
+{
+   unsigned idx = 0;
+   int rc;
+
+   rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
+   if (unlikely(rc))
+   return rc;
+
+   /* To understand duff's device on loop unwinding optimization, see
+* https://en.wikipedia.org/wiki/Duff's_device.
+* Here while() loop is used rather than do {} while () to avoid extra
+* check if count is zero.
+*/
+   switch (count % 4) {
+   case 0:
+   while (idx != count) {
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 3:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 2:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   case 1:
+   RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+   rte_mbuf_refcnt_set(mbufs[idx], 1);
+   rte_pktmbuf_reset(mbufs[idx]);
+   idx++;
+   }
+   }
+   return 0;
+}
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
  * After attachment we refer the mbuf we attached as 'indirect',
-- 
1.8.1.4



[dpdk-dev] [PATCH v5 2/2] vhost: call rte_pktmbuf_alloc_bulk in vhost dequeue

2015-12-28 Thread Huawei Xie
v4 changes:
 fix a silly typo in error handling when rte_pktmbuf_alloc fails
reported by haifeng

pre-allocate a bulk of mbufs instead of allocating one mbuf at a time on demand

Signed-off-by: Gerald Rogers 
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Yuanhan Liu 
Tested-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_rxtx.c | 35 ++-
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index bbf3fac..f10d534 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -576,6 +576,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t i;
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;
+   uint8_t alloc_err = 0;
+   uint8_t seg_num;

if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
RTE_LOG(ERR, VHOST_DATA,
@@ -609,6 +611,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
dev->device_fh, free_entries);
+
+   if (unlikely(rte_pktmbuf_alloc_bulk(mbuf_pool,
+   pkts, free_entries)) < 0) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to bulk allocating %d mbufs\n", free_entries);
+   return 0;
+   }
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 
1)];
@@ -621,9 +631,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint32_t vb_avail, vb_offset;
uint32_t seg_avail, seg_offset;
uint32_t cpy_len;
-   uint32_t seg_num = 0;
+   seg_num = 0;
struct rte_mbuf *cur;
-   uint8_t alloc_err = 0;
+

desc = &vq->desc[head[entry_success]];

@@ -654,13 +664,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->ring[used_idx].id = head[entry_success];
vq->used->ring[used_idx].len = 0;

-   /* Allocate an mbuf and populate the structure. */
-   m = rte_pktmbuf_alloc(mbuf_pool);
-   if (unlikely(m == NULL)) {
-   RTE_LOG(ERR, VHOST_DATA,
-   "Failed to allocate memory for mbuf.\n");
-   break;
-   }
+   prev = cur = m = pkts[entry_success];
seg_offset = 0;
seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -668,8 +672,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

seg_num++;
-   cur = m;
-   prev = m;
while (cpy_len != 0) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 
seg_offset),
(void *)((uintptr_t)(vb_addr + vb_offset)),
@@ -761,16 +763,23 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
cpy_len = RTE_MIN(vb_avail, seg_avail);
}

-   if (unlikely(alloc_err == 1))
+   if (unlikely(alloc_err))
break;

m->nb_segs = seg_num;

-   pkts[entry_success] = m;
vq->last_used_idx++;
entry_success++;
}

+   if (unlikely(alloc_err)) {
+   uint16_t i = entry_success;
+
+   m->nb_segs = seg_num;
+   for (; i < free_entries; i++)
+   rte_pktmbuf_free(pkts[i]);
+   }
+
rte_compiler_barrier();
vq->used->idx += entry_success;
/* Kick guest if required. */
-- 
1.8.1.4



[dpdk-dev] [PATCH] remove redundant __func__ in PMD_INIT_LOG and PMD_RX_LOG

2015-12-28 Thread Huawei Xie

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c   | 12 +---
 drivers/net/vmxnet3/vmxnet3_ethdev.c |  6 +++---
 drivers/net/vmxnet3/vmxnet3_rxtx.c   |  2 +-
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index d928339..f19306f 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -150,9 +150,7 @@ virtio_send_command(struct virtqueue *vq, struct 
virtio_pmd_ctrl *ctrl,
ctrl->status = status;

if (!(vq && vq->hw->cvq)) {
-   PMD_INIT_LOG(ERR,
-"%s(): Control queue is not supported.",
-__func__);
+   PMD_INIT_LOG(ERR, "Control queue is not supported.");
return -1;
}
head = vq->vq_desc_head_idx;
@@ -306,12 +304,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
vq_size = VIRTIO_READ_REG_2(hw, VIRTIO_PCI_QUEUE_NUM);
PMD_INIT_LOG(DEBUG, "vq_size: %u nb_desc:%u", vq_size, nb_desc);
if (vq_size == 0) {
-   PMD_INIT_LOG(ERR, "%s: virtqueue does not exist", __func__);
+   PMD_INIT_LOG(ERR, "virtqueue does not exist");
return -EINVAL;
}

if (!rte_is_power_of_2(vq_size)) {
-   PMD_INIT_LOG(ERR, "%s: virtqueue size is not powerof 2", 
__func__);
+   PMD_INIT_LOG(ERR, "virtqueue size is not powerof 2");
return -EINVAL;
}

@@ -336,7 +334,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
RTE_CACHE_LINE_SIZE);
}
if (vq == NULL) {
-   PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
+   PMD_INIT_LOG(ERR, "Can not allocate virtqueue");
return (-ENOMEM);
}
if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
@@ -1146,7 +1144,7 @@ static int virtio_resource_init_by_ioports(struct 
rte_pci_device *pci_dev)

fp = fopen("/proc/ioports", "r");
if (fp == NULL) {
-   PMD_INIT_LOG(ERR, "%s(): can't open ioports", __func__);
+   PMD_INIT_LOG(ERR, "can't open ioports");
return -1;
}

diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c 
b/drivers/net/vmxnet3/vmxnet3_ethdev.c
index c363bf6..f5834d6 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
@@ -564,7 +564,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
status = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);

if (status != 0) {
-   PMD_INIT_LOG(ERR, "Device activation in %s(): UNSUCCESSFUL", 
__func__);
+   PMD_INIT_LOG(ERR, "Device activation: UNSUCCESSFUL");
return -1;
}

@@ -577,7 +577,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
 */
ret = vmxnet3_dev_rxtx_init(dev);
if (ret != VMXNET3_SUCCESS) {
-   PMD_INIT_LOG(ERR, "Device receive init in %s: UNSUCCESSFUL", 
__func__);
+   PMD_INIT_LOG(ERR, "Device receive init: UNSUCCESSFUL");
return ret;
}

@@ -882,7 +882,7 @@ vmxnet3_process_events(struct vmxnet3_hw *hw)
uint32_t events = hw->shared->ecr;

if (!events) {
-   PMD_INIT_LOG(ERR, "No events to process in %s()", __func__);
+   PMD_INIT_LOG(ERR, "No events to process");
return;
}

diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c 
b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 4de5d89..e592010 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -462,7 +462,7 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t 
ring_id)
/* Allocate blank mbuf for the current Rx Descriptor */
mbuf = rte_rxmbuf_alloc(rxq->mp);
if (unlikely(mbuf == NULL)) {
-   PMD_RX_LOG(ERR, "Error allocating mbuf in %s", 
__func__);
+   PMD_RX_LOG(ERR, "Error allocating mbuf");
rxq->stats.rx_buf_alloc_failure++;
err = ENOMEM;
break;
-- 
1.8.1.4



[dpdk-dev] [PATCH] remove redundant __func__ in PMD_INIT_LOG and PMD_RX_LOG

2015-12-28 Thread Huawei Xie

Signed-off-by: Huawei Xie 
---
 drivers/net/virtio/virtio_ethdev.c   | 12 +---
 drivers/net/vmxnet3/vmxnet3_ethdev.c |  6 +++---
 drivers/net/vmxnet3/vmxnet3_rxtx.c   |  2 +-
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index d928339..f19306f 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -150,9 +150,7 @@ virtio_send_command(struct virtqueue *vq, struct 
virtio_pmd_ctrl *ctrl,
ctrl->status = status;

if (!(vq && vq->hw->cvq)) {
-   PMD_INIT_LOG(ERR,
-"%s(): Control queue is not supported.",
-__func__);
+   PMD_INIT_LOG(ERR, "Control queue is not supported.");
return -1;
}
head = vq->vq_desc_head_idx;
@@ -306,12 +304,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
vq_size = VIRTIO_READ_REG_2(hw, VIRTIO_PCI_QUEUE_NUM);
PMD_INIT_LOG(DEBUG, "vq_size: %u nb_desc:%u", vq_size, nb_desc);
if (vq_size == 0) {
-   PMD_INIT_LOG(ERR, "%s: virtqueue does not exist", __func__);
+   PMD_INIT_LOG(ERR, "virtqueue does not exist");
return -EINVAL;
}

if (!rte_is_power_of_2(vq_size)) {
-   PMD_INIT_LOG(ERR, "%s: virtqueue size is not powerof 2", 
__func__);
+   PMD_INIT_LOG(ERR, "virtqueue size is not powerof 2");
return -EINVAL;
}

@@ -336,7 +334,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
RTE_CACHE_LINE_SIZE);
}
if (vq == NULL) {
-   PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
+   PMD_INIT_LOG(ERR, "Can not allocate virtqueue");
return (-ENOMEM);
}
if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
@@ -1146,7 +1144,7 @@ static int virtio_resource_init_by_ioports(struct 
rte_pci_device *pci_dev)

fp = fopen("/proc/ioports", "r");
if (fp == NULL) {
-   PMD_INIT_LOG(ERR, "%s(): can't open ioports", __func__);
+   PMD_INIT_LOG(ERR, "can't open ioports");
return -1;
}

diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c 
b/drivers/net/vmxnet3/vmxnet3_ethdev.c
index c363bf6..f5834d6 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
@@ -564,7 +564,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
status = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);

if (status != 0) {
-   PMD_INIT_LOG(ERR, "Device activation in %s(): UNSUCCESSFUL", 
__func__);
+   PMD_INIT_LOG(ERR, "Device activation: UNSUCCESSFUL");
return -1;
}

@@ -577,7 +577,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
 */
ret = vmxnet3_dev_rxtx_init(dev);
if (ret != VMXNET3_SUCCESS) {
-   PMD_INIT_LOG(ERR, "Device receive init in %s: UNSUCCESSFUL", 
__func__);
+   PMD_INIT_LOG(ERR, "Device receive init: UNSUCCESSFUL");
return ret;
}

@@ -882,7 +882,7 @@ vmxnet3_process_events(struct vmxnet3_hw *hw)
uint32_t events = hw->shared->ecr;

if (!events) {
-   PMD_INIT_LOG(ERR, "No events to process in %s()", __func__);
+   PMD_INIT_LOG(ERR, "No events to process");
return;
}

diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c 
b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 4de5d89..e592010 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -462,7 +462,7 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t 
ring_id)
/* Allocate blank mbuf for the current Rx Descriptor */
mbuf = rte_rxmbuf_alloc(rxq->mp);
if (unlikely(mbuf == NULL)) {
-   PMD_RX_LOG(ERR, "Error allocating mbuf in %s", 
__func__);
+   PMD_RX_LOG(ERR, "Error allocating mbuf");
rxq->stats.rx_buf_alloc_failure++;
err = ENOMEM;
break;
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 0/2] vhost: numa aware allocation of vhost device and queues

2015-06-25 Thread Huawei Xie
The vhost device and queues should be allocated on the same numa node as the
vring descriptor table.
When we first allocate the vhost device and queues, we don't know the numa 
node of the vring descriptor table.
When we receive the VHOST_SET_VRING_ADDR message, we learn the numa node of the
vring descriptor table and try to reallocate the vhost device and queues on
the same numa node.


Huawei Xie (2):
  use rte_malloc to allocate vhost device and queues
  reallocate vhost device and queues when we get the address of vring 
descriptor table

 config/common_linuxapp|   1 +
 lib/librte_vhost/Makefile |   4 ++
 lib/librte_vhost/virtio-net.c | 107 ++
 mk/rte.app.mk |   4 ++
 4 files changed, 107 insertions(+), 9 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v3 1/2] vhost: use rte_malloc to allocate device and queues

2015-06-25 Thread Huawei Xie
use rte_malloc to allocate vhost device and queues


Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/virtio-net.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 4672e67..19b74d6 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include <rte_malloc.h>
 #include 

 #include "vhost-net.h"
@@ -202,9 +203,9 @@ static void
 free_device(struct virtio_net_config_ll *ll_dev)
 {
/* Free any malloc'd memory */
-   free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
-   free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
-   free(ll_dev);
+   rte_free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
+   rte_free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
+   rte_free(ll_dev);
 }

 /*
@@ -278,7 +279,7 @@ new_device(struct vhost_device_ctx ctx)
struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;

/* Setup device and virtqueues. */
-   new_ll_dev = malloc(sizeof(struct virtio_net_config_ll));
+   new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll), 0);
if (new_ll_dev == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%"PRIu64") Failed to allocate memory for dev.\n",
@@ -286,19 +287,19 @@ new_device(struct vhost_device_ctx ctx)
return -1;
}

-   virtqueue_rx = malloc(sizeof(struct vhost_virtqueue));
+   virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
if (virtqueue_rx == NULL) {
-   free(new_ll_dev);
+   rte_free(new_ll_dev);
RTE_LOG(ERR, VHOST_CONFIG,
"(%"PRIu64") Failed to allocate memory for rxq.\n",
ctx.fh);
return -1;
}

-   virtqueue_tx = malloc(sizeof(struct vhost_virtqueue));
+   virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
if (virtqueue_tx == NULL) {
-   free(virtqueue_rx);
-   free(new_ll_dev);
+   rte_free(virtqueue_rx);
+   rte_free(new_ll_dev);
RTE_LOG(ERR, VHOST_CONFIG,
"(%"PRIu64") Failed to allocate memory for txq.\n",
ctx.fh);
-- 
1.8.1.4



[dpdk-dev] [PATCH v3 2/2] vhost: realloc vhost device and queues to the same numa node of vring desc table

2015-06-25 Thread Huawei Xie
When we get the address of the vring descriptor table in the
VHOST_SET_VRING_ADDR message, we will try to reallocate the vhost device and
virt queue on the same numa node.

v3 changes:
- remove unnecessary rte_free of new_vq and new_ll_dev

v2 changes:
- fix uninitialised new_vq and new_ll_device
- fix missed endif in rte.app.mk
- fix new_ll_dev and new_vq allocation failure issue
- return old virtio device if new_ll_dev isn't allocated

Signed-off-by: Huawei Xie 
---
 config/common_linuxapp|  1 +
 lib/librte_vhost/Makefile |  4 ++
 lib/librte_vhost/virtio-net.c | 88 +++
 mk/rte.app.mk |  4 ++
 4 files changed, 97 insertions(+)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0078dc9..4ace24e 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -421,6 +421,7 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
 #
 CONFIG_RTE_LIBRTE_VHOST=n
 CONFIG_RTE_LIBRTE_VHOST_USER=y
+CONFIG_RTE_LIBRTE_VHOST_NUMA=n
 CONFIG_RTE_LIBRTE_VHOST_DEBUG=n

 #
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index a8645a6..6681f22 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -46,6 +46,10 @@ CFLAGS += -I vhost_cuse -lfuse
 LDFLAGS += -lfuse
 endif

+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
+LDFLAGS += -lnuma
+endif
+
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 19b74d6..fcaefd6 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -38,6 +38,9 @@
 #include 
 #include 
 #include 
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif

 #include 

@@ -481,6 +484,88 @@ set_vring_num(struct vhost_device_ctx ctx, struct 
vhost_vring_state *state)
 }

 /*
+ * Reallocate virtio_net and vhost_virtqueue data structure to make them on the
+ * same numa node as the memory of vring descriptor.
+ */
+#ifdef RTE_LIBRTE_VHOST_NUMA
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index)
+{
+   int oldnode, newnode;
+   struct virtio_net_config_ll *old_ll_dev, *new_ll_dev = NULL;
+   struct vhost_virtqueue *old_vq, *new_vq = NULL;
+   int ret;
+   int realloc_dev = 0, realloc_vq = 0;
+
+   old_ll_dev = (struct virtio_net_config_ll *)dev;
+   old_vq = dev->virtqueue[index];
+
+   ret  = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
+   MPOL_F_NODE | MPOL_F_ADDR);
+   ret = ret | get_mempolicy(&oldnode, NULL, 0, old_ll_dev,
+   MPOL_F_NODE | MPOL_F_ADDR);
+   if (ret) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "Unable to get vring desc or dev numa information.\n");
+   return dev;
+   }
+   if (oldnode != newnode)
+   realloc_dev = 1;
+
+   ret = get_mempolicy(&oldnode, NULL, 0, old_vq,
+   MPOL_F_NODE | MPOL_F_ADDR);
+   if (ret) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "Unable to get vq numa information.\n");
+   return dev;
+   }
+   if (oldnode != newnode)
+   realloc_vq = 1;
+
+   if (realloc_dev == 0 && realloc_vq == 0)
+   return dev;
+
+   if (realloc_dev)
+   new_ll_dev = rte_malloc_socket(NULL,
+   sizeof(struct virtio_net_config_ll), 0, newnode);
+   if (realloc_vq)
+   new_vq = rte_malloc_socket(NULL,
+   sizeof(struct vhost_virtqueue), 0, newnode);
+   if (!new_ll_dev && !new_vq)
+   return dev;
+
+   if (realloc_vq)
+   memcpy(new_vq, old_vq, sizeof(*new_vq));
+   if (realloc_dev)
+   memcpy(new_ll_dev, old_ll_dev, sizeof(*new_ll_dev));
+   (new_ll_dev ? new_ll_dev : old_ll_dev)->dev.virtqueue[index] =
+   new_vq ? new_vq : old_vq;
+   if (realloc_vq)
+   rte_free(old_vq);
+   if (realloc_dev) {
+   if (ll_root == old_ll_dev)
+   ll_root = new_ll_dev;
+   else {
+   struct virtio_net_config_ll *prev = ll_root;
+   while (prev->next != old_ll_dev)
+   prev = prev->next;
+   prev->next = new_ll_dev;
+   new_ll_dev->next = old_ll_dev->next;
+   }
+   rte_free(old_ll_dev);
+   }
+
+   return realloc_dev ? &new_ll_dev->dev : dev;
+}
+#else
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index __rte_unused)
+{
+   return dev;
+}
+#endif
+
+/*
  * Called from CUSE IOCTL: VHOST_SET_VRING_ADDR
  * The virtio device sends us the desc, used and avail ring addresses.
  * This function then converts these to our address space.
@@ -508,6 +593,9 @@ set_vring_addr(struct vhost_device_ctx

[dpdk-dev] [PATCH] vhost: turn on vhost by default

2015-06-30 Thread Huawei Xie
Previous vhost-cuse implementation requires fuse development package.
Now that we have vhost-user implementation, which is enabled by default
and doesn't require additional library to build, we could turn on vhost.

Signed-off-by: Huawei Xie 
---
 config/common_linuxapp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index aae22f4..085a6c5 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -427,7 +427,7 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
 # fuse-devel enables user space char driver development
 # vhost-user is turned on by default.
 #
-CONFIG_RTE_LIBRTE_VHOST=n
+CONFIG_RTE_LIBRTE_VHOST=y
 CONFIG_RTE_LIBRTE_VHOST_USER=y
 CONFIG_RTE_LIBRTE_VHOST_DEBUG=n

-- 
1.8.1.4



[dpdk-dev] [PATCH v4 0/4] vhost: vhost unix domain socket cleanup

2015-06-30 Thread Huawei Xie
vhost user can register multiple unix domain socket servers, and uses the path
to identify the virtio device connecting to each one. rte_vhost_driver_unregister
will clean up the unix domain socket for the specified path.

v2 changes:
-minor code style fix, remove unnecessary new line

v3 changes:
update version map file

v4 changes:
-add comment for potential unwanted callback on listenfds
-call fdset_del_slot to remove connection fd 

Huawei Xie (4):
  fdset_del_slot
  vhost socket cleanup
  version map file update
  add comment for potential unwanted call on listenfds

 lib/librte_vhost/rte_vhost_version.map   |  8 
 lib/librte_vhost/rte_virtio_net.h|  3 ++
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c |  9 
 lib/librte_vhost/vhost_user/fd_man.c | 34 +-
 lib/librte_vhost/vhost_user/vhost-net-user.c | 68 +++-
 lib/librte_vhost/vhost_user/vhost-net-user.h |  2 +-
 6 files changed, 110 insertions(+), 14 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v4 1/4] vhost: call fdset_del_slot to remove connection fd

2015-06-30 Thread Huawei Xie
In the event handler of a connection fd, the connection fd may be closed.
The event dispatch loop would then try to remove the fd from the fdset.
Between these two actions, another thread might register a new listenfd
that reuses the value of the just-closed fd, so we cannot call fdset_del,
which would wrongly clean up the new listenfd. A new function, fdset_del_slot,
is provided to clean up the fd at the specified slot.

v4 changes:
- call fdset_del_slot to remove connection fd

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/fd_man.c | 27 ++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost_user/fd_man.c 
b/lib/librte_vhost/vhost_user/fd_man.c
index 831c9c1..bd30f8d 100644
--- a/lib/librte_vhost/vhost_user/fd_man.c
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -188,6 +188,24 @@ fdset_del(struct fdset *pfdset, int fd)
 }

 /**
+ *  Unregister the fd at the specified slot from the fdset.
+ */
+static void
+fdset_del_slot(struct fdset *pfdset, int index)
+{
+   if (pfdset == NULL || index < 0 || index >= MAX_FDS)
+   return;
+
+   pthread_mutex_lock(&pfdset->fd_mutex);
+
+   pfdset->fd[index].fd = -1;
+   pfdset->fd[index].rcb = pfdset->fd[index].wcb = NULL;
+   pfdset->num--;
+
+   pthread_mutex_unlock(&pfdset->fd_mutex);
+}
+
+/**
  * This functions runs in infinite blocking loop until there is no fd in
  * pfdset. It calls corresponding r/w handler if there is event on the fd.
  *
@@ -248,8 +266,15 @@ fdset_event_dispatch(struct fdset *pfdset)
 * We don't allow fdset_del to be called in callback
 * directly.
 */
+   /*
+* The fd may have been closed inside the callback, so by
+* the time we clean it up from the fdset its value could
+* already have been reused by a new listen fd created in
+* another thread. We therefore must not call fdset_del,
+* which looks the entry up by fd value.
+*/
if (remove1 || remove2)
-   fdset_del(pfdset, fd);
+   fdset_del_slot(pfdset, i);
}
}
 }
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 2/4] vhost: vhost unix domain socket cleanup

2015-06-30 Thread Huawei Xie
rte_vhost_driver_unregister API will remove the listenfd from event list,
and then close it.

v2 changes:
-minor code style fix, remove unnecessary new line

Signed-off-by: Huawei Xie 
Signed-off-by: Peng Sun 
---
 lib/librte_vhost/rte_virtio_net.h|  3 ++
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c |  9 
 lib/librte_vhost/vhost_user/vhost-net-user.c | 68 +++-
 lib/librte_vhost/vhost_user/vhost-net-user.h |  2 +-
 4 files changed, 69 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 5d38185..5630fbc 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -188,6 +188,9 @@ int rte_vhost_enable_guest_notification(struct virtio_net 
*dev, uint16_t queue_i
 /* Register vhost driver. dev_name could be different for multiple instance 
support. */
 int rte_vhost_driver_register(const char *dev_name);

+/* Unregister vhost driver. This is only meaningful to vhost user. */
+int rte_vhost_driver_unregister(const char *dev_name);
+
 /* Register callbacks. */
 int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * 
const);
 /* Start vhost driver session blocking loop. */
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index 6b68abf..1ae7c49 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -405,6 +405,15 @@ rte_vhost_driver_register(const char *dev_name)
 }

 /**
+ * An empty function for unregister
+ */
+int
+rte_vhost_driver_unregister(const char *dev_name __rte_unused)
+{
+   return 0;
+}
+
+/**
  * The CUSE session is launched allowing the application to receive open,
  * release and ioctl calls.
  */
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 31f1215..87a4711 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -66,6 +66,8 @@ struct connfd_ctx {
 struct _vhost_server {
struct vhost_server *server[MAX_VHOST_SERVER];
struct fdset fdset;
+   int vserver_cnt;
+   pthread_mutex_t server_mutex;
 };

 static struct _vhost_server g_vhost_server = {
@@ -74,10 +76,10 @@ static struct _vhost_server g_vhost_server = {
.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
.num = 0
},
+   .vserver_cnt = 0,
+   .server_mutex = PTHREAD_MUTEX_INITIALIZER,
 };

-static int vserver_idx;
-
 static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_NONE] = "VHOST_USER_NONE",
[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
@@ -427,7 +429,6 @@ vserver_message_handler(int connfd, void *dat, int *remove)
}
 }

-
 /**
  * Creates and initialise the vhost server.
  */
@@ -436,34 +437,77 @@ rte_vhost_driver_register(const char *path)
 {
struct vhost_server *vserver;

-   if (vserver_idx == 0)
+   pthread_mutex_lock(_vhost_server.server_mutex);
+   if (ops == NULL)
ops = get_virtio_net_callbacks();
-   if (vserver_idx == MAX_VHOST_SERVER)
+
+   if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "error: the number of servers reaches maximum\n");
+   pthread_mutex_unlock(_vhost_server.server_mutex);
return -1;
+   }

vserver = calloc(sizeof(struct vhost_server), 1);
-   if (vserver == NULL)
+   if (vserver == NULL) {
+   pthread_mutex_unlock(_vhost_server.server_mutex);
return -1;
-
-   unlink(path);
+   }

vserver->listenfd = uds_socket(path);
if (vserver->listenfd < 0) {
free(vserver);
+   pthread_mutex_unlock(_vhost_server.server_mutex);
return -1;
}
-   vserver->path = path;
+
+   vserver->path = strdup(path);

fdset_add(_vhost_server.fdset, vserver->listenfd,
-   vserver_new_vq_conn, NULL,
-   vserver);
+   vserver_new_vq_conn, NULL, vserver);

-   g_vhost_server.server[vserver_idx++] = vserver;
+   g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
+   pthread_mutex_unlock(_vhost_server.server_mutex);

return 0;
 }


+/**
+ * Unregister the specified vhost server
+ */
+int
+rte_vhost_driver_unregister(const char *path)
+{
+   int i;
+   int count;
+
+   pthread_mutex_lock(_vhost_server.server_mutex);
+
+   for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
+   if (!strcmp(g_vhost_server.server[i]->path, path)) {
+   fdset_del(_vhost_server.fdset,
+   g_vhost_server.server[i]->listenfd);
+
+   close(g_vhost_server.server[i]->lis

[dpdk-dev] [PATCH v4 3/4] vhost: version map file update

2015-06-30 Thread Huawei Xie
update version map file for rte_vhost_driver_unregister API

v3 changes:
update version map file

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/rte_vhost_version.map | 8 
 1 file changed, 8 insertions(+)

diff --git a/lib/librte_vhost/rte_vhost_version.map 
b/lib/librte_vhost/rte_vhost_version.map
index 163dde0..fb6bb9e 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -13,3 +13,11 @@ DPDK_2.0 {

local: *;
 };
+
+DPDK_2.1 {
+   global:
+
+   rte_vhost_driver_unregister;
+
+   local: *;
+} DPDK_2.0;
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 4/4] vhost: add comment for potential unwanted callback on listenfds

2015-06-30 Thread Huawei Xie
add comment for potential unwanted callback on listenfds

v4 changes:
add comment for potential unwanted callback on listenfds

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/fd_man.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/lib/librte_vhost/vhost_user/fd_man.c 
b/lib/librte_vhost/vhost_user/fd_man.c
index bd30f8d..d68b270 100644
--- a/lib/librte_vhost/vhost_user/fd_man.c
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -242,6 +242,13 @@ fdset_event_dispatch(struct fdset *pfdset)

pthread_mutex_unlock(>fd_mutex);

+   /*
+* When select is blocked, other threads might unregister
+* listenfds from and register new listenfds into fdset.
+* When select returns, the entries for listenfds in the fdset
+* might have been updated. It is ok if there is unwanted call
+* for new listenfds.
+*/
ret = select(maxfds + 1, , , NULL, );
if (ret <= 0)
continue;
-- 
1.8.1.4



[dpdk-dev] [PATCH] lib/librte_vhost: remove vhost device from data plane when receive VHOST_SET_MEM_TABLE message

2015-03-03 Thread Huawei Xie
This patch fixes the segfault that occurs when vhost receives a new 
VHOST_SET_MEM_TABLE message without VHOST_VRING_GET_VRING_BASE (which we use as 
the stop message).

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/virtio-net-user.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 97c5177..aa08706 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -109,6 +109,10 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct 
VhostUserMsg *pmsg)
if (dev == NULL)
return -1;

+   /* Remove from the data plane. */
+   if (dev->flags & VIRTIO_DEV_RUNNING)
+   notify_ops->destroy_device(dev);
+
if (dev->mem) {
free_mem_region(dev);
free(dev->mem);
-- 
1.8.1.4



[dpdk-dev] [PATCH] lib/librte_vhost: combine select with sleep

2015-03-06 Thread Huawei Xie
Combine the sleep into select when there are no file descriptors to be monitored.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/fd_man.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/fd_man.c 
b/lib/librte_vhost/vhost_user/fd_man.c
index 63ac4df..a89b6fe 100644
--- a/lib/librte_vhost/vhost_user/fd_man.c
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -211,25 +211,26 @@ fdset_event_dispatch(struct fdset *pfdset)
void *dat;
int fd;
int remove1, remove2;
+   int ret;

if (pfdset == NULL)
return;

while (1) {
+   struct timeval tv;
+   tv.tv_sec = 1;
+   tv.tv_usec = 0;
FD_ZERO();
FD_ZERO();
pthread_mutex_lock(>fd_mutex);

maxfds = fdset_fill(, , pfdset);
-   if (maxfds == -1) {
-   pthread_mutex_unlock(>fd_mutex);
-   sleep(1);
-   continue;
-   }

pthread_mutex_unlock(>fd_mutex);

-   select(maxfds + 1, , , NULL, NULL);
+   ret = select(maxfds + 1, , , NULL, );
+   if (ret <= 0)
+   continue;

for (i = 0; i < num; i++) {
remove1 = remove2 = 0;
-- 
1.8.1.4



[dpdk-dev] [PATCH] lib/librte_vhost: use loop instead of goto

2015-03-06 Thread Huawei Xie
This patch reorders the code a bit to use a loop instead of goto.
Besides, remove the redundant check 'fd != -1'.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/fd_man.c | 26 +++---
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/fd_man.c 
b/lib/librte_vhost/vhost_user/fd_man.c
index a89b6fe..831c9c1 100644
--- a/lib/librte_vhost/vhost_user/fd_man.c
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -172,23 +172,19 @@ fdset_del(struct fdset *pfdset, int fd)
if (pfdset == NULL || fd == -1)
return;

-again:
-   pthread_mutex_lock(>fd_mutex);
+   do {
+   pthread_mutex_lock(>fd_mutex);

-   i = fdset_find_fd(pfdset, fd);
-   if (i != -1 && fd != -1) {
-   /* busy indicates r/wcb is executing! */
-   if (pfdset->fd[i].busy == 1) {
-   pthread_mutex_unlock(>fd_mutex);
-   goto again;
+   i = fdset_find_fd(pfdset, fd);
+   if (i != -1 && pfdset->fd[i].busy == 0) {
+   /* busy indicates r/wcb is executing! */
+   pfdset->fd[i].fd = -1;
+   pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
+   pfdset->num--;
+   i = -1;
}
-
-   pfdset->fd[i].fd = -1;
-   pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
-   pfdset->num--;
-   }
-
-   pthread_mutex_unlock(>fd_mutex);
+   pthread_mutex_unlock(>fd_mutex);
+   } while (i != -1);
 }

 /**
-- 
1.8.1.4



[dpdk-dev] [PATCH] lib/librte_vhost: exchange kickfd and callfd to avoid confusion

2015-03-06 Thread Huawei Xie
The previous vhost implementation wrongly named kickfd as callfd and callfd as 
kickfd.
It is functionally correct, but causes confusion.

Signed-off-by: Huawei Xie 
---
 examples/vhost/main.c |  6 +++---
 lib/librte_vhost/rte_virtio_net.h |  4 ++--
 lib/librte_vhost/vhost_rxtx.c |  6 +++---
 lib/librte_vhost/vhost_user/virtio-net-user.c | 12 ++--
 lib/librte_vhost/virtio-net.c | 12 ++--
 5 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 334e2fe..61ea671 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -1434,7 +1434,7 @@ put_desc_to_used_list_zcp(struct vhost_virtqueue *vq, 
uint16_t desc_idx)

/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-   eventfd_write((int)vq->kickfd, 1);
+   eventfd_write((int)vq->callfd, 1);
 }

 /*
@@ -1627,7 +1627,7 @@ txmbuf_clean_zcp(struct virtio_net *dev, struct vpool 
*vpool)

/* Kick guest if required. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-   eventfd_write((int)vq->kickfd, 1);
+   eventfd_write((int)vq->callfd, 1);

return 0;
 }
@@ -1775,7 +1775,7 @@ virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf 
**pkts,

/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-   eventfd_write((int)vq->kickfd, 1);
+   eventfd_write((int)vq->callfd, 1);

return count;
 }
diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 611a3d4..2fc1c44 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -86,8 +86,8 @@ struct vhost_virtqueue {
uint16_tvhost_hlen; /**< Vhost header 
length (varies depending on RX merge buffers. */
volatile uint16_t   last_used_idx;  /**< Last index used on 
the available ring */
volatile uint16_t   last_used_idx_res;  /**< Used for multiple 
devices reserving buffers. */
-   eventfd_t   callfd; /**< Currently unused 
as polling mode is enabled. */
-   eventfd_t   kickfd; /**< Used to notify the 
guest (trigger interrupt). */
+   eventfd_t   callfd; /**< Used to notify the 
guest (trigger interrupt). */
+   eventfd_t   kickfd; /**< Currently unused 
as polling mode is enabled. */
struct buf_vector   buf_vec[BUF_VECTOR_MAX];/**< for 
scatter RX. */
 } __rte_cache_aligned;

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index c7c9550..535c7a1 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -180,7 +180,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,

/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-   eventfd_write((int)vq->kickfd, 1);
+   eventfd_write((int)vq->callfd, 1);
return count;
 }

@@ -507,7 +507,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,

/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-   eventfd_write((int)vq->kickfd, 1);
+   eventfd_write((int)vq->callfd, 1);
}

return count;
@@ -725,6 +725,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
vq->used->idx += entry_success;
/* Kick guest if required. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-   eventfd_write((int)vq->kickfd, 1);
+   eventfd_write((int)vq->callfd, 1);
return entry_success;
 }
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 97c5177..e0c7394 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -286,13 +286,13 @@ user_get_vring_base(struct vhost_device_ctx ctx,
 * sent and only sent in vhost_vring_stop.
 * TODO: cleanup the vring, it isn't usable since here.
 */
-   if (((int)dev->virtqueue[VIRTIO_RXQ]->callfd) >= 0) {
-   close(dev->virtqueue[VIRTIO_RXQ]->callfd);
-   dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
+   if (((int)dev->virtqueue[VIRTIO_RXQ]->kickfd) >= 0) {
+   close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
+   dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
}
-   if (((int)dev->virtqueue[VIRTIO_TXQ]->c

[dpdk-dev] [PATCH] test whether file descriptor is valid before close it

2015-03-06 Thread Huawei Xie
This avoids closing -1 in our case.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/virtio-net.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 6917fcf..4672e67 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -185,13 +185,13 @@ cleanup_device(struct virtio_net *dev)
}

/* Close any event notifiers opened by device. */
-   if (dev->virtqueue[VIRTIO_RXQ]->callfd)
+   if ((int)dev->virtqueue[VIRTIO_RXQ]->callfd >= 0)
close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
-   if (dev->virtqueue[VIRTIO_RXQ]->kickfd)
+   if ((int)dev->virtqueue[VIRTIO_RXQ]->kickfd >= 0)
close((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);
-   if (dev->virtqueue[VIRTIO_TXQ]->callfd)
+   if ((int)dev->virtqueue[VIRTIO_TXQ]->callfd >= 0)
close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
-   if (dev->virtqueue[VIRTIO_TXQ]->kickfd)
+   if ((int)dev->virtqueue[VIRTIO_TXQ]->kickfd >= 0)
close((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);
 }

-- 
1.8.1.4



[dpdk-dev] [PATCH] claim responsibility for KVM virtio PMD, vhost backend, and XEN virtio solution.

2015-03-10 Thread Huawei Xie
---
 MAINTAINERS | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 07fdf5e..b4327d3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -131,6 +131,12 @@ F: app/test-pmd/mempool_*
 F: examples/vhost_xen/
 F: doc/guides/prog_guide/intel_dpdk_xen_based_packet_switch_sol.rst

+Linux Xen VIRTIO
+M: Huawei Xie 
+F: lib/librte_pmd_xenvirt/
+F: examples/vhost_xen/
+F: doc/guides/prog_guide/intel_dpdk_xen_based_packet_switch_sol.rst
+
 FreeBSD EAL (with overlaps)
 M: Bruce Richardson 
 F: lib/librte_eal/bsdapp/Makefile
@@ -226,6 +232,7 @@ F: lib/librte_pmd_mlx4/
 F: doc/guides/prog_guide/mlx4_poll_mode_drv.rst

 RedHat virtio
+M: Huawei Xie 
 M: Changchun Ouyang 
 F: lib/librte_pmd_virtio/
 F: doc/guides/prog_guide/poll_mode_drv_emulated_virtio_nic.rst
-- 
1.8.1.4



[dpdk-dev] [PATCH] claim responsibility for KVM virtio PMD, vhost backend, and XEN virtio solution.

2015-03-10 Thread Huawei Xie
---
 MAINTAINERS | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 07fdf5e..b4327d3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -131,6 +131,12 @@ F: app/test-pmd/mempool_*
 F: examples/vhost_xen/
 F: doc/guides/prog_guide/intel_dpdk_xen_based_packet_switch_sol.rst

+Linux Xen VIRTIO
+M: Huawei Xie 
+F: lib/librte_pmd_xenvirt/
+F: examples/vhost_xen/
+F: doc/guides/prog_guide/intel_dpdk_xen_based_packet_switch_sol.rst
+
 FreeBSD EAL (with overlaps)
 M: Bruce Richardson 
 F: lib/librte_eal/bsdapp/Makefile
@@ -226,6 +232,7 @@ F: lib/librte_pmd_mlx4/
 F: doc/guides/prog_guide/mlx4_poll_mode_drv.rst

 RedHat virtio
+M: Huawei Xie 
 M: Changchun Ouyang 
 F: lib/librte_pmd_virtio/
 F: doc/guides/prog_guide/poll_mode_drv_emulated_virtio_nic.rst
-- 
1.8.1.4



[dpdk-dev] [PATCH] vhost example doc update

2015-03-12 Thread Huawei Xie
add vhost user documentation
fix some minor issues

Signed-off-by: Huawei Xie 
---
 doc/guides/sample_app_ug/vhost.rst | 123 -
 1 file changed, 94 insertions(+), 29 deletions(-)

diff --git a/doc/guides/sample_app_ug/vhost.rst 
b/doc/guides/sample_app_ug/vhost.rst
index fa53db6..4f28abd 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -45,7 +45,7 @@ Background
 Virtio networking (virtio-net) was developed as the Linux* KVM 
para-virtualized method for communicating network packets
 between host and guest.
 It was found that virtio-net performance was poor due to context switching and 
packet copying between host, guest, and QEMU.
-The following figure shows the system architecture for a virtio- based 
networking (virtio-net).
+The following figure shows the system architecture for a virtio-based 
networking (virtio-net).

 .. _figure_16:

@@ -89,20 +89,34 @@ Sample Code Overview
 The DPDK vhost-net sample code demonstrates KVM (QEMU) offloading the 
servicing of a Virtual Machine's (VM's)
 virtio-net devices to a DPDK-based application in place of the kernel's 
vhost-net module.

-The DPDK vhost-net sample code is a simple packet switching application with 
the following features:
+The DPDK vhost-net sample code is based on vhost library. Vhost library is 
developed for user space ethernet switch to
+easily integrate with vhost functionality.
+
+The vhost library implements the following features:

 *   Management of virtio-net device creation/destruction events.

-*   Mapping of the VM's physical memory into the DPDK vhost-net sample code's 
address space.
+*   Mapping of the VM's physical memory into the DPDK vhost-net's address 
space.

 *   Triggering/receiving notifications to/from VMs via eventfds.

 *   A virtio-net back-end implementation providing a subset of virtio-net 
features.

+There are two vhost implementations in vhost library, vhost cuse and vhost 
user. In vhost cuse, a character device driver is implemented to
+receive and process vhost requests through ioctl messages. In vhost user, a 
socket server is created to receive vhost requests through
+socket messages. Most of the messages share the same handler routine.
+
+.. note::
+**Any vhost cuse specific requirement in the following sections will be 
emphasized**.
+
+Two implementations are turned on and off statically through configure file. 
Only one implementation could be turned on. They don't co-exist in current 
implementation.
+
+The vhost sample code application is a simple packet switching application 
with the following feature:
+
 *   Packet switching between virtio-net devices and the network interface card,
 including using VMDQs to reduce the switching that needs to be performed 
in software.

-The following figure shows the architecture of the Vhost sample application.
+The following figure shows the architecture of the Vhost sample application 
based on vhost-cuse.

 .. _figure_18:

@@ -131,15 +145,19 @@ The example in this section have been validated with the 
following distributions

 *   Fedora* 19

+*   Fedora* 20
+
 Prerequisites
 -

 This section lists prerequisite packages that must be installed.

-Installing Packages on the Host
-~~~
+Installing Packages on the Host(vhost cuse required)
+

-The vhost sample code uses the following packages; fuse, fuse-devel, and 
kernel- modules-extra.
+The vhost cuse code uses the following packages; fuse, fuse-devel, and 
kernel-modules-extra.
+The vhost user code doesn't rely on those modules as eventfds are already 
installed into vhost process through
+unix domain socket.

 #.  Install Fuse Development Libraries and headers:

@@ -153,6 +171,11 @@ The vhost sample code uses the following packages; fuse, 
fuse-devel, and kernel-

 yum -y install kernel-modules-extra

+QEMU simulator
+~~
+
+For vhost user, qemu 2.2 is required.
+
 Setting up the Execution Environment
 

@@ -202,7 +225,7 @@ In this section, we create a second hugetlbs mount point to 
allocate hugepages f

 .. code-block:: console

-echo 256 > /sys/kernel/mm/hugepages/hugepages-2048kB/ nr_hugepages
+echo 256 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages

 #.  Mount hugetlbs at a separate mount point for 2 MB pages:

@@ -251,6 +274,8 @@ at the command line as follows.

 Observe that in the example, "-device" and "-netdev" are repeated for two 
virtio-net devices.

+For vhost cuse:
+
 .. code-block:: console

 user@target:~$ qemu-system-x86_64 ... \
@@ -259,15 +284,46 @@ at the command line as follows.
 -netdev tap,id=hostnet2,vhost=on,vhostfd= \
 -device virtio-net-pci, netdev=hostnet2,id=net1

+For vhost user:
+
+.. code-block:: console
+
+user@target:~$ qemu-system-x86_64 ... \
+-ch

[dpdk-dev] [PATCH] vhost library doc update

2015-03-12 Thread Huawei Xie
add vhost user documentation

Signed-off-by: Huawei Xie 
---
 doc/guides/prog_guide/vhost_lib.rst | 52 ++---
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/vhost_lib.rst 
b/doc/guides/prog_guide/vhost_lib.rst
index 0b6eda7..ab35b74 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -31,25 +31,28 @@
 Vhost Library
 =

-The vhost cuse (cuse: user space character device driver) library implements a
-vhost cuse driver. It also creates, manages and destroys vhost devices for
-corresponding virtio devices in the guest. Vhost supported vSwitch could 
register
-callbacks to this library, which will be called when a vhost device is 
activated
-or deactivated by guest virtual machine.
+The vhost library implements a user space vhost driver. It supports both 
vhost-cuse
+(cuse: user space character device) and vhost-user(user space socket server).
+It also creates, manages and destroys vhost devices for corresponding virtio
+devices in the guest. Vhost supported vSwitch could register callbacks to this
+library, which will be called when a vhost device is activated or deactivated
+by guest virtual machine.

 Vhost API Overview
 --

 *   Vhost driver registration

-  rte_vhost_driver_register registers the vhost cuse driver into the 
system.
-  Character device file will be created in the /dev directory.
+  rte_vhost_driver_register registers the vhost driver into the system.
+  For vhost-cuse, character device file will be created under the /dev 
directory.
   Character device name is specified as the parameter.
+  For vhost-user, a unix domain socket server will be created with the 
parameter as
+  the local socket path.

 *   Vhost session start

   rte_vhost_driver_session_start starts the vhost session loop.
-  Vhost cuse session is an infinite blocking loop.
+  Vhost session is an infinite blocking loop.
   Put the session in a dedicate DPDK thread.

 *   Callback register
@@ -73,6 +76,8 @@ Vhost API Overview
 Vhost Implementation
 

+Vhost cuse implementation
+~
 When vSwitch registers the vhost driver, it will register a cuse device driver
 into the system and creates a character device file. This cuse driver will
 receive vhost open/release/IOCTL message from QEMU simulator.
@@ -89,13 +94,40 @@ which means vhost could access the shared virtio ring and 
the guest physical
 address specified in the entry of the ring.

 The guest virtual machine tells the vhost whether the virtio device is ready
-for processing or is de-activated through VHOST_SET_BACKEND message.
+for processing or is de-activated through VHOST_NET_SET_BACKEND message.
 The registered callback from vSwitch will be called.

 When the release call is released, vhost will destroy the device.

+Vhost user implementation
+~
+When vSwitch registers a vhost driver, it will create a unix domain socket 
server
+into the system. This server will listen for a connection and process the 
vhost message from
+QEMU simulator.
+
+When there is a new socket connection, it means a new virtio device has been 
created in
+the guest virtual machine, and the vhost driver will create a vhost device for 
this virtio device.
+
+For messages with a file descriptor, the file descriptor could be directly 
used in the vhost
+process as it is already installed by unix domain socket.
+ * VHOST_SET_MEM_TABLE
+ * VHOST_SET_VRING_KICK
+ * VHOST_SET_VRING_CALL
+ * VHOST_SET_LOG_FD
+ * VHOST_SET_VRING_ERR
+
+For VHOST_SET_MEM_TABLE message, QEMU will send us information for each memory 
region and its
+file descriptor in the ancillary data of the message. The fd is used to map 
that region.
+
+There is no VHOST_NET_SET_BACKEND message as in vhost cuse to signal us 
whether virtio device
+is ready or should be stopped.
+VHOST_SET_VRING_KICK is used as the signal to put the vhost device onto data 
plane.
+VHOST_GET_VRING_BASE is used as the signal to remove vhost device from data 
plane.
+
+When the socket connection is closed, vhost will destroy the device.
+
 Vhost supported vSwitch reference
 -

-For how to support vhost in vSwitch, please refer to vhost example in the
+For more vhost details and how to support vhost in vSwitch, please refer to 
vhost example in the
 DPDK Sample Applications Guide.
-- 
1.8.1.4



[dpdk-dev] [PATCH] lib/librte_vhost: add CONFIG_RTE_LIBRTE_VHOST_USER switch

2015-03-13 Thread Huawei Xie
Turn on CONFIG_RTE_LIBRTE_VHOST to enable vhost.
vhost-user is turned on by default. Turn off CONFIG_RTE_LIBRTE_VHOST_USER to
enable vhost-cuse implementation.

Signed-off-by: Huawei Xie 
---
 config/common_linuxapp|  4 +++-
 lib/librte_vhost/Makefile | 11 +--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 97f1c9e..09a58ac 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -414,10 +414,12 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n

 #
 # Compile vhost library
-# fuse-devel is needed to run vhost.
+# fuse-devel is needed to run vhost-cuse.
 # fuse-devel enables user space char driver development
+# vhost-user is turned on by default.
 #
 CONFIG_RTE_LIBRTE_VHOST=n
+CONFIG_RTE_LIBRTE_VHOST_USER=y
 CONFIG_RTE_LIBRTE_VHOST_DEBUG=n

 #
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 52f6575..a8645a6 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -39,13 +39,20 @@ EXPORT_MAP := rte_vhost_version.map
 LIBABIVER := 1

 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64
-CFLAGS += -I vhost_cuse -lfuse
+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
 CFLAGS += -I vhost_user
+else
+CFLAGS += -I vhost_cuse -lfuse
 LDFLAGS += -lfuse
+endif
+
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
-#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_cuse/vhost-net-cdev.c 
vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c
+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/vhost-net-user.c 
vhost_user/virtio-net-user.c vhost_user/fd_man.c
+else
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_cuse/vhost-net-cdev.c 
vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c
+endif

 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
-- 
1.8.1.4



[dpdk-dev] [PATCH] lib/librte_vhost: fix build errors

2015-03-19 Thread Huawei Xie
Fix the build errors "missing initializer" and "cast to pointer from integer of 
different size".

For the integer-to-pointer cast issue, we still need to investigate changing the type of 
mapped_address.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/vhost-net-user.c  | 2 +-
 lib/librte_vhost/vhost_user/virtio-net-user.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 3aa9436..31f1215 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -277,7 +277,7 @@ vserver_new_vq_conn(int fd, void *dat, __rte_unused int 
*remove)
int conn_fd;
struct connfd_ctx *ctx;
int fh;
-   struct vhost_device_ctx vdev_ctx = { 0 };
+   struct vhost_device_ctx vdev_ctx = { (pid_t)0, 0 };
unsigned int size;

conn_fd = accept(fd, NULL, NULL);
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 465d3ef..c1ffc38 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -83,7 +83,7 @@ free_mem_region(struct virtio_net *dev)
for (idx = 0; idx < dev->mem->nregions; idx++) {
if (region[idx].mapped_address) {
alignment = region[idx].blksz;
-   munmap((void *)
+   munmap((void *)(uintptr_t)
RTE_ALIGN_FLOOR(
region[idx].mapped_address, alignment),
RTE_ALIGN_CEIL(
@@ -155,7 +155,7 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct 
VhostUserMsg *pmsg)

RTE_LOG(INFO, VHOST_CONFIG,
"mapped region %d fd:%d to %p sz:0x%"PRIx64" 
off:0x%"PRIx64"\n",
-   idx, pmsg->fds[idx], (void *)mapped_address,
+   idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
mapped_size, memory.regions[idx].mmap_offset);

if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
@@ -194,7 +194,7 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct 
VhostUserMsg *pmsg)
 err_mmap:
while (idx--) {
alignment = pregion_orig[idx].blksz;
-   munmap((void *)RTE_ALIGN_FLOOR(
+   munmap((void *)(uintptr_t)RTE_ALIGN_FLOOR(
pregion_orig[idx].mapped_address, alignment),
RTE_ALIGN_CEIL(pregion_orig[idx].mapped_size,
alignment));
-- 
1.8.1.4



[dpdk-dev] [PATCH v3] lib/librte_vhost: user space vhost driver library

2014-08-06 Thread Huawei Xie
This user space vhost library is provided to facilitate integration with a 
DPDK-accelerated vswitch.

Huawei Xie (1):
  vhost library support to facilitate integration with DPDK accelerated vswitch.

 config/common_linuxapp   |7 +
 lib/Makefile |1 +
 lib/librte_vhost/Makefile|   48 ++
 lib/librte_vhost/eventfd_link/Makefile   |   39 +
 lib/librte_vhost/eventfd_link/eventfd_link.c |  194 +
 lib/librte_vhost/eventfd_link/eventfd_link.h |   40 +
 lib/librte_vhost/rte_virtio_net.h|  192 +
 lib/librte_vhost/vhost-net-cdev.c|  363 ++
 lib/librte_vhost/vhost-net-cdev.h|  109 +++
 lib/librte_vhost/vhost_rxtx.c|  292 
 lib/librte_vhost/virtio-net.c| 1002 ++
 11 files changed, 2287 insertions(+)
 create mode 100644 lib/librte_vhost/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h
 create mode 100644 lib/librte_vhost/rte_virtio_net.h
 create mode 100644 lib/librte_vhost/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost_rxtx.c
 create mode 100644 lib/librte_vhost/virtio-net.c

-- 
1.8.1.4



[dpdk-dev] [PATCH 3/3] examples/vhost: add new vhost example

2014-08-06 Thread Huawei Xie
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 examples/vhost/Makefile |   52 +
 examples/vhost/libvirt/qemu-wrap.py |  366 +
 examples/vhost/main.c   | 3047 +++
 examples/vhost/main.h   |  109 ++
 4 files changed, 3574 insertions(+)
 create mode 100644 examples/vhost/Makefile
 create mode 100755 examples/vhost/libvirt/qemu-wrap.py
 create mode 100644 examples/vhost/main.c
 create mode 100644 examples/vhost/main.h

diff --git a/examples/vhost/Makefile b/examples/vhost/Makefile
new file mode 100644
index 000..a4d4fb0
--- /dev/null
+++ b/examples/vhost/Makefile
@@ -0,0 +1,52 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = vhost-switch
+
+# all source are stored in SRCS-y
+#SRCS-y := cusedrv.c loopback-userspace.c
+SRCS-y := main.c
+
+CFLAGS += -O2 -I/usr/local/include -D_FILE_OFFSET_BITS=64 -Wno-unused-parameter
+CFLAGS += $(WERROR_FLAGS)
+LDFLAGS += -lfuse
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/vhost/libvirt/qemu-wrap.py 
b/examples/vhost/libvirt/qemu-wrap.py
new file mode 100755
index 000..8d820be
--- /dev/null
+++ b/examples/vhost/libvirt/qemu-wrap.py
@@ -0,0 +1,366 @@
+#!/usr/bin/python
+#/*
+# *   BSD LICENSE
+# *
+# *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# *   All rights reserved.
+# *
+# *   Redistribution and use in source and binary forms, with or without
+# *   modification, are permitted provided that the following conditions
+# *   are met:
+# *
+# * * Redistributions of source code must retain the above copyright
+# *   notice, this list of conditions and the following disclaimer.
+# * * Redistributions in binary form must reproduce the above copyright
+# *   notice, this list of conditions and the following disclaimer in
+# *   the documentation and/or other materials provided with the
+# *   distribution.
+# * * Neither the name of Intel Corporation nor the names of its
+# *   contributors may be used to endorse or promote products derived
+# *   from this software without specific prior written permission.
+# *
+# *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# */
+
+#
+# This script 

[dpdk-dev] [PATCH v4 0/2] lib/librte_pmd_i40e: set vlan filter fix

2014-12-02 Thread Huawei Xie
This patchset fixes "set vlan filter" issue.

v2 changes:
* add two macros I40E_VFTA_IDX and I40E_VFTA_BIT for VFTA array operation.

v3 changes:
* code style fix
* rebase on latest commit

v4 changes:
* add more descriptive commit message

Huawei Xie (2):
  vlan id set fix
  add I40E_VFTA_IDX and I40E_VFTA_BIT macros for VFTA related operation

 lib/librte_pmd_i40e/i40e_ethdev.c | 20 ++--
 lib/librte_pmd_i40e/i40e_ethdev.h |  9 +
 2 files changed, 19 insertions(+), 10 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v4 1/2] lib/librte_pmd_i40e: set vlan id filter fix

2014-12-02 Thread Huawei Xie
Use ">> 5" rather than ">> 4" when computing the VFTA array index.

vlan id is a 12 bit value.
VFTA is 128 x 32 bit array (128 double word array) which could store 2^12 vlan 
bits.
Each bit represents whether corresponding vlan tag is set in the VSI.
Use high 7 bits as the index for the double word array.

Signed-off-by: Huawei Xie 
---
 lib/librte_pmd_i40e/i40e_ethdev.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c 
b/lib/librte_pmd_i40e/i40e_ethdev.c
index dacf2db..518597f 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -4172,14 +4172,11 @@ i40e_set_vlan_filter(struct i40e_vsi *vsi,
 {
uint32_t vid_idx, vid_bit;

-#define UINT32_BIT_MASK  0x1F
-#define VALID_VLAN_BIT_MASK  0xFFF
/* VFTA is 32-bits size array, each element contains 32 vlan bits, Find 
the
 *  element first, then find the bits it belongs to
 */
-   vid_idx = (uint32_t) ((vlan_id & VALID_VLAN_BIT_MASK) >>
- sizeof(uint32_t));
-   vid_bit = (uint32_t) (1 << (vlan_id & UINT32_BIT_MASK));
+   vid_idx = (uint32_t) ((vlan_id >> 5) & 0x7F);
+   vid_bit = (uint32_t) (1 << (vlan_id & 0x1F));

if (on)
vsi->vfta[vid_idx] |= vid_bit;
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 2/2] lib/librte_pmd_i40e: add I40E_VFTA_IDX and I40E_VFTA_BIT macros for VFTA related operation

2014-12-02 Thread Huawei Xie
Add two macros I40E_VFTA_IDX and I40E_VFTA_BIT for vlan filter search and set.
Add vlan_id check in vlan filter search and set function.

Signed-off-by: Huawei Xie 
---
 lib/librte_pmd_i40e/i40e_ethdev.c | 17 ++---
 lib/librte_pmd_i40e/i40e_ethdev.h |  9 +
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c 
b/lib/librte_pmd_i40e/i40e_ethdev.c
index 518597f..43b9448 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -4157,8 +4157,11 @@ i40e_find_vlan_filter(struct i40e_vsi *vsi,
 {
uint32_t vid_idx, vid_bit;

-   vid_idx = (uint32_t) ((vlan_id >> 5) & 0x7F);
-   vid_bit = (uint32_t) (1 << (vlan_id & 0x1F));
+   if (vlan_id > ETH_VLAN_ID_MAX)
+   return 0;
+
+   vid_idx = I40E_VFTA_IDX(vlan_id);
+   vid_bit = I40E_VFTA_BIT(vlan_id);

if (vsi->vfta[vid_idx] & vid_bit)
return 1;
@@ -4172,11 +4175,11 @@ i40e_set_vlan_filter(struct i40e_vsi *vsi,
 {
uint32_t vid_idx, vid_bit;

-   /* VFTA is 32-bits size array, each element contains 32 vlan bits, Find 
the
-*  element first, then find the bits it belongs to
-*/
-   vid_idx = (uint32_t) ((vlan_id >> 5) & 0x7F);
-   vid_bit = (uint32_t) (1 << (vlan_id & 0x1F));
+   if (vlan_id > ETH_VLAN_ID_MAX)
+   return;
+
+   vid_idx = I40E_VFTA_IDX(vlan_id);
+   vid_bit = I40E_VFTA_BIT(vlan_id);

if (on)
vsi->vfta[vid_idx] |= vid_bit;
diff --git a/lib/librte_pmd_i40e/i40e_ethdev.h 
b/lib/librte_pmd_i40e/i40e_ethdev.h
index f99fbea..f913ea9 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.h
+++ b/lib/librte_pmd_i40e/i40e_ethdev.h
@@ -50,6 +50,15 @@
 #define I40E_DEFAULT_QP_NUM_FDIR  1
 #define I40E_UINT32_BIT_SIZE  (CHAR_BIT * sizeof(uint32_t))
 #define I40E_VFTA_SIZE(4096 / I40E_UINT32_BIT_SIZE)
+/*
+ * vlan_id is a 12 bit number.
+ * The VFTA array is actually a 4096 bit array, 128 of 32bit elements.
+ * 2^5 = 32. The val of lower 5 bits specifies the bit in the 32bit element.
+ * The higher 7 bit val specifies VFTA array index.
+ */
+#define I40E_VFTA_BIT(vlan_id)(1 << ((vlan_id) & 0x1F))
+#define I40E_VFTA_IDX(vlan_id)((vlan_id) >> 5)
+
 /* Default TC traffic in case DCB is not enabled */
 #define I40E_DEFAULT_TCMAP0x1
 #define I40E_FDIR_QUEUE_ID0
-- 
1.8.1.4



[dpdk-dev] [PATCH] examples/vhost: increase MAX_QUEUE number

2014-12-10 Thread Huawei Xie
Increase MAX_QUEUES from 256 to 512.
In the vhost example, the MAX_QUEUES macro should be the maximum possible queue number 
of the port.
Theoretically we should only set up the queues that are used, i.e., the first rx 
queue of each pool, or
at most queues from 0 to MAX_QUEUES. Until we revise the implementation and 
are certain all NICs support
this well, add a reminder message for the user.

Signed-off-by: Huawei Xie 
---
 examples/vhost/main.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 0c76ece..d2c1852 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -53,7 +53,7 @@

 #include "main.h"

-#define MAX_QUEUES 256
+#define MAX_QUEUES 512

 /* the maximum number of external ports supported */
 #define MAX_SUP_PORTS 1
@@ -380,6 +380,12 @@ port_init(uint8_t port)
/* The max pool number from dev_info will be used to validate the pool 
number specified in cmd line */
rte_eth_dev_info_get (port, _info);

+   if (dev_info.max_rx_queues > MAX_QUEUES) {
+   rte_exit(EXIT_FAILURE,
+   "please define MAX_QUEUES no less than %u in %s\n",
+   dev_info.max_rx_queues, __FILE__);
+   }
+
rxconf = _info.default_rxconf;
txconf = _info.default_txconf;
rxconf->rx_drop_en = 1;
-- 
1.8.1.4



[dpdk-dev] [PATCH RFC v2 00/12] lib/librte_vhost: vhost-user support

2014-12-11 Thread Huawei Xie
This patchset refines vhost library to support both vhost-cuse and vhost-user.


Huawei Xie (12):
  create vhost_cuse directory and move vhost-net-cdev.c to vhost_cuse directory
  rename vhost-net-cdev.h as vhost-net.h
  move eventfd_copy logic out from virtio-net.c to vhost-net-cdev.c
  exact copy of host_memory_map from virtio-net.c to new file
  virtio-net-cdev.c
  host_memory_map refine: map partial memory of target process into current 
process
  cuse_set_memory_table is the VHOST_SET_MEMORY_TABLE message handler for cuse
  fd management for vhost user
  vhost-user support
  minor fix
  vhost-user memory region map/unmap
  kick/callfd fix
  cleanup when vhost user connection is closed

 lib/librte_vhost/Makefile |   5 +-
 lib/librte_vhost/rte_virtio_net.h |   2 +
 lib/librte_vhost/vhost-net-cdev.c | 389 --
 lib/librte_vhost/vhost-net-cdev.h | 113 ---
 lib/librte_vhost/vhost-net.h  | 117 +++
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c  | 452 ++
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 349 
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h |  45 +++
 lib/librte_vhost/vhost_rxtx.c |   2 +-
 lib/librte_vhost/vhost_user/fd_man.c  | 205 
 lib/librte_vhost/vhost_user/fd_man.h  |  64 
 lib/librte_vhost/vhost_user/vhost-net-user.c  | 423 
 lib/librte_vhost/vhost_user/vhost-net-user.h  | 107 ++
 lib/librte_vhost/vhost_user/virtio-net-user.c | 313 ++
 lib/librte_vhost/vhost_user/virtio-net-user.h |  49 +++
 lib/librte_vhost/virtio-net.c | 394 ++
 lib/librte_vhost/virtio-net.h |  43 +++
 17 files changed, 2199 insertions(+), 873 deletions(-)
 delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
 delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost-net.h
 create mode 100644 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost_user/fd_man.c
 create mode 100644 lib/librte_vhost/vhost_user/fd_man.h
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
 create mode 100644 lib/librte_vhost/virtio-net.h

-- 
1.8.1.4



[dpdk-dev] [PATCH RFC v2 02/12] lib/librte_vhost: rename vhost-net-cdev.h as vhost-net.h

2014-12-11 Thread Huawei Xie
vhost-net.h is the shared header file for both vhost-cuse and vhost-user

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost-net-cdev.h| 113 ---
 lib/librte_vhost/vhost-net.h | 113 +++
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c |   2 +-
 lib/librte_vhost/vhost_rxtx.c|   2 +-
 lib/librte_vhost/virtio-net.c|   2 +-
 5 files changed, 116 insertions(+), 116 deletions(-)
 delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost-net.h

diff --git a/lib/librte_vhost/vhost-net-cdev.h 
b/lib/librte_vhost/vhost-net-cdev.h
deleted file mode 100644
index 03a5c57..000
--- a/lib/librte_vhost/vhost-net-cdev.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VHOST_NET_CDEV_H_
-#define _VHOST_NET_CDEV_H_
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-
-/* Macros for printing using RTE_LOG */
-#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
-#define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
-
-#ifdef RTE_LIBRTE_VHOST_DEBUG
-#define VHOST_MAX_PRINT_BUFF 6072
-#define LOG_LEVEL RTE_LOG_DEBUG
-#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
-#define PRINT_PACKET(device, addr, size, header) do { \
-   char *pkt_addr = (char *)(addr); \
-   unsigned int index; \
-   char packet[VHOST_MAX_PRINT_BUFF]; \
-   \
-   if ((header)) \
-   snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size 
%d: ", (device->device_fh), (size)); \
-   else \
-   snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size 
%d: ", (device->device_fh), (size)); \
-   for (index = 0; index < (size); index++) { \
-   snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), 
VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
-   "%02hhx ", pkt_addr[index]); \
-   } \
-   snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), 
VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
-   \
-   LOG_DEBUG(VHOST_DATA, "%s", packet); \
-} while (0)
-#else
-#define LOG_LEVEL RTE_LOG_INFO
-#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
-#define PRINT_PACKET(device, addr, size, header) do {} while (0)
-#endif
-
-
-/*
- * Structure used to identify device context.
- */
-struct vhost_device_ctx {
-   pid_t   pid;/* PID of process calling the IOCTL. */
-   uint64_tfh; /* Populated with fi->fh to track the device 
index. */
-};
-
-/*
- * Structure contains function pointers to be defined in virtio-net.c. These
- * functions are called in CUSE context and are used to configure devices.
- */
-struct vhost_net_device_ops {
-   int (*new_device)(struct vhost_device_ctx);
-   void (*destroy_device)(struct vhost_device_ctx);
-
-   int (*get_features)(struct vhost_device_ctx, uint64_t *);
-   int (*set_features)(struct vhost_device_ctx, uint64_t *);
-
-   int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
-
-   int (*set_vring_num)(struct vhost_device_ctx,

[dpdk-dev] [PATCH RFC v2 05/12] lib/librte_vhost: host_memory_map refine

2014-12-11 Thread Huawei Xie
host_memory_map only maps part of the memory of the target process into the current process 
through the shared backing file.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 43 +--
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index fbfc403..58ac3dd 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -75,8 +75,8 @@ struct procmap {
  * map it to our address space.
  */
 static int
-host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
-   pid_t pid, uint64_t addr)
+host_memory_map(pid_t pid, uint64_t addr,
+   uint64_t *mapped_address, uint64_t *mapped_size)
 {
struct dirent *dptr = NULL;
struct procmap procmap;
@@ -104,8 +104,8 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,
fmap = fopen(mapfile, "r");
if (fmap == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Failed to open maps file for pid %d\n",
-   dev->device_fh, pid);
+   "Failed to open maps file for pid %d\n",
+   pid);
return -1;
}

@@ -179,8 +179,8 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,

if (!found) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Failed to find memory file in pid %d maps 
file\n",
-   dev->device_fh, pid);
+   "Failed to find memory file in pid %d maps file\n",
+   pid);
return -1;
}

@@ -188,8 +188,8 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,
dp = opendir(procdir);
if (dp == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Cannot open pid %d process directory\n",
-   dev->device_fh, pid);
+   "Cannot open pid %d process directory\n",
+   pid);
return -1;
}

@@ -202,8 +202,7 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,
path = realpath(memfile, resolved_path);
if ((path == NULL) && (strlen(resolved_path) == 0)) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Failed to resolve fd directory\n",
-   dev->device_fh);
+   "Failed to resolve fd directory\n");
closedir(dp);
return -1;
}
@@ -218,8 +217,8 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,

if (found == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Failed to find memory file for pid %d\n",
-   dev->device_fh, pid);
+   "Failed to find memory file for pid %d\n",
+   pid);
return -1;
}
/* Open the shared memory file and map the memory into this process. */
@@ -227,32 +226,30 @@ host_memory_map(struct virtio_net *dev, struct 
virtio_memory *mem,

if (fd == -1) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Failed to open %s for pid %d\n",
-   dev->device_fh, memfile, pid);
+   "Failed to open %s for pid %d\n",
+   memfile, pid);
return -1;
}

map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
-   MAP_POPULATE|MAP_SHARED, fd, 0);
+   MAP_POPULATE|MAP_SHARED, fd, 0);
close(fd);

if (map == MAP_FAILED) {
RTE_LOG(ERR, VHOST_CONFIG,
-   "(%"PRIu64") Error mapping the file %s for pid %d\n",
-   dev->device_fh, memfile, pid);
+   "Error mapping the file %s for pid %d\n",
+   memfile, pid);
return -1;
}

/* Store the memory address and size in the device data structure */
-   mem->mapped_address = (uint64_t)(uintptr_t)map;
-   mem->mapped_size = procmap.len;
+   *mapped_address = (uint64_t)(uintptr_t)map;
+   *mapped_size = procmap.len;

LOG_DEBUG(VHOST_CONFIG,
-   "(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
-   dev->device_fh,
+   "Mem File: %s->%s - Size: %llu - VA: %p\n",
memfile, resolved_path,
-   (unsigned long long)mem->mapped_size, map);
+   (unsigned long long)*mapped_size, map);

return 0;
 }
-
-- 
1.8.1.4



[dpdk-dev] [PATCH RFC v2 04/12] lib/librte_vhost: copy of host_memory_map from virtio-net.c to new file virtio-net-cdev.c

2014-12-11 Thread Huawei Xie
create virtio-net-cdev.c and copy host_memory_map into it.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 258 ++
 1 file changed, 258 insertions(+)
 create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c

diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
new file mode 100644
index 000..fbfc403
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -0,0 +1,258 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "vhost-net.h"
+
+/* Line size for reading maps file. */
+static const uint32_t BUFSIZE = PATH_MAX;
+
+/* Size of prot char array in procmap. */
+#define PROT_SZ 5
+
+/* Number of elements in procmap struct. */
+#define PROCMAP_SZ 8
+
+/* Structure containing information gathered from maps file. */
+struct procmap {
+   uint64_t va_start;  /* Start virtual address in file. */
+   uint64_t len;   /* Size of file. */
+   uint64_t pgoff; /* Not used. */
+   uint32_t maj;   /* Not used. */
+   uint32_t min;   /* Not used. */
+   uint32_t ino;   /* Not used. */
+   char prot[PROT_SZ]; /* Not used. */
+   char fname[PATH_MAX];   /* File name. */
+};
+
+/*
+ * Locate the file containing QEMU's memory space and
+ * map it to our address space.
+ */
+static int
+host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
+   pid_t pid, uint64_t addr)
+{
+   struct dirent *dptr = NULL;
+   struct procmap procmap;
+   DIR *dp = NULL;
+   int fd;
+   int i;
+   char memfile[PATH_MAX];
+   char mapfile[PATH_MAX];
+   char procdir[PATH_MAX];
+   char resolved_path[PATH_MAX];
+   char *path = NULL;
+   FILE *fmap;
+   void *map;
+   uint8_t found = 0;
+   char line[BUFSIZE];
+   char dlm[] = "-   :   ";
+   char *str, *sp, *in[PROCMAP_SZ];
+   char *end = NULL;
+
+   /* Path where mem files are located. */
+   snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
+   /* Maps file used to locate mem file. */
+   snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
+
+   fmap = fopen(mapfile, "r");
+   if (fmap == NULL) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "(%"PRIu64") Failed to open maps file for pid %d\n",
+   dev->device_fh, pid);
+   return -1;
+   }
+
+   /* Read through maps file until we find out base_address. */
+   while (fgets(line, BUFSIZE, fmap) != 0) {
+   str = line;
+   errno = 0;
+   /* Split line into fields. */
+   for (i = 0; i < PROCMAP_SZ; i++) {
+   in[i] = strtok_r(str, [i], );
+   if ((in[i] == NULL) || (errno != 0)) {
+   fclose(fmap);
+   return -1;
+   }
+   str = NULL;
+   }
+
+   /* Convert

[dpdk-dev] [PATCH RFC v2 09/12] lib/librte_vhost: minor fix

2014-12-11 Thread Huawei Xie
Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/vhost-net-user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 841d7e6..6b9ebd7 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -237,7 +237,7 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)

msg->flags &= ~VHOST_USER_VERSION_MASK;
msg->flags |= VHOST_USER_VERSION;
-   sg->flags |= VHOST_USER_REPLY_MASK;
+   msg->flags |= VHOST_USER_REPLY_MASK;

ret = send_fd_message(sockfd, (char *)msg,
VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
-- 
1.8.1.4



[dpdk-dev] [PATCH RFC v2 07/12] lib/librte_vhost: async event and callback

2014-12-11 Thread Huawei Xie

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/fd_man.c | 205 +++
 lib/librte_vhost/vhost_user/fd_man.h |  64 +++
 2 files changed, 269 insertions(+)
 create mode 100644 lib/librte_vhost/vhost_user/fd_man.c
 create mode 100644 lib/librte_vhost/vhost_user/fd_man.h

diff --git a/lib/librte_vhost/vhost_user/fd_man.c 
b/lib/librte_vhost/vhost_user/fd_man.c
new file mode 100644
index 000..f4db68b
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -0,0 +1,205 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "fd_man.h"
+
+/**
+ * Locate a descriptor in the fdset.
+ * Unused entries hold -1, so passing fd == -1 finds the first free entry.
+ * @return
+ *   index of the entry holding fd, or -1 if pfdset is NULL or fd isn't in
+ *   the fdset.
+ */
+static int
+fdset_find_fd(struct fdset *pfdset, int fd)
+{
+	int slot = 0;
+
+	if (pfdset == NULL)
+		return -1;
+
+	/* Advance until fd is matched or all pfdset->num entries are visited. */
+	while (slot < pfdset->num && pfdset->fd[slot].fd != fd)
+		slot++;
+
+	if (slot == pfdset->num)
+		return -1;
+
+	return slot;
+}
+
+/**
+ * Find an unused entry in the fdset.
+ * @return
+ *   index of the first entry whose fd is -1, or -1 if there is none.
+ */
+static int
+fdset_find_free_slot(struct fdset *pfdset)
+{
+	/* Value fdset_init() stores in every empty entry. */
+	const int unused_fd = -1;
+
+	return fdset_find_fd(pfdset, unused_fd);
+}
+
+/**
+ * Store a descriptor and its handlers in entry idx of the fdset.
+ * Does nothing when pfdset is NULL or idx is outside [0, pfdset->num).
+ *
+ * @param pfdset
+ *   fdset to update.
+ * @param idx
+ *   entry to overwrite.
+ * @param fd
+ *   descriptor recorded in the entry.
+ * @param rcb
+ *   read callback; NULL means the fd is not watched for reading.
+ * @param wcb
+ *   write callback; NULL means the fd is not watched for writing.
+ * @param dat
+ *   opaque value saved in the entry alongside the callbacks.
+ */
+static void
+fdset_add_fd(struct fdset  *pfdset, int idx, int fd,
+	fd_cb rcb, fd_cb wcb, uint64_t dat)
+{
+	struct fdentry *pfdentry;
+
+	/* idx < 0 is what fdset_find_free_slot() returns when no entry is free. */
+	if (pfdset == NULL || idx < 0 || idx >= pfdset->num)
+		return;
+
+	pfdentry = &pfdset->fd[idx];
+	pfdentry->fd = fd;
+	pfdentry->rcb = rcb;
+	pfdentry->wcb = wcb;
+	pfdentry->dat = dat;
+}
+
+/**
+ * Add the fds registered in the fdset to the read/write fd_sets.
+ * An fd is put in rfset when its entry has a read callback and rfset is
+ * non-NULL, and in wfset when it has a write callback and wfset is non-NULL.
+ * The fd_sets are not cleared here.
+ * @return
+ *   the highest fd value added to either fd_set, or -1 if pfdset is NULL
+ *   or no fd was added.
+ */
+static int
+fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
+{
+	struct fdentry *pfdentry;
+	int i, maxfds = -1;
+	int num = MAX_FDS;
+
+	if (pfdset == NULL)
+		return -1;
+
+	for (i = 0; i < num ; i++) {
+		pfdentry = &pfdset->fd[i];
+		/* -1 marks an unused entry. */
+		if (pfdentry->fd != -1) {
+			int added = 0;
+			if (pfdentry->rcb && rfset) {
+				FD_SET(pfdentry->fd, rfset);
+				added = 1;
+			}
+			if (pfdentry->wcb && wfset) {
+				FD_SET(pfdentry->fd, wfset);
+				added = 1;
+			}
+			/* Only fds actually placed in a set count towards the maximum. */
+			if (added)
+				maxfds = pfdentry->fd < maxfds ?
+					maxfds : pfdentry->fd;
+		}
+	}
+	return maxfds;
+}
+
+/**
+ * Reset an fdset: every entry is marked unused (fd == -1) and the entry
+ * count is set to MAX_FDS. A NULL pfdset is ignored.
+ */
+void
+fdset_init(struct fdset *pfdset)
+{
+	int slot = 0;
+
+	if (pfdset != NULL) {
+		pfdset->num = MAX_FDS;
+		while (slot < MAX_FDS) {
+			pfdset->fd[slot].fd = -1;
+			slot++;
+		}
+	}
+}
+
+/**
+ * Register the fd in the fdset with read/write handler and context.
+ */
+int
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
+{
+   int i;
+
+   if (pfdset == NULL || fd == -1)
+   re

[dpdk-dev] [PATCH RFC v2 10/12] lib/librte_vhost: vhost-user memory region map

2014-12-11 Thread Huawei Xie
Handle the alignment required when mapping and unmapping vhost-user memory
regions.

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/rte_virtio_net.h |   2 +
 lib/librte_vhost/vhost-net.h  |   2 -
 lib/librte_vhost/vhost_user/vhost-net-user.h  |   3 +-
 lib/librte_vhost/vhost_user/virtio-net-user.c | 105 --
 4 files changed, 100 insertions(+), 12 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 00b1328..77db80b 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -48,6 +48,8 @@
 #include 
 #include 

+#define VHOST_MEMORY_MAX_NREGIONS 8
+
 /* Used to indicate that the device is running on a data core */
 #define VIRTIO_DEV_RUNNING 1

diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index f9ec40b..ec2584f 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -43,8 +43,6 @@

 #include "rte_virtio_net.h"

-#define VHOST_MEMORY_MAX_NREGIONS 8
-
 extern struct vhost_net_device_ops const *ops;

 /* Macros for printing using RTE_LOG */
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h 
b/lib/librte_vhost/vhost_user/vhost-net-user.h
index c138844..f4c9d01 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -37,6 +37,7 @@
 #include 
 #include 

+#include "rte_virtio_net.h"
 #include "fd_man.h"

 struct vhost_server {
@@ -47,8 +48,6 @@ struct vhost_server {

 /* refer to hw/virtio/vhost-user.c */

-#define VHOST_MEMORY_MAX_NREGIONS8
-
 typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
VHOST_USER_GET_FEATURES = 1,
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index ad59fcc..3aecb17 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -36,7 +36,11 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 

+#include 
 #include 

 #include "virtio-net.h"
@@ -44,13 +48,56 @@
 #include "vhost-net-user.h"
 #include "vhost-net.h"

+/*
+ * Per-region bookkeeping read back when a region is released: the release
+ * code munmap()s the mapping and close()s fd.
+ */
+struct orig_region_map {
+   int fd;	/* descriptor closed after the region is unmapped */
+   uint64_t mapped_address;	/* mapping address; entries with 0 are skipped on release */
+   uint64_t mapped_size;	/* mapping length, rounded up to blksz for munmap() */
+   uint64_t blksz;	/* alignment applied to address and size on munmap(); presumably from get_blk_size() - TODO confirm */
+};
+
+/*
+ * Pointer to the orig_region_map entries located
+ * sizeof(struct virtio_memory) + nregions * sizeof(struct virtio_memory_regions)
+ * bytes past ptr.
+ */
+#define orig_region(ptr, nregions) \
+	((struct orig_region_map *)RTE_PTR_ADD((ptr), \
+		sizeof(struct virtio_memory) + \
+		sizeof(struct virtio_memory_regions) * (nregions)))
+
+/**
+ * Query the block size (st_blksize) fstat() reports for the file behind fd.
+ * @return
+ *   st_blksize on success, or (uint64_t)-1 if fstat() fails.
+ */
+static uint64_t
+get_blk_size(int fd)
+{
+	struct stat sb;
+
+	/* Without this check a failed fstat() would leave sb uninitialised. */
+	if (fstat(fd, &sb) == -1)
+		return (uint64_t)-1;
+
+	return (uint64_t)sb.st_blksize;
+}
+
 int
 user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 {
-   unsigned int idx;
struct VhostUserMemory memory = pmsg->payload.memory;
struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
-   uint64_t mapped_address, base_address = 0;
+   uint64_t mapped_address, mapped_size, base_address = 0;
+   struct virtio_net *dev;
+   unsigned int idx = 0;
+   struct orig_region_map tmp[VHOST_MEMORY_MAX_NREGIONS] =
+   { [0 ... VHOST_MEMORY_MAX_NREGIONS - 1] = { 0 } };
+   struct orig_region_map *region;
+   uint64_t alignment;
+   int ret;
+
+   /* unmap old memory regions one by one*/
+   dev = get_device(ctx);
+   if (dev->mem) {
+   region = orig_region(dev->mem, dev->mem->nregions);
+   for (idx = 0; idx < dev->mem->nregions; idx++) {
+   if (region[idx].mapped_address) {
+   alignment = region[idx].blksz;
+   printf("Freeing %p\n",
+   (void 
*)(uintptr_t)region[idx].mapped_address);
+   ret = munmap((void 
*)RTE_ALIGN_FLOOR(region[idx].mapped_address, alignment),
+   RTE_ALIGN_CEIL(region[idx].mapped_size, 
alignment));
+   printf("munmap ret= %d\n", ret);
+   printf("close file %d\n", region[idx].fd);
+   close(region[idx].fd);
+   }
+   }
+   free(dev->mem);
+   dev->mem = NULL;
+   }

for (idx = 0; idx < memory.nregions; idx++) {
if (memory.regions[idx].guest_phys_addr == 0)
@@ -73,22 +120,30 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct 
VhostUserMsg *pmsg)
memory.regions[idx].userspace_addr;

/* This is ugly */
+   mapped_size = regions[idx].memory_size +
+   memory.regions[idx].mmap_offset;
mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
-   regions[idx].memory_size +
-   memory.regions[idx].mmap_offset,
+   mapped_size,
  

[dpdk-dev] [PATCH RFC v2 01/12] lib/librte_vhost: move vhost-cuse implementation to vhost_cuse directory

2014-12-11 Thread Huawei Xie
create vhost_cuse directory and move vhost-net-cdev.c to vhost_cuse directory

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/Makefile|   4 +-
 lib/librte_vhost/vhost-net-cdev.c| 389 ---
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 389 +++
 3 files changed, 391 insertions(+), 391 deletions(-)
 delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index c008d64..0b2f08f 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -34,10 +34,10 @@ include $(RTE_SDK)/mk/rte.vars.mk
 # library name
 LIB = librte_vhost.a

-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 
-lfuse
 LDFLAGS += -lfuse
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c virtio-net.c 
vhost_rxtx.c

 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/vhost-net-cdev.c 
b/lib/librte_vhost/vhost-net-cdev.c
deleted file mode 100644
index 57c76cb..000
--- a/lib/librte_vhost/vhost-net-cdev.c
+++ /dev/null
@@ -1,389 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-#include 
-#include 
-
-#include "vhost-net-cdev.h"
-
-#define FUSE_OPT_DUMMY "\0\0"
-#define FUSE_OPT_FORE  "-f\0\0"
-#define FUSE_OPT_NOMULTI "-s\0\0"
-
-static const uint32_t default_major = 231;
-static const uint32_t default_minor = 1;
-static const char cuse_device_name[] = "/dev/cuse";
-static const char default_cdev[] = "vhost-net";
-
-static struct fuse_session *session;
-static struct vhost_net_device_ops const *ops;
-
-/*
- * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
- * when the device is added to the device linked list.
- */
-static struct vhost_device_ctx
-fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
-{
-   struct vhost_device_ctx ctx;
-   struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
-
-   ctx.pid = req_ctx->pid;
-   ctx.fh = fi->fh;
-
-   return ctx;
-}
-
-/*
- * When the device is created in QEMU it gets initialised here and
- * added to the device linked list.
- */
-static void
-vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
-{
-   struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
-   int err = 0;
-
-   err = ops->new_device(ctx);
-   if (err == -1) {
-   fuse_reply_err(req, EPERM);
-   return;
-   }
-
-   fi->fh = err;
-
-   RTE_LOG(INFO, VHOST_CONFIG,
-   "(%"PRIu64") Device configuration started\n", fi->fh);
-   fuse_reply_open(req, fi);
-}
-
-/*
- * When QEMU is shutdown or killed the device gets released.
- */
-static void
-vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
-{
-   int err = 0

[dpdk-dev] [PATCH RFC v2 06/12] lib/librte_vhost: cuse_set_memory_table

2014-12-11 Thread Huawei Xie
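Implement cuse_set_mem_table() in vhost_cuse/virtio-net-cdev.c and call it
from vhost_net_ioctl() in place of ops->set_mem_table().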

Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/Makefile |   2 +-
 lib/librte_vhost/vhost-net.h  |   4 +-
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c  |   7 +-
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c |  85 +++
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h |  45 
 lib/librte_vhost/virtio-net.c | 306 +-
 6 files changed, 144 insertions(+), 305 deletions(-)
 create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 0b2f08f..e0d0ef6 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -37,7 +37,7 @@ LIB = librte_vhost.a
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 
-lfuse
 LDFLAGS += -lfuse
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c virtio-net.c 
vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c 
vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c

 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index 03a5c57..f7e96fd 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -41,6 +41,8 @@

 #include 

+#define VHOST_MEMORY_MAX_NREGIONS 8
+
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
 #define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
@@ -92,7 +94,7 @@ struct vhost_net_device_ops {
int (*get_features)(struct vhost_device_ctx, uint64_t *);
int (*set_features)(struct vhost_device_ctx, uint64_t *);

-   int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
+   int (*set_mem_table)(struct vhost_device_ctx, const struct 
virtio_memory_regions *, uint32_t nregions);

int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state 
*);
int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr 
*);
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index 9424452..3422795 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -46,6 +46,7 @@
 #include 
 #include 

+#include "virtio-net-cdev.h"
 #include "vhost-net.h"
 #include "eventfd_link/eventfd_link.h"

@@ -60,7 +61,7 @@ static const char default_cdev[] = "vhost-net";
 static const char eventfd_cdev[] = "/dev/eventfd-link";

 static struct fuse_session *session;
-static struct vhost_net_device_ops const *ops;
+struct vhost_net_device_ops const *ops;

 /*
  * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
@@ -291,8 +292,8 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
break;

default:
-   result = ops->set_mem_table(ctx,
-   in_buf, mem_temp.nregions);
+   result = cuse_set_mem_table(ctx, in_buf,
+   mem_temp.nregions);
if (result)
fuse_reply_err(req, EINVAL);
else
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index 58ac3dd..edcbc10 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -47,7 +47,11 @@

 #include 

+#include "rte_virtio_net.h"
 #include "vhost-net.h"
+#include "virtio-net-cdev.h"
+
+extern struct vhost_net_device_ops const *ops;

 /* Line size for reading maps file. */
 static const uint32_t BUFSIZE = PATH_MAX;
@@ -253,3 +257,84 @@ host_memory_map(pid_t pid, uint64_t addr,

return 0;
 }
+
+int
+cuse_set_mem_table(struct vhost_device_ctx ctx,
+   const struct vhost_memory *mem_regions_addr, uint32_t nregions)
+{
+   uint64_t size = offsetof(struct vhost_memory, regions);
+   uint32_t idx, valid_regions;
+   struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
+   struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
+   ((uint64_t)(uintptr_t)mem_regions_addr + size);
+   uint64_t base_address = 0, mapped_address, mapped_size;
+
+   for (idx = 0; idx < nregions; idx++) {
+   regions[idx].guest_phys_address =
+   mem_regions[idx].guest_phys_addr;
+   regions[idx].guest_phys_address_end =
+   regions[idx].guest_phys_address +
+   mem_regions[idx].memory_size;
+   regions[idx].memory_size =
+   mem_regions[idx].memory_size;
+   regions[idx].userspace_address =
+   mem_regions[idx].use

[dpdk-dev] [PATCH RFC v2 08/12] lib/librte_vhost: vhost-user support

2014-12-11 Thread Huawei Xie
vhost-user support


Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/Makefile |   5 +-
 lib/librte_vhost/vhost-net.h  |   4 +
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c |   9 +
 lib/librte_vhost/vhost_user/vhost-net-user.c  | 422 ++
 lib/librte_vhost/vhost_user/vhost-net-user.h  | 108 +++
 lib/librte_vhost/vhost_user/virtio-net-user.c | 199 
 lib/librte_vhost/vhost_user/virtio-net-user.h |  48 +++
 lib/librte_vhost/virtio-net.c |  16 +-
 lib/librte_vhost/virtio-net.h |  43 +++
 9 files changed, 842 insertions(+), 12 deletions(-)
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
 create mode 100644 lib/librte_vhost/virtio-net.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index e0d0ef6..b2f14a0 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
 # library name
 LIB = librte_vhost.a

-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 
-lfuse
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 
-D_FILE_OFFSET_BITS=64 -lfuse
 LDFLAGS += -lfuse
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c 
vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
+#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c 
vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c 
vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c

 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index f7e96fd..f9ec40b 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -41,8 +41,12 @@

 #include 

+#include "rte_virtio_net.h"
+
 #define VHOST_MEMORY_MAX_NREGIONS 8

+extern struct vhost_net_device_ops const *ops;
+
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
 #define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index edcbc10..8ac3360 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -268,6 +268,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
((uint64_t)(uintptr_t)mem_regions_addr + size);
uint64_t base_address = 0, mapped_address, mapped_size;
+   struct virtio_dev *dev;

for (idx = 0; idx < nregions; idx++) {
regions[idx].guest_phys_address =
@@ -335,6 +336,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
regions[idx].guest_phys_address;
}

+   dev = get_device(ctx);
+   if (dev && dev->mem && dev->mmaped_address) {
+   munmap((void *)(uintptr_t)dev->mmaped_address,
+   (size_t)dev->mmaped_size);
+   free(dev->mem);
+   dev->mem = NULL;
+   }
+
ops->set_mem_table(ctx, [0], valid_regions);
return 0;
 }
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
new file mode 100644
index 000..841d7e6
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -0,0 +1,422 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL

[dpdk-dev] [PATCH RFC v2 12/12] lib/librte_vhost: cleanup when vhost user socket connection is closed

2014-12-11 Thread Huawei Xie
When the vhost-user socket connection is closed:
- close the memory region file descriptors
- close the kick/call fds

A vSwitch needs to run endlessly, so a resource leak is a deadly issue.


Signed-off-by: Huawei Xie 
---
 lib/librte_vhost/vhost_user/vhost-net-user.c  |  7 ++--
 lib/librte_vhost/vhost_user/virtio-net-user.c | 59 +++
 lib/librte_vhost/vhost_user/virtio-net-user.h |  1 +
 3 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 6b9ebd7..35215b4 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -283,9 +283,9 @@ vserver_message_handler(int connfd, uint64_t dat)
RTE_LOG(ERR, VHOST_CONFIG,
"vhost read message failed\n");

-   /*TODO: cleanup */
close(connfd);
fdset_del(_vhost_server->fdset, connfd);
+   user_destroy_device(ctx);
ops->destroy_device(ctx);

return;
@@ -293,9 +293,9 @@ vserver_message_handler(int connfd, uint64_t dat)
RTE_LOG(INFO, VHOST_CONFIG,
"vhost peer closed\n");

-   /*TODO: cleanup */
close(connfd);
fdset_del(_vhost_server->fdset, connfd);
+   user_destroy_device(ctx);
ops->destroy_device(ctx);

return;
@@ -304,9 +304,10 @@ vserver_message_handler(int connfd, uint64_t dat)
RTE_LOG(ERR, VHOST_CONFIG,
"vhost read incorrect message\n");

-   /*TODO: cleanup */
close(connfd);
fdset_del(_vhost_server->fdset, connfd);
+   user_destroy_device(ctx);
+   ops->destroy_device(ctx);

return;
}
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 4e49e9b..75f9f54 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -65,6 +65,31 @@ get_blk_size(int fd)
return (uint64_t)stat.st_blksize;
 }

+/**
+ * Unmap every memory region recorded for the device and close the file
+ * descriptor kept with each mapping. dev->mem itself is not freed here.
+ * Does nothing when dev or dev->mem is NULL.
+ */
+static void
+free_mem_region(struct virtio_net *dev)
+{
+	struct orig_region_map *region;
+	unsigned int idx;
+	int ret;
+	uint64_t alignment;
+	uint64_t base, len;
+
+	if (!dev || !dev->mem)
+		return;
+
+	region = orig_region(dev->mem, dev->mem->nregions);
+	for (idx = 0; idx < dev->mem->nregions; idx++) {
+		/* Entries whose mapped_address is 0 are skipped. */
+		if (!region[idx].mapped_address)
+			continue;
+
+		/* Align start down and length up to the region's block size. */
+		alignment = region[idx].blksz;
+		base = RTE_ALIGN_FLOOR(region[idx].mapped_address, alignment);
+		len = RTE_ALIGN_CEIL(region[idx].mapped_size, alignment);
+
+		printf("Freeing %p\n",
+			(void *)(uintptr_t)region[idx].mapped_address);
+		ret = munmap((void *)(uintptr_t)base, (size_t)len);
+		printf("munmap ret= %d\n", ret);
+		printf("close file %d\n", region[idx].fd);
+		close(region[idx].fd);
+	}
+}
+
 int
 user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 {
@@ -73,28 +98,15 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct 
VhostUserMsg *pmsg)
uint64_t mapped_address, mapped_size, base_address = 0;
struct virtio_net *dev;
unsigned int idx = 0;
+   struct orig_region_map *region;
struct orig_region_map tmp[VHOST_MEMORY_MAX_NREGIONS] =
{ [0 ... VHOST_MEMORY_MAX_NREGIONS - 1] = { 0 } };
-   struct orig_region_map *region;
uint64_t alignment;
-   int ret;

/* unmap old memory regions one by one*/
dev = get_device(ctx);
-   if (dev->mem) {
-   region = orig_region(dev->mem, dev->mem->nregions);
-   for (idx = 0; idx < dev->mem->nregions; idx++) {
-   if (region[idx].mapped_address) {
-   alignment = region[idx].blksz;
-   printf("Freeing %p\n",
-   (void 
*)(uintptr_t)region[idx].mapped_address);
-   ret = munmap((void 
*)RTE_ALIGN_FLOOR(region[idx].mapped_address, alignment),
-   RTE_ALIGN_CEIL(region[idx].mapped_size, 
alignment));
-   printf("munmap ret= %d\n", ret);
-   printf("close file %d\n", region[idx].fd);
-   close(region[idx].fd);
-   }
-   }
+   if (dev && dev->mem) {
+   free_mem_region(dev);
free(dev->mem);
dev->mem = NULL;
}
@@ -248,7 +260,6 @@ user_set_vring_kick(struct vhost

[dpdk-dev] [PATCH 1/2] lib/librte_vhost: vhost library support to facilitate integration with vswitch.

2014-07-18 Thread Huawei Xie
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Thomos Long 
---
 config/common_linuxapp   |6 +
 lib/Makefile |1 +
 lib/librte_vhost/Makefile|   48 ++
 lib/librte_vhost/eventfd_link/Makefile   |   39 +
 lib/librte_vhost/eventfd_link/eventfd_link.c |  205 ++
 lib/librte_vhost/eventfd_link/eventfd_link.h |   79 ++
 lib/librte_vhost/rte_virtio_net.h|  192 +
 lib/librte_vhost/vhost-net-cdev.c|  363 ++
 lib/librte_vhost/vhost-net-cdev.h|  112 +++
 lib/librte_vhost/vhost_rxtx.c|  292 
 lib/librte_vhost/virtio-net.c| 1002 ++
 11 files changed, 2339 insertions(+)
 create mode 100644 lib/librte_vhost/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h
 create mode 100644 lib/librte_vhost/rte_virtio_net.h
 create mode 100644 lib/librte_vhost/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost_rxtx.c
 create mode 100644 lib/librte_vhost/virtio-net.c

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 7bf5d80..002ed84 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -390,6 +390,12 @@ CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
 CONFIG_RTE_KNI_VHOST_DEBUG_TX=n

 #
+# Compile vhost library
+#
+CONFIG_RTE_LIBRTE_VHOST=y
+CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
+
+#
 #Compile Xen domain0 support
 #
 CONFIG_RTE_LIBRTE_XEN_DOM0=n
diff --git a/lib/Makefile b/lib/Makefile
index 10c5bb3..007c174 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -60,6 +60,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
 DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
 DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
 DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor
+DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
 DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port
 DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
new file mode 100644
index 000..f79778b
--- /dev/null
+++ b/lib/librte_vhost/Makefile
@@ -0,0 +1,48 @@
+#   BSD LICENSE
+# 
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+# 
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+# 
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+# 
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_vhost.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
+LDFLAGS += -lfuse
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/eventfd_link/Makefile 
b/lib/librte_vhost/eventfd_link/Makefile
new file mode 100644
index 000..5fe7297
--- /dev/null
+++ b/lib/librte_vhost/eventfd_link/Makefile
@@ -0,0 +1,39 @@
+#   BSD LICENSE
+# 
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+# 
+#   Redistribution and use in source and binary forms, with or without
+#   mo

[dpdk-dev] [PATCH 0/2] user space vhost library

2014-07-18 Thread Huawei Xie
This user space vhost library is based on the user space vhost example and
aims to provide the same API for different vhost implementations. This
implementation includes a user space vhost cuse driver, a kernel module for
eventfd proxying, and vhost enqueue/dequeue functionality.

hxie5 (2):
  vhost library support to facilitate integration with switch.
  Turn off vhost_lib by default as it needs fuse, fuse-devel to compile

 config/common_linuxapp   |7 +
 lib/Makefile |1 +
 lib/librte_vhost/Makefile|   48 ++
 lib/librte_vhost/eventfd_link/Makefile   |   39 +
 lib/librte_vhost/eventfd_link/eventfd_link.c |  205 ++
 lib/librte_vhost/eventfd_link/eventfd_link.h |   79 ++
 lib/librte_vhost/rte_virtio_net.h|  192 +
 lib/librte_vhost/vhost-net-cdev.c|  363 ++
 lib/librte_vhost/vhost-net-cdev.h|  112 +++
 lib/librte_vhost/vhost_rxtx.c|  292 
 lib/librte_vhost/virtio-net.c| 1002 ++
 11 files changed, 2340 insertions(+)
 create mode 100644 lib/librte_vhost/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h
 create mode 100644 lib/librte_vhost/rte_virtio_net.h
 create mode 100644 lib/librte_vhost/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost_rxtx.c
 create mode 100644 lib/librte_vhost/virtio-net.c

-- 
1.8.1.4



[dpdk-dev] [PATCH 2/2] lib/Makefile: Turn off vhost_lib by default as it needs fuse, fuse-devel to compile

2014-07-18 Thread Huawei Xie
Signed-off-by: Huawei Xie 
---
 config/common_linuxapp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 002ed84..5b58278 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -391,8 +391,9 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n

 #
 # Compile vhost library
+# fuse, fuse-devel, kernel-modules-extra packages are needed
 #
-CONFIG_RTE_LIBRTE_VHOST=y
+CONFIG_RTE_LIBRTE_VHOST=n
 CONFIG_RTE_LIBRTE_VHOST_DEBUG=n

 #
-- 
1.8.1.4



[dpdk-dev] [PATCH v2] user space vhost driver library

2014-07-18 Thread Huawei Xie
This user space vhost library is based on the user space vhost example and aims
to provide the same API for different vhost implementations. This implementation
includes a user space vhost cuse driver, a kernel module for eventfd proxying,
and vhost enqueue/dequeue functionality.

Huawei Xie (1):
  vhost library support to facilitate integration with vswitch.

 config/common_linuxapp   |7 +
 lib/Makefile |1 +
 lib/librte_vhost/Makefile|   48 ++
 lib/librte_vhost/eventfd_link/Makefile   |   39 +
 lib/librte_vhost/eventfd_link/eventfd_link.c |  205 ++
 lib/librte_vhost/eventfd_link/eventfd_link.h |   79 ++
 lib/librte_vhost/rte_virtio_net.h|  192 +
 lib/librte_vhost/vhost-net-cdev.c|  363 ++
 lib/librte_vhost/vhost-net-cdev.h|  112 +++
 lib/librte_vhost/vhost_rxtx.c|  292 
 lib/librte_vhost/virtio-net.c| 1002 ++
 11 files changed, 2340 insertions(+)
 create mode 100644 lib/librte_vhost/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h
 create mode 100644 lib/librte_vhost/rte_virtio_net.h
 create mode 100644 lib/librte_vhost/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost_rxtx.c
 create mode 100644 lib/librte_vhost/virtio-net.c

-- 
1.8.1.4



[dpdk-dev] [PATCH v2] lib/librte_vhost: vhost library support to facilitate integration with vswitch.

2014-07-18 Thread Huawei Xie
Signed-off-by: Huawei Xie 
---
 config/common_linuxapp   |7 +
 lib/Makefile |1 +
 lib/librte_vhost/Makefile|   48 ++
 lib/librte_vhost/eventfd_link/Makefile   |   39 +
 lib/librte_vhost/eventfd_link/eventfd_link.c |  205 ++
 lib/librte_vhost/eventfd_link/eventfd_link.h |   79 ++
 lib/librte_vhost/rte_virtio_net.h|  192 +
 lib/librte_vhost/vhost-net-cdev.c|  363 ++
 lib/librte_vhost/vhost-net-cdev.h|  112 +++
 lib/librte_vhost/vhost_rxtx.c|  292 
 lib/librte_vhost/virtio-net.c| 1002 ++
 11 files changed, 2340 insertions(+)
 create mode 100644 lib/librte_vhost/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h
 create mode 100644 lib/librte_vhost/rte_virtio_net.h
 create mode 100644 lib/librte_vhost/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost_rxtx.c
 create mode 100644 lib/librte_vhost/virtio-net.c

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 7bf5d80..5b58278 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -390,6 +390,13 @@ CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
 CONFIG_RTE_KNI_VHOST_DEBUG_TX=n

 #
+# Compile vhost library
+# fuse, fuse-devel, kernel-modules-extra packages are needed
+#
+CONFIG_RTE_LIBRTE_VHOST=n
+CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
+
+#
 #Compile Xen domain0 support
 #
 CONFIG_RTE_LIBRTE_XEN_DOM0=n
diff --git a/lib/Makefile b/lib/Makefile
index 10c5bb3..007c174 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -60,6 +60,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
 DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
 DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
 DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor
+DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
 DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port
 DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
new file mode 100644
index 000..f79778b
--- /dev/null
+++ b/lib/librte_vhost/Makefile
@@ -0,0 +1,48 @@
+#   BSD LICENSE
+# 
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+# 
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+# 
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+# 
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_vhost.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
+LDFLAGS += -lfuse
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/eventfd_link/Makefile 
b/lib/librte_vhost/eventfd_link/Makefile
new file mode 100644
index 000..5fe7297
--- /dev/null
+++ b/lib/librte_vhost/eventfd_link/Makefile
@@ -0,0 +1,39 @@
+#   BSD LICENSE
+# 
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+# 
+#   Redistribution and use in source and binary forms, with 

[dpdk-dev] [PATCH 0/3] Transform the vhost example to two parts: vhost library and example.

2014-09-02 Thread Huawei Xie
This transform is to facilitate integration with DPDK accelerated ethernet 
switch.

Huawei Xie (3):
  remove vhost example
  vhost library support to facilitate integration with DPDK accelerated
vswitch
  This vhost example is based on vhost library. Besides:
 - This patch fixes hundreds of 80 character limitation issues.
 - Use structure assignment rather than memcpy in get_eth_conf
   according to Stephen's comments.

 config/common_linuxapp   |7 +
 examples/vhost/Makefile  |   10 +-
 examples/vhost/eventfd_link/Makefile |   39 -
 examples/vhost/eventfd_link/eventfd_link.c   |  205 ---
 examples/vhost/eventfd_link/eventfd_link.h   |   79 -
 examples/vhost/libvirt/qemu-wrap.py  |5 +-
 examples/vhost/main.c| 2205 ++
 examples/vhost/main.h|   85 +-
 examples/vhost/vhost-net-cdev.c  |  367 -
 examples/vhost/vhost-net-cdev.h  |   83 -
 examples/vhost/virtio-net.c  | 1165 --
 examples/vhost/virtio-net.h  |  161 --
 lib/Makefile |1 +
 lib/librte_vhost/Makefile|   48 +
 lib/librte_vhost/eventfd_link/Makefile   |   39 +
 lib/librte_vhost/eventfd_link/eventfd_link.c |  196 +++
 lib/librte_vhost/eventfd_link/eventfd_link.h |   40 +
 lib/librte_vhost/rte_virtio_net.h|  222 +++
 lib/librte_vhost/vhost-net-cdev.c|  394 +
 lib/librte_vhost/vhost-net-cdev.h|  119 ++
 lib/librte_vhost/vhost_rxtx.c|  316 
 lib/librte_vhost/virtio-net.c| 1113 +
 mk/rte.app.mk|5 +
 23 files changed, 3371 insertions(+), 3533 deletions(-)
 delete mode 100644 examples/vhost/eventfd_link/Makefile
 delete mode 100644 examples/vhost/eventfd_link/eventfd_link.c
 delete mode 100644 examples/vhost/eventfd_link/eventfd_link.h
 mode change 100755 => 100644 examples/vhost/libvirt/qemu-wrap.py
 delete mode 100644 examples/vhost/vhost-net-cdev.c
 delete mode 100644 examples/vhost/vhost-net-cdev.h
 delete mode 100644 examples/vhost/virtio-net.c
 delete mode 100644 examples/vhost/virtio-net.h
 create mode 100644 lib/librte_vhost/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h
 create mode 100644 lib/librte_vhost/rte_virtio_net.h
 create mode 100644 lib/librte_vhost/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost_rxtx.c
 create mode 100644 lib/librte_vhost/virtio-net.c

-- 
1.8.1.4



[dpdk-dev] [PATCH 1/3] examples/vhost: remove vhost example

2014-09-02 Thread Huawei Xie
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Tommy Long 
---
 examples/vhost/Makefile|   60 -
 examples/vhost/eventfd_link/Makefile   |   39 -
 examples/vhost/eventfd_link/eventfd_link.c |  205 --
 examples/vhost/eventfd_link/eventfd_link.h |   79 -
 examples/vhost/libvirt/qemu-wrap.py|  367 ---
 examples/vhost/main.c  | 3722 
 examples/vhost/main.h  |   86 -
 examples/vhost/vhost-net-cdev.c|  367 ---
 examples/vhost/vhost-net-cdev.h|   83 -
 examples/vhost/virtio-net.c| 1165 -
 examples/vhost/virtio-net.h|  161 --
 11 files changed, 6334 deletions(-)
 delete mode 100644 examples/vhost/Makefile
 delete mode 100644 examples/vhost/eventfd_link/Makefile
 delete mode 100644 examples/vhost/eventfd_link/eventfd_link.c
 delete mode 100644 examples/vhost/eventfd_link/eventfd_link.h
 delete mode 100755 examples/vhost/libvirt/qemu-wrap.py
 delete mode 100644 examples/vhost/main.c
 delete mode 100644 examples/vhost/main.h
 delete mode 100644 examples/vhost/vhost-net-cdev.c
 delete mode 100644 examples/vhost/vhost-net-cdev.h
 delete mode 100644 examples/vhost/virtio-net.c
 delete mode 100644 examples/vhost/virtio-net.h

diff --git a/examples/vhost/Makefile b/examples/vhost/Makefile
deleted file mode 100644
index f45f83f..000
--- a/examples/vhost/Makefile
+++ /dev/null
@@ -1,60 +0,0 @@
-#   BSD LICENSE
-#
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-#   All rights reserved.
-#
-#   Redistribution and use in source and binary forms, with or without
-#   modification, are permitted provided that the following conditions
-#   are met:
-#
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in
-#   the documentation and/or other materials provided with the
-#   distribution.
-# * Neither the name of Intel Corporation nor the names of its
-#   contributors may be used to endorse or promote products derived
-#   from this software without specific prior written permission.
-#
-#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-ifeq ($(RTE_SDK),)
-$(error "Please define RTE_SDK environment variable")
-endif
-
-# Default target, can be overriden by command line or environment
-RTE_TARGET ?= x86_64-native-linuxapp-gcc
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
-$(info This application can only operate in a linuxapp environment, \
-please change the definition of the RTE_TARGET environment variable)
-all:
-else
-
-# binary name
-APP = vhost-switch
-
-# all source are stored in SRCS-y
-#SRCS-y := cusedrv.c loopback-userspace.c
-SRCS-y := main.c virtio-net.c vhost-net-cdev.c
-
-CFLAGS += -O2 -I/usr/local/include -D_FILE_OFFSET_BITS=64 -Wno-unused-parameter
-CFLAGS += $(WERROR_FLAGS)
-LDFLAGS += -lfuse
-
-include $(RTE_SDK)/mk/rte.extapp.mk
-
-endif
diff --git a/examples/vhost/eventfd_link/Makefile 
b/examples/vhost/eventfd_link/Makefile
deleted file mode 100644
index fc3927b..000
--- a/examples/vhost/eventfd_link/Makefile
+++ /dev/null
@@ -1,39 +0,0 @@
-#   BSD LICENSE
-#
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-#   All rights reserved.
-#
-#   Redistribution and use in source and binary forms, with or without
-#   modification, are permitted provided that the following conditions
-#   are met:
-#
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in
-#   the documentation and/or other materials provided with the
-#   distribution.
-# * Neither the name of Intel Corporation nor the names of its
-#   contributors may be used to endorse or promote products derived
-#   

[dpdk-dev] [PATCH 2/3] lib/librte_vhost: vhost library support to facilitate integration with DPDK accelerated vswitch

2014-09-02 Thread Huawei Xie
This library is turned off by default so that it doesn't break the build on a
default system.
To use it, install the fuse development library and turn it on.

Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Tommy Long 
---
 config/common_linuxapp   |7 +
 lib/Makefile |1 +
 lib/librte_vhost/Makefile|   48 ++
 lib/librte_vhost/eventfd_link/Makefile   |   39 +
 lib/librte_vhost/eventfd_link/eventfd_link.c |  196 +
 lib/librte_vhost/eventfd_link/eventfd_link.h |   40 +
 lib/librte_vhost/rte_virtio_net.h|  222 +
 lib/librte_vhost/vhost-net-cdev.c|  394 +
 lib/librte_vhost/vhost-net-cdev.h|  119 +++
 lib/librte_vhost/vhost_rxtx.c|  316 
 lib/librte_vhost/virtio-net.c| 1113 ++
 mk/rte.app.mk|5 +
 12 files changed, 2500 insertions(+)
 create mode 100644 lib/librte_vhost/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h
 create mode 100644 lib/librte_vhost/rte_virtio_net.h
 create mode 100644 lib/librte_vhost/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost_rxtx.c
 create mode 100644 lib/librte_vhost/virtio-net.c

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 9047975..c7c1c83 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -390,6 +390,13 @@ CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
 CONFIG_RTE_KNI_VHOST_DEBUG_TX=n

 #
+# Compile vhost library
+# fuse, fuse-devel, kernel-modules-extra packages are needed
+#
+CONFIG_RTE_LIBRTE_VHOST=n
+CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
+
+#
 #Compile Xen domain0 support
 #
 CONFIG_RTE_LIBRTE_XEN_DOM0=n
diff --git a/lib/Makefile b/lib/Makefile
index 10c5bb3..007c174 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -60,6 +60,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
 DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
 DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
 DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor
+DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
 DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port
 DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
new file mode 100644
index 000..6ad706d
--- /dev/null
+++ b/lib/librte_vhost/Makefile
@@ -0,0 +1,48 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_vhost.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
+LDFLAGS += -lfuse
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/eventfd_link/Makefile 
b/lib/librte_vhost/eventfd_link/Makefile
new file mode 100644
index 000..fc3927b
--- /dev/null
+++ b/lib/li

[dpdk-dev] [PATCH 3/3] examples/vhost: vhost example based on vhost library.

2014-09-02 Thread Huawei Xie
Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
---
 examples/vhost/Makefile |   52 +
 examples/vhost/libvirt/qemu-wrap.py |  366 
 examples/vhost/main.c   | 3145 +++
 examples/vhost/main.h   |  109 ++
 4 files changed, 3672 insertions(+)
 create mode 100644 examples/vhost/Makefile
 create mode 100644 examples/vhost/libvirt/qemu-wrap.py
 create mode 100644 examples/vhost/main.c
 create mode 100644 examples/vhost/main.h

diff --git a/examples/vhost/Makefile b/examples/vhost/Makefile
new file mode 100644
index 000..a4d4fb0
--- /dev/null
+++ b/examples/vhost/Makefile
@@ -0,0 +1,52 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overriden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = vhost-switch
+
+# all source are stored in SRCS-y
+#SRCS-y := cusedrv.c loopback-userspace.c
+SRCS-y := main.c
+
+CFLAGS += -O2 -I/usr/local/include -D_FILE_OFFSET_BITS=64 -Wno-unused-parameter
+CFLAGS += $(WERROR_FLAGS)
+LDFLAGS += -lfuse
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/vhost/libvirt/qemu-wrap.py 
b/examples/vhost/libvirt/qemu-wrap.py
new file mode 100644
index 000..8d820be
--- /dev/null
+++ b/examples/vhost/libvirt/qemu-wrap.py
@@ -0,0 +1,366 @@
+#!/usr/bin/python
+#/*
+# *   BSD LICENSE
+# *
+# *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# *   All rights reserved.
+# *
+# *   Redistribution and use in source and binary forms, with or without
+# *   modification, are permitted provided that the following conditions
+# *   are met:
+# *
+# * * Redistributions of source code must retain the above copyright
+# *   notice, this list of conditions and the following disclaimer.
+# * * Redistributions in binary form must reproduce the above copyright
+# *   notice, this list of conditions and the following disclaimer in
+# *   the documentation and/or other materials provided with the
+# *   distribution.
+# * * Neither the name of Intel Corporation nor the names of its
+# *   contributors may be used to endorse or promote products derived
+# *   from this software without specific prior written permission.
+# *
+# *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# */
+
+#
+# This script 

[dpdk-dev] [PATCH v4 0/5] lib/librte_vhost: user space vhost cuse driver library

2014-09-12 Thread Huawei Xie
This set of patches transforms and refactors vhost example to a user
space vhost cuse library. This library implements a user space vhost
cuse driver, and provides generic APIs for user space ethernet vswitch
to integrate us-vhost for fast packet switching with guest virtio.

Change notes:

 v2) Turn off vhost lib by default

 v3) Fixed checkpatch issues

 v4) Split the patch per Thomas's requirement


Huawei Xie (5):
  mv vhost example to vhost lib directory
  copy the vhost rx/tx functions from main.c to new file vhost_rxtx.c 
  remove main.c main.h
  remove Makefile
  rename virtio-net.h to rte_virtio_net.h as API header file
  vmdq, mac learning and other switch related logics are removed
  zero copy feature isn't generic, and is removed.
  add vhost lib Makefile.
  Add TODOs for found new issues.
  Fix coding style issue which are treated as errors by checkpatch.pl
  add vhost lib support in makefile
  turn off vhost lib by default as it requires fuse development package.

 config/common_linuxapp   |7 +
 examples/vhost/Makefile  |   60 -
 examples/vhost/eventfd_link/Makefile |   39 -
 examples/vhost/eventfd_link/eventfd_link.c   |  205 --
 examples/vhost/eventfd_link/eventfd_link.h   |   79 -
 examples/vhost/libvirt/qemu-wrap.py  |  367 ---
 examples/vhost/main.c| 3722 --
 examples/vhost/main.h|   86 -
 examples/vhost/vhost-net-cdev.c  |  367 ---
 examples/vhost/vhost-net-cdev.h  |   83 -
 examples/vhost/virtio-net.c  | 1165 
 examples/vhost/virtio-net.h  |  161 --
 lib/Makefile |1 +
 lib/librte_vhost/Makefile|   48 +
 lib/librte_vhost/eventfd_link/Makefile   |   39 +
 lib/librte_vhost/eventfd_link/eventfd_link.c |  205 ++
 lib/librte_vhost/eventfd_link/eventfd_link.h |   79 +
 lib/librte_vhost/libvirt/qemu-wrap.py|  367 +++
 lib/librte_vhost/rte_virtio_net.h|  192 ++
 lib/librte_vhost/vhost-net-cdev.c|  362 +++
 lib/librte_vhost/vhost-net-cdev.h|  112 +
 lib/librte_vhost/vhost_rxtx.c|  301 +++
 lib/librte_vhost/virtio-net.c| 1000 +++
 mk/rte.app.mk|5 +
 24 files changed, 2718 insertions(+), 6334 deletions(-)
 delete mode 100644 examples/vhost/Makefile
 delete mode 100644 examples/vhost/eventfd_link/Makefile
 delete mode 100644 examples/vhost/eventfd_link/eventfd_link.c
 delete mode 100644 examples/vhost/eventfd_link/eventfd_link.h
 delete mode 100755 examples/vhost/libvirt/qemu-wrap.py
 delete mode 100644 examples/vhost/main.c
 delete mode 100644 examples/vhost/main.h
 delete mode 100644 examples/vhost/vhost-net-cdev.c
 delete mode 100644 examples/vhost/vhost-net-cdev.h
 delete mode 100644 examples/vhost/virtio-net.c
 delete mode 100644 examples/vhost/virtio-net.h
 create mode 100644 lib/librte_vhost/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h
 create mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py
 create mode 100644 lib/librte_vhost/rte_virtio_net.h
 create mode 100644 lib/librte_vhost/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost_rxtx.c
 create mode 100644 lib/librte_vhost/virtio-net.c

-- 
1.8.1.4



[dpdk-dev] [PATCH v4 3/5] lib/librte_vhost: vhost lib refactor

2014-09-12 Thread Huawei Xie
This vhost lib consists of five APIs plus several other helper routines
for feature disable/enable.
1) rte_vhost_driver_register to register vhost driver to the system.
2) rte_vhost_driver_callback_register to register the callback. Callbacks are
called when virtio device is ready for polling or is de-activated.
3) rte_vhost_driver_session_start, a blocking API to start vhost message handler
session.
4) rte_vhost_enqueue_burst and rte_vhost_dequeue_burst for enqueue/dequeue
to/from virtio ring.

Modifications include:
VMDQ, MAC learning and other switch related logic is removed.
The zero copy feature isn't generic currently, so it is removed.
The retry logic is removed.
The above three pieces of logic will be implemented in the example as a reference.
vhost lib Makefile is added.
Add several TODOs:
1) allow application to disable cmpset reserve in rte_vhost_enqueue_burst
in case there is no contention.
2) fix memcpy from mbuf to vring desc when mbuf is chained and the desc couldn't
hold all the data
3) fix vhost_set_mem_table possible race condition: two vqs concurrently call
set_mem_table, which causes the saved mem_temp to be overridden.
The merge-able feature is removed; it will be merged after this patch is applied.

Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Tommy Long 
---
 lib/librte_vhost/Makefile |  48 
 lib/librte_vhost/rte_virtio_net.h | 179 ---
 lib/librte_vhost/vhost-net-cdev.c |  35 +++---
 lib/librte_vhost/vhost-net-cdev.h |  45 +--
 lib/librte_vhost/vhost_rxtx.c | 157 +---
 lib/librte_vhost/virtio-net.c | 249 +++---
 6 files changed, 341 insertions(+), 372 deletions(-)
 create mode 100644 lib/librte_vhost/Makefile

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
new file mode 100644
index 000..6ad706d
--- /dev/null
+++ b/lib/librte_vhost/Makefile
@@ -0,0 +1,48 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_vhost.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
+LDFLAGS += -lfuse
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 1a2f0dc..08dc6f4 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -34,28 +34,25 @@
 #ifndef _VIRTIO_NET_H_
 #define _VIRTIO_NET_H_

+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
 /* Used to indicate that the device is running on a data core */
 #define VIRTIO_DEV_RUNNING 1

 /* Backend value set by guest. */
 #define VIRTIO_DEV_STOPPED -1

-#define PAGE_SIZE   4096

 /* Enum for virtqueue management. */
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};

-#define BUF_VECTOR_MAX 256
-
-/*
- * Structure contains buffer address, length and descriptor index
- * from vring to do scatter RX.
-*/
-struct buf_vector {
-uint64_t buf_addr;
-uint32_t buf_len;
-uint32_t desc_idx;
-};

 /*
  * Structure contains variable

[dpdk-dev] [PATCH v4 4/5] coding style issue fix

2014-09-12 Thread Huawei Xie
This vhost lib is based on the old vhost example, and there are still plenty of
coding style issues left. Those issues will be fixed once this patch is applied.

Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Tommy Long 
---
 lib/librte_vhost/rte_virtio_net.h |  52 
 lib/librte_vhost/vhost-net-cdev.c | 256 +++---
 lib/librte_vhost/vhost-net-cdev.h |  40 +++---
 lib/librte_vhost/vhost_rxtx.c |  15 ++-
 lib/librte_vhost/virtio-net.c |  88 +++--
 5 files changed, 220 insertions(+), 231 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 08dc6f4..82eb993 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -43,44 +43,38 @@
 #include 
 #include 

-/* Used to indicate that the device is running on a data core */
-#define VIRTIO_DEV_RUNNING 1
-
-/* Backend value set by guest. */
-#define VIRTIO_DEV_STOPPED -1
-
+#define VIRTIO_DEV_RUNNING 1  /**< Used to indicate that the device is running 
on a data core. */
+#define VIRTIO_DEV_STOPPED -1 /**< Backend value set by guest. */

 /* Enum for virtqueue management. */
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};

-
-/*
- * Structure contains variables relevant to TX/RX virtqueues.
+/**
+ * Structure contains variables relevant to RX/TX virtqueues.
  */
-struct vhost_virtqueue
-{
-   struct vring_desc   *desc;  /* Virtqueue 
descriptor ring. */
-   struct vring_avail  *avail; /* Virtqueue 
available ring. */
-   struct vring_used   *used;  /* Virtqueue 
used ring. */
-   uint32_tsize;   /* Size 
of descriptor ring. */
-   uint32_tbackend;/* 
Backend value to determine if device should started/stopped. */
-   uint16_tvhost_hlen; /* 
Vhost header length (varies depending on RX merge buffers. */
-   volatile uint16_t   last_used_idx;  /* Last index used on 
the available ring */
-   volatile uint16_t   last_used_idx_res;  /* Used for multiple 
devices reserving buffers. */
-   eventfd_t   callfd; /* 
Currently unused as polling mode is enabled. */
-   eventfd_t   kickfd; /* Used 
to notify the guest (trigger interrupt). */
+struct vhost_virtqueue {
+   struct vring_desc*desc; /**< descriptor ring. */
+   struct vring_avail   *avail;/**< available ring. */
+   struct vring_used*used; /**< used ring. */
+   uint32_t size;  /**< size of descriptor ring. */
+   uint32_t backend;   /**< backend value to determine 
if device should be started/stopped. */
+   uint16_t vhost_hlen;/**< vhost header length 
(varies depending on RX merge buffers. */
+   volatile uint16_tlast_used_idx; /**< last index used on the 
available ring. */
+   volatile uint16_tlast_used_idx_res; /**< used for multiple devices 
reserving buffers. */
+   eventfd_tcallfd;/**< currently unused as 
polling mode is enabled. */
+   eventfd_tkickfd;/**< used to notify the guest 
(trigger interrupt). */
 } __rte_cache_aligned;

-
-/*
- * Information relating to memory regions including offsets to addresses in 
QEMUs memory file.
+/**
+ * Information relating to memory regions including offsets to
+ * addresses in QEMU memory file.
  */
 struct virtio_memory_regions {
-   uint64_tguest_phys_address; /* Base guest physical 
address of region. */
-   uint64_tguest_phys_address_end; /* End guest physical address 
of region. */
-   uint64_tmemory_size;/* Size of region. */
-   uint64_tuserspace_address;  /* Base userspace 
address of region. */
-   uint64_taddress_offset; /* Offset of region for 
address translation. */
+   uint64_tguest_phys_address; /**< base guest physical address of 
region. */
+   uint64_tguest_phys_address_end; /**< end guest physical address of 
region. */
+   uint64_tmemory_size;/**< size of region. */
+   uint64_tuserspace_address;  /**< base userspace address of 
region. */
+   uint64_taddress_offset; /**< offset of region for address 
translation. */
 };


diff --git a/lib/librte_vhost/vhost-net-cdev.c 
b/lib/librte_vhost/vhost-net-cdev.c
index e73bf23..c3b580a 100644
--- a/lib/librte_vhost/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost-net-cdev.c
@@ -46,16 +46,16 @@

 #include "vhost-net-cdev.h"

-#define FUSE_OPT_DUMMY  

[dpdk-dev] [PATCH v4 5/5] lib/librte_vhost: add vhost lib support in makefile

2014-09-12 Thread Huawei Xie
Building the vhost lib requires the fuse development package. The lib is turned
off by default so as not to break the DPDK build.

Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Tommy Long 
---
 config/common_linuxapp | 7 +++
 lib/Makefile   | 1 +
 mk/rte.app.mk  | 5 +
 3 files changed, 13 insertions(+)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 9047975..c7c1c83 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -390,6 +390,13 @@ CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
 CONFIG_RTE_KNI_VHOST_DEBUG_TX=n

 #
+# Compile vhost library
+# fuse, fuse-devel, kernel-modules-extra packages are needed
+#
+CONFIG_RTE_LIBRTE_VHOST=n
+CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
+
+#
 #Compile Xen domain0 support
 #
 CONFIG_RTE_LIBRTE_XEN_DOM0=n
diff --git a/lib/Makefile b/lib/Makefile
index 10c5bb3..007c174 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -60,6 +60,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
 DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
 DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
 DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor
+DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
 DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port
 DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 34dff2a..285b65c 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -190,6 +190,11 @@ ifeq ($(CONFIG_RTE_LIBRTE_VIRTIO_PMD),y)
 LDLIBS += -lrte_pmd_virtio_uio
 endif

+ifeq ($(CONFIG_RTE_LIBRTE_VHOST), y)
+LDLIBS += -lrte_vhost
+LDLIBS += -lfuse
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_I40E_PMD),y)
 LDLIBS += -lrte_pmd_i40e
 endif
-- 
1.8.1.4



[dpdk-dev] [PATCH v4 2/5] lib/librte_vhost: re-factor vhost lib for subsequent transform

2014-09-12 Thread Huawei Xie
This patch does a simple split of the original vhost example source files.
The vhost rx/tx functions virtio_dev_rx/tx are copied from main.c to a new file,
vhost_rxtx.c.
main.c and main.h are removed. A new vhost example patchset based on these two
files will be submitted later.
The Makefile for the old example is removed.
virtio-net.h is renamed to rte_virtio_net.h and becomes the API header file.


Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Tommy Long 
---
 lib/librte_vhost/Makefile |   60 -
 lib/librte_vhost/main.c   | 3722 -
 lib/librte_vhost/main.h   |   86 -
 lib/librte_vhost/rte_virtio_net.h |  161 ++
 lib/librte_vhost/vhost_rxtx.c |  281 +++
 lib/librte_vhost/virtio-net.h |  161 --
 6 files changed, 442 insertions(+), 4029 deletions(-)
 delete mode 100644 lib/librte_vhost/Makefile
 delete mode 100644 lib/librte_vhost/main.c
 delete mode 100644 lib/librte_vhost/main.h
 create mode 100644 lib/librte_vhost/rte_virtio_net.h
 create mode 100644 lib/librte_vhost/vhost_rxtx.c
 delete mode 100644 lib/librte_vhost/virtio-net.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
deleted file mode 100644
index f45f83f..000
--- a/lib/librte_vhost/Makefile
+++ /dev/null
@@ -1,60 +0,0 @@
-#   BSD LICENSE
-#
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-#   All rights reserved.
-#
-#   Redistribution and use in source and binary forms, with or without
-#   modification, are permitted provided that the following conditions
-#   are met:
-#
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in
-#   the documentation and/or other materials provided with the
-#   distribution.
-# * Neither the name of Intel Corporation nor the names of its
-#   contributors may be used to endorse or promote products derived
-#   from this software without specific prior written permission.
-#
-#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-ifeq ($(RTE_SDK),)
-$(error "Please define RTE_SDK environment variable")
-endif
-
-# Default target, can be overriden by command line or environment
-RTE_TARGET ?= x86_64-native-linuxapp-gcc
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
-$(info This application can only operate in a linuxapp environment, \
-please change the definition of the RTE_TARGET environment variable)
-all:
-else
-
-# binary name
-APP = vhost-switch
-
-# all source are stored in SRCS-y
-#SRCS-y := cusedrv.c loopback-userspace.c
-SRCS-y := main.c virtio-net.c vhost-net-cdev.c
-
-CFLAGS += -O2 -I/usr/local/include -D_FILE_OFFSET_BITS=64 -Wno-unused-parameter
-CFLAGS += $(WERROR_FLAGS)
-LDFLAGS += -lfuse
-
-include $(RTE_SDK)/mk/rte.extapp.mk
-
-endif
diff --git a/lib/librte_vhost/main.c b/lib/librte_vhost/main.c
deleted file mode 100644
index 7d9e6a2..000
--- a/lib/librte_vhost/main.c
+++ /dev/null
@@ -1,3722 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDIN

[dpdk-dev] [PATCH v4 1/5] lib/librte_vhost: mv vhost example to vhost lib directory for further code re-factoring

2014-09-12 Thread Huawei Xie
This commit creates the vhost library directory and moves the vhost example into it.

Signed-off-by: Huawei Xie 
Acked-by: Konstantin Ananyev 
Acked-by: Tommy Long 
---
 examples/vhost/Makefile  |   60 -
 examples/vhost/eventfd_link/Makefile |   39 -
 examples/vhost/eventfd_link/eventfd_link.c   |  205 --
 examples/vhost/eventfd_link/eventfd_link.h   |   79 -
 examples/vhost/libvirt/qemu-wrap.py  |  367 ---
 examples/vhost/main.c| 3722 --
 examples/vhost/main.h|   86 -
 examples/vhost/vhost-net-cdev.c  |  367 ---
 examples/vhost/vhost-net-cdev.h  |   83 -
 examples/vhost/virtio-net.c  | 1165 
 examples/vhost/virtio-net.h  |  161 --
 lib/librte_vhost/Makefile|   60 +
 lib/librte_vhost/eventfd_link/Makefile   |   39 +
 lib/librte_vhost/eventfd_link/eventfd_link.c |  205 ++
 lib/librte_vhost/eventfd_link/eventfd_link.h |   79 +
 lib/librte_vhost/libvirt/qemu-wrap.py|  367 +++
 lib/librte_vhost/main.c  | 3722 ++
 lib/librte_vhost/main.h  |   86 +
 lib/librte_vhost/vhost-net-cdev.c|  367 +++
 lib/librte_vhost/vhost-net-cdev.h|   83 +
 lib/librte_vhost/virtio-net.c| 1165 
 lib/librte_vhost/virtio-net.h|  161 ++
 22 files changed, 6334 insertions(+), 6334 deletions(-)
 delete mode 100644 examples/vhost/Makefile
 delete mode 100644 examples/vhost/eventfd_link/Makefile
 delete mode 100644 examples/vhost/eventfd_link/eventfd_link.c
 delete mode 100644 examples/vhost/eventfd_link/eventfd_link.h
 delete mode 100755 examples/vhost/libvirt/qemu-wrap.py
 delete mode 100644 examples/vhost/main.c
 delete mode 100644 examples/vhost/main.h
 delete mode 100644 examples/vhost/vhost-net-cdev.c
 delete mode 100644 examples/vhost/vhost-net-cdev.h
 delete mode 100644 examples/vhost/virtio-net.c
 delete mode 100644 examples/vhost/virtio-net.h
 create mode 100644 lib/librte_vhost/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h
 create mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py
 create mode 100644 lib/librte_vhost/main.c
 create mode 100644 lib/librte_vhost/main.h
 create mode 100644 lib/librte_vhost/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/virtio-net.c
 create mode 100644 lib/librte_vhost/virtio-net.h

diff --git a/examples/vhost/Makefile b/examples/vhost/Makefile
deleted file mode 100644
index f45f83f..000
--- a/examples/vhost/Makefile
+++ /dev/null
@@ -1,60 +0,0 @@
-#   BSD LICENSE
-#
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-#   All rights reserved.
-#
-#   Redistribution and use in source and binary forms, with or without
-#   modification, are permitted provided that the following conditions
-#   are met:
-#
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in
-#   the documentation and/or other materials provided with the
-#   distribution.
-# * Neither the name of Intel Corporation nor the names of its
-#   contributors may be used to endorse or promote products derived
-#   from this software without specific prior written permission.
-#
-#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-ifeq ($(RTE_SDK),)
-$(error "Please define RTE_SDK environment variable")
-endif
-
-# Default target, can be overriden by command line or environment
-RTE_TARGET ?= x86_64-native-linuxapp-gcc
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
-$(info This application can only operate in a linuxapp environment, \
-please change the definition of the RTE_TARGET environment variable)
-all:
-else
-
-# binary nam

[dpdk-dev] [PATCH 0/2] examples/vhost: vhost example based on vhost lib API

2014-09-18 Thread Huawei Xie
The old vhost example consists of a vhost cuse driver, virtio device management,
and VMDQ-based packet switching logic.
The vhost cuse driver and virtio device management are moved into the vhost lib.
This patch modifies the vhost example to use the vhost lib API.

Huawei Xie (2):
  copy main.c main.h from old vhost example
  modify vhost example to use vhost lib API

 examples/vhost/Makefile |   52 +
 examples/vhost/main.c   | 3045 +++
 examples/vhost/main.h   |  123 ++
 3 files changed, 3220 insertions(+)
 create mode 100644 examples/vhost/Makefile
 create mode 100644 examples/vhost/main.c
 create mode 100644 examples/vhost/main.h

-- 
1.8.1.4



[dpdk-dev] [PATCH 2/2] examples/vhost: vhost example modification to use vhost lib API

2014-09-18 Thread Huawei Xie
This vhost example demonstrates how to integrate user space vhost with a
DPDK-accelerated Ethernet vSwitch.
 1) rte_vhost_driver_register initialises vhost driver.
 2) rte_vhost_driver_callback_register registers the new_device/destroy_device
callbacks.
These callbacks should be implemented in the Ethernet switch application.
new_device is called when a virtio_device is ready for processing in the
vSwitch.
destroy_device is called when a virtio_device is deactivated by the guest OS.
 3) rte_vhost_driver_session_start starts vhost driver session loop.
 4) rte_vhost_enqueue/dequeue_burst to send packets to or receive packets from
guest virtio device. virtio_dev_rx/tx is removed.
 5) zero copy feature is implemented in the example.
 6) mergeable rx/tx will be implemented in the vhost lib.

Signed-off-by: Huawei Xie 
---
 examples/vhost/Makefile |   52 ++
 examples/vhost/main.c   | 1455 +--
 examples/vhost/main.h   |   47 +-
 3 files changed, 483 insertions(+), 1071 deletions(-)
 create mode 100644 examples/vhost/Makefile

diff --git a/examples/vhost/Makefile b/examples/vhost/Makefile
new file mode 100644
index 000..a4d4fb0
--- /dev/null
+++ b/examples/vhost/Makefile
@@ -0,0 +1,52 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overriden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = vhost-switch
+
+# all source are stored in SRCS-y
+#SRCS-y := cusedrv.c loopback-userspace.c
+SRCS-y := main.c
+
+CFLAGS += -O2 -I/usr/local/include -D_FILE_OFFSET_BITS=64 -Wno-unused-parameter
+CFLAGS += $(WERROR_FLAGS)
+LDFLAGS += -lfuse
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 7d9e6a2..a796dbd 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -49,10 +49,9 @@
 #include 
 #include 
 #include 
+#include 

 #include "main.h"
-#include "virtio-net.h"
-#include "vhost-net-cdev.h"

 #define MAX_QUEUES 128

@@ -100,7 +99,6 @@
 #define TX_WTHRESH 0  /* Default values of TX write-back threshold reg. */

 #define MAX_PKT_BURST 32   /* Max burst size for RX/TX */
-#define MAX_MRG_PKT_BURST 16   /* Max burst for merge buffers. Set to 1 due to 
performance issue. */
 #define BURST_TX_DRAIN_US 100  /* TX drain every ~100us */

 #define BURST_RX_WAIT_US 15/* Defines how long we wait between retries on 
RX */
@@ -168,13 +166,14 @@ static uint32_t num_switching_cores = 0;

 /* number of devices/queues to support*/
 static uint32_t num_queues = 0;
-uint32_t num_devices = 0;
+static uint32_t num_devices;

 /*
  * Enable zero copy, pkts buffer will directly dma to hw descriptor,
  * disabled on default.
  */
 static uint32_t zero_copy;
+static int mergeable;

 /* number of descriptors to apply*/
 static uint32_t num_rx_descriptor = RTE_TEST_RX_DESC_DEFAULT_ZCP;
@@ -218,12 +217,6 @@ static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
 /* Character device basename. Can be set by user. */
 static char dev_basename[MAX_BASENAME_SZ] = "vhost-net";

-/* Charater device index. Can be set by user. */
-static uint32_t dev_index = 0;
-
-/* This can

[dpdk-dev] [PATCH 1/2] examples/vhost: copy old vhost example files

2014-09-18 Thread Huawei Xie
main.c and main.h were refactored to become the vhost lib in the vhost lib patch,
and were therefore removed. Here they are copied back from the old vhost example
without any modification.

Signed-off-by: Huawei Xie 
---
 examples/vhost/main.c | 3722 +
 examples/vhost/main.h |   86 ++
 2 files changed, 3808 insertions(+)
 create mode 100644 examples/vhost/main.c
 create mode 100644 examples/vhost/main.h

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
new file mode 100644
index 000..7d9e6a2
--- /dev/null
+++ b/examples/vhost/main.c
@@ -0,0 +1,3722 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "main.h"
+#include "virtio-net.h"
+#include "vhost-net-cdev.h"
+
+#define MAX_QUEUES 128
+
+/* the maximum number of external ports supported */
+#define MAX_SUP_PORTS 1
+
+/*
+ * Calculate the number of buffers needed per port
+ */
+#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) +
\
+   
(num_switching_cores*MAX_PKT_BURST) +   \
+   
(num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\
+   
(num_switching_cores*MBUF_CACHE_SIZE))
+
+#define MBUF_CACHE_SIZE 128
+#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+
+/*
+ * No frame data buffer allocated from host are required for zero copy
+ * implementation, guest will allocate the frame data buffer, and vhost
+ * directly use it.
+ */
+#define VIRTIO_DESCRIPTOR_LEN_ZCP 1518
+#define MBUF_SIZE_ZCP (VIRTIO_DESCRIPTOR_LEN_ZCP + sizeof(struct rte_mbuf) \
+   + RTE_PKTMBUF_HEADROOM)
+#define MBUF_CACHE_SIZE_ZCP 0
+
+/*
+ * RX and TX Prefetch, Host, and Write-back threshold values should be
+ * carefully set for optimal performance. Consult the network
+ * controller's datasheet and supporting DPDK documentation for guidance
+ * on how these parameters should be set.
+ */
+#define RX_PTHRESH 8 /* Default values of RX prefetch threshold reg. */
+#define RX_HTHRESH 8 /* Default values of RX host threshold reg. */
+#define RX_WTHRESH 4 /* Default values of RX write-back threshold reg. */
+
+/*
+ * These default values are optimized for use with the Intel(R) 82599 10 GbE
+ * Controller and the DPDK ixgbe PMD. Consider using other values for other
+ * network controllers and/or network drivers.
+ */
+#define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */
+#define TX_HTHRESH 0  /* Default values of TX host threshold reg. */
+#define TX_WTHRESH 0  /* Default values of TX write-back threshold reg. */
+
+#define MAX_PKT_BURST 32   /* Max burst size for RX/TX */
+#define MAX_MRG_PKT_BURST 16   /* Max burst for merge buffers. Set to 1 due to 
performance issue. */
+#define BURST_TX_DRAIN_US 100  /* TX drain every ~100us */
+
+#define BURST_RX_WAIT_US 15/* Defines how long we wait between retries on 
RX */
+#define BURST_RX_RETRIES 4 /* Number of retries on RX. */
+
+#define JUMBO_FRAME_

  1   2   3   >