Re: [Qemu-devel] [PATCH 6/9] virtio net: introduce dataplane for virtio net

2013-02-27 Thread liu ping fan
On Fri, Feb 22, 2013 at 4:55 AM, mdroth <mdr...@linux.vnet.ibm.com> wrote:
 On Thu, Feb 21, 2013 at 08:54:50PM +0800, Liu Ping Fan wrote:
 From: Liu Ping Fan <pingf...@linux.vnet.ibm.com>

 This is modeled on the virtio-blk dataplane: it pushes the data
 handling out of the big lock. It is an attempt to implement this
 processing in userspace, where vhost-net does it in the kernel.

 Signed-off-by: Liu Ping Fan <pingf...@linux.vnet.ibm.com>
 ---
  hw/dataplane/virtio-net.c |  422 +
  hw/dataplane/virtio-net.h |   26 +++
  hw/virtio-net.c   |   56 +-
  hw/virtio-net.h   |   61 +++
  4 files changed, 517 insertions(+), 48 deletions(-)
  create mode 100644 hw/dataplane/virtio-net.c
  create mode 100644 hw/dataplane/virtio-net.h

 diff --git a/hw/dataplane/virtio-net.c b/hw/dataplane/virtio-net.c
 new file mode 100644
 index 000..9a1795d
 --- /dev/null
 +++ b/hw/dataplane/virtio-net.c
 @@ -0,0 +1,422 @@
 +/* Copyright IBM, Corp. 2013
 + *
 + * Based on vhost-net and virtio-blk dataplane code
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2.
 + */
 +#include "hw/virtio.h"
 +#include "qemu/iov.h"
 +#include "vring.h"
 +#include <linux/virtio_ring.h>
 +#include "net/net.h"
 +#include "net/checksum.h"
 +#include "net/tap.h"
 +#include "virtio-net.h"
 +#include "qemu/error-report.h"
 +
 +typedef struct VirtIONetDataPlane {
 +    int async_tx_head;
 +    Vring *rx_vring;
 +    Vring *tx_vring;
 +    EventHandler *rx_handler;
 +    EventHandler *tx_handler;
 +    bool stop;
 +} VirtIONetDataPlane;
 +
 +WorkThread virt_net_thread;
 +
 +#define VRING_MAX 128
 +
 +static int virtnet_tx(VirtIONet *n, VirtQueue *vq);
 +
 +static void virtnet_tx_complete(struct NetClientState *nc, ssize_t sz)
 +{
 +    int ret;
 +    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 +
 +    /* The async send finished; complete the deferred element, then
 +     * resume draining the tx vring. */
 +    vring_push(n->dp->tx_vring, n->dp->async_tx_head, 0);
 +    ret = virtnet_tx(n, n->tx_vq);
 +    if (ret != -EBUSY) {
 +        vring_enable_notification(&n->vdev, n->dp->tx_vring);
 +    }
 +}
 +
 +static int virtnet_tx(VirtIONet *n, VirtQueue *vq)
 +{
 +    struct iovec out_iov[VRING_MAX], sg[VRING_MAX];
 +    struct iovec *snd, *end = &out_iov[VRING_MAX];
 +    int head;
 +    unsigned int out_num, in_num, sg_num;
 +    int ret;
 +    int num_packets = 0;
 +
 +    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 +        return num_packets;
 +    }
 +
 +    assert(n->vdev.vm_running);
 +
 +    if (n->async_tx.elem.out_num) {
 +        /* An async send is still in flight; tx_complete will restart us. */
 +        return num_packets;
 +    }
 +
 +    while (true) {
 +        head = vring_pop(&n->vdev, n->dp->tx_vring, out_iov, end, &out_num,
 +                         &in_num);
 +        if (head < 0) {
 +            break;
 +        }
 +        snd = out_iov;
 +        assert(n->host_hdr_len <= n->guest_hdr_len);
 +        if (n->host_hdr_len != n->guest_hdr_len) {
 +            /* Drop the tail of the guest header so that only
 +             * host_hdr_len bytes of header go out on the wire. */
 +            sg_num = iov_copy(sg, ARRAY_SIZE(sg),
 +                              out_iov, out_num,
 +                              0, n->host_hdr_len);
 +            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
 +                               out_iov, out_num,
 +                               n->guest_hdr_len, -1);
 +            out_num = sg_num;
 +            snd = sg;
 +        }
 +
 +        ret = qemu_sendv_packet_async(&n->nic->nc, snd, out_num,
 +                                      virtnet_tx_complete);
 +        if (ret == 0) {
 +            /* Backend would block; remember the element and finish it
 +             * in virtnet_tx_complete(). */
 +            n->dp->async_tx_head = head;
 +            return -EBUSY;
 +        }
 +        vring_push(n->dp->tx_vring, head, 0);
 +        if (num_packets++ > n->tx_burst) {
 +            break;
 +        }

 I'm not sure why we'd break here: if we're sending out lots of packets
 should we keep notifications disabled and continue sending them till
 we'd block? Is it to avoid starving the rx side?

Yes.
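
Bounding the burst keeps one busy tx queue from monopolizing the
dataplane thread, so rx processing gets a turn in between. A rough
sketch of the pattern inside virtnet_handle_tx(), after
vring_disable_notification() (tx_kick_self() is a hypothetical helper
that re-raises the queue's event notifier, not something in this patch;
the non-dataplane virtio-net tx bottom half does something similar by
rescheduling itself):

    ret = virtnet_tx(n, vq);
    if (ret > n->tx_burst) {
        /* burst limit hit: leave guest notifications disabled and
         * re-kick ourselves so rx gets serviced before more tx */
        tx_kick_self(n);
    } else if (ret != -EBUSY) {
        vring_enable_notification(vdev, n->dp->tx_vring);
    }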
 +    }
 +
 +    return num_packets;
 +}
 +
 +static void virtnet_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
 +{
 +    int ret;
 +    VirtIONet *n = (VirtIONet *)vdev;
 +
 +    /* This happens when the device was stopped but the VCPU wasn't. */
 +    if (!n->vdev.vm_running) {
 +        return;
 +    }
 +    vring_disable_notification(vdev, n->dp->tx_vring);
 +    ret = virtnet_tx(n, vq);
 +    if (ret != -EBUSY) {
 +        vring_enable_notification(vdev, n->dp->tx_vring);
 +    }
 +}
 +
 +
 +static int virtio_net_can_receive(NetClientState *nc)
 +{
 +    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 +    if (!n->vdev.vm_running) {
 +        return 0;
 +    }
 +    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 +        return 0;
 +    }
 +
 +    return 1;
 +}
 +
 +/* Peek at how many bytes of rx buffer space the guest has made
 + * available, without consuming any descriptors. */
 +static int rx_mergeable_buf_sz(VirtIONet *n)
 +{
 +    uint16_t start, idx, head;
 +    int total = 0;
 +    Vring *vring = n->dp->rx_vring;
 +    struct vring_desc *dsc;
 +    struct vring_desc *base;
 +
 +    for (start = vring->last_avail_idx; start != vring->vr.avail->idx;
 +         start++) {
 +        head = start % vring->vr.num;
 +        idx = vring->vr.avail->ring[head];
 +        if 

[Qemu-devel] [PATCH 6/9] virtio net: introduce dataplane for virtio net

2013-02-21 Thread Liu Ping Fan
From: Liu Ping Fan <pingf...@linux.vnet.ibm.com>

This is modeled on the virtio-blk dataplane: it pushes the data
handling out of the big lock. It is an attempt to implement this
processing in userspace, where vhost-net does it in the kernel.

Signed-off-by: Liu Ping Fan <pingf...@linux.vnet.ibm.com>
---
 hw/dataplane/virtio-net.c |  422 +
 hw/dataplane/virtio-net.h |   26 +++
 hw/virtio-net.c   |   56 +-
 hw/virtio-net.h   |   61 +++
 4 files changed, 517 insertions(+), 48 deletions(-)
 create mode 100644 hw/dataplane/virtio-net.c
 create mode 100644 hw/dataplane/virtio-net.h

diff --git a/hw/dataplane/virtio-net.c b/hw/dataplane/virtio-net.c
new file mode 100644
index 000..9a1795d
--- /dev/null
+++ b/hw/dataplane/virtio-net.c
@@ -0,0 +1,422 @@
+/* Copyright IBM, Corp. 2013
+ *
+ * Based on vhost-net and virtio-blk dataplane code
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include "hw/virtio.h"
+#include "qemu/iov.h"
+#include "vring.h"
+#include <linux/virtio_ring.h>
+#include "net/net.h"
+#include "net/checksum.h"
+#include "net/tap.h"
+#include "virtio-net.h"
+#include "qemu/error-report.h"
+
+typedef struct VirtIONetDataPlane {
+    int async_tx_head;
+    Vring *rx_vring;
+    Vring *tx_vring;
+    EventHandler *rx_handler;
+    EventHandler *tx_handler;
+    bool stop;
+} VirtIONetDataPlane;
+
+WorkThread virt_net_thread;
+
+#define VRING_MAX 128
+
+static int virtnet_tx(VirtIONet *n, VirtQueue *vq);
+
+static void virtnet_tx_complete(struct NetClientState *nc, ssize_t sz)
+{
+    int ret;
+    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+
+    /* The async send finished; complete the deferred element, then
+     * resume draining the tx vring. */
+    vring_push(n->dp->tx_vring, n->dp->async_tx_head, 0);
+    ret = virtnet_tx(n, n->tx_vq);
+    if (ret != -EBUSY) {
+        vring_enable_notification(&n->vdev, n->dp->tx_vring);
+    }
+}
+
+static int virtnet_tx(VirtIONet *n, VirtQueue *vq)
+{
+    struct iovec out_iov[VRING_MAX], sg[VRING_MAX];
+    struct iovec *snd, *end = &out_iov[VRING_MAX];
+    int head;
+    unsigned int out_num, in_num, sg_num;
+    int ret;
+    int num_packets = 0;
+
+    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return num_packets;
+    }
+
+    assert(n->vdev.vm_running);
+
+    if (n->async_tx.elem.out_num) {
+        /* An async send is still in flight; tx_complete will restart us. */
+        return num_packets;
+    }
+
+    while (true) {
+        head = vring_pop(&n->vdev, n->dp->tx_vring, out_iov, end, &out_num,
+                         &in_num);
+        if (head < 0) {
+            break;
+        }
+        snd = out_iov;
+        assert(n->host_hdr_len <= n->guest_hdr_len);
+        if (n->host_hdr_len != n->guest_hdr_len) {
+            /* Drop the tail of the guest header so that only
+             * host_hdr_len bytes of header go out on the wire. */
+            sg_num = iov_copy(sg, ARRAY_SIZE(sg),
+                              out_iov, out_num,
+                              0, n->host_hdr_len);
+            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
+                               out_iov, out_num,
+                               n->guest_hdr_len, -1);
+            out_num = sg_num;
+            snd = sg;
+        }
+
+        ret = qemu_sendv_packet_async(&n->nic->nc, snd, out_num,
+                                      virtnet_tx_complete);
+        if (ret == 0) {
+            /* Backend would block; remember the element and finish it
+             * in virtnet_tx_complete(). */
+            n->dp->async_tx_head = head;
+            return -EBUSY;
+        }
+        vring_push(n->dp->tx_vring, head, 0);
+        if (num_packets++ > n->tx_burst) {
+            break;
+        }
+    }
+
+    return num_packets;
+}
+
+static void virtnet_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
+{
+    int ret;
+    VirtIONet *n = (VirtIONet *)vdev;
+
+    /* This happens when the device was stopped but the VCPU wasn't. */
+    if (!n->vdev.vm_running) {
+        return;
+    }
+    vring_disable_notification(vdev, n->dp->tx_vring);
+    ret = virtnet_tx(n, vq);
+    if (ret != -EBUSY) {
+        vring_enable_notification(vdev, n->dp->tx_vring);
+    }
+}
+
+
+static int virtio_net_can_receive(NetClientState *nc)
+{
+    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+    if (!n->vdev.vm_running) {
+        return 0;
+    }
+    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Peek at how many bytes of rx buffer space the guest has made
+ * available, without consuming any descriptors. */
+static int rx_mergeable_buf_sz(VirtIONet *n)
+{
+    uint16_t start, idx, head;
+    int total = 0;
+    Vring *vring = n->dp->rx_vring;
+    struct vring_desc *dsc;
+    struct vring_desc *base;
+
+    for (start = vring->last_avail_idx; start != vring->vr.avail->idx;
+         start++) {
+        head = start % vring->vr.num;
+        idx = vring->vr.avail->ring[head];
+        if (vring->vr.desc[idx].flags & VRING_DESC_F_INDIRECT) {
+            /* Indirect chain: map the descriptor table it points to;
+             * the chain starts at index 0 of that table. */
+            base = hostmem_lookup(&vring->hostmem, vring->vr.desc[idx].addr,
+                                  vring->vr.desc[idx].len, 0);
+            dsc = base;
+        } else {
+            /* Direct chain: it starts at desc[idx], not desc[0]. */
+            base = vring->vr.desc;
+            dsc = &base[idx];
+        }
+        do {
+            total += dsc->len;
+            if (!(dsc->flags & VRING_DESC_F_NEXT)) {
+                break;
+            }
+            dsc = &base[dsc->next];
+        } while (true);
+    }
+    return total;
+}
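+
+/* Usage sketch (an assumption, not part of this patch): a caller on
+ * the rx path could compare the peeked total against the packet size
+ * before copying, e.g.
+ *
+ *     if (rx_mergeable_buf_sz(n) < size + n->guest_hdr_len) {
+ *         return 0;  (not enough guest buffers yet; retry on next kick)
+ *     }
+ */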
+
+static bool 

Re: [Qemu-devel] [PATCH 6/9] virtio net: introduce dataplane for virtio net

2013-02-21 Thread mdroth
On Thu, Feb 21, 2013 at 08:54:50PM +0800, Liu Ping Fan wrote:
 From: Liu Ping Fan <pingf...@linux.vnet.ibm.com>
 
 This is modeled on the virtio-blk dataplane: it pushes the data
 handling out of the big lock. It is an attempt to implement this
 processing in userspace, where vhost-net does it in the kernel.
 
 Signed-off-by: Liu Ping Fan <pingf...@linux.vnet.ibm.com>
 ---
  hw/dataplane/virtio-net.c |  422 +
  hw/dataplane/virtio-net.h |   26 +++
  hw/virtio-net.c   |   56 +-
  hw/virtio-net.h   |   61 +++
  4 files changed, 517 insertions(+), 48 deletions(-)
  create mode 100644 hw/dataplane/virtio-net.c
  create mode 100644 hw/dataplane/virtio-net.h
 
 diff --git a/hw/dataplane/virtio-net.c b/hw/dataplane/virtio-net.c
 new file mode 100644
 index 000..9a1795d
 --- /dev/null
 +++ b/hw/dataplane/virtio-net.c
 @@ -0,0 +1,422 @@
 +/* Copyright IBM, Corp. 2013
 + *
 + * Based on vhost-net and virtio-blk dataplane code
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2.
 + */
 +#include "hw/virtio.h"
 +#include "qemu/iov.h"
 +#include "vring.h"
 +#include <linux/virtio_ring.h>
 +#include "net/net.h"
 +#include "net/checksum.h"
 +#include "net/tap.h"
 +#include "virtio-net.h"
 +#include "qemu/error-report.h"
 +
 +typedef struct VirtIONetDataPlane {
 +    int async_tx_head;
 +    Vring *rx_vring;
 +    Vring *tx_vring;
 +    EventHandler *rx_handler;
 +    EventHandler *tx_handler;
 +    bool stop;
 +} VirtIONetDataPlane;
 +
 +WorkThread virt_net_thread;
 +
 +#define VRING_MAX 128
 +
 +static int virtnet_tx(VirtIONet *n, VirtQueue *vq);
 +
 +static void virtnet_tx_complete(struct NetClientState *nc, ssize_t sz)
 +{
 +    int ret;
 +    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 +
 +    /* The async send finished; complete the deferred element, then
 +     * resume draining the tx vring. */
 +    vring_push(n->dp->tx_vring, n->dp->async_tx_head, 0);
 +    ret = virtnet_tx(n, n->tx_vq);
 +    if (ret != -EBUSY) {
 +        vring_enable_notification(&n->vdev, n->dp->tx_vring);
 +    }
 +}
 +
 +static int virtnet_tx(VirtIONet *n, VirtQueue *vq)
 +{
 +    struct iovec out_iov[VRING_MAX], sg[VRING_MAX];
 +    struct iovec *snd, *end = &out_iov[VRING_MAX];
 +    int head;
 +    unsigned int out_num, in_num, sg_num;
 +    int ret;
 +    int num_packets = 0;
 +
 +    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 +        return num_packets;
 +    }
 +
 +    assert(n->vdev.vm_running);
 +
 +    if (n->async_tx.elem.out_num) {
 +        /* An async send is still in flight; tx_complete will restart us. */
 +        return num_packets;
 +    }
 +
 +    while (true) {
 +        head = vring_pop(&n->vdev, n->dp->tx_vring, out_iov, end, &out_num,
 +                         &in_num);
 +        if (head < 0) {
 +            break;
 +        }
 +        snd = out_iov;
 +        assert(n->host_hdr_len <= n->guest_hdr_len);
 +        if (n->host_hdr_len != n->guest_hdr_len) {
 +            /* Drop the tail of the guest header so that only
 +             * host_hdr_len bytes of header go out on the wire. */
 +            sg_num = iov_copy(sg, ARRAY_SIZE(sg),
 +                              out_iov, out_num,
 +                              0, n->host_hdr_len);
 +            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
 +                               out_iov, out_num,
 +                               n->guest_hdr_len, -1);
 +            out_num = sg_num;
 +            snd = sg;
 +        }
 +
 +        ret = qemu_sendv_packet_async(&n->nic->nc, snd, out_num,
 +                                      virtnet_tx_complete);
 +        if (ret == 0) {
 +            /* Backend would block; remember the element and finish it
 +             * in virtnet_tx_complete(). */
 +            n->dp->async_tx_head = head;
 +            return -EBUSY;
 +        }
 +        vring_push(n->dp->tx_vring, head, 0);
 +        if (num_packets++ > n->tx_burst) {
 +            break;
 +        }

I'm not sure why we'd break here: if we're sending out lots of packets
should we keep notifications disabled and continue sending them till
we'd block? Is it to avoid starving the rx side?

 +    }
 +
 +    return num_packets;
 +}
 +
 +static void virtnet_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
 +{
 +    int ret;
 +    VirtIONet *n = (VirtIONet *)vdev;
 +
 +    /* This happens when the device was stopped but the VCPU wasn't. */
 +    if (!n->vdev.vm_running) {
 +        return;
 +    }
 +    vring_disable_notification(vdev, n->dp->tx_vring);
 +    ret = virtnet_tx(n, vq);
 +    if (ret != -EBUSY) {
 +        vring_enable_notification(vdev, n->dp->tx_vring);
 +    }
 +}
 +
 +
 +static int virtio_net_can_receive(NetClientState *nc)
 +{
 +    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 +    if (!n->vdev.vm_running) {
 +        return 0;
 +    }
 +    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 +        return 0;
 +    }
 +
 +    return 1;
 +}
 +
 +/* Peek at how many bytes of rx buffer space the guest has made
 + * available, without consuming any descriptors. */
 +static int rx_mergeable_buf_sz(VirtIONet *n)
 +{
 +    uint16_t start, idx, head;
 +    int total = 0;
 +    Vring *vring = n->dp->rx_vring;
 +    struct vring_desc *dsc;
 +    struct vring_desc *base;
 +
 +    for (start = vring->last_avail_idx; start != vring->vr.avail->idx;
 +         start++) {
 +        head = start % vring->vr.num;
 +        idx = vring->vr.avail->ring[head];
 +        if (vring->vr.desc[idx].flags & VRING_DESC_F_INDIRECT) {
 +            base =