Re: [Qemu-devel] [PATCH 6/9] virtio net: introduce dataplane for virtio net

2013-02-27 Thread liu ping fan
On Fri, Feb 22, 2013 at 4:55 AM, mdroth  wrote:
> On Thu, Feb 21, 2013 at 08:54:50PM +0800, Liu Ping Fan wrote:
>> From: Liu Ping Fan 
>>
>> This mirrors the virtio-blk dataplane, pushing data handling out of
>> the big lock. It is an attempt to implement this processing in
>> userspace, whereas vhost-net does the same in the kernel.
>>
>> Signed-off-by: Liu Ping Fan 
>> ---
>>  hw/dataplane/virtio-net.c |  422 +
>>  hw/dataplane/virtio-net.h |   26 +++
>>  hw/virtio-net.c   |   56 +-
>>  hw/virtio-net.h   |   61 +++
>>  4 files changed, 517 insertions(+), 48 deletions(-)
>>  create mode 100644 hw/dataplane/virtio-net.c
>>  create mode 100644 hw/dataplane/virtio-net.h
>>
>> diff --git a/hw/dataplane/virtio-net.c b/hw/dataplane/virtio-net.c
>> new file mode 100644
>> index 000..9a1795d
>> --- /dev/null
>> +++ b/hw/dataplane/virtio-net.c
>> @@ -0,0 +1,422 @@
>> +/* Copyright IBM, Corp. 2013
>> + *
>> + * Based on vhost-net and virtio-blk dataplane code
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.
>> + */
>> +#include "hw/virtio.h"
>> +#include "qemu/iov.h"
>> +#include "vring.h"
>> +#include 
>> +#include "net/net.h"
>> +#include "net/checksum.h"
>> +#include "net/tap.h"
>> +#include "virtio-net.h"
>> +#include "qemu/error-report.h"
>> +
>> +typedef struct VirtIONetDataPlane {
>> +int async_tx_head;
>> +Vring *rx_vring;
>> +Vring *tx_vring;
>> +EventHandler *rx_handler;
>> +EventHandler *tx_handler;
>> +bool stop;
>> +} VirtIONetDataPlane;
>> +
>> +WorkThread virt_net_thread;
>> +
>> +#define VRING_MAX 128
>> +
>> +static int virtnet_tx(VirtIONet *n, VirtQueue *vq);
>> +
>> +static void virtnet_tx_complete(struct NetClientState *nc, ssize_t sz)
>> +{
>> +int ret;
>> +VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
>> +
>> +vring_push(n->dp->tx_vring, n->dp->async_tx_head, 0);
>> +ret = virtnet_tx(n, n->tx_vq);
>> +if (ret != -EBUSY) {
>> +vring_enable_notification(&n->vdev, n->dp->tx_vring);
>> +}
>> +}
>> +
>> +static int virtnet_tx(VirtIONet *n, VirtQueue *vq)
>> +{
>> +struct iovec out_iov[VRING_MAX], sg[VRING_MAX];
>> +struct iovec *snd, *end = &out_iov[VRING_MAX];
>> +int head;
>> +unsigned int out_num, in_num, sg_num;
>> +int ret;
>> +int num_packets = 0;
>> +
>> +if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
>> +return num_packets;
>> +}
>> +
>> +assert(n->vdev.vm_running);
>> +
>> +if (n->async_tx.elem.out_num) {
>> +return num_packets;
>> +}
>> +
>> +while (true) {
>> +head = vring_pop(&n->vdev, n->dp->tx_vring, out_iov, end, &out_num,
>> +&in_num);
>> +if (head < 0) {
>> +break;
>> +}
>> +snd = out_iov;
>> +assert(n->host_hdr_len <= n->guest_hdr_len);
>> +if (n->host_hdr_len != n->guest_hdr_len) {
>> +sg_num = iov_copy(sg, ARRAY_SIZE(sg),
>> +   out_iov, out_num,
>> +   0, n->host_hdr_len);
>> +sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
>> + out_iov, out_num,
>> + n->guest_hdr_len, -1);
>> +out_num = sg_num;
>> +snd = sg;
>> +}
>> +
>> +ret = qemu_sendv_packet_async(&n->nic->nc, snd, out_num,
>> +virtnet_tx_complete);
>> +if (ret == 0) {
>> +n->dp->async_tx_head = head;
>> +return -EBUSY;
>> +}
>> +vring_push(n->dp->tx_vring, head, 0);
>> +if (num_packets++ > n->tx_burst) {
>> +break;
>> +}
>
> I'm not sure why we'd break here: if we're sending out lots of packets
> should we keep notifications disabled and continue sending them till
> we'd block? Is it to avoid starving the rx side?
>
Yes.
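
For readers following the thread: the break caps each TX pass at tx_burst
packets so a transmit-heavy guest cannot monopolize the dataplane thread and
starve the rx path. A minimal sketch of that bounded-batch pattern, with
illustrative helper names standing in for vring_pop()/qemu_sendv_packet_async()
rather than the patch's actual API:

    /* Sketch only: bounded-batch TX with a notification re-check.
     * All names below are illustrative stand-ins, not QEMU APIs. */
    #include <stdbool.h>

    typedef struct Ring Ring;

    int  ring_pop_and_send(Ring *r);        /* 0 on success, <0 when empty */
    bool ring_more_avail(Ring *r);
    void ring_disable_notification(Ring *r);
    void ring_enable_notification(Ring *r);
    void ring_reschedule(Ring *r);

    static void tx_kick(Ring *r, int tx_burst)
    {
        int sent = 0;

        ring_disable_notification(r);       /* poll mode: no guest kicks meanwhile */
        while (sent < tx_burst && ring_pop_and_send(r) == 0) {
            sent++;                         /* cap the batch so rx gets a turn */
        }
        ring_enable_notification(r);        /* re-arm, then recheck to close the race */
        if (ring_more_avail(r)) {
            ring_reschedule(r);             /* buffers arrived after the last pop */
        }
    }
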
>> +}
>> +
>> +return num_packets;
>> +}
>> +
>> +static void virtnet_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
>> +{
>> +int ret;
>> +VirtIONet *n = (VirtIONet *)vdev;
>> +
>> +/* This happens when device was stopped but VCPU wasn't. */
>> +if (!n->vdev.vm_running) {
>> +return;
>> +}
>> +vring_disable_notification(vdev, n->dp->tx_vring);
>> +ret = virtnet_tx(n, vq);
>> +if (ret != -EBUSY) {
>> +vring_enable_notification(vdev, n->dp->tx_vring);
>> +}
>> +}
>> +
>> +
>> +static int virtio_net_can_receive(NetClientState *nc)
>> +{
>> +VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
>> +if (!n->vdev.vm_running) {
>> +return 0;
>> +}
>> +if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
>> +return 0;
>> +}
>> +
>> +return 1;
>> +}
>> +
>> +/* peek at how much buffer space the avail ring holds, without consuming it */
>> +static int rx_mergeable_buf_sz(VirtIONet *n)
>> +{
>> +uint16_t start, idx, head;
>> +int total = 0;
>> +Vring *vring = n->dp->rx_vring;

Re: [Qemu-devel] [PATCH 6/9] virtio net: introduce dataplane for virtio net

2013-02-21 Thread mdroth
On Thu, Feb 21, 2013 at 08:54:50PM +0800, Liu Ping Fan wrote:
> From: Liu Ping Fan 
> 
> This mirrors the virtio-blk dataplane, pushing data handling out of
> the big lock. It is an attempt to implement this processing in
> userspace, whereas vhost-net does the same in the kernel.
> 
> Signed-off-by: Liu Ping Fan 
> ---
>  hw/dataplane/virtio-net.c |  422 +
>  hw/dataplane/virtio-net.h |   26 +++
>  hw/virtio-net.c   |   56 +-
>  hw/virtio-net.h   |   61 +++
>  4 files changed, 517 insertions(+), 48 deletions(-)
>  create mode 100644 hw/dataplane/virtio-net.c
>  create mode 100644 hw/dataplane/virtio-net.h
> 
> diff --git a/hw/dataplane/virtio-net.c b/hw/dataplane/virtio-net.c
> new file mode 100644
> index 000..9a1795d
> --- /dev/null
> +++ b/hw/dataplane/virtio-net.c
> @@ -0,0 +1,422 @@
> +/* Copyright IBM, Corp. 2013
> + *
> + * Based on vhost-net and virtio-blk dataplane code
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + */
> +#include "hw/virtio.h"
> +#include "qemu/iov.h"
> +#include "vring.h"
> +#include 
> +#include "net/net.h"
> +#include "net/checksum.h"
> +#include "net/tap.h"
> +#include "virtio-net.h"
> +#include "qemu/error-report.h"
> +
> +typedef struct VirtIONetDataPlane {
> +int async_tx_head;
> +Vring *rx_vring;
> +Vring *tx_vring;
> +EventHandler *rx_handler;
> +EventHandler *tx_handler;
> +bool stop;
> +} VirtIONetDataPlane;
> +
> +WorkThread virt_net_thread;
> +
> +#define VRING_MAX 128
> +
> +static int virtnet_tx(VirtIONet *n, VirtQueue *vq);
> +
> +static void virtnet_tx_complete(struct NetClientState *nc, ssize_t sz)
> +{
> +int ret;
> +VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
> +
> +vring_push(n->dp->tx_vring, n->dp->async_tx_head, 0);
> +ret = virtnet_tx(n, n->tx_vq);
> +if (ret != -EBUSY) {
> +vring_enable_notification(&n->vdev, n->dp->tx_vring);
> +}
> +}
> +
> +static int virtnet_tx(VirtIONet *n, VirtQueue *vq)
> +{
> +struct iovec out_iov[VRING_MAX], sg[VRING_MAX];
> +struct iovec *snd, *end = &out_iov[VRING_MAX];
> +int head;
> +unsigned int out_num, in_num, sg_num;
> +int ret;
> +int num_packets = 0;
> +
> +if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
> +return num_packets;
> +}
> +
> +assert(n->vdev.vm_running);
> +
> +if (n->async_tx.elem.out_num) {
> +return num_packets;
> +}
> +
> +while (true) {
> +head = vring_pop(&n->vdev, n->dp->tx_vring, out_iov, end, &out_num,
> +&in_num);
> +if (head < 0) {
> +break;
> +}
> +snd = out_iov;
> +assert(n->host_hdr_len <= n->guest_hdr_len);
> +if (n->host_hdr_len != n->guest_hdr_len) {
> +sg_num = iov_copy(sg, ARRAY_SIZE(sg),
> +   out_iov, out_num,
> +   0, n->host_hdr_len);
> +sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
> + out_iov, out_num,
> + n->guest_hdr_len, -1);
> +out_num = sg_num;
> +snd = sg;
> +}
> +
> +ret = qemu_sendv_packet_async(&n->nic->nc, snd, out_num,
> +virtnet_tx_complete);
> +if (ret == 0) {
> +n->dp->async_tx_head = head;
> +return -EBUSY;
> +}
> +vring_push(n->dp->tx_vring, head, 0);
> +if (num_packets++ > n->tx_burst) {
> +break;
> +}

I'm not sure why we'd break here: if we're sending out lots of packets
should we keep notifications disabled and continue sending them till
we'd block? Is it to avoid starving the rx side?
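
For context while reading on: the loop can also stop early when the net
backend would block. qemu_sendv_packet_async() returning 0 means the packet
will complete later, so the code parks the in-flight head and resumes from the
completion callback. A sketch of that backpressure pattern; backend_send_async
and ring_push_used are illustrative stand-ins, not QEMU calls:

    /* Sketch only: park the in-flight descriptor when the backend is
     * full, retire it and restart the TX loop from the completion. */
    #include <errno.h>

    typedef struct TxState {
        int parked_head;            /* descriptor in flight, or -1 */
    } TxState;

    int  backend_send_async(const void *buf, int len, void (*done)(TxState *));
    void ring_push_used(int head);  /* return the descriptor to the guest */
    int  tx_run(TxState *t);        /* the main TX drain loop */

    static void tx_done(TxState *t) /* completion callback */
    {
        ring_push_used(t->parked_head);
        t->parked_head = -1;
        tx_run(t);                  /* resume draining the ring */
    }

    static int tx_one(TxState *t, int head, const void *buf, int len)
    {
        if (backend_send_async(buf, len, tx_done) == 0) {
            t->parked_head = head;  /* backend full: park and stop the loop */
            return -EBUSY;
        }
        ring_push_used(head);       /* completed synchronously */
        return 0;
    }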

> +}
> +
> +return num_packets;
> +}
> +
> +static void virtnet_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +int ret;
> +VirtIONet *n = (VirtIONet *)vdev;
> +
> +/* This happens when device was stopped but VCPU wasn't. */
> +if (!n->vdev.vm_running) {
> +return;
> +}
> +vring_disable_notification(vdev, n->dp->tx_vring);
> +ret = virtnet_tx(n, vq);
> +if (ret != -EBUSY) {
> +vring_enable_notification(vdev, n->dp->tx_vring);
> +}
> +}
> +
> +
> +static int virtio_net_can_receive(NetClientState *nc)
> +{
> +VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
> +if (!n->vdev.vm_running) {
> +return 0;
> +}
> +if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
> +return 0;
> +}
> +
> +return 1;
> +}
> +
> +/* peek at how much buffer space the avail ring holds, without consuming it */
> +static int rx_mergeable_buf_sz(VirtIONet *n)
> +{
> +uint16_t start, idx, head;
> +int total = 0;
> +Vring *vring = n->dp->rx_vring;
> +struct vring_desc *dsc;
> +struct vring_desc *base;
> +
> +for (start = vring->last_avail_idx; start != vring->vr.avail->idx;
> +start++) {
> +head = start % vring->vr.num;

[Qemu-devel] [PATCH 6/9] virtio net: introduce dataplane for virtio net

2013-02-21 Thread Liu Ping Fan
From: Liu Ping Fan 

This mirrors the virtio-blk dataplane, pushing data handling out of
the big lock. It is an attempt to implement this processing in
userspace, whereas vhost-net does the same in the kernel.

Signed-off-by: Liu Ping Fan 
---
 hw/dataplane/virtio-net.c |  422 +
 hw/dataplane/virtio-net.h |   26 +++
 hw/virtio-net.c   |   56 +-
 hw/virtio-net.h   |   61 +++
 4 files changed, 517 insertions(+), 48 deletions(-)
 create mode 100644 hw/dataplane/virtio-net.c
 create mode 100644 hw/dataplane/virtio-net.h
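
The core idea: a dedicated worker thread (virt_net_thread below) runs its own
event loop, presumably over the vrings' event notifiers as in virtio-blk
dataplane, so rx/tx processing proceeds without taking the big QEMU lock. A
minimal sketch of that structure in plain POSIX, independent of the patch's
WorkThread/EventHandler types; all names here are illustrative:

    /* Sketch only: a per-device dataplane thread driven by eventfds.
     * The guest's virtqueue kick lands on an fd; the worker drains it
     * and runs the matching vring handler outside the big lock. */
    #include <poll.h>
    #include <pthread.h>
    #include <stdint.h>
    #include <unistd.h>

    typedef struct {
        int fd;                        /* notifier fd for one vring */
        void (*handle)(void *opaque);  /* rx or tx handler */
        void *opaque;
    } DataplaneHandler;

    static void *dataplane_worker(void *arg)
    {
        DataplaneHandler *h = arg;     /* one vring for brevity; real code polls both */
        struct pollfd pfd = { .fd = h->fd, .events = POLLIN };
        uint64_t cnt;

        for (;;) {
            if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
                if (read(h->fd, &cnt, sizeof(cnt)) < 0) {
                    continue;          /* spurious wakeup, nothing to drain */
                }
                h->handle(h->opaque);  /* process the vring, no big lock held */
            }
        }
        return NULL;
    }

    static void dataplane_start(pthread_t *tid, DataplaneHandler *h)
    {
        pthread_create(tid, NULL, dataplane_worker, h);
    }
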

diff --git a/hw/dataplane/virtio-net.c b/hw/dataplane/virtio-net.c
new file mode 100644
index 000..9a1795d
--- /dev/null
+++ b/hw/dataplane/virtio-net.c
@@ -0,0 +1,422 @@
+/* Copyright IBM, Corp. 2013
+ *
+ * Based on vhost-net and virtio-blk dataplane code
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include "hw/virtio.h"
+#include "qemu/iov.h"
+#include "vring.h"
+#include 
+#include "net/net.h"
+#include "net/checksum.h"
+#include "net/tap.h"
+#include "virtio-net.h"
+#include "qemu/error-report.h"
+
+typedef struct VirtIONetDataPlane {
+int async_tx_head;
+Vring *rx_vring;
+Vring *tx_vring;
+EventHandler *rx_handler;
+EventHandler *tx_handler;
+bool stop;
+} VirtIONetDataPlane;
+
+WorkThread virt_net_thread;
+
+#define VRING_MAX 128
+
+static int virtnet_tx(VirtIONet *n, VirtQueue *vq);
+
+static void virtnet_tx_complete(struct NetClientState *nc, ssize_t sz)
+{
+int ret;
+VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+
+vring_push(n->dp->tx_vring, n->dp->async_tx_head, 0);
+ret = virtnet_tx(n, n->tx_vq);
+if (ret != -EBUSY) {
+vring_enable_notification(&n->vdev, n->dp->tx_vring);
+}
+}
+
+static int virtnet_tx(VirtIONet *n, VirtQueue *vq)
+{
+struct iovec out_iov[VRING_MAX], sg[VRING_MAX];
+struct iovec *snd, *end = &out_iov[VRING_MAX];
+int head;
+unsigned int out_num, in_num, sg_num;
+int ret;
+int num_packets = 0;
+
+if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+return num_packets;
+}
+
+assert(n->vdev.vm_running);
+
+if (n->async_tx.elem.out_num) {
+return num_packets;
+}
+
+while (true) {
+head = vring_pop(&n->vdev, n->dp->tx_vring, out_iov, end, &out_num,
+&in_num);
+if (head < 0) {
+break;
+}
+snd = out_iov;
+assert(n->host_hdr_len <= n->guest_hdr_len);
+if (n->host_hdr_len != n->guest_hdr_len) {
+sg_num = iov_copy(sg, ARRAY_SIZE(sg),
+   out_iov, out_num,
+   0, n->host_hdr_len);
+sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
+ out_iov, out_num,
+ n->guest_hdr_len, -1);
+out_num = sg_num;
+snd = sg;
+}
+
+ret = qemu_sendv_packet_async(&n->nic->nc, snd, out_num,
+virtnet_tx_complete);
+if (ret == 0) {
+n->dp->async_tx_head = head;
+return -EBUSY;
+}
+vring_push(n->dp->tx_vring, head, 0);
+if (num_packets++ > n->tx_burst) {
+break;
+}
+}
+
+return num_packets;
+}
+
+static void virtnet_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
+{
+int ret;
+VirtIONet *n = (VirtIONet *)vdev;
+
+/* This happens when device was stopped but VCPU wasn't. */
+if (!n->vdev.vm_running) {
+return;
+}
+vring_disable_notification(vdev, n->dp->tx_vring);
+ret = virtnet_tx(n, vq);
+if (ret != -EBUSY) {
+vring_enable_notification(vdev, n->dp->tx_vring);
+}
+}
+
+
+static int virtio_net_can_receive(NetClientState *nc)
+{
+VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+if (!n->vdev.vm_running) {
+return 0;
+}
+if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+return 0;
+}
+
+return 1;
+}
+
+/* peek at how much buffer space the avail ring holds, without consuming it */
+static int rx_mergeable_buf_sz(VirtIONet *n)
+{
+uint16_t start, idx, head;
+int total = 0;
+Vring *vring = n->dp->rx_vring;
+struct vring_desc *dsc;
+struct vring_desc *base;
+
+for (start = vring->last_avail_idx; start != vring->vr.avail->idx;
+start++) {
+head = start % vring->vr.num;
+idx = vring->vr.avail->ring[head];
+if (vring->vr.desc[idx].flags & VRING_DESC_F_INDIRECT) {
+base = hostmem_lookup(&vring->hostmem, vring->vr.desc[idx].addr,
+vring->vr.desc[idx].len, 0);
+/* indirect chains start at slot 0 of the indirect table */
+dsc = base;
+} else {
+base = vring->vr.desc;
+/* direct chains start at the descriptor the avail ring points to */
+dsc = &base[idx];
+}
+do {
+total += dsc->len;
+if (!(dsc->flags & VRING_DESC_F_NEXT)) {
+break;
+}
+dsc = &base[dsc->next];
+} while (true);
+}
+return total;
+}
+
+static bool v
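
The archive truncates the patch here. For context, a peek such as
rx_mergeable_buf_sz() is typically consumed by a has-buffers check: only start
copying a packet into the guest once the avail ring holds enough total space.
An illustrative sketch of that check, not the patch's code:

    /* Sketch only: gate the receive path on peeked rx buffer space.
     * rx_avail_bytes() is a stand-in for rx_mergeable_buf_sz(). */
    #include <stdbool.h>
    #include <stddef.h>

    int rx_avail_bytes(void *vring);

    static bool rx_has_room(void *vring, size_t pkt_len, size_t guest_hdr_len)
    {
        /* the packet is prefixed by the virtio-net header in guest memory */
        return (size_t)rx_avail_bytes(vring) >= pkt_len + guest_hdr_len;
    }

If the check fails, the receive path backs off until the guest posts more rx
buffers and kicks the ring again.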