This is Anthony's net-tap-zero-copy.patch which eliminates
a copy on the host->guest data path with virtio_net.
---
 qemu/hw/virtio-net.c |   76 ++++++++++++++++++++++++++++++++++++-------------
 qemu/net.h           |    3 ++
 qemu/vl.c            |   50 +++++++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+), 20 deletions(-)

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index a681a7e..5e71afe 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -70,6 +70,8 @@ typedef struct VirtIONet
     VLANClientState *vc;
     QEMUTimer *tx_timer;
     int tx_timer_active;
+    int last_elem_valid;
+    VirtQueueElement last_elem;
 } VirtIONet;
 
 /* TODO
@@ -153,47 +155,80 @@ static int virtio_net_can_receive(void *opaque)
     return 1;
 }
 
-static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
+static void virtio_net_receive_zc(void *opaque, IOZeroCopyHandler *zc, void *data)
 {
     VirtIONet *n = opaque;
-    VirtQueueElement elem;
+    VirtQueueElement *elem = &n->last_elem;
     struct virtio_net_hdr *hdr;
-    int offset, i;
-    int total;
+    ssize_t err;
+    int idx;
 
-    if (virtqueue_pop(n->rx_vq, &elem) == 0)
+    if (!n->last_elem_valid && virtqueue_pop(n->rx_vq, elem) == 0)
        return;
 
-    if (elem.in_num < 1 || elem.in_sg[0].iov_len != sizeof(*hdr)) {
+    if (elem->in_num < 1 || elem->in_sg[0].iov_len != sizeof(*hdr)) {
        fprintf(stderr, "virtio-net header not in first element\n");
        exit(1);
     }
 
-    hdr = (void *)elem.in_sg[0].iov_base;
+    n->last_elem_valid = 1;
+
+    hdr = (void *)elem->in_sg[0].iov_base;
     hdr->flags = 0;
     hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
 
-    offset = 0;
-    total = sizeof(*hdr);
+    idx = tap_has_offload(n->vc->vlan->first_client) ? 0 : 1;
+
+    do {
+        err = zc(data, &elem->in_sg[idx], elem->in_num - idx);
+    } while (err == -1 && errno == EINTR);
+
+    if (err == -1 && errno == EAGAIN)
+        return;
 
-    if (tap_has_offload(n->vc->vlan->first_client)) {
-       memcpy(hdr, buf, sizeof(*hdr));
-       offset += total;
+    if (err < 0) {
+        fprintf(stderr, "virtio_net: error during IO\n");
+        return;
     }
 
+    /* signal other side */
+    n->last_elem_valid = 0;
+    virtqueue_push(n->rx_vq, elem, sizeof(*hdr) + err);
+    virtio_notify(&n->vdev, n->rx_vq);
+}
+
+struct compat_data
+{
+    const uint8_t *buf;
+    int size;
+};
+
+static ssize_t compat_copy(void *opaque, struct iovec *iov, int iovcnt)
+{
+    struct compat_data *compat = opaque;
+    int offset, i;
+
     /* copy in packet.  ugh */
-    i = 1;
-    while (offset < size && i < elem.in_num) {
-       int len = MIN(elem.in_sg[i].iov_len, size - offset);
-       memcpy(elem.in_sg[i].iov_base, buf + offset, len);
+    offset = 0;
+    i = 0;
+    while (offset < compat->size && i < iovcnt) {
+       int len = MIN(iov[i].iov_len, compat->size - offset);
+       memcpy(iov[i].iov_base, compat->buf + offset, len);
        offset += len;
-       total += len;
        i++;
     }
 
-    /* signal other side */
-    virtqueue_push(n->rx_vq, &elem, total);
-    virtio_notify(&n->vdev, n->rx_vq);
+    return offset;
+}
+
+static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
+{
+    struct compat_data compat;
+
+    compat.buf = buf;
+    compat.size = size;
+
+    virtio_net_receive_zc(opaque, compat_copy, &compat);
 }
 
 /* TX */
@@ -310,6 +345,7 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
     memcpy(n->mac, nd->macaddr, 6);
     n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
                                  virtio_net_can_receive, n);
+    n->vc->fd_read_zc = virtio_net_receive_zc;
 
     n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
     n->tx_timer_active = 0;
diff --git a/qemu/net.h b/qemu/net.h
index 6cfd8ce..aca50e9 100644
--- a/qemu/net.h
+++ b/qemu/net.h
@@ -6,6 +6,8 @@
 /* VLANs support */
 
 typedef ssize_t (IOReadvHandler)(void *, const struct iovec *, int);
+typedef ssize_t (IOZeroCopyHandler)(void *, struct iovec *, int);
+typedef void (IOReadZCHandler)(void *, IOZeroCopyHandler *, void *);
 
 typedef struct VLANClientState VLANClientState;
 
@@ -14,6 +16,7 @@ typedef void (SetOffload)(VLANClientState *, int, int, int, int);
 struct VLANClientState {
     IOReadHandler *fd_read;
     IOReadvHandler *fd_readv;
+    IOReadZCHandler *fd_read_zc;
     /* Packets may still be sent if this returns zero.  It's used to
        rate-limit the slirp code.  */
     IOCanRWHandler *fd_can_read;
diff --git a/qemu/vl.c b/qemu/vl.c
index de92848..bc5b151 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -4204,6 +4204,7 @@ typedef struct TAPState {
     char buf[TAP_BUFSIZE];
     int size;
     int offload;
+    int received_eagain;
 } TAPState;
 
 static void tap_receive(void *opaque, const uint8_t *buf, int size)
@@ -4232,6 +4233,48 @@ static ssize_t tap_readv(void *opaque, const struct iovec *iov,
     return len;
 }
 
+static VLANClientState *tap_can_zero_copy(TAPState *s)
+{
+    VLANClientState *vc, *vc1 = NULL;
+    int vc_count = 0;
+
+    for (vc = s->vc->vlan->first_client; vc; vc = vc->next) {
+        if (vc == s->vc)
+            continue;
+
+        if (!vc->fd_read_zc || vc_count)
+            return NULL;
+
+        vc_count++;
+        vc1 = vc;
+    }
+
+    return vc1;
+}
+
+static ssize_t tap_sendv(void *opaque, struct iovec *iov, int iovcnt)
+{
+    TAPState *s = opaque;
+    ssize_t ret;
+
+    kvm_sleep_begin();
+    ret = readv(s->fd, iov, iovcnt);
+    kvm_sleep_end();
+    if (ret == -1 && errno == EAGAIN)
+        s->received_eagain = 1;
+
+    return ret;
+}
+
+static void tap_send_zero_copy(TAPState *s, VLANClientState *vc)
+{
+    s->received_eagain = 0;
+    while (s->received_eagain == 0 &&
+           (!vc->fd_can_read || vc->fd_can_read(vc->opaque))) {
+        vc->fd_read_zc(vc->opaque, tap_sendv, s);
+    }
+}
+
 static int tap_can_send(void *opaque)
 {
     TAPState *s = opaque;
@@ -4261,6 +4304,13 @@ static int tap_can_send(void *opaque)
 static void tap_send(void *opaque)
 {
     TAPState *s = opaque;
+    VLANClientState *zc;
+
+    zc = tap_can_zero_copy(s);
+    if (zc) {
+        tap_send_zero_copy(s, zc);
+        return;
+    }
 
     /* First try to send any buffered packet */
     if (s->size > 0) {
-- 
1.5.4.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to