I think we should hold off on this sort of patch at first.  I know it 
improves performance, but it's very hack-ish.  I have a similar patch[1] 
that improves performance more but is even more hack-ish.

I think we have to approach this by not special cases virtio-net to know 
about the tap fd, but to figure out the interface that virtio-net would 
need to be efficient, and then refactor the net interface to look like 
that.  Then we can still support user, pcap, and the other network 
transports.

[1] http://hg.codemonkey.ws/qemu-virtio/file/75cefe566cea/aio-net.diff

Regards,

Anthony Liguori

Dor Laor wrote:
> From f244bcad756c4f761627557bb7f315b1d8f22fb2 Mon Sep 17 00:00:00 2001
> From: Dor Laor <[EMAIL PROTECTED]>
> Date: Thu, 20 Dec 2007 13:26:30 +0200
> Subject: [PATCH] [VIRTIO-NET] Rx performance improvement
> The current performance are not good enough, the problem lies
> in qemu tap handling code that caused to pass packets one at
> a time and also to copy them to a temporal buffer.
>
> This patch prevents qemu handlers from reading the tap and instead
> it selects the tap descriptors for virtio devices.
> This eliminates copies and also batch guest notifications (interrupts).
>
> Using this patch the rx performance reaches 800Mbps.
> The patch does not follow qemu's api since the intention is first to have
> a better io in kvm and then to polish it correctly.
>
> Signed-off-by: Dor Laor <[EMAIL PROTECTED]>
> ---
> qemu/hw/pc.h         |    2 +-
> qemu/hw/virtio-net.c |  114 
> +++++++++++++++++++++++++++++++++++--------------
> qemu/sysemu.h        |    3 +
> qemu/vl.c            |   23 ++++++++++-
> 4 files changed, 107 insertions(+), 35 deletions(-)
>
> diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h
> index 95471f3..5d4c747 100644
> --- a/qemu/hw/pc.h
> +++ b/qemu/hw/pc.h
> @@ -145,7 +145,7 @@ void isa_ne2000_init(int base, qemu_irq irq, 
> NICInfo *nd);
> /* virtio-net.c */
>
> void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn);
> -
> +void virtio_net_poll(void);
>
> /* virtio-blk.h */
> void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
> diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
> index f6f1f28..b955a5e 100644
> --- a/qemu/hw/virtio-net.c
> +++ b/qemu/hw/virtio-net.c
> @@ -60,8 +60,13 @@ typedef struct VirtIONet
>     VirtQueue *tx_vq;
>     VLANClientState *vc;
>     int can_receive;
> +    int tap_fd;
> +    struct VirtIONet *next;
> +    int do_notify;
> } VirtIONet;
>
> +static VirtIONet *VirtIONetHead = NULL;
> +
> static VirtIONet *to_virtio_net(VirtIODevice *vdev)
> {
>     return (VirtIONet *)vdev;
> @@ -96,42 +101,81 @@ static int virtio_net_can_receive(void *opaque)
>     return (n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) && 
> n->can_receive;
> }
>
> -static void virtio_net_receive(void *opaque, const uint8_t *buf, int 
> size)
> +void virtio_net_poll(void)
> {
> -    VirtIONet *n = opaque;
> +    VirtIONet *vnet;
> +    int len;
> +    fd_set rfds;
> +    struct timeval tv;
> +    int max_fd = -1;
>     VirtQueueElement elem;
>     struct virtio_net_hdr *hdr;
> -    int offset, i;
> -
> -    /* FIXME: the drivers really need to set their status better */
> -    if (n->rx_vq->vring.avail == NULL) {
> -    n->can_receive = 0;
> -    return;
> -    }
> -
> -    if (virtqueue_pop(n->rx_vq, &elem) == 0) {
> -    /* wait until the guest adds some rx bufs */
> -    n->can_receive = 0;
> -    return;
> -    }
> -
> -    hdr = (void *)elem.in_sg[0].iov_base;
> -    hdr->flags = 0;
> -    hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
> -
> -    /* copy in packet.  ugh */
> -    offset = 0;
> -    i = 1;
> -    while (offset < size && i < elem.in_num) {
> -    int len = MIN(elem.in_sg[i].iov_len, size - offset);
> -    memcpy(elem.in_sg[i].iov_base, buf + offset, len);
> -    offset += len;
> -    i++;
> -    }
> +    int did_notify;
> +
> +    FD_ZERO(&rfds);
> +    tv.tv_sec = 0;
> +    tv.tv_usec = 0;
> +
> +    while (1) {
> +
> +         // Prepare the set of device to select from
> +        for (vnet = VirtIONetHead; vnet; vnet = vnet->next) {
> +
> +             vnet->do_notify = 0;
> +             //first check if the driver is ok
> +             if (!virtio_net_can_receive(vnet))
> +                 continue;
> +
> +             /* FIXME: the drivers really need to set their status 
> better */
> +             if (vnet->rx_vq->vring.avail == NULL) {
> +                vnet->can_receive = 0;
> +                continue;
> +             }
> +
> +             FD_SET(vnet->tap_fd, &rfds);
> +             if (max_fd < vnet->tap_fd) max_fd = vnet->tap_fd;
> +        }
> +       +        if (select(max_fd + 1, &rfds, NULL, NULL, &tv) <= 0)
> +            break;
> +
> +       // Now check who has data pending in the tap
> +        for (vnet = VirtIONetHead; vnet; vnet = vnet->next) {
> +
> +             if (!FD_ISSET(vnet->tap_fd, &rfds))
> +                 continue;
> +   +             if (virtqueue_pop(vnet->rx_vq, &elem) == 0) {
> +                vnet->can_receive = 0;
> +                 continue;
> +             }
> +
> +             hdr = (void *)elem.in_sg[0].iov_base;
> +             hdr->flags = 0;
> +             hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
> +again:
> +             len = readv(vnet->tap_fd, &elem.in_sg[1], elem.in_num - 1);
> +             if (len == -1)
> +                 if (errno == EINTR || errno == EAGAIN)
> +                     goto again;
> +                 else
> +                     fprintf(stderr, "reading network error %d", len);
> +
> +            virtqueue_push(vnet->rx_vq, &elem, sizeof(*hdr) + len);
> +            vnet->do_notify = 1;
> +        }
> +
> +        /* signal other side */
> +        did_notify = 0;
> +        for (vnet = VirtIONetHead; vnet; vnet = vnet->next)
> +            if (vnet->do_notify) {
> +                virtio_notify(&vnet->vdev, vnet->rx_vq);
> +                did_notify++;
> +            }
> +        if (!did_notify)
> +            break;
> +     }
>
> -    /* signal other side */
> -    virtqueue_push(n->rx_vq, &elem, sizeof(*hdr) + offset);
> -    virtio_notify(&n->vdev, n->rx_vq);
> }
>
> /* TX */
> @@ -174,8 +218,12 @@ void *virtio_net_init(PCIBus *bus, NICInfo *nd, 
> int devfn)
>     n->tx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_tx);
>     n->can_receive = 0;
>     memcpy(n->mac, nd->macaddr, 6);
> -    n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
> +    n->vc = qemu_new_vlan_client(nd->vlan, NULL,
>                  virtio_net_can_receive, n);
> +    n->tap_fd = get_tap_fd(n->vc->vlan->first_client->opaque);
> +    n->next = VirtIONetHead;
> +    //push the device on top of the list
> +    VirtIONetHead = n;
>
>     return &n->vdev;
> }
> diff --git a/qemu/sysemu.h b/qemu/sysemu.h
> index e20159d..4bedd11 100644
> --- a/qemu/sysemu.h
> +++ b/qemu/sysemu.h
> @@ -66,6 +66,9 @@ void qemu_del_wait_object(HANDLE handle, 
> WaitObjectFunc *func, void *opaque);
> /* TAP win32 */
> int tap_win32_init(VLANState *vlan, const char *ifname);
>
> +/* virtio hack for zero copy receive */
> +int get_tap_fd(void *opaque);
> +
> /* SLIRP */
> void do_info_slirp(void);
>
> diff --git a/qemu/vl.c b/qemu/vl.c
> index 26055a4..eef602a 100644
> --- a/qemu/vl.c
> +++ b/qemu/vl.c
> @@ -3885,8 +3885,26 @@ typedef struct TAPState {
>     VLANClientState *vc;
>     int fd;
>     char down_script[1024];
> +    int no_poll;
> } TAPState;
>
> +int get_tap_fd(void *opaque)
> +{
> +    TAPState *s = opaque;
> +
> +    if (s) {
> +       s->no_poll = 1;
> +       return s->fd;
> +    }
> +    return -1;
> +}
> +
> +static int tap_read_poll(void *opaque)
> +{
> +   TAPState *s = opaque;
> +    return (!s->no_poll);
> +}
> +
> static void tap_receive(void *opaque, const uint8_t *buf, int size)
> {
>     TAPState *s = opaque;
> @@ -3930,9 +3948,10 @@ static TAPState *net_tap_fd_init(VLANState 
> *vlan, int fd)
>     if (!s)
>         return NULL;
>     s->fd = fd;
> +    s->no_poll = 0;
>     enable_sigio_timer(fd);
>     s->vc = qemu_new_vlan_client(vlan, tap_receive, NULL, s);
> -    qemu_set_fd_handler(s->fd, tap_send, NULL, s);
> +    qemu_set_fd_handler2(s->fd, tap_read_poll, tap_send, NULL, s);
>     snprintf(s->vc->info_str, sizeof(s->vc->info_str), "tap: fd=%d", fd);
>     return s;
> }
> @@ -7717,6 +7736,8 @@ void main_loop_wait(int timeout)
>         slirp_select_poll(&rfds, &wfds, &xfds);
>     }
> #endif
> +    virtio_net_poll();
> +
>     qemu_aio_poll();
>
>     if (vm_running) {


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to