From: Liu Ping Fan <pingf...@linux.vnet.ibm.com> This is an emulation of virtio-blk dataplane, which pushes the data handling out of the big lock. It is an attempt to implement this processing in userspace, whereas vhost-net does it in the kernel.
Signed-off-by: Liu Ping Fan <pingf...@linux.vnet.ibm.com> --- hw/dataplane/virtio-net.c | 422 +++++++++++++++++++++++++++++++++++++++++++++ hw/dataplane/virtio-net.h | 26 +++ hw/virtio-net.c | 56 +----- hw/virtio-net.h | 61 +++++++ 4 files changed, 517 insertions(+), 48 deletions(-) create mode 100644 hw/dataplane/virtio-net.c create mode 100644 hw/dataplane/virtio-net.h diff --git a/hw/dataplane/virtio-net.c b/hw/dataplane/virtio-net.c new file mode 100644 index 0000000..9a1795d --- /dev/null +++ b/hw/dataplane/virtio-net.c @@ -0,0 +1,422 @@ +/* Copyright IBM, Corp. 2013 + * + * Based on vhost-net and virtio-blk dataplane code + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include "hw/virtio.h" +#include "qemu/iov.h" +#include "vring.h" +#include <linux/virtio_ring.h> +#include "net/net.h" +#include "net/checksum.h" +#include "net/tap.h" +#include "virtio-net.h" +#include "qemu/error-report.h" + +typedef struct VirtIONetDataPlane { + int async_tx_head; + Vring *rx_vring; + Vring *tx_vring; + EventHandler *rx_handler; + EventHandler *tx_handler; + bool stop; +} VirtIONetDataPlane; + +WorkThread virt_net_thread; + +#define VRING_MAX 128 + +static int32_t virtnet_tx(VirtIONet *n, VirtQueue *vq); + +static void virtnet_tx_complete(struct NetClientState *nc, ssize_t sz) +{ + int ret; + VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque; + + vring_push(n->dp->tx_vring, n->dp->async_tx_head, 0); + ret = virtnet_tx(n, n->tx_vq); + if (ret != -EBUSY) { + vring_enable_notification(&n->vdev, n->dp->tx_vring); + } +} + +static int virtnet_tx(VirtIONet *n, VirtQueue *vq) +{ + struct iovec out_iov[VRING_MAX], sg[VRING_MAX]; + struct iovec *snd, *end = &out_iov[VRING_MAX]; + int head; + unsigned int out_num, in_num, sg_num; + int ret; + int num_packets = 0; + + if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return num_packets; + } + + assert(n->vdev.vm_running); + + if (n->async_tx.elem.out_num) { + return num_packets; + } + + while 
(true) { + head = vring_pop(&n->vdev, n->dp->tx_vring, out_iov, end, &out_num, + &in_num); + if (head < 0) { + break; + } + snd = out_iov; + assert(n->host_hdr_len <= n->guest_hdr_len); + if (n->host_hdr_len != n->guest_hdr_len) { + sg_num = iov_copy(sg, ARRAY_SIZE(sg), + out_iov, out_num, + 0, n->host_hdr_len); + sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, + out_iov, out_num, + n->guest_hdr_len, -1); + out_num = sg_num; + snd = sg; + } + + ret = qemu_sendv_packet_async(&n->nic->nc, snd, out_num, + virtnet_tx_complete); + if (ret == 0) { + n->dp->async_tx_head = head; + return -EBUSY; + } + vring_push(n->dp->tx_vring, head, 0); + if (num_packets++ > n->tx_burst) { + break; + } + } + + return num_packets; +} + +static void virtnet_handle_tx(VirtIODevice *vdev, VirtQueue *vq) +{ + int32_t ret; + VirtIONet *n = (VirtIONet *)vdev; + + /* This happens when device was stopped but VCPU wasn't. */ + if (!n->vdev.vm_running) { + return; + } + vring_disable_notification(vdev, n->dp->tx_vring); + ret = virtnet_tx(n, vq); + if (ret != -EBUSY) { + vring_enable_notification(vdev, n->dp->tx_vring); + } +} + + +static int virtio_net_can_receive(NetClientState *nc) +{ + VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque; + if (!n->vdev.vm_running) { + return 0; + } + if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return 0; + } + + return 1; +} + +/* peek but not use */ +static int rx_mergeable_buf_sz(VirtIONet *n) +{ + uint16_t start, idx, head; + int total = 0; + Vring *vring = n->dp->rx_vring; + struct vring_desc *dsc; + struct vring_desc *base; + + for (start = vring->last_avail_idx; start != vring->vr.avail->idx; + start++) { + head = start%vring->vr.num; + idx = vring->vr.avail->ring[head]; + if (vring->vr.desc[idx].flags & VRING_DESC_F_INDIRECT) { + base = hostmem_lookup(&vring->hostmem, vring->vr.desc[idx].addr, + vring->vr.desc[idx].len, 0); + } else { + base = vring->vr.desc; + } + dsc = base; + do { + total += dsc->len; + if (!(dsc->flags &
VRING_DESC_F_NEXT)) { + break; + } + dsc = &base[dsc->next]; + } while (true); + } + return total; +} + +static bool virtnet_has_buffers(VirtIONet *n, int bufsize) +{ + if (!vring_more_avail(n->dp->rx_vring)) { + return false; + } + if (n->mergeable_rx_bufs) { + if (rx_mergeable_buf_sz(n) < bufsize) { + return false; + } + } + return true; +} + +static ssize_t virtnet_rx(NetClientState *nc, const uint8_t *buf, size_t size) +{ + VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque; + struct iovec in_vec[VIRTQUEUE_MAX_SIZE], head_iov[2], *end; + unsigned int in_num, out_num, vnet_hdr_sz; + int head; + size_t len, total, offset = 0; + uint16_t numbuff = 0; + + total = offset = 0; + + end = &in_vec[VIRTQUEUE_MAX_SIZE]; + if (!virtio_net_receive_filter(n, buf, size)) { + return size; + } + + /* enough buff ? */ + if (!virtnet_has_buffers(n, size)) { + vring_enable_notification(&n->vdev, n->dp->rx_vring); + return 0; + } + + while (size > offset) { + head = vring_pop(&n->vdev, n->dp->rx_vring, in_vec, end, &out_num, + &in_num); + if (head < 0) { + return 0; + } + len = 0; + if (numbuff == 0) { + virtio_net_receive_header(n, in_vec, in_num, buf, size); + + if (n->mergeable_rx_bufs) { + vnet_hdr_sz = sizeof(struct virtio_net_hdr_mrg_rxbuf); + } else { + vnet_hdr_sz = sizeof(struct virtio_net_hdr); + } + iov_copy(head_iov, 2, in_vec, in_num, 0, vnet_hdr_sz); + offset += n->host_hdr_len; + total += vnet_hdr_sz; + len += vnet_hdr_sz; + } + len += iov_from_buf(in_vec, in_num, vnet_hdr_sz, buf+offset, + size-offset); + offset += len; + total += len; + numbuff++; + /* Guest wont see used->idx until we are ready */ + vring_fill(n->dp->rx_vring, head, len); + } + + if (n->mergeable_rx_bufs) { + iov_from_buf(head_iov, 2, + offsetof(struct virtio_net_hdr_mrg_rxbuf, num_buffers), &numbuff, + sizeof(numbuff)); + } + vring_flush(n->dp->rx_vring); + + if (vring_should_notify(&n->vdev, n->dp->rx_vring)) { + virtio_irq(n->rx_vq); + } + + return size; +} + +static void tx_cb(EventHandler 
*handler, uint32_t events) +{ + VirtIONet *n = handler->opaque; + + event_notifier_test_and_clear(handler->notifier); + virtnet_handle_tx(&n->vdev, n->tx_vq); +} + +/* rvq has buffer again, push tap to fill in */ +static void rx_cb(EventHandler *handler, uint32_t events) +{ + VirtIONet *n = handler->opaque; + + event_notifier_test_and_clear(handler->notifier); + qemu_flush_queued_packets(&n->nic->nc); +} + +static NetClientInfo net_dp_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = virtio_net_can_receive, + .receive = virtnet_rx, + .cleanup = virtio_net_cleanup, + .link_status_changed = virtio_net_set_link_status, +}; + +void virtnet_dataplane_create(VirtIONet *n) +{ + EventHandler *tx_handler, *rx_handler; + + n->dp = g_malloc(sizeof(VirtIONetDataPlane)); + n->dp->stop = false; + n->dp->rx_vring = g_malloc(sizeof(Vring)); + n->dp->tx_vring = g_malloc(sizeof(Vring)); + rx_handler = n->dp->rx_handler = g_malloc(sizeof(EventHandler)); + tx_handler = n->dp->tx_handler = g_malloc(sizeof(EventHandler)); + tx_handler->opaque = n; + rx_handler->opaque = n; + + /* safely redirect receive handler */ + n->nic->nc.info = &net_dp_info; +} + +static int virtnet_dataplane_disable_notifiers(VirtIONet *n) +{ + int i, r; + VirtIODevice *vdev = &n->vdev; + + for (i = 0; i < 2; ++i) { + r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false); + if (r < 0) { + fprintf(stderr, "virtnet dataplane %d notifier unbinding failed: %d\n", i, -r); + } + } + return r; +} + +static int virtnet_dataplane_enable_notifiers(VirtIONet *n) +{ + int i, r; + VirtIODevice *vdev = &n->vdev; + + if (!vdev->binding->set_host_notifier) { + fprintf(stderr, "binding does not support host notifiers\n"); + r = -ENOSYS; + goto fail; + } + for (i = 0; i < 2; ++i) { + r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true); + if (r < 0) { + fprintf(stderr, "virtnet dataplane %d notifier binding failed: %d\n", i, -r); + goto fail_vq; + } + } + 
+ return 0; +fail_vq: + while (--i >= 0) { + r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false); + if (r < 0) { + fprintf(stderr, "virtnet dataplane %d notifier cleanup error: %d\n", i, -r); + fflush(stderr); + } + assert(r >= 0); + } +fail: + return r; +} + + +static void thread_cb(EventHandler *handler, uint32_t events) +{ + EventNotifier *e = handler->notifier; + event_notifier_test_and_clear(e); +} + +static void *working_thread(void *data) +{ + WorkThread *t = (WorkThread *)data; + + qemu_mutex_lock(&t->lock); + qemu_cond_signal(&t->cond_start); + qemu_mutex_unlock(&t->lock); + while (t->state == THREAD_START) { + event_poll(&t->polltbl); + } + return NULL; +} + +static void init_work_thread(void) +{ + EventHandler *thread_handler = g_malloc(sizeof(EventHandler)); + WorkThread *t = &virt_net_thread; + + qemu_mutex_init(&t->lock); + qemu_cond_init(&t->cond_start); + event_poll_init(&t->polltbl, 4); + event_notifier_init(&t->e, 0); + event_poll_add(&t->polltbl, thread_handler, &t->e, thread_cb); + qemu_mutex_lock(&t->lock); + t->state = THREAD_START; + qemu_thread_create(&t->thread, working_thread, t, QEMU_THREAD_JOINABLE); + qemu_cond_wait(&t->cond_start, &t->lock); + qemu_mutex_unlock(&t->lock); +} + +void virtnet_dataplane_start(VirtIONet *n) +{ + bool rslt; + EventNotifier *tx_e = virtio_queue_get_host_notifier(n->tx_vq); + EventNotifier *rx_e = virtio_queue_get_host_notifier(n->rx_vq); + WorkThread *t = &virt_net_thread; + + virtnet_dataplane_enable_notifiers(n); + rslt = vring_setup(n->dp->rx_vring, &n->vdev, 0); + if (!rslt) { + error_report("fail to setup rx vring\n"); + exit(1); + } + vring_restore(n->dp->rx_vring, + virtio_queue_get_last_avail_idx(&n->vdev, 0)); + rslt = vring_setup(n->dp->tx_vring, &n->vdev, 1); + if (!rslt) { + error_report("fail to setup tx vring\n"); + exit(1); + } + vring_restore(n->dp->tx_vring, + virtio_queue_get_last_avail_idx(&n->vdev, 1)); + init_work_thread(); + + event_poll_add(&t->polltbl,
n->dp->rx_handler, rx_e, rx_cb); + event_poll_add(&t->polltbl, n->dp->tx_handler, tx_e, tx_cb); +} + +void virtnet_dataplane_stop(VirtIONet *n) +{ + EventNotifier *rx_e = virtio_queue_get_host_notifier(n->rx_vq); + EventNotifier *tx_e = virtio_queue_get_host_notifier(n->tx_vq); + WorkThread *t = &virt_net_thread; + + event_poll_del_fd(&t->polltbl, event_notifier_get_fd(rx_e)); + event_poll_del_fd(&t->polltbl, event_notifier_get_fd(tx_e)); + + t->state = THREAD_EXIT; + event_notifier_set(&t->e); + qemu_thread_join(&t->thread); + virtio_queue_set_last_avail_idx(&n->vdev, 0, + n->dp->rx_vring->last_avail_idx); + virtio_queue_set_last_avail_idx(&n->vdev, 1, + n->dp->tx_vring->last_avail_idx); + vring_teardown(n->dp->rx_vring); + vring_teardown(n->dp->tx_vring); + virtnet_dataplane_disable_notifiers(n); +} + +void virtnet_dataplane_destroy(VirtIONet *n) +{ + virtnet_dataplane_stop(n); + g_free(n->dp->rx_vring); + g_free(n->dp->tx_vring); + g_free(n->dp->rx_handler); + g_free(n->dp->tx_handler); + g_free(n->dp); +} diff --git a/hw/dataplane/virtio-net.h b/hw/dataplane/virtio-net.h new file mode 100644 index 0000000..e50b2de --- /dev/null +++ b/hw/dataplane/virtio-net.h @@ -0,0 +1,26 @@ +/* Copyright IBM, Corp. 2013 + * + * This work is licensed under the terms of the GNU GPL, version 2. 
+ */ +#ifndef VIRT_NET_DATAPLANE_H +#define VIRT_NET_DATAPLANE_H + +#include "event-poll.h" +#include "qemu/thread.h" +#include "hw/virtio-net.h" + +typedef enum { THREAD_START, THREAD_EXIT +} WorkState; + +typedef struct WorkThread { + EventPoll polltbl; + QemuThread thread; + EventNotifier e; + + WorkState state; + QemuMutex lock; + QemuCond cond_start; +} WorkThread; + +extern WorkThread virt_net_thread; +#endif diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 5d03b31..6bf4a40 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -26,47 +26,6 @@ #define MAC_TABLE_ENTRIES 64 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ -typedef struct VirtIONet -{ - VirtIODevice vdev; - uint8_t mac[ETH_ALEN]; - uint16_t status; - VirtQueue *rx_vq; - VirtQueue *tx_vq; - VirtQueue *ctrl_vq; - NICState *nic; - QEMUTimer *tx_timer; - QEMUBH *tx_bh; - uint32_t tx_timeout; - int32_t tx_burst; - int tx_waiting; - uint32_t has_vnet_hdr; - size_t host_hdr_len; - size_t guest_hdr_len; - uint8_t has_ufo; - struct { - VirtQueueElement elem; - ssize_t len; - } async_tx; - int mergeable_rx_bufs; - uint8_t promisc; - uint8_t allmulti; - uint8_t alluni; - uint8_t nomulti; - uint8_t nouni; - uint8_t nobcast; - uint8_t vhost_started; - struct { - int in_use; - int first_multi; - uint8_t multi_overflow; - uint8_t uni_overflow; - uint8_t *macs; - } mac_table; - uint32_t *vlans; - DeviceState *qdev; -} VirtIONet; - /* TODO * - we could suppress RX interrupt if we were so inclined. 
*/ @@ -165,7 +124,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) } } -static void virtio_net_set_link_status(NetClientState *nc) +void virtio_net_set_link_status(NetClientState *nc) { VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque; uint16_t old_status = n->status; @@ -528,8 +487,8 @@ static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, } } -static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, - const void *buf, size_t size) +void virtio_net_receive_header(VirtIONet *n, const struct iovec *iov, + int iov_cnt, const void *buf, size_t size) { if (n->has_vnet_hdr) { /* FIXME this cast is evil */ @@ -546,7 +505,7 @@ static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, } } -static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) +int virtio_net_receive_filter(VirtIONet *n, const uint8_t *buf, int size) { static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; static const uint8_t vlan[] = {0x81, 0x00}; @@ -612,8 +571,9 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t if (!virtio_net_has_buffers(n, size + n->guest_hdr_len - n->host_hdr_len)) return 0; - if (!receive_filter(n, buf, size)) + if (!virtio_net_receive_filter(n, buf, size)) { return size; + } offset = i = 0; @@ -649,7 +609,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t sizeof(mhdr.num_buffers)); } - receive_header(n, sg, elem.in_num, buf, size); + virtio_net_receive_header(n, sg, elem.in_num, buf, size); offset = n->host_hdr_len; total += n->guest_hdr_len; guest_offset = n->guest_hdr_len; @@ -994,7 +954,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) return 0; } -static void virtio_net_cleanup(NetClientState *nc) +void virtio_net_cleanup(NetClientState *nc) { VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque; diff --git a/hw/virtio-net.h b/hw/virtio-net.h index d46fb98..ed91a02 
100644 --- a/hw/virtio-net.h +++ b/hw/virtio-net.h @@ -159,4 +159,65 @@ struct virtio_net_ctrl_mac { DEFINE_PROP_BIT("ctrl_rx", _state, _field, VIRTIO_NET_F_CTRL_RX, true), \ DEFINE_PROP_BIT("ctrl_vlan", _state, _field, VIRTIO_NET_F_CTRL_VLAN, true), \ DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true) + + +#ifdef CONFIG_VIRTIO_NET_DATA_PLANE +struct VirtIONetDataPlane; +#endif + +typedef struct VirtIONet { + VirtIODevice vdev; + uint8_t mac[ETH_ALEN]; + uint16_t status; + VirtQueue *rx_vq; + VirtQueue *tx_vq; + VirtQueue *ctrl_vq; + NICState *nic; + QEMUTimer *tx_timer; + QEMUBH *tx_bh; + uint32_t tx_timeout; + int32_t tx_burst; + int tx_waiting; + uint32_t has_vnet_hdr; + size_t host_hdr_len; + size_t guest_hdr_len; + uint8_t has_ufo; + struct { + VirtQueueElement elem; + ssize_t len; + } async_tx; + int mergeable_rx_bufs; + uint8_t promisc; + uint8_t allmulti; + uint8_t alluni; + uint8_t nomulti; + uint8_t nouni; + uint8_t nobcast; + uint8_t vhost_started; + struct { + int in_use; + int first_multi; + uint8_t multi_overflow; + uint8_t uni_overflow; + uint8_t *macs; + } mac_table; + uint32_t *vlans; + +#ifdef CONFIG_VIRTIO_NET_DATA_PLANE + struct VirtIONetDataPlane *dp; + bool dp_start; +#endif + DeviceState *qdev; +} VirtIONet; + +int virtio_net_receive_filter(VirtIONet *n, const uint8_t *buf, int size); +void virtio_net_receive_header(VirtIONet *n, const struct iovec *iov, + int iov_cnt, const void *buf, size_t size); +void virtio_net_set_link_status(NetClientState *nc); +void virtio_net_cleanup(NetClientState *nc); + +#ifdef CONFIG_VIRTIO_NET_DATA_PLANE +void virtnet_dataplane_create(VirtIONet *n); +#endif + #endif -- 1.7.4.4