Signed-off-by: Gregory Haskins <[EMAIL PROTECTED]>
---

 drivers/kvm/Kconfig       |    5 
 drivers/kvm/Makefile      |    2 
 drivers/kvm/ioqnet_host.c |  618 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 625 insertions(+), 0 deletions(-)

diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig
index cb674bb..1c884c5 100644
--- a/drivers/kvm/Kconfig
+++ b/drivers/kvm/Kconfig
@@ -62,6 +62,11 @@ config KVM_PVBUS_HOST
 	  of the hypervisor itself. You only need this option if you plan to
 	  run PVBUS based PV guests in KVM.
 
+config KVM_IOQNET
+	boolean "IOQNET host support"
+	depends on KVM
+	select KVM_PVBUS_HOST
+
 config KVM_NET_HOST
 	tristate "Para virtual network host device"
 	depends on KVM
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index 8926fa9..66e5272 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -22,3 +22,5 @@ kvm-net-host-objs = kvm_net_host.o
 obj-$(CONFIG_KVM_NET_HOST) += kvm_net_host.o
 kvm-pvbus-objs := ioq_guest.o pvbus_guest.o
 obj-$(CONFIG_KVM_PVBUS_GUEST) += kvm-pvbus.o
+kvm-ioqnet-objs := ioqnet_host.o
+obj-$(CONFIG_KVM_IOQNET) += kvm-ioqnet.o
diff --git a/drivers/kvm/ioqnet_host.c b/drivers/kvm/ioqnet_host.c
new file mode 100644
index 0000000..aff7e5c
--- /dev/null
+++ b/drivers/kvm/ioqnet_host.c
@@ -0,0 +1,618 @@
+/*
+ * Copyright 2007 Novell. All Rights Reserved.
+ *
+ * ioqnet - A paravirtualized network device based on the IOQ interface.
+ *
+ * This module represents the backend driver for an IOQNET driver on the KVM
+ * platform.
+ *
+ * Author:
+ *      Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * Derived in part from the SNULL example from the book "Linux Device
+ * Drivers" by Alessandro Rubini and Jonathan Corbet, published
+ * by O'Reilly & Associates.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h> /* printk() */
+#include <linux/slab.h> /* kmalloc() */
+#include <linux/errno.h> /* error codes */
+#include <linux/types.h> /* size_t */
+#include <linux/interrupt.h> /* mark_bh */
+
+#include <linux/in.h>
+#include <linux/netdevice.h> /* struct device, and other headers */
+#include <linux/etherdevice.h> /* eth_type_trans */
+#include <linux/ip.h> /* struct iphdr */
+#include <linux/tcp.h> /* struct tcphdr */
+#include <linux/skbuff.h>
+#include <linux/ioq.h>
+#include <linux/pvbus.h>
+
+#include <linux/in6.h>
+#include <asm/checksum.h>
+#include <linux/ioq.h>
+#include <linux/ioqnet.h>
+#include <linux/highmem.h>
+
+#include "pvbus_host.h"
+#include "kvm.h"
+
+MODULE_AUTHOR("Gregory Haskins");
+MODULE_LICENSE("GPL");
+
+#define IOQNET_NAME "ioqnet"
+
+/*
+ * FIXME: Any "BUG_ON" code that can be triggered by a malicious guest must
+ * be turned into an inject_gp()
+ */
+
+/* An IOQ together with the notifier set up by ioqnet_connect(). */
+struct ioqnet_queue {
+	struct ioq *queue;
+	struct ioq_notifier notifier;
+};
+
+/* Per-device state; lives in the net_device private area. */
+struct ioqnet_priv {
+	spinlock_t lock;		/* held around ring access in poll/tx */
+	struct kvm *kvm;
+	struct kvm_pv_device pvdev;
+	struct net_device *netdev;
+	struct net_device_stats stats;
+	struct ioqnet_queue rxq;
+	struct ioqnet_queue txq;
+	struct tasklet_struct txtask;	/* runs ioqnet_tx_intr() */
+	int connected;			/* set by ioqnet_pvbus_connect() */
+	int opened;			/* set by open, cleared by release */
+};
+
+#undef PDEBUG /* undef it, just in case */
+#ifdef IOQNET_DEBUG
+# define PDEBUG(fmt, args...) printk( KERN_DEBUG "ioqnet: " fmt, ## args)
+#else
+# define PDEBUG(fmt, args...) /* not debugging: nothing */
+#endif
+
+/*
+ * Enable and disable receive interrupts.
+ */
+static void ioqnet_rx_ints(struct net_device *dev, int enable)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+	struct ioq *ioq = priv->rxq.queue;
+
+	if (priv->connected) {
+		if (enable)
+			ioq_start(ioq, 0);
+		else
+			ioq_stop(ioq, 0);
+	}
+}
+
+/*
+ * Open and close
+ */
+
+int ioqnet_open(struct net_device *dev)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+
+	priv->opened = 1;
+	netif_start_queue(dev);
+
+	return 0;
+}
+
+int ioqnet_release(struct net_device *dev)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+
+	priv->opened = 0;
+	netif_stop_queue(dev);
+
+	return 0;
+}
+
+/*
+ * Configuration changes (passed on by ifconfig)
+ */
+int ioqnet_config(struct net_device *dev, struct ifmap *map)
+{
+	if (dev->flags & IFF_UP) /* can't act on a running interface */
+		return -EBUSY;
+
+	/* Don't allow changing the I/O address */
+	if (map->base_addr != dev->base_addr) {
+		printk(KERN_WARNING "ioqnet: Can't change I/O address\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* ignore other fields */
+	return 0;
+}
+
+/*
+ * The poll implementation.
+ *
+ * Drains up to min(dev->quota, *budget) packets from the rx ring, copying
+ * each into a fresh skb that is handed to netif_receive_skb().  Returns 0
+ * after netif_rx_complete() once the ring is empty, and 1 when work remains
+ * (quota exhausted, or an skb could not be allocated).
+ */
+static int ioqnet_poll(struct net_device *dev, int *budget)
+{
+	int npackets = 0, quota = min(dev->quota, *budget);
+	struct ioqnet_priv *priv = netdev_priv(dev);
+	struct ioq_iterator iter;
+	unsigned long flags;
+	int oom = 0;
+	int ret;
+
+	if (!priv->connected)
+		return 0;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	/* We want to iterate on the tail of the in-use index */
+	ret = ioq_iter(priv->rxq.queue, &iter, ioq_idxtype_inuse, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * We stop if we have met the quota or there are no more packets.
+	 * The EOM is indicated by finding a packet that is still owned by
+	 * the north side
+	 */
+	while ((npackets < quota) && iter.desc->sown) {
+		struct ioq_ring_desc *desc = iter.desc;
+		struct ioqnet_tx_ptr *ptr = gpa_to_hva(priv->kvm, desc->ptr);
+		struct sk_buff *skb;
+		int i;
+		size_t len = 0;
+
+		/* First figure out how much of an skb we need */
+		for (i = 0; i < desc->alen; ++i) {
+			len += ptr[i].len;
+		}
+
+		skb = dev_alloc_skb(len + 2);
+		if (!skb) {
+			/*
+			 * Leave the descriptor queued and bail out through
+			 * the common exit so that the lock is released.
+			 */
+			printk(KERN_ERR "FATAL: Out of memory on IOQNET\n");
+			netif_stop_queue(dev);
+			oom = 1;
+			break;
+		}
+
+		skb_reserve(skb, 2);
+
+		/*
+		 * Then copy the data out to our fresh SKB.  skb_put() appends
+		 * at the tail, so the fragments land in order after the two
+		 * bytes reserved above.
+		 */
+		for (i = 0; i < desc->alen; ++i) {
+			struct ioqnet_tx_ptr *p = &ptr[i];
+			void *d = gpa_to_hva(priv->kvm, p->data);
+
+			memcpy(skb_put(skb, p->len), d, p->len);
+			/* NOTE(review): confirm kunmap() pairs with gpa_to_hva() */
+			kunmap(d);
+		}
+
+		/* Maintain stats */
+		npackets++;
+		priv->stats.rx_packets++;
+		priv->stats.rx_bytes += len;
+
+		/* Pass the buffer up to the stack */
+		skb->dev = dev;
+		skb->protocol = eth_type_trans(skb, dev);
+		netif_receive_skb(skb);
+
+		/* Advance the in-use tail */
+		desc->sown = 0;
+		ret = ioq_iter_pop(&iter, 0);
+		BUG_ON(ret < 0);
+
+		/* Toggle the lock */
+		spin_unlock_irqrestore(&priv->lock, flags);
+		spin_lock_irqsave(&priv->lock, flags);
+	}
+
+	/*
+	 * If we processed all packets, we're done; tell the kernel and
+	 * reenable ints
+	 */
+	*budget -= npackets;
+	dev->quota -= npackets;
+	if (!oom && ioq_empty(priv->rxq.queue, ioq_idxtype_inuse)) {
+		/* FIXME: there is a race with enabling interrupts */
+		netif_rx_complete(dev);
+		ioqnet_rx_ints(dev, 1);
+		ret = 0;
+	} else
+		/* We couldn't process everything. */
+		ret = 1;
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	/* And let the north side know that we changed the rx-queue */
+	ioq_signal(priv->rxq.queue, 0);
+
+	return ret;
+}
+
+/*
+ * Transmit a packet (called by the kernel)
+ *
+ * Copies the frame into the next north-side buffer and signals the north.
+ * Returns NETDEV_TX_OK when the skb was consumed (sent or dropped), or
+ * NETDEV_TX_BUSY, with the queue stopped, when the ring is full.
+ */
+int ioqnet_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+	struct ioq_iterator iter;
+	int ret;
+	unsigned long flags;
+	char *data;
+
+	/* Frames we cannot deliver are dropped, not reported as errnos */
+	if (skb->len < ETH_ZLEN || !priv->connected)
+		goto drop;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	/* NOTE(review): ioqnet_tx_intr() tests ioq_idxtype_inuse instead */
+	if (ioq_full(priv->txq.queue, ioq_idxtype_valid)) {
+		/*
+		 * We must flow-control the kernel by disabling the queue.
+		 * Stop it before dropping the lock so that ioqnet_tx_intr()
+		 * cannot run in between and miss the stopped state.
+		 */
+		netif_stop_queue(dev);
+		spin_unlock_irqrestore(&priv->lock, flags);
+		return NETDEV_TX_BUSY;
+	}
+
+	/*
+	 * We want to iterate on the head of the "inuse" index
+	 */
+	ret = ioq_iter(priv->txq.queue, &iter, ioq_idxtype_inuse, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+	BUG_ON(ret < 0);
+
+	if (skb->len > iter.desc->len) {
+		/* Too large for the north-side buffer */
+		spin_unlock_irqrestore(&priv->lock, flags);
+		goto drop;
+	}
+
+	dev->trans_start = jiffies; /* save the timestamp */
+
+	/* Copy the data to the north-side buffer */
+	data = (char*)gpa_to_hva(priv->kvm, iter.desc->ptr);
+	memcpy(data, skb->data, skb->len);
+	kunmap(data);
+
+	/* Give ownership back to the north */
+	iter.desc->sown = 0;
+
+	/* Advance the index */
+	ret = ioq_iter_push(&iter, 0);
+	BUG_ON(ret < 0);
+
+	priv->stats.tx_packets++;
+	priv->stats.tx_bytes += skb->len;
+
+	/*
+	 * This will signal the north side to consume the packet
+	 */
+	ioq_signal(priv->txq.queue, 0);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	/* The payload has been copied out, so release the skb */
+	dev_kfree_skb(skb);
+
+	return NETDEV_TX_OK;
+
+drop:
+	priv->stats.tx_dropped++;
+	dev_kfree_skb(skb);
+
+	return NETDEV_TX_OK;
+}
+
+/*
+ * Tasklet body scheduled by ioq_tx_notify(); @data is the ioqnet_priv.
+ */
+void ioqnet_tx_intr(unsigned long data)
+{
+	struct ioqnet_priv *priv = (struct ioqnet_priv*)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	/*
+	 * If we were previously stopped due to flow control, restart the
+	 * processing
+	 */
+	if (netif_queue_stopped(priv->netdev)
+	    && !ioq_full(priv->txq.queue, ioq_idxtype_inuse)) {
+
+		netif_wake_queue(priv->netdev);
+	}
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/*
+ * Ioctl commands
+ */
+int ioqnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	PDEBUG("ioctl\n");
+	return 0;
+}
+
+/*
+ * Return statistics to the caller
+ */
+struct net_device_stats *ioqnet_stats(struct net_device *dev)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+	return &priv->stats;
+}
+
+/* rx-queue notifier: mask further notifications and schedule the poll */
+static void ioq_rx_notify(struct ioq_notifier *notifier)
+{
+	struct ioqnet_priv *priv;
+	struct net_device *dev;
+
+	priv = container_of(notifier, struct ioqnet_priv, rxq.notifier);
+	dev = priv->netdev;
+
+	ioqnet_rx_ints(dev, 0); /* Disable further interrupts */
+	netif_rx_schedule(dev);
+}
+
+/* tx-queue notifier: defer the flow-control check to the tx tasklet */
+static void ioq_tx_notify(struct ioq_notifier *notifier)
+{
+	struct ioqnet_priv *priv;
+
+	priv = container_of(notifier, struct ioqnet_priv, txq.notifier);
+
+	tasklet_schedule(&priv->txtask);
+}
+
+/*
+ * The init function (sometimes called probe).
+ * It is passed to alloc_netdev() as the setup callback.
+ */
+void ioqnet_init(struct net_device *dev)
+{
+	ether_setup(dev); /* assign some of the fields */
+
+	dev->open = ioqnet_open;
+	dev->stop = ioqnet_release;
+	dev->set_config = ioqnet_config;
+	dev->hard_start_xmit = ioqnet_tx;
+	dev->do_ioctl = ioqnet_ioctl;
+	dev->get_stats = ioqnet_stats;
+	dev->poll = ioqnet_poll;
+	dev->weight = 2;
+	dev->hard_header_cache = NULL; /* Disable caching */
+
+	/* We go "link down" until the guest connects to us */
+	netif_carrier_off(dev);
+}
+
+/* -------------------------------------------------------------- */
+
+/* Map an embedded kvm_pv_device back to its ioqnet_priv */
+static inline struct ioqnet_priv* to_priv(struct kvm_pv_device *t)
+{
+	return container_of(t, struct ioqnet_priv, pvdev);
+}
+
+/*
+ * Connect queue @q to the IOQ identified by @id and record @func as its
+ * notifier callback.  Returns 0 or the error from the ioq manager.
+ */
+static int ioqnet_connect(struct ioqnet_priv *priv,
+			  ioq_id_t id,
+			  struct ioqnet_queue *q,
+			  void (*func)(struct ioq_notifier*))
+{
+	int ret;
+	struct ioq_mgr *ioqmgr = priv->kvm->ioqmgr;
+
+	ret = ioqmgr->connect(ioqmgr, id, &q->queue, 0);
+	if (ret < 0)
+		return ret;
+
+	q->notifier.signal = func;
+
+	return 0;
+}
+
+/*
+ * IOQNET_CONNECT handler: @data is a struct ioqnet_connect naming the
+ * north's queues.
+ */
+static int ioqnet_pvbus_connect(struct ioqnet_priv *priv,
+				void *data, size_t len)
+{
+	struct ioqnet_connect *cnct = (struct ioqnet_connect*)data;
+	int ret;
+
+	/* The payload comes from the guest; make sure it is what we expect */
+	if (len != sizeof(*cnct))
+		return -EINVAL;
+
+	/* We connect the north's rxq to our txq */
+	ret = ioqnet_connect(priv, cnct->rxq, &priv->txq, ioq_tx_notify);
+	if (ret < 0)
+		return ret;
+
+	/* And vice-versa */
+	ret = ioqnet_connect(priv, cnct->txq, &priv->rxq, ioq_rx_notify);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * So now that the guest has connected we can send a "link up" event
+	 * to the kernel.
+	 */
+	netif_carrier_on(priv->netdev);
+
+	priv->connected = 1;
+
+	return 0;
+}
+
+/* IOQNET_QUERY_MAC handler: copy our MAC address into @data */
+static int ioqnet_pvbus_query_mac(struct ioqnet_priv *priv,
+				  void *data, size_t len)
+{
+	if (len != ETH_ALEN)
+		return -EINVAL;
+
+	memcpy(data, priv->netdev->dev_addr, ETH_ALEN);
+
+	return 0;
+}
+
+/*
+ * This function is invoked whenever a guest calls pvbus_ops->call() against
+ * our instance ID
+ */
+static int ioqnet_pvbus_device_call(struct kvm_pv_device *t, u32 func,
+				    void *data, size_t len)
+{
+	struct ioqnet_priv *priv = to_priv(t);
+	int ret;
+
+	switch (func) {
+	case IOQNET_CONNECT:
+		ret = ioqnet_pvbus_connect(priv, data, len);
+		break;
+	case IOQNET_QUERY_MAC:
+		ret = ioqnet_pvbus_query_mac(priv, data, len);
+		break;
+	default:
+		/* Unknown function codes must not return stack garbage */
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * FIXME: nothing is torn down here yet; the net_device allocated by
+ * ioqnet_pvbus_devtype_create() is never unregistered or freed.
+ */
+static void ioqnet_pvbus_device_destroy(struct kvm_pv_device *t)
+{
+}
+
+/*
+ * This function is invoked whenever someone instantiates an IOQNET object
+ */
+static int ioqnet_pvbus_devtype_create(struct kvm *kvm,
+				       struct kvm_pv_devtype *t, u64 id,
+				       const char *cfg,
+				       struct kvm_pv_device **pvdev)
+{
+	struct net_device *dev;
+	struct ioqnet_priv *priv;
+	int ret;
+
+	dev = alloc_netdev(sizeof(struct ioqnet_priv), "ioq%d",
+			   ioqnet_init);
+	if (!dev)
+		return -ENOMEM;
+
+	priv = netdev_priv(dev);
+
+	memset(priv, 0, sizeof(*priv));
+
+	priv->pvdev.call = ioqnet_pvbus_device_call;
+	priv->pvdev.destroy = ioqnet_pvbus_device_destroy;
+	priv->pvdev.id = id;
+	priv->pvdev.ver = IOQNET_VERSION;
+
+	spin_lock_init(&priv->lock);
+	priv->kvm = kvm;
+	priv->netdev = dev;
+	tasklet_init(&priv->txtask, ioqnet_tx_intr, (unsigned long)priv);
+
+	ret = register_netdev(dev);
+	if (ret < 0) {
+		printk(KERN_ERR "ioqnet: error %i registering device \"%s\"\n",
+		       ret, dev->name);
+		/* priv lives inside dev, so do not touch it past this point */
+		free_netdev(dev);
+		return ret;
+	}
+
+	*pvdev = &priv->pvdev;
+
+	return 0;
+}
+
+static int ioqnet_pvbus_devtype_destroy(struct kvm_pv_devtype *t)
+{
+	return -ENOSYS;
+}
+
+static struct kvm_pv_devtype ioqnet_devtype = {
+	.create = ioqnet_pvbus_devtype_create,
+	.destroy = ioqnet_pvbus_devtype_destroy,
+	.name = IOQNET_NAME,
+};
+
+static int __init ioqnet_init_module(void)
+{
+	return kvm_pvbus_registertype(&ioqnet_devtype);
+}
+
+static void __exit ioqnet_cleanup_module(void)
+{
+	kvm_pvbus_unregistertype(IOQNET_NAME);
+}
+
+module_init(ioqnet_init_module);
+module_exit(ioqnet_cleanup_module);

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel