Signed-off-by: Gregory Haskins <[EMAIL PROTECTED]> --- drivers/net/Kconfig | 10 + drivers/net/Makefile | 2 drivers/net/ioqnet/Makefile | 11 + drivers/net/ioqnet/driver.c | 658 +++++++++++++++++++++++++++++++++++++++++++ include/linux/ioqnet.h | 44 +++ 5 files changed, 725 insertions(+), 0 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index fb99cd4..7ee7454 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2947,6 +2947,16 @@ config NETCONSOLE
 	  If you want to log kernel messages over the network, enable this.
 	  See <file:Documentation/networking/netconsole.txt> for details.
 
+config IOQNET
+	tristate "IOQNET (IOQ based paravirtualized network driver)"
+	select IOQ
+	select PVBUS
+
+config IOQNET_DEBUG
+	bool "IOQNET debugging"
+	depends on IOQNET
+	default n
+
 endif #NETDEVICES
 
 config NETPOLL
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index a77affa..4c8a918 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -224,6 +224,8 @@ obj-$(CONFIG_ENP2611_MSF_NET) += ixp2000/
 
 obj-$(CONFIG_NETCONSOLE) += netconsole.o
 
+obj-$(CONFIG_IOQNET) += ioqnet/
+
 obj-$(CONFIG_FS_ENET) += fs_enet/
 
 obj-$(CONFIG_NETXEN_NIC) += netxen/
diff --git a/drivers/net/ioqnet/Makefile b/drivers/net/ioqnet/Makefile
new file mode 100644
index 0000000..d7020ee
--- /dev/null
+++ b/drivers/net/ioqnet/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the IOQNET ethernet driver
+#
+
+ioqnet-objs = driver.o
+obj-$(CONFIG_IOQNET) += ioqnet.o
+
+
+ifeq ($(CONFIG_IOQNET_DEBUG),y)
+EXTRA_CFLAGS += -DIOQNET_DEBUG
+endif
diff --git a/drivers/net/ioqnet/driver.c b/drivers/net/ioqnet/driver.c
new file mode 100644
index 0000000..8352029
--- /dev/null
+++ b/drivers/net/ioqnet/driver.c
@@ -0,0 +1,658 @@
+/*
+ * ioqnet - A paravirtualized network device based on the IOQ interface
+ *
+ * Copyright (C) 2007 Novell, Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * Derived from the SNULL example from the book "Linux Device
+ * Drivers" by Alessandro Rubini and Jonathan Corbet, published
+ * by O'Reilly & Associates.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h> /* printk() */
+#include <linux/slab.h> /* kmalloc() */
+#include <linux/errno.h>  /* error codes */
+#include <linux/types.h>  /* size_t */
+#include <linux/interrupt.h> /* mark_bh */
+
+#include <linux/in.h>
+#include <linux/netdevice.h>   /* struct device, and other headers */
+#include <linux/etherdevice.h> /* eth_type_trans */
+#include <linux/ip.h>          /* struct iphdr */
+#include <linux/tcp.h>         /* struct tcphdr */
+#include <linux/skbuff.h>
+#include <linux/ioq.h>
+#include <linux/pvbus.h>
+
+#include <linux/in6.h>
+#include <asm/checksum.h>
+
+#include <linux/ioqnet.h>
+
+MODULE_AUTHOR("Gregory Haskins");
+MODULE_LICENSE("GPL");
+
+#undef PDEBUG             /* undef it, just in case */
+#ifdef IOQNET_DEBUG
+#  define PDEBUG(fmt, args...) printk( KERN_DEBUG "ioqnet: " fmt, ## args)
+#else
+#  define PDEBUG(fmt, args...) /* not debugging: nothing */
+#endif
+
+#define RX_RINGLEN 64
+#define TX_RINGLEN 64
+#define TX_PTRS_PER_DESC 64
+
+struct ioqnet_queue {
+	struct ioq              *queue;
+	struct ioq_notifier      notifier;
+};
+
+struct ioqnet_tx_desc {
+	struct sk_buff          *skb;
+	struct ioqnet_tx_ptr     data[TX_PTRS_PER_DESC];
+};
+
+struct ioqnet_priv {
+	spinlock_t               lock;
+	struct net_device       *dev;
+	struct pvbus_device     *pdev;
+	struct net_device_stats  stats;
+	struct ioqnet_queue      rxq;
+	struct ioqnet_queue      txq;
+	struct tasklet_struct    txtask;
+};
+
+static int ioqnet_queue_init(struct ioqnet_priv *priv,
+			     struct ioqnet_queue *q,
+			     size_t ringsize,
+			     void (*func)(struct ioq_notifier*))
+{
+	int ret = priv->pdev->createqueue(priv->pdev, &q->queue, ringsize, 0);
+	if (ret < 0)
+		return ret;
+
+	q->notifier.signal = func;
+	q->queue->notifier = &q->notifier;
+
+	return 0;
+}
+
+/* Perform a hypercall to register/connect our queues */
+static int ioqnet_connect(struct ioqnet_priv *priv)
+{
+	struct ioqnet_connect data = {
+		.rxq = priv->rxq.queue->id,
+		.txq = priv->txq.queue->id,
+	};
+
+	return priv->pdev->call(priv->pdev, IOQNET_CONNECT,
+				&data, sizeof(data), 0);
+}
+
+static int ioqnet_disconnect(struct ioqnet_priv *priv)
+{
+	return priv->pdev->call(priv->pdev, IOQNET_DISCONNECT, NULL, 0, 0);
+}
+
+/* Perform a hypercall to get the assigned MAC addr */
+static int ioqnet_query_mac(struct ioqnet_priv *priv)
+{
+	return priv->pdev->call(priv->pdev,
+				IOQNET_QUERY_MAC,
+				priv->dev->dev_addr,
+				ETH_ALEN, 0);
+}
+
+
+/*
+ * Enable and disable receive interrupts.
+ */
+static void ioqnet_rx_ints(struct net_device *dev, int enable)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+	struct ioq *ioq = priv->rxq.queue;
+	if (enable)
+		ioq_start(ioq, 0);
+	else
+		ioq_stop(ioq, 0);
+}
+
+static void ioqnet_alloc_rx_desc(struct ioq_ring_desc *desc, size_t len)
+{
+	struct sk_buff *skb = dev_alloc_skb(len + 2);
+	BUG_ON(!skb);
+
+	skb_reserve(skb, 2); /* align IP on 16B boundary */
+
+	desc->cookie = (u64)skb;
+	desc->ptr    = (u64)__pa(skb->data);
+	desc->len    = len; /* total length  */
+	desc->alen   = 0;   /* actual length - to be filled in by host */
+
+	mb();
+	desc->valid  = 1;
+	desc->sown   = 1; /* give ownership to the south */
+	mb();
+}
+
+static void ioqnet_setup_rx(struct ioqnet_priv *priv)
+{
+	struct ioq *ioq = priv->rxq.queue;
+	struct ioq_iterator iter;
+	int ret;
+
+	/*
+	 * We want to iterate on the "valid" index.  By default the iterator
+	 * will not "autoupdate" which means it will not hypercall the host
+	 * with our changes.  This is good, because we are really just
+	 * initializing stuff here anyway.  Note that you can always manually
+	 * signal the host with ioq_signal() if the autoupdate feature is not
+	 * used.
+	 */
+	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * Seek to the head of the valid index (which should be our first
+	 * item, since the queue is brand-new)
+	 */
+	ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * Now populate each descriptor with an empty SKB and mark it valid
+	 */
+	while (!iter.desc->valid) {
+		ioqnet_alloc_rx_desc(iter.desc, priv->dev->mtu);
+
+		/*
+		 * This push operation will simultaneously advance the
+		 * valid-head index and increment our position in the queue
+		 * by one.
+		 */
+		ret = ioq_iter_push(&iter, 0);
+		BUG_ON(ret < 0);
+	}
+}
+
+static void ioqnet_setup_tx(struct ioqnet_priv *priv)
+{
+	struct ioq *ioq = priv->txq.queue;
+	struct ioq_iterator iter;
+	int ret, i;
+
+	/*
+	 * We setup the tx-desc in a similar way to how we did the rx SKBs
+	 */
+	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+	BUG_ON(ret < 0);
+
+	for (i = 0; i < TX_RINGLEN; ++i) {
+		struct ioq_ring_desc  *desc   = iter.desc;
+		struct ioqnet_tx_desc *txdesc = kzalloc(sizeof(*txdesc),
+							GFP_KERNEL | GFP_DMA);
+
+		BUG_ON(!txdesc); /* same policy as the rx skb allocation */
+		desc->cookie = (u64)txdesc;
+		desc->ptr    = (u64)__pa(&txdesc->data[0]);
+		desc->len    = TX_PTRS_PER_DESC; /* "len" is "count" */
+		desc->alen   = 0;
+		desc->valid  = 0; /* mark it "invalid" since payload empty */
+		desc->sown   = 0; /* retain ownership until "inuse" */
+
+		/*
+		 * One big difference between the RX and TX ring is that
+		 * we are going to do an "iter++" here instead of an
+		 * "iter->push()".  That is because we don't want to actually
+		 * advance the valid-index.  We use the valid index to
+		 * determine the difference between outstanding consumed and
+		 * outstanding unconsumed packets
+		 */
+		ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0);
+		BUG_ON(ret < 0);
+	}
+}
+
+/*
+ * Open and close
+ */
+
+static int ioqnet_open(struct net_device *dev)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+
+	if (ioqnet_connect(priv) < 0)
+		printk(KERN_ERR "IOQNET: Could not initialize instance %lld\n",
+		       priv->pdev->id);
+
+
+	netif_start_queue(dev);
+	return 0;
+}
+
+static void ioqnet_destroy_queue(struct ioq *ioq)
+{
+	ioq_stop(ioq, 0);
+	ioq->destroy(ioq);
+}
+
+static int ioqnet_release(struct net_device *dev)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+
+	netif_stop_queue(dev);
+
+	if (ioqnet_disconnect(priv) < 0)
+		printk(KERN_ERR "IOQNET: Could not disconnect instance %lld\n",
+		       priv->pdev->id);
+
+	ioqnet_destroy_queue(priv->rxq.queue);
+	ioqnet_destroy_queue(priv->txq.queue);
+
+	return 0;
+}
+
+/*
+ * Configuration changes (passed on by ifconfig)
+ */
+static int ioqnet_config(struct net_device *dev, struct ifmap *map)
+{
+	if (dev->flags & IFF_UP) /* can't act on a running interface */
+		return -EBUSY;
+
+	/* Don't allow changing the I/O address */
+	if (map->base_addr != dev->base_addr) {
+		printk(KERN_WARNING "ioqnet: Can't change I/O address\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* ignore other fields */
+	return 0;
+}
+
+/*
+ * The poll implementation.
+ */
+static int ioqnet_poll(struct net_device *dev, int *budget)
+{
+	int npackets = 0, quota = min(dev->quota, *budget);
+	struct ioqnet_priv *priv = netdev_priv(dev);
+	struct ioq_iterator iter;
+	unsigned long flags;
+	int ret;
+
+	PDEBUG("polling...\n");
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	/* We want to iterate on the tail of the in-use index */
+	ret = ioq_iter_init(priv->rxq.queue, &iter, ioq_idxtype_inuse, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * We stop if we have met the quota or there are no more packets.
+	 * The EOM is indicated by finding a packet that is still owned by
+	 * the south side
+	 */
+	while ((npackets < quota) && (!iter.desc->sown)) {
+		struct sk_buff *skb = (struct sk_buff*)iter.desc->cookie;
+
+		skb_put(skb, iter.desc->alen); /* payload sits at skb->data */
+
+		/* Maintain stats */
+		npackets++;
+		priv->stats.rx_packets++;
+		priv->stats.rx_bytes += iter.desc->alen;
+
+		/* Pass the buffer up to the stack */
+		skb->dev      = dev;
+		skb->protocol = eth_type_trans(skb, dev);
+		netif_receive_skb(skb);
+
+		mb();
+
+		/* Grab a new buffer to put in the ring */
+		ioqnet_alloc_rx_desc(iter.desc, dev->mtu);
+
+		/* Advance the in-use tail */
+		ret = ioq_iter_pop(&iter, 0);
+		BUG_ON(ret < 0);
+
+		/* Toggle the lock */
+		spin_unlock_irqrestore(&priv->lock, flags);
+		spin_lock_irqsave(&priv->lock, flags);
+	}
+
+	PDEBUG("poll: %d packets received\n", npackets);
+
+	/*
+	 * If we processed all packets, we're done; tell the kernel and
+	 * reenable ints
+	 */
+	*budget -= npackets;
+	dev->quota -= npackets;
+	if (ioq_empty(priv->rxq.queue, ioq_idxtype_inuse)) {
+		/* FIXME: there is a race with enabling interrupts */
+		netif_rx_complete(dev);
+		ioqnet_rx_ints(dev, 1);
+		ret = 0;
+	} else
+		/* We couldn't process everything. */
+		ret = 1;
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	/* And let the south side know that we changed the rx-queue */
+	ioq_signal(priv->rxq.queue, 0);
+
+	return ret;
+}
+
+/*
+ * Transmit a packet (called by the kernel)
+ */
+static int ioqnet_tx_start(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+	struct ioq_iterator viter;
+	struct ioq_iterator uiter;
+	struct ioqnet_tx_desc *txdesc;
+	int ret;
+	int i;
+	unsigned long flags;
+
+	if (skb->len < ETH_ZLEN)
+		return -EINVAL; /* NOTE(review): skb is not freed here */
+
+	PDEBUG("sending %d bytes\n", skb->len);
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	if (ioq_full(priv->txq.queue, ioq_idxtype_valid)) {
+		/*
+		 * Ring full: stop the queue and have the stack requeue the skb
+		 */
+		netif_stop_queue(dev); /* under the lock: tx_complete wakes */
+		spin_unlock_irqrestore(&priv->lock, flags);
+		return NETDEV_TX_BUSY;
+	}
+
+	/*
+	 * We want to iterate on the head of both the "inuse" and "valid" index
+	 */
+	ret = ioq_iter_init(priv->txq.queue, &viter, ioq_idxtype_valid, 0);
+	BUG_ON(ret < 0);
+	ret = ioq_iter_init(priv->txq.queue, &uiter, ioq_idxtype_inuse, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&viter, ioq_seek_head, 0, 0);
+	BUG_ON(ret < 0);
+	ret = ioq_iter_seek(&uiter, ioq_seek_head, 0, 0);
+	BUG_ON(ret < 0);
+
+	/* The head pointers should move in lockstep */
+	BUG_ON(uiter.pos != viter.pos);
+
+	dev->trans_start = jiffies; /* save the timestamp */
+	/* no skb_get(): we own the skb now and free it in tx_complete */
+
+	txdesc = (struct ioqnet_tx_desc*)uiter.desc->cookie;
+
+	/*
+	 * We simply put the skb right onto the ring.  We will get an interrupt
+	 * later when the data has been consumed and we can reap the pointers
+	 * at that time
+	 */
+	for (i = 0; i < 1; ++i) { /* Someday we will support SG */
+		txdesc->data[i].len  = (u64)skb->len;
+		txdesc->data[i].data = (u64)__pa(skb->data);
+
+		uiter.desc->alen++;
+	}
+
+	txdesc->skb = skb; /* save the skb for future release */
+
+	mb();
+	uiter.desc->valid = 1;
+	uiter.desc->sown  = 1; /* give ownership to the south */
+	mb();
+
+	/* Advance both indexes together */
+	ret = ioq_iter_push(&viter, 0);
+	BUG_ON(ret < 0);
+	ret = ioq_iter_push(&uiter, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * This will signal the south side to consume the packet
+	 */
+	ioq_signal(priv->txq.queue, 0);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+/*
+ * called by the tx interrupt handler to indicate that one or more packets
+ * have been consumed
+ */
+static void ioqnet_tx_complete(unsigned long data)
+{
+	struct ioqnet_priv *priv = (struct ioqnet_priv*)data;
+	struct ioq_iterator iter;
+	unsigned long flags;
+	int ret;
+
+	PDEBUG("send complete\n");
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	/* We want to iterate on the tail of the tx queue's valid index */
+	ret = ioq_iter_init(priv->txq.queue, &iter, ioq_idxtype_valid, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * We are done once we find the first packet either invalid or still
+	 * owned by the south-side
+	 */
+	while (iter.desc->valid && !iter.desc->sown) {
+		struct ioqnet_tx_desc *txdesc;
+		struct sk_buff *skb;
+
+		txdesc = (struct ioqnet_tx_desc*)iter.desc->cookie;
+		skb = txdesc->skb;
+
+		/* Maintain stats */
+		priv->stats.tx_packets++;
+		priv->stats.tx_bytes += skb->len;
+
+		/* Reset the descriptor */
+		mb();
+		iter.desc->alen = 0;
+		iter.desc->valid = 0;
+		mb();
+
+		dev_kfree_skb_any(skb); /* irqs are off under priv->lock */
+
+		/* Advance the valid-index tail */
+		ret = ioq_iter_pop(&iter, 0);
+		BUG_ON(ret < 0);
+
+		/* Toggle the lock */
+		spin_unlock_irqrestore(&priv->lock, flags);
+		spin_lock_irqsave(&priv->lock, flags);
+	}
+
+	/*
+	 * If we were previously stopped due to flow control, restart the
+	 * processing
+	 */
+	if (netif_queue_stopped(priv->dev)
+	    && !ioq_full(priv->txq.queue, ioq_idxtype_inuse)) {
+
+		netif_wake_queue(priv->dev);
+	}
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/*
+ * Ioctl commands
+ */
+static int ioqnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	PDEBUG("ioctl\n");
+	return 0;
+}
+
+/*
+ * Return statistics to the caller
+ */
+static struct net_device_stats *ioqnet_stats(struct net_device *dev)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+	return &priv->stats;
+}
+
+static void ioq_rx_notify(struct ioq_notifier *notifier)
+{
+	struct ioqnet_priv *priv;
+	struct net_device  *dev;
+
+	priv = container_of(notifier, struct ioqnet_priv, rxq.notifier);
+	dev = priv->dev;
+
+	ioqnet_rx_ints(dev, 0);  /* Disable further interrupts */
+	netif_rx_schedule(dev);
+}
+
+static void ioq_tx_notify(struct ioq_notifier *notifier)
+{
+	struct ioqnet_priv *priv;
+
+	priv = container_of(notifier, struct ioqnet_priv, txq.notifier);
+
+	PDEBUG("tx_notify for %lld\n", priv->pdev->id);
+
+	tasklet_schedule(&priv->txtask);
+}
+
+/*
+ * This is called whenever a new pvbus_device is added to the pvbus with
+ * the matching IOQNET_NAME
+ */
+static int ioqnet_probe(struct pvbus_device *pdev)
+{
+	struct net_device *dev;
+	struct ioqnet_priv *priv;
+	int ret;
+
+	printk(KERN_INFO "IOQNET: Found new device at %lld\n", pdev->id);
+
+	dev = alloc_etherdev(sizeof(struct ioqnet_priv));
+	if (!dev)
+		return -ENOMEM;
+
+	priv = netdev_priv(dev);
+	memset(priv, 0, sizeof(*priv));
+
+	spin_lock_init(&priv->lock);
+	priv->dev = dev;
+	priv->pdev = pdev;
+	tasklet_init(&priv->txtask, ioqnet_tx_complete, (unsigned long)priv);
+
+	ioqnet_queue_init(priv, &priv->rxq, RX_RINGLEN, ioq_rx_notify);
+	ioqnet_queue_init(priv, &priv->txq, TX_RINGLEN, ioq_tx_notify);
+
+	ioqnet_setup_rx(priv);
+	ioqnet_setup_tx(priv);
+
+	ioqnet_rx_ints(dev, 1); /* enable receive interrupts */
+	ioq_start(priv->txq.queue, 0); /* enable transmit interrupts */
+
+	ether_setup(dev); /* assign some of the fields */
+
+	dev->open              = ioqnet_open;
+	dev->stop              = ioqnet_release;
+	dev->set_config        = ioqnet_config;
+	dev->hard_start_xmit   = ioqnet_tx_start;
+	dev->do_ioctl          = ioqnet_ioctl;
+	dev->get_stats         = ioqnet_stats;
+	dev->poll              = ioqnet_poll;
+	dev->weight            = 2;
+	dev->hard_header_cache = NULL; /* Disable caching */
+
+	ret = ioqnet_query_mac(priv);
+	if (ret < 0) {
+		printk(KERN_ERR "IOQNET: Could not obtain MAC address for %lld\n",
+		       priv->pdev->id);
+		goto out_free;
+	}
+
+	ret = register_netdev(dev);
+	if (ret < 0) {
+		printk(KERN_ERR "IOQNET: error %i registering device \"%s\"\n",
+		       ret, dev->name);
+		goto out_free;
+	}
+
+	pdev->priv = priv;
+
+	return 0;
+
+ out_free:
+	free_netdev(dev);
+
+	return ret;
+}
+
+static int ioqnet_remove(struct pvbus_device *pdev)
+{
+	struct ioqnet_priv *priv = (struct ioqnet_priv*)pdev->priv;
+
+	unregister_netdev(priv->dev);
+	ioqnet_release(priv->dev);
+	free_netdev(priv->dev);
+
+	return 0;
+}
+
+/*
+ * Finally, the module stuff
+ */
+
+static struct pvbus_driver ioqnet_pvbus_driver = {
+	.name   = IOQNET_NAME,
+	.owner  = THIS_MODULE,
+	.probe  = ioqnet_probe,
+	.remove = ioqnet_remove,
+};
+
+static int __init ioqnet_init_module(void)
+{
+	return pvbus_driver_register(&ioqnet_pvbus_driver);
+}
+
+static void __exit ioqnet_cleanup(void)
+{
+	pvbus_driver_unregister(&ioqnet_pvbus_driver);
+}
+
+
+module_init(ioqnet_init_module);
+module_exit(ioqnet_cleanup);
diff --git a/include/linux/ioqnet.h b/include/linux/ioqnet.h
new file mode 100644
index 0000000..7c73a26
--- /dev/null
+++ b/include/linux/ioqnet.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2007 Novell.  All Rights Reserved.
+ *
+ * IOQ Network Driver
+ *
+ * Author:
+ *      Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _IOQNET_H
+#define _IOQNET_H
+
+#define IOQNET_VERSION 1
+#define IOQNET_NAME "ioqnet"
+
+/* IOQNET functions (invoked via pvbus_device->call()) */
+#define IOQNET_CONNECT    1
+#define IOQNET_DISCONNECT 2
+#define IOQNET_QUERY_MAC  3
+
+struct ioqnet_connect {
+	ioq_id_t rxq;
+	ioq_id_t txq;
+};
+
+struct ioqnet_tx_ptr {
+	u64 len;
+	u64 data;
+};
+
+#endif /* _IOQNET_H */

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel