Re: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-29 Thread Jeff Garzik

David Miller wrote:

From: PJ Waskiewicz [EMAIL PROTECTED]
Date: Thu, 28 Jun 2007 09:21:13 -0700


-struct net_device *alloc_netdev(int sizeof_priv, const char *name,
-   void (*setup)(struct net_device *))
+struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
+   void (*setup)(struct net_device *), int queue_count)
 {
void *p;
struct net_device *dev;
@@ -3557,7 +3564,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const 
char *name,
BUG_ON(strlen(name) >= sizeof(dev->name));
 
 	/* ensure 32-byte alignment of both the device and private area */

-   alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+   alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
+(sizeof(struct net_device_subqueue) * queue_count)) &
+~NETDEV_ALIGN_CONST;
alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
 
 	p = kzalloc(alloc_size, GFP_KERNEL);

@@ -3573,12 +3582,14 @@ struct net_device *alloc_netdev(int sizeof_priv, const 
char *name,
if (sizeof_priv)
dev-priv = netdev_priv(dev);
 
+	dev-egress_subqueue_count = queue_count;

+
dev-get_stats = internal_stats;
setup(dev);
strcpy(dev-name, name);
return dev;
 }


This isn't going to work.

The pointer returned from netdev_priv() doesn't take into account the
variable sized queues at the end of struct net_device, so we can stomp
over the queues with the private area.

This probably works by luck because of NETDEV_ALIGN.

The simplest fix is to just make netdev_priv() use dev->priv,
except when it's being initialized during allocation, and
that's what I'm going to do when I apply your patch.


Ugh.  That will reverse the gains we had with the current setup, won't it?

Also, what happens when we want to add ingress_queue[0] ?

Jeff





-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread PJ Waskiewicz
Updated: Fixed allocation of subqueues in alloc_netdev_mq() to
allocate all subqueues, not num - 1.

Added checks for netif_subqueue_stopped() to netpoll,
pktgen, and software device dev_queue_xmit().  This will ensure
external events to these subsystems will be handled correctly if
a subqueue is shut down.

Add the multiqueue hardware device support API to the core network
stack.  Allow drivers to allocate multiple queues and manage them
at the netdev level if they choose to do so.

Added a new field to sk_buff, namely queue_mapping, for drivers to
know which tx_ring to select based on OS classification of the flow.

Signed-off-by: Peter P Waskiewicz Jr [EMAIL PROTECTED]
---

 include/linux/etherdevice.h |3 +-
 include/linux/netdevice.h   |   62 ++-
 include/linux/skbuff.h  |4 ++-
 net/core/dev.c  |   27 +--
 net/core/netpoll.c  |8 +++---
 net/core/pktgen.c   |   10 +--
 net/core/skbuff.c   |3 ++
 net/ethernet/eth.c  |9 +++---
 8 files changed, 104 insertions(+), 22 deletions(-)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index f48eb89..b3fbb54 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -39,7 +39,8 @@ extern void   eth_header_cache_update(struct hh_cache 
*hh, struct net_device *dev
 extern int eth_header_cache(struct neighbour *neigh,
 struct hh_cache *hh);
 
-extern struct net_device *alloc_etherdev(int sizeof_priv);
+extern struct net_device *alloc_etherdev_mq(int sizeof_priv, int queue_count);
+#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 
 /**
  * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2c0cc19..7078745 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -108,6 +108,14 @@ struct wireless_dev;
 #define MAX_HEADER (LL_MAX_HEADER + 48)
 #endif
 
+struct net_device_subqueue
+{
+   /* Give a control state for each queue.  This struct may contain
+* per-queue locks in the future.
+*/
+   unsigned long   state;
+};
+
 /*
  * Network device statistics. Akin to the 2.0 ether stats but
  * with byte counters.
@@ -331,6 +339,7 @@ struct net_device
 #define NETIF_F_VLAN_CHALLENGED1024/* Device cannot handle VLAN 
packets */
 #define NETIF_F_GSO2048/* Enable software GSO. */
 #define NETIF_F_LLTX   4096/* LockLess TX */
+#define NETIF_F_MULTI_QUEUE16384   /* Has multiple TX/RX queues */
 
/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT  16
@@ -557,6 +566,10 @@ struct net_device
 
/* rtnetlink link ops */
const struct rtnl_link_ops *rtnl_link_ops;
+
+   /* The TX queue control structures */
+   int egress_subqueue_count;
+   struct net_device_subqueue  egress_subqueue[0];
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
@@ -719,6 +732,48 @@ static inline int netif_running(const struct net_device 
*dev)
return test_bit(__LINK_STATE_START, dev-state);
 }
 
+/*
+ * Routines to manage the subqueues on a device.  We only need start
+ * stop, and a check if it's stopped.  All other device management is
+ * done at the overall netdevice level.
+ * Also test the device if we're multiqueue.
+ */
+static inline void netif_start_subqueue(struct net_device *dev, u16 
queue_index)
+{
+   clear_bit(__LINK_STATE_XOFF, dev-egress_subqueue[queue_index].state);
+}
+
+static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETPOLL_TRAP
+   if (netpoll_trap())
+   return;
+#endif
+   set_bit(__LINK_STATE_XOFF, dev-egress_subqueue[queue_index].state);
+}
+
+static inline int netif_subqueue_stopped(const struct net_device *dev,
+u16 queue_index)
+{
+   return test_bit(__LINK_STATE_XOFF,
+   dev-egress_subqueue[queue_index].state);
+}
+
+static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETPOLL_TRAP
+   if (netpoll_trap())
+   return;
+#endif
+   if (test_and_clear_bit(__LINK_STATE_XOFF,
+  dev-egress_subqueue[queue_index].state))
+   __netif_schedule(dev);
+}
+
+static inline int netif_is_multiqueue(const struct net_device *dev)
+{
+   return (!!(NETIF_F_MULTI_QUEUE & dev->features));
+}
 
 /* Use this variant when it is known for sure that it
  * is executing from interrupt context.
@@ -1009,8 +1064,11 @@ static inline void netif_tx_disable(struct net_device 
*dev)
 extern voidether_setup(struct net_device *dev);
 
 /* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev(int 

Re: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread Patrick McHardy

PJ Waskiewicz wrote:

Updated: Fixed allocation of subqueues in alloc_netdev_mq() to
allocate all subqueues, not num - 1.

Added checks for netif_subqueue_stopped() to netpoll,
pktgen, and software device dev_queue_xmit().  This will ensure
external events to these subsystems will be handled correctly if
a subqueue is shut down.

Add the multiqueue hardware device support API to the core network
stack.  Allow drivers to allocate multiple queues and manage them
at the netdev level if they choose to do so.

Added a new field to sk_buff, namely queue_mapping, for drivers to
know which tx_ring to select based on OS classification of the flow.

Signed-off-by: Peter P Waskiewicz Jr [EMAIL PROTECTED]
  


Acked-by: Patrick McHardy [EMAIL PROTECTED]

skb->iif and queue_mapping should probably go somewhere near
the other shaping stuff and unsigned int seems to be a better
choice for egress_subqueue_count, but I can take care of that
when this patch is in Dave's tree.




-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread Waskiewicz Jr, Peter P
 PJ Waskiewicz wrote:
   include/linux/etherdevice.h |3 +-
   include/linux/netdevice.h   |   62 
 ++-
   include/linux/skbuff.h  |4 ++-
   net/core/dev.c  |   27 +--
   net/core/netpoll.c  |8 +++---
   net/core/pktgen.c   |   10 +--
   net/core/skbuff.c   |3 ++
   net/ethernet/eth.c  |9 +++---
   8 files changed, 104 insertions(+), 22 deletions(-)
 
  include/linux/pkt_sched.h |9 +++
  net/sched/Kconfig |   23 +++
  net/sched/sch_prio.c  |  147
 +
   3 files changed, 166 insertions(+), 13 deletions(-)
 
 
 Quick question: where are the sch_generic changes? :)
 
 If you hold for ten minutes I'll post a set of slightly 
 changed patches with the NETDEVICES_MULTIQUEUE option and a 
 fix for this.

Jamal's and KK's qdisc_restart() rewrite took the netif_queue_stopped()
call out of sch_generic.c.  So the underlying qdisc is only responsible
for checking the queue status now before dequeueing.

-PJ
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread Patrick McHardy
Waskiewicz Jr, Peter P wrote:
Quick question: where are the sch_generic changes? :)

If you hold for ten minutes I'll post a set of slightly 
changed patches with the NETDEVICES_MULTIQUEUE option and a 
fix for this.
 
 
 Jamal's and KK's qdisc_restart() rewrite took the netif_queue_stopped()
 call out of sch_generic.c.  So the underlying qdisc is only responsible
 for checking the queue status now before dequeueing.


Yes, I noticed that now. Doesn't seem right though as long as
queueing while queue is stopped is treated as a bug by the
drivers.

But I vaguely recall seeing a discussion about this, I'll check
the archives.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread Waskiewicz Jr, Peter P
 Waskiewicz Jr, Peter P wrote:
 Quick question: where are the sch_generic changes? :)
 
 If you hold for ten minutes I'll post a set of slightly changed 
 patches with the NETDEVICES_MULTIQUEUE option and a fix for this.
  
  
  Jamal's and KK's qdisc_restart() rewrite took the 
 netif_queue_stopped()
  call out of sch_generic.c.  So the underlying qdisc is only 
 responsible
  for checking the queue status now before dequeueing.
 
 
 Yes, I noticed that now. Doesn't seem right though as long as
 queueing while queue is stopped is treated as a bug by the
 drivers.
 
 But I vaguely recall seeing a discussion about this, I'll check
 the archives.

The basic gist is that before the dequeue is done, the qdisc is locked by the
"qdisc is running" bit, so another CPU cannot get in there.  So if the
queue isn't stopped when a dequeue is done, that same queue should not
be stopped when hard_start_xmit() is called.  The only thing I could
think of that could happen is some out-of-band cleanup routine in the
driver where the tx_ring lock is held, and the skb is bounced back,
where the driver returns NETDEV_TX_BUSY, and you requeue.  This is an
extreme corner case, so the check could be removed.

-PJ
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread Patrick McHardy
Waskiewicz Jr, Peter P wrote:
Waskiewicz Jr, Peter P wrote:

Yes, I noticed that now. Doesn't seem right though as long as
queueing while queue is stopped is treated as a bug by the
drivers.

But I vaguely recall seeing a discussion about this, I'll check
the archives.
 
 
 The basic gist is before the dequeue is done, the qdisc is locked by the
 qdisc is running bit, so another CPU cannot get in there.  So if the
 queue isn't stopped when a dequeue is done, that same queue should not
 be stopped when hard_start_xmit() is called.  The only thing I could
 think of that could happen is some out-of-band cleanup routine in the
 driver where the tx_ring lock is held, and the skb is bounced back,
 where the driver returns NETIF_TX_BUSY, and you requeue.  This is an
 extreme corner case, so the check could be removed.


Yes, but there are users that don't go through qdiscs, like netpoll.
Having them check the QDISC_RUNNING bit seems ugly.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread Jeff Garzik

Patrick McHardy wrote:

Yes, but there are users that don't go through qdiscs, like netpoll,
Having them check the QDISC_RUNNING bit seems ugly.


Is netpoll the only such user?

netpoll tends to be a special case in every sense of the word, and I 
wish it was less so :/


Jeff


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread Patrick McHardy
Jeff Garzik wrote:
 Patrick McHardy wrote:
 
 Yes, but there are users that don't go through qdiscs, like netpoll,
 Having them check the QDISC_RUNNING bit seems ugly.
 
 
 Is netpoll the only such user?

I'm not sure, I just remembered that one :)

Looking at Peter's multiqueue patch, which should include all
hard_start_xmit users (I'm not seeing sch_teql though, Peter?)
the only other one is pktgen.

 netpoll tends to be a special case in every sense of the word, and I
 wish it was less so :/

Indeed.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread David Miller
From: Jeff Garzik [EMAIL PROTECTED]
Date: Thu, 28 Jun 2007 15:32:40 -0400

 Patrick McHardy wrote:
  Yes, but there are users that don't go through qdiscs, like netpoll,
  Having them check the QDISC_RUNNING bit seems ugly.
 
 Is netpoll the only such user?
 
 netpoll tends to be a special case in every sense of the word, and I 
 wish it was less so :/

Seconded.

I'm perfectly happy to consider rearchitecting of netpoll to something
that works better.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread Waskiewicz Jr, Peter P
 Waskiewicz Jr, Peter P wrote:
 
  Looking at Peter's multiqueue patch, which should include all 
  hard_start_xmit users (I'm not seeing sch_teql though,
  Peter?) the only other one is pktgen.
  
 
  Ugh.  That is another netif_queue_stopped() that needs 
  netif_subqueue_stopped().  I can send an updated patch for 
 the core to 
  fix this based from your patches Patrick.

 
 I still have the tree around, here's an updated version.
 
 
  So what do we do about netpoll then wrt netif_(sub)queue_stopped() 
  being removed from qdisc_restart()?  The fallout of having 
 netpoll() 
  cause a queue to stop (queue 0 only) is the skb sent will 
 be requeued, 
  since the driver will return NETIF_TX_BUSY if this actually 
 happens.  
  But this is a corner case, and we won't lose packets; we'll 
 just have 
  increased latency on that queue.  Should I worry about this or just 
  move forward with the sch_teql.c change and repost the core patch?

 
 
 I don't think you need to worry about that, the subqueue 
 patch just follows the existing code.

Thanks Patrick for taking care of this.  I am totally fine with this
patch; if anyone else has feedback, please send it.  If not, I'm excited
to see if these can be considered for 2.6.23 now.  :)  Thanks everyone
for the help.

Cheers,
-PJ Waskiewicz
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread David Miller
From: Waskiewicz Jr, Peter P [EMAIL PROTECTED]
Date: Thu, 28 Jun 2007 16:08:43 -0700

 Thanks Patrick for taking care of this.  I am totally fine with this
 patch; if anyone else has feedback, please send it.  If not, I'm excited
 to see if these can be considered for 2.6.23 now.  :)  Thanks everyone
 for the help.

I'll look over the current patches later this evening, I was
initially waiting for the GSO BUG() akpm reported to get fixed
and Herbert took care of that an hour or so ago.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-28 Thread David Miller
From: PJ Waskiewicz [EMAIL PROTECTED]
Date: Thu, 28 Jun 2007 09:21:13 -0700

 -struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 - void (*setup)(struct net_device *))
 +struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 + void (*setup)(struct net_device *), int queue_count)
  {
   void *p;
   struct net_device *dev;
 @@ -3557,7 +3564,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const 
 char *name,
   BUG_ON(strlen(name) = sizeof(dev-name));
  
   /* ensure 32-byte alignment of both the device and private area */
 - alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST)  ~NETDEV_ALIGN_CONST;
 + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
 +  (sizeof(struct net_device_subqueue) * queue_count)) 
 +  ~NETDEV_ALIGN_CONST;
   alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
  
   p = kzalloc(alloc_size, GFP_KERNEL);
 @@ -3573,12 +3582,14 @@ struct net_device *alloc_netdev(int sizeof_priv, 
 const char *name,
   if (sizeof_priv)
   dev-priv = netdev_priv(dev);
  
 + dev-egress_subqueue_count = queue_count;
 +
   dev-get_stats = internal_stats;
   setup(dev);
   strcpy(dev-name, name);
   return dev;
  }

This isn't going to work.

The pointer returned from netdev_priv() doesn't take into account the
variable sized queues at the end of struct netdev, so we can stomp
over the queues with the private area.

This probably works by luck because of NETDEV_ALIGN.

The simplest fix is to just make netdev_priv() use dev->priv,
except when it's being initialized during allocation, and
that's what I'm going to do when I apply your patch.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-25 Thread Waskiewicz Jr, Peter P
  /* ensure 32-byte alignment of both the device and 
 private area */
  -   alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST)  
 ~NETDEV_ALIGN_CONST;
  +   alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
  +(sizeof(struct net_device_subqueue) * 
 (queue_count - 1))) 
 
 
 Why queue_count - 1 ? It should be queue_count I think.

I'm not sure what went through my head, but I'll fix this.

 Otherwise ACK for this patch except that it should also 
 contain the sch_generic changes.

I misread your previous mail; I'll get the sch_generic.c changes into
this patch.

Thanks Patrick,
-PJ
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-24 Thread Patrick McHardy
PJ Waskiewicz wrote:
 +struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 + void (*setup)(struct net_device *), int queue_count)
  {
   void *p;
   struct net_device *dev;
 @@ -3361,7 +3368,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const 
 char *name,
   BUG_ON(strlen(name) = sizeof(dev-name));
  
   /* ensure 32-byte alignment of both the device and private area */
 - alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST)  ~NETDEV_ALIGN_CONST;
 + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
 +  (sizeof(struct net_device_subqueue) * (queue_count - 1))) 


Why queue_count - 1 ? It should be queue_count I think.


Otherwise ACK for this patch except that it should also contain the
sch_generic changes.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-23 Thread PJ Waskiewicz
Updated: Added checks for netif_subqueue_stopped() to netpoll,
pktgen, and software device dev_queue_xmit().  This will ensure
external events to these subsystems will be handled correctly if
a subqueue is shut down.

Add the multiqueue hardware device support API to the core network
stack.  Allow drivers to allocate multiple queues and manage them
at the netdev level if they choose to do so.

Added a new field to sk_buff, namely queue_mapping, for drivers to
know which tx_ring to select based on OS classification of the flow.

Signed-off-by: Peter P Waskiewicz Jr [EMAIL PROTECTED]
---

 include/linux/etherdevice.h |3 +-
 include/linux/netdevice.h   |   62 ++-
 include/linux/skbuff.h  |4 ++-
 net/core/dev.c  |   27 +--
 net/core/netpoll.c  |8 +++---
 net/core/pktgen.c   |   10 +--
 net/core/skbuff.c   |3 ++
 net/ethernet/eth.c  |9 +++---
 8 files changed, 104 insertions(+), 22 deletions(-)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index f48eb89..b3fbb54 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -39,7 +39,8 @@ extern void   eth_header_cache_update(struct hh_cache 
*hh, struct net_device *dev
 extern int eth_header_cache(struct neighbour *neigh,
 struct hh_cache *hh);
 
-extern struct net_device *alloc_etherdev(int sizeof_priv);
+extern struct net_device *alloc_etherdev_mq(int sizeof_priv, int queue_count);
+#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 
 /**
  * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e7913ee..6509eb4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -108,6 +108,14 @@ struct wireless_dev;
 #define MAX_HEADER (LL_MAX_HEADER + 48)
 #endif
 
+struct net_device_subqueue
+{
+   /* Give a control state for each queue.  This struct may contain
+* per-queue locks in the future.
+*/
+   unsigned long   state;
+};
+
 /*
  * Network device statistics. Akin to the 2.0 ether stats but
  * with byte counters.
@@ -325,6 +333,7 @@ struct net_device
 #define NETIF_F_VLAN_CHALLENGED1024/* Device cannot handle VLAN 
packets */
 #define NETIF_F_GSO2048/* Enable software GSO. */
 #define NETIF_F_LLTX   4096/* LockLess TX */
+#define NETIF_F_MULTI_QUEUE16384   /* Has multiple TX/RX queues */
 
/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT  16
@@ -543,6 +552,10 @@ struct net_device
 
/* rtnetlink link ops */
const struct rtnl_link_ops *rtnl_link_ops;
+
+   /* The TX queue control structures */
+   int egress_subqueue_count;
+   struct net_device_subqueue  egress_subqueue[0];
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
@@ -705,6 +718,48 @@ static inline int netif_running(const struct net_device 
*dev)
return test_bit(__LINK_STATE_START, dev-state);
 }
 
+/*
+ * Routines to manage the subqueues on a device.  We only need start
+ * stop, and a check if it's stopped.  All other device management is
+ * done at the overall netdevice level.
+ * Also test the device if we're multiqueue.
+ */
+static inline void netif_start_subqueue(struct net_device *dev, u16 
queue_index)
+{
+   clear_bit(__LINK_STATE_XOFF, dev-egress_subqueue[queue_index].state);
+}
+
+static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETPOLL_TRAP
+   if (netpoll_trap())
+   return;
+#endif
+   set_bit(__LINK_STATE_XOFF, dev-egress_subqueue[queue_index].state);
+}
+
+static inline int netif_subqueue_stopped(const struct net_device *dev,
+ u16 queue_index)
+{
+   return test_bit(__LINK_STATE_XOFF,
+   dev-egress_subqueue[queue_index].state);
+}
+
+static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETPOLL_TRAP
+   if (netpoll_trap())
+   return;
+#endif
+   if (test_and_clear_bit(__LINK_STATE_XOFF,
+  dev-egress_subqueue[queue_index].state))
+   __netif_schedule(dev);
+}
+
+static inline int netif_is_multiqueue(const struct net_device *dev)
+{
+   return (!!(NETIF_F_MULTI_QUEUE  dev-features));
+}
 
 /* Use this variant when it is known for sure that it
  * is executing from interrupt context.
@@ -995,8 +1050,11 @@ static inline void netif_tx_disable(struct net_device 
*dev)
 extern voidether_setup(struct net_device *dev);
 
 /* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev(int sizeof_priv, const char *name,
-  void (*setup)(struct 

[PATCH 2/3] NET: [CORE] Stack changes to add multiqueue hardware support API

2007-06-21 Thread PJ Waskiewicz
Add the multiqueue hardware device support API to the core network
stack.  Allow drivers to allocate multiple queues and manage them
at the netdev level if they choose to do so.

Added a new field to sk_buff, namely queue_mapping, for drivers to
know which tx_ring to select based on OS classification of the flow.

Signed-off-by: Peter P Waskiewicz Jr [EMAIL PROTECTED]
---

 include/linux/etherdevice.h |3 +-
 include/linux/netdevice.h   |   62 ++-
 include/linux/skbuff.h  |4 ++-
 net/core/dev.c  |   20 ++
 net/core/skbuff.c   |3 ++
 net/ethernet/eth.c  |9 +++---
 6 files changed, 87 insertions(+), 14 deletions(-)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index f48eb89..b3fbb54 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -39,7 +39,8 @@ extern void   eth_header_cache_update(struct hh_cache 
*hh, struct net_device *dev
 extern int eth_header_cache(struct neighbour *neigh,
 struct hh_cache *hh);
 
-extern struct net_device *alloc_etherdev(int sizeof_priv);
+extern struct net_device *alloc_etherdev_mq(int sizeof_priv, int queue_count);
+#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 
 /**
  * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e7913ee..6509eb4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -108,6 +108,14 @@ struct wireless_dev;
 #define MAX_HEADER (LL_MAX_HEADER + 48)
 #endif
 
+struct net_device_subqueue
+{
+   /* Give a control state for each queue.  This struct may contain
+* per-queue locks in the future.
+*/
+   unsigned long   state;
+};
+
 /*
  * Network device statistics. Akin to the 2.0 ether stats but
  * with byte counters.
@@ -325,6 +333,7 @@ struct net_device
 #define NETIF_F_VLAN_CHALLENGED1024/* Device cannot handle VLAN 
packets */
 #define NETIF_F_GSO2048/* Enable software GSO. */
 #define NETIF_F_LLTX   4096/* LockLess TX */
+#define NETIF_F_MULTI_QUEUE16384   /* Has multiple TX/RX queues */
 
/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT  16
@@ -543,6 +552,10 @@ struct net_device
 
/* rtnetlink link ops */
const struct rtnl_link_ops *rtnl_link_ops;
+
+   /* The TX queue control structures */
+   int egress_subqueue_count;
+   struct net_device_subqueue  egress_subqueue[0];
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
@@ -705,6 +718,48 @@ static inline int netif_running(const struct net_device 
*dev)
return test_bit(__LINK_STATE_START, dev-state);
 }
 
+/*
+ * Routines to manage the subqueues on a device.  We only need start
+ * stop, and a check if it's stopped.  All other device management is
+ * done at the overall netdevice level.
+ * Also test the device if we're multiqueue.
+ */
+static inline void netif_start_subqueue(struct net_device *dev, u16 
queue_index)
+{
+   clear_bit(__LINK_STATE_XOFF, dev-egress_subqueue[queue_index].state);
+}
+
+static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETPOLL_TRAP
+   if (netpoll_trap())
+   return;
+#endif
+   set_bit(__LINK_STATE_XOFF, dev-egress_subqueue[queue_index].state);
+}
+
+static inline int netif_subqueue_stopped(const struct net_device *dev,
+ u16 queue_index)
+{
+   return test_bit(__LINK_STATE_XOFF,
+   dev-egress_subqueue[queue_index].state);
+}
+
+static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETPOLL_TRAP
+   if (netpoll_trap())
+   return;
+#endif
+   if (test_and_clear_bit(__LINK_STATE_XOFF,
+  dev-egress_subqueue[queue_index].state))
+   __netif_schedule(dev);
+}
+
+static inline int netif_is_multiqueue(const struct net_device *dev)
+{
+   return (!!(NETIF_F_MULTI_QUEUE  dev-features));
+}
 
 /* Use this variant when it is known for sure that it
  * is executing from interrupt context.
@@ -995,8 +1050,11 @@ static inline void netif_tx_disable(struct net_device 
*dev)
 extern voidether_setup(struct net_device *dev);
 
 /* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev(int sizeof_priv, const char *name,
-  void (*setup)(struct net_device *));
+extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
+ void (*setup)(struct net_device *),
+ int queue_count);
+#define alloc_netdev(sizeof_priv, name, setup) \
+   alloc_netdev_mq(sizeof_priv,