[PATCH 0/3] Splice network receive support

2007-06-12 Thread Jens Axboe
Hi,

This series of patches applies on top of the splice series just
posted. It implements basic network receive support, ie splicing
from a socket to a pipe.

There seems to be a skbuff_head_cache leak somewhere that I need
to track down; otherwise it works fine for me.

-- 
Jens Axboe


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] splice: don't assume regular pages in splice_to_pipe()

2007-06-12 Thread Jens Axboe
Allow caller to pass in a release function, there might be
other resources that need releasing as well. Needed for
network receive.

Signed-off-by: Jens Axboe [EMAIL PROTECTED]
---
 fs/splice.c|9 -
 include/linux/splice.h |1 +
 2 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index f24e367..25ec9c8 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -247,11 +247,16 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
}
 
while (page_nr  spd-nr_pages)
-   page_cache_release(spd-pages[page_nr++]);
+   spd-spd_release(spd, page_nr++);
 
return ret;
 }
 
+static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
+{
+   page_cache_release(spd-pages[i]);
+}
+
 static int
 __generic_file_splice_read(struct file *in, loff_t *ppos,
   struct pipe_inode_info *pipe, size_t len,
@@ -270,6 +275,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
.partial = partial,
.flags = flags,
.ops = page_cache_pipe_buf_ops,
+   .spd_release = spd_release_page,
};
 
index = *ppos  PAGE_CACHE_SHIFT;
@@ -1442,6 +1448,7 @@ static long vmsplice_to_pipe(struct file *file, const 
struct iovec __user *iov,
.partial = partial,
.flags = flags,
.ops = user_page_pipe_buf_ops,
+   .spd_release = spd_release_page,
};
 
pipe = pipe_info(file-f_path.dentry-d_inode);
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 1a1182b..04c1068 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -53,6 +53,7 @@ struct splice_pipe_desc {
int nr_pages;   /* number of pages in map */
unsigned int flags; /* splice flags */
const struct pipe_buf_operations *ops;/* ops associated with output 
pipe */
+   void (*spd_release)(struct splice_pipe_desc *, unsigned int);
 };
 
 typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
-- 
1.5.2.1.174.gcd03

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] TCP splice receive support

2007-06-12 Thread Jens Axboe
Support for network splice receive.

Signed-off-by: Jens Axboe [EMAIL PROTECTED]
---
 include/linux/net.h|3 +
 include/linux/skbuff.h |5 +
 include/net/tcp.h  |3 +
 net/core/skbuff.c  |  231 
 net/ipv4/af_inet.c |1 +
 net/ipv4/tcp.c |  129 +++
 net/socket.c   |   13 +++
 7 files changed, 385 insertions(+), 0 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index efc4517..472ee12 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -19,6 +19,7 @@
 #define _LINUX_NET_H
 
 #include linux/wait.h
+#include linux/splice.h
 #include asm/socket.h
 
 struct poll_table_struct;
@@ -165,6 +166,8 @@ struct proto_ops {
  struct vm_area_struct * vma);
ssize_t (*sendpage)  (struct socket *sock, struct page *page,
  int offset, size_t size, int flags);
+   ssize_t (*splice_read)(struct socket *sock,  loff_t *ppos,
+  struct pipe_inode_info *pipe, size_t 
len, unsigned int flags);
 };
 
 struct net_proto_family {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e7367c7..64e3eed 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1504,6 +1504,11 @@ extern int  skb_store_bits(struct sk_buff 
*skb, int offset,
 extern __wsum skb_copy_and_csum_bits(const struct sk_buff *skb,
  int offset, u8 *to, int len,
  __wsum csum);
+extern int skb_splice_bits(struct sk_buff *skb,
+   unsigned int offset,
+   struct pipe_inode_info *pipe,
+   unsigned int len,
+   unsigned int flags);
 extern void   skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 extern void   skb_split(struct sk_buff *skb,
 struct sk_buff *skb1, const u32 len);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a8af9ae..8e86697 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -308,6 +308,9 @@ extern int  tcp_twsk_unique(struct sock *sk,
 
 extern voidtcp_twsk_destructor(struct sock *sk);
 
+extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
+   struct pipe_inode_info *pipe, 
size_t len, unsigned int flags);
+
 static inline void tcp_dec_quickack_mode(struct sock *sk,
 const unsigned int pkts)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7c6a34e..daea7b0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -52,6 +52,7 @@
 #endif
 #include linux/string.h
 #include linux/skbuff.h
+#include linux/splice.h
 #include linux/cache.h
 #include linux/rtnetlink.h
 #include linux/init.h
@@ -71,6 +72,40 @@
 static struct kmem_cache *skbuff_head_cache __read_mostly;
 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 
+static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+   struct sk_buff *skb = (struct sk_buff *) buf-private;
+
+   kfree_skb(skb);
+}
+
+static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
+   struct pipe_buffer *buf)
+{
+   struct sk_buff *skb = (struct sk_buff *) buf-private;
+
+   skb_get(skb);
+}
+
+static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
+  struct pipe_buffer *buf)
+{
+   return 1;
+}
+
+
+/* Pipe buffer operations for a socket. */
+static struct pipe_buf_operations sock_pipe_buf_ops = {
+   .can_merge = 0,
+   .map = generic_pipe_buf_map,
+   .unmap = generic_pipe_buf_unmap,
+   .pin = generic_pipe_buf_pin,
+   .release = sock_pipe_buf_release,
+   .steal = sock_pipe_buf_steal,
+   .get = sock_pipe_buf_get,
+};
+
 /*
  * Keep out-of-line to prevent kernel bloat.
  * __builtin_return_address is not used because it is not always
@@ -1116,6 +1151,202 @@ fault:
return -EFAULT;
 }
 
+/*
+ * Callback from splice_to_pipe(), if we need to release some pages
+ * at the end of the spd in case we error'ed out in filling the pipe.
+ */
+static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
+{
+   struct sk_buff *skb = (struct sk_buff *) spd-partial[i].private;
+
+   kfree_skb(skb);
+}
+
+/*
+ * Fill page/offset/length into spd, if it can hold more pages.
+ */
+static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page 
*page,
+   unsigned int len, unsigned int offset,
+   struct sk_buff *skb)
+{
+   

[PATCH 2/3] tcp_read_sock: allow recv_actor() to return negative error value

2007-06-12 Thread Jens Axboe
Signed-off-by: Jens Axboe [EMAIL PROTECTED]
---
 net/ipv4/tcp.c |8 ++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cd3c7e9..450f44b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1064,7 +1064,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t 
*desc,
break;
}
used = recv_actor(desc, skb, offset, len);
-   if (used = len) {
+   if (used  0) {
+   if (!copied)
+   copied = used;
+   break;
+   } else if (used = len) {
seq += used;
copied += used;
offset += used;
@@ -1086,7 +1090,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t 
*desc,
tcp_rcv_space_adjust(sk);
 
/* Clean up data we have read: This will do ACK frames. */
-   if (copied)
+   if (copied  0)
tcp_cleanup_rbuf(sk, copied);
return copied;
 }
-- 
1.5.2.1.174.gcd03

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] CONFIG_INET depend on CONFIG_SYSCTL

2007-06-12 Thread David Miller
From: Yoshinori Sato [EMAIL PROTECTED]
Date: Tue, 12 Jun 2007 16:38:55 +0900

 It cannot build with CONFIG_SYSCTL=n and CONFIG_INET=y.
 In case of CONFIG_INET=y it should become CONFIG_SYSCTL=y. 
 
 Signed-off-by: Yoshinori Sato [EMAIL PROTECTED]

1) Please post networking patches to netdev@vger.kernel.org
   which has been added to the CC:

2) It is much better to add the appropriate CONFIG_SYSCTL
   ifdefs to the INET code than to force it on for everyone.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-2.6 1/1] [TCP]: Fix left_out setting during FRTO

2007-06-12 Thread Ilpo Järvinen
Without FRTO, the tcp_try_to_open is never called with
lost_out > 0 (see tcp_time_to_recover). However, when FRTO is
enabled, the !tp->lost condition is not used until end of FRTO
because that way TCP avoids premature entry to fast recovery
during FRTO.

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---

This case was found during left_out drop audit (only relevant to
net-2.6 since tcp-2.6 does a right thing after left_out drop).


diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 74683d8..ed4a1bd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2037,7 +2037,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 {
struct tcp_sock *tp = tcp_sk(sk);
 
-   tp-left_out = tp-sacked_out;
+   tcp_sync_left_out(tp);
 
if (tp-retrans_out == 0)
tp-retrans_stamp = 0;
-- 
1.5.0.6


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Johannes Berg
On Mon, 2007-06-11 at 08:23 -0400, jamal wrote:
 On Mon, 2007-11-06 at 13:58 +0200, Patrick McHardy wrote:
 
  That's not true. Assume PSL has lots of packets, PSH is empty. We
  fill the PHL queue until there is no room left, so the driver
  has to stop the queue. 
 
 Sure. Packets stashed on any DMA ring are considered gone to the
 wire. That is a very valid assumption to make.

Not at all! Packets could be on the DMA queue forever if you're feeding
out more packets. Heck, on most wireless hardware packets can even be
*expired* from the DMA queue and you get an indication that it was
impossible to send them.

johannes


signature.asc
Description: This is a digitally signed message part


Re: [2.6.21.1] soft lockup when removing netconsole module

2007-06-12 Thread Jarek Poplawski
On Tue, May 29, 2007 at 12:56:28AM -0700, Andrew Morton wrote:
 On Sat, 26 May 2007 17:40:12 +0200 Folkert van Heusden [EMAIL PROTECTED] 
 wrote:
 
  When trying to remove the netconsole module, I got the following kernel
  output after a while (couple of minutes iirc):
  
  [525720.117293] BUG: soft lockup detected on CPU#1!
  [525720.117353]  [c1004d53] show_trace_log_lvl+0x1a/0x30
  [525720.117439]  [c1004d7b] show_trace+0x12/0x14
  [525720.117526]  [c1004e75] dump_stack+0x16/0x18
  [525720.117613]  [c104dd5b] softlockup_tick+0xa6/0xc2
  [525720.117694]  [c1026855] run_local_timers+0x12/0x14
  [525720.117738]  [c1026669] update_process_times+0x72/0xa1
  [525720.117744]  [c1038673] tick_sched_timer+0x53/0xb6
  [525720.117748]  [c1033d62] hrtimer_interrupt+0x189/0x1e3
  [525720.117753]  [c100e9e2] local_apic_timer_interrupt+0x55/0x5b
  [525720.117761]  [c100ea12] smp_apic_timer_interrupt+0x2a/0x39
  [525720.117766]  [c1004a3f] apic_timer_interrupt+0x33/0x38
  [525720.117770]  [c120f4b1] mutex_lock+0x8/0xa
  [525720.117775]  [c102d2f0] flush_workqueue+0x2f/0x8f
  [525720.117780]  [c102d7a0] cancel_rearming_delayed_workqueue+0x29/0x2b
  [525720.117785]  [c102d7b1] cancel_rearming_delayed_work+0xf/0x11
  [525720.117790]  [c11be143] netpoll_cleanup+0x75/0xa5
  [525720.117794]  [f893712d] cleanup_netconsole+0x17/0x1a [netconsole]
  [525720.117804]  [c1041f11] sys_delete_module+0x12f/0x14f
  [525720.117809]  [c1003f74] syscall_call+0x7/0xb
  [525720.117812]  ===
  
  Also the rmmod hangs and would not exit even with kill -9. It also
  sucks up 100% cpu.
 
 Jason recently posted a mystery patch without telling us what problem it
 fixed.
 

To be fair the problem should be known:

http://marc.info/?l=linux-kernel&m=117700287817801&w=2

List:   linux-kernel
Subject:Re: [PATCH -mm] workqueue: debug possible endless loop in 
cancel_rearming_delayed_work
From:   Chuck Ebbert cebbert () redhat ! com
Date:   2007-04-19 17:07:11
Message-ID: 4627A1BF.8080406 () redhat ! com

 Okay, an easy test for it: insmod netconsole ; rmmod netconsole
 
 In 2.6.20.x it loops forever and cancel_rearming_delayed_work()
 is part of the trace...

I hoped the discussion about cancel_rearming_delayed_work would
reach more people (there was also a patch proposal to add a warning
to the usage comment). But it seems it was not enough...

Of course such a problem should preferably be fixed by somebody who
knows the code (alas I don't know netconsole), to be sure all needed
cancels are still done after this change. I hope Jason's patch is
right but I'm a little surprised I can't see netdev in cc (I'll try
to fix this).

Cheers,
Jarek P.

PS: I'm very sorry for such a late response (holidays).

 It looks like you just found it: cancel_rearming_delayed_work() will hang
 if the work isn't actually pending.  Please test this:
 
 
 From: Jason Wessel [EMAIL PROTECTED]
 
 Do not call cancel_rearming_delayed_work() if there is no
 pending work.
 
 Signed-off-by: Jason Wessel [EMAIL PROTECTED]
 Signed-off-by: Andrew Morton [EMAIL PROTECTED]
 ---
 
  net/core/netpoll.c |6 --
  1 file changed, 4 insertions(+), 2 deletions(-)
 
 diff -puN net/core/netpoll.c~a net/core/netpoll.c
 --- a/net/core/netpoll.c~a
 +++ a/net/core/netpoll.c
 @@ -784,8 +784,10 @@ void netpoll_cleanup(struct netpoll *np)
   if (atomic_dec_and_test(npinfo-refcnt)) {
   skb_queue_purge(npinfo-arp_tx);
   skb_queue_purge(npinfo-txq);
 - cancel_rearming_delayed_work(npinfo-tx_work);
 - flush_scheduled_work();
 + if (delayed_work_pending(npinfo-tx_work)) {
 + 
 cancel_rearming_delayed_work(npinfo-tx_work);
 + flush_scheduled_work();
 + }
  
   kfree(npinfo);
   }
 _
 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RFC] network splice receive

2007-06-12 Thread Evgeniy Polyakov
On Sat, Jun 09, 2007 at 08:36:09AM +0200, Jens Axboe ([EMAIL PROTECTED]) wrote:
 On Fri, Jun 08 2007, Evgeniy Polyakov wrote:
  On Fri, Jun 08, 2007 at 06:57:25PM +0400, Evgeniy Polyakov ([EMAIL 
  PROTECTED]) wrote:
   I will try some things for the next 30-60 minutes, and then will leave
   on a canoe trip until Tuesday, so will not be able to work on this idea.
  
  Ok, replacing in fs/splice.c every page_cache_release() with
  static void splice_page_release(struct page *p)
  {
  if (!PageSlab(p))
  page_cache_release(p);
  }
 
 Ehm, I don't see why that should be necessary. Except in
 splice_to_pipe(), I have considered that we need to pass in a release
 function if mapping fails at some point. But it's probably best to do
 that in the caller, since they have the knowledge of how to release the
 pages.
 
 The rest of the PageSlab() tests are bogus.

I had a crash dump where the page was indeed released via splice_to_pipe();
I did not investigate whether it is possible to release the provided page in
other places. I think that if, in the future, there are other slab usage
cases besides network receive, that might be useful, but as it stands it is
not needed.

  and putting a cloned skb into the private field instead of the
  original one in spd_fill_page() ends up without a kernel hang.
 
 Why? Seems pointless to allocate a clone just to hold on to the skb, a
 reference should be equally good. I would not be opposed to doing it
 this way, I just don't see what a clone buys us as compared to just
 holding that reference to the skb.

The receive code does not expect shared skbs - too many fields are changed
on the assumption that the skb is a private copy.

  I'm not sure it is correct, that page can be released in fs/splice.c
  without calling any callback from network code, when network data is
  being processed.
 
 Please explain!

I had a crash dump where an attempt was made to release the page in
fs/splice.c:splice_to_pipe(). I do not have the details handy, but the best
solution would be to provide a release callback and use that instead of
page_cache_release().

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH SET] pktgen IPSEC 0/4

2007-06-12 Thread jamal

This is a set of patches that adds IPsec functionality to pktgen.
It is against Dave's net-2.6.23.

Robert, please take a closer look at this set and either sign off or
comment so that I can redo something. I have a short window in which I
can fix things before I become busy.

Dave, I would like to push these to net-2.6.23 as soon as Robert Acks
them.

cheers,
jamal 


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] pktgen IPSEC 1/4: Centralize pktgen packet overhead management

2007-06-12 Thread jamal
Manual labor still ... 1 of 4

cheers,
jamal

commit 38477d7ddfa58f58cce99bc902b4c18883647a71
Author: Jamal Hadi Salim [EMAIL PROTECTED]
Date:   Tue Jun 12 06:43:00 2007 -0400

[PKTGEN] Centralize packet overhead tracking
Track the extra packet overhead for VLAN tags, MPLS, IPSEC etc

Signed-off-by: Jamal Hadi Salim [EMAIL PROTECTED]

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9cd3a1c..1352316 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -228,6 +228,7 @@ struct pktgen_dev {
 
int min_pkt_size;   /* = ETH_ZLEN; */
int max_pkt_size;   /* = ETH_ZLEN; */
+   int pkt_overhead;   /* overhead for MPLS, VLANs, IPSEC etc */
int nfrags;
__u32 delay_us; /* Default delay */
__u32 delay_ns;
@@ -2075,6 +2076,13 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 
spin_until_us)
pkt_dev-idle_acc += now - start;
 }
 
+static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
+{
+   pkt_dev-pkt_overhead += pkt_dev-nr_labels*sizeof(u32);
+   pkt_dev-pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
+   pkt_dev-pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
+}
+
 /* Increment/randomize headers according to flags and current values
  * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
  */
@@ -2323,9 +2331,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device 
*odev,
 
datalen = (odev-hard_header_len + 16)  ~0xf;
skb = alloc_skb(pkt_dev-cur_pkt_size + 64 + datalen +
-   pkt_dev-nr_labels*sizeof(u32) +
-   VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev),
-   GFP_ATOMIC);
+   pkt_dev-pkt_overhead, GFP_ATOMIC);
if (!skb) {
sprintf(pkt_dev-result, No memory);
return NULL;
@@ -2368,7 +2374,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device 
*odev,
 
/* Eth + IPh + UDPh + mpls */
datalen = pkt_dev-cur_pkt_size - 14 - 20 - 8 -
- pkt_dev-nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - 
SVLAN_TAG_SIZE(pkt_dev);
+ pkt_dev-pkt_overhead;
if (datalen  sizeof(struct pktgen_hdr))
datalen = sizeof(struct pktgen_hdr);
 
@@ -2391,8 +2397,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device 
*odev,
iph-check = ip_fast_csum((void *)iph, iph-ihl);
skb-protocol = protocol;
skb-mac_header = (skb-network_header - ETH_HLEN -
-  pkt_dev-nr_labels * sizeof(u32) -
-  VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
+  pkt_dev-pkt_overhead);
skb-dev = odev;
skb-pkt_type = PACKET_HOST;
 
@@ -2662,9 +2667,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device 
*odev,
mod_cur_headers(pkt_dev);
 
skb = alloc_skb(pkt_dev-cur_pkt_size + 64 + 16 +
-   pkt_dev-nr_labels*sizeof(u32) +
-   VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev),
-   GFP_ATOMIC);
+   pkt_dev-pkt_overhead, GFP_ATOMIC);
if (!skb) {
sprintf(pkt_dev-result, No memory);
return NULL;
@@ -2708,7 +2711,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device 
*odev,
/* Eth + IPh + UDPh + mpls */
datalen = pkt_dev-cur_pkt_size - 14 -
  sizeof(struct ipv6hdr) - sizeof(struct udphdr) -
- pkt_dev-nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - 
SVLAN_TAG_SIZE(pkt_dev);
+ pkt_dev-pkt_overhead;
 
if (datalen  sizeof(struct pktgen_hdr)) {
datalen = sizeof(struct pktgen_hdr);
@@ -2738,8 +2741,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device 
*odev,
ipv6_addr_copy(iph-saddr, pkt_dev-cur_in6_saddr);
 
skb-mac_header = (skb-network_header - ETH_HLEN -
-  pkt_dev-nr_labels * sizeof(u32) -
-  VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
+  pkt_dev-pkt_overhead);
skb-protocol = protocol;
skb-dev = odev;
skb-pkt_type = PACKET_HOST;
@@ -2857,6 +2859,7 @@ static void pktgen_run(struct pktgen_thread *t)
pkt_dev-started_at = getCurUs();
pkt_dev-next_tx_us = getCurUs();   /* Transmit 
immediately */
pkt_dev-next_tx_ns = 0;
+   set_pkt_overhead(pkt_dev);
 
strcpy(pkt_dev-result, Starting);
started++;


[PATCH] pktgen IPSEC 2/4: Introduce pktgen sequential flows

2007-06-12 Thread jamal
2 of 4

cheers,
jamal
commit 882c296bb3f153e1ac770a874c75cfb2bab8481b
Author: Jamal Hadi Salim [EMAIL PROTECTED]
Date:   Tue Jun 12 07:24:00 2007 -0400

[PKTGEN] Introduce sequential flows

By default all flows in pktgen are randomly selected.
This patch introduces ability to have all defined flows to
be sent sequentially. Robert defined randomness to be the
default behavior.

Signed-off-by: Jamal Hadi Salim [EMAIL PROTECTED]

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1352316..bc4fb3b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -181,6 +181,7 @@
 #define F_MPLS_RND(18)   /* Random MPLS labels */
 #define F_VID_RND (19)   /* Random VLAN ID */
 #define F_SVID_RND(110)  /* Random SVLAN ID */
+#define F_FLOW_SEQ(111)  /* Sequential flows */
 
 /* Thread control flag bits */
 #define T_TERMINATE   (10)
@@ -207,8 +208,12 @@ static struct proc_dir_entry *pg_proc_dir = NULL;
 struct flow_state {
__be32 cur_daddr;
int count;
+   __u32 flags;
 };
 
+/* flow flag bits */
+#define F_INIT   (10)/* flow has been initialized */
+
 struct pktgen_dev {
/*
 * Try to keep frequent/infrequent used vars. separated.
@@ -342,6 +347,7 @@ struct pktgen_dev {
unsigned cflows;/* Concurrent flows (config) */
unsigned lflow; /* Flow length  (config) */
unsigned nflows;/* accumulated flows (stats) */
+   unsigned curfl; /* current sequenced flow (state)*/
 
char result[512];
 };
@@ -691,6 +697,13 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev-flags  F_MPLS_RND)
seq_printf(seq,  MPLS_RND  );
 
+   if (pkt_dev-cflows) {
+   if (pkt_dev-flags  F_FLOW_SEQ)
+   seq_printf(seq,  FLOW_SEQ  ); /*in sequence flows*/
+   else
+   seq_printf(seq,  FLOW_RND  );
+   }
+
if (pkt_dev-flags  F_MACSRC_RND)
seq_printf(seq, MACSRC_RND  );
 
@@ -1182,6 +1195,9 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, !SVID_RND) == 0)
pkt_dev-flags = ~F_SVID_RND;
 
+   else if (strcmp(f, FLOW_SEQ) == 0)
+   pkt_dev-flags |= F_FLOW_SEQ;
+
else if (strcmp(f, !IPV6) == 0)
pkt_dev-flags = ~F_IPV6;
 
@@ -1190,7 +1206,7 @@ static ssize_t pktgen_if_write(struct file *file,
Flag -:%s:- unknown\nAvailable flags, (prepend 
! to un-set flag):\n%s,
f,
IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, 
-   MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, 
MPLS_RND, VID_RND, SVID_RND\n);
+   MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, 
MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ\n);
return count;
}
sprintf(pg_result, OK: flags=0x%x, pkt_dev-flags);
@@ -2083,6 +2099,37 @@ static inline void set_pkt_overhead(struct pktgen_dev 
*pkt_dev)
pkt_dev-pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
 }
 
+static inline int f_seen(struct pktgen_dev *pkt_dev, int flow)
+{
+
+   if (pkt_dev-flows[flow].flags  F_INIT)
+   return 1;
+   else
+   return 0;
+}
+
+static inline int f_pick(struct pktgen_dev *pkt_dev)
+{
+   int flow = pkt_dev-curfl;
+
+   if (pkt_dev-flags  F_FLOW_SEQ) {
+   if (pkt_dev-flows[flow].count = pkt_dev-lflow) {
+   /* reset time */
+   pkt_dev-flows[flow].count = 0;
+   pkt_dev-curfl += 1;
+   if (pkt_dev-curfl = pkt_dev-cflows)
+   pkt_dev-curfl = 0; /*reset */
+   }
+   } else {
+   flow = random32() % pkt_dev-cflows;
+
+   if (pkt_dev-flows[flow].count  pkt_dev-lflow)
+   pkt_dev-flows[flow].count = 0;
+   }
+
+   return pkt_dev-curfl;
+}
+
 /* Increment/randomize headers according to flags and current values
  * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
  */
@@ -2092,12 +2139,8 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__u32 imx;
int flow = 0;
 
-   if (pkt_dev-cflows) {
-   flow = random32() % pkt_dev-cflows;
-
-   if (pkt_dev-flows[flow].count  pkt_dev-lflow)
-   pkt_dev-flows[flow].count = 0;
-   }
+   if (pkt_dev-cflows)
+   flow = f_pick(pkt_dev);
 
/*  Deal with source MAC */
if (pkt_dev-src_mac_count  1) {
@@ -2213,7 +2256,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev-cur_saddr = htonl(t);
}
 
-   if (pkt_dev-cflows  pkt_dev-flows[flow].count != 0) {
+   if 

[PATCH] pktgen IPSEC 3/4: Introduce xfrm SAD only lookup

2007-06-12 Thread jamal
3 of 4 ..

cheers,
jamal

commit 677f1c1459218919f5aa2622625dc8709c2a98ce
Author: Jamal Hadi Salim [EMAIL PROTECTED]
Date:   Tue Jun 12 07:28:59 2007 -0400

[XFRM] Introduce standalone SAD lookup
This allows other in-kernel functions to do SAD lookups.
The only known user at the moment is pktgen.

Signed-off-by: Jamal Hadi Salim [EMAIL PROTECTED]

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 311f25a..79d2c37 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -920,6 +920,10 @@ extern struct xfrm_state *xfrm_state_find(xfrm_address_t 
*daddr, xfrm_address_t
  struct flowi *fl, struct xfrm_tmpl 
*tmpl,
  struct xfrm_policy *pol, int *err,
  unsigned short family);
+extern struct xfrm_state * xfrm_stateonly_find(xfrm_address_t *daddr,
+  xfrm_address_t *saddr,
+  unsigned short family,
+  u8 mode, u8 proto, u32 reqid);
 extern int xfrm_state_check_expire(struct xfrm_state *x);
 extern void xfrm_state_insert(struct xfrm_state *x);
 extern int xfrm_state_add(struct xfrm_state *x);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 85f3f43..b8562e4 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -686,6 +686,41 @@ out:
return x;
 }
 
+struct xfrm_state *
+xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
+   unsigned short family, u8 mode, u8 proto, u32 reqid)
+{
+   unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
+   struct xfrm_state *rx = NULL, *x = NULL;
+   struct hlist_node *entry;
+
+   spin_lock(xfrm_state_lock);
+   hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+   if (x-props.family == family 
+   x-props.reqid == reqid 
+   !(x-props.flags  XFRM_STATE_WILDRECV) 
+   xfrm_state_addr_check(x, daddr, saddr, family) 
+   mode == x-props.mode 
+   proto == x-id.proto)  {
+
+   if (x-km.state != XFRM_STATE_VALID)
+   continue;
+   else {
+   rx = x;
+   break;
+   }
+   }
+   }
+
+   if (rx)
+   xfrm_state_hold(rx);
+   spin_unlock(xfrm_state_lock);
+
+
+   return rx;
+}
+EXPORT_SYMBOL(xfrm_stateonly_find);
+
 static void __xfrm_state_insert(struct xfrm_state *x)
 {
unsigned int h;


[PATCH] pktgen IPSEC 4/4: Add IPSEC support to pktgen

2007-06-12 Thread jamal

4 of 4

cheers,
jamal

commit e035613eae587251b8c98b7d503eab207f1d26e2
Author: Jamal Hadi Salim [EMAIL PROTECTED]
Date:   Tue Jun 12 07:43:30 2007 -0400

[PKTGEN] IPSEC support
Added transport mode ESP support for starters.
    I will send more of these modes and types once I have resolved
    the tunnel mode issues.

Signed-off-by: Jamal Hadi Salim [EMAIL PROTECTED]

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index bc4fb3b..bcec8e4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -152,6 +152,9 @@
 #include net/checksum.h
 #include net/ipv6.h
 #include net/addrconf.h
+#ifdef CONFIG_XFRM
+#include net/xfrm.h
+#endif
 #include asm/byteorder.h
 #include linux/rcupdate.h
 #include asm/bitops.h
@@ -182,6 +185,7 @@
 #define F_VID_RND (19)   /* Random VLAN ID */
 #define F_SVID_RND(110)  /* Random SVLAN ID */
 #define F_FLOW_SEQ(111)  /* Sequential flows */
+#define F_IPSEC_ON(112)  /* ipsec on for flows */
 
 /* Thread control flag bits */
 #define T_TERMINATE   (10)
@@ -208,6 +212,9 @@ static struct proc_dir_entry *pg_proc_dir = NULL;
 struct flow_state {
__be32 cur_daddr;
int count;
+#ifdef CONFIG_XFRM
+   struct xfrm_state *x;
+#endif
__u32 flags;
 };
 
@@ -348,7 +355,10 @@ struct pktgen_dev {
unsigned lflow; /* Flow length  (config) */
unsigned nflows;/* accumulated flows (stats) */
unsigned curfl; /* current sequenced flow (state)*/
-
+#ifdef CONFIG_XFRM
+   __u8ipsmode;/* IPSEC mode (config) */
+   __u8ipsproto;   /* IPSEC type (config) */
+#endif
char result[512];
 };
 
@@ -704,6 +714,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_printf(seq,  FLOW_RND  );
}
 
+   if (pkt_dev-flags  F_IPSEC_ON)
+   seq_printf(seq,  IPSEC  );
+
if (pkt_dev-flags  F_MACSRC_RND)
seq_printf(seq, MACSRC_RND  );
 
@@ -1198,6 +1211,11 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, FLOW_SEQ) == 0)
pkt_dev-flags |= F_FLOW_SEQ;
 
+#ifdef CONFIG_XFRM
+   else if (strcmp(f, IPSEC) == 0)
+   pkt_dev-flags |= F_IPSEC_ON;
+#endif
+
else if (strcmp(f, !IPV6) == 0)
pkt_dev-flags = ~F_IPV6;
 
@@ -1206,7 +1224,7 @@ static ssize_t pktgen_if_write(struct file *file,
Flag -:%s:- unknown\nAvailable flags, (prepend 
! to un-set flag):\n%s,
f,
IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, 
-   MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, 
MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ\n);
+   MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, 
MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n);
return count;
}
sprintf(pg_result, OK: flags=0x%x, pkt_dev-flags);
@@ -2094,6 +2112,7 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 
spin_until_us)
 
 static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
 {
+   pkt_dev-pkt_overhead = 0;
pkt_dev-pkt_overhead += pkt_dev-nr_labels*sizeof(u32);
pkt_dev-pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
pkt_dev-pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
@@ -2130,6 +2149,31 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
return pkt_dev-curfl;
 }
 
+
+#ifdef CONFIG_XFRM
+/* If there was already an IPSEC SA, we keep it as is, else
+ * we go look for it ...
+*/
+inline
+void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
+{
+   struct xfrm_state *x = pkt_dev-flows[flow].x;
+   if (!x) {
+   /*slow path: we dont already have xfrm_state*/
+   x = xfrm_stateonly_find((xfrm_address_t *)pkt_dev-cur_daddr,
+   (xfrm_address_t *)pkt_dev-cur_saddr,
+   AF_INET,
+   pkt_dev-ipsmode,
+   pkt_dev-ipsproto, 0);
+   if (x) {
+   pkt_dev-flows[flow].x = x;
+   set_pkt_overhead(pkt_dev);
+   pkt_dev-pkt_overhead+=x-props.header_len;
+   }
+
+   }
+}
+#endif
 /* Increment/randomize headers according to flags and current values
  * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
  */
@@ -2289,6 +2333,10 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev-flows[flow].flags |= F_INIT;
pkt_dev-flows[flow].cur_daddr =
pkt_dev-cur_daddr;
+#ifdef CONFIG_XFRM
+   if (pkt_dev-flags  F_IPSEC_ON)
+   get_ipsec_sa(pkt_dev, flow);
+#endif

[RFC PATCH net-2.6] [TCP]: Congestion control API RTT sampling fix

2007-06-12 Thread Ilpo Järvinen
I was thinking something like this to fix the cc module breakage 
introduced by the API change (haven't tested it besides compile):


[RFC PATCH net-2.6] [TCP]: Congestion control API RTT sampling fix


Commit 164891aadf1721fca4dce473bb0e0998181537c6 broke RTT
sampling of congestion control modules. Inaccurate timestamps
could be fed to them without providing any way for them to
identify such cases. Previously the RTT sampler was called only if
FLAG_RETRANS_DATA_ACKED was not set, filtering inaccurate
timestamps nicely. In addition, the new behavior could give an
invalid timestamp (zero) to RTT sampler if only skbs with
TCPCB_RETRANS were ACKed. This solves both problems.

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 include/linux/ktime.h   |   18 ++
 include/linux/skbuff.h  |4 
 net/ipv4/tcp_illinois.c |3 +++
 net/ipv4/tcp_input.c|6 +-
 net/ipv4/tcp_lp.c   |3 ++-
 net/ipv4/tcp_vegas.c|3 +++
 net/ipv4/tcp_veno.c |3 +++
 7 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index c762954..9f7fa3e 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -102,6 +102,12 @@ static inline ktime_t ktime_set(const long secs, const 
unsigned long nsecs)
 #define ktime_add_ns(kt, nsval) \
({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; })
 
+/* Compare two ktime_t variables, returns 1 if equal */
+static inline int ktime_equal(const ktime_t cmp1, const ktime_t cmp2)
+{
+   return cmp1.tv64 == cmp2.tv64;
+}
+
 /* convert a timespec to ktime_t format: */
 static inline ktime_t timespec_to_ktime(struct timespec ts)
 {
@@ -200,6 +206,18 @@ static inline ktime_t ktime_add(const ktime_t add1, const 
ktime_t add2)
 extern ktime_t ktime_add_ns(const ktime_t kt, u64 nsec);
 
 /**
+ * ktime_equal - Compares two ktime_t variables to see if they are equal
+ * @cmp1:  comparable1
+ * @cmp2:  comparable2
+ *
+ * Compare two ktime_t variables, returns 1 if equal
+ */
+static inline int ktime_equal(const ktime_t cmp1, const ktime_t cmp2)
+{
+   return !((cmp1.tv.sec ^ cmp2.tv.sec) | (cmp1.tv.usec ^ cmp2.tv.usec));
+}
+
+/**
  * timespec_to_ktime - convert a timespec to ktime_t format
  * @ts:the timespec variable to convert
  *
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e7367c7..6f0b2f7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1579,6 +1579,10 @@ static inline ktime_t net_timedelta(ktime_t t)
return ktime_sub(ktime_get_real(), t);
 }
 
+static inline ktime_t net_invalid_timestamp(void)
+{
+   return ktime_set(0, 0);
+}
 
 extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 4adc47c..5f8d01b 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -90,6 +90,9 @@ static void tcp_illinois_acked(struct sock *sk, u32 
pkts_acked, ktime_t last)
 
ca-acked = pkts_acked;
 
+   if (ktime_equal(last, net_invalid_timestamp())
+   return;
+
rtt = ktime_to_us(net_timedelta(last));
 
/* ignore bogus values, this prevents wraparound in alpha math */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ed4a1bd..d506bdc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2409,7 +2409,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 
*seq_rtt_p)
int acked = 0;
int prior_packets = tp-packets_out;
__s32 seq_rtt = -1;
-   ktime_t last_ackt = ktime_set(0,0);
+   ktime_t last_ackt = net_invalid_timestamp();
 
while ((skb = tcp_write_queue_head(sk)) 
   skb != tcp_send_head(sk)) {
@@ -2487,6 +2487,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 
*seq_rtt_p)
tcp_ack_update_rtt(sk, acked, seq_rtt);
tcp_ack_packets_out(sk);
 
+   /* Is the ACK triggering packet unambiguous? */
+   if (acked  FLAG_RETRANS_DATA_ACKED)
+   last_ackt = net_invalid_timestamp();
+
if (ca_ops-pkts_acked)
ca_ops-pkts_acked(sk, pkts_acked, last_ackt);
}
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 43294ad..efa358b 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -266,7 +266,8 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 
num_acked, ktime_t last)
struct tcp_sock *tp = tcp_sk(sk);
struct lp *lp = inet_csk_ca(sk);
 
-   tcp_lp_rtt_sample(sk,  ktime_to_us(net_timedelta(last)));
+   if (!ktime_equal(last, net_invalid_timestamp())
+   tcp_lp_rtt_sample(sk,  ktime_to_us(net_timedelta(last)));
 
/* calc inference */
if (tcp_time_stamp  tp-rx_opt.rcv_tsecr)
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 73e19cf..bd7a08f 100644
--- 

Re: [PATCH][RFC] network splice receive

2007-06-12 Thread Jens Axboe
On Tue, Jun 12 2007, Evgeniy Polyakov wrote:
 On Sat, Jun 09, 2007 at 08:36:09AM +0200, Jens Axboe ([EMAIL PROTECTED]) 
 wrote:
  On Fri, Jun 08 2007, Evgeniy Polyakov wrote:
   On Fri, Jun 08, 2007 at 06:57:25PM +0400, Evgeniy Polyakov ([EMAIL 
   PROTECTED]) wrote:
I will try some things for the nearest 30-60 minutes, and then will 
move to
canoe trip until Tuesday, so will not be able to work on this idea.
   
   Ok, replacing in fs/splice.c every page_cache_release() with
   static void splice_page_release(struct page *p)
   {
 if (!PageSlab(p))
 page_cache_release(p);
   }
  
  Ehm, I don't see why that should be necessary. Except in
  splice_to_pipe(), I have considered that we need to pass in a release
  function if mapping fails at some point. But it's probably best to do
  that in the caller, since they have the knowledge of how to release the
  pages.
  
  The rest of the PageSlab() tests are bogus.
 
 I had a crashdump, where page was released via splice_to_pipe() indeed,
 I did not investigate if it is possible to release provided page in
 other places. I think if in future there will other slab usage cases
 except networking receiving, that might be useful, but as is it is not
 needed.

Read the just posted code, it has moved way beyond this :-)

   and putting cloned skb into private field instead of 
   original on in spd_fill_page() ends up without kernel hung.
  
  Why? Seems pointless to allocate a clone just to hold on to the skb, a
  reference should be equally good. I would not be opposed to doing it
  this way, I just don't see what a clone buys us as compared to just
  holding that reference to the skb.
 
 Receiving code does not expect shared skbs - too many fields are changed
 with assumptions that it is a private copy.

Actually the main problem is that tcp_read_sock() unconditionally frees
the skb, so it wouldn't help if we grabbed a reference to it. I've yet
to receive an explanation of why it does so, seems awkward and violates
the whole principle of reference counted objects. Davem??

So for now, skb_splice_bits() clones the incoming skb to avoid that. I'd
hope we can get rid of that by fixing tcp_read_sock(), though.

-- 
Jens Axboe

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


pmtu discovery on sa esp

2007-06-12 Thread Marco Berizzi
Hello everybody.
I have just upgraded from 2.6.21.3 to
2.6.22-rc4 and I get a ton of
pmtu discovery on sa esp/blablab/blabla
messages (this box is running openswan).
Is this an expected behaviour?

TIA


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread jamal
On Tue, 2007-12-06 at 11:19 +0200, Johannes Berg wrote:
 On Mon, 2007-06-11 at 08:23 -0400, jamal wrote:

  Sure. Packets stashed on the any DMA ring are considered gone to the
  wire. That is a very valid assumption to make.
 
 Not at all! Packets could be on the DMA queue forever if you're feeding
 out more packets. Heck, on most wireless hardware packets can even be
 *expired* from the DMA queue and you get an indication that it was
 impossible to send them.

The spirit of the discussion you are quoting was much higher level than
that. Yes what you describe can happen on any DMA (to hard-disk etc)
A simpler example, if you tcpdump on an outgoing packet you see it on
its way to the driver - it is accounted for as gone[1].
In any case, read the rest of the thread.

cheers,
jamal

[1] Current Linux tcpdumping is not that accurate, but i dont wanna go
into that discussion

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RFC] network splice receive

2007-06-12 Thread Evgeniy Polyakov
On Tue, Jun 12, 2007 at 01:33:54PM +0200, Jens Axboe ([EMAIL PROTECTED]) wrote:
  I had a crashdump, where page was released via splice_to_pipe() indeed,
  I did not investigate if it is possible to release provided page in
  other places. I think if in future there will other slab usage cases
  except networking receiving, that might be useful, but as is it is not
  needed.
 
 Read the just posted code, it has moved way beyond this :-)

It is just a side result of traditional optimization technique called 
vim ':%s/page_cache_release/splice_page_release' :)

and putting cloned skb into private field instead of 
original on in spd_fill_page() ends up without kernel hung.
   
   Why? Seems pointless to allocate a clone just to hold on to the skb, a
   reference should be equally good. I would not be opposed to doing it
   this way, I just don't see what a clone buys us as compared to just
   holding that reference to the skb.
  
  Receiving code does not expect shared skbs - too many fields are changed
  with assumptions that it is a private copy.
 
 Actually the main problem is that tcp_read_sock() unconditionally frees
 the skb, so it wouldn't help if we grabbed a reference to it. I've yet
 to receive an explanation of why it does so, seem awkward and violates
 the whole principle of reference counted objects. Davem??
 
 So for now, skb_splice_bits() clones the incoming skb to avoid that. I'd
 hope we can get rid of that by fixing tcp_read_sock(), though.

It does that because it knows, that skb is not allowed to be shared
there. Similar things are being done in udp for example - code changes
internal members of skb, since it knows skb is not shared.

For example generic_make_request() is not allowed to change, say, 
bio->bi_sector or bi_destructor, since it does not own a block request, 
no matter what bi_cnt is. From another side, ->bi_destructor() can do
whatever it wants with bio without any check for its reference counter.

According to sk_eat_skb() - it is an optimisation to remove atomic
check.

 -- 
 Jens Axboe

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: pmtu discovery on sa esp

2007-06-12 Thread Patrick McHardy
Marco Berizzi wrote:
 Hello everybody.
 I have just upgraded from 2.6.21.3 to
 2.6.22-rc4 and I get a ton of
 pmtu discovery on sa esp/blablab/blabla
 messages (this box is running openswan).
 Is this an expected behaviour?


We have some MTU optimizations in 2.6.22-rc that might be related.
Please check with tcpdump what exactly is happening and whether
the 2.6.22-rc box is sending too large packets.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RFC] network splice receive

2007-06-12 Thread Jens Axboe
On Tue, Jun 12 2007, Evgeniy Polyakov wrote:
 and putting cloned skb into private field instead of 
 original on in spd_fill_page() ends up without kernel hung.

Why? Seems pointless to allocate a clone just to hold on to the skb, a
reference should be equally good. I would not be opposed to doing it
this way, I just don't see what a clone buys us as compared to just
holding that reference to the skb.
   
   Receiving code does not expect shared skbs - too many fields are changed
   with assumptions that it is a private copy.
  
  Actually the main problem is that tcp_read_sock() unconditionally frees
  the skb, so it wouldn't help if we grabbed a reference to it. I've yet
  to receive an explanation of why it does so, seem awkward and violates
  the whole principle of reference counted objects. Davem??
  
  So for now, skb_splice_bits() clones the incoming skb to avoid that. I'd
  hope we can get rid of that by fixing tcp_read_sock(), though.
 
 It does that because it knows, that skb is not allowed to be shared
 there. Similar things are being done in udp for example - code changes
 internal mebers of skb, since it knows skb is not shared.
 
 For example generic_make_request() is not allowed to change, say, 
 bio-bi_sector or bi_destructor, since it does not own a block request, 
 not matter what bi_cnt is. From another side, -bi_destructor() can do
 whatever it wants with bio without any check for its reference counter.

But generic_make_request() DOES change ->bi_sector, that's how partition
remapping works :-). The destructor can of course do whatever it wants,
by definition the bio is not referenced at that point (or it would not
have been called). So while I think your analogy is quite poor, I do now
follow the code (even if I think it's ugly). There's quite a big
difference between changing parts of the elements of a structure to just
grabbing a reference to it. If the skb cannot be referenced, skb_get()
should return NULL.

But that aside, I see the issue. I'll just stick to the clone, it works
fine as-is (well almost, there's a leak there, but functionally it's
ok!).

-- 
Jens Axboe

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 2/4] lockdep: fixup sk_callback_lock annotation

2007-06-12 Thread Peter Zijlstra
the two init sites resulted in inconsistent names for the lock class.

Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
Acked-by: Ingo Molnar [EMAIL PROTECTED]
Cc: netdev@vger.kernel.org
---
 net/core/sock.c |   23 +++
 1 file changed, 19 insertions(+), 4 deletions(-)

Index: linux-2.6/net/core/sock.c
===
--- linux-2.6.orig/net/core/sock.c
+++ linux-2.6/net/core/sock.c
@@ -171,6 +171,19 @@ static const char *af_family_slock_key_s
   slock-AF_TIPC  , slock-AF_BLUETOOTH, slock-AF_IUCV ,
   slock-AF_RXRPC , slock-AF_MAX
 };
+static const char *af_family_clock_key_strings[AF_MAX+1] = {
+  clock-AF_UNSPEC, clock-AF_UNIX , clock-AF_INET ,
+  clock-AF_AX25  , clock-AF_IPX  , clock-AF_APPLETALK,
+  clock-AF_NETROM, clock-AF_BRIDGE   , clock-AF_ATMPVC   ,
+  clock-AF_X25   , clock-AF_INET6, clock-AF_ROSE ,
+  clock-AF_DECnet, clock-AF_NETBEUI  , clock-AF_SECURITY ,
+  clock-AF_KEY   , clock-AF_NETLINK  , clock-AF_PACKET   ,
+  clock-AF_ASH   , clock-AF_ECONET   , clock-AF_ATMSVC   ,
+  clock-21   , clock-AF_SNA  , clock-AF_IRDA ,
+  clock-AF_PPPOX , clock-AF_WANPIPE  , clock-AF_LLC  ,
+  clock-27   , clock-28  , clock-29  ,
+  clock-AF_TIPC  , clock-AF_BLUETOOTH, clock-AF_MAX
+};
 #endif
 
 /*
@@ -941,8 +954,9 @@ struct sock *sk_clone(const struct sock 
 
rwlock_init(newsk-sk_dst_lock);
rwlock_init(newsk-sk_callback_lock);
-   lockdep_set_class(newsk-sk_callback_lock,
-  af_callback_keys + newsk-sk_family);
+   lockdep_set_class_and_name(newsk-sk_callback_lock,
+   af_callback_keys + newsk-sk_family,
+   af_family_clock_key_strings[newsk-sk_family]);
 
newsk-sk_dst_cache = NULL;
newsk-sk_wmem_queued   = 0;
@@ -1530,8 +1544,9 @@ void sock_init_data(struct socket *sock,
 
rwlock_init(sk-sk_dst_lock);
rwlock_init(sk-sk_callback_lock);
-   lockdep_set_class(sk-sk_callback_lock,
-  af_callback_keys + sk-sk_family);
+   lockdep_set_class_and_name(sk-sk_callback_lock,
+   af_callback_keys + sk-sk_family,
+   af_family_clock_key_strings[sk-sk_family]);
 
sk-sk_state_change =   sock_def_wakeup;
sk-sk_data_ready   =   sock_def_readable;

-- 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RFC] network splice receive

2007-06-12 Thread Evgeniy Polyakov
On Tue, Jun 12, 2007 at 02:40:05PM +0200, Jens Axboe ([EMAIL PROTECTED]) wrote:
 On Tue, Jun 12 2007, Evgeniy Polyakov wrote:
  and putting cloned skb into private field instead of 
  original on in spd_fill_page() ends up without kernel hung.
 
 Why? Seems pointless to allocate a clone just to hold on to the skb, a
 reference should be equally good. I would not be opposed to doing it
 this way, I just don't see what a clone buys us as compared to just
 holding that reference to the skb.

Receiving code does not expect shared skbs - too many fields are changed
with assumptions that it is a private copy.
   
   Actually the main problem is that tcp_read_sock() unconditionally frees
   the skb, so it wouldn't help if we grabbed a reference to it. I've yet
   to receive an explanation of why it does so, seem awkward and violates
   the whole principle of reference counted objects. Davem??
   
   So for now, skb_splice_bits() clones the incoming skb to avoid that. I'd
   hope we can get rid of that by fixing tcp_read_sock(), though.
  
  It does that because it knows, that skb is not allowed to be shared
  there. Similar things are being done in udp for example - code changes
  internal mebers of skb, since it knows skb is not shared.
  
  For example generic_make_request() is not allowed to change, say, 
  bio-bi_sector or bi_destructor, since it does not own a block request, 
  not matter what bi_cnt is. From another side, -bi_destructor() can do
  whatever it wants with bio without any check for its reference counter.
 
 But generic_make_request() DOES change -bi_sector, that's how partition
 remapping works :-). The destructor can of course do whatever it wants,
 by definition the bio is not referenced at that point (or it would not
 have been called). So while I think your analogy is quite poor, I do now
 follow the code (even if I think it's ugly). There's quite a big

Yeah, it was quite a long time ago that I hacked the block layer :)
Good we found a way to explain the issue.

 difference between changing parts of the elements of a structure to just
 grabbing a reference to it. If the skb cannot be referenced, skb_get()
 should return NULL.
 
 But that aside, I see the issue. I'll just stick to the clone, it works
 fine as-is (well almost, there's a leak there, but functionally it's
 ok!).

Btw, is it allowed to use splice from network with, say, nfs?
Since RPC code uses sk_user_data as well as network splice.

 -- 
 Jens Axboe

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RFC] network splice receive

2007-06-12 Thread Jens Axboe
On Tue, Jun 12 2007, Evgeniy Polyakov wrote:
  difference between changing parts of the elements of a structure to just
  grabbing a reference to it. If the skb cannot be referenced, skb_get()
  should return NULL.
  
  But that aside, I see the issue. I'll just stick to the clone, it works
  fine as-is (well almost, there's a leak there, but functionally it's
  ok!).
 
 Btw, is it allowed to use splice from network with, say, nfs?
 Since RPC code uses sk_user_data as long as network splice.

It doesn't anymore, see the version posted today (or yesterday, but it
would be silly to read older code than the newest :-)

-- 
Jens Axboe

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Patrick McHardy
jamal wrote:
the qdisc has a chance to hand out either a packet
  of the same priority or higher priority, but at the cost of
  at worst (n - 1) * m unnecessary dequeues+requeues in case
  there is only a packet of lowest priority and we need to
  fully serve all higher priority HW queues before it can
  actually be dequeued. 
 
 
 yes, i see that. 
 [It actually is related to the wake threshold you use in the 
 driver. tg3 and e1000 for example will do it after 30 or so packets.
 But i get your point - what you are trying to describe is a worst case
 scenario].


Yes. Using a higher threshold reduces the overhead, but leads to
lower priority packets getting out even if higher priority packets
are present in the qdisc. Note that if we use the threshold with
multiple queue states (threshold per ring) this doesn't happen.

  The other possibility would be to
  activate the queue again once all rings can take packets
  again, but that wouldn't fix the problem, which you can
  easily see if you go back to my example and assume we still
  have a low priority packet within the qdisc when the lowest
  priority ring fills up (and the queue is stopped), and after
  we tried to wake it and stopped it again the higher priority
  packet arrives.
 
 
 In your use case, only low prio packets are available on the stack.
 Above you mention arrival of high prio - assuming thats intentional and
 not it being late over there ;-
 If higher prio packets are arriving on the qdisc when you open up, then
 given strict prio those packets get to go to the driver first until
 there are no more left; followed of course by low prio which then
 shutdown the path again...


What's happening is: Lowest priority ring fills up, queue is stopped.
We have more packets for it in the qdisc. A higher priority packet
is transmitted, the queue is woken up again, the lowest priority packet
goes to the driver and hits the full ring, packet is requeued and
queue shut down until ring frees up again. Now a high priority packet
arrives. It won't get to the driver anymore. But it's not very important
since having two different wakeup-strategies would be a bit strange
anyway, so let's just rule out this possibility.

Considering your proposal in combination with RR, you can see
the same problem of unnecessary dequeues+requeues. 
 
 
 Well, we havent really extended the use case from prio to RR.
 But this is a good start as any since all sorts of work conserving
 schedulers will behave in a similar fashion ..
 
 
Since there
is no priority for waking the queue when a equal or higher
priority ring got dequeued as in the prio case, I presume you
would wake the queue whenever a packet was sent. 
 
 
 I suppose that is a viable approach if the hardware is RR based.
 Actually in the case of e1000 it is WRR not plain RR, but that is a
 moot point which doesnt affect the discussion.
 
 
For the RR
qdisc dequeue after requeue should hand out the same packet,
independently of newly enqueued packets (which doesn't happen
and is a bug in Peter's RR version), so in the worst case the
HW has to make the entire round before a packet can get
dequeued in case the corresponding HW queue is full. This is
a bit better than prio, but still up to n - 1 unnecessary
requeues+dequeues. I think it can happen more often than
for prio though.
 
 
 I think what would better to be use is DRR. I pointed the code i did
 a long time ago to Peter. 
 With DRR, a deficit is viable to be carried forward.


If both driver and HW do it, it's probably OK for short term, but it
shouldn't grow too large since short-term fairness is also important.
But the unnecessary dequeues+requeues can still happen.

Forgetting about things like multiple qdisc locks and just
looking at queueing behaviour, the question seems to come
down to whether the unnecessary dequeues/requeues are acceptable
(which I don't think since they are easily avoidable).
 
 
 As i see it, the worst case scenario would have a finite time.
 A 100Mbps NIC should be able to dish out, depending on packet size,
 148Kpps to 8.6Kpps; a GigE 10x that.
 so i think the phase in general wont last that long given the assumption
 is packets are coming in from the stack to the driver with about the
 packet rate equivalent to wire rate (for the case of all work conserving
 schedulers).
 In the general case there should be no contention at all.


It does have finite time, but it's still undesirable. The average case
would probably have been more interesting, but it's also harder :)
I also expect to see lots of requeues under normal load that doesn't
resemble the worst-case, but only tests can confirm that.

 OTOH
you could turn it around and argue that the patches won't do
much harm since ripping them out again (modulo queue mapping)
should result in the same behaviour with just more overhead.
 
 
 I am not sure i understood - but note that i have asked for a middle
 ground from the begining. 


I just mean that we could rip the patches out at any 

[PATCH] pktgen IPSEC 3/4: Introduce xfrm SAD only lookup

2007-06-12 Thread Robert Olsson

jamal writes:
  3 of 4 ..

  [XFRM] Introduce standalone SAD lookup
  This allows other in-kernel functions to do SAD lookups.
  The only known user at the moment is pktgen.
  
  Signed-off-by: Jamal Hadi Salim [EMAIL PROTECTED]
 

 xfrm is not my area. 

 Acked-by: Robert Olsson [EMAIL PROTECTED]

 Cheers
--ro


  
  diff --git a/include/net/xfrm.h b/include/net/xfrm.h
  index 311f25a..79d2c37 100644
  --- a/include/net/xfrm.h
  +++ b/include/net/xfrm.h
  @@ -920,6 +920,10 @@ extern struct xfrm_state 
  *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t
 struct flowi *fl, struct xfrm_tmpl 
  *tmpl,
 struct xfrm_policy *pol, int *err,
 unsigned short family);
  +extern struct xfrm_state * xfrm_stateonly_find(xfrm_address_t *daddr,
  +   xfrm_address_t *saddr,
  +   unsigned short family,
  +   u8 mode, u8 proto, u32 reqid);
   extern int xfrm_state_check_expire(struct xfrm_state *x);
   extern void xfrm_state_insert(struct xfrm_state *x);
   extern int xfrm_state_add(struct xfrm_state *x);
  diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
  index 85f3f43..b8562e4 100644
  --- a/net/xfrm/xfrm_state.c
  +++ b/net/xfrm/xfrm_state.c
  @@ -686,6 +686,41 @@ out:
   return x;
   }
   
  +struct xfrm_state *
  +xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
  +unsigned short family, u8 mode, u8 proto, u32 reqid)
  +{
  +unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
  +struct xfrm_state *rx = NULL, *x = NULL;
  +struct hlist_node *entry;
  +
  +spin_lock(xfrm_state_lock);
  +hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
  +if (x-props.family == family 
  +x-props.reqid == reqid 
  +!(x-props.flags  XFRM_STATE_WILDRECV) 
  +xfrm_state_addr_check(x, daddr, saddr, family) 
  +mode == x-props.mode 
  +proto == x-id.proto)  {
  +
  +if (x-km.state != XFRM_STATE_VALID)
  +continue;
  +else {
  +rx = x;
  +break;
  +}
  +}
  +}
  +
  +if (rx)
  +xfrm_state_hold(rx);
  +spin_unlock(xfrm_state_lock);
  +
  +
  +return rx;
  +}
  +EXPORT_SYMBOL(xfrm_stateonly_find);
  +
   static void __xfrm_state_insert(struct xfrm_state *x)
   {
   unsigned int h;
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] pktgen IPSEC 1/4: Centralize pktgen packet overhead management

2007-06-12 Thread Robert Olsson

jamal writes:
  Manual labor still ... 1 of 4

  [PKTGEN] Centralize packet overhead tracking
  Track the extra packet overhead for VLAN tags, MPLS, IPSEC etc
  
  Signed-off-by: Jamal Hadi Salim [EMAIL PROTECTED]


   Thanks, Jamal.

   I'll guess the ipsec part is to be considered work-in-progress
   and you're doing both the work and the progress.
  
   Signed-off-by: Robert Olsson [EMAIL PROTECTED]

   Cheers
--ro

 
  diff --git a/net/core/pktgen.c b/net/core/pktgen.c
  index 9cd3a1c..1352316 100644
  --- a/net/core/pktgen.c
  +++ b/net/core/pktgen.c
  @@ -228,6 +228,7 @@ struct pktgen_dev {
   
   int min_pkt_size;   /* = ETH_ZLEN; */
   int max_pkt_size;   /* = ETH_ZLEN; */
  +int pkt_overhead;   /* overhead for MPLS, VLANs, IPSEC etc */
   int nfrags;
   __u32 delay_us; /* Default delay */
   __u32 delay_ns;
  @@ -2075,6 +2076,13 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 
  spin_until_us)
   pkt_dev-idle_acc += now - start;
   }
   
  +static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
  +{
  +pkt_dev-pkt_overhead += pkt_dev-nr_labels*sizeof(u32);
  +pkt_dev-pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
  +pkt_dev-pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
  +}
  +
   /* Increment/randomize headers according to flags and current values
* for IP src/dest, UDP src/dst port, MAC-Addr src/dst
*/
  @@ -2323,9 +2331,7 @@ static struct sk_buff *fill_packet_ipv4(struct 
  net_device *odev,
   
   datalen = (odev-hard_header_len + 16)  ~0xf;
   skb = alloc_skb(pkt_dev-cur_pkt_size + 64 + datalen +
  -pkt_dev-nr_labels*sizeof(u32) +
  -VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev),
  -GFP_ATOMIC);
  +pkt_dev-pkt_overhead, GFP_ATOMIC);
   if (!skb) {
   sprintf(pkt_dev-result, No memory);
   return NULL;
  @@ -2368,7 +2374,7 @@ static struct sk_buff *fill_packet_ipv4(struct 
  net_device *odev,
   
   /* Eth + IPh + UDPh + mpls */
   datalen = pkt_dev-cur_pkt_size - 14 - 20 - 8 -
  -  pkt_dev-nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - 
  SVLAN_TAG_SIZE(pkt_dev);
  +  pkt_dev-pkt_overhead;
   if (datalen  sizeof(struct pktgen_hdr))
   datalen = sizeof(struct pktgen_hdr);
   
  @@ -2391,8 +2397,7 @@ static struct sk_buff *fill_packet_ipv4(struct 
  net_device *odev,
   iph-check = ip_fast_csum((void *)iph, iph-ihl);
   skb-protocol = protocol;
   skb-mac_header = (skb-network_header - ETH_HLEN -
  -   pkt_dev-nr_labels * sizeof(u32) -
  -   VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
  +   pkt_dev-pkt_overhead);
   skb-dev = odev;
   skb-pkt_type = PACKET_HOST;
   
  @@ -2662,9 +2667,7 @@ static struct sk_buff *fill_packet_ipv6(struct 
  net_device *odev,
   mod_cur_headers(pkt_dev);
   
   skb = alloc_skb(pkt_dev-cur_pkt_size + 64 + 16 +
  -pkt_dev-nr_labels*sizeof(u32) +
  -VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev),
  -GFP_ATOMIC);
  +pkt_dev-pkt_overhead, GFP_ATOMIC);
   if (!skb) {
   sprintf(pkt_dev-result, No memory);
   return NULL;
  @@ -2708,7 +2711,7 @@ static struct sk_buff *fill_packet_ipv6(struct 
  net_device *odev,
   /* Eth + IPh + UDPh + mpls */
   datalen = pkt_dev-cur_pkt_size - 14 -
 sizeof(struct ipv6hdr) - sizeof(struct udphdr) -
  -  pkt_dev-nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - 
  SVLAN_TAG_SIZE(pkt_dev);
  +  pkt_dev-pkt_overhead;
   
   if (datalen  sizeof(struct pktgen_hdr)) {
   datalen = sizeof(struct pktgen_hdr);
  @@ -2738,8 +2741,7 @@ static struct sk_buff *fill_packet_ipv6(struct 
  net_device *odev,
   ipv6_addr_copy(iph-saddr, pkt_dev-cur_in6_saddr);
   
   skb-mac_header = (skb-network_header - ETH_HLEN -
  -   pkt_dev-nr_labels * sizeof(u32) -
  -   VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
  +   pkt_dev-pkt_overhead);
   skb-protocol = protocol;
   skb-dev = odev;
   skb-pkt_type = PACKET_HOST;
  @@ -2857,6 +2859,7 @@ static void pktgen_run(struct pktgen_thread *t)
   pkt_dev-started_at = getCurUs();
   pkt_dev-next_tx_us = getCurUs();   /* Transmit 
  immediately */
   pkt_dev-next_tx_ns = 0;
  +set_pkt_overhead(pkt_dev);
   
   strcpy(pkt_dev-result, Starting);
   started++;
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] pktgen IPSEC 3/4: Introduce xfrm SAD only lookup

2007-06-12 Thread Patrick McHardy
Looks good to me, just a few minor nitpicks as usual :)

jamal wrote:
 [XFRM] Introduce standalone SAD lookup

 +struct xfrm_state *
 +xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 + unsigned short family, u8 mode, u8 proto, u32 reqid)
 +{
 + unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
 + struct xfrm_state *rx = NULL, *x = NULL;
 + struct hlist_node *entry;
 +
 + spin_lock(xfrm_state_lock);
 + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
 + if (x-props.family == family 
 + x-props.reqid == reqid 
 + !(x-props.flags  XFRM_STATE_WILDRECV) 
 + xfrm_state_addr_check(x, daddr, saddr, family) 
 + mode == x-props.mode 
 + proto == x-id.proto)  {
 +

^^ please delete empty line
 + if (x-km.state != XFRM_STATE_VALID)
 + continue;

^ one indentation level too much

 + else {
 + rx = x;
 + break;
 + }

The whole thing could be compacted by moving the XFRM_STATE_VALID
check to the first condition:

if (x-props.family == family 
x-props.reqid == reqid 
!(x-props.flags  XFRM_STATE_WILDRECV) 
xfrm_state_addr_check(x, daddr, saddr, family) 
mode == x-props.mode 
proto == x-id.proto 
x-km.state == XFRM_STATE_VALID) {
rx = x;
break;
}

or alternatively turn the != XFRM_STATE_VALID into == if you
want to keep the first condition similar to xfrm_state_find
(but the mode and proto conditions are reversed anyways).

BTW, wouldn't it make sense to allow use of the SPI as well?
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] pktgen IPSEC 4/4: Add IPSEC support to pktgen

2007-06-12 Thread Robert Olsson

jamal writes:
  4 of 4

  [PKTGEN] IPSEC support
  Added transport mode ESP support for starters.
  I will send more of these modes and types once i have resolved
  the tunnel mode issues.
  
  Signed-off-by: Jamal Hadi Salim [EMAIL PROTECTED]


 Signed-off-by: Robert Olsson [EMAIL PROTECTED]
 
 Cheers
--ro




  
  diff --git a/net/core/pktgen.c b/net/core/pktgen.c
  index bc4fb3b..bcec8e4 100644
  --- a/net/core/pktgen.c
  +++ b/net/core/pktgen.c
  @@ -152,6 +152,9 @@
   #include net/checksum.h
   #include net/ipv6.h
   #include net/addrconf.h
  +#ifdef CONFIG_XFRM
  +#include net/xfrm.h
  +#endif
   #include asm/byteorder.h
   #include linux/rcupdate.h
   #include asm/bitops.h
  @@ -182,6 +185,7 @@
   #define F_VID_RND (19)/* Random VLAN ID */
   #define F_SVID_RND(110)   /* Random SVLAN ID */
   #define F_FLOW_SEQ(111)   /* Sequential flows */
  +#define F_IPSEC_ON(112)   /* ipsec on for flows */
   
   /* Thread control flag bits */
   #define T_TERMINATE   (10)
  @@ -208,6 +212,9 @@ static struct proc_dir_entry *pg_proc_dir = NULL;
   struct flow_state {
   __be32 cur_daddr;
   int count;
  +#ifdef CONFIG_XFRM
  +struct xfrm_state *x;
  +#endif
   __u32 flags;
   };
   
  @@ -348,7 +355,10 @@ struct pktgen_dev {
   unsigned lflow; /* Flow length  (config) */
   unsigned nflows;/* accumulated flows (stats) */
   unsigned curfl; /* current sequenced flow (state)*/
  -
  +#ifdef CONFIG_XFRM
  +__u8ipsmode;/* IPSEC mode (config) */
  +__u8ipsproto;   /* IPSEC type (config) */
  +#endif
   char result[512];
   };
   
  @@ -704,6 +714,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
   seq_printf(seq,  FLOW_RND  );
   }
   
  +if (pkt_dev-flags  F_IPSEC_ON)
  +seq_printf(seq,  IPSEC  );
  +
   if (pkt_dev-flags  F_MACSRC_RND)
   seq_printf(seq, MACSRC_RND  );
   
  @@ -1198,6 +1211,11 @@ static ssize_t pktgen_if_write(struct file *file,
   else if (strcmp(f, FLOW_SEQ) == 0)
   pkt_dev-flags |= F_FLOW_SEQ;
   
  +#ifdef CONFIG_XFRM
  +else if (strcmp(f, IPSEC) == 0)
  +pkt_dev-flags |= F_IPSEC_ON;
  +#endif
  +
   else if (strcmp(f, !IPV6) == 0)
   pkt_dev-flags = ~F_IPV6;
   
  @@ -1206,7 +1224,7 @@ static ssize_t pktgen_if_write(struct file *file,
   Flag -:%s:- unknown\nAvailable flags, (prepend 
  ! to un-set flag):\n%s,
   f,
   IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, 
  -MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, 
  MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ\n);
  +MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, 
  MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n);
   return count;
   }
   sprintf(pg_result, OK: flags=0x%x, pkt_dev-flags);
  @@ -2094,6 +2112,7 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 
  spin_until_us)
   
   static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
   {
  +pkt_dev-pkt_overhead = 0;
   pkt_dev-pkt_overhead += pkt_dev-nr_labels*sizeof(u32);
   pkt_dev-pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
   pkt_dev-pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
  @@ -2130,6 +2149,31 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
   return pkt_dev-curfl;
   }
   
  +
  +#ifdef CONFIG_XFRM
  +/* If there was already an IPSEC SA, we keep it as is, else
  + * we go look for it ...
  +*/
  +inline
  +void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
  +{
  +struct xfrm_state *x = pkt_dev-flows[flow].x;
  +if (!x) {
  +/*slow path: we dont already have xfrm_state*/
  +x = xfrm_stateonly_find((xfrm_address_t *)pkt_dev-cur_daddr,
  +(xfrm_address_t *)pkt_dev-cur_saddr,
  +AF_INET,
  +pkt_dev-ipsmode,
  +pkt_dev-ipsproto, 0);
  +if (x) {
  +pkt_dev-flows[flow].x = x;
  +set_pkt_overhead(pkt_dev);
  +pkt_dev-pkt_overhead+=x-props.header_len;
  +}
  +
  +}
  +}
  +#endif
   /* Increment/randomize headers according to flags and current values
* for IP src/dest, UDP src/dst port, MAC-Addr src/dst
*/
  @@ -2289,6 +2333,10 @@ static void mod_cur_headers(struct pktgen_dev 
  *pkt_dev)
   pkt_dev-flows[flow].flags |= F_INIT;
   pkt_dev-flows[flow].cur_daddr =
   pkt_dev-cur_daddr;
  +#ifdef CONFIG_XFRM
  +if (pkt_dev-flags  F_IPSEC_ON)
  +   

RE: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Cohen, Guy

Hi Jamal,

Here is a simple scenario (nothing here is a rare or extreme case):
- Busy wireless environment
- FTP TX on BE queue (low priority)
- Skype TX on VO queue (high priority)

The channel is busy with high priority packets hence the BE packets are
transmitted to the air rarely so the DMA/HW queue of the BE access
category gets full and the qdisc is stopped.
Now periodic VO-tagged Skype packets arrive. I would expect that they
get the priority (and pass) in all stages of the stack and reach the HW
ASAP and compete there on the medium with the other access categories
and the other clients on the channel.
Now this packet will be stuck in the qdisc and wait there until a BE
packet is transmitted, which can take a long time. This is a real
problem.

There is also a problem with the queues that will be dedicated to TX
aggregation in 11n (currently implemented) - the packets will be
classified to queues by the destination MAC address and not only by the
priority class, but I don't want to get into that now. I think that
there are enough arguments now why the patch that started this thread is
needed...

Please see below some replies to your questions.

Regards,
Guy.


jamal wrote:
 It could be estimated well by the host sw; but lets defer that to
later
 in case i am clueless on something or you misunderstood something i
 said.

It cannot be estimated well by the host SW. This is one of the main
issues - we can't put it aside...

 I understand.  Please correct me if am wrong:
 The only reason AC_BK packet will go out instead of AC_VO when
 contending in hardware is because of a statistical opportunity not the
 firmware intentionaly trying to allow AC_BK out
 i.e it is influenced by the three variables:
 1) The contention window 2) the backoff timer and 3)the tx opportunity
 And if you look at the default IEEE parameters as in that url slide
43,
 the only time AC_BK will win is luck.

In most scenarios BK packets will be transmitted and will win the medium
against VO packets (though, in some non-favored ratio).

 Heres a really dated paper before the standard was ratified:
 http://www.mwnl.snu.ac.kr/~schoi/publication/Conferences/02-EW.pdf

Sorry, I'm really overloaded - I won't be able to review the docs you
sent (really apologize for that).

 So essentially the test you mention changes priorities in real time.
 What is the purpose of this test? Is WMM expected to change its
 priorities in real time?

The WMM parameters of the AC are set and controlled by the network/BSS
(access point) administrator and can be used in anyway. There are the
default parameters but they can be changed.

Regards,
Guy.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: cannot set IP for ethernet

2007-06-12 Thread Patrick McHardy
Oliver Neukum wrote:
 with 2.6.22-rc4-git2 I am getting errors when setting IP for ethernet
 interfaces:
 
 ioctl(4, SIOCSIFADDR, 0x7fff94931600)   = -1 ENOBUFS (No buffer space 
 available)
 
 The error is independent of the interface. It happens to all interfaces.
 There's nothing in the syslog.
 
 valisk:/home/oliver # uname -a
 Linux valisk 2.6.22-rc4-git2-default #3 SMP Tue Jun 12 13:27:54 CEST 2007 
 x86_64 x86_64 x86_64 GNU/Linux


This can happen if the initial inetdev allocation when the netdevice is
registered fails. I think it would make sense to try to allocate again
when adding addresses in that case, otherwise there is no way of
recovery other than unregistering and registering the device again.

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index abf6352..dc77e91 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -401,8 +401,11 @@ static int inet_set_ifa(struct net_device *dev, struct 
in_ifaddr *ifa)
ASSERT_RTNL();
 
if (!in_dev) {
-   inet_free_ifa(ifa);
-   return -ENOBUFS;
+   in_dev = inetdev_init(dev);
+   if (!in_dev) {
+   inet_free_ifa(ifa);
+   return -ENOBUFS;
+   }
}
ipv4_devconf_setall(in_dev);
if (ifa-ifa_dev != in_dev) {
@@ -514,8 +517,11 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr 
*nlh)
 
in_dev = __in_dev_get_rtnl(dev);
if (in_dev == NULL) {
-   err = -ENOBUFS;
-   goto errout;
+   in_dev = inetdev_init(dev);
+   if (!in_dev) {
+   err = -ENOBUFS;
+   goto errout;
+   }
}
 
ipv4_devconf_setall(in_dev);


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread jamal
On Tue, 2007-12-06 at 15:21 +0200, Patrick McHardy wrote:
 jamal wrote:

 
 
 Yes. Using a higher threshold reduces the overhead, but leads to
 lower priority packets getting out even if higher priority packets
 are present in the qdisc. 

As per earlier discussion, the packets already given to hardware should
be fine to go out first. If they get overridden by the chance arrival of
higher prio packets from the stack, then that is fine.

 Note that if we use the threshold with
 multiple queue states (threshold per ring) this doesn't happen.

I think if you do the math, youll find that (n - 1) * m is actually
not that unreasonable given parameters typically used on the drivers;
Lets for example take the parameters from e1000; the tx ring is around
256, the wake threshold is 32 packets (although i have found a better
number is 1/2 the tx size and have that changed in my batching patches).

Assume such a driver with above parameters doing Gige exists and it
implements 4 queues (n = 4); in such a case, (n-1)*m/32 is
3*256/32 = 3*8 = 24 times.

You have to admit your use case is a real corner case but lets be
conservative since we are doing a worst case scenario and from that
perspective consider that gige can be achieved at pkt levels of 86Kpps
to 1.48Mpps and if you are non-work conserving you will be running at
that rate and lets pick the low end of 86Kpps - what that means is there
is a blip (remember again this to be a corner case) for a few microsecs
once in a while with probability of what you described actually
occurring... 
Ok, so then update the threshold to 1/2 the tx ring etc and it is even
less. You get the message.

 If both driver and HW do it, its probably OK for short term, but it
 shouldn't grow too large since short-term fairness is also important.
 But the unnecessary dequeues+requeues can still happen.

In a corner case, yes there is a probability that will happen.
I think its extremely low.

 
 It does have finite time, but its still undesirable. The average case
 would probably have been more interesting, but its also harder :)
 I also expect to see lots of requeues under normal load that doesn't
 resemble the worst-case, but only tests can confirm that.
 

And that is what i was asking of Peter. Some testing. Clearly the
subqueueing is more complex; what i am asking for is for the driver
to bear the brunt and not for it to be an impacting architectural
change.

  I am not sure i understood - but note that i have asked for a middle
  ground from the beginning. 
 
 
 I just mean that we could rip the patches out at any point again
 without user visible impact aside from more overhead. So even
 if they turn out to be a mistake its easily correctable.

That is a good compromise i think. The reason i am spending my time
discussing this is i believe this to be a very important subsystem.
You know i have been vociferous for years on this topic.
What i was worried about is these patches make it and become engrained
with hot lava on stone.

 I've also looked into moving all multiqueue specific handling to
 the top-level qdisc out of sch_generic, unfortunately that leads
 to races unless all subqueue state operations takes dev-qdisc_lock.
 Besides the overhead I think it would lead to ABBA deadlocks.

I  am confident you can handle that.

 So how do we move forward?

What you described above is a good compromise IMO. I dont have much time
to chase this path at the moment but what it does is give me freedom to
revisit later on with data points. More importantly you understand my
view;- And of course you did throw a lot of rocks but it is
a definite alternative ;-

cheers,
jamal


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] pktgen IPSEC 1/4: Centralize pktgen packet overhead management

2007-06-12 Thread jamal
On Tue, 2007-12-06 at 15:21 +0200, Robert Olsson wrote:

 
I'll guess the ipsec part is to be considered work-in-progress
and you're doing both the work and the progress.
   

;-

Much thanks Robert.

cheers,
jamal

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] pktgen IPSEC 3/4: Introduce xfrm SAD only lookup

2007-06-12 Thread jamal
On Tue, 2007-12-06 at 15:45 +0200, Patrick McHardy wrote:
 Looks good too me, just a few minor nitpicks as usual :)

I like the nitpicks - they make the code better (as long as we put
a time limit on them ;-)

 
 ^^ please delete empty line

will do.

  +   if (x-km.state != XFRM_STATE_VALID)
  +   continue;
 
 ^ one indentation level too much

will fix.

 The whole thing could be compacted by moving the XFRM_STATE_VALID
 check to the first condition:
 
   if (x-props.family == family 
   x-props.reqid == reqid 
   !(x-props.flags  XFRM_STATE_WILDRECV) 
   xfrm_state_addr_check(x, daddr, saddr, family) 
   mode == x-props.mode 
   proto == x-id.proto 
   x-km.state == XFRM_STATE_VALID) {
   rx = x;
   break;
   }
 
 or alternatively turn the != XFRM_STATE_VALID into == if you
 want to keep the first condition similar to xfrm_state_find
 (but the mode and proto conditions are reversed anyways).
 

Will do.

 BTW, wouldn't it make sense to allow use of the SPI as well?

SPI is the least user friendly parameter - but i could add it later.
I want to add tunnel mode next then i can revisit SPI.

Thanks for taking the time to review this Patrick.

cheers,
jamal


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread jamal
Guy,
I apologize for not responding immediately - i promise to in a few hours
when i get back (and read it over some good coffee) - seems like you
have some good stuff there; thanks for taking the time despite the
overload.

cheers,
jamal

On Tue, 2007-12-06 at 17:04 +0300, Cohen, Guy wrote:
 Hi Jamal,
 
 Here is a simple scenario (nothing here is rare of extreme case):
 - Busy wireless environment
 - FTP TX on BE queue (low priority)
 - Skype TX on VO queue (high priority)
 


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH net-2.6] [TCP]: Congestion control API RTT sampling fix

2007-06-12 Thread Stephen Hemminger
On Tue, 12 Jun 2007 15:06:57 +0300 (EEST)
Ilpo Järvinen [EMAIL PROTECTED] wrote:

 I was thinking something like this to fix the cc module breakage 
 introduced by the API change (haven't tested it besides compile):
 
 
 [RFC PATCH net-2.6] [TCP]: Congestion control API RTT sampling fix
 
 
 Commit 164891aadf1721fca4dce473bb0e0998181537c6 broke RTT
 sampling of congestion control modules. Inaccurate timestamps
 could be fed to them without providing any way for them to
 identify such cases. Previously RTT sampler was called only if
 FLAG_RETRANS_DATA_ACKED was not set filtering inaccurate
 timestamps nicely. In addition, the new behavior could give an
 invalid timestamp (zero) to RTT sampler if only skbs with
 TCPCB_RETRANS were ACKed. This solves both problems.
 
 Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
 ---
  include/linux/ktime.h   |   18 ++
  include/linux/skbuff.h  |4 
  net/ipv4/tcp_illinois.c |3 +++
  net/ipv4/tcp_input.c|6 +-
  net/ipv4/tcp_lp.c   |3 ++-
  net/ipv4/tcp_vegas.c|3 +++
  net/ipv4/tcp_veno.c |3 +++
  7 files changed, 38 insertions(+), 2 deletions(-)
 
 diff --git a/include/linux/ktime.h b/include/linux/ktime.h
 index c762954..9f7fa3e 100644
 --- a/include/linux/ktime.h
 +++ b/include/linux/ktime.h
 @@ -102,6 +102,12 @@ static inline ktime_t ktime_set(const long secs, const 
 unsigned long nsecs)
  #define ktime_add_ns(kt, nsval) \
   ({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; })
  
 +/* Compare two ktime_t variables, returns 1 if equal */
 +static inline int ktime_equal(const ktime_t cmp1, const ktime_t cmp2)
 +{
 + return cmp1.tv64 == cmp2.tv64;
 +}
 +
  /* convert a timespec to ktime_t format: */
  static inline ktime_t timespec_to_ktime(struct timespec ts)
  {
 @@ -200,6 +206,18 @@ static inline ktime_t ktime_add(const ktime_t add1, 
 const ktime_t add2)
  extern ktime_t ktime_add_ns(const ktime_t kt, u64 nsec);
  
  /**
 + * ktime_equal - Compares two ktime_t variables to see if they are equal
 + * @cmp1:comparable1
 + * @cmp2:comparable2
 + *
 + * Compare two ktime_t variables, returns 1 if equal
 + */
 +static inline int ktime_equal(const ktime_t cmp1, const ktime_t cmp2)
 +{
 + return !((cmp1.tv.sec ^ cmp2.tv.sec) | (cmp1.tv.usec ^ cmp2.tv.usec));
 +}

Since ktime is a union just comparing the two 64bit values should
be simpler.

static inline int ktime_equal(const ktime_t t1, const ktime_t t2)
{
return t1.s64 == t2.s64;
}

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [RFC -v3] NET: Implement a standard ndev_printk family

2007-06-12 Thread Joe Perches
On Mon, 2007-06-11 at 17:40 -0700, Auke Kok wrote:
 +#define ndev_err(netdev, level, format, arg...) \
 + do { \
 + struct net_device *__nd = (netdev); \
 + if ((__nd)-msg_enable  NETIF_MSG_##level) \
 + printk(KERN_ERR %s: %s:  format, (__nd)-name, \
 + (__nd)-dev.parent-bus_id, ## arg); \
 + } while (0)
 +

I think it's better to remove the macro concatenation/obfuscation
of the NETIF_MSG_##level argument and simply pass the appropriate
NETIF_MSG_type directly to these ndev_level calls.

It would also simplify the more than 300 calls in drivers/net of

if (netif_msg_type(ptr))
printk(foo)

to

ndev_level(netdev, NETIF_MSG_type, fmt, args)


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RFC] network splice receive v2

2007-06-12 Thread Evgeniy Polyakov
On Mon, Jun 11, 2007 at 01:59:26PM +0200, Jens Axboe ([EMAIL PROTECTED]) wrote:
 Patches are against the #splice branch of the block repo, official url
 of that is:
 
 git://git.kernel.dk/data/git/linux-2.6-block.git/
 
 and it's based on Linus main tree. Let me know if I should supply netdev
 branch patches instead, or even just provide a rolled up patch (or patch
 series) for anyone curious to test or play with it.

Hi Jens.

I've just pulled your tree (splice-net, but splice tree looks the same, git 
pull says
'Already up-to-date.') on top of linus git and got following bug trace. 
I will investigate it further tomorrow.

[   51.942373] [ cut here ]
[   51.947041] kernel BUG at include/linux/mm.h:285!
[   51.951786] invalid opcode:  [1] PREEMPT SMP 
[   51.956680] CPU 0 
[   51.958784] Modules linked in: button loop snd_intel8x0
snd_ac97_codec psmouse ac97_bus snd_pcm snd_timer snd soundcore
snd_page_alloc k8temp i2c_nforcen
[   51.988793] Pid: 2604, comm: splice-fromnet Not tainted
2.6.22-rc4-splice #2
[   51.995886] RIP: 0010:[80389b15]  [80389b15]
__skb_splice_bits+0xcd/0x201
[   52.004520] RSP: 0018:810037f23c28  EFLAGS: 00010246
[   52.009872] RAX:  RBX: 810037f23d98 RCX:
003f
[   52.017053] RDX: 81003fe93808 RSI: 81003fe93808 RDI:
0003c0a3
[   52.024233] RBP: 810037f23c78 R08:  R09:
81003780e4b8
[   52.031412] R10: 803b01d9 R11: 810037f23de8 R12:
009a
[   52.038591] R13:  R14: 810037f23c90 R15:
05a8
[   52.045771] FS:  2b9181d2c6d0() GS:804fb000()
knlGS:
[   52.053920] CS:  0010 DS:  ES:  CR0: 8005003b
[   52.059714] CR2: 2b9181bb60e0 CR3: 3d109000 CR4:
06e0
[   52.066894] Process splice-fromnet (pid: 2604, threadinfo
810037f22000, task 8100010f4100)
[   52.075908] Stack:  004612d0 810037f23c94
81003780e4b8 37f23c78
[   52.084214]  faf2050e 81003780e4b8 81003780e4b8
81003e8f22d8
[   52.091860]  81003c99c820 4d5f4ede 810037f23dd8
8038bf20
[   52.099265] Call Trace:
[   52.101998]  [8038bf20] skb_splice_bits+0x6c/0xd0
[   52.107619]  [803dc720] _read_unlock_irq+0x31/0x4e
[   52.113330]  [803afc1c] tcp_splice_data_recv+0x20/0x22
[   52.119386]  [803afaf3] tcp_read_sock+0xa2/0x1ab
[   52.124920]  [803afbfc] tcp_splice_data_recv+0x0/0x22
[   52.130888]  [803b0232] tcp_splice_read+0xa1/0x21b
[   52.136593]  [803891cf] sock_def_readable+0x0/0x6f
[   52.142303]  [80384a25] sock_splice_read+0x15/0x17
[   52.148010]  [8029e773] do_splice_to+0x76/0x88
[   52.153370]  [8029fc87] sys_splice+0x1a8/0x232
[   52.158733]  [802097ce] system_call+0x7e/0x83
[   52.164005] 
[   52.165544] 
[   52.165545] Code: 0f 0b eb fe 44 39 65 d4 8b 4d d4 41 0f 47 cc 90 ff
42 08 48 
[   52.175364] RIP  [80389b15] __skb_splice_bits+0xcd/0x201
[   52.181636]  RSP 810037f23c28



-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RFC] network splice receive v2

2007-06-12 Thread Jens Axboe
On Tue, Jun 12 2007, Evgeniy Polyakov wrote:
 On Mon, Jun 11, 2007 at 01:59:26PM +0200, Jens Axboe ([EMAIL PROTECTED]) 
 wrote:
  Patches are against the #splice branch of the block repo, official url
  of that is:
  
  git://git.kernel.dk/data/git/linux-2.6-block.git/
  
  and it's based on Linus main tree. Let me know if I should supply netdev
  branch patches instead, or even just provide a rolled up patch (or patch
  series) for anyone curious to test or play with it.
 
 Hi Jens.
 
 I've just pulled your tree (splice-net, but splice tree looks the
 same, git pull says 'Already up-to-date.') on top of linus git and got
 following bug trace.  I will investigate it further tomorrow.

Please tell me the contents of splice-net, it looks like you didn't
actually use the new code. That BUG_ON() is in get_page(), which
splice-net no longer uses. So the bug report cannot be valid for the
current code.

-- 
Jens Axboe

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [RFC -v3] NET: Implement a standard ndev_printk family

2007-06-12 Thread Kok, Auke

Jeff Garzik wrote:

Joe Perches wrote:

On Mon, 2007-06-11 at 17:40 -0700, Auke Kok wrote:

+#define ndev_err(netdev, level, format, arg...) \
+   do { \
+   struct net_device *__nd = (netdev); \
+   if ((__nd)-msg_enable  NETIF_MSG_##level) \
+   printk(KERN_ERR %s: %s:  format, (__nd)-name, \
+   (__nd)-dev.parent-bus_id, ## arg); \
+   } while (0)
+

I think it's better to remove the macro concatenation/obfuscation
of the NETIF_MSG_##level argument and simply pass the appropriate
NETIF_MSG_type directly to these ndev_level calls.

It would also simplify the more than 300 calls in drivers/net of

if (netif_msg_type(ptr))
printk(foo)

to

ndev_level(netdev, NETIF_MSG_type, fmt, args)


I think this is a whole lot of iteration and effort for a non-problem.


Why do you say that? What is your motivation for that statement? Can you be a 
bit more descriptive/constructive?


I have often seen comments on drivers adding new printk's and lots of them 
completely ignore the msg_enable bits while advertising that they do through 
some debug/ethtool way. tg3, sky2, r8169, etc... all advertise that they allow 
setting/changing msg_enable yet don't actually do _anything_ with the bits.


Only 3 other drivers besides the ones I've patched get it right

How is that a non-problem?
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread David Miller
From: Patrick McHardy [EMAIL PROTECTED]
Date: Tue, 12 Jun 2007 15:21:54 +0200

 So how do we move forward?

We're going to put hw multiqueue support in, all of this discussion
has been pointless, I just watch this thread and basically laugh at
the resistance to hw multiqueue support :-)
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Jeff Garzik


If hardware w/ multiple queues will have the capability for different MAC 
addresses, different RX filters, etc. does it make sense to add that 
below the net_device level?


We will have to add all the configuration machinery at the per-queue 
level that already exists at the per-netdev level.


Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: mac80211 fixes for 2.6.22

2007-06-12 Thread David Miller
From: John W. Linville [EMAIL PROTECTED]
Date: Mon, 11 Jun 2007 21:16:16 -0400

 Here are a few mac80211 patches appropriate for 2.6.22.
 
 Individual patches to follow, or you can pull at your leisure...
...
   git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
 mac80211-fixes

Pulled, thanks John.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Ben Greear

Jeff Garzik wrote:


If hardware w/ multiple queues will the capability for different MAC 
addresses, different RX filters, etc. does it make sense to add that 
below the net_device level?


We will have to add all the configuration machinery at the per-queue 
level that already exists at the per-netdev level.


Perhaps the mac-vlan patch would be a good fit.  Currently it is all
software based, but if the hardware can filter on MAC, it can basically
do mac-vlan acceleration.  The mac-vlan devices are just like 'real' ethernet
devices, so they can be used with whatever schemes work with regular devices.

Thanks,
Ben



Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html



--
Ben Greear [EMAIL PROTECTED]
Candela Technologies Inc  http://www.candelatech.com

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Please pull 'libertas-fixes' branch of wireless-2.6

2007-06-12 Thread John W. Linville
Fixes identified by the libertas team as important for 2.6.22...

---

The following changes since commit 717c9339202a42ae7bec7d3c4b84deecdcae9f81:
  Dan Williams (1):
libertas: reduce SSID and BSSID mixed-case abuse

are found in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
libertas-fixes

Dan Williams (1):
  libertas: actually send mesh frames to mesh netdev

Luis Carlos (1):
  libertas: convert libertas_mpp into anycast_mask

Luis Carlos Cobo Rus (2):
  libertas: pull current channel from firmware on mesh autostart
  libertas: deauthenticate from AP in channel switch

 drivers/net/wireless/libertas/assoc.c   |   13 +
 drivers/net/wireless/libertas/assoc.h   |2 ++
 drivers/net/wireless/libertas/cmdresp.c |1 +
 drivers/net/wireless/libertas/dev.h |1 +
 drivers/net/wireless/libertas/host.h|4 ++--
 drivers/net/wireless/libertas/main.c|   27 ++-
 drivers/net/wireless/libertas/rx.c  |5 ++---
 7 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/libertas/assoc.c 
b/drivers/net/wireless/libertas/assoc.c
index ee82413..f67efa0 100644
--- a/drivers/net/wireless/libertas/assoc.c
+++ b/drivers/net/wireless/libertas/assoc.c
@@ -200,6 +200,14 @@ static int update_channel(wlan_private * priv)
cmd_option_waitforrsp, 0, NULL);
 }
 
+void libertas_sync_channel(struct work_struct *work)
+{
+   wlan_private *priv = container_of(work, wlan_private, sync_channel);
+
+   if (update_channel(priv) != 0)
+   lbs_pr_info(Channel synchronization failed.);
+}
+
 static int assoc_helper_channel(wlan_private *priv,
 struct assoc_request * assoc_req)
 {
@@ -403,6 +411,11 @@ static int should_deauth_infrastructure(wlan_adapter 
*adapter,
return 1;
}
 
+   if (test_bit(ASSOC_FLAG_CHANNEL, assoc_req-flags)) {
+   lbs_deb_assoc(Deauthenticating due to channel switch.\n);
+   return 1;
+   }
+
/* FIXME: deal with 'auto' mode somehow */
if (test_bit(ASSOC_FLAG_MODE, assoc_req-flags)) {
if (assoc_req-mode != IW_MODE_INFRA)
diff --git a/drivers/net/wireless/libertas/assoc.h 
b/drivers/net/wireless/libertas/assoc.h
index b5eddf8..5e9c31f 100644
--- a/drivers/net/wireless/libertas/assoc.h
+++ b/drivers/net/wireless/libertas/assoc.h
@@ -9,6 +9,8 @@ void libertas_association_worker(struct work_struct *work);
 
 struct assoc_request * wlan_get_association_request(wlan_adapter *adapter);
 
+void libertas_sync_channel(struct work_struct *work);
+
 #define ASSOC_DELAY (HZ / 2)
 static inline void wlan_postpone_association_work(wlan_private *priv)
 {
diff --git a/drivers/net/wireless/libertas/cmdresp.c 
b/drivers/net/wireless/libertas/cmdresp.c
index ebedd63..0c3b9a5 100644
--- a/drivers/net/wireless/libertas/cmdresp.c
+++ b/drivers/net/wireless/libertas/cmdresp.c
@@ -987,6 +987,7 @@ int libertas_process_event(wlan_private * priv)
netif_carrier_on(priv-mesh_dev) ;
}
adapter-mode = IW_MODE_ADHOC ;
+   schedule_work(priv-sync_channel);
break;
 
default:
diff --git a/drivers/net/wireless/libertas/dev.h 
b/drivers/net/wireless/libertas/dev.h
index d6c340a..785192b 100644
--- a/drivers/net/wireless/libertas/dev.h
+++ b/drivers/net/wireless/libertas/dev.h
@@ -150,6 +150,7 @@ struct _wlan_private {
 
struct delayed_work assoc_work;
struct workqueue_struct *assoc_thread;
+   struct work_struct sync_channel;
 
/** Hardware access */
int (*hw_register_dev) (wlan_private * priv);
diff --git a/drivers/net/wireless/libertas/host.h 
b/drivers/net/wireless/libertas/host.h
index cedf1db..7509cc1 100644
--- a/drivers/net/wireless/libertas/host.h
+++ b/drivers/net/wireless/libertas/host.h
@@ -310,8 +310,8 @@ enum cmd_mesh_access_opts {
cmd_act_mesh_get_ttl = 1,
cmd_act_mesh_set_ttl,
cmd_act_mesh_get_stats,
-   cmd_act_mesh_get_mpp,
-   cmd_act_mesh_set_mpp,
+   cmd_act_mesh_get_anycast,
+   cmd_act_mesh_set_anycast,
 };
 
 /** Card Event definition */
diff --git a/drivers/net/wireless/libertas/main.c 
b/drivers/net/wireless/libertas/main.c
index ec9be0c..623ab4b 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -178,45 +178,45 @@ u16 libertas_region_code_to_index[MRVDRV_MAX_REGION_CODE] 
=
  */
 
 /**
- * @brief Get function for sysfs attribute libertas_mpp
+ * @brief Get function for sysfs attribute anycast_mask
  */
-static ssize_t libertas_mpp_get(struct device * dev,
+static ssize_t libertas_anycast_get(struct device * dev,
struct device_attribute *attr, char * buf) {
struct cmd_ds_mesh_access mesh_access;
 
memset(&mesh_access, 0, sizeof(mesh_access));

Please pull 'libertas-upstream' branch of wireless-2.6

2007-06-12 Thread John W. Linville
Patches identified by the libertas team as suitable for 2.6.23...

---

The following changes since commit 82fde74b94f11eee1e9c30e43fb162f80a5e63c0:
  Luis Carlos (1):
libertas: convert libertas_mpp into anycast_mask

are found in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
libertas-upstream

Dan Williams (23):
  libertas: actually remove version.h
  libertas: kill wlan_scan_process_results
  libertas: kill ieeetypes_capinfo bitfield, use ieee80211.h types
  libertas: rename WLAN_802_11_KEY to enc_key and clean up usage
  libertas: clean up indentation in libertas_association_worker
  libertas: clean up 802.11 IE post-scan handling
  libertas: rename private ioctl constants and clean up ioctl handling
  libertas: remove if_bootcmd.c
  libertas: fix mixed-case abuse in cmd_ds_802_11_scan
  libertas: fix mixed-case abuse in cmd_ds_802_11_ad_hoc_result
  libertas: fix mixed-case abuse in cmd_ds_802_11_ad_hoc_start
  libertas: re-uppercase command defines and other constants
  libertas: fix debug build breakage due to field rename
  libertas: remove thread.h and make kthread usage clearer
  libertas: new mesh control knobs
  libertas: bump version to 322.p1
  libertas: wlan_ -> libertas_ rename in ioctl.c
  libertas: fix more mixed-case abuse
  libertas: move generic firmware reset command to common code
  libertas: wlan_ -> libertas_ function prefix renames for main.c
  libertas: simplify and clean up data rate handling
  libertas: fix MESH_[GET/SET]_BCASTR ioctl, clean up ioctl subcmd handling
  libertas: style fixes

Luis Carlos Cobo (1):
  libertas: specific mesh scan for mshX interface

 drivers/net/wireless/libertas/11d.c|   22 +-
 drivers/net/wireless/libertas/Makefile |1 -
 drivers/net/wireless/libertas/README   |   65 
 drivers/net/wireless/libertas/assoc.c  |   85 +++---
 drivers/net/wireless/libertas/cmd.c|  338 ++--
 drivers/net/wireless/libertas/cmdresp.c|  172 +-
 drivers/net/wireless/libertas/debugfs.c|  130 
 drivers/net/wireless/libertas/decl.h   |6 +-
 drivers/net/wireless/libertas/defs.h   |   66 ++---
 drivers/net/wireless/libertas/dev.h|   34 +--
 drivers/net/wireless/libertas/ethtool.c|8 +-
 drivers/net/wireless/libertas/fw.c |   43 ++--
 drivers/net/wireless/libertas/host.h   |  438 +-
 drivers/net/wireless/libertas/hostcmd.h|   69 ++---
 drivers/net/wireless/libertas/if_bootcmd.c |   40 ---
 drivers/net/wireless/libertas/if_usb.c |   58 ++--
 drivers/net/wireless/libertas/if_usb.h |1 -
 drivers/net/wireless/libertas/ioctl.c  |  478 +++-
 drivers/net/wireless/libertas/join.c   |  368 +++---
 drivers/net/wireless/libertas/join.h   |2 +
 drivers/net/wireless/libertas/main.c   |  237 +--
 drivers/net/wireless/libertas/rx.c |9 +-
 drivers/net/wireless/libertas/scan.c   |  355 -
 drivers/net/wireless/libertas/scan.h   |   10 +-
 drivers/net/wireless/libertas/thread.h |   52 ---
 drivers/net/wireless/libertas/tx.c |2 +-
 drivers/net/wireless/libertas/types.h  |   65 +
 drivers/net/wireless/libertas/version.h|1 -
 drivers/net/wireless/libertas/wext.c   |  428 --
 drivers/net/wireless/libertas/wext.h   |   68 +++--
 30 files changed, 1684 insertions(+), 1967 deletions(-)
 delete mode 100644 drivers/net/wireless/libertas/if_bootcmd.c
 delete mode 100644 drivers/net/wireless/libertas/thread.h
 delete mode 100644 drivers/net/wireless/libertas/version.h

Omnibus patch attached as libertas-upstream.diff.bz2 due to size concerns.
-- 
John W. Linville
[EMAIL PROTECTED]


libertas-upstream.diff.bz2
Description: BZip2 compressed data


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread David Miller
From: Ben Greear [EMAIL PROTECTED]
Date: Tue, 12 Jun 2007 14:17:44 -0700

 Jeff Garzik wrote:
  
  If hardware w/ multiple queues will have the capability for different MAC 
  addresses, different RX filters, etc. does it make sense to add that 
  below the net_device level?
  
  We will have to add all the configuration machinery at the per-queue 
  level that already exists at the per-netdev level.
 
 Perhaps the mac-vlan patch would be a good fit.  Currently it is all
 software based, but if the hardware can filter on MAC, it can basically
 do mac-vlan acceleration.  The mac-vlan devices are just like 'real' ethernet
 devices, so they can be used with whatever schemes work with regular devices.

Interesting.

But to answer Jeff's question, that's not really the model being
used to implement multiple queues.

The MAC is still very much centralized in most designs.

So one way they'll do it is to support assigning N MAC addresses,
and you configure the input filters of the chip to push packets
for each MAC to the proper receive queue.

So the MAC will accept any of those in the N MAC addresses as
its own, then you use the filtering facilities to steer
frames to the correct RX queue.

The TX and RX queues can be so isolated as to be able to be exported
to virtualization nodes.  You can give them full access to the DMA
queues and associated mailboxes.  So instead of all of this bogus
virtualized device overhead, you just give the guest access to the
real device.

So you can use multiple queues either for better single node SMP
performance, or better virtualization performance.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: TCP_MD5 and Intel e1000

2007-06-12 Thread David Miller
From: David Miller [EMAIL PROTECTED]
Date: Tue, 22 May 2007 03:14:32 -0700 (PDT)

 From: YOSHIFUJI Hideaki / 吉藤英明 [EMAIL PROTECTED]
 Date: Tue, 22 May 2007 18:36:47 +0900 (JST)
 
  In article [EMAIL PROTECTED] (at Tue, 22 May 2007 10:57:38 +0200), Eric 
  Dumazet [EMAIL PROTECTED] says:
  
I have tried to set up quagga with tcp-md5 support from kernel. All 
seems ok
with an Intel e100 NIC, but when I tested with an Intel e1000 NIC the tcp
packets have an invalid md5 digest.
If I run tcpdump on the machine where the packets are generated, it shows
invalid md5 digests on the outgoing interface.
Are there known issues about tcp-md5 and e1000 NICs?
  :
   You could try ethtool -K tx off, and/or other ethtool -K settings
  
  Disabling offloading should help; currently the tcp-md5 stack
  blindly copies the md5-signature from the first segment,
  which is not appropriate for the rest of the segments.
 
 It is clear we should disable TSO for sockets making use of TCP-MD5.

I'm going to fix this as follows:

commit 3d7dbeac58d0669c37e35a3b91bb41c0146395ce
Author: David S. Miller [EMAIL PROTECTED]
Date:   Tue Jun 12 14:36:42 2007 -0700

[TCP]: Disable TSO if MD5SIG is enabled.

Signed-off-by: David S. Miller [EMAIL PROTECTED]

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 97e294e..354721d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -878,6 +878,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
kfree(newkey);
return -ENOMEM;
}
+   sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
if (tcp_alloc_md5sig_pool() == NULL) {
kfree(newkey);
@@ -1007,7 +1008,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char 
__user *optval,
return -EINVAL;
 
tp->md5sig_info = p;
-
+   sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
 
newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4f06a51..193d9d6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -590,6 +590,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct 
in6_addr *peer,
kfree(newkey);
return -ENOMEM;
}
+   sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
tcp_alloc_md5sig_pool();
if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
@@ -724,6 +725,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char 
__user *optval,
return -ENOMEM;
 
tp->md5sig_info = p;
+   sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
 
newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Jeff Garzik

David Miller wrote:

From: Ben Greear [EMAIL PROTECTED]
Date: Tue, 12 Jun 2007 14:17:44 -0700


Jeff Garzik wrote:
If hardware w/ multiple queues will have the capability for different MAC 
addresses, different RX filters, etc. does it make sense to add that 
below the net_device level?


We will have to add all the configuration machinery at the per-queue 
level that already exists at the per-netdev level.

Perhaps the mac-vlan patch would be a good fit.  Currently it is all
software based, but if the hardware can filter on MAC, it can basically
do mac-vlan acceleration.  The mac-vlan devices are just like 'real' ethernet
devices, so they can be used with whatever schemes work with regular devices.


Interesting.

But to answer Jeff's question, that's not really the model being
used to implement multiple queues.

The MAC is still very much centralized in most designs.

So one way they'll do it is to support assigning N MAC addresses,
and you configure the input filters of the chip to push packets
for each MAC to the proper receive queue.

So the MAC will accept any of those in the N MAC addresses as
its own, then you use the filtering facilities to steer
frames to the correct RX queue.


Not quite...  You'll have to deal with multiple Rx filters, not just the 
current one-filter-for-all model present in today's NICs.  Pools of 
queues will have separate configured characteristics.  The steer 
portion you mention is a bottleneck that wants to be eliminated.


Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Ben Greear

David Miller wrote:

From: Ben Greear [EMAIL PROTECTED]
Date: Tue, 12 Jun 2007 14:17:44 -0700


Jeff Garzik wrote:
If hardware w/ multiple queues will have the capability for different MAC 
addresses, different RX filters, etc. does it make sense to add that 
below the net_device level?


We will have to add all the configuration machinery at the per-queue 
level that already exists at the per-netdev level.

Perhaps the mac-vlan patch would be a good fit.  Currently it is all
software based, but if the hardware can filter on MAC, it can basically
do mac-vlan acceleration.  The mac-vlan devices are just like 'real' ethernet
devices, so they can be used with whatever schemes work with regular devices.


Interesting.

But to answer Jeff's question, that's not really the model being
used to implement multiple queues.

The MAC is still very much centralized in most designs.

So one way they'll do it is to support assigning N MAC addresses,
and you configure the input filters of the chip to push packets
for each MAC to the proper receive queue.

So the MAC will accept any of those in the N MAC addresses as
its own, then you use the filtering facilities to steer
frames to the correct RX queue.

The TX and RX queues can be so isolated as to be able to be exported
to virtualization nodes.  You can give them full access to the DMA
queues and associated mailboxes.  So instead of all of this bogus
virtualized device overhead, you just give the guest access to the
real device.

So you can use multiple queues either for better single node SMP
performance, or better virtualization performance.


That sounds plausible for many uses, but it may also be useful to have
the virtual devices.  Having 802.1Q VLANs be 'real' devices has worked out
quite well, so I think there is a place for a 'mac-vlan' as well.

With your description above, the 'correct RX queue' could be the
only queue that the mac-vlan sees, so it would behave somewhat like
a vanilla ethernet driver.  When the mac-vlan transmits, it could
transmit directly into its particular TX queue on the underlying device.

In a non guest environment, I believe the mac-vlan will act somewhat like
a more flexible form of an ip-alias.  When name-spaces are implemented,
the mac-vlan would very easily allow the different name-spaces to share the 
same physical
hardware.  The overhead should be minimal, and it's likely that using
a 'real' network device will be a lot easier to maintain than trying to directly
share separate queues on a single device that is somehow visible in multiple
namespaces.

And, since the mac-vlan can work as pure software on top of any NIC that
can go promisc and send with arbitrary source MAC, it will already work
with virtually all wired ethernet devices currently in existence.

Thanks,
Ben


--
Ben Greear [EMAIL PROTECTED]
Candela Technologies Inc  http://www.candelatech.com

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Roland Dreier
   The MAC is still very much centralized in most designs.
   So one way they'll do it is to support assigning N MAC addresses,
   and you configure the input filters of the chip to push packets
   for each MAC to the proper receive queue.
   So the MAC will accept any of those in the N MAC addresses as
   its own, then you use the filtering facilities to steer
   frames to the correct RX queue.
  
  Not quite...  You'll have to deal with multiple Rx filters, not just
  the current one-filter-for-all model present in today's NICs.  Pools
  of queues will have separate configured characteristics.  The steer
  portion you mention is a bottleneck that wants to be eliminated.

I think you're misunderstanding.  These NICs still have only one
physical port, so sending or receiving real packets onto a physical
wire is fundamentally serialized.  The steering of packets to receive
queues is done right after the packets are received from the wire --
in fact it can be done as soon as the NIC has parsed enough of the
headers to make a decision, which might be before the full packet has
even been received.  The steering is no more of a bottleneck than the
physical link is.

 - R.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread David Miller
From: Jeff Garzik [EMAIL PROTECTED]
Date: Tue, 12 Jun 2007 17:46:20 -0400

 Not quite...  You'll have to deal with multiple Rx filters, not just the 
 current one-filter-for-all model present in today's NICs.  Pools of 
 queues will have separate configured characteristics.  The steer 
 portion you mention is a bottleneck that wants to be eliminated.

It runs in hardware at wire speed, what's the issue? :-)
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread David Miller
From: Ben Greear [EMAIL PROTECTED]
Date: Tue, 12 Jun 2007 14:46:50 -0700

 And, since the mac-vlan can work as pure software on top of any NIC that
 can go promisc and send with arbitrary source MAC, it will already work
 with virtually all wired ethernet devices currently in existence.

Absolutely, I'm not against something like mac-vlan at all.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Please pull 'libertas' branch of wireless-2.6 (resent w/o attachment)

2007-06-12 Thread John W. Linville
Resending w/o the attached patch, in case it was too big...yikes!

Individual patches are available here:


http://www.kernel.org/pub/linux/kernel/people/linville/wireless-2.6/libertas

John

---

Jeff,

This is the same as the previous pull request, only rebased on
2.6.22-rc4.  Since this is a big pull already, I didn't want to
complicate it with the additional patches identified by the libertas
team as 2.6.22-worthy.

John

---

The following changes since commit 5ecd3100e695228ac5e0ce0e325e252c0f11806f:
  Linus Torvalds (1):
Linux 2.6.22-rc4

are found in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
libertas

Chris Ball (1):
  libertas: wakeup both mesh and normal wakeup when getting out of scan

Dan Williams (25):
  libertas: call SET_NETDEV_DEV from common code
  libertas: replace 'macaddress' with 'bssid'
  libertas: correctly unregister mesh netdev on error
  libertas: don't tear down netdev in libertas_activate_card
  libertas: make scan result handling more flexible
  libertas: fix 'keep previous scan' behavior
  libertas: move channel changing into association framework
  libertas: make association paths consistent
  libertas: use MAC_FMT and MAC_ARG where appropriate
  libertas: use compare_ether_addr() rather than memcmp() where appropriate
  libertas: fix debug enter/leave prints for libertas_execute_next_command
  libertas: correctly balance locking in libertas_process_rx_command
  libertas: correct error report paths for wlan_fwt_list_ioctl
  libertas: fix deadlock SIOCGIWSCAN handler
  libertas: fix default adhoc channel
  libertas: honor specific channel requests during association
  libertas: send SIOCGIWSCAN event after partial scans too
  libertas: debug print spacing fixes in assoc.c
  libertas: add more verbose debugging to libertas_cmd_80211_authenticate
  libertas: Make WPA work through supplicant handshake
  libertas: sparse fixes
  libertas: tweak association debug output
  libertas: remove structure WLAN_802_11_SSID and libertas_escape_essid
  libertas: remove WPA_SUPPLICANT structure
  libertas: reduce SSID and BSSID mixed-case abuse

David Woodhouse (6):
  libertas: fix character set in README
  libertas: first pass at fixing up endianness issues
  libertas: More endianness fixes.
  libertas: more endianness fixes, in tx.c this time
  libertas: don't byte-swap firmware version number. It's a byte array.
  libertas: fix big-endian associate command.

Holger Schurig (23):
  libertas: rename wlan_association_worker
  libertas: a debug output was missing a newline
  libertas: fix removal of all debugfs files
  libertas: remove __FILE__ from debug output
  libertas: remove unused/superfluous definitions of DEV_NAME_LEN
  libertas: move vendor & product id's into if_usb.c
  libertas: make libertas_wlan_data_rates static
  libertas: exclude non-used code when PROC_DEBUG is not set
  libertas: make debug configurable
  libertas: tune debug code
  libertas: single out mesh code
  libertas: change debug output of libertas_interrupt()
  libertas: get rid of libertas_sbi_get_priv()
  libertas: fix SSID output
  libertas: changed some occurences of kmalloc() + memset(a,0,sz) to 
kzalloc()
  libertas: move reset_device() code main.c to if_usb.c
  libertas: split wlan_add_card()
  libertas: indirect all hardware access via hw_ functions
  libertas: move contents of fw.h to decl.h
  libertas: split module into two (libertas.ko and usb8xxx.ko)
  libertas: fix RESET logic at unload time
  libertas: let DRV_NAME be overridable
  libertas: remove unused variables in wlan_dev_t

Javier Cardona (2):
  libertas: fixed transmission flow control on the mesh interface
  libertas: added transmission failures to mesh statistics

Luis Carlos Cobo (4):
  libertas: fixed incorrect assigment of fcs errors to frag errors
  libertas: add URB debug info
  libertas: fixed kernel oops on module/card removal
  libertas: updated mesh commands for 5.220.9.p11

Luis Carlos Cobo Rus (6):
  libertas: version bump (321p0) and cmds update for new fw (5.220.10.p0)
  libertas: cleanup of fwt_list_route processing
  libertas: updated readme file
  libertas: make mac address configuration work with mesh interface too
  libertas: split wext for eth and msh
  libertas: support for mesh autostart on firmware 5.220.11

Marcelo Tosatti (5):
  libertas: scan two channels per scan command
  libertas: remove deprecated pm_register and associated code
  libertas: fix scanning from associate path
  libertas: fix error handling of card initialization
  libertas: fix oops on rmmod

 drivers/net/wireless/Kconfig   |   19 +-
 drivers/net/wireless/libertas/11d.c|  

Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread David Miller
From: Jason Lunz [EMAIL PROTECTED]
Date: Tue, 12 Jun 2007 17:47:53 -0400

 Are you aware of any hardware designs that allow other ways to map
 packets onto rx queues?  I can think of several scenarios where it could
 be advantageous to map packets by IP 3- or 5-tuple to get cpu locality
 all the way up the stack on a flow-by-flow basis. But doing this would
 require some way to request this mapping from the hardware.

These chips allow this too, Microsoft defined a standard for
RX queue interrupt hashing by flow so everyone puts it, or
something like it, in hardware.

 In the extreme case it would be cool if it were possible to push a
 bpf-like classifier down into the hardware to allow arbitrary kinds of
 flow distribution.

Maybe not a full bpf, but many chips allow something close.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Jeff Garzik

Roland Dreier wrote:

   The MAC is still very much centralized in most designs.
   So one way they'll do it is to support assigning N MAC addresses,
   and you configure the input filters of the chip to push packets
   for each MAC to the proper receive queue.
   So the MAC will accept any of those in the N MAC addresses as
   its own, then you use the filtering facilities to steer
   frames to the correct RX queue.
  
  Not quite...  You'll have to deal with multiple Rx filters, not just

  the current one-filter-for-all model present in today's NICs.  Pools
  of queues will have separate configured characteristics.  The steer
  portion you mention is a bottleneck that wants to be eliminated.

I think you're misunderstanding.  These NICs still have only one
physical port, so sending or receiving real packets onto a physical
wire is fundamentally serialized.  The steering of packets to receive
queues is done right after the packets are received from the wire --
in fact it can be done as soon as the NIC has parsed enough of the
headers to make a decision, which might be before the full packet has
even been received.  The steering is no more of a bottleneck than the
physical link is.


No, you're misreading.  People are putting in independent configurable 
Rx filters because a single Rx filter setup for all queues was a 
bottleneck.  Not a performance bottleneck but a configuration and 
flexibility limitation that's being removed.


And where shall we put the configuration machinery, to support sub-queues?
Shall we duplicate the existing configuration code for sub-queues?
What will ifconfig/ip usage look like?
How will it differ from configuring full net_devices, if you are 
assigning the same types of parameters?


Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread David Miller
From: Roland Dreier [EMAIL PROTECTED]
Date: Tue, 12 Jun 2007 14:52:11 -0700

 I think you're misunderstanding.  These NICs still have only one
 physical port, so sending or receiving real packets onto a physical
 wire is fundamentally serialized.  The steering of packets to receive
 queues is done right after the packets are received from the wire --
 in fact it can be done as soon as the NIC has parsed enough of the
 headers to make a decision, which might be before the full packet has
 even been received.  The steering is no more of a bottleneck than the
 physical link is.

Yep, that's right.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread David Miller
From: Jeff Garzik [EMAIL PROTECTED]
Date: Tue, 12 Jun 2007 17:59:43 -0400

 And where shall we put the configuration machinery, to support sub-queues?
 Shall we duplicate the existing configuration code for sub-queues?
 What will ifconfig/ip usage look like?
 How will it differ from configuring full net_devices, if you are 
 assigning the same types of parameters?

If you're asking about the virtualization scenario, the
control node (dom0 or whatever) is the only entity which
can get at programming the filters and will set it up
properly based upon which parts of the physical device
are being exported to which guest nodes.

For the non-virtualized case, it's a good question.

But really the current hardware is just about simple queue steering,
and simple static DRR/WRED fairness algorithms applied to the queues
in hardware.

We don't need to add support for configuring anything fancy from the
start just to get something working.  Especially the important bits
such as the virtualization case and the interrupt and queue
distribution case on SMP.  The latter can even be configured
automatically by the driver, and that's in fact what I expect
drivers to do initially.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Jason Lunz
On Tue, Jun 12, 2007 at 02:55:34PM -0700, David Miller wrote:
 These chips allow this too, Microsoft defined a standard for
 RX queue interrupt hashing by flow so everyone puts it, or
 something like it, in hardware.

I think you're referring to RSS?

http://www.microsoft.com/whdc/device/network/NDIS_RSS.mspx
http://msdn2.microsoft.com/en-us/library/ms795609.aspx

Jason
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Jeff Garzik

David Miller wrote:

If you're asking about the virtualization scenario, the
control node (dom0 or whatever) is the only entity which
can get at programming the filters and will set it up
properly based upon which parts of the physical device
are being exported to which guest nodes.


You're avoiding the question.  Clearly guest VMs must contact the host 
VM (dom0) to get real work done.


They are ultimately going to have to pass the same configuration info as 
the non-virt case.




For the non-virtualized case, it's a good question.


...




But really the current hardware is just about simple queue steering,
and simple static DRR/WRED fairness algorithms applied to the queues
in hardware.

We don't need to add support for configuring anything fancy from the
start just to get something working.


Correct.  But if we don't plan for the future that's currently in the 
silicon pipeline, our ass will be in a sling WHEN we must figure out the 
best configuration points for sub-queues.


Or are we prepared to rip out sub-queues for a non-experimental 
solution, when confronted with the obvious necessity of configuring them?


You know I want multi-queue and the increased parallelism it provides.  A lot.

But let's not dig ourselves into a hole we must climb out of in 6-12 
months.  We need to think about configuration issues -now-.


Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Jeff Garzik

Ben Greear wrote:

That sounds plausible for many uses, but it may also be useful to have
the virtual devices.  Having 802.1Q VLANs be 'real' devices has worked out
quite well, so I think there is a place for a 'mac-vlan' as well.


Virtual devices are pretty much the only solution we have right now, 
both in terms of available control points, and in terms of mapping to 
similar existing solutions (like wireless and its multiple net devices).


Jeff


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Jason Lunz
On Tue, Jun 12, 2007 at 02:26:58PM -0700, David Miller wrote:
 The MAC is still very much centralized in most designs.
 
 So one way they'll do it is to support assigning N MAC addresses,
 and you configure the input filters of the chip to push packets
 for each MAC to the proper receive queue.
 
 So the MAC will accept any of those in the N MAC addresses as
 its own, then you use the filtering facilities to steer
 frames to the correct RX queue.
 
 The TX and RX queues can be so isolated as to be able to be exported
 to virtualization nodes.  You can give them full access to the DMA
 queues and associated mailboxes.  So instead of all of this bogus
 virtualized device overhead, you just give the guest access to the
 real device.
 
 So you can use multiple queues either for better single node SMP
 performance, or better virtualization performance.

Are you aware of any hardware designs that allow other ways to map
packets onto rx queues?  I can think of several scenarios where it could
be advantageous to map packets by IP 3- or 5-tuple to get cpu locality
all the way up the stack on a flow-by-flow basis. But doing this would
require some way to request this mapping from the hardware.

In the extreme case it would be cool if it were possible to push a
bpf-like classifier down into the hardware to allow arbitrary kinds of
flow distribution.

Jason
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Ben Greear

Jeff Garzik wrote:

Ben Greear wrote:

That sounds plausible for many uses, but it may also be useful to have
the virtual devices.  Having 802.1Q VLANs be 'real' devices has worked 
out

quite well, so I think there is a place for a 'mac-vlan' as well.


Virtual devices are pretty much the only solution we have right now, 
both in terms of available control points, and in terms of mapping to 
similar existing solutions (like wireless and its multiple net devices).


I believe Patrick is working on cleaning up mac-vlans and converting them
to use the new netlink configuration API, so there should be a patch for
these hitting the list shortly.

Thanks,
Ben


--
Ben Greear [EMAIL PROTECTED]
Candela Technologies Inc  http://www.candelatech.com

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Please pull 'libertas-fixes' branch of wireless-2.6

2007-06-12 Thread Jeff Garzik

John W. Linville wrote:

Fixes identified by the libertas team as important for 2.6.22...

---

The following changes since commit 717c9339202a42ae7bec7d3c4b84deecdcae9f81:
  Dan Williams (1):
libertas: reduce SSID and BSSID mixed-case abuse

are found in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
libertas-fixes

Dan Williams (1):
  libertas: actually send mesh frames to mesh netdev

Luis Carlos (1):
  libertas: convert libertas_mpp into anycast_mask

Luis Carlos Cobo Rus (2):
  libertas: pull current channel from firmware on mesh autostart
  libertas: deauthenticate from AP in channel switch


Just to be clear, you intend 'libertas' and 'libertas-fixes' (in that 
order) for 2.6.22, and 'libertas-upstream' for 2.6.23?


Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] NetXen: Add correct routines to setup multicast address

2007-06-12 Thread Jeff Garzik

Mithlesh Thukral wrote:

NetXen: Add multi cast filter code
This patch adds multi cast filter code to NetXen NIC driver.
It also adds capabilities to setup the multicast address in hardware
from the host side.

Signed-off by: Mithlesh Thukral [EMAIL PROTECTED]
---

 drivers/net/netxen/netxen_nic.h |   24 
 drivers/net/netxen/netxen_nic_hdr.h |3 
 drivers/net/netxen/netxen_nic_hw.c  |  132 +-

 3 files changed, 156 insertions(+), 3 deletions(-)


Michael seems to keep finding endian bugs in this code...


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/15] spidernet driver bug fixes

2007-06-12 Thread Jeff Garzik

Linas Vepstas wrote:

On Fri, Jun 08, 2007 at 01:20:20PM -0400, Jeff Garzik wrote:

On Fri, Jun 08, 2007 at 12:06:08PM -0500, Linas Vepstas wrote:

On Fri, Jun 08, 2007 at 11:12:31AM +1000, Michael Ellerman wrote:

On Thu, 2007-06-07 at 14:17 -0500, Linas Vepstas wrote:
The major bug fixes are: 

I realise it's late, but shouldn't major bugfixes be going into 22 ?
Yeah, I suppose, I admit I've lost track of the process. 
You need to order your bug fixes first in the queue. 


OK, here are the patches, re-ordered. There is a different number
than last time, as I threw out one, merged one, and got cold feet
on a third one.  They still pass the tests.

The first five patches focus on three serious bugs, fixing crashes or
hangs.

-- patch 1 -- kernel crash when ifdown while receiving packets.
-- patch 2,3,4 -- device driver deadlocks on RX ram full mesgs.
  (kernel stays up, ifdown/up clear the problem).
-- patch 5 -- misconfigured TX interrupts results in 3x-4x per
  degradation for small packets.

-- patch 6 -- rx stats may be mangled
-- patch 7 -- hw checksum sometimes breaks ipv6 operation

-- patches 8-15 -- misc tweaks, and documentation.


I re-ran my stress tests with patches 1-7 applied; they pass.


This is a bit frustrating, because this includes many patches that you 
ALREADY told me to queue for 2.6.23, which I did, in 
netdev-2.6.git#upstream.


Should I just drop all spidernet patches and start over?

Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[2.6 patch] net/sunrpc/rpcb_clnt.c: make struct rpcb_program static

2007-06-12 Thread Adrian Bunk
This patch makes the needlessly global struct rpcb_program static.

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]

---

 net/sunrpc/rpcb_clnt.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- linux-2.6.22-rc4-mm2/net/sunrpc/rpcb_clnt.c.old 2007-06-12 
23:25:01.0 +0200
+++ linux-2.6.22-rc4-mm2/net/sunrpc/rpcb_clnt.c 2007-06-12 23:25:19.0 
+0200
@@ -118,7 +118,7 @@
 #define RPCB_MAXOWNERLEN   sizeof(RPCB_OWNER_STRING)
 
 static voidrpcb_getport_done(struct rpc_task *, void *);
-extern struct rpc_program  rpcb_program;
+static struct rpc_program  rpcb_program;
 
 struct rpcbind_args {
struct rpc_xprt *   r_xprt;
@@ -616,7 +616,7 @@
 
 static struct rpc_stat rpcb_stats;
 
-struct rpc_program rpcb_program = {
+static struct rpc_program rpcb_program = {
.name   = rpcbind,
.number = RPCBIND_PROGRAM,
.nrvers = ARRAY_SIZE(rpcb_version),

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Resend: [PATCH] pktgen IPSEC 3/4: Introduce xfrm SAD only lookup

2007-06-12 Thread jamal

This takes into consideration Patrick's feedback.

cheers,
jamal

commit 4fe3190756589ef8155eb97fe725f2564f1fc77d
Author: Jamal Hadi Salim [EMAIL PROTECTED]
Date:   Tue Jun 12 12:35:39 2007 -0400

[XFRM] Introduce standalone SAD lookup
This allows other in-kernel functions to do SAD lookups.
The only known user at the moment is pktgen.

Signed-off-by: Jamal Hadi Salim [EMAIL PROTECTED]

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 311f25a..79d2c37 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -920,6 +920,10 @@ extern struct xfrm_state *xfrm_state_find(xfrm_address_t 
*daddr, xfrm_address_t
  struct flowi *fl, struct xfrm_tmpl 
*tmpl,
  struct xfrm_policy *pol, int *err,
  unsigned short family);
+extern struct xfrm_state * xfrm_stateonly_find(xfrm_address_t *daddr,
+  xfrm_address_t *saddr,
+  unsigned short family,
+  u8 mode, u8 proto, u32 reqid);
 extern int xfrm_state_check_expire(struct xfrm_state *x);
 extern void xfrm_state_insert(struct xfrm_state *x);
 extern int xfrm_state_add(struct xfrm_state *x);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 85f3f43..8d14cd4 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -686,6 +686,37 @@ out:
return x;
 }
 
+struct xfrm_state *
+xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
+   unsigned short family, u8 mode, u8 proto, u32 reqid)
+{
+   unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
+   struct xfrm_state *rx = NULL, *x = NULL;
+   struct hlist_node *entry;
+
+   spin_lock(&xfrm_state_lock);
+   hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+   if (x->props.family == family &&
+   x->props.reqid == reqid &&
+   !(x->props.flags & XFRM_STATE_WILDRECV) &&
+   xfrm_state_addr_check(x, daddr, saddr, family) &&
+   mode == x->props.mode &&
+   proto == x->id.proto &&
+   x->km.state == XFRM_STATE_VALID) {
+   rx = x;
+   break;
+   }
+   }
+
+   if (rx)
+   xfrm_state_hold(rx);
+   spin_unlock(&xfrm_state_lock);
+
+
+   return rx;
+}
+EXPORT_SYMBOL(xfrm_stateonly_find);
+
 static void __xfrm_state_insert(struct xfrm_state *x)
 {
unsigned int h;


Resend: [PATCH] pktgen IPSEC 4/4: Add IPSEC support to pktgen

2007-06-12 Thread jamal

Sorry Robert, I found a problem compiling when i turned off XFRM. This
fixes it.

cheers,
jamal

commit bfd389bba7654aa118f0949ff0de45a3bce9700c
Author: Jamal Hadi Salim [EMAIL PROTECTED]
Date:   Tue Jun 12 18:59:33 2007 -0400

[PKTGEN] IPSEC support
Added transport mode ESP support for starters.
I will send more of these modes and types once i have resolved
the tunnel mode issues.

Signed-off-by: Jamal Hadi Salim [EMAIL PROTECTED]

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index bc4fb3b..e7d1dff 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -152,6 +152,9 @@
 #include <net/checksum.h>
 #include <net/ipv6.h>
 #include <net/addrconf.h>
+#ifdef CONFIG_XFRM
+#include <net/xfrm.h>
+#endif
 #include <asm/byteorder.h>
 #include <linux/rcupdate.h>
 #include <asm/bitops.h>
@@ -182,6 +185,7 @@
 #define F_VID_RND (1<<9)   /* Random VLAN ID */
 #define F_SVID_RND(1<<10)  /* Random SVLAN ID */
 #define F_FLOW_SEQ(1<<11)  /* Sequential flows */
+#define F_IPSEC_ON(1<<12)  /* ipsec on for flows */
 
 /* Thread control flag bits */
 #define T_TERMINATE   (10)
@@ -208,6 +212,9 @@ static struct proc_dir_entry *pg_proc_dir = NULL;
 struct flow_state {
__be32 cur_daddr;
int count;
+#ifdef CONFIG_XFRM
+   struct xfrm_state *x;
+#endif
__u32 flags;
 };
 
@@ -348,7 +355,10 @@ struct pktgen_dev {
unsigned lflow; /* Flow length  (config) */
unsigned nflows;/* accumulated flows (stats) */
unsigned curfl; /* current sequenced flow (state)*/
-
+#ifdef CONFIG_XFRM
+   __u8ipsmode;/* IPSEC mode (config) */
+   __u8ipsproto;   /* IPSEC type (config) */
+#endif
char result[512];
 };
 
@@ -704,6 +714,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_printf(seq,  FLOW_RND  );
}
 
+#ifdef CONFIG_XFRM
+   if (pkt_dev-flags  F_IPSEC_ON)
+   seq_printf(seq,  IPSEC  );
+#endif
+
if (pkt_dev-flags  F_MACSRC_RND)
seq_printf(seq, MACSRC_RND  );
 
@@ -1198,6 +1213,11 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, FLOW_SEQ) == 0)
pkt_dev-flags |= F_FLOW_SEQ;
 
+#ifdef CONFIG_XFRM
+   else if (strcmp(f, IPSEC) == 0)
+   pkt_dev-flags |= F_IPSEC_ON;
+#endif
+
else if (strcmp(f, !IPV6) == 0)
pkt_dev-flags = ~F_IPV6;
 
@@ -1206,7 +1226,7 @@ static ssize_t pktgen_if_write(struct file *file,
Flag -:%s:- unknown\nAvailable flags, (prepend 
! to un-set flag):\n%s,
f,
IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, 
-   MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, 
MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ\n);
+   MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, 
MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n);
return count;
}
sprintf(pg_result, OK: flags=0x%x, pkt_dev-flags);
@@ -2094,6 +2114,7 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 
spin_until_us)
 
 static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
 {
+   pkt_dev-pkt_overhead = 0;
pkt_dev-pkt_overhead += pkt_dev-nr_labels*sizeof(u32);
pkt_dev-pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
pkt_dev-pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
@@ -2130,6 +2151,31 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
return pkt_dev-curfl;
 }
 
+
+#ifdef CONFIG_XFRM
+/* If there was already an IPSEC SA, we keep it as is, else
+ * we go look for it ...
+*/
+inline
+void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
+{
+   struct xfrm_state *x = pkt_dev-flows[flow].x;
+   if (!x) {
+   /*slow path: we dont already have xfrm_state*/
+   x = xfrm_stateonly_find((xfrm_address_t *)pkt_dev-cur_daddr,
+   (xfrm_address_t *)pkt_dev-cur_saddr,
+   AF_INET,
+   pkt_dev-ipsmode,
+   pkt_dev-ipsproto, 0);
+   if (x) {
+   pkt_dev-flows[flow].x = x;
+   set_pkt_overhead(pkt_dev);
+   pkt_dev-pkt_overhead+=x-props.header_len;
+   }
+
+   }
+}
+#endif
 /* Increment/randomize headers according to flags and current values
  * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
  */
@@ -2289,6 +2335,10 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev-flows[flow].flags |= F_INIT;
pkt_dev-flows[flow].cur_daddr =
pkt_dev-cur_daddr;
+#ifdef CONFIG_XFRM
+   if (pkt_dev-flags  F_IPSEC_ON)
+ 

Re: [PATCH net-2.6 1/1] [TCP]: Fix left_out setting during FRTO

2007-06-12 Thread David Miller
From: Ilpo_Järvinen [EMAIL PROTECTED]
Date: Tue, 12 Jun 2007 11:50:29 +0300 (EEST)

 Without FRTO, the tcp_try_to_open is never called with
 lost_out  0 (see tcp_time_to_recover). However, when FRTO is
 enabled, the !tp-lost condition is not used until end of FRTO
 because that way TCP avoids premature entry to fast recovery
 during FRTO.
 
 Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]

Thanks for catching this, patch applied.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [2.6 patch] net/sunrpc/rpcb_clnt.c: make struct rpcb_program static

2007-06-12 Thread Chuck Lever

Adrian Bunk wrote:

This patch makes the needlessly global struct rpcb_program static.

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]


Acked-by: Chuck Lever [EMAIL PROTECTED]


---

 net/sunrpc/rpcb_clnt.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- linux-2.6.22-rc4-mm2/net/sunrpc/rpcb_clnt.c.old 2007-06-12 
23:25:01.0 +0200
+++ linux-2.6.22-rc4-mm2/net/sunrpc/rpcb_clnt.c 2007-06-12 23:25:19.0 
+0200
@@ -118,7 +118,7 @@
 #define RPCB_MAXOWNERLEN   sizeof(RPCB_OWNER_STRING)
 
 static void			rpcb_getport_done(struct rpc_task *, void *);

-extern struct rpc_program  rpcb_program;
+static struct rpc_program  rpcb_program;
 
 struct rpcbind_args {

struct rpc_xprt *   r_xprt;
@@ -616,7 +616,7 @@
 
 static struct rpc_stat rpcb_stats;
 
-struct rpc_program rpcb_program = {

+static struct rpc_program rpcb_program = {
.name   = rpcbind,
.number = RPCBIND_PROGRAM,
.nrvers = ARRAY_SIZE(rpcb_version),



begin:vcard
fn:Chuck Lever
n:Lever;Chuck
org:Oracle Corporation;Corporate Architecture: Linux Projects Group
adr:;;1015 Granger Avenue;Ann Arbor;MI;48104;USA
title:Principal Member of Staff
tel;work:+1 248 614 5091
x-mozilla-html:FALSE
url:http://oss.oracle.com/~cel/
version:2.1
end:vcard



Re: [PATCH 0/15] spidernet driver bug fixes

2007-06-12 Thread Linas Vepstas
On Tue, Jun 12, 2007 at 07:00:17PM -0400, Jeff Garzik wrote:
 Linas Vepstas wrote:
 On Fri, Jun 08, 2007 at 01:20:20PM -0400, Jeff Garzik wrote:
 On Fri, Jun 08, 2007 at 12:06:08PM -0500, Linas Vepstas wrote:
 On Fri, Jun 08, 2007 at 11:12:31AM +1000, Michael Ellerman wrote:
 On Thu, 2007-06-07 at 14:17 -0500, Linas Vepstas wrote:
 The major bug fixes are: 
 I realise it's late, but shouldn't major bugfixes be going into 22 ?
 Yeah, I suppose, I admit I've lost track of the process. 
 You need to order your bug fixes first in the queue. 
 
 OK, here are the patches, re-ordered. There is a different number
 than last time, as I threw out one, merged one, and got cold feet
 on a third one.  They still pass the tests.
 
 The first five patches focus on three serious bugs, fixing crashes or
 hangs.
 
 -- patch 1 -- kernel crash when ifdown while receiving packets.
 -- patch 2,3,4 -- device driver deadlocks on RX ram full mesgs.
   (kernel stays up, ifdown/up clear the problem).
 -- patch 5 -- misconfigured TX interrupts results in 3x-4x per
   degradation for small packets.
 
 -- patch 6 -- rx stats may be mangled
 -- patch 7 -- hw checksum sometimes breaks ipv6 operation
 
 -- patches 8-15 -- misc tweaks, and documentation.
 
 
 I re-ran my stress tests with patches 1-7 applied; they pass.
 
 This is a bit frustrating, because this includes many patches that you 
 ALREADY told me to queue for 2.6.23, which I did, in 
 netdev-2.6.git#upstream.

Sigh. I redid the series so as to avoid this problem, per the 
previous conversation. 

 Should I just drop all spidernet patches and start over?

No. Apply the series I just sent you, dropping the one called
patch 6/15, the one from Florin Malita, as it appears you'd
previously picked this up.  The rest of the patches should apply
cleanly; I just checked. I just did a git pull of 
git://git.kernel.org/pub/scm/linux/kernel/git/jgarzik/netdev-2.6
and checked. The result of patching is exactly as it should be.

Just in case it wasn't clear, I'd like to see patches 1-5 go
into 2.6.22 ... as these address the most critical complaints I'd
gotten recently.

--linas

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread jamal
Hi Guy,

On Tue, 2007-12-06 at 17:04 +0300, Cohen, Guy wrote:
 Hi Jamal,
 
 Here is a simple scenario (nothing here is a rare or extreme case):
 - Busy wireless environment
 - FTP TX on BE queue (low priority)
 - Skype TX on VO queue (high priority)
 
 The channel is busy with high priority packets hence the BE packets are
 transmitted to the air rarely so the DMA/HW queue of the BE access
 category gets full and the qdisc is stopped.
 Now periodic VO-tagged Skype packets arrive. I would expect that they
 get the priority (and pass) in all stages of the stack and reach the HW
 ASAP and compete there on the medium with the other access categories
 and the other clients on the channel.
 Now this packet will be stuck in the qdisc and wait there until a BE
 packet is transmitted, which can take a long time. This is a real
 problem.

Understood.
My take is that this is resolvable by understanding the nature of the
beast. IOW, the strategy of when to open up on such a medium is not
conventional as one of a wired netdev. 
You can use signalling from the media such as an AP giving you 
signals for different ACs to open up; example: if the AC_BE is not being
allowed out and it is just rotting because the AP is favoring VO, then
you need to occasionally open up the tx path for the driver etc.

 There is also a problem with the queues that will be dedicated to TX
 aggregation in 11n (currently implemented) - the packets will be
 classified to queues by the destination MAC address and not only by the
 priority class, but I don't want to get into that now. 

We have an infrastructure at the qdisc level for selecting queues based
on literally anything you can think of in a packet as well as metadata.
So i think this aspect should be fine.

 I think that
 there are enough arguments now why the patch that started this thread is
 needed...

Sorry Guy, I dont see it that way - unfortunately i dont think anybody
else other than Patrick understood what i said  and this thread is going
on for too long i doubt 99% of the people are following any more ;-

 In most scenarios BK packets will be transmitted and will win the medium
 against VO packets (though, in some non-favored ratio).

So if I understand you correctly: over a period of time, yes BK will make
it out but under contention it will lose; is that always? Is there some
mathematics behind this stuff?

 Sorry, I'm really overloaded - I won't be able to review the docs you
 sent (really apologize for that).

No problem. I totally understand.

 The WMM parameters of the AC are set and controlled by the network/BSS
 (access point) administrator and can be used in anyway. There are the
 default parameters but they can be changed.

It would certainly lead to unexpected behavior if you start favoring BE
over VO, no? Would that ever happen by adjusting the WMM parameters?

cheers,
jamal



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ANNOUNCE] new driver ixgbe for Intel(R) 10GbE PCI Express adapters.

2007-06-12 Thread Ayyappan . Veeraiyan
All,

We are pleased to release the new driver ixgbe for Intel(R) 82598
based 10GbE PCI Express adapters. The 82598 silicon and the adapters will be
released soon.

Please find the full driver as a patch to latest linus-2.6 tree here:
git-pull git://lost.foo-projects.org/~aveerani/git/linux-2.6 ixgbe

Also, I am posting the driver patch in the followup mail. 

Short introduction on the ixgbe driver and 82598 silicon:

The 82598 (PCI Express) silicon's architecture and SW interface
is vastly different from legacy 82597 (PCI-X device). The register
offsets and the bit definitions are very different from 82597. The
EEPROM/FLASH, SERDES interface for external PHY and other interfaces
are also different. 82598 has new Tx and Rx descriptor interfaces
(advanced descriptors) to support the packet/header split Rx feature and
Rx packet steering (based on 5 tuples or MAC addresses). It supports
a list of new features like MSI-X, Multiple Rx and Tx queues, TSO for
IPv6. Because of all these differences, we had to write a new driver
for 82598 and the new driver is a lot cleaner with no 82597 errata
workarounds in the hot path.

This driver has been tested extensively for the last couple of months
in our labs.

Please review and provide comments.

Apart from implementing the community feedback, here is the list of
things in TODO list...

1. Add suspend/resume support.
2. Rewrite the driver handling of LLTX logic. Will post a patch very
   soon for review.
3. Add PCI error handler support.

thanks,
Ayyappan
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH][NET_SCHED] Make HTB scheduler work with TSO.

2007-06-12 Thread Ranjit Manomohan
Currently the HTB scheduler does not correctly account for TSO packets 
which causes large inaccuracies in the bandwidth control when using TSO.

This patch allows the HTB scheduler to work with TSO enabled devices.

Signed-off-by: Ranjit Manomohan [EMAIL PROTECTED]

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 035788c..e872724 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -153,15 +153,12 @@ #endif
/* of un.leaf originals should be done. */
 };

-/* TODO: maybe compute rate when size is too large .. or drop ? */
 static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
   int size)
 {
int slot = size >> rate->rate.cell_log;
-   if (slot > 255) {
-   cl->xstats.giants++;
-   slot = 255;
-   }
+   if (slot > 255)
+   return (rate->data[255]*(slot >> 8) + rate->data[slot & 0xFF]);
return rate->data[slot];
 }

@@ -634,13 +631,14 @@ #endif
cl-qstats.drops++;
return NET_XMIT_DROP;
} else {
-   cl-bstats.packets++;
+   cl-bstats.packets +=
+   skb_is_gso(skb)?skb_shinfo(skb)-gso_segs:1;
cl-bstats.bytes += skb-len;
htb_activate(q, cl);
}

sch-q.qlen++;
-   sch-bstats.packets++;
+   sch-bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)-gso_segs:1;
sch-bstats.bytes += skb-len;
return NET_XMIT_SUCCESS;
 }
@@ -717,8 +715,9 @@ #endif
  * In such case we remove class from event queue first.
  */
 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
-int level, int bytes)
+int level, struct sk_buff *skb)
 {
+   int bytes = skb-len;
long toks, diff;
enum htb_cmode old_mode;

@@ -753,13 +752,15 @@ #define HTB_ACCNT(T,B,R) toks = diff + c
 #ifdef HTB_RATECM
/* update rate counters */
cl-sum_bytes += bytes;
-   cl-sum_packets++;
+   cl-sum_packets += skb_is_gso(skb)?
+   skb_shinfo(skb)-gso_segs:1;
 #endif

/* update byte stats except for leaves which are already 
updated */
if (cl-level) {
cl-bstats.bytes += bytes;
-   cl-bstats.packets++;
+   cl-bstats.packets += skb_is_gso(skb)?
+   skb_shinfo(skb)-gso_segs:1;
}
cl = cl-parent;
}
@@ -943,7 +944,7 @@ next:
   gives us slightly better performance */
if (!cl-un.leaf.q-q.qlen)
htb_deactivate(q, cl);
-   htb_charge_class(q, cl, level, skb-len);
+   htb_charge_class(q, cl, level, skb);
}
return skb;
 }

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/15] spidernet driver bug fixes

2007-06-12 Thread Jeff Garzik

Linas Vepstas wrote:

On Tue, Jun 12, 2007 at 07:00:17PM -0400, Jeff Garzik wrote:

Linas Vepstas wrote:

On Fri, Jun 08, 2007 at 01:20:20PM -0400, Jeff Garzik wrote:

On Fri, Jun 08, 2007 at 12:06:08PM -0500, Linas Vepstas wrote:

On Fri, Jun 08, 2007 at 11:12:31AM +1000, Michael Ellerman wrote:

On Thu, 2007-06-07 at 14:17 -0500, Linas Vepstas wrote:
The major bug fixes are: 

I realise it's late, but shouldn't major bugfixes be going into 22 ?
Yeah, I suppose, I admit I've lost track of the process. 
You need to order your bug fixes first in the queue. 

OK, here are the patches, re-ordered. There is a different number
than last time, as I threw out one, merged one, and got cold feet
on a third one.  They still pass the tests.

The first five patches focus on three serious bugs, fixing crashes or
hangs.

-- patch 1 -- kernel crash when ifdown while receiving packets.
-- patch 2,3,4 -- device driver deadlocks on RX ram full mesgs.
 (kernel stays up, ifdown/up clear the problem).
-- patch 5 -- misconfigured TX interrupts results in 3x-4x per
 degradation for small packets.

-- patch 6 -- rx stats may be mangled
-- patch 7 -- hw checksum sometimes breaks ipv6 operation

-- patches 8-15 -- misc tweaks, and documentation.


I re-ran my stress tests with patches 1-7 applied; they pass.
This is a bit frustrating, because this includes many patches that you 
ALREADY told me to queue for 2.6.23, which I did, in 
netdev-2.6.git#upstream.


Sigh. I redid the series so as to avoid this problem, per the 
previous conversation. 


Should I just drop all spidernet patches and start over?


No. Apply the series I just sent you, dropping the one called
patch 6/15, the one from Florin Malita, as it appears you'd
previously picked this up.  The rest of the patches should apply
cleanly; I just checked. I just did a git pull of 
git://git.kernel.org/pub/scm/linux/kernel/git/jgarzik/netdev-2.6

and checked. The result of patching is exactly as it should be.

Just in case it wasn't clear, I'd like to see patches 1-5 go
into 2.6.22 ... as these address the most critical complaints I'd
gotten recently.

--linas




As I just stated, many of the patches in the current patch series have 
already been applied to netdev-2.6.git#upstream:


Linas Vepstas (11):
  s2io: add PCI error recovery support
  s2io: add PCI error recovery support
  spidernet: beautify error messages
  spidernet: move a block of code around
  spidernet: zero out a pointer.
  spidernet: null out skb pointer after its been used.
  spidernet: Don't terminate the RX ring
  spidernet: enhance the dump routine
  spidernet: reset the card when an rxramfull is seen
  spidernet: service TX later.
  spidernet: increase the NAPI weight

These are clearly duplicating some of the patches in your patchseries, 
which means you are woefully out of sync with upstream.


Jeff


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: 2.6.20.7 TCP cubic (and bic) initial slow start way too slow?

2007-06-12 Thread Stephen Hemminger
On Tue, 12 Jun 2007 15:12:58 -0700 (PDT)
David Miller [EMAIL PROTECTED] wrote:

 From: Bill Fink [EMAIL PROTECTED]
 Date: Wed, 16 May 2007 02:44:09 -0400
 
  [EMAIL PROTECTED] ~]# netstat -s | grep -i retrans
  25446 segments retransmited
  20936 fast retransmits
  4503 retransmits in slow start
  4 sack retransmits failed
  
  It then only took 2.14 seconds to transfer 1 GB of data.
  
  That's all for now.
 
 Thanks for all of your testing and numbers Bill.
 
 Inhong et al., we have to do something about this, the issue
 has been known and sitting around for weeks if not months.
 
 How safely can we set the default initial_ssthresh to zero in
 Cubic and BIC?

Yes. set it to zero. The module parameter could even go, and just
leave the route metric as a way to set/remember it.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Please pull 'libertas-fixes' branch of wireless-2.6

2007-06-12 Thread John W. Linville
On Tue, Jun 12, 2007 at 06:54:35PM -0400, Jeff Garzik wrote:
 John W. Linville wrote:
 Fixes identified by the libertas team as important for 2.6.22...
 
 ---
 
 The following changes since commit 
 717c9339202a42ae7bec7d3c4b84deecdcae9f81:
   Dan Williams (1):
 libertas: reduce SSID and BSSID mixed-case abuse
 
 are found in the git repository at:
 
   git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
   libertas-fixes
 
 Dan Williams (1):
   libertas: actually send mesh frames to mesh netdev
 
 Luis Carlos (1):
   libertas: convert libertas_mpp into anycast_mask
 
 Luis Carlos Cobo Rus (2):
   libertas: pull current channel from firmware on mesh autostart
   libertas: deauthenticate from AP in channel switch
 
 Just to be clear, you intend 'libertas' and 'libertas-fixes' (in that 
 order) for 2.6.22, and 'libertas-upstream' for 2.6.23?

Yes, correct.

Thanks,

John
-- 
John W. Linville
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


r8169 tx problem (1s pause with ping)

2007-06-12 Thread Benjamin LaHaise
Hello folks,

I'm seeing something odd with r8169 on FC7: doing a ping -s 1600 alternates 
between a 1s latency and sub 1ms.  Has anyone else seen anything like this?  
The system in question is an Asus M2A-VM with an onboard RTL8111 (I think).  
NAPI doesn't seem to make a difference.  The kernel in question is currently 
a vanilla 2.6.21.5.  Sub-mtu sized packets behave normally.

02:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8111/8168B PCI 
Express Gigabit Ethernet controller (rev 01)

PING 1.2.3.4 (1.2.3.4) 1600(1628) bytes of data.
1608 bytes from 1.2.3.4: icmp_seq=1 ttl=64 time=1000 ms
1608 bytes from 1.2.3.4: icmp_seq=2 ttl=64 time=0.816 ms
1608 bytes from 1.2.3.4: icmp_seq=3 ttl=64 time=1000 ms
1608 bytes from 1.2.3.4: icmp_seq=4 ttl=64 time=0.661 ms

-ben
-- 
Time is of no importance, Mr. President, only life is important.
Don't Email: [EMAIL PROTECTED].
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/15] spidernet driver bug fixes

2007-06-12 Thread Michael Ellerman
On Tue, 2007-06-12 at 19:00 -0400, Jeff Garzik wrote:
 Linas Vepstas wrote:
  On Fri, Jun 08, 2007 at 01:20:20PM -0400, Jeff Garzik wrote:
  On Fri, Jun 08, 2007 at 12:06:08PM -0500, Linas Vepstas wrote:
  On Fri, Jun 08, 2007 at 11:12:31AM +1000, Michael Ellerman wrote:
  On Thu, 2007-06-07 at 14:17 -0500, Linas Vepstas wrote:
  The major bug fixes are: 
  I realise it's late, but shouldn't major bugfixes be going into 22 ?
  Yeah, I suppose, I admit I've lost track of the process. 
  You need to order your bug fixes first in the queue. 
  
  OK, here are the patches, re-ordered. There is a different number
  than last time, as I threw out one, merged one, and got cold feet
  on a third one.  They still pass the tests.
  
  The first five patches focus on three serious bugs, fixing crashes or
  hangs.
  
  -- patch 1 -- kernel crash when ifdown while receiving packets.
  -- patch 2,3,4 -- device driver deadlocks on RX ram full mesgs.
(kernel stays up, ifdown/up clear the problem).
  -- patch 5 -- misconfigured TX interrupts results in 3x-4x per
degradation for small packets.
  
  -- patch 6 -- rx stats may be mangled
  -- patch 7 -- hw checksum sometimes breaks ipv6 operation
  
  -- patches 8-15 -- misc tweaks, and documentation.
  
  
  I re-ran my stress tests with patches 1-7 applied; they pass.
 
 This is a bit frustrating, because this includes many patches that you 
 ALREADY told me to queue for 2.6.23, which I did, in 
 netdev-2.6.git#upstream.

Linas posted the patches, I responded querying whether the bug fixes
should go into 2.6.22, and then you told him you need to order your bug
fixes first in the queue. Which seemed pretty clear to me that you'd
wait for the reordered series.

cheers

-- 
Michael Ellerman
OzLabs, IBM Australia Development Lab

wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)

We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person


signature.asc
Description: This is a digitally signed message part


Re: [PATCH 0/15] spidernet driver bug fixes

2007-06-12 Thread Jeff Garzik

Michael Ellerman wrote:

Linas posted the patches, I responded querying whether the bug fixes
should go into 2.6.22, and then you told him you need to order your bug
fixes first in the queue. Which seemed pretty clear to me that you'd
wait for the reordered series.


This was presuming Linas actually knew what he himself had submitted 
previously, and had been accepted...


I explicitly emailed Linas on May 24, 2007 detailing each patch that had 
been applied, and to which netdev-2.6.git branch it had been applied 
(and thus whether it was queued for 2.6.22 or 2.6.23).  Relevant 
Message-id is [EMAIL PROTECTED], and was sent not only to 
Linas but also to netdev@vger.kernel.org, [EMAIL PROTECTED], and 
[EMAIL PROTECTED]


These changes were subsequently made public immediately via 
git://git.kernel.org/.../jgarzik/netdev-2.6.git branches 
'upstream-fixes' and 'upstream', and were followed a few days later by 
akpm's public tree, starting with 2.6.22-rc3-mm1 (and all subsequent 
releases).


All of the above seemed pretty clear, too.

To move forward, it sounds like the best thing to do is drop all 
spidernet patches and start over, yes?


Jeff


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


arp-scan triggers via-velocity eth0: excessive work at interrupt

2007-06-12 Thread linux
It kind of surprised me that sending 254 arp packets by using the arp-scan
tool (http://www.nta-monitor.com/tools/arp-scan/) on a /24 consistently
triggers a burst of eth0: excessive work at interrupt.

This is a 600 MHz PIII, 2.6.22-rc4, via-velocity driver.

model name  : Pentium III (Katmai)
stepping: 3
cpu MHz : 601.406
cache size  : 512 KB

00:09.0 Ethernet controller [0200]: VIA Technologies, Inc. VT6120/VT6121/VT6122 
Gigabit Ethernet Adapter [1106:3119] (rev 11)

Just double-checking... the program actually sent 463 packets (256 +
a retry to all those that didn't respond to the first one), and triggers
11 copies of the kernel message.

Command line: arp-scan -I eth0 -l [-v]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: IC Plus Corp IC Plus IP1000

2007-06-12 Thread linux
[EMAIL PROTECTED] wrote:
 I wonder if it at some time will be included in the standard Linux kernel?
 I am of course interested because my main board has it built in, so I 
 would be willing to test it.

Me, too!

This has been discussed sporadically for the last year, and I can confirm
that the driver source from the manufacturer's web page is starting
to suffer bit rot, but after patching the more egregious breakage
(references to linux/config.h, UTS_RELEASE and pci_module_init()
stop it from compiling), it works.

It doesn't even spew "eth0: excessive work at interrupt" when running
arp-scan, unlike certain in-tree drivers. :-)

I got a bit of a rude shock today after doing an emergency replacement
on a socket 939 motherboard and blandly assuring a Windows-experienced
co-worker that despite a change from nForce to VIA KT890 chipset, the
system should just work.

One round of floppy shuffle and code-fixing later, my co-worker is
not impressed by the Linux version of "Have driver disk".  :-)


Is anyone able to push it to completion?  I have a vague idea that the
vendor lost interest.  (Should I write to Greg K-H and tell him
"Free Linux Driver Developed!"?)

I can play testing guinea-pig if needed.

Thanks!
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: IC Plus Corp IC Plus IP1000

2007-06-12 Thread Jeff Garzik

Peter Rasmussen wrote:
I am not on this list, but found this address on:   
http://linux-net.osdl.org/index.php/Mailing_Lists.


My question is regarding the ethernet controller (from lspci):

Sundance Technology Inc / IC Plus Corp IC Plus IP1000 Family Gigabit 
Ethernet (rev 41)


that seems to have a driver for it published on:

http://www.icplus.com.tw/driver-pp-IP1000A.html

Unfortunately I am not able to build it as described.

I wonder if it at some time will be included in the standard Linux kernel?
I am of course interested because my main board has it built in, so I 
would be willing to test it.


Use the 'sundance' driver that's been in the kernel for quite a while.

Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Please pull 'libertas-fixes' branch of wireless-2.6

2007-06-12 Thread Jeff Garzik

John W. Linville wrote:

Fixes identified by the libertas team as important for 2.6.22...

---

The following changes since commit 717c9339202a42ae7bec7d3c4b84deecdcae9f81:
  Dan Williams (1):
libertas: reduce SSID and BSSID mixed-case abuse

are found in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
libertas-fixes

Dan Williams (1):
  libertas: actually send mesh frames to mesh netdev

Luis Carlos (1):
  libertas: convert libertas_mpp into anycast_mask

Luis Carlos Cobo Rus (2):
  libertas: pull current channel from firmware on mesh autostart
  libertas: deauthenticate from AP in channel switch

 drivers/net/wireless/libertas/assoc.c   |   13 +
 drivers/net/wireless/libertas/assoc.h   |2 ++
 drivers/net/wireless/libertas/cmdresp.c |1 +
 drivers/net/wireless/libertas/dev.h |1 +
 drivers/net/wireless/libertas/host.h|4 ++--
 drivers/net/wireless/libertas/main.c|   27 ++-
 drivers/net/wireless/libertas/rx.c  |5 ++---
 7 files changed, 35 insertions(+), 18 deletions(-)


pulled into #upstream-fixes


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Please pull 'libertas' branch of wireless-2.6 (resent w/o attachment)

2007-06-12 Thread Jeff Garzik

John W. Linville wrote:

Resending w/o the attached patch, in case it was too big...yikes!

Individual patches are available here:


http://www.kernel.org/pub/linux/kernel/people/linville/wireless-2.6/libertas

John

---

Jeff,

This is the same as the previous pull request, only rebased on
2.6.22-rc4.  Since this is a big pull already, I didn't want to
complicate it with the additional patches identified by the libertas
team as 2.6.22-worthy.

John

---

The following changes since commit 5ecd3100e695228ac5e0ce0e325e252c0f11806f:
  Linus Torvalds (1):
Linux 2.6.22-rc4

are found in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
libertas

Chris Ball (1):
  libertas: wakeup both mesh and normal wakeup when getting out of scan

Dan Williams (25):
  libertas: call SET_NETDEV_DEV from common code
  libertas: replace 'macaddress' with 'bssid'
  libertas: correctly unregister mesh netdev on error
  libertas: don't tear down netdev in libertas_activate_card
  libertas: make scan result handling more flexible
  libertas: fix 'keep previous scan' behavior
  libertas: move channel changing into association framework
  libertas: make association paths consistent
  libertas: use MAC_FMT and MAC_ARG where appropriate
  libertas: use compare_ether_addr() rather than memcmp() where appropriate
  libertas: fix debug enter/leave prints for libertas_execute_next_command
  libertas: correctly balance locking in libertas_process_rx_command
  libertas: correct error report paths for wlan_fwt_list_ioctl
  libertas: fix deadlock SIOCGIWSCAN handler
  libertas: fix default adhoc channel
  libertas: honor specific channel requests during association
  libertas: send SIOCGIWSCAN event after partial scans too
  libertas: debug print spacing fixes in assoc.c
  libertas: add more verbose debugging to libertas_cmd_80211_authenticate
  libertas: Make WPA work through supplicant handshake
  libertas: sparse fixes
  libertas: tweak association debug output
  libertas: remove structure WLAN_802_11_SSID and libertas_escape_essid
  libertas: remove WPA_SUPPLICANT structure
  libertas: reduce SSID and BSSID mixed-case abuse

David Woodhouse (6):
  libertas: fix character set in README
  libertas: first pass at fixing up endianness issues
  libertas: More endianness fixes.
  libertas: more endianness fixes, in tx.c this time
  libertas: don't byte-swap firmware version number. It's a byte array.
  libertas: fix big-endian associate command.

Holger Schurig (23):
  libertas: rename wlan_association_worker
  libertas: a debug output was missing a newline
  libertas: fix removal of all debugfs files
  libertas: remove __FILE__ from debug output
  libertas: remove unused/superfluous definitions of DEV_NAME_LEN
  libertas: move vendor & product id's into if_usb.c
  libertas: make libertas_wlan_data_rates static
  libertas: exclude non-used code when PROC_DEBUG is not set
  libertas: make debug configurable
  libertas: tune debug code
  libertas: single out mesh code
  libertas: change debug output of libertas_interrupt()
  libertas: get rid of libertas_sbi_get_priv()
  libertas: fix SSID output
  libertas: changed some occurences of kmalloc() + memset(a,0,sz) to 
kzalloc()
  libertas: move reset_device() code main.c to if_usb.c
  libertas: split wlan_add_card()
  libertas: indirect all hardware access via hw_ functions
  libertas: move contents of fw.h to decl.h
  libertas: split module into two (libertas.ko and usb8xxx.ko)
  libertas: fix RESET logic at unload time
  libertas: let DRV_NAME be overridable
  libertas: remove unused variables in wlan_dev_t

Javier Cardona (2):
  libertas: fixed transmission flow control on the mesh interface
  libertas: added transmission failures to mesh statistics

Luis Carlos Cobo (4):
  libertas: fixed incorrect assigment of fcs errors to frag errors
  libertas: add URB debug info
  libertas: fixed kernel oops on module/card removal
  libertas: updated mesh commands for 5.220.9.p11

Luis Carlos Cobo Rus (6):
  libertas: version bump (321p0) and cmds update for new fw (5.220.10.p0)
  libertas: cleanup of fwt_list_route processing
  libertas: updated readme file
  libertas: make mac address configuration work with mesh interface too
  libertas: split wext for eth and msh
  libertas: support for mesh autostart on firmware 5.220.11

Marcelo Tosatti (5):
  libertas: scan two channels per scan command
  libertas: remove deprecated pm_register and associated code
  libertas: fix scanning from associate path
  libertas: fix error handling of card initialization
  libertas: fix oops on rmmod

 drivers/net/wireless/Kconfig   |   19 +-
 

Re: [PATCH 1/2] NetXen: Fix link status messages

2007-06-12 Thread Jeff Garzik

Mithlesh Thukral wrote:

NetXen: Fix incorrect link status even with switch turned OFF.
NetXen driver failed to accurately indicate when a link is up or down. 
This was encountered during failover testing, when the first port 
indicated that the link was up even when the 10G switch it was assigned
to in the Bladecenter was turned off completely. 


Signed-off by: Wen Xiong [EMAIL PROTECTED]
Signed-off by: Mithlesh Thukral [EMAIL PROTECTED]
---

 drivers/net/netxen/netxen_nic.h  |1 +
 drivers/net/netxen/netxen_nic_init.c |   21 +
 drivers/net/netxen/netxen_nic_isr.c  |   24 
 3 files changed, 38 insertions(+), 8 deletions(-)


applied to #upstream-fixes


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] myri10ge: limit the number of recoveries

2007-06-12 Thread Jeff Garzik

Brice Goglin wrote:

Limit the number of recoveries from a NIC hw watchdog reset to 1 by default.
It enables detection of defective NICs immediately since these memory parity
errors are expected to happen very rarely (less than once per century*NIC).

Signed-off-by: Brice Goglin [EMAIL PROTECTED]
---
 drivers/net/myri10ge/myri10ge.c |   12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)


applied 1-3 to #upstream-fixes


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KJ PATCH] Replacing alloc_pages(gfp,0) with alloc_page(gfp) in net/core/skbuff.c

2007-06-12 Thread Shani Moideen

Replacing alloc_pages(gfp,0) with alloc_page(gfp) 
in net/core/skbuff.c

Signed-off-by: Shani Moideen [EMAIL PROTECTED]


diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1422573..b923181 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1805,7 +1805,7 @@ int skb_append_datato_frags(struct sock *sk, struct 
sk_buff *skb,
return -EFAULT;
 
/* allocate a new page for next frag */
-   page = alloc_pages(sk-sk_allocation, 0);
+   page = alloc_page(sk-sk_allocation);
 
/* If alloc_page fails just return failure and caller will
 * free previous allocated pages by doing kfree_skb()

-- 
Shani
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KJ PATCH] Replacing alloc_pages(gfp,0) with alloc_page(gfp) in net/core/pktgen.c

2007-06-12 Thread Shani Moideen

Replacing alloc_pages(gfp,0) with alloc_page(gfp) 
in net/core/pktgen.c

Signed-off-by: Shani Moideen [EMAIL PROTECTED]


diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b92a322..2600c7f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2414,7 +2414,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device 
*odev,
 
i = 0;
while (datalen  0) {
-   struct page *page = alloc_pages(GFP_KERNEL, 0);
+   struct page *page = alloc_page(GFP_KERNEL);
skb_shinfo(skb)-frags[i].page = page;
skb_shinfo(skb)-frags[i].page_offset = 0;
skb_shinfo(skb)-frags[i].size =
@@ -2762,7 +2762,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device 
*odev,
 
i = 0;
while (datalen  0) {
-   struct page *page = alloc_pages(GFP_KERNEL, 0);
+   struct page *page = alloc_page(GFP_KERNEL);
skb_shinfo(skb)-frags[i].page = page;
skb_shinfo(skb)-frags[i].page_offset = 0;
skb_shinfo(skb)-frags[i].size =

-- 
Shani 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KJ PATCH] Replacing alloc_pages(gfp,0) with alloc_page(gfp) in net/core/sock.c

2007-06-12 Thread Shani Moideen

Replacing alloc_pages(gfp,0) with alloc_page(gfp) 
in net/core/sock.c

Signed-off-by: Shani Moideen [EMAIL PROTECTED]


diff --git a/net/core/sock.c b/net/core/sock.c
index 22183c2..25bb52b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1193,7 +1193,7 @@ static struct sk_buff *sock_alloc_send_pskb(struct sock 
*sk,
struct page *page;
skb_frag_t *frag;
 
-   page = alloc_pages(sk-sk_allocation, 
0);
+   page = alloc_page(sk-sk_allocation);
if (!page) {
err = -ENOBUFS;
skb_shinfo(skb)-nr_frags = i;

-- 
Shani 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Patch to drivers/usb/serial/sierra.c to support Sierra Wireless Aircard 595U

2007-06-12 Thread Phil Karn

*** linux-2.6.21.5/drivers/usb/serial/sierra.c  Mon Jun 11 11:37:06 2007
--- linux-2.6.21.5a/drivers/usb/serial/sierra.c Fri Jun  8 23:37:06 2007
***
*** 44,49 
--- 44,50 

{ USB_DEVICE(0x1199, 0x0112) }, /* Sierra Wireless AirCard 580 */
{ USB_DEVICE(0x0F3D, 0x0112) }, /* AirPrime/Sierra PC 5220 */
+   { USB_DEVICE(0x1199, 0x0120) }, /* Sierra Wireless Aircard 595U */
{ }
  };
  MODULE_DEVICE_TABLE(usb, id_table);
***
*** 66,71 
--- 67,73 
{ USB_DEVICE(0x1199, 0x6803) }, /* Sierra Wireless MC8765 */
{ USB_DEVICE(0x1199, 0x6812) }, /* Sierra Wireless MC8775 */
{ USB_DEVICE(0x1199, 0x6820) }, /* Sierra Wireless AirCard 875 */
+   { USB_DEVICE(0x1199, 0x0120) }, /* Sierra Wireless Aircard 595U */
{ }
  };

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[git patches] net driver fixes

2007-06-12 Thread Jeff Garzik

This is a resend of the submission from June 9th, along with added stuff:
* big update to new (in 2.6.22) wireless driver libertas
* revert e100 's-bit' change; see commit message for more info
* more myri, NetXen fixes

Please pull from 'upstream-linus' branch of
master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git 
upstream-linus

to receive the following updates:

 drivers/net/e100.c |   72 ++-
 drivers/net/ehea/ehea.h|2 +-
 drivers/net/ehea/ehea_main.c   |   12 +-
 drivers/net/ibmveth.c  |   80 +-
 drivers/net/myri10ge/myri10ge.c|   29 +-
 drivers/net/netxen/netxen_nic.h|   48 +-
 drivers/net/netxen/netxen_nic_ethtool.c|8 +-
 drivers/net/netxen/netxen_nic_hw.c |   12 +-
 drivers/net/netxen/netxen_nic_init.c   |   44 +-
 drivers/net/netxen/netxen_nic_isr.c|   24 +
 drivers/net/netxen/netxen_nic_main.c   |7 +
 drivers/net/netxen/netxen_nic_niu.c|8 +-
 drivers/net/phy/marvell.c  |   62 +-
 drivers/net/usb/Kconfig|4 +-
 drivers/net/via-velocity.c |2 +-
 drivers/net/wireless/Kconfig   |   19 +-
 drivers/net/wireless/libertas/11d.c|  152 ++--
 drivers/net/wireless/libertas/11d.h|6 +-
 drivers/net/wireless/libertas/Makefile |4 +-
 drivers/net/wireless/libertas/README   |   52 +-
 drivers/net/wireless/libertas/assoc.c  |  358 +---
 drivers/net/wireless/libertas/assoc.h  |   10 +-
 drivers/net/wireless/libertas/cmd.c|  559 +--
 drivers/net/wireless/libertas/cmdresp.c|  376 
 drivers/net/wireless/libertas/debugfs.c|  432 
 drivers/net/wireless/libertas/decl.h   |   20 +-
 drivers/net/wireless/libertas/defs.h   |  101 ++-
 drivers/net/wireless/libertas/dev.h|   99 +-
 drivers/net/wireless/libertas/ethtool.c|   55 +-
 drivers/net/wireless/libertas/fw.c |  111 +--
 drivers/net/wireless/libertas/fw.h |   13 -
 drivers/net/wireless/libertas/host.h   |   17 +-
 drivers/net/wireless/libertas/hostcmd.h|  392 
 drivers/net/wireless/libertas/if_bootcmd.c |6 +-
 drivers/net/wireless/libertas/if_usb.c |  448 +
 drivers/net/wireless/libertas/if_usb.h |   32 +-
 drivers/net/wireless/libertas/ioctl.c  |  286 --
 drivers/net/wireless/libertas/join.c   |  464 -
 drivers/net/wireless/libertas/join.h   |   13 +-
 drivers/net/wireless/libertas/main.c   |  690 ++---
 drivers/net/wireless/libertas/rx.c |   64 +-
 drivers/net/wireless/libertas/sbi.h|   40 -
 drivers/net/wireless/libertas/scan.c   | 1529 +---
 drivers/net/wireless/libertas/scan.h   |   81 +-
 drivers/net/wireless/libertas/thread.h |8 +-
 drivers/net/wireless/libertas/tx.c |   74 +-
 drivers/net/wireless/libertas/types.h  |   63 +-
 drivers/net/wireless/libertas/wext.c   |  778 ---
 drivers/net/wireless/libertas/wext.h   |   13 +-
 49 files changed, 4001 insertions(+), 3778 deletions(-)
 delete mode 100644 drivers/net/wireless/libertas/fw.h
 delete mode 100644 drivers/net/wireless/libertas/sbi.h

Brian King (2):
  ibmveth: Fix h_free_logical_lan error on pool resize
  ibmveth: Automatically enable larger rx buffer pools for larger mtu

Brice Goglin (3):
  myri10ge: limit the number of recoveries
  myri10ge: report when the link partner is running in Myrinet mode
  myri10ge: update driver version

Chris Ball (1):
  libertas: wakeup both mesh and normal wakeup when getting out of scan

Dan Williams (26):
  libertas: call SET_NETDEV_DEV from common code
  libertas: replace 'macaddress' with 'bssid'
  libertas: correctly unregister mesh netdev on error
  libertas: don't tear down netdev in libertas_activate_card
  libertas: make scan result handling more flexible
  libertas: fix 'keep previous scan' behavior
  libertas: move channel changing into association framework
  libertas: make association paths consistent
  libertas: use MAC_FMT and MAC_ARG where appropriate
  libertas: use compare_ether_addr() rather than memcmp() where appropriate
  libertas: fix debug enter/leave prints for libertas_execute_next_command
  libertas: correctly balance locking in libertas_process_rx_command
  libertas: correct error report paths for wlan_fwt_list_ioctl
  libertas: fix deadlock SIOCGIWSCAN handler
  libertas: fix default adhoc channel
  libertas: honor specific channel requests during association
  libertas: send SIOCGIWSCAN event after partial scans too
  libertas: debug print spacing fixes in assoc.c
  libertas: add more verbose debugging to libertas_cmd_80211_authenticate
  libertas: Make WPA work through supplicant handshake
  libertas: sparse fixes
  

Re: 2.6.20.7 TCP cubic (and bic) initial slow start way too slow?

2007-06-12 Thread Bill Fink
On Tue, 12 Jun 2007, Stephen Hemminger wrote:

 On Tue, 12 Jun 2007 15:12:58 -0700 (PDT)
 David Miller [EMAIL PROTECTED] wrote:
 
  From: Bill Fink [EMAIL PROTECTED]
  Date: Wed, 16 May 2007 02:44:09 -0400
  
   [EMAIL PROTECTED] ~]# netstat -s | grep -i retrans
   25446 segments retransmited
   20936 fast retransmits
   4503 retransmits in slow start
   4 sack retransmits failed
   
   It then only took 2.14 seconds to transfer 1 GB of data.
   
   That's all for now.
  
  Thanks for all of your testing and numbers Bill.
  
  Injong et al., we have to do something about this, the issue
  has been known and sitting around for weeks if not months.
  
  How safely can we set the default initial_ssthresh to zero in
  Cubic and BIC?
 
 Yes. set it to zero. The module parameter could even go, and just
 leave the route metric as a way to set/remember it.

Actually, after thinking about this some more I had some second
thoughts about the matter.  For my scenario of an uncongested 10-GigE
path an initial_ssthresh=0 is definitely what is desired.

But perhaps on a congested link with lots of connections, the
initial_ssthresh=100 setting might have some benefit.  I don't
have an easy way of testing that so I was hoping Injong or someone
else might do that and report back.  If there was a benefit, perhaps
it would be useful to have a per-route option for setting the
initial_ssthresh.  That would leave the question of what to make
the default.  There was also the mystery of why cubic's slow start
performance was so much worse than bic's.  If a real benefit could
be demonstrated for the congested case, and if bic's slow start
behavior could be grafted onto cubic, then bic's current slow start
performance (with initial_ssthresh=100) might serve as an adequate
compromise between performance and not being overly aggressive for
the default behavior.

OTOH just setting it to zero as a default should also be fine as
that's the standard Reno behavior.  I'm leaning in that direction
personally, but I'm possibly biased because of my environment,
where I'm trying to get maximum performance out of 10-GigE WAN
networks that aren't particularly congested normally.

-Bill
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Leonid Grossman


 -Original Message-
 From: [EMAIL PROTECTED] [mailto:netdev-
 [EMAIL PROTECTED] On Behalf Of Jason Lunz
 Sent: Tuesday, June 12, 2007 2:48 PM
 To: David Miller
 Cc: [EMAIL PROTECTED]; [EMAIL PROTECTED]; netdev@vger.kernel.org;
 [EMAIL PROTECTED]; [EMAIL PROTECTED]; [EMAIL PROTECTED];
 [EMAIL PROTECTED]
 Subject: Re: [PATCH] NET: Multiqueue network device support.
 
 On Tue, Jun 12, 2007 at 02:26:58PM -0700, David Miller wrote:
  The MAC is still very much centralized in most designs.
 
  So one way they'll do it is to support assigning N MAC addresses,
  and you configure the input filters of the chip to push packets
  for each MAC to the proper receive queue.
 
  So the MAC will accept any of those in the N MAC addresses as
  its own, then you use the filtering facilities to steer
  frames to the correct RX queue.
 
  The TX and RX queues can be so isolated as to be able to be exported
  to virtualization nodes.  You can give them full access to the DMA
  queues and associated mailboxes.  So instead of all of this bogus
  virtualized device overhead, you just give the guest access to the
  real device.
 
  So you can use multiple queues either for better single node SMP
  performance, or better virtualization performance.
 
 Are you aware of any hardware designs that allow other ways to map
 packets onto rx queues?  I can think of several scenarios where it could
 be advantageous to map packets by IP 3- or 5-tuple to get cpu locality
 all the way up the stack on a flow-by-flow basis. But doing this would
 require some way to request this mapping from the hardware.

10GbE Xframe NICs do that, as well as rx steering by MAC address, VLAN,
MS RSS, generic hashing and a bunch of other criteria (there is actually a
decent chapter on rx steering in the ASIC manual on the www.neterion.com
support page).
The caveat is that in the current products the tuple table is limited to
256 entries only. The next ASIC bumps this number to 64k.

 
 In the extreme case it would be cool if it were possible to push a
 bpf-like classifier down into the hardware to allow arbitrary kinds of
 flow distribution.
 
 Jason
 -
 To unsubscribe from this list: send the line unsubscribe netdev in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] NetXen: Add correct routines to setup multicast address

2007-06-12 Thread Dhananjay Phadke
Mithlesh,

You don't initialize max_mc_count anywhere. The multicast address pool
can hold 16 addresses for ports {0,1} and 4 for ports {2,3}. You should
have the following line in the probe routine.

 adapter->max_mc_count = (adapter->portnum > 1) ? 4 : 16;

--
Dhananjay Phadke
NetXen Inc.


Mithlesh Thukral wrote:
 NetXen: Add multi cast filter code
 This patch adds multi cast filter code to NetXen NIC driver.
 It also adds capabilities to setup the multicast address in hardware
 from the host side.
 
 Signed-off by: Mithlesh Thukral [EMAIL PROTECTED]
 ---
 
  drivers/net/netxen/netxen_nic.h |   24 +
  drivers/net/netxen/netxen_nic_hdr.h |3 
  drivers/net/netxen/netxen_nic_hw.c  |  119 +-
  3 files changed, 143 insertions(+), 3 deletions(-)
 
 diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h
 index a0b39ee..2fddfd1 100644
 --- a/drivers/net/netxen/netxen_nic.h
 +++ b/drivers/net/netxen/netxen_nic.h
 @@ -261,6 +261,27 @@ #define netxen_set_msg_ctxid(config_word
  #define netxen_set_msg_opcode(config_word, val)  \
   ((config_word) = ~(0xf28), (config_word) |= (val  0xf)  28)
  
 +#define netxen_set_addr_ctl_id_pool0(config_word, val)   \
 + ((config_word) = ~3, (config_word) |= val  0x3)
 +#define netxen_set_addr_ctl_enable_xtnd_0(config_word)   \
 + ((config_word) |= 1  2)
 +#define netxen_set_addr_ctl_id_pool1(config_word, val)   \
 + ((config_word) = ~(0x34), (config_word) |= (val  0x3)  4)
 +#define netxen_set_addr_ctl_enable_xtnd_1(config_word)   \
 + ((config_word) |= 1  6)
 +#define netxen_set_addr_ctl_id_pool2(config_word, val)   \
 + ((config_word) = ~(0x38), (config_word) |= (val  0x3)  8)
 +#define netxen_set_addr_ctl_enable_xtnd_2(config_word)   \
 + ((config_word) |= 1  10)
 +#define netxen_set_addr_ctl_id_pool3(config_word, val)   \
 + ((config_word) = ~(0x312), (config_word) |= (val  0x3)  12)
 +#define netxen_set_addr_ctl_enable_xtnd_3(config_word)   \
 + ((config_word) |= 1  14)
 +#define netxen_set_addr_ctl_mode(config_word, val)   \
 + ((config_word) = ~(0x326), (config_word) |= (val  0x3)  26)
 +#define netxen_set_addr_ctl_enable_poll(config_word, val)\
 + ((config_word) = ~(0xf30), (config_word) |= (val  0xf)  30)
 +
  struct netxen_rcv_context {
   __le64 rcv_ring_addr;
   __le32 rcv_ring_size;
 @@ -883,6 +904,9 @@ struct netxen_adapter {
   unsigned char mac_addr[ETH_ALEN];
   int mtu;
   int portnum;
 + u8 promisc;
 + u8 mc_enabled;
 + u8 max_mc_count;
  
   spinlock_t tx_lock;
   spinlock_t lock;
 diff --git a/drivers/net/netxen/netxen_nic_hdr.h 
 b/drivers/net/netxen/netxen_nic_hdr.h
 index 608e37b..2bfecbc 100644
 --- a/drivers/net/netxen/netxen_nic_hdr.h
 +++ b/drivers/net/netxen/netxen_nic_hdr.h
 @@ -545,6 +545,9 @@ #define NETXEN_MULTICAST_ADDR_HI_1(NETX
  #define NETXEN_MULTICAST_ADDR_HI_2   (NETXEN_CRB_NIU + 0x1018)
  #define NETXEN_MULTICAST_ADDR_HI_3   (NETXEN_CRB_NIU + 0x101c)
  
 +#define NETXEN_UNICAST_ADDR_BASE (NETXEN_CRB_NIU + 0x1080)
 +#define NETXEN_MULTICAST_ADDR_BASE   (NETXEN_CRB_NIU + 0x1100)
 +
  #define  NETXEN_NIU_GB_MAC_CONFIG_0(I)   \
   (NETXEN_CRB_NIU + 0x3 + (I)*0x1)
  #define  NETXEN_NIU_GB_MAC_CONFIG_1(I)   \
 diff --git a/drivers/net/netxen/netxen_nic_hw.c 
 b/drivers/net/netxen/netxen_nic_hw.c
 index baff17a..c5d4ff9 100644
 --- a/drivers/net/netxen/netxen_nic_hw.c
 +++ b/drivers/net/netxen/netxen_nic_hw.c
 @@ -303,6 +303,97 @@ int netxen_nic_set_mac(struct net_device
   return 0;
  }
  
 +#define NETXEN_UNICAST_ADDR(port, index) \
 + (NETXEN_UNICAST_ADDR_BASE+(port*32)+(index*8))
 +
 +int netxen_nic_enable_mcast_filter(struct netxen_adapter *adapter)
 +{
 + u32 val = 0;
 + u16 port = physical_port[adapter-portnum];
 +
 + if (adapter-mc_enabled)
 + return 0;
 + 
 + netxen_set_addr_ctl_enable_poll(val, 0xf);
 +
 + if (adapter-ahw.board_type == NETXEN_NIC_XGBE)
 + netxen_set_addr_ctl_mode(val, 0x3);
 + else
 + netxen_set_addr_ctl_mode(val, 0x0);
 +
 + netxen_set_addr_ctl_id_pool0(val, 0x0);
 + netxen_set_addr_ctl_id_pool1(val, 0x1);
 + netxen_set_addr_ctl_id_pool2(val, 0x2);
 + netxen_set_addr_ctl_id_pool3(val, 0x3);
 +
 + netxen_set_addr_ctl_enable_xtnd_0(val);
 + netxen_set_addr_ctl_enable_xtnd_1(val);
 + netxen_set_addr_ctl_enable_xtnd_2(val);
 + netxen_set_addr_ctl_enable_xtnd_3(val);
 + 
 + netxen_crb_writelit_adapter(adapter, NETXEN_MAC_ADDR_CNTL_REG, val);
 + 
 + val = 0xff;
 +
 + netxen_crb_writelit_adapter(adapter, NETXEN_UNICAST_ADDR(port,0), val);
 + netxen_crb_writelit_adapter(adapter, NETXEN_UNICAST_ADDR(port,0)+4, 
 + val);
 + 
 + memcpy(val, adapter-mac_addr, 3);
 + netxen_crb_writelit_adapter(adapter, 

Re: [PATCH] NET: Multiqueue network device support.

2007-06-12 Thread Zhu Yi
On Tue, 2007-06-12 at 23:17 +0200, Patrick McHardy wrote:
 I've hacked up a
 small multiqueue simulator device and to my big surprise my testing
 showed that Jamal's suggestion of using a single queue state seems to
 work better than I expected. But I've been doing mostly testing of
 the device itself up to now with very simple traffic patterns (mostly
 just flood all queues), so I'll try to get some real results
 tomorrow. 

The key argument for Jamal's solution is that the NIC will send out 32
packets in the full PHL in a reasonably short time (a few microseconds per
Jamal's calculation). But for wireless, the PHL hardware has a low
probability of seizing the wireless medium when the air is full of
high-priority frames. That is, the chances of transmission in PHL
and PHH are not equal. Queuing packets in software will starve
high-priority packets more than putting them into PHH as early as possible.

Patrick, I don't think your testing considered the above scenario,
right?

Thanks,
-yi
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html