[RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Evgeniy Polyakov
Hello.

The attached patch avoids unnecessary alignment overhead
in skb->data allocation.
The main idea is to allocate struct skb_shared_info from a separate cache
when adding sizeof(struct skb_shared_info) ends up in a different
allocation order than the initial size alone would need.
This solves the problem of 4k allocations for a 1500 MTU and 32k
allocations for 9k jumbo frames on some chips.
The patch was not tested, so if the idea is worth it I will complete it.

Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 19c96d4..7474682 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -282,7 +282,8 @@ struct sk_buff {
nfctinfo:3;
__u8pkt_type:3,
fclone:2,
-   ipvs_property:1;
+   ipvs_property:1,
+   shinfo_cache:1;
__be16  protocol;
 
void(*destructor)(struct sk_buff *skb);
@@ -403,7 +404,9 @@ extern unsigned int   skb_find_text(stru
struct ts_state *state);
 
 /* Internal */
-#define skb_shinfo(SKB)((struct skb_shared_info *)((SKB)-end))
+#define skb_shinfo(SKB)((SKB)-shinfo_cache?\
+   (struct skb_shared_info *)(*((SKB)-end)):\
+   ((struct skb_shared_info *)((SKB)-end)))
 
 /**
  * skb_queue_empty - check if a queue is empty
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 022d889..7287814 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@ #include asm/system.h
 
 static kmem_cache_t *skbuff_head_cache __read_mostly;
 static kmem_cache_t *skbuff_fclone_cache __read_mostly;
+static kmem_cache_t *skbuff_shared_info_cache __read_mostly;
 
 /*
  * Keep out-of-line to prevent kernel bloat.
@@ -146,6 +147,8 @@ struct sk_buff *__alloc_skb(unsigned int
struct skb_shared_info *shinfo;
struct sk_buff *skb;
u8 *data;
+   int order = get_order(size + sizeof(void *));
+   struct skb_shared_info *sh;
 
cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
 
@@ -156,11 +159,28 @@ struct sk_buff *__alloc_skb(unsigned int
 
/* Get the DATA. Size must match skb_add_mtu(). */
size = SKB_DATA_ALIGN(size);
-   data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
-   if (!data)
-   goto nodata;
+   if ((1UL  order)  size + sizeof(void *) + sizeof(struct 
skb_shared_info)) {
+   data = kmalloc(size + sizeof(struct skb_shared_info), 
gfp_mask);
+   if (!data)
+   goto nodata;
+   memset(skb, 0, offsetof(struct sk_buff, truesize));
+   } else {
+   unsigned long *ptr;
+
+   data = kmalloc(size, gfp_mask);
+   if (!data)
+   goto nodata;
+   sh = kmem_cache_alloc(skbuff_shared_info_cache, gfp_mask);
+   if (!sh) {
+   kfree(data);
+   goto nodata;
+   }
+   memset(skb, 0, offsetof(struct sk_buff, truesize));
+   skb-shinfo_cache = 1;
+   ptr = data;
+   ptr[size] = sh;
+   }
 
-   memset(skb, 0, offsetof(struct sk_buff, truesize));
skb-truesize = size + sizeof(struct sk_buff);
atomic_set(skb-users, 1);
skb-head = data;
@@ -314,6 +334,8 @@ static void skb_release_data(struct sk_b
skb_drop_fraglist(skb);
 
kfree(skb-head);
+   if (skb-shinfo_cache)
+   kmem_cache_free(skbuff_shared_info_cache, *(skb-end));
}
 }
 
@@ -500,6 +522,7 @@ #endif
C(data);
C(tail);
C(end);
+   C(shinfo_cache);
 
atomic_inc((skb_shinfo(skb)-dataref));
skb-cloned = 1;
@@ -2057,6 +2080,14 @@ void __init skb_init(void)
NULL, NULL);
if (!skbuff_fclone_cache)
panic(cannot create skbuff cache);
+   
+   skbuff_shared_info_cache = kmem_cache_create(skbuff_shared_info_cache,
+   sizeof(struct sbk_shared_info),
+   0,
+   SLAB_HWCACHE_ALIGN,
+   NULL, NULL);
+   if (!skbuff_shared_info_cache)
+   panic(cannot create skbuff shared info cache);
 }
 
 EXPORT_SYMBOL(___pskb_trim);


-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread David Miller
From: Evgeniy Polyakov [EMAIL PROTECTED]
Date: Mon, 7 Aug 2006 10:01:56 +0400

 + int order = get_order(size + sizeof(void *));
 + struct skb_shared_info *sh;
  
   cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
  
 @@ -156,11 +159,28 @@ struct sk_buff *__alloc_skb(unsigned int
  
   /* Get the DATA. Size must match skb_add_mtu(). */
   size = SKB_DATA_ALIGN(size);
 - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
 - if (!data)
 - goto nodata;
 + if ((1UL  order)  size + sizeof(void *) + sizeof(struct 
 skb_shared_info)) {

get_order() returns a PAGE_SIZE order, not a byte one.  So the test
at the end here is incorrect.  It should probably be something
like if ((PAGE_SIZE << order) > ...

I don't know if I want to eat an entire extra allocation for every SKB
just to handle broken e1000 cards that can't be bothered to support
non-power-of-2 receive buffer sizes and a proper MTU setting.

I guess we might have to, but this is extremely unfortunate. :-/

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH -rt DO NOT APPLY] Fix for tg3 networking lockup

2006-08-07 Thread David Miller
From: Steven Rostedt [EMAIL PROTECTED]
Date: Mon, 7 Aug 2006 01:34:56 -0400 (EDT)

 My suggestion would be to separate that tg3_timer into 4 different
 timers, which is what it actually looks like.

Timers have non-trivial cost.  It's cheaper to have one and
vector off to the necessary operations each tick internally.

That's why it's implemented as one timer.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Herbert Xu
Evgeniy Polyakov [EMAIL PROTECTED] wrote:
 
 Attached patch allows to avoid unnecessary alignment overhead
 in skb-data allocation.
 Main idea is to allocate struct skb_shared_info from cache when
 addition of sizeof(struct skb_shared_info) ens up in different order
 allocation than initial size order.
 This allows to solve problem with 4k allocations for 1500 MTU and 32k
 allocations for 9k jumbo frames for some chips.
 Patch was not tested, so if idea worth it I will complete it.

I thought the Intel guys were saying that their NIC could write the
full 16KB, which means it's unsafe to use the last four bytes, no?

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Evgeniy Polyakov
On Sun, Aug 06, 2006 at 11:23:39PM -0700, David Miller ([EMAIL PROTECTED]) 
wrote:
  +   if ((1UL  order)  size + sizeof(void *) + sizeof(struct 
  skb_shared_info)) {
 
 get_order() returns a PAGE_SIZE order not a byte one.  So this test
 here at the end is incorrect.  It should probably be something
 like if ((PAGE_SIZE  order)  ...
 
 I don't know if I want to eat an entire extra allocation for every SKB
 just to handle broken e1000 cards that can't be bothered to support
 non-power-of-2 receive buffer sizes and a proper MTU setting.
 
 I guess we might have to, but this is extremely unfortunate. :-/

I have an even better idea - create alloc_skb_aligned() for those who
know in advance that their size is always aligned to a power of 2, so
the additional skb_shared_info will 100% require a higher order allocation.
Then e1000 can use that instead of the usual alloc_skb().

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Evgeniy Polyakov
On Mon, Aug 07, 2006 at 04:29:09PM +1000, Herbert Xu ([EMAIL PROTECTED]) wrote:
 Evgeniy Polyakov [EMAIL PROTECTED] wrote:
  
  Attached patch allows to avoid unnecessary alignment overhead
  in skb-data allocation.
  Main idea is to allocate struct skb_shared_info from cache when
  addition of sizeof(struct skb_shared_info) ens up in different order
  allocation than initial size order.
  This allows to solve problem with 4k allocations for 1500 MTU and 32k
  allocations for 9k jumbo frames for some chips.
  Patch was not tested, so if idea worth it I will complete it.
 
 I thought the Intel guys were saying that their NIC could write the
 full 16KB which means it it's unsafe to use the last four bytes, no?

Well, their comments in the code say that the maximum allowed frame size is
0x3f00, so there is a little room at the end to put a pointer there,
so I allocate size + sizeof(void *).
If they actually can eat all 16k, then we need a pointer somewhere in
the skb for shared_info, since 16k + sizeof(void *) will be rounded up to
32k.

 Cheers,
 -- 
 Visit Openswan at http://www.openswan.org/
 Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
 Home Page: http://gondor.apana.org.au/~herbert/
 PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Herbert Xu
On Mon, Aug 07, 2006 at 10:36:36AM +0400, Evgeniy Polyakov wrote:
 
 Well, theirs comments in code say, that maximum allowed frame size is
 0x3f00, so there is a little place at the end to put there a pointer,
 so I allocate size + sizeof(void *).
 If they actually can eat all 16k, then we need a pointer somewhere in
 the skb for shared_info, since 16k + sizeof(void *) will be aligned to
 32k.

It would be good to get a definitive statement from them before we go
down this track.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Herbert Xu
David Miller [EMAIL PROTECTED] wrote:
 
 I don't know if I want to eat an entire extra allocation for every SKB
 just to handle broken e1000 cards that can't be bothered to support
 non-power-of-2 receive buffer sizes and a proper MTU setting.
 
 I guess we might have to, but this is extremely unfortunate. :-/

I'd hope not.  Apparently they are capable of putting data into
individual pages and chaining them together.  The only problem
is that half a page is wasted for 1500-byte packets.

However, allocating 16KB packets would waste even more memory
if only 1500 bytes end up getting used.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Evgeniy Polyakov
On Mon, Aug 07, 2006 at 05:17:13PM +1000, Herbert Xu ([EMAIL PROTECTED]) wrote:
 David Miller [EMAIL PROTECTED] wrote:
  
  I don't know if I want to eat an entire extra allocation for every SKB
  just to handle broken e1000 cards that can't be bothered to support
  non-power-of-2 receive buffer sizes and a proper MTU setting.
  
  I guess we might have to, but this is extremely unfortunate. :-/
 
 I'd hope not.  Apparently they are capable of putting data into
 individual pages and chaining them together.  The only problem

Unfortunately not all chips are capable of doing this.

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Evgeniy Polyakov
On Mon, Aug 07, 2006 at 05:28:16PM +1000, Herbert Xu ([EMAIL PROTECTED]) wrote:
 On Mon, Aug 07, 2006 at 11:24:23AM +0400, Evgeniy Polyakov wrote:
 
   I'd hope not.  Apparently they are capable of putting data into
   individual pages and chaining them together.  The only problem
  
  Unfortunately not all chips are capable to do this.
 
 No not all chips are capable of header-splitting.  However, from what
 Jesse was saying it sounded as if all (or most?) chips are capable of
 storing data cross pages.

Only if they form contiguous region?
Jesse, is it possible for every e1000 chip to split frame into several
page-sized chunks i.e. create some kind of receiving scatter-gather?

 Cheers,
 -- 
 Visit Openswan at http://www.openswan.org/
 Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
 Home Page: http://gondor.apana.org.au/~herbert/
 PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Herbert Xu
On Mon, Aug 07, 2006 at 11:24:23AM +0400, Evgeniy Polyakov wrote:

  I'd hope not.  Apparently they are capable of putting data into
  individual pages and chaining them together.  The only problem
 
 Unfortunately not all chips are capable to do this.

No not all chips are capable of header-splitting.  However, from what
Jesse was saying it sounded as if all (or most?) chips are capable of
storing data cross pages.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] SNMPv2 tcpOutSegs counter error

2006-08-07 Thread Wei Yongjun
I used tcb->end_seq instead of tcb->seq, and added a new condition
'tcb->seq == tcb->end_seq' so that ACK segments are counted.

On Sunday 06 August 2006 22:48, Herbert Xu wrote:
 On Sun, Aug 06, 2006 at 07:44:47PM -0700, David Miller wrote:
  From: Herbert Xu [EMAIL PROTECTED]
  Date: Mon, 07 Aug 2006 12:40:34 +1000
 
   The general approach looks sound.  I have one esoteric question
   though.  If a retransmitted packet is coalesced with one that is
   yet to be transmitted (a fairly unlikely scenario, but possible I
   think), should it count towards OUTSEGS?
 
  Probably the packet should be counted to OUTSEGS if any of it
contains
  new data.

 OK, in that case Yongjun please update your patch to test against
 tcb-end_seq instead of tcb-seq.

 Cheers,

Signed-off-by: Wei Yongjun [EMAIL PROTECTED]

--- a/net/ipv4/tcp_output.c 2006-08-03 18:05:22.425081936 -0400
+++ b/net/ipv4/tcp_output.c 2006-08-07 09:48:41.186372896 -0400
@@ -462,7 +462,8 @@ static int tcp_transmit_skb(struct sock 
if (skb-len != tcp_header_size)
tcp_event_data_sent(tp, skb, sk);
 
-   TCP_INC_STATS(TCP_MIB_OUTSEGS);
+   if(after(tcb-end_seq, tp-snd_nxt) || tcb-seq == tcb-end_seq)
+   TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
err = icsk-icsk_af_ops-queue_xmit(skb, 0);
if (likely(err = 0))
@@ -2151,10 +2152,9 @@ int tcp_connect(struct sock *sk)
skb_shinfo(buff)-tso_segs = 1;
skb_shinfo(buff)-tso_size = 0;
buff-csum = 0;
+   tp-snd_nxt = tp-write_seq;
TCP_SKB_CB(buff)-seq = tp-write_seq++;
TCP_SKB_CB(buff)-end_seq = tp-write_seq;
-   tp-snd_nxt = tp-write_seq;
-   tp-pushed_seq = tp-write_seq;
 
/* Send it off. */
TCP_SKB_CB(buff)-when = tcp_time_stamp;
@@ -2164,6 +2164,11 @@ int tcp_connect(struct sock *sk)
sk_charge_skb(sk, buff);
tp-packets_out += tcp_skb_pcount(buff);
tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
+   /* change tp-snd_nxt after tcp_transmit_skb() to make this packet to be
+* counted to tcpOutSegs
+*/
+   tp-snd_nxt = tp-write_seq;
+   tp-pushed_seq = tp-write_seq;
TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
/* Timer for repeating the SYN until an answer. */


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Herbert Xu
On Mon, Aug 07, 2006 at 11:31:03AM +0400, Evgeniy Polyakov wrote:
 
 Only if they form contiguous region?
 Jesse, is it possible for every e1000 chip to split frame into several
 page-sized chunks i.e. create some kind of receiving scatter-gather?

Actually, it was Chris Leech who raised this possibility:

: Yes, e1000 devices will spill over and use multiple buffers for a
: single frame.  We've been trying to find a good way to use multiple
: buffers to take care of these allocation problems.  The structure of
: the sk_buff does not make it easy.  Or should I say that it's the
: limitation that drivers are not allowed to chain together multiple
: sk_buffs to represent a single frame that does not make it easy.

Perhaps he can enlighten us.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Packet reordering in pcap capture file

2006-08-07 Thread Alan Shieh

Stephen Hemminger wrote:

On Sat, 05 Aug 2006 03:28:38 -0400
Alan Shieh [EMAIL PROTECTED] wrote:



Hi everyone,

I sometimes see packets stored out of order in pcap files that were generated
by tcpdump -i any on kernel 2.4.26 with all packets arriving and
departing on an e1000 NIC. That is, the ordering by receive timestamp on
the packets is not the same as the ordering of the packets within the file.


In my precise scenario, packets of RX packets show up in the log 230 ms 
later than they ought to based on the receive timestamp. The kernel 
behavior (e.g., the packets that are sent by this node) seems to reflect 
the arrival of the Rx packet at the position in the logfile, rather than 
the arrival time according to the timestamp.


What are some of the known causes of this behavior? I'd like to know 
what locks, etc. might be causing this processing / capture delay.



SMP or single CPU? What is the clock source being used?
If you had a CPU like dual-core AMD that doesn't sync TSC's and
that was the clock source, the timestamps could be wrong.


Single CPU, using TSC. The behavior of the system is as if the RTT is 
230ms, so I think a queue is building up somewhere within the kernel. I 
am trying to narrow down the possible ways my experimental code could 
have caused such a queue backlog. I've tried setting netdev-quota in 
the e1000 module to a much larger value, thus forcing the backlog to be 
processed faster, but that does not help.


Alan
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Eric Dumazet
On Monday 07 August 2006 08:01, Evgeniy Polyakov wrote:
 Hello.

 Attached patch allows to avoid unnecessary alignment overhead
 in skb-data allocation.
 Main idea is to allocate struct skb_shared_info from cache when
 addition of sizeof(struct skb_shared_info) ens up in different order
 allocation than initial size order.
 This allows to solve problem with 4k allocations for 1500 MTU and 32k
 allocations for 9k jumbo frames for some chips.
 Patch was not tested, so if idea worth it I will complete it.

 Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]


 + if ((1UL  order)  size + sizeof(void *) + sizeof(struct
 skb_shared_info)) { + data = kmalloc(size + sizeof(struct
 skb_shared_info), gfp_mask); +if (!data)
 + goto nodata;
 + memset(skb, 0, offsetof(struct sk_buff, truesize));
 + } else {
 + unsigned long *ptr;
 +
 + data = kmalloc(size, gfp_mask);

You certainly want to kmalloc(size + sizeof(void *)) here, dont you ?

 + if (!data)
 + goto nodata;
 + sh = kmem_cache_alloc(skbuff_shared_info_cache, gfp_mask);
 + if (!sh) {
 + kfree(data);
 + goto nodata;
 + }
 + memset(skb, 0, offsetof(struct sk_buff, truesize));
 + skb-shinfo_cache = 1;
 + ptr = data;
 + ptr[size] = sh;

Eric
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Evgeniy Polyakov
On Mon, Aug 07, 2006 at 10:05:57AM +0200, Eric Dumazet ([EMAIL PROTECTED]) 
wrote:
  +   if ((1UL  order)  size + sizeof(void *) + sizeof(struct
  skb_shared_info)) { +   data = kmalloc(size + sizeof(struct
  skb_shared_info), gfp_mask); +  if (!data)
  +   goto nodata;
  +   memset(skb, 0, offsetof(struct sk_buff, truesize));
  +   } else {
  +   unsigned long *ptr;
  +
  +   data = kmalloc(size, gfp_mask);
 
 You certainly want to kmalloc(size + sizeof(void *)) here, dont you ?

Yep.
I think in the next iteration of this patch I will add an additional argument
which will give the order of the aligned size (to eliminate the get_order() loop
for those who know it in advance, like e1000). In case there is no
room even for sizeof(void *) (which happens with e1000) and the allocation
order is quite high (more than half of the page), then an additional field
in the skb can be used (or we can reuse it unconditionally to store the pointer
to shared info if the skb is being allocated through the alloc_skb_aligned()
function).

 Eric

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] RFC: matching interface groups

2006-08-07 Thread Balazs Scheidler
On Fri, 2006-08-04 at 12:06 +0200, Patrick McHardy wrote:
 Balazs Scheidler wrote:
  The use-case is as follows:
  
  * I have two different subsystems creating interfaces dynamically (for
  example pptpd and serial pppd lines, each creating dynamic pppX
  interfaces),
  * I would like to assign a different set of iptables rules for these
  clients,
  * I would like to react to a new interface being added to a specific set
  in a userspace application,
  
  The reasons I see this needs new kernel functionality:
  
  * iptables supports wildcard interface matching (for example iptables
  -i ppp+), but as the names of the interfaces used by PPTPD and PPPD
  cannot be distinguished this way, this is not enough,
  * Reloading the iptables ruleset everytime a new interface comes up is
  not really feasible, as it abrupts packet processing, and validating the
  ruleset in the kernel can take significant amount of time,
  * the kernel change is very simple, adapting userspace to this change is
  also very simple, and in userspace various software packages can easily
  interoperate with each-other once this is merged.
  
  The implementation:
  
  Each interface can belong to a single group at a time, an interface
  comes up without being a member in any of the groups.
  
  Userspace can assign interfaces to groups after being created, this
  would typically be performed in /etc/ppp/ip-up.d (and similar) scripts.
  
  In spirit interface group is somewhat similar to the routing
  protocol field for routing entries, which contains information on which
  routing daemon was responsible for adding the given route entry.
  
  Things to be done if you like this approach:
  
  * interface group match in iptables,
  * support for naming interface groups in userspace, a'la routing
  protocols,
  * emitting a netlink notification when the group of an interface
  changes,
  * possibly converting the ip link command to use NETLINK messages,
  instead of using ioctl()
  
  What do you think?
 
 
 I like it .. kind of like routing realms. For your specific case there
 is a possible solution already supported by the kernel, you can
 pre-allocate ppp devices using PPPIOCNEWUNIT, rename them and later
 attach to individual units in the ppp daemon using PPPIOCATTACH
 (I have a patch for this somewhere if you're interested). But that
 only works for PPP devices and the group idea looks more flexible.

Thanks for liking it :) I'm going to implement a complete patch with
iptables match and support for naming interface groups like routing
realms and post it when I'm ready.

I'd go for the more general solution as I have other interfaces not just
ppp, it was just a trivial example.

-- 
Bazsi

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread jamal
On Sun, 2006-06-08 at 16:16 -0700, Jesse Brandeburg wrote:
[..]
 
 As for specifics, for TX_WAKE_THRESHOLD, i noticed that we were
 starting the queue after every packet was cleaned, so when the ring
 went full there was a lot of queue thrash.

indeed this is what used to happen and was bad. So this is a huge
improvement.
What happens now under steady state at high traffic transmits is,
instead of 1, you see E1000_TX_WEIGHT in between queue sleep/wakes. I
assume this is a given since E1000_TX_WEIGHT is higher than
TX_WAKE_THRESHOLD.  I am not sure if i can vouch for even more
improvement by mucking around with values of E1000_TX_WEIGHT.

Can you please take a look at the patch i posted? I would like to submit
that for inclusion. It does two things
a) make pruning available to be invoked from elsewhere (I tried to do it
from the tx path but it gave me non-good results)
b) makes E1000_TX_WEIGHT and TX_WAKE_THRESHOLD relative to the size
of the transmit ring. I think this is a sane thing to do.

You could either extract the bits or i could resend to you as two
different patches. I have tested it and it works.

   tg3 seemed to fix it in a
 smart way and so I did a similar fix.  Note we should have at least
 MAX_SKB_FRAGS (usually 32) + a few descriptors free before we should
 start the tx again, otherwise we run the risk of a maximum fragmented
 packet being unable to fit in the tx ring.

I noticed you check for that in the tx path.

 now, for E1000_TX_WEIGHT, that was more of an experiment as i noticed
 we could stay in transmit clean up forEVER (okay not literally) which
 would really violate our NAPI timeslice.  

Interesting. The only time i have seen the NAPI time slice kick in is in
slow hardware or emulators (like UML). 
I wonder if the pruning path could be made faster? What is the most
consuming item? I realize there will be a substantial amount of cache
misses. Maybe in addition to prunning E1000_TX_WEIGHT descriptors also
fire a timer to clean up the rest (to avoid it being accounted for in
the napi timeslice;-). Essentially i think you have some thing in the
pruning path that needs to be optimized. Profiling and improving that
would help.

 I messed with some values
 and 64 didn't really seem like too bad a compromise (it does slow
 things down just a tad in the general case) while really helping a
 couple of tests where there were lots of outstanding transmits
 happening at the same time as lots of receives.
 

The later are the kind of tests i am running. If you are a router or a
busy server they apply. In slow machines a ping flood also applies etc. 

 This need for a tx weight is yet another global (design) problem with
 NAPI enabled drivers, 

oh yes, the Intel cabal - blame NAPI first;- 
IMO, the problem is you are consuming too many cycles in the receive
path. NAPI has to be fair to all netdevices and cant hog all the CPU
because a certain netdevice uses too many cycles to process a packet. 

 but someday I'll try to document some of the issues I've seen.

I think it would be invaluable. Just dont jump to blame Canada^WNAPI
conclusion because it distracts; 

cheers,
jamal

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Edgar E. Iglesias
On Mon, Aug 07, 2006 at 08:50:36AM -0400, jamal wrote:
 On Sun, 2006-06-08 at 16:16 -0700, Jesse Brandeburg wrote:
 [..]
  
  As for specifics, for TX_WAKE_THRESHOLD, i noticed that we were
  starting the queue after every packet was cleaned, so when the ring
  went full there was a lot of queue thrash.
 
 indeed this is what used to happen and was bad So this is a huge
 improvement.
 What happens now under steady state at high traffic transmits is,
 instead of 1, you see E1000_TX_WEIGHT in between queue sleep/wakes. I
 assume this is a given since E1000_TX_WEIGHT is higher than
 TX_WAKE_THRESHOLD.  I am not sure if i can vouch for even more
 improvement by mucking around with values of E1000_TX_WEIGHT.
 
 Can you please take a look at the patch i posted? I would like to submit
 that for inclusion. It does two things
 a) make pruning available to be invoked from elsewhere (I tried to do it
 from the tx path but it gave me non-good results)
 b) makes E1000_TX_WEIGHT and TX_WAKE_THRESHOLD relative to the size
 of the transmit ring. I think this is a sane thing to do.
 

Hi Jamal,

I have a question regarding your patch. In clean_tx_irq, it seems you dont
clean the ring unless fdesc  tx_ring-prunet. Won't this cause deadlocks for
local TCP connections if transmit goes quiet?

It seems to me as if this patch depends on the skb orphaning previously 
suggested on this thread. Please correct me if I'm wrong.

Best regards
-- 
Programmer
Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread jamal

On Mon, 2006-07-08 at 17:21 +0200, Edgar E. Iglesias wrote:
[..]
 I have a question regarding your patch. In clean_tx_irq, it seems you dont
 clean the ring unless fdesc  tx_ring-prunet. Won't this cause deadlocks for
 local TCP connections if transmit goes quiet?
 

I have not tested the TCP case; however, note that the specific part you
reference is commented out. There are no deadlock issues in the case of
forwarding (as in my testcases). 

I did not quite follow the ensuing discussion after your post:
These descriptors being pruned in the tx path happen only after the
packets have been sent out on the wire. Why would this contribute to a
deadlock but not when it happens on the receive path? It is true that
tcp retransmit queue will still be referencing the skbs, but why is it
any different because in one case it happens in the tx and in the other
on the receive? Is there dependency on waking up the queue?

 It seems to me as if this patch depends on the skb orphaning previously 
 suggested on this thread. Please correct me if I'm wrong.
 

I didn't quite follow that discussion; I will go back and read it. You
could also answer my questions above to help me understand better.

cheers,
jamal

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


forcedeth gigabit detection

2006-08-07 Thread Frank v Waveren
(sorry about sending this to you twice Carl-Daniel, I sent this to
@oss.sgi.com by accident)

The nforce2 builtin network on my A7N8X-delux motherboard won't detect
as gigabit-capable using the forcedeth driver. 

I'm using the forcedeth driver that comes with linux 2.6.17 (which is
0.54).

Ethtool gives:
Settings for eth1:
Supported ports: [ MII ]
Supported link modes:   10baseT/Half 10baseT/Full 
100baseT/Half 100baseT/Full 
Supports auto-negotiation: Yes
Advertised link modes:  10baseT/Half 10baseT/Full 
100baseT/Half 100baseT/Full 
Advertised auto-negotiation: Yes
Speed: 100Mb/s
Duplex: Full
Port: MII
PHYAD: 1
Transceiver: external
Auto-negotiation: on
Supports Wake-on: g
Wake-on: d
Link detected: yes

I assume the supported link modes are those supported by the NIC, not
the link (It's a short piece of cat5 between the two cards (no
switch/hub), it should support gigabit but you never know)?

The odd thing is the NIC on the other side of the cable (which is also
a forcedeth from the nforce3 chipset) detects as:
Settings for eth0:
Supported ports: [ MII ]
Supported link modes:   10baseT/Half 10baseT/Full 
100baseT/Half 100baseT/Full 
1000baseT/Full 
Supports auto-negotiation: Yes
Advertised link modes:  10baseT/Half 10baseT/Full 
100baseT/Half 100baseT/Full 
1000baseT/Full 
Advertised auto-negotiation: Yes
Speed: 100Mb/s
Duplex: Full
Port: MII
PHYAD: 1
Transceiver: externel
Auto-negotiation: on
Supports Wake-on: g
Wake-on: d
Link detected: yes

Here's the debugging output from the forcedeth that won't do gigabit
on module load and bringing the interface up:
Aug  7 15:52:46 jupiter kernel: PCI: Setting latency timer of device 
:00:04.0 to 64
Aug  7 15:52:46 jupiter kernel: :00:04.0: resource 0 start df083000 len 
4096 flags 0x0200.
Aug  7 15:52:46 jupiter kernel: :00:04.0: MAC Address 00:0e:a6:0b:6f:73
Aug  7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 2 at PHY 1: 0x0.
Aug  7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 3 at PHY 1: 0x8201.
Aug  7 15:52:46 jupiter kernel: :00:04.0: open: Found PHY :0020 at 
address 1.
Aug  7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 4 at PHY 1: 0x1e1.
Aug  7 15:52:46 jupiter kernel: eth%%d: mii_rw wrote 0xde1 to reg 4 at PHY 1
Aug  7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 1 at PHY 1: 0x786d.
Aug  7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 0 at PHY 1: 0x3100.
Aug  7 15:52:46 jupiter kernel: eth%%d: mii_rw wrote 0xb100 to reg 0 at PHY 1
Aug  7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 0 at PHY 1: 0x3000.
Aug  7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 0 at PHY 1: 0x3000.
Aug  7 15:52:46 jupiter kernel: eth%%d: mii_rw wrote 0x3200 to reg 0 at PHY 1
Aug  7 15:53:03 jupiter kernel: nv_open: begin
Aug  7 15:53:03 jupiter kernel: eth1: nv_alloc_rx: Packet 0 marked as Available
...
Aug  7 15:53:03 jupiter kernel: eth1: nv_alloc_rx: Packet 127 marked as 
Available
Aug  7 15:53:03 jupiter kernel: eth1: nv_txrx_reset
Aug  7 15:53:03 jupiter kernel: eth1: mii_rw read from reg 1 at PHY 1: 0x786d.
Aug  7 15:53:03 jupiter kernel: eth1: mii_rw read from reg 1 at PHY 1: 0x786d.
Aug  7 15:53:03 jupiter kernel: eth1: mii_rw read from reg 4 at PHY 1: 0x1e1.
Aug  7 15:53:03 jupiter kernel: eth1: mii_rw read from reg 5 at PHY 1: 0xc5e1.
Aug  7 15:53:03 jupiter kernel: eth1: nv_update_linkspeed: PHY advertises 
0x01e1, lpa 0xc5e1.
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064.
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_tx
Aug  7 15:53:03 jupiter kernel: eth1: nv_stop_rx
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064.
Aug  7 15:53:03 jupiter kernel: eth1: nv_stop_rx
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064.
Aug  7 15:53:03 jupiter kernel: eth1: nv_stop_rx
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064.
Aug  7 15:53:03 jupiter kernel: eth1: nv_stop_rx
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064.
Aug  7 15:53:03 jupiter kernel: eth1: nv_stop_rx
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx
Aug  7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064.
Aug  7 15:53:03 jupiter kernel: eth1: nv_stop_rx
Aug  7 15:53:03 jupiter kernel: eth1: 

Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Edgar E. Iglesias
On Mon, Aug 07, 2006 at 11:40:49AM -0400, jamal wrote:
 
 On Mon, 2006-07-08 at 17:21 +0200, Edgar E. Iglesias wrote:
 [..]
  I have a question regarding your patch. In clean_tx_irq, it seems you dont
  clean the ring unless fdesc  tx_ring-prunet. Won't this cause deadlocks 
  for
  local TCP connections if transmit goes quiet?
  
 
 I have not tested the TCP case; however, note that the specific part you
 reference is commented out. There are no deadlock issues in the case of
 forwarding (as in my testcases). 

Ok, I thought you wanted the code inside the ifdefs to be considered. If not,
I guess there is no problem. Yes, the forwarding case does not suffer from
any deadlocks issues that I am aware of.

 
 I did not quite follow the ensuing discussion after your post:
 These descriptors being pruned in the tx path happen only after the
 packets have been sent out on the wire. Why would this contribute to a
 deadlock but not when it happens on the receive path? It is true that
 tcp retransmit queue will still be referencing the skbs, but why is it
 any different because in one case it happens in the tx and in the other
 on the receive? Is there dependency on waking up the queue?

No, the deadlock happens only if you don't prune the descriptors. If the host
sends some data and then goes quiet, fdesc  tx_ring-prunet might not be
true for a long time and skbs will end up sitting in the tx ring indefinitely,
charging the socket's sndbuf.

Best regards
-- 
Programmer
Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Jamal Hadi Salim
On Mon, 2006-07-08 at 17:59 +0200, Edgar E. Iglesias wrote:

 Ok, I thought you wanted the code inside the ifdefs to be considered. If not,
 I guess there is no problem. Yes, the forwarding case does not suffer from
 any deadlocks issues that I am aware of.
 

From my tests:
It does _not_ provide any performance improvements and at some point i decided
i didnt want to add more variables to analyze, so i got rid of it; I would have 
had to hand edit the patch to totally remove it; so that's why you still see the 
ifdefed out variant.

 No, the deadlock happens only if you don't prune the descriptors. If the host
 sends some data and then goes quiet, fdesc  tx_ring-prunet might not be
 true for a long time and skbs will end up sitting in the tx ring indefinitely,
 charging the socket's sndbuf.
 

Note: I didnt get rid of the rx path pruning. i.e that is still on. It
just prunes lesser descriptors with that change on the tx. So not very
different from before.

I think I may be getting the gist of the discussion now, after a re-read:
packets still charged to TCP may already have been transmitted, yet they may sit
on the tx ring forever. They will only be pruned if we had netif_stopped
(and even that is not good enough with Jesse's threshold check) or if a
new packet comes in destined for us. 
Did i understand correctly? If yes, i didnt introduce this challenge it
has always been there. I think i understand the suggestion now from
Dave/Herbert to orphan those skbs... 

cheers,
jamal


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Edgar E. Iglesias
On Mon, Aug 07, 2006 at 11:40:49AM -0400, jamal wrote:
 
 On Mon, 2006-07-08 at 17:21 +0200, Edgar E. Iglesias wrote:
 [..]
  I have a question regarding your patch. In clean_tx_irq, it seems you dont
  clean the ring unless fdesc  tx_ring-prunet. Won't this cause deadlocks 
  for
  local TCP connections if transmit goes quiet?
  
 
 I have not tested the TCP case; however, note that the specific part you
 reference is commented out. There are no deadlock issues in the case of
 forwarding (as in my testcases). 
 
 I did not quite follow the ensuing discussion after your post:
 These descriptors being pruned in the tx path happen only after the
 packets have been sent out on the wire. Why would this contribute to a
 deadlock but not when it happens on the receive path? It is true that
 tcp retransmit queue will still be referencing the skbs, but why is it
 any different because in one case it happens in the tx and in the other
 on the receive? Is there dependency on waking up the queue?
 

Hi again Jamal,

Not sure if it is doable, but I'll post the thoughts anyway.

Assuming you would get the code inside the jamal ifdefs working without
deadlocks, you now have a tx_irq function which if fdesc = tx_ring-prunet
essentially just checks for hw lockups. Let's speculate and further assume you
could do the detect_tx_hung from some other context (timer or whatever) then
you end up having a tx_irq function which most of the time does nothing.

The next step could be to move the fdesc = tx_ring-prunet logic into the
transmit path and completely disable the tx_irq when the condition is not met.

Now you end up not taking the irq at all as long as fdesc = tx_ring-prunet.

This was the logic I tried on the cris driver but ended up with deadlocks :)

Best regards
-- 
Programmer
Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] limit rt cache size

2006-08-07 Thread Alexey Kuznetsov
Hello!

 During OpenVZ stress testing we found that UDP traffic with
 random src can generate too much excessive rt hash growing
 leading finally to OOM and kernel panics.
 
 It was found that for 4GB i686 system (having 1048576 total pages and
  225280 normal zone pages) kernel allocates the following route hash:
 syslog: IP route cache hash table entries: 262144 (order: 8, 1048576 bytes)
 => ip_rt_max_size = 4194304 entries, i.e.
 max rt size is 4194304 * 256b = 1Gb of RAM > normal_zone

Grrr... Indeed.


 Attached the patch which removes HASH_HIGHMEM flag from
 alloc_large_system_hash() call. However, I'm not sure whether
 it should be removed as well for TCP tcp_hashinfo.ehash and
 tcp_hashinfo.bhash (as those are probably limited by number of files?).

The patch looks OK. But I am not sure too.

To be honest, I do not understand the sense of HASH_HIGHMEM flag.
At the first sight, hash table eats low memory, objects hashed in this table
also eat low memory. Why is its size calculated from total memory?
But taking into account that this flag is used only by tcp.c and route.c,
both of which feed on low memory, I must be missing something important.

Let's ask people on netdev.


As for the routing cache size, that looks like another bug.
route.c should not force rt_max_size = 16*rt_hash_size.
I think it should consult available memory and limit rt_max_size
to some reasonable value, even if the hash size is too high.



 --- ./net/ipv4/route.c.xrt2006-07-14 19:08:33.0 +0400
 +++ ./net/ipv4/route.c2006-08-07 18:25:37.0 +0400
 @@ -3149,7 +3149,7 @@ int __init ip_rt_init(void)
   rhash_entries,
   (num_physpages = 128 * 1024) ?
   15 : 17,
 - HASH_HIGHMEM,
 + 0,
   rt_hash_log,
   rt_hash_mask,
   0);

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Edgar E. Iglesias
On Mon, Aug 07, 2006 at 12:31:59PM -0400, Jamal Hadi Salim wrote:
 On Mon, 2006-07-08 at 17:59 +0200, Edgar E. Iglesias wrote:
 
  Ok, I thought you wanted the code inside the ifdefs to be considered. If 
  not,
  I guess there is no problem. Yes, the forwarding case does not suffer from
  any deadlocks issues that I am aware of.
  
 
 From my tests:
 It does _not_ provide any performance improvements and at some point i decided
 i didnt want to add more variables to analyze, so i got rid of it; I would 
 have 
 had to hand edit the patch to totally remove it; so that why you still see 
 the 
 ifdefed out variant.
 
  No, the deadlock happens only if you don't prune the descriptors. If the 
  host
  sends some data and then goes quiet, fdesc  tx_ring-prunet might not be
  true for a long time and skbs will end up sitting in the tx ring 
  indefinitely,
  charging the socket's sndbuf.
  
 
 Note: I didnt get rid of the rx path pruning. i.e that is still on. It
 just prunes lesser descriptors with that change on the tx. So not very
 different from before.
 
 I think i may be getting a gist now of the discussion after a re-read; 
 while packets are still charged to TCP may have been transmitted they may sit
 on the tx ring forever. They will only be pruned if we had netif_stopped
 (and even that is not good enough with Jesse's threshold check) or if a
 new packet comes in destined for us. 
 Did i understand correctly? If yes, i didnt introduce this challenge it
 has always been there. I think i understand the suggestion now from
 Dave/Herbert to orphan those skbs... 

I'll give you an example.

A TCP flow sends X data and later waits for a response, host is now quietly
waiting. Assume fdesc = tx_ring-prunet, so we dont free any skbs, right?

Now assume that some part of X data gets lost, our retransmit timer hits and
we want to retransmit but our socket is charged with too much data sitting on
the nics tx-ring, so we don't send anything. By orphaning, those skbs won't
charge the socket and the flow can retransmit.

Best regards
-- 
Programmer
Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/7] [NEIGH]: Convert neighbour deletion to new netlink api

2006-08-07 Thread Thomas Graf
Fixes:
  Return ENOENT if the neighbour is not found (was EINVAL)
  Return EAFNOSUPPORT if no table matches the specified
  address family.

Signed-off-by: Thomas Graf [EMAIL PROTECTED]

Index: net-2.6.19.git/net/core/neighbour.c
===
--- net-2.6.19.git.orig/net/core/neighbour.c
+++ net-2.6.19.git/net/core/neighbour.c
@@ -30,6 +30,7 @@
 #include net/dst.h
 #include net/sock.h
 #include net/netevent.h
+#include net/netlink.h
 #include linux/rtnetlink.h
 #include linux/random.h
 #include linux/string.h
@@ -1437,48 +1438,62 @@ int neigh_table_clear(struct neigh_table
 
 int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-   struct ndmsg *ndm = NLMSG_DATA(nlh);
-   struct rtattr **nda = arg;
+   struct ndmsg *ndm;
+   struct nlattr *dst_attr;
struct neigh_table *tbl;
struct net_device *dev = NULL;
-   int err = -ENODEV;
+   int err = -EINVAL;
 
-   if (ndm-ndm_ifindex 
-   (dev = dev_get_by_index(ndm-ndm_ifindex)) == NULL)
+   if (nlmsg_len(nlh)  sizeof(*ndm))
+   goto out;
+
+   dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
+   if (dst_attr == NULL)
goto out;
 
+   ndm = nlmsg_data(nlh);
+   if (ndm-ndm_ifindex) {
+   dev = dev_get_by_index(ndm-ndm_ifindex);
+   if (dev == NULL) {
+   err = -ENODEV;
+   goto out;
+   }
+   }
+
read_lock(neigh_tbl_lock);
for (tbl = neigh_tables; tbl; tbl = tbl-next) {
-   struct rtattr *dst_attr = nda[NDA_DST - 1];
-   struct neighbour *n;
+   struct neighbour *neigh;
 
if (tbl-family != ndm-ndm_family)
continue;
read_unlock(neigh_tbl_lock);
 
-   err = -EINVAL;
-   if (!dst_attr || RTA_PAYLOAD(dst_attr)  tbl-key_len)
+   if (nla_len(dst_attr)  tbl-key_len)
goto out_dev_put;
 
if (ndm-ndm_flags  NTF_PROXY) {
-   err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev);
+   err = pneigh_delete(tbl, nla_data(dst_attr), dev);
goto out_dev_put;
}
 
-   if (!dev)
-   goto out;
+   if (dev == NULL)
+   goto out_dev_put;
 
-   n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
-   if (n) {
-   err = neigh_update(n, NULL, NUD_FAILED, 
-  NEIGH_UPDATE_F_OVERRIDE|
-  NEIGH_UPDATE_F_ADMIN);
-   neigh_release(n);
+   neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
+   if (neigh == NULL) {
+   err = -ENOENT;
+   goto out_dev_put;
}
+
+   err = neigh_update(neigh, NULL, NUD_FAILED,
+  NEIGH_UPDATE_F_OVERRIDE |
+  NEIGH_UPDATE_F_ADMIN);
+   neigh_release(neigh);
goto out_dev_put;
}
read_unlock(neigh_tbl_lock);
-   err = -EADDRNOTAVAIL;
+   err = -EAFNOSUPPORT;
+
 out_dev_put:
if (dev)
dev_put(dev);

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHSET] Convert neighbour code to new netlink api

2006-08-07 Thread Thomas Graf
Pretty much straightforward, with some minor fixes that go
along with it.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/7] [NEIGH]: Convert neighbour addition to new netlink api

2006-08-07 Thread Thomas Graf
Fixes:
Return EAFNOSUPPORT if no table matches the specified
address family.

Signed-off-by: Thomas Graf [EMAIL PROTECTED]

Index: net-2.6.19.git/net/core/neighbour.c
===
--- net-2.6.19.git.orig/net/core/neighbour.c
+++ net-2.6.19.git/net/core/neighbour.c
@@ -1503,76 +1503,88 @@ out:
 
 int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-   struct ndmsg *ndm = NLMSG_DATA(nlh);
-   struct rtattr **nda = arg;
+   struct ndmsg *ndm;
+   struct nlattr *tb[NDA_MAX+1];
struct neigh_table *tbl;
struct net_device *dev = NULL;
-   int err = -ENODEV;
+   int err;
+
+   err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+   if (err  0)
+   goto out;
 
-   if (ndm-ndm_ifindex 
-   (dev = dev_get_by_index(ndm-ndm_ifindex)) == NULL)
+   err = -EINVAL;
+   if (tb[NDA_DST] == NULL)
goto out;
 
+   ndm = nlmsg_data(nlh);
+   if (ndm-ndm_ifindex) {
+   dev = dev_get_by_index(ndm-ndm_ifindex);
+   if (dev == NULL) {
+   err = -ENODEV;
+   goto out;
+   }
+
+   if (tb[NDA_LLADDR]  nla_len(tb[NDA_LLADDR])  dev-addr_len)
+   goto out_dev_put;
+   }
+
read_lock(neigh_tbl_lock);
for (tbl = neigh_tables; tbl; tbl = tbl-next) {
-   struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1];
-   struct rtattr *dst_attr = nda[NDA_DST - 1];
-   int override = 1;
-   struct neighbour *n;
+   int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
+   struct neighbour *neigh;
+   void *dst, *lladdr;
 
if (tbl-family != ndm-ndm_family)
continue;
read_unlock(neigh_tbl_lock);
 
-   err = -EINVAL;
-   if (!dst_attr || RTA_PAYLOAD(dst_attr)  tbl-key_len)
+   if (nla_len(tb[NDA_DST])  tbl-key_len)
goto out_dev_put;
+   dst = nla_data(tb[NDA_DST]);
+   lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
 
if (ndm-ndm_flags  NTF_PROXY) {
-   err = -ENOBUFS;
-   if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1))
-   err = 0;
+   err = 0;
+   if (pneigh_lookup(tbl, dst, dev, 1) == NULL)
+   err = -ENOBUFS;
goto out_dev_put;
}
 
-   err = -EINVAL;
-   if (!dev)
-   goto out;
-   if (lladdr_attr  RTA_PAYLOAD(lladdr_attr)  dev-addr_len)
+   if (dev == NULL)
goto out_dev_put;
+
+   neigh = neigh_lookup(tbl, dst, dev);
+   if (neigh == NULL) {
+   if (!(nlh-nlmsg_flags  NLM_F_CREATE)) {
+   err = -ENOENT;
+   goto out_dev_put;
+   }

-   n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
-   if (n) {
-   if (nlh-nlmsg_flags  NLM_F_EXCL) {
-   err = -EEXIST;
-   neigh_release(n);
+   neigh = __neigh_lookup_errno(tbl, dst, dev);
+   if (IS_ERR(neigh)) {
+   err = PTR_ERR(neigh);
goto out_dev_put;
}
-   
-   override = nlh-nlmsg_flags  NLM_F_REPLACE;
-   } else if (!(nlh-nlmsg_flags  NLM_F_CREATE)) {
-   err = -ENOENT;
-   goto out_dev_put;
} else {
-   n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev);
-   if (IS_ERR(n)) {
-   err = PTR_ERR(n);
+   if (nlh-nlmsg_flags  NLM_F_EXCL) {
+   err = -EEXIST;
+   neigh_release(neigh);
goto out_dev_put;
}
-   }
 
-   err = neigh_update(n,
-  lladdr_attr ? RTA_DATA(lladdr_attr) : NULL,
-  ndm-ndm_state,
-  (override ? NEIGH_UPDATE_F_OVERRIDE : 0) |
-  NEIGH_UPDATE_F_ADMIN);
+   if (!(nlh-nlmsg_flags  NLM_F_REPLACE))
+   flags = ~NEIGH_UPDATE_F_OVERRIDE;
+   }
 
-   neigh_release(n);
+   err = neigh_update(neigh, lladdr, ndm-ndm_state, flags);
+   neigh_release(neigh);
goto 

[PATCH 3/7] [NEIGH]: Convert neighbour dumping to new netlink api

2006-08-07 Thread Thomas Graf
Signed-off-by: Thomas Graf [EMAIL PROTECTED]

Index: net-2.6.19.git/net/core/neighbour.c
===
--- net-2.6.19.git.orig/net/core/neighbour.c
+++ net-2.6.19.git/net/core/neighbour.c
@@ -1898,48 +1898,49 @@ out:
return skb-len;
 }
 
-static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
-  u32 pid, u32 seq, int event, unsigned int flags)
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
+  u32 pid, u32 seq, int type, unsigned int flags)
 {
unsigned long now = jiffies;
-   unsigned char *b = skb-tail;
struct nda_cacheinfo ci;
-   int locked = 0;
-   u32 probes;
-   struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event,
-sizeof(struct ndmsg), flags);
-   struct ndmsg *ndm = NLMSG_DATA(nlh);
+   struct nlmsghdr *nlh;
+   struct ndmsg *ndm;
+
+   nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
+   if (nlh == NULL)
+   return -ENOBUFS;
 
-   ndm-ndm_family  = n-ops-family;
+   ndm = nlmsg_data(nlh);
+   ndm-ndm_family  = neigh-ops-family;
ndm-ndm_pad1= 0;
ndm-ndm_pad2= 0;
-   ndm-ndm_flags   = n-flags;
-   ndm-ndm_type= n-type;
-   ndm-ndm_ifindex = n-dev-ifindex;
-   RTA_PUT(skb, NDA_DST, n-tbl-key_len, n-primary_key);
-   read_lock_bh(n-lock);
-   locked   = 1;
-   ndm-ndm_state   = n-nud_state;
-   if (n-nud_state  NUD_VALID)
-   RTA_PUT(skb, NDA_LLADDR, n-dev-addr_len, n-ha);
-   ci.ndm_used  = now - n-used;
-   ci.ndm_confirmed = now - n-confirmed;
-   ci.ndm_updated   = now - n-updated;
-   ci.ndm_refcnt= atomic_read(n-refcnt) - 1;
-   probes = atomic_read(n-probes);
-   read_unlock_bh(n-lock);
-   locked   = 0;
-   RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), ci);
-   RTA_PUT(skb, NDA_PROBES, sizeof(probes), probes);
-   nlh-nlmsg_len   = skb-tail - b;
-   return skb-len;
+   ndm-ndm_flags   = neigh-flags;
+   ndm-ndm_type= neigh-type;
+   ndm-ndm_ifindex = neigh-dev-ifindex;
+
+   NLA_PUT(skb, NDA_DST, neigh-tbl-key_len, neigh-primary_key);
+
+   read_lock_bh(neigh-lock);
+   ndm-ndm_state   = neigh-nud_state;
+   if ((neigh-nud_state  NUD_VALID) 
+   nla_put(skb, NDA_LLADDR, neigh-dev-addr_len, neigh-ha)  0) {
+   read_unlock_bh(neigh-lock);
+   goto nla_put_failure;
+   }
 
-nlmsg_failure:
-rtattr_failure:
-   if (locked)
-   read_unlock_bh(n-lock);
-   skb_trim(skb, b - skb-data);
-   return -1;
+   ci.ndm_used  = now - neigh-used;
+   ci.ndm_confirmed = now - neigh-confirmed;
+   ci.ndm_updated   = now - neigh-updated;
+   ci.ndm_refcnt= atomic_read(neigh-refcnt) - 1;
+   read_unlock_bh(neigh-lock);
+
+   NLA_PUT_U32(skb, NDA_PROBES, atomic_read(neigh-probes));
+   NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), ci);
+
+   return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+   return nlmsg_cancel(skb, nlh);
 }
 
 
@@ -1983,7 +1984,7 @@ int neigh_dump_info(struct sk_buff *skb,
int t, family, s_t;
 
read_lock(neigh_tbl_lock);
-   family = ((struct rtgenmsg *)NLMSG_DATA(cb-nlh))-rtgen_family;
+   family = ((struct rtgenmsg *) nlmsg_data(cb-nlh))-rtgen_family;
s_t = cb-args[0];
 
for (tbl = neigh_tables, t = 0; tbl; tbl = tbl-next, t++) {
@@ -2364,39 +2365,34 @@ static struct file_operations neigh_stat
 #ifdef CONFIG_ARPD
 void neigh_app_ns(struct neighbour *n)
 {
-   struct nlmsghdr  *nlh;
-   int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
-   struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
+   struct sk_buff *skb;
 
-   if (!skb)
+   skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+   if (skb == NULL)
return;
 
-   if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0)  0) {
+   if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, NLM_F_REQUEST) = 0)
kfree_skb(skb);
-   return;
+   else {
+   NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
+   netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
}
-   nlh= (struct nlmsghdr *)skb-data;
-   nlh-nlmsg_flags   = NLM_F_REQUEST;
-   NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
-   netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
 }
 
 static void neigh_app_notify(struct neighbour *n)
 {
-   struct nlmsghdr *nlh;
-   int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
-   struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
+   struct sk_buff *skb;
 
-   if (!skb)
+   skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+   if (skb == NULL)
return;
 
-   if (neigh_fill_info(skb, 

[PATCH 4/7] [NEIGH]: Move netlink neighbour bits to linux/neighbour.h

2006-08-07 Thread Thomas Graf
Moves netlink neighbour bits to linux/neighbour.h. Also
moves bits to be exported to userspace from net/neighbour.h
to linux/neighbour.h and removes __KERNEL__ guards, userspace
is not supposed to be using it.

rtnetlink_rcv_msg() is no longer required to parse attributes
for the neighbour layer, remove dependency on obsolete and
buggy rta_buf.

Signed-off-by: Thomas Graf [EMAIL PROTECTED]

Index: net-2.6.19.git/include/linux/neighbour.h
===
--- /dev/null
+++ net-2.6.19.git/include/linux/neighbour.h
@@ -0,0 +1,65 @@
+#ifndef __LINUX_NEIGHBOUR_H
+#define __LINUX_NEIGHBOUR_H
+
+#include linux/netlink.h
+
+struct ndmsg
+{
+   __u8ndm_family;
+   __u8ndm_pad1;
+   __u16   ndm_pad2;
+   __s32   ndm_ifindex;
+   __u16   ndm_state;
+   __u8ndm_flags;
+   __u8ndm_type;
+};
+
+enum
+{
+   NDA_UNSPEC,
+   NDA_DST,
+   NDA_LLADDR,
+   NDA_CACHEINFO,
+   NDA_PROBES,
+   __NDA_MAX
+};
+
+#define NDA_MAX (__NDA_MAX - 1)
+
+/*
+ * Neighbor Cache Entry Flags
+ */
+
+#define NTF_PROXY  0x08/* == ATF_PUBL */
+#define NTF_ROUTER 0x80
+
+/*
+ * Neighbor Cache Entry States.
+ */
+
+#define NUD_INCOMPLETE 0x01
+#define NUD_REACHABLE  0x02
+#define NUD_STALE  0x04
+#define NUD_DELAY  0x08
+#define NUD_PROBE  0x10
+#define NUD_FAILED 0x20
+
+/* Dummy states */
+#define NUD_NOARP  0x40
+#define NUD_PERMANENT  0x80
+#define NUD_NONE   0x00
+
+/* NUD_NOARP  NUD_PERMANENT are pseudostates, they never change
+   and make no address resolution or NUD.
+   NUD_PERMANENT is also cannot be deleted by garbage collectors.
+ */
+
+struct nda_cacheinfo
+{
+   __u32   ndm_confirmed;
+   __u32   ndm_used;
+   __u32   ndm_updated;
+   __u32   ndm_refcnt;
+};
+
+#endif
Index: net-2.6.19.git/include/linux/rtnetlink.h
===
--- net-2.6.19.git.orig/include/linux/rtnetlink.h
+++ net-2.6.19.git/include/linux/rtnetlink.h
@@ -386,69 +386,6 @@ struct rta_session
 
 
 
-/**
- * Neighbour discovery.
- /
-
-struct ndmsg
-{
-   unsigned char   ndm_family;
-   unsigned char   ndm_pad1;
-   unsigned short  ndm_pad2;
-   int ndm_ifindex;/* Link index   */
-   __u16   ndm_state;
-   __u8ndm_flags;
-   __u8ndm_type;
-};
-
-enum
-{
-   NDA_UNSPEC,
-   NDA_DST,
-   NDA_LLADDR,
-   NDA_CACHEINFO,
-   NDA_PROBES,
-   __NDA_MAX
-};
-
-#define NDA_MAX (__NDA_MAX - 1)
-
-#define NDA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct 
ndmsg
-#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg))
-
-/*
- * Neighbor Cache Entry Flags
- */
-
-#define NTF_PROXY  0x08/* == ATF_PUBL */
-#define NTF_ROUTER 0x80
-
-/*
- * Neighbor Cache Entry States.
- */
-
-#define NUD_INCOMPLETE 0x01
-#define NUD_REACHABLE  0x02
-#define NUD_STALE  0x04
-#define NUD_DELAY  0x08
-#define NUD_PROBE  0x10
-#define NUD_FAILED 0x20
-
-/* Dummy states */
-#define NUD_NOARP  0x40
-#define NUD_PERMANENT  0x80
-#define NUD_NONE   0x00
-
-
-struct nda_cacheinfo
-{
-   __u32   ndm_confirmed;
-   __u32   ndm_used;
-   __u32   ndm_updated;
-   __u32   ndm_refcnt;
-};
-
-
 /*
  * Neighbour tables specific messages.
  *
Index: net-2.6.19.git/include/net/neighbour.h
===
--- net-2.6.19.git.orig/include/net/neighbour.h
+++ net-2.6.19.git/include/net/neighbour.h
@@ -1,6 +1,8 @@
 #ifndef _NET_NEIGHBOUR_H
 #define _NET_NEIGHBOUR_H
 
+#include linux/neighbour.h
+
 /*
  * Generic neighbour manipulation
  *
@@ -14,40 +16,6 @@
  * - Add neighbour cache statistics like rtstat
  */
 
-/* The following flags  states are exported to user space,
-   so that they should be moved to include/linux/ directory.
- */
-
-/*
- * Neighbor Cache Entry Flags
- */
-
-#define NTF_PROXY  0x08/* == ATF_PUBL */
-#define NTF_ROUTER 0x80
-
-/*
- * Neighbor Cache Entry States.
- */
-
-#define NUD_INCOMPLETE 0x01
-#define NUD_REACHABLE  0x02
-#define NUD_STALE  0x04
-#define NUD_DELAY  0x08
-#define NUD_PROBE  0x10
-#define NUD_FAILED 0x20
-
-/* Dummy states */
-#define NUD_NOARP  0x40
-#define NUD_PERMANENT  0x80
-#define NUD_NONE   0x00
-
-/* NUD_NOARP  NUD_PERMANENT are pseudostates, they never change
-   and make no address resolution or NUD.
-   NUD_PERMANENT is also cannot be deleted by garbage collectors.
- */
-
-#ifdef __KERNEL__
-
 #include asm/atomic.h
 #include linux/netdevice.h
 #include 

[PATCH 5/7] [NEIGH]: Convert neighbour table modification to new netlink api

2006-08-07 Thread Thomas Graf
Signed-off-by: Thomas Graf [EMAIL PROTECTED]

Index: net-2.6.19.git/net/core/neighbour.c
===
--- net-2.6.19.git.orig/net/core/neighbour.c
+++ net-2.6.19.git/net/core/neighbour.c
@@ -1751,28 +1751,61 @@ static inline struct neigh_parms *lookup
return NULL;
 }
 
+static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = {
+   [NDTA_NAME] = { .type = NLA_STRING },
+   [NDTA_THRESH1]  = { .type = NLA_U32 },
+   [NDTA_THRESH2]  = { .type = NLA_U32 },
+   [NDTA_THRESH3]  = { .type = NLA_U32 },
+   [NDTA_GC_INTERVAL]  = { .type = NLA_U64 },
+   [NDTA_PARMS]= { .type = NLA_NESTED },
+};
+
+static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
+   [NDTPA_IFINDEX] = { .type = NLA_U32 },
+   [NDTPA_QUEUE_LEN]   = { .type = NLA_U32 },
+   [NDTPA_PROXY_QLEN]  = { .type = NLA_U32 },
+   [NDTPA_APP_PROBES]  = { .type = NLA_U32 },
+   [NDTPA_UCAST_PROBES]= { .type = NLA_U32 },
+   [NDTPA_MCAST_PROBES]= { .type = NLA_U32 },
+   [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
+   [NDTPA_GC_STALETIME]= { .type = NLA_U64 },
+   [NDTPA_DELAY_PROBE_TIME]= { .type = NLA_U64 },
+   [NDTPA_RETRANS_TIME]= { .type = NLA_U64 },
+   [NDTPA_ANYCAST_DELAY]   = { .type = NLA_U64 },
+   [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
+   [NDTPA_LOCKTIME]= { .type = NLA_U64 },
+};
+
 int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
struct neigh_table *tbl;
-   struct ndtmsg *ndtmsg = NLMSG_DATA(nlh);
-   struct rtattr **tb = arg;
-   int err = -EINVAL;
+   struct ndtmsg *ndtmsg;
+   struct nlattr *tb[NDTA_MAX+1];
+   int err;
+
+   err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
+ nl_neightbl_policy);
+   if (err  0)
+   goto errout;
 
-   if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1]))
-   return -EINVAL;
+   if (tb[NDTA_NAME] == NULL) {
+   err = -EINVAL;
+   goto errout;
+   }
 
+   ndtmsg = nlmsg_data(nlh);
read_lock(neigh_tbl_lock);
for (tbl = neigh_tables; tbl; tbl = tbl-next) {
if (ndtmsg-ndtm_family  tbl-family != ndtmsg-ndtm_family)
continue;
 
-   if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl-id))
+   if (nla_strcmp(tb[NDTA_NAME], tbl-id) == 0)
break;
}
 
if (tbl == NULL) {
err = -ENOENT;
-   goto errout;
+   goto errout_locked;
}
 
/* 
@@ -1781,86 +1814,89 @@ int neightbl_set(struct sk_buff *skb, st
 */
write_lock_bh(tbl-lock);
 
-   if (tb[NDTA_THRESH1 - 1])
-   tbl-gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]);
-
-   if (tb[NDTA_THRESH2 - 1])
-   tbl-gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]);
-
-   if (tb[NDTA_THRESH3 - 1])
-   tbl-gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]);
-
-   if (tb[NDTA_GC_INTERVAL - 1])
-   tbl-gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]);
-
-   if (tb[NDTA_PARMS - 1]) {
-   struct rtattr *tbp[NDTPA_MAX];
+   if (tb[NDTA_PARMS]) {
+   struct nlattr *tbp[NDTPA_MAX+1];
struct neigh_parms *p;
-   u32 ifindex = 0;
+   int i, ifindex = 0;
 
-   if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1])  0)
-   goto rtattr_failure;
+   err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
+  nl_ntbl_parm_policy);
+   if (err  0)
+   goto errout_tbl_lock;
 
-   if (tbp[NDTPA_IFINDEX - 1])
-   ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]);
+   if (tbp[NDTPA_IFINDEX])
+   ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
 
p = lookup_neigh_params(tbl, ifindex);
if (p == NULL) {
err = -ENOENT;
-   goto rtattr_failure;
+   goto errout_tbl_lock;
}
-   
-   if (tbp[NDTPA_QUEUE_LEN - 1])
-   p-queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]);
-
-   if (tbp[NDTPA_PROXY_QLEN - 1])
-   p-proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]);
-
-   if (tbp[NDTPA_APP_PROBES - 1])
-   p-app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]);
 
-   if (tbp[NDTPA_UCAST_PROBES - 1])
-   p-ucast_probes =
-  

[PATCH 6/7] [NEIGH]: Convert neighbour table dumping to new netlink api

2006-08-07 Thread Thomas Graf
Also fixes skipping of already dumped neighbours.

Signed-off-by: Thomas Graf [EMAIL PROTECTED]

Index: net-2.6.19.git/net/core/neighbour.c
===
--- net-2.6.19.git.orig/net/core/neighbour.c
+++ net-2.6.19.git/net/core/neighbour.c
@@ -1594,56 +1594,59 @@ out:
 
 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 {
-   struct rtattr *nest = NULL;
-   
-   nest = RTA_NEST(skb, NDTA_PARMS);
+   struct nlattr *nest;
+
+   nest = nla_nest_start(skb, NDTA_PARMS);
+   if (nest == NULL)
+   return -ENOBUFS;
 
if (parms-dev)
-   RTA_PUT_U32(skb, NDTPA_IFINDEX, parms-dev-ifindex);
+   NLA_PUT_U32(skb, NDTPA_IFINDEX, parms-dev-ifindex);
 
-   RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(parms-refcnt));
-   RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms-queue_len);
-   RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms-proxy_qlen);
-   RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms-app_probes);
-   RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms-ucast_probes);
-   RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms-mcast_probes);
-   RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms-reachable_time);
-   RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+   NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(parms-refcnt));
+   NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms-queue_len);
+   NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms-proxy_qlen);
+   NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms-app_probes);
+   NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms-ucast_probes);
+   NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms-mcast_probes);
+   NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms-reachable_time);
+   NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
  parms-base_reachable_time);
-   RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms-gc_staletime);
-   RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms-delay_probe_time);
-   RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms-retrans_time);
-   RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms-anycast_delay);
-   RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms-proxy_delay);
-   RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms-locktime);
+   NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms-gc_staletime);
+   NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms-delay_probe_time);
+   NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms-retrans_time);
+   NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms-anycast_delay);
+   NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms-proxy_delay);
+   NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms-locktime);
 
-   return RTA_NEST_END(skb, nest);
+   return nla_nest_end(skb, nest);
 
-rtattr_failure:
-   return RTA_NEST_CANCEL(skb, nest);
+nla_put_failure:
+   return nla_nest_cancel(skb, nest);
 }
 
-static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
- struct netlink_callback *cb)
+static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
+ u32 pid, u32 seq, int type, int flags)
 {
struct nlmsghdr *nlh;
struct ndtmsg *ndtmsg;
 
-   nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
-  NLM_F_MULTI);
+   nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+   if (nlh == NULL)
+   return -ENOBUFS;
 
-   ndtmsg = NLMSG_DATA(nlh);
+   ndtmsg = nlmsg_data(nlh);
 
read_lock_bh(tbl-lock);
ndtmsg-ndtm_family = tbl-family;
ndtmsg-ndtm_pad1   = 0;
ndtmsg-ndtm_pad2   = 0;
 
-   RTA_PUT_STRING(skb, NDTA_NAME, tbl-id);
-   RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl-gc_interval);
-   RTA_PUT_U32(skb, NDTA_THRESH1, tbl-gc_thresh1);
-   RTA_PUT_U32(skb, NDTA_THRESH2, tbl-gc_thresh2);
-   RTA_PUT_U32(skb, NDTA_THRESH3, tbl-gc_thresh3);
+   NLA_PUT_STRING(skb, NDTA_NAME, tbl-id);
+   NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl-gc_interval);
+   NLA_PUT_U32(skb, NDTA_THRESH1, tbl-gc_thresh1);
+   NLA_PUT_U32(skb, NDTA_THRESH2, tbl-gc_thresh2);
+   NLA_PUT_U32(skb, NDTA_THRESH3, tbl-gc_thresh3);
 
{
unsigned long now = jiffies;
@@ -1662,7 +1665,7 @@ static int neightbl_fill_info(struct nei
.ndtc_proxy_qlen= tbl-proxy_queue.qlen,
};
 
-   RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), ndc);
+   NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), ndc);
}
 
{
@@ -1687,55 +1690,50 @@ static int neightbl_fill_info(struct nei
ndst.ndts_forced_gc_runs+= st-forced_gc_runs;
}
 
-   RTA_PUT(skb, NDTA_STATS, sizeof(ndst), ndst);
+   NLA_PUT(skb, NDTA_STATS, sizeof(ndst), ndst);
}
 
BUG_ON(tbl-parms.dev);
if (neightbl_fill_parms(skb, tbl-parms)  0)

[PATCH 7/7] [NEIGH]: Move netlink neighbour table bits to linux/neighbour.h

2006-08-07 Thread Thomas Graf
rtnetlink_rcv_msg() is no longer required to parse attributes
for the neighbour tables layer, remove dependency on obsolete and
buggy rta_buf.

Signed-off-by: Thomas Graf [EMAIL PROTECTED]

Index: net-2.6.19.git/include/linux/neighbour.h
===
--- net-2.6.19.git.orig/include/linux/neighbour.h
+++ net-2.6.19.git/include/linux/neighbour.h
@@ -62,4 +62,98 @@ struct nda_cacheinfo
__u32   ndm_refcnt;
 };
 
+/*
+ * Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all not device specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ /
+
+struct ndt_stats
+{
+   __u64   ndts_allocs;
+   __u64   ndts_destroys;
+   __u64   ndts_hash_grows;
+   __u64   ndts_res_failed;
+   __u64   ndts_lookups;
+   __u64   ndts_hits;
+   __u64   ndts_rcv_probes_mcast;
+   __u64   ndts_rcv_probes_ucast;
+   __u64   ndts_periodic_gc_runs;
+   __u64   ndts_forced_gc_runs;
+};
+
+enum {
+   NDTPA_UNSPEC,
+   NDTPA_IFINDEX,  /* u32, unchangeable */
+   NDTPA_REFCNT,   /* u32, read-only */
+   NDTPA_REACHABLE_TIME,   /* u64, read-only, msecs */
+   NDTPA_BASE_REACHABLE_TIME,  /* u64, msecs */
+   NDTPA_RETRANS_TIME, /* u64, msecs */
+   NDTPA_GC_STALETIME, /* u64, msecs */
+   NDTPA_DELAY_PROBE_TIME, /* u64, msecs */
+   NDTPA_QUEUE_LEN,/* u32 */
+   NDTPA_APP_PROBES,   /* u32 */
+   NDTPA_UCAST_PROBES, /* u32 */
+   NDTPA_MCAST_PROBES, /* u32 */
+   NDTPA_ANYCAST_DELAY,/* u64, msecs */
+   NDTPA_PROXY_DELAY,  /* u64, msecs */
+   NDTPA_PROXY_QLEN,   /* u32 */
+   NDTPA_LOCKTIME, /* u64, msecs */
+   __NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg
+{
+   __u8ndtm_family;
+   __u8ndtm_pad1;
+   __u16   ndtm_pad2;
+};
+
+struct ndt_config
+{
+   __u16   ndtc_key_len;
+   __u16   ndtc_entry_size;
+   __u32   ndtc_entries;
+   __u32   ndtc_last_flush;/* delta to now in msecs */
+   __u32   ndtc_last_rand; /* delta to now in msecs */
+   __u32   ndtc_hash_rnd;
+   __u32   ndtc_hash_mask;
+   __u32   ndtc_hash_chain_gc;
+   __u32   ndtc_proxy_qlen;
+};
+
+enum {
+   NDTA_UNSPEC,
+   NDTA_NAME,  /* char *, unchangeable */
+   NDTA_THRESH1,   /* u32 */
+   NDTA_THRESH2,   /* u32 */
+   NDTA_THRESH3,   /* u32 */
+   NDTA_CONFIG,/* struct ndt_config, read-only */
+   NDTA_PARMS, /* nested TLV NDTPA_* */
+   NDTA_STATS, /* struct ndt_stats, read-only */
+   NDTA_GC_INTERVAL,   /* u64, msecs */
+   __NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
 #endif
Index: net-2.6.19.git/include/linux/rtnetlink.h
===
--- net-2.6.19.git.orig/include/linux/rtnetlink.h
+++ net-2.6.19.git/include/linux/rtnetlink.h
@@ -384,107 +384,6 @@ struct rta_session
} u;
 };
 
-
-
-/*
- * Neighbour tables specific messages.
- *
- * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
- * NLM_F_DUMP flag set. Every neighbour table configuration is
- * spread over multiple messages to avoid running into message
- * size limits on systems with many interfaces. The first message
- * in the sequence transports all not 

Re: [PATCH wireless-dev] d80211: Switch d80211_mgmt.h to ieee80211.h style definitions

2006-08-07 Thread Jouni Malinen
On Sat, Aug 05, 2006 at 01:45:33AM -0700, Michael Wu wrote:
 d80211: Switch d80211_mgmt.h to ieee80211.h style definitions
 
 This patch switches a number of defines in d80211_mgmt.h to enums taken from 
 ieee80211.h and makes the corresponding changes in net/d80211.

  include/net/d80211_mgmt.h  |  153 
  net/d80211/ieee80211.c |2 -
  net/d80211/ieee80211_sta.c |   50 +++---


Is this really needed? Based on the changes here, these are mostly used
internally in net/d80211/ieee80211_sta.c and not in other parts of the
tree. In addition, I would actually like to see ieee80211_sta.c be made
mostly obsolete with a user space implementation of client MLME (e.g.,
the one in wpa_supplicant which is based on this ieee80211_sta.c file).

Changing WLAN_STATUS_* and WLAN_REASON_* to be enum's while keeping
their old names is fine. However, I would rather not change the names of
the information element IDs (WLAN_EID_*) into MFIE_TYPE_*. What is that
'MFIE' anyway? Management Frame IE? These IEs are not limited to
management frames..

In other words, I would skip all the changes into ieee80211_sta.c and
would only change d80211_mgmt.h to use enum's with old names.

-- 
Jouni MalinenPGP id EFC895FA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: 802.11/crypto questions

2006-08-07 Thread Jouni Malinen
On Tue, Aug 01, 2006 at 07:40:52AM +0200, Johannes Berg wrote:

 Since there's been lots of talk about multi-STA or multi-BSSID devices
 (more than one client, more than one AP on the same PHY) and I was just
 looking into some crypto stuff on bcm43xx, it got me wondering.
 
 The bcm43xx has an elaborate group key matching thingie which will be
 useful if it's all WPA2, but if it's say just plain WEP, then that is a
 problem since you'd either need to use the same keys or disable the
 bcm43xx crypto hardware.
 
 Hence, I suppose the question really is -- does the dscape stack allow
 changing the 'sw crypto needed' flag on the fly?

Depends on what exactly you mean by "on the fly". We have indeed
changed between doing software and hardware crypto for some cases, e.g.,
when enabling another BSS while one BSS is using static WEP (which would
need default WEP keys in hwaccel) in one BSS, we may disable hwaccel for
the receive case. Not all configuration changes are yet supported with
minimal modification, i.e., hostapd may end up having to disconnect all
stations and start over with such a change, but still, the driver should
be prepared on dynamically changing the key configuration and this can
include moving some keys from hwaccel to software.

If it can be done without major effort, I would recommend making the
low-level drivers quite flexible as far as configuring keys to hardware
acceleration is concerned. Depending on the hardware design, there may
be multiple different ways of doing this and some multi-BSS
configurations are likely to require changes in how the keys are used
between hardware and software.

-- 
Jouni MalinenPGP id EFC895FA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[-mm patch] net/: make code static

2006-08-07 Thread Adrian Bunk
This patch makes needlessly global code static.

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]

---

BTW:
It doesn't seem to be intended that the new
ipv4/fib_rules.c:fib4_rules_cleanup() is completely unused?

 include/net/ip6_fib.h  |4 
 net/ipv4/cipso_ipv4.c  |2 +-
 net/ipv4/fib_rules.c   |4 ++--
 net/ipv6/fib6_rules.c  |4 ++--
 net/ipv6/ip6_fib.c |6 +++---
 net/ipv6/route.c   |6 +++---
 net/netlabel/netlabel_domainhash.c |4 ++--
 7 files changed, 13 insertions(+), 17 deletions(-)

--- linux-2.6.18-rc3-mm2-full/net/ipv4/cipso_ipv4.c.old 2006-08-07 
16:39:05.0 +0200
+++ linux-2.6.18-rc3-mm2-full/net/ipv4/cipso_ipv4.c 2006-08-07 
16:39:15.0 +0200
@@ -60,7 +60,7 @@
  * if in practice there are a lot of different DOIs this list should
  * probably be turned into a hash table or something similar so we
  * can do quick lookups. */
-DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
+static DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
 static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list);
 
 /* Label mapping cache */
--- linux-2.6.18-rc3-mm2-full/net/ipv4/fib_rules.c.old  2006-08-07 
16:39:33.0 +0200
+++ linux-2.6.18-rc3-mm2-full/net/ipv4/fib_rules.c  2006-08-07 
16:39:51.0 +0200
@@ -101,8 +101,8 @@
return err;
 }
 
-int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags,
-struct fib_lookup_arg *arg)
+static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
+   int flags, struct fib_lookup_arg *arg)
 {
int err = -EAGAIN;
struct fib_table *tbl;
--- linux-2.6.18-rc3-mm2-full/net/ipv6/fib6_rules.c.old 2006-08-07 
16:41:07.0 +0200
+++ linux-2.6.18-rc3-mm2-full/net/ipv6/fib6_rules.c 2006-08-07 
16:41:16.0 +0200
@@ -66,8 +66,8 @@
return (struct dst_entry *) arg.result;
 }
 
-int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
-int flags, struct fib_lookup_arg *arg)
+static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+   int flags, struct fib_lookup_arg *arg)
 {
struct rt6_info *rt = NULL;
struct fib6_table *table;
--- linux-2.6.18-rc3-mm2-full/include/net/ip6_fib.h.old 2006-08-07 
16:41:36.0 +0200
+++ linux-2.6.18-rc3-mm2-full/include/net/ip6_fib.h 2006-08-07 
16:41:43.0 +0200
@@ -192,10 +192,6 @@
 struct in6_addr *daddr, int 
dst_len,
 struct in6_addr *saddr, int 
src_len);
 
-extern voidfib6_clean_tree(struct fib6_node *root,
-   int (*func)(struct rt6_info *, 
void *arg),
-   int prune, void *arg);
-
 extern voidfib6_clean_all(int (*func)(struct rt6_info *, 
void *arg),
   int prune, void *arg);
 
--- linux-2.6.18-rc3-mm2-full/net/ipv6/ip6_fib.c.old2006-08-07 
16:41:51.0 +0200
+++ linux-2.6.18-rc3-mm2-full/net/ipv6/ip6_fib.c2006-08-07 
16:42:05.0 +0200
@@ -1169,9 +1169,9 @@
  * ignoring pure split nodes) will be scanned.
  */
 
-void fib6_clean_tree(struct fib6_node *root,
-int (*func)(struct rt6_info *, void *arg),
-int prune, void *arg)
+static void fib6_clean_tree(struct fib6_node *root,
+   int (*func)(struct rt6_info *, void *arg),
+   int prune, void *arg)
 {
struct fib6_cleaner_t c;
 
--- linux-2.6.18-rc3-mm2-full/net/ipv6/route.c.old  2006-08-07 
16:42:24.0 +0200
+++ linux-2.6.18-rc3-mm2-full/net/ipv6/route.c  2006-08-07 16:43:05.0 
+0200
@@ -613,8 +613,8 @@
return rt;
 }
 
-struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi 
*fl,
-int flags)
+static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+   struct flowi *fl, int flags)
 {
struct fib6_node *fn;
struct rt6_info *rt, *nrt;
@@ -872,7 +872,7 @@
 }
 
 static struct dst_entry *ndisc_dst_gc_list;
-DEFINE_SPINLOCK(ndisc_lock);
+static DEFINE_SPINLOCK(ndisc_lock);
 
 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
  struct neighbour *neigh,
--- linux-2.6.18-rc3-mm2-full/net/netlabel/netlabel_domainhash.c.old
2006-08-07 16:43:27.0 +0200
+++ linux-2.6.18-rc3-mm2-full/net/netlabel/netlabel_domainhash.c
2006-08-07 16:43:53.0 +0200
@@ -50,11 +50,11 @@
 /* Domain hash table */
 /* XXX - updates should be so rare that having one spinlock for the entire
  * hash table should be okay */
-DEFINE_SPINLOCK(netlbl_domhsh_lock);
+static 

Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread jamal
On Mon, 2006-07-08 at 18:29 +0200, Edgar E. Iglesias wrote:

 Assuming you would get the code inside the jamal ifdefs working without
 deadlocks, you now have a tx_irq function which if fdesc = tx_ring-prunet
 essentially just checks for hw lockups. Let's speculate and further assume you
 could do the detect_tx_hung from some other context (timer or whatever) then
 you end up having a tx_irq function which most of the time does nothing.
 
 The next step could be to move the fdesc = tx_ring-prunet logic into the
 transmit path and completely disable the tx_irq when the condition is not met.
 
 Now you end up not taking the irq at all as long as fdesc = tx_ring-prunet.
 
 This was the logic I tried on the cris driver but ended up with deadlocks :)
 

Like i said in one of my earlier postings (first email i CCed you on),
this specific test i assumed was as close to what you did. But if i
understand what you describe as deadlock then we have a slightly
different problem that can only be solved by orphaning the skbs that are
determined to have been put on the DMA ring 

cheers,
jamal

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Fwd: [IPROUTE2]: Explicit BNF definition for actions]

2006-08-07 Thread Stephen Hemminger
On Sat, 05 Aug 2006 12:42:35 -0400
jamal [EMAIL PROTECTED] wrote:

 
 Stephen,
 
 This is the one after the last one you responded to. Your git tree is
 not uptodate so i cant tell if you are missing it or not ;-
 
 I dont think i will be making more updates for this release - more to
 come later.
 
 cheers,
 jamal
 

I needed to merge the branch in, the git tree is now up to date.

-- 
Stephen Hemminger [EMAIL PROTECTED]
And in the Packet there writ down that doome
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 9/9] [NET]: Move netlink interface bits to linux/if.h

2006-08-07 Thread Stephen Hemminger
On Fri, 04 Aug 2006 23:23:14 -0700 (PDT)
David Miller [EMAIL PROTECTED] wrote:

 From: Stephen Hemminger [EMAIL PROTECTED]
 Date: Sat, 05 Aug 2006 22:42:18 -0700
 
  Thomas Graf wrote:
   -/* The struct should be in sync with struct net_device_stats */
   -struct rtnl_link_stats
   -{
   - __u32   rx_packets; /* total packets received   */
   - __u32   tx_packets; /* total packets transmitted*/
   - __u32   rx_bytes;   /* total bytes received */
   - __u32   tx_bytes;   /* total bytes transmitted  */
 
  
  Make the packets and byte counts __u64 so if/when we extend net_device 
  stats to bigger
  values, the message format doesn't have to change.
 
 It's an existing ABI that he is just moving from one place
 to another, we are not at liberty to change it's layout.

Yeah that makes sense, I thought it was a new interface.

-- 
Stephen Hemminger [EMAIL PROTECTED]
And in the Packet there writ down that doome
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread jamal
On Mon, 2006-07-08 at 19:04 +0200, Edgar E. Iglesias wrote:

 
 I'll give you an example.

Thanks - that matches my understanding.

 A TCP flow sends X data and later waits for a response, host is now quietly
 waiting. Assume fdesc = tx_ring-prunet, so we dont free any skbs, right?
 

I am hoping they will be freed by a tx interrupt that will force poll to
happen. Or a new packet arrival etc. Just like before. Why do you see
the two as different? (the tx path pruning is still going on as i noted
before). If all you are looking for is a scheme to quickly free the skbs
so that TCP doesnt get charged, I am not sure if this is the right one.

 Now assume that some part of X data gets lost, our retransmit timer hits and
 we want to retransmit but our socket is charged with too much data sitting on
 the nics tx-ring, so we don't send anything. By orphaning, those skbs won't
 charge the socket and the flow can retransmit.

I understand that as well as the dilemma that TCP not being charged for
skbs (if you decide to orphan) it holds in its retransmit queue ;-
Which is not a problem unless that queue grows to be a huge one ;-

cheers,
jamal

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] tiacx sparse cleanups

2006-08-07 Thread Randy.Dunlap
From: Randy Dunlap [EMAIL PROTECTED]

tiacx sparse cleanups:
- use NULL instead of 0 for pointer value
- use C99 struct initializers
- use ANSI function declaration

Signed-off-by: Randy Dunlap [EMAIL PROTECTED]
---
 drivers/net/wireless/tiacx/common.c |2 
 drivers/net/wireless/tiacx/ioctl.c  |  202 ++--
 drivers/net/wireless/tiacx/pci.c|2 
 drivers/net/wireless/tiacx/usb.c|2 
 4 files changed, 104 insertions(+), 104 deletions(-)


--- linux-2618-rc3mm2.orig/drivers/net/wireless/tiacx/common.c
+++ linux-2618-rc3mm2/drivers/net/wireless/tiacx/common.c
@@ -1396,7 +1396,7 @@ manage_proc_entries(const struct net_dev
log(L_INIT, %sing /proc entry %s\n,
remove ? remov : creat, procbuf);
if (!remove) {
-   if (!create_proc_read_entry(procbuf, 0, 0, 
proc_funcs[i], adev)) {
+   if (!create_proc_read_entry(procbuf, 0, NULL, 
proc_funcs[i], adev)) {
printk(acx: cannot register /proc entry %s\n, 
procbuf);
return NOT_OK;
}
--- linux-2618-rc3mm2.orig/drivers/net/wireless/tiacx/ioctl.c
+++ linux-2618-rc3mm2/drivers/net/wireless/tiacx/ioctl.c
@@ -2163,7 +2163,7 @@ acx_ioctl_set_rates(struct net_device *n
 
log(L_IOCTL, set_rates %s\n, extra);
result = fill_ratemasks(extra, brate, orate,
-   acx111_supported, acx111_gen_mask, 0);
+   acx111_supported, acx111_gen_mask, NULL);
if (result) goto end;
SET_BIT(orate, brate);
log(L_IOCTL, brate %08X orate %08X\n, brate, orate);
@@ -2615,107 +2615,107 @@ static const iw_handler acx_ioctl_privat
 
 static const struct iw_priv_args acx_ioctl_private_args[] = {
 #if ACX_DEBUG
-{ cmd : ACX100_IOCTL_DEBUG,
-   set_args : IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1,
-   get_args : 0,
-   name : SetDebug },
+{  .cmd = ACX100_IOCTL_DEBUG,
+   .set_args = IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1,
+   .get_args = 0,
+   .name = SetDebug },
 #endif
-{ cmd : ACX100_IOCTL_SET_PLED,
-   set_args : IW_PRIV_TYPE_BYTE | 2,
-   get_args : 0,
-   name : SetLEDPower },
-{ cmd : ACX100_IOCTL_GET_PLED,
-   set_args : 0,
-   get_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 2,
-   name : GetLEDPower },
-{ cmd : ACX100_IOCTL_SET_RATES,
-   set_args : IW_PRIV_TYPE_CHAR | 256,
-   get_args : 0,
-   name : SetRates },
-{ cmd : ACX100_IOCTL_LIST_DOM,
-   set_args : 0,
-   get_args : 0,
-   name : ListRegDomain },
-{ cmd : ACX100_IOCTL_SET_DOM,
-   set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1,
-   get_args : 0,
-   name : SetRegDomain },
-{ cmd : ACX100_IOCTL_GET_DOM,
-   set_args : 0,
-   get_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1,
-   name : GetRegDomain },
-{ cmd : ACX100_IOCTL_SET_SCAN_PARAMS,
-   set_args : IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 4,
-   get_args : 0,
-   name : SetScanParams },
-{ cmd : ACX100_IOCTL_GET_SCAN_PARAMS,
-   set_args : 0,
-   get_args : IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 4,
-   name : GetScanParams },
-{ cmd : ACX100_IOCTL_SET_PREAMB,
-   set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1,
-   get_args : 0,
-   name : SetSPreamble },
-{ cmd : ACX100_IOCTL_GET_PREAMB,
-   set_args : 0,
-   get_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1,
-   name : GetSPreamble },
-{ cmd : ACX100_IOCTL_SET_ANT,
-   set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1,
-   get_args : 0,
-   name : SetAntenna },
-{ cmd : ACX100_IOCTL_GET_ANT,
-   set_args : 0,
-   get_args : 0,
-   name : GetAntenna },
-{ cmd : ACX100_IOCTL_RX_ANT,
-   set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1,
-   get_args : 0,
-   name : SetRxAnt },
-{ cmd : ACX100_IOCTL_TX_ANT,
-   set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1,
-   get_args : 0,
-   name : SetTxAnt },
-{ cmd : ACX100_IOCTL_SET_PHY_AMP_BIAS,
-   set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1,
-   get_args : 0,
-   name : SetPhyAmpBias},
-{ cmd : ACX100_IOCTL_GET_PHY_CHAN_BUSY,
-   set_args : 0,
-   get_args : 0,
-   name : GetPhyChanBusy },
-{ cmd : ACX100_IOCTL_SET_ED,
-   set_args : IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1,
-   get_args : 0,
-   name : SetED },
-{ cmd : ACX100_IOCTL_SET_CCA,
-   set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1,
-   get_args : 0,
-   name : SetCCA },
-{ cmd : ACX100_IOCTL_MONITOR,
-   set_args : IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 2,
-   get_args : 0,
-   name : monitor },
-{ cmd : ACX100_IOCTL_TEST,
-   set_args : 0,
-   get_args : 0,
-   name : Test },
-{ cmd : ACX100_IOCTL_DBG_SET_MASKS,
-   set_args : IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 2,
-   

Re: [Fwd: [IPROUTE2]: Explicit BNF definition for actions]

2006-08-07 Thread jamal
On Mon, 2006-07-08 at 10:49 -0700, Stephen Hemminger wrote:

 I needed to merge the branch in, the git tree is now up to date.

I have never gathered the energy to figure that stuff out so i dont envy
you.

In any case, seems to be missing the majority of the patches i sent,
including this one whose subject you are responding to;- 
I dont have time to chase it right now, but i could resend later on. Or
you could grab them off the list archive.

cheers,
jamal


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH wireless-dev] d80211: Switch d80211_mgmt.h to ieee80211.h style definitions

2006-08-07 Thread Michael Wu
On Monday 07 August 2006 10:20, Jouni Malinen wrote:
 Changing WLAN_STATUS_* and WLAN_REASON_* to be enum's while keeping
 their old names is fine. However, I would rather not change the names of
 the information element IDs (WLAN_EID_*) into MFIE_TYPE_*. What is that
 'MFIE' anyway? Management Frame IE? These IEs are not limited to
 management frames..

I agree these names aren't that great, but these changes make it easier 
for existing fullmac drivers to switch to the d80211 headers. That is the 
only reason.

 In other words, I would skip all the changes into ieee80211_sta.c and
 would only change d80211_mgmt.h to use enum's with old names.
Sure.

-Michael Wu


pgp1MZdwtVOF0.pgp
Description: PGP signature


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Edgar E. Iglesias
On Mon, Aug 07, 2006 at 02:00:24PM -0400, jamal wrote:
 On Mon, 2006-07-08 at 19:04 +0200, Edgar E. Iglesias wrote:
 
  
  I'll give you an example.
 
 Thanks - that matches my understanding.
 
  A TCP flow sends X data and later waits for a response, host is now quietly
  waiting. Assume fdesc = tx_ring-prunet, so we dont free any skbs, right?
  
 
 I am hoping they will be freed by a tx interrupt that will force poll to
 happen. Or a new packet arrival etc. Just like before. Why do you see
 the two as different? (the tx path pruning is still going on as i noted
 before). If all you are looking for is a scheme to quickly free the skbs
 so that TCP doesnt get charged, I am not sure if this is the right one.
 

I think we are out of sync :) My fault, I haven't been clear enough.

First of all, I don't think the patch with jamal undefined has any problems. I 
assumed wrongly from the start that you somehow wanted that part to go in 
as well, sorry about that. As you say, the flow goes just as before.

Now, with jamal defined, I only see e1000_prune_tx_ring being called if
fdesc  tx_ring-prunet or fdesc  tx_ring-waket. In other words, the freeing
of skbs is dependent on external events that might not become true if the
host is quiet. Skb's could end up sitting on the ring indefinitely.

Sorry for the confusion.
-- 
Programmer
Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread jamal
On Mon, 2006-07-08 at 20:47 +0200, Edgar E. Iglesias wrote:

 I think we are out of sync :) 

Imagine that, eh? ;-

 My, fault I haven't been clear enough.
 

Not just your transmit but also my receive is at fault ;- (aka, I may
not be listening as well as i should). Now two machines or CPUs you
would think wont have this problem since they dont possess minds;-

 First of all, I don't think the patch with jamal undefined has any problems. 
 I 
 assumed wrongly from the start that you somehow wanted that part to go in 
 aswell, sorry about that. As you say, the flow goes just as before.
 
 Now, with jamal defined, I only see e1000_prune_tx_ring beeing called if
 fdesc  tx_ring-prunet or fdesc  tx_ring-waket. 

Ok, thats the code that has been commented out, no? i.e there is no
fdesc otherwise.

 In other words, the freing
 of skbs is dependant on external events that might not become true if the
 host is quiet. Skb's could end up sitting on the ring indefinitely.
 

Yes, this has _always_ been true. In the patch i posted it merely
converted things, example:

-#define E1000_TX_WEIGHT 64
-   /* weight of a sort for tx, to avoid endless transmit
cleanup */
-   if (count++ == E1000_TX_WEIGHT) break;
+   /* avoid endless transmit cleanup */
+   if (count++ == tx_ring-prunet) break;

As you can see E1000_TX_WEIGHT threshold exists today and you are right
if no TX interrupts, packet arrivals or scheduled wakes happen then that
descriptor that was not pruned will sit there forever (which is a bad
thing for TCP). Are we in sync?
If yes, what is the likelihood they will sit there forever? I think
perhaps some TX interrupts will happen, no?

cheers,
jamal


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Edgar E. Iglesias
On Mon, Aug 07, 2006 at 03:03:33PM -0400, jamal wrote:
 On Mon, 2006-07-08 at 20:47 +0200, Edgar E. Iglesias wrote:
 
  I think we are out of sync :) 
 
 Imagine that, eh? ;-
 
  My, fault I haven't been clear enough.
  
 
 Not just your transmit but also my receive is at fault ;- (aka, I may
 not be listening as well as i should). Now two machines or CPUs you
 would think wont have this problem since they dont possess minds;-
 
  First of all, I don't think the patch with jamal undefined has any 
  problems. I 
  assumed wrongly from the start that you somehow wanted that part to go in 
  aswell, sorry about that. As you say, the flow goes just as before.
  
  Now, with jamal defined, I only see e1000_prune_tx_ring beeing called if
  fdesc  tx_ring-prunet or fdesc  tx_ring-waket. 
 
 Ok, thats the code that has been commented out, no? i.e there is no
 fdesc otherwise.

Exactly.

 
  In other words, the freing
  of skbs is dependant on external events that might not become true if the
  host is quiet. Skb's could end up sitting on the ring indefinitely.
  
 
 Yes, this has _always_ been true. In the patch i posted it merely
 converted things, example:
 
 -#define E1000_TX_WEIGHT 64
 -   /* weight of a sort for tx, to avoid endless transmit
 cleanup */
 -   if (count++ == E1000_TX_WEIGHT) break;
 +   /* avoid endless transmit cleanup */
 +   if (count++ == tx_ring-prunet) break;
 
 As you can see E1000_TX_WEIGHT threshold exists today and you are right
 if no TX interupts, packet arrivals or scheduled wakes happen the that
 descriptor that was not pruned will sit there forever (which is a bad
 thing for TCP). Are we in sync?

Yep :)

 If yes, what is the likelihood they will sit there forever? I think
 perhaps some TX interupts will happen, no?

with jamal undefined, absolutely. With jamal defined, TX interrupts will come
but I couldnt find a way into e1000_prune_tx_ring unless fdesc met the 
conditions. Correct? 

Best regards
-- 
Programmer
Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread jamal
On Mon, 2006-07-08 at 21:14 +0200, Edgar E. Iglesias wrote:

  If yes, what is the likelihood they will sit there forever? I think
  perhaps some TX interupts will happen, no?
 
 with jamal undefined, absolutely. With jamal defined, TX interrupts will come
 but I couldnt find a way into e1000_prune_tx_ring unless fdesc met the 
 conditions. Correct? 
 

Forgive me since i am still missing something ..

Observe that the same threshold is used in two different ways:

1) in tx path tx_ring-prunet is to check on when we should _start_ to
prune.
2) on rx path tx_ring-prunet is to check when to _stop_ pruning.

i.e #1 is a preemptive action.

You seem to suggest that doing it the way I did made things worse?

Note that TX interrupts will happen as long as you dont prune some
descriptors (I am assuming this, I havent checked the settings).

cheers,
jamal


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH]: e1000: Janitor: Use #defined values for literals

2006-08-07 Thread Linas Vepstas

Resending patch from 23 June 2006; there was some confusion about
whether a similar patch had already been applied; seems it wasn't.

Minor janitorial patch: use #defines for literal values.

Signed-off-by: Linas Vepstas [EMAIL PROTECTED]


 drivers/net/e1000/e1000_main.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Index: linux-2.6.18-rc3-mm2/drivers/net/e1000/e1000_main.c
===
--- linux-2.6.18-rc3-mm2.orig/drivers/net/e1000/e1000_main.c2006-08-07 
14:39:37.0 -0500
+++ linux-2.6.18-rc3-mm2/drivers/net/e1000/e1000_main.c 2006-08-07 
15:06:31.0 -0500
@@ -4955,8 +4955,8 @@ static pci_ers_result_t e1000_io_slot_re
}
pci_set_master(pdev);
 
-   pci_enable_wake(pdev, 3, 0);
-   pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */
+   pci_enable_wake(pdev, PCI_D3hot, 0);
+   pci_enable_wake(pdev, PCI_D3cold, 0);
 
/* Perform card reset only on one instance of the card */
if (PCI_FUNC (pdev-devfn) != 0)
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH]: e1000: Janitor: Use #defined values for literals

2006-08-07 Thread Auke Kok

Linas Vepstas wrote:

Resending patch from 23 June 2006; there was some confusion about
whether a similar patch had already been applied; seems it wasn't.

Minor janitorial patch: use #defines for literal values.

Signed-off-by: Linas Vepstas [EMAIL PROTECTED]


Acked-by: Auke Kok [EMAIL PROTECTED]




 drivers/net/e1000/e1000_main.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Index: linux-2.6.18-rc3-mm2/drivers/net/e1000/e1000_main.c
===
--- linux-2.6.18-rc3-mm2.orig/drivers/net/e1000/e1000_main.c2006-08-07 
14:39:37.0 -0500
+++ linux-2.6.18-rc3-mm2/drivers/net/e1000/e1000_main.c 2006-08-07 
15:06:31.0 -0500
@@ -4955,8 +4955,8 @@ static pci_ers_result_t e1000_io_slot_re
}
pci_set_master(pdev);
 
-	pci_enable_wake(pdev, 3, 0);

-   pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */
+   pci_enable_wake(pdev, PCI_D3hot, 0);
+   pci_enable_wake(pdev, PCI_D3cold, 0);
 
 	/* Perform card reset only on one instance of the card */

if (PCI_FUNC (pdev-devfn) != 0)
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH]: e1000: Janitor: Use #defined values for literals

2006-08-07 Thread Jeff Garzik

hopefully queued-by, as well...

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Edgar E. Iglesias
On Mon, Aug 07, 2006 at 03:34:30PM -0400, jamal wrote:
 On Mon, 2006-07-08 at 21:14 +0200, Edgar E. Iglesias wrote:
 
   If yes, what is the likelihood they will sit there forever? I think
   perhaps some TX interupts will happen, no?
  
  with jamal undefined, absolutely. With jamal defined, TX interrupts will 
  come
  but I couldnt find a way into e1000_prune_tx_ring unless fdesc met the 
  conditions. Correct? 
  
 
 Forgive me since i am still missing something ..
 
 Observe that the same threshold used in two different ways:
 
 1) in tx path tx_ring->prunet is to check on when we should _start_ to
 prune.
 2) on rx path tx_ring->prunet is to check when to _stop_ pruning.
 

I can see two calls to e1000_prune_tx_ring with jamal _defined_.

1. tx path
+#ifdef jamal
+   {
+   int fdesc = E1000_DESC_UNUSED(tx_ring);
+   if (unlikely(fdesc  tx_ring-waket))
+   e1000_prune_tx_ring(adapter,tx_ring);
+   }
+#endif

2. tx and rx path
+#ifdef jamal
+   spin_lock(tx_ring-tx_lock);
+   {
+   int fdesc = E1000_DESC_UNUSED(tx_ring);
+   if (fdesc  tx_ring-prunet) {
+   if (e1000_prune_tx_ring(adapter,tx_ring))
+   cleaned = TRUE;
}
}
+   spin_unlock(tx_ring-tx_lock);
+#else
+   if (e1000_prune_tx_ring(adapter,tx_ring))
+   cleaned = TRUE;
+#endif

Assume a ring of 64 entries, prunet of 16, waket of 8. Now host sends 40 skbs
and stops. tx-ring holds 40 skbs, has 24 free. TX interrupts hit you, you may
even be receiving packets but I don't see how you enter prune_tx_ring without
more packets going out via hard_start_xmit? skb's will sit on the ring until 
more packets are sent from the quiet host.

As you can see, with jamal _undefined_ e1000_prune_tx_ring is called
unconditionally and I believe things will work ok.

I am not familiar with this code nor the hw so I'm probably missing something
fundamental. 

Best regards
-- 
Programmer
Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Excess use of packed attribute

2006-08-07 Thread Stephen Hemminger
After reading:
http://bugzilla.kernel.org/show_bug.cgi?id=6693

I noticed there were stupid uses of packed attribute in several network headers.

Silly offenders:include/net/ipx.h
include/net/ieee80211.h
include/net/ip6_tunnel.h
include/net/ndisc.h
include/linux/if_ether.h
include/linux/if_fddi.h

include/linux/sctp.h -- really bad

-- 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/9] tulip patches from parisc-linux

2006-08-07 Thread Kyle McMartin
[Val asked that I split this out, instead of just publishing a git tree,
so here goes nothing. I don't think I've ever used git-send-email before,
but hopefully I won't screw up too badly.]

The following patch series brings the mainline tulip driver in synch
with the modifications made in parisc-linux. Most of these patches
have been in parisc-linux cvs for the better part of several years,
so they are quite well tested.

It contains the following changes,

 21142.c   |4 +-
 de2104x.c |6 +--
 interrupt.c   |4 ++
 media.c   |   40 +-
 timer.c   |   14 ++-
 tulip.h   |   43 ++--
 tulip_core.c  |  102 --
 winbond-840.c |   68 +-
 8 files changed, 164 insertions(+), 117 deletions(-)

Francois Romieu:
  [TULIP] Defer tulip_select_media() to process context

Grant Grundler:
  [TULIP] Fix PHY init and reset
  [TULIP] Print physical address in tulip_init_one
  [TULIP] Make tulip_stop_rxtx() failure case slightly more informative
  [TULIP] Clean tulip.h so it can be used by winbond-840.c
  [TULIP] Flush MMIO writes in reset sequence
  [TULIP] Fix IRQ/DMA race

Helge Deller:
  [TULIP] Fix section mismatch in de2104x.c

Thibaut Varene:
  [TULIP] Make DS21143 printout match lspci output

Cheers!
Kyle M.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Reboot-Shutdown signal through net device.

2006-08-07 Thread moto
Within our PCI Network Device there is a special feature that allows
out-of-band signaling/messaging (by using a special interrupt and shared
registers) to control system state: Reboot, Shutdown.
What would be an acceptable method of executing such controls?
So, the sequence is: 
 1. Device Driver receives Reboot-or-Shutdown message.
 2. Device Driver initiates Reboot-or-Shutdown sequence (how?).
I have considered a usermode helper; it looks like this solution is favored...
Any recommendations?

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] ipx: header length validation needed

2006-08-07 Thread Stephen Hemminger
IPX is not checking for non-linear (and short) packets in its receive routine.
This is serious because it may mean it ends up reading past end of skb.

This may be related to this bug, because sky2 will copy small packets into small
skb's.

http://bugzilla.kernel.org/show_bug.cgi?id=6693

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]


--- a/net/ipx/af_ipx.c  2006-07-07 13:02:31.0 -0700
+++ b/net/ipx/af_ipx.c  2006-08-07 13:18:08.0 -0700
@@ -1642,6 +1642,9 @@
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
goto out;
 
+   if (!pskb_may_pull(skb, sizeof(struct ipxhdr)))
+   goto drop;
+
ipx = ipx_hdr(skb);
ipx_pktsize = ntohs(ipx-ipx_pktsize);

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/9] [TULIP] Fix PHY init and reset

2006-08-07 Thread Kyle McMartin
From: Grant Grundler [EMAIL PROTECTED]

A whole slew of fixes for tulip_select_media for:
 - Flush posted MMIO writes as per PCI spec
 - Polling the reset bit (bit 15) is required to determine when
   the init sequence can be sent.

This fixes tulip on HP PA-RISC systems, which use DP83840A and
LXT971D PHYs. Tested for several years on a variety of HP PA-RISC
systems.

[Initial work done by Grant Grundler, DS21142 support added by
Thibaut Varene.]

Signed-off-by: Grant Grundler [EMAIL PROTECTED]
Signed-off-by: Thibaut Varene [EMAIL PROTECTED]
Signed-off-by: Kyle McMartin [EMAIL PROTECTED]
---
 drivers/net/tulip/media.c |   40 +---
 1 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tulip/media.c b/drivers/net/tulip/media.c
index e9bc2a9..5093d87 100644
--- a/drivers/net/tulip/media.c
+++ b/drivers/net/tulip/media.c
@@ -44,8 +44,10 @@ static const unsigned char comet_miireg2
 
 /* MII transceiver control section.
Read and write the MII registers using software-generated serial
-   MDIO protocol.  See the MII specifications or DP83840A data sheet
-   for details. */
+   MDIO protocol.
+   See IEEE 802.3-2002.pdf (Section 2, Chapter 22.2.4 Management functions)
+   or DP83840A data sheet for more details.
+   */
 
 int tulip_mdio_read(struct net_device *dev, int phy_id, int location)
 {
@@ -261,24 +263,56 @@ void tulip_select_media(struct net_devic
u16 *reset_sequence = 
((u16*)(p+3))[init_length];
int reset_length = p[2 + init_length*2];
misc_info = reset_sequence + reset_length;
-   if (startup)
+   if (startup) {
+   int timeout = 10;
for (i = 0; i  reset_length; i++)

iowrite32(get_u16(reset_sequence[i])  16, ioaddr + CSR15);
+
+   /* flush posted writes */
+   ioread32(ioaddr + CSR15);
+
+   /* Sect 3.10.3 in DP83840A.pdf (p39) */
+   udelay(500);
+
+   /* Section 4.2 in DP83840A.pdf (p43) */
+   /* and IEEE 802.3 22.2.4.1.1 Reset */
+   while (timeout-- 
+   (tulip_mdio_read (dev, phy_num, 
MII_BMCR)  BMCR_RESET))
+   udelay(100);
+   }
for (i = 0; i  init_length; i++)
iowrite32(get_u16(init_sequence[i])  
16, ioaddr + CSR15);
+
+   ioread32(ioaddr + CSR15);   /* flush posted 
writes */
} else {
u8 *init_sequence = p + 2;
u8 *reset_sequence = p + 3 + init_length;
int reset_length = p[2 + init_length];
misc_info = (u16*)(reset_sequence + 
reset_length);
if (startup) {
+   int timeout = 10;
iowrite32(mtable-csr12dir | 0x100, 
ioaddr + CSR12);
for (i = 0; i  reset_length; i++)
iowrite32(reset_sequence[i], 
ioaddr + CSR12);
+
+   /* flush posted writes */
+   ioread32(ioaddr + CSR12);
+
+   /* Sect 3.10.3 in DP83840A.pdf (p39) */
+   udelay(500);
+
+   /* Section 4.2 in DP83840A.pdf (p43) */
+   /* and IEEE 802.3 22.2.4.1.1 Reset */
+   while (timeout-- 
+   (tulip_mdio_read (dev, phy_num, 
MII_BMCR)  BMCR_RESET))
+   udelay(100);
}
for (i = 0; i  init_length; i++)
iowrite32(init_sequence[i], ioaddr + 
CSR12);
+
+   ioread32(ioaddr + CSR12);   /* flush posted 
writes */
}
+
tmp_info = get_u16(misc_info[1]);
if (tmp_info)
tp-advertising[phy_num] = tmp_info | 1;
-- 
1.4.1.1

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/9] [TULIP] Defer tulip_select_media() to process context

2006-08-07 Thread Kyle McMartin
From: Francois Romieu [EMAIL PROTECTED]

Move tulip_select_media() processing to a workqueue, instead of delaying
in interrupt context.

[Edit by Kyle to use kevent thread, instead of creating its own
 workqueue.]

Signed-off-by: Kyle McMartin [EMAIL PROTECTED]
---
 drivers/net/tulip/21142.c  |4 +--
 drivers/net/tulip/timer.c  |   14 -
 drivers/net/tulip/tulip.h  |   19 ++--
 drivers/net/tulip/tulip_core.c |   64 +++-
 4 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/drivers/net/tulip/21142.c b/drivers/net/tulip/21142.c
index 683f14b..ffba0c1 100644
--- a/drivers/net/tulip/21142.c
+++ b/drivers/net/tulip/21142.c
@@ -26,9 +26,9 @@ static u16 t21142_csr15[] = { 0x0008, 0x
 
 /* Handle the 21143 uniquely: do autoselect with NWay, not the EEPROM list
of available transceivers.  */
-void t21142_timer(unsigned long data)
+void t21142_media_task(void *data)
 {
-   struct net_device *dev = (struct net_device *)data;
+   struct net_device *dev = data;
struct tulip_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp-base_addr;
int csr12 = ioread32(ioaddr + CSR12);
diff --git a/drivers/net/tulip/timer.c b/drivers/net/tulip/timer.c
index e058a9f..272ef62 100644
--- a/drivers/net/tulip/timer.c
+++ b/drivers/net/tulip/timer.c
@@ -18,13 +18,14 @@ #include linux/pci.h
 #include tulip.h
 
 
-void tulip_timer(unsigned long data)
+void tulip_media_task(void *data)
 {
-   struct net_device *dev = (struct net_device *)data;
+   struct net_device *dev = data;
struct tulip_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp-base_addr;
u32 csr12 = ioread32(ioaddr + CSR12);
int next_tick = 2*HZ;
+   unsigned long flags;
 
if (tulip_debug  2) {
printk(KERN_DEBUG %s: Media selection tick, %s, status %8.8x 
mode
@@ -126,6 +127,15 @@ void tulip_timer(unsigned long data)
}
break;
}
+
+
+   spin_lock_irqsave(tp-lock, flags);
+   if (tp-timeout_recovery) {
+   tulip_tx_timeout_complete(tp, ioaddr);
+   tp-timeout_recovery = 0;
+   }
+   spin_unlock_irqrestore(tp-lock, flags);
+
/* mod_timer synchronizes us with potential add_timer calls
 * from interrupts.
 */
diff --git a/drivers/net/tulip/tulip.h b/drivers/net/tulip/tulip.h
index 951af5e..408fe46 100644
--- a/drivers/net/tulip/tulip.h
+++ b/drivers/net/tulip/tulip.h
@@ -43,7 +43,8 @@ struct tulip_chip_table {
int io_size;
int valid_intrs;/* CSR7 interrupt enable settings */
int flags;
-   void (*media_timer) (unsigned long data);
+   void (*media_timer) (unsigned long);
+   void (*media_task) (void *);
 };
 
 
@@ -371,6 +372,7 @@ #endif
unsigned int medialock:1;   /* Don't sense media type. */
unsigned int mediasense:1;  /* Media sensing in progress. */
unsigned int nway:1, nwayset:1; /* 21143 internal NWay. */
+   unsigned int timeout_recovery:1;
unsigned int csr0;  /* CSR0 setting. */
unsigned int csr6;  /* Current CSR6 control settings. */
unsigned char eeprom[EEPROM_SIZE];  /* Serial EEPROM contents. */
@@ -389,6 +391,7 @@ #endif
void __iomem *base_addr;
int csr12_shadow;
int pad0;   /* Used for 8-byte alignment */
+   struct work_struct media_work;
 };
 
 
@@ -403,7 +406,7 @@ struct eeprom_fixup {
 
 /* 21142.c */
 extern u16 t21142_csr14[];
-void t21142_timer(unsigned long data);
+void t21142_media_task(void *data);
 void t21142_start_nway(struct net_device *dev);
 void t21142_lnk_change(struct net_device *dev, int csr5);
 
@@ -441,7 +444,7 @@ void pnic_lnk_change(struct net_device *
 void pnic_timer(unsigned long data);
 
 /* timer.c */
-void tulip_timer(unsigned long data);
+void tulip_media_task(void *data);
 void mxic_timer(unsigned long data);
 void comet_timer(unsigned long data);
 
@@ -493,4 +496,14 @@ static inline void tulip_restart_rxtx(st
tulip_start_rxtx(tp);
 }
 
+static inline void tulip_tx_timeout_complete(struct tulip_private *tp, void 
__iomem *ioaddr)
+{
+   /* Stop and restart the chip's Tx processes. */
+   tulip_restart_rxtx(tp);
+   /* Trigger an immediate transmit demand. */
+   iowrite32(0, ioaddr + CSR1);
+
+   tp-stats.tx_errors++;
+}
+
 #endif /* __NET_TULIP_H__ */
diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index 363e5f6..bdb6698 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -130,7 +130,14 @@ #else
 int tulip_debug = 1;
 #endif
 
+static void tulip_timer(unsigned long data)
+{
+   struct net_device *dev = (struct net_device *)data;
+   struct tulip_private *tp = netdev_priv(dev);
 
+   if (netif_running(dev))
+   schedule_work(tp-media_work);
+}
 
 /*
  * This table use during operation 

[PATCH 9/9] [TULIP] Fix section mismatch in de2104x.c

2006-08-07 Thread Kyle McMartin
From: Helge Deller [EMAIL PROTECTED]

WARNING: drivers/net/tulip/de2104x.o - Section mismatch: reference to 
.init.text:de_init_one from .data.rel.local after 'de_driver' (at offset 0x20)
WARNING: drivers/net/tulip/de2104x.o - Section mismatch: reference to 
.exit.text:de_remove_one from .data.rel.local after 'de_driver' (at offset 0x28)

Signed-off-by: Helge Deller [EMAIL PROTECTED]
Signed-off-by: Kyle McMartin [EMAIL PROTECTED]
---
 drivers/net/tulip/de2104x.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index d05c5aa..150a05a 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -1730,7 +1730,7 @@ static void __init de21040_get_media_inf
 }
 
 /* Note: this routine returns extra data bits for size detection. */
-static unsigned __init tulip_read_eeprom(void __iomem *regs, int location, int 
addr_len)
+static unsigned __devinit tulip_read_eeprom(void __iomem *regs, int location, 
int addr_len)
 {
int i;
unsigned retval = 0;
@@ -1926,7 +1926,7 @@ bad_srom:
goto fill_defaults;
 }
 
-static int __init de_init_one (struct pci_dev *pdev,
+static int __devinit de_init_one (struct pci_dev *pdev,
  const struct pci_device_id *ent)
 {
struct net_device *dev;
@@ -2082,7 +2082,7 @@ err_out_free:
return rc;
 }
 
-static void __exit de_remove_one (struct pci_dev *pdev)
+static void __devexit de_remove_one (struct pci_dev *pdev)
 {
struct net_device *dev = pci_get_drvdata(pdev);
struct de_private *de = dev-priv;
-- 
1.4.1.1

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/9] [TULIP] Make tulip_stop_rxtx() failure case slightly more informative

2006-08-07 Thread Kyle McMartin
From: Grant Grundler [EMAIL PROTECTED]

Signed-off-by: Grant Grundler [EMAIL PROTECTED]
Signed-off-by: Kyle McMartin [EMAIL PROTECTED]
---
 drivers/net/tulip/tulip.h |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tulip/tulip.h b/drivers/net/tulip/tulip.h
index 3bcfbf3..d79c7ae 100644
--- a/drivers/net/tulip/tulip.h
+++ b/drivers/net/tulip/tulip.h
@@ -473,8 +473,11 @@ static inline void tulip_stop_rxtx(struc
udelay(10);
 
if (!i)
-   printk(KERN_DEBUG %s: tulip_stop_rxtx() failed\n,
-   pci_name(tp-pdev));
+   printk(KERN_DEBUG %s: tulip_stop_rxtx() failed
+(CSR5 0x%x CSR6 0x%x)\n,
+   pci_name(tp-pdev),
+   ioread32(ioaddr + CSR5),
+   ioread32(ioaddr + CSR6));
}
 }
 
-- 
1.4.1.1

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/9] [TULIP] Flush MMIO writes in reset sequence

2006-08-07 Thread Kyle McMartin
From: Grant Grundler [EMAIL PROTECTED]

The obvious safe register to read is one from PCI config space.

Signed-off-by: Grant Grundler [EMAIL PROTECTED]
Signed-off-by: Kyle McMartin [EMAIL PROTECTED]
---
 drivers/net/tulip/tulip_core.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index 6b54572..81905f4 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -295,12 +295,14 @@ static void tulip_up(struct net_device *
 
/* Reset the chip, holding bit 0 set at least 50 PCI cycles. */
iowrite32(0x0001, ioaddr + CSR0);
+   pci_read_config_dword(tp-pdev, PCI_COMMAND, i);  /* flush write */
udelay(100);
 
/* Deassert reset.
   Wait the specified 50 PCI cycles after a reset by initializing
   Tx and Rx queues and the address filter list. */
iowrite32(tp-csr0, ioaddr + CSR0);
+   pci_read_config_dword(tp-pdev, PCI_COMMAND, i);  /* flush write */
udelay(100);
 
if (tulip_debug  1)
-- 
1.4.1.1

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/9] [TULIP] Fix IRQ/DMA race

2006-08-07 Thread Kyle McMartin
From: Grant Grundler [EMAIL PROTECTED]

IRQs are racing with tulip_down().
DMA can be restarted by tulip_interrupt() _after_ we call
tulip_stop_rxtx() and the DMA buffers are unmapped.  The result
is an MCA (hard crash on ia64) because of an IO TLB miss.

Signed-off-by: Grant Grundler [EMAIL PROTECTED]
Signed-off-by: Kyle McMartin [EMAIL PROTECTED]
---
 drivers/net/tulip/interrupt.c  |4 
 drivers/net/tulip/tulip_core.c |   17 +++--
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c
index 99ccf2e..19faa0e 100644
--- a/drivers/net/tulip/interrupt.c
+++ b/drivers/net/tulip/interrupt.c
@@ -87,6 +87,10 @@ int tulip_refill_rx(struct net_device *d
}
tp-rx_ring[entry].status = cpu_to_le32(DescOwned);
}
+
+/* FIXME: restarting DMA breaks tulip_down() code path.
+   tulip_down() will unmap the RX and TX descriptors.
+ */
if(tp-chip_id == LC82C168) {
if(((ioread32(tp-base_addr + CSR5)17)0x07) == 4) {
/* Rx stopped due to out of buffers,
diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index 81905f4..363e5f6 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -742,21 +742,20 @@ #endif
 
/* Disable interrupts by clearing the interrupt mask. */
iowrite32 (0x, ioaddr + CSR7);
+   ioread32 (ioaddr + CSR7);   /* flush posted write */
 
-   /* Stop the Tx and Rx processes. */
-   tulip_stop_rxtx(tp);
+   spin_unlock_irqrestore (tp-lock, flags);
 
-   /* prepare receive buffers */
-   tulip_refill_rx(dev);
+   free_irq (dev-irq, dev);   /* no more races after this */
+   tulip_stop_rxtx(tp);/* Stop DMA */
 
-   /* release any unconsumed transmit buffers */
-   tulip_clean_tx_ring(tp);
+   /* Put driver back into the state we start with */
+   tulip_refill_rx(dev);   /* prepare RX buffers */
+   tulip_clean_tx_ring(tp);/* clean up unsent TX buffers */
 
if (ioread32 (ioaddr + CSR6) != 0x)
tp-stats.rx_missed_errors += ioread32 (ioaddr + CSR8)  0x;
 
-   spin_unlock_irqrestore (tp-lock, flags);
-
init_timer(tp-timer);
tp-timer.data = (unsigned long)dev;
tp-timer.function = tulip_tbl[tp-chip_id].media_timer;
@@ -782,7 +781,6 @@ static int tulip_close (struct net_devic
printk (KERN_DEBUG %s: Shutting down ethercard, status was 
%2.2x.\n,
dev-name, ioread32 (ioaddr + CSR5));
 
-   free_irq (dev-irq, dev);
 
/* Free all the skbuffs in the Rx queue. */
for (i = 0; i  RX_RING_SIZE; i++) {
@@ -1752,7 +1750,6 @@ static int tulip_suspend (struct pci_dev
tulip_down(dev);
 
netif_device_detach(dev);
-   free_irq(dev-irq, dev);
 
pci_save_state(pdev);
pci_disable_device(pdev);
-- 
1.4.1.1

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/9] [TULIP] Make DS21143 printout match lspci output

2006-08-07 Thread Kyle McMartin
From: Thibaut Varene [EMAIL PROTECTED]

Signed-off-by: Thibaut Varene [EMAIL PROTECTED]
Signed-off-by: Kyle McMartin [EMAIL PROTECTED]
---
 drivers/net/tulip/tulip_core.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index bdb6698..21eaeb2 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -155,7 +155,7 @@ struct tulip_chip_table tulip_tbl[] = {
tulip_media_task },
 
   /* DC21142, DC21143 */
-  { Digital DS21143 Tulip, 128, 0x0801fbff,
+  { Digital DS21142/43 Tulip, 128, 0x0801fbff,
HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI | HAS_NWAY
| HAS_INTR_MITIGATION | HAS_PCI_MWI, tulip_timer, t21142_media_task },
 
-- 
1.4.1.1

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/9] [TULIP] Clean tulip.h so it can be used by winbond-840.c

2006-08-07 Thread Kyle McMartin
From: Grant Grundler [EMAIL PROTECTED]

Include tulip.h in winbond-840.c and clean up lots of redundant
definitions.

Signed-off-by: Grant Grundler [EMAIL PROTECTED]
Signed-off-by: Kyle McMartin [EMAIL PROTECTED]
---
 drivers/net/tulip/tulip.h   |   17 ++
 drivers/net/tulip/tulip_core.c  |7 +---
 drivers/net/tulip/winbond-840.c |   68 ++-
 3 files changed, 37 insertions(+), 55 deletions(-)

diff --git a/drivers/net/tulip/tulip.h b/drivers/net/tulip/tulip.h
index d79c7ae..951af5e 100644
--- a/drivers/net/tulip/tulip.h
+++ b/drivers/net/tulip/tulip.h
@@ -30,11 +30,10 @@ #include asm/irq.h
 /* undefine, or define to various debugging levels (4 == obscene levels) */
 #define TULIP_DEBUG 1
 
-/* undefine USE_IO_OPS for MMIO, define for PIO */
 #ifdef CONFIG_TULIP_MMIO
-# undef USE_IO_OPS
+#define TULIP_BAR  1   /* CBMA */
 #else
-# define USE_IO_OPS 1
+#define TULIP_BAR  0   /* CBIO */
 #endif
 
 
@@ -142,6 +141,7 @@ enum status_bits {
RxNoBuf = 0x80,
RxIntr = 0x40,
TxFIFOUnderflow = 0x20,
+   RxErrIntr = 0x10,
TxJabber = 0x08,
TxNoBuf = 0x04,
TxDied = 0x02,
@@ -192,9 +192,14 @@ struct tulip_tx_desc {
 
 
 enum desc_status_bits {
-   DescOwned = 0x8000,
-   RxDescFatalErr = 0x8000,
-   RxWholePkt = 0x0300,
+   DescOwned= 0x8000,
+   DescWholePkt = 0x6000,
+   DescEndPkt   = 0x4000,
+   DescStartPkt = 0x2000,
+   DescEndRing  = 0x0200,
+   DescUseLink  = 0x0100,
+   RxDescFatalErr = 0x008000,
+   RxWholePkt   = 0x0300,
 };
 
 
diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index bf93679..6b54572 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -1361,11 +1361,8 @@ #endif
if (pci_request_regions (pdev, tulip))
goto err_out_free_netdev;
 
-#ifndef USE_IO_OPS
-   ioaddr =  pci_iomap(pdev, 1, tulip_tbl[chip_idx].io_size);
-#else
-   ioaddr =  pci_iomap(pdev, 0, tulip_tbl[chip_idx].io_size);
-#endif
+   ioaddr =  pci_iomap(pdev, TULIP_BAR, tulip_tbl[chip_idx].io_size);
+
if (!ioaddr)
goto err_out_free_res;
 
diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c
index 7f41481..fa3a7b3 100644
--- a/drivers/net/tulip/winbond-840.c
+++ b/drivers/net/tulip/winbond-840.c
@@ -90,10 +90,8 @@ static int full_duplex[MAX_UNITS] = {-1,
Making the Tx ring too large decreases the effectiveness of channel
bonding and packet priority.
There are no ill effects from too-large receive rings. */
-#define TX_RING_SIZE   16
 #define TX_QUEUE_LEN   10  /* Limit ring entries actually used.  */
 #define TX_QUEUE_LEN_RESTART   5
-#define RX_RING_SIZE   32
 
 #define TX_BUFLIMIT(1024-128)
 
@@ -137,6 +135,8 @@ #include asm/processor.h  /* Processor
 #include asm/io.h
 #include asm/irq.h
 
+#include tulip.h
+
 /* These identify the driver base version and may not be removed. */
 static char version[] __devinitdata =
 KERN_INFO DRV_NAME .c:v DRV_VERSION  (2.4 port)  DRV_RELDATE   Donald 
Becker [EMAIL PROTECTED]\n
@@ -242,8 +242,8 @@ static const struct pci_id_info pci_id_t
 };
 
 /* This driver was written to use PCI memory space, however some x86 systems
-   work only with I/O space accesses.  Pass -DUSE_IO_OPS to use PCI I/O space
-   accesses instead of memory space. */
+   work only with I/O space accesses. See CONFIG_TULIP_MMIO in .config
+*/
 
 /* Offsets to the Command and Status Registers, CSRs.
While similar to the Tulip, these registers are longword aligned.
@@ -261,21 +261,11 @@ enum w840_offsets {
CurTxDescAddr=0x4C, CurTxBufAddr=0x50,
 };
 
-/* Bits in the interrupt status/enable registers. */
-/* The bits in the Intr Status/Enable registers, mostly interrupt sources. */
-enum intr_status_bits {
-   NormalIntr=0x1, AbnormalIntr=0x8000,
-   IntrPCIErr=0x2000, TimerInt=0x800,
-   IntrRxDied=0x100, RxNoBuf=0x80, IntrRxDone=0x40,
-   TxFIFOUnderflow=0x20, RxErrIntr=0x10,
-   TxIdle=0x04, IntrTxStopped=0x02, IntrTxDone=0x01,
-};
-
 /* Bits in the NetworkConfig register. */
 enum rx_mode_bits {
-   AcceptErr=0x80, AcceptRunt=0x40,
-   AcceptBroadcast=0x20, AcceptMulticast=0x10,
-   AcceptAllPhys=0x08, AcceptMyPhys=0x02,
+   AcceptErr=0x80,
+   RxAcceptBroadcast=0x20, AcceptMulticast=0x10,
+   RxAcceptAllPhys=0x08, AcceptMyPhys=0x02,
 };
 
 enum mii_reg_bits {
@@ -297,13 +287,6 @@ struct w840_tx_desc {
u32 buffer1, buffer2;
 };
 
-/* Bits in network_desc.status */
-enum desc_status_bits {
-   DescOwn=0x8000, DescEndRing=0x0200, DescUseLink=0x0100,
-   DescWholePkt=0x6000, DescStartPkt=0x2000, DescEndPkt=0x4000,
-   DescIntr=0x8000,
-};
-
 #define MII_CNT1 /* winbond only supports one MII */
 struct netdev_private {
struct 

Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Brandeburg, Jesse
On Mon, 7 Aug 2006, jamal wrote:
 -#define E1000_TX_WEIGHT 64
 -   /* weight of a sort for tx, to avoid endless transmit
 cleanup */
 -   if (count++ == E1000_TX_WEIGHT) break;
 +   /* avoid endless transmit cleanup */
 +   if (count++ == tx_ring-prunet) break;
 
 As you can see E1000_TX_WEIGHT threshold exists today and you are right
 if no TX interrupts, packet arrivals or scheduled wakes happen then that
 descriptor that was not pruned will sit there forever (which is a bad
 thing for TCP). Are we in sync?
 If yes, what is the likelihood they will sit there forever? I think
 perhaps some TX interrupts will happen, no?

we don't enable it right now, but you could use the TXQE (tx queue empty) 
interrupt to avoid the starvation case.  I think it might flood you with 
TXQE interrupts however, so we'd probably have to figure out some way to 
turn it on occasionally.

Jesse
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/9] [TULIP] Print physical address in tulip_init_one

2006-08-07 Thread Kyle McMartin
From: Grant Grundler [EMAIL PROTECTED]

As the cookie returned by pci_iomap() is fairly useless...

Signed-off-by: Grant Grundler [EMAIL PROTECTED]
Signed-off-by: Kyle McMartin [EMAIL PROTECTED]
---
 drivers/net/tulip/tulip_core.c |   10 --
 1 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index 7351831..bf93679 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -1644,8 +1644,14 @@ #endif
if (register_netdev(dev))
goto err_out_free_ring;
 
-   printk(KERN_INFO %s: %s rev %d at %p,,
-  dev-name, chip_name, chip_rev, ioaddr);
+   printk(KERN_INFO %s: %s rev %d at 
+#ifdef CONFIG_TULIP_MMIO
+   MMIO
+#else
+   Port
+#endif
+0x%lx,, dev-name, chip_name, chip_rev,
+   pci_resource_start(pdev, TULIP_BAR));
pci_set_drvdata(pdev, dev);
 
if (eeprom_missing)
-- 
1.4.1.1

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: forcedeth gigabit detection

2006-08-07 Thread Krzysztof Halasa
Frank v Waveren [EMAIL PROTECTED] writes:

 The nforce2 builtin network on my A7N8X-delux motherboard won't detect
 as gigabit-capable using the forcedeth driver. 

Asustek doesn't seem to indicate it has gigabit ports, are you
sure your mb does have them? Perhaps it's a different version,
something like A7N8X-E?
-- 
Krzysztof Halasa
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Replace pci_module_init with pci_register_driver in drivers/net

2006-08-07 Thread Pavel Roskin
From: Pavel Roskin [EMAIL PROTECTED]


---

 drivers/net/3c59x.c   |2 +-
 drivers/net/8139cp.c  |2 +-
 drivers/net/8139too.c |2 +-
 drivers/net/acenic.c  |2 +-
 drivers/net/amd8111e.c|2 +-
 drivers/net/arcnet/com20020-pci.c |2 +-
 drivers/net/b44.c |2 +-
 drivers/net/bnx2.c|2 +-
 drivers/net/cassini.c |2 +-
 drivers/net/chelsio/cxgb2.c   |2 +-
 drivers/net/defxx.c   |2 +-
 drivers/net/dl2k.c|2 +-
 drivers/net/e100.c|2 +-
 drivers/net/e1000/e1000_main.c|2 +-
 drivers/net/eepro100.c|2 +-
 drivers/net/epic100.c |2 +-
 drivers/net/fealnx.c  |2 +-
 drivers/net/forcedeth.c   |2 +-
 drivers/net/hp100.c   |2 +-
 drivers/net/ixgb/ixgb_main.c  |2 +-
 drivers/net/natsemi.c |2 +-
 drivers/net/ne2k-pci.c|2 +-
 drivers/net/ns83820.c |2 +-
 drivers/net/pci-skeleton.c|2 +-
 drivers/net/pcnet32.c |2 +-
 drivers/net/r8169.c   |2 +-
 drivers/net/rrunner.c |2 +-
 drivers/net/s2io.c|2 +-
 drivers/net/saa9730.c |2 +-
 drivers/net/sis190.c  |2 +-
 drivers/net/sis900.c  |2 +-
 drivers/net/sk98lin/skge.c|2 +-
 drivers/net/skfp/skfddi.c |2 +-
 drivers/net/skge.c|2 +-
 drivers/net/starfire.c|2 +-
 drivers/net/sundance.c|2 +-
 drivers/net/sungem.c  |2 +-
 drivers/net/tc35815.c |2 +-
 drivers/net/tg3.c |2 +-
 drivers/net/tokenring/3c359.c |2 +-
 drivers/net/tokenring/lanstreamer.c   |2 +-
 drivers/net/tokenring/olympic.c   |2 +-
 drivers/net/tulip/de2104x.c   |2 +-
 drivers/net/tulip/de4x5.c |2 +-
 drivers/net/tulip/dmfe.c  |2 +-
 drivers/net/tulip/tulip_core.c|2 +-
 drivers/net/tulip/uli526x.c   |2 +-
 drivers/net/tulip/winbond-840.c   |2 +-
 drivers/net/tulip/xircom_tulip_cb.c   |2 +-
 drivers/net/typhoon.c |2 +-
 drivers/net/via-rhine.c   |2 +-
 drivers/net/via-velocity.c|2 +-
 drivers/net/wan/dscc4.c   |2 +-
 drivers/net/wan/farsync.c |2 +-
 drivers/net/wan/lmc/lmc_main.c|2 +-
 drivers/net/wan/pc300_drv.c   |2 +-
 drivers/net/wan/pci200syn.c   |2 +-
 drivers/net/wan/wanxl.c   |2 +-
 drivers/net/wireless/atmel_pci.c  |2 +-
 drivers/net/wireless/ipw2100.c|2 +-
 drivers/net/wireless/ipw2200.c|2 +-
 drivers/net/wireless/orinoco_nortel.c |2 +-
 drivers/net/wireless/orinoco_pci.c|2 +-
 drivers/net/wireless/orinoco_plx.c|2 +-
 drivers/net/wireless/orinoco_tmd.c|2 +-
 drivers/net/wireless/prism54/islpci_hotplug.c |2 +-
 drivers/net/yellowfin.c   |2 +-
 67 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 80e8ca0..7c23813 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -3169,7 +3169,7 @@ static int __init vortex_init(void)
 {
int pci_rc, eisa_rc;
 
-   pci_rc = pci_module_init(vortex_driver);
+   pci_rc = pci_register_driver(vortex_driver);
eisa_rc = vortex_eisa_init();
 
if (pci_rc == 0)
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 1428bb7..7061a23 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -2098,7 +2098,7 @@ static int __init cp_init (void)
 #ifdef MODULE
printk(%s, version);
 #endif
-   return pci_module_init (cp_driver);
+   return pci_register_driver (cp_driver);
 }
 
 static void __exit cp_exit (void)
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index e4f4eaf..0b58725 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -2629,7 +2629,7 @@ #ifdef MODULE
printk (KERN_INFO RTL8139_DRIVER_NAME \n);
 #endif
 
-   return pci_module_init (rtl8139_pci_driver);
+   

[RFC: -mm patch] bcm43xx_main.c: remove 3 functions

2006-08-07 Thread Adrian Bunk
This patch removes three no longer used functions (that are even 
generating gcc warnings).

This patch doesn't look right, but it is the result of 
58e5528ee464d38040b9489e10033c9387a10d56 in git-netdev...

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]

---

 drivers/net/wireless/bcm43xx/bcm43xx_main.c |   33 
 1 file changed, 33 deletions(-)

--- linux-2.6.18-rc3-mm2-full/drivers/net/wireless/bcm43xx/bcm43xx_main.c.old   
2006-08-07 18:21:31.0 +0200
+++ linux-2.6.18-rc3-mm2-full/drivers/net/wireless/bcm43xx/bcm43xx_main.c   
2006-08-07 18:23:36.0 +0200
@@ -3194,39 +3194,6 @@
bcm43xx_clear_keys(bcm);
 }
 
-static int bcm43xx_rng_read(struct hwrng *rng, u32 *data)
-{
-   struct bcm43xx_private *bcm = (struct bcm43xx_private *)rng-priv;
-   unsigned long flags;
-
-   spin_lock_irqsave((bcm)-irq_lock, flags);
-   *data = bcm43xx_read16(bcm, BCM43xx_MMIO_RNG);
-   spin_unlock_irqrestore((bcm)-irq_lock, flags);
-
-   return (sizeof(u16));
-}
-
-static void bcm43xx_rng_exit(struct bcm43xx_private *bcm)
-{
-   hwrng_unregister(bcm-rng);
-}
-
-static int bcm43xx_rng_init(struct bcm43xx_private *bcm)
-{
-   int err;
-
-   snprintf(bcm-rng_name, ARRAY_SIZE(bcm-rng_name),
-%s_%s, KBUILD_MODNAME, bcm-net_dev-name);
-   bcm-rng.name = bcm-rng_name;
-   bcm-rng.data_read = bcm43xx_rng_read;
-   bcm-rng.priv = (unsigned long)bcm;
-   err = hwrng_register(bcm-rng);
-   if (err)
-   printk(KERN_ERR PFX RNG init failed (%d)\n, err);
-
-   return err;
-}
-
 static int bcm43xx_shutdown_all_wireless_cores(struct bcm43xx_private *bcm)
 {
int ret = 0;

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: forcedeth gigabit detection

2006-08-07 Thread Frank v Waveren
You know, I suddenly feel very foolish. Sorry for having wasted
everyone's time, and thanks for your keen eye.

On Tue, Aug 08, 2006 at 12:19:24AM +0200, Krzysztof Halasa wrote:
 Frank v Waveren [EMAIL PROTECTED] writes:
 
  The nforce2 builtin network on my A7N8X-delux motherboard won't detect
  as gigabit-capable using the forcedeth driver. 
 
 Asustek doesn't seem to indicate it has gigabit ports, are you
 sure your mb does have them? Perhaps it's a different version,
 something like A7N8X-E?
 -- 
 Krzysztof Halasa
 

-- 
Frank v Waveren  Key fingerprint: BDD7 D61E
[EMAIL PROTECTED]  5D39 CF05 4BFC 
F57A
Public key: hkp://wwwkeys.pgp.net/468D62C8  FA00 7D51 468D 62C8


signature.asc
Description: Digital signature


Re: [PATCH] fix alloc_skb comment typo

2006-08-07 Thread David Miller
From: Christoph Hellwig [EMAIL PROTECTED]
Date: Sat, 5 Aug 2006 14:59:06 +0200

 Signed-off-by: Christoph Hellwig [EMAIL PROTECTED]

Applied, thanks Christoph.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


tg3: tg3_stop_block timed out

2006-08-07 Thread Bernd Schubert
Hi,

I have seen a few reports like this, but now broadcom seems to actively
support tg3, so I decided to send this.

... [many hamilton not responding messages]
4554928.798000] nfs: server hamilton not responding, still trying
[4554935.319000] nfs: server hamilton not responding, still trying
[4555468.94] NETDEV WATCHDOG: eth1: transmit timed out
[4555468.94] tg3: eth1: transmit timed out, resetting
[4555469.044000] tg3: tg3_stop_block timed out, ofs=3400 enable_bit=2
[4555469.147000] tg3: tg3_stop_block timed out, ofs=2400 enable_bit=2
[4555469.251000] tg3: tg3_stop_block timed out, ofs=1400 enable_bit=2
[4555469.354000] tg3: tg3_stop_block timed out, ofs=c00 enable_bit=2
[4555469.433000] tg3: eth1: Link is down.
[4555472.593000] tg3: eth1: Link is up at 1000 Mbps, full duplex.
[4555472.594000] tg3: eth1: Flow control is on for TX and on for RX.
[4555498.016000] nfs: server 129.206.21.200 OK
[4555648.015000] nfs: server 129.206.21.200 OK
... [many ok messages]

It seems to be the first time that something like this happened; at least I
can't find anything in the previous logs.

This is with 2.6.16; would it be worth trying a more recent tg3 driver (e.g.
from Broadcom (3.58) or backported from 2.6.17 (3.59))?


Thanks, 
Bernd




-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RESEND 1/2] in-kernel sockets API

2006-08-07 Thread Sridhar Samudrala
Dave,

Could you consider this for inclusion into 2.6.19 tree?

Thanks
Sridhar

This patch implements wrapper functions that provide a convenient way to
access the sockets API for in-kernel users like sunrpc, cifs & ocfs2 etc.
and any future users.

Signed-off-by: Sridhar Samudrala [EMAIL PROTECTED]
Acked-by: James Morris [EMAIL PROTECTED]

---

 include/linux/net.h |   19 +
 net/socket.c|  113 +++
 2 files changed, 132 insertions(+), 0 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index b20c53c..19da2c0 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -208,6 +208,25 @@ extern int  kernel_recvmsg(struct
struct kvec *vec, size_t num,
size_t len, int flags);
 
+extern int kernel_bind(struct socket *sock, struct sockaddr *addr,
+  int addrlen);
+extern int kernel_listen(struct socket *sock, int backlog);
+extern int kernel_accept(struct socket *sock, struct socket **newsock,
+int flags);
+extern int kernel_connect(struct socket *sock, struct sockaddr *addr,
+ int addrlen, int flags);
+extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+ int *addrlen);
+extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+ int *addrlen);
+extern int kernel_getsockopt(struct socket *sock, int level, int optname,
+char *optval, int *optlen);
+extern int kernel_setsockopt(struct socket *sock, int level, int optname,
+char *optval, int optlen);
+extern int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+  size_t size, int flags);
+extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
+
 #ifndef CONFIG_SMP
 #define SOCKOPS_WRAPPED(name) name
 #define SOCKOPS_WRAP(name, fam)
diff --git a/net/socket.c b/net/socket.c
index b4848ce..0c9d01d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2160,6 +2160,109 @@ static long compat_sock_ioctl(struct fil
 }
 #endif
 
+int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+{
+   return sock-ops-bind(sock, addr, addrlen);
+}
+
+int kernel_listen(struct socket *sock, int backlog)
+{
+   return sock-ops-listen(sock, backlog);
+}
+
+int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
+{
+   struct sock *sk = sock-sk;
+   int err;
+
+   err = sock_create_lite(sk-sk_family, sk-sk_type, sk-sk_protocol,
+  newsock);
+   if (err  0)
+   goto done;
+
+   err = sock-ops-accept(sock, *newsock, flags);
+   if (err  0) {
+   sock_release(*newsock);
+   goto done;
+   }
+
+   (*newsock)-ops = sock-ops;
+
+done:
+   return err;
+}
+
+int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
+   int flags)
+{
+   return sock-ops-connect(sock, addr, addrlen, flags);
+}
+
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+int *addrlen)
+{
+   return sock-ops-getname(sock, addr, addrlen, 0);
+}
+
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+int *addrlen)
+{
+   return sock-ops-getname(sock, addr, addrlen, 1);
+}
+
+int kernel_getsockopt(struct socket *sock, int level, int optname,
+   char *optval, int *optlen)
+{
+   mm_segment_t oldfs = get_fs();
+   int err;
+
+   set_fs(KERNEL_DS);
+   if (level == SOL_SOCKET)
+   err = sock_getsockopt(sock, level, optname, optval, optlen);
+   else
+   err = sock-ops-getsockopt(sock, level, optname, optval,
+   optlen);
+   set_fs(oldfs);
+   return err;
+}
+
+int kernel_setsockopt(struct socket *sock, int level, int optname,
+   char *optval, int optlen)
+{
+   mm_segment_t oldfs = get_fs();
+   int err;
+
+   set_fs(KERNEL_DS);
+   if (level == SOL_SOCKET)
+   err = sock_setsockopt(sock, level, optname, optval, optlen);
+   else
+   err = sock-ops-setsockopt(sock, level, optname, optval,
+   optlen);
+   set_fs(oldfs);
+   return err;
+}
+
+int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+   size_t size, int flags)
+{
+   if (sock-ops-sendpage)
+   return sock-ops-sendpage(sock, page, offset, size, flags);
+
+   return sock_no_sendpage(sock, page, offset, size, flags);
+}
+
+int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
+{
+   mm_segment_t oldfs = get_fs();
+   int err;
+
+   set_fs(KERNEL_DS);
+   err = 

[PATCH RESEND 2/2] update sunrpc to use in-kernel sockets API

2006-08-07 Thread Sridhar Samudrala
Update sunrpc to use in-kernel sockets API.

Signed-off-by: Sridhar Samudrala [EMAIL PROTECTED]
Acked-by: James Morris [EMAIL PROTECTED]

---

 net/sunrpc/svcsock.c  |   38 ++
 net/sunrpc/xprtsock.c |8 
 2 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d9a9573..953aff8 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -388,7 +388,7 @@ svc_sendto(struct svc_rqst *rqstp, struc
/* send head */
if (slen == xdr-head[0].iov_len)
flags = 0;
-   len = sock-ops-sendpage(sock, rqstp-rq_respages[0], 0, 
xdr-head[0].iov_len, flags);
+   len = kernel_sendpage(sock, rqstp-rq_respages[0], 0, 
xdr-head[0].iov_len, flags);
if (len != xdr-head[0].iov_len)
goto out;
slen -= xdr-head[0].iov_len;
@@ -400,7 +400,7 @@ svc_sendto(struct svc_rqst *rqstp, struc
while (pglen  0) {
if (slen == size)
flags = 0;
-   result = sock-ops-sendpage(sock, *ppage, base, size, flags);
+   result = kernel_sendpage(sock, *ppage, base, size, flags);
if (result  0)
len += result;
if (result != size)
@@ -413,7 +413,7 @@ svc_sendto(struct svc_rqst *rqstp, struc
}
/* send tail */
if (xdr-tail[0].iov_len) {
-   result = sock-ops-sendpage(sock, 
rqstp-rq_respages[rqstp-rq_restailpage], 
+   result = kernel_sendpage(sock, 
rqstp-rq_respages[rqstp-rq_restailpage],
 ((unsigned 
long)xdr-tail[0].iov_base) (PAGE_SIZE-1),
 xdr-tail[0].iov_len, 0);
 
@@ -434,13 +434,10 @@ out:
 static int
 svc_recv_available(struct svc_sock *svsk)
 {
-   mm_segment_toldfs;
struct socket   *sock = svsk-sk_sock;
int avail, err;
 
-   oldfs = get_fs(); set_fs(KERNEL_DS);
-   err = sock-ops-ioctl(sock, TIOCINQ, (unsigned long) avail);
-   set_fs(oldfs);
+   err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) avail);
 
return (err = 0)? avail : err;
 }
@@ -472,7 +469,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str
 * at accept time. FIXME
 */
alen = sizeof(rqstp-rq_addr);
-   sock-ops-getname(sock, (struct sockaddr *)rqstp-rq_addr, alen, 1);
+   kernel_getpeername(sock, (struct sockaddr *)rqstp-rq_addr, alen);
 
dprintk(svc: socket %p recvfrom(%p, %Zu) = %d\n,
rqstp-rq_sock, iov[0].iov_base, iov[0].iov_len, len);
@@ -758,7 +755,6 @@ svc_tcp_accept(struct svc_sock *svsk)
struct svc_serv *serv = svsk-sk_server;
struct socket   *sock = svsk-sk_sock;
struct socket   *newsock;
-   const struct proto_ops *ops;
struct svc_sock *newsvsk;
int err, slen;
 
@@ -766,29 +762,23 @@ svc_tcp_accept(struct svc_sock *svsk)
if (!sock)
return;
 
-   err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, newsock);
-   if (err) {
+   clear_bit(SK_CONN, svsk-sk_flags);
+   err = kernel_accept(sock, newsock, O_NONBLOCK);
+   if (err  0) {
if (err == -ENOMEM)
printk(KERN_WARNING %s: no more sockets!\n,
   serv-sv_name);
-   return;
-   }
-
-   dprintk(svc: tcp_accept %p allocated\n, newsock);
-   newsock-ops = ops = sock-ops;
-
-   clear_bit(SK_CONN, svsk-sk_flags);
-   if ((err = ops-accept(sock, newsock, O_NONBLOCK))  0) {
-   if (err != -EAGAIN  net_ratelimit())
+   else if (err != -EAGAIN  net_ratelimit())
printk(KERN_WARNING %s: accept failed (err %d)!\n,
   serv-sv_name, -err);
-   goto failed;/* aborted connection or whatever */
+   return;
}
+
set_bit(SK_CONN, svsk-sk_flags);
svc_sock_enqueue(svsk);
 
slen = sizeof(sin);
-   err = ops-getname(newsock, (struct sockaddr *) sin, slen, 1);
+   err = kernel_getpeername(newsock, (struct sockaddr *) sin, slen);
if (err  0) {
if (net_ratelimit())
printk(KERN_WARNING %s: peername failed (err %d)!\n,
@@ -1406,14 +1396,14 @@ svc_create_socket(struct svc_serv *serv,
if (sin != NULL) {
if (type == SOCK_STREAM)
sock-sk-sk_reuse = 1; /* allow address reuse */
-   error = sock-ops-bind(sock, (struct sockaddr *) sin,
+   error = kernel_bind(sock, (struct sockaddr *) sin,
sizeof(*sin));
if (error  0)
goto bummer;
}
 
if (protocol == IPPROTO_TCP) {
-   if ((error = sock-ops-listen(sock, 64))  0)
+   

Re: tg3: tg3_stop_block timed out

2006-08-07 Thread Michael Chan
On Tue, 2006-08-08 at 00:43 +0200, Bernd Schubert wrote:
 Hi,
 
 I have seen a few reports like this, but now broadcom seems to actively
 support tg3, so I decided to send this.
 
 ... [many hamilton not responding messages]
 4554928.798000] nfs: server hamilton not responding, still trying
 [4554935.319000] nfs: server hamilton not responding, still trying
 [4555468.94] NETDEV WATCHDOG: eth1: transmit timed out
 [4555468.94] tg3: eth1: transmit timed out, resetting
 [4555469.044000] tg3: tg3_stop_block timed out, ofs=3400 enable_bit=2
 [4555469.147000] tg3: tg3_stop_block timed out, ofs=2400 enable_bit=2
 [4555469.251000] tg3: tg3_stop_block timed out, ofs=1400 enable_bit=2
 [4555469.354000] tg3: tg3_stop_block timed out, ofs=c00 enable_bit=2
 [4555469.433000] tg3: eth1: Link is down.
 [4555472.593000] tg3: eth1: Link is up at 1000 Mbps, full duplex.
 [4555472.594000] tg3: eth1: Flow control is on for TX and on for RX.
 [4555498.016000] nfs: server 129.206.21.200 OK
 [4555648.015000] nfs: server 129.206.21.200 OK
 ... [many ok messages]
 
I need to know what hardware you're using so please send me the tg3
probing output for eth1 when you load the driver. Do you have TSO
enabled?

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] move skb-dev assignment into netdev_alloc_skb

2006-08-07 Thread David Miller
From: Christoph Hellwig [EMAIL PROTECTED]
Date: Sat, 5 Aug 2006 15:01:09 +0200

 All callers of netdev_alloc_skb need to assign skb-dev shortly
 afterwards.  Move it into common code.
 
 I also had to fixup a little bit of the surrounding control flow in
 e1000 - it was just too convoluted.
 
 Signed-off-by: Christoph Hellwig [EMAIL PROTECTED]

Since the e1000 change is non-trivial I'm not going to bypass
the driver author on it, sorry.

What I did do was put the netdev_alloc_skb() change into my
tree, and since I'm co-author of the tg3 driver I'll apply
that bit too.

The e1000 bit will need to go through the e1000 maintainers.

Thanks.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Excess use of packed attribute

2006-08-07 Thread David Miller
From: Stephen Hemminger [EMAIL PROTECTED]
Date: Mon, 7 Aug 2006 13:34:23 -0700

 Silly offenders:  include/net/ipx.h
   include/net/ieee80211.h
   include/net/ip6_tunnel.h
   include/net/ndisc.h
   include/linux/if_ether.h
   include/linux/if_fddi.h
   
   include/linux/sctp.h -- really bad

The ndisc.h one, for example, is needed for cases like ARM.

The if_ether.h one is also needed, or else for:

struct ethhdr *eth;

eth + 1 would do the wrong thing as the compiler would
align the structure to the native pointer size or similar.
This is an issue because ethhdr is 14 bytes in size.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RESEND 1/2] in-kernel sockets API

2006-08-07 Thread Stephen Hemminger
On Mon, 07 Aug 2006 16:00:29 -0700
Sridhar Samudrala [EMAIL PROTECTED] wrote:

 Dave,
 
 Could you consider this for inclusion into 2.6.19 tree?
 
 Thanks
 Sridhar
 
 This patch implements wrapper functions that provide a convenient way to
 access the sockets API for in-kernel users like sunrpc, cifs & ocfs2 etc.
 and any future users.
 
 Signed-off-by: Sridhar Samudrala [EMAIL PROTECTED]
 Acked-by: James Morris [EMAIL PROTECTED]
 
 ---
 
  include/linux/net.h |   19 +
  net/socket.c|  113 
 +++
  2 files changed, 132 insertions(+), 0 deletions(-)
 
 diff --git a/include/linux/net.h b/include/linux/net.h
 index b20c53c..19da2c0 100644
 --- a/include/linux/net.h
 +++ b/include/linux/net.h
 @@ -208,6 +208,25 @@ extern intkernel_recvmsg(struct
   struct kvec *vec, size_t num,
   size_t len, int flags);
  
 +extern int kernel_bind(struct socket *sock, struct sockaddr *addr,
 +int addrlen);
 +extern int kernel_listen(struct socket *sock, int backlog);
 +extern int kernel_accept(struct socket *sock, struct socket **newsock,
 +  int flags);
 +extern int kernel_connect(struct socket *sock, struct sockaddr *addr,
 +   int addrlen, int flags);
 +extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
 +   int *addrlen);
 +extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
 +   int *addrlen);
 +extern int kernel_getsockopt(struct socket *sock, int level, int optname,
 +  char *optval, int *optlen);
 +extern int kernel_setsockopt(struct socket *sock, int level, int optname,
 +  char *optval, int optlen);
 +extern int kernel_sendpage(struct socket *sock, struct page *page, int 
 offset,
 +size_t size, int flags);
 +extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long 
 arg);
 +
  #ifndef CONFIG_SMP
  #define SOCKOPS_WRAPPED(name) name
  #define SOCKOPS_WRAP(name, fam)
 diff --git a/net/socket.c b/net/socket.c
 index b4848ce..0c9d01d 100644
 --- a/net/socket.c
 +++ b/net/socket.c
 @@ -2160,6 +2160,109 @@ static long compat_sock_ioctl(struct fil
  }
  #endif
  
 +int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
 +{
 + return sock-ops-bind(sock, addr, addrlen);
 +}
 +
 +int kernel_listen(struct socket *sock, int backlog)
 +{
 + return sock-ops-listen(sock, backlog);
 +}
 +
 +int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
 +{
 + struct sock *sk = sock-sk;
 + int err;
 +
 + err = sock_create_lite(sk-sk_family, sk-sk_type, sk-sk_protocol,
 +newsock);
 + if (err  0)
 + goto done;
 +
 + err = sock-ops-accept(sock, *newsock, flags);
 + if (err  0) {
 + sock_release(*newsock);
 + goto done;
 + }
 +
 + (*newsock)-ops = sock-ops;
 +
 +done:
 + return err;
 +}
 +
 +int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 +   int flags)
 +{
 + return sock-ops-connect(sock, addr, addrlen, flags);
 +}
 +
 +int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
 +  int *addrlen)
 +{
 + return sock-ops-getname(sock, addr, addrlen, 0);
 +}
 +
 +int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
 +  int *addrlen)
 +{
 + return sock-ops-getname(sock, addr, addrlen, 1);
 +}
 +
 +int kernel_getsockopt(struct socket *sock, int level, int optname,
 + char *optval, int *optlen)
 +{
 + mm_segment_t oldfs = get_fs();
 + int err;
 +
 + set_fs(KERNEL_DS);
 + if (level == SOL_SOCKET)
 + err = sock_getsockopt(sock, level, optname, optval, optlen);
 + else
 + err = sock-ops-getsockopt(sock, level, optname, optval,
 + optlen);
 + set_fs(oldfs);
 + return err;
 +}
 +
 +int kernel_setsockopt(struct socket *sock, int level, int optname,
 + char *optval, int optlen)
 +{
 + mm_segment_t oldfs = get_fs();
 + int err;
 +
 + set_fs(KERNEL_DS);
 + if (level == SOL_SOCKET)
 + err = sock_setsockopt(sock, level, optname, optval, optlen);
 + else
 + err = sock-ops-setsockopt(sock, level, optname, optval,
 + optlen);
 + set_fs(oldfs);
 + return err;
 +}
 +
 +int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 + size_t size, int flags)
 +{
 + if (sock-ops-sendpage)
 + return sock-ops-sendpage(sock, page, offset, size, flags);
 +
 + return sock_no_sendpage(sock, page, offset, size, flags);
 +}
 +
 +int kernel_sock_ioctl(struct socket *sock, int cmd, 

[PATCH wireless-dev 2/6] d80211: Fix PS-Poll frame dropping

2006-08-07 Thread Jouni Malinen
Fixed PS-Poll processing for STAs that are not authenticated or
associated:
- 80211.ko dropped these frames even though it should have sent them
  to hostapd (this was broken by addition of IBSS support)

Signed-off-by: Jouni Malinen [EMAIL PROTECTED]

Index: wireless-dev/net/d80211/ieee80211.c
===
--- wireless-dev.orig/net/d80211/ieee80211.c
+++ wireless-dev/net/d80211/ieee80211.c
@@ -3074,8 +3074,9 @@ ieee80211_rx_h_check(struct ieee80211_tx
 rx-sdata-type != IEEE80211_IF_TYPE_IBSS 
 (!rx-sta || !(rx-sta-flags  WLAN_STA_ASSOC {
if ((!(rx-fc  IEEE80211_FCTL_FROMDS) 
-!(rx-fc  IEEE80211_FCTL_TODS)) ||
-   !rx-u.rx.ra_match) {
+!(rx-fc  IEEE80211_FCTL_TODS) 
+(rx-fc  IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)
+   || !rx-u.rx.ra_match) {
/* Drop IBSS frames and frames for other hosts
 * silently. */
return TXRX_DROP;

--
-- 
Jouni MalinenPGP id EFC895FA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Herbert Xu
On Mon, Aug 07, 2006 at 03:03:33PM -0400, jamal wrote:
 
 -#define E1000_TX_WEIGHT 64
 -   /* weight of a sort for tx, to avoid endless transmit
 cleanup */
 -   if (count++ == E1000_TX_WEIGHT) break;
 +   /* avoid endless transmit cleanup */
 +   if (count++ == tx_ring-prunet) break;
 
 As you can see E1000_TX_WEIGHT threshold exists today and you are right
 if no TX interrupts, packet arrivals or scheduled wakes happen then the
 descriptor that was not pruned will sit there forever (which is a bad
 thing for TCP). Are we in sync?
 If yes, what is the likelihood they will sit there forever? I think
 perhaps some TX interrupts will happen, no?

I thought this code is only used for NAPI so as long as work was done
it'll keep calling this.

One thing I'm not sure about though is the window between the moment it
decides that there is no work and the point where the interrupts are reenabled.

What if work arrives in that time and no work ever arrives after the
interrupts are turned on again? Does that mean the work will sit there
forever?

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] ipx: header length validation needed

2006-08-07 Thread David Miller
From: Stephen Hemminger [EMAIL PROTECTED]
Date: Mon, 7 Aug 2006 13:46:36 -0700

 IPX is not checking for non-linear (and short packets) in its
 receive routine.  This is serious because it may mean it ends up
 reading past end of skb.

This takes care of ipx_rcv() but the rest of the IPX protocol
handling still has the problem, so you'll need to meticulously
follow the whole receive path and fix up all the spots that
parse subsequent parts of the IPX packet to fix this properly.

For example, take a look at ipxitf_pprop().
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH wireless-dev 3/6] d80211: Fix PLCP header length comment

2006-08-07 Thread Jouni Malinen
Fixed a typo in a comment: PLCP header length is in microseconds, not
milliseconds.

Signed-off-by: Jouni Malinen [EMAIL PROTECTED]

Index: wireless-dev/net/d80211/ieee80211.c
===
--- wireless-dev.orig/net/d80211/ieee80211.c
+++ wireless-dev/net/d80211/ieee80211.c
@@ -637,7 +637,7 @@ static int ieee80211_frame_duration(stru
 * 802.11 (DS): 15.3.3, 802.11b: 18.3.4
 * aSIFSTime = 10 usec
 * aPreambleLength = 144 usec or 72 usec with short preamble
-* aPLCPHeaderLength = 48 ms or 24 ms with short preamble
+* aPLCPHeaderLength = 48 usec or 24 usec with short preamble
 */
dur = 10; /* aSIFSTime = 10 usec */
dur += short_preamble ? (72 + 24) : (144 + 48);

--
-- 
Jouni MalinenPGP id EFC895FA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH wireless-dev 4/6] d80211: Send Layer 2 Update frames in kernel

2006-08-07 Thread Jouni Malinen
Send Layer 2 Update frame from the 802.11 code in kernel to the netdev
that the STA is bound to. If the STA is bound to another VLAN netdev,
send another update frame. This fixes an issue in which a local bridge
table was not updated when hostapd sent this frame.

Signed-off-by: Jouni Malinen [EMAIL PROTECTED]

Index: wireless-dev/net/d80211/ieee80211_ioctl.c
===
--- wireless-dev.orig/net/d80211/ieee80211_ioctl.c
+++ wireless-dev/net/d80211/ieee80211_ioctl.c
@@ -15,6 +15,7 @@
 #include linux/types.h
 #include linux/slab.h
 #include linux/skbuff.h
+#include linux/etherdevice.h
 #include linux/if_arp.h
 #include linux/wireless.h
 #include net/iw_handler.h
@@ -215,6 +216,52 @@ static int ieee80211_ioctl_flush(struct 
 }
 
 
+/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
+struct iapp_layer2_update {
+   u8 da[ETH_ALEN]; /* broadcast */
+   u8 sa[ETH_ALEN]; /* STA addr */
+   u16 len; /* 6 */
+   u8 dsap; /* 0 */
+   u8 ssap; /* 0 */
+   u8 control;
+   u8 xid_info[3];
+} __attribute__ ((packed));
+
+static void ieee80211_send_layer2_update(struct net_device *dev,
+const u8 *addr)
+{
+   struct iapp_layer2_update *msg;
+   struct sk_buff *skb;
+
+   /* Send Level 2 Update Frame to update forwarding tables in layer 2
+* bridge devices */
+
+   skb = dev_alloc_skb(sizeof(*msg));
+   if (skb == NULL)
+   return;
+   msg = (struct iapp_layer2_update *) skb_put(skb, sizeof(*msg));
+
+   /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
+* Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
+
+   memset(msg-da, 0xff, ETH_ALEN);
+   memcpy(msg-sa, addr, ETH_ALEN);
+   msg-len = htons(6);
+   msg-dsap = 0;
+   msg-ssap = 0x01; /* NULL LSAP, CR Bit: Response */
+   msg-control = 0xaf; /* XID response lsb.F101.
+ * F=0 (no poll command; unsolicited frame) */
+   msg-xid_info[0] = 0x81; /* XID format identifier */
+   msg-xid_info[1] = 1; /* LLC types/classes: Type 1 LLC */
+   msg-xid_info[2] = 0; /* XID sender's receive window size (RW) */
+
+   skb-dev = dev;
+   skb-protocol = eth_type_trans(skb, dev);
+   memset(skb-cb, 0, sizeof(skb-cb));
+   netif_rx(skb);
+}
+
+
 static int ieee80211_ioctl_add_sta(struct net_device *dev,
   struct prism2_hostapd_param *param)
 {
@@ -296,6 +343,10 @@ static int ieee80211_ioctl_add_sta(struc
 
sta_info_put(sta);
 
+   if (sdata-type == IEEE80211_IF_TYPE_AP ||
+   sdata-type == IEEE80211_IF_TYPE_VLAN)
+   ieee80211_send_layer2_update(dev, param-sta_addr);
+
return 0;
 }
 
@@ -1168,6 +1219,10 @@ static int ieee80211_ioctl_set_sta_vlan(
   dev-name, MAC_ARG(param-sta_addr),
new_vlan_dev-name);
 #endif
+   if (sta-dev != new_vlan_dev) {
+   ieee80211_send_layer2_update(new_vlan_dev,
+sta-addr);
+   }
 sta-dev = new_vlan_dev;
sta-vlan_id = param-u.set_sta_vlan.vlan_id;
 dev_put(new_vlan_dev);

--
-- 
Jouni MalinenPGP id EFC895FA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH wireless-dev 6/6] d80211: Fix TKIP replay protection

2006-08-07 Thread Jouni Malinen
Fixed TKIP replay protection for the case where hwaccel is enabled.
rx_initialized flag was not set in this case and the TSC validation
was skipped for the frames.

Signed-off-by: Jouni Malinen [EMAIL PROTECTED]

Index: wireless-dev/net/d80211/tkip.c
===
--- wireless-dev.orig/net/d80211/tkip.c
+++ wireless-dev/net/d80211/tkip.c
@@ -286,6 +286,7 @@ int ieee80211_tkip_decrypt_data(struct c
 
if (only_iv) {
res = TKIP_DECRYPT_OK;
+   key-u.tkip.rx_initialized[queue] = 1;
goto done;
}
 

--
-- 
Jouni MalinenPGP id EFC895FA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: tg3: tg3_stop_block timed out

2006-08-07 Thread Bernd Schubert
Hi Michael,

thanks for your help!

On Tuesday 08 August 2006 01:07, Michael Chan wrote:
  ... [many hamilton not responding messages]
  4554928.798000] nfs: server hamilton not responding, still trying
  [4554935.319000] nfs: server hamilton not responding, still trying
  [4555468.94] NETDEV WATCHDOG: eth1: transmit timed out
  [4555468.94] tg3: eth1: transmit timed out, resetting
  [4555469.044000] tg3: tg3_stop_block timed out, ofs=3400 enable_bit=2
  [4555469.147000] tg3: tg3_stop_block timed out, ofs=2400 enable_bit=2
  [4555469.251000] tg3: tg3_stop_block timed out, ofs=1400 enable_bit=2
  [4555469.354000] tg3: tg3_stop_block timed out, ofs=c00 enable_bit=2
  [4555469.433000] tg3: eth1: Link is down.
  [4555472.593000] tg3: eth1: Link is up at 1000 Mbps, full duplex.
  [4555472.594000] tg3: eth1: Flow control is on for TX and on for RX.
  [4555498.016000] nfs: server 129.206.21.200 OK
  [4555648.015000] nfs: server 129.206.21.200 OK
  ... [many ok messages]

 I need to know what hardware you're using so please send me the tg3
 probing output for eth1 when you load the driver. Do you have TSO
 enabled?


tg3.c:v3.49 (Feb 2, 2006)
acpi_bus-0201 [01] bus_set_power : Device is not power manageable
eth1: Tigon3 [partno(BCM95704A6) rev 2003 PHY(5704)] (PCIX:100MHz:64-bit) 
10/100/1000BaseT Ethernet 00:e0:81:2b:aa:28
eth1: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[1] Split[0] WireSpeed[1] TSOcap[0]
eth1: dma_rwctrl[769f4000] dma_mask[64-bit]
eth2: Tigon3 [partno(BCM95704A6) rev 2003 PHY(5704)] (PCIX:100MHz:64-bit) 
10/100/1000BaseT Ethernet 00:e0:81:2b:aa:29
eth2: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[0] Split[0] WireSpeed[1] TSOcap[1]
eth2: dma_rwctrl[769f4000] dma_mask[64-bit]

The NIC is onboard a Tyan S2882. 

:02:09.0 Ethernet controller: Broadcom Corporation NetXtreme BCM5704 
Gigabit Ethernet (rev 03)
Subsystem: Broadcom Corporation: Unknown device 1644
Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 24
Memory at fc8c (64-bit, non-prefetchable) [size=64K]
Memory at fc8b (64-bit, non-prefetchable) [size=64K]
Capabilities: [40]  Capabilities: [48] Power Management version 2
Capabilities: [50] Vital Product Data
Capabilities: [58] Message Signalled Interrupts: 64bit+ Queue=0/3 
Enable-

:02:09.1 Ethernet controller: Broadcom Corporation NetXtreme BCM5704 
Gigabit Ethernet (rev 03)
Subsystem: Broadcom Corporation: Unknown device 1644
Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 25
Memory at fc8e (64-bit, non-prefetchable) [size=64K]
Memory at fc8d (64-bit, non-prefetchable) [size=64K]
Capabilities: [40]  Capabilities: [48] Power Management version 2
Capabilities: [50] Vital Product Data
Capabilities: [58] Message Signalled Interrupts: 64bit+ Queue=0/3 
Enable-


The driver is compiled into the kernel (it's an nfs-root booted system and 
NIC modules are presently not supported by our initrd).
So the default option for tso is set. Is there any way to determine the 
present tso setting? With ethtool I only find the options to turn it off/on, 
but none to query the current state.


Thanks a lot,
Bernd


-- 
Bernd Schubert
PCI / Theoretische Chemie
Universität Heidelberg
INF 229
69120 Heidelberg

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH wireless-dev 0/6] Set of small fixes to net/d80211

2006-08-07 Thread Jouni Malinen
Here's a set of small fixes to net/d80211 from the Devicescape tree.
Please consider applying.

--
-- 
Jouni MalinenPGP id EFC895FA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH wireless-dev 1/6] d80211: Fix RTS threshold use

2006-08-07 Thread Jouni Malinen
Fixed dot11RTSThreshold use which was off-by-3:
- must add FCS_LEN to the skb->len
- frame length needs to be greater than threshold; not greater than
  or equal

Signed-off-by: Jouni Malinen [EMAIL PROTECTED]

Index: wireless-dev/net/d80211/ieee80211.c
===
--- wireless-dev.orig/net/d80211/ieee80211.c
+++ wireless-dev/net/d80211/ieee80211.c
@@ -762,7 +762,7 @@ ieee80211_tx_h_misc(struct ieee80211_txr
struct ieee80211_tx_control *control = tx-u.tx.control;
 
if (!is_multicast_ether_addr(hdr-addr1)) {
-   if (tx-skb-len = tx-local-rts_threshold 
+   if (tx-skb-len + FCS_LEN  tx-local-rts_threshold 
tx-local-rts_threshold  IEEE80211_MAX_RTS_THRESHOLD) {
control-use_rts_cts = 1;
control-retry_limit =

--
-- 
Jouni MalinenPGP id EFC895FA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH wireless-dev 5/6] d80211: Fix ieee80211_remove_tx_extra() if key not configured

2006-08-07 Thread Jouni Malinen
QoS header processing mangled unencrypted WMM frames on software
retry. The QoS data needs to be removed even when encryption key is
not configured.

Signed-off-by: Jouni Malinen [EMAIL PROTECTED]

Index: wireless-dev/net/d80211/ieee80211.c
===
--- wireless-dev.orig/net/d80211/ieee80211.c
+++ wireless-dev/net/d80211/ieee80211.c
@@ -3977,11 +3977,11 @@ static void ieee80211_remove_tx_extra(st
pkt_data-requeue = control-requeue;
pkt_data-queue = control-queue;
 
-   if (key == NULL)
-   return;
-
hdrlen = ieee80211_get_hdrlen_from_skb(skb);
 
+   if (key == NULL)
+   goto no_key;
+
switch (key-alg) {
case ALG_WEP:
iv_len = WEP_IV_LEN;
@@ -3996,7 +3996,7 @@ static void ieee80211_remove_tx_extra(st
mic_len = CCMP_MIC_LEN;
break;
default:
-   return;
+   goto no_key;
}
 
if (skb-len = mic_len  key-force_sw_encrypt)
@@ -4006,6 +4006,7 @@ static void ieee80211_remove_tx_extra(st
skb_pull(skb, iv_len);
}
 
+no_key:
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb-data;
u16 fc = le16_to_cpu(hdr-frame_control);

--
-- 
Jouni MalinenPGP id EFC895FA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] ipx: header length validation needed

2006-08-07 Thread Stephen Hemminger
This patch will linearize and check there is enough data.
It handles the pprop case as well as avoiding a whole audit of
the routing code.

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]

--- a/net/ipx/af_ipx.c  2006-08-07 13:45:59.0 -0700
+++ b/net/ipx/af_ipx.c  2006-08-07 16:34:00.0 -0700
@@ -1649,7 +1649,8 @@
ipx_pktsize = ntohs(ipx-ipx_pktsize);

/* Too small or invalid header? */
-   if (ipx_pktsize  sizeof(struct ipxhdr) || ipx_pktsize  skb-len)
+   if (ipx_pktsize  sizeof(struct ipxhdr)
+  || !pskb_may_pull(skb, ipx_pktsize))
goto drop;
 
if (ipx-ipx_checksum != IPX_NO_CHECKSUM 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Brandeburg, Jesse
On Tue, 8 Aug 2006, Herbert Xu wrote:
  -#define E1000_TX_WEIGHT 64
  -   /* weight of a sort for tx, to avoid endless transmit
  cleanup */
  -   if (count++ == E1000_TX_WEIGHT) break;
  +   /* avoid endless transmit cleanup */
  +   if (count++ == tx_ring-prunet) break;
  
  As you can see E1000_TX_WEIGHT threshold exists today and you are right
  if no TX interrupts, packet arrivals or scheduled wakes happen then that
  descriptor that was not pruned will sit there forever (which is a bad
  thing for TCP). Are we in sync?
  If yes, what is the likelihood they will sit there forever? I think
  perhaps some TX interrupts will happen, no?
 
 I thought this code is only used for NAPI so as long as work was done
 it'll keep calling this.

yes, you're correct.
 
 One thing I'm not sure about though is the time between it decides that
 there is no work and the point where the interrupts are reenabled.

e1000 only clears the interrupts when it reads ICR in e1000_intr (before 
scheduling napi poll) so any interrupts that occur while polling (and 
interrupts are disabled) will cause a new assertion once interrupts are 
re-enabled.  Sometimes a little bit inefficient due to extra trips through 
poll, but guarantees never to miss an int.  I'm open to creative ways to 
avoid this, but adding an I/O read in e1000_clean would be pretty yucky.

 What if work arrives in that time and no work ever arrives after the
 interrupts are turned on again? Does that mean the work will sit there
 forever?

nope, see above.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [e1000]: Remove unnecessary tx_lock

2006-08-07 Thread Herbert Xu
On Mon, Aug 07, 2006 at 04:35:36PM -0700, Brandeburg, Jesse wrote:
 
 e1000 only clears the interrupts when it reads ICR in e1000_intr (before 
 scheduling napi poll) so any interrupts that occur while polling (and 
 interrupts are disabled) will cause a new assertion once interrupts are 
 re-enabled.  Sometimes a little bit inefficient due to extra trips through 
 poll, but guarantees never to miss an int.  I'm open to creative ways to 
 avoid this, but adding an I/O read in e1000_clean would be pretty yucky.

The standard solution in Linux is to clear and recheck.  So just before
you reenable the interrupts you'd clear pending interrupts again and
check for rx/tx work, if there is work then you just go back to polling.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: tg3: tg3_stop_block timed out

2006-08-07 Thread Michael Chan
On Tue, 2006-08-08 at 01:24 +0200, Bernd Schubert wrote:

 
 tg3.c:v3.49 (Feb 2, 2006)
 acpi_bus-0201 [01] bus_set_power : Device is not power manageable
 eth1: Tigon3 [partno(BCM95704A6) rev 2003 PHY(5704)] (PCIX:100MHz:64-bit) 
 10/100/1000BaseT Ethernet 00:e0:81:2b:aa:28
 eth1: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[1] Split[0] WireSpeed[1] TSOcap[0]
 eth1: dma_rwctrl[769f4000] dma_mask[64-bit]
 eth2: Tigon3 [partno(BCM95704A6) rev 2003 PHY(5704)] (PCIX:100MHz:64-bit) 
 10/100/1000BaseT Ethernet 00:e0:81:2b:aa:29
 eth2: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[0] Split[0] WireSpeed[1] TSOcap[1]
 eth2: dma_rwctrl[769f4000] dma_mask[64-bit]
 

You have ASF enabled on eth1 but not on eth2 so I wonder if ASF is
causing the problem.  Can you run the same traffic on eth2 and see if
you get the same timeout problem?  Thanks.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Excess use of packed attribute

2006-08-07 Thread Sridhar Samudrala
On Mon, 2006-08-07 at 13:34 -0700, Stephen Hemminger wrote:
 After reading:
   http://bugzilla.kernel.org/show_bug.cgi?id=6693
 
 I noticed there were stupid uses of packed attribute in several network 
 headers.
 
 Silly offenders:  include/net/ipx.h
   include/net/ieee80211.h
   include/net/ip6_tunnel.h
   include/net/ndisc.h
   include/linux/if_ether.h
   include/linux/if_fddi.h
   
   include/linux/sctp.h -- really bad

All the structures in sctp.h that use packed attribute define
standard on-wire SCTP chunk/parameter formats. They need to be at the
exact offsets as they go on wire.
I think we saw some issues without the packed attribute on 64-bit archs
and just to be safe we added packed to all the on-wire structures.

Thanks
Sridhar

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.

2006-08-07 Thread Jesse Brandeburg

On 8/7/06, Herbert Xu [EMAIL PROTECTED] wrote:

On Mon, Aug 07, 2006 at 11:31:03AM +0400, Evgeniy Polyakov wrote:

 Only if they form contiguous region?
 Jesse, is it possible for every e1000 chip to split frame into several
 page-sized chunks i.e. create some kind of receiving scatter-gather?


now you get to the meat of the problem.  Yes, all versions of e1000
can receive packets longer than the receive data area in the
descriptor.  If the data area is shorter than the packet, then the
data overflows into the next descriptor.



Actually, it was Chris Leech who raised this possibility:

: Yes, e1000 devices will spill over and use multiple buffers for a
: single frame.  We've been trying to find a good way to use multiple
: buffers to take care of these allocation problems.  The structure of
: the sk_buff does not make it easy.  Or should I say that it's the
: limitation that drivers are not allowed to chain together multiple
: sk_buffs to represent a single frame that does not make it easy.

Perhaps he can enlighten us.


Or since i'm here... in any case we had driver code (see driver
6.2.15) that did this at one point, but we removed it because it was
using frag_list

So here is our problem with the network driver API.
the only way to indicate multiple buffer (descriptor) receives is to
use nr_frags.  Our non split-header hardware needs power of 2
allocations *except* in the 1500 byte MTU case where we can optimize
by having the hardware drop all frames > 1522 bytes

we would like to have a method to use alloc_skb to get packets from
slab to receive into and then chain them together.  Right now that is
not possible because you can't map alloc_skb'd data areas directly to
pages to put into nr_frags.

much of this comes from the requirement that the stack free the skb we
allocated.  if we had an async callback for the driver to take care of
freeing the skb then we could
a) recycle
b) handle pages in some efficient manner.

also, eth_type_trans wants skb->data to point to header, which would
require us to memcpy data from a page back to skb->data.

We could use help to get this done and multiple drivers would benefit.
I can't get it done by myself, as much as I would like to.

As for Evgeniy's suggestion of using the end of the e1000 receive
buffer to store something I think it is a bad idea. Our hardware deals
with powers of 2.  From the e1000 manual:
=
LPE controls whether long packet reception is permitted. Hardware
discards long packets if LPE is 0. A long packet is one longer than
1522 bytes. If LPE is 1, the maximum packet size that the device can
receive is 16384 bytes.
=
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] FS_ENET: use PAL for mii management

2006-08-07 Thread Vitaly Bordug

This patch should update the fs_enet infrastructure to utilize
Phy Abstraction Layer subsystem. Along with the above, there are apparent
bugfixes, overhaul and improvements.

Signed-off-by: Vitaly Bordug [EMAIL PROTECTED]
---

 drivers/net/fs_enet/Makefile   |6 
 drivers/net/fs_enet/fec.h  |   42 +++
 drivers/net/fs_enet/fs_enet-main.c |  207 +--
 drivers/net/fs_enet/fs_enet-mii.c  |  505 
 drivers/net/fs_enet/fs_enet.h  |   40 ++-
 drivers/net/fs_enet/mac-fcc.c  |   32 ++
 drivers/net/fs_enet/mac-fec.c  |  142 +-
 drivers/net/fs_enet/mac-scc.c  |4 
 drivers/net/fs_enet/mii-bitbang.c  |  448 
 drivers/net/fs_enet/mii-fec.c  |  243 +
 drivers/net/fs_enet/mii-fixed.c|   91 --
 11 files changed, 711 insertions(+), 1049 deletions(-)

diff --git a/drivers/net/fs_enet/Makefile b/drivers/net/fs_enet/Makefile
index d6dd3f2..02d4dc1 100644
--- a/drivers/net/fs_enet/Makefile
+++ b/drivers/net/fs_enet/Makefile
@@ -4,7 +4,7 @@ #
 
 obj-$(CONFIG_FS_ENET) += fs_enet.o
 
-obj-$(CONFIG_8xx) += mac-fec.o mac-scc.o
-obj-$(CONFIG_8260) += mac-fcc.o
+obj-$(CONFIG_8xx) += mac-fec.o mac-scc.o mii-fec.o
+obj-$(CONFIG_CPM2) += mac-fcc.o mii-bitbang.o
 
-fs_enet-objs := fs_enet-main.o fs_enet-mii.o mii-bitbang.o mii-fixed.o
+fs_enet-objs := fs_enet-main.o
diff --git a/drivers/net/fs_enet/fec.h b/drivers/net/fs_enet/fec.h
new file mode 100644
index 000..e980527
--- /dev/null
+++ b/drivers/net/fs_enet/fec.h
@@ -0,0 +1,42 @@
+#ifndef FS_ENET_FEC_H
+#define FS_ENET_FEC_H
+
+/* CRC polynomium used by the FEC for the multicast group filtering */
+#define FEC_CRC_POLY   0x04C11DB7
+
+#define FEC_MAX_MULTICAST_ADDRS64
+
+/* Interrupt events/masks.
+*/
+#define FEC_ENET_HBERR 0x8000U /* Heartbeat error  */
+#define FEC_ENET_BABR  0x4000U /* Babbling receiver*/
+#define FEC_ENET_BABT  0x2000U /* Babbling transmitter */
+#define FEC_ENET_GRA   0x1000U /* Graceful stop complete   */
+#define FEC_ENET_TXF   0x0800U /* Full frame transmitted   */
+#define FEC_ENET_TXB   0x0400U /* A buffer was transmitted */
+#define FEC_ENET_RXF   0x0200U /* Full frame received  */
+#define FEC_ENET_RXB   0x0100U /* A buffer was received*/
+#define FEC_ENET_MII   0x0080U /* MII interrupt*/
+#define FEC_ENET_EBERR 0x0040U /* SDMA bus error   */
+
+#define FEC_ECNTRL_PINMUX  0x0004
+#define FEC_ECNTRL_ETHER_EN0x0002
+#define FEC_ECNTRL_RESET   0x0001
+
+#define FEC_RCNTRL_BC_REJ  0x0010
+#define FEC_RCNTRL_PROM0x0008
+#define FEC_RCNTRL_MII_MODE0x0004
+#define FEC_RCNTRL_DRT 0x0002
+#define FEC_RCNTRL_LOOP0x0001
+
+#define FEC_TCNTRL_FDEN0x0004
+#define FEC_TCNTRL_HBC 0x0002
+#define FEC_TCNTRL_GTS 0x0001
+
+
+
+/*
+ * Delay to wait for FEC reset command to complete (in us)
+ */
+#define FEC_RESET_DELAY50
+#endif
diff --git a/drivers/net/fs_enet/fs_enet-main.c 
b/drivers/net/fs_enet/fs_enet-main.c
index f6abff5..df62506 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -37,6 +37,7 @@ #include linux/ethtool.h
 #include linux/bitops.h
 #include linux/fs.h
 #include linux/platform_device.h
+#include linux/phy.h
 
 #include linux/vmalloc.h
 #include asm/pgtable.h
@@ -682,35 +683,6 @@ static void fs_free_irq(struct net_devic
(*fep-ops-post_free_irq)(dev, irq);
 }
 
-/**/
-
-/* This interrupt occurs when the PHY detects a link change. */
-static irqreturn_t
-fs_mii_link_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
-   struct net_device *dev = dev_id;
-   struct fs_enet_private *fep;
-   const struct fs_platform_info *fpi;
-
-   fep = netdev_priv(dev);
-   fpi = fep-fpi;
-
-   /*
-* Acknowledge the interrupt if possible. If we have not
-* found the PHY yet we can't process or acknowledge the
-* interrupt now. Instead we ignore this interrupt for now,
-* which we can do since it is edge triggered. It will be
-* acknowledged later by fs_enet_open().
-*/
-   if (!fep-phy)
-   return IRQ_NONE;
-
-   fs_mii_ack_int(dev);
-   fs_mii_link_status_change_check(dev, 0);
-
-   return IRQ_HANDLED;
-}
-
 static void fs_timeout(struct net_device *dev)
 {
struct fs_enet_private *fep = netdev_priv(dev);
@@ -722,10 +694,13 @@ static void fs_timeout(struct net_device
spin_lock_irqsave(fep-lock, flags);
 
if (dev-flags  IFF_UP) {
+   phy_stop(fep-phydev);
(*fep-ops-stop)(dev);
(*fep-ops-restart)(dev);
+   phy_start(fep-phydev);
   

[PATCH 1/3] PAL: Support of the fixed PHY

2006-08-07 Thread Vitaly Bordug

This makes it possible for HW PHY-less boards to utilize PAL goodies.
Generic routines to connect to fixed PHY are provided, as well as ability
to specify software callback that fills up link, speed, etc. information
into PHY descriptor (the latter feature not tested so far).

Signed-off-by: Vitaly Bordug [EMAIL PROTECTED]
---

 drivers/net/phy/Kconfig  |   17 ++
 drivers/net/phy/Makefile |1 
 drivers/net/phy/fixed.c  |  358 ++
 drivers/net/phy/phy_device.c |   51 --
 include/linux/phy.h  |1 
 5 files changed, 407 insertions(+), 21 deletions(-)

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 2ba6d3a..b79ec0d 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -56,5 +56,22 @@ config SMSC_PHY
---help---
  Currently supports the LAN83C185 PHY
 
+config FIXED_PHY
+   tristate Drivers for PHY emulation on fixed speed/link
+   depends on PHYLIB
+   ---help---
+ Adds the driver to PHY layer to cover the boards that do not have any 
PHY bound,
+ but with the ability to manipulate with speed/link in software. The 
relavant MII
+ speed/duplex parameters could be effectively handled in 
user-specified  fuction.
+ Currently tested with mpc866ads.
+
+config FIXED_MII_10_FDX
+   bool Emulation for 10M Fdx fixed PHY behavior
+   depends on FIXED_PHY
+
+config FIXED_MII_100_FDX
+   bool Emulation for 100M Fdx fixed PHY behavior
+   depends on FIXED_PHY
+
 endmenu
 
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index a00e619..320f832 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_LXT_PHY) += lxt.o
 obj-$(CONFIG_QSEMI_PHY)+= qsemi.o
 obj-$(CONFIG_SMSC_PHY) += smsc.o
 obj-$(CONFIG_VITESSE_PHY)  += vitesse.o
+obj-$(CONFIG_FIXED_PHY)+= fixed.o
diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c
new file mode 100644
index 000..5d6442c
--- /dev/null
+++ b/drivers/net/phy/fixed.c
@@ -0,0 +1,358 @@
+/*
+ * drivers/net/phy/fixed.c
+ *
+ * Driver for fixed PHYs, when transceiver is able to operate in one fixed 
mode.
+ *
+ * Author: Vitaly Bordug
+ *
+ * Copyright (c) 2006 MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include linux/config.h
+#include linux/kernel.h
+#include linux/sched.h
+#include linux/string.h
+#include linux/errno.h
+#include linux/unistd.h
+#include linux/slab.h
+#include linux/interrupt.h
+#include linux/init.h
+#include linux/delay.h
+#include linux/netdevice.h
+#include linux/etherdevice.h
+#include linux/skbuff.h
+#include linux/spinlock.h
+#include linux/mm.h
+#include linux/module.h
+#include linux/mii.h
+#include linux/ethtool.h
+#include linux/phy.h
+
+#include asm/io.h
+#include asm/irq.h
+#include asm/uaccess.h
+
+#define MII_REGS_NUM   7
+
+/*
+The idea is to emulate normal phy behavior by responding with
+pre-defined values to mii BMCR read, so that read_status hook could
+take all the needed info.
+*/
+
+struct fixed_phy_status {
+   u8  link;
+   u16 speed;
+   u8  duplex;
+};
+
+/*-
+ *  Private information hoder for mii_bus
+ 
*-*/
+struct fixed_info {
+   u16 *regs;
+   u8 regs_num;
+   struct fixed_phy_status phy_status;
+   struct phy_device *phydev; /* pointer to the container */
+   /* link  speed cb */
+   int(*link_update)(struct net_device*, struct fixed_phy_status*);
+
+};
+
+/*-
+ *  If something weird is required to be done with link/speed,
+ * network driver is able to assign a function to implement this.
+ * May be useful for PHY's that need to be software-driven.
+ 
*-*/
+int fixed_mdio_set_link_update(struct phy_device* phydev,
+   int(*link_update)(struct net_device*, struct fixed_phy_status*))
+{
+   struct fixed_info *fixed;
+
+   if(link_update == NULL)
+   return -EINVAL;
+
+   if(phydev) {
+   if(phydev-bus) {
+   fixed = phydev-bus-priv;
+   fixed-link_update = link_update;
+   return 0;
+   }
+   }
+   return -EINVAL;
+}
+EXPORT_SYMBOL(fixed_mdio_set_link_update);
+
+/*-
+ *  This is used for updating internal mii regs from the status
+ 

[PATCH 3/3] ppc32: board-specific part of fs_enet update

2006-08-07 Thread Vitaly Bordug

This contains board-specific portion to respect driver changes (for 8272ads,
885ads and 866ads). Altered platform_data structures as well as initial
setup routines relevant to fs_enet.

Signed-off-by: Vitaly Bordug [EMAIL PROTECTED]
---

 arch/ppc/platforms/85xx/mpc8560_ads.c|   89 
 arch/ppc/platforms/85xx/mpc85xx_ads_common.h |   19 +++
 arch/ppc/platforms/mpc8272ads_setup.c|  154 -
 arch/ppc/platforms/mpc866ads_setup.c |  192 +-
 arch/ppc/platforms/mpc885ads_setup.c |  175 +---
 arch/ppc/platforms/pq2ads_pd.h   |   82 ---
 arch/ppc/syslib/mpc85xx_devices.c|   89 
 arch/ppc/syslib/mpc8xx_devices.c |8 +
 arch/ppc/syslib/mpc8xx_sys.c |6 +
 arch/ppc/syslib/pq2_devices.c|5 +
 arch/ppc/syslib/pq2_sys.c|3 
 include/asm-ppc/cpm2.h   |   95 +
 include/asm-ppc/mpc8260.h|1 
 include/asm-ppc/mpc8xx.h |1 
 include/linux/fs_enet_pd.h   |   50 +++
 15 files changed, 578 insertions(+), 391 deletions(-)

diff --git a/arch/ppc/platforms/85xx/mpc8560_ads.c 
b/arch/ppc/platforms/85xx/mpc8560_ads.c
index d90cd24..94badaf 100644
--- a/arch/ppc/platforms/85xx/mpc8560_ads.c
+++ b/arch/ppc/platforms/85xx/mpc8560_ads.c
@@ -29,6 +29,7 @@ #include linux/serial_core.h
 #include linux/initrd.h
 #include linux/module.h
 #include linux/fsl_devices.h
+#include linux/fs_enet_pd.h
 
 #include asm/system.h
 #include asm/pgtable.h
@@ -58,6 +59,71 @@ #include syslib/ppc85xx_setup.h
  * Setup the architecture
  *
  */
+static void init_fcc_ioports(void)
+{
+   struct immap *immap;
+   struct io_port *io;
+   u32 tempval;
+
+   immap = cpm2_immr;
+
+   io = immap-im_ioport;
+   /* FCC2/3 are on the ports B/C. */
+   tempval = in_be32(io-iop_pdirb);
+   tempval = ~PB2_DIRB0;
+   tempval |= PB2_DIRB1;
+   out_be32(io-iop_pdirb, tempval);
+
+   tempval = in_be32(io-iop_psorb);
+   tempval = ~PB2_PSORB0;
+   tempval |= PB2_PSORB1;
+   out_be32(io-iop_psorb, tempval);
+
+   tempval = in_be32(io-iop_pparb);
+   tempval |= (PB2_DIRB0 | PB2_DIRB1);
+   out_be32(io-iop_pparb, tempval);
+
+   tempval = in_be32(io-iop_pdirb);
+   tempval = ~PB3_DIRB0;
+   tempval |= PB3_DIRB1;
+   out_be32(io-iop_pdirb, tempval);
+
+   tempval = in_be32(io-iop_psorb);
+   tempval = ~PB3_PSORB0;
+   tempval |= PB3_PSORB1;
+   out_be32(io-iop_psorb, tempval);
+
+   tempval = in_be32(io-iop_pparb);
+   tempval |= (PB3_DIRB0 | PB3_DIRB1);
+   out_be32(io-iop_pparb, tempval);
+
+tempval = in_be32(io-iop_pdirc);
+tempval |= PC3_DIRC1;
+out_be32(io-iop_pdirc, tempval);
+
+tempval = in_be32(io-iop_pparc);
+tempval |= PC3_DIRC1;
+out_be32(io-iop_pparc, tempval);
+
+   /* Port C has clocks..  */
+   tempval = in_be32(io-iop_psorc);
+   tempval = ~(CLK_TRX);
+   out_be32(io-iop_psorc, tempval);
+
+   tempval = in_be32(io-iop_pdirc);
+   tempval = ~(CLK_TRX);
+   out_be32(io-iop_pdirc, tempval);
+   tempval = in_be32(io-iop_pparc);
+   tempval |= (CLK_TRX);
+   out_be32(io-iop_pparc, tempval);
+
+   /* Configure Serial Interface clock routing.
+* First,  clear all FCC bits to zero,
+* then set the ones we want.
+*/
+   immap-im_cpmux.cmx_fcr = ~(CPMUX_CLK_MASK);
+   immap-im_cpmux.cmx_fcr |= CPMUX_CLK_ROUTE;
+}
 
 static void __init
 mpc8560ads_setup_arch(void)
@@ -66,6 +132,7 @@ mpc8560ads_setup_arch(void)
unsigned int freq;
struct gianfar_platform_data *pdata;
struct gianfar_mdio_data *mdata;
+   struct fs_platform_info *fpi;
 
cpm2_reset();
 
@@ -110,6 +177,28 @@ #endif
memcpy(pdata-mac_addr, binfo-bi_enet1addr, 6);
}
 
+   init_fcc_ioports();
+   ppc_sys_device_remove(MPC85xx_CPM_FCC1);
+
+   fpi = (struct fs_platform_info *) ppc_sys_get_pdata(MPC85xx_CPM_FCC2);
+   if (fpi) {
+   memcpy(fpi-macaddr, binfo-bi_enet2addr, 6);
+   fpi-bus_id = 0:02;
+   fpi-phy_addr = 2;
+   fpi-dpram_offset = (u32)cpm2_immr-im_dprambase;
+   fpi-fcc_regs_c = (u32)cpm2_immr-im_fcc_c[1];
+   }
+
+   fpi = (struct fs_platform_info *) ppc_sys_get_pdata(MPC85xx_CPM_FCC3);
+   if (fpi) {
+   memcpy(fpi-macaddr, binfo-bi_enet2addr, 6);
+   fpi-macaddr[5] += 1;
+   fpi-bus_id = 0:03;
+   fpi-phy_addr = 3;
+   fpi-dpram_offset = (u32)cpm2_immr-im_dprambase;
+   fpi-fcc_regs_c = (u32)cpm2_immr-im_fcc_c[2];
+   }
+
 #ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start)
ROOT_DEV = Root_RAM0;
diff --git 

[PATCH 0/3] FS_ENET: move to the PAL api

2006-08-07 Thread Vitaly Bordug
These are patches, that utilize Phy Abstraction Layer API in the fs_enet
Freescale SoC Ethernet driver. Comments gathered from the community addressed,
+ minor fixes and improvements.

--
Sincerely, Vitaly
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] FS_ENET: use PAL for mii management

2006-08-07 Thread Vitaly Bordug

This patch should update the fs_enet infrastructure to utilize
Phy Abstraction Layer subsystem. Along with the above, there are apparent
bugfixes, overhaul and improvements.

Signed-off-by: Vitaly Bordug [EMAIL PROTECTED]
---

 drivers/net/fs_enet/Makefile   |6 
 drivers/net/fs_enet/fec.h  |   42 +++
 drivers/net/fs_enet/fs_enet-main.c |  207 +--
 drivers/net/fs_enet/fs_enet-mii.c  |  505 
 drivers/net/fs_enet/fs_enet.h  |   40 ++-
 drivers/net/fs_enet/mac-fcc.c  |   32 ++
 drivers/net/fs_enet/mac-fec.c  |  142 +-
 drivers/net/fs_enet/mac-scc.c  |4 
 drivers/net/fs_enet/mii-bitbang.c  |  448 
 drivers/net/fs_enet/mii-fec.c  |  243 +
 drivers/net/fs_enet/mii-fixed.c|   91 --
 11 files changed, 711 insertions(+), 1049 deletions(-)

diff --git a/drivers/net/fs_enet/Makefile b/drivers/net/fs_enet/Makefile
index d6dd3f2..02d4dc1 100644
--- a/drivers/net/fs_enet/Makefile
+++ b/drivers/net/fs_enet/Makefile
@@ -4,7 +4,7 @@ #
 
 obj-$(CONFIG_FS_ENET) += fs_enet.o
 
-obj-$(CONFIG_8xx) += mac-fec.o mac-scc.o
-obj-$(CONFIG_8260) += mac-fcc.o
+obj-$(CONFIG_8xx) += mac-fec.o mac-scc.o mii-fec.o
+obj-$(CONFIG_CPM2) += mac-fcc.o mii-bitbang.o
 
-fs_enet-objs := fs_enet-main.o fs_enet-mii.o mii-bitbang.o mii-fixed.o
+fs_enet-objs := fs_enet-main.o
diff --git a/drivers/net/fs_enet/fec.h b/drivers/net/fs_enet/fec.h
new file mode 100644
index 000..e980527
--- /dev/null
+++ b/drivers/net/fs_enet/fec.h
@@ -0,0 +1,42 @@
+#ifndef FS_ENET_FEC_H
+#define FS_ENET_FEC_H
+
+/* CRC polynomium used by the FEC for the multicast group filtering */
+#define FEC_CRC_POLY   0x04C11DB7
+
+#define FEC_MAX_MULTICAST_ADDRS64
+
+/* Interrupt events/masks.
+*/
+#define FEC_ENET_HBERR 0x8000U /* Heartbeat error  */
+#define FEC_ENET_BABR  0x4000U /* Babbling receiver*/
+#define FEC_ENET_BABT  0x2000U /* Babbling transmitter */
+#define FEC_ENET_GRA   0x1000U /* Graceful stop complete   */
+#define FEC_ENET_TXF   0x0800U /* Full frame transmitted   */
+#define FEC_ENET_TXB   0x0400U /* A buffer was transmitted */
+#define FEC_ENET_RXF   0x0200U /* Full frame received  */
+#define FEC_ENET_RXB   0x0100U /* A buffer was received*/
+#define FEC_ENET_MII   0x0080U /* MII interrupt*/
+#define FEC_ENET_EBERR 0x0040U /* SDMA bus error   */
+
+#define FEC_ECNTRL_PINMUX  0x0004
+#define FEC_ECNTRL_ETHER_EN0x0002
+#define FEC_ECNTRL_RESET   0x0001
+
+#define FEC_RCNTRL_BC_REJ  0x0010
+#define FEC_RCNTRL_PROM0x0008
+#define FEC_RCNTRL_MII_MODE0x0004
+#define FEC_RCNTRL_DRT 0x0002
+#define FEC_RCNTRL_LOOP0x0001
+
+#define FEC_TCNTRL_FDEN0x0004
+#define FEC_TCNTRL_HBC 0x0002
+#define FEC_TCNTRL_GTS 0x0001
+
+
+
+/*
+ * Delay to wait for FEC reset command to complete (in us)
+ */
+#define FEC_RESET_DELAY50
+#endif
diff --git a/drivers/net/fs_enet/fs_enet-main.c 
b/drivers/net/fs_enet/fs_enet-main.c
index f6abff5..df62506 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -37,6 +37,7 @@ #include linux/ethtool.h
 #include linux/bitops.h
 #include linux/fs.h
 #include linux/platform_device.h
+#include linux/phy.h
 
 #include linux/vmalloc.h
 #include asm/pgtable.h
@@ -682,35 +683,6 @@ static void fs_free_irq(struct net_devic
(*fep-ops-post_free_irq)(dev, irq);
 }
 
-/**/
-
-/* This interrupt occurs when the PHY detects a link change. */
-static irqreturn_t
-fs_mii_link_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
-   struct net_device *dev = dev_id;
-   struct fs_enet_private *fep;
-   const struct fs_platform_info *fpi;
-
-   fep = netdev_priv(dev);
-   fpi = fep-fpi;
-
-   /*
-* Acknowledge the interrupt if possible. If we have not
-* found the PHY yet we can't process or acknowledge the
-* interrupt now. Instead we ignore this interrupt for now,
-* which we can do since it is edge triggered. It will be
-* acknowledged later by fs_enet_open().
-*/
-   if (!fep-phy)
-   return IRQ_NONE;
-
-   fs_mii_ack_int(dev);
-   fs_mii_link_status_change_check(dev, 0);
-
-   return IRQ_HANDLED;
-}
-
 static void fs_timeout(struct net_device *dev)
 {
struct fs_enet_private *fep = netdev_priv(dev);
@@ -722,10 +694,13 @@ static void fs_timeout(struct net_device
spin_lock_irqsave(fep-lock, flags);
 
if (dev-flags  IFF_UP) {
+   phy_stop(fep-phydev);
(*fep-ops-stop)(dev);
(*fep-ops-restart)(dev);
+   phy_start(fep-phydev);
  

[PATCH 1/3] PAL: Support of the fixed PHY

2006-08-07 Thread Vitaly Bordug

This makes it possible for boards without a hardware PHY to utilize PAL goodies.
Generic routines to connect to a fixed PHY are provided, as well as the ability
to specify a software callback that fills in link, speed, etc. information
in the PHY descriptor (the latter feature has not been tested so far).

Signed-off-by: Vitaly Bordug [EMAIL PROTECTED]
---

 drivers/net/phy/Kconfig  |   17 ++
 drivers/net/phy/Makefile |1 
 drivers/net/phy/fixed.c  |  358 ++
 drivers/net/phy/phy_device.c |   51 --
 include/linux/phy.h  |1 
 5 files changed, 407 insertions(+), 21 deletions(-)

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 2ba6d3a..b79ec0d 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -56,5 +56,22 @@ config SMSC_PHY
---help---
  Currently supports the LAN83C185 PHY
 
+config FIXED_PHY
+   tristate Drivers for PHY emulation on fixed speed/link
+   depends on PHYLIB
+   ---help---
+ Adds the driver to PHY layer to cover the boards that do not have any 
PHY bound,
+ but with the ability to manipulate with speed/link in software. The 
relevant MII
+ speed/duplex parameters could be effectively handled in 
user-specified function.
+ Currently tested with mpc866ads.
+
+config FIXED_MII_10_FDX
+   bool Emulation for 10M Fdx fixed PHY behavior
+   depends on FIXED_PHY
+
+config FIXED_MII_100_FDX
+   bool Emulation for 100M Fdx fixed PHY behavior
+   depends on FIXED_PHY
+
 endmenu
 
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index a00e619..320f832 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_LXT_PHY) += lxt.o
 obj-$(CONFIG_QSEMI_PHY)+= qsemi.o
 obj-$(CONFIG_SMSC_PHY) += smsc.o
 obj-$(CONFIG_VITESSE_PHY)  += vitesse.o
+obj-$(CONFIG_FIXED_PHY)+= fixed.o
diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c
new file mode 100644
index 000..5d6442c
--- /dev/null
+++ b/drivers/net/phy/fixed.c
@@ -0,0 +1,358 @@
+/*
+ * drivers/net/phy/fixed.c
+ *
+ * Driver for fixed PHYs, when transceiver is able to operate in one fixed 
mode.
+ *
+ * Author: Vitaly Bordug
+ *
+ * Copyright (c) 2006 MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include linux/config.h
+#include linux/kernel.h
+#include linux/sched.h
+#include linux/string.h
+#include linux/errno.h
+#include linux/unistd.h
+#include linux/slab.h
+#include linux/interrupt.h
+#include linux/init.h
+#include linux/delay.h
+#include linux/netdevice.h
+#include linux/etherdevice.h
+#include linux/skbuff.h
+#include linux/spinlock.h
+#include linux/mm.h
+#include linux/module.h
+#include linux/mii.h
+#include linux/ethtool.h
+#include linux/phy.h
+
+#include asm/io.h
+#include asm/irq.h
+#include asm/uaccess.h
+
+#define MII_REGS_NUM   7
+
+/*
+The idea is to emulate normal phy behavior by responding with
+pre-defined values to mii BMCR read, so that read_status hook could
+take all the needed info.
+*/
+
+struct fixed_phy_status {
+   u8  link;
+   u16 speed;
+   u8  duplex;
+};
+
+/*-
+ *  Private information holder for mii_bus
+ 
*-*/
+struct fixed_info {
+   u16 *regs;
+   u8 regs_num;
+   struct fixed_phy_status phy_status;
+   struct phy_device *phydev; /* pointer to the container */
+   /* link  speed cb */
+   int(*link_update)(struct net_device*, struct fixed_phy_status*);
+
+};
+
+/*-
+ *  If something weird is required to be done with link/speed,
+ * network driver is able to assign a function to implement this.
+ * May be useful for PHY's that need to be software-driven.
+ 
*-*/
+int fixed_mdio_set_link_update(struct phy_device* phydev,
+   int(*link_update)(struct net_device*, struct fixed_phy_status*))
+{
+   struct fixed_info *fixed;
+
+   if(link_update == NULL)
+   return -EINVAL;
+
+   if(phydev) {
+   if(phydev-bus) {
+   fixed = phydev-bus-priv;
+   fixed-link_update = link_update;
+   return 0;
+   }
+   }
+   return -EINVAL;
+}
+EXPORT_SYMBOL(fixed_mdio_set_link_update);
+
+/*-
+ *  This is used for updating internal mii regs from the status
+ 

  1   2   >