[RFC] avoid unnecessary alignement overhead in skb-data allocation.
Hello. Attached patch allows to avoid unnecessary alignment overhead in skb-data allocation. Main idea is to allocate struct skb_shared_info from cache when addition of sizeof(struct skb_shared_info) ends up in different order allocation than initial size order. This allows to solve problem with 4k allocations for 1500 MTU and 32k allocations for 9k jumbo frames for some chips. Patch was not tested, so if idea worth it I will complete it. Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 19c96d4..7474682 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -282,7 +282,8 @@ struct sk_buff { nfctinfo:3; __u8 pkt_type:3, fclone:2, - ipvs_property:1; + ipvs_property:1, + shinfo_cache:1; __be16 protocol; void (*destructor)(struct sk_buff *skb); @@ -403,7 +404,9 @@ extern unsigned int skb_find_text(stru struct ts_state *state); /* Internal */ -#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) +#define skb_shinfo(SKB) ((SKB)->shinfo_cache?\ + (struct skb_shared_info *)(*((SKB)->end)):\ + ((struct skb_shared_info *)((SKB)->end))) /** * skb_queue_empty - check if a queue is empty diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 022d889..7287814 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -69,6 +69,7 @@ #include <asm/system.h> static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly; +static kmem_cache_t *skbuff_shared_info_cache __read_mostly; /* * Keep out-of-line to prevent kernel bloat. @@ -146,6 +147,8 @@ struct sk_buff *__alloc_skb(unsigned int struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; + int order = get_order(size + sizeof(void *)); + struct skb_shared_info *sh; cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; @@ -156,11 +159,28 @@ struct sk_buff *__alloc_skb(unsigned int /* Get the DATA. Size must match skb_add_mtu(). 
*/ size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (!data) - goto nodata; + if ((1UL << order) > size + sizeof(void *) + sizeof(struct skb_shared_info)) { + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + if (!data) + goto nodata; + memset(skb, 0, offsetof(struct sk_buff, truesize)); + } else { + unsigned long *ptr; + + data = kmalloc(size, gfp_mask); + if (!data) + goto nodata; + sh = kmem_cache_alloc(skbuff_shared_info_cache, gfp_mask); + if (!sh) { + kfree(data); + goto nodata; + } + memset(skb, 0, offsetof(struct sk_buff, truesize)); + skb->shinfo_cache = 1; + ptr = data; + ptr[size] = sh; + } - memset(skb, 0, offsetof(struct sk_buff, truesize)); skb->truesize = size + sizeof(struct sk_buff); atomic_set(&skb->users, 1); skb->head = data; @@ -314,6 +334,8 @@ static void skb_release_data(struct sk_b skb_drop_fraglist(skb); kfree(skb->head); + if (skb->shinfo_cache) + kmem_cache_free(skbuff_shared_info_cache, *(skb->end)); } } @@ -500,6 +522,7 @@ #endif C(data); C(tail); C(end); + C(shinfo_cache); atomic_inc(&(skb_shinfo(skb)->dataref)); skb->cloned = 1; @@ -2057,6 +2080,14 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_fclone_cache) panic("cannot create skbuff cache"); + + skbuff_shared_info_cache = kmem_cache_create("skbuff_shared_info_cache", + sizeof(struct sbk_shared_info), + 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!skbuff_shared_info_cache) + panic("cannot create skbuff shared info cache"); } EXPORT_SYMBOL(___pskb_trim); -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
From: Evgeniy Polyakov [EMAIL PROTECTED] Date: Mon, 7 Aug 2006 10:01:56 +0400 + int order = get_order(size + sizeof(void *)); + struct skb_shared_info *sh; cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; @@ -156,11 +159,28 @@ struct sk_buff *__alloc_skb(unsigned int /* Get the DATA. Size must match skb_add_mtu(). */ size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (!data) - goto nodata; + if ((1UL << order) > size + sizeof(void *) + sizeof(struct skb_shared_info)) { get_order() returns a PAGE_SIZE order not a byte one. So this test here at the end is incorrect. It should probably be something like if ((PAGE_SIZE << order) ... I don't know if I want to eat an entire extra allocation for every SKB just to handle broken e1000 cards that can't be bothered to support non-power-of-2 receive buffer sizes and a proper MTU setting. I guess we might have to, but this is extremely unfortunate. :-/ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH -rt DO NOT APPLY] Fix for tg3 networking lockup
From: Steven Rostedt [EMAIL PROTECTED] Date: Mon, 7 Aug 2006 01:34:56 -0400 (EDT) My suggestion would be to separate that tg3_timer into 4 different timers, which is what it actually looks like. Timers have non-trivial cost. It's cheaper to have one and vector off to the necessary operations each tick internally. That's why it's implemented as one timer. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
Evgeniy Polyakov [EMAIL PROTECTED] wrote: Attached patch allows to avoid unnecessary alignment overhead in skb-data allocation. Main idea is to allocate struct skb_shared_info from cache when addition of sizeof(struct skb_shared_info) ends up in different order allocation than initial size order. This allows to solve problem with 4k allocations for 1500 MTU and 32k allocations for 9k jumbo frames for some chips. Patch was not tested, so if idea worth it I will complete it. I thought the Intel guys were saying that their NIC could write the full 16KB which means it's unsafe to use the last four bytes, no? Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
On Sun, Aug 06, 2006 at 11:23:39PM -0700, David Miller ([EMAIL PROTECTED]) wrote: + if ((1UL << order) > size + sizeof(void *) + sizeof(struct skb_shared_info)) { get_order() returns a PAGE_SIZE order not a byte one. So this test here at the end is incorrect. It should probably be something like if ((PAGE_SIZE << order) ... I don't know if I want to eat an entire extra allocation for every SKB just to handle broken e1000 cards that can't be bothered to support non-power-of-2 receive buffer sizes and a proper MTU setting. I guess we might have to, but this is extremely unfortunate. :-/ I have an even better idea - create alloc_skb_aligned() for those who know in advance that the size is always aligned to a power of 2, so an additional skb_shared_info will 100% require a higher order allocation. Then e1000 can use that instead of the usual alloc_skb(). -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
On Mon, Aug 07, 2006 at 04:29:09PM +1000, Herbert Xu ([EMAIL PROTECTED]) wrote: Evgeniy Polyakov [EMAIL PROTECTED] wrote: Attached patch allows to avoid unnecessary alignment overhead in skb-data allocation. Main idea is to allocate struct skb_shared_info from cache when addition of sizeof(struct skb_shared_info) ends up in different order allocation than initial size order. This allows to solve problem with 4k allocations for 1500 MTU and 32k allocations for 9k jumbo frames for some chips. Patch was not tested, so if idea worth it I will complete it. I thought the Intel guys were saying that their NIC could write the full 16KB which means it's unsafe to use the last four bytes, no? Well, their comments in the code say that the maximum allowed frame size is 0x3f00, so there is a little room at the end to put a pointer there, so I allocate size + sizeof(void *). If they actually can eat all 16k, then we need a pointer somewhere in the skb for shared_info, since 16k + sizeof(void *) will be aligned to 32k. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
On Mon, Aug 07, 2006 at 10:36:36AM +0400, Evgeniy Polyakov wrote: Well, theirs comments in code say, that maximum allowed frame size is 0x3f00, so there is a little place at the end to put there a pointer, so I allocate size + sizeof(void *). If they actually can eat all 16k, then we need a pointer somewhere in the skb for shared_info, since 16k + sizeof(void *) will be aligned to 32k. It would be good to get a definitive statement from them before we go down this track. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
David Miller [EMAIL PROTECTED] wrote: I don't know if I want to eat an entire extra allocation for every SKB just to handle broken e1000 cards that can't be bothered to support non-power-of-2 receive buffer sizes and a proper MTU setting. I guess we might have to, but this is extremely unfortunate. :-/ I'd hope not. Apparently they are capable of putting data into individual pages and chaining them together. The only problem is that half a page is wasted for 1500-byte packets. However, allocating 16KB packets would waste even more memory if only 1500 bytes end up getting used. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
On Mon, Aug 07, 2006 at 05:17:13PM +1000, Herbert Xu ([EMAIL PROTECTED]) wrote: David Miller [EMAIL PROTECTED] wrote: I don't know if I want to eat an entire extra allocation for every SKB just to handle broken e1000 cards that can't be bothered to support non-power-of-2 receive buffer sizes and a proper MTU setting. I guess we might have to, but this is extremely unfortunate. :-/ I'd hope not. Apparently they are capable of putting data into individual pages and chaining them together. The only problem Unfortunately not all chips are capable to do this. -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
On Mon, Aug 07, 2006 at 05:28:16PM +1000, Herbert Xu ([EMAIL PROTECTED]) wrote: On Mon, Aug 07, 2006 at 11:24:23AM +0400, Evgeniy Polyakov wrote: I'd hope not. Apparently they are capable of putting data into individual pages and chaining them together. The only problem Unfortunately not all chips are capable to do this. No not all chips are capable of header-splitting. However, from what Jesse was saying it sounded as if all (or most?) chips are capable of storing data cross pages. Only if they form contiguous region? Jesse, is it possible for every e1000 chip to split frame into several page-sized chunks i.e. create some kind of receiving scatter-gather? Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
On Mon, Aug 07, 2006 at 11:24:23AM +0400, Evgeniy Polyakov wrote: I'd hope not. Apparently they are capable of putting data into individual pages and chaining them together. The only problem Unfortunately not all chips are capable to do this. No not all chips are capable of header-splitting. However, from what Jesse was saying it sounded as if all (or most?) chips are capable of storing data cross pages. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] SNMPv2 tcpOutSegs counter error
I used tcb->end_seq instead of tcb->seq. And add a new condition 'tcb->seq == tcb->end_seq' to make ACK segment to be counted. On Sunday 06 August 2006 22:48, Herbert Xu wrote: On Sun, Aug 06, 2006 at 07:44:47PM -0700, David Miller wrote: From: Herbert Xu [EMAIL PROTECTED] Date: Mon, 07 Aug 2006 12:40:34 +1000 The general approach looks sound. I have one esoteric question though. If a retransmitted packet is coalesced with one that is yet to be transmitted (a fairly unlikely scenario, but possible I think), should it count towards OUTSEGS? Probably the packet should be counted to OUTSEGS if any of it contains new data. OK, in that case Yongjun please update your patch to test against tcb->end_seq instead of tcb->seq. Cheers, Signed-off-by: Wei Yongjun [EMAIL PROTECTED] --- a/net/ipv4/tcp_output.c 2006-08-03 18:05:22.425081936 -0400 +++ b/net/ipv4/tcp_output.c 2006-08-07 09:48:41.186372896 -0400 @@ -462,7 +462,8 @@ static int tcp_transmit_skb(struct sock if (skb->len != tcp_header_size) tcp_event_data_sent(tp, skb, sk); - TCP_INC_STATS(TCP_MIB_OUTSEGS); + if(after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) + TCP_INC_STATS(TCP_MIB_OUTSEGS); err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (likely(err <= 0)) @@ -2151,10 +2152,9 @@ int tcp_connect(struct sock *sk) skb_shinfo(buff)->tso_segs = 1; skb_shinfo(buff)->tso_size = 0; buff->csum = 0; + tp->snd_nxt = tp->write_seq; TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; - tp->snd_nxt = tp->write_seq; - tp->pushed_seq = tp->write_seq; /* Send it off. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; @@ -2164,6 +2164,11 @@ int tcp_connect(struct sock *sk) sk_charge_skb(sk, buff); tp->packets_out += tcp_skb_pcount(buff); tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); + /* change tp->snd_nxt after tcp_transmit_skb() to make this packet to be +* counted to tcpOutSegs +*/ + tp->snd_nxt = tp->write_seq; + tp->pushed_seq = tp->write_seq; TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. 
*/ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
On Mon, Aug 07, 2006 at 11:31:03AM +0400, Evgeniy Polyakov wrote: Only if they form contiguous region? Jesse, is it possible for every e1000 chip to split frame into several page-sized chunks i.e. create some kind of receiving scatter-gather? Actually, it was Chris Leech who raised this possibility: : Yes, e1000 devices will spill over and use multiple buffers for a : single frame. We've been trying to find a good way to use multiple : buffers to take care of these allocation problems. The structure of : the sk_buff does not make it easy. Or should I say that it's the : limitation that drivers are not allowed to chain together multiple : sk_buffs to represent a single frame that does not make it easy. Perhaps he can enlighten us. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Packet reordering in pcap capture file
Stephen Hemminger wrote: On Sat, 05 Aug 2006 03:28:38 -0400 Alan Shieh [EMAIL PROTECTED] wrote: Hi everyone, I sometimes see packets stored out of order in pcap files that generated by tcpdump -i any on kernel 2.4.26 with all packets arriving and departing on an e1000 NIC. That is, the ordering by receive timestamp on the packets is not the same as the ordering of the packets within the file. In my precise scenario, packets of RX packets show up in the log 230 ms later than they ought to based on the receive timestamp. The kernel behavior (e.g., the packets that are sent by this node) seems to reflect the arrival of the Rx packet at the position in the logfile, rather than the arrival time according to the timestamp. What are some of the known causes of this behavior? I'd like to know what locks, etc. might be causing this processing / capture delay. SMP or single CPU? What is the clock source being used? If you had a CPU like dual-core AMD that doesn't sync TSC's and that was the clock source, the timestamps could be wrong. Single CPU, using TSC. The behavior of the system is as if the RTT is 230ms, so I think a queue is building up somewhere within the kernel. I am trying to narrow down the possible ways my experimental code could have caused such a queue backlog. I've tried setting netdev-quota in the e1000 module to a much larger value, thus forcing the backlog to be processed faster, but that does not help. Alan - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
On Monday 07 August 2006 08:01, Evgeniy Polyakov wrote: Hello. Attached patch allows to avoid unnecessary alignment overhead in skb-data allocation. Main idea is to allocate struct skb_shared_info from cache when addition of sizeof(struct skb_shared_info) ends up in different order allocation than initial size order. This allows to solve problem with 4k allocations for 1500 MTU and 32k allocations for 9k jumbo frames for some chips. Patch was not tested, so if idea worth it I will complete it. Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] + if ((1UL << order) > size + sizeof(void *) + sizeof(struct skb_shared_info)) { + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + if (!data) + goto nodata; + memset(skb, 0, offsetof(struct sk_buff, truesize)); + } else { + unsigned long *ptr; + + data = kmalloc(size, gfp_mask); You certainly want to kmalloc(size + sizeof(void *)) here, don't you? + if (!data) + goto nodata; + sh = kmem_cache_alloc(skbuff_shared_info_cache, gfp_mask); + if (!sh) { + kfree(data); + goto nodata; + } + memset(skb, 0, offsetof(struct sk_buff, truesize)); + skb->shinfo_cache = 1; + ptr = data; + ptr[size] = sh; Eric - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
On Mon, Aug 07, 2006 at 10:05:57AM +0200, Eric Dumazet ([EMAIL PROTECTED]) wrote: + if ((1UL << order) > size + sizeof(void *) + sizeof(struct skb_shared_info)) { + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + if (!data) + goto nodata; + memset(skb, 0, offsetof(struct sk_buff, truesize)); + } else { + unsigned long *ptr; + + data = kmalloc(size, gfp_mask); You certainly want to kmalloc(size + sizeof(void *)) here, don't you? Yep. I think in the next iteration of this patch I will add an additional argument which will give the order of the aligned size (to eliminate the get_order() loop for those who know it in advance, like e1000). In case there is no room even for sizeof(void *) (which is what happens with e1000) and the allocation order is quite high (more than half of the page), then an additional field in skb can be used (or we can use it unconditionally to store the pointer to shared info if the skb is being allocated through the alloc_skb_aligned() function). Eric -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch] RFC: matching interface groups
On Fri, 2006-08-04 at 12:06 +0200, Patrick McHardy wrote: Balazs Scheidler wrote: The use-case is as follows: * I have two different subsystems creating interfaces dynamically (for example pptpd and serial pppd lines, each creating dynamic pppX interfaces), * I would like to assign a different set of iptables rules for these clients, * I would like to react to a new interface being added to a specific set in a userspace application, The reasons I see this needs new kernel functionality: * iptables supports wildcard interface matching (for example iptables -i ppp+), but as the names of the interfaces used by PPTPD and PPPD cannot be distinguished this way, this is not enough, * Reloading the iptables ruleset everytime a new interface comes up is not really feasible, as it abrupts packet processing, and validating the ruleset in the kernel can take significant amount of time, * the kernel change is very simple, adapting userspace to this change is also very simple, and in userspace various software packages can easily interoperate with each-other once this is merged. The implementation: Each interface can belong to a single group at a time, an interface comes up without being a member in any of the groups. Userspace can assign interfaces to groups after being created, this would typically be performed in /etc/ppp/ip-up.d (and similar) scripts. In spirit interface group is somewhat similar to the routing protocol field for routing entries, which contains information on which routing daemon was responsible for adding the given route entry. Things to be done if you like this approach: * interface group match in iptables, * support for naming interface groups in userspace, a'la routing protocols, * emitting a netlink notification when the group of an interface changes, * possibly converting the ip link command to use NETLINK messages, instead of using ioctl() What do you think? I like it .. kind of like routing realms. 
For your specific case there is a possible solution already supported by the kernel, you can pre-allocate ppp devices using PPPIOCNEWUNIT, rename them and later attach to individual units in the ppp daemon using PPPIOCATTACH (I have a patch for this somewhere if you're interested). But that only works for PPP devices and the group idea looks more flexible. Thanks for liking it :) I'm going to implement a complete patch with iptables match and support for naming interface groups like routing realms and post it when I'm ready. I'd go for the more general solution as I have other interfaces not just ppp, it was just a trivial example. -- Bazsi - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Sun, 2006-06-08 at 16:16 -0700, Jesse Brandeburg wrote: [..] As for specifics, for TX_WAKE_THRESHOLD, i noticed that we were starting the queue after every packet was cleaned, so when the ring went full there was a lot of queue thrash. indeed this is what used to happen and was bad So this is a huge improvement. What happens now under steady state at high traffic transmits is, instead of 1, you see E1000_TX_WEIGHT in between queue sleep/wakes. I assume this is a given since E1000_TX_WEIGHT is higher than TX_WAKE_THRESHOLD. I am not sure if i can vouch for even more improvement by mucking around with values of E1000_TX_WEIGHT. Can you please take a look at the patch i posted? I would like to submit that for inclusion. It does two things a) make pruning available to be invoked from elsewhere (I tried to do it from the tx path but it gave me non-good results) b) makes E1000_TX_WEIGHT and TX_WAKE_THRESHOLD relative to the size of the transmit ring. I think this is a sane thing to do. You could either extract the bits or i could resend to you as two different patches. I have tested it and it works. tg3 seemed to fix it in a smart way and so I did a similar fix. Note we should have at least MAX_SKB_FRAGS (usually 32) + a few descriptors free before we should start the tx again, otherwise we run the risk of a maximum fragmented packet being unable to fit in the tx ring. I noticed you check for that in the tx path. now, for E1000_TX_WEIGHT, that was more of an experiment as i noticed we could stay in transmit clean up forEVER (okay not literally) which would really violate our NAPI timeslice. Interesting. The only time i have seen the NAPI time slice kick in is in slow hardware or emulators (like UML). I wonder if the pruning path could be made faster? What is the most consuming item? I realize there will be a substantial amount of cache misses. 
Maybe in addition to pruning E1000_TX_WEIGHT descriptors also fire a timer to clean up the rest (to avoid it being accounted for in the napi timeslice;-). Essentially i think you have something in the pruning path that needs to be optimized. Profiling and improving that would help. I messed with some values and 64 didn't really seem like too bad a compromise (it does slow things down just a tad in the general case) while really helping a couple of tests where there were lots of outstanding transmits happening at the same time as lots of receives. The later are the kind of tests i am running. If you are a router or a busy server they apply. In slow machines a ping flood also applies etc. This need for a tx weight is yet another global (design) problem with NAPI enabled drivers, oh yes, the Intel cabal - blame NAPI first;- IMO, the problem is you are consuming too many cycles in the receive path. NAPI has to be fair to all netdevices and cant hog all the CPU because a certain netdevice uses too many cycles to process a packet. but someday I'll try to document some of the issues I've seen. I think it would be invaluable. Just dont jump to blame Canada^WNAPI conclusion because it distracts; cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, Aug 07, 2006 at 08:50:36AM -0400, jamal wrote: On Sun, 2006-06-08 at 16:16 -0700, Jesse Brandeburg wrote: [..] As for specifics, for TX_WAKE_THRESHOLD, i noticed that we were starting the queue after every packet was cleaned, so when the ring went full there was a lot of queue thrash. indeed this is what used to happen and was bad So this is a huge improvement. What happens now under steady state at high traffic transmits is, instead of 1, you see E1000_TX_WEIGHT in between queue sleep/wakes. I assume this is a given since E1000_TX_WEIGHT is higher than TX_WAKE_THRESHOLD. I am not sure if i can vouch for even more improvement by mucking around with values of E1000_TX_WEIGHT. Can you please take a look at the patch i posted? I would like to submit that for inclusion. It does two things a) make pruning available to be invoked from elsewhere (I tried to do it from the tx path but it gave me non-good results) b) makes E1000_TX_WEIGHT and TX_WAKE_THRESHOLD relative to the size of the transmit ring. I think this is a sane thing to do. Hi Jamal, I have a question regarding your patch. In clean_tx_irq, it seems you dont clean the ring unless fdesc tx_ring-prunet. Won't this cause deadlocks for local TCP connections if transmit goes quiet? It seems to me as if this patch depends on the skb orphaning previously suggested on this thread. Please correct me if I'm wrong. Best regards -- Programmer Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, 2006-07-08 at 17:21 +0200, Edgar E. Iglesias wrote: [..] I have a question regarding your patch. In clean_tx_irq, it seems you dont clean the ring unless fdesc tx_ring-prunet. Won't this cause deadlocks for local TCP connections if transmit goes quiet? I have not tested the TCP case; however, note that the specific part you reference is commented out. There are no deadlock issues in the case of forwarding (as in my testcases). I did not quite follow the ensuing discussion after your post: These descriptors being pruned in the tx path happen only after the packets have been sent out on the wire. Why would this contribute to a deadlock but not when it happens on the receive path? It is true that tcp retransmit queue will still be referencing the skbs, but why is it any different because in one case it happens in the tx and in the other on the receive? Is there dependency on waking up the queue? It seems to me as if this patch depends on the skb orphaning previously suggested on this thread. Please correct me if I'm wrong. I didn't quite follow that discussion; I will go back and read it; you could also answer my questions above to make me understand better. cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
forcedeth gigabit detection
(sorry about sending this to you twice Carl-Daniel, I sent this to @oss.sgi.com by accident) The nforce2 builtin network on my A7N8X-delux motherboard won't detect as gigabit-capable using the forcedeth driver. I'm using the forcedeth driver that comes with linux 2.6.17 (which is 0.54). Ethtool gives: Settings for eth1: Supported ports: [ MII ] Supported link modes: 10baseT/Half 10baseT/Full 100baseT/Half 100baseT/Full Supports auto-negotiation: Yes Advertised link modes: 10baseT/Half 10baseT/Full 100baseT/Half 100baseT/Full Advertised auto-negotiation: Yes Speed: 100Mb/s Duplex: Full Port: MII PHYAD: 1 Transceiver: external Auto-negotiation: on Supports Wake-on: g Wake-on: d Link detected: yes I assume the supported link modes are those supported by the NIC, not the link (It's a short piece of cat5 between the two cards (no switch/hub), it should support gigabit but you never know)? The odd thing is the NIC on the other side of the cable (which is also a forcedeth from the nforce3 chipset) detects as: Settings for eth0: Supported ports: [ MII ] Supported link modes: 10baseT/Half 10baseT/Full 100baseT/Half 100baseT/Full 1000baseT/Full Supports auto-negotiation: Yes Advertised link modes: 10baseT/Half 10baseT/Full 100baseT/Half 100baseT/Full 1000baseT/Full Advertised auto-negotiation: Yes Speed: 100Mb/s Duplex: Full Port: MII PHYAD: 1 Transceiver: externel Auto-negotiation: on Supports Wake-on: g Wake-on: d Link detected: yes Here's the debugging output from the forcedeth that won't do gigabit on module load and bringing the interface up: Aug 7 15:52:46 jupiter kernel: PCI: Setting latency timer of device :00:04.0 to 64 Aug 7 15:52:46 jupiter kernel: :00:04.0: resource 0 start df083000 len 4096 flags 0x0200. Aug 7 15:52:46 jupiter kernel: :00:04.0: MAC Address 00:0e:a6:0b:6f:73 Aug 7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 2 at PHY 1: 0x0. Aug 7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 3 at PHY 1: 0x8201. 
Aug 7 15:52:46 jupiter kernel: :00:04.0: open: Found PHY :0020 at address 1. Aug 7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 4 at PHY 1: 0x1e1. Aug 7 15:52:46 jupiter kernel: eth%%d: mii_rw wrote 0xde1 to reg 4 at PHY 1 Aug 7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 1 at PHY 1: 0x786d. Aug 7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 0 at PHY 1: 0x3100. Aug 7 15:52:46 jupiter kernel: eth%%d: mii_rw wrote 0xb100 to reg 0 at PHY 1 Aug 7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 0 at PHY 1: 0x3000. Aug 7 15:52:46 jupiter kernel: eth%%d: mii_rw read from reg 0 at PHY 1: 0x3000. Aug 7 15:52:46 jupiter kernel: eth%%d: mii_rw wrote 0x3200 to reg 0 at PHY 1 Aug 7 15:53:03 jupiter kernel: nv_open: begin Aug 7 15:53:03 jupiter kernel: eth1: nv_alloc_rx: Packet 0 marked as Available ... Aug 7 15:53:03 jupiter kernel: eth1: nv_alloc_rx: Packet 127 marked as Available Aug 7 15:53:03 jupiter kernel: eth1: nv_txrx_reset Aug 7 15:53:03 jupiter kernel: eth1: mii_rw read from reg 1 at PHY 1: 0x786d. Aug 7 15:53:03 jupiter kernel: eth1: mii_rw read from reg 1 at PHY 1: 0x786d. Aug 7 15:53:03 jupiter kernel: eth1: mii_rw read from reg 4 at PHY 1: 0x1e1. Aug 7 15:53:03 jupiter kernel: eth1: mii_rw read from reg 5 at PHY 1: 0xc5e1. Aug 7 15:53:03 jupiter kernel: eth1: nv_update_linkspeed: PHY advertises 0x01e1, lpa 0xc5e1. Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064. Aug 7 15:53:03 jupiter kernel: eth1: nv_start_tx Aug 7 15:53:03 jupiter kernel: eth1: nv_stop_rx Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064. Aug 7 15:53:03 jupiter kernel: eth1: nv_stop_rx Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064. 
Aug 7 15:53:03 jupiter kernel: eth1: nv_stop_rx Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064. Aug 7 15:53:03 jupiter kernel: eth1: nv_stop_rx Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064. Aug 7 15:53:03 jupiter kernel: eth1: nv_stop_rx Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx Aug 7 15:53:03 jupiter kernel: eth1: nv_start_rx to duplex 1, speed 0x00010064. Aug 7 15:53:03 jupiter kernel: eth1: nv_stop_rx Aug 7 15:53:03 jupiter kernel: eth1:
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, Aug 07, 2006 at 11:40:49AM -0400, jamal wrote: On Mon, 2006-07-08 at 17:21 +0200, Edgar E. Iglesias wrote: [..] I have a question regarding your patch. In clean_tx_irq, it seems you dont clean the ring unless fdesc tx_ring-prunet. Won't this cause deadlocks for local TCP connections if transmit goes quiet? I have not tested the TCP case; however, note that the specific part you reference is commented out. There are no deadlock issues in the case of forwarding (as in my testcases). Ok, I thought you wanted the code inside the ifdefs to be considered. If not, I guess there is no problem. Yes, the forwarding case does not suffer from any deadlock issues that I am aware of. I did not quiet follow the ensuing discussion after your post: These descriptors being pruned in the tx path happen only after the packets have been sent out on the wire. Why would this contribute to a deadlock but not when it happens on the receive path? It is true that tcp retransmit queue will still be referencing the skbs, but why is it any different because in one case it happens in the tx and in the other on the receive? Is there dependency on waking up the queue? No, the deadlock happens only if you don't prune the descriptors. If the host sends some data and then goes quiet, fdesc tx_ring-prunet might not be true for a long time and skbs will end up sitting in the tx ring indefinitely, charging the socket's sndbuf. Best regards -- Programmer Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, 2006-07-08 at 17:59 +0200, Edgar E. Iglesias wrote: Ok, I thought you wanted the code inside the ifdefs to be considered. If not, I guess there is no problem. Yes, the forwarding case does not suffer from any deadlocks issues that I am aware of. From my tests: It does _not_ provide any performance improvements and at some point I decided I didn't want to add more variables to analyze, so I got rid of it; I would have had to hand edit the patch to totally remove it; so that's why you still see the ifdefed out variant. No, the deadlock happens only if you don't prune the descriptors. If the host sends some data and then goes quite, fdesc tx_ring-prunet might not be true for a long time and skbs will end up sitting in the tx ring indefinitely, charging the socket's sndbuf. Note: I didn't get rid of the rx path pruning, i.e. that is still on. It just prunes fewer descriptors with that change on the tx. So not very different from before. I think I may be getting the gist now of the discussion after a re-read; while packets still charged to TCP may have been transmitted, they may sit on the tx ring forever. They will only be pruned if we had netif_stopped (and even that is not good enough with Jesse's threshold check) or if a new packet comes in destined for us. Did I understand correctly? If yes, I didn't introduce this challenge; it has always been there. I think I understand the suggestion now from Dave/Herbert to orphan those skbs... cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, Aug 07, 2006 at 11:40:49AM -0400, jamal wrote: On Mon, 2006-07-08 at 17:21 +0200, Edgar E. Iglesias wrote: [..] I have a question regarding your patch. In clean_tx_irq, it seems you dont clean the ring unless fdesc tx_ring-prunet. Won't this cause deadlocks for local TCP connections if transmit goes quiet? I have not tested the TCP case; however, note that the specific part you reference is commented out. There are no deadlock issues in the case of forwarding (as in my testcases). I did not quiet follow the ensuing discussion after your post: These descriptors being pruned in the tx path happen only after the packets have been sent out on the wire. Why would this contribute to a deadlock but not when it happens on the receive path? It is true that tcp retransmit queue will still be referencing the skbs, but why is it any different because in one case it happens in the tx and in the other on the receive? Is there dependency on waking up the queue? Hi again Jamal, Not sure if it is doable, but I'll post the thoughts anyway. Assuming you would get the code inside the jamal ifdefs working without deadlocks, you now have a tx_irq function which if fdesc = tx_ring-prunet essentially just checks for hw lockups. Let's speculate and further assume you could do the detect_tx_hung from some other context (timer or whatever); then you end up having a tx_irq function which most of the time does nothing. The next step could be to move the fdesc = tx_ring-prunet logic into the transmit path and completely disable the tx_irq when the condition is not met. Now you end up not taking the irq at all as long as fdesc = tx_ring-prunet. This was the logic I tried on the cris driver but ended up with deadlocks :) Best regards -- Programmer Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] limit rt cache size
Hello! During OpenVZ stress testing we found that UDP traffic with random src can generate too much excessive rt hash growing leading finally to OOM and kernel panics. It was found that for 4GB i686 system (having 1048576 total pages and 225280 normal zone pages) kernel allocates the following route hash: syslog: IP route cache hash table entries: 262144 (order: 8, 1048576 bytes) = ip_rt_max_size = 4194304 entries, i.e. max rt size is 4194304 * 256b = 1Gb of RAM normal_zone Grrr... Indeed. Attached the patch which removes HASH_HIGHMEM flag from alloc_large_system_hash() call. However, I'm not sure whether it should be removed as well for TCP tcp_hashinfo.ehash and tcp_hashinfo.bhash (as those are probably limited by number of files?). The patch looks OK. But I am not sure either. To be honest, I do not understand the purpose of the HASH_HIGHMEM flag. At first sight, the hash table eats low memory, and objects hashed in this table also eat low memory. Why is its size calculated from total memory? But taking into account that this flag is used only by tcp.c and route.c, both of which feed on low memory, I must be missing something important. Let's ask people on netdev. As for the routing cache size, it looks like it is another bug. route.c should not force rt_max_size = 16*rt_hash_size. I think it should consult available memory and limit rt_max_size to some reasonable value, even if hash size is too high. --- ./net/ipv4/route.c.xrt2006-07-14 19:08:33.0 +0400 +++ ./net/ipv4/route.c2006-08-07 18:25:37.0 +0400 @@ -3149,7 +3149,7 @@ int __init ip_rt_init(void) rhash_entries, (num_physpages = 128 * 1024) ? 15 : 17, - HASH_HIGHMEM, + 0, rt_hash_log, rt_hash_mask, 0); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, Aug 07, 2006 at 12:31:59PM -0400, Jamal Hadi Salim wrote: On Mon, 2006-07-08 at 17:59 +0200, Edgar E. Iglesias wrote: Ok, I thought you wanted the code inside the ifdefs to be considered. If not, I guess there is no problem. Yes, the forwarding case does not suffer from any deadlocks issues that I am aware of. From my tests: It does _not_ provide any performance improvements and at some point i decided i didnt want to add more variables to analyze, so i got rid of it; I would have had to hand edit the patch to totally remove it; so that why you still see the ifdefed out variant. No, the deadlock happens only if you don't prune the descriptors. If the host sends some data and then goes quite, fdesc tx_ring-prunet might not be true for a long time and skbs will end up sitting in the tx ring indefinitely, charging the socket's sndbuf. Note: I didnt get rid of the rx path pruning. i.e that is still on. It just prunes lesser descriptors with that change on the tx. So not very different from before. I think i may be getting a gist now of the discussion after a re-read; while packets are still charged to TCP may have been transmitted they may sit on the tx ring forever. They will only be pruned if we had netif_stopped (and even that is not good enough with Jesse's threshold check) or if a new packet comes in destined for us. Did i understand correctly? If yes, i didnt introduce this challenge it has always been there. I think i understand the suggestion now from Dave/Herbert to orphan those skbs... I'll give you an example. A TCP flow sends X data and later waits for a response, host is now quietly waiting. Assume fdesc = tx_ring-prunet, so we don't free any skbs, right? Now assume that some part of X data gets lost, our retransmit timer hits and we want to retransmit but our socket is charged with too much data sitting on the NIC's tx ring, so we don't send anything. By orphaning, those skbs won't charge the socket and the flow can retransmit. 
Best regards -- Programmer Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/7] [NEIGH]: Convert neighbour deletion to new netlink api
Fixes: Return ENOENT if the neighbour is not found (was EINVAL) Return EAFNOSUPPORT if no table matches the specified address family. Signed-off-by: Thomas Graf [EMAIL PROTECTED] Index: net-2.6.19.git/net/core/neighbour.c === --- net-2.6.19.git.orig/net/core/neighbour.c +++ net-2.6.19.git/net/core/neighbour.c @@ -30,6 +30,7 @@ #include net/dst.h #include net/sock.h #include net/netevent.h +#include net/netlink.h #include linux/rtnetlink.h #include linux/random.h #include linux/string.h @@ -1437,48 +1438,62 @@ int neigh_table_clear(struct neigh_table int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ndmsg *ndm = NLMSG_DATA(nlh); - struct rtattr **nda = arg; + struct ndmsg *ndm; + struct nlattr *dst_attr; struct neigh_table *tbl; struct net_device *dev = NULL; - int err = -ENODEV; + int err = -EINVAL; - if (ndm-ndm_ifindex - (dev = dev_get_by_index(ndm-ndm_ifindex)) == NULL) + if (nlmsg_len(nlh) sizeof(*ndm)) + goto out; + + dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST); + if (dst_attr == NULL) goto out; + ndm = nlmsg_data(nlh); + if (ndm-ndm_ifindex) { + dev = dev_get_by_index(ndm-ndm_ifindex); + if (dev == NULL) { + err = -ENODEV; + goto out; + } + } + read_lock(neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl-next) { - struct rtattr *dst_attr = nda[NDA_DST - 1]; - struct neighbour *n; + struct neighbour *neigh; if (tbl-family != ndm-ndm_family) continue; read_unlock(neigh_tbl_lock); - err = -EINVAL; - if (!dst_attr || RTA_PAYLOAD(dst_attr) tbl-key_len) + if (nla_len(dst_attr) tbl-key_len) goto out_dev_put; if (ndm-ndm_flags NTF_PROXY) { - err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev); + err = pneigh_delete(tbl, nla_data(dst_attr), dev); goto out_dev_put; } - if (!dev) - goto out; + if (dev == NULL) + goto out_dev_put; - n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); - if (n) { - err = neigh_update(n, NULL, NUD_FAILED, - NEIGH_UPDATE_F_OVERRIDE| - NEIGH_UPDATE_F_ADMIN); - neigh_release(n); + neigh = 
neigh_lookup(tbl, nla_data(dst_attr), dev); + if (neigh == NULL) { + err = -ENOENT; + goto out_dev_put; } + + err = neigh_update(neigh, NULL, NUD_FAILED, + NEIGH_UPDATE_F_OVERRIDE | + NEIGH_UPDATE_F_ADMIN); + neigh_release(neigh); goto out_dev_put; } read_unlock(neigh_tbl_lock); - err = -EADDRNOTAVAIL; + err = -EAFNOSUPPORT; + out_dev_put: if (dev) dev_put(dev); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHSET] Convert neighbour code to new netlink api
Pretty much straightforward, with some minor fixes that go along with it. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/7] [NEIGH]: Convert neighbour addition to new netlink api
Fixes: Return EAFNOSUPPORT if no table matches the specified address family. Signed-off-by: Thomas Graf [EMAIL PROTECTED] Index: net-2.6.19.git/net/core/neighbour.c === --- net-2.6.19.git.orig/net/core/neighbour.c +++ net-2.6.19.git/net/core/neighbour.c @@ -1503,76 +1503,88 @@ out: int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ndmsg *ndm = NLMSG_DATA(nlh); - struct rtattr **nda = arg; + struct ndmsg *ndm; + struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; struct net_device *dev = NULL; - int err = -ENODEV; + int err; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err 0) + goto out; - if (ndm-ndm_ifindex - (dev = dev_get_by_index(ndm-ndm_ifindex)) == NULL) + err = -EINVAL; + if (tb[NDA_DST] == NULL) goto out; + ndm = nlmsg_data(nlh); + if (ndm-ndm_ifindex) { + dev = dev_get_by_index(ndm-ndm_ifindex); + if (dev == NULL) { + err = -ENODEV; + goto out; + } + + if (tb[NDA_LLADDR] nla_len(tb[NDA_LLADDR]) dev-addr_len) + goto out_dev_put; + } + read_lock(neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl-next) { - struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1]; - struct rtattr *dst_attr = nda[NDA_DST - 1]; - int override = 1; - struct neighbour *n; + int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE; + struct neighbour *neigh; + void *dst, *lladdr; if (tbl-family != ndm-ndm_family) continue; read_unlock(neigh_tbl_lock); - err = -EINVAL; - if (!dst_attr || RTA_PAYLOAD(dst_attr) tbl-key_len) + if (nla_len(tb[NDA_DST]) tbl-key_len) goto out_dev_put; + dst = nla_data(tb[NDA_DST]); + lladdr = tb[NDA_LLADDR] ? 
nla_data(tb[NDA_LLADDR]) : NULL; if (ndm-ndm_flags NTF_PROXY) { - err = -ENOBUFS; - if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1)) - err = 0; + err = 0; + if (pneigh_lookup(tbl, dst, dev, 1) == NULL) + err = -ENOBUFS; goto out_dev_put; } - err = -EINVAL; - if (!dev) - goto out; - if (lladdr_attr RTA_PAYLOAD(lladdr_attr) dev-addr_len) + if (dev == NULL) goto out_dev_put; + + neigh = neigh_lookup(tbl, dst, dev); + if (neigh == NULL) { + if (!(nlh-nlmsg_flags NLM_F_CREATE)) { + err = -ENOENT; + goto out_dev_put; + } - n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); - if (n) { - if (nlh-nlmsg_flags NLM_F_EXCL) { - err = -EEXIST; - neigh_release(n); + neigh = __neigh_lookup_errno(tbl, dst, dev); + if (IS_ERR(neigh)) { + err = PTR_ERR(neigh); goto out_dev_put; } - - override = nlh-nlmsg_flags NLM_F_REPLACE; - } else if (!(nlh-nlmsg_flags NLM_F_CREATE)) { - err = -ENOENT; - goto out_dev_put; } else { - n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev); - if (IS_ERR(n)) { - err = PTR_ERR(n); + if (nlh-nlmsg_flags NLM_F_EXCL) { + err = -EEXIST; + neigh_release(neigh); goto out_dev_put; } - } - err = neigh_update(n, - lladdr_attr ? RTA_DATA(lladdr_attr) : NULL, - ndm-ndm_state, - (override ? NEIGH_UPDATE_F_OVERRIDE : 0) | - NEIGH_UPDATE_F_ADMIN); + if (!(nlh-nlmsg_flags NLM_F_REPLACE)) + flags = ~NEIGH_UPDATE_F_OVERRIDE; + } - neigh_release(n); + err = neigh_update(neigh, lladdr, ndm-ndm_state, flags); + neigh_release(neigh); goto
[PATCH 3/7] [NEIGH]: Convert neighbour dumping to new netlink api
Signed-off-by: Thomas Graf [EMAIL PROTECTED] Index: net-2.6.19.git/net/core/neighbour.c === --- net-2.6.19.git.orig/net/core/neighbour.c +++ net-2.6.19.git/net/core/neighbour.c @@ -1898,48 +1898,49 @@ out: return skb-len; } -static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, - u32 pid, u32 seq, int event, unsigned int flags) +static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, + u32 pid, u32 seq, int type, unsigned int flags) { unsigned long now = jiffies; - unsigned char *b = skb-tail; struct nda_cacheinfo ci; - int locked = 0; - u32 probes; - struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event, -sizeof(struct ndmsg), flags); - struct ndmsg *ndm = NLMSG_DATA(nlh); + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); + if (nlh == NULL) + return -ENOBUFS; - ndm-ndm_family = n-ops-family; + ndm = nlmsg_data(nlh); + ndm-ndm_family = neigh-ops-family; ndm-ndm_pad1= 0; ndm-ndm_pad2= 0; - ndm-ndm_flags = n-flags; - ndm-ndm_type= n-type; - ndm-ndm_ifindex = n-dev-ifindex; - RTA_PUT(skb, NDA_DST, n-tbl-key_len, n-primary_key); - read_lock_bh(n-lock); - locked = 1; - ndm-ndm_state = n-nud_state; - if (n-nud_state NUD_VALID) - RTA_PUT(skb, NDA_LLADDR, n-dev-addr_len, n-ha); - ci.ndm_used = now - n-used; - ci.ndm_confirmed = now - n-confirmed; - ci.ndm_updated = now - n-updated; - ci.ndm_refcnt= atomic_read(n-refcnt) - 1; - probes = atomic_read(n-probes); - read_unlock_bh(n-lock); - locked = 0; - RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), ci); - RTA_PUT(skb, NDA_PROBES, sizeof(probes), probes); - nlh-nlmsg_len = skb-tail - b; - return skb-len; + ndm-ndm_flags = neigh-flags; + ndm-ndm_type= neigh-type; + ndm-ndm_ifindex = neigh-dev-ifindex; + + NLA_PUT(skb, NDA_DST, neigh-tbl-key_len, neigh-primary_key); + + read_lock_bh(neigh-lock); + ndm-ndm_state = neigh-nud_state; + if ((neigh-nud_state NUD_VALID) + nla_put(skb, NDA_LLADDR, neigh-dev-addr_len, neigh-ha) 0) { + 
read_unlock_bh(neigh-lock); + goto nla_put_failure; + } -nlmsg_failure: -rtattr_failure: - if (locked) - read_unlock_bh(n-lock); - skb_trim(skb, b - skb-data); - return -1; + ci.ndm_used = now - neigh-used; + ci.ndm_confirmed = now - neigh-confirmed; + ci.ndm_updated = now - neigh-updated; + ci.ndm_refcnt= atomic_read(neigh-refcnt) - 1; + read_unlock_bh(neigh-lock); + + NLA_PUT_U32(skb, NDA_PROBES, atomic_read(neigh-probes)); + NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), ci); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } @@ -1983,7 +1984,7 @@ int neigh_dump_info(struct sk_buff *skb, int t, family, s_t; read_lock(neigh_tbl_lock); - family = ((struct rtgenmsg *)NLMSG_DATA(cb-nlh))-rtgen_family; + family = ((struct rtgenmsg *) nlmsg_data(cb-nlh))-rtgen_family; s_t = cb-args[0]; for (tbl = neigh_tables, t = 0; tbl; tbl = tbl-next, t++) { @@ -2364,39 +2365,34 @@ static struct file_operations neigh_stat #ifdef CONFIG_ARPD void neigh_app_ns(struct neighbour *n) { - struct nlmsghdr *nlh; - int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); - struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); + struct sk_buff *skb; - if (!skb) + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (skb == NULL) return; - if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) 0) { + if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, NLM_F_REQUEST) = 0) kfree_skb(skb); - return; + else { + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } - nlh= (struct nlmsghdr *)skb-data; - nlh-nlmsg_flags = NLM_F_REQUEST; - NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } static void neigh_app_notify(struct neighbour *n) { - struct nlmsghdr *nlh; - int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); - struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); + struct sk_buff *skb; - if (!skb) + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (skb == NULL) return; - if 
(neigh_fill_info(skb,
[PATCH 4/7] [NEIGH]: Move netlink neighbour bits to linux/neighbour.h
Moves netlink neighbour bits to linux/neighbour.h. Also moves bits to be exported to userspace from net/neighbour.h to linux/neighbour.h and removes __KERNEL__ guards, userspace is not supposed to be using it. rtnetlink_rcv_msg() is not longer required to parse attributes for the neighbour layer, remove dependency on obsolete and buggy rta_buf. Signed-off-by: Thomas Graf [EMAIL PROTECTED] Index: net-2.6.19.git/include/linux/neighbour.h === --- /dev/null +++ net-2.6.19.git/include/linux/neighbour.h @@ -0,0 +1,65 @@ +#ifndef __LINUX_NEIGHBOUR_H +#define __LINUX_NEIGHBOUR_H + +#include linux/netlink.h + +struct ndmsg +{ + __u8ndm_family; + __u8ndm_pad1; + __u16 ndm_pad2; + __s32 ndm_ifindex; + __u16 ndm_state; + __u8ndm_flags; + __u8ndm_type; +}; + +enum +{ + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, + NDA_CACHEINFO, + NDA_PROBES, + __NDA_MAX +}; + +#define NDA_MAX (__NDA_MAX - 1) + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_PROXY 0x08/* == ATF_PUBL */ +#define NTF_ROUTER 0x80 + +/* + * Neighbor Cache Entry States. + */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + +/* NUD_NOARP NUD_PERMANENT are pseudostates, they never change + and make no address resolution or NUD. + NUD_PERMANENT is also cannot be deleted by garbage collectors. + */ + +struct nda_cacheinfo +{ + __u32 ndm_confirmed; + __u32 ndm_used; + __u32 ndm_updated; + __u32 ndm_refcnt; +}; + +#endif Index: net-2.6.19.git/include/linux/rtnetlink.h === --- net-2.6.19.git.orig/include/linux/rtnetlink.h +++ net-2.6.19.git/include/linux/rtnetlink.h @@ -386,69 +386,6 @@ struct rta_session -/** - * Neighbour discovery. 
- / - -struct ndmsg -{ - unsigned char ndm_family; - unsigned char ndm_pad1; - unsigned short ndm_pad2; - int ndm_ifindex;/* Link index */ - __u16 ndm_state; - __u8ndm_flags; - __u8ndm_type; -}; - -enum -{ - NDA_UNSPEC, - NDA_DST, - NDA_LLADDR, - NDA_CACHEINFO, - NDA_PROBES, - __NDA_MAX -}; - -#define NDA_MAX (__NDA_MAX - 1) - -#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg -#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg)) - -/* - * Neighbor Cache Entry Flags - */ - -#define NTF_PROXY 0x08/* == ATF_PUBL */ -#define NTF_ROUTER 0x80 - -/* - * Neighbor Cache Entry States. - */ - -#define NUD_INCOMPLETE 0x01 -#define NUD_REACHABLE 0x02 -#define NUD_STALE 0x04 -#define NUD_DELAY 0x08 -#define NUD_PROBE 0x10 -#define NUD_FAILED 0x20 - -/* Dummy states */ -#define NUD_NOARP 0x40 -#define NUD_PERMANENT 0x80 -#define NUD_NONE 0x00 - - -struct nda_cacheinfo -{ - __u32 ndm_confirmed; - __u32 ndm_used; - __u32 ndm_updated; - __u32 ndm_refcnt; -}; - - /* * Neighbour tables specific messages. * Index: net-2.6.19.git/include/net/neighbour.h === --- net-2.6.19.git.orig/include/net/neighbour.h +++ net-2.6.19.git/include/net/neighbour.h @@ -1,6 +1,8 @@ #ifndef _NET_NEIGHBOUR_H #define _NET_NEIGHBOUR_H +#include linux/neighbour.h + /* * Generic neighbour manipulation * @@ -14,40 +16,6 @@ * - Add neighbour cache statistics like rtstat */ -/* The following flags states are exported to user space, - so that they should be moved to include/linux/ directory. - */ - -/* - * Neighbor Cache Entry Flags - */ - -#define NTF_PROXY 0x08/* == ATF_PUBL */ -#define NTF_ROUTER 0x80 - -/* - * Neighbor Cache Entry States. 
- */ - -#define NUD_INCOMPLETE 0x01 -#define NUD_REACHABLE 0x02 -#define NUD_STALE 0x04 -#define NUD_DELAY 0x08 -#define NUD_PROBE 0x10 -#define NUD_FAILED 0x20 - -/* Dummy states */ -#define NUD_NOARP 0x40 -#define NUD_PERMANENT 0x80 -#define NUD_NONE 0x00 - -/* NUD_NOARP NUD_PERMANENT are pseudostates, they never change - and make no address resolution or NUD. - NUD_PERMANENT is also cannot be deleted by garbage collectors. - */ - -#ifdef __KERNEL__ - #include asm/atomic.h #include linux/netdevice.h #include
[PATCH 5/7] [NEIGH]: Convert neighbour table modification to new netlink api
Signed-off-by: Thomas Graf [EMAIL PROTECTED] Index: net-2.6.19.git/net/core/neighbour.c === --- net-2.6.19.git.orig/net/core/neighbour.c +++ net-2.6.19.git/net/core/neighbour.c @@ -1751,28 +1751,61 @@ static inline struct neigh_parms *lookup return NULL; } +static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = { + [NDTA_NAME] = { .type = NLA_STRING }, + [NDTA_THRESH1] = { .type = NLA_U32 }, + [NDTA_THRESH2] = { .type = NLA_U32 }, + [NDTA_THRESH3] = { .type = NLA_U32 }, + [NDTA_GC_INTERVAL] = { .type = NLA_U64 }, + [NDTA_PARMS]= { .type = NLA_NESTED }, +}; + +static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = { + [NDTPA_IFINDEX] = { .type = NLA_U32 }, + [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, + [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, + [NDTPA_APP_PROBES] = { .type = NLA_U32 }, + [NDTPA_UCAST_PROBES]= { .type = NLA_U32 }, + [NDTPA_MCAST_PROBES]= { .type = NLA_U32 }, + [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, + [NDTPA_GC_STALETIME]= { .type = NLA_U64 }, + [NDTPA_DELAY_PROBE_TIME]= { .type = NLA_U64 }, + [NDTPA_RETRANS_TIME]= { .type = NLA_U64 }, + [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, + [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, + [NDTPA_LOCKTIME]= { .type = NLA_U64 }, +}; + int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct neigh_table *tbl; - struct ndtmsg *ndtmsg = NLMSG_DATA(nlh); - struct rtattr **tb = arg; - int err = -EINVAL; + struct ndtmsg *ndtmsg; + struct nlattr *tb[NDTA_MAX+1]; + int err; + + err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, + nl_neightbl_policy); + if (err 0) + goto errout; - if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1])) - return -EINVAL; + if (tb[NDTA_NAME] == NULL) { + err = -EINVAL; + goto errout; + } + ndtmsg = nlmsg_data(nlh); read_lock(neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl-next) { if (ndtmsg-ndtm_family tbl-family != ndtmsg-ndtm_family) continue; - if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl-id)) + if 
(nla_strcmp(tb[NDTA_NAME], tbl-id) == 0) break; } if (tbl == NULL) { err = -ENOENT; - goto errout; + goto errout_locked; } /* @@ -1781,86 +1814,89 @@ int neightbl_set(struct sk_buff *skb, st */ write_lock_bh(tbl-lock); - if (tb[NDTA_THRESH1 - 1]) - tbl-gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]); - - if (tb[NDTA_THRESH2 - 1]) - tbl-gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]); - - if (tb[NDTA_THRESH3 - 1]) - tbl-gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]); - - if (tb[NDTA_GC_INTERVAL - 1]) - tbl-gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]); - - if (tb[NDTA_PARMS - 1]) { - struct rtattr *tbp[NDTPA_MAX]; + if (tb[NDTA_PARMS]) { + struct nlattr *tbp[NDTPA_MAX+1]; struct neigh_parms *p; - u32 ifindex = 0; + int i, ifindex = 0; - if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) 0) - goto rtattr_failure; + err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], + nl_ntbl_parm_policy); + if (err 0) + goto errout_tbl_lock; - if (tbp[NDTPA_IFINDEX - 1]) - ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]); + if (tbp[NDTPA_IFINDEX]) + ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); p = lookup_neigh_params(tbl, ifindex); if (p == NULL) { err = -ENOENT; - goto rtattr_failure; + goto errout_tbl_lock; } - - if (tbp[NDTPA_QUEUE_LEN - 1]) - p-queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]); - - if (tbp[NDTPA_PROXY_QLEN - 1]) - p-proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]); - - if (tbp[NDTPA_APP_PROBES - 1]) - p-app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]); - if (tbp[NDTPA_UCAST_PROBES - 1]) - p-ucast_probes = -
[PATCH 6/7] [NEIGH]: Convert neighbour table dumping to new netlink api
Also fixes skipping of already dumped neighbours. Signed-off-by: Thomas Graf [EMAIL PROTECTED] Index: net-2.6.19.git/net/core/neighbour.c === --- net-2.6.19.git.orig/net/core/neighbour.c +++ net-2.6.19.git/net/core/neighbour.c @@ -1594,56 +1594,59 @@ out: static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) { - struct rtattr *nest = NULL; - - nest = RTA_NEST(skb, NDTA_PARMS); + struct nlattr *nest; + + nest = nla_nest_start(skb, NDTA_PARMS); + if (nest == NULL) + return -ENOBUFS; if (parms-dev) - RTA_PUT_U32(skb, NDTPA_IFINDEX, parms-dev-ifindex); + NLA_PUT_U32(skb, NDTPA_IFINDEX, parms-dev-ifindex); - RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(parms-refcnt)); - RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms-queue_len); - RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms-proxy_qlen); - RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms-app_probes); - RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms-ucast_probes); - RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms-mcast_probes); - RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms-reachable_time); - RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, + NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(parms-refcnt)); + NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms-queue_len); + NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms-proxy_qlen); + NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms-app_probes); + NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms-ucast_probes); + NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms-mcast_probes); + NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms-reachable_time); + NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, parms-base_reachable_time); - RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms-gc_staletime); - RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms-delay_probe_time); - RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms-retrans_time); - RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms-anycast_delay); - RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms-proxy_delay); - RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms-locktime); + NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, 
parms-gc_staletime); + NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms-delay_probe_time); + NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms-retrans_time); + NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms-anycast_delay); + NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms-proxy_delay); + NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms-locktime); - return RTA_NEST_END(skb, nest); + return nla_nest_end(skb, nest); -rtattr_failure: - return RTA_NEST_CANCEL(skb, nest); +nla_put_failure: + return nla_nest_cancel(skb, nest); } -static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, - struct netlink_callback *cb) +static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, + u32 pid, u32 seq, int type, int flags) { struct nlmsghdr *nlh; struct ndtmsg *ndtmsg; - nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg), - NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); + if (nlh == NULL) + return -ENOBUFS; - ndtmsg = NLMSG_DATA(nlh); + ndtmsg = nlmsg_data(nlh); read_lock_bh(tbl-lock); ndtmsg-ndtm_family = tbl-family; ndtmsg-ndtm_pad1 = 0; ndtmsg-ndtm_pad2 = 0; - RTA_PUT_STRING(skb, NDTA_NAME, tbl-id); - RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl-gc_interval); - RTA_PUT_U32(skb, NDTA_THRESH1, tbl-gc_thresh1); - RTA_PUT_U32(skb, NDTA_THRESH2, tbl-gc_thresh2); - RTA_PUT_U32(skb, NDTA_THRESH3, tbl-gc_thresh3); + NLA_PUT_STRING(skb, NDTA_NAME, tbl-id); + NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl-gc_interval); + NLA_PUT_U32(skb, NDTA_THRESH1, tbl-gc_thresh1); + NLA_PUT_U32(skb, NDTA_THRESH2, tbl-gc_thresh2); + NLA_PUT_U32(skb, NDTA_THRESH3, tbl-gc_thresh3); { unsigned long now = jiffies; @@ -1662,7 +1665,7 @@ static int neightbl_fill_info(struct nei .ndtc_proxy_qlen= tbl-proxy_queue.qlen, }; - RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), ndc); + NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), ndc); } { @@ -1687,55 +1690,50 @@ static int neightbl_fill_info(struct nei ndst.ndts_forced_gc_runs+= st-forced_gc_runs; } - RTA_PUT(skb, 
NDTA_STATS, sizeof(ndst), ndst); + NLA_PUT(skb, NDTA_STATS, sizeof(ndst), ndst); } BUG_ON(tbl-parms.dev); if (neightbl_fill_parms(skb, tbl-parms) 0)
[PATCH 7/7] [NEIGH]: Move netlink neighbour table bits to linux/neighbour.h
rtnetlink_rcv_msg() is no longer required to parse attributes for the neighbour tables layer, remove dependency on obsolete and buggy rta_buf. Signed-off-by: Thomas Graf [EMAIL PROTECTED] Index: net-2.6.19.git/include/linux/neighbour.h === --- net-2.6.19.git.orig/include/linux/neighbour.h +++ net-2.6.19.git/include/linux/neighbour.h @@ -62,4 +62,98 @@ struct nda_cacheinfo __u32 ndm_refcnt; }; +/* + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. 
+ / + +struct ndt_stats +{ + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN,/* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY,/* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg +{ + __u8ndtm_family; + __u8ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config +{ + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush;/* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG,/* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + #endif Index: net-2.6.19.git/include/linux/rtnetlink.h === --- net-2.6.19.git.orig/include/linux/rtnetlink.h +++ net-2.6.19.git/include/linux/rtnetlink.h @@ -384,107 +384,6 @@ struct rta_session } u; }; - - -/* - * Neighbour tables specific messages. 
- * - * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the - * NLM_F_DUMP flag set. Every neighbour table configuration is - * spread over multiple messages to avoid running into message - * size limits on systems with many interfaces. The first message - * in the sequence transports all not
Re: [PATCH wireless-dev] d80211: Switch d80211_mgmt.h to ieee80211.h style definitions
On Sat, Aug 05, 2006 at 01:45:33AM -0700, Michael Wu wrote: d80211: Switch d80211_mgmt.h to ieee80211.h style definitions This patch switches a number of defines in d80211_mgmt.h to enums taken from ieee80211.h and makes the corresponding changes in net/d80211. include/net/d80211_mgmt.h | 153 net/d80211/ieee80211.c |2 - net/d80211/ieee80211_sta.c | 50 +++--- Is this really needed? Based on the changes here, these are mostly used internally in net/d80211/ieee80211_sta.c and not in other parts of the tree. In addition, I would actually like to see ieee80211_sta.c be made mostly obsolete with a user space implementation of client MLME (e.g., the one in wpa_supplicant which is based on this ieee80211_sta.c file). Changing WLAN_STATUS_* and WLAN_REASON_* to be enums while keeping their old names is fine. However, I would rather not change the names of the information element IDs (WLAN_EID_*) into MFIE_TYPE_*. What is that 'MFIE' anyway? Management Frame IE? These IEs are not limited to management frames.. In other words, I would skip all the changes into ieee80211_sta.c and would only change d80211_mgmt.h to use enums with old names. -- Jouni Malinen PGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 802.11/crypto questions
On Tue, Aug 01, 2006 at 07:40:52AM +0200, Johannes Berg wrote: Since there's been lots of talk about multi-STA or multi-BSSID devices (more than one client, more than one AP on the same PHY) and I was just looking into some crypto stuff on bcm43xx, it got me wondering. The bcm43xx has an elaborate group key matching thingie which will be useful if it's all WPA2, but if it's say just plain WEP, then that is a problem since you'd either need to use the same keys or disable the bcm43xx crypto hardware. Hence, I suppose the question really is -- does the dscape stack allow changing the 'sw crypto needed' flag on the fly? Depends on what exactly you mean with on the fly. We have indeed changed between doing software and hardware crypto for some cases, e.g., when enabling another BSS while one BSS is using static WEP (which would need default WEP keys in hwaccel) in one BSS, we may disable hwaccel for the receive case. Not all configuration changes are yet supported with minimal modification, i.e., hostapd may end up having to disconnect all stations and start over with such a change, but still, the driver should be prepared for dynamically changing the key configuration and this can include moving some keys from hwaccel to software. If it can be done without major effort, I would recommend making the low-level drivers quite flexible as far as configuring keys to hardware acceleration is concerned. Depending on the hardware design, there may be multiple different ways of doing this and some multi-BSS configurations are likely to require changes in how the keys are used between hardware and software. -- Jouni Malinen PGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[-mm patch] net/: make code static
This patch makes needlessly global code static. Signed-off-by: Adrian Bunk [EMAIL PROTECTED] --- BTW: It doesn't seem to be intended that the new ipv4/fib_rules.c:fib4_rules_cleanup() is completely unused? include/net/ip6_fib.h |4 net/ipv4/cipso_ipv4.c |2 +- net/ipv4/fib_rules.c |4 ++-- net/ipv6/fib6_rules.c |4 ++-- net/ipv6/ip6_fib.c |6 +++--- net/ipv6/route.c |6 +++--- net/netlabel/netlabel_domainhash.c |4 ++-- 7 files changed, 13 insertions(+), 17 deletions(-) --- linux-2.6.18-rc3-mm2-full/net/ipv4/cipso_ipv4.c.old 2006-08-07 16:39:05.0 +0200 +++ linux-2.6.18-rc3-mm2-full/net/ipv4/cipso_ipv4.c 2006-08-07 16:39:15.0 +0200 @@ -60,7 +60,7 @@ * if in practice there are a lot of different DOIs this list should * probably be turned into a hash table or something similar so we * can do quick lookups. */ -DEFINE_SPINLOCK(cipso_v4_doi_list_lock); +static DEFINE_SPINLOCK(cipso_v4_doi_list_lock); static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list); /* Label mapping cache */ --- linux-2.6.18-rc3-mm2-full/net/ipv4/fib_rules.c.old 2006-08-07 16:39:33.0 +0200 +++ linux-2.6.18-rc3-mm2-full/net/ipv4/fib_rules.c 2006-08-07 16:39:51.0 +0200 @@ -101,8 +101,8 @@ return err; } -int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, -struct fib_lookup_arg *arg) +static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) { int err = -EAGAIN; struct fib_table *tbl; --- linux-2.6.18-rc3-mm2-full/net/ipv6/fib6_rules.c.old 2006-08-07 16:41:07.0 +0200 +++ linux-2.6.18-rc3-mm2-full/net/ipv6/fib6_rules.c 2006-08-07 16:41:16.0 +0200 @@ -66,8 +66,8 @@ return (struct dst_entry *) arg.result; } -int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, -int flags, struct fib_lookup_arg *arg) +static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) { struct rt6_info *rt = NULL; struct fib6_table *table; --- 
linux-2.6.18-rc3-mm2-full/include/net/ip6_fib.h.old 2006-08-07 16:41:36.0 +0200 +++ linux-2.6.18-rc3-mm2-full/include/net/ip6_fib.h 2006-08-07 16:41:43.0 +0200 @@ -192,10 +192,6 @@ struct in6_addr *daddr, int dst_len, struct in6_addr *saddr, int src_len); -extern voidfib6_clean_tree(struct fib6_node *root, - int (*func)(struct rt6_info *, void *arg), - int prune, void *arg); - extern voidfib6_clean_all(int (*func)(struct rt6_info *, void *arg), int prune, void *arg); --- linux-2.6.18-rc3-mm2-full/net/ipv6/ip6_fib.c.old2006-08-07 16:41:51.0 +0200 +++ linux-2.6.18-rc3-mm2-full/net/ipv6/ip6_fib.c2006-08-07 16:42:05.0 +0200 @@ -1169,9 +1169,9 @@ * ignoring pure split nodes) will be scanned. */ -void fib6_clean_tree(struct fib6_node *root, -int (*func)(struct rt6_info *, void *arg), -int prune, void *arg) +static void fib6_clean_tree(struct fib6_node *root, + int (*func)(struct rt6_info *, void *arg), + int prune, void *arg) { struct fib6_cleaner_t c; --- linux-2.6.18-rc3-mm2-full/net/ipv6/route.c.old 2006-08-07 16:42:24.0 +0200 +++ linux-2.6.18-rc3-mm2-full/net/ipv6/route.c 2006-08-07 16:43:05.0 +0200 @@ -613,8 +613,8 @@ return rt; } -struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl, -int flags) +static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; @@ -872,7 +872,7 @@ } static struct dst_entry *ndisc_dst_gc_list; -DEFINE_SPINLOCK(ndisc_lock); +static DEFINE_SPINLOCK(ndisc_lock); struct dst_entry *ndisc_dst_alloc(struct net_device *dev, struct neighbour *neigh, --- linux-2.6.18-rc3-mm2-full/net/netlabel/netlabel_domainhash.c.old 2006-08-07 16:43:27.0 +0200 +++ linux-2.6.18-rc3-mm2-full/net/netlabel/netlabel_domainhash.c 2006-08-07 16:43:53.0 +0200 @@ -50,11 +50,11 @@ /* Domain hash table */ /* XXX - updates should be so rare that having one spinlock for the entire * hash table should be okay */ -DEFINE_SPINLOCK(netlbl_domhsh_lock); +static
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, 2006-07-08 at 18:29 +0200, Edgar E. Iglesias wrote: Assuming you would get the code inside the jamal ifdefs working without deadlocks, you now have a tx_irq function which if fdesc = tx_ring-prunet essentially just checks for hw lockups. Let's speculate and further assume you could do the detect_tx_hung from some other context (timer or whatever) then you end up having a tx_irq function which most of the time does nothing. The next step could be to move the fdesc = tx_ring-prunet logic into the transmit path and completely disable the tx_irq when the condition is not met. Now you end up not taking the irq at all as long as fdesc = tx_ring-prunet. This was the logic I tried on the cris driver but ended up with deadlocks :) Like i said in one of my earlier postings (first email i CCed you on), this specific test i assumed was as close to what you did. But if i understand what you describe as deadlock then we have a slightly different problem that can only be solved by orphaning the skbs that are determined to have been put on the DMA ring cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Fwd: [IPROUTE2]: Explicit BNF definition for actions]
On Sat, 05 Aug 2006 12:42:35 -0400 jamal [EMAIL PROTECTED] wrote: Stephen, This is the one after the last one you responded to. Your git tree is not uptodate so i cant tell if you are missing it or not ;- I dont think i will be making more updates for this release - more to come later. cheers, jamal I needed to merge the branch in, the git tree is now up to date. -- Stephen Hemminger [EMAIL PROTECTED] And in the Packet there writ down that doome - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 9/9] [NET]: Move netlink interface bits to linux/if.h
On Fri, 04 Aug 2006 23:23:14 -0700 (PDT) David Miller [EMAIL PROTECTED] wrote: From: Stephen Hemminger [EMAIL PROTECTED] Date: Sat, 05 Aug 2006 22:42:18 -0700 Thomas Graf wrote: -/* The struct should be in sync with struct net_device_stats */ -struct rtnl_link_stats -{ - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted*/ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ Make the packets and byte counts __u64 so if/when we extend net_device stats to bigger values, the message format doesn't have to change. It's an existing ABI that he is just moving from one place to another, we are not at liberty to change its layout. Yeah that makes sense, I thought it was a new interface. -- Stephen Hemminger [EMAIL PROTECTED] And in the Packet there writ down that doome - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, 2006-07-08 at 19:04 +0200, Edgar E. Iglesias wrote: I'll give you an example. Thanks - that matches my understanding. A TCP flow sends X data and later waits for a response, host is now quietly waiting. Assume fdesc = tx_ring-prunet, so we dont free any skbs, right? I am hoping they will be freed by a tx interrupt that will force poll to happen. Or a new packet arrival etc. Just like before. Why do you see the two as different? (the tx path pruning is still going on as i noted before). If all you are looking for is a scheme to quickly free the skbs so that TCP doesnt get charged, I am not sure if this is the right one. Now assume that some part of X data gets lost, our retransmit timer hits and we want to retransmit but our socket is charged with too much data sitting on the nics tx-ring, so we don't send anything. By orphaning, those skbs won't charge the socket and the flow can retransmit. I understand that as well as the dilemma that TCP not being charged for skbs (if you decide to orphan) it holds in its retransmit queue ;- Which is not a problem unless that queue grows to be a huge one ;- cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] tiacx sparse cleanups
From: Randy Dunlap [EMAIL PROTECTED] tiacx sparse cleanups: - use NULL instead of 0 for pointer value - use C99 struct initializers - use ANSI function declaration Signed-off-by: Randy Dunlap [EMAIL PROTECTED] --- drivers/net/wireless/tiacx/common.c |2 drivers/net/wireless/tiacx/ioctl.c | 202 ++-- drivers/net/wireless/tiacx/pci.c|2 drivers/net/wireless/tiacx/usb.c|2 4 files changed, 104 insertions(+), 104 deletions(-) --- linux-2618-rc3mm2.orig/drivers/net/wireless/tiacx/common.c +++ linux-2618-rc3mm2/drivers/net/wireless/tiacx/common.c @@ -1396,7 +1396,7 @@ manage_proc_entries(const struct net_dev log(L_INIT, %sing /proc entry %s\n, remove ? remov : creat, procbuf); if (!remove) { - if (!create_proc_read_entry(procbuf, 0, 0, proc_funcs[i], adev)) { + if (!create_proc_read_entry(procbuf, 0, NULL, proc_funcs[i], adev)) { printk(acx: cannot register /proc entry %s\n, procbuf); return NOT_OK; } --- linux-2618-rc3mm2.orig/drivers/net/wireless/tiacx/ioctl.c +++ linux-2618-rc3mm2/drivers/net/wireless/tiacx/ioctl.c @@ -2163,7 +2163,7 @@ acx_ioctl_set_rates(struct net_device *n log(L_IOCTL, set_rates %s\n, extra); result = fill_ratemasks(extra, brate, orate, - acx111_supported, acx111_gen_mask, 0); + acx111_supported, acx111_gen_mask, NULL); if (result) goto end; SET_BIT(orate, brate); log(L_IOCTL, brate %08X orate %08X\n, brate, orate); @@ -2615,107 +2615,107 @@ static const iw_handler acx_ioctl_privat static const struct iw_priv_args acx_ioctl_private_args[] = { #if ACX_DEBUG -{ cmd : ACX100_IOCTL_DEBUG, - set_args : IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, - get_args : 0, - name : SetDebug }, +{ .cmd = ACX100_IOCTL_DEBUG, + .set_args = IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, + .get_args = 0, + .name = SetDebug }, #endif -{ cmd : ACX100_IOCTL_SET_PLED, - set_args : IW_PRIV_TYPE_BYTE | 2, - get_args : 0, - name : SetLEDPower }, -{ cmd : ACX100_IOCTL_GET_PLED, - set_args : 0, - get_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 2, - name : GetLEDPower }, -{ cmd : 
ACX100_IOCTL_SET_RATES, - set_args : IW_PRIV_TYPE_CHAR | 256, - get_args : 0, - name : SetRates }, -{ cmd : ACX100_IOCTL_LIST_DOM, - set_args : 0, - get_args : 0, - name : ListRegDomain }, -{ cmd : ACX100_IOCTL_SET_DOM, - set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1, - get_args : 0, - name : SetRegDomain }, -{ cmd : ACX100_IOCTL_GET_DOM, - set_args : 0, - get_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1, - name : GetRegDomain }, -{ cmd : ACX100_IOCTL_SET_SCAN_PARAMS, - set_args : IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 4, - get_args : 0, - name : SetScanParams }, -{ cmd : ACX100_IOCTL_GET_SCAN_PARAMS, - set_args : 0, - get_args : IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 4, - name : GetScanParams }, -{ cmd : ACX100_IOCTL_SET_PREAMB, - set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1, - get_args : 0, - name : SetSPreamble }, -{ cmd : ACX100_IOCTL_GET_PREAMB, - set_args : 0, - get_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1, - name : GetSPreamble }, -{ cmd : ACX100_IOCTL_SET_ANT, - set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1, - get_args : 0, - name : SetAntenna }, -{ cmd : ACX100_IOCTL_GET_ANT, - set_args : 0, - get_args : 0, - name : GetAntenna }, -{ cmd : ACX100_IOCTL_RX_ANT, - set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1, - get_args : 0, - name : SetRxAnt }, -{ cmd : ACX100_IOCTL_TX_ANT, - set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1, - get_args : 0, - name : SetTxAnt }, -{ cmd : ACX100_IOCTL_SET_PHY_AMP_BIAS, - set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1, - get_args : 0, - name : SetPhyAmpBias}, -{ cmd : ACX100_IOCTL_GET_PHY_CHAN_BUSY, - set_args : 0, - get_args : 0, - name : GetPhyChanBusy }, -{ cmd : ACX100_IOCTL_SET_ED, - set_args : IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, - get_args : 0, - name : SetED }, -{ cmd : ACX100_IOCTL_SET_CCA, - set_args : IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED | 1, - get_args : 0, - name : SetCCA }, -{ cmd : ACX100_IOCTL_MONITOR, - set_args : IW_PRIV_TYPE_INT | 
IW_PRIV_SIZE_FIXED | 2, - get_args : 0, - name : monitor }, -{ cmd : ACX100_IOCTL_TEST, - set_args : 0, - get_args : 0, - name : Test }, -{ cmd : ACX100_IOCTL_DBG_SET_MASKS, - set_args : IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 2, -
Re: [Fwd: [IPROUTE2]: Explicit BNF definition for actions]
On Mon, 2006-07-08 at 10:49 -0700, Stephen Hemminger wrote: I needed to merge the branch in, the git tree is now up to date. I have never gathered the energy to figure that stuff out so i dont envy you. In any case, seems to be missing the majority of the patches i sent, including this one whose subject you are responding to;- I dont have time to chase it right now, but i could resend later on. Or you could grab them off the list archive. cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH wireless-dev] d80211: Switch d80211_mgmt.h to ieee80211.h style definitions
On Monday 07 August 2006 10:20, Jouni Malinen wrote: Changing WLAN_STATUS_* and WLAN_REASON_* to be enum's while keeping their old names is fine. However, I would rather not change the names of the information element IDs (WLAN_EID_*) into MFIE_TYPE_*. What is that 'MFIE' anyway? Management Frame IE? These IEs are not limited to management frames.. I agree these names aren't that great, but these changes make it easier for existing fullmac drivers to switch to the d80211 headers. That is the only reason. In other words, I would skip all the changes into ieee80211_sta.c and would only change d80211_mgmt.h to use enum's with old names. Sure. -Michael Wu pgp1MZdwtVOF0.pgp Description: PGP signature
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, Aug 07, 2006 at 02:00:24PM -0400, jamal wrote: On Mon, 2006-07-08 at 19:04 +0200, Edgar E. Iglesias wrote: I'll give you an example. Thanks - that matches my understanding. A TCP flow sends X data and later waits for a response, host is now quietly waiting. Assume fdesc = tx_ring-prunet, so we dont free any skbs, right? I am hoping they will be freed by a tx interrupt that will force poll to happen. Or a new packet arrival etc. Just like before. Why do you see the two as different? (the tx path pruning is still going on as i noted before). If all you are looking for is a scheme to quickly free the skbs so that TCP doesnt get charged, I am not sure if this is the right one. I think we are out of sync :) My fault, I haven't been clear enough. First of all, I don't think the patch with jamal undefined has any problems. I assumed wrongly from the start that you somehow wanted that part to go in as well, sorry about that. As you say, the flow goes just as before. Now, with jamal defined, I only see e1000_prune_tx_ring being called if fdesc tx_ring-prunet or fdesc tx_ring-waket. In other words, the freeing of skbs is dependent on external events that might not become true if the host is quiet. Skb's could end up sitting on the ring indefinitely. Sorry for the confusion. -- Programmer Edgar E. Iglesias [EMAIL PROTECTED] 46.46.272.1946 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, 2006-07-08 at 20:47 +0200, Edgar E. Iglesias wrote: I think we are out of sync :) Imagine that, eh? ;- My, fault I haven't been clear enough. Not just your transmit but also my receive is at fault ;- (aka, I may not be listening as well as i should). Now two machines or CPUs you would think wont have this problem since they dont possess minds;- First of all, I don't think the patch with jamal undefined has any problems. I assumed wrongly from the start that you somehow wanted that part to go in aswell, sorry about that. As you say, the flow goes just as before. Now, with jamal defined, I only see e1000_prune_tx_ring beeing called if fdesc tx_ring-prunet or fdesc tx_ring-waket. Ok, thats the code that has been commented out, no? i.e there is no fdesc otherwise. In other words, the freing of skbs is dependant on external events that might not become true if the host is quiet. Skb's could end up sitting on the ring indefinitely. Yes, this has _always_ been true. In the patch i posted it merely converted things, example: -#define E1000_TX_WEIGHT 64 - /* weight of a sort for tx, to avoid endless transmit cleanup */ - if (count++ == E1000_TX_WEIGHT) break; + /* avoid endless transmit cleanup */ + if (count++ == tx_ring-prunet) break; As you can see E1000_TX_WEIGHT threshold exists today and you are right if no TX interrupts, packet arrivals or scheduled wakes happen, then the descriptor that was not pruned will sit there forever (which is a bad thing for TCP). Are we in sync? If yes, what is the likelihood they will sit there forever? I think perhaps some TX interrupts will happen, no? cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, Aug 07, 2006 at 03:03:33PM -0400, jamal wrote: On Mon, 2006-07-08 at 20:47 +0200, Edgar E. Iglesias wrote: I think we are out of sync :) Imagine that, eh? ;- My, fault I haven't been clear enough. Not just your transmit but also my receive is at fault ;- (aka, I may not be listening as well as i should). Now two machines or CPUs you would think wont have this problem since they dont possess minds;- First of all, I don't think the patch with jamal undefined has any problems. I assumed wrongly from the start that you somehow wanted that part to go in aswell, sorry about that. As you say, the flow goes just as before. Now, with jamal defined, I only see e1000_prune_tx_ring beeing called if fdesc tx_ring-prunet or fdesc tx_ring-waket. Ok, thats the code that has been commented out, no? i.e there is no fdesc otherwise. Exactly. In other words, the freing of skbs is dependant on external events that might not become true if the host is quiet. Skb's could end up sitting on the ring indefinitely. Yes, this has _always_ been true. In the patch i posted it merely converted things, example: -#define E1000_TX_WEIGHT 64 - /* weight of a sort for tx, to avoid endless transmit cleanup */ - if (count++ == E1000_TX_WEIGHT) break; + /* avoid endless transmit cleanup */ + if (count++ == tx_ring-prunet) break; As you can see E1000_TX_WEIGHT threshold exists today and you are right if no TX interupts, packet arrivals or scheduled wakes happen the that descriptor that was not pruned will sit there forever (which is a bad thing for TCP). Are we in sync? Yep :) If yes, what is the likelihood they will sit there forever? I think perhaps some TX interupts will happen, no? with jamal undefined, absolutely. With jamal defined, TX interrupts will come but I couldnt find a way into e1000_prune_tx_ring unless fdesc met the conditions. Correct? Best regards -- Programmer Edgar E. 
Iglesias [EMAIL PROTECTED] 46.46.272.1946 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, 2006-07-08 at 21:14 +0200, Edgar E. Iglesias wrote: If yes, what is the likelihood they will sit there forever? I think perhaps some TX interupts will happen, no? with jamal undefined, absolutely. With jamal defined, TX interrupts will come but I couldnt find a way into e1000_prune_tx_ring unless fdesc met the conditions. Correct? Forgive me since i am still missing something .. Observe that the same threshold used in two different ways: 1) in tx path tx_ring-prunet is to check on when we should _start_ to prune. 2) on rx path tx_ring-prunet is to check when to _stop_ pruning. i.e #1 is a preemptive action. You seem to suggest doing it the way i was it made things worse? Note that TX interrupts will happen as long as you dont prune some descriptors (I am assuming this, I havent checked the settings). cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH]: e1000: Janitor: Use #defined values for literals
Resending patch from 23 June 2006; there was some confusion about whether a similar patch had already been applied; seems it wasn't. Minor janitorial patch: use #defines for literal values. Signed-off-by: Linas Vepstas [EMAIL PROTECTED] drivers/net/e1000/e1000_main.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) Index: linux-2.6.18-rc3-mm2/drivers/net/e1000/e1000_main.c === --- linux-2.6.18-rc3-mm2.orig/drivers/net/e1000/e1000_main.c2006-08-07 14:39:37.0 -0500 +++ linux-2.6.18-rc3-mm2/drivers/net/e1000/e1000_main.c 2006-08-07 15:06:31.0 -0500 @@ -4955,8 +4955,8 @@ static pci_ers_result_t e1000_io_slot_re } pci_set_master(pdev); - pci_enable_wake(pdev, 3, 0); - pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */ + pci_enable_wake(pdev, PCI_D3hot, 0); + pci_enable_wake(pdev, PCI_D3cold, 0); /* Perform card reset only on one instance of the card */ if (PCI_FUNC (pdev-devfn) != 0) - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH]: e1000: Janitor: Use #defined values for literals
Linas Vepstas wrote: Resending patch from 23 June 2006; there was some confusion about whether a similar patch had already been applied; seems it wasn't. Minor janitorial patch: use #defines for literal values. Signed-off-by: Linas Vepstas [EMAIL PROTECTED] Acked-by: Auke Kok [EMAIL PROTECTED] drivers/net/e1000/e1000_main.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) Index: linux-2.6.18-rc3-mm2/drivers/net/e1000/e1000_main.c === --- linux-2.6.18-rc3-mm2.orig/drivers/net/e1000/e1000_main.c2006-08-07 14:39:37.0 -0500 +++ linux-2.6.18-rc3-mm2/drivers/net/e1000/e1000_main.c 2006-08-07 15:06:31.0 -0500 @@ -4955,8 +4955,8 @@ static pci_ers_result_t e1000_io_slot_re } pci_set_master(pdev); - pci_enable_wake(pdev, 3, 0); - pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */ + pci_enable_wake(pdev, PCI_D3hot, 0); + pci_enable_wake(pdev, PCI_D3cold, 0); /* Perform card reset only on one instance of the card */ if (PCI_FUNC (pdev-devfn) != 0) - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH]: e1000: Janitor: Use #defined values for literals
hopefully queued-by, as well... - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, Aug 07, 2006 at 03:34:30PM -0400, jamal wrote: On Mon, 2006-07-08 at 21:14 +0200, Edgar E. Iglesias wrote: If yes, what is the likelihood they will sit there forever? I think perhaps some TX interupts will happen, no? with jamal undefined, absolutely. With jamal defined, TX interrupts will come but I couldnt find a way into e1000_prune_tx_ring unless fdesc met the conditions. Correct? Forgive me since i am still missing something .. Observe that the same threshold used in two different ways: 1) in tx path tx_ring-prunet is to check on when we should _start_ to prune. 2) on rx path tx_ring-prunet is to check when to _stop_ pruning. I can see two calls to e1000_prune_tx_ring with jamal _defined_. 1. tx path +#ifdef jamal + { + int fdesc = E1000_DESC_UNUSED(tx_ring); + if (unlikely(fdesc tx_ring-waket)) + e1000_prune_tx_ring(adapter,tx_ring); + } +#endif 2. tx and rx path +#ifdef jamal + spin_lock(tx_ring-tx_lock); + { + int fdesc = E1000_DESC_UNUSED(tx_ring); + if (fdesc tx_ring-prunet) { + if (e1000_prune_tx_ring(adapter,tx_ring)) + cleaned = TRUE; } } + spin_unlock(tx_ring-tx_lock); +#else + if (e1000_prune_tx_ring(adapter,tx_ring)) + cleaned = TRUE; +#endif Assume a ring of 64 entries, prunet of 16, waket of 8. Now host sends 40 skbs and stops. tx-ring holds 40 skbs, has 24 free. TX interrupts hit you, you may even be receiveing packets but I don't see how you enter prune_tx_ring without more packets going out via hard_start_xmit? skb's will sit on the ring until more packets are sent from the quiet host. As you can see, with jamal _undefined_ e1000_prune_tx_ring is called unconditionally and I beleive things will work ok. I am not familiar with this code nor the hw so I'm probably missing something fundamental. Best regards -- Programmer Edgar E. 
Iglesias [EMAIL PROTECTED] 46.46.272.1946 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Excess use of packed attribute
After reading: http://bugzilla.kernel.org/show_bug.cgi?id=6693 I noticed there were stupid uses of the packed attribute in several network headers. Silly offenders: include/net/ipx.h include/net/ieee80211.h include/net/ip6_tunnel.h include/net/ndisc.h include/linux/if_ether.h include/linux/if_fddi.h include/linux/sctp.h -- really bad -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/9] tulip patches from parisc-linux
[Val asked that I split this out, instead of just publishing a git tree, so here goes nothing. I don't think I've ever used git-send-email before, but hopefully I won't screw up too badly.] The following patch series brings the mainline tulip driver in synch with the modifications made in parisc-linux. Most of these patches have been in parisc-linux cvs for the better part of several years, so they are quite well tested. It contains the following changes, 21142.c |4 +- de2104x.c |6 +-- interrupt.c |4 ++ media.c | 40 +- timer.c | 14 ++- tulip.h | 43 ++-- tulip_core.c | 102 -- winbond-840.c | 68 +- 8 files changed, 164 insertions(+), 117 deletions(-) Francois Romieu: [TULIP] Defer tulip_select_media() to process context Grant Grundler: [TULIP] Fix PHY init and reset [TULIP] Print physical address in tulip_init_one [TULIP] Make tulip_stop_rxtx() failure case slightly more informative [TULIP] Clean tulip.h so it can be used by winbond-840.c [TULIP] Flush MMIO writes in reset sequence [TULIP] Fix IRQ/DMA race Helge Deller: [TULIP] Fix section mismatch in de2104x.c Thibaut Varene: [TULIP] Make DS21143 printout match lspci output Cheers! Kyle M. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Reboot-Shutdown signal through net device.
Our PCI network device has a special feature that allows out-of-band signaling/messaging (using a special interrupt and shared registers) to control system state: Reboot, Shutdown. What would be an acceptable method of executing such controls? So, the sequence is: 1. Device Driver receives Reboot-or-Shutdown message. 2. Device Driver initiates Reboot-or-Shutdown sequence (how?). I have considered a usermode helper; it looks like this solution is favored... Any recommendations? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] ipx: header length validation needed
IPX is not checking for non-linear (and short) packets in its receive routine. This is serious because it may mean it ends up reading past the end of the skb. This may be related to this bug, because sky2 will copy small packets into small skbs. http://bugzilla.kernel.org/show_bug.cgi?id=6693 Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- a/net/ipx/af_ipx.c 2006-07-07 13:02:31.0 -0700 +++ b/net/ipx/af_ipx.c 2006-08-07 13:18:08.0 -0700 @@ -1642,6 +1642,9 @@ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) goto out; + if (!pskb_may_pull(skb, sizeof(struct ipxhdr))) + goto drop; + ipx = ipx_hdr(skb); ipx_pktsize = ntohs(ipx->ipx_pktsize); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/9] [TULIP] Fix PHY init and reset
From: Grant Grundler [EMAIL PROTECTED] A whole slew of fixes for tulip_select_media for: - Flush posted MMIO writes as per PCI spec - Polling the reset bit (bit 15) is required to determine when the init sequence can be sent. This fixes tulip on HP PA-RISC systems, which use DP83840A and LXT971D PHYs. Tested for several years on a variety of HP PA-RISC systems. [Initial work done by Grant Grundler, DS21142 support added by Thibaut Varene.] Signed-off-by: Grant Grundler [EMAIL PROTECTED] Signed-off-by: Thibaut Varene [EMAIL PROTECTED] Signed-off-by: Kyle McMartin [EMAIL PROTECTED] --- drivers/net/tulip/media.c | 40 +--- 1 files changed, 37 insertions(+), 3 deletions(-) diff --git a/drivers/net/tulip/media.c b/drivers/net/tulip/media.c index e9bc2a9..5093d87 100644 --- a/drivers/net/tulip/media.c +++ b/drivers/net/tulip/media.c @@ -44,8 +44,10 @@ static const unsigned char comet_miireg2 /* MII transceiver control section. Read and write the MII registers using software-generated serial - MDIO protocol. See the MII specifications or DP83840A data sheet - for details. */ + MDIO protocol. + See IEEE 802.3-2002.pdf (Section 2, Chapter 22.2.4 Management functions) + or DP83840A data sheet for more details. 
+ */ int tulip_mdio_read(struct net_device *dev, int phy_id, int location) { @@ -261,24 +263,56 @@ void tulip_select_media(struct net_devic u16 *reset_sequence = ((u16*)(p+3))[init_length]; int reset_length = p[2 + init_length*2]; misc_info = reset_sequence + reset_length; - if (startup) + if (startup) { + int timeout = 10; for (i = 0; i reset_length; i++) iowrite32(get_u16(reset_sequence[i]) 16, ioaddr + CSR15); + + /* flush posted writes */ + ioread32(ioaddr + CSR15); + + /* Sect 3.10.3 in DP83840A.pdf (p39) */ + udelay(500); + + /* Section 4.2 in DP83840A.pdf (p43) */ + /* and IEEE 802.3 22.2.4.1.1 Reset */ + while (timeout-- + (tulip_mdio_read (dev, phy_num, MII_BMCR) BMCR_RESET)) + udelay(100); + } for (i = 0; i init_length; i++) iowrite32(get_u16(init_sequence[i]) 16, ioaddr + CSR15); + + ioread32(ioaddr + CSR15); /* flush posted writes */ } else { u8 *init_sequence = p + 2; u8 *reset_sequence = p + 3 + init_length; int reset_length = p[2 + init_length]; misc_info = (u16*)(reset_sequence + reset_length); if (startup) { + int timeout = 10; iowrite32(mtable-csr12dir | 0x100, ioaddr + CSR12); for (i = 0; i reset_length; i++) iowrite32(reset_sequence[i], ioaddr + CSR12); + + /* flush posted writes */ + ioread32(ioaddr + CSR12); + + /* Sect 3.10.3 in DP83840A.pdf (p39) */ + udelay(500); + + /* Section 4.2 in DP83840A.pdf (p43) */ + /* and IEEE 802.3 22.2.4.1.1 Reset */ + while (timeout-- + (tulip_mdio_read (dev, phy_num, MII_BMCR) BMCR_RESET)) + udelay(100); } for (i = 0; i init_length; i++) iowrite32(init_sequence[i], ioaddr + CSR12); + + ioread32(ioaddr + CSR12); /* flush posted writes */ } + tmp_info = get_u16(misc_info[1]); if (tmp_info) tp-advertising[phy_num] = tmp_info | 1; -- 1.4.1.1 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/9] [TULIP] Defer tulip_select_media() to process context
From: Francois Romieu [EMAIL PROTECTED] Move tulip_select_media() processing to a workqueue, instead of delaying in interrupt context. [Edit by Kyle to use kevent thread, instead of creating its own workqueue.] Signed-off-by: Kyle McMartin [EMAIL PROTECTED] --- drivers/net/tulip/21142.c |4 +-- drivers/net/tulip/timer.c | 14 - drivers/net/tulip/tulip.h | 19 ++-- drivers/net/tulip/tulip_core.c | 64 +++- 4 files changed, 60 insertions(+), 41 deletions(-) diff --git a/drivers/net/tulip/21142.c b/drivers/net/tulip/21142.c index 683f14b..ffba0c1 100644 --- a/drivers/net/tulip/21142.c +++ b/drivers/net/tulip/21142.c @@ -26,9 +26,9 @@ static u16 t21142_csr15[] = { 0x0008, 0x /* Handle the 21143 uniquely: do autoselect with NWay, not the EEPROM list of available transceivers. */ -void t21142_timer(unsigned long data) +void t21142_media_task(void *data) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = data; struct tulip_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp-base_addr; int csr12 = ioread32(ioaddr + CSR12); diff --git a/drivers/net/tulip/timer.c b/drivers/net/tulip/timer.c index e058a9f..272ef62 100644 --- a/drivers/net/tulip/timer.c +++ b/drivers/net/tulip/timer.c @@ -18,13 +18,14 @@ #include linux/pci.h #include tulip.h -void tulip_timer(unsigned long data) +void tulip_media_task(void *data) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = data; struct tulip_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp-base_addr; u32 csr12 = ioread32(ioaddr + CSR12); int next_tick = 2*HZ; + unsigned long flags; if (tulip_debug 2) { printk(KERN_DEBUG %s: Media selection tick, %s, status %8.8x mode @@ -126,6 +127,15 @@ void tulip_timer(unsigned long data) } break; } + + + spin_lock_irqsave(tp-lock, flags); + if (tp-timeout_recovery) { + tulip_tx_timeout_complete(tp, ioaddr); + tp-timeout_recovery = 0; + } + spin_unlock_irqrestore(tp-lock, flags); + /* mod_timer synchronizes us with potential 
add_timer calls * from interrupts. */ diff --git a/drivers/net/tulip/tulip.h b/drivers/net/tulip/tulip.h index 951af5e..408fe46 100644 --- a/drivers/net/tulip/tulip.h +++ b/drivers/net/tulip/tulip.h @@ -43,7 +43,8 @@ struct tulip_chip_table { int io_size; int valid_intrs;/* CSR7 interrupt enable settings */ int flags; - void (*media_timer) (unsigned long data); + void (*media_timer) (unsigned long); + void (*media_task) (void *); }; @@ -371,6 +372,7 @@ #endif unsigned int medialock:1; /* Don't sense media type. */ unsigned int mediasense:1; /* Media sensing in progress. */ unsigned int nway:1, nwayset:1; /* 21143 internal NWay. */ + unsigned int timeout_recovery:1; unsigned int csr0; /* CSR0 setting. */ unsigned int csr6; /* Current CSR6 control settings. */ unsigned char eeprom[EEPROM_SIZE]; /* Serial EEPROM contents. */ @@ -389,6 +391,7 @@ #endif void __iomem *base_addr; int csr12_shadow; int pad0; /* Used for 8-byte alignment */ + struct work_struct media_work; }; @@ -403,7 +406,7 @@ struct eeprom_fixup { /* 21142.c */ extern u16 t21142_csr14[]; -void t21142_timer(unsigned long data); +void t21142_media_task(void *data); void t21142_start_nway(struct net_device *dev); void t21142_lnk_change(struct net_device *dev, int csr5); @@ -441,7 +444,7 @@ void pnic_lnk_change(struct net_device * void pnic_timer(unsigned long data); /* timer.c */ -void tulip_timer(unsigned long data); +void tulip_media_task(void *data); void mxic_timer(unsigned long data); void comet_timer(unsigned long data); @@ -493,4 +496,14 @@ static inline void tulip_restart_rxtx(st tulip_start_rxtx(tp); } +static inline void tulip_tx_timeout_complete(struct tulip_private *tp, void __iomem *ioaddr) +{ + /* Stop and restart the chip's Tx processes. */ + tulip_restart_rxtx(tp); + /* Trigger an immediate transmit demand. 
*/ + iowrite32(0, ioaddr + CSR1); + + tp-stats.tx_errors++; +} + #endif /* __NET_TULIP_H__ */ diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index 363e5f6..bdb6698 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -130,7 +130,14 @@ #else int tulip_debug = 1; #endif +static void tulip_timer(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct tulip_private *tp = netdev_priv(dev); + if (netif_running(dev)) + schedule_work(tp-media_work); +} /* * This table use during operation
[PATCH 9/9] [TULIP] Fix section mismatch in de2104x.c
From: Helge Deller [EMAIL PROTECTED] WARNING: drivers/net/tulip/de2104x.o - Section mismatch: reference to .init.text:de_init_one from .data.rel.local after 'de_driver' (at offset 0x20) WARNING: drivers/net/tulip/de2104x.o - Section mismatch: reference to .exit.text:de_remove_one from .data.rel.local after 'de_driver' (at offset 0x28) Signed-off-by: Helge Deller [EMAIL PROTECTED] Signed-off-by: Kyle McMartin [EMAIL PROTECTED] --- drivers/net/tulip/de2104x.c |6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c index d05c5aa..150a05a 100644 --- a/drivers/net/tulip/de2104x.c +++ b/drivers/net/tulip/de2104x.c @@ -1730,7 +1730,7 @@ static void __init de21040_get_media_inf } /* Note: this routine returns extra data bits for size detection. */ -static unsigned __init tulip_read_eeprom(void __iomem *regs, int location, int addr_len) +static unsigned __devinit tulip_read_eeprom(void __iomem *regs, int location, int addr_len) { int i; unsigned retval = 0; @@ -1926,7 +1926,7 @@ bad_srom: goto fill_defaults; } -static int __init de_init_one (struct pci_dev *pdev, +static int __devinit de_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *dev; @@ -2082,7 +2082,7 @@ err_out_free: return rc; } -static void __exit de_remove_one (struct pci_dev *pdev) +static void __devexit de_remove_one (struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); struct de_private *de = dev-priv; -- 1.4.1.1 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/9] [TULIP] Make tulip_stop_rxtx() failure case slightly more informative
From: Grant Grundler [EMAIL PROTECTED] Signed-off-by: Grant Grundler [EMAIL PROTECTED] Signed-off-by: Kyle McMartin [EMAIL PROTECTED] --- drivers/net/tulip/tulip.h |7 +-- 1 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/tulip/tulip.h b/drivers/net/tulip/tulip.h index 3bcfbf3..d79c7ae 100644 --- a/drivers/net/tulip/tulip.h +++ b/drivers/net/tulip/tulip.h @@ -473,8 +473,11 @@ static inline void tulip_stop_rxtx(struc udelay(10); if (!i) - printk(KERN_DEBUG %s: tulip_stop_rxtx() failed\n, - pci_name(tp-pdev)); + printk(KERN_DEBUG %s: tulip_stop_rxtx() failed +(CSR5 0x%x CSR6 0x%x)\n, + pci_name(tp-pdev), + ioread32(ioaddr + CSR5), + ioread32(ioaddr + CSR6)); } } -- 1.4.1.1 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/9] [TULIP] Flush MMIO writes in reset sequence
From: Grant Grundler [EMAIL PROTECTED] The obvious safe register to read is one from PCI config space. Signed-off-by: Grant Grundler [EMAIL PROTECTED] Signed-off-by: Kyle McMartin [EMAIL PROTECTED] --- drivers/net/tulip/tulip_core.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index 6b54572..81905f4 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -295,12 +295,14 @@ static void tulip_up(struct net_device * /* Reset the chip, holding bit 0 set at least 50 PCI cycles. */ iowrite32(0x0001, ioaddr + CSR0); + pci_read_config_dword(tp-pdev, PCI_COMMAND, i); /* flush write */ udelay(100); /* Deassert reset. Wait the specified 50 PCI cycles after a reset by initializing Tx and Rx queues and the address filter list. */ iowrite32(tp-csr0, ioaddr + CSR0); + pci_read_config_dword(tp-pdev, PCI_COMMAND, i); /* flush write */ udelay(100); if (tulip_debug 1) -- 1.4.1.1 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/9] [TULIP] Fix IRQ/DMA race
From: Grant Grundler [EMAIL PROTECTED] IRQs are racing with tulip_down(). DMA can be restarted by tulip_interrupt() _after_ we call tulip_stop_rxtx() and the DMA buffers are unmapped. The result is an MCA (hard crash on ia64) because of an IO TLB miss. Signed-off-by: Grant Grundler [EMAIL PROTECTED] Signed-off-by: Kyle McMartin [EMAIL PROTECTED] --- drivers/net/tulip/interrupt.c |4 drivers/net/tulip/tulip_core.c | 17 +++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c index 99ccf2e..19faa0e 100644 --- a/drivers/net/tulip/interrupt.c +++ b/drivers/net/tulip/interrupt.c @@ -87,6 +87,10 @@ int tulip_refill_rx(struct net_device *d } tp-rx_ring[entry].status = cpu_to_le32(DescOwned); } + +/* FIXME: restarting DMA breaks tulip_down() code path. + tulip_down() will unmap the RX and TX descriptors. + */ if(tp-chip_id == LC82C168) { if(((ioread32(tp-base_addr + CSR5)17)0x07) == 4) { /* Rx stopped due to out of buffers, diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index 81905f4..363e5f6 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -742,21 +742,20 @@ #endif /* Disable interrupts by clearing the interrupt mask. */ iowrite32 (0x, ioaddr + CSR7); + ioread32 (ioaddr + CSR7); /* flush posted write */ - /* Stop the Tx and Rx processes. 
*/ - tulip_stop_rxtx(tp); + spin_unlock_irqrestore (tp-lock, flags); - /* prepare receive buffers */ - tulip_refill_rx(dev); + free_irq (dev-irq, dev); /* no more races after this */ + tulip_stop_rxtx(tp);/* Stop DMA */ - /* release any unconsumed transmit buffers */ - tulip_clean_tx_ring(tp); + /* Put driver back into the state we start with */ + tulip_refill_rx(dev); /* prepare RX buffers */ + tulip_clean_tx_ring(tp);/* clean up unsent TX buffers */ if (ioread32 (ioaddr + CSR6) != 0x) tp-stats.rx_missed_errors += ioread32 (ioaddr + CSR8) 0x; - spin_unlock_irqrestore (tp-lock, flags); - init_timer(tp-timer); tp-timer.data = (unsigned long)dev; tp-timer.function = tulip_tbl[tp-chip_id].media_timer; @@ -782,7 +781,6 @@ static int tulip_close (struct net_devic printk (KERN_DEBUG %s: Shutting down ethercard, status was %2.2x.\n, dev-name, ioread32 (ioaddr + CSR5)); - free_irq (dev-irq, dev); /* Free all the skbuffs in the Rx queue. */ for (i = 0; i RX_RING_SIZE; i++) { @@ -1752,7 +1750,6 @@ static int tulip_suspend (struct pci_dev tulip_down(dev); netif_device_detach(dev); - free_irq(dev-irq, dev); pci_save_state(pdev); pci_disable_device(pdev); -- 1.4.1.1 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 8/9] [TULIP] Make DS21143 printout match lspci output
From: Thibaut Varene [EMAIL PROTECTED] Signed-off-by: Thibaut Varene [EMAIL PROTECTED] Signed-off-by: Kyle McMartin [EMAIL PROTECTED] --- drivers/net/tulip/tulip_core.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index bdb6698..21eaeb2 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -155,7 +155,7 @@ struct tulip_chip_table tulip_tbl[] = { tulip_media_task }, /* DC21142, DC21143 */ - { Digital DS21143 Tulip, 128, 0x0801fbff, + { Digital DS21142/43 Tulip, 128, 0x0801fbff, HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI | HAS_NWAY | HAS_INTR_MITIGATION | HAS_PCI_MWI, tulip_timer, t21142_media_task }, -- 1.4.1.1 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/9] [TULIP] Clean tulip.h so it can be used by winbond-840.c
From: Grant Grundler [EMAIL PROTECTED] Include tulip.h in winbond-840.c and clean up lots of redundant definitions. Signed-off-by: Grant Grundler [EMAIL PROTECTED] Signed-off-by: Kyle McMartin [EMAIL PROTECTED] --- drivers/net/tulip/tulip.h | 17 ++ drivers/net/tulip/tulip_core.c |7 +--- drivers/net/tulip/winbond-840.c | 68 ++- 3 files changed, 37 insertions(+), 55 deletions(-) diff --git a/drivers/net/tulip/tulip.h b/drivers/net/tulip/tulip.h index d79c7ae..951af5e 100644 --- a/drivers/net/tulip/tulip.h +++ b/drivers/net/tulip/tulip.h @@ -30,11 +30,10 @@ #include asm/irq.h /* undefine, or define to various debugging levels (4 == obscene levels) */ #define TULIP_DEBUG 1 -/* undefine USE_IO_OPS for MMIO, define for PIO */ #ifdef CONFIG_TULIP_MMIO -# undef USE_IO_OPS +#define TULIP_BAR 1 /* CBMA */ #else -# define USE_IO_OPS 1 +#define TULIP_BAR 0 /* CBIO */ #endif @@ -142,6 +141,7 @@ enum status_bits { RxNoBuf = 0x80, RxIntr = 0x40, TxFIFOUnderflow = 0x20, + RxErrIntr = 0x10, TxJabber = 0x08, TxNoBuf = 0x04, TxDied = 0x02, @@ -192,9 +192,14 @@ struct tulip_tx_desc { enum desc_status_bits { - DescOwned = 0x8000, - RxDescFatalErr = 0x8000, - RxWholePkt = 0x0300, + DescOwned= 0x8000, + DescWholePkt = 0x6000, + DescEndPkt = 0x4000, + DescStartPkt = 0x2000, + DescEndRing = 0x0200, + DescUseLink = 0x0100, + RxDescFatalErr = 0x008000, + RxWholePkt = 0x0300, }; diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index bf93679..6b54572 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -1361,11 +1361,8 @@ #endif if (pci_request_regions (pdev, tulip)) goto err_out_free_netdev; -#ifndef USE_IO_OPS - ioaddr = pci_iomap(pdev, 1, tulip_tbl[chip_idx].io_size); -#else - ioaddr = pci_iomap(pdev, 0, tulip_tbl[chip_idx].io_size); -#endif + ioaddr = pci_iomap(pdev, TULIP_BAR, tulip_tbl[chip_idx].io_size); + if (!ioaddr) goto err_out_free_res; diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c index 
7f41481..fa3a7b3 100644 --- a/drivers/net/tulip/winbond-840.c +++ b/drivers/net/tulip/winbond-840.c @@ -90,10 +90,8 @@ static int full_duplex[MAX_UNITS] = {-1, Making the Tx ring too large decreases the effectiveness of channel bonding and packet priority. There are no ill effects from too-large receive rings. */ -#define TX_RING_SIZE 16 #define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */ #define TX_QUEUE_LEN_RESTART 5 -#define RX_RING_SIZE 32 #define TX_BUFLIMIT(1024-128) @@ -137,6 +135,8 @@ #include asm/processor.h /* Processor #include asm/io.h #include asm/irq.h +#include tulip.h + /* These identify the driver base version and may not be removed. */ static char version[] __devinitdata = KERN_INFO DRV_NAME .c:v DRV_VERSION (2.4 port) DRV_RELDATE Donald Becker [EMAIL PROTECTED]\n @@ -242,8 +242,8 @@ static const struct pci_id_info pci_id_t }; /* This driver was written to use PCI memory space, however some x86 systems - work only with I/O space accesses. Pass -DUSE_IO_OPS to use PCI I/O space - accesses instead of memory space. */ + work only with I/O space accesses. See CONFIG_TULIP_MMIO in .config +*/ /* Offsets to the Command and Status Registers, CSRs. While similar to the Tulip, these registers are longword aligned. @@ -261,21 +261,11 @@ enum w840_offsets { CurTxDescAddr=0x4C, CurTxBufAddr=0x50, }; -/* Bits in the interrupt status/enable registers. */ -/* The bits in the Intr Status/Enable registers, mostly interrupt sources. */ -enum intr_status_bits { - NormalIntr=0x1, AbnormalIntr=0x8000, - IntrPCIErr=0x2000, TimerInt=0x800, - IntrRxDied=0x100, RxNoBuf=0x80, IntrRxDone=0x40, - TxFIFOUnderflow=0x20, RxErrIntr=0x10, - TxIdle=0x04, IntrTxStopped=0x02, IntrTxDone=0x01, -}; - /* Bits in the NetworkConfig register. 
*/ enum rx_mode_bits { - AcceptErr=0x80, AcceptRunt=0x40, - AcceptBroadcast=0x20, AcceptMulticast=0x10, - AcceptAllPhys=0x08, AcceptMyPhys=0x02, + AcceptErr=0x80, + RxAcceptBroadcast=0x20, AcceptMulticast=0x10, + RxAcceptAllPhys=0x08, AcceptMyPhys=0x02, }; enum mii_reg_bits { @@ -297,13 +287,6 @@ struct w840_tx_desc { u32 buffer1, buffer2; }; -/* Bits in network_desc.status */ -enum desc_status_bits { - DescOwn=0x8000, DescEndRing=0x0200, DescUseLink=0x0100, - DescWholePkt=0x6000, DescStartPkt=0x2000, DescEndPkt=0x4000, - DescIntr=0x8000, -}; - #define MII_CNT1 /* winbond only supports one MII */ struct netdev_private { struct
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, 7 Aug 2006, jamal wrote: -#define E1000_TX_WEIGHT 64 - /* weight of a sort for tx, to avoid endless transmit cleanup */ - if (count++ == E1000_TX_WEIGHT) break; + /* avoid endless transmit cleanup */ + if (count++ == tx_ring-prunet) break; As you can see, the E1000_TX_WEIGHT threshold exists today, and you are right: if no TX interrupts, packet arrivals or scheduled wakes happen, then the descriptor that was not pruned will sit there forever (which is a bad thing for TCP). Are we in sync? If yes, what is the likelihood they will sit there forever? I think perhaps some TX interrupts will happen, no? We don't enable it right now, but you could use the TXQE (tx queue empty) interrupt to avoid the starvation case. I think it might flood you with TXQE interrupts however, so we'd probably have to figure out some way to turn it on occasionally. Jesse - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/9] [TULIP] Print physical address in tulip_init_one
From: Grant Grundler [EMAIL PROTECTED] As the cookie returned by pci_iomap() is fairly useless... Signed-off-by: Grant Grundler [EMAIL PROTECTED] Signed-off-by: Kyle McMartin [EMAIL PROTECTED] --- drivers/net/tulip/tulip_core.c | 10 -- 1 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index 7351831..bf93679 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -1644,8 +1644,14 @@ #endif if (register_netdev(dev)) goto err_out_free_ring; - printk(KERN_INFO %s: %s rev %d at %p,, - dev-name, chip_name, chip_rev, ioaddr); + printk(KERN_INFO %s: %s rev %d at +#ifdef CONFIG_TULIP_MMIO + MMIO +#else + Port +#endif +0x%lx,, dev-name, chip_name, chip_rev, + pci_resource_start(pdev, TULIP_BAR)); pci_set_drvdata(pdev, dev); if (eeprom_missing) -- 1.4.1.1 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: forcedeth gigabit detection
Frank v Waveren [EMAIL PROTECTED] writes: The nforce2 builtin network on my A7N8X-delux motherboard won't detect as gigabit-capable using the forcedeth driver. Asustek doesn't seem to indicate it has gigabit ports, are you sure your mb does have them? Perhaps it's a different version, something like A7N8X-E? -- Krzysztof Halasa - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Replace pci_module_init with pci_register_driver in drivers/net
From: Pavel Roskin [EMAIL PROTECTED] --- drivers/net/3c59x.c |2 +- drivers/net/8139cp.c |2 +- drivers/net/8139too.c |2 +- drivers/net/acenic.c |2 +- drivers/net/amd8111e.c|2 +- drivers/net/arcnet/com20020-pci.c |2 +- drivers/net/b44.c |2 +- drivers/net/bnx2.c|2 +- drivers/net/cassini.c |2 +- drivers/net/chelsio/cxgb2.c |2 +- drivers/net/defxx.c |2 +- drivers/net/dl2k.c|2 +- drivers/net/e100.c|2 +- drivers/net/e1000/e1000_main.c|2 +- drivers/net/eepro100.c|2 +- drivers/net/epic100.c |2 +- drivers/net/fealnx.c |2 +- drivers/net/forcedeth.c |2 +- drivers/net/hp100.c |2 +- drivers/net/ixgb/ixgb_main.c |2 +- drivers/net/natsemi.c |2 +- drivers/net/ne2k-pci.c|2 +- drivers/net/ns83820.c |2 +- drivers/net/pci-skeleton.c|2 +- drivers/net/pcnet32.c |2 +- drivers/net/r8169.c |2 +- drivers/net/rrunner.c |2 +- drivers/net/s2io.c|2 +- drivers/net/saa9730.c |2 +- drivers/net/sis190.c |2 +- drivers/net/sis900.c |2 +- drivers/net/sk98lin/skge.c|2 +- drivers/net/skfp/skfddi.c |2 +- drivers/net/skge.c|2 +- drivers/net/starfire.c|2 +- drivers/net/sundance.c|2 +- drivers/net/sungem.c |2 +- drivers/net/tc35815.c |2 +- drivers/net/tg3.c |2 +- drivers/net/tokenring/3c359.c |2 +- drivers/net/tokenring/lanstreamer.c |2 +- drivers/net/tokenring/olympic.c |2 +- drivers/net/tulip/de2104x.c |2 +- drivers/net/tulip/de4x5.c |2 +- drivers/net/tulip/dmfe.c |2 +- drivers/net/tulip/tulip_core.c|2 +- drivers/net/tulip/uli526x.c |2 +- drivers/net/tulip/winbond-840.c |2 +- drivers/net/tulip/xircom_tulip_cb.c |2 +- drivers/net/typhoon.c |2 +- drivers/net/via-rhine.c |2 +- drivers/net/via-velocity.c|2 +- drivers/net/wan/dscc4.c |2 +- drivers/net/wan/farsync.c |2 +- drivers/net/wan/lmc/lmc_main.c|2 +- drivers/net/wan/pc300_drv.c |2 +- drivers/net/wan/pci200syn.c |2 +- drivers/net/wan/wanxl.c |2 +- drivers/net/wireless/atmel_pci.c |2 +- drivers/net/wireless/ipw2100.c|2 +- drivers/net/wireless/ipw2200.c|2 +- drivers/net/wireless/orinoco_nortel.c |2 +- drivers/net/wireless/orinoco_pci.c|2 +- 
drivers/net/wireless/orinoco_plx.c|2 +- drivers/net/wireless/orinoco_tmd.c|2 +- drivers/net/wireless/prism54/islpci_hotplug.c |2 +- drivers/net/yellowfin.c |2 +- 67 files changed, 67 insertions(+), 67 deletions(-) diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index 80e8ca0..7c23813 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -3169,7 +3169,7 @@ static int __init vortex_init(void) { int pci_rc, eisa_rc; - pci_rc = pci_module_init(vortex_driver); + pci_rc = pci_register_driver(vortex_driver); eisa_rc = vortex_eisa_init(); if (pci_rc == 0) diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 1428bb7..7061a23 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -2098,7 +2098,7 @@ static int __init cp_init (void) #ifdef MODULE printk(%s, version); #endif - return pci_module_init (cp_driver); + return pci_register_driver (cp_driver); } static void __exit cp_exit (void) diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index e4f4eaf..0b58725 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -2629,7 +2629,7 @@ #ifdef MODULE printk (KERN_INFO RTL8139_DRIVER_NAME \n); #endif - return pci_module_init (rtl8139_pci_driver); +
[RFC: -mm patch] bcm43xx_main.c: remove 3 functions
This patch removes three no longer used functions (that are even generating gcc warnings). This patch doesn't look right, but it is the result of 58e5528ee464d38040b9489e10033c9387a10d56 in git-netdev... Signed-off-by: Adrian Bunk [EMAIL PROTECTED] --- drivers/net/wireless/bcm43xx/bcm43xx_main.c | 33 1 file changed, 33 deletions(-) --- linux-2.6.18-rc3-mm2-full/drivers/net/wireless/bcm43xx/bcm43xx_main.c.old 2006-08-07 18:21:31.0 +0200 +++ linux-2.6.18-rc3-mm2-full/drivers/net/wireless/bcm43xx/bcm43xx_main.c 2006-08-07 18:23:36.0 +0200 @@ -3194,39 +3194,6 @@ bcm43xx_clear_keys(bcm); } -static int bcm43xx_rng_read(struct hwrng *rng, u32 *data) -{ - struct bcm43xx_private *bcm = (struct bcm43xx_private *)rng-priv; - unsigned long flags; - - spin_lock_irqsave((bcm)-irq_lock, flags); - *data = bcm43xx_read16(bcm, BCM43xx_MMIO_RNG); - spin_unlock_irqrestore((bcm)-irq_lock, flags); - - return (sizeof(u16)); -} - -static void bcm43xx_rng_exit(struct bcm43xx_private *bcm) -{ - hwrng_unregister(bcm-rng); -} - -static int bcm43xx_rng_init(struct bcm43xx_private *bcm) -{ - int err; - - snprintf(bcm-rng_name, ARRAY_SIZE(bcm-rng_name), -%s_%s, KBUILD_MODNAME, bcm-net_dev-name); - bcm-rng.name = bcm-rng_name; - bcm-rng.data_read = bcm43xx_rng_read; - bcm-rng.priv = (unsigned long)bcm; - err = hwrng_register(bcm-rng); - if (err) - printk(KERN_ERR PFX RNG init failed (%d)\n, err); - - return err; -} - static int bcm43xx_shutdown_all_wireless_cores(struct bcm43xx_private *bcm) { int ret = 0; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: forcedeth gigabit detection
You know, I suddenly feel very foolish. Sorry for having wasted everyone's time, and thanks for your keen eye. On Tue, Aug 08, 2006 at 12:19:24AM +0200, Krzysztof Halasa wrote: Frank v Waveren [EMAIL PROTECTED] writes: The nforce2 builtin network on my A7N8X-delux motherboard won't detect as gigabit-capable using the forcedeth driver. Asustek doesn't seem to indicate it has gigabit ports, are you sure your mb does have them? Perhaps it's a different version, something like A7N8X-E? -- Krzysztof Halasa -- Frank v Waveren Key fingerprint: BDD7 D61E [EMAIL PROTECTED] 5D39 CF05 4BFC F57A Public key: hkp://wwwkeys.pgp.net/468D62C8 FA00 7D51 468D 62C8 signature.asc Description: Digital signature
Re: [PATCH] fix alloc_skb comment typo
From: Christoph Hellwig [EMAIL PROTECTED] Date: Sat, 5 Aug 2006 14:59:06 +0200 Signed-off-by: Christoph Hellwig [EMAIL PROTECTED] Applied, thanks Christoph. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
tg3: tg3_stop_block timed out
Hi, I have seen a few reports like this, but now Broadcom seems to actively support tg3, so I decided to send this. ... [many hamilton not responding messages] [4554928.798000] nfs: server hamilton not responding, still trying [4554935.319000] nfs: server hamilton not responding, still trying [4555468.94] NETDEV WATCHDOG: eth1: transmit timed out [4555468.94] tg3: eth1: transmit timed out, resetting [4555469.044000] tg3: tg3_stop_block timed out, ofs=3400 enable_bit=2 [4555469.147000] tg3: tg3_stop_block timed out, ofs=2400 enable_bit=2 [4555469.251000] tg3: tg3_stop_block timed out, ofs=1400 enable_bit=2 [4555469.354000] tg3: tg3_stop_block timed out, ofs=c00 enable_bit=2 [4555469.433000] tg3: eth1: Link is down. [4555472.593000] tg3: eth1: Link is up at 1000 Mbps, full duplex. [4555472.594000] tg3: eth1: Flow control is on for TX and on for RX. [4555498.016000] nfs: server 129.206.21.200 OK [4555648.015000] nfs: server 129.206.21.200 OK ... [many ok messages] It seems to be the first time that something like this happened, at least I don't find anything in the previous logs. This is with 2.6.16; would it be worth trying a more recent tg3 driver (e.g. from Broadcom (3.58) or backported from 2.6.17 (3.59))? Thanks, Bernd - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RESEND 1/2] in-kernel sockets API
Dave, Could you consider this for inclusion into 2.6.19 tree? Thanks Sridhar This patch implements wrapper functions that provide a convenient way to access the sockets API for in-kernel users like sunrpc, cifs ocfs2 etc and any future users. Signed-off-by: Sridhar Samudrala [EMAIL PROTECTED] Acked-by: James Morris [EMAIL PROTECTED] --- include/linux/net.h | 19 + net/socket.c| 113 +++ 2 files changed, 132 insertions(+), 0 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index b20c53c..19da2c0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -208,6 +208,25 @@ extern int kernel_recvmsg(struct struct kvec *vec, size_t num, size_t len, int flags); +extern int kernel_bind(struct socket *sock, struct sockaddr *addr, + int addrlen); +extern int kernel_listen(struct socket *sock, int backlog); +extern int kernel_accept(struct socket *sock, struct socket **newsock, +int flags); +extern int kernel_connect(struct socket *sock, struct sockaddr *addr, + int addrlen, int flags); +extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr, + int *addrlen); +extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr, + int *addrlen); +extern int kernel_getsockopt(struct socket *sock, int level, int optname, +char *optval, int *optlen); +extern int kernel_setsockopt(struct socket *sock, int level, int optname, +char *optval, int optlen); +extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags); +extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); + #ifndef CONFIG_SMP #define SOCKOPS_WRAPPED(name) name #define SOCKOPS_WRAP(name, fam) diff --git a/net/socket.c b/net/socket.c index b4848ce..0c9d01d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2160,6 +2160,109 @@ static long compat_sock_ioctl(struct fil } #endif +int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) +{ + return sock-ops-bind(sock, addr, addrlen); +} + +int 
kernel_listen(struct socket *sock, int backlog) +{ + return sock-ops-listen(sock, backlog); +} + +int kernel_accept(struct socket *sock, struct socket **newsock, int flags) +{ + struct sock *sk = sock-sk; + int err; + + err = sock_create_lite(sk-sk_family, sk-sk_type, sk-sk_protocol, + newsock); + if (err 0) + goto done; + + err = sock-ops-accept(sock, *newsock, flags); + if (err 0) { + sock_release(*newsock); + goto done; + } + + (*newsock)-ops = sock-ops; + +done: + return err; +} + +int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, + int flags) +{ + return sock-ops-connect(sock, addr, addrlen, flags); +} + +int kernel_getsockname(struct socket *sock, struct sockaddr *addr, +int *addrlen) +{ + return sock-ops-getname(sock, addr, addrlen, 0); +} + +int kernel_getpeername(struct socket *sock, struct sockaddr *addr, +int *addrlen) +{ + return sock-ops-getname(sock, addr, addrlen, 1); +} + +int kernel_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_getsockopt(sock, level, optname, optval, optlen); + else + err = sock-ops-getsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_setsockopt(sock, level, optname, optval, optlen); + else + err = sock-ops-setsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags) +{ + if (sock-ops-sendpage) + return sock-ops-sendpage(sock, page, offset, size, flags); + + return sock_no_sendpage(sock, page, offset, size, flags); +} + +int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) +{ + 
mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + err =
[PATCH RESEND 2/2] update sunrpc to use in-kernel sockets API
Update sunrpc to use in-kernel sockets API. Signed-off-by: Sridhar Samudrala [EMAIL PROTECTED] Acked-by: James Morris [EMAIL PROTECTED] --- net/sunrpc/svcsock.c | 38 ++ net/sunrpc/xprtsock.c |8 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d9a9573..953aff8 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -388,7 +388,7 @@ svc_sendto(struct svc_rqst *rqstp, struc /* send head */ if (slen == xdr-head[0].iov_len) flags = 0; - len = sock-ops-sendpage(sock, rqstp-rq_respages[0], 0, xdr-head[0].iov_len, flags); + len = kernel_sendpage(sock, rqstp-rq_respages[0], 0, xdr-head[0].iov_len, flags); if (len != xdr-head[0].iov_len) goto out; slen -= xdr-head[0].iov_len; @@ -400,7 +400,7 @@ svc_sendto(struct svc_rqst *rqstp, struc while (pglen 0) { if (slen == size) flags = 0; - result = sock-ops-sendpage(sock, *ppage, base, size, flags); + result = kernel_sendpage(sock, *ppage, base, size, flags); if (result 0) len += result; if (result != size) @@ -413,7 +413,7 @@ svc_sendto(struct svc_rqst *rqstp, struc } /* send tail */ if (xdr-tail[0].iov_len) { - result = sock-ops-sendpage(sock, rqstp-rq_respages[rqstp-rq_restailpage], + result = kernel_sendpage(sock, rqstp-rq_respages[rqstp-rq_restailpage], ((unsigned long)xdr-tail[0].iov_base) (PAGE_SIZE-1), xdr-tail[0].iov_len, 0); @@ -434,13 +434,10 @@ out: static int svc_recv_available(struct svc_sock *svsk) { - mm_segment_toldfs; struct socket *sock = svsk-sk_sock; int avail, err; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = sock-ops-ioctl(sock, TIOCINQ, (unsigned long) avail); - set_fs(oldfs); + err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) avail); return (err = 0)? avail : err; } @@ -472,7 +469,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str * at accept time. 
FIXME */ alen = sizeof(rqstp-rq_addr); - sock-ops-getname(sock, (struct sockaddr *)rqstp-rq_addr, alen, 1); + kernel_getpeername(sock, (struct sockaddr *)rqstp-rq_addr, alen); dprintk(svc: socket %p recvfrom(%p, %Zu) = %d\n, rqstp-rq_sock, iov[0].iov_base, iov[0].iov_len, len); @@ -758,7 +755,6 @@ svc_tcp_accept(struct svc_sock *svsk) struct svc_serv *serv = svsk-sk_server; struct socket *sock = svsk-sk_sock; struct socket *newsock; - const struct proto_ops *ops; struct svc_sock *newsvsk; int err, slen; @@ -766,29 +762,23 @@ svc_tcp_accept(struct svc_sock *svsk) if (!sock) return; - err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, newsock); - if (err) { + clear_bit(SK_CONN, svsk-sk_flags); + err = kernel_accept(sock, newsock, O_NONBLOCK); + if (err 0) { if (err == -ENOMEM) printk(KERN_WARNING %s: no more sockets!\n, serv-sv_name); - return; - } - - dprintk(svc: tcp_accept %p allocated\n, newsock); - newsock-ops = ops = sock-ops; - - clear_bit(SK_CONN, svsk-sk_flags); - if ((err = ops-accept(sock, newsock, O_NONBLOCK)) 0) { - if (err != -EAGAIN net_ratelimit()) + else if (err != -EAGAIN net_ratelimit()) printk(KERN_WARNING %s: accept failed (err %d)!\n, serv-sv_name, -err); - goto failed;/* aborted connection or whatever */ + return; } + set_bit(SK_CONN, svsk-sk_flags); svc_sock_enqueue(svsk); slen = sizeof(sin); - err = ops-getname(newsock, (struct sockaddr *) sin, slen, 1); + err = kernel_getpeername(newsock, (struct sockaddr *) sin, slen); if (err 0) { if (net_ratelimit()) printk(KERN_WARNING %s: peername failed (err %d)!\n, @@ -1406,14 +1396,14 @@ svc_create_socket(struct svc_serv *serv, if (sin != NULL) { if (type == SOCK_STREAM) sock-sk-sk_reuse = 1; /* allow address reuse */ - error = sock-ops-bind(sock, (struct sockaddr *) sin, + error = kernel_bind(sock, (struct sockaddr *) sin, sizeof(*sin)); if (error 0) goto bummer; } if (protocol == IPPROTO_TCP) { - if ((error = sock-ops-listen(sock, 64)) 0) +
Re: tg3: tg3_stop_block timed out
On Tue, 2006-08-08 at 00:43 +0200, Bernd Schubert wrote: Hi, I have seen a few reports like this, but now broadcom seems to actively support tg3, so I decided to send this. ... [many hamilton not responding messages] 4554928.798000] nfs: server hamilton not responding, still trying [4554935.319000] nfs: server hamilton not responding, still trying [4555468.94] NETDEV WATCHDOG: eth1: transmit timed out [4555468.94] tg3: eth1: transmit timed out, resetting [4555469.044000] tg3: tg3_stop_block timed out, ofs=3400 enable_bit=2 [4555469.147000] tg3: tg3_stop_block timed out, ofs=2400 enable_bit=2 [4555469.251000] tg3: tg3_stop_block timed out, ofs=1400 enable_bit=2 [4555469.354000] tg3: tg3_stop_block timed out, ofs=c00 enable_bit=2 [4555469.433000] tg3: eth1: Link is down. [4555472.593000] tg3: eth1: Link is up at 1000 Mbps, full duplex. [4555472.594000] tg3: eth1: Flow control is on for TX and on for RX. [4555498.016000] nfs: server 129.206.21.200 OK [4555648.015000] nfs: server 129.206.21.200 OK ... [many ok messages] I need to know what hardware you're using so please send me the tg3 probing output for eth1 when you load the driver. Do you have TSO enabled? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] move skb->dev assignment into netdev_alloc_skb
From: Christoph Hellwig [EMAIL PROTECTED] Date: Sat, 5 Aug 2006 15:01:09 +0200 All callers of netdev_alloc_skb need to assign skb->dev shortly afterwards. Move it into common code. I also had to fix up a little bit of the surrounding control flow in e1000 - it was just too convoluted. Signed-off-by: Christoph Hellwig [EMAIL PROTECTED] Since the e1000 change is non-trivial I'm not going to bypass the driver author on it, sorry. What I did do was put the netdev_alloc_skb() change into my tree, and since I'm co-author of the tg3 driver I'll apply that bit too. The e1000 bit will need to go through the e1000 maintainers. Thanks. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Excess use of packed attribute
From: Stephen Hemminger [EMAIL PROTECTED] Date: Mon, 7 Aug 2006 13:34:23 -0700 Silly offenders: include/net/ipx.h include/net/ieee80211.h include/net/ip6_tunnel.h include/net/ndisc.h include/linux/if_ether.h include/linux/if_fddi.h include/linux/sctp.h -- really bad The ndisc.h one, for example, is needed for cases like ARM. The if_ether.h one is also needed, or else for: struct ethhdr *eth; eth + 1 would do the wrong thing as the compiler would align the structure to the native pointer size or similar. This is an issue because ethhdr is 14 bytes in size. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RESEND 1/2] in-kernel sockets API
On Mon, 07 Aug 2006 16:00:29 -0700 Sridhar Samudrala [EMAIL PROTECTED] wrote: Dave, Could you consider this for inclusion into 2.6.19 tree? Thanks Sridhar This patch implements wrapper functions that provide a convenient way to access the sockets API for in-kernel users like sunrpc, cifs ocfs2 etc and any future users. Signed-off-by: Sridhar Samudrala [EMAIL PROTECTED] Acked-by: James Morris [EMAIL PROTECTED] --- include/linux/net.h | 19 + net/socket.c| 113 +++ 2 files changed, 132 insertions(+), 0 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index b20c53c..19da2c0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -208,6 +208,25 @@ extern intkernel_recvmsg(struct struct kvec *vec, size_t num, size_t len, int flags); +extern int kernel_bind(struct socket *sock, struct sockaddr *addr, +int addrlen); +extern int kernel_listen(struct socket *sock, int backlog); +extern int kernel_accept(struct socket *sock, struct socket **newsock, + int flags); +extern int kernel_connect(struct socket *sock, struct sockaddr *addr, + int addrlen, int flags); +extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr, + int *addrlen); +extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr, + int *addrlen); +extern int kernel_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen); +extern int kernel_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen); +extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, +size_t size, int flags); +extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); + #ifndef CONFIG_SMP #define SOCKOPS_WRAPPED(name) name #define SOCKOPS_WRAP(name, fam) diff --git a/net/socket.c b/net/socket.c index b4848ce..0c9d01d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2160,6 +2160,109 @@ static long compat_sock_ioctl(struct fil } #endif +int kernel_bind(struct socket *sock, struct sockaddr *addr, 
int addrlen) +{ + return sock-ops-bind(sock, addr, addrlen); +} + +int kernel_listen(struct socket *sock, int backlog) +{ + return sock-ops-listen(sock, backlog); +} + +int kernel_accept(struct socket *sock, struct socket **newsock, int flags) +{ + struct sock *sk = sock-sk; + int err; + + err = sock_create_lite(sk-sk_family, sk-sk_type, sk-sk_protocol, +newsock); + if (err 0) + goto done; + + err = sock-ops-accept(sock, *newsock, flags); + if (err 0) { + sock_release(*newsock); + goto done; + } + + (*newsock)-ops = sock-ops; + +done: + return err; +} + +int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, + int flags) +{ + return sock-ops-connect(sock, addr, addrlen, flags); +} + +int kernel_getsockname(struct socket *sock, struct sockaddr *addr, + int *addrlen) +{ + return sock-ops-getname(sock, addr, addrlen, 0); +} + +int kernel_getpeername(struct socket *sock, struct sockaddr *addr, + int *addrlen) +{ + return sock-ops-getname(sock, addr, addrlen, 1); +} + +int kernel_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_getsockopt(sock, level, optname, optval, optlen); + else + err = sock-ops-getsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_setsockopt(sock, level, optname, optval, optlen); + else + err = sock-ops-setsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags) +{ + if (sock-ops-sendpage) + return sock-ops-sendpage(sock, page, offset, size, flags); + + return sock_no_sendpage(sock, page, offset, size, flags); +} + +int 
kernel_sock_ioctl(struct socket *sock, int cmd,
[PATCH wireless-dev 2/6] d80211: Fix PS-Poll frame dropping
Fixed PS-Poll processing for STAs that are not authenticated or associated: - 80211.ko dropped these frames even though it should have sent them to hostapd (this was broken by addition of IBSS support) Signed-off-by: Jouni Malinen [EMAIL PROTECTED] Index: wireless-dev/net/d80211/ieee80211.c === --- wireless-dev.orig/net/d80211/ieee80211.c +++ wireless-dev/net/d80211/ieee80211.c @@ -3074,8 +3074,9 @@ ieee80211_rx_h_check(struct ieee80211_tx rx-sdata-type != IEEE80211_IF_TYPE_IBSS (!rx-sta || !(rx-sta-flags WLAN_STA_ASSOC { if ((!(rx-fc IEEE80211_FCTL_FROMDS) -!(rx-fc IEEE80211_FCTL_TODS)) || - !rx-u.rx.ra_match) { +!(rx-fc IEEE80211_FCTL_TODS) +(rx-fc IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) + || !rx-u.rx.ra_match) { /* Drop IBSS frames and frames for other hosts * silently. */ return TXRX_DROP; -- -- Jouni MalinenPGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, Aug 07, 2006 at 03:03:33PM -0400, jamal wrote: -#define E1000_TX_WEIGHT 64 - /* weight of a sort for tx, to avoid endless transmit cleanup */ - if (count++ == E1000_TX_WEIGHT) break; + /* avoid endless transmit cleanup */ + if (count++ == tx_ring->prunet) break; As you can see E1000_TX_WEIGHT threshold exists today and you are right if no TX interrupts, packet arrivals or scheduled wakes happen then the descriptor that was not pruned will sit there forever (which is a bad thing for TCP). Are we in sync? If yes, what is the likelihood they will sit there forever? I think perhaps some TX interrupts will happen, no? I thought this code is only used for NAPI so as long as work was done it'll keep calling this. One thing I'm not sure about though is the time between when it decides that there is no work and the point where the interrupts are reenabled. What if work arrives in that time and no work ever arrives after the interrupts are turned on again? Does that mean the work will sit there forever? Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] ipx: header length validation needed
From: Stephen Hemminger [EMAIL PROTECTED] Date: Mon, 7 Aug 2006 13:46:36 -0700 IPX is not checking for non-linear (and short packets) in its receive routine. This is serious because it may mean it ends up reading past end of skb. This takes care of ipx_rcv() but the rest of the IPX protocol handling still has the problem, so you'll need to meticulously follow the whole receive path and fix up all the spots that parse subsequent parts of the IPX packet to fix this properly. For example, take a look at ipxitf_pprop(). - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH wireless-dev 3/6] d80211: Fix PLCP header length comment
Fixed a typo in a comment: PLCP header length is in microseconds, not milliseconds. Signed-off-by: Jouni Malinen [EMAIL PROTECTED] Index: wireless-dev/net/d80211/ieee80211.c === --- wireless-dev.orig/net/d80211/ieee80211.c +++ wireless-dev/net/d80211/ieee80211.c @@ -637,7 +637,7 @@ static int ieee80211_frame_duration(stru * 802.11 (DS): 15.3.3, 802.11b: 18.3.4 * aSIFSTime = 10 usec * aPreambleLength = 144 usec or 72 usec with short preamble -* aPLCPHeaderLength = 48 ms or 24 ms with short preamble +* aPLCPHeaderLength = 48 usec or 24 usec with short preamble */ dur = 10; /* aSIFSTime = 10 usec */ dur += short_preamble ? (72 + 24) : (144 + 48); -- -- Jouni MalinenPGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH wireless-dev 4/6] d80211: Send Layer 2 Update frames in kernel
Send Layer 2 Update frame from the 802.11 code in kernel to the netdev that the STA is bound to. If the STA is bound to another VLAN netdev, send another update frame. This fixes an issue in which a local bridge table was not updated when hostapd sent this frame. Signed-off-by: Jouni Malinen [EMAIL PROTECTED] Index: wireless-dev/net/d80211/ieee80211_ioctl.c === --- wireless-dev.orig/net/d80211/ieee80211_ioctl.c +++ wireless-dev/net/d80211/ieee80211_ioctl.c @@ -15,6 +15,7 @@ #include linux/types.h #include linux/slab.h #include linux/skbuff.h +#include linux/etherdevice.h #include linux/if_arp.h #include linux/wireless.h #include net/iw_handler.h @@ -215,6 +216,52 @@ static int ieee80211_ioctl_flush(struct } +/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */ +struct iapp_layer2_update { + u8 da[ETH_ALEN]; /* broadcast */ + u8 sa[ETH_ALEN]; /* STA addr */ + u16 len; /* 6 */ + u8 dsap; /* 0 */ + u8 ssap; /* 0 */ + u8 control; + u8 xid_info[3]; +} __attribute__ ((packed)); + +static void ieee80211_send_layer2_update(struct net_device *dev, +const u8 *addr) +{ + struct iapp_layer2_update *msg; + struct sk_buff *skb; + + /* Send Level 2 Update Frame to update forwarding tables in layer 2 +* bridge devices */ + + skb = dev_alloc_skb(sizeof(*msg)); + if (skb == NULL) + return; + msg = (struct iapp_layer2_update *) skb_put(skb, sizeof(*msg)); + + /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID) +* Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */ + + memset(msg-da, 0xff, ETH_ALEN); + memcpy(msg-sa, addr, ETH_ALEN); + msg-len = htons(6); + msg-dsap = 0; + msg-ssap = 0x01; /* NULL LSAP, CR Bit: Response */ + msg-control = 0xaf; /* XID response lsb.F101. 
+ * F=0 (no poll command; unsolicited frame) */ + msg-xid_info[0] = 0x81; /* XID format identifier */ + msg-xid_info[1] = 1; /* LLC types/classes: Type 1 LLC */ + msg-xid_info[2] = 0; /* XID sender's receive window size (RW) */ + + skb-dev = dev; + skb-protocol = eth_type_trans(skb, dev); + memset(skb-cb, 0, sizeof(skb-cb)); + netif_rx(skb); +} + + static int ieee80211_ioctl_add_sta(struct net_device *dev, struct prism2_hostapd_param *param) { @@ -296,6 +343,10 @@ static int ieee80211_ioctl_add_sta(struc sta_info_put(sta); + if (sdata-type == IEEE80211_IF_TYPE_AP || + sdata-type == IEEE80211_IF_TYPE_VLAN) + ieee80211_send_layer2_update(dev, param-sta_addr); + return 0; } @@ -1168,6 +1219,10 @@ static int ieee80211_ioctl_set_sta_vlan( dev-name, MAC_ARG(param-sta_addr), new_vlan_dev-name); #endif + if (sta-dev != new_vlan_dev) { + ieee80211_send_layer2_update(new_vlan_dev, +sta-addr); + } sta-dev = new_vlan_dev; sta-vlan_id = param-u.set_sta_vlan.vlan_id; dev_put(new_vlan_dev); -- -- Jouni MalinenPGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH wireless-dev 6/6] d80211: Fix TKIP replay protection
Fixed TKIP replay protection for the case where hwaccel is enabled. rx_initialized flag was not set in this case and the TSC validation was skipped for the frames. Signed-off-by: Jouni Malinen [EMAIL PROTECTED] Index: wireless-dev/net/d80211/tkip.c === --- wireless-dev.orig/net/d80211/tkip.c +++ wireless-dev/net/d80211/tkip.c @@ -286,6 +286,7 @@ int ieee80211_tkip_decrypt_data(struct c if (only_iv) { res = TKIP_DECRYPT_OK; + key->u.tkip.rx_initialized[queue] = 1; goto done; } -- -- Jouni Malinen PGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: tg3: tg3_stop_block timed out
Hi Michael, thanks for your help! On Tuesday 08 August 2006 01:07, Michael Chan wrote: ... [many hamilton not responding messages] 4554928.798000] nfs: server hamilton not responding, still trying [4554935.319000] nfs: server hamilton not responding, still trying [4555468.94] NETDEV WATCHDOG: eth1: transmit timed out [4555468.94] tg3: eth1: transmit timed out, resetting [4555469.044000] tg3: tg3_stop_block timed out, ofs=3400 enable_bit=2 [4555469.147000] tg3: tg3_stop_block timed out, ofs=2400 enable_bit=2 [4555469.251000] tg3: tg3_stop_block timed out, ofs=1400 enable_bit=2 [4555469.354000] tg3: tg3_stop_block timed out, ofs=c00 enable_bit=2 [4555469.433000] tg3: eth1: Link is down. [4555472.593000] tg3: eth1: Link is up at 1000 Mbps, full duplex. [4555472.594000] tg3: eth1: Flow control is on for TX and on for RX. [4555498.016000] nfs: server 129.206.21.200 OK [4555648.015000] nfs: server 129.206.21.200 OK ... [many ok messages] I need to know what hardware you're using so please send me the tg3 probing output for eth1 when you load the driver. Do you have TSO enabled? tg3.c:v3.49 (Feb 2, 2006) acpi_bus-0201 [01] bus_set_power : Device is not power manageable eth1: Tigon3 [partno(BCM95704A6) rev 2003 PHY(5704)] (PCIX:100MHz:64-bit) 10/100/1000BaseT Ethernet 00:e0:81:2b:aa:28 eth1: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[1] Split[0] WireSpeed[1] TSOcap[0] eth1: dma_rwctrl[769f4000] dma_mask[64-bit] eth2: Tigon3 [partno(BCM95704A6) rev 2003 PHY(5704)] (PCIX:100MHz:64-bit) 10/100/1000BaseT Ethernet 00:e0:81:2b:aa:29 eth2: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[0] Split[0] WireSpeed[1] TSOcap[1] eth2: dma_rwctrl[769f4000] dma_mask[64-bit] The NIC is onboard a Tyan S2882. 
:02:09.0 Ethernet controller: Broadcom Corporation NetXtreme BCM5704 Gigabit Ethernet (rev 03) Subsystem: Broadcom Corporation: Unknown device 1644 Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 24 Memory at fc8c (64-bit, non-prefetchable) [size=64K] Memory at fc8b (64-bit, non-prefetchable) [size=64K] Capabilities: [40] Capabilities: [48] Power Management version 2 Capabilities: [50] Vital Product Data Capabilities: [58] Message Signalled Interrupts: 64bit+ Queue=0/3 Enable- :02:09.1 Ethernet controller: Broadcom Corporation NetXtreme BCM5704 Gigabit Ethernet (rev 03) Subsystem: Broadcom Corporation: Unknown device 1644 Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 25 Memory at fc8e (64-bit, non-prefetchable) [size=64K] Memory at fc8d (64-bit, non-prefetchable) [size=64K] Capabilities: [40] Capabilities: [48] Power Management version 2 Capabilities: [50] Vital Product Data Capabilities: [58] Message Signalled Interrupts: 64bit+ Queue=0/3 Enable- The driver is compiled into the kernel (it's an NFS-root booted system and NIC modules are presently not supported by our initrd). So the default option for tso is set. Is there any way to determine the present tso setting? With ethtool I only find the options to turn it off/on, but none to query the current state. Thanks a lot, Bernd -- Bernd Schubert PCI / Theoretische Chemie Universität Heidelberg INF 229 69120 Heidelberg - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH wireless-dev 0/6] Set of small fixes to net/d80211
Here's a set of small fixes to net/d80211 from the Devicescape tree. Please consider applying. -- -- Jouni MalinenPGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH wireless-dev 1/6] d80211: Fix RTS threshold use
Fixed dot11RTSThreshold use which was off-by-3: - must add FCS_LEN to the skb->len - frame length needs to be greater than threshold; not greater than or equal Signed-off-by: Jouni Malinen [EMAIL PROTECTED] Index: wireless-dev/net/d80211/ieee80211.c === --- wireless-dev.orig/net/d80211/ieee80211.c +++ wireless-dev/net/d80211/ieee80211.c @@ -762,7 +762,7 @@ ieee80211_tx_h_misc(struct ieee80211_txr struct ieee80211_tx_control *control = tx->u.tx.control; if (!is_multicast_ether_addr(hdr->addr1)) { - if (tx->skb->len >= tx->local->rts_threshold && + if (tx->skb->len + FCS_LEN > tx->local->rts_threshold && tx->local->rts_threshold < IEEE80211_MAX_RTS_THRESHOLD) { control->use_rts_cts = 1; control->retry_limit = -- -- Jouni Malinen PGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH wireless-dev 5/6] d80211: Fix ieee80211_remove_tx_extra() if key not configured
QoS header processing mangled unencrypted WMM frames on software retry. The QoS data needs to be removed even when encryption key is not configured. Signed-off-by: Jouni Malinen [EMAIL PROTECTED] Index: wireless-dev/net/d80211/ieee80211.c === --- wireless-dev.orig/net/d80211/ieee80211.c +++ wireless-dev/net/d80211/ieee80211.c @@ -3977,11 +3977,11 @@ static void ieee80211_remove_tx_extra(st pkt_data-requeue = control-requeue; pkt_data-queue = control-queue; - if (key == NULL) - return; - hdrlen = ieee80211_get_hdrlen_from_skb(skb); + if (key == NULL) + goto no_key; + switch (key-alg) { case ALG_WEP: iv_len = WEP_IV_LEN; @@ -3996,7 +3996,7 @@ static void ieee80211_remove_tx_extra(st mic_len = CCMP_MIC_LEN; break; default: - return; + goto no_key; } if (skb-len = mic_len key-force_sw_encrypt) @@ -4006,6 +4006,7 @@ static void ieee80211_remove_tx_extra(st skb_pull(skb, iv_len); } +no_key: { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb-data; u16 fc = le16_to_cpu(hdr-frame_control); -- -- Jouni MalinenPGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] ipx: header length validation needed
This patch will linearize and check there is enough data. It handles the pprop case as well as avoiding a whole audit of the routing code. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- a/net/ipx/af_ipx.c 2006-08-07 13:45:59.0 -0700 +++ b/net/ipx/af_ipx.c 2006-08-07 16:34:00.0 -0700 @@ -1649,7 +1649,8 @@ ipx_pktsize = ntohs(ipx->ipx_pktsize); /* Too small or invalid header? */ - if (ipx_pktsize < sizeof(struct ipxhdr) || ipx_pktsize > skb->len) + if (ipx_pktsize < sizeof(struct ipxhdr) + || !pskb_may_pull(skb, ipx_pktsize)) goto drop; if (ipx->ipx_checksum != IPX_NO_CHECKSUM && - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Tue, 8 Aug 2006, Herbert Xu wrote: -#define E1000_TX_WEIGHT 64 - /* weight of a sort for tx, to avoid endless transmit cleanup */ - if (count++ == E1000_TX_WEIGHT) break; + /* avoid endless transmit cleanup */ + if (count++ == tx_ring->prunet) break; As you can see E1000_TX_WEIGHT threshold exists today and you are right if no TX interrupts, packet arrivals or scheduled wakes happen then the descriptor that was not pruned will sit there forever (which is a bad thing for TCP). Are we in sync? If yes, what is the likelihood they will sit there forever? I think perhaps some TX interrupts will happen, no? I thought this code is only used for NAPI so as long as work was done it'll keep calling this. yes, you're correct. One thing I'm not sure about though is the time between when it decides that there is no work and the point where the interrupts are reenabled. e1000 only clears the interrupts when it reads ICR in e1000_intr (before scheduling napi poll) so any interrupts that occur while polling (and interrupts are disabled) will cause a new assertion once interrupts are re-enabled. Sometimes a little bit inefficient due to extra trips through poll, but guarantees never to miss an int. I'm open to creative ways to avoid this, but adding an I/O read in e1000_clean would be pretty yucky. What if work arrives in that time and no work ever arrives after the interrupts are turned on again? Does that mean the work will sit there forever? nope, see above. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [e1000]: Remove unnecessary tx_lock
On Mon, Aug 07, 2006 at 04:35:36PM -0700, Brandeburg, Jesse wrote: e1000 only clears the interrupts when it reads ICR in e1000_intr (before scheduling napi poll) so any interrupts that occur while polling (and interrupts are disabled) will cause a new assertion once interrupts are re-enabled. Sometimes a little bit inefficient due to extra trips through poll, but guarantees never to miss an int. I'm open to creative ways to avoid this, but adding an I/O read in e1000_clean would be pretty yucky. The standard solution in Linux is to clear and recheck. So just before you reenable the interrupts you'd clear pending interrupts again and check for rx/tx work, if there is work then you just go back to polling. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: tg3: tg3_stop_block timed out
On Tue, 2006-08-08 at 01:24 +0200, Bernd Schubert wrote: tg3.c:v3.49 (Feb 2, 2006) acpi_bus-0201 [01] bus_set_power : Device is not power manageable eth1: Tigon3 [partno(BCM95704A6) rev 2003 PHY(5704)] (PCIX:100MHz:64-bit) 10/100/1000BaseT Ethernet 00:e0:81:2b:aa:28 eth1: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[1] Split[0] WireSpeed[1] TSOcap[0] eth1: dma_rwctrl[769f4000] dma_mask[64-bit] eth2: Tigon3 [partno(BCM95704A6) rev 2003 PHY(5704)] (PCIX:100MHz:64-bit) 10/100/1000BaseT Ethernet 00:e0:81:2b:aa:29 eth2: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[0] Split[0] WireSpeed[1] TSOcap[1] eth2: dma_rwctrl[769f4000] dma_mask[64-bit] You have ASF enabled on eth1 but not on eth2 so I wonder if ASF is causing the problem. Can you run the same traffic on eth2 and see if you get the same timeout problem? Thanks. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Excess use of packed attribute
On Mon, 2006-08-07 at 13:34 -0700, Stephen Hemminger wrote: After reading: http://bugzilla.kernel.org/show_bug.cgi?id=6693 I noticed there were stupid uses of packed attribute in several network headers. Silly offenders: include/net/ipx.h include/net/ieee80211.h include/net/ip6_tunnel.h include/net/ndisc.h include/linux/if_ether.h include/linux/if_fddi.h include/linux/sctp.h -- really bad All the structures in sctp.h that use the packed attribute define standard on-wire SCTP chunk/parameter formats. They need to be at the exact offsets as they go on the wire. I think we saw some issues without the packed attribute on 64-bit archs and just to be safe we added packed to all the on-wire structures. Thanks Sridhar - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] avoid unnecessary alignement overhead in skb-data allocation.
On 8/7/06, Herbert Xu [EMAIL PROTECTED] wrote: On Mon, Aug 07, 2006 at 11:31:03AM +0400, Evgeniy Polyakov wrote: Only if they form contiguous region? Jesse, is it possible for every e1000 chip to split frame into several page-sized chunks i.e. create some kind of receiving scatter-gather? Now you get to the meat of the problem. Yes, all versions of e1000 can receive packets longer than the receive data area in the descriptor. If the data area is shorter than the packet, then the data overflows into the next descriptor. Actually, it was Chris Leech who raised this possibility: : Yes, e1000 devices will spill over and use multiple buffers for a : single frame. We've been trying to find a good way to use multiple : buffers to take care of these allocation problems. The structure of : the sk_buff does not make it easy. Or should I say that it's the : limitation that drivers are not allowed to chain together multiple : sk_buffs to represent a single frame that does not make it easy. Perhaps he can enlighten us. Or since I'm here... in any case we had driver code (see driver 6.2.15) that did this at one point, but we removed it because it was using frag_list. So here is our problem with the network driver API. The only way to indicate multiple buffer (descriptor) receives is to use nr_frags. Our non split-header hardware needs power of 2 allocations *except* in the 1500 byte MTU case where we can optimize by having the hardware drop all frames > 1522 bytes. We would like to have a method to use alloc_skb to get packets from slab to receive into and then chain them together. Right now that is not possible because you can't map alloc_skb'd data areas directly to pages to put into nr_frags. Much of this comes from the requirement that the stack free the skb we allocated. If we had an async callback for the driver to take care of freeing the skb then we could a) recycle b) handle pages in some efficient manner. 
Also, eth_type_trans wants skb->data to point to header, which would require us to memcpy data from a page back to skb->data. We could use help to get this done and multiple drivers would benefit. I can't get it done by myself, as much as I would like to. As for Evgeniy's suggestion of using the end of the e1000 receive buffer to store something I think it is a bad idea. Our hardware deals with powers of 2. From the e1000 manual: = LPE controls whether long packet reception is permitted. Hardware discards long packets if LPE is 0. A long packet is one longer than 1522 bytes. If LPE is 1, the maximum packet size that the device can receive is 16384 bytes. = - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/3] FS_ENET: use PAL for mii management
This patch should update the fs_enet infrastructure to utilize Phy Abstraction Layer subsystem. Along with the above, there are apparent bugfixes, overhaul and improvements. Signed-off-by: Vitaly Bordug [EMAIL PROTECTED] --- drivers/net/fs_enet/Makefile |6 drivers/net/fs_enet/fec.h | 42 +++ drivers/net/fs_enet/fs_enet-main.c | 207 +-- drivers/net/fs_enet/fs_enet-mii.c | 505 drivers/net/fs_enet/fs_enet.h | 40 ++- drivers/net/fs_enet/mac-fcc.c | 32 ++ drivers/net/fs_enet/mac-fec.c | 142 +- drivers/net/fs_enet/mac-scc.c |4 drivers/net/fs_enet/mii-bitbang.c | 448 drivers/net/fs_enet/mii-fec.c | 243 + drivers/net/fs_enet/mii-fixed.c| 91 -- 11 files changed, 711 insertions(+), 1049 deletions(-) diff --git a/drivers/net/fs_enet/Makefile b/drivers/net/fs_enet/Makefile index d6dd3f2..02d4dc1 100644 --- a/drivers/net/fs_enet/Makefile +++ b/drivers/net/fs_enet/Makefile @@ -4,7 +4,7 @@ # obj-$(CONFIG_FS_ENET) += fs_enet.o -obj-$(CONFIG_8xx) += mac-fec.o mac-scc.o -obj-$(CONFIG_8260) += mac-fcc.o +obj-$(CONFIG_8xx) += mac-fec.o mac-scc.o mii-fec.o +obj-$(CONFIG_CPM2) += mac-fcc.o mii-bitbang.o -fs_enet-objs := fs_enet-main.o fs_enet-mii.o mii-bitbang.o mii-fixed.o +fs_enet-objs := fs_enet-main.o diff --git a/drivers/net/fs_enet/fec.h b/drivers/net/fs_enet/fec.h new file mode 100644 index 000..e980527 --- /dev/null +++ b/drivers/net/fs_enet/fec.h @@ -0,0 +1,42 @@ +#ifndef FS_ENET_FEC_H +#define FS_ENET_FEC_H + +/* CRC polynomium used by the FEC for the multicast group filtering */ +#define FEC_CRC_POLY 0x04C11DB7 + +#define FEC_MAX_MULTICAST_ADDRS64 + +/* Interrupt events/masks. 
+*/ +#define FEC_ENET_HBERR 0x8000U /* Heartbeat error */ +#define FEC_ENET_BABR 0x4000U /* Babbling receiver*/ +#define FEC_ENET_BABT 0x2000U /* Babbling transmitter */ +#define FEC_ENET_GRA 0x1000U /* Graceful stop complete */ +#define FEC_ENET_TXF 0x0800U /* Full frame transmitted */ +#define FEC_ENET_TXB 0x0400U /* A buffer was transmitted */ +#define FEC_ENET_RXF 0x0200U /* Full frame received */ +#define FEC_ENET_RXB 0x0100U /* A buffer was received*/ +#define FEC_ENET_MII 0x0080U /* MII interrupt*/ +#define FEC_ENET_EBERR 0x0040U /* SDMA bus error */ + +#define FEC_ECNTRL_PINMUX 0x0004 +#define FEC_ECNTRL_ETHER_EN0x0002 +#define FEC_ECNTRL_RESET 0x0001 + +#define FEC_RCNTRL_BC_REJ 0x0010 +#define FEC_RCNTRL_PROM0x0008 +#define FEC_RCNTRL_MII_MODE0x0004 +#define FEC_RCNTRL_DRT 0x0002 +#define FEC_RCNTRL_LOOP0x0001 + +#define FEC_TCNTRL_FDEN0x0004 +#define FEC_TCNTRL_HBC 0x0002 +#define FEC_TCNTRL_GTS 0x0001 + + + +/* + * Delay to wait for FEC reset command to complete (in us) + */ +#define FEC_RESET_DELAY50 +#endif diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index f6abff5..df62506 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -37,6 +37,7 @@ #include linux/ethtool.h #include linux/bitops.h #include linux/fs.h #include linux/platform_device.h +#include linux/phy.h #include linux/vmalloc.h #include asm/pgtable.h @@ -682,35 +683,6 @@ static void fs_free_irq(struct net_devic (*fep-ops-post_free_irq)(dev, irq); } -/**/ - -/* This interrupt occurs when the PHY detects a link change. */ -static irqreturn_t -fs_mii_link_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - struct net_device *dev = dev_id; - struct fs_enet_private *fep; - const struct fs_platform_info *fpi; - - fep = netdev_priv(dev); - fpi = fep-fpi; - - /* -* Acknowledge the interrupt if possible. If we have not -* found the PHY yet we can't process or acknowledge the -* interrupt now. 
Instead we ignore this interrupt for now, -* which we can do since it is edge triggered. It will be -* acknowledged later by fs_enet_open(). -*/ - if (!fep-phy) - return IRQ_NONE; - - fs_mii_ack_int(dev); - fs_mii_link_status_change_check(dev, 0); - - return IRQ_HANDLED; -} - static void fs_timeout(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); @@ -722,10 +694,13 @@ static void fs_timeout(struct net_device spin_lock_irqsave(fep-lock, flags); if (dev-flags IFF_UP) { + phy_stop(fep-phydev); (*fep-ops-stop)(dev); (*fep-ops-restart)(dev); + phy_start(fep-phydev);
[PATCH 1/3] PAL: Support of the fixed PHY
This makes it possible for HW PHY-less boards to utilize PAL goodies. Generic routines to connect to fixed PHY are provided, as well as ability to specify software callback that fills up link, speed, etc. information into PHY descriptor (the latter feature not tested so far). Signed-off-by: Vitaly Bordug [EMAIL PROTECTED] --- drivers/net/phy/Kconfig | 17 ++ drivers/net/phy/Makefile |1 drivers/net/phy/fixed.c | 358 ++ drivers/net/phy/phy_device.c | 51 -- include/linux/phy.h |1 5 files changed, 407 insertions(+), 21 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 2ba6d3a..b79ec0d 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -56,5 +56,22 @@ config SMSC_PHY ---help--- Currently supports the LAN83C185 PHY +config FIXED_PHY + tristate Drivers for PHY emulation on fixed speed/link + depends on PHYLIB + ---help--- + Adds the driver to PHY layer to cover the boards that do not have any PHY bound, + but with the ability to manipulate with speed/link in software. The relavant MII + speed/duplex parameters could be effectively handled in user-specified fuction. + Currently tested with mpc866ads. + +config FIXED_MII_10_FDX + bool Emulation for 10M Fdx fixed PHY behavior + depends on FIXED_PHY + +config FIXED_MII_100_FDX + bool Emulation for 100M Fdx fixed PHY behavior + depends on FIXED_PHY + endmenu diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index a00e619..320f832 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_LXT_PHY) += lxt.o obj-$(CONFIG_QSEMI_PHY)+= qsemi.o obj-$(CONFIG_SMSC_PHY) += smsc.o obj-$(CONFIG_VITESSE_PHY) += vitesse.o +obj-$(CONFIG_FIXED_PHY)+= fixed.o diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c new file mode 100644 index 000..5d6442c --- /dev/null +++ b/drivers/net/phy/fixed.c @@ -0,0 +1,358 @@ +/* + * drivers/net/phy/fixed.c + * + * Driver for fixed PHYs, when transceiver is able to operate in one fixed mode. 
+ * + * Author: Vitaly Bordug + * + * Copyright (c) 2006 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include linux/config.h +#include linux/kernel.h +#include linux/sched.h +#include linux/string.h +#include linux/errno.h +#include linux/unistd.h +#include linux/slab.h +#include linux/interrupt.h +#include linux/init.h +#include linux/delay.h +#include linux/netdevice.h +#include linux/etherdevice.h +#include linux/skbuff.h +#include linux/spinlock.h +#include linux/mm.h +#include linux/module.h +#include linux/mii.h +#include linux/ethtool.h +#include linux/phy.h + +#include asm/io.h +#include asm/irq.h +#include asm/uaccess.h + +#define MII_REGS_NUM 7 + +/* +The idea is to emulate normal phy behavior by responding with +pre-defined values to mii BMCR read, so that read_status hook could +take all the needed info. +*/ + +struct fixed_phy_status { + u8 link; + u16 speed; + u8 duplex; +}; + +/*- + * Private information hoder for mii_bus + *-*/ +struct fixed_info { + u16 *regs; + u8 regs_num; + struct fixed_phy_status phy_status; + struct phy_device *phydev; /* pointer to the container */ + /* link speed cb */ + int(*link_update)(struct net_device*, struct fixed_phy_status*); + +}; + +/*- + * If something weird is required to be done with link/speed, + * network driver is able to assign a function to implement this. + * May be useful for PHY's that need to be software-driven. 
+ *-*/ +int fixed_mdio_set_link_update(struct phy_device* phydev, + int(*link_update)(struct net_device*, struct fixed_phy_status*)) +{ + struct fixed_info *fixed; + + if(link_update == NULL) + return -EINVAL; + + if(phydev) { + if(phydev-bus) { + fixed = phydev-bus-priv; + fixed-link_update = link_update; + return 0; + } + } + return -EINVAL; +} +EXPORT_SYMBOL(fixed_mdio_set_link_update); + +/*- + * This is used for updating internal mii regs from the status +
[PATCH 3/3] ppc32: board-specific part of fs_enet update
This contains board-specific portion to respect driver changes (for 8272ads , 885ads and 866ads). Altered platform_data structures as well as initial setup routines relevant to fs_enet. Signed-off-by: Vitaly Bordug [EMAIL PROTECTED] --- arch/ppc/platforms/85xx/mpc8560_ads.c| 89 arch/ppc/platforms/85xx/mpc85xx_ads_common.h | 19 +++ arch/ppc/platforms/mpc8272ads_setup.c| 154 - arch/ppc/platforms/mpc866ads_setup.c | 192 +- arch/ppc/platforms/mpc885ads_setup.c | 175 +--- arch/ppc/platforms/pq2ads_pd.h | 82 --- arch/ppc/syslib/mpc85xx_devices.c| 89 arch/ppc/syslib/mpc8xx_devices.c |8 + arch/ppc/syslib/mpc8xx_sys.c |6 + arch/ppc/syslib/pq2_devices.c|5 + arch/ppc/syslib/pq2_sys.c|3 include/asm-ppc/cpm2.h | 95 + include/asm-ppc/mpc8260.h|1 include/asm-ppc/mpc8xx.h |1 include/linux/fs_enet_pd.h | 50 +++ 15 files changed, 578 insertions(+), 391 deletions(-) diff --git a/arch/ppc/platforms/85xx/mpc8560_ads.c b/arch/ppc/platforms/85xx/mpc8560_ads.c index d90cd24..94badaf 100644 --- a/arch/ppc/platforms/85xx/mpc8560_ads.c +++ b/arch/ppc/platforms/85xx/mpc8560_ads.c @@ -29,6 +29,7 @@ #include linux/serial_core.h #include linux/initrd.h #include linux/module.h #include linux/fsl_devices.h +#include linux/fs_enet_pd.h #include asm/system.h #include asm/pgtable.h @@ -58,6 +59,71 @@ #include syslib/ppc85xx_setup.h * Setup the architecture * */ +static void init_fcc_ioports(void) +{ + struct immap *immap; + struct io_port *io; + u32 tempval; + + immap = cpm2_immr; + + io = immap-im_ioport; + /* FCC2/3 are on the ports B/C. 
*/ + tempval = in_be32(io-iop_pdirb); + tempval = ~PB2_DIRB0; + tempval |= PB2_DIRB1; + out_be32(io-iop_pdirb, tempval); + + tempval = in_be32(io-iop_psorb); + tempval = ~PB2_PSORB0; + tempval |= PB2_PSORB1; + out_be32(io-iop_psorb, tempval); + + tempval = in_be32(io-iop_pparb); + tempval |= (PB2_DIRB0 | PB2_DIRB1); + out_be32(io-iop_pparb, tempval); + + tempval = in_be32(io-iop_pdirb); + tempval = ~PB3_DIRB0; + tempval |= PB3_DIRB1; + out_be32(io-iop_pdirb, tempval); + + tempval = in_be32(io-iop_psorb); + tempval = ~PB3_PSORB0; + tempval |= PB3_PSORB1; + out_be32(io-iop_psorb, tempval); + + tempval = in_be32(io-iop_pparb); + tempval |= (PB3_DIRB0 | PB3_DIRB1); + out_be32(io-iop_pparb, tempval); + +tempval = in_be32(io-iop_pdirc); +tempval |= PC3_DIRC1; +out_be32(io-iop_pdirc, tempval); + +tempval = in_be32(io-iop_pparc); +tempval |= PC3_DIRC1; +out_be32(io-iop_pparc, tempval); + + /* Port C has clocks.. */ + tempval = in_be32(io-iop_psorc); + tempval = ~(CLK_TRX); + out_be32(io-iop_psorc, tempval); + + tempval = in_be32(io-iop_pdirc); + tempval = ~(CLK_TRX); + out_be32(io-iop_pdirc, tempval); + tempval = in_be32(io-iop_pparc); + tempval |= (CLK_TRX); + out_be32(io-iop_pparc, tempval); + + /* Configure Serial Interface clock routing. +* First, clear all FCC bits to zero, +* then set the ones we want. 
+*/ + immap-im_cpmux.cmx_fcr = ~(CPMUX_CLK_MASK); + immap-im_cpmux.cmx_fcr |= CPMUX_CLK_ROUTE; +} static void __init mpc8560ads_setup_arch(void) @@ -66,6 +132,7 @@ mpc8560ads_setup_arch(void) unsigned int freq; struct gianfar_platform_data *pdata; struct gianfar_mdio_data *mdata; + struct fs_platform_info *fpi; cpm2_reset(); @@ -110,6 +177,28 @@ #endif memcpy(pdata-mac_addr, binfo-bi_enet1addr, 6); } + init_fcc_ioports(); + ppc_sys_device_remove(MPC85xx_CPM_FCC1); + + fpi = (struct fs_platform_info *) ppc_sys_get_pdata(MPC85xx_CPM_FCC2); + if (fpi) { + memcpy(fpi-macaddr, binfo-bi_enet2addr, 6); + fpi-bus_id = 0:02; + fpi-phy_addr = 2; + fpi-dpram_offset = (u32)cpm2_immr-im_dprambase; + fpi-fcc_regs_c = (u32)cpm2_immr-im_fcc_c[1]; + } + + fpi = (struct fs_platform_info *) ppc_sys_get_pdata(MPC85xx_CPM_FCC3); + if (fpi) { + memcpy(fpi-macaddr, binfo-bi_enet2addr, 6); + fpi-macaddr[5] += 1; + fpi-bus_id = 0:03; + fpi-phy_addr = 3; + fpi-dpram_offset = (u32)cpm2_immr-im_dprambase; + fpi-fcc_regs_c = (u32)cpm2_immr-im_fcc_c[2]; + } + #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) ROOT_DEV = Root_RAM0; diff --git
[PATCH 0/3] FS_ENET: move to the PAL api
These are patches that utilize the PHY Abstraction Layer API in the fs_enet Freescale SoC Ethernet driver. Comments gathered from the community have been addressed, plus minor fixes and improvements. -- Sincerely, Vitaly - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/3] FS_ENET: use PAL for mii management
This patch should update the fs_enet infrastructure to utilize Phy Abstraction Layer subsystem. Along with the above, there are apparent bugfixes, overhaul and improvements. Signed-off-by: Vitaly Bordug [EMAIL PROTECTED] --- drivers/net/fs_enet/Makefile |6 drivers/net/fs_enet/fec.h | 42 +++ drivers/net/fs_enet/fs_enet-main.c | 207 +-- drivers/net/fs_enet/fs_enet-mii.c | 505 drivers/net/fs_enet/fs_enet.h | 40 ++- drivers/net/fs_enet/mac-fcc.c | 32 ++ drivers/net/fs_enet/mac-fec.c | 142 +- drivers/net/fs_enet/mac-scc.c |4 drivers/net/fs_enet/mii-bitbang.c | 448 drivers/net/fs_enet/mii-fec.c | 243 + drivers/net/fs_enet/mii-fixed.c| 91 -- 11 files changed, 711 insertions(+), 1049 deletions(-) diff --git a/drivers/net/fs_enet/Makefile b/drivers/net/fs_enet/Makefile index d6dd3f2..02d4dc1 100644 --- a/drivers/net/fs_enet/Makefile +++ b/drivers/net/fs_enet/Makefile @@ -4,7 +4,7 @@ # obj-$(CONFIG_FS_ENET) += fs_enet.o -obj-$(CONFIG_8xx) += mac-fec.o mac-scc.o -obj-$(CONFIG_8260) += mac-fcc.o +obj-$(CONFIG_8xx) += mac-fec.o mac-scc.o mii-fec.o +obj-$(CONFIG_CPM2) += mac-fcc.o mii-bitbang.o -fs_enet-objs := fs_enet-main.o fs_enet-mii.o mii-bitbang.o mii-fixed.o +fs_enet-objs := fs_enet-main.o diff --git a/drivers/net/fs_enet/fec.h b/drivers/net/fs_enet/fec.h new file mode 100644 index 000..e980527 --- /dev/null +++ b/drivers/net/fs_enet/fec.h @@ -0,0 +1,42 @@ +#ifndef FS_ENET_FEC_H +#define FS_ENET_FEC_H + +/* CRC polynomium used by the FEC for the multicast group filtering */ +#define FEC_CRC_POLY 0x04C11DB7 + +#define FEC_MAX_MULTICAST_ADDRS64 + +/* Interrupt events/masks. 
+*/ +#define FEC_ENET_HBERR 0x8000U /* Heartbeat error */ +#define FEC_ENET_BABR 0x4000U /* Babbling receiver*/ +#define FEC_ENET_BABT 0x2000U /* Babbling transmitter */ +#define FEC_ENET_GRA 0x1000U /* Graceful stop complete */ +#define FEC_ENET_TXF 0x0800U /* Full frame transmitted */ +#define FEC_ENET_TXB 0x0400U /* A buffer was transmitted */ +#define FEC_ENET_RXF 0x0200U /* Full frame received */ +#define FEC_ENET_RXB 0x0100U /* A buffer was received*/ +#define FEC_ENET_MII 0x0080U /* MII interrupt*/ +#define FEC_ENET_EBERR 0x0040U /* SDMA bus error */ + +#define FEC_ECNTRL_PINMUX 0x0004 +#define FEC_ECNTRL_ETHER_EN0x0002 +#define FEC_ECNTRL_RESET 0x0001 + +#define FEC_RCNTRL_BC_REJ 0x0010 +#define FEC_RCNTRL_PROM0x0008 +#define FEC_RCNTRL_MII_MODE0x0004 +#define FEC_RCNTRL_DRT 0x0002 +#define FEC_RCNTRL_LOOP0x0001 + +#define FEC_TCNTRL_FDEN0x0004 +#define FEC_TCNTRL_HBC 0x0002 +#define FEC_TCNTRL_GTS 0x0001 + + + +/* + * Delay to wait for FEC reset command to complete (in us) + */ +#define FEC_RESET_DELAY50 +#endif diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index f6abff5..df62506 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -37,6 +37,7 @@ #include linux/ethtool.h #include linux/bitops.h #include linux/fs.h #include linux/platform_device.h +#include linux/phy.h #include linux/vmalloc.h #include asm/pgtable.h @@ -682,35 +683,6 @@ static void fs_free_irq(struct net_devic (*fep-ops-post_free_irq)(dev, irq); } -/**/ - -/* This interrupt occurs when the PHY detects a link change. */ -static irqreturn_t -fs_mii_link_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - struct net_device *dev = dev_id; - struct fs_enet_private *fep; - const struct fs_platform_info *fpi; - - fep = netdev_priv(dev); - fpi = fep-fpi; - - /* -* Acknowledge the interrupt if possible. If we have not -* found the PHY yet we can't process or acknowledge the -* interrupt now. 
Instead we ignore this interrupt for now, -* which we can do since it is edge triggered. It will be -* acknowledged later by fs_enet_open(). -*/ - if (!fep-phy) - return IRQ_NONE; - - fs_mii_ack_int(dev); - fs_mii_link_status_change_check(dev, 0); - - return IRQ_HANDLED; -} - static void fs_timeout(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); @@ -722,10 +694,13 @@ static void fs_timeout(struct net_device spin_lock_irqsave(fep-lock, flags); if (dev-flags IFF_UP) { + phy_stop(fep-phydev); (*fep-ops-stop)(dev); (*fep-ops-restart)(dev); + phy_start(fep-phydev);
[PATCH 1/3] PAL: Support of the fixed PHY
This makes it possible for HW PHY-less boards to utilize PAL goodies. Generic routines to connect to fixed PHY are provided, as well as ability to specify software callback that fills up link, speed, etc. information into PHY descriptor (the latter feature not tested so far). Signed-off-by: Vitaly Bordug [EMAIL PROTECTED] --- drivers/net/phy/Kconfig | 17 ++ drivers/net/phy/Makefile |1 drivers/net/phy/fixed.c | 358 ++ drivers/net/phy/phy_device.c | 51 -- include/linux/phy.h |1 5 files changed, 407 insertions(+), 21 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 2ba6d3a..b79ec0d 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -56,5 +56,22 @@ config SMSC_PHY ---help--- Currently supports the LAN83C185 PHY +config FIXED_PHY + tristate Drivers for PHY emulation on fixed speed/link + depends on PHYLIB + ---help--- + Adds the driver to PHY layer to cover the boards that do not have any PHY bound, + but with the ability to manipulate with speed/link in software. The relavant MII + speed/duplex parameters could be effectively handled in user-specified fuction. + Currently tested with mpc866ads. + +config FIXED_MII_10_FDX + bool Emulation for 10M Fdx fixed PHY behavior + depends on FIXED_PHY + +config FIXED_MII_100_FDX + bool Emulation for 100M Fdx fixed PHY behavior + depends on FIXED_PHY + endmenu diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index a00e619..320f832 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_LXT_PHY) += lxt.o obj-$(CONFIG_QSEMI_PHY)+= qsemi.o obj-$(CONFIG_SMSC_PHY) += smsc.o obj-$(CONFIG_VITESSE_PHY) += vitesse.o +obj-$(CONFIG_FIXED_PHY)+= fixed.o diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c new file mode 100644 index 000..5d6442c --- /dev/null +++ b/drivers/net/phy/fixed.c @@ -0,0 +1,358 @@ +/* + * drivers/net/phy/fixed.c + * + * Driver for fixed PHYs, when transceiver is able to operate in one fixed mode. 
+ * + * Author: Vitaly Bordug + * + * Copyright (c) 2006 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include linux/config.h +#include linux/kernel.h +#include linux/sched.h +#include linux/string.h +#include linux/errno.h +#include linux/unistd.h +#include linux/slab.h +#include linux/interrupt.h +#include linux/init.h +#include linux/delay.h +#include linux/netdevice.h +#include linux/etherdevice.h +#include linux/skbuff.h +#include linux/spinlock.h +#include linux/mm.h +#include linux/module.h +#include linux/mii.h +#include linux/ethtool.h +#include linux/phy.h + +#include asm/io.h +#include asm/irq.h +#include asm/uaccess.h + +#define MII_REGS_NUM 7 + +/* +The idea is to emulate normal phy behavior by responding with +pre-defined values to mii BMCR read, so that read_status hook could +take all the needed info. +*/ + +struct fixed_phy_status { + u8 link; + u16 speed; + u8 duplex; +}; + +/*- + * Private information hoder for mii_bus + *-*/ +struct fixed_info { + u16 *regs; + u8 regs_num; + struct fixed_phy_status phy_status; + struct phy_device *phydev; /* pointer to the container */ + /* link speed cb */ + int(*link_update)(struct net_device*, struct fixed_phy_status*); + +}; + +/*- + * If something weird is required to be done with link/speed, + * network driver is able to assign a function to implement this. + * May be useful for PHY's that need to be software-driven. 
+ *-*/ +int fixed_mdio_set_link_update(struct phy_device* phydev, + int(*link_update)(struct net_device*, struct fixed_phy_status*)) +{ + struct fixed_info *fixed; + + if(link_update == NULL) + return -EINVAL; + + if(phydev) { + if(phydev-bus) { + fixed = phydev-bus-priv; + fixed-link_update = link_update; + return 0; + } + } + return -EINVAL; +} +EXPORT_SYMBOL(fixed_mdio_set_link_update); + +/*- + * This is used for updating internal mii regs from the status +