[RFC][PATCH][IPSEC][2/3] IPv6 over IPv4 IPsec tunnel
This is the patch to support IPv6 over IPv4 IPsec Signed-off-by: Miika Komu <[EMAIL PROTECTED]> Signed-off-by: Diego Beltrami <[EMAIL PROTECTED]> Signed-off-by: Kazunori Miyazawa <[EMAIL PROTECTED]> diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index e23c21d..e54c549 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -23,6 +23,12 @@ static inline void ipip_ecn_decapsulate( IP_ECN_set_ce(inner_iph); } +static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) +{ + if (INET_ECN_is_ce(iph->tos)) + IP6_ECN_set_ce(skb->nh.ipv6h); +} + /* Add encapsulation header. * * The top IP header will be constructed per RFC 2401. The following fields @@ -36,6 +42,7 @@ static inline void ipip_ecn_decapsulate( static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; + struct xfrm_dst *xdst = (struct xfrm_dst*)dst; struct iphdr *iph, *top_iph; int flags; @@ -48,15 +55,27 @@ static int xfrm4_tunnel_output(struct xf top_iph->ihl = 5; top_iph->version = 4; + flags = x->props.flags; + /* DS disclosed */ - top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); + if (xdst->route->ops->family == AF_INET) { + top_iph->protocol = IPPROTO_IPIP; + top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? + 0 : (iph->frag_off & htons(IP_DF)); + } +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + else { + struct ipv6hdr *ipv6h = (struct ipv6hdr*)iph; + top_iph->protocol = IPPROTO_IPV6; + top_iph->tos = INET_ECN_encapsulate(iph->tos, ipv6_get_dsfield(ipv6h)); + top_iph->frag_off = 0; + } +#endif - flags = x->props.flags; if (flags & XFRM_STATE_NOECN) IP_ECN_clear(top_iph); - top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 
- 0 : (iph->frag_off & htons(IP_DF)); if (!top_iph->frag_off) __ip_select_ident(top_iph, dst->child, 0); @@ -64,7 +83,6 @@ static int xfrm4_tunnel_output(struct xf top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; - top_iph->protocol = IPPROTO_IPIP; memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); return 0; @@ -75,8 +93,16 @@ static int xfrm4_tunnel_input(struct xfr struct iphdr *iph = skb->nh.iph; int err = -EINVAL; - if (iph->protocol != IPPROTO_IPIP) - goto out; + switch(iph->protocol){ + case IPPROTO_IPIP: +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + case IPPROTO_IPV6: + break; +#endif + default: + goto out; + } + if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; @@ -84,10 +110,19 @@ static int xfrm4_tunnel_input(struct xfr (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) goto out; - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv4_copy_dscp(iph, skb->h.ipiph); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip_ecn_decapsulate(skb); + if (iph->protocol == IPPROTO_IPIP) { + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv4_copy_dscp(iph, skb->h.ipiph); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip_ecn_decapsulate(skb); + } +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + else { + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(iph, skb); + skb->protocol = htons(ETH_P_IPV6); + } +#endif skb->mac.raw = memmove(skb->data - skb->mac_len, skb->mac.raw, skb->mac_len); skb->nh.raw = skb->data; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8dffd4d..a1ac537 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -131,13 +131,11 @@ __xfrm6_bundle_create(struct xfrm_policy struct dst_entry *dst, *dst_prev; struct rt6_info *rt0 = (struct rt6_info*)(*dst_p); struct rt6_info *rt = rt0; - struct in6_addr *remote = &fl->fl6_dst; - struct in6_addr *local = &fl->fl6_src; struct flowi fl_tunnel = { .nl_u = { .ip6_u = { - .saddr = *local, - .daddr = *remote + 
.saddr = fl->fl6_src, + .daddr = fl->fl6_dst, } } }; @@ -153,7 +151,6 @@ __xfrm6_bundle_create(struct xfrm_policy
[RFC][PATCH][IPSEC][1/3]exporting xfrm_state_afinfo
This patch exports xfrm_state_afinfo. Signed-off-by: Miika Komu <[EMAIL PROTECTED]> Signed-off-by: Diego Beltrami <[EMAIL PROTECTED]> Signed-off-by: Kazunori Miyazawa <[EMAIL PROTECTED]> diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e476541..bf91d63 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -252,10 +252,13 @@ struct xfrm_state_afinfo { xfrm_address_t *daddr, xfrm_address_t *saddr); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); + int (*output)(struct sk_buff *skb); }; extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); +extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); +extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); extern void xfrm_state_delete_tunnel(struct xfrm_state *x); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 3cc3df0..93e2c06 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -51,6 +51,7 @@ static struct xfrm_state_afinfo xfrm4_st .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, + .output = xfrm4_output, }; void __init xfrm4_state_init(void) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index c260ea1..bad0ad9 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -166,3 +166,5 @@ int xfrm6_output(struct sk_buff *skb) return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm6_output_finish); } + +EXPORT_SYMBOL(xfrm6_output); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 9ddaa9d..60ad5f0 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -171,6 +171,7 @@ static struct xfrm_state_afinfo xfrm6_st .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, + .output = 
xfrm6_output, }; void __init xfrm6_state_init(void) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index fdb08d9..24f7bfd 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -183,9 +183,6 @@ static DEFINE_SPINLOCK(xfrm_state_gc_loc int __xfrm_state_delete(struct xfrm_state *x); -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); - int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); void km_state_expired(struct xfrm_state *x, int hard, u32 pid); @@ -1458,7 +1455,7 @@ int xfrm_state_unregister_afinfo(struct } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) +struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) { struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) @@ -1470,11 +1467,14 @@ static struct xfrm_state_afinfo *xfrm_st return afinfo; } -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) +void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { read_unlock(&xfrm_state_afinfo_lock); } +EXPORT_SYMBOL(xfrm_state_get_afinfo); +EXPORT_SYMBOL(xfrm_state_put_afinfo); + /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ void xfrm_state_delete_tunnel(struct xfrm_state *x) { - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC][PATCH][IPSEC][3/3] IPv4 over IPv6 IPsec tunnel
This is the patch to support IPv4 over IPv6 IPsec. Signed-off-by: Miika Komu <[EMAIL PROTECTED]> Signed-off-by: Diego Beltrami <[EMAIL PROTECTED]> Signed-off-by: Kazunori Miyazawa <[EMAIL PROTECTED]> diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index fb9f69c..011136a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -72,13 +72,11 @@ __xfrm4_bundle_create(struct xfrm_policy struct dst_entry *dst, *dst_prev; struct rtable *rt0 = (struct rtable*)(*dst_p); struct rtable *rt = rt0; - __be32 remote = fl->fl4_dst; - __be32 local = fl->fl4_src; struct flowi fl_tunnel = { .nl_u = { .ip4_u = { - .saddr = local, - .daddr = remote, + .saddr = fl->fl4_src, + .daddr = fl->fl4_dst, .tos = fl->fl4_tos } } @@ -94,7 +92,6 @@ __xfrm4_bundle_create(struct xfrm_policy for (i = 0; i < nx; i++) { struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops); struct xfrm_dst *xdst; - int tunnel = 0; if (unlikely(dst1 == NULL)) { err = -ENOBUFS; @@ -116,19 +113,28 @@ __xfrm4_bundle_create(struct xfrm_policy dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - remote = xfrm[i]->id.daddr.a4; - local = xfrm[i]->props.saddr.a4; - tunnel = 1; - } + header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; - if (tunnel) { - fl_tunnel.fl4_src = local; - fl_tunnel.fl4_dst = remote; + if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { + unsigned short encap_family = xfrm[i]->props.family; + switch(encap_family) { + case AF_INET: + fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4; + fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; + break; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + case AF_INET6: + ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6); + ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6); + break; +#endif + default: + BUG_ON(1); + } err = xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET); + &fl_tunnel, encap_family); if (err) 
goto error; } else @@ -145,6 +151,7 @@ __xfrm4_bundle_create(struct xfrm_policy i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; + struct xfrm_state_afinfo *afinfo; x->u.rt.fl = *fl; dst_prev->xfrm = xfrm[i++]; @@ -162,8 +169,17 @@ __xfrm4_bundle_create(struct xfrm_policy /* Copy neighbout for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - dst_prev->output= xfrm4_output; - if (rt->peer) + /* XXX: When IPv6 module can be unloaded, we should manage reference +* to xfrm6_output in afinfo->output. Miyazawa +* */ + afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); + if (!afinfo) { + dst = *dst_p; + goto error; + } + dst_prev->output = afinfo->output; + xfrm_state_put_afinfo(afinfo); + if (dst_prev->xfrm->props.family == AF_INET && rt->peer) atomic_inc(&rt->peer->refcnt); x->u.rt.peer = rt->peer; /* Sheit... I remember I did this right. Apparently, @@ -274,7 +290,7 @@ static void xfrm4_dst_destroy(struct dst if (likely(xdst->u.rt.idev)) in_dev_put(xdst->u.rt.idev); - if (likely(xdst->u.rt.peer)) + if (dst->xfrm->props.family == AF_INET && likely(xdst->u.rt.peer)) inet_putpeer(xdst->u.rt.peer); xfrm_dst_destroy(xdst); } diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 5e7d8a7..55cddcb 100644 ---
[RFC][PATCH][IPSEC][0/3]inter address family ipsec tunnel
Hello, I fixed the compile issue when we configure IPv6 as a module. [1/3] exporting xfrm_state_afinfo [2/3] supporting IPv6 over IPv4 IPsec [3/3] supporting IPv4 over IPv6 IPsec These patches can be applied to linux-2.6.20-rc2. Thank you, -- Kazunori Miyazawa - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: netif_poll_enable() & barrier
On Thu, 2006-12-28 at 21:09 -0800, David Miller wrote: > From: Benjamin Herrenschmidt <[EMAIL PROTECTED]> > Date: Wed, 20 Dec 2006 14:44:12 +1100 > > > I stumbled across what might be a bug on out of order architecture: > > > > netif_poll_enable() only does a clear_bit(). However, > > netif_poll_disable/enable pairs are often used as simili-spinlocks. > > > > (netif_poll_enable() has pretty much spin_lock semantics except that it > > schedules instead of looping). > > > > Thus, shouldn't netif_poll_disable() do an smp_wmb(); before clearing > > the bit to make sure that any stores done within the poll-disabled > > section are properly visible to the rest of the system before clearing > > the bit ? > > Although I couldn't find a problematic case with any current > in-tree drivers, it's better to be safe than sorry :-) > > So I'll add a smp_mb__before_clear_bit() to netif_poll_enable() :) Heh, thanks ! :-) I haven't seen any problematic case either, though if there was one, it would result in weird problems very hard to track down, so as you said, better safe than sorry (unless you see a flaw in my reasoning). Cheers, Oh, and happy new year too ! :-) Ben. - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: netif_poll_enable() & barrier
From: Benjamin Herrenschmidt <[EMAIL PROTECTED]> Date: Wed, 20 Dec 2006 14:44:12 +1100 > I stumbled across what might be a bug on out of order architecture: > > netif_poll_enable() only does a clear_bit(). However, > netif_poll_disable/enable pairs are often used as simili-spinlocks. > > (netif_poll_enable() has pretty much spin_lock semantics except that it > schedules instead of looping). > > Thus, shouldn't netif_poll_disable() do an smp_wmb(); before clearing > the bit to make sure that any stores done within the poll-disabled > section are properly visible to the rest of the system before clearing > the bit ? Although I couldn't find a problematic case with any current in-tree drivers, it's better to be safe than sorry :-) So I'll add a smp_mb__before_clear_bit() to netif_poll_enable() :) - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch sungem] improved locking
From: Benjamin Herrenschmidt <[EMAIL PROTECTED]> Date: Wed, 13 Dec 2006 15:07:24 +1100 > tg3 says > > tg3: eth0: Link is up at 1000 Mbps, full duplex. > tg3: eth0: Flow control is on for TX and on for RX. > > but sungem says > > eth0: Link is up at 1000 Mbps, full-duplex. > eth0: Pause is disabled > > Hrm... I suppose I need to dig more. No time to do that today though. I was about to try and debug this, and noticed immediately that I didn't recognize any of the code. Could you look into this, you rewrote all of this stuff and this looks like a regression added, because I know this pause stuff used to work perfectly when I wrote the original GEM driver. :-) - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Network card IRQ balancing with Intel 5000 series chipsets
On Wed, 27 Dec 2006, jamal wrote: On Wed, 2006-27-12 at 09:09 +0200, Robert Iakobashvili wrote: My scenario is treatment of RTP packets in kernel space with a single network card (both Rx and Tx). The default of the Intel 5000 series chipset is affinity of each network card to a certain CPU. Currently, neither with irqbalance nor with kernel irq-balancing (MSI and io-apic attempted) I do not find a way to balance that irq. In the near future, when the NIC vendors wake up[1] because CPU vendors - including big bad Intel - are going to be putting out a large number of hardware threads, you should be able to do more clever things with such a setup. At the moment, just tie it to a single CPU and have your other processes that are related running/bound on the other cores so you can utilize them. OTOH, you say you are only using 30% of the one CPU, so it may not be a big deal to tie your single nic to one cpu. Anyway, it seems that with more advanced firewalls/routers the kernel spends most of its time in IPSec/crypto code, netfilter conntrack and iptables rules/extensions, routing lookups, etc and not in the hardware IRQ handler. So, it would be nice if this part could be done by all CPUs. Best regards, Krzysztof Olędzki
Re: Generic PHY lib vs. locking
On Dec 21, 2006, at 22:07, Benjamin Herrenschmidt wrote: Hi Andy ! I've been looking at porting various drivers (EMAC, sungem, spider_net, ...) to the generic PHY stuff. However, I have one significant problem here. One of the things I've been trying to do lately with EMAC and that I plan to do with others, is to have the PHY polling entirely operate at task level (along with other "slow" parts of the network driver like timeout handling etc...). This makes a lot of locking easier, allowing to use mutexes instead of locks (less latencies), allowing to sleep waiting for MDIO operations to complete, etc... it's generally all benefit. It's especially useful in a case like EMAC where several EMACs can share MDIO lines, so we need exclusive access, and that might involve even a second layer of exclusion for access to the RGMII or ZMII layer. mutexes are really the best approach for that sort of non-speed critical activities. This sounds good to me. It was an eventual goal, but I wasn't familiar enough with the non-spin-lock locking rules to confidently implement it. However, the generic PHY layer defeats that by its heavy usage of spin_lock_bh and timer. One solution would be to change it to use a mutex instead of a lock as well, though that would change the requirements of where phy_start/ stop can be called, and use a delayed work queue instead of a timer. I could do all of these changes provided everybody agrees, though I suppose all existing network drivers using that PHY layer might need to be adapted. How many do we have nowadays ? Great! At last glance, only gianfar, fs_enet, and au1000_eth. There are one or two others that haven't gone in, yet. My hope is that your changes will not require any changes to the drivers, but I'll leave that to your discretion. Also, I see your comments about not calling flush_scheduled_work() in phy_stop() because of rtnl_lock()... What is the problem here ? dev_close() ? Yup. However, I think a reasonable solution was proposed.
The problem is that flush_scheduled_work() actually does all the scheduled work. And if it happens with rtnl_lock() held, and some of the scheduled work grabs rtnl_lock(), we deadlock. But another function was proposed, and I believe committed to the tree, which only deletes or does the work you own, and therefore lets you avoid that problem (assuming you know that your code doesn't grab such locks), and also lets you free memory. Andy - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
2.6.20-rc2: known unfixed regressions
This email lists some known regressions in 2.6.20-rc2 compared to 2.6.19. If you find your name in the Cc header, you are either the submitter of one of the bugs, the maintainer of an affected subsystem or driver, a patch of yours caused a breakage or I'm considering you in any other way possibly involved with one or more of these issues. Due to the huge number of recipients, please trim the Cc when answering. Subject: PCI_MULTITHREAD_PROBE breakage References : http://lkml.org/lkml/2006/12/12/21 Submitter : Ben Castricum <[EMAIL PROTECTED]> Caused-By : Greg Kroah-Hartman <[EMAIL PROTECTED]> commit 009af1ff78bfc30b9a27807dd0207fc32848218a Status : known to break many drivers; revert? Subject: Acer Extensa 3002 WLMi: 'shutdown -h now' reboots the system References : http://lkml.org/lkml/2006/12/25/40 Submitter : Berthold Cogel <[EMAIL PROTECTED]> Status : unknown Subject: i386: Oops in __find_get_block() References : http://lkml.org/lkml/2006/12/16/138 Submitter : Ben Collins <[EMAIL PROTECTED]> Daniel Holbach <[EMAIL PROTECTED]> Status : unknown Subject: ftp: get or put stops during file-transfer References : http://lkml.org/lkml/2006/12/16/174 Submitter : Komuro <[EMAIL PROTECTED]> Status : unknown Subject: forcedeth.c 0.59: problem with sideband managment References : http://bugzilla.kernel.org/show_bug.cgi?id=7684 Submitter : Michael Reske <[EMAIL PROTECTED]> Handled-By : Ayaz Abdulla <[EMAIL PROTECTED]> Status : problem is being debugged Subject: x86_64 boot failure: "IO-APIC + timer doesn't work" References : http://lkml.org/lkml/2006/12/16/101 Submitter : Tobias Diedrich <[EMAIL PROTECTED]> Caused-By : Andi Kleen <[EMAIL PROTECTED]> commit b026872601976f666bae77b609dc490d1834bf77 Handled-By : Yinghai Lu <[EMAIL PROTECTED]> "Eric W. 
Biederman" <[EMAIL PROTECTED]> Status : problem is being debugged Subject: kernel panics on boot (libata-sff) References : http://lkml.org/lkml/2006/12/3/99 http://lkml.org/lkml/2006/12/14/153 http://lkml.org/lkml/2006/12/24/33 Submitter : Alessandro Suardi <[EMAIL PROTECTED]> Caused-By : Alan Cox <[EMAIL PROTECTED]> commit 368c73d4f689dae0807d0a2aa74c61fd2b9b075f Handled-By : Alan Cox <[EMAIL PROTECTED]> Steve Wise <[EMAIL PROTECTED]> Alessandro Suardi <[EMAIL PROTECTED]> Status : people are working on a fix - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
2.6.20-rc2: known regressions with patches available
This email lists some known regressions in 2.6.20-rc2 compared to 2.6.19 with patches available. If you find your name in the Cc header, you are either the submitter of one of the bugs, the maintainer of an affected subsystem or driver, a patch of yours caused a breakage or I'm considering you in any other way possibly involved with one or more of these issues. Due to the huge number of recipients, please trim the Cc when answering. Subject: selinux networking: sleeping function called from invalid context References : http://lkml.org/lkml/2006/12/24/78 Submitter : "Adam J. Richter" <[EMAIL PROTECTED]> Caused-By : Paul Moore <[EMAIL PROTECTED]> Handled-By : Parag Warudkar <[EMAIL PROTECTED]> Patch : http://lkml.org/lkml/2006/12/24/89 Status : patch available Subject: KVM Oops References : http://lkml.org/lkml/2006/12/27/171 Submitter : Parag Warudkar <[EMAIL PROTECTED]> Handled-By : Avi Kivity <[EMAIL PROTECTED]> Status : patch available Subject: drivers/macintosh/via-pmu-backlight.c compilation broken References : http://lkml.org/lkml/2006/12/24/49 Submitter : Andreas Schwab <[EMAIL PROTECTED]> Caused-By : Yu Luming <[EMAIL PROTECTED]> commit 519ab5f2be65b72cf12ae99c89752bbe79b44df6 Handled-By : Andreas Schwab <[EMAIL PROTECTED]> Patch : http://lkml.org/lkml/2006/12/24/49 Status : patch available Subject: NULL dereference in ieee80211softmac_get_network_by_bssid_locked ieee80211softmac_wx.c typo: mutex_lock -> mutex_unlock References : http://bugzilla.kernel.org/show_bug.cgi?id=7657 http://lkml.org/lkml/2006/12/16/141 http://lkml.org/lkml/2006/12/24/43 Submitter : Michael Bommarito <[EMAIL PROTECTED]> Ben Collins <[EMAIL PROTECTED]> Martin Pitt <[EMAIL PROTECTED]> Handled-By : Michael Bommarito <[EMAIL PROTECTED]> Larry Finger <[EMAIL PROTECTED]> Patch : http://bugzilla.kernel.org/show_bug.cgi?id=7657 Status : patches available Subject: BUG at drivers/scsi/scsi_lib.c:1118 by "pktsetup dvd /dev/sr0" References : http://bugzilla.kernel.org/show_bug.cgi?id=7667 Submitter : 
Laurent Riffard <[EMAIL PROTECTED]> Caused-By : Christoph Hellwig <[EMAIL PROTECTED]> commit 3b00315799d78f76531b71435fbc2643cd71ae4c Handled-By : Christoph Hellwig <[EMAIL PROTECTED]> Patch : http://bugzilla.kernel.org/show_bug.cgi?id=7667 Status : patch available - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: one vs. two drivers for an iWARP-capable Ethernet NIC
Re-sending as plain text to reach netdev. Sorry for the extra traffic, please ignore the earlier html version of this e-mail... Jeff/Roland/all, What is the preferred submission driver model for an iWARP-capable Ethernet NIC - two separate drivers (Ethernet and OpenFabrics) that interact with each other, or a single driver that supports both OpenFabrics and Ethernet interfaces? For our hardware we can go either way, although in case of separate drivers the interface between the two would get somewhat artificial... Thanks, Leonid - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Generic PHY lib vs. locking
> Great! At last glance, only gianfar, fs_enet, and au1000_eth. There > are one or two others that haven't gone in, yet. My hope is that > your changes will not require any changes to the drivers, but I'll > leave that to your discretion. Unfortunately, it will probably have an impact on them. I'll have a look. Ben. - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [take29 0/8] kevent: Generic event handling mechanism.
* Evgeniy Polyakov <[EMAIL PROTECTED]> wrote: > Generic event handling mechanism. it would be /very/ helpful to state against which kernel tree the patch-queue is. It does not apply to 2.6.20-rc1 nor to -rc2 nor to 2.6.19. At which point i gave up ... Ingo - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [take29 0/8] kevent: Generic event handling mechanism.
* Evgeniy Polyakov <[EMAIL PROTECTED]> wrote: > Generic event handling mechanism. i see it covers a lot of event sources, but i cannot see block IO notifications. Am i missing something? Ingo - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] igmp: spin_lock_bh in timer (Re: BUG: soft lockup detected on CPU#0!)
On Wed, Dec 27, 2006 at 08:16:10AM -0800, Ben Greear wrote: ... > The system hangs and does not recover (well, a few processes > continue on the other processor for a few minutes before they > too deadlock...) > > I am guessing this problem has been around for a while, but it > is only triggered when interfaces are created, and probably only > when UDP traffic is already running heavily on the system. Most > systems w/out virtual devices will not trigger this sort of > race. Considering your contribution to the kernel, many people here would like to help, I hope, but this information is probably not enough. Maybe some more logs & dmesg? If it deadlocks anyway, maybe adding panic() after dump_stack() could tell something. Jarek P. - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] tipc: checking returns and Re: Possible Circular Locking in TIPC
On 22-12-2006 15:28, Eric Sesterhenn wrote: > hi, > > while running my usual stuff on 2.6.20-rc1-git5, sfuzz > (http://www.digitaldwarf.be/products/sfuzz.c) > did the following, to produce the lockdep warning below: ... > Here is the stacktrace: > > [ 313.239556] === > [ 313.239718] [ INFO: possible circular locking dependency detected ] > [ 313.239795] 2.6.20-rc1-git5 #26 > [ 313.239858] --- > [ 313.239929] sfuzz/4133 is trying to acquire lock: > [ 313.239996] (ref_table_lock){-+..}, at: [] > tipc_ref_discard+0x29/0xe0 > [ 313.241101] > [ 313.241105] but task is already holding lock: > [ 313.241225] (&table[i].lock){-+..}, at: [] > tipc_deleteport+0x40/0x1a0 > [ 313.241524] > [ 313.241528] which lock already depends on the new lock. > [ 313.241535] > [ 313.241709] > [ 313.241713] the existing dependency chain (in reverse order) is: > [ 313.241837] > [ 313.241841] -> #1 (&table[i].lock){-+..}: > [ 313.242096][] __lock_acquire+0xd05/0xde0 > [ 313.242562][] lock_acquire+0x69/0xa0 > [ 313.243013][] _spin_lock_bh+0x40/0x60 > [ 313.243476][] tipc_ref_acquire+0x6b/0xe0 > [ 313.244115][] tipc_createport_raw+0x33/0x260 > [ 313.244562][] tipc_createport+0x41/0x120 > [ 313.245007][] tipc_subscr_start+0xcc/0x120 > [ 313.245458][] process_signal_queue+0x56/0xa0 > [ 313.245906][] tasklet_action+0x38/0x80 > [ 313.246361][] __do_softirq+0x5b/0xc0 > [ 313.246817][] do_softirq+0x88/0xe0 > [ 313.247450][] 0x > [ 313.247894] > [ 313.247898] -> #0 (ref_table_lock){-+..}: > [ 313.248155][] __lock_acquire+0xa55/0xde0 > [ 313.248601][] lock_acquire+0x69/0xa0 > [ 313.249037][] _write_lock_bh+0x40/0x60 > [ 313.249486][] tipc_ref_discard+0x29/0xe0 > [ 313.249922][] tipc_deleteport+0x5a/0x1a0 > [ 313.250543][] tipc_create+0x58/0x160 > [ 313.250980][] __sock_create+0x112/0x280 > [ 313.251422][] sock_create+0x1a/0x20 > [ 313.251863][] sys_socket+0x1b/0x40 > [ 313.252301][] sys_socketcall+0x92/0x260 > [ 313.252738][] syscall_call+0x7/0xb > [ 313.253175][] 0x > [ 313.253778] > [ 313.253782] 
other info that might help us debug this: > [ 313.253790] > [ 313.253956] 1 lock held by sfuzz/4133: > [ 313.254019] #0: (&table[i].lock){-+..}, at: [] > tipc_deleteport+0x40/0x1a0 > [ 313.254346] > [ 313.254351] stack backtrace: > [ 313.254470] [] show_trace_log_lvl+0x1a/0x40 > [ 313.254594] [] show_trace+0x12/0x20 > [ 313.254711] [] dump_stack+0x19/0x20 > [ 313.254829] [] print_circular_bug_tail+0x6f/0x80 > [ 313.254952] [] __lock_acquire+0xa55/0xde0 > [ 313.255070] [] lock_acquire+0x69/0xa0 > [ 313.255188] [] _write_lock_bh+0x40/0x60 > [ 313.255315] [] tipc_ref_discard+0x29/0xe0 > [ 313.255435] [] tipc_deleteport+0x5a/0x1a0 > [ 313.255565] [] tipc_create+0x58/0x160 > [ 313.255687] [] __sock_create+0x112/0x280 > [ 313.255811] [] sock_create+0x1a/0x20 > [ 313.255942] [] sys_socket+0x1b/0x40 > [ 313.256059] [] sys_socketcall+0x92/0x260 > [ 313.256179] [] syscall_call+0x7/0xb > [ 313.256300] === > > Greetings, Eric Hello, Maybe I misinterpret this but, IMHO lockdep complains about locks acquired in different order: tipc_ref_acquire() gets ref_table_lock and then tipc_ret_table.entries[index]->lock, but tipc_deleteport() inversely (with: tipc_port_lock() and tipc_ref_discard()). I hope maintainers will decide the correct order. Btw. there is a problem with tipc_ref_discard(): it should be called with tipc_port_lock, but how to discard a ref if this lock can't be acquired? Is it OK to call it without the lock like in subscr_named_msg_event()? Btw. #2: during this checking I've found two places where return values from tipc_ref_lock() and tipc_port_lock() are not checked, so I attach a patch proposal for this (compiled but not tested): Regards, Jarek P. --- [PATCH] tipc: checking returns from locking functions Checking of return values from tipc_ref_lock() and tipc_port_lock() added in 2 places. 
Signed-off-by: Jarek Poplawski <[EMAIL PROTECTED]> --- diff -Nurp linux-2.6.20-rc2-/net/tipc/port.c linux-2.6.20-rc2/net/tipc/port.c --- linux-2.6.20-rc2-/net/tipc/port.c 2006-11-29 22:57:37.0 +0100 +++ linux-2.6.20-rc2/net/tipc/port.c2006-12-28 11:05:17.0 +0100 @@ -238,7 +238,12 @@ u32 tipc_createport_raw(void *usr_handle return 0; } - tipc_port_lock(ref); + if (!tipc_port_lock(ref)) { + tipc_ref_discard(ref); + warn("Port creation failed, reference table invalid\n"); + kfree(p_ptr); + return 0; + } p_ptr->publ.ref = ref; msg = &p_ptr->publ.phdr; msg_init(msg, DATA_LOW, TIPC_NAMED_M
[patch] qeth: fix uaccess handling and get rid of unused variable
From: Heiko Carstens <[EMAIL PROTECTED]> [patch] qeth: fix uaccess handling and get rid of unused variable drivers/s390/net/qeth_main.c: In function `qeth_process_inbound_buffer': drivers/s390/net/qeth_main.c:2563: warning: unused variable `vlan_addr' include/asm/uaccess.h: In function `qeth_do_ioctl': drivers/s390/net/qeth_main.c:4847: warning: ignoring return value of `copy_to_user' drivers/s390/net/qeth_main.c:4849: warning: ignoring return value of `copy_to_user' drivers/s390/net/qeth_main.c:4996: warning: ignoring return value of `copy_to_user' Cc: Frank Pavlic <[EMAIL PROTECTED]> Signed-off-by: Heiko Carstens <[EMAIL PROTECTED]> --- drivers/s390/net/qeth_main.c | 13 - 1 files changed, 8 insertions(+), 5 deletions(-) Index: linux-2.6/drivers/s390/net/qeth_main.c === --- linux-2.6.orig/drivers/s390/net/qeth_main.c +++ linux-2.6/drivers/s390/net/qeth_main.c @@ -2560,7 +2560,6 @@ qeth_process_inbound_buffer(struct qeth_ int offset; int rxrc; __u16 vlan_tag = 0; - __u16 *vlan_addr; /* get first element of current buffer */ element = (struct qdio_buffer_element *)&buf->buffer->element[0]; @@ -4844,9 +4843,11 @@ qeth_arp_query(struct qeth_card *card, c "(0x%x/%d)\n", QETH_CARD_IFNAME(card), qeth_arp_get_error_cause(&rc), tmp, tmp); - copy_to_user(udata, qinfo.udata, 4); + if (copy_to_user(udata, qinfo.udata, 4)) + rc = -EFAULT; } else { - copy_to_user(udata, qinfo.udata, qinfo.udata_len); + if (copy_to_user(udata, qinfo.udata, qinfo.udata_len)) + rc = -EFAULT; } kfree(qinfo.udata); return rc; @@ -4992,8 +4993,10 @@ qeth_snmp_command(struct qeth_card *card if (rc) PRINT_WARN("SNMP command failed on %s: (0x%x)\n", QETH_CARD_IFNAME(card), rc); -else - copy_to_user(udata, qinfo.udata, qinfo.udata_len); + else { + if (copy_to_user(udata, qinfo.udata, qinfo.udata_len)) + rc = -EFAULT; + } kfree(ureq); kfree(qinfo.udata); - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at 
http://vger.kernel.org/majordomo-info.html
Re: [take24 0/6] kevent: Generic event handling mechanism.
On Wed, Dec 27, 2006 at 12:45:50PM -0800, Ulrich Drepper ([EMAIL PROTECTED]) wrote: > Evgeniy Polyakov wrote: > > Why do we want to inject _ready_ event, when it is possible to mark > > event as ready and wakeup thread parked in syscall? > > Going back to this old one: > > How do you want to mark an event ready if you don't want to introduce > yet another layer of data structures? The event notification happens > through entries in the ring buffer. Userlevel code should never add > anything to the ring buffer directly, this would mean huge > synchronization problems. Yes, one could add additional data structures > accompanying the ring buffer which can specify userlevel-generated > events. But this is a) clumsy and b) a pain to use when the same ring > buffer is used in multiple threads (you'd have to have another shared > memory segment). > > It's much cleaner if the userlevel code can get the kernel to inject a > userlevel-generated event. This is the equivalent of userlevel code > generating a signal with kill(). The existing possibility to mark an event as ready works the following way: an event is queued into a storage queue (socket, inode or some other queue); when the readiness condition becomes true, the event is queued into the ready queue (although it is still in the storage queue). This happens completely asynchronously to _any_ kind of userspace processing. When userspace calls the appropriate syscall, the event is copied into the ring buffer. Thus userspace readiness will just mark the event as ready, i.e. it queues the event into the ready queue, so that later userspace will call the syscall to actually get the event. When one thread is parked in the syscall and there are _no_ events which should be marked as ready (for example only sockets are there, and it is not a good idea to wake up the whole socket processing state machine), then there is no possibility to receive such an event (although it is possible to interrupt and break the syscall). 
So, as for injecting ready events, it can be done - just by the addition of a special flag which will force the kevent core to move the event into the ready queue immediately. In this case userspace can even prepare a needed event (like a signal event) and deliver it to the process, so it will think (only from the kevent point of view) that a real signal has arrived. I will also add a special type of events - userspace events - which will not have empty callbacks, and which are intended to be used in a user-defined way (i.e. for inter-thread communication). > -- > ➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖ > -- Evgeniy Polyakov - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html