[RFC][PATCH][IPSEC][2/3] IPv6 over IPv4 IPsec tunnel

2006-12-28 Thread Kazunori MIYAZAWA
This is the patch to support IPv6 over IPv4 IPsec

Signed-off-by: Miika Komu <[EMAIL PROTECTED]>
Signed-off-by: Diego Beltrami <[EMAIL PROTECTED]>
Signed-off-by: Kazunori Miyazawa <[EMAIL PROTECTED]>


diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index e23c21d..e54c549 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -23,6 +23,12 @@ static inline void ipip_ecn_decapsulate(
IP_ECN_set_ce(inner_iph);
 }
 
+static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff 
*skb)
+{
+   if (INET_ECN_is_ce(iph->tos))
+   IP6_ECN_set_ce(skb->nh.ipv6h);
+}
+
 /* Add encapsulation header.
  *
  * The top IP header will be constructed per RFC 2401.  The following fields
@@ -36,6 +42,7 @@ static inline void ipip_ecn_decapsulate(
 static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
struct dst_entry *dst = skb->dst;
+   struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
struct iphdr *iph, *top_iph;
int flags;
 
@@ -48,15 +55,27 @@ static int xfrm4_tunnel_output(struct xf
top_iph->ihl = 5;
top_iph->version = 4;
 
+   flags = x->props.flags;
+
/* DS disclosed */
-   top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
+   if (xdst->route->ops->family == AF_INET) {
+   top_iph->protocol = IPPROTO_IPIP;
+   top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
+   top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
+   0 : (iph->frag_off & htons(IP_DF));
+   }
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+   else {
+   struct ipv6hdr *ipv6h = (struct ipv6hdr*)iph;
+   top_iph->protocol = IPPROTO_IPV6;
+   top_iph->tos = INET_ECN_encapsulate(iph->tos, 
ipv6_get_dsfield(ipv6h));
+   top_iph->frag_off = 0;
+   }
+#endif
 
-   flags = x->props.flags;
if (flags & XFRM_STATE_NOECN)
IP_ECN_clear(top_iph);
 
-   top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
-   0 : (iph->frag_off & htons(IP_DF));
if (!top_iph->frag_off)
__ip_select_ident(top_iph, dst->child, 0);
 
@@ -64,7 +83,6 @@ static int xfrm4_tunnel_output(struct xf
 
top_iph->saddr = x->props.saddr.a4;
top_iph->daddr = x->id.daddr.a4;
-   top_iph->protocol = IPPROTO_IPIP;
 
memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
return 0;
@@ -75,8 +93,16 @@ static int xfrm4_tunnel_input(struct xfr
struct iphdr *iph = skb->nh.iph;
int err = -EINVAL;
 
-   if (iph->protocol != IPPROTO_IPIP)
-   goto out;
+   switch(iph->protocol){
+   case IPPROTO_IPIP:
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+   case IPPROTO_IPV6:
+   break;
+#endif
+   default:
+   goto out;
+   }
+
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out;
 
@@ -84,10 +110,19 @@ static int xfrm4_tunnel_input(struct xfr
(err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
goto out;
 
-   if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-   ipv4_copy_dscp(iph, skb->h.ipiph);
-   if (!(x->props.flags & XFRM_STATE_NOECN))
-   ipip_ecn_decapsulate(skb);
+   if (iph->protocol == IPPROTO_IPIP) {
+   if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+   ipv4_copy_dscp(iph, skb->h.ipiph);
+   if (!(x->props.flags & XFRM_STATE_NOECN))
+   ipip_ecn_decapsulate(skb);
+   }
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+   else {
+   if (!(x->props.flags & XFRM_STATE_NOECN))
+   ipip6_ecn_decapsulate(iph, skb);
+   skb->protocol = htons(ETH_P_IPV6);
+   }
+#endif
skb->mac.raw = memmove(skb->data - skb->mac_len,
   skb->mac.raw, skb->mac_len);
skb->nh.raw = skb->data;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8dffd4d..a1ac537 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -131,13 +131,11 @@ __xfrm6_bundle_create(struct xfrm_policy
struct dst_entry *dst, *dst_prev;
struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);
struct rt6_info *rt  = rt0;
-   struct in6_addr *remote = &fl->fl6_dst;
-   struct in6_addr *local  = &fl->fl6_src;
struct flowi fl_tunnel = {
.nl_u = {
.ip6_u = {
-   .saddr = *local,
-   .daddr = *remote
+   .saddr = fl->fl6_src,
+   .daddr = fl->fl6_dst,
}
}
};
@@ -153,7 +151,6 @@ __xfrm6_bundle_create(struct xfrm_policy

[RFC][PATCH][IPSEC][1/3]exporting xfrm_state_afinfo

2006-12-28 Thread Kazunori MIYAZAWA
This patch exports xfrm_state_afinfo.

Signed-off-by: Miika Komu <[EMAIL PROTECTED]>
Signed-off-by: Diego Beltrami <[EMAIL PROTECTED]>
Signed-off-by: Kazunori Miyazawa <[EMAIL PROTECTED]>

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e476541..bf91d63 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -252,10 +252,13 @@ struct xfrm_state_afinfo {
xfrm_address_t *daddr, 
xfrm_address_t *saddr);
int (*tmpl_sort)(struct xfrm_tmpl **dst, struct 
xfrm_tmpl **src, int n);
int (*state_sort)(struct xfrm_state **dst, struct 
xfrm_state **src, int n);
+   int (*output)(struct sk_buff *skb);
 };
 
 extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo);
 extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo);
+extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
+extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
 
 extern void xfrm_state_delete_tunnel(struct xfrm_state *x);
 
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 3cc3df0..93e2c06 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -51,6 +51,7 @@ static struct xfrm_state_afinfo xfrm4_st
.family = AF_INET,
.init_flags = xfrm4_init_flags,
.init_tempsel   = __xfrm4_init_tempsel,
+   .output = xfrm4_output,
 };
 
 void __init xfrm4_state_init(void)
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index c260ea1..bad0ad9 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -166,3 +166,5 @@ int xfrm6_output(struct sk_buff *skb)
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
   xfrm6_output_finish);
 }
+
+EXPORT_SYMBOL(xfrm6_output);
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 9ddaa9d..60ad5f0 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -171,6 +171,7 @@ static struct xfrm_state_afinfo xfrm6_st
.init_tempsel   = __xfrm6_init_tempsel,
.tmpl_sort  = __xfrm6_tmpl_sort,
.state_sort = __xfrm6_state_sort,
+   .output = xfrm6_output,
 };
 
 void __init xfrm6_state_init(void)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index fdb08d9..24f7bfd 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -183,9 +183,6 @@ static DEFINE_SPINLOCK(xfrm_state_gc_loc
 
 int __xfrm_state_delete(struct xfrm_state *x);
 
-static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
-static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
-
 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy 
*pol);
 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
 
@@ -1458,7 +1455,7 @@ int xfrm_state_unregister_afinfo(struct 
 }
 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
 
-static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
+struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
 {
struct xfrm_state_afinfo *afinfo;
if (unlikely(family >= NPROTO))
@@ -1470,11 +1467,14 @@ static struct xfrm_state_afinfo *xfrm_st
return afinfo;
 }
 
-static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
+void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
 {
read_unlock(&xfrm_state_afinfo_lock);
 }
 
+EXPORT_SYMBOL(xfrm_state_get_afinfo);
+EXPORT_SYMBOL(xfrm_state_put_afinfo);
+
 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
 void xfrm_state_delete_tunnel(struct xfrm_state *x)
 {
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC][PATCH][IPSEC][3/3] IPv4 over IPv6 IPsec tunnel

2006-12-28 Thread Kazunori MIYAZAWA
This is the patch to support IPv4 over IPv6 IPsec.

Signed-off-by: Miika Komu <[EMAIL PROTECTED]>
Signed-off-by: Diego Beltrami <[EMAIL PROTECTED]>
Signed-off-by: Kazunori Miyazawa <[EMAIL PROTECTED]>

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index fb9f69c..011136a 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -72,13 +72,11 @@ __xfrm4_bundle_create(struct xfrm_policy
struct dst_entry *dst, *dst_prev;
struct rtable *rt0 = (struct rtable*)(*dst_p);
struct rtable *rt = rt0;
-   __be32 remote = fl->fl4_dst;
-   __be32 local  = fl->fl4_src;
struct flowi fl_tunnel = {
.nl_u = {
.ip4_u = {
-   .saddr = local,
-   .daddr = remote,
+   .saddr = fl->fl4_src,
+   .daddr = fl->fl4_dst,
.tos = fl->fl4_tos
}
}
@@ -94,7 +92,6 @@ __xfrm4_bundle_create(struct xfrm_policy
for (i = 0; i < nx; i++) {
struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
struct xfrm_dst *xdst;
-   int tunnel = 0;
 
if (unlikely(dst1 == NULL)) {
err = -ENOBUFS;
@@ -116,19 +113,28 @@ __xfrm4_bundle_create(struct xfrm_policy
 
dst1->next = dst_prev;
dst_prev = dst1;
-   if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
-   remote = xfrm[i]->id.daddr.a4;
-   local  = xfrm[i]->props.saddr.a4;
-   tunnel = 1;
-   }
+
header_len += xfrm[i]->props.header_len;
trailer_len += xfrm[i]->props.trailer_len;
 
-   if (tunnel) {
-   fl_tunnel.fl4_src = local;
-   fl_tunnel.fl4_dst = remote;
+   if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
+   unsigned short encap_family = xfrm[i]->props.family;
+   switch(encap_family) {
+   case AF_INET:
+   fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
+   fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
+   break;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+   case AF_INET6:
+   ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct 
in6_addr*)&xfrm[i]->id.daddr.a6);
+   ipv6_addr_copy(&fl_tunnel.fl6_src, (struct 
in6_addr*)&xfrm[i]->props.saddr.a6);
+   break;
+#endif
+   default:
+   BUG_ON(1);
+   }
err = xfrm_dst_lookup((struct xfrm_dst **)&rt,
- &fl_tunnel, AF_INET);
+ &fl_tunnel, encap_family);
if (err)
goto error;
} else
@@ -145,6 +151,7 @@ __xfrm4_bundle_create(struct xfrm_policy
i = 0;
for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
+   struct xfrm_state_afinfo *afinfo;
x->u.rt.fl = *fl;
 
dst_prev->xfrm = xfrm[i++];
@@ -162,8 +169,17 @@ __xfrm4_bundle_create(struct xfrm_policy
/* Copy neighbout for reachability confirmation */
dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour);
dst_prev->input = rt->u.dst.input;
-   dst_prev->output= xfrm4_output;
-   if (rt->peer)
+   /* XXX: When IPv6 module can be unloaded, we should manage 
reference
+* to xfrm6_output in afinfo->output. Miyazawa
+* */
+   afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family);
+   if (!afinfo) {
+   dst = *dst_p;
+   goto error;
+   }
+   dst_prev->output = afinfo->output;
+   xfrm_state_put_afinfo(afinfo);
+   if (dst_prev->xfrm->props.family == AF_INET && rt->peer)
atomic_inc(&rt->peer->refcnt);
x->u.rt.peer = rt->peer;
/* Sheit... I remember I did this right. Apparently,
@@ -274,7 +290,7 @@ static void xfrm4_dst_destroy(struct dst
 
if (likely(xdst->u.rt.idev))
in_dev_put(xdst->u.rt.idev);
-   if (likely(xdst->u.rt.peer))
+   if (dst->xfrm->props.family == AF_INET && likely(xdst->u.rt.peer))
inet_putpeer(xdst->u.rt.peer);
xfrm_dst_destroy(xdst);
 }
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 5e7d8a7..55cddcb 100644
---

[RFC][PATCH][IPSEC][0/3]inter address family ipsec tunnel

2006-12-28 Thread Kazunori MIYAZAWA
Hello,

I fixed the compile issue when we configure IPv6 as a module.

[1/3] exporting xfrm_state_afinfo
[2/3] supporting IPv6 over IPv4 IPsec
[3/3] supporting IPv4 over IPv6 IPsec

These patches can be applied to linux-2.6.20-rc2.

Thank you,

--
Kazunori Miyazawa
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: netif_poll_enable() & barrier

2006-12-28 Thread Benjamin Herrenschmidt
On Thu, 2006-12-28 at 21:09 -0800, David Miller wrote:
> From: Benjamin Herrenschmidt <[EMAIL PROTECTED]>
> Date: Wed, 20 Dec 2006 14:44:12 +1100
> 
> > I stumbled across what might be a bug on out of order architecture:
> > 
> > netif_poll_enable() only does a clear_bit(). However,
> > netif_poll_disable/enable pairs are often used as simili-spinlocks.
> > 
> > (netif_poll_enable() has pretty much spin_lock semantics except that it
> > schedules instead of looping).
> > 
> > Thus, shouldn't netif_poll_disable() do an smp_wmb(); before clearing
> > the bit to make sure that any stores done within the poll-disabled
> > section are properly visible to the rest of the system before clearing
> > the bit ?
> 
> Although I couldn't find a problematic case with any current
> in-tree drivers, it's better to be safe than sorry :-)
> 
> So I'll add a smp_mb__before_clear_bit() to netif_poll_enable() :)

Heh, thanks ! :-)

I haven't seen any problematic case either, though if there was one, it
would result in weird problems very hard to track down, so as you said,
better safe than sorry (unless you see a flaw in my reasoning).

Cheers,

Oh, and happy new year too ! :-)

Ben.

-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: netif_poll_enable() & barrier

2006-12-28 Thread David Miller
From: Benjamin Herrenschmidt <[EMAIL PROTECTED]>
Date: Wed, 20 Dec 2006 14:44:12 +1100

> I stumbled across what might be a bug on out of order architecture:
> 
> netif_poll_enable() only does a clear_bit(). However,
> netif_poll_disable/enable pairs are often used as simili-spinlocks.
> 
> (netif_poll_enable() has pretty much spin_lock semantics except that it
> schedules instead of looping).
> 
> Thus, shouldn't netif_poll_disable() do an smp_wmb(); before clearing
> the bit to make sure that any stores done within the poll-disabled
> section are properly visible to the rest of the system before clearing
> the bit ?

Although I couldn't find a problematic case with any current
in-tree drivers, it's better to be safe than sorry :-)

So I'll add a smp_mb__before_clear_bit() to netif_poll_enable() :)
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch sungem] improved locking

2006-12-28 Thread David Miller
From: Benjamin Herrenschmidt <[EMAIL PROTECTED]>
Date: Wed, 13 Dec 2006 15:07:24 +1100

> tg3 says
> 
> tg3: eth0: Link is up at 1000 Mbps, full duplex.
> tg3: eth0: Flow control is on for TX and on for RX.
> 
> but sungem says
> 
> eth0: Link is up at 1000 Mbps, full-duplex.
> eth0: Pause is disabled
> 
> Hrm... I suppose I need to dig more. No time to do that today though.

I was about to try and debug this, and noticed immediately that I
didn't recognize any of the code.

Could you look into this, you rewrote all of this stuff and this
looks like a regression added, because I know this pause stuff
used to work perfectly when I wrote the original GEM driver. :-)
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Network card IRQ balancing with Intel 5000 series chipsets

2006-12-28 Thread Krzysztof Oledzki



On Wed, 27 Dec 2006, jamal wrote:


On Wed, 2006-27-12 at 09:09 +0200, Robert Iakobashvili wrote:



My scenario is treatment of RTP packets in kernel space with a single network
card (both Rx and Tx). The default of the Intel 5000 series chipset is
affinity of each network card to a certain CPU. Currently, neither with
irqbalance nor with kernel irq-balancing (MSI and io-apic attempted) can I
find a way to balance that irq.


In the near future, when the NIC vendors wake up[1] because CPU vendors
- including big bad Intel -  are going to be putting out a large number
of hardware threads, you should be able to do more clever things with
such a setup. At the moment, just tie it to a single CPU and have your
other processes that are related running/bound on the other cores so you
can utilize them. OTOH, you say you are only using 30% of the one CPU,
so it may not be a big deal to tie your single nic to one cpu.


Anyway, it seems that with more advanced firewalls/routers the kernel spends
most of its time in IPSec/crypto code, netfilter conntrack and iptables
rules/extensions, routing lookups, etc and not in the hardware IRQ handler.
So, it would be nice if this part could be done by all CPUs.


Best regards,


Krzysztof Olędzki

Re: Generic PHY lib vs. locking

2006-12-28 Thread Andy Fleming


On Dec 21, 2006, at 22:07, Benjamin Herrenschmidt wrote:


Hi Andy !

I've been looking at porting various drivers (EMAC, sungem,
spider_net, ...) to the generic PHY stuff. However, I have one
significant problem here.

One of the things I've been trying to do lately with EMAC and that I
plan to do with others, is to have the PHY polling entirely operate at
task level (along with other "slow" parts of the network driver like
timeout handling etc...).

This makes a lot of locking easier, allowing to use mutexes instead of
locks (less latencies), allowing to sleep waiting for MDIO  
operations to

complete, etc... it's generally all benefit.

It's especially useful in a case like EMAC where several EMACs can  
share

MDIO lines, so we need exclusive access, and that might involve even a
second layer of exclusion for access to the RGMII or ZMII layer.  
mutexes

are really the best approach for that sort of non-speed critical
activities.



This sounds good to me.  It was an eventual goal, but I wasn't  
familiar enough with the non-spin-lock locking rules to confidently  
implement it.





However, the generic PHY layer defeats that by its heavy usage of
spin_lock_bh and timer.

One solution would be to change it to use a mutex instead of a lock as
well, though that would change the requirements of where phy_start/ 
stop

can be called, and use a delayed work queue instead of a timer.

I could do all of these changes provided everybody agrees, though I
suppose all existing network drivers using that PHY layer might  
need to

be adapted. How many do we have nowadays ?



Great!  At last glance, only gianfar, fs_enet, and au1000_eth.  There  
are one or two others that haven't gone in, yet.  My hope is that  
your changes will not require any changes to the drivers, but I'll  
leave that to your discretion.





Also, I see your comments about not calling flush_scheduled_work() in
phy_stop() because of rtnl_lock()... What is the problem here ?
dev_close() ?



Yup.  However, I think a reasonable solution was proposed.  The  
problem is that flush_scheduled_work() actually does all the  
scheduled work.  And if it happens with rtnl_lock() held, and some of  
the scheduled work grabs rtnl_lock(), we deadlock.  But another  
function was proposed, and I believe committed to the tree, which  
only deletes or does the work you own, and therefore lets you avoid  
that problem (assuming you know that your code doesn't grab such  
locks), and also lets you free memory.


Andy
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


2.6.20-rc2: known unfixed regressions

2006-12-28 Thread Adrian Bunk
This email lists some known regressions in 2.6.20-rc2 compared to 2.6.19.

If you find your name in the Cc header, you are either submitter of one
of the bugs, maintainer of an affected subsystem or driver, a patch
of yours caused a breakage or I'm considering you in any other way possibly
involved with one or more of these issues.

Due to the huge amount of recipients, please trim the Cc when answering.


Subject: PCI_MULTITHREAD_PROBE breakage
References : http://lkml.org/lkml/2006/12/12/21
Submitter  : Ben Castricum <[EMAIL PROTECTED]>
Caused-By  : Greg Kroah-Hartman <[EMAIL PROTECTED]>
 commit 009af1ff78bfc30b9a27807dd0207fc32848218a
Status : known to break many drivers; revert?


Subject: Acer Extensa 3002 WLMi: 'shutdown -h now' reboots the system
References : http://lkml.org/lkml/2006/12/25/40
Submitter  : Berthold Cogel <[EMAIL PROTECTED]>
Status : unknown


Subject: i386: Oops in __find_get_block()
References : http://lkml.org/lkml/2006/12/16/138
Submitter  : Ben Collins <[EMAIL PROTECTED]>
 Daniel Holbach <[EMAIL PROTECTED]>
Status : unknown


Subject: ftp: get or put stops during file-transfer
References : http://lkml.org/lkml/2006/12/16/174
Submitter  : Komuro <[EMAIL PROTECTED]>
Status : unknown


Subject: forcedeth.c 0.59: problem with sideband managment
References : http://bugzilla.kernel.org/show_bug.cgi?id=7684
Submitter  : Michael Reske <[EMAIL PROTECTED]>
Handled-By : Ayaz Abdulla <[EMAIL PROTECTED]>
Status : problem is being debugged


Subject: x86_64 boot failure: "IO-APIC + timer doesn't work"
References : http://lkml.org/lkml/2006/12/16/101
Submitter  : Tobias Diedrich <[EMAIL PROTECTED]>
Caused-By  : Andi Kleen <[EMAIL PROTECTED]>
 commit b026872601976f666bae77b609dc490d1834bf77
Handled-By : Yinghai Lu <[EMAIL PROTECTED]>
 "Eric W. Biederman" <[EMAIL PROTECTED]>
Status : problem is being debugged


Subject: kernel panics on boot (libata-sff)
References : http://lkml.org/lkml/2006/12/3/99
 http://lkml.org/lkml/2006/12/14/153
 http://lkml.org/lkml/2006/12/24/33
Submitter  : Alessandro Suardi <[EMAIL PROTECTED]>
Caused-By  : Alan Cox <[EMAIL PROTECTED]>
 commit 368c73d4f689dae0807d0a2aa74c61fd2b9b075f
Handled-By : Alan Cox <[EMAIL PROTECTED]>
 Steve Wise <[EMAIL PROTECTED]>
 Alessandro Suardi <[EMAIL PROTECTED]>
Status : people are working on a fix


-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


2.6.20-rc2: known regressions with patches available

2006-12-28 Thread Adrian Bunk
This email lists some known regressions in 2.6.20-rc2 compared to 2.6.19
with patches available

If you find your name in the Cc header, you are either submitter of one
of the bugs, maintainer of an affected subsystem or driver, a patch
of yours caused a breakage or I'm considering you in any other way possibly
involved with one or more of these issues.

Due to the huge amount of recipients, please trim the Cc when answering.


Subject: selinux networking: sleeping function called from invalid context
References : http://lkml.org/lkml/2006/12/24/78
Submitter  : "Adam J. Richter" <[EMAIL PROTECTED]>
Caused-By  : Paul Moore <[EMAIL PROTECTED]>
Handled-By : Parag Warudkar <[EMAIL PROTECTED]>
Patch  : http://lkml.org/lkml/2006/12/24/89
Status : patch available


Subject: KVM Oops
References : http://lkml.org/lkml/2006/12/27/171
Submitter  : Parag Warudkar <[EMAIL PROTECTED]>
Handled-By : Avi Kivity <[EMAIL PROTECTED]>
Status : patch available


Subject: drivers/macintosh/via-pmu-backlight.c compilation broken
References : http://lkml.org/lkml/2006/12/24/49
Submitter  : Andreas Schwab <[EMAIL PROTECTED]>
Caused-By  : Yu Luming <[EMAIL PROTECTED]>
 commit 519ab5f2be65b72cf12ae99c89752bbe79b44df6
Handled-By : Andreas Schwab <[EMAIL PROTECTED]>
Patch  : http://lkml.org/lkml/2006/12/24/49
Status : patch available


Subject: NULL dereference in ieee80211softmac_get_network_by_bssid_locked
 ieee80211softmac_wx.c typo: mutex_lock -> mutex_unlock
References : http://bugzilla.kernel.org/show_bug.cgi?id=7657
 http://lkml.org/lkml/2006/12/16/141
 http://lkml.org/lkml/2006/12/24/43
Submitter  : Michael Bommarito <[EMAIL PROTECTED]>
 Ben Collins <[EMAIL PROTECTED]>
 Martin Pitt <[EMAIL PROTECTED]>
Handled-By : Michael Bommarito <[EMAIL PROTECTED]>
 Larry Finger <[EMAIL PROTECTED]>
Patch  : http://bugzilla.kernel.org/show_bug.cgi?id=7657
Status : patches available


Subject: BUG at drivers/scsi/scsi_lib.c:1118 by "pktsetup dvd /dev/sr0"
References : http://bugzilla.kernel.org/show_bug.cgi?id=7667
Submitter  : Laurent Riffard <[EMAIL PROTECTED]>
Caused-By  : Christoph Hellwig <[EMAIL PROTECTED]>
 commit 3b00315799d78f76531b71435fbc2643cd71ae4c
Handled-By : Christoph Hellwig <[EMAIL PROTECTED]>
Patch  : http://bugzilla.kernel.org/show_bug.cgi?id=7667
Status : patch available

-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: one vs. two drivers for an iWARP-capable Ethernet NIC

2006-12-28 Thread Leonid Grossman
Re-sending as a plain text to reach netdev.
Sorry for the extra traffic, please ignore the earlier html version of
this e-mail...


Jeff/Roland/all,
What is the preferred submission driver model for an iWARP-capable
Ethernet NIC - two separate drivers (Ethernet and OpenFabrics) that
interact with each other, or a single driver that supports both
OpenFabrics and Ethernet interfaces?

For our hardware we can go either way, although in case of separate
drivers the interface between the two would get somewhat artificial...
 
Thanks, Leonid


-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Generic PHY lib vs. locking

2006-12-28 Thread Benjamin Herrenschmidt

> Great!  At last glance, only gianfar, fs_enet, and au1000_eth.  There  
> are one or two others that haven't gone in, yet.  My hope is that  
> your changes will not require any changes to the drivers, but I'll  
> leave that to your discretion.

Unfortunately, it will probably have an impact on them. I'll have a
look.

Ben.


-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [take29 0/8] kevent: Generic event handling mechanism.

2006-12-28 Thread Ingo Molnar

* Evgeniy Polyakov <[EMAIL PROTECTED]> wrote:

> Generic event handling mechanism.

it would be /very/ helpful to state against which kernel tree the 
patch-queue is. It does not apply to 2.6.20-rc1 nor to -rc2 nor to 
2.6.19. At which point i gave up ...

Ingo
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [take29 0/8] kevent: Generic event handling mechanism.

2006-12-28 Thread Ingo Molnar

* Evgeniy Polyakov <[EMAIL PROTECTED]> wrote:

> Generic event handling mechanism.

i see it covers a lot of event sources, but i cannot see block IO 
notifications. Am i missing something?

Ingo
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] igmp: spin_lock_bh in timer (Re: BUG: soft lockup detected on CPU#0!)

2006-12-28 Thread Jarek Poplawski
On Wed, Dec 27, 2006 at 08:16:10AM -0800, Ben Greear wrote:
...
> The system hangs and does not recover (well, a few processes
> continue on the other processor for a few minutes before they
> too deadlock...)
> 
> I am guessing this problem has been around for a while, but it
> is only triggered when interfaces are created, and probably only
> when UDP traffic is already running heavily on the system.  Most
> systems w/out virtual devices will not trigger this sort of
> race.

Considering your contribution to the kernel,
many people here would like to help, I hope,
but this information is probably not enough.

Maybe some more logs & dmesg? If it deadlocks
anyway, maybe adding panic() after dump_stack()
could tell something.

Jarek P.
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] tipc: checking returns and Re: Possible Circular Locking in TIPC

2006-12-28 Thread Jarek Poplawski
On 22-12-2006 15:28, Eric Sesterhenn wrote:
> hi,
> 
> while running my usual stuff on 2.6.20-rc1-git5, sfuzz 
> (http://www.digitaldwarf.be/products/sfuzz.c)
> did the following, to produce the lockdep warning below:
...
> Here is the stacktrace:
> 
> [  313.239556] ===
> [  313.239718] [ INFO: possible circular locking dependency detected ]
> [  313.239795] 2.6.20-rc1-git5 #26
> [  313.239858] ---
> [  313.239929] sfuzz/4133 is trying to acquire lock:
> [  313.239996]  (ref_table_lock){-+..}, at: [] 
> tipc_ref_discard+0x29/0xe0
> [  313.241101] 
> [  313.241105] but task is already holding lock:
> [  313.241225]  (&table[i].lock){-+..}, at: [] 
> tipc_deleteport+0x40/0x1a0
> [  313.241524] 
> [  313.241528] which lock already depends on the new lock.
> [  313.241535] 
> [  313.241709] 
> [  313.241713] the existing dependency chain (in reverse order) is:
> [  313.241837] 
> [  313.241841] -> #1 (&table[i].lock){-+..}:
> [  313.242096][] __lock_acquire+0xd05/0xde0
> [  313.242562][] lock_acquire+0x69/0xa0
> [  313.243013][] _spin_lock_bh+0x40/0x60
> [  313.243476][] tipc_ref_acquire+0x6b/0xe0
> [  313.244115][] tipc_createport_raw+0x33/0x260
> [  313.244562][] tipc_createport+0x41/0x120
> [  313.245007][] tipc_subscr_start+0xcc/0x120
> [  313.245458][] process_signal_queue+0x56/0xa0
> [  313.245906][] tasklet_action+0x38/0x80
> [  313.246361][] __do_softirq+0x5b/0xc0
> [  313.246817][] do_softirq+0x88/0xe0
> [  313.247450][] 0x
> [  313.247894] 
> [  313.247898] -> #0 (ref_table_lock){-+..}:
> [  313.248155][] __lock_acquire+0xa55/0xde0
> [  313.248601][] lock_acquire+0x69/0xa0
> [  313.249037][] _write_lock_bh+0x40/0x60
> [  313.249486][] tipc_ref_discard+0x29/0xe0
> [  313.249922][] tipc_deleteport+0x5a/0x1a0
> [  313.250543][] tipc_create+0x58/0x160
> [  313.250980][] __sock_create+0x112/0x280
> [  313.251422][] sock_create+0x1a/0x20
> [  313.251863][] sys_socket+0x1b/0x40
> [  313.252301][] sys_socketcall+0x92/0x260
> [  313.252738][] syscall_call+0x7/0xb
> [  313.253175][] 0x
> [  313.253778] 
> [  313.253782] other info that might help us debug this:
> [  313.253790] 
> [  313.253956] 1 lock held by sfuzz/4133:
> [  313.254019]  #0:  (&table[i].lock){-+..}, at: [] 
> tipc_deleteport+0x40/0x1a0
> [  313.254346] 
> [  313.254351] stack backtrace:
> [  313.254470]  [] show_trace_log_lvl+0x1a/0x40
> [  313.254594]  [] show_trace+0x12/0x20
> [  313.254711]  [] dump_stack+0x19/0x20
> [  313.254829]  [] print_circular_bug_tail+0x6f/0x80
> [  313.254952]  [] __lock_acquire+0xa55/0xde0
> [  313.255070]  [] lock_acquire+0x69/0xa0
> [  313.255188]  [] _write_lock_bh+0x40/0x60
> [  313.255315]  [] tipc_ref_discard+0x29/0xe0
> [  313.255435]  [] tipc_deleteport+0x5a/0x1a0
> [  313.255565]  [] tipc_create+0x58/0x160
> [  313.255687]  [] __sock_create+0x112/0x280
> [  313.255811]  [] sock_create+0x1a/0x20
> [  313.255942]  [] sys_socket+0x1b/0x40
> [  313.256059]  [] sys_socketcall+0x92/0x260
> [  313.256179]  [] syscall_call+0x7/0xb
> [  313.256300]  ===
> 
> Greetings, Eric

Hello,

Maybe I misinterpret this but, IMHO lockdep
complains about locks acquired in different
order: tipc_ref_acquire() gets ref_table_lock 
and then tipc_ref_table.entries[index]->lock,
but tipc_deleteport() inversely (with:
tipc_port_lock() and tipc_ref_discard()).
I hope maintainers will decide the correct
order.

Btw. there is a problem with tipc_ref_discard():
it should be called with tipc_port_lock, but
how to discard a ref if this lock can't be
acquired? Is it OK to call it without the lock
like in subscr_named_msg_event()?

Btw. #2: during this checking I've found
two places where return values from
tipc_ref_lock() and tipc_port_lock() are not 
checked, so I attach a patch proposal for
this (compiled but not tested):

Regards,
Jarek P.
---

[PATCH] tipc: checking returns from locking functions

Checking of return values from tipc_ref_lock()
and tipc_port_lock() added in 2 places. 

Signed-off-by: Jarek Poplawski <[EMAIL PROTECTED]>
---

diff -Nurp linux-2.6.20-rc2-/net/tipc/port.c linux-2.6.20-rc2/net/tipc/port.c
--- linux-2.6.20-rc2-/net/tipc/port.c   2006-11-29 22:57:37.0 +0100
+++ linux-2.6.20-rc2/net/tipc/port.c2006-12-28 11:05:17.0 +0100
@@ -238,7 +238,12 @@ u32 tipc_createport_raw(void *usr_handle
return 0;
}
 
-   tipc_port_lock(ref);
+   if (!tipc_port_lock(ref)) {
+   tipc_ref_discard(ref);
+   warn("Port creation failed, reference table invalid\n");
+   kfree(p_ptr);
+   return 0;
+   }
p_ptr->publ.ref = ref;
msg = &p_ptr->publ.phdr;
msg_init(msg, DATA_LOW, TIPC_NAMED_M

[patch] qeth: fix uaccess handling and get rid of unused variable

2006-12-28 Thread Heiko Carstens
From: Heiko Carstens <[EMAIL PROTECTED]>

[patch] qeth: fix uaccess handling and get rid of unused variable

drivers/s390/net/qeth_main.c: In function `qeth_process_inbound_buffer':
drivers/s390/net/qeth_main.c:2563: warning: unused variable `vlan_addr'

include/asm/uaccess.h: In function `qeth_do_ioctl':
drivers/s390/net/qeth_main.c:4847: warning:
 ignoring return value of `copy_to_user'
drivers/s390/net/qeth_main.c:4849: warning:
 ignoring return value of `copy_to_user'
drivers/s390/net/qeth_main.c:4996: warning:
 ignoring return value of `copy_to_user'

Cc: Frank Pavlic <[EMAIL PROTECTED]>
Signed-off-by: Heiko Carstens <[EMAIL PROTECTED]>
---
 drivers/s390/net/qeth_main.c |   13 -
 1 files changed, 8 insertions(+), 5 deletions(-)

Index: linux-2.6/drivers/s390/net/qeth_main.c
===
--- linux-2.6.orig/drivers/s390/net/qeth_main.c
+++ linux-2.6/drivers/s390/net/qeth_main.c
@@ -2560,7 +2560,6 @@ qeth_process_inbound_buffer(struct qeth_
int offset;
int rxrc;
__u16 vlan_tag = 0;
-   __u16 *vlan_addr;
 
/* get first element of current buffer */
element = (struct qdio_buffer_element *)&buf->buffer->element[0];
@@ -4844,9 +4843,11 @@ qeth_arp_query(struct qeth_card *card, c
   "(0x%x/%d)\n",
   QETH_CARD_IFNAME(card), 
qeth_arp_get_error_cause(&rc),
   tmp, tmp);
-   copy_to_user(udata, qinfo.udata, 4);
+   if (copy_to_user(udata, qinfo.udata, 4))
+   rc = -EFAULT;
} else {
-   copy_to_user(udata, qinfo.udata, qinfo.udata_len);
+   if (copy_to_user(udata, qinfo.udata, qinfo.udata_len))
+   rc = -EFAULT;
}
kfree(qinfo.udata);
return rc;
@@ -4992,8 +4993,10 @@ qeth_snmp_command(struct qeth_card *card
if (rc)
PRINT_WARN("SNMP command failed on %s: (0x%x)\n",
   QETH_CARD_IFNAME(card), rc);
-else
-   copy_to_user(udata, qinfo.udata, qinfo.udata_len);
+   else {
+   if (copy_to_user(udata, qinfo.udata, qinfo.udata_len))
+   rc = -EFAULT;
+   }
 
kfree(ureq);
kfree(qinfo.udata);
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [take24 0/6] kevent: Generic event handling mechanism.

2006-12-28 Thread Evgeniy Polyakov
On Wed, Dec 27, 2006 at 12:45:50PM -0800, Ulrich Drepper ([EMAIL PROTECTED]) 
wrote:
> Evgeniy Polyakov wrote:
> > Why do we want to inject _ready_ event, when it is possible to mark
> > event as ready and wakeup thread parked in syscall?
> 
> Going back to this old one:
> 
> How do you want to mark an event ready if you don't want to introduce
> yet another layer of data structures?  The event notification happens
> through entries in the ring buffer.  Userlevel code should never add
> anything to the ring buffer directly, this would mean huge
> synchronization problems.  Yes, one could add additional data structures
> accompanying the ring buffer which can specify userlevel-generated
> events.  But this is a) clumsy and b) a pain to use when the same ring
> buffer is used in multiple threads (you'd have to have another shared
> memory segment).
> 
> It's much cleaner if the userlevel code can get the kernel to inject a
> userlevel-generated event.  This is the equivalent of userlevel code
> generating a signal with kill().

The existing possibility to mark an event as ready works the following way:
an event is queued into a storage queue (socket, inode or some other queue);
when the readiness condition becomes true, the event is queued into the ready
queue (although it is still in the storage queue). This happens completely
asynchronously to _any_ kind of userspace processing.
When userspace calls the appropriate syscall, the event is copied into the ring
buffer.

Thus userspace readiness will just mark the event as ready, i.e. it queues
the event into the ready queue, so later userspace will call the syscall to
actually get the event.

When one thread is parked in the syscall and there are _no_ events
which should be marked as ready (for example only sockets are there, and
it is not a good idea to wakeup the whole socket processing state machine), 
then there is no possibility to receive such event (although it is
possible to interrupt and break syscall).

So, regarding injecting ready events, it can be done - just an
addition of a special flag which will force the kevent core to move the event
into the ready queue immediately. In this case userspace can even prepare a
needed event (like a signal event) and deliver it to the process, so it will
think (only from the kevent point of view) that a real signal has arrived.

I will also add a special type of events - userspace events - which will
not have empty callbacks, and which are intended to be used in a user-defined
way (i.e. for inter-thread communication).

> -- 
> ➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖
> 



-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html