subject:"\"\\\[PATCH\\\]\\\[PPPOL2TP\\\]\\\: Fix SMP oops in pppol2tp driver\""

[PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-11 Thread James Chapman

Fix locking issues in the pppol2tp driver which can cause a kernel
crash on SMP boxes when hundreds of L2TP sessions are created/deleted
simultaneously (ISP environment). The driver was violating read_lock()
and write_lock() scheduling rules so we now consistently use the _irq
variants of the lock functions.

Signed-off-by: James Chapman <[EMAIL PROTECTED]>

--
This patch has been verified by the ISP that discovered the problem.

If the patch is accepted, it should be pushed to the stable 2.6.23 and
2.6.24 trees.

Index: linux-2.6.24/drivers/net/pppol2tp.c
===
--- linux-2.6.24.orig/drivers/net/pppol2tp.c
+++ linux-2.6.24/drivers/net/pppol2tp.c
@@ -301,15 +301,16 @@ pppol2tp_session_find(struct pppol2tp_tu
pppol2tp_session_id_hash(tunnel, session_id);
struct pppol2tp_session *session;
struct hlist_node *walk;
+   unsigned long flags;
 
-   read_lock(&tunnel->hlist_lock);
+   read_lock_irqsave(&tunnel->hlist_lock, flags);
hlist_for_each_entry(session, walk, session_list, hlist) {
if (session->tunnel_addr.s_session == session_id) {
-   read_unlock(&tunnel->hlist_lock);
+   read_unlock_irqrestore(&tunnel->hlist_lock, flags);
return session;
}
}
-   read_unlock(&tunnel->hlist_lock);
+   read_unlock_irqrestore(&tunnel->hlist_lock, flags);
 
return NULL;
 }
@@ -319,15 +320,16 @@ pppol2tp_session_find(struct pppol2tp_tu
 static struct pppol2tp_tunnel *pppol2tp_tunnel_find(u16 tunnel_id)
 {
struct pppol2tp_tunnel *tunnel = NULL;
+   unsigned long flags;
 
-   read_lock(&pppol2tp_tunnel_list_lock);
+   read_lock_irqsave(&pppol2tp_tunnel_list_lock, flags);
list_for_each_entry(tunnel, &pppol2tp_tunnel_list, list) {
if (tunnel->stats.tunnel_id == tunnel_id) {
-   read_unlock(&pppol2tp_tunnel_list_lock);
+   read_unlock_irqrestore(&pppol2tp_tunnel_list_lock, 
flags);
return tunnel;
}
}
-   read_unlock(&pppol2tp_tunnel_list_lock);
+   read_unlock_irqrestore(&pppol2tp_tunnel_list_lock, flags);
 
return NULL;
 }
@@ -1099,6 +1101,7 @@ static void pppol2tp_tunnel_closeall(str
struct hlist_node *tmp;
struct pppol2tp_session *session;
struct sock *sk;
+   unsigned long flags;
 
if (tunnel == NULL)
BUG();
@@ -1106,7 +1109,7 @@ static void pppol2tp_tunnel_closeall(str
PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
   "%s: closing all sessions...\n", tunnel->name);
 
-   write_lock(&tunnel->hlist_lock);
+   write_lock_irqsave(&tunnel->hlist_lock, flags);
for (hash = 0; hash < PPPOL2TP_HASH_SIZE; hash++) {
 again:
hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
@@ -1126,7 +1129,7 @@ again:
 * disappear as we're jumping between locks.
 */
sock_hold(sk);
-   write_unlock(&tunnel->hlist_lock);
+   write_unlock_irqrestore(&tunnel->hlist_lock, flags);
lock_sock(sk);
 
if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
@@ -1148,11 +1151,11 @@ again:
 * list so we are guaranteed to make forward
 * progress.
 */
-   write_lock(&tunnel->hlist_lock);
+   write_lock_irqsave(&tunnel->hlist_lock, flags);
goto again;
}
}
-   write_unlock(&tunnel->hlist_lock);
+   write_unlock_irqrestore(&tunnel->hlist_lock, flags);
 }
 
 /* Really kill the tunnel.
@@ -1160,10 +1163,12 @@ again:
  */
 static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel)
 {
+   unsigned long flags;
+
/* Remove from socket list */
-   write_lock(&pppol2tp_tunnel_list_lock);
+   write_lock_irqsave(&pppol2tp_tunnel_list_lock, flags);
list_del_init(&tunnel->list);
-   write_unlock(&pppol2tp_tunnel_list_lock);
+   write_unlock_irqrestore(&pppol2tp_tunnel_list_lock, flags);
 
atomic_dec(&pppol2tp_tunnel_count);
kfree(tunnel);
@@ -1212,6 +1217,7 @@ end:
 static void pppol2tp_session_destruct(struct sock *sk)
 {
struct pppol2tp_session *session = NULL;
+   unsigned long flags;
 
if (sk->sk_user_data != NULL) {
struct pppol2tp_tunnel *tunnel;
@@ -1239,9 +1245,9 @@ static void pppol2tp_session_destruct(st
/* Delete the session socket from the
 * hash
 */
-   write_lock(&tunnel->hlist_lock);
+   write_lock_irqsave(&tu

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-11 Thread Jarek Poplawski

On Mon, Feb 11, 2008 at 11:49:24PM +0100, Jarek Poplawski wrote:
> On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote:
> ...
> > Below is example output from lockdep. The oops is reproducible when  
> > creating/deleting lots of sessions while passing data. The lock is being  
> > acquired for read and write in softirq contexts.

...Hmmm... And according to this, changing read_locks should be
necessary too.

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-11 Thread James Chapman


Jarek Poplawski wrote:

James Chapman wrote, On 02/11/2008 10:22 AM:


Fix locking issues in the pppol2tp driver which can cause a kernel
crash on SMP boxes when hundreds of L2TP sessions are created/deleted
simultaneously (ISP environment). The driver was violating read_lock()
and write_lock() scheduling rules so we now consistently use the _irq
variants of the lock functions.
... 


Hi,

Could you explain what exactly scheduling rules do you mean here,
and why disabling interrupts is the best solution for this?


Below is example output from lockdep. The oops is reproducible when 
creating/deleting lots of sessions while passing data. The lock is being 
acquired for read and write in softirq contexts.


Is there a better way to fix this?

=
[ INFO: inconsistent lock state ]
2.6.24-core2 #1
-
inconsistent {in-softirq-R} -> {softirq-on-W} usage.
openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes:
  (&tunnel->hlist_lock){---?}, at: []
pppol2tp_connect+0x517/0x6d0 [pppol2tp]
{in-softirq-R} state was registered at:
   [] __lock_acquire+0x6bf/0x10a0
   [] fn_hash_lookup+0x1b/0xe0
   [] lock_acquire+0x74/0xa0
   [] pppol2tp_session_find+0x1f/0x80 [pppol2tp]
   [] _read_lock+0x2a/0x40
   [] pppol2tp_session_find+0x1f/0x80 [pppol2tp]
   [] pppol2tp_session_find+0x1f/0x80 [pppol2tp]
   [] pppol2tp_recv_core+0xd8/0x960 [pppol2tp]
   [] ipt_do_table+0x23a/0x500 [ip_tables]
   [] pppol2tp_udp_encap_recv+0x2e/0x70 [pppol2tp]
   [] _read_unlock+0x14/0x20
   [] udp_queue_rcv_skb+0x106/0x2a0
   [] __udp4_lib_rcv+0x42a/0x7e0
   [] ipt_hook+0x0/0x20 [iptable_filter]
   [] ip_local_deliver_finish+0xca/0x1c0
   [] ip_local_deliver_finish+0x2e/0x1c0
   [] ip_rcv_finish+0xff/0x360
   [] ip_rcv+0x20c/0x2a0
   [] ip_rcv_finish+0x0/0x360
   [] netif_receive_skb+0x317/0x4b0
   [] netif_receive_skb+0x100/0x4b0
   [] e1000_clean_rx_irq_ps+0x28a/0x560 [e1000]
   [] e1000_clean_rx_irq_ps+0x0/0x560 [e1000]
   [] e1000_clean+0x5d/0x290 [e1000]
   [] net_rx_action+0x1a0/0x2a0
   [] net_rx_action+0x5f/0x2a0
   [] __do_softirq+0x92/0x120
   [] do_softirq+0x78/0x80
   [] do_IRQ+0x4a/0xa0
   [] common_interrupt+0x24/0x34
   [] common_interrupt+0x2e/0x34
   [] mwait_idle_with_hints+0x46/0x60
   [] mwait_idle+0x0/0x20
   [] cpu_idle+0x74/0xe0
   [] start_kernel+0x30a/0x3a0
   [] unknown_bootoption+0x0/0x1f0
   [] 0x
irq event stamp: 275
hardirqs last  enabled at (275): [] local_bh_enable_ip+0xa7/0x120
hardirqs last disabled at (273): [] local_bh_enable_ip+0x36/0x120
softirqs last  enabled at (274): []
ppp_register_channel+0xdc/0xf0 [ppp_generic]
softirqs last disabled at (272): [] _spin_lock_bh+0xb/0x40


--
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-11 Thread Jarek Poplawski

On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote:
...
> Below is example output from lockdep. The oops is reproducible when  
> creating/deleting lots of sessions while passing data. The lock is being  
> acquired for read and write in softirq contexts.
>
> Is there a better way to fix this?
>
> =
> [ INFO: inconsistent lock state ]
> 2.6.24-core2 #1
> -
> inconsistent {in-softirq-R} -> {softirq-on-W} usage.
> openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes:
>   (&tunnel->hlist_lock){---?}, at: []
> pppol2tp_connect+0x517/0x6d0 [pppol2tp]
> {in-softirq-R} state was registered at:

IMHO, according to this, disabling bh should be enough. And if it's
like in this report: only read_lock is taken from softirqs, then this
should be necessary to change only all write_locks to write_lock_bh
(of course unless somewhere bhs are disabled already). Unless I miss
something?!

Cheers,
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-11 Thread David Miller

From: James Chapman <[EMAIL PROTECTED]>
Date: Mon, 11 Feb 2008 23:41:18 +

> Jarek Poplawski wrote:
> > On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote:
> > ...
> >> Below is example output from lockdep. The oops is reproducible when  
> >> creating/deleting lots of sessions while passing data. The lock is being  
> >> acquired for read and write in softirq contexts.
> >>
> >> Is there a better way to fix this?
> >>
> >> =
> >> [ INFO: inconsistent lock state ]
> >> 2.6.24-core2 #1
> >> -
> >> inconsistent {in-softirq-R} -> {softirq-on-W} usage.
> >> openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes:
> >>   (&tunnel->hlist_lock){---?}, at: []
> >> pppol2tp_connect+0x517/0x6d0 [pppol2tp]
> >> {in-softirq-R} state was registered at:
> > 
> > IMHO, according to this, disabling bh should be enough. And if it's
> > like in this report: only read_lock is taken from softirqs, then this
> > should be necessary to change only all write_locks to write_lock_bh
> > (of course unless somewhere bhs are disabled already). Unless I miss
> > something?!
> 
> I thought so too. I tried _bh locks first and the problem still 
> occurred. Maybe I'll try it again in case I messed something up.

I agree with Jarek here, I look at all the code paths that take
->hlist_lock and all of them are in user context or software
interrupts.

Please get a lockdep trace with the change to _bh intead of
hw interrupt protection so we can find out what that doesn't
work.

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-11 Thread Jarek Poplawski

James Chapman wrote, On 02/11/2008 10:22 AM:

> Fix locking issues in the pppol2tp driver which can cause a kernel
> crash on SMP boxes when hundreds of L2TP sessions are created/deleted
> simultaneously (ISP environment). The driver was violating read_lock()
> and write_lock() scheduling rules so we now consistently use the _irq
> variants of the lock functions.
... 

Hi,

Could you explain what exactly scheduling rules do you mean here,
and why disabling interrupts is the best solution for this?

Thanks,
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-11 Thread Jarek Poplawski

On Mon, Feb 11, 2008 at 11:41:18PM +, James Chapman wrote:
> Jarek Poplawski wrote:
>> On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote:
>> ...
>>> Below is example output from lockdep. The oops is reproducible when   
>>> creating/deleting lots of sessions while passing data. The lock is 
>>> being  acquired for read and write in softirq contexts.
>>>
>>> Is there a better way to fix this?
>>>
>>> =
>>> [ INFO: inconsistent lock state ]
>>> 2.6.24-core2 #1
>>> -
>>> inconsistent {in-softirq-R} -> {softirq-on-W} usage.
>>> openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes:
>>>   (&tunnel->hlist_lock){---?}, at: []
>>> pppol2tp_connect+0x517/0x6d0 [pppol2tp]
>>> {in-softirq-R} state was registered at:
>>
>> IMHO, according to this, disabling bh should be enough. And if it's
>> like in this report: only read_lock is taken from softirqs, then this
>> should be necessary to change only all write_locks to write_lock_bh
>> (of course unless somewhere bhs are disabled already). Unless I miss
>> something?!
>
> I thought so too. I tried _bh locks first and the problem still  
> occurred. Maybe I'll try it again in case I messed something up.

If there are any new lockdep warnings I'd be interested to have
a look. (BTW, with irqs disabling such ISP would probably get
considerable drop in performance?)

Regards,
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-11 Thread James Chapman


Jarek Poplawski wrote:

On Mon, Feb 11, 2008 at 11:49:24PM +0100, Jarek Poplawski wrote:

On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote:
...
Below is example output from lockdep. The oops is reproducible when  
creating/deleting lots of sessions while passing data. The lock is being  
acquired for read and write in softirq contexts.


...Hmmm... And according to this, changing read_locks should be
necessary too.


The patch changes both read and write locks.

--
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-11 Thread James Chapman


Jarek Poplawski wrote:

On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote:
...
Below is example output from lockdep. The oops is reproducible when  
creating/deleting lots of sessions while passing data. The lock is being  
acquired for read and write in softirq contexts.


Is there a better way to fix this?

=
[ INFO: inconsistent lock state ]
2.6.24-core2 #1
-
inconsistent {in-softirq-R} -> {softirq-on-W} usage.
openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes:
  (&tunnel->hlist_lock){---?}, at: []
pppol2tp_connect+0x517/0x6d0 [pppol2tp]
{in-softirq-R} state was registered at:


IMHO, according to this, disabling bh should be enough. And if it's
like in this report: only read_lock is taken from softirqs, then this
should be necessary to change only all write_locks to write_lock_bh
(of course unless somewhere bhs are disabled already). Unless I miss
something?!


I thought so too. I tried _bh locks first and the problem still 
occurred. Maybe I'll try it again in case I messed something up.


--
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-12 Thread David Miller

From: James Chapman <[EMAIL PROTECTED]>
Date: Tue, 12 Feb 2008 10:58:21 +

> Here is a trace from when we had _bh locks.

The problem is that the pppol2tp code calls sk_dst_get() in software
interrupt context and that is not allowed.

sk_dst_get() grabs sk->sk_dst_lock without any BH enabling or
disabling.

It can do that because the usage is to make all the lock
taking calls in user context, and in the packet processing
paths use __sk_dst_get().

Probably what the pppol2tp code should do is use __sk_dst_check()
instead of sk_dst_get().  You then have to be able to handle
NULL returns, just like UDP sendmsg() does, which means you'll
need to cook up a routing lookup if __sk_dst_check() gives you
NULL because the route became obsolete.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-12 Thread Jarek Poplawski

On Tue, Feb 12, 2008 at 10:58:21AM +, James Chapman wrote:
...
> Here is a trace from when we had _bh locks.

Very nice...

...But since it's quite long, and if you don't know all these paths
this could take some time, maybe one question: so if lockdep got these
locks right (sometimes it can be wrong when the same structures are
nested), then it seems some problem is with this place below. This
lock is taken for writing with softirqs enabled here, and IMHO it
would be interesting to test if changing this is enough for lockdep.
It seems this is in ip4_datagram_connect() during sk_dst_reset() or
sk_dst_set(). So maybe you could try with local_bh_disable/enable()
around them (or maybe some better idea)? Anyway, I'll try to learn
this more in the meantime.

Jarek P.

> Feb  5 16:26:32  to a soft-irq-unsafe lock:
> Feb  5 16:26:32  (&sk->sk_dst_lock){}
> Feb  5 16:26:32  ... which became soft-irq-unsafe at:
> Feb  5 16:26:32  ...  [] mark_held_locks+0x5e/0x80
> Feb  5 16:26:32  [] __lock_acquire+0x6a2/0x10a0
> Feb  5 16:26:32  [] save_stack_trace+0x20/0x40
> Feb  5 16:26:32  [] add_lock_to_list+0x44/0xb0
> Feb  5 16:26:32  [] __udp_lib_get_port+0x19/0x200
> Feb  5 16:26:32  [] __lock_acquire+0x1045/0x10a0
> Feb  5 16:26:32  [] lock_acquire+0x74/0xa0
> Feb  5 16:26:32  [] ip4_datagram_connect+0x53/0x380
> Feb  5 16:26:32  [] _write_lock+0x2a/0x40
> Feb  5 16:26:32  [] ip4_datagram_connect+0x53/0x380
> Feb  5 16:26:32  [] ip4_datagram_connect+0x53/0x380
> Feb  5 16:26:32  [] trace_hardirqs_on+0xc5/0x170
> Feb  5 16:26:32  [] local_bh_enable_ip+0xa7/0x120
> Feb  5 16:26:32  [] trace_hardirqs_on+0xc5/0x170
> Feb  5 16:26:32  [] _spin_lock_bh+0x2f/0x40
> Feb  5 16:26:32  [] inet_dgram_connect+0x35/0x80
> Feb  5 16:26:32  [] sys_connect+0x82/0xd0
> Feb  5 16:26:32  [] down_read_trylock+0x4f/0x60
> Feb  5 16:26:32  [] do_page_fault+0xfc/0x940
> Feb  5 16:26:32  [] _spin_unlock+0x14/0x20
> Feb  5 16:26:32  [] sys_socketcall+0x98/0x280
> Feb  5 16:26:32  [] trace_hardirqs_on+0xc5/0x170
> Feb  5 16:26:32  [] copy_to_user+0x3a/0x70
> Feb  5 16:26:32  [] restore_nocheck+0x12/0x15
> Feb  5 16:26:32  [] syscall_call+0x7/0xb
> Feb  5 16:26:32  [] 0x
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-12 Thread James Chapman


David Miller wrote:

From: James Chapman <[EMAIL PROTECTED]>
Date: Mon, 11 Feb 2008 23:41:18 +


Jarek Poplawski wrote:

On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote:
...
Below is example output from lockdep. The oops is reproducible when  
creating/deleting lots of sessions while passing data. The lock is being  
acquired for read and write in softirq contexts.


Is there a better way to fix this?

=
[ INFO: inconsistent lock state ]
2.6.24-core2 #1
-
inconsistent {in-softirq-R} -> {softirq-on-W} usage.
openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes:
  (&tunnel->hlist_lock){---?}, at: []
pppol2tp_connect+0x517/0x6d0 [pppol2tp]
{in-softirq-R} state was registered at:

IMHO, according to this, disabling bh should be enough. And if it's
like in this report: only read_lock is taken from softirqs, then this
should be necessary to change only all write_locks to write_lock_bh
(of course unless somewhere bhs are disabled already). Unless I miss
something?!
I thought so too. I tried _bh locks first and the problem still 
occurred. Maybe I'll try it again in case I messed something up.


I agree with Jarek here, I look at all the code paths that take
->hlist_lock and all of them are in user context or software
interrupts.

Please get a lockdep trace with the change to _bh intead of
hw interrupt protection so we can find out what that doesn't
work.

Thanks!


Here is a trace from when we had _bh locks.

Feb  5 16:26:32  ==
Feb  5 16:26:32  [ INFO: soft-safe -> soft-unsafe lock order detected ]
Feb  5 16:26:32  2.6.24-core2 #1
Feb  5 16:26:32  --
Feb  5 16:26:32  pppd/3224 [HC0[0]:SC0[2]:HE1:SE0] is trying to acquire:
Feb  5 16:26:32  (&sk->sk_dst_lock){}, at: [] 
pppol2tp_xmit+0x23c/0x460 [pppol2tp]

Feb  5 16:26:32
Feb  5 16:26:32  and this task is already holding:
Feb  5 16:26:32  (&pch->downl){-...}, at: [] 
ppp_push+0x44e/0x620 [ppp_generic]

Feb  5 16:26:32  which would create a new lock dependency:
Feb  5 16:26:32  (&pch->downl){-...} -> (&sk->sk_dst_lock){}
Feb  5 16:26:32
Feb  5 16:26:32  but this new dependency connects a soft-irq-safe lock:
Feb  5 16:26:32  (&pch->upl){-.-+}
Feb  5 16:26:32  ... which became soft-irq-safe at:
Feb  5 16:26:32  [] check_usage_backwards+0x1f/0x50
Feb  5 16:26:32  [] save_trace+0x39/0xa0
Feb  5 16:26:32  [] __lock_acquire+0x6bf/0x10a0
Feb  5 16:26:32  [] __lock_acquire+0x79e/0x10a0
Feb  5 16:26:32  [] __lock_acquire+0x79e/0x10a0
Feb  5 16:26:32  [] lock_acquire+0x74/0xa0
Feb  5 16:26:32  [] ppp_input+0x62/0x140 [ppp_generic]
Feb  5 16:26:32  [] _read_lock_bh+0x2f/0x40
Feb  5 16:26:32  [] ppp_input+0x62/0x140 [ppp_generic]
Feb  5 16:26:32  [] ppp_input+0x62/0x140 [ppp_generic]
Feb  5 16:26:32  [] pppol2tp_recv_core+0x46a/0x960 [pppol2tp]
Feb  5 16:26:32  [] pppol2tp_udp_encap_recv+0x2e/0x70 [pppol2tp]
Feb  5 16:26:32  [] _read_unlock+0x14/0x20
Feb  5 16:26:32  [] udp_queue_rcv_skb+0x106/0x2a0
Feb  5 16:26:32  [] __udp4_lib_rcv+0x42a/0x7e0
Feb  5 16:26:32  [] ipt_hook+0x0/0x20 [iptable_filter]
Feb  5 16:26:32  [] ip_local_deliver_finish+0xca/0x1c0
Feb  5 16:26:32  [] ip_local_deliver_finish+0x2e/0x1c0
Feb  5 16:26:32  [] ip_rcv_finish+0xff/0x360
Feb  5 16:26:32  [] ip_rcv+0x20c/0x2a0
Feb  5 16:26:32  [] ip_rcv_finish+0x0/0x360
Feb  5 16:26:32  [] netif_receive_skb+0x317/0x4b0
Feb  5 16:26:32  [] netif_receive_skb+0x100/0x4b0
Feb  5 16:26:32  [] e1000_clean_rx_irq_ps+0x28a/0x560 [e1000]
Feb  5 16:26:32  [] e1000_clean_rx_irq_ps+0x0/0x560 [e1000]
Feb  5 16:26:32  [] e1000_clean+0x5d/0x290 [e1000]
Feb  5 16:26:32  [] net_rx_action+0x1a0/0x2a0
Feb  5 16:26:32  [] net_rx_action+0x5f/0x2a0
Feb  5 16:26:32  [] __do_softirq+0x92/0x120
Feb  5 16:26:32  [] do_softirq+0x78/0x80
Feb  5 16:26:32  [] do_IRQ+0x4a/0xa0
Feb  5 16:26:32  [] finish_task_switch+0x0/0xc0
Feb  5 16:26:32  [] common_interrupt+0x24/0x34
Feb  5 16:26:32  [] common_interrupt+0x2e/0x34
Feb  5 16:26:32  [] mwait_idle_with_hints+0x46/0x60
Feb  5 16:26:32  [] mwait_idle+0x0/0x20
Feb  5 16:26:32  [] cpu_idle+0x74/0xe0
Feb  5 16:26:32  [] start_kernel+0x30a/0x3a0
Feb  5 16:26:32  [] unknown_bootoption+0x0/0x1f0
Feb  5 16:26:32  [] 0x
Feb  5 16:26:32
Feb  5 16:26:32  to a soft-irq-unsafe lock:
Feb  5 16:26:32  (&sk->sk_dst_lock){}
Feb  5 16:26:32  ... which became soft-irq-unsafe at:
Feb  5 16:26:32  ...  [] mark_held_locks+0x5e/0x80
Feb  5 16:26:32  [] __lock_acquire+0x6a2/0x10a0
Feb  5 16:26:32  [] save_stack_trace+0x20/0x40
Feb  5 16:26:32  [] add_lock_to_list+0x44/0xb0
Feb  5 16:26:32  [] __udp_lib_get_port+0x19/0x200
Feb  5 16:26:32  [] __lock_acquire+0x1045/0x10a0
Feb  5 16:26:32  [] lock_acquire+0x74/0xa0
Feb  5 16:26:32  [] ip4_datagram_connect+0x53/0x380
Feb  5 16:26:32  [] _write_lock+0x2a/0x40
Feb  5 16:26:32  [] ip4_datagram_connect+0x53/0x380
Feb  5 16:26:32  [] ip4_datagram_connect+0x53/0x380
Feb  5 16:26:32

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-12 Thread Jarek Poplawski

On Mon, Feb 11, 2008 at 11:42:12PM +, James Chapman wrote:
> Jarek Poplawski wrote:
>> On Mon, Feb 11, 2008 at 11:49:24PM +0100, Jarek Poplawski wrote:
>>> On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote:
>>> ...
 Below is example output from lockdep. The oops is reproducible when 
  creating/deleting lots of sessions while passing data. The lock is 
 being  acquired for read and write in softirq contexts.
>>
>> ...Hmmm... And according to this, changing read_locks should be
>> necessary too.
>
> The patch changes both read and write locks.

Right! This was only "errata" to my earlier comment where I considered
only lockdep info and forgot about yours...

Sorry,
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-12 Thread Jarek Poplawski

On Tue, Feb 12, 2008 at 10:00:03PM -0800, David Miller wrote:
> From: James Chapman <[EMAIL PROTECTED]>
> Date: Tue, 12 Feb 2008 10:58:21 +
> 
> > Here is a trace from when we had _bh locks.
> 
> The problem is that the pppol2tp code calls sk_dst_get() in software
> interrupt context and that is not allowed.

Actually, this lockdep report probably warns of something else:
sk_dst_lock which is seen in process context with softirqs enabled
implicitly endangers some other (ppp_generic) locks, which depend
on ppp_generic pch->upl read_lock, which is taken in softirq context.

I can't see on this report any hardirqs dependancies, so it seems even
blocking hard interrupts, just like in this patch, shouldn't solve
this problem: if BHs were really disabled in exactly the same places,
then it seems this warning should trigger in both variants.

I studied long ago some similar problem with pppoe, and it looked like
ppp_generic needed some fix, but now I've to recall or re-learn almost
all and it needs more time... Anyway, there is a possibility, this
warning isn't so dangerous.

> sk_dst_get() grabs sk->sk_dst_lock without any BH enabling or
> disabling.
> 
> It can do that because the usage is to make all the lock
> taking calls in user context, and in the packet processing
> paths use __sk_dst_get().

BTW, does it mean that ip4_datagram_connect() can be used only in user
context?

> Probably what the pppol2tp code should do is use __sk_dst_check()
> instead of sk_dst_get().  You then have to be able to handle
> NULL returns, just like UDP sendmsg() does, which means you'll
> need to cook up a routing lookup if __sk_dst_check() gives you
> NULL because the route became obsolete.

I think that without changing ppp_generic or changing ip_queue_xmit()
to something else in pppol2tp this would be really hard to avoid such
a warning (and maybe not very necessary).

Regards,
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-14 Thread Jarek Poplawski


Hi,

It seems, this nice report is still uncomplete: could you check if
there could have been something more yet?

Thanks,
Jarek P.

On Tue, Feb 12, 2008 at 10:58:21AM +, James Chapman wrote:
...
> Here is a trace from when we had _bh locks.
>
> Feb  5 16:26:32  ==
> Feb  5 16:26:32  [ INFO: soft-safe -> soft-unsafe lock order detected ]
> Feb  5 16:26:32  2.6.24-core2 #1
> Feb  5 16:26:32  --
> Feb  5 16:26:32  pppd/3224 [HC0[0]:SC0[2]:HE1:SE0] is trying to acquire:
...
> Feb  5 16:26:32  [] e1000_clean+0x5d/0x290 [e1000]
> Feb  5 16:26:32  [] net_rx_action+0x1a0/0x2a0
> Feb  5 16:26:32  [] net_rx_action+0x5f/0x2a0
> Feb  5 16:26:32  [] __do_softirq+0x92/0x120
> Feb  5 16:26:32  [] do_softirq+0x78/0x80
> Feb  5 16:26:32  [] do_IRQ+0x4a/0xa0
> Feb  5 16:26:32  [] finish_task_switch+0x0/0xc0
> Feb  5 16:26:32  [] common_interrupt+0x24/0x34
> Feb  5 16:26:32  [] common_interrupt+0x2e/0x34
> Feb  5 16:26:32  [] mwait_idle_with_hints+0x46/0x60
> Feb  5 16:26:32  [] mwait_idle+0x0/0x20
> Feb  5 16:26:32  [] cpu_idle+0x74/0xe0
> Feb  5 16:26:32  [] start_kernel+0x30a/0x3a0
>
> -- 
> James Chapman
> Katalix Systems Ltd
> http://www.katalix.com
> Catalysts for your Embedded Linux software development
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-18 Thread Jarek Poplawski

On Mon, Feb 18, 2008 at 10:09:24PM +, James Chapman wrote:
> Jarek Poplawski wrote:
>> Hi,
>>
>> It seems, this nice report is still uncomplete: could you check if
>> there could have been something more yet?
>
> Unfortunately the ISP's syslog stops. But I've been able to borrow two
> Quad Xeon boxes and have reproduced the problem.
>
> Here's a new version of the patch. The patch avoids disabling irqs and
> fixes the sk_dst_get() usage that DaveM mentioned. But even with this
> patch, lockdep still complains if hundreds of ppp sessions are inserted
> into a tunnel as rapidly as possible (lockdep trace is below). I can
> stop these errors by wrapping the call to ppp_input() in
> pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is a
> better fix?

Hmm... This is a really long report and quite a bit different from
the previous one. I need some time for this. BTW: you sent before a
lockdep report with hlist_lock problem. I think this could be fixed
in some independent patch to make this all more readable. Are all
the other changes in this current patch only because of this or
previous lockdep report or for some other reasons (or reports) yet?

Regards,
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-18 Thread David Miller

From: James Chapman <[EMAIL PROTECTED]>
Date: Mon, 18 Feb 2008 22:09:24 +

> Here's a new version of the patch. The patch avoids disabling irqs
> and fixes the sk_dst_get() usage that DaveM mentioned. But even with
> this patch, lockdep still complains if hundreds of ppp sessions are
> inserted into a tunnel as rapidly as possible (lockdep trace is
> below). I can stop these errors by wrapping the call to ppp_input()
> in pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is
> a better fix?

Firstly, let's fix one thing at a time.  Leave the sk_dst_get()
thing alone until we can prove that it's part of the lockdep
traces.

Next, I can't see why ppp_input() needs to be invoked with
interrupts disabled.  There are many other things that invoke
that in software interrupt context, such as pppoe.

Please provide the lockdep traces without the ppp_input() IRQ
disabling so this can be properly analyzed.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-19 Thread James Chapman

David Miller wrote:

From: James Chapman <[EMAIL PROTECTED]>
Date: Mon, 18 Feb 2008 22:09:24 +

Here's a new version of the patch. The patch avoids disabling irqs
and fixes the sk_dst_get() usage that DaveM mentioned. But even with
this patch, lockdep still complains if hundreds of ppp sessions are
inserted into a tunnel as rapidly as possible (lockdep trace is
below). I can stop these errors by wrapping the call to ppp_input()
in pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is
a better fix?

Firstly, let's fix one thing at a time.  Leave the sk_dst_get()
thing alone until we can prove that it's part of the lockdep
traces.

In reproducing the problem, I obtained several lockdep traces that 
implicated sk_dst_get(). I changed the code to use __sk_dst_check() as 
you suggested and they went away. At that point, I was hopeful the 
locking issues were fixed. But after several minutes of 
creating/deleting hundreds of ppp sessions, lockdep dumped another 
error. It is that error that I posted yesterday.

Next, I can't see why ppp_input() needs to be invoked with
interrupts disabled.  There are many other things that invoke
that in software interrupt context, such as pppoe.

I agree. I'm seeking advice on what the underlying cause is of this new 
trace.

Please provide the lockdep traces without the ppp_input() IRQ
disabling so this can be properly analyzed.

The trace _was_ without ppp_input IRQ disabling. The trace doesn't occur 
if I disable IRQs around the ppp_input() call. The patch I sent showed 
the changes I made before running the tests that created the new lockdep 
trace. I'm sorry this wasn't clear.

--
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-19 Thread James Chapman


Jarek Poplawski wrote:

On Mon, Feb 18, 2008 at 10:09:24PM +, James Chapman wrote:

Jarek Poplawski wrote:

Hi,

It seems, this nice report is still uncomplete: could you check if
there could have been something more yet?

Unfortunately the ISP's syslog stops. But I've been able to borrow two
Quad Xeon boxes and have reproduced the problem.

Here's a new version of the patch. The patch avoids disabling irqs and
fixes the sk_dst_get() usage that DaveM mentioned. But even with this
patch, lockdep still complains if hundreds of ppp sessions are inserted
into a tunnel as rapidly as possible (lockdep trace is below). I can
stop these errors by wrapping the call to ppp_input() in
pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is a
better fix?


Hmm... This is a really long report and quite a bit different from
the previous one. I need some time for this. BTW: you sent before a
lockdep report with hlist_lock problem. I think this could be fixed
in some independent patch to make this all more readable. Are all
the other changes in this current patch only because of this or
previous lockdep report or for some other reasons (or reports) yet?


As I mentioned in my reply to davem, modifying the pppol2tp driver as 
described in the patch I sent made the original lockdep problems go away.



--
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-19 Thread Jarek Poplawski

On Tue, Feb 19, 2008 at 10:30:47AM +, Jarek Poplawski wrote:
...
> IMHO, just like I wrote earlier, the main problem is in ppp_generic(),

...or maybe ppp_generic.c? Whatever...

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-19 Thread Jarek Poplawski

On Tue, Feb 19, 2008 at 09:03:12AM +, James Chapman wrote:
> David Miller wrote:
>> From: James Chapman <[EMAIL PROTECTED]>
>> Date: Mon, 18 Feb 2008 22:09:24 +
>>
>>> Here's a new version of the patch. The patch avoids disabling irqs
>>> and fixes the sk_dst_get() usage that DaveM mentioned. But even with
>>> this patch, lockdep still complains if hundreds of ppp sessions are
>>> inserted into a tunnel as rapidly as possible (lockdep trace is
>>> below). I can stop these errors by wrapping the call to ppp_input()
>>> in pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is
>>> a better fix?
>>
>> Firstly, let's fix one thing at a time.  Leave the sk_dst_get()
>> thing alone until we can prove that it's part of the lockdep
>> traces.
>
> In reproducing the problem, I obtained several lockdep traces that  
> implicated sk_dst_get().

As a matter of fact I missed just that kind information on previous
lockdep report, so if you could send them too this should be still
helpful.

...
> I agree. I'm seeking advice on what the underlying cause is of this new  
> trace.

IMHO, just like I wrote earlier, the main problem is in ppp_generic(),
especially ppp_connect_channel(), where main tx & rx locks are used.
I didn't know enough about this sk_dst_lock traces yet. I hope I could
help with this, but after these changes I need some time to figure
this out again.

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-19 Thread Jarek Poplawski

On Mon, Feb 18, 2008 at 10:09:24PM +, James Chapman wrote:
...
> Unfortunately the ISP's syslog stops. But I've been able to borrow
> two Quad Xeon boxes and have reproduced the problem.
>
> Here's a new version of the patch. The patch avoids disabling irqs
> and fixes the sk_dst_get() usage that DaveM mentioned. But even with
> this patch, lockdep still complains if hundreds of ppp sessions are
> inserted into a tunnel as rapidly as possible (lockdep trace is below).
> I can stop these errors by wrapping the call to ppp_input() in
> pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is a
> better fix?

I send here my proposal: it's intended for testing and to check one of
possible solutions here. IMHO your lockdep reports show there is no
use to change anything around sk_dst_lock: it would need the global
change of this lock to fix this problem. So the fix should be done
around pch->upl lock and this means changing ppp_generic.

In the patch below I've used trylock in places which seem to allow
for skipping some things (while config is changed only) or simply
don't need this lock because there is no ppp struct. This could be
modified to add some waiting loop if necessary. Another option is to
change the write side of this lock: it looks like more vulnerable if
something missed because there are more locks involved, but probably
should be enough to solve this problem too.

I think pppol2tp need to be first checked only with hlist_lock bh
patch, unless there were some lockdep reports on these other locks
too. (BTW, I added ppp maintainer to CC - I hope we get Paul's opinion
on this.)

Regards,
Jarek P.

(testing patch #1)
---

 drivers/net/ppp_generic.c |   33 +++--
 1 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 4dc5b4b..5cbc534 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1473,7 +1473,7 @@ void
 ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
 {
struct channel *pch = chan->ppp;
-   int proto;
+   int proto, locked;
 
if (!pch || skb->len == 0) {
kfree_skb(skb);
@@ -1481,8 +1481,13 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
}
 
proto = PPP_PROTO(skb);
-   read_lock_bh(&pch->upl);
-   if (!pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) {
+   /*
+* We use trylock to avoid dependency between soft-irq-safe upl lock
+* and soft-irq-unsafe sk_dst_lock.
+*/
+   local_bh_disable();
+   locked = read_trylock(&pch->upl);
+   if (!locked || !pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) {
/* put it on the channel queue */
skb_queue_tail(&pch->file.rq, skb);
/* drop old frames if queue too long */
@@ -1493,7 +1498,10 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
} else {
ppp_do_recv(pch->ppp, skb, pch);
}
-   read_unlock_bh(&pch->upl);
+
+   if (locked)
+   read_unlock(&pch->upl);
+   local_bh_enable();
 }
 
 /* Put a 0-length skb in the receive queue as an error indication */
@@ -1506,16 +1514,18 @@ ppp_input_error(struct ppp_channel *chan, int code)
if (!pch)
return;
 
-   read_lock_bh(&pch->upl);
-   if (pch->ppp) {
+   /* a trylock comment in ppp_input() */
+   local_bh_disable();
+   if (read_trylock(&pch->upl) && pch->ppp) {
skb = alloc_skb(0, GFP_ATOMIC);
if (skb) {
skb->len = 0;   /* probably unnecessary */
skb->cb[0] = code;
ppp_do_recv(pch->ppp, skb, pch);
}
+   read_unlock(&pch->upl);
}
-   read_unlock_bh(&pch->upl);
+   local_bh_enable();
 }
 
 /*
@@ -2044,10 +2054,13 @@ int ppp_unit_number(struct ppp_channel *chan)
int unit = -1;
 
if (pch) {
-   read_lock_bh(&pch->upl);
-   if (pch->ppp)
+   /* a trylock comment in ppp_input() */
+   local_bh_disable();
+   if (read_trylock(&pch->upl) && pch->ppp) {
unit = pch->ppp->file.index;
-   read_unlock_bh(&pch->upl);
+   read_unlock(&pch->upl);
+   }
+   local_bh_enable();
}
return unit;
 }
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-19 Thread Jarek Poplawski

On Wed, Feb 20, 2008 at 12:06:40AM +0100, Jarek Poplawski wrote:
...
> (testing patch #1)

SORRY!!! > take 2 (unlocking fixed)

---

 drivers/net/ppp_generic.c |   39 +--
 1 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 4dc5b4b..c4e3808 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1473,7 +1473,7 @@ void
 ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
 {
struct channel *pch = chan->ppp;
-   int proto;
+   int proto, locked;
 
if (!pch || skb->len == 0) {
kfree_skb(skb);
@@ -1481,8 +1481,13 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
}
 
proto = PPP_PROTO(skb);
-   read_lock_bh(&pch->upl);
-   if (!pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) {
+   /*
+* We use trylock to avoid dependency between soft-irq-safe upl lock
+* and soft-irq-unsafe sk_dst_lock.
+*/
+   local_bh_disable();
+   locked = read_trylock(&pch->upl);
+   if (!locked || !pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) {
/* put it on the channel queue */
skb_queue_tail(&pch->file.rq, skb);
/* drop old frames if queue too long */
@@ -1493,7 +1498,10 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
} else {
ppp_do_recv(pch->ppp, skb, pch);
}
-   read_unlock_bh(&pch->upl);
+
+   if (locked)
+   read_unlock(&pch->upl);
+   local_bh_enable();
 }
 
 /* Put a 0-length skb in the receive queue as an error indication */
@@ -1502,12 +1510,14 @@ ppp_input_error(struct ppp_channel *chan, int code)
 {
struct channel *pch = chan->ppp;
struct sk_buff *skb;
+   int locked;
 
if (!pch)
return;
 
-   read_lock_bh(&pch->upl);
-   if (pch->ppp) {
+   /* a trylock comment in ppp_input() */
+   local_bh_disable();
+   if ((locked = read_trylock(&pch->upl)) && pch->ppp) {
skb = alloc_skb(0, GFP_ATOMIC);
if (skb) {
skb->len = 0;   /* probably unnecessary */
@@ -1515,7 +1525,10 @@ ppp_input_error(struct ppp_channel *chan, int code)
ppp_do_recv(pch->ppp, skb, pch);
}
}
-   read_unlock_bh(&pch->upl);
+
+   if (locked)
+   read_unlock(&pch->upl);
+   local_bh_enable();
 }
 
 /*
@@ -2044,10 +2057,16 @@ int ppp_unit_number(struct ppp_channel *chan)
int unit = -1;
 
if (pch) {
-   read_lock_bh(&pch->upl);
-   if (pch->ppp)
+   int locked;
+
+   /* a trylock comment in ppp_input() */
+   local_bh_disable();
+   if ((locked = read_trylock(&pch->upl)) && pch->ppp)
unit = pch->ppp->file.index;
-   read_unlock_bh(&pch->upl);
+
+   if (locked)
+   read_unlock(&pch->upl);
+   local_bh_enable();
}
return unit;
 }
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-20 Thread James Chapman


Jarek Poplawski wrote:

On Mon, Feb 18, 2008 at 10:09:24PM +, James Chapman wrote:
...

Unfortunately the ISP's syslog stops. But I've been able to borrow
two Quad Xeon boxes and have reproduced the problem.

Here's a new version of the patch. The patch avoids disabling irqs
and fixes the sk_dst_get() usage that DaveM mentioned. But even with
this patch, lockdep still complains if hundreds of ppp sessions are
inserted into a tunnel as rapidly as possible (lockdep trace is below).
I can stop these errors by wrapping the call to ppp_input() in
pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is a
better fix?


I send here my proposal: it's intended for testing and to check one of
possible solutions here. IMHO your lockdep reports show there is no
use to change anything around sk_dst_lock: it would need the global
change of this lock to fix this problem. So the fix should be done
around pch->upl lock and this means changing ppp_generic.


Hmm, I need to study the lockdep report again. It seems I'm misreading 
the lockdep output. :(



In the patch below I've used trylock in places which seem to allow
for skipping some things (while config is changed only) or simply
don't need this lock because there is no ppp struct. This could be
modified to add some waiting loop if necessary. Another option is to
change the write side of this lock: it looks like more vulnerable if
something missed because there are more locks involved, but probably
should be enough to solve this problem too.

I think pppol2tp need to be first checked only with hlist_lock bh
patch, unless there were some lockdep reports on these other locks
too. (BTW, I added ppp maintainer to CC - I hope we get Paul's opinion
on this.)


I tried your ppp_generic patch with only the hlist_lock bh patch in 
pppol2tp and it seems to fix the ppp create/delete issue. However, when 
I added much more traffic into the test (flood pings over ppp interfaces 
while repeatedly creating/deleting the L2TP (PPP) sessions) I get a soft 
lockup detected in pppol2tp_xmit() after anything between 1 minute and 
an hour. :( I'm investigating that now.


Thanks for your help!


(testing patch #1)
---

 drivers/net/ppp_generic.c |   33 +++--
 1 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 4dc5b4b..5cbc534 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1473,7 +1473,7 @@ void
 ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
 {
struct channel *pch = chan->ppp;
-   int proto;
+   int proto, locked;
 
 	if (!pch || skb->len == 0) {

kfree_skb(skb);
@@ -1481,8 +1481,13 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
}
 
 	proto = PPP_PROTO(skb);

-   read_lock_bh(&pch->upl);
-   if (!pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) {
+   /*
+* We use trylock to avoid dependency between soft-irq-safe upl lock
+* and soft-irq-unsafe sk_dst_lock.
+*/
+   local_bh_disable();
+   locked = read_trylock(&pch->upl);
+   if (!locked || !pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) {
/* put it on the channel queue */
skb_queue_tail(&pch->file.rq, skb);
/* drop old frames if queue too long */
@@ -1493,7 +1498,10 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
} else {
ppp_do_recv(pch->ppp, skb, pch);
}
-   read_unlock_bh(&pch->upl);
+
+   if (locked)
+   read_unlock(&pch->upl);
+   local_bh_enable();
 }
 
 /* Put a 0-length skb in the receive queue as an error indication */

@@ -1506,16 +1514,18 @@ ppp_input_error(struct ppp_channel *chan, int code)
if (!pch)
return;
 
-	read_lock_bh(&pch->upl);

-   if (pch->ppp) {
+   /* a trylock comment in ppp_input() */
+   local_bh_disable();
+   if (read_trylock(&pch->upl) && pch->ppp) {
skb = alloc_skb(0, GFP_ATOMIC);
if (skb) {
skb->len = 0;/* probably unnecessary */
skb->cb[0] = code;
ppp_do_recv(pch->ppp, skb, pch);
}
+   read_unlock(&pch->upl);
}
-   read_unlock_bh(&pch->upl);
+   local_bh_enable();
 }
 
 /*

@@ -2044,10 +2054,13 @@ int ppp_unit_number(struct ppp_channel *chan)
int unit = -1;
 
 	if (pch) {

-   read_lock_bh(&pch->upl);
-   if (pch->ppp)
+   /* a trylock comment in ppp_input() */
+   local_bh_disable();
+   if (read_trylock(&pch->upl) && pch->ppp) {
unit = pch->ppp->file.index;
-   read_unlock_bh(&pch->upl);
+   read_unlock(&pch->upl);
+   }
+   local_bh_enable();
}
return unit;
 }
--



--

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-20 Thread Jarek Poplawski

On Wed, Feb 20, 2008 at 04:02:52PM +, James Chapman wrote:
...
> I tried your ppp_generic patch with only the hlist_lock bh patch in  
> pppol2tp and it seems to fix the ppp create/delete issue. However, when  
> I added much more traffic into the test (flood pings over ppp interfaces  
> while repeatedly creating/deleting the L2TP (PPP) sessions) I get a soft  
> lockup detected in pppol2tp_xmit() after anything between 1 minute and  
> an hour. :( I'm investigating that now.
>
> Thanks for your help!

Not at all!

>
>> (testing patch #1)

But I hope you tested with the fixed (take 2) version of this patch...

Since it's quite experimental (testing) this patch could be wrong
as it is, but I hope it should show the proper way to solve this
problem. Probably you did some of these, but here are a few of my
suggestions for testing this:

1) try my patch with your full bh locking changing patch;
2) add while loops to these trylocks on failure, with e.g.  __delay(1);
   this should work like full locks again, but there should be no (this
   kind of) lockdep reports;
3) I send here another testing patch with this second way to do this:
   on the write side, but it's even more "experimental" and only a
   proof of concept (should be applied on vanilla ppp_generic).

Regards,
Jarek P.

(testing patch #2)

---

 drivers/net/ppp_generic.c |   20 +---
 1 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 4dc5b4b..70bd255 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -2606,11 +2606,16 @@ ppp_connect_channel(struct channel *pch, int unit)
ppp = ppp_find_unit(unit);
if (!ppp)
goto out;
-   write_lock_bh(&pch->upl);
+
ret = -EINVAL;
-   if (pch->ppp)
-   goto outl;
+   read_lock_bh(&pch->upl);
+   if (pch->ppp) {
+   read_unlock_bh(&pch->upl);
+   goto out;
+   }
+   read_unlock_bh(&pch->upl);
 
+   atomic_inc(&ppp->file.refcnt);
ppp_lock(ppp);
if (pch->file.hdrlen > ppp->file.hdrlen)
ppp->file.hdrlen = pch->file.hdrlen;
@@ -2619,13 +2624,14 @@ ppp_connect_channel(struct channel *pch, int unit)
ppp->dev->hard_header_len = hdrlen;
list_add_tail(&pch->clist, &ppp->channels);
++ppp->n_channels;
-   pch->ppp = ppp;
-   atomic_inc(&ppp->file.refcnt);
ppp_unlock(ppp);
-   ret = 0;
 
- outl:
+   /* avoid lock dependency with ppp_locks */
+   write_lock_bh(&pch->upl);
+   BUG_ON(pch->ppp);
+   pch->ppp = ppp;
write_unlock_bh(&pch->upl);
+   ret = 0;
  out:
mutex_unlock(&all_ppp_mutex);
return ret;
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-20 Thread James Chapman


Jarek Poplawski wrote:


(testing patch #1)


But I hope you tested with the fixed (take 2) version of this patch...


Yes I did. :)

But I just got another lockdep error (attached).


Since it's quite experimental (testing) this patch could be wrong
as it is, but I hope it should show the proper way to solve this
problem. Probably you did some of these, but here are a few of my
suggestions for testing this:

1) try my patch with your full bh locking changing patch;
2) add while loops to these trylocks on failure, with e.g.  __delay(1);
   this should work like full locks again, but there should be no (this
   kind of) lockdep reports;


Hmm, isn't this just bypassing the lockdep checks?


3) I send here another testing patch with this second way to do this:
   on the write side, but it's even more "experimental" and only a
   proof of concept (should be applied on vanilla ppp_generic).


I'll look over it. I think I need to take a step back and look at what's 
happening in more detail though.


--
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development

Feb 20 22:11:41 localhost kernel: =
Feb 20 22:11:43 localhost kernel: [ INFO: inconsistent lock state ]
Feb 20 22:11:44 localhost kernel: 2.6.24.2 #1
Feb 20 22:11:44 localhost kernel: -
Feb 20 22:11:44 localhost kernel: inconsistent {softirq-on-W} -> {in-softirq-R} 
usage.
Feb 20 22:11:44 localhost kernel: pppd/3744 [HC0[0]:SC1[5]:HE1:SE0] takes:
Feb 20 22:11:44 localhost kernel:  (&sk->sk_dst_lock){---?}, at: [] 
pppol2tp_xmit+0x320/0x3d9 [pppol2tp]
Feb 20 22:11:45 localhost kernel: {softirq-on-W} state was registered at:
Feb 20 22:11:45 localhost kernel:   [] __lock_acquire+0x48b/0xbf1
Feb 20 22:11:45 localhost kernel:   [] mark_held_locks+0x39/0x53
Feb 20 22:11:45 localhost kernel:   [] local_bh_enable+0x10e/0x115
Feb 20 22:11:45 localhost kernel:   [] inet_csk_get_port+0xc1/0x1cb
Feb 20 22:11:45 localhost kernel:   [] lock_acquire+0x70/0x8a
Feb 20 22:11:45 localhost kernel:   [] inet_csk_listen_start+0x75/0xed
Feb 20 22:11:45 localhost kernel:   [] _write_lock+0x29/0x34
Feb 20 22:11:45 localhost kernel:   [] inet_csk_listen_start+0x75/0xed
Feb 20 22:11:45 localhost kernel:   [] inet_csk_listen_start+0x75/0xed
Feb 20 22:11:45 localhost kernel:   [] inet_listen+0x3b/0x5e
Feb 20 22:11:45 localhost kernel:   [] sys_listen+0x43/0x5f
Feb 20 22:11:45 localhost kernel:   [] sys_socketcall+0xbd/0x261
Feb 20 22:11:45 localhost kernel:   [] sysenter_past_esp+0x9a/0xa5
Feb 20 22:11:45 localhost kernel:   [] trace_hardirqs_on+0x122/0x14c
Feb 20 22:11:45 localhost kernel:   [] sysenter_past_esp+0x5f/0xa5
Feb 20 22:11:45 localhost kernel:   [] 0x
Feb 20 22:11:45 localhost kernel: irq event stamp: 41702
Feb 20 22:11:45 localhost kernel: hardirqs last  enabled at (41702): 
[] kfree+0x9f/0xa6
Feb 20 22:11:45 localhost kernel: hardirqs last disabled at (41701): 
[] kfree+0x17/0xa6
Feb 20 22:11:45 localhost kernel: softirqs last  enabled at (41630): 
[] rt_run_flush+0x64/0x8b
Feb 20 22:11:45 localhost kernel: softirqs last disabled at (41631): 
[] do_softirq+0x5e/0xc7
Feb 20 22:11:45 localhost kernel: 
Feb 20 22:11:45 localhost kernel: other info that might help us debug this:
Feb 20 22:11:45 localhost kernel: 10 locks held by pppd/3744:
Feb 20 22:11:45 localhost kernel:  #0:  (rtnl_mutex){--..}, at: [] 
devinet_ioctl+0xf4/0x539
Feb 20 22:11:45 localhost kernel:  #1:  ((inetaddr_chain).rwsem){..--}, at: 
[] __blocking_notifier_call_chain+0x22/0x51
Feb 20 22:11:45 localhost kernel:  #2:  (rcu_read_lock){..--}, at: [] 
net_rx_action+0x4e/0x1b3
Feb 20 22:11:45 localhost kernel:  #3:  (rcu_read_lock){..--}, at: [] 
netif_receive_skb+0xf4/0x3d4
Feb 20 22:11:45 localhost kernel:  #4:  (rcu_read_lock){..--}, at: [] 
ip_local_deliver_finish+0x2e/0x1f8
Feb 20 22:11:45 localhost kernel:  #5:  (slock-AF_INET){-+..}, at: [] 
icmp_reply+0x52/0x1a4
Feb 20 22:11:45 localhost kernel:  #6:  (rcu_read_lock){..--}, at: [] 
dev_queue_xmit+0x125/0x2e9
Feb 20 22:11:45 localhost kernel:  #7:  (_xmit_PPP){-+..}, at: [] 
__qdisc_run+0x5b/0x156
Feb 20 22:11:45 localhost kernel:  #8:  (&ppp->wlock){-+..}, at: [] 
ppp_xmit_process+0x15/0x5a1 [ppp_generic]
Feb 20 22:11:45 localhost kernel:  #9:  (&pch->downl){-+..}, at: [] 
ppp_push+0x63/0x50d [ppp_generic]
Feb 20 22:11:45 localhost kernel: 
Feb 20 22:11:45 localhost kernel: stack backtrace:
Feb 20 22:11:45 localhost kernel: Pid: 3744, comm: pppd Not tainted 2.6.24.2 #1
Feb 20 22:11:45 localhost kernel:  [] print_usage_bug+0x139/0x143
Feb 20 22:11:45 localhost kernel:  [] mark_lock+0x1cb/0x454
Feb 20 22:11:45 localhost kernel:  [] find_usage_forwards+0x67/0x8b
Feb 20 22:11:45 localhost kernel:  [] __lock_acquire+0x424/0xbf1
Feb 20 22:11:46 localhost kernel:  [] check_noncircular+0x66/0x93
Feb 20 22:11:46 localhost kernel:  [] mark_held_locks+0x39/0x53
Feb 20 22:11:46 localhost kernel:  [] kfree+0x9f/0xa6
Feb 20 22:11:46 loc

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-21 Thread Jarek Poplawski

On Wed, Feb 20, 2008 at 10:37:57PM +, James Chapman wrote:
> Jarek Poplawski wrote:
>
 (testing patch #1)
>>
>> But I hope you tested with the fixed (take 2) version of this patch...
>
> Yes I did. :)
>
> But I just got another lockdep error (attached).
>
>> Since it's quite experimental (testing) this patch could be wrong
>> as it is, but I hope it should show the proper way to solve this
>> problem. Probably you did some of these, but here are a few of my
>> suggestions for testing this:
>>
>> 1) try my patch with your full bh locking changing patch;
>> 2) add while loops to these trylocks on failure, with e.g.  __delay(1);
>>this should work like full locks again, but there should be no (this
>>kind of) lockdep reports;
>
> Hmm, isn't this just bypassing the lockdep checks?

Yes! But it's only for debugging: to find if this change in locking
is to be blamed for these new lockups. It should effectively work just
like without this patch, but without this lockdep warning. So, if
after such change lockups still happen, then it would seem you didn't
test this enough before. Otherwise the new patch is to blame and needs
reworking.

>
>> 3) I send here another testing patch with this second way to do this:
>>on the write side, but it's even more "experimental" and only a
>>proof of concept (should be applied on vanilla ppp_generic).
>
> I'll look over it. I think I need to take a step back and look at what's  
> happening in more detail though.

This is something completely new and changes all the picture: the xmit
path wasn't expected (at least by me) to be called in softirq context
at all, and there were no traces of this on previous reports. But,
since lockdep always stops after the first warning, there could be
even more surprises like this in the future. I'll check this report.

Regards,
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-21 Thread James Chapman


Jarek Poplawski wrote:

On Wed, Feb 20, 2008 at 10:37:57PM +, James Chapman wrote:

Jarek Poplawski wrote:


(testing patch #1)

But I hope you tested with the fixed (take 2) version of this patch...

Yes I did. :)

But I just got another lockdep error (attached).


Since it's quite experimental (testing) this patch could be wrong
as it is, but I hope it should show the proper way to solve this
problem. Probably you did some of these, but here are a few of my
suggestions for testing this:

1) try my patch with your full bh locking changing patch;
2) add while loops to these trylocks on failure, with e.g.  __delay(1);
   this should work like full locks again, but there should be no (this
   kind of) lockdep reports;

Hmm, isn't this just bypassing the lockdep checks?


Yes! But it's only for debugging: to find if this change in locking
is to be blamed for these new lockups. It should effectively work just
like without this patch, but without this lockdep warning. So, if
after such change lockups still happen, then it would seem you didn't
test this enough before. Otherwise the new patch is to blame and needs
reworking.


The lockups still happen, but I think they are now due to a different 
problem, as you say.


Some background on this issue might be useful to help get feedback from 
others on the list. This issue was first reported by an ISP who found 
random lockups if an L2TP tunnel carrying hundreds/thousands of L2TP 
sessions went down due to a network outage and then recovered itself. On 
recovery, all of the tunnel's sessions (PPP) are created rapidly. 
Sometimes the tunnel would recover just fine, but other times not. The 
ISP put some effort into reproducing the problem and found that 
repeatedly creating/deleting a tunnel with lots of L2TP sessions would 
cause the failure after a random time between a few minutes and several 
hours. The original lockdep trace came from the ISP. I initially 
couldn't reproduce the problem but I borrowed two equivalent quad-core 
systems and can now reproduce it. Subsequent lockdep traces have been 
from my testing.


The _bh locking fixes in pppol2tp combined with your ppp_generic change 
solved that problem. So I then added data traffic into the mix (since 
this will happen in a real network) and found that lockups still happen. 
But the lockdep trace in this case is different, as you noted.


Does PPPoE stress the PPP setup code as much as this scenario? I guess 
in theory it could if lots of PPPoE clients connected at the same time, 
but there is no aggregate tunnel like there is with L2TP to cause all 
sessions to connect simultaneously. Perhaps PPTP also suffers from these 
issues? Perhaps not because it tends to be used only in VPN setups where 
there is only 1 session per tunnel.



3) I send here another testing patch with this second way to do this:
   on the write side, but it's even more "experimental" and only a
   proof of concept (should be applied on vanilla ppp_generic).
I'll look over it. I think I need to take a step back and look at what's  
happening in more detail though.


This is something completely new and changes all the picture: the xmit
path wasn't expected (at least by me) to be called in softirq context
at all, and there were no traces of this on previous reports. But,
since lockdep always stops after the first warning, there could be
even more surprises like this in the future. I'll check this report.


Doesn't the TX softirq do transmits if they've been queued up?

--
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-21 Thread Jarek Poplawski

On Thu, Feb 21, 2008 at 09:53:56AM +, James Chapman wrote:
> Jarek Poplawski wrote:
...
> The _bh locking fixes in pppol2tp combined with your ppp_generic change  
> solved that problem. So I then added data traffic into the mix (since  
> this will happen in a real network) and found that lockups still happen.  
> But the lockdep trace in this case is different, as you noted.

I'm not sure what do you mean by "solved that problem": lack of lockups
or lack of this kind of lockdep reports. This lockdep report shows a
real danger in this case, probably very little probable, unless a lot
of tries. So if you think just this kind of lockup happend there, this
is would be nice. But there could be something less nice too: we are
"fighting" with lockdep in one place but the lockup happens somewhere
else for some totally different reason...

> Does PPPoE stress the PPP setup code as much as this scenario? I guess  
> in theory it could if lots of PPPoE clients connected at the same time,  
> but there is no aggregate tunnel like there is with L2TP to cause all  
> sessions to connect simultaneously. Perhaps PPTP also suffers from these  
> issues? Perhaps not because it tends to be used only in VPN setups where  
> there is only 1 session per tunnel.

I don't know this code enough, but it seems it should be easier to
maintain or debug if there are similar solutions where possible.

 3) I send here another testing patch with this second way to do this:
on the write side, but it's even more "experimental" and only a
proof of concept (should be applied on vanilla ppp_generic).
>>> I'll look over it. I think I need to take a step back and look at 
>>> what's  happening in more detail though.
>>
>> This is something completely new and changes all the picture: the xmit
>> path wasn't expected (at least by me) to be called in softirq context
>> at all, and there were no traces of this on previous reports. But,
>> since lockdep always stops after the first warning, there could be
>> even more surprises like this in the future. I'll check this report.
>
> Doesn't the TX softirq do transmits if they've been queued up?

I've probably too much looked at these reports, and should've expected
this could happen. Probably queueing could be separated, but since
there could be no queue at all and it's done like this, then this
current proof of concept seems to be dead end, and we have to go back
to fixing sk_dst_lock handling and my patches could be dumped...

So if I don't miss something again (and I need more time for this new
report) you should try to fix the problem not reported originally by
lockdep, but forseen by David(!): we need to avoid any path like:
ppp_generic -> pppol2tp -> something, which could take sk_dst_lock
while holding any ppp_generic writing lock:  they all are softirq
"safe" (i.e. endangered). David gave some example, but I'm not sure
you did your patch like this (sk_dst_set()). Probably ip_queue_xmit()
can't work with this too.

Another, probably simpler way would be to move almost all pppol2tp_xmit
code to a workqueue: this should let to break most of dependencies with
ppp_generic locks, but I don't know how much it would affect other
things (e.g. performance). So you should really rethink these things.

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-21 Thread Jarek Poplawski

Jarek Poplawski wrote, On 02/21/2008 01:08 PM:
...

> Another, probably simpler way would be to move almost all pppol2tp_xmit
...

Actually, the simplest off all seems to be now this old idea to maybe make
sk_dst_lock globally softirq immune. At least I think it's worth of testing,
to check for these other possible lockdep warnings. It should only need to
change all write_ and read_lock(&sk->sk_dst_lock) in very few places:
include/net/sock.h, include/net/ip6_route.h, and net/ipv6/ipv6_sockglue.c.
This could be tested together with you full _bh locking patch (maybe except
these other changes in pppol2tp_xmit).

Jarek P.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-25 Thread James Chapman


Jarek Poplawski wrote:

Jarek Poplawski wrote, On 02/21/2008 01:08 PM:
...


Another, probably simpler way would be to move almost all pppol2tp_xmit

...

Actually, the simplest off all seems to be now this old idea to maybe make
sk_dst_lock globally softirq immune. At least I think it's worth of testing,
to check for these other possible lockdep warnings. It should only need to
change all write_ and read_lock(&sk->sk_dst_lock) in very few places:
include/net/sock.h, include/net/ip6_route.h, and net/ipv6/ipv6_sockglue.c.
This could be tested together with you full _bh locking patch (maybe except
these other changes in pppol2tp_xmit).


I did this and all lockdep errors have now gone. Tests ran all weekend. 
See attached patch.


Is this an acceptable solution? If so, I'll prepare and send official 
patches.



--
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development

Index: linux-2.6.24.2/include/net/ip6_route.h
===
--- linux-2.6.24.2.orig/include/net/ip6_route.h
+++ linux-2.6.24.2/include/net/ip6_route.h
@@ -150,9 +150,9 @@ static inline void __ip6_dst_store(struc
 static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
  struct in6_addr *daddr, struct in6_addr *saddr)
 {
-	write_lock(&sk->sk_dst_lock);
+	write_lock_bh(&sk->sk_dst_lock);
 	__ip6_dst_store(sk, dst, daddr, saddr);
-	write_unlock(&sk->sk_dst_lock);
+	write_unlock_bh(&sk->sk_dst_lock);
 }
 
 static inline int ipv6_unicast_destination(struct sk_buff *skb)
Index: linux-2.6.24.2/include/net/sock.h
===
--- linux-2.6.24.2.orig/include/net/sock.h
+++ linux-2.6.24.2/include/net/sock.h
@@ -1058,11 +1058,11 @@ sk_dst_get(struct sock *sk)
 {
 	struct dst_entry *dst;
 
-	read_lock(&sk->sk_dst_lock);
+	read_lock_bh(&sk->sk_dst_lock);
 	dst = sk->sk_dst_cache;
 	if (dst)
 		dst_hold(dst);
-	read_unlock(&sk->sk_dst_lock);
+	read_unlock_bh(&sk->sk_dst_lock);
 	return dst;
 }
 
@@ -1079,9 +1079,9 @@ __sk_dst_set(struct sock *sk, struct dst
 static inline void
 sk_dst_set(struct sock *sk, struct dst_entry *dst)
 {
-	write_lock(&sk->sk_dst_lock);
+	write_lock_bh(&sk->sk_dst_lock);
 	__sk_dst_set(sk, dst);
-	write_unlock(&sk->sk_dst_lock);
+	write_unlock_bh(&sk->sk_dst_lock);
 }
 
 static inline void
@@ -1097,9 +1097,9 @@ __sk_dst_reset(struct sock *sk)
 static inline void
 sk_dst_reset(struct sock *sk)
 {
-	write_lock(&sk->sk_dst_lock);
+	write_lock_bh(&sk->sk_dst_lock);
 	__sk_dst_reset(sk);
-	write_unlock(&sk->sk_dst_lock);
+	write_unlock_bh(&sk->sk_dst_lock);
 }
 
 extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
Index: linux-2.6.24.2/net/ipv6/ipv6_sockglue.c
===
--- linux-2.6.24.2.orig/net/ipv6/ipv6_sockglue.c
+++ linux-2.6.24.2/net/ipv6/ipv6_sockglue.c
@@ -440,9 +440,9 @@ static int do_ipv6_setsockopt(struct soc
 			opt = xchg(&np->opt, opt);
 			sk_dst_reset(sk);
 		} else {
-			write_lock(&sk->sk_dst_lock);
+			write_lock_bh(&sk->sk_dst_lock);
 			opt = xchg(&np->opt, opt);
-			write_unlock(&sk->sk_dst_lock);
+			write_unlock_bh(&sk->sk_dst_lock);
 			sk_dst_reset(sk);
 		}
 sticky_done:
@@ -504,9 +504,9 @@ update:
 			opt = xchg(&np->opt, opt);
 			sk_dst_reset(sk);
 		} else {
-			write_lock(&sk->sk_dst_lock);
+			write_lock_bh(&sk->sk_dst_lock);
 			opt = xchg(&np->opt, opt);
-			write_unlock(&sk->sk_dst_lock);
+			write_unlock_bh(&sk->sk_dst_lock);
 			sk_dst_reset(sk);
 		}

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-25 Thread Jarek Poplawski

On Mon, Feb 25, 2008 at 12:19:50PM +, James Chapman wrote:
> Jarek Poplawski wrote:
>> Jarek Poplawski wrote, On 02/21/2008 01:08 PM:
>> ...
>>
>>> Another, probably simpler way would be to move almost all pppol2tp_xmit
>> ...
>>
>> Actually, the simplest off all seems to be now this old idea to maybe make
>> sk_dst_lock globally softirq immune. At least I think it's worth of testing,
>> to check for these other possible lockdep warnings. It should only need to
>> change all write_ and read_lock(&sk->sk_dst_lock) in very few places:
>> include/net/sock.h, include/net/ip6_route.h, and net/ipv6/ipv6_sockglue.c.
>> This could be tested together with you full _bh locking patch (maybe except
>> these other changes in pppol2tp_xmit).
>
> I did this and all lockdep errors have now gone. Tests ran all weekend.  
> See attached patch.
>
> Is this an acceptable solution? If so, I'll prepare and send official  
> patches.

IMHO this should be acceptable because I can't see any reason for
changing properly working code if there is so simple and not costly
solution. But maybe David or somebody else finds some cons? Since
this patch isn't very big I think you could try to send this
officially and we will simply see...

Regards,
Jarek P.


>
>
> -- 
> James Chapman
> Katalix Systems Ltd
> http://www.katalix.com
> Catalysts for your Embedded Linux software development
>

> Index: linux-2.6.24.2/include/net/ip6_route.h
> ===
> --- linux-2.6.24.2.orig/include/net/ip6_route.h
> +++ linux-2.6.24.2/include/net/ip6_route.h
> @@ -150,9 +150,9 @@ static inline void __ip6_dst_store(struc
>  static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
>struct in6_addr *daddr, struct in6_addr *saddr)
>  {
> - write_lock(&sk->sk_dst_lock);
> + write_lock_bh(&sk->sk_dst_lock);
>   __ip6_dst_store(sk, dst, daddr, saddr);
> - write_unlock(&sk->sk_dst_lock);
> + write_unlock_bh(&sk->sk_dst_lock);
>  }
>  
>  static inline int ipv6_unicast_destination(struct sk_buff *skb)
> Index: linux-2.6.24.2/include/net/sock.h
> ===
> --- linux-2.6.24.2.orig/include/net/sock.h
> +++ linux-2.6.24.2/include/net/sock.h
> @@ -1058,11 +1058,11 @@ sk_dst_get(struct sock *sk)
>  {
>   struct dst_entry *dst;
>  
> - read_lock(&sk->sk_dst_lock);
> + read_lock_bh(&sk->sk_dst_lock);
>   dst = sk->sk_dst_cache;
>   if (dst)
>   dst_hold(dst);
> - read_unlock(&sk->sk_dst_lock);
> + read_unlock_bh(&sk->sk_dst_lock);
>   return dst;
>  }
>  
> @@ -1079,9 +1079,9 @@ __sk_dst_set(struct sock *sk, struct dst
>  static inline void
>  sk_dst_set(struct sock *sk, struct dst_entry *dst)
>  {
> - write_lock(&sk->sk_dst_lock);
> + write_lock_bh(&sk->sk_dst_lock);
>   __sk_dst_set(sk, dst);
> - write_unlock(&sk->sk_dst_lock);
> + write_unlock_bh(&sk->sk_dst_lock);
>  }
>  
>  static inline void
> @@ -1097,9 +1097,9 @@ __sk_dst_reset(struct sock *sk)
>  static inline void
>  sk_dst_reset(struct sock *sk)
>  {
> - write_lock(&sk->sk_dst_lock);
> + write_lock_bh(&sk->sk_dst_lock);
>   __sk_dst_reset(sk);
> - write_unlock(&sk->sk_dst_lock);
> + write_unlock_bh(&sk->sk_dst_lock);
>  }
>  
>  extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
> Index: linux-2.6.24.2/net/ipv6/ipv6_sockglue.c
> ===
> --- linux-2.6.24.2.orig/net/ipv6/ipv6_sockglue.c
> +++ linux-2.6.24.2/net/ipv6/ipv6_sockglue.c
> @@ -440,9 +440,9 @@ static int do_ipv6_setsockopt(struct soc
>   opt = xchg(&np->opt, opt);
>   sk_dst_reset(sk);
>   } else {
> - write_lock(&sk->sk_dst_lock);
> + write_lock_bh(&sk->sk_dst_lock);
>   opt = xchg(&np->opt, opt);
> - write_unlock(&sk->sk_dst_lock);
> + write_unlock_bh(&sk->sk_dst_lock);
>   sk_dst_reset(sk);
>   }
>  sticky_done:
> @@ -504,9 +504,9 @@ update:
>   opt = xchg(&np->opt, opt);
>   sk_dst_reset(sk);
>   } else {
> - write_lock(&sk->sk_dst_lock);
> + write_lock_bh(&sk->sk_dst_lock);
>   opt = xchg(&np->opt, opt);
> - write_unlock(&sk->sk_dst_lock);
> + write_unlock_bh(&sk->sk_dst_lock);
>   sk_dst_reset(sk);
>   }
>  

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-25 Thread Jarek Poplawski

On Mon, Feb 25, 2008 at 01:05:08PM +, Jarek Poplawski wrote:
...
> On Mon, Feb 25, 2008 at 12:19:50PM +, James Chapman wrote:
> > Is this an acceptable solution? If so, I'll prepare and send official  
> > patches.
> 
> IMHO this should be acceptable because I can't see any reason for
> changing properly working code if there is so simple and not costly
> solution. But maybe David or somebody else finds some cons? [...]

Hmm... Wait a minute! But on the other hand David has written about
his cons here, and it looks reasonable: this place would be fixed,
but some others can start reports like this. Maybe, it's better to
analyze yet if it's really so hard to eliminate taking this lock
on the xmit path?

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-25 Thread Jarek Poplawski

On Mon, Feb 25, 2008 at 01:39:48PM +, Jarek Poplawski wrote:
...
> Maybe, it's better to
> analyze yet if it's really so hard to eliminate taking this lock
> on the xmit path?

BTW, I'm not sure if it helps, but this matters only for the sockets
which could be used (and locked) outside of pppol2tp code (so before
pppol2tp code is called).

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-25 Thread Jarek Poplawski

Jarek Poplawski wrote, On 02/25/2008 02:39 PM:
...
> Hmm... Wait a minute! But on the other hand David has written about
> his cons here, and it looks reasonable: this place would be fixed,
> but some others can start reports like this. Maybe, it's better to
> analyze yet if it's really so hard to eliminate taking this lock
> on the xmit path?

James, I wonder if you could try to test this patch below?
ip_queue_xmit() seems to do proper things with __sk_dst_check(), and
if some other functions don't behave similarly lockdep should tell.
I think, you could test it with your "locks to _bh" patch (without
pppol2tp_xmit() part), and maybe with my sock.c lockdep patch (it
should help lockdep to see these locks a bit more distinctly).

Jarek P.

PS: Since ppp_generic isn't endangered for now I removed Paul from CC.

---

diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
index e0b072d..b94659a 100644
--- a/drivers/net/pppol2tp.c
+++ b/drivers/net/pppol2tp.c
@@ -1058,7 +1058,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct 
sk_buff *skb)
 
/* Get routing info from the tunnel socket */
dst_release(skb->dst);
-   skb->dst = sk_dst_get(sk_tun);
+   skb->dst = NULL;
skb_orphan(skb);
skb->sk = sk_tun;
 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-26 Thread James Chapman


Jarek Poplawski wrote:

Jarek Poplawski wrote, On 02/25/2008 02:39 PM:
...

Hmm... Wait a minute! But on the other hand David has written about
his cons here, and it looks reasonable: this place would be fixed,
but some others can start reports like this. Maybe, it's better to
analyze yet if it's really so hard to eliminate taking this lock
on the xmit path?


James, I wonder if you could try to test this patch below?
ip_queue_xmit() seems to do proper things with __sk_dst_check(), and
if some other functions don't behave similarly lockdep should tell.
I think, you could test it with your "locks to _bh" patch (without
pppol2tp_xmit() part), and maybe with my sock.c lockdep patch (it
should help lockdep to see these locks a bit more distinctly).


I found the same thing and was running a variant of your patch last 
night; rather than set skb->dst to NULL though, I use __sk_dst_get() and 
let ip_queue_xmit() do the route lookup if it returns NULL. But this has 
the same symptoms as the code I tried a few days ago - no lockdep errors 
but a system lockup after up to several hours. Nothing is logged in the 
syslog.


Luckily, I'm in the lab where my two borrowed servers are today so I 
have access to their consoles. Hopefully I'll be able to find out why 
there are hanging. Btw, they don't hang if I disable irqs around the 
ppp_input() call.


Will update you later.

/james


PS: Since ppp_generic isn't endangered for now I removed Paul from CC.

---

diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
index e0b072d..b94659a 100644
--- a/drivers/net/pppol2tp.c
+++ b/drivers/net/pppol2tp.c
@@ -1058,7 +1058,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct 
sk_buff *skb)
 
 	/* Get routing info from the tunnel socket */

dst_release(skb->dst);
-   skb->dst = sk_dst_get(sk_tun);
+   skb->dst = NULL;
skb_orphan(skb);
skb->sk = sk_tun;
 
--

To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html




--
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-26 Thread Jarek Poplawski

On Tue, Feb 26, 2008 at 12:14:26PM +, James Chapman wrote:
> Jarek Poplawski wrote:
>> Jarek Poplawski wrote, On 02/25/2008 02:39 PM:
>> ...
>>> Hmm... Wait a minute! But on the other hand David has written about
>>> his cons here, and it looks reasonable: this place would be fixed,
>>> but some others can start reports like this. Maybe, it's better to
>>> analyze yet if it's really so hard to eliminate taking this lock
>>> on the xmit path?
>>
>> James, I wonder if you could try to test this patch below?
>> ip_queue_xmit() seems to do proper things with __sk_dst_check(), and
>> if some other functions don't behave similarly lockdep should tell.
>> I think, you could test it with your "locks to _bh" patch (without
>> pppol2tp_xmit() part), and maybe with my sock.c lockdep patch (it
>> should help lockdep to see these locks a bit more distinctly).
>
> I found the same thing and was running a variant of your patch last  
> night; rather than set skb->dst to NULL though, I use __sk_dst_get() and  
> let ip_queue_xmit() do the route lookup if it returns NULL. But this has  
> the same symptoms as the code I tried a few days ago - no lockdep errors  
> but a system lockup after up to several hours. Nothing is logged in the  
> syslog.

I guess you are going to try this together with this sk_dst_lock with
bh patch too. If it's possible I'd suggest to try this skb->dst = NULL
as well (__sk_dst_get instead of __sk_dst_check seems to be too racy).

> Luckily, I'm in the lab where my two borrowed servers are today so I  
> have access to their consoles. Hopefully I'll be able to find out why  
> there are hanging. Btw, they don't hang if I disable irqs around the  
> ppp_input() call.

...and disabling bh instead isn't enough, BTW?

> Will update you later.

Thanks,
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-26 Thread Jarek Poplawski

On Tue, Feb 26, 2008 at 01:03:34PM +, Jarek Poplawski wrote:
> On Tue, Feb 26, 2008 at 12:14:26PM +, James Chapman wrote:
...
> > there are hanging. Btw, they don't hang if I disable irqs around the  
> > ppp_input() call.
> 
> ...and disabling bh instead isn't enough, BTW?

I guess not: they are mostly disabled by ppp_input() itself...

So, it looks like a network card could mess here?

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver

2008-02-26 Thread Jarek Poplawski

James Chapman wrote, On 02/26/2008 01:14 PM:
...
> Luckily, I'm in the lab where my two borrowed servers are today so I 
> have access to their consoles. Hopefully I'll be able to find out why 
> there are hanging. Btw, they don't hang if I disable irqs around the 
> ppp_input() call.

Maybe you've found the same, or there is some other reason yet, but
IMHO this locking break around ppp_input() is wrong. Probably there
is needed more advanced solution, but this should fix the problem if
it really exists (isn't there possible a race e.g. between receive
from socket and from network card?).

Jarek P.
---

 drivers/net/pppol2tp.c |   16 
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
index e0b072d..7c6fcb9 100644
--- a/drivers/net/pppol2tp.c
+++ b/drivers/net/pppol2tp.c
@@ -363,18 +363,17 @@ out:
spin_unlock(&session->reorder_q.lock);
 }
 
-/* Dequeue a single skb.
+/* Requeue a single skb.
  */
-static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct 
sk_buff *skb)
+static void pppol2tp_recv_requeue_skb(struct pppol2tp_session *session, struct 
sk_buff *skb)
 {
struct pppol2tp_tunnel *tunnel = session->tunnel;
int length = PPPOL2TP_SKB_CB(skb)->length;
struct sock *session_sock = NULL;
 
-   /* We're about to requeue the skb, so unlink it and return resources
+   /* We're about to requeue the skb, so return resources
 * to its current owner (a socket receive buffer).
 */
-   skb_unlink(skb, &session->reorder_q);
skb_orphan(skb);
 
tunnel->stats.rx_packets++;
@@ -436,14 +435,14 @@ static void pppol2tp_recv_dequeue_skb(struct 
pppol2tp_session *session, struct s
 static void pppol2tp_recv_dequeue(struct pppol2tp_session *session)
 {
struct sk_buff *skb;
-   struct sk_buff *tmp;
 
/* If the pkt at the head of the queue has the nr that we
 * expect to send up next, dequeue it and any other
 * in-sequence packets behind it.
 */
+again:
spin_lock(&session->reorder_q.lock);
-   skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
+   skb_queue_walk(&session->reorder_q, skb) {
if (time_after(jiffies, PPPOL2TP_SKB_CB(skb)->expires)) {
session->stats.rx_seq_discards++;
session->stats.rx_errors++;
@@ -469,9 +468,10 @@ static void pppol2tp_recv_dequeue(struct pppol2tp_session 
*session)
goto out;
}
}
+   __skb_unlink(skb, &session->reorder_q);
spin_unlock(&session->reorder_q.lock);
-   pppol2tp_recv_dequeue_skb(session, skb);
-   spin_lock(&session->reorder_q.lock);
+   pppol2tp_recv_requeue_skb(session, skb);
+   goto again;
}
 
 out:
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

39 matches

Mail list logo