[PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Fix locking issues in the pppol2tp driver which can cause a kernel crash on SMP boxes when hundreds of L2TP sessions are created/deleted simultaneously (ISP environment). The driver was violating read_lock() and write_lock() scheduling rules so we now consistently use the _irq variants of the lock functions. Signed-off-by: James Chapman <[EMAIL PROTECTED]> -- This patch has been verified by the ISP that discovered the problem. If the patch is accepted, it should be pushed to the stable 2.6.23 and 2.6.24 trees. Index: linux-2.6.24/drivers/net/pppol2tp.c === --- linux-2.6.24.orig/drivers/net/pppol2tp.c +++ linux-2.6.24/drivers/net/pppol2tp.c @@ -301,15 +301,16 @@ pppol2tp_session_find(struct pppol2tp_tu pppol2tp_session_id_hash(tunnel, session_id); struct pppol2tp_session *session; struct hlist_node *walk; + unsigned long flags; - read_lock(&tunnel->hlist_lock); + read_lock_irqsave(&tunnel->hlist_lock, flags); hlist_for_each_entry(session, walk, session_list, hlist) { if (session->tunnel_addr.s_session == session_id) { - read_unlock(&tunnel->hlist_lock); + read_unlock_irqrestore(&tunnel->hlist_lock, flags); return session; } } - read_unlock(&tunnel->hlist_lock); + read_unlock_irqrestore(&tunnel->hlist_lock, flags); return NULL; } @@ -319,15 +320,16 @@ pppol2tp_session_find(struct pppol2tp_tu static struct pppol2tp_tunnel *pppol2tp_tunnel_find(u16 tunnel_id) { struct pppol2tp_tunnel *tunnel = NULL; + unsigned long flags; - read_lock(&pppol2tp_tunnel_list_lock); + read_lock_irqsave(&pppol2tp_tunnel_list_lock, flags); list_for_each_entry(tunnel, &pppol2tp_tunnel_list, list) { if (tunnel->stats.tunnel_id == tunnel_id) { - read_unlock(&pppol2tp_tunnel_list_lock); + read_unlock_irqrestore(&pppol2tp_tunnel_list_lock, flags); return tunnel; } } - read_unlock(&pppol2tp_tunnel_list_lock); + read_unlock_irqrestore(&pppol2tp_tunnel_list_lock, flags); return NULL; } @@ -1099,6 +1101,7 @@ static void pppol2tp_tunnel_closeall(str struct hlist_node *tmp; struct pppol2tp_session *session; struct sock *sk; + unsigned long flags; if (tunnel == NULL) BUG(); @@ -1106,7 +1109,7 @@ static void pppol2tp_tunnel_closeall(str PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, "%s: closing all sessions...\n", tunnel->name); - write_lock(&tunnel->hlist_lock); + write_lock_irqsave(&tunnel->hlist_lock, flags); for (hash = 0; hash < PPPOL2TP_HASH_SIZE; hash++) { again: hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) { @@ -1126,7 +1129,7 @@ again: * disappear as we're jumping between locks. */ sock_hold(sk); - write_unlock(&tunnel->hlist_lock); + write_unlock_irqrestore(&tunnel->hlist_lock, flags); lock_sock(sk); if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { @@ -1148,11 +1151,11 @@ again: * list so we are guaranteed to make forward * progress. */ - write_lock(&tunnel->hlist_lock); + write_lock_irqsave(&tunnel->hlist_lock, flags); goto again; } } - write_unlock(&tunnel->hlist_lock); + write_unlock_irqrestore(&tunnel->hlist_lock, flags); } /* Really kill the tunnel. @@ -1160,10 +1163,12 @@ again: */ static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel) { + unsigned long flags; + /* Remove from socket list */ - write_lock(&pppol2tp_tunnel_list_lock); + write_lock_irqsave(&pppol2tp_tunnel_list_lock, flags); list_del_init(&tunnel->list); - write_unlock(&pppol2tp_tunnel_list_lock); + write_unlock_irqrestore(&pppol2tp_tunnel_list_lock, flags); atomic_dec(&pppol2tp_tunnel_count); kfree(tunnel); @@ -1212,6 +1217,7 @@ end: static void pppol2tp_session_destruct(struct sock *sk) { struct pppol2tp_session *session = NULL; + unsigned long flags; if (sk->sk_user_data != NULL) { struct pppol2tp_tunnel *tunnel; @@ -1239,9 +1245,9 @@ static void pppol2tp_session_destruct(st /* Delete the session socket from the * hash */ - write_lock(&tunnel->hlist_lock); + write_lock_irqsave(&tu
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Mon, Feb 11, 2008 at 11:49:24PM +0100, Jarek Poplawski wrote: > On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote: > ... > > Below is example output from lockdep. The oops is reproducible when > > creating/deleting lots of sessions while passing data. The lock is being > > acquired for read and write in softirq contexts. ...Hmmm... And according to this, changing read_locks should be necessary too. Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Jarek Poplawski wrote: James Chapman wrote, On 02/11/2008 10:22 AM: Fix locking issues in the pppol2tp driver which can cause a kernel crash on SMP boxes when hundreds of L2TP sessions are created/deleted simultaneously (ISP environment). The driver was violating read_lock() and write_lock() scheduling rules so we now consistently use the _irq variants of the lock functions. ... Hi, Could you explain what exactly scheduling rules do you mean here, and why disabling interrupts is the best solution for this? Below is example output from lockdep. The oops is reproducible when creating/deleting lots of sessions while passing data. The lock is being acquired for read and write in softirq contexts. Is there a better way to fix this? = [ INFO: inconsistent lock state ] 2.6.24-core2 #1 - inconsistent {in-softirq-R} -> {softirq-on-W} usage. openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes: (&tunnel->hlist_lock){---?}, at: [] pppol2tp_connect+0x517/0x6d0 [pppol2tp] {in-softirq-R} state was registered at: [] __lock_acquire+0x6bf/0x10a0 [] fn_hash_lookup+0x1b/0xe0 [] lock_acquire+0x74/0xa0 [] pppol2tp_session_find+0x1f/0x80 [pppol2tp] [] _read_lock+0x2a/0x40 [] pppol2tp_session_find+0x1f/0x80 [pppol2tp] [] pppol2tp_session_find+0x1f/0x80 [pppol2tp] [] pppol2tp_recv_core+0xd8/0x960 [pppol2tp] [] ipt_do_table+0x23a/0x500 [ip_tables] [] pppol2tp_udp_encap_recv+0x2e/0x70 [pppol2tp] [] _read_unlock+0x14/0x20 [] udp_queue_rcv_skb+0x106/0x2a0 [] __udp4_lib_rcv+0x42a/0x7e0 [] ipt_hook+0x0/0x20 [iptable_filter] [] ip_local_deliver_finish+0xca/0x1c0 [] ip_local_deliver_finish+0x2e/0x1c0 [] ip_rcv_finish+0xff/0x360 [] ip_rcv+0x20c/0x2a0 [] ip_rcv_finish+0x0/0x360 [] netif_receive_skb+0x317/0x4b0 [] netif_receive_skb+0x100/0x4b0 [] e1000_clean_rx_irq_ps+0x28a/0x560 [e1000] [] e1000_clean_rx_irq_ps+0x0/0x560 [e1000] [] e1000_clean+0x5d/0x290 [e1000] [] net_rx_action+0x1a0/0x2a0 [] net_rx_action+0x5f/0x2a0 [] __do_softirq+0x92/0x120 [] do_softirq+0x78/0x80 [] do_IRQ+0x4a/0xa0 [] common_interrupt+0x24/0x34 [] common_interrupt+0x2e/0x34 [] mwait_idle_with_hints+0x46/0x60 [] mwait_idle+0x0/0x20 [] cpu_idle+0x74/0xe0 [] start_kernel+0x30a/0x3a0 [] unknown_bootoption+0x0/0x1f0 [] 0x irq event stamp: 275 hardirqs last enabled at (275): [] local_bh_enable_ip+0xa7/0x120 hardirqs last disabled at (273): [] local_bh_enable_ip+0x36/0x120 softirqs last enabled at (274): [] ppp_register_channel+0xdc/0xf0 [ppp_generic] softirqs last disabled at (272): [] _spin_lock_bh+0xb/0x40 -- James Chapman Katalix Systems Ltd http://www.katalix.com Catalysts for your Embedded Linux software development -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote: ... > Below is example output from lockdep. The oops is reproducible when > creating/deleting lots of sessions while passing data. The lock is being > acquired for read and write in softirq contexts. > > Is there a better way to fix this? > > = > [ INFO: inconsistent lock state ] > 2.6.24-core2 #1 > - > inconsistent {in-softirq-R} -> {softirq-on-W} usage. > openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes: > (&tunnel->hlist_lock){---?}, at: [] > pppol2tp_connect+0x517/0x6d0 [pppol2tp] > {in-softirq-R} state was registered at: IMHO, according to this, disabling bh should be enough. And if it's like in this report: only read_lock is taken from softirqs, then this should be necessary to change only all write_locks to write_lock_bh (of course unless somewhere bhs are disabled already). Unless I miss something?! Cheers, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
From: James Chapman <[EMAIL PROTECTED]> Date: Mon, 11 Feb 2008 23:41:18 + > Jarek Poplawski wrote: > > On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote: > > ... > >> Below is example output from lockdep. The oops is reproducible when > >> creating/deleting lots of sessions while passing data. The lock is being > >> acquired for read and write in softirq contexts. > >> > >> Is there a better way to fix this? > >> > >> = > >> [ INFO: inconsistent lock state ] > >> 2.6.24-core2 #1 > >> - > >> inconsistent {in-softirq-R} -> {softirq-on-W} usage. > >> openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes: > >> (&tunnel->hlist_lock){---?}, at: [] > >> pppol2tp_connect+0x517/0x6d0 [pppol2tp] > >> {in-softirq-R} state was registered at: > > > > IMHO, according to this, disabling bh should be enough. And if it's > > like in this report: only read_lock is taken from softirqs, then this > > should be necessary to change only all write_locks to write_lock_bh > > (of course unless somewhere bhs are disabled already). Unless I miss > > something?! > > I thought so too. I tried _bh locks first and the problem still > occurred. Maybe I'll try it again in case I messed something up. I agree with Jarek here, I look at all the code paths that take ->hlist_lock and all of them are in user context or software interrupts. Please get a lockdep trace with the change to _bh intead of hw interrupt protection so we can find out what that doesn't work. Thanks! -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
James Chapman wrote, On 02/11/2008 10:22 AM: > Fix locking issues in the pppol2tp driver which can cause a kernel > crash on SMP boxes when hundreds of L2TP sessions are created/deleted > simultaneously (ISP environment). The driver was violating read_lock() > and write_lock() scheduling rules so we now consistently use the _irq > variants of the lock functions. ... Hi, Could you explain what exactly scheduling rules do you mean here, and why disabling interrupts is the best solution for this? Thanks, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Mon, Feb 11, 2008 at 11:41:18PM +, James Chapman wrote: > Jarek Poplawski wrote: >> On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote: >> ... >>> Below is example output from lockdep. The oops is reproducible when >>> creating/deleting lots of sessions while passing data. The lock is >>> being acquired for read and write in softirq contexts. >>> >>> Is there a better way to fix this? >>> >>> = >>> [ INFO: inconsistent lock state ] >>> 2.6.24-core2 #1 >>> - >>> inconsistent {in-softirq-R} -> {softirq-on-W} usage. >>> openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes: >>> (&tunnel->hlist_lock){---?}, at: [] >>> pppol2tp_connect+0x517/0x6d0 [pppol2tp] >>> {in-softirq-R} state was registered at: >> >> IMHO, according to this, disabling bh should be enough. And if it's >> like in this report: only read_lock is taken from softirqs, then this >> should be necessary to change only all write_locks to write_lock_bh >> (of course unless somewhere bhs are disabled already). Unless I miss >> something?! > > I thought so too. I tried _bh locks first and the problem still > occurred. Maybe I'll try it again in case I messed something up. If there are any new lockdep warnings I'd be interested to have a look. (BTW, with irqs disabling such ISP would probably get considerable drop in performance?) Regards, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Jarek Poplawski wrote: On Mon, Feb 11, 2008 at 11:49:24PM +0100, Jarek Poplawski wrote: On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote: ... Below is example output from lockdep. The oops is reproducible when creating/deleting lots of sessions while passing data. The lock is being acquired for read and write in softirq contexts. ...Hmmm... And according to this, changing read_locks should be necessary too. The patch changes both read and write locks. -- James Chapman Katalix Systems Ltd http://www.katalix.com Catalysts for your Embedded Linux software development -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Jarek Poplawski wrote: On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote: ... Below is example output from lockdep. The oops is reproducible when creating/deleting lots of sessions while passing data. The lock is being acquired for read and write in softirq contexts. Is there a better way to fix this? = [ INFO: inconsistent lock state ] 2.6.24-core2 #1 - inconsistent {in-softirq-R} -> {softirq-on-W} usage. openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes: (&tunnel->hlist_lock){---?}, at: [] pppol2tp_connect+0x517/0x6d0 [pppol2tp] {in-softirq-R} state was registered at: IMHO, according to this, disabling bh should be enough. And if it's like in this report: only read_lock is taken from softirqs, then this should be necessary to change only all write_locks to write_lock_bh (of course unless somewhere bhs are disabled already). Unless I miss something?! I thought so too. I tried _bh locks first and the problem still occurred. Maybe I'll try it again in case I messed something up. -- James Chapman Katalix Systems Ltd http://www.katalix.com Catalysts for your Embedded Linux software development -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
From: James Chapman <[EMAIL PROTECTED]> Date: Tue, 12 Feb 2008 10:58:21 + > Here is a trace from when we had _bh locks. The problem is that the pppol2tp code calls sk_dst_get() in software interrupt context and that is not allowed. sk_dst_get() grabs sk->sk_dst_lock without any BH enabling or disabling. It can do that because the usage is to make all the lock taking calls in user context, and in the packet processing paths use __sk_dst_get(). Probably what the pppol2tp code should do is use __sk_dst_check() instead of sk_dst_get(). You then have to be able to handle NULL returns, just like UDP sendmsg() does, which means you'll need to cook up a routing lookup if __sk_dst_check() gives you NULL because the route became obsolete. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Tue, Feb 12, 2008 at 10:58:21AM +, James Chapman wrote: ... > Here is a trace from when we had _bh locks. Very nice... ...But since it's quite long, and if you don't know all these paths this could take some time, maybe one question: so if lockdep got these locks right (sometimes it can be wrong when the same structures are nested), then it seems some problem is with this place below. This lock is taken for writing with softirqs enabled here, and IMHO it would be interesting to test if changing this is enough for lockdep. It seems this is in ip4_datagram_connect() during sk_dst_reset() or sk_dst_set(). So maybe you could try with local_bh_disable/enable() around them (or maybe some better idea)? Anyway, I'll try to learn this more in the meantime. Jarek P. > Feb 5 16:26:32 to a soft-irq-unsafe lock: > Feb 5 16:26:32 (&sk->sk_dst_lock){} > Feb 5 16:26:32 ... which became soft-irq-unsafe at: > Feb 5 16:26:32 ... [] mark_held_locks+0x5e/0x80 > Feb 5 16:26:32 [] __lock_acquire+0x6a2/0x10a0 > Feb 5 16:26:32 [] save_stack_trace+0x20/0x40 > Feb 5 16:26:32 [] add_lock_to_list+0x44/0xb0 > Feb 5 16:26:32 [] __udp_lib_get_port+0x19/0x200 > Feb 5 16:26:32 [] __lock_acquire+0x1045/0x10a0 > Feb 5 16:26:32 [] lock_acquire+0x74/0xa0 > Feb 5 16:26:32 [] ip4_datagram_connect+0x53/0x380 > Feb 5 16:26:32 [] _write_lock+0x2a/0x40 > Feb 5 16:26:32 [] ip4_datagram_connect+0x53/0x380 > Feb 5 16:26:32 [] ip4_datagram_connect+0x53/0x380 > Feb 5 16:26:32 [] trace_hardirqs_on+0xc5/0x170 > Feb 5 16:26:32 [] local_bh_enable_ip+0xa7/0x120 > Feb 5 16:26:32 [] trace_hardirqs_on+0xc5/0x170 > Feb 5 16:26:32 [] _spin_lock_bh+0x2f/0x40 > Feb 5 16:26:32 [] inet_dgram_connect+0x35/0x80 > Feb 5 16:26:32 [] sys_connect+0x82/0xd0 > Feb 5 16:26:32 [] down_read_trylock+0x4f/0x60 > Feb 5 16:26:32 [] do_page_fault+0xfc/0x940 > Feb 5 16:26:32 [] _spin_unlock+0x14/0x20 > Feb 5 16:26:32 [] sys_socketcall+0x98/0x280 > Feb 5 16:26:32 [] trace_hardirqs_on+0xc5/0x170 > Feb 5 16:26:32 [] copy_to_user+0x3a/0x70 > Feb 5 16:26:32 [] restore_nocheck+0x12/0x15 > Feb 5 16:26:32 [] syscall_call+0x7/0xb > Feb 5 16:26:32 [] 0x -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
David Miller wrote: From: James Chapman <[EMAIL PROTECTED]> Date: Mon, 11 Feb 2008 23:41:18 + Jarek Poplawski wrote: On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote: ... Below is example output from lockdep. The oops is reproducible when creating/deleting lots of sessions while passing data. The lock is being acquired for read and write in softirq contexts. Is there a better way to fix this? = [ INFO: inconsistent lock state ] 2.6.24-core2 #1 - inconsistent {in-softirq-R} -> {softirq-on-W} usage. openl2tpd/3215 [HC0[0]:SC0[0]:HE1:SE1] takes: (&tunnel->hlist_lock){---?}, at: [] pppol2tp_connect+0x517/0x6d0 [pppol2tp] {in-softirq-R} state was registered at: IMHO, according to this, disabling bh should be enough. And if it's like in this report: only read_lock is taken from softirqs, then this should be necessary to change only all write_locks to write_lock_bh (of course unless somewhere bhs are disabled already). Unless I miss something?! I thought so too. I tried _bh locks first and the problem still occurred. Maybe I'll try it again in case I messed something up. I agree with Jarek here, I look at all the code paths that take ->hlist_lock and all of them are in user context or software interrupts. Please get a lockdep trace with the change to _bh intead of hw interrupt protection so we can find out what that doesn't work. Thanks! Here is a trace from when we had _bh locks. Feb 5 16:26:32 == Feb 5 16:26:32 [ INFO: soft-safe -> soft-unsafe lock order detected ] Feb 5 16:26:32 2.6.24-core2 #1 Feb 5 16:26:32 -- Feb 5 16:26:32 pppd/3224 [HC0[0]:SC0[2]:HE1:SE0] is trying to acquire: Feb 5 16:26:32 (&sk->sk_dst_lock){}, at: [] pppol2tp_xmit+0x23c/0x460 [pppol2tp] Feb 5 16:26:32 Feb 5 16:26:32 and this task is already holding: Feb 5 16:26:32 (&pch->downl){-...}, at: [] ppp_push+0x44e/0x620 [ppp_generic] Feb 5 16:26:32 which would create a new lock dependency: Feb 5 16:26:32 (&pch->downl){-...} -> (&sk->sk_dst_lock){} Feb 5 16:26:32 Feb 5 16:26:32 but this new dependency connects a soft-irq-safe lock: Feb 5 16:26:32 (&pch->upl){-.-+} Feb 5 16:26:32 ... which became soft-irq-safe at: Feb 5 16:26:32 [] check_usage_backwards+0x1f/0x50 Feb 5 16:26:32 [] save_trace+0x39/0xa0 Feb 5 16:26:32 [] __lock_acquire+0x6bf/0x10a0 Feb 5 16:26:32 [] __lock_acquire+0x79e/0x10a0 Feb 5 16:26:32 [] __lock_acquire+0x79e/0x10a0 Feb 5 16:26:32 [] lock_acquire+0x74/0xa0 Feb 5 16:26:32 [] ppp_input+0x62/0x140 [ppp_generic] Feb 5 16:26:32 [] _read_lock_bh+0x2f/0x40 Feb 5 16:26:32 [] ppp_input+0x62/0x140 [ppp_generic] Feb 5 16:26:32 [] ppp_input+0x62/0x140 [ppp_generic] Feb 5 16:26:32 [] pppol2tp_recv_core+0x46a/0x960 [pppol2tp] Feb 5 16:26:32 [] pppol2tp_udp_encap_recv+0x2e/0x70 [pppol2tp] Feb 5 16:26:32 [] _read_unlock+0x14/0x20 Feb 5 16:26:32 [] udp_queue_rcv_skb+0x106/0x2a0 Feb 5 16:26:32 [] __udp4_lib_rcv+0x42a/0x7e0 Feb 5 16:26:32 [] ipt_hook+0x0/0x20 [iptable_filter] Feb 5 16:26:32 [] ip_local_deliver_finish+0xca/0x1c0 Feb 5 16:26:32 [] ip_local_deliver_finish+0x2e/0x1c0 Feb 5 16:26:32 [] ip_rcv_finish+0xff/0x360 Feb 5 16:26:32 [] ip_rcv+0x20c/0x2a0 Feb 5 16:26:32 [] ip_rcv_finish+0x0/0x360 Feb 5 16:26:32 [] netif_receive_skb+0x317/0x4b0 Feb 5 16:26:32 [] netif_receive_skb+0x100/0x4b0 Feb 5 16:26:32 [] e1000_clean_rx_irq_ps+0x28a/0x560 [e1000] Feb 5 16:26:32 [] e1000_clean_rx_irq_ps+0x0/0x560 [e1000] Feb 5 16:26:32 [] e1000_clean+0x5d/0x290 [e1000] Feb 5 16:26:32 [] net_rx_action+0x1a0/0x2a0 Feb 5 16:26:32 [] net_rx_action+0x5f/0x2a0 Feb 5 16:26:32 [] __do_softirq+0x92/0x120 Feb 5 16:26:32 [] do_softirq+0x78/0x80 Feb 5 16:26:32 [] do_IRQ+0x4a/0xa0 Feb 5 16:26:32 [] finish_task_switch+0x0/0xc0 Feb 5 16:26:32 [] common_interrupt+0x24/0x34 Feb 5 16:26:32 [] common_interrupt+0x2e/0x34 Feb 5 16:26:32 [] mwait_idle_with_hints+0x46/0x60 Feb 5 16:26:32 [] mwait_idle+0x0/0x20 Feb 5 16:26:32 [] cpu_idle+0x74/0xe0 Feb 5 16:26:32 [] start_kernel+0x30a/0x3a0 Feb 5 16:26:32 [] unknown_bootoption+0x0/0x1f0 Feb 5 16:26:32 [] 0x Feb 5 16:26:32 Feb 5 16:26:32 to a soft-irq-unsafe lock: Feb 5 16:26:32 (&sk->sk_dst_lock){} Feb 5 16:26:32 ... which became soft-irq-unsafe at: Feb 5 16:26:32 ... [] mark_held_locks+0x5e/0x80 Feb 5 16:26:32 [] __lock_acquire+0x6a2/0x10a0 Feb 5 16:26:32 [] save_stack_trace+0x20/0x40 Feb 5 16:26:32 [] add_lock_to_list+0x44/0xb0 Feb 5 16:26:32 [] __udp_lib_get_port+0x19/0x200 Feb 5 16:26:32 [] __lock_acquire+0x1045/0x10a0 Feb 5 16:26:32 [] lock_acquire+0x74/0xa0 Feb 5 16:26:32 [] ip4_datagram_connect+0x53/0x380 Feb 5 16:26:32 [] _write_lock+0x2a/0x40 Feb 5 16:26:32 [] ip4_datagram_connect+0x53/0x380 Feb 5 16:26:32 [] ip4_datagram_connect+0x53/0x380 Feb 5 16:26:32
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Mon, Feb 11, 2008 at 11:42:12PM +, James Chapman wrote: > Jarek Poplawski wrote: >> On Mon, Feb 11, 2008 at 11:49:24PM +0100, Jarek Poplawski wrote: >>> On Mon, Feb 11, 2008 at 10:19:35PM +, James Chapman wrote: >>> ... Below is example output from lockdep. The oops is reproducible when creating/deleting lots of sessions while passing data. The lock is being acquired for read and write in softirq contexts. >> >> ...Hmmm... And according to this, changing read_locks should be >> necessary too. > > The patch changes both read and write locks. Right! This was only "errata" to my earlier comment where I considered only lockdep info and forgot about yours... Sorry, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Tue, Feb 12, 2008 at 10:00:03PM -0800, David Miller wrote: > From: James Chapman <[EMAIL PROTECTED]> > Date: Tue, 12 Feb 2008 10:58:21 + > > > Here is a trace from when we had _bh locks. > > The problem is that the pppol2tp code calls sk_dst_get() in software > interrupt context and that is not allowed. Actually, this lockdep report probably warns of something else: sk_dst_lock which is seen in process context with softirqs enabled implicitly endangers some other (ppp_generic) locks, which depend on ppp_generic pch->upl read_lock, which is taken in softirq context. I can't see on this report any hardirqs dependancies, so it seems even blocking hard interrupts, just like in this patch, shouldn't solve this problem: if BHs were really disabled in exactly the same places, then it seems this warning should trigger in both variants. I studied long ago some similar problem with pppoe, and it looked like ppp_generic needed some fix, but now I've to recall or re-learn almost all and it needs more time... Anyway, there is a possibility, this warning isn't so dangerous. > sk_dst_get() grabs sk->sk_dst_lock without any BH enabling or > disabling. > > It can do that because the usage is to make all the lock > taking calls in user context, and in the packet processing > paths use __sk_dst_get(). BTW, does it mean that ip4_datagram_connect() can be used only in user context? > Probably what the pppol2tp code should do is use __sk_dst_check() > instead of sk_dst_get(). You then have to be able to handle > NULL returns, just like UDP sendmsg() does, which means you'll > need to cook up a routing lookup if __sk_dst_check() gives you > NULL because the route became obsolete. I think that without changing ppp_generic or changing ip_queue_xmit() to something else in pppol2tp this would be really hard to avoid such a warning (and maybe not very necessary). Regards, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Hi, It seems, this nice report is still uncomplete: could you check if there could have been something more yet? Thanks, Jarek P. On Tue, Feb 12, 2008 at 10:58:21AM +, James Chapman wrote: ... > Here is a trace from when we had _bh locks. > > Feb 5 16:26:32 == > Feb 5 16:26:32 [ INFO: soft-safe -> soft-unsafe lock order detected ] > Feb 5 16:26:32 2.6.24-core2 #1 > Feb 5 16:26:32 -- > Feb 5 16:26:32 pppd/3224 [HC0[0]:SC0[2]:HE1:SE0] is trying to acquire: ... > Feb 5 16:26:32 [] e1000_clean+0x5d/0x290 [e1000] > Feb 5 16:26:32 [] net_rx_action+0x1a0/0x2a0 > Feb 5 16:26:32 [] net_rx_action+0x5f/0x2a0 > Feb 5 16:26:32 [] __do_softirq+0x92/0x120 > Feb 5 16:26:32 [] do_softirq+0x78/0x80 > Feb 5 16:26:32 [] do_IRQ+0x4a/0xa0 > Feb 5 16:26:32 [] finish_task_switch+0x0/0xc0 > Feb 5 16:26:32 [] common_interrupt+0x24/0x34 > Feb 5 16:26:32 [] common_interrupt+0x2e/0x34 > Feb 5 16:26:32 [] mwait_idle_with_hints+0x46/0x60 > Feb 5 16:26:32 [] mwait_idle+0x0/0x20 > Feb 5 16:26:32 [] cpu_idle+0x74/0xe0 > Feb 5 16:26:32 [] start_kernel+0x30a/0x3a0 > > -- > James Chapman > Katalix Systems Ltd > http://www.katalix.com > Catalysts for your Embedded Linux software development > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Mon, Feb 18, 2008 at 10:09:24PM +, James Chapman wrote: > Jarek Poplawski wrote: >> Hi, >> >> It seems, this nice report is still uncomplete: could you check if >> there could have been something more yet? > > Unfortunately the ISP's syslog stops. But I've been able to borrow two > Quad Xeon boxes and have reproduced the problem. > > Here's a new version of the patch. The patch avoids disabling irqs and > fixes the sk_dst_get() usage that DaveM mentioned. But even with this > patch, lockdep still complains if hundreds of ppp sessions are inserted > into a tunnel as rapidly as possible (lockdep trace is below). I can > stop these errors by wrapping the call to ppp_input() in > pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is a > better fix? Hmm... This is a really long report and quite a bit different from the previous one. I need some time for this. BTW: you sent before a lockdep report with hlist_lock problem. I think this could be fixed in some independent patch to make this all more readable. Are all the other changes in this current patch only because of this or previous lockdep report or for some other reasons (or reports) yet? Regards, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
From: James Chapman <[EMAIL PROTECTED]> Date: Mon, 18 Feb 2008 22:09:24 + > Here's a new version of the patch. The patch avoids disabling irqs > and fixes the sk_dst_get() usage that DaveM mentioned. But even with > this patch, lockdep still complains if hundreds of ppp sessions are > inserted into a tunnel as rapidly as possible (lockdep trace is > below). I can stop these errors by wrapping the call to ppp_input() > in pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is > a better fix? Firstly, let's fix one thing at a time. Leave the sk_dst_get() thing alone until we can prove that it's part of the lockdep traces. Next, I can't see why ppp_input() needs to be invoked with interrupts disabled. There are many other things that invoke that in software interrupt context, such as pppoe. Please provide the lockdep traces without the ppp_input() IRQ disabling so this can be properly analyzed. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
David Miller wrote: From: James Chapman <[EMAIL PROTECTED]> Date: Mon, 18 Feb 2008 22:09:24 + Here's a new version of the patch. The patch avoids disabling irqs and fixes the sk_dst_get() usage that DaveM mentioned. But even with this patch, lockdep still complains if hundreds of ppp sessions are inserted into a tunnel as rapidly as possible (lockdep trace is below). I can stop these errors by wrapping the call to ppp_input() in pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is a better fix? Firstly, let's fix one thing at a time. Leave the sk_dst_get() thing alone until we can prove that it's part of the lockdep traces. In reproducing the problem, I obtained several lockdep traces that implicated sk_dst_get(). I changed the code to use __sk_dst_check() as you suggested and they went away. At that point, I was hopeful the locking issues were fixed. But after several minutes of creating/deleting hundreds of ppp sessions, lockdep dumped another error. It is that error that I posted yesterday. Next, I can't see why ppp_input() needs to be invoked with interrupts disabled. There are many other things that invoke that in software interrupt context, such as pppoe. I agree. I'm seeking advice on what the underlying cause is of this new trace. Please provide the lockdep traces without the ppp_input() IRQ disabling so this can be properly analyzed. The trace _was_ without ppp_input IRQ disabling. The trace doesn't occur if I disable IRQs around the ppp_input() call. The patch I sent showed the changes I made before running the tests that created the new lockdep trace. I'm sorry this wasn't clear. -- James Chapman Katalix Systems Ltd http://www.katalix.com Catalysts for your Embedded Linux software development -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Jarek Poplawski wrote: On Mon, Feb 18, 2008 at 10:09:24PM +, James Chapman wrote: Jarek Poplawski wrote: Hi, It seems, this nice report is still uncomplete: could you check if there could have been something more yet? Unfortunately the ISP's syslog stops. But I've been able to borrow two Quad Xeon boxes and have reproduced the problem. Here's a new version of the patch. The patch avoids disabling irqs and fixes the sk_dst_get() usage that DaveM mentioned. But even with this patch, lockdep still complains if hundreds of ppp sessions are inserted into a tunnel as rapidly as possible (lockdep trace is below). I can stop these errors by wrapping the call to ppp_input() in pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is a better fix? Hmm... This is a really long report and quite a bit different from the previous one. I need some time for this. BTW: you sent before a lockdep report with hlist_lock problem. I think this could be fixed in some independent patch to make this all more readable. Are all the other changes in this current patch only because of this or previous lockdep report or for some other reasons (or reports) yet? As I mentioned in my reply to davem, modifying the pppol2tp driver as described in the patch I sent made the original lockdep problems go away. -- James Chapman Katalix Systems Ltd http://www.katalix.com Catalysts for your Embedded Linux software development -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Tue, Feb 19, 2008 at 10:30:47AM +, Jarek Poplawski wrote: ... > IMHO, just like I wrote earlier, the main problem is in ppp_generic(), ...or maybe ppp_generic.c? Whatever... Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Tue, Feb 19, 2008 at 09:03:12AM +, James Chapman wrote: > David Miller wrote: >> From: James Chapman <[EMAIL PROTECTED]> >> Date: Mon, 18 Feb 2008 22:09:24 + >> >>> Here's a new version of the patch. The patch avoids disabling irqs >>> and fixes the sk_dst_get() usage that DaveM mentioned. But even with >>> this patch, lockdep still complains if hundreds of ppp sessions are >>> inserted into a tunnel as rapidly as possible (lockdep trace is >>> below). I can stop these errors by wrapping the call to ppp_input() >>> in pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is >>> a better fix? >> >> Firstly, let's fix one thing at a time. Leave the sk_dst_get() >> thing alone until we can prove that it's part of the lockdep >> traces. > > In reproducing the problem, I obtained several lockdep traces that > implicated sk_dst_get(). As a matter of fact I missed just that kind information on previous lockdep report, so if you could send them too this should be still helpful. ... > I agree. I'm seeking advice on what the underlying cause is of this new > trace. IMHO, just like I wrote earlier, the main problem is in ppp_generic(), especially ppp_connect_channel(), where main tx & rx locks are used. I didn't know enough about this sk_dst_lock traces yet. I hope I could help with this, but after these changes I need some time to figure this out again. Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Mon, Feb 18, 2008 at 10:09:24PM +, James Chapman wrote: ... > Unfortunately the ISP's syslog stops. But I've been able to borrow > two Quad Xeon boxes and have reproduced the problem. > > Here's a new version of the patch. The patch avoids disabling irqs > and fixes the sk_dst_get() usage that DaveM mentioned. But even with > this patch, lockdep still complains if hundreds of ppp sessions are > inserted into a tunnel as rapidly as possible (lockdep trace is below). > I can stop these errors by wrapping the call to ppp_input() in > pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is a > better fix? I send here my proposal: it's intended for testing and to check one of possible solutions here. IMHO your lockdep reports show there is no use to change anything around sk_dst_lock: it would need the global change of this lock to fix this problem. So the fix should be done around pch->upl lock and this means changing ppp_generic. In the patch below I've used trylock in places which seem to allow for skipping some things (while config is changed only) or simply don't need this lock because there is no ppp struct. This could be modified to add some waiting loop if necessary. Another option is to change the write side of this lock: it looks like more vulnerable if something missed because there are more locks involved, but probably should be enough to solve this problem too. I think pppol2tp need to be first checked only with hlist_lock bh patch, unless there were some lockdep reports on these other locks too. (BTW, I added ppp maintainer to CC - I hope we get Paul's opinion on this.) Regards, Jarek P. (testing patch #1) --- drivers/net/ppp_generic.c | 33 +++-- 1 files changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 4dc5b4b..5cbc534 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1473,7 +1473,7 @@ void ppp_input(struct ppp_channel *chan, struct sk_buff *skb) { struct channel *pch = chan->ppp; - int proto; + int proto, locked; if (!pch || skb->len == 0) { kfree_skb(skb); @@ -1481,8 +1481,13 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb) } proto = PPP_PROTO(skb); - read_lock_bh(&pch->upl); - if (!pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) { + /* +* We use trylock to avoid dependency between soft-irq-safe upl lock +* and soft-irq-unsafe sk_dst_lock. +*/ + local_bh_disable(); + locked = read_trylock(&pch->upl); + if (!locked || !pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) { /* put it on the channel queue */ skb_queue_tail(&pch->file.rq, skb); /* drop old frames if queue too long */ @@ -1493,7 +1498,10 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb) } else { ppp_do_recv(pch->ppp, skb, pch); } - read_unlock_bh(&pch->upl); + + if (locked) + read_unlock(&pch->upl); + local_bh_enable(); } /* Put a 0-length skb in the receive queue as an error indication */ @@ -1506,16 +1514,18 @@ ppp_input_error(struct ppp_channel *chan, int code) if (!pch) return; - read_lock_bh(&pch->upl); - if (pch->ppp) { + /* a trylock comment in ppp_input() */ + local_bh_disable(); + if (read_trylock(&pch->upl) && pch->ppp) { skb = alloc_skb(0, GFP_ATOMIC); if (skb) { skb->len = 0; /* probably unnecessary */ skb->cb[0] = code; ppp_do_recv(pch->ppp, skb, pch); } + read_unlock(&pch->upl); } - read_unlock_bh(&pch->upl); + local_bh_enable(); } /* @@ -2044,10 +2054,13 @@ int ppp_unit_number(struct ppp_channel *chan) int unit = -1; if (pch) { - read_lock_bh(&pch->upl); - if (pch->ppp) + /* a trylock comment in ppp_input() */ + local_bh_disable(); + if (read_trylock(&pch->upl) && pch->ppp) { unit = pch->ppp->file.index; - read_unlock_bh(&pch->upl); + read_unlock(&pch->upl); + } + local_bh_enable(); } return unit; } -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Wed, Feb 20, 2008 at 12:06:40AM +0100, Jarek Poplawski wrote: ... > (testing patch #1) SORRY!!! > take 2 (unlocking fixed) --- drivers/net/ppp_generic.c | 39 +-- 1 files changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 4dc5b4b..c4e3808 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1473,7 +1473,7 @@ void ppp_input(struct ppp_channel *chan, struct sk_buff *skb) { struct channel *pch = chan->ppp; - int proto; + int proto, locked; if (!pch || skb->len == 0) { kfree_skb(skb); @@ -1481,8 +1481,13 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb) } proto = PPP_PROTO(skb); - read_lock_bh(&pch->upl); - if (!pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) { + /* +* We use trylock to avoid dependency between soft-irq-safe upl lock +* and soft-irq-unsafe sk_dst_lock. +*/ + local_bh_disable(); + locked = read_trylock(&pch->upl); + if (!locked || !pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) { /* put it on the channel queue */ skb_queue_tail(&pch->file.rq, skb); /* drop old frames if queue too long */ @@ -1493,7 +1498,10 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb) } else { ppp_do_recv(pch->ppp, skb, pch); } - read_unlock_bh(&pch->upl); + + if (locked) + read_unlock(&pch->upl); + local_bh_enable(); } /* Put a 0-length skb in the receive queue as an error indication */ @@ -1502,12 +1510,14 @@ ppp_input_error(struct ppp_channel *chan, int code) { struct channel *pch = chan->ppp; struct sk_buff *skb; + int locked; if (!pch) return; - read_lock_bh(&pch->upl); - if (pch->ppp) { + /* a trylock comment in ppp_input() */ + local_bh_disable(); + if ((locked = read_trylock(&pch->upl)) && pch->ppp) { skb = alloc_skb(0, GFP_ATOMIC); if (skb) { skb->len = 0; /* probably unnecessary */ @@ -1515,7 +1525,10 @@ ppp_input_error(struct ppp_channel *chan, int code) ppp_do_recv(pch->ppp, skb, pch); } } - read_unlock_bh(&pch->upl); + + if (locked) + read_unlock(&pch->upl); + local_bh_enable(); } /* @@ -2044,10 +2057,16 @@ int ppp_unit_number(struct ppp_channel *chan) int unit = -1; if (pch) { - read_lock_bh(&pch->upl); - if (pch->ppp) + int locked; + + /* a trylock comment in ppp_input() */ + local_bh_disable(); + if ((locked = read_trylock(&pch->upl)) && pch->ppp) unit = pch->ppp->file.index; - read_unlock_bh(&pch->upl); + + if (locked) + read_unlock(&pch->upl); + local_bh_enable(); } return unit; } -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Jarek Poplawski wrote: On Mon, Feb 18, 2008 at 10:09:24PM +, James Chapman wrote: ... Unfortunately the ISP's syslog stops. But I've been able to borrow two Quad Xeon boxes and have reproduced the problem. Here's a new version of the patch. The patch avoids disabling irqs and fixes the sk_dst_get() usage that DaveM mentioned. But even with this patch, lockdep still complains if hundreds of ppp sessions are inserted into a tunnel as rapidly as possible (lockdep trace is below). I can stop these errors by wrapping the call to ppp_input() in pppol2tp_recv_dequeue_skb() with local_irq_save/restore. What is a better fix? I send here my proposal: it's intended for testing and to check one of possible solutions here. IMHO your lockdep reports show there is no use to change anything around sk_dst_lock: it would need the global change of this lock to fix this problem. So the fix should be done around pch->upl lock and this means changing ppp_generic. Hmm, I need to study the lockdep report again. It seems I'm misreading the lockdep output. :( In the patch below I've used trylock in places which seem to allow for skipping some things (while config is changed only) or simply don't need this lock because there is no ppp struct. This could be modified to add some waiting loop if necessary. Another option is to change the write side of this lock: it looks like more vulnerable if something missed because there are more locks involved, but probably should be enough to solve this problem too. I think pppol2tp need to be first checked only with hlist_lock bh patch, unless there were some lockdep reports on these other locks too. (BTW, I added ppp maintainer to CC - I hope we get Paul's opinion on this.) I tried your ppp_generic patch with only the hlist_lock bh patch in pppol2tp and it seems to fix the ppp create/delete issue. However, when I added much more traffic into the test (flood pings over ppp interfaces while repeatedly creating/deleting the L2TP (PPP) sessions) I get a soft lockup detected in pppol2tp_xmit() after anything between 1 minute and an hour. :( I'm investigating that now. Thanks for your help! (testing patch #1) --- drivers/net/ppp_generic.c | 33 +++-- 1 files changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 4dc5b4b..5cbc534 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1473,7 +1473,7 @@ void ppp_input(struct ppp_channel *chan, struct sk_buff *skb) { struct channel *pch = chan->ppp; - int proto; + int proto, locked; if (!pch || skb->len == 0) { kfree_skb(skb); @@ -1481,8 +1481,13 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb) } proto = PPP_PROTO(skb); - read_lock_bh(&pch->upl); - if (!pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) { + /* +* We use trylock to avoid dependency between soft-irq-safe upl lock +* and soft-irq-unsafe sk_dst_lock. +*/ + local_bh_disable(); + locked = read_trylock(&pch->upl); + if (!locked || !pch->ppp || proto >= 0xc000 || proto == PPP_CCPFRAG) { /* put it on the channel queue */ skb_queue_tail(&pch->file.rq, skb); /* drop old frames if queue too long */ @@ -1493,7 +1498,10 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb) } else { ppp_do_recv(pch->ppp, skb, pch); } - read_unlock_bh(&pch->upl); + + if (locked) + read_unlock(&pch->upl); + local_bh_enable(); } /* Put a 0-length skb in the receive queue as an error indication */ @@ -1506,16 +1514,18 @@ ppp_input_error(struct ppp_channel *chan, int code) if (!pch) return; - read_lock_bh(&pch->upl); - if (pch->ppp) { + /* a trylock comment in ppp_input() */ + local_bh_disable(); + if (read_trylock(&pch->upl) && pch->ppp) { skb = alloc_skb(0, GFP_ATOMIC); if (skb) { skb->len = 0;/* probably unnecessary */ skb->cb[0] = code; ppp_do_recv(pch->ppp, skb, pch); } + read_unlock(&pch->upl); } - read_unlock_bh(&pch->upl); + local_bh_enable(); } /* @@ -2044,10 +2054,13 @@ int ppp_unit_number(struct ppp_channel *chan) int unit = -1; if (pch) { - read_lock_bh(&pch->upl); - if (pch->ppp) + /* a trylock comment in ppp_input() */ + local_bh_disable(); + if (read_trylock(&pch->upl) && pch->ppp) { unit = pch->ppp->file.index; - read_unlock_bh(&pch->upl); + read_unlock(&pch->upl); + } + local_bh_enable(); } return unit; } -- --
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Wed, Feb 20, 2008 at 04:02:52PM +, James Chapman wrote: ... > I tried your ppp_generic patch with only the hlist_lock bh patch in > pppol2tp and it seems to fix the ppp create/delete issue. However, when > I added much more traffic into the test (flood pings over ppp interfaces > while repeatedly creating/deleting the L2TP (PPP) sessions) I get a soft > lockup detected in pppol2tp_xmit() after anything between 1 minute and > an hour. :( I'm investigating that now. > > Thanks for your help! Not at all! > >> (testing patch #1) But I hope you tested with the fixed (take 2) version of this patch... Since it's quite experimental (testing) this patch could be wrong as it is, but I hope it should show the proper way to solve this problem. Probably you did some of these, but here are a few of my suggestions for testing this: 1) try my patch with your full bh locking changing patch; 2) add while loops to these trylocks on failure, with e.g. __delay(1); this should work like full locks again, but there should be no (this kind of) lockdep reports; 3) I send here another testing patch with this second way to do this: on the write side, but it's even more "experimental" and only a proof of concept (should be applied on vanilla ppp_generic). Regards, Jarek P. (testing patch #2) --- drivers/net/ppp_generic.c | 20 +--- 1 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 4dc5b4b..70bd255 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -2606,11 +2606,16 @@ ppp_connect_channel(struct channel *pch, int unit) ppp = ppp_find_unit(unit); if (!ppp) goto out; - write_lock_bh(&pch->upl); + ret = -EINVAL; - if (pch->ppp) - goto outl; + read_lock_bh(&pch->upl); + if (pch->ppp) { + read_unlock_bh(&pch->upl); + goto out; + } + read_unlock_bh(&pch->upl); + atomic_inc(&ppp->file.refcnt); ppp_lock(ppp); if (pch->file.hdrlen > ppp->file.hdrlen) ppp->file.hdrlen = pch->file.hdrlen; @@ -2619,13 +2624,14 @@ ppp_connect_channel(struct channel *pch, int unit) ppp->dev->hard_header_len = hdrlen; list_add_tail(&pch->clist, &ppp->channels); ++ppp->n_channels; - pch->ppp = ppp; - atomic_inc(&ppp->file.refcnt); ppp_unlock(ppp); - ret = 0; - outl: + /* avoid lock dependency with ppp_locks */ + write_lock_bh(&pch->upl); + BUG_ON(pch->ppp); + pch->ppp = ppp; write_unlock_bh(&pch->upl); + ret = 0; out: mutex_unlock(&all_ppp_mutex); return ret; -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Jarek Poplawski wrote: (testing patch #1) But I hope you tested with the fixed (take 2) version of this patch... Yes I did. :) But I just got another lockdep error (attached). Since it's quite experimental (testing) this patch could be wrong as it is, but I hope it should show the proper way to solve this problem. Probably you did some of these, but here are a few of my suggestions for testing this: 1) try my patch with your full bh locking changing patch; 2) add while loops to these trylocks on failure, with e.g. __delay(1); this should work like full locks again, but there should be no (this kind of) lockdep reports; Hmm, isn't this just bypassing the lockdep checks? 3) I send here another testing patch with this second way to do this: on the write side, but it's even more "experimental" and only a proof of concept (should be applied on vanilla ppp_generic). I'll look over it. I think I need to take a step back and look at what's happening in more detail though. -- James Chapman Katalix Systems Ltd http://www.katalix.com Catalysts for your Embedded Linux software development Feb 20 22:11:41 localhost kernel: = Feb 20 22:11:43 localhost kernel: [ INFO: inconsistent lock state ] Feb 20 22:11:44 localhost kernel: 2.6.24.2 #1 Feb 20 22:11:44 localhost kernel: - Feb 20 22:11:44 localhost kernel: inconsistent {softirq-on-W} -> {in-softirq-R} usage. Feb 20 22:11:44 localhost kernel: pppd/3744 [HC0[0]:SC1[5]:HE1:SE0] takes: Feb 20 22:11:44 localhost kernel: (&sk->sk_dst_lock){---?}, at: [] pppol2tp_xmit+0x320/0x3d9 [pppol2tp] Feb 20 22:11:45 localhost kernel: {softirq-on-W} state was registered at: Feb 20 22:11:45 localhost kernel: [] __lock_acquire+0x48b/0xbf1 Feb 20 22:11:45 localhost kernel: [] mark_held_locks+0x39/0x53 Feb 20 22:11:45 localhost kernel: [] local_bh_enable+0x10e/0x115 Feb 20 22:11:45 localhost kernel: [] inet_csk_get_port+0xc1/0x1cb Feb 20 22:11:45 localhost kernel: [] lock_acquire+0x70/0x8a Feb 20 22:11:45 localhost kernel: [] inet_csk_listen_start+0x75/0xed Feb 20 22:11:45 localhost kernel: [] _write_lock+0x29/0x34 Feb 20 22:11:45 localhost kernel: [] inet_csk_listen_start+0x75/0xed Feb 20 22:11:45 localhost kernel: [] inet_csk_listen_start+0x75/0xed Feb 20 22:11:45 localhost kernel: [] inet_listen+0x3b/0x5e Feb 20 22:11:45 localhost kernel: [] sys_listen+0x43/0x5f Feb 20 22:11:45 localhost kernel: [] sys_socketcall+0xbd/0x261 Feb 20 22:11:45 localhost kernel: [] sysenter_past_esp+0x9a/0xa5 Feb 20 22:11:45 localhost kernel: [] trace_hardirqs_on+0x122/0x14c Feb 20 22:11:45 localhost kernel: [] sysenter_past_esp+0x5f/0xa5 Feb 20 22:11:45 localhost kernel: [] 0x Feb 20 22:11:45 localhost kernel: irq event stamp: 41702 Feb 20 22:11:45 localhost kernel: hardirqs last enabled at (41702): [] kfree+0x9f/0xa6 Feb 20 22:11:45 localhost kernel: hardirqs last disabled at (41701): [] kfree+0x17/0xa6 Feb 20 22:11:45 localhost kernel: softirqs last enabled at (41630): [] rt_run_flush+0x64/0x8b Feb 20 22:11:45 localhost kernel: softirqs last disabled at (41631): [] do_softirq+0x5e/0xc7 Feb 20 22:11:45 localhost kernel: Feb 20 22:11:45 localhost kernel: other info that might help us debug this: Feb 20 22:11:45 localhost kernel: 10 locks held by pppd/3744: Feb 20 22:11:45 localhost kernel: #0: (rtnl_mutex){--..}, at: [] devinet_ioctl+0xf4/0x539 Feb 20 22:11:45 localhost kernel: #1: ((inetaddr_chain).rwsem){..--}, at: [] __blocking_notifier_call_chain+0x22/0x51 Feb 20 22:11:45 localhost kernel: #2: (rcu_read_lock){..--}, at: [] net_rx_action+0x4e/0x1b3 Feb 20 22:11:45 localhost kernel: #3: (rcu_read_lock){..--}, at: [] netif_receive_skb+0xf4/0x3d4 Feb 20 22:11:45 localhost kernel: #4: (rcu_read_lock){..--}, at: [] ip_local_deliver_finish+0x2e/0x1f8 Feb 20 22:11:45 localhost kernel: #5: (slock-AF_INET){-+..}, at: [] icmp_reply+0x52/0x1a4 Feb 20 22:11:45 localhost kernel: #6: (rcu_read_lock){..--}, at: [] dev_queue_xmit+0x125/0x2e9 Feb 20 22:11:45 localhost kernel: #7: (_xmit_PPP){-+..}, at: [] __qdisc_run+0x5b/0x156 Feb 20 22:11:45 localhost kernel: #8: (&ppp->wlock){-+..}, at: [] ppp_xmit_process+0x15/0x5a1 [ppp_generic] Feb 20 22:11:45 localhost kernel: #9: (&pch->downl){-+..}, at: [] ppp_push+0x63/0x50d [ppp_generic] Feb 20 22:11:45 localhost kernel: Feb 20 22:11:45 localhost kernel: stack backtrace: Feb 20 22:11:45 localhost kernel: Pid: 3744, comm: pppd Not tainted 2.6.24.2 #1 Feb 20 22:11:45 localhost kernel: [] print_usage_bug+0x139/0x143 Feb 20 22:11:45 localhost kernel: [] mark_lock+0x1cb/0x454 Feb 20 22:11:45 localhost kernel: [] find_usage_forwards+0x67/0x8b Feb 20 22:11:45 localhost kernel: [] __lock_acquire+0x424/0xbf1 Feb 20 22:11:46 localhost kernel: [] check_noncircular+0x66/0x93 Feb 20 22:11:46 localhost kernel: [] mark_held_locks+0x39/0x53 Feb 20 22:11:46 localhost kernel: [] kfree+0x9f/0xa6 Feb 20 22:11:46 loc
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Wed, Feb 20, 2008 at 10:37:57PM +, James Chapman wrote: > Jarek Poplawski wrote: > (testing patch #1) >> >> But I hope you tested with the fixed (take 2) version of this patch... > > Yes I did. :) > > But I just got another lockdep error (attached). > >> Since it's quite experimental (testing) this patch could be wrong >> as it is, but I hope it should show the proper way to solve this >> problem. Probably you did some of these, but here are a few of my >> suggestions for testing this: >> >> 1) try my patch with your full bh locking changing patch; >> 2) add while loops to these trylocks on failure, with e.g. __delay(1); >>this should work like full locks again, but there should be no (this >>kind of) lockdep reports; > > Hmm, isn't this just bypassing the lockdep checks? Yes! But it's only for debugging: to find if this change in locking is to be blamed for these new lockups. It should effectively work just like without this patch, but without this lockdep warning. So, if after such change lockups still happen, then it would seem you didn't test this enough before. Otherwise the new patch is to blame and needs reworking. > >> 3) I send here another testing patch with this second way to do this: >>on the write side, but it's even more "experimental" and only a >>proof of concept (should be applied on vanilla ppp_generic). > > I'll look over it. I think I need to take a step back and look at what's > happening in more detail though. This is something completely new and changes all the picture: the xmit path wasn't expected (at least by me) to be called in softirq context at all, and there were no traces of this on previous reports. But, since lockdep always stops after the first warning, there could be even more surprises like this in the future. I'll check this report. Regards, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Jarek Poplawski wrote: On Wed, Feb 20, 2008 at 10:37:57PM +, James Chapman wrote: Jarek Poplawski wrote: (testing patch #1) But I hope you tested with the fixed (take 2) version of this patch... Yes I did. :) But I just got another lockdep error (attached). Since it's quite experimental (testing) this patch could be wrong as it is, but I hope it should show the proper way to solve this problem. Probably you did some of these, but here are a few of my suggestions for testing this: 1) try my patch with your full bh locking changing patch; 2) add while loops to these trylocks on failure, with e.g. __delay(1); this should work like full locks again, but there should be no (this kind of) lockdep reports; Hmm, isn't this just bypassing the lockdep checks? Yes! But it's only for debugging: to find if this change in locking is to be blamed for these new lockups. It should effectively work just like without this patch, but without this lockdep warning. So, if after such change lockups still happen, then it would seem you didn't test this enough before. Otherwise the new patch is to blame and needs reworking. The lockups still happen, but I think they are now due to a different problem, as you say. Some background on this issue might be useful to help get feedback from others on the list. This issue was first reported by an ISP who found random lockups if an L2TP tunnel carrying hundreds/thousands of L2TP sessions went down due to a network outage and then recovered itself. On recovery, all of the tunnel's sessions (PPP) are created rapidly. Sometimes the tunnel would recover just fine, but other times not. The ISP put some effort into reproducing the problem and found that repeatedly creating/deleting a tunnel with lots of L2TP sessions would cause the failure after a random time between a few minutes and several hours. The original lockdep trace came from the ISP. I initially couldn't reproduce the problem but I borrowed two equivalent quad-core systems and can now reproduce it. Subsequent lockdep traces have been from my testing. The _bh locking fixes in pppol2tp combined with your ppp_generic change solved that problem. So I then added data traffic into the mix (since this will happen in a real network) and found that lockups still happen. But the lockdep trace in this case is different, as you noted. Does PPPoE stress the PPP setup code as much as this scenario? I guess in theory it could if lots of PPPoE clients connected at the same time, but there is no aggregate tunnel like there is with L2TP to cause all sessions to connect simultaneously. Perhaps PPTP also suffers from these issues? Perhaps not because it tends to be used only in VPN setups where there is only 1 session per tunnel. 3) I send here another testing patch with this second way to do this: on the write side, but it's even more "experimental" and only a proof of concept (should be applied on vanilla ppp_generic). I'll look over it. I think I need to take a step back and look at what's happening in more detail though. This is something completely new and changes all the picture: the xmit path wasn't expected (at least by me) to be called in softirq context at all, and there were no traces of this on previous reports. But, since lockdep always stops after the first warning, there could be even more surprises like this in the future. I'll check this report. Doesn't the TX softirq do transmits if they've been queued up? -- James Chapman Katalix Systems Ltd http://www.katalix.com Catalysts for your Embedded Linux software development -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Thu, Feb 21, 2008 at 09:53:56AM +, James Chapman wrote: > Jarek Poplawski wrote: ... > The _bh locking fixes in pppol2tp combined with your ppp_generic change > solved that problem. So I then added data traffic into the mix (since > this will happen in a real network) and found that lockups still happen. > But the lockdep trace in this case is different, as you noted. I'm not sure what do you mean by "solved that problem": lack of lockups or lack of this kind of lockdep reports. This lockdep report shows a real danger in this case, probably very little probable, unless a lot of tries. So if you think just this kind of lockup happend there, this is would be nice. But there could be something less nice too: we are "fighting" with lockdep in one place but the lockup happens somewhere else for some totally different reason... > Does PPPoE stress the PPP setup code as much as this scenario? I guess > in theory it could if lots of PPPoE clients connected at the same time, > but there is no aggregate tunnel like there is with L2TP to cause all > sessions to connect simultaneously. Perhaps PPTP also suffers from these > issues? Perhaps not because it tends to be used only in VPN setups where > there is only 1 session per tunnel. I don't know this code enough, but it seems it should be easier to maintain or debug if there are similar solutions where possible. 3) I send here another testing patch with this second way to do this: on the write side, but it's even more "experimental" and only a proof of concept (should be applied on vanilla ppp_generic). >>> I'll look over it. I think I need to take a step back and look at >>> what's happening in more detail though. >> >> This is something completely new and changes all the picture: the xmit >> path wasn't expected (at least by me) to be called in softirq context >> at all, and there were no traces of this on previous reports. But, >> since lockdep always stops after the first warning, there could be >> even more surprises like this in the future. I'll check this report. > > Doesn't the TX softirq do transmits if they've been queued up? I've probably too much looked at these reports, and should've expected this could happen. Probably queueing could be separated, but since there could be no queue at all and it's done like this, then this current proof of concept seems to be dead end, and we have to go back to fixing sk_dst_lock handling and my patches could be dumped... So if I don't miss something again (and I need more time for this new report) you should try to fix the problem not reported originally by lockdep, but forseen by David(!): we need to avoid any path like: ppp_generic -> pppol2tp -> something, which could take sk_dst_lock while holding any ppp_generic writing lock: they all are softirq "safe" (i.e. endangered). David gave some example, but I'm not sure you did your patch like this (sk_dst_set()). Probably ip_queue_xmit() can't work with this too. Another, probably simpler way would be to move almost all pppol2tp_xmit code to a workqueue: this should let to break most of dependencies with ppp_generic locks, but I don't know how much it would affect other things (e.g. performance). So you should really rethink these things. Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Jarek Poplawski wrote, On 02/21/2008 01:08 PM: ... > Another, probably simpler way would be to move almost all pppol2tp_xmit ... Actually, the simplest off all seems to be now this old idea to maybe make sk_dst_lock globally softirq immune. At least I think it's worth of testing, to check for these other possible lockdep warnings. It should only need to change all write_ and read_lock(&sk->sk_dst_lock) in very few places: include/net/sock.h, include/net/ip6_route.h, and net/ipv6/ipv6_sockglue.c. This could be tested together with you full _bh locking patch (maybe except these other changes in pppol2tp_xmit). Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Jarek Poplawski wrote: Jarek Poplawski wrote, On 02/21/2008 01:08 PM: ... Another, probably simpler way would be to move almost all pppol2tp_xmit ... Actually, the simplest off all seems to be now this old idea to maybe make sk_dst_lock globally softirq immune. At least I think it's worth of testing, to check for these other possible lockdep warnings. It should only need to change all write_ and read_lock(&sk->sk_dst_lock) in very few places: include/net/sock.h, include/net/ip6_route.h, and net/ipv6/ipv6_sockglue.c. This could be tested together with you full _bh locking patch (maybe except these other changes in pppol2tp_xmit). I did this and all lockdep errors have now gone. Tests ran all weekend. See attached patch. Is this an acceptable solution? If so, I'll prepare and send official patches. -- James Chapman Katalix Systems Ltd http://www.katalix.com Catalysts for your Embedded Linux software development Index: linux-2.6.24.2/include/net/ip6_route.h === --- linux-2.6.24.2.orig/include/net/ip6_route.h +++ linux-2.6.24.2/include/net/ip6_route.h @@ -150,9 +150,9 @@ static inline void __ip6_dst_store(struc static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, struct in6_addr *daddr, struct in6_addr *saddr) { - write_lock(&sk->sk_dst_lock); + write_lock_bh(&sk->sk_dst_lock); __ip6_dst_store(sk, dst, daddr, saddr); - write_unlock(&sk->sk_dst_lock); + write_unlock_bh(&sk->sk_dst_lock); } static inline int ipv6_unicast_destination(struct sk_buff *skb) Index: linux-2.6.24.2/include/net/sock.h === --- linux-2.6.24.2.orig/include/net/sock.h +++ linux-2.6.24.2/include/net/sock.h @@ -1058,11 +1058,11 @@ sk_dst_get(struct sock *sk) { struct dst_entry *dst; - read_lock(&sk->sk_dst_lock); + read_lock_bh(&sk->sk_dst_lock); dst = sk->sk_dst_cache; if (dst) dst_hold(dst); - read_unlock(&sk->sk_dst_lock); + read_unlock_bh(&sk->sk_dst_lock); return dst; } @@ -1079,9 +1079,9 @@ __sk_dst_set(struct sock *sk, struct dst static inline void sk_dst_set(struct sock *sk, struct dst_entry *dst) { - write_lock(&sk->sk_dst_lock); + write_lock_bh(&sk->sk_dst_lock); __sk_dst_set(sk, dst); - write_unlock(&sk->sk_dst_lock); + write_unlock_bh(&sk->sk_dst_lock); } static inline void @@ -1097,9 +1097,9 @@ __sk_dst_reset(struct sock *sk) static inline void sk_dst_reset(struct sock *sk) { - write_lock(&sk->sk_dst_lock); + write_lock_bh(&sk->sk_dst_lock); __sk_dst_reset(sk); - write_unlock(&sk->sk_dst_lock); + write_unlock_bh(&sk->sk_dst_lock); } extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); Index: linux-2.6.24.2/net/ipv6/ipv6_sockglue.c === --- linux-2.6.24.2.orig/net/ipv6/ipv6_sockglue.c +++ linux-2.6.24.2/net/ipv6/ipv6_sockglue.c @@ -440,9 +440,9 @@ static int do_ipv6_setsockopt(struct soc opt = xchg(&np->opt, opt); sk_dst_reset(sk); } else { - write_lock(&sk->sk_dst_lock); + write_lock_bh(&sk->sk_dst_lock); opt = xchg(&np->opt, opt); - write_unlock(&sk->sk_dst_lock); + write_unlock_bh(&sk->sk_dst_lock); sk_dst_reset(sk); } sticky_done: @@ -504,9 +504,9 @@ update: opt = xchg(&np->opt, opt); sk_dst_reset(sk); } else { - write_lock(&sk->sk_dst_lock); + write_lock_bh(&sk->sk_dst_lock); opt = xchg(&np->opt, opt); - write_unlock(&sk->sk_dst_lock); + write_unlock_bh(&sk->sk_dst_lock); sk_dst_reset(sk); }
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Mon, Feb 25, 2008 at 12:19:50PM +, James Chapman wrote: > Jarek Poplawski wrote: >> Jarek Poplawski wrote, On 02/21/2008 01:08 PM: >> ... >> >>> Another, probably simpler way would be to move almost all pppol2tp_xmit >> ... >> >> Actually, the simplest off all seems to be now this old idea to maybe make >> sk_dst_lock globally softirq immune. At least I think it's worth of testing, >> to check for these other possible lockdep warnings. It should only need to >> change all write_ and read_lock(&sk->sk_dst_lock) in very few places: >> include/net/sock.h, include/net/ip6_route.h, and net/ipv6/ipv6_sockglue.c. >> This could be tested together with you full _bh locking patch (maybe except >> these other changes in pppol2tp_xmit). > > I did this and all lockdep errors have now gone. Tests ran all weekend. > See attached patch. > > Is this an acceptable solution? If so, I'll prepare and send official > patches. IMHO this should be acceptable because I can't see any reason for changing properly working code if there is so simple and not costly solution. But maybe David or somebody else finds some cons? Since this patch isn't very big I think you could try to send this officially and we will simply see... Regards, Jarek P. > > > -- > James Chapman > Katalix Systems Ltd > http://www.katalix.com > Catalysts for your Embedded Linux software development > > Index: linux-2.6.24.2/include/net/ip6_route.h > === > --- linux-2.6.24.2.orig/include/net/ip6_route.h > +++ linux-2.6.24.2/include/net/ip6_route.h > @@ -150,9 +150,9 @@ static inline void __ip6_dst_store(struc > static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, >struct in6_addr *daddr, struct in6_addr *saddr) > { > - write_lock(&sk->sk_dst_lock); > + write_lock_bh(&sk->sk_dst_lock); > __ip6_dst_store(sk, dst, daddr, saddr); > - write_unlock(&sk->sk_dst_lock); > + write_unlock_bh(&sk->sk_dst_lock); > } > > static inline int ipv6_unicast_destination(struct sk_buff *skb) > Index: linux-2.6.24.2/include/net/sock.h > === > --- linux-2.6.24.2.orig/include/net/sock.h > +++ linux-2.6.24.2/include/net/sock.h > @@ -1058,11 +1058,11 @@ sk_dst_get(struct sock *sk) > { > struct dst_entry *dst; > > - read_lock(&sk->sk_dst_lock); > + read_lock_bh(&sk->sk_dst_lock); > dst = sk->sk_dst_cache; > if (dst) > dst_hold(dst); > - read_unlock(&sk->sk_dst_lock); > + read_unlock_bh(&sk->sk_dst_lock); > return dst; > } > > @@ -1079,9 +1079,9 @@ __sk_dst_set(struct sock *sk, struct dst > static inline void > sk_dst_set(struct sock *sk, struct dst_entry *dst) > { > - write_lock(&sk->sk_dst_lock); > + write_lock_bh(&sk->sk_dst_lock); > __sk_dst_set(sk, dst); > - write_unlock(&sk->sk_dst_lock); > + write_unlock_bh(&sk->sk_dst_lock); > } > > static inline void > @@ -1097,9 +1097,9 @@ __sk_dst_reset(struct sock *sk) > static inline void > sk_dst_reset(struct sock *sk) > { > - write_lock(&sk->sk_dst_lock); > + write_lock_bh(&sk->sk_dst_lock); > __sk_dst_reset(sk); > - write_unlock(&sk->sk_dst_lock); > + write_unlock_bh(&sk->sk_dst_lock); > } > > extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); > Index: linux-2.6.24.2/net/ipv6/ipv6_sockglue.c > === > --- linux-2.6.24.2.orig/net/ipv6/ipv6_sockglue.c > +++ linux-2.6.24.2/net/ipv6/ipv6_sockglue.c > @@ -440,9 +440,9 @@ static int do_ipv6_setsockopt(struct soc > opt = xchg(&np->opt, opt); > sk_dst_reset(sk); > } else { > - write_lock(&sk->sk_dst_lock); > + write_lock_bh(&sk->sk_dst_lock); > opt = xchg(&np->opt, opt); > - write_unlock(&sk->sk_dst_lock); > + write_unlock_bh(&sk->sk_dst_lock); > sk_dst_reset(sk); > } > sticky_done: > @@ -504,9 +504,9 @@ update: > opt = xchg(&np->opt, opt); > sk_dst_reset(sk); > } else { > - write_lock(&sk->sk_dst_lock); > + write_lock_bh(&sk->sk_dst_lock); > opt = xchg(&np->opt, opt); > - write_unlock(&sk->sk_dst_lock); > + write_unlock_bh(&sk->sk_dst_lock); > sk_dst_reset(sk); > } > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Mon, Feb 25, 2008 at 01:05:08PM +, Jarek Poplawski wrote: ... > On Mon, Feb 25, 2008 at 12:19:50PM +, James Chapman wrote: > > Is this an acceptable solution? If so, I'll prepare and send official > > patches. > > IMHO this should be acceptable because I can't see any reason for > changing properly working code if there is so simple and not costly > solution. But maybe David or somebody else finds some cons? [...] Hmm... Wait a minute! But on the other hand David has written about his cons here, and it looks reasonable: this place would be fixed, but some others can start reports like this. Maybe, it's better to analyze yet if it's really so hard to eliminate taking this lock on the xmit path? Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Mon, Feb 25, 2008 at 01:39:48PM +, Jarek Poplawski wrote: ... > Maybe, it's better to > analyze yet if it's really so hard to eliminate taking this lock > on the xmit path? BTW, I'm not sure if it helps, but this matters only for the sockets which could be used (and locked) outside of pppol2tp code (so before pppol2tp code is called). Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Jarek Poplawski wrote, On 02/25/2008 02:39 PM: ... > Hmm... Wait a minute! But on the other hand David has written about > his cons here, and it looks reasonable: this place would be fixed, > but some others can start reports like this. Maybe, it's better to > analyze yet if it's really so hard to eliminate taking this lock > on the xmit path? James, I wonder if you could try to test this patch below? ip_queue_xmit() seems to do proper things with __sk_dst_check(), and if some other functions don't behave similarly lockdep should tell. I think, you could test it with your "locks to _bh" patch (without pppol2tp_xmit() part), and maybe with my sock.c lockdep patch (it should help lockdep to see these locks a bit more distinctly). Jarek P. PS: Since ppp_generic isn't endangered for now I removed Paul from CC. --- diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index e0b072d..b94659a 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -1058,7 +1058,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) /* Get routing info from the tunnel socket */ dst_release(skb->dst); - skb->dst = sk_dst_get(sk_tun); + skb->dst = NULL; skb_orphan(skb); skb->sk = sk_tun; -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
Jarek Poplawski wrote: Jarek Poplawski wrote, On 02/25/2008 02:39 PM: ... Hmm... Wait a minute! But on the other hand David has written about his cons here, and it looks reasonable: this place would be fixed, but some others can start reports like this. Maybe, it's better to analyze yet if it's really so hard to eliminate taking this lock on the xmit path? James, I wonder if you could try to test this patch below? ip_queue_xmit() seems to do proper things with __sk_dst_check(), and if some other functions don't behave similarly lockdep should tell. I think, you could test it with your "locks to _bh" patch (without pppol2tp_xmit() part), and maybe with my sock.c lockdep patch (it should help lockdep to see these locks a bit more distinctly). I found the same thing and was running a variant of your patch last night; rather than set skb->dst to NULL though, I use __sk_dst_get() and let ip_queue_xmit() do the route lookup if it returns NULL. But this has the same symptoms as the code I tried a few days ago - no lockdep errors but a system lockup after up to several hours. Nothing is logged in the syslog. Luckily, I'm in the lab where my two borrowed servers are today so I have access to their consoles. Hopefully I'll be able to find out why there are hanging. Btw, they don't hang if I disable irqs around the ppp_input() call. Will update you later. /james PS: Since ppp_generic isn't endangered for now I removed Paul from CC. --- diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index e0b072d..b94659a 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -1058,7 +1058,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) /* Get routing info from the tunnel socket */ dst_release(skb->dst); - skb->dst = sk_dst_get(sk_tun); + skb->dst = NULL; skb_orphan(skb); skb->sk = sk_tun; -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- James Chapman Katalix Systems Ltd http://www.katalix.com Catalysts for your Embedded Linux software development -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Tue, Feb 26, 2008 at 12:14:26PM +, James Chapman wrote: > Jarek Poplawski wrote: >> Jarek Poplawski wrote, On 02/25/2008 02:39 PM: >> ... >>> Hmm... Wait a minute! But on the other hand David has written about >>> his cons here, and it looks reasonable: this place would be fixed, >>> but some others can start reports like this. Maybe, it's better to >>> analyze yet if it's really so hard to eliminate taking this lock >>> on the xmit path? >> >> James, I wonder if you could try to test this patch below? >> ip_queue_xmit() seems to do proper things with __sk_dst_check(), and >> if some other functions don't behave similarly lockdep should tell. >> I think, you could test it with your "locks to _bh" patch (without >> pppol2tp_xmit() part), and maybe with my sock.c lockdep patch (it >> should help lockdep to see these locks a bit more distinctly). > > I found the same thing and was running a variant of your patch last > night; rather than set skb->dst to NULL though, I use __sk_dst_get() and > let ip_queue_xmit() do the route lookup if it returns NULL. But this has > the same symptoms as the code I tried a few days ago - no lockdep errors > but a system lockup after up to several hours. Nothing is logged in the > syslog. I guess you are going to try this together with this sk_dst_lock with bh patch too. If it's possible I'd suggest to try this skb->dst = NULL as well (__sk_dst_get instead of __sk_dst_check seems to be too racy). > Luckily, I'm in the lab where my two borrowed servers are today so I > have access to their consoles. Hopefully I'll be able to find out why > there are hanging. Btw, they don't hang if I disable irqs around the > ppp_input() call. ...and disabling bh instead isn't enough, BTW? > Will update you later. Thanks, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
On Tue, Feb 26, 2008 at 01:03:34PM +, Jarek Poplawski wrote: > On Tue, Feb 26, 2008 at 12:14:26PM +, James Chapman wrote: ... > > there are hanging. Btw, they don't hang if I disable irqs around the > > ppp_input() call. > > ...and disabling bh instead isn't enough, BTW? I guess not: they are mostly disabled by ppp_input() itself... So, it looks like a network card could mess here? Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][PPPOL2TP]: Fix SMP oops in pppol2tp driver
James Chapman wrote, On 02/26/2008 01:14 PM: ... > Luckily, I'm in the lab where my two borrowed servers are today so I > have access to their consoles. Hopefully I'll be able to find out why > there are hanging. Btw, they don't hang if I disable irqs around the > ppp_input() call. Maybe you've found the same, or there is some other reason yet, but IMHO this locking break around ppp_input() is wrong. Probably there is needed more advanced solution, but this should fix the problem if it really exists (isn't there possible a race e.g. between receive from socket and from network card?). Jarek P. --- drivers/net/pppol2tp.c | 16 1 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index e0b072d..7c6fcb9 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -363,18 +363,17 @@ out: spin_unlock(&session->reorder_q.lock); } -/* Dequeue a single skb. +/* Requeue a single skb. */ -static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct sk_buff *skb) +static void pppol2tp_recv_requeue_skb(struct pppol2tp_session *session, struct sk_buff *skb) { struct pppol2tp_tunnel *tunnel = session->tunnel; int length = PPPOL2TP_SKB_CB(skb)->length; struct sock *session_sock = NULL; - /* We're about to requeue the skb, so unlink it and return resources + /* We're about to requeue the skb, so return resources * to its current owner (a socket receive buffer). */ - skb_unlink(skb, &session->reorder_q); skb_orphan(skb); tunnel->stats.rx_packets++; @@ -436,14 +435,14 @@ static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct s static void pppol2tp_recv_dequeue(struct pppol2tp_session *session) { struct sk_buff *skb; - struct sk_buff *tmp; /* If the pkt at the head of the queue has the nr that we * expect to send up next, dequeue it and any other * in-sequence packets behind it. */ +again: spin_lock(&session->reorder_q.lock); - skb_queue_walk_safe(&session->reorder_q, skb, tmp) { + skb_queue_walk(&session->reorder_q, skb) { if (time_after(jiffies, PPPOL2TP_SKB_CB(skb)->expires)) { session->stats.rx_seq_discards++; session->stats.rx_errors++; @@ -469,9 +468,10 @@ static void pppol2tp_recv_dequeue(struct pppol2tp_session *session) goto out; } } + __skb_unlink(skb, &session->reorder_q); spin_unlock(&session->reorder_q.lock); - pppol2tp_recv_dequeue_skb(session, skb); - spin_lock(&session->reorder_q.lock); + pppol2tp_recv_requeue_skb(session, skb); + goto again; } out: -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html