[RFC PATCH 30/30] softirq: Tasklet/net-rx fixup

2018-10-10 Thread Frederic Weisbecker
From: Frederic Weisbecker 

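Tasklets and net-rx vectors don't quite get along. If one is interrupted
by the other, we may run into a nasty spin_lock recursion. In the trace
below, the TCP tasklet is interrupted by a hardirq whose exit path runs
net_rx_action(), which then trips spin_bug() on a lock taken from
tcp_v4_rcv():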

[  135.427198] Call Trace:
[  135.429650]  
[  135.431690]  dump_stack+0x67/0x95
[  135.435024]  spin_bug+0x95/0xf0
[  135.438187]  do_raw_spin_lock+0x77/0xa0
[  135.442079]  _raw_spin_lock_nested+0x40/0x50
[  135.446439]  ? tcp_v4_rcv+0x9da/0xb10
[  135.450131]  tcp_v4_rcv+0x9da/0xb10
[  135.453650]  ? ip_local_deliver+0x78/0x260
[  135.457758]  ip_local_deliver+0xdf/0x260
[  135.461728]  ip_rcv+0x4e/0x80
[  135.464716]  __netif_receive_skb_one_core+0x55/0x80
[  135.469623]  __netif_receive_skb+0x1b/0x70
[  135.473757]  netif_receive_skb_internal+0x92/0x390
[  135.478574]  napi_gro_receive+0xdf/0x1a0
[  135.482545]  rtl8169_poll+0x2b8/0x670
[  135.486211]  net_rx_action+0x1f8/0x3e0
[  135.489989]  __do_softirq+0x1a0/0x63c
[  135.493691]  irq_exit+0x10f/0x120
[  135.497033]  do_IRQ+0x71/0x130
[  135.500137]  common_interrupt+0xf/0xf
[  135.503839] RIP: 0010:_raw_spin_unlock_irqrestore+0x59/0x70
[  135.509471] Code: 75 21 53 9d e8 e8 1d a5 ff bf 01 00 00 00 e8 8e f6 97 ff 65 8b 05 ef 06 8d 7e 85 c0 74 0e 5b 41 5c 5d c3 e8 c9 20 a5 ff 53 9d  dd e8 90 d4 8b ff 5b 41 5c 5d c3 66 66 2e 0f 1f 84 00 00 00 00
[  135.528347] RSP: 0018:88021fb03d28 EFLAGS: 0246 ORIG_RAX: ffde
[  135.535989] RAX: 880217762480 RBX: 0246 RCX: 0002
[  135.543201] RDX:  RSI: 880217762c70 RDI: 880217762480
[  135.550332] RBP: 88021fb03d38 R08: 0001 R09: 
[  135.557519] R10:  R11:  R12: 88021fa59b40
[  135.564719] R13: 88021fa59b40 R14: fffd78b5 R15: 0e01
[  135.571905]  ? common_interrupt+0xa/0xf
[  135.575783]  mod_timer+0x196/0x440
[  135.579221]  sk_reset_timer+0x18/0x30
[  135.582940]  tcp_schedule_loss_probe+0xe9/0x120
[  135.587515]  tcp_write_xmit+0x2c4/0x1240
[  135.591468]  tcp_tsq_write.part.46+0x5e/0xb0
[  135.595756]  tcp_tsq_handler+0xa3/0xb0
[  135.599534]  tcp_tasklet_func+0xdc/0x120
[  135.603488]  tasklet_action_common.isra.17+0xa3/0xb0
[  135.608471]  tasklet_action+0x2d/0x30
[  135.612161]  __do_softirq+0x1a0/0x63c
[  135.615847]  irq_exit+0x10f/0x120
[  135.619173]  do_IRQ+0x71/0x130
[  135.622251]  common_interrupt+0xf/0xf
[  135.625949]  

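Work around this by disabling each of these two vectors (if it was
enabled) while the other one's handler runs, and re-enabling it
afterwards. This is an ugly workaround until we find a proper solution.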

Signed-off-by: Frederic Weisbecker 
Cc: Ingo Molnar 
Cc: Sebastian Andrzej Siewior 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: David S. Miller 
Cc: Mauro Carvalho Chehab 
Cc: Paul E. McKenney 
---
 kernel/softirq.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index f4cb1ea..d95295f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -320,6 +320,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
unsigned long old_flags = current->flags;
int max_restart = MAX_SOFTIRQ_RESTART;
struct softirq_action *h;
+   bool tasklet_enabled = false, net_rx_enabled = false;
bool in_hardirq;
__u32 pending;
int softirq_bit;
@@ -338,6 +339,10 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
in_hardirq = lockdep_softirq_start();
 
 restart:
+   if (local_softirq_enabled() & TASKLET_SOFTIRQ)
+   tasklet_enabled = true;
+   if (local_softirq_enabled() & NET_RX_SOFTIRQ)
+   net_rx_enabled = true;
/* Reset the pending bitmask before enabling irqs */
softirq_pending_nand(pending);
 
@@ -358,8 +363,16 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 
trace_softirq_entry(vec_nr);
softirq_enabled_nand(BIT(vec_nr));
+   if (vec_nr == NET_RX_SOFTIRQ && tasklet_enabled)
+   softirq_enabled_nand(BIT(TASKLET_SOFTIRQ));
+   if (vec_nr == TASKLET_SOFTIRQ && net_rx_enabled)
+   softirq_enabled_nand(BIT(NET_RX_SOFTIRQ));
barrier();
h->action(h);
+   if (vec_nr == TASKLET_SOFTIRQ && net_rx_enabled)
+   softirq_enabled_or(BIT(NET_RX_SOFTIRQ));
+   if (vec_nr == NET_RX_SOFTIRQ && tasklet_enabled)
+   softirq_enabled_or(BIT(TASKLET_SOFTIRQ));
softirq_enabled_or(BIT(vec_nr));
trace_softirq_exit(vec_nr);
if (unlikely(prev_count != preempt_count())) {
-- 
2.7.4



[RFC PATCH 30/30] softirq: Tasklet/net-rx fixup

2018-10-10 Thread Frederic Weisbecker
From: Frederic Weisbecker 

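Tasklets and net-rx vectors don't quite get along. If one is interrupted
by the other, we may run into a nasty spin_lock recursion. In the trace
below, the TCP tasklet is interrupted by a hardirq whose exit path runs
net_rx_action(), which then trips spin_bug() on a lock taken from
tcp_v4_rcv():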

[  135.427198] Call Trace:
[  135.429650]  
[  135.431690]  dump_stack+0x67/0x95
[  135.435024]  spin_bug+0x95/0xf0
[  135.438187]  do_raw_spin_lock+0x77/0xa0
[  135.442079]  _raw_spin_lock_nested+0x40/0x50
[  135.446439]  ? tcp_v4_rcv+0x9da/0xb10
[  135.450131]  tcp_v4_rcv+0x9da/0xb10
[  135.453650]  ? ip_local_deliver+0x78/0x260
[  135.457758]  ip_local_deliver+0xdf/0x260
[  135.461728]  ip_rcv+0x4e/0x80
[  135.464716]  __netif_receive_skb_one_core+0x55/0x80
[  135.469623]  __netif_receive_skb+0x1b/0x70
[  135.473757]  netif_receive_skb_internal+0x92/0x390
[  135.478574]  napi_gro_receive+0xdf/0x1a0
[  135.482545]  rtl8169_poll+0x2b8/0x670
[  135.486211]  net_rx_action+0x1f8/0x3e0
[  135.489989]  __do_softirq+0x1a0/0x63c
[  135.493691]  irq_exit+0x10f/0x120
[  135.497033]  do_IRQ+0x71/0x130
[  135.500137]  common_interrupt+0xf/0xf
[  135.503839] RIP: 0010:_raw_spin_unlock_irqrestore+0x59/0x70
[  135.509471] Code: 75 21 53 9d e8 e8 1d a5 ff bf 01 00 00 00 e8 8e f6 97 ff 65 8b 05 ef 06 8d 7e 85 c0 74 0e 5b 41 5c 5d c3 e8 c9 20 a5 ff 53 9d  dd e8 90 d4 8b ff 5b 41 5c 5d c3 66 66 2e 0f 1f 84 00 00 00 00
[  135.528347] RSP: 0018:88021fb03d28 EFLAGS: 0246 ORIG_RAX: ffde
[  135.535989] RAX: 880217762480 RBX: 0246 RCX: 0002
[  135.543201] RDX:  RSI: 880217762c70 RDI: 880217762480
[  135.550332] RBP: 88021fb03d38 R08: 0001 R09: 
[  135.557519] R10:  R11:  R12: 88021fa59b40
[  135.564719] R13: 88021fa59b40 R14: fffd78b5 R15: 0e01
[  135.571905]  ? common_interrupt+0xa/0xf
[  135.575783]  mod_timer+0x196/0x440
[  135.579221]  sk_reset_timer+0x18/0x30
[  135.582940]  tcp_schedule_loss_probe+0xe9/0x120
[  135.587515]  tcp_write_xmit+0x2c4/0x1240
[  135.591468]  tcp_tsq_write.part.46+0x5e/0xb0
[  135.595756]  tcp_tsq_handler+0xa3/0xb0
[  135.599534]  tcp_tasklet_func+0xdc/0x120
[  135.603488]  tasklet_action_common.isra.17+0xa3/0xb0
[  135.608471]  tasklet_action+0x2d/0x30
[  135.612161]  __do_softirq+0x1a0/0x63c
[  135.615847]  irq_exit+0x10f/0x120
[  135.619173]  do_IRQ+0x71/0x130
[  135.622251]  common_interrupt+0xf/0xf
[  135.625949]  

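Work around this by disabling each of these two vectors (if it was
enabled) while the other one's handler runs, and re-enabling it
afterwards. This is an ugly workaround until we find a proper solution.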

Signed-off-by: Frederic Weisbecker 
Cc: Ingo Molnar 
Cc: Sebastian Andrzej Siewior 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: David S. Miller 
Cc: Mauro Carvalho Chehab 
Cc: Paul E. McKenney 
---
 kernel/softirq.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index f4cb1ea..d95295f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -320,6 +320,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
unsigned long old_flags = current->flags;
int max_restart = MAX_SOFTIRQ_RESTART;
struct softirq_action *h;
+   bool tasklet_enabled = false, net_rx_enabled = false;
bool in_hardirq;
__u32 pending;
int softirq_bit;
@@ -338,6 +339,10 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
in_hardirq = lockdep_softirq_start();
 
 restart:
+   if (local_softirq_enabled() & TASKLET_SOFTIRQ)
+   tasklet_enabled = true;
+   if (local_softirq_enabled() & NET_RX_SOFTIRQ)
+   net_rx_enabled = true;
/* Reset the pending bitmask before enabling irqs */
softirq_pending_nand(pending);
 
@@ -358,8 +363,16 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 
trace_softirq_entry(vec_nr);
softirq_enabled_nand(BIT(vec_nr));
+   if (vec_nr == NET_RX_SOFTIRQ && tasklet_enabled)
+   softirq_enabled_nand(BIT(TASKLET_SOFTIRQ));
+   if (vec_nr == TASKLET_SOFTIRQ && net_rx_enabled)
+   softirq_enabled_nand(BIT(NET_RX_SOFTIRQ));
barrier();
h->action(h);
+   if (vec_nr == TASKLET_SOFTIRQ && net_rx_enabled)
+   softirq_enabled_or(BIT(NET_RX_SOFTIRQ));
+   if (vec_nr == NET_RX_SOFTIRQ && tasklet_enabled)
+   softirq_enabled_or(BIT(TASKLET_SOFTIRQ));
softirq_enabled_or(BIT(vec_nr));
trace_softirq_exit(vec_nr);
if (unlikely(prev_count != preempt_count())) {
-- 
2.7.4