[PATCH 4.15 24/72] RDMA/rdma_cm: Fix use after free race with process_one_req

2018-04-06 Thread Greg Kroah-Hartman
4.15-stable review patch.  If anyone has any objections, please let me know.

--

From: Jason Gunthorpe 

commit 9137108cc3d64ade13e753108ec611a0daed16a0 upstream.

process_one_req() can race with rdma_addr_cancel():

           CPU0                                 CPU1
           ====                                 ====
 process_one_work()
  debug_work_deactivate(work);
  process_one_req()
                                        rdma_addr_cancel()
                                          mutex_lock(&lock);
                                           set_timeout(&req->work,..);
                                              __queue_work()
                                               debug_work_activate(work);
                                          mutex_unlock(&lock);

   mutex_lock(&lock);
   [..]
	list_del(&req->list);
   mutex_unlock(&lock);
   [..]

   // ODEBUG explodes since the work is still queued.
   kfree(req);

Causing ODEBUG to detect the use after free:

ODEBUG: free active (active state 0) object type: work_struct hint: 
process_one_req+0x0/0x6c0 include/net/dst.h:165
WARNING: CPU: 0 PID: 79 at lib/debugobjects.c:291 
debug_print_object+0x166/0x220 lib/debugobjects.c:288
kvm: emulating exchange as write
Kernel panic - not syncing: panic_on_warn set ...

CPU: 0 PID: 79 Comm: kworker/u4:3 Not tainted 4.16.0-rc6+ #361
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Workqueue: ib_addr process_one_req
Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x194/0x24d lib/dump_stack.c:53
 panic+0x1e4/0x41c kernel/panic.c:183
 __warn+0x1dc/0x200 kernel/panic.c:547
 report_bug+0x1f4/0x2b0 lib/bug.c:186
 fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178
 fixup_bug arch/x86/kernel/traps.c:247 [inline]
 do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
 invalid_op+0x1b/0x40 arch/x86/entry/entry_64.S:986
RIP: 0010:debug_print_object+0x166/0x220 lib/debugobjects.c:288
RSP: :8801d966f210 EFLAGS: 00010086
RAX: dc08 RBX: 0003 RCX: 815acd6e
RDX:  RSI: 11003b2cddf2 RDI: 
RBP: 8801d966f250 R08:  R09: 11003b2cddc8
R10: ed003b2cde71 R11: 86f39a98 R12: 0001
R13: 86f15540 R14: 86408700 R15: 8147c0a0
 __debug_check_no_obj_freed lib/debugobjects.c:745 [inline]
 debug_check_no_obj_freed+0x662/0xf1f lib/debugobjects.c:774
 kfree+0xc7/0x260 mm/slab.c:3799
 process_one_req+0x2e7/0x6c0 drivers/infiniband/core/addr.c:592
 process_one_work+0xc47/0x1bb0 kernel/workqueue.c:2113
 worker_thread+0x223/0x1990 kernel/workqueue.c:2247
 kthread+0x33c/0x400 kernel/kthread.c:238
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:406

Fixes: 5fff41e1f89d ("IB/core: Fix race condition in resolving IP to MAC")
Reported-by: 
Signed-off-by: Jason Gunthorpe 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/infiniband/core/addr.c |    9 +++++++++
 1 file changed, 9 insertions(+)

--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -598,6 +598,15 @@ static void process_one_req(struct work_
 	list_del(&req->list);
 	mutex_unlock(&lock);
 
+	/*
+	 * Although the work will normally have been canceled by the
+	 * workqueue, it can still be requeued as long as it is on the
+	 * req_list, so it could have been requeued before we grabbed &lock.
+	 * We need to cancel it after it is removed from req_list to really be
+	 * sure it is safe to free.
+	 */
+	cancel_delayed_work(&req->work);
+
 	req->callback(req->status, (struct sockaddr *)&req->src_addr,
 		req->addr, req->context);
 	put_client(req->client);




[PATCH 4.15 24/72] RDMA/rdma_cm: Fix use after free race with process_one_req

2018-04-06 Thread Greg Kroah-Hartman
4.15-stable review patch.  If anyone has any objections, please let me know.

--

From: Jason Gunthorpe 

commit 9137108cc3d64ade13e753108ec611a0daed16a0 upstream.

process_one_req() can race with rdma_addr_cancel():

           CPU0                                 CPU1
           ====                                 ====
 process_one_work()
  debug_work_deactivate(work);
  process_one_req()
                                        rdma_addr_cancel()
                                          mutex_lock(&lock);
                                           set_timeout(&req->work,..);
                                              __queue_work()
                                               debug_work_activate(work);
                                          mutex_unlock(&lock);

   mutex_lock(&lock);
   [..]
	list_del(&req->list);
   mutex_unlock(&lock);
   [..]

   // ODEBUG explodes since the work is still queued.
   kfree(req);

Causing ODEBUG to detect the use after free:

ODEBUG: free active (active state 0) object type: work_struct hint: 
process_one_req+0x0/0x6c0 include/net/dst.h:165
WARNING: CPU: 0 PID: 79 at lib/debugobjects.c:291 
debug_print_object+0x166/0x220 lib/debugobjects.c:288
kvm: emulating exchange as write
Kernel panic - not syncing: panic_on_warn set ...

CPU: 0 PID: 79 Comm: kworker/u4:3 Not tainted 4.16.0-rc6+ #361
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Workqueue: ib_addr process_one_req
Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x194/0x24d lib/dump_stack.c:53
 panic+0x1e4/0x41c kernel/panic.c:183
 __warn+0x1dc/0x200 kernel/panic.c:547
 report_bug+0x1f4/0x2b0 lib/bug.c:186
 fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178
 fixup_bug arch/x86/kernel/traps.c:247 [inline]
 do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
 invalid_op+0x1b/0x40 arch/x86/entry/entry_64.S:986
RIP: 0010:debug_print_object+0x166/0x220 lib/debugobjects.c:288
RSP: :8801d966f210 EFLAGS: 00010086
RAX: dc08 RBX: 0003 RCX: 815acd6e
RDX:  RSI: 11003b2cddf2 RDI: 
RBP: 8801d966f250 R08:  R09: 11003b2cddc8
R10: ed003b2cde71 R11: 86f39a98 R12: 0001
R13: 86f15540 R14: 86408700 R15: 8147c0a0
 __debug_check_no_obj_freed lib/debugobjects.c:745 [inline]
 debug_check_no_obj_freed+0x662/0xf1f lib/debugobjects.c:774
 kfree+0xc7/0x260 mm/slab.c:3799
 process_one_req+0x2e7/0x6c0 drivers/infiniband/core/addr.c:592
 process_one_work+0xc47/0x1bb0 kernel/workqueue.c:2113
 worker_thread+0x223/0x1990 kernel/workqueue.c:2247
 kthread+0x33c/0x400 kernel/kthread.c:238
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:406

Fixes: 5fff41e1f89d ("IB/core: Fix race condition in resolving IP to MAC")
Reported-by: 
Signed-off-by: Jason Gunthorpe 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/infiniband/core/addr.c |    9 +++++++++
 1 file changed, 9 insertions(+)

--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -598,6 +598,15 @@ static void process_one_req(struct work_
 	list_del(&req->list);
 	mutex_unlock(&lock);
 
+	/*
+	 * Although the work will normally have been canceled by the
+	 * workqueue, it can still be requeued as long as it is on the
+	 * req_list, so it could have been requeued before we grabbed &lock.
+	 * We need to cancel it after it is removed from req_list to really be
+	 * sure it is safe to free.
+	 */
+	cancel_delayed_work(&req->work);
+
 	req->callback(req->status, (struct sockaddr *)&req->src_addr,
 		req->addr, req->context);
 	put_client(req->client);