[PATCH RFC v3 2/4] mm: userfault: return VM_FAULT_RETRY on signals

2018-11-20 Thread Peter Xu
There was a special path in handle_userfault() in the past that we'll
return a VM_FAULT_NOPAGE when we detected non-fatal signals when waiting
for userfault handling.  We did that by reacquiring the mmap_sem before
returning.  However that brings a risk in that the vmas might have
changed when we retake the mmap_sem and even we could be holding an
invalid vma structure.  The problem was reported by syzbot.

This patch removes the special path and we'll return a VM_FAULT_RETRY
with the common path even if we have got such signals.  Then for all the
architectures that is passing in VM_FAULT_ALLOW_RETRY into
handle_mm_fault(), we check not only for SIGKILL but for all the rest of
userspace pending signals right after we returned from
handle_mm_fault().

The idea comes from the upstream discussion between Linus and Andrea:

  https://lkml.org/lkml/2017/10/30/560

(This patch contains a potential fix for a double-free of mmap_sem on
 ARC architecture; please see https://lkml.org/lkml/2018/11/1/723 for
 more information)

Suggested-by: Linus Torvalds 
Suggested-by: Andrea Arcangeli 
Signed-off-by: Peter Xu 
---
 arch/alpha/mm/fault.c  |  2 +-
 arch/arc/mm/fault.c| 11 +++
 arch/arm/mm/fault.c| 14 ++
 arch/arm64/mm/fault.c  |  6 +++---
 arch/hexagon/mm/vm_fault.c |  2 +-
 arch/ia64/mm/fault.c   |  2 +-
 arch/m68k/mm/fault.c   |  2 +-
 arch/microblaze/mm/fault.c |  2 +-
 arch/mips/mm/fault.c   |  2 +-
 arch/nds32/mm/fault.c  |  6 +++---
 arch/nios2/mm/fault.c  |  2 +-
 arch/openrisc/mm/fault.c   |  2 +-
 arch/parisc/mm/fault.c |  2 +-
 arch/powerpc/mm/fault.c|  4 +++-
 arch/riscv/mm/fault.c  |  4 ++--
 arch/s390/mm/fault.c   |  9 ++---
 arch/sh/mm/fault.c |  4 
 arch/sparc/mm/fault_32.c   |  3 +++
 arch/sparc/mm/fault_64.c   |  3 +++
 arch/um/kernel/trap.c  |  5 -
 arch/unicore32/mm/fault.c  |  4 ++--
 arch/x86/mm/fault.c| 12 +++-
 arch/xtensa/mm/fault.c |  3 +++
 fs/userfaultfd.c   | 24 
 24 files changed, 73 insertions(+), 57 deletions(-)

diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index d73dc473fbb9..46e5e420ad2a 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -150,7 +150,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
   the fault.  */
fault = handle_mm_fault(vma, address, flags);
 
-   if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+   if ((fault & VM_FAULT_RETRY) && signal_pending(current))
return;
 
if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index c9da6102eb4f..52b6b71b74e2 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -142,11 +142,14 @@ void do_page_fault(unsigned long address, struct pt_regs 
*regs)
fault = handle_mm_fault(vma, address, flags);
 
/* If Pagefault was interrupted by SIGKILL, exit page fault "early" */
-   if (unlikely(fatal_signal_pending(current))) {
-   if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY))
+   if (unlikely(fatal_signal_pending(current) && user_mode(regs))) {
+   /*
+* VM_FAULT_RETRY means we have released the mmap_sem,
+* otherwise we need to drop it before leaving
+*/
+   if (!(fault & VM_FAULT_RETRY))
up_read(>mmap_sem);
-   if (user_mode(regs))
-   return;
+   return;
}
 
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index f4ea4c62c613..743077d19669 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -308,14 +308,20 @@ do_page_fault(unsigned long addr, unsigned int fsr, 
struct pt_regs *regs)
 
fault = __do_page_fault(mm, addr, fsr, flags, tsk);
 
-   /* If we need to retry but a fatal signal is pending, handle the
+   /* If we need to retry but a signal is pending, handle the
 * signal first. We do not need to release the mmap_sem because
 * it would already be released in __lock_page_or_retry in
 * mm/filemap.c. */
-   if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
-   if (!user_mode(regs))
+   if (fault & VM_FAULT_RETRY) {
+   if (fatal_signal_pending(current) && !user_mode(regs))
goto no_context;
-   return 0;
+   else if (signal_pending(current))
+   /*
+* It's either a common signal, or a fatal
+* signal but for the userspace, we return
+* immediately.
+*/
+   return 0;
}
 
/*
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 7d9571f4ae3d..744d6451ea83 100644
--- 

[PATCH RFC v3 2/4] mm: userfault: return VM_FAULT_RETRY on signals

2018-11-20 Thread Peter Xu
There was a special path in handle_userfault() in the past that we'll
return a VM_FAULT_NOPAGE when we detected non-fatal signals when waiting
for userfault handling.  We did that by reacquiring the mmap_sem before
returning.  However that brings a risk in that the vmas might have
changed when we retake the mmap_sem and even we could be holding an
invalid vma structure.  The problem was reported by syzbot.

This patch removes the special path and we'll return a VM_FAULT_RETRY
with the common path even if we have got such signals.  Then for all the
architectures that is passing in VM_FAULT_ALLOW_RETRY into
handle_mm_fault(), we check not only for SIGKILL but for all the rest of
userspace pending signals right after we returned from
handle_mm_fault().

The idea comes from the upstream discussion between Linus and Andrea:

  https://lkml.org/lkml/2017/10/30/560

(This patch contains a potential fix for a double-free of mmap_sem on
 ARC architecture; please see https://lkml.org/lkml/2018/11/1/723 for
 more information)

Suggested-by: Linus Torvalds 
Suggested-by: Andrea Arcangeli 
Signed-off-by: Peter Xu 
---
 arch/alpha/mm/fault.c  |  2 +-
 arch/arc/mm/fault.c| 11 +++
 arch/arm/mm/fault.c| 14 ++
 arch/arm64/mm/fault.c  |  6 +++---
 arch/hexagon/mm/vm_fault.c |  2 +-
 arch/ia64/mm/fault.c   |  2 +-
 arch/m68k/mm/fault.c   |  2 +-
 arch/microblaze/mm/fault.c |  2 +-
 arch/mips/mm/fault.c   |  2 +-
 arch/nds32/mm/fault.c  |  6 +++---
 arch/nios2/mm/fault.c  |  2 +-
 arch/openrisc/mm/fault.c   |  2 +-
 arch/parisc/mm/fault.c |  2 +-
 arch/powerpc/mm/fault.c|  4 +++-
 arch/riscv/mm/fault.c  |  4 ++--
 arch/s390/mm/fault.c   |  9 ++---
 arch/sh/mm/fault.c |  4 
 arch/sparc/mm/fault_32.c   |  3 +++
 arch/sparc/mm/fault_64.c   |  3 +++
 arch/um/kernel/trap.c  |  5 -
 arch/unicore32/mm/fault.c  |  4 ++--
 arch/x86/mm/fault.c| 12 +++-
 arch/xtensa/mm/fault.c |  3 +++
 fs/userfaultfd.c   | 24 
 24 files changed, 73 insertions(+), 57 deletions(-)

diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index d73dc473fbb9..46e5e420ad2a 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -150,7 +150,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
   the fault.  */
fault = handle_mm_fault(vma, address, flags);
 
-   if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+   if ((fault & VM_FAULT_RETRY) && signal_pending(current))
return;
 
if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index c9da6102eb4f..52b6b71b74e2 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -142,11 +142,14 @@ void do_page_fault(unsigned long address, struct pt_regs 
*regs)
fault = handle_mm_fault(vma, address, flags);
 
/* If Pagefault was interrupted by SIGKILL, exit page fault "early" */
-   if (unlikely(fatal_signal_pending(current))) {
-   if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY))
+   if (unlikely(fatal_signal_pending(current) && user_mode(regs))) {
+   /*
+* VM_FAULT_RETRY means we have released the mmap_sem,
+* otherwise we need to drop it before leaving
+*/
+   if (!(fault & VM_FAULT_RETRY))
up_read(>mmap_sem);
-   if (user_mode(regs))
-   return;
+   return;
}
 
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index f4ea4c62c613..743077d19669 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -308,14 +308,20 @@ do_page_fault(unsigned long addr, unsigned int fsr, 
struct pt_regs *regs)
 
fault = __do_page_fault(mm, addr, fsr, flags, tsk);
 
-   /* If we need to retry but a fatal signal is pending, handle the
+   /* If we need to retry but a signal is pending, handle the
 * signal first. We do not need to release the mmap_sem because
 * it would already be released in __lock_page_or_retry in
 * mm/filemap.c. */
-   if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
-   if (!user_mode(regs))
+   if (fault & VM_FAULT_RETRY) {
+   if (fatal_signal_pending(current) && !user_mode(regs))
goto no_context;
-   return 0;
+   else if (signal_pending(current))
+   /*
+* It's either a common signal, or a fatal
+* signal but for the userspace, we return
+* immediately.
+*/
+   return 0;
}
 
/*
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 7d9571f4ae3d..744d6451ea83 100644
---