On Thu, Oct 24, 2013 at 12:52:06PM +0200, Peter Zijlstra wrote:
> On Wed, Oct 23, 2013 at 10:48:38PM +0200, Peter Zijlstra wrote:
> > I'll also make sure to test we actually hit the fault path
> > by concurrently running something like:
> > 
> >  while :; do echo 1 > /proc/sys/vm/drop_caches; done
> > 
> > while doing perf top or so.. 
> 
> So the below appears to work; I've run:
> 
>   while :; do echo 1 > /proc/sys/vm/drop_caches; sleep 1; done &
>   while :; do make O=defconfig-build/ clean; perf record -a -g fp -e cycles:pp make O=defconfig-build/ -s -j64; done
> 
> And verified that the if (in_nmi()) trace_printk() was visible in the
> trace output, confirming we indeed took the fault from the NMI code.
> 
> I've had this running for ~30 minutes and the machine is still
> healthy.
> 
> Don, can you give this stuff a spin on your system?

I'll try to grab the machine I was testing with and see what this patch
does.  Thanks!  I assume this can go on top of the other patch that was
committed to -tip last week?
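
To make sure I'm actually exercising the new path, I'll grep the ftrace
buffer for the trace_printk marker while the workload runs, something
like (assuming debugfs is mounted at the usual place):

  grep YIPEE /sys/kernel/debug/tracing/trace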

Cheers,
Don

> 
> ---
>  arch/x86/lib/usercopy.c | 43 +++++++++++++++----------------------------
>  arch/x86/mm/fault.c     | 43 +++++++++++++++++++++++--------------------
>  2 files changed, 38 insertions(+), 48 deletions(-)
> 
> diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
> index 4f74d94c8d97..5465b8613944 100644
> --- a/arch/x86/lib/usercopy.c
> +++ b/arch/x86/lib/usercopy.c
> @@ -11,39 +11,26 @@
>  #include <linux/sched.h>
>  
>  /*
> - * best effort, GUP based copy_from_user() that is NMI-safe
> + * We rely on the nested NMI work to allow atomic faults from the NMI path; the
> + * nested NMI paths are careful to preserve CR2.
>   */
>  unsigned long
>  copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
>  {
> -     unsigned long offset, addr = (unsigned long)from;
> -     unsigned long size, len = 0;
> -     struct page *page;
> -     void *map;
> -     int ret;
> +     unsigned long ret;
>  
>       if (__range_not_ok(from, n, TASK_SIZE))
> -             return len;
> -
> -     do {
> -             ret = __get_user_pages_fast(addr, 1, 0, &page);
> -             if (!ret)
> -                     break;
> -
> -             offset = addr & (PAGE_SIZE - 1);
> -             size = min(PAGE_SIZE - offset, n - len);
> -
> -             map = kmap_atomic(page);
> -             memcpy(to, map+offset, size);
> -             kunmap_atomic(map);
> -             put_page(page);
> -
> -             len  += size;
> -             to   += size;
> -             addr += size;
> -
> -     } while (len < n);
> -
> -     return len;
> +             return 0;
> +
> +     /*
> +      * Even though this function is typically called from NMI/IRQ context,
> +      * disable pagefaults so that its behaviour is consistent even when
> +      * called from other contexts.
> +      */
> +     pagefault_disable();
> +     ret = __copy_from_user_inatomic(to, from, n);
> +     pagefault_enable();
> +
> +     return n - ret;
>  }
>  EXPORT_SYMBOL_GPL(copy_from_user_nmi);
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 3aaeffcfd67a..506564b13ba7 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -51,7 +51,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
>       return 0;
>  }
>  
> -static inline int __kprobes notify_page_fault(struct pt_regs *regs)
> +static inline int __kprobes kprobes_fault(struct pt_regs *regs)
>  {
>       int ret = 0;
>  
> @@ -1048,7 +1048,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
>                       return;
>  
>               /* kprobes don't want to hook the spurious faults: */
> -             if (notify_page_fault(regs))
> +             if (kprobes_fault(regs))
>                       return;
>               /*
>                * Don't take the mm semaphore here. If we fixup a prefetch
> @@ -1060,23 +1060,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
>       }
>  
>       /* kprobes don't want to hook the spurious faults: */
> -     if (unlikely(notify_page_fault(regs)))
> +     if (unlikely(kprobes_fault(regs)))
>               return;
> -     /*
> -      * It's safe to allow irq's after cr2 has been saved and the
> -      * vmalloc fault has been handled.
> -      *
> -      * User-mode registers count as a user access even for any
> -      * potential system fault or CPU buglet:
> -      */
> -     if (user_mode_vm(regs)) {
> -             local_irq_enable();
> -             error_code |= PF_USER;
> -             flags |= FAULT_FLAG_USER;
> -     } else {
> -             if (regs->flags & X86_EFLAGS_IF)
> -                     local_irq_enable();
> -     }
>  
>       if (unlikely(error_code & PF_RSVD))
>               pgtable_bad(regs, error_code, address);
> @@ -1088,17 +1073,35 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
>               }
>       }
>  
> -     perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
> -
>       /*
>        * If we're in an interrupt, have no user context or are running
>        * in an atomic region then we must not take the fault:
>        */
>       if (unlikely(in_atomic() || !mm)) {
> +             if (in_nmi())
> +                     trace_printk("YIPEE!!!\n");
>               bad_area_nosemaphore(regs, error_code, address);
>               return;
>       }
>  
> +     /*
> +      * It's safe to allow irq's after cr2 has been saved and the
> +      * vmalloc fault has been handled.
> +      *
> +      * User-mode registers count as a user access even for any
> +      * potential system fault or CPU buglet:
> +      */
> +     if (user_mode_vm(regs)) {
> +             local_irq_enable();
> +             error_code |= PF_USER;
> +             flags |= FAULT_FLAG_USER;
> +     } else {
> +             if (regs->flags & X86_EFLAGS_IF)
> +                     local_irq_enable();
> +     }
> +
> +     perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
> +
>       if (error_code & PF_WRITE)
>               flags |= FAULT_FLAG_WRITE;
>  