On Sun, Mar 03, 2019 at 08:28:04AM +0100, Jan Stancek wrote:
> LTP testcase mtest06 [1] can trigger a crash on s390x running 5.0.0-rc8.
> This is a stress test, where one thread mmaps/writes/munmaps a memory area
> and another thread is trying to read from it:
> 
>   CPU: 0 PID: 2611 Comm: mmap1 Not tainted 5.0.0-rc8+ #51
>   Hardware name: IBM 2964 N63 400 (z/VM 6.4.0)
>   Krnl PSW : 0404e00180000000 00000000001ac8d8 (__lock_acquire+0x7/0x7a8)
>   Call Trace:
>   ([<0000000000000000>]           (null))
>    [<00000000001adae4>] lock_acquire+0xec/0x258
>    [<000000000080d1ac>] _raw_spin_lock_bh+0x5c/0x98
>    [<000000000012a780>] page_table_free+0x48/0x1a8
>    [<00000000002f6e54>] do_fault+0xdc/0x670
>    [<00000000002fadae>] __handle_mm_fault+0x416/0x5f0
>    [<00000000002fb138>] handle_mm_fault+0x1b0/0x320
>    [<00000000001248cc>] do_dat_exception+0x19c/0x2c8
>    [<000000000080e5ee>] pgm_check_handler+0x19e/0x200
> 
> page_table_free() is called with a NULL mm parameter, but because
> "0" is a valid address on s390 (see S390_lowcore), it keeps
> going until it eventually crashes in lockdep's lock_acquire.
> This crash is reproducible at least since 4.14.
> 
> The problem is that "vmf->vma" used in do_fault() can become stale.
> Because mmap_sem may be released, other threads can come in,
> call munmap() and cause "vma" to be returned to the kmem cache, where
> it can get zeroed/re-initialized and re-used:
> 
> handle_mm_fault                           |
>   __handle_mm_fault                       |
>     do_fault                              |
>       vma = vmf->vma                      |
>       do_read_fault                       |
>         __do_fault                        |
>           vma->vm_ops->fault(vmf);        |
>             mmap_sem is released          |
>                                           |
>                                           | do_munmap()
>                                           |   remove_vma_list()
>                                           |     remove_vma()
>                                           |       vm_area_free()
>                                           |         # vma is released
>                                           | ...
>                                           | # same vma is allocated
>                                           | # from kmem cache
>                                           | do_mmap()
>                                           |   vm_area_alloc()
>                                           |     memset(vma, 0, ...)
>                                           |
>       pte_free(vma->vm_mm, ...);          |
>         page_table_free                   |
>           spin_lock_bh(&mm->context.lock);|
>             <crash>                       |
> 
> Cache mm_struct to avoid using potentially stale "vma".
> 
> [1] https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/mem/mtest06/mmap1.c
> 
> Signed-off-by: Jan Stancek <jstan...@redhat.com>
> Reviewed-by: Andrea Arcangeli <aarca...@redhat.com>
> ---
>  mm/memory.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index e11ca9dd823f..e8d69ade5acc 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3517,10 +3517,13 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
>   * but allow concurrent faults).
>   * The mmap_sem may have been released depending on flags and our
>   * return value.  See filemap_fault() and __lock_page_or_retry().
> + * If mmap_sem is released, vma may become invalid (for example
> + * by other thread calling munmap()).
>   */
>  static vm_fault_t do_fault(struct vm_fault *vmf)
>  {
>       struct vm_area_struct *vma = vmf->vma;
> +     struct mm_struct *vm_mm = vma->vm_mm;
>       vm_fault_t ret;
>  
>       /*
> @@ -3561,7 +3564,7 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
>  
>       /* preallocated pagetable is unused: free it */
>       if (vmf->prealloc_pte) {
> -             pte_free(vma->vm_mm, vmf->prealloc_pte);
> +             pte_free(vm_mm, vmf->prealloc_pte);
>               vmf->prealloc_pte = NULL;
>       }
>       return ret;
> -- 
> 1.8.3.1
> 
Acked-by: Rafael Aquini <aqu...@redhat.com>

Reply via email to