On Tue, Apr 16, 2019 at 03:45:10PM +0200, Laurent Dufour wrote:
> Introduce a per mm_struct seqlock, the mm_seq field, to protect changes
> made to the MM RB tree. This allows the RB tree to be walked without
> grabbing the mmap_sem; once the walk is done, the sequence counter is
> double checked to verify it was stable during the walk.
> 
> The mm seqlock is held while inserting entries into and removing entries
> from the MM RB tree.  Later in this series, it will be checked when
> looking for a VMA without holding the mmap_sem.
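
(Not part of this patch, but to make the read side concrete for other
reviewers: the check mentioned above is the classic seqlock retry
pattern. Below is a minimal sketch of a speculative lookup built on just
this patch. The function name is mine, and the sketch ignores the RCU
protection of the rbtree nodes and VMAs that the real series needs to
make the lockless walk safe.)

static struct vm_area_struct *find_vma_speculative(struct mm_struct *mm,
						   unsigned long addr)
{
	struct vm_area_struct *vma;
	unsigned int seq;

	do {
		struct rb_node *node;

		seq = read_seqbegin(&mm->mm_seq);
		vma = NULL;
		node = mm->mm_rb.rb_node;

		while (node) {
			struct vm_area_struct *tmp;

			tmp = rb_entry(node, struct vm_area_struct, vm_rb);
			if (tmp->vm_end > addr) {
				vma = tmp;
				if (tmp->vm_start <= addr)
					break;
				node = node->rb_left;
			} else {
				node = node->rb_right;
			}
		}
		/* A writer changed the tree during the walk: try again. */
	} while (read_seqretry(&mm->mm_seq, seq));

	return vma;
}
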
> 
> This is based on the initial work from Peter Zijlstra:
> https://lore.kernel.org/linux-mm/20100104182813.479668...@chello.nl/
> 
> Signed-off-by: Laurent Dufour <lduf...@linux.ibm.com>

Reviewed-by: Jérôme Glisse <jgli...@redhat.com>

> ---
>  include/linux/mm_types.h |  3 +++
>  kernel/fork.c            |  3 +++
>  mm/init-mm.c             |  3 +++
>  mm/mmap.c                | 48 +++++++++++++++++++++++++++++++---------
>  4 files changed, 46 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index e78f72eb2576..24b3f8ce9e42 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -358,6 +358,9 @@ struct mm_struct {
>       struct {
>               struct vm_area_struct *mmap;            /* list of VMAs */
>               struct rb_root mm_rb;
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +             seqlock_t mm_seq;
> +#endif
>               u64 vmacache_seqnum;                   /* per-thread vmacache */
>  #ifdef CONFIG_MMU
>               unsigned long (*get_unmapped_area) (struct file *filp,
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 2992d2c95256..3a1739197ebc 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1008,6 +1008,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
>       mm->mmap = NULL;
>       mm->mm_rb = RB_ROOT;
>       mm->vmacache_seqnum = 0;
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +     seqlock_init(&mm->mm_seq);
> +#endif
>       atomic_set(&mm->mm_users, 1);
>       atomic_set(&mm->mm_count, 1);
>       init_rwsem(&mm->mmap_sem);
> diff --git a/mm/init-mm.c b/mm/init-mm.c
> index a787a319211e..69346b883a4e 100644
> --- a/mm/init-mm.c
> +++ b/mm/init-mm.c
> @@ -27,6 +27,9 @@
>   */
>  struct mm_struct init_mm = {
>       .mm_rb          = RB_ROOT,
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +     .mm_seq         = __SEQLOCK_UNLOCKED(init_mm.mm_seq),
> +#endif
>       .pgd            = swapper_pg_dir,
>       .mm_users       = ATOMIC_INIT(2),
>       .mm_count       = ATOMIC_INIT(1),
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 13460b38b0fb..f7f6027a7dff 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -170,6 +170,24 @@ void unlink_file_vma(struct vm_area_struct *vma)
>       }
>  }
>  
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +static inline void mm_write_seqlock(struct mm_struct *mm)
> +{
> +     write_seqlock(&mm->mm_seq);
> +}
> +static inline void mm_write_sequnlock(struct mm_struct *mm)
> +{
> +     write_sequnlock(&mm->mm_seq);
> +}
> +#else
> +static inline void mm_write_seqlock(struct mm_struct *mm)
> +{
> +}
> +static inline void mm_write_sequnlock(struct mm_struct *mm)
> +{
> +}
> +#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
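
Only the write side gets wrappers here; I assume the read side will grow
matching helpers once the speculative lookup path lands. Purely as a
sketch of what I would expect (names are mine, not taken from this
series):

#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
static inline unsigned int mm_read_seqbegin(struct mm_struct *mm)
{
	return read_seqbegin(&mm->mm_seq);
}

static inline unsigned int mm_read_seqretry(struct mm_struct *mm,
					    unsigned int seq)
{
	return read_seqretry(&mm->mm_seq, seq);
}
#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */

Keeping them as static inlines next to the write-side helpers would
avoid sprinkling #ifdef CONFIG_SPECULATIVE_PAGE_FAULT in the lookup
path.
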
> +
>  /*
>   * Close a vm structure and free it, returning the next.
>   */
> @@ -445,26 +463,32 @@ static void vma_gap_update(struct vm_area_struct *vma)
>  }
>  
>  static inline void vma_rb_insert(struct vm_area_struct *vma,
> -                              struct rb_root *root)
> +                              struct mm_struct *mm)
>  {
> +     struct rb_root *root = &mm->mm_rb;
> +
>       /* All rb_subtree_gap values must be consistent prior to insertion */
>       validate_mm_rb(root, NULL);
>  
>       rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
>  }
>  
> -static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
> +static void __vma_rb_erase(struct vm_area_struct *vma, struct mm_struct *mm)
>  {
> +     struct rb_root *root = &mm->mm_rb;
> +
>       /*
>        * Note rb_erase_augmented is a fairly large inline function,
>        * so make sure we instantiate it only once with our desired
>        * augmented rbtree callbacks.
>        */
> +     mm_write_seqlock(mm);
>       rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
> +     mm_write_sequnlock(mm); /* wmb */
>  }
>  
>  static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
> -                                             struct rb_root *root,
> +                                             struct mm_struct *mm,
>                                               struct vm_area_struct *ignore)
>  {
>       /*
> @@ -472,21 +496,21 @@ static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
>        * with the possible exception of the "next" vma being erased if
>        * next->vm_start was reduced.
>        */
> -     validate_mm_rb(root, ignore);
> +     validate_mm_rb(&mm->mm_rb, ignore);
>  
> -     __vma_rb_erase(vma, root);
> +     __vma_rb_erase(vma, mm);
>  }
>  
>  static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
> -                                      struct rb_root *root)
> +                                      struct mm_struct *mm)
>  {
>       /*
>        * All rb_subtree_gap values must be consistent prior to erase,
>        * with the possible exception of the vma being erased.
>        */
> -     validate_mm_rb(root, vma);
> +     validate_mm_rb(&mm->mm_rb, vma);
>  
> -     __vma_rb_erase(vma, root);
> +     __vma_rb_erase(vma, mm);
>  }
>  
>  /*
> @@ -601,10 +625,12 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
>        * immediately update the gap to the correct value. Finally we
>        * rebalance the rbtree after all augmented values have been set.
>        */
> +     mm_write_seqlock(mm);
>       rb_link_node(&vma->vm_rb, rb_parent, rb_link);
>       vma->rb_subtree_gap = 0;
>       vma_gap_update(vma);
> -     vma_rb_insert(vma, &mm->mm_rb);
> +     vma_rb_insert(vma, mm);
> +     mm_write_sequnlock(mm);
>  }
>  
>  static void __vma_link_file(struct vm_area_struct *vma)
> @@ -680,7 +706,7 @@ static __always_inline void __vma_unlink_common(struct mm_struct *mm,
>  {
>       struct vm_area_struct *next;
>  
> -     vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
> +     vma_rb_erase_ignore(vma, mm, ignore);
>       next = vma->vm_next;
>       if (has_prev)
>               prev->vm_next = next;
> @@ -2674,7 +2700,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
>       insertion_point = (prev ? &prev->vm_next : &mm->mmap);
>       vma->vm_prev = NULL;
>       do {
> -             vma_rb_erase(vma, &mm->mm_rb);
> +             vma_rb_erase(vma, mm);
>               mm->map_count--;
>               tail_vma = vma;
>               vma = vma->vm_next;
> -- 
> 2.21.0
> 
