On Mon, Mar 23, 2026 at 11:04 AM Lorenzo Stoakes (Oracle)
<[email protected]> wrote:
>
> On Sat, Mar 21, 2026 at 10:43:08PM -0700, Suren Baghdasaryan wrote:
> > Replace vma_start_write() with vma_start_write_killable() when
> > process_vma_walk_lock() is used with PGWALK_WRLOCK option.
> > Adjust its direct and indirect users to check for a possible error
> > and handle it. Ensure users handle EINTR correctly and do not ignore
> > it.
> >
> > Signed-off-by: Suren Baghdasaryan <[email protected]>
> > ---
> >  fs/proc/task_mmu.c |  5 ++++-
> >  mm/mempolicy.c     |  1 +
> >  mm/pagewalk.c      | 20 ++++++++++++++------
> >  3 files changed, 19 insertions(+), 7 deletions(-)
> >
> > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> > index e091931d7ca1..2fe3d11aad03 100644
> > --- a/fs/proc/task_mmu.c
> > +++ b/fs/proc/task_mmu.c
> > @@ -1797,6 +1797,7 @@ static ssize_t clear_refs_write(struct file *file, 
> > const char __user *buf,
> >               struct clear_refs_private cp = {
> >                       .type = type,
> >               };
> > +             int err;
>
> Maybe better to make it a ssize_t given return type of function?

Ack.

>
> >
> >               if (mmap_write_lock_killable(mm)) {
> >                       count = -EINTR;
> > @@ -1824,7 +1825,9 @@ static ssize_t clear_refs_write(struct file *file, 
> > const char __user *buf,
> >                                               0, mm, 0, -1UL);
> >                       mmu_notifier_invalidate_range_start(&range);
> >               }
> > -             walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
> > +             err = walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
> > +             if (err)
> > +                     count = err;
>
> Hmm this is gross, but it's an established pattern here, ugh.
>
> Now we have an err though, could we update:
>
>                 if (mmap_write_lock_killable(mm)) {
> -                       count = -EINTR;
> +                       err = -EINTR;
>                         goto out_mm;
>                 }
>
> Then we can just do:
>
> +               err = walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
>
> And at the end do:
>
>         return err ?: count;
>
> Which possibly _necessitates_ err being a ssize_t.

Sounds doable. Let me try that.

>
> >               if (type == CLEAR_REFS_SOFT_DIRTY) {
> >                       mmu_notifier_invalidate_range_end(&range);
> >                       flush_tlb_mm(mm);
> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> > index 929e843543cf..bb5b0e83ce0f 100644
> > --- a/mm/mempolicy.c
> > +++ b/mm/mempolicy.c
> > @@ -969,6 +969,7 @@ static const struct mm_walk_ops 
> > queue_pages_lock_vma_walk_ops = {
> >   *      (a hugetlbfs page or a transparent huge page being counted as 1).
> >   * -EIO - a misplaced page found, when MPOL_MF_STRICT specified without 
> > MOVEs.
> >   * -EFAULT - a hole in the memory range, when MPOL_MF_DISCONTIG_OK 
> > unspecified.
> > + * -EINTR - walk got terminated due to pending fatal signal.
>
> Thanks!
>
> >   */
> >  static long
> >  queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long 
> > end,
> > diff --git a/mm/pagewalk.c b/mm/pagewalk.c
> > index eda74273c8ec..a42cd6a6d812 100644
> > --- a/mm/pagewalk.c
> > +++ b/mm/pagewalk.c
> > @@ -438,14 +438,13 @@ static inline void process_mm_walk_lock(struct 
> > mm_struct *mm,
> >               mmap_assert_write_locked(mm);
> >  }
> >
> > -static inline void process_vma_walk_lock(struct vm_area_struct *vma,
> > +static inline int process_vma_walk_lock(struct vm_area_struct *vma,
>
> NIT: Don't need this to be an inline any longer. May as well fix up while 
> we're
> here.

Ack.

>
> >                                        enum page_walk_lock walk_lock)
> >  {
> >  #ifdef CONFIG_PER_VMA_LOCK
> >       switch (walk_lock) {
> >       case PGWALK_WRLOCK:
> > -             vma_start_write(vma);
> > -             break;
> > +             return vma_start_write_killable(vma);
>
> LGTM
>
> >       case PGWALK_WRLOCK_VERIFY:
> >               vma_assert_write_locked(vma);
> >               break;
> > @@ -457,6 +456,7 @@ static inline void process_vma_walk_lock(struct 
> > vm_area_struct *vma,
> >               break;
> >       }
> >  #endif
> > +     return 0;
> >  }
> >
> >  /*
> > @@ -500,7 +500,9 @@ int walk_page_range_mm_unsafe(struct mm_struct *mm, 
> > unsigned long start,
> >                       if (ops->pte_hole)
> >                               err = ops->pte_hole(start, next, -1, &walk);
> >               } else { /* inside vma */
> > -                     process_vma_walk_lock(vma, ops->walk_lock);
> > +                     err = process_vma_walk_lock(vma, ops->walk_lock);
> > +                     if (err)
> > +                             break;
>
> In every other case we set walk.vma = vma or NULL. Is it a problem not setting
> it at all in this code path?

IIUC the other cases set walk.vma because they later call
ops->pte_hole(..., walk). In our case we immediately break out of the
loop and exit the function, which pushes "walk" variable out of scope.
So, walk.vma won't be used and setting it would achieve nothing.

>
> >                       walk.vma = vma;
> >                       next = min(end, vma->vm_end);
> >                       vma = find_vma(mm, vma->vm_end);
> > @@ -717,6 +719,7 @@ int walk_page_range_vma_unsafe(struct vm_area_struct 
> > *vma, unsigned long start,
> >               .vma            = vma,
> >               .private        = private,
> >       };
> > +     int err;
> >
> >       if (start >= end || !walk.mm)
> >               return -EINVAL;
> > @@ -724,7 +727,9 @@ int walk_page_range_vma_unsafe(struct vm_area_struct 
> > *vma, unsigned long start,
> >               return -EINVAL;
> >
> >       process_mm_walk_lock(walk.mm, ops->walk_lock);
> > -     process_vma_walk_lock(vma, ops->walk_lock);
> > +     err = process_vma_walk_lock(vma, ops->walk_lock);
> > +     if (err)
> > +             return err;
>
> LGTM
>
> >       return __walk_page_range(start, end, &walk);
> >  }
> >
> > @@ -747,6 +752,7 @@ int walk_page_vma(struct vm_area_struct *vma, const 
> > struct mm_walk_ops *ops,
> >               .vma            = vma,
> >               .private        = private,
> >       };
> > +     int err;
> >
> >       if (!walk.mm)
> >               return -EINVAL;
> > @@ -754,7 +760,9 @@ int walk_page_vma(struct vm_area_struct *vma, const 
> > struct mm_walk_ops *ops,
> >               return -EINVAL;
> >
> >       process_mm_walk_lock(walk.mm, ops->walk_lock);
> > -     process_vma_walk_lock(vma, ops->walk_lock);
> > +     err = process_vma_walk_lock(vma, ops->walk_lock);
> > +     if (err)
> > +             return err;
>
> LGTM
>
> >       return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
> >  }
> >
> > --
> > 2.53.0.1018.g2bb0e51243-goog
> >
>
> Thanks, Lorenzo

Reply via email to