On Tue, Sep 24, 2019 at 05:52:08PM -0700, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <wi...@infradead.org>
> 
> We return -EEXIST if there are any non-shadow entries in the page
> cache in the range covered by the large page.  If there are multiple
> shadow entries in the range, we set *shadowp to one of them (currently
> the one at the highest index).  If that turns out to be the wrong
> answer, we can implement something more complex.  This is mostly
> modelled after the equivalent function in the shmem code.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <wi...@infradead.org>
> ---
>  mm/filemap.c | 37 ++++++++++++++++++++++++++-----------
>  1 file changed, 26 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index bab97addbb1d..afe8f5d95810 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -855,6 +855,7 @@ static int __add_to_page_cache_locked(struct page *page,
>       int huge = PageHuge(page);
>       struct mem_cgroup *memcg;
>       int error;
> +     unsigned int nr = 1;
>       void *old;
>  
>       VM_BUG_ON_PAGE(!PageLocked(page), page);
> @@ -866,31 +867,45 @@ static int __add_to_page_cache_locked(struct page *page,
>                                             gfp_mask, &memcg, false);
>               if (error)
>                       return error;
> +             xas_set_order(&xas, offset, compound_order(page));
> +             nr = compound_nr(page);
>       }
>  
> -     get_page(page);
> +     page_ref_add(page, nr);
>       page->mapping = mapping;
>       page->index = offset;
>  
>       do {
> +             unsigned long exceptional = 0;
> +             unsigned int i = 0;
> +
>               xas_lock_irq(&xas);
> -             old = xas_load(&xas);
> -             if (old && !xa_is_value(old))
> +             xas_for_each_conflict(&xas, old) {
> +                     if (!xa_is_value(old))
> +                             break;
> +                     exceptional++;
> +                     if (shadowp)
> +                             *shadowp = old;
> +             }
> +             if (old)
>                       xas_set_err(&xas, -EEXIST);

This confused me.

Do we rely on 'old' being NULL when the loop completes without hitting the
'break'? That is not very obvious.

Can we add a comment explaining this, or call xas_set_err() within the loop,
next to the 'break'?

> -             xas_store(&xas, page);
> +             xas_create_range(&xas);
>               if (xas_error(&xas))
>                       goto unlock;
>  
> -             if (xa_is_value(old)) {
> -                     mapping->nrexceptional--;
> -                     if (shadowp)
> -                             *shadowp = old;
> +next:
> +             xas_store(&xas, page);
> +             if (++i < nr) {
> +                     xas_next(&xas);
> +                     goto next;
>               }
> -             mapping->nrpages++;
> +             mapping->nrexceptional -= exceptional;
> +             mapping->nrpages += nr;
>  
>               /* hugetlb pages do not participate in page cache accounting */
>               if (!huge)
> -                     __inc_node_page_state(page, NR_FILE_PAGES);
> +                     __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES,
> +                                             nr);

We also need to bump NR_FILE_THPS here.

>  unlock:
>               xas_unlock_irq(&xas);
>       } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
> @@ -907,7 +922,7 @@ static int __add_to_page_cache_locked(struct page *page,
>       /* Leave page->index set: truncation relies upon it */
>       if (!huge)
>               mem_cgroup_cancel_charge(page, memcg, false);
> -     put_page(page);
> +     page_ref_sub(page, nr);
>       return xas_error(&xas);
>  }
>  ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
> -- 
> 2.23.0
> 
> 

-- 
 Kirill A. Shutemov

Reply via email to