Hi Huang,

On Wed, Nov 01, 2017 at 01:41:00PM +0800, Huang, Ying wrote:
> Hi, Minchan,
> 
> Minchan Kim <minc...@kernel.org> writes:
> 
> > When I see recent change of swap readahead, I am very unhappy
> > about current code structure which diverges two swap readahead
> > algorithm in do_swap_page. This patch is to clean it up.
> >
> > Main motivation is that fault handler doesn't need to be aware of
> > readahead algorithms but just should call swapin_readahead.
> >
> > As first step, this patch cleans up a little bit but not perfect
> > (I just separate for review easier) so next patch will make the goal
> > complete.
> >
> > Signed-off-by: Minchan Kim <minc...@kernel.org>
> > ---
> >  include/linux/swap.h | 17 ++--------
> >  mm/memory.c          | 17 +++-------
> >  mm/swap_state.c      | 89 
> > ++++++++++++++++++++++++++++------------------------
> >  3 files changed, 55 insertions(+), 68 deletions(-)
> >
> > diff --git a/include/linux/swap.h b/include/linux/swap.h
> > index 84255b3da7c1..7c7c8b344bc9 100644
> > --- a/include/linux/swap.h
> > +++ b/include/linux/swap.h
> > @@ -427,12 +427,8 @@ extern struct page 
> > *__read_swap_cache_async(swp_entry_t, gfp_t,
> >                     bool *new_page_allocated);
> >  extern struct page *swapin_readahead(swp_entry_t, gfp_t,
> >                     struct vm_area_struct *vma, unsigned long addr);
> > -
> > -extern struct page *swap_readahead_detect(struct vm_fault *vmf,
> > -                                     struct vma_swap_readahead *swap_ra);
> >  extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t 
> > gfp_mask,
> > -                                      struct vm_fault *vmf,
> > -                                      struct vma_swap_readahead *swap_ra);
> > +                                      struct vm_fault *vmf);
> >  
> >  /* linux/mm/swapfile.c */
> >  extern atomic_long_t nr_swap_pages;
> > @@ -551,15 +547,8 @@ static inline bool swap_use_vma_readahead(void)
> >     return false;
> >  }
> >  
> > -static inline struct page *swap_readahead_detect(
> > -   struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
> > -{
> > -   return NULL;
> > -}
> > -
> > -static inline struct page *do_swap_page_readahead(
> > -   swp_entry_t fentry, gfp_t gfp_mask,
> > -   struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
> > +static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
> > +                           gfp_t gfp_mask, struct vm_fault *vmf)
> >  {
> >     return NULL;
> >  }
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 8a0c410037d2..e955298e4290 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -2849,21 +2849,14 @@ int do_swap_page(struct vm_fault *vmf)
> >     struct vm_area_struct *vma = vmf->vma;
> >     struct page *page = NULL, *swapcache = NULL;
> >     struct mem_cgroup *memcg;
> > -   struct vma_swap_readahead swap_ra;
> >     swp_entry_t entry;
> >     pte_t pte;
> >     int locked;
> >     int exclusive = 0;
> >     int ret = 0;
> > -   bool vma_readahead = swap_use_vma_readahead();
> >  
> > -   if (vma_readahead)
> > -           page = swap_readahead_detect(vmf, &swap_ra);
> > -   if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
> > -           if (page)
> > -                   put_page(page);
> > +   if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
> >             goto out;
> > -   }
> 
> The page table holding PTE may be unmapped in pte_unmap_same(), so is it
> safe for us to access page table after this in do_swap_page_readahead()?

That's why I calls pte_offset_map in swap_ra_info before the access.

> 
> Best Regards,
> Huang, Ying
> 
> >     entry = pte_to_swp_entry(vmf->orig_pte);
> >     if (unlikely(non_swap_entry(entry))) {
> > @@ -2889,9 +2882,7 @@ int do_swap_page(struct vm_fault *vmf)
> >  
> >  
> >     delayacct_set_flag(DELAYACCT_PF_SWAPIN);
> > -   if (!page)
> > -           page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
> > -                                    vmf->address);
> > +   page = lookup_swap_cache(entry, vma, vmf->address);
> >     if (!page) {
> >             struct swap_info_struct *si = swp_swap_info(entry);
> >  
> > @@ -2907,9 +2898,9 @@ int do_swap_page(struct vm_fault *vmf)
> >                             swap_readpage(page, true);
> >                     }
> >             } else {
> > -                   if (vma_readahead)
> > +                   if (swap_use_vma_readahead())
> >                             page = do_swap_page_readahead(entry,
> > -                                   GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
> > +                                   GFP_HIGHUSER_MOVABLE, vmf);
> >                     else
> >                             page = swapin_readahead(entry,
> >                                    GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> > diff --git a/mm/swap_state.c b/mm/swap_state.c
> > index 6c017ced11e6..e3c535fcd2df 100644
> > --- a/mm/swap_state.c
> > +++ b/mm/swap_state.c
> > @@ -331,32 +331,38 @@ struct page *lookup_swap_cache(swp_entry_t entry, 
> > struct vm_area_struct *vma,
> >                            unsigned long addr)
> >  {
> >     struct page *page;
> > -   unsigned long ra_info;
> > -   int win, hits, readahead;
> >  
> >     page = find_get_page(swap_address_space(entry), swp_offset(entry));
> >  
> >     INC_CACHE_INFO(find_total);
> >     if (page) {
> > +           bool vma_ra = swap_use_vma_readahead();
> > +           bool readahead = TestClearPageReadahead(page);
> > +
> >             INC_CACHE_INFO(find_success);
> >             if (unlikely(PageTransCompound(page)))
> >                     return page;
> > -           readahead = TestClearPageReadahead(page);
> > -           if (vma) {
> > -                   ra_info = GET_SWAP_RA_VAL(vma);
> > -                   win = SWAP_RA_WIN(ra_info);
> > -                   hits = SWAP_RA_HITS(ra_info);
> > +
> > +           if (vma && vma_ra) {
> > +                   unsigned long ra_val;
> > +                   int win, hits;
> > +
> > +                   ra_val = GET_SWAP_RA_VAL(vma);
> > +                   win = SWAP_RA_WIN(ra_val);
> > +                   hits = SWAP_RA_HITS(ra_val);
> >                     if (readahead)
> >                             hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
> >                     atomic_long_set(&vma->swap_readahead_info,
> >                                     SWAP_RA_VAL(addr, win, hits));
> >             }
> > +
> >             if (readahead) {
> >                     count_vm_event(SWAP_RA_HIT);
> > -                   if (!vma)
> > +                   if (!vma || !vma_ra)
> >                             atomic_inc(&swapin_readahead_hits);
> >             }
> >     }
> > +
> >     return page;
> >  }
> >  
> > @@ -645,16 +651,15 @@ static inline void swap_ra_clamp_pfn(struct 
> > vm_area_struct *vma,
> >                 PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
> >  }
> >  
> > -struct page *swap_readahead_detect(struct vm_fault *vmf,
> > -                              struct vma_swap_readahead *swap_ra)
> > +static void swap_ra_info(struct vm_fault *vmf,
> > +                   struct vma_swap_readahead *ra_info)
> >  {
> >     struct vm_area_struct *vma = vmf->vma;
> > -   unsigned long swap_ra_info;
> > -   struct page *page;
> > +   unsigned long ra_val;
> >     swp_entry_t entry;
> >     unsigned long faddr, pfn, fpfn;
> >     unsigned long start, end;
> > -   pte_t *pte;
> > +   pte_t *pte, *orig_pte;
> >     unsigned int max_win, hits, prev_win, win, left;
> >  #ifndef CONFIG_64BIT
> >     pte_t *tpte;
> > @@ -663,30 +668,32 @@ struct page *swap_readahead_detect(struct vm_fault 
> > *vmf,
> >     max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
> >                          SWAP_RA_ORDER_CEILING);
> >     if (max_win == 1) {
> > -           swap_ra->win = 1;
> > -           return NULL;
> > +           ra_info->win = 1;
> > +           return;
> >     }
> >  
> >     faddr = vmf->address;
> > -   entry = pte_to_swp_entry(vmf->orig_pte);
> > -   if ((unlikely(non_swap_entry(entry))))
> > -           return NULL;
> > -   page = lookup_swap_cache(entry, vma, faddr);
> > -   if (page)
> > -           return page;
> > +   orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
> > +   entry = pte_to_swp_entry(*pte);
> > +   if ((unlikely(non_swap_entry(entry)))) {
> > +           pte_unmap(orig_pte);
> > +           return;
> > +   }
> >  
> >     fpfn = PFN_DOWN(faddr);
> > -   swap_ra_info = GET_SWAP_RA_VAL(vma);
> > -   pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
> > -   prev_win = SWAP_RA_WIN(swap_ra_info);
> > -   hits = SWAP_RA_HITS(swap_ra_info);
> > -   swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
> > +   ra_val = GET_SWAP_RA_VAL(vma);
> > +   pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
> > +   prev_win = SWAP_RA_WIN(ra_val);
> > +   hits = SWAP_RA_HITS(ra_val);
> > +   ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
> >                                            max_win, prev_win);
> >     atomic_long_set(&vma->swap_readahead_info,
> >                     SWAP_RA_VAL(faddr, win, 0));
> >  
> > -   if (win == 1)
> > -           return NULL;
> > +   if (win == 1) {
> > +           pte_unmap(orig_pte);
> > +           return;
> > +   }
> >  
> >     /* Copy the PTEs because the page table may be unmapped */
> >     if (fpfn == pfn + 1)
> > @@ -699,23 +706,21 @@ struct page *swap_readahead_detect(struct vm_fault 
> > *vmf,
> >             swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
> >                               &start, &end);
> >     }
> > -   swap_ra->nr_pte = end - start;
> > -   swap_ra->offset = fpfn - start;
> > -   pte = vmf->pte - swap_ra->offset;
> > +   ra_info->nr_pte = end - start;
> > +   ra_info->offset = fpfn - start;
> > +   pte -= ra_info->offset;
> >  #ifdef CONFIG_64BIT
> > -   swap_ra->ptes = pte;
> > +   ra_info->ptes = pte;
> >  #else
> > -   tpte = swap_ra->ptes;
> > +   tpte = ra_info->ptes;
> >     for (pfn = start; pfn != end; pfn++)
> >             *tpte++ = *pte++;
> >  #endif
> > -
> > -   return NULL;
> > +   pte_unmap(orig_pte);
> >  }
> >  
> >  struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> > -                               struct vm_fault *vmf,
> > -                               struct vma_swap_readahead *swap_ra)
> > +                               struct vm_fault *vmf)
> >  {
> >     struct blk_plug plug;
> >     struct vm_area_struct *vma = vmf->vma;
> > @@ -724,12 +729,14 @@ struct page *do_swap_page_readahead(swp_entry_t 
> > fentry, gfp_t gfp_mask,
> >     swp_entry_t entry;
> >     unsigned int i;
> >     bool page_allocated;
> > +   struct vma_swap_readahead ra_info = {0,};
> >  
> > -   if (swap_ra->win == 1)
> > +   swap_ra_info(vmf, &ra_info);
> > +   if (ra_info.win == 1)
> >             goto skip;
> >  
> >     blk_start_plug(&plug);
> > -   for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
> > +   for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
> >          i++, pte++) {
> >             pentry = *pte;
> >             if (pte_none(pentry))
> > @@ -745,7 +752,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, 
> > gfp_t gfp_mask,
> >                     continue;
> >             if (page_allocated) {
> >                     swap_readpage(page, false);
> > -                   if (i != swap_ra->offset &&
> > +                   if (i != ra_info.offset &&
> >                         likely(!PageTransCompound(page))) {
> >                             SetPageReadahead(page);
> >                             count_vm_event(SWAP_RA);
> > @@ -757,7 +764,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, 
> > gfp_t gfp_mask,
> >     lru_add_drain();
> >  skip:
> >     return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
> > -                                swap_ra->win == 1);
> > +                                ra_info.win == 1);
> >  }
> >  
> >  #ifdef CONFIG_SYSFS
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majord...@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"d...@kvack.org";> em...@kvack.org </a>

Reply via email to