Re: [PATCHv3 04/24] rmap: add argument to charge compound page

Kirill A. Shutemov Wed, 04 Mar 2015 03:54:09 -0800

On Mon, Feb 23, 2015 at 05:21:31PM +0100, Vlastimil Babka wrote:
> On 02/12/2015 05:18 PM, Kirill A. Shutemov wrote:
> > We're going to allow mapping of individual 4k pages of a THP compound
> > page. It means we cannot rely on the PageTransHuge() check to decide
> > whether to map a small page or a THP.
> > 
> > The patch adds a new argument to the rmap functions to indicate whether
> > we want to map the whole compound page or only the small page.
> > 
> > Signed-off-by: Kirill A. Shutemov <[email protected]>
> > ---
> >  include/linux/rmap.h    | 14 +++++++++++---
> >  kernel/events/uprobes.c |  4 ++--
> >  mm/huge_memory.c        | 16 ++++++++--------
> >  mm/hugetlb.c            |  4 ++--
> >  mm/ksm.c                |  4 ++--
> >  mm/memory.c             | 14 +++++++-------
> >  mm/migrate.c            |  8 ++++----
> >  mm/rmap.c               | 43 +++++++++++++++++++++++++++----------------
> >  mm/swapfile.c           |  4 ++--
> >  9 files changed, 65 insertions(+), 46 deletions(-)
> > 
> > diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> > index c4088feac1fc..3bf73620b672 100644
> > --- a/include/linux/rmap.h
> > +++ b/include/linux/rmap.h
> > @@ -168,16 +168,24 @@ static inline void anon_vma_merge(struct 
> > vm_area_struct *vma,
> >  
> >  struct anon_vma *page_get_anon_vma(struct page *page);
> >  
> > +/* flags for do_page_add_anon_rmap() */
> > +enum {
> > +   RMAP_EXCLUSIVE = 1,
> > +   RMAP_COMPOUND = 2,
> > +};
> > +
> >  /*
> >   * rmap interfaces called when adding or removing pte of page
> >   */
> >  void page_move_anon_rmap(struct page *, struct vm_area_struct *, unsigned 
> > long);
> > -void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned 
> > long);
> > +void page_add_anon_rmap(struct page *, struct vm_area_struct *,
> > +           unsigned long, bool);
> >  void do_page_add_anon_rmap(struct page *, struct vm_area_struct *,
> >                        unsigned long, int);
> > -void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, 
> > unsigned long);
> > +void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
> > +           unsigned long, bool);
> >  void page_add_file_rmap(struct page *);
> > -void page_remove_rmap(struct page *);
> > +void page_remove_rmap(struct page *, bool);
> >  
> >  void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
> >                         unsigned long);
> > diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> > index cb346f26a22d..5523daf59953 100644
> > --- a/kernel/events/uprobes.c
> > +++ b/kernel/events/uprobes.c
> > @@ -183,7 +183,7 @@ static int __replace_page(struct vm_area_struct *vma, 
> > unsigned long addr,
> >             goto unlock;
> >  
> >     get_page(kpage);
> > -   page_add_new_anon_rmap(kpage, vma, addr);
> > +   page_add_new_anon_rmap(kpage, vma, addr, false);
> >     mem_cgroup_commit_charge(kpage, memcg, false);
> >     lru_cache_add_active_or_unevictable(kpage, vma);
> >  
> > @@ -196,7 +196,7 @@ static int __replace_page(struct vm_area_struct *vma, 
> > unsigned long addr,
> >     ptep_clear_flush_notify(vma, addr, ptep);
> >     set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
> >  
> > -   page_remove_rmap(page);
> > +   page_remove_rmap(page, false);
> >     if (!page_mapped(page))
> >             try_to_free_swap(page);
> >     pte_unmap_unlock(ptep, ptl);
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index 5f4c97e1a6da..36637a80669e 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -743,7 +743,7 @@ static int __do_huge_pmd_anonymous_page(struct 
> > mm_struct *mm,
> >             pmd_t entry;
> >             entry = mk_huge_pmd(page, vma->vm_page_prot);
> >             entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
> > -           page_add_new_anon_rmap(page, vma, haddr);
> > +           page_add_new_anon_rmap(page, vma, haddr, true);
> >             mem_cgroup_commit_charge(page, memcg, false);
> >             lru_cache_add_active_or_unevictable(page, vma);
> >             pgtable_trans_huge_deposit(mm, pmd, pgtable);
> > @@ -1034,7 +1034,7 @@ static int do_huge_pmd_wp_page_fallback(struct 
> > mm_struct *mm,
> >             entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> >             memcg = (void *)page_private(pages[i]);
> >             set_page_private(pages[i], 0);
> > -           page_add_new_anon_rmap(pages[i], vma, haddr);
> > +           page_add_new_anon_rmap(pages[i], vma, haddr, false);
> >             mem_cgroup_commit_charge(pages[i], memcg, false);
> >             lru_cache_add_active_or_unevictable(pages[i], vma);
> >             pte = pte_offset_map(&_pmd, haddr);
> > @@ -1046,7 +1046,7 @@ static int do_huge_pmd_wp_page_fallback(struct 
> > mm_struct *mm,
> >  
> >     smp_wmb(); /* make pte visible before pmd */
> >     pmd_populate(mm, pmd, pgtable);
> > -   page_remove_rmap(page);
> > +   page_remove_rmap(page, true);
> >     spin_unlock(ptl);
> >  
> >     mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
> > @@ -1168,7 +1168,7 @@ alloc:
> >             entry = mk_huge_pmd(new_page, vma->vm_page_prot);
> >             entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
> >             pmdp_clear_flush_notify(vma, haddr, pmd);
> > -           page_add_new_anon_rmap(new_page, vma, haddr);
> > +           page_add_new_anon_rmap(new_page, vma, haddr, true);
> >             mem_cgroup_commit_charge(new_page, memcg, false);
> >             lru_cache_add_active_or_unevictable(new_page, vma);
> >             set_pmd_at(mm, haddr, pmd, entry);
> > @@ -1178,7 +1178,7 @@ alloc:
> >                     put_huge_zero_page();
> >             } else {
> >                     VM_BUG_ON_PAGE(!PageHead(page), page);
> > -                   page_remove_rmap(page);
> > +                   page_remove_rmap(page, true);
> >                     put_page(page);
> >             }
> >             ret |= VM_FAULT_WRITE;
> > @@ -1431,7 +1431,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct 
> > vm_area_struct *vma,
> >                     put_huge_zero_page();
> >             } else {
> >                     page = pmd_page(orig_pmd);
> > -                   page_remove_rmap(page);
> > +                   page_remove_rmap(page, true);
> >                     VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
> >                     add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
> >                     VM_BUG_ON_PAGE(!PageHead(page), page);
> > @@ -2368,7 +2368,7 @@ static void __collapse_huge_page_copy(pte_t *pte, 
> > struct page *page,
> >                      * superfluous.
> >                      */
> >                     pte_clear(vma->vm_mm, address, _pte);
> > -                   page_remove_rmap(src_page);
> > +                   page_remove_rmap(src_page, false);
> >                     spin_unlock(ptl);
> >                     free_page_and_swap_cache(src_page);
> >             }
> > @@ -2658,7 +2658,7 @@ static void collapse_huge_page(struct mm_struct *mm,
> >  
> >     spin_lock(pmd_ptl);
> >     BUG_ON(!pmd_none(*pmd));
> > -   page_add_new_anon_rmap(new_page, vma, address);
> > +   page_add_new_anon_rmap(new_page, vma, address, true);
> >     mem_cgroup_commit_charge(new_page, memcg, false);
> >     lru_cache_add_active_or_unevictable(new_page, vma);
> >     pgtable_trans_huge_deposit(mm, pmd, pgtable);
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 0a9ac6c26832..ebb7329301c4 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -2688,7 +2688,7 @@ again:
> >             if (huge_pte_dirty(pte))
> >                     set_page_dirty(page);
> >  
> > -           page_remove_rmap(page);
> > +           page_remove_rmap(page, true);
> >             force_flush = !__tlb_remove_page(tlb, page);
> >             if (force_flush) {
> >                     address += sz;
> > @@ -2908,7 +2908,7 @@ retry_avoidcopy:
> >             mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
> >             set_huge_pte_at(mm, address, ptep,
> >                             make_huge_pte(vma, new_page, 1));
> > -           page_remove_rmap(old_page);
> > +           page_remove_rmap(old_page, true);
> >             hugepage_add_new_anon_rmap(new_page, vma, address);
> >             /* Make the old page be freed below */
> >             new_page = old_page;
> > diff --git a/mm/ksm.c b/mm/ksm.c
> > index 4162dce2eb44..92182eeba87d 100644
> > --- a/mm/ksm.c
> > +++ b/mm/ksm.c
> > @@ -957,13 +957,13 @@ static int replace_page(struct vm_area_struct *vma, 
> > struct page *page,
> >     }
> >  
> >     get_page(kpage);
> > -   page_add_anon_rmap(kpage, vma, addr);
> > +   page_add_anon_rmap(kpage, vma, addr, false);
> >  
> >     flush_cache_page(vma, addr, pte_pfn(*ptep));
> >     ptep_clear_flush_notify(vma, addr, ptep);
> >     set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
> >  
> > -   page_remove_rmap(page);
> > +   page_remove_rmap(page, false);
> >     if (!page_mapped(page))
> >             try_to_free_swap(page);
> >     put_page(page);
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 8ae52c918415..5529627d2cd6 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -1125,7 +1125,7 @@ again:
> >                                     mark_page_accessed(page);
> >                             rss[MM_FILEPAGES]--;
> >                     }
> > -                   page_remove_rmap(page);
> > +                   page_remove_rmap(page, false);
> >                     if (unlikely(page_mapcount(page) < 0))
> >                             print_bad_pte(vma, addr, ptent, page);
> >                     if (unlikely(!__tlb_remove_page(tlb, page))) {
> > @@ -2189,7 +2189,7 @@ gotten:
> >              * thread doing COW.
> >              */
> >             ptep_clear_flush_notify(vma, address, page_table);
> > -           page_add_new_anon_rmap(new_page, vma, address);
> > +           page_add_new_anon_rmap(new_page, vma, address, false);
> >             mem_cgroup_commit_charge(new_page, memcg, false);
> >             lru_cache_add_active_or_unevictable(new_page, vma);
> >             /*
> > @@ -2222,7 +2222,7 @@ gotten:
> >                      * mapcount is visible. So transitively, TLBs to
> >                      * old page will be flushed before it can be reused.
> >                      */
> > -                   page_remove_rmap(old_page);
> > +                   page_remove_rmap(old_page, false);
> >             }
> >  
> >             /* Free the old page.. */
> > @@ -2465,7 +2465,7 @@ static int do_swap_page(struct mm_struct *mm, struct 
> > vm_area_struct *vma,
> >             pte = maybe_mkwrite(pte_mkdirty(pte), vma);
> >             flags &= ~FAULT_FLAG_WRITE;
> >             ret |= VM_FAULT_WRITE;
> > -           exclusive = 1;
> > +           exclusive = RMAP_EXCLUSIVE;
> >     }
> >     flush_icache_page(vma, page);
> >     if (pte_swp_soft_dirty(orig_pte))
> > @@ -2475,7 +2475,7 @@ static int do_swap_page(struct mm_struct *mm, struct 
> > vm_area_struct *vma,
> >             do_page_add_anon_rmap(page, vma, address, exclusive);
> >             mem_cgroup_commit_charge(page, memcg, true);
> >     } else { /* ksm created a completely new copy */
> > -           page_add_new_anon_rmap(page, vma, address);
> > +           page_add_new_anon_rmap(page, vma, address, false);
> >             mem_cgroup_commit_charge(page, memcg, false);
> >             lru_cache_add_active_or_unevictable(page, vma);
> >     }
> > @@ -2613,7 +2613,7 @@ static int do_anonymous_page(struct mm_struct *mm, 
> > struct vm_area_struct *vma,
> >             goto release;
> >  
> >     inc_mm_counter_fast(mm, MM_ANONPAGES);
> > -   page_add_new_anon_rmap(page, vma, address);
> > +   page_add_new_anon_rmap(page, vma, address, false);
> >     mem_cgroup_commit_charge(page, memcg, false);
> >     lru_cache_add_active_or_unevictable(page, vma);
> >  setpte:
> > @@ -2701,7 +2701,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned 
> > long address,
> >             entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> >     if (anon) {
> >             inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
> > -           page_add_new_anon_rmap(page, vma, address);
> > +           page_add_new_anon_rmap(page, vma, address, false);
> >     } else {
> >             inc_mm_counter_fast(vma->vm_mm, MM_FILEPAGES);
> >             page_add_file_rmap(page);
> > diff --git a/mm/migrate.c b/mm/migrate.c
> > index 85e042686031..0d2b3110277a 100644
> > --- a/mm/migrate.c
> > +++ b/mm/migrate.c
> > @@ -166,7 +166,7 @@ static int remove_migration_pte(struct page *new, 
> > struct vm_area_struct *vma,
> >             else
> >                     page_dup_rmap(new);
> >     } else if (PageAnon(new))
> > -           page_add_anon_rmap(new, vma, addr);
> > +           page_add_anon_rmap(new, vma, addr, false);
> >     else
> >             page_add_file_rmap(new);
> >  
> > @@ -1803,7 +1803,7 @@ fail_putback:
> >      * guarantee the copy is visible before the pagetable update.
> >      */
> >     flush_cache_range(vma, mmun_start, mmun_end);
> > -   page_add_anon_rmap(new_page, vma, mmun_start);
> > +   page_add_anon_rmap(new_page, vma, mmun_start, true);
> >     pmdp_clear_flush_notify(vma, mmun_start, pmd);
> >     set_pmd_at(mm, mmun_start, pmd, entry);
> >     flush_tlb_range(vma, mmun_start, mmun_end);
> > @@ -1814,13 +1814,13 @@ fail_putback:
> >             flush_tlb_range(vma, mmun_start, mmun_end);
> >             mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
> >             update_mmu_cache_pmd(vma, address, &entry);
> > -           page_remove_rmap(new_page);
> > +           page_remove_rmap(new_page, true);
> >             goto fail_putback;
> >     }
> >  
> >     mem_cgroup_migrate(page, new_page, false);
> >  
> > -   page_remove_rmap(page);
> > +   page_remove_rmap(page, true);
> >  
> >     spin_unlock(ptl);
> >     mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index 47b3ba87c2dd..f67e83be75e4 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -1041,9 +1041,9 @@ static void __page_check_anon_rmap(struct page *page,
> >   * (but PageKsm is never downgraded to PageAnon).
> >   */
> >  void page_add_anon_rmap(struct page *page,
> > -   struct vm_area_struct *vma, unsigned long address)
> > +   struct vm_area_struct *vma, unsigned long address, bool compound)
> >  {
> > -   do_page_add_anon_rmap(page, vma, address, 0);
> > +   do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
> >  }
> >  
> >  /*
> > @@ -1052,21 +1052,24 @@ void page_add_anon_rmap(struct page *page,
> >   * Everybody else should continue to use page_add_anon_rmap above.
> >   */
> >  void do_page_add_anon_rmap(struct page *page,
> > -   struct vm_area_struct *vma, unsigned long address, int exclusive)
> > +   struct vm_area_struct *vma, unsigned long address, int flags)
> >  {
> >     int first = atomic_inc_and_test(&page->_mapcount);
> >     if (first) {
> > +           bool compound = flags & RMAP_COMPOUND;
> > +           int nr = compound ? hpage_nr_pages(page) : 1;
> 
> hpage_nr_pages(page) is:
> 
> static inline int hpage_nr_pages(struct page *page)
> {
>         if (unlikely(PageTransHuge(page)))
>                 return HPAGE_PMD_NR;
>         return 1;
> }
> 
> and later...
> 
> >             /*
> >              * We use the irq-unsafe __{inc|mod}_zone_page_stat because
> >              * these counters are not modified in interrupt context, and
> >              * pte lock(a spinlock) is held, which implies preemption
> >              * disabled.
> >              */
> > -           if (PageTransHuge(page))
> > +           if (compound) {
> > +                   VM_BUG_ON_PAGE(!PageTransHuge(page), page);
> 
> this means that we could assume that
> (compound == true) => (PageTransHuge(page) == true)
> 
> and simplify above to:
> 
> int nr = compound ? HPAGE_PMD_NR : 1;
> 
> Right?


No. HPAGE_PMD_NR is defined based on HPAGE_PMD_SHIFT, which is BUILD_BUG()
without CONFIG_TRANSPARENT_HUGEPAGE. We would get a compile error without
the helper.

-- 
 Kirill A. Shutemov
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCHv3 04/24] rmap: add argument to charge compound page

Reply via email to