Re: [PATCH 1/8] mm: Add optional support for PUD-sized transparent hugepages

2016-01-04 Thread Kirill A. Shutemov
On Sat, Jan 02, 2016 at 12:06:38PM -0500, Matthew Wilcox wrote:
> On Mon, Dec 28, 2015 at 12:05:51PM +0200, Kirill A. Shutemov wrote:
> > On Thu, Dec 24, 2015 at 11:20:30AM -0500, Matthew Wilcox wrote:
> > > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > > index 4bf3811..e14634f 100644
> > > --- a/include/linux/mm.h
> > > +++ b/include/linux/mm.h
> > > @@ -1958,6 +1977,17 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
> > >   return ptl;
> > >  }
> > >  
> > > +/*
> > > + * No scalability reason to split PUD locks yet, but follow the same pattern
> > > + * as the PMD locks to make it easier if we have to.
> > > + */
> > 
> > I don't think it does any good unless you convert all the other places
> > where we use page_table_lock to protect the pud table (like
> > __pud_alloc()) to the same API.
> > I think this would deserve a separate patch.
> 
> Sure, a separate patch to convert existing users of the PTL.  But I
> don't think it does any harm to introduce the PUD version of the PMD API.
> Maybe with a comment indicating that there is significant work to be done
> in converting existing users to this API?

I think that's fine, with a fat comment around the pud_lock() definition.
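
For illustration, the fat comment might read something like this (a
sketch only; the wording below is mine, not lifted from the patch):

	/*
	 * No scalability reason to split PUD locks yet, but follow the
	 * same pattern as the PMD locks to make it easier if we have to.
	 * Note that other users of the PUD page table, __pud_alloc() for
	 * example, still take mm->page_table_lock directly; they need to
	 * be converted to this API before the lock can ever be split.
	 */
	static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
	{
		spinlock_t *ptl = &mm->page_table_lock;
		spin_lock(ptl);
		return ptl;
	}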
 
> > > diff --git a/mm/memory.c b/mm/memory.c
> > > index 416b129..7328df0 100644
> > > --- a/mm/memory.c
> > > +++ b/mm/memory.c
> > > @@ -1220,9 +1220,27 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
> > >   pud = pud_offset(pgd, addr);
> > >   do {
> > >   next = pud_addr_end(addr, end);
> > > + if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
> > > + if (next - addr != HPAGE_PUD_SIZE) {
> > > +#ifdef CONFIG_DEBUG_VM
> > 
> > IS_ENABLED(CONFIG_DEBUG_VM) ?
> > 
> > > + if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
> > > + pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n",
> > > + __func__, addr, end,
> > > + vma->vm_start,
> > > + vma->vm_end);
> > 
> > dump_vma(), I guess.
> 
> These two issues are copy-and-paste from the existing PMD code.  I'm happy
> to update the PMD code to the new-and-improved way of doing things;
> I'm just not keen to have the PMD and PUD code diverge unnecessarily.

Yes, please update the PMD code too; it looks ugly. VM_BUG_ON_VMA() is
probably the right way to deal with this.
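
With VM_BUG_ON_VMA() the whole #ifdef block would collapse to something
like this (sketch only):

	if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
		if (next - addr != HPAGE_PUD_SIZE) {
			VM_BUG_ON_VMA(!rwsem_is_locked(&tlb->mm->mmap_sem),
					vma);
			split_huge_pud(vma, pud, addr);
		} else if (zap_huge_pud(tlb, vma, pud, addr))
			goto next;
		/* fall through */
	}

VM_BUG_ON_VMA() already does the dump_vma() + BUG() dance when
CONFIG_DEBUG_VM is set and compiles away otherwise.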

-- 
 Kirill A. Shutemov


Re: [PATCH 1/8] mm: Add optional support for PUD-sized transparent hugepages

2016-01-02 Thread Matthew Wilcox
On Mon, Dec 28, 2015 at 12:05:51PM +0200, Kirill A. Shutemov wrote:
> On Thu, Dec 24, 2015 at 11:20:30AM -0500, Matthew Wilcox wrote:
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 4bf3811..e14634f 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -1958,6 +1977,17 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
> > return ptl;
> >  }
> >  
> > +/*
> > + * No scalability reason to split PUD locks yet, but follow the same pattern
> > + * as the PMD locks to make it easier if we have to.
> > + */
> 
> I don't think it does any good unless you convert all the other places
> where we use page_table_lock to protect the pud table (like
> __pud_alloc()) to the same API.
> I think this would deserve a separate patch.

Sure, a separate patch to convert existing users of the PTL.  But I
don't think it does any harm to introduce the PUD version of the PMD API.
Maybe with a comment indicating that there is significant work to be done
in converting existing users to this API?

> > diff --git a/mm/memory.c b/mm/memory.c
> > index 416b129..7328df0 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -1220,9 +1220,27 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
> > pud = pud_offset(pgd, addr);
> > do {
> > next = pud_addr_end(addr, end);
> > +   if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
> > +   if (next - addr != HPAGE_PUD_SIZE) {
> > +#ifdef CONFIG_DEBUG_VM
> 
> IS_ENABLED(CONFIG_DEBUG_VM) ?
> 
> > +   if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
> > +   pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n",
> > +   __func__, addr, end,
> > +   vma->vm_start,
> > +   vma->vm_end);
> 
> dump_vma(), I guess.

These two issues are copy-and-paste from the existing PMD code.  I'm happy
to update the PMD code to the new-and-improved way of doing things;
I'm just not keen to have the PMD and PUD code diverge unnecessarily.


Re: [PATCH 1/8] mm: Add optional support for PUD-sized transparent hugepages

2015-12-28 Thread Kirill A. Shutemov
On Thu, Dec 24, 2015 at 11:20:30AM -0500, Matthew Wilcox wrote:
> The only major difference is how the new ->pud_entry method in mm_walk
> works.  The ->pmd_entry method replaces the ->pte_entry method, whereas
> the ->pud_entry method works along with either ->pmd_entry or ->pte_entry.

I think it makes the pagewalk API confusing. We need something more coherent.

-- 
 Kirill A. Shutemov


Re: [PATCH 1/8] mm: Add optional support for PUD-sized transparent hugepages

2015-12-28 Thread Kirill A. Shutemov
On Thu, Dec 24, 2015 at 11:20:30AM -0500, Matthew Wilcox wrote:
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 4bf3811..e14634f 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1958,6 +1977,17 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
>   return ptl;
>  }
>  
> +/*
> + * No scalability reason to split PUD locks yet, but follow the same pattern
> + * as the PMD locks to make it easier if we have to.
> + */

I don't think it does any good unless you convert all the other places
where we use page_table_lock to protect the pud table (like
__pud_alloc()) to the same API.
I think this would deserve a separate patch.

> +static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
> +{
> + spinlock_t *ptl = &mm->page_table_lock;
> + spin_lock(ptl);
> + return ptl;
> +}
> +
>  extern void free_area_init(unsigned long * zones_size);
>  extern void free_area_init_node(int nid, unsigned long * zones_size,
>   unsigned long zone_start_pfn, unsigned long *zholes_size);

...

> diff --git a/mm/memory.c b/mm/memory.c
> index 416b129..7328df0 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1220,9 +1220,27 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
>   pud = pud_offset(pgd, addr);
>   do {
>   next = pud_addr_end(addr, end);
> + if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
> + if (next - addr != HPAGE_PUD_SIZE) {
> +#ifdef CONFIG_DEBUG_VM

IS_ENABLED(CONFIG_DEBUG_VM) ?

> + if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
> + pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n",
> + __func__, addr, end,
> + vma->vm_start,
> + vma->vm_end);

dump_vma(), I guess.

> + BUG();
> + }
> +#endif
> + split_huge_pud(vma, pud, addr);
> + } else if (zap_huge_pud(tlb, vma, pud, addr))
> + goto next;
> + /* fall through */
> + }
>   if (pud_none_or_clear_bad(pud))
>   continue;
>   next = zap_pmd_range(tlb, vma, pud, addr, next, details);
> +next:
> + cond_resched();
>   } while (pud++, addr = next, addr != end);
>  
>   return addr;
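
For reference, the IS_ENABLED() form would be roughly (sketch only,
pr_err() arguments elided):

	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
			!rwsem_is_locked(&tlb->mm->mmap_sem)) {
		pr_err(...);
		BUG();
	}

Unlike the #ifdef, the condition stays visible to the compiler in every
configuration and simply gets optimised away when CONFIG_DEBUG_VM is
off.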
-- 
 Kirill A. Shutemov


[PATCH 1/8] mm: Add optional support for PUD-sized transparent hugepages

2015-12-24 Thread Matthew Wilcox
From: Matthew Wilcox 

The current transparent hugepage code only supports PMDs.  This patch
adds support for transparent use of PUDs with DAX.  It does not include
support for anonymous pages.

Most of this patch simply parallels the work that was done for huge PMDs.
The only major difference is how the new ->pud_entry method in mm_walk
works.  The ->pmd_entry method replaces the ->pte_entry method, whereas
the ->pud_entry method works along with either ->pmd_entry or ->pte_entry.
The pagewalk code takes care of locking the PUD before calling ->pud_entry,
so handlers do not need to worry whether the PUD is stable.
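
For example, a handler can then be as simple as this (an illustrative
sketch only, assuming ->pud_entry mirrors ->pmd_entry's signature;
handle_huge_pud() is a made-up helper):

	static int my_pud_entry(pud_t *pud, unsigned long addr,
				unsigned long next, struct mm_walk *walk)
	{
		/* The pagewalk core has already taken the PUD lock here. */
		if (pud_trans_huge(*pud))
			return handle_huge_pud(pud, addr, walk);
		return 0;	/* descend to ->pmd_entry / ->pte_entry */
	}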

Signed-off-by: Matthew Wilcox 
---
 arch/Kconfig  |   3 +
 include/asm-generic/pgtable.h |  62 +++--
 include/asm-generic/tlb.h |  14 
 include/linux/huge_mm.h   |  52 ++-
 include/linux/mm.h|  30 +
 include/linux/mmu_notifier.h  |  13 
 mm/huge_memory.c  | 151 ++
 mm/memory.c   |  67 +++
 mm/pagewalk.c |  19 +-
 mm/pgtable-generic.c  |  14 
 10 files changed, 419 insertions(+), 6 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index dc5e0f2..3864ad8 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -462,6 +462,9 @@ config HAVE_IRQ_TIME_ACCOUNTING
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
bool
 
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+   bool
+
 config HAVE_ARCH_HUGE_VMAP
bool
 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 5459a66..9ea433a 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -36,6 +36,9 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma,
 extern int pmdp_set_access_flags(struct vm_area_struct *vma,
 unsigned long address, pmd_t *pmdp,
 pmd_t entry, int dirty);
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+unsigned long address, pud_t *pudp,
+pud_t entry, int dirty);
 #else
 static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
@@ -44,6 +47,13 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
BUILD_BUG();
return 0;
 }
+static inline int pudp_set_access_flags(struct vm_area_struct *vma,
+   unsigned long address, pud_t *pudp,
+   pud_t entry, int dirty)
+{
+   BUILD_BUG();
+   return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
@@ -121,8 +131,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 }
 #endif
 
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address,
pmd_t *pmdp)
@@ -131,20 +141,39 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
pmd_clear(pmdp);
return pmd;
 }
+#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+   unsigned long address,
+   pud_t *pudp)
+{
+   pud_t pud = *pudp;
+   pud_clear(pudp);
+   return pud;
+}
+#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-#endif
 
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
 static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp,
int full)
 {
return pmdp_huge_get_and_clear(mm, address, pmdp);
 }
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
+static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
+   unsigned long address, pud_t *pudp,
+   int full)
+{
+   return pudp_huge_get_and_clear(mm, address, pudp);
+}
+#endif
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pte_t *ptep,
@@ -181,6 +210,9 @@ extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
 extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
  unsigned long address,
   
