Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path

2017-04-21 Thread Zi Yan


Anshuman Khandual wrote:
> On 04/21/2017 02:17 AM, Zi Yan wrote:
>> From: Zi Yan 
>>
>> If one of callers of page migration starts to handle thp,
>> memory management code start to see pmd migration entry, so we need
>> to prepare for it before enabling. This patch changes various code
>> point which checks the status of given pmds in order to prevent race
>> between thp migration and the pmd-related works.
>>
>> ChangeLog v1 -> v2:
>> - introduce pmd_related() (I know the naming is not good, but can't
>>   think of a better name. Any suggestion is welcome.)
>>
>> Signed-off-by: Naoya Horiguchi 
>>
>> ChangeLog v2 -> v3:
>> - add is_swap_pmd()
>> - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
>>   pmd_trans_huge(), pmd_devmap(), or pmd_none()
>> - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
>>   true on pmd_migration_entry, so that migration entries are not
>>   treated as pmd page table entries.
>>
>> ChangeLog v4 -> v5:
>> - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
>>   the equivalence of !pmd_present() and is_pmd_migration_entry()
>> - fix migration entry wait deadlock code (from v1) in follow_page_mask()
>> - remove unnecessary code (from v1) in follow_trans_huge_pmd()
>> - use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
>>   so it will not be confused with pmd_none()
>> - change author information
>>
>> Signed-off-by: Zi Yan 
>> ---
>>  arch/x86/mm/gup.c |  7 +++--
>>  fs/proc/task_mmu.c| 30 +
>>  include/asm-generic/pgtable.h | 17 +++-
>>  include/linux/huge_mm.h   | 14 --
>>  mm/gup.c  | 22 ++--
>>  mm/huge_memory.c  | 61 
>> ++-
>>  mm/memcontrol.c   |  5 
>>  mm/memory.c   | 12 +++--
>>  mm/mprotect.c |  4 +--
>>  mm/mremap.c   |  2 +-
>>  10 files changed, 145 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
>> index 456dfdfd2249..096bbcc801e6 100644
>> --- a/arch/x86/mm/gup.c
>> +++ b/arch/x86/mm/gup.c
>> @@ -9,6 +9,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  
>>  #include 
>> @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
>> unsigned long end,
>>  pmd_t pmd = *pmdp;
>>  
>>  next = pmd_addr_end(addr, end);
>> -if (pmd_none(pmd))
>> +if (!pmd_present(pmd)) {
>> +VM_BUG_ON(is_swap_pmd(pmd) && 
>> IS_ENABLED(CONFIG_MIGRATION) &&
>> +  !is_pmd_migration_entry(pmd));
>>  return 0;
>> -if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
>> +} else if (unlikely(pmd_large(pmd))) {
>>  /*
>>   * NUMA hinting faults need to be handled in the GUP
>>   * slowpath for accounting purposes and so that they
>> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
>> index 5c8359704601..57489dcd71c4 100644
>> --- a/fs/proc/task_mmu.c
>> +++ b/fs/proc/task_mmu.c
>> @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long 
>> addr, unsigned long end,
>>  
>>  ptl = pmd_trans_huge_lock(pmd, vma);
>>  if (ptl) {
>> -smaps_pmd_entry(pmd, addr, walk);
>> +if (pmd_present(*pmd))
>> +smaps_pmd_entry(pmd, addr, walk);
>>  spin_unlock(ptl);
>>  return 0;
>>  }
>> @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned 
>> long addr,
>>  goto out;
>>  }
>>  
>> +if (!pmd_present(*pmd))
>> +goto out;
>> +
> 
> These pmd_present() checks should have been done irrespective of the
> presence of new PMD migration entries. Please separate them out in a
> different clean up patch.

Not really. The introduction of PMD migration entries makes
pmd_trans_huge_lock() return a lock when the PMD is a swap entry (see
the changes to pmd_trans_huge_lock() in this patch). This was not the
case before, where pmd_trans_huge_lock() returned NULL if the PMD entry
was pmd_none(), so neither of the two chunks was reachable.

Maybe I should use is_swap_pmd() to clarify the confusion.



>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 7406d88445bf..3479e9caf2fa 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -912,6 +912,22 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct 
>> mm_struct *src_mm,
>>  
>>  ret = -EAGAIN;
>>  pmd = *src_pmd;
>> +
>> +if (unlikely(is_swap_pmd(pmd))) {
>> +swp_entry_t entry = pmd_to_swp_entry(pmd);
>> +
>> +VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
>> +  

Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path

2017-04-21 Thread Zi Yan


Anshuman Khandual wrote:
> On 04/21/2017 02:17 AM, Zi Yan wrote:
>> From: Zi Yan 
>>
>> If one of callers of page migration starts to handle thp,
>> memory management code start to see pmd migration entry, so we need
>> to prepare for it before enabling. This patch changes various code
>> point which checks the status of given pmds in order to prevent race
>> between thp migration and the pmd-related works.
>>
>> ChangeLog v1 -> v2:
>> - introduce pmd_related() (I know the naming is not good, but can't
>>   think of a better name. Any suggestion is welcome.)
>>
>> Signed-off-by: Naoya Horiguchi 
>>
>> ChangeLog v2 -> v3:
>> - add is_swap_pmd()
>> - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
>>   pmd_trans_huge(), pmd_devmap(), or pmd_none()
>> - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
>>   true on pmd_migration_entry, so that migration entries are not
>>   treated as pmd page table entries.
>>
>> ChangeLog v4 -> v5:
>> - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
>>   the equivalence of !pmd_present() and is_pmd_migration_entry()
>> - fix migration entry wait deadlock code (from v1) in follow_page_mask()
>> - remove unnecessary code (from v1) in follow_trans_huge_pmd()
>> - use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
>>   so it will not be confused with pmd_none()
>> - change author information
>>
>> Signed-off-by: Zi Yan 
>> ---
>>  arch/x86/mm/gup.c |  7 +++--
>>  fs/proc/task_mmu.c| 30 +
>>  include/asm-generic/pgtable.h | 17 +++-
>>  include/linux/huge_mm.h   | 14 --
>>  mm/gup.c  | 22 ++--
>>  mm/huge_memory.c  | 61 
>> ++-
>>  mm/memcontrol.c   |  5 
>>  mm/memory.c   | 12 +++--
>>  mm/mprotect.c |  4 +--
>>  mm/mremap.c   |  2 +-
>>  10 files changed, 145 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
>> index 456dfdfd2249..096bbcc801e6 100644
>> --- a/arch/x86/mm/gup.c
>> +++ b/arch/x86/mm/gup.c
>> @@ -9,6 +9,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  
>>  #include 
>> @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
>> unsigned long end,
>>  pmd_t pmd = *pmdp;
>>  
>>  next = pmd_addr_end(addr, end);
>> -if (pmd_none(pmd))
>> +if (!pmd_present(pmd)) {
>> +VM_BUG_ON(is_swap_pmd(pmd) && 
>> IS_ENABLED(CONFIG_MIGRATION) &&
>> +  !is_pmd_migration_entry(pmd));
>>  return 0;
>> -if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
>> +} else if (unlikely(pmd_large(pmd))) {
>>  /*
>>   * NUMA hinting faults need to be handled in the GUP
>>   * slowpath for accounting purposes and so that they
>> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
>> index 5c8359704601..57489dcd71c4 100644
>> --- a/fs/proc/task_mmu.c
>> +++ b/fs/proc/task_mmu.c
>> @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long 
>> addr, unsigned long end,
>>  
>>  ptl = pmd_trans_huge_lock(pmd, vma);
>>  if (ptl) {
>> -smaps_pmd_entry(pmd, addr, walk);
>> +if (pmd_present(*pmd))
>> +smaps_pmd_entry(pmd, addr, walk);
>>  spin_unlock(ptl);
>>  return 0;
>>  }
>> @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned 
>> long addr,
>>  goto out;
>>  }
>>  
>> +if (!pmd_present(*pmd))
>> +goto out;
>> +
> 
> These pmd_present() checks should have been done irrespective of the
> presence of new PMD migration entries. Please separate them out in a
> different clean up patch.

Not really. The introduction of PMD migration entries makes
pmd_trans_huge_lock() return a lock when the PMD is a swap entry (see
the changes to pmd_trans_huge_lock() in this patch). This was not the
case before, where pmd_trans_huge_lock() returned NULL if the PMD entry
was pmd_none(), so neither of the two chunks was reachable.

Maybe I should use is_swap_pmd() to clarify the confusion.



>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 7406d88445bf..3479e9caf2fa 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -912,6 +912,22 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct 
>> mm_struct *src_mm,
>>  
>>  ret = -EAGAIN;
>>  pmd = *src_pmd;
>> +
>> +if (unlikely(is_swap_pmd(pmd))) {
>> +swp_entry_t entry = pmd_to_swp_entry(pmd);
>> +
>> +VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
>> +  !is_pmd_migration_entry(pmd));
>> +if (is_write_migration_entry(entry)) {
>> +   

Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path

2017-04-21 Thread Anshuman Khandual
On 04/21/2017 02:17 AM, Zi Yan wrote:
> From: Zi Yan 
> 
> If one of callers of page migration starts to handle thp,
> memory management code start to see pmd migration entry, so we need
> to prepare for it before enabling. This patch changes various code
> point which checks the status of given pmds in order to prevent race
> between thp migration and the pmd-related works.
> 
> ChangeLog v1 -> v2:
> - introduce pmd_related() (I know the naming is not good, but can't
>   think of a better name. Any suggestion is welcome.)
> 
> Signed-off-by: Naoya Horiguchi 
> 
> ChangeLog v2 -> v3:
> - add is_swap_pmd()
> - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
>   pmd_trans_huge(), pmd_devmap(), or pmd_none()
> - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
>   true on pmd_migration_entry, so that migration entries are not
>   treated as pmd page table entries.
> 
> ChangeLog v4 -> v5:
> - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
>   the equivalence of !pmd_present() and is_pmd_migration_entry()
> - fix migration entry wait deadlock code (from v1) in follow_page_mask()
> - remove unnecessary code (from v1) in follow_trans_huge_pmd()
> - use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
>   so it will not be confused with pmd_none()
> - change author information
> 
> Signed-off-by: Zi Yan 
> ---
>  arch/x86/mm/gup.c |  7 +++--
>  fs/proc/task_mmu.c| 30 +
>  include/asm-generic/pgtable.h | 17 +++-
>  include/linux/huge_mm.h   | 14 --
>  mm/gup.c  | 22 ++--
>  mm/huge_memory.c  | 61 
> ++-
>  mm/memcontrol.c   |  5 
>  mm/memory.c   | 12 +++--
>  mm/mprotect.c |  4 +--
>  mm/mremap.c   |  2 +-
>  10 files changed, 145 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
> index 456dfdfd2249..096bbcc801e6 100644
> --- a/arch/x86/mm/gup.c
> +++ b/arch/x86/mm/gup.c
> @@ -9,6 +9,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include 
> @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
> unsigned long end,
>   pmd_t pmd = *pmdp;
>  
>   next = pmd_addr_end(addr, end);
> - if (pmd_none(pmd))
> + if (!pmd_present(pmd)) {
> + VM_BUG_ON(is_swap_pmd(pmd) && 
> IS_ENABLED(CONFIG_MIGRATION) &&
> +   !is_pmd_migration_entry(pmd));
>   return 0;
> - if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
> + } else if (unlikely(pmd_large(pmd))) {
>   /*
>* NUMA hinting faults need to be handled in the GUP
>* slowpath for accounting purposes and so that they
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 5c8359704601..57489dcd71c4 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long 
> addr, unsigned long end,
>  
>   ptl = pmd_trans_huge_lock(pmd, vma);
>   if (ptl) {
> - smaps_pmd_entry(pmd, addr, walk);
> + if (pmd_present(*pmd))
> + smaps_pmd_entry(pmd, addr, walk);
>   spin_unlock(ptl);
>   return 0;
>   }
> @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long 
> addr,
>   goto out;
>   }
>  
> + if (!pmd_present(*pmd))
> + goto out;
> +

These pmd_present() checks should have been done irrespective of the
presence of new PMD migration entries. Please separate them out in a
different clean up patch.
 
>   page = pmd_page(*pmd);
>  
>   /* Clear accessed and referenced bits. */
> @@ -1221,28 +1225,32 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned 
> long addr, unsigned long end,
>   if (ptl) {
>   u64 flags = 0, frame = 0;
>   pmd_t pmd = *pmdp;
> + struct page *page = NULL;
>  
>   if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
>   flags |= PM_SOFT_DIRTY;
>  
> - /*
> -  * Currently pmd for thp is always present because thp
> -  * can not be swapped-out, migrated, or HWPOISONed
> -  * (split in such cases instead.)
> -  * This if-check is just to prepare for future implementation.
> -  */
>   if (pmd_present(pmd)) {
> - struct page *page = pmd_page(pmd);
> -
> - if (page_mapcount(page) == 1)
> - flags |= PM_MMAP_EXCLUSIVE;
> +  

Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path

2017-04-21 Thread Anshuman Khandual
On 04/21/2017 02:17 AM, Zi Yan wrote:
> From: Zi Yan 
> 
> If one of callers of page migration starts to handle thp,
> memory management code start to see pmd migration entry, so we need
> to prepare for it before enabling. This patch changes various code
> point which checks the status of given pmds in order to prevent race
> between thp migration and the pmd-related works.
> 
> ChangeLog v1 -> v2:
> - introduce pmd_related() (I know the naming is not good, but can't
>   think of a better name. Any suggestion is welcome.)
> 
> Signed-off-by: Naoya Horiguchi 
> 
> ChangeLog v2 -> v3:
> - add is_swap_pmd()
> - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
>   pmd_trans_huge(), pmd_devmap(), or pmd_none()
> - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
>   true on pmd_migration_entry, so that migration entries are not
>   treated as pmd page table entries.
> 
> ChangeLog v4 -> v5:
> - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
>   the equivalence of !pmd_present() and is_pmd_migration_entry()
> - fix migration entry wait deadlock code (from v1) in follow_page_mask()
> - remove unnecessary code (from v1) in follow_trans_huge_pmd()
> - use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
>   so it will not be confused with pmd_none()
> - change author information
> 
> Signed-off-by: Zi Yan 
> ---
>  arch/x86/mm/gup.c |  7 +++--
>  fs/proc/task_mmu.c| 30 +
>  include/asm-generic/pgtable.h | 17 +++-
>  include/linux/huge_mm.h   | 14 --
>  mm/gup.c  | 22 ++--
>  mm/huge_memory.c  | 61 
> ++-
>  mm/memcontrol.c   |  5 
>  mm/memory.c   | 12 +++--
>  mm/mprotect.c |  4 +--
>  mm/mremap.c   |  2 +-
>  10 files changed, 145 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
> index 456dfdfd2249..096bbcc801e6 100644
> --- a/arch/x86/mm/gup.c
> +++ b/arch/x86/mm/gup.c
> @@ -9,6 +9,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include 
> @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
> unsigned long end,
>   pmd_t pmd = *pmdp;
>  
>   next = pmd_addr_end(addr, end);
> - if (pmd_none(pmd))
> + if (!pmd_present(pmd)) {
> + VM_BUG_ON(is_swap_pmd(pmd) && 
> IS_ENABLED(CONFIG_MIGRATION) &&
> +   !is_pmd_migration_entry(pmd));
>   return 0;
> - if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
> + } else if (unlikely(pmd_large(pmd))) {
>   /*
>* NUMA hinting faults need to be handled in the GUP
>* slowpath for accounting purposes and so that they
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 5c8359704601..57489dcd71c4 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long 
> addr, unsigned long end,
>  
>   ptl = pmd_trans_huge_lock(pmd, vma);
>   if (ptl) {
> - smaps_pmd_entry(pmd, addr, walk);
> + if (pmd_present(*pmd))
> + smaps_pmd_entry(pmd, addr, walk);
>   spin_unlock(ptl);
>   return 0;
>   }
> @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long 
> addr,
>   goto out;
>   }
>  
> + if (!pmd_present(*pmd))
> + goto out;
> +

These pmd_present() checks should have been done irrespective of the
presence of new PMD migration entries. Please separate them out in a
different clean up patch.
 
>   page = pmd_page(*pmd);
>  
>   /* Clear accessed and referenced bits. */
> @@ -1221,28 +1225,32 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned 
> long addr, unsigned long end,
>   if (ptl) {
>   u64 flags = 0, frame = 0;
>   pmd_t pmd = *pmdp;
> + struct page *page = NULL;
>  
>   if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
>   flags |= PM_SOFT_DIRTY;
>  
> - /*
> -  * Currently pmd for thp is always present because thp
> -  * can not be swapped-out, migrated, or HWPOISONed
> -  * (split in such cases instead.)
> -  * This if-check is just to prepare for future implementation.
> -  */
>   if (pmd_present(pmd)) {
> - struct page *page = pmd_page(pmd);
> -
> - if (page_mapcount(page) == 1)
> - flags |= PM_MMAP_EXCLUSIVE;
> + page = pmd_page(pmd);
>  
>   

[PATCH v5 06/11] mm: thp: check pmd migration entry in common path

2017-04-20 Thread Zi Yan
From: Zi Yan 

If one of the callers of page migration starts to handle thp,
memory management code starts to see pmd migration entries, so we need
to prepare for this before enabling it. This patch changes various code
points which check the status of given pmds in order to prevent races
between thp migration and the pmd-related works.

ChangeLog v1 -> v2:
- introduce pmd_related() (I know the naming is not good, but can't
  think of a better name. Any suggestion is welcome.)

Signed-off-by: Naoya Horiguchi 

ChangeLog v2 -> v3:
- add is_swap_pmd()
- a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
  pmd_trans_huge(), pmd_devmap(), or pmd_none()
- pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
  true on pmd_migration_entry, so that migration entries are not
  treated as pmd page table entries.

ChangeLog v4 -> v5:
- add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
  the equivalence of !pmd_present() and is_pmd_migration_entry()
- fix migration entry wait deadlock code (from v1) in follow_page_mask()
- remove unnecessary code (from v1) in follow_trans_huge_pmd()
- use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
  so it will not be confused with pmd_none()
- change author information

Signed-off-by: Zi Yan 
---
 arch/x86/mm/gup.c |  7 +++--
 fs/proc/task_mmu.c| 30 +
 include/asm-generic/pgtable.h | 17 +++-
 include/linux/huge_mm.h   | 14 --
 mm/gup.c  | 22 ++--
 mm/huge_memory.c  | 61 ++-
 mm/memcontrol.c   |  5 
 mm/memory.c   | 12 +++--
 mm/mprotect.c |  4 +--
 mm/mremap.c   |  2 +-
 10 files changed, 145 insertions(+), 29 deletions(-)

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 456dfdfd2249..096bbcc801e6 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
pmd_t pmd = *pmdp;
 
next = pmd_addr_end(addr, end);
-   if (pmd_none(pmd))
+   if (!pmd_present(pmd)) {
+   VM_BUG_ON(is_swap_pmd(pmd) && 
IS_ENABLED(CONFIG_MIGRATION) &&
+ !is_pmd_migration_entry(pmd));
return 0;
-   if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
+   } else if (unlikely(pmd_large(pmd))) {
/*
 * NUMA hinting faults need to be handled in the GUP
 * slowpath for accounting purposes and so that they
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5c8359704601..57489dcd71c4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, 
unsigned long end,
 
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
-   smaps_pmd_entry(pmd, addr, walk);
+   if (pmd_present(*pmd))
+   smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
return 0;
}
@@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long 
addr,
goto out;
}
 
+   if (!pmd_present(*pmd))
+   goto out;
+
page = pmd_page(*pmd);
 
/* Clear accessed and referenced bits. */
@@ -1221,28 +1225,32 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long 
addr, unsigned long end,
if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
+   struct page *page = NULL;
 
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
 
-   /*
-* Currently pmd for thp is always present because thp
-* can not be swapped-out, migrated, or HWPOISONed
-* (split in such cases instead.)
-* This if-check is just to prepare for future implementation.
-*/
if (pmd_present(pmd)) {
-   struct page *page = pmd_page(pmd);
-
-   if (page_mapcount(page) == 1)
-   flags |= PM_MMAP_EXCLUSIVE;
+   page = pmd_page(pmd);
 
flags |= PM_PRESENT;
if (pm->show_pfn)
frame = pmd_pfn(pmd) +
((addr & ~PMD_MASK) >> PAGE_SHIFT);
+   } else if (is_swap_pmd(pmd)) {
+   swp_entry_t entry = 

[PATCH v5 06/11] mm: thp: check pmd migration entry in common path

2017-04-20 Thread Zi Yan
From: Zi Yan 

If one of the callers of page migration starts to handle thp,
memory management code starts to see pmd migration entries, so we need
to prepare for this before enabling it. This patch changes various code
points which check the status of given pmds in order to prevent races
between thp migration and the pmd-related works.

ChangeLog v1 -> v2:
- introduce pmd_related() (I know the naming is not good, but can't
  think of a better name. Any suggestion is welcome.)

Signed-off-by: Naoya Horiguchi 

ChangeLog v2 -> v3:
- add is_swap_pmd()
- a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
  pmd_trans_huge(), pmd_devmap(), or pmd_none()
- pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
  true on pmd_migration_entry, so that migration entries are not
  treated as pmd page table entries.

ChangeLog v4 -> v5:
- add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
  the equivalence of !pmd_present() and is_pmd_migration_entry()
- fix migration entry wait deadlock code (from v1) in follow_page_mask()
- remove unnecessary code (from v1) in follow_trans_huge_pmd()
- use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
  so it will not be confused with pmd_none()
- change author information

Signed-off-by: Zi Yan 
---
 arch/x86/mm/gup.c |  7 +++--
 fs/proc/task_mmu.c| 30 +
 include/asm-generic/pgtable.h | 17 +++-
 include/linux/huge_mm.h   | 14 --
 mm/gup.c  | 22 ++--
 mm/huge_memory.c  | 61 ++-
 mm/memcontrol.c   |  5 
 mm/memory.c   | 12 +++--
 mm/mprotect.c |  4 +--
 mm/mremap.c   |  2 +-
 10 files changed, 145 insertions(+), 29 deletions(-)

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 456dfdfd2249..096bbcc801e6 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
pmd_t pmd = *pmdp;
 
next = pmd_addr_end(addr, end);
-   if (pmd_none(pmd))
+   if (!pmd_present(pmd)) {
+   VM_BUG_ON(is_swap_pmd(pmd) && 
IS_ENABLED(CONFIG_MIGRATION) &&
+ !is_pmd_migration_entry(pmd));
return 0;
-   if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
+   } else if (unlikely(pmd_large(pmd))) {
/*
 * NUMA hinting faults need to be handled in the GUP
 * slowpath for accounting purposes and so that they
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5c8359704601..57489dcd71c4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, 
unsigned long end,
 
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
-   smaps_pmd_entry(pmd, addr, walk);
+   if (pmd_present(*pmd))
+   smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
return 0;
}
@@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long 
addr,
goto out;
}
 
+   if (!pmd_present(*pmd))
+   goto out;
+
page = pmd_page(*pmd);
 
/* Clear accessed and referenced bits. */
@@ -1221,28 +1225,32 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long 
addr, unsigned long end,
if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
+   struct page *page = NULL;
 
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
 
-   /*
-* Currently pmd for thp is always present because thp
-* can not be swapped-out, migrated, or HWPOISONed
-* (split in such cases instead.)
-* This if-check is just to prepare for future implementation.
-*/
if (pmd_present(pmd)) {
-   struct page *page = pmd_page(pmd);
-
-   if (page_mapcount(page) == 1)
-   flags |= PM_MMAP_EXCLUSIVE;
+   page = pmd_page(pmd);
 
flags |= PM_PRESENT;
if (pm->show_pfn)
frame = pmd_pfn(pmd) +
((addr & ~PMD_MASK) >> PAGE_SHIFT);
+   } else if (is_swap_pmd(pmd)) {
+   swp_entry_t entry = pmd_to_swp_entry(pmd);
+
+   frame = swp_type(entry) |
+