Re: [RFC PATCH 4/8] mm: Provide mm_struct and address to huge_ptep_get()

2024-05-26 Thread Christophe Leroy


Le 25/03/2024 à 17:35, Jason Gunthorpe a écrit :
> On Mon, Mar 25, 2024 at 03:55:57PM +0100, Christophe Leroy wrote:
> 
>>   arch/arm64/include/asm/hugetlb.h |  2 +-
>>   fs/hugetlbfs/inode.c |  2 +-
>>   fs/proc/task_mmu.c   |  8 +++---
>>   fs/userfaultfd.c |  2 +-
>>   include/asm-generic/hugetlb.h|  2 +-
>>   include/linux/swapops.h  |  2 +-
>>   mm/damon/vaddr.c |  6 ++---
>>   mm/gup.c |  2 +-
>>   mm/hmm.c |  2 +-
>>   mm/hugetlb.c | 46 
>>   mm/memory-failure.c  |  2 +-
>>   mm/mempolicy.c   |  2 +-
>>   mm/migrate.c |  4 +--
>>   mm/mincore.c |  2 +-
>>   mm/userfaultfd.c |  2 +-
>>   15 files changed, 43 insertions(+), 43 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/hugetlb.h 
>> b/arch/arm64/include/asm/hugetlb.h
>> index 2ddc33d93b13..1af39a74e791 100644
>> --- a/arch/arm64/include/asm/hugetlb.h
>> +++ b/arch/arm64/include/asm/hugetlb.h
>> @@ -46,7 +46,7 @@ extern pte_t huge_ptep_clear_flush(struct vm_area_struct 
>> *vma,
>>   extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
>> pte_t *ptep, unsigned long sz);
>>   #define __HAVE_ARCH_HUGE_PTEP_GET
>> -extern pte_t huge_ptep_get(pte_t *ptep);
>> +extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t 
>> *ptep);
> 
> The header changed but not the implementation? This will need to do
> riscv and s390 too.

It is now fixed.

> 
> Though, really, I think the right path is to work toward removing
> huge_ptep_get() from the arch code..
> 
> riscv and arm are doing the same thing - propagating dirty/young bits
> from the contig PTEs to the results. The core code can do this, maybe
> with an ARCH #define opt in.
> 
> s390.. Ouchy - is this because hugetlb wants to pretend that every
> level is encoded as a PTE so it takes the PGD and recodes the flags to
> the PTE layout??
> 
> Jason


Re: [RFC PATCH 4/8] mm: Provide mm_struct and address to huge_ptep_get()

2024-03-25 Thread Jason Gunthorpe
On Mon, Mar 25, 2024 at 03:55:57PM +0100, Christophe Leroy wrote:

>  arch/arm64/include/asm/hugetlb.h |  2 +-
>  fs/hugetlbfs/inode.c |  2 +-
>  fs/proc/task_mmu.c   |  8 +++---
>  fs/userfaultfd.c |  2 +-
>  include/asm-generic/hugetlb.h|  2 +-
>  include/linux/swapops.h  |  2 +-
>  mm/damon/vaddr.c |  6 ++---
>  mm/gup.c |  2 +-
>  mm/hmm.c |  2 +-
>  mm/hugetlb.c | 46 
>  mm/memory-failure.c  |  2 +-
>  mm/mempolicy.c   |  2 +-
>  mm/migrate.c |  4 +--
>  mm/mincore.c |  2 +-
>  mm/userfaultfd.c |  2 +-
>  15 files changed, 43 insertions(+), 43 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/hugetlb.h 
> b/arch/arm64/include/asm/hugetlb.h
> index 2ddc33d93b13..1af39a74e791 100644
> --- a/arch/arm64/include/asm/hugetlb.h
> +++ b/arch/arm64/include/asm/hugetlb.h
> @@ -46,7 +46,7 @@ extern pte_t huge_ptep_clear_flush(struct vm_area_struct 
> *vma,
>  extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
>  pte_t *ptep, unsigned long sz);
>  #define __HAVE_ARCH_HUGE_PTEP_GET
> -extern pte_t huge_ptep_get(pte_t *ptep);
> +extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t 
> *ptep);

The header changed but not the implementation? This will need to do
riscv and s390 too.

Though, really, I think the right path is to work toward removing
huge_ptep_get() from the arch code..

riscv and arm are doing the same thing - propagating dirty/young bits
from the contig PTEs to the results. The core code can do this, maybe
with an ARCH #define opt in.

s390.. Ouchy - is this because hugetlb wants to pretend that every
level is encoded as a PTE so it takes the PGD and recodes the flags to
the PTE layout??

Jason


[RFC PATCH 4/8] mm: Provide mm_struct and address to huge_ptep_get()

2024-03-25 Thread Christophe Leroy
On powerpc 8xx huge_ptep_get() will need to know whether the given
ptep is a PTE entry or a PMD entry. This cannot be known with the
PMD entry itself because there is no easy way to know it from the
content of the entry.

So huge_ptep_get() will need to know either the size of the page
or get the pmd.

In order to be consistent with huge_ptep_get_and_clear(), give
mm and address to huge_ptep_get().

Signed-off-by: Christophe Leroy 
---
 arch/arm64/include/asm/hugetlb.h |  2 +-
 fs/hugetlbfs/inode.c |  2 +-
 fs/proc/task_mmu.c   |  8 +++---
 fs/userfaultfd.c |  2 +-
 include/asm-generic/hugetlb.h|  2 +-
 include/linux/swapops.h  |  2 +-
 mm/damon/vaddr.c |  6 ++---
 mm/gup.c |  2 +-
 mm/hmm.c |  2 +-
 mm/hugetlb.c | 46 
 mm/memory-failure.c  |  2 +-
 mm/mempolicy.c   |  2 +-
 mm/migrate.c |  4 +--
 mm/mincore.c |  2 +-
 mm/userfaultfd.c |  2 +-
 15 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 2ddc33d93b13..1af39a74e791 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -46,7 +46,7 @@ extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
 extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
   pte_t *ptep, unsigned long sz);
 #define __HAVE_ARCH_HUGE_PTEP_GET
-extern pte_t huge_ptep_get(pte_t *ptep);
+extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t 
*ptep);
 
 void __init arm64_hugetlb_cma_reserve(void);
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6502c7e776d1..ec3ec87d29e7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -425,7 +425,7 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct 
*vma,
if (!ptep)
return false;
 
-   pte = huge_ptep_get(ptep);
+   pte = huge_ptep_get(vma->vm_mm, addr, ptep);
if (huge_pte_none(pte) || !pte_present(pte))
return false;
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 23fbab954c20..b14081bcdafe 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1572,7 +1572,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned 
long hmask,
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
 
-   pte = huge_ptep_get(ptep);
+   pte = huge_ptep_get(walk->mm, addr, ptep);
if (pte_present(pte)) {
struct page *page = pte_page(pte);
 
@@ -2256,7 +2256,7 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, 
unsigned long hmask,
if (~p->arg.flags & PM_SCAN_WP_MATCHING) {
/* Go the short route when not write-protecting pages. */
 
-   pte = huge_ptep_get(ptep);
+   pte = huge_ptep_get(walk->mm, start, ptep);
categories = p->cur_vma_category | 
pagemap_hugetlb_category(pte);
 
if (!pagemap_scan_is_interesting_page(categories, p))
@@ -2268,7 +2268,7 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, 
unsigned long hmask,
i_mmap_lock_write(vma->vm_file->f_mapping);
ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep);
 
-   pte = huge_ptep_get(ptep);
+   pte = huge_ptep_get(walk->mm, start, ptep);
categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
 
if (!pagemap_scan_is_interesting_page(categories, p))
@@ -2663,7 +2663,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long 
addr,
 static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
-   pte_t huge_pte = huge_ptep_get(pte);
+   pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte);
struct numa_maps *md;
struct page *page;
 
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 60dcfafdc11a..177fe1ff14d7 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -256,7 +256,7 @@ static inline bool userfaultfd_huge_must_wait(struct 
userfaultfd_ctx *ctx,
goto out;
 
ret = false;
-   pte = huge_ptep_get(ptep);
+   pte = huge_ptep_get(vma->vm_mm, vmf->address, ptep);
 
/*
 * Lockless access: we're in a wait_event so it's ok if it
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 6dcf4d576970..594d5905f615 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -144,7 +144,7 @@ static inline int huge_ptep_set_access_flags(struct 
vm_area_struct *vma,
 #endif
 
 #ifndef __HAVE_ARCH_HUGE_PTEP_GET
-static inline pte_t huge_ptep_get(pte_t *ptep)
+static inline pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, 
pte_t *ptep)
 {
return ptep_get(ptep);
 }