Re: [PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages

2020-03-16 Thread Christoph Hellwig
On Wed, Mar 11, 2020 at 03:35:05PM -0300, Jason Gunthorpe wrote:
> From: Jason Gunthorpe 
> 
> hmm_range_fault() should never return 0 if the caller requested a valid
> page, but the pfns output for that page would be HMM_PFN_ERROR.
> 
> hmm_pte_need_fault() must always be called before setting HMM_PFN_ERROR to
> detect if the page is in faulting mode or not.
> 
> Fix two cases in hmm_vma_walk_pmd() and reorganize some of the duplicated
> code.

Looks good,

Reviewed-by: Christoph Hellwig 


Re: [PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages

2020-03-12 Thread Jason Gunthorpe
On Wed, Mar 11, 2020 at 06:36:47PM -0700, Ralph Campbell wrote:
> > @@ -390,8 +384,15 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
> >  			return -EBUSY;
> >  		}
> >  		return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
> > -	} else if (!pmd_present(pmd))
> > +	}
> > +
> > +	if (!pmd_present(pmd)) {
> > +		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
> > +				     &write_fault);
> > +		if (fault || write_fault)
> > +			return -EFAULT;
> >  		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
> 
> Shouldn't this fill with HMM_PFN_NONE instead of HMM_PFN_ERROR?
> Otherwise, when a THP is swapped out, you will get a different
> value than if a PTE is swapped out and you are prefetching/snapshotting.

If this is the case then the problem is that the return -EFAULT path
needs to do something else, i.e. since the above code can't trigger
swap-in, it is correct to return PFN_ERROR.
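
To make the contract concrete, a rough caller-side sketch (hypothetical;
the hmm_range and flag setup is driver-specific and elided, so treat this
as illustrative only):

	long ret;

	/* the caller asked hmm_range_fault() to make these pages valid */
	ret = hmm_range_fault(&range, 0);
	if (ret < 0)
		return ret;	/* e.g. the -EFAULT added by this patch */
	/*
	 * On success the caller is entitled to assume that no requested
	 * page was silently reported as HMM_PFN_ERROR in range.pfns[].
	 */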

I'm completely guessing, but do we need to call pmd_to_swp_entry() and
handle things similarly to the pte? What swp_entries are valid for a
pmd?
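
Purely as an illustration of "handle it like the pte path" (a guess, same
as above - whether anything other than a migration entry can legitimately
appear in a non-present pmd is exactly the question, and the -EFAULT
fallback here is part of the guess):

	if (!pmd_present(pmd)) {
		swp_entry_t entry = pmd_to_swp_entry(pmd);

		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
				     &write_fault);
		if (!fault && !write_fault)
			return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);

		if (thp_migration_supported() && is_migration_entry(entry)) {
			pmd_migration_entry_wait(walk->mm, pmdp);
			return -EBUSY;
		}
		/* anything we do not recognise at the pmd level */
		return -EFAULT;
	}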

Do you understand this better, or know how to trigger a !pmd_present
pmd for testing?

I suppose another option would be this:

if (!pmd_present(pmd)) {
	hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
			     &write_fault);
	/* We can't handle this. Cause the PMD to be split and
	 * handle it in the pte handler. */
	if (fault || write_fault)
		return 0;
	return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
}

Which, I think, must be correct, but inefficient?

Jason


Re: [PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages

2020-03-11 Thread Ralph Campbell



On 3/11/20 11:35 AM, Jason Gunthorpe wrote:
> From: Jason Gunthorpe 
> 
> hmm_range_fault() should never return 0 if the caller requested a valid
> page, but the pfns output for that page would be HMM_PFN_ERROR.
> 
> hmm_pte_need_fault() must always be called before setting HMM_PFN_ERROR to
> detect if the page is in faulting mode or not.
> 
> Fix two cases in hmm_vma_walk_pmd() and reorganize some of the duplicated
> code.
> 
> Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd")
> Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
> Signed-off-by: Jason Gunthorpe 

Reviewed-by: Ralph Campbell 

> ---
>  mm/hmm.c | 38 +-
>  1 file changed, 21 insertions(+), 17 deletions(-)
> 
> diff --git a/mm/hmm.c b/mm/hmm.c
> index bf676cfef3e8ee..f61fddf2ef6505 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -363,8 +363,10 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>  {
>  	struct hmm_vma_walk *hmm_vma_walk = walk->private;
>  	struct hmm_range *range = hmm_vma_walk->range;
> -	uint64_t *pfns = range->pfns;
> -	unsigned long addr = start, i;
> +	uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
> +	unsigned long npages = (end - start) >> PAGE_SHIFT;
> +	unsigned long addr = start;
> +	bool fault, write_fault;
>  	pte_t *ptep;
>  	pmd_t pmd;
>  
> @@ -374,14 +376,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>  		return hmm_vma_walk_hole(start, end, -1, walk);
>  
>  	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
> -		bool fault, write_fault;
> -		unsigned long npages;
> -		uint64_t *pfns;
> -
> -		i = (addr - range->start) >> PAGE_SHIFT;
> -		npages = (end - addr) >> PAGE_SHIFT;
> -		pfns = &range->pfns[i];
> -
>  		hmm_range_need_fault(hmm_vma_walk, pfns, npages,
>  				     0, &fault, &write_fault);
>  		if (fault || write_fault) {
> @@ -390,8 +384,15 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>  			return -EBUSY;
>  		}
>  		return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
> -	} else if (!pmd_present(pmd))
> +	}
> +
> +	if (!pmd_present(pmd)) {
> +		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
> +				     &write_fault);
> +		if (fault || write_fault)
> +			return -EFAULT;
>  		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);

Shouldn't this fill with HMM_PFN_NONE instead of HMM_PFN_ERROR?
Otherwise, when a THP is swapped out, you will get a different
value than if a PTE is swapped out and you are prefetching/snapshotting.

> +	}
>  
>  	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
>  		/*
> @@ -408,8 +409,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>  		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
>  			goto again;
>  
> -		i = (addr - range->start) >> PAGE_SHIFT;
> -		return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
> +		return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);
>  	}
>  
>  	/*
> @@ -418,15 +418,19 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>  	 * entry pointing to pte directory or it is a bad pmd that will not
>  	 * recover.
>  	 */
> -	if (pmd_bad(pmd))
> +	if (pmd_bad(pmd)) {
> +		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
> +				     &write_fault);
> +		if (fault || write_fault)
> +			return -EFAULT;
>  		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
> +	}
>  
>  	ptep = pte_offset_map(pmdp, addr);
> -	i = (addr - range->start) >> PAGE_SHIFT;
> -	for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
> +	for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) {
>  		int r;
>  
> -		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
> +		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns);
>  		if (r) {
>  			/* hmm_vma_handle_pte() did pte_unmap() */
>  			hmm_vma_walk->last = addr;


[PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages

2020-03-11 Thread Jason Gunthorpe
From: Jason Gunthorpe 

hmm_range_fault() should never return 0 if the caller requested a valid
page, but the pfns output for that page would be HMM_PFN_ERROR.

hmm_pte_need_fault() must always be called before setting HMM_PFN_ERROR to
detect if the page is in faulting mode or not.

Fix two cases in hmm_vma_walk_pmd() and reorganize some of the duplicated
code.

Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd")
Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
Signed-off-by: Jason Gunthorpe 
---
 mm/hmm.c | 38 +-
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index bf676cfef3e8ee..f61fddf2ef6505 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -363,8 +363,10 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	uint64_t *pfns = range->pfns;
-	unsigned long addr = start, i;
+	uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
+	unsigned long npages = (end - start) >> PAGE_SHIFT;
+	unsigned long addr = start;
+	bool fault, write_fault;
 	pte_t *ptep;
 	pmd_t pmd;
 
@@ -374,14 +376,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 		return hmm_vma_walk_hole(start, end, -1, walk);
 
 	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
-		bool fault, write_fault;
-		unsigned long npages;
-		uint64_t *pfns;
-
-		i = (addr - range->start) >> PAGE_SHIFT;
-		npages = (end - addr) >> PAGE_SHIFT;
-		pfns = &range->pfns[i];
-
 		hmm_range_need_fault(hmm_vma_walk, pfns, npages,
 				     0, &fault, &write_fault);
 		if (fault || write_fault) {
@@ -390,8 +384,15 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 			return -EBUSY;
 		}
 		return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
-	} else if (!pmd_present(pmd))
+	}
+
+	if (!pmd_present(pmd)) {
+		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
+				     &write_fault);
+		if (fault || write_fault)
+			return -EFAULT;
 		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
+	}
 
 	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
 		/*
@@ -408,8 +409,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
 			goto again;
 
-		i = (addr - range->start) >> PAGE_SHIFT;
-		return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
+		return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);
 	}
 
 	/*
@@ -418,15 +418,19 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 	 * entry pointing to pte directory or it is a bad pmd that will not
 	 * recover.
 	 */
-	if (pmd_bad(pmd))
+	if (pmd_bad(pmd)) {
+		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
+				     &write_fault);
+		if (fault || write_fault)
+			return -EFAULT;
 		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
+	}
 
 	ptep = pte_offset_map(pmdp, addr);
-	i = (addr - range->start) >> PAGE_SHIFT;
-	for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
+	for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) {
 		int r;
 
-		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
+		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns);
 		if (r) {
 			/* hmm_vma_handle_pte() did pte_unmap() */
 			hmm_vma_walk->last = addr;
-- 
2.25.1
