Re: [PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages
On Wed, Mar 11, 2020 at 03:35:05PM -0300, Jason Gunthorpe wrote:
> From: Jason Gunthorpe
>
> hmm_range_fault() should never return 0 if the caller requested a valid
> page, but the pfns output for that page would be HMM_PFN_ERROR.
>
> hmm_pte_need_fault() must always be called before setting HMM_PFN_ERROR to
> detect if the page is in faulting mode or not.
>
> Fix two cases in hmm_vma_walk_pmd() and reorganize some of the duplicated
> code.

Looks good,

Reviewed-by: Christoph Hellwig
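For readers following the thread: the invariant being fixed is caller-visible, and a rough caller-side sketch against the 5.6-era API makes it concrete. The request setup below is schematic, not taken from any real driver; only hmm_range_fault(), range->pfns, range->flags, and range->values are the real interface.

	unsigned long i, npages = (range->end - range->start) >> PAGE_SHIFT;
	long ret;

	/* Ask for every page in the range to be faulted in as valid. */
	range->default_flags = range->flags[HMM_PFN_VALID];
	range->pfn_flags_mask = 0;

	ret = hmm_range_fault(range, 0);
	if (ret >= 0) {
		/*
		 * The invariant: on a non-error return, no page that was
		 * requested valid may be left as HMM_PFN_ERROR.  The walker
		 * must call hmm_pte_need_fault() first and return -EFAULT
		 * for such pages instead.
		 */
		for (i = 0; i < npages; i++)
			WARN_ON(range->pfns[i] ==
				range->values[HMM_PFN_ERROR]);
	}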
Re: [PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages
On Wed, Mar 11, 2020 at 06:36:47PM -0700, Ralph Campbell wrote:
> > @@ -390,8 +384,15 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
> >  			return -EBUSY;
> >  		}
> >  		return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
> > -	} else if (!pmd_present(pmd))
> > +	}
> > +
> > +	if (!pmd_present(pmd)) {
> > +		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
> > +				     &write_fault);
> > +		if (fault || write_fault)
> > +			return -EFAULT;
> >  		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
>
> Shouldn't this fill with HMM_PFN_NONE instead of HMM_PFN_ERROR?
> Otherwise, when a THP is swapped out, you will get a different
> value than if a PTE is swapped out and you are prefetching/snapshotting.

If this is the case then the problem is that the return -EFAULT path
needs to do something else, i.e. since the above code can't trigger
swap-in, it is correct to return PFN_ERROR.

I'm completely guessing, but do we need to call pmd_to_swp_entry() and
handle things similarly to the pte? What swp_entries are valid for a
pmd?

Do you understand this better, or know how to trigger a !pmd_present
for test?

I suppose another option would be this:

	if (!pmd_present(pmd)) {
		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
				     &write_fault);
		/* We can't handle this. Cause the PMD to be split and
		 * handle it in the pte handler. */
		if (fault || write_fault)
			return 0;
		return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
	}

Which, I think, must be correct, but inefficient?

Jason
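For reference, the pte-side handling being alluded to is the !pte_present() branch of hmm_vma_handle_pte() in the same file. It is abridged here (the device-private case is elided), so treat it as a sketch of the 5.6-era code rather than a verbatim quote:

	if (!pte_present(pte)) {
		swp_entry_t entry = pte_to_swp_entry(pte);

		/* ... is_device_private_entry(entry) handling elided ... */

		if (is_migration_entry(entry)) {
			if (fault || write_fault) {
				pte_unmap(ptep);
				hmm_vma_walk->last = addr;
				migration_entry_wait(walk->mm, pmdp, addr);
				return -EBUSY;
			}
			return 0;
		}

		/* Report error for everything else */
		pte_unmap(ptep);
		*pfn = range->values[HMM_PFN_ERROR];
		return -EFAULT;
	}

A pmd analogue would presumably switch on pmd_to_swp_entry(), but since the migration case is already handled earlier in hmm_vma_walk_pmd(), the open question above is what other swp_entry could be left in a pmd.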
Re: [PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages
On 3/11/20 11:35 AM, Jason Gunthorpe wrote:
> From: Jason Gunthorpe
>
> hmm_range_fault() should never return 0 if the caller requested a valid
> page, but the pfns output for that page would be HMM_PFN_ERROR.
>
> hmm_pte_need_fault() must always be called before setting HMM_PFN_ERROR to
> detect if the page is in faulting mode or not.
>
> Fix two cases in hmm_vma_walk_pmd() and reorganize some of the duplicated
> code.
>
> Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd")
> Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
> Signed-off-by: Jason Gunthorpe

Reviewed-by: Ralph Campbell

> ---
>  mm/hmm.c | 38 +++++++++++++++++++++++-----------------
>  1 file changed, 21 insertions(+), 17 deletions(-)
>
> diff --git a/mm/hmm.c b/mm/hmm.c
> index bf676cfef3e8ee..f61fddf2ef6505 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -363,8 +363,10 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>  {
>  	struct hmm_vma_walk *hmm_vma_walk = walk->private;
>  	struct hmm_range *range = hmm_vma_walk->range;
> -	uint64_t *pfns = range->pfns;
> -	unsigned long addr = start, i;
> +	uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
> +	unsigned long npages = (end - start) >> PAGE_SHIFT;
> +	unsigned long addr = start;
> +	bool fault, write_fault;
>  	pte_t *ptep;
>  	pmd_t pmd;
>
> @@ -374,14 +376,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>  		return hmm_vma_walk_hole(start, end, -1, walk);
>
>  	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
> -		bool fault, write_fault;
> -		unsigned long npages;
> -		uint64_t *pfns;
> -
> -		i = (addr - range->start) >> PAGE_SHIFT;
> -		npages = (end - addr) >> PAGE_SHIFT;
> -		pfns = &range->pfns[i];
> -
>  		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0,
>  				     &fault, &write_fault);
>  		if (fault || write_fault) {
> @@ -390,8 +384,15 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>  			return -EBUSY;
>  		}
>  		return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
> -	} else if (!pmd_present(pmd))
> +	}
> +
> +	if (!pmd_present(pmd)) {
> +		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
> +				     &write_fault);
> +		if (fault || write_fault)
> +			return -EFAULT;
>  		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);

Shouldn't this fill with HMM_PFN_NONE instead of HMM_PFN_ERROR?
Otherwise, when a THP is swapped out, you will get a different
value than if a PTE is swapped out and you are prefetching/snapshotting.

> +	}
>
>  	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
>  		/*
> @@ -408,8 +409,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>  		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
>  			goto again;
>
> -		i = (addr - range->start) >> PAGE_SHIFT;
> -		return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
> +		return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);
>  	}
>
>  	/*
> @@ -418,15 +418,19 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>  	 * entry pointing to pte directory or it is a bad pmd that will not
>  	 * recover.
>  	 */
> -	if (pmd_bad(pmd))
> +	if (pmd_bad(pmd)) {
> +		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
> +				     &write_fault);
> +		if (fault || write_fault)
> +			return -EFAULT;
>  		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
> +	}
>
>  	ptep = pte_offset_map(pmdp, addr);
> -	i = (addr - range->start) >> PAGE_SHIFT;
> -	for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
> +	for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) {
>  		int r;
>
> -		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
> +		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns);
>  		if (r) {
>  			/* hmm_vma_handle_pte() did pte_unmap() */
>  			hmm_vma_walk->last = addr;
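"Prefetching/snapshotting" above refers to calling hmm_range_fault() with the 5.6-era HMM_FAULT_SNAPSHOT flag, in which case hmm_range_need_fault() reports fault = write_fault = false, none of the new -EFAULT paths trigger, and the hmm_pfns_fill() value is exactly what the caller observes. A schematic illustration (hypothetical caller; only the flag and function names are the real API):

	/* Pure snapshot: read the CPU page table, fault nothing in. */
	range->default_flags = 0;
	range->pfn_flags_mask = 0;
	ret = hmm_range_fault(range, HMM_FAULT_SNAPSHOT);
	/*
	 * The concern: with the patch as posted, a swapped-out THP in this
	 * range snapshots as HMM_PFN_ERROR, while the same memory swapped
	 * out as individual PTEs may snapshot differently.
	 */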
[PATCH hmm 7/8] mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages
From: Jason Gunthorpe

hmm_range_fault() should never return 0 if the caller requested a valid
page, but the pfns output for that page would be HMM_PFN_ERROR.

hmm_pte_need_fault() must always be called before setting HMM_PFN_ERROR to
detect if the page is in faulting mode or not.

Fix two cases in hmm_vma_walk_pmd() and reorganize some of the duplicated
code.

Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd")
Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
Signed-off-by: Jason Gunthorpe
---
 mm/hmm.c | 38 +++++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index bf676cfef3e8ee..f61fddf2ef6505 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -363,8 +363,10 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	uint64_t *pfns = range->pfns;
-	unsigned long addr = start, i;
+	uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
+	unsigned long npages = (end - start) >> PAGE_SHIFT;
+	unsigned long addr = start;
+	bool fault, write_fault;
 	pte_t *ptep;
 	pmd_t pmd;

@@ -374,14 +376,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 		return hmm_vma_walk_hole(start, end, -1, walk);

 	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
-		bool fault, write_fault;
-		unsigned long npages;
-		uint64_t *pfns;
-
-		i = (addr - range->start) >> PAGE_SHIFT;
-		npages = (end - addr) >> PAGE_SHIFT;
-		pfns = &range->pfns[i];
-
 		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0,
 				     &fault, &write_fault);
 		if (fault || write_fault) {
@@ -390,8 +384,15 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 			return -EBUSY;
 		}
 		return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
-	} else if (!pmd_present(pmd))
+	}
+
+	if (!pmd_present(pmd)) {
+		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
+				     &write_fault);
+		if (fault || write_fault)
+			return -EFAULT;
 		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
+	}

 	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
 		/*
@@ -408,8 +409,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
 			goto again;

-		i = (addr - range->start) >> PAGE_SHIFT;
-		return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
+		return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);
 	}

 	/*
@@ -418,15 +418,19 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 	 * entry pointing to pte directory or it is a bad pmd that will not
 	 * recover.
 	 */
-	if (pmd_bad(pmd))
+	if (pmd_bad(pmd)) {
+		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
+				     &write_fault);
+		if (fault || write_fault)
+			return -EFAULT;
 		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
+	}

 	ptep = pte_offset_map(pmdp, addr);
-	i = (addr - range->start) >> PAGE_SHIFT;
-	for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
+	for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) {
 		int r;

-		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
+		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns);
 		if (r) {
 			/* hmm_vma_handle_pte() did pte_unmap() */
 			hmm_vma_walk->last = addr;
--
2.25.1