Re: [PATCH RFC 10/19] iommu/amd: Add unmap_read_dirty() support
On 5/31/22 13:39, Suravee Suthikulpanit wrote:
> On 4/29/22 4:09 AM, Joao Martins wrote:
>> The AMD implementation of unmap_read_dirty() is pretty simple, as it
>> mostly reuses the unmap code, with the extra addition of marshalling
>> the dirty bit into the bitmap as it walks the to-be-unmapped IOPTEs.
>>
>> Extra care is taken, though, to switch over to cmpxchg, as opposed to
>> a non-serialized store to the PTE, testing the dirty bit until the
>> cmpxchg to 0 succeeds.
>>
>> Signed-off-by: Joao Martins
>> ---
>>  drivers/iommu/amd/io_pgtable.c | 44 +-
>>  drivers/iommu/amd/iommu.c      | 22 +
>>  2 files changed, 60 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
>> index 8325ef193093..1868c3b58e6d 100644
>> --- a/drivers/iommu/amd/io_pgtable.c
>> +++ b/drivers/iommu/amd/io_pgtable.c
>> @@ -355,6 +355,16 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
>>  	free_sub_pt(pt, mode, freelist);
>>  }
>>
>> +static bool free_pte_dirty(u64 *pte, u64 pteval)
>
> Nitpick: Since we free and clear the dirty bit, should we change
> the function name to free_clear_pte_dirty()?

We free and *read* the dirty bit. It just so happens that we clear the
dirty bit, and every other bit, in the process. Just to make it clear:
I am not clearing the dirty bit explicitly (like read_and_clear_dirty()
does).

>> +{
>> +	bool dirty = false;
>> +
>> +	while (IOMMU_PTE_DIRTY(cmpxchg64(pte, pteval, 0)))
>
> We should use 0ULL instead of 0.

ack.

>> +		dirty = true;
>> +
>> +	return dirty;
>> +}
>> +
>
> Actually, what do you think if we enhance the current free_clear_pte()
> to also handle the dirty check as well?

See further below, about dropping this patch.

>>  /*
>>   * Generic mapping functions. It maps a physical address into a DMA
>>   * address space. It allocates the page table pages if necessary.
>> @@ -428,10 +438,11 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
>>  	return ret;
>>  }
>>
>> -static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
>> -					 unsigned long iova,
>> -					 size_t size,
>> -					 struct iommu_iotlb_gather *gather)
>> +static unsigned long __iommu_v1_unmap_page(struct io_pgtable_ops *ops,
>> +					   unsigned long iova,
>> +					   size_t size,
>> +					   struct iommu_iotlb_gather *gather,
>> +					   struct iommu_dirty_bitmap *dirty)
>>  {
>>  	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
>>  	unsigned long long unmapped;
>> @@ -445,11 +456,15 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
>>  	while (unmapped < size) {
>>  		pte = fetch_pte(pgtable, iova, &unmap_size);
>>  		if (pte) {
>> -			int i, count;
>> +			unsigned long i, count;
>> +			bool pte_dirty = false;
>>
>>  			count = PAGE_SIZE_PTE_COUNT(unmap_size);
>>  			for (i = 0; i < count; i++)
>> -				pte[i] = 0ULL;
>> +				pte_dirty |= free_pte_dirty(&pte[i], pte[i]);
>> +
>
> Actually, what if we change the existing free_clear_pte() to
> free_and_clear_dirty_pte(), and incorporate the logic for ...

Likewise, but otherwise it would be a good idea.

>> ...
>>
>> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
>> index 0a86392b2367..a8fcb6e9a684 100644
>> --- a/drivers/iommu/amd/iommu.c
>> +++ b/drivers/iommu/amd/iommu.c
>> @@ -2144,6 +2144,27 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
>>  	return r;
>>  }
>>
>> +static size_t amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
>> +					 unsigned long iova, size_t page_size,
>> +					 struct iommu_iotlb_gather *gather,
>> +					 struct iommu_dirty_bitmap *dirty)
>> +{
>> +	struct protection_domain *domain = to_pdomain(dom);
>> +	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
>> +	size_t r;
>> +
>> +	if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
>> +	    (domain->iop.mode == PAGE_MODE_NONE))
>> +		return 0;
>> +
>> +	r = (ops->unmap_read_dirty) ?
>> +		ops->unmap_read_dirty(ops, iova, page_size, gather, dirty) : 0;
>> +
>> +	amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
>> +
>> +	return r;
>> +}
>> +
>
> Instead of creating a new function, what if we enhance the current
> amd_iommu_unmap() to also handle the read-dirty part as well
> (e.g. __amd_iommu_unmap_read_dirty()), and then both amd_iommu_unmap()
> and amd_iommu_unmap_read_dirty() can call __amd_iommu_unmap_read_dirty()?
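For reference, the consolidation proposed in the last quoted comment might
look roughly like the sketch below: one internal helper taking an optional
dirty bitmap, with amd_iommu_unmap() passing NULL. This is hypothetical
code put together from the RFC's ops layout, not something posted in the
series.

static size_t __amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
					   unsigned long iova, size_t page_size,
					   struct iommu_iotlb_gather *gather,
					   struct iommu_dirty_bitmap *dirty)
{
	struct protection_domain *domain = to_pdomain(dom);
	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
	size_t r;

	if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
	    (domain->iop.mode == PAGE_MODE_NONE))
		return 0;

	/* Fall back to the plain unmap path when no bitmap is given. */
	if (dirty)
		r = ops->unmap_read_dirty ?
			ops->unmap_read_dirty(ops, iova, page_size, gather, dirty) : 0;
	else
		r = ops->unmap ? ops->unmap(ops, iova, page_size, gather) : 0;

	amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);

	return r;
}

static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
			      size_t page_size, struct iommu_iotlb_gather *gather)
{
	return __amd_iommu_unmap_read_dirty(dom, iova, page_size, gather, NULL);
}

static size_t amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
					 unsigned long iova, size_t page_size,
					 struct iommu_iotlb_gather *gather,
					 struct iommu_dirty_bitmap *dirty)
{
	return __amd_iommu_unmap_read_dirty(dom, iova, page_size, gather, dirty);
}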
Re: [PATCH RFC 10/19] iommu/amd: Add unmap_read_dirty() support
On 4/29/22 4:09 AM, Joao Martins wrote:
> The AMD implementation of unmap_read_dirty() is pretty simple, as it
> mostly reuses the unmap code, with the extra addition of marshalling
> the dirty bit into the bitmap as it walks the to-be-unmapped IOPTEs.
>
> Extra care is taken, though, to switch over to cmpxchg, as opposed to
> a non-serialized store to the PTE, testing the dirty bit until the
> cmpxchg to 0 succeeds.
>
> Signed-off-by: Joao Martins
> ---
>  drivers/iommu/amd/io_pgtable.c | 44 +-
>  drivers/iommu/amd/iommu.c      | 22 +
>  2 files changed, 60 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
> index 8325ef193093..1868c3b58e6d 100644
> --- a/drivers/iommu/amd/io_pgtable.c
> +++ b/drivers/iommu/amd/io_pgtable.c
> @@ -355,6 +355,16 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
>  	free_sub_pt(pt, mode, freelist);
>  }
>
> +static bool free_pte_dirty(u64 *pte, u64 pteval)

Nitpick: Since we free and clear the dirty bit, should we change
the function name to free_clear_pte_dirty()?

> +{
> +	bool dirty = false;
> +
> +	while (IOMMU_PTE_DIRTY(cmpxchg64(pte, pteval, 0)))

We should use 0ULL instead of 0.

> +		dirty = true;
> +
> +	return dirty;
> +}
> +

Actually, what do you think if we enhance the current free_clear_pte()
to also handle the dirty check as well?

>  /*
>   * Generic mapping functions. It maps a physical address into a DMA
>   * address space. It allocates the page table pages if necessary.
> @@ -428,10 +438,11 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
>  	return ret;
>  }
>
> -static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
> -					 unsigned long iova,
> -					 size_t size,
> -					 struct iommu_iotlb_gather *gather)
> +static unsigned long __iommu_v1_unmap_page(struct io_pgtable_ops *ops,
> +					   unsigned long iova,
> +					   size_t size,
> +					   struct iommu_iotlb_gather *gather,
> +					   struct iommu_dirty_bitmap *dirty)
>  {
>  	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
>  	unsigned long long unmapped;
> @@ -445,11 +456,15 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
>  	while (unmapped < size) {
>  		pte = fetch_pte(pgtable, iova, &unmap_size);
>  		if (pte) {
> -			int i, count;
> +			unsigned long i, count;
> +			bool pte_dirty = false;
>
>  			count = PAGE_SIZE_PTE_COUNT(unmap_size);
>  			for (i = 0; i < count; i++)
> -				pte[i] = 0ULL;
> +				pte_dirty |= free_pte_dirty(&pte[i], pte[i]);
> +

Actually, what if we change the existing free_clear_pte() to
free_and_clear_dirty_pte(), and incorporate the logic for ...

> ...
>
> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index 0a86392b2367..a8fcb6e9a684 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -2144,6 +2144,27 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
>  	return r;
>  }
>
> +static size_t amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
> +					 unsigned long iova, size_t page_size,
> +					 struct iommu_iotlb_gather *gather,
> +					 struct iommu_dirty_bitmap *dirty)
> +{
> +	struct protection_domain *domain = to_pdomain(dom);
> +	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
> +	size_t r;
> +
> +	if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
> +	    (domain->iop.mode == PAGE_MODE_NONE))
> +		return 0;
> +
> +	r = (ops->unmap_read_dirty) ?
> +		ops->unmap_read_dirty(ops, iova, page_size, gather, dirty) : 0;
> +
> +	amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
> +
> +	return r;
> +}
> +

Instead of creating a new function, what if we enhance the current
amd_iommu_unmap() to also handle the read-dirty part as well
(e.g. __amd_iommu_unmap_read_dirty()), and then both amd_iommu_unmap()
and amd_iommu_unmap_read_dirty() can call __amd_iommu_unmap_read_dirty()?

Best Regards,
Suravee
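The other suggestion in this review, folding the dirty read into
free_clear_pte(), could plausibly keep that function's retry loop and
sample the dirty bit from the PTE value actually cleared. The following
is only a sketch: the name free_clear_pte_dirty() comes from the review,
the freelist handling is assumed to stay as in free_clear_pte(), and the
body is illustrative rather than code from the series.

static bool free_clear_pte_dirty(u64 *pte, u64 pteval,
				 struct list_head *freelist)
{
	u64 *pt;
	int mode;

	/* Retry until we atomically clear the value we last read. */
	while (cmpxchg64(pte, pteval, 0ULL) != pteval)
		pteval = *pte;

	if (!IOMMU_PTE_PRESENT(pteval))
		return IOMMU_PTE_DIRTY(pteval);

	pt   = IOMMU_PTE_PAGE(pteval);
	mode = IOMMU_PTE_MODE(pteval);
	free_sub_pt(pt, mode, freelist);

	/* Report whether the PTE we tore down was marked dirty. */
	return IOMMU_PTE_DIRTY(pteval);
}

Callers in __iommu_v1_unmap_page() would then accumulate the return value
into pte_dirty exactly as the patch does with free_pte_dirty().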
[PATCH RFC 10/19] iommu/amd: Add unmap_read_dirty() support
The AMD implementation of unmap_read_dirty() is pretty simple, as it
mostly reuses the unmap code, with the extra addition of marshalling
the dirty bit into the bitmap as it walks the to-be-unmapped IOPTEs.

Extra care is taken, though, to switch over to cmpxchg, as opposed to
a non-serialized store to the PTE, testing the dirty bit until the
cmpxchg to 0 succeeds.

Signed-off-by: Joao Martins
---
 drivers/iommu/amd/io_pgtable.c | 44 +-
 drivers/iommu/amd/iommu.c      | 22 +
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 8325ef193093..1868c3b58e6d 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -355,6 +355,16 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
 	free_sub_pt(pt, mode, freelist);
 }
 
+static bool free_pte_dirty(u64 *pte, u64 pteval)
+{
+	bool dirty = false;
+
+	while (IOMMU_PTE_DIRTY(cmpxchg64(pte, pteval, 0)))
+		dirty = true;
+
+	return dirty;
+}
+
 /*
  * Generic mapping functions. It maps a physical address into a DMA
  * address space. It allocates the page table pages if necessary.
@@ -428,10 +438,11 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
 	return ret;
 }
 
-static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
-					 unsigned long iova,
-					 size_t size,
-					 struct iommu_iotlb_gather *gather)
+static unsigned long __iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+					   unsigned long iova,
+					   size_t size,
+					   struct iommu_iotlb_gather *gather,
+					   struct iommu_dirty_bitmap *dirty)
 {
 	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
 	unsigned long long unmapped;
@@ -445,11 +456,15 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
 	while (unmapped < size) {
 		pte = fetch_pte(pgtable, iova, &unmap_size);
 		if (pte) {
-			int i, count;
+			unsigned long i, count;
+			bool pte_dirty = false;
 
 			count = PAGE_SIZE_PTE_COUNT(unmap_size);
 			for (i = 0; i < count; i++)
-				pte[i] = 0ULL;
+				pte_dirty |= free_pte_dirty(&pte[i], pte[i]);
+
+			if (unlikely(pte_dirty && dirty))
+				iommu_dirty_bitmap_record(dirty, iova, unmap_size);
 		}
 
 		iova = (iova & ~(unmap_size - 1)) + unmap_size;
@@ -461,6 +476,22 @@
 	return unmapped;
 }
 
+static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+					 unsigned long iova,
+					 size_t size,
+					 struct iommu_iotlb_gather *gather)
+{
+	return __iommu_v1_unmap_page(ops, iova, size, gather, NULL);
+}
+
+static unsigned long iommu_v1_unmap_page_read_dirty(struct io_pgtable_ops *ops,
+						    unsigned long iova, size_t size,
+						    struct iommu_iotlb_gather *gather,
+						    struct iommu_dirty_bitmap *dirty)
+{
+	return __iommu_v1_unmap_page(ops, iova, size, gather, dirty);
+}
+
 static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
 {
 	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
@@ -575,6 +606,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
 	pgtable->iop.ops.unmap        = iommu_v1_unmap_page;
 	pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
 	pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
+	pgtable->iop.ops.unmap_read_dirty = iommu_v1_unmap_page_read_dirty;
 
 	return &pgtable->iop;
 }
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 0a86392b2367..a8fcb6e9a684 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2144,6 +2144,27 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 	return r;
 }
 
+static size_t amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
+					 unsigned long iova, size_t page_size,
+					 struct iommu_iotlb_gather *gather,
+					 struct iommu_dirty_bitmap *dirty)
+{
+	struct protection_domain *domain = to_pdomain(dom);
+	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+	size_t r;
+
+	if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+	    (domain->iop.mode == PAGE_MODE_NONE))
+		return 0;
+
+	r = (ops->unmap_read_dirty) ?
+		ops->unmap_read_dirty(ops, iova, page_size, gather, dirty) : 0;
+
+	amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
+
+	return r;
+}
+
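To spell out the cmpxchg scheme the commit message describes, here is the
patch's new helper once more with annotations; the comments are added
commentary, not part of the posted patch.

static bool free_pte_dirty(u64 *pte, u64 pteval)
{
	bool dirty = false;

	/*
	 * cmpxchg64() returns the value *pte held before the exchange.
	 * On a successful exchange the PTE becomes 0, so the following
	 * iteration reads back 0 (dirty bit clear) and the loop exits;
	 * dirty thus ends up true iff the value cleared had the dirty
	 * bit set. Note that pteval is never re-read on failure, so
	 * this relies on the PTE not changing between fetch_pte() and
	 * the exchange.
	 */
	while (IOMMU_PTE_DIRTY(cmpxchg64(pte, pteval, 0)))
		dirty = true;

	return dirty;
}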