On Tue, Dec 16, 2025 at 09:10:14PM +0100, Francois Dugast wrote:
> This enables support for Transparent Huge Pages (THP) for device pages by
> using MIGRATE_VMA_SELECT_COMPOUND during migration. It removes the need to
> split folios and loop multiple times over all pages to perform the
> required operations at page level. Instead, we rely on the newly
> introduced support for higher orders in drm_pagemap and on the folio-level
> API.
>
> In Xe, this drastically improves performance when using SVM. The GT stats
> below, collected after a 2MB page fault, show that overall servicing is
> more than 7 times faster and that, thanks to reduced CPU overhead, the
> time spent on the actual copy goes from 23% of the total without THP to
> 80% with THP:
>
> Without THP:
>
> svm_2M_pagefault_us: 966
> svm_2M_migrate_us: 942
> svm_2M_device_copy_us: 223
> svm_2M_get_pages_us: 9
> svm_2M_bind_us: 10
>
> With THP:
>
> svm_2M_pagefault_us: 132
> svm_2M_migrate_us: 128
> svm_2M_device_copy_us: 106
> svm_2M_get_pages_us: 1
> svm_2M_bind_us: 2
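
Side note for anyone skimming the series: as I read it, the CPU overhead
drops because the per-page loops below collapse into per-folio steps,
roughly like this (a sketch, not the exact patch code, reusing the
patch's NR_PAGES() helper):

	/* Before: one iteration per PAGE_SIZE page (512 for 2MB @ 4K). */
	for (i = 0; i < npages; ++i)
		pages[i] = migrate_pfn_to_page(src[i]);

	/* After: one iteration per folio (a single one for a 2MB THP). */
	for (i = 0; i < npages;) {
		unsigned int order = 0;

		pages[i] = migrate_pfn_to_page(src[i]);
		if (pages[i])
			order = folio_order(page_folio(pages[i]));

		i += NR_PAGES(order);
	}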
>
> Cc: Matthew Brost <[email protected]>
> Cc: Thomas Hellström <[email protected]>
> Cc: Michal Mrozek <[email protected]>
> Signed-off-by: Francois Dugast <[email protected]>
> ---
> drivers/gpu/drm/drm_pagemap.c | 88 +++++++++++++++++++++++++++++------
> drivers/gpu/drm/xe/xe_svm.c | 5 ++
> include/drm/drm_pagemap.h | 5 +-
> 3 files changed, 83 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
> index b71e47136112..797ec2094fdf 100644
> --- a/drivers/gpu/drm/drm_pagemap.c
> +++ b/drivers/gpu/drm/drm_pagemap.c
> @@ -211,16 +211,20 @@ void *drm_pagemap_page_zone_device_data(struct page *page)
> /**
> * drm_pagemap_get_devmem_page() - Get a reference to a device memory page
> * @page: Pointer to the page
> + * @order: Order
> * @zdd: Pointer to the GPU SVM zone device data
> *
> * This function associates the given page with the specified GPU SVM zone
> * device data and initializes it for zone device usage.
> */
> static void drm_pagemap_get_devmem_page(struct page *page,
> + unsigned int order,
> struct drm_pagemap_zdd *zdd)
> {
> - page->zone_device_data = drm_pagemap_zdd_get(zdd);
> - zone_device_page_init(page, 0);
> + struct folio *folio = page_folio(page);
> +
> + folio_set_zone_device_data(folio, drm_pagemap_zdd_get(zdd));
> + zone_device_page_init(page, order);
> }
>
> /**
> @@ -345,11 +349,13 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
> void *pgmap_owner)
> {
> const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
> + struct drm_pagemap *dpagemap = devmem_allocation->dpagemap;
> struct migrate_vma migrate = {
> .start = start,
> .end = end,
> .pgmap_owner = pgmap_owner,
> - .flags = MIGRATE_VMA_SELECT_SYSTEM,
> + .flags = MIGRATE_VMA_SELECT_SYSTEM
> + | MIGRATE_VMA_SELECT_COMPOUND,
> };
> unsigned long i, npages = npages_in_range(start, end);
> struct vm_area_struct *vas;
> @@ -409,11 +415,6 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
> goto err_free;
> }
>
> - if (migrate.cpages != npages) {
I don't think we want to blindly delete this. I believe if the original
check fails, we should call a subsequent function to calculate cpages
based on the pages in the returned migrate.src, and bail out if that
still doesn't match npages.
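
Something like this untested sketch is what I have in mind (helper name
made up):

	/*
	 * Recount collected pages from migrate.src, where a compound
	 * entry accounts for NR_PAGES(HPAGE_PMD_ORDER) pages rather
	 * than one.
	 */
	static unsigned long migrate_src_cpages(struct migrate_vma *migrate,
						unsigned long npages)
	{
		unsigned long i, cpages = 0;

		for (i = 0; i < npages;) {
			unsigned int order =
				(migrate->src[i] & MIGRATE_PFN_COMPOUND) ?
				HPAGE_PMD_ORDER : 0;

			if (migrate->src[i] & MIGRATE_PFN_MIGRATE)
				cpages += NR_PAGES(order);

			i += NR_PAGES(order);
		}

		return cpages;
	}

and then at the call site:

	if (migrate.cpages != npages &&
	    migrate_src_cpages(&migrate, npages) != npages) {
		err = -EBUSY;
		goto err_finalize;
	}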
> - err = -EBUSY;
> - goto err_finalize;
> - }
> -
> err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst);
> if (err)
> goto err_finalize;
> @@ -424,13 +425,38 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
> if (err)
> goto err_finalize;
>
> - for (i = 0; i < npages; ++i) {
> + mutex_lock(&dpagemap->folio_split_lock);
> + for (i = 0; i < npages;) {
> + unsigned long j;
> struct page *page = pfn_to_page(migrate.dst[i]);
> + unsigned int order;
>
> pages[i] = page;
> migrate.dst[i] = migrate_pfn(migrate.dst[i]);
> - drm_pagemap_get_devmem_page(page, zdd);
> +
> + if (migrate.src[i] & MIGRATE_PFN_COMPOUND) {
> + order = HPAGE_PMD_ORDER;
> +
> + migrate.dst[i] |= MIGRATE_PFN_COMPOUND;
> +
> + drm_pagemap_get_devmem_page(page, order, zdd);
> +
> + for (j = 1; j < NR_PAGES(order) && i + j < npages; j++)
> + migrate.dst[i + j] = 0;
> +
> + } else {
> + order = 0;
> +
> + if (folio_order(page_folio(page)))
> + migrate_device_split_page(page);
> +
> + zone_device_page_init(page, 0);
> + page->zone_device_data = drm_pagemap_zdd_get(zdd);
drm_pagemap_get_devmem_page(page, order, zdd); ?
If so, this part could be moved outside the if/else clause.
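
i.e., something along these lines (untested sketch of the hoisted
version):

	for (i = 0; i < npages;) {
		struct page *page = pfn_to_page(migrate.dst[i]);
		unsigned int order = 0;
		unsigned long j;

		pages[i] = page;
		migrate.dst[i] = migrate_pfn(migrate.dst[i]);

		if (migrate.src[i] & MIGRATE_PFN_COMPOUND) {
			order = HPAGE_PMD_ORDER;
			migrate.dst[i] |= MIGRATE_PFN_COMPOUND;

			/* Tail entries of the THP carry no migrate PFN. */
			for (j = 1; j < NR_PAGES(order) && i + j < npages; j++)
				migrate.dst[i + j] = 0;
		} else if (folio_order(page_folio(page))) {
			/* Split any leftover compound folio before reuse. */
			migrate_device_split_page(page);
		}

		/* Common to both orders: take a zdd ref and init the page. */
		drm_pagemap_get_devmem_page(page, order, zdd);
		i += NR_PAGES(order);
	}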
Matt
> + }
> +
> + i += NR_PAGES(order);
> }
> + mutex_unlock(&dpagemap->folio_split_lock);
>
> err = ops->copy_to_devmem(pages, pagemap_addr, npages);
> if (err)
> @@ -516,6 +542,8 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
> page = folio_page(folio, 0);
> mpfn[i] = migrate_pfn(page_to_pfn(page));
>
> + if (order)
> + mpfn[i] |= MIGRATE_PFN_COMPOUND;
> next:
> if (page)
> addr += page_size(page);
> @@ -617,8 +645,15 @@ int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation)
> if (err)
> goto err_finalize;
>
> - for (i = 0; i < npages; ++i)
> + for (i = 0; i < npages;) {
> + unsigned int order = 0;
> +
> pages[i] = migrate_pfn_to_page(src[i]);
> + if (pages[i])
> + order = folio_order(page_folio(pages[i]));
> +
> + i += NR_PAGES(order);
> + }
>
> err = ops->copy_to_ram(pages, pagemap_addr, npages);
> if (err)
> @@ -671,8 +706,9 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
> struct migrate_vma migrate = {
> .vma = vas,
> .pgmap_owner = device_private_page_owner,
> - .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
> - MIGRATE_VMA_SELECT_DEVICE_COHERENT,
> + .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE
> + | MIGRATE_VMA_SELECT_DEVICE_COHERENT
> + | MIGRATE_VMA_SELECT_COMPOUND,
> .fault_page = page,
> };
> struct drm_pagemap_zdd *zdd;
> @@ -753,8 +789,15 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
> if (err)
> goto err_finalize;
>
> - for (i = 0; i < npages; ++i)
> + for (i = 0; i < npages;) {
> + unsigned int order = 0;
> +
> pages[i] = migrate_pfn_to_page(migrate.src[i]);
> + if (pages[i])
> + order = folio_order(page_folio(pages[i]));
> +
> + i += NR_PAGES(order);
> + }
>
> err = ops->copy_to_ram(pages, pagemap_addr, npages);
> if (err)
> @@ -813,9 +856,26 @@ static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf)
> return err ? VM_FAULT_SIGBUS : 0;
> }
>
> +static void drm_pagemap_folio_split(struct folio *orig_folio, struct folio *new_folio)
> +{
> + struct drm_pagemap_zdd *zdd;
> +
> + if (!new_folio)
> + return;
> +
> + new_folio->pgmap = orig_folio->pgmap;
> + zdd = folio_zone_device_data(orig_folio);
> + if (folio_order(new_folio))
> + folio_set_zone_device_data(new_folio, drm_pagemap_zdd_get(zdd));
> + else
> + folio_page(new_folio, 0)->zone_device_data =
> + drm_pagemap_zdd_get(zdd);
> +}
> +
> static const struct dev_pagemap_ops drm_pagemap_pagemap_ops = {
> .folio_free = drm_pagemap_folio_free,
> .migrate_to_ram = drm_pagemap_migrate_to_ram,
> + .folio_split = drm_pagemap_folio_split,
> };
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 93550c7c84ac..037c77de2757 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -4,6 +4,7 @@
> */
>
> #include <drm/drm_drv.h>
> +#include <drm/drm_managed.h>
>
> #include "xe_bo.h"
> #include "xe_exec_queue_types.h"
> @@ -1470,6 +1471,10 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
> void *addr;
> int ret;
>
> + ret = drmm_mutex_init(&tile->xe->drm, &vr->dpagemap.folio_split_lock);
> + if (ret)
> + return ret;
> +
> res = devm_request_free_mem_region(dev, &iomem_resource,
> vr->usable_size);
> if (IS_ERR(res)) {
> diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
> index 3a8d0e1cef43..82b9c0e6392e 100644
> --- a/include/drm/drm_pagemap.h
> +++ b/include/drm/drm_pagemap.h
> @@ -129,11 +129,14 @@ struct drm_pagemap_ops {
> * struct drm_pagemap: Additional information for a struct dev_pagemap
> * used for device p2p handshaking.
> * @ops: The struct drm_pagemap_ops.
> - * @dev: The struct drevice owning the device-private memory.
> + * @dev: The struct device owning the device-private memory.
> + * @folio_split_lock: Lock to protect device folio splitting.
> */
> struct drm_pagemap {
> const struct drm_pagemap_ops *ops;
> struct device *dev;
> + /* Protect device folio splitting */
> + struct mutex folio_split_lock;
> };
>
> struct drm_pagemap_devmem;
> --
> 2.43.0
>