On Fri, Jan 09, 2026 at 09:54:27AM +0100, Francois Dugast wrote:
> This enables support for Transparent Huge Pages (THP) for device pages by
> using MIGRATE_VMA_SELECT_COMPOUND during migration. It removes the need to
> split folios and loop multiple times over all pages to perform the required
> operations at page level. Instead, we rely on the newly introduced support
> for higher orders in drm_pagemap and on the folio-level API.
> 
> In Xe, this drastically improves performance when using SVM. The GT stats
> below, collected after a 2MB page fault, show that overall servicing is
> more than 7 times faster, and thanks to the reduced CPU overhead the share
> of time spent on the actual copy goes from 23% without THP to 80% with THP:
> 
> Without THP:
> 
>     svm_2M_pagefault_us: 966
>     svm_2M_migrate_us: 942
>     svm_2M_device_copy_us: 223
>     svm_2M_get_pages_us: 9
>     svm_2M_bind_us: 10
> 
> With THP:
> 
>     svm_2M_pagefault_us: 132
>     svm_2M_migrate_us: 128
>     svm_2M_device_copy_us: 106
>     svm_2M_get_pages_us: 1
>     svm_2M_bind_us: 2
> 
> v2:
> - Fix one occurrence of drm_pagemap_get_devmem_page() (Matthew Brost)
> 
> v3:
> - Remove migrate_device_split_page() and folio_split_lock; instead rely on
>   free_zone_device_folio() to split folios before freeing (Matthew Brost)
> - Assert folio order is HPAGE_PMD_ORDER (Matthew Brost)
> - Always use folio_set_zone_device_data() in split (Matthew Brost)
> 
> Cc: Matthew Brost <[email protected]>
> Cc: Thomas Hellström <[email protected]>
> Cc: Michal Mrozek <[email protected]>
> Signed-off-by: Francois Dugast <[email protected]>
> ---
>  drivers/gpu/drm/drm_pagemap.c | 69 +++++++++++++++++++++++++++++++----
>  1 file changed, 61 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
> index 121234cef38c..5b89140edb8e 100644
> --- a/drivers/gpu/drm/drm_pagemap.c
> +++ b/drivers/gpu/drm/drm_pagemap.c
> @@ -200,16 +200,20 @@ static void drm_pagemap_migration_unlock_put_pages(unsigned long npages,
>  /**
>   * drm_pagemap_get_devmem_page() - Get a reference to a device memory page
>   * @page: Pointer to the page
> + * @order: Order
>   * @zdd: Pointer to the GPU SVM zone device data
>   *
>   * This function associates the given page with the specified GPU SVM zone
>   * device data and initializes it for zone device usage.
>   */
>  static void drm_pagemap_get_devmem_page(struct page *page,
> +                                     unsigned int order,
>                                       struct drm_pagemap_zdd *zdd)
>  {
> -     page->zone_device_data = drm_pagemap_zdd_get(zdd);
> -     zone_device_page_init(page, 0);
> +     struct folio *folio = page_folio(page);
> +
> +     folio_set_zone_device_data(folio, drm_pagemap_zdd_get(zdd));
> +     zone_device_page_init(page, order);
>  }
>  
>  /**
> @@ -534,7 +538,8 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
>                * rare and only occur when the madvise attributes of memory are
>                * changed or atomics are being used.
>                */
> -             .flags          = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT,
> +             .flags          = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT |
> +                               MIGRATE_VMA_SELECT_COMPOUND,
>       };
>       unsigned long i, npages = npages_in_range(start, end);
>       unsigned long own_pages = 0, migrated_pages = 0;
> @@ -640,10 +645,12 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
>  
>       own_pages = 0;
>  
> -     for (i = 0; i < npages; ++i) {
> +     for (i = 0; i < npages;) {
> +             unsigned long j;
>               struct page *page = pfn_to_page(migrate.dst[i]);
>               struct page *src_page = migrate_pfn_to_page(migrate.src[i]);
>               cur.start = i;
> +             unsigned int order = 0;

Move 'cur.start = i;' to after the 'order' variable declaration.
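i.e., roughly this ordering (untested, just reusing the declarations
already visible in the diff):

	for (i = 0; i < npages;) {
		unsigned long j;
		struct page *page = pfn_to_page(migrate.dst[i]);
		struct page *src_page = migrate_pfn_to_page(migrate.src[i]);
		unsigned int order = 0;

		cur.start = i;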

How about adding this warning too? As we agreed, device folios should be
split by the core MM upon freeing.

drm_WARN_ONCE(dpagemap->drm, folio_order(page_folio(page)),
              "Unexpected compound device page found\n");

>  
>               pages[i] = NULL;
>               if (src_page && is_device_private_page(src_page)) {
> @@ -670,7 +677,20 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
>                       pages[i] = page;
>               }
>               migrate.dst[i] = migrate_pfn(migrate.dst[i]);

You can't see it in this diff, but there is a 'continue' statement just
above here. With the increment removed from the loop header, that could
spin forever. So either s/continue/goto next/ or increment 'i' before the
continue.
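Roughly like this (untested sketch; the branch containing the hidden
'continue' is paraphrased since it sits outside this hunk):

	if (/* existing early-out condition above this hunk */) {
		...
		goto next;	/* instead of 'continue' */
	}
	...
next:
	i += NR_PAGES(order);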

> -             drm_pagemap_get_devmem_page(page, zdd);
> +
> +             if (migrate.src[i] & MIGRATE_PFN_COMPOUND) {
> +                     drm_WARN_ONCE(dpagemap->drm, src_page &&
> +                                   folio_order(page_folio(src_page)) != HPAGE_PMD_ORDER,
> +                                   "Unexpected folio order\n");
> +
> +                     order = HPAGE_PMD_ORDER;
> +                     migrate.dst[i] |= MIGRATE_PFN_COMPOUND;
> +
> +                     for (j = 1; j < NR_PAGES(order) && i + j < npages; j++)
> +                             migrate.dst[i + j] = 0;
> +             }
> +
> +             drm_pagemap_get_devmem_page(page, order, zdd);
>  
>               /* If we switched the migrating drm_pagemap, migrate previous pages now */
>               err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
> @@ -680,7 +700,10 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
>                       npages = i + 1;
>                       goto err_finalize;
>               }
> +

next:
> +             i += NR_PAGES(order);

Matt

>       }
> +
>       cur.start = npages;
>       cur.ops = NULL; /* Force migration */
>       err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
> @@ -789,6 +812,8 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
>               page = folio_page(folio, 0);
>               mpfn[i] = migrate_pfn(page_to_pfn(page));
>  
> +             if (order)
> +                     mpfn[i] |= MIGRATE_PFN_COMPOUND;
>  next:
>               if (page)
>                       addr += page_size(page);
> @@ -1044,8 +1069,15 @@ int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation)
>       if (err)
>               goto err_finalize;
>  
> -     for (i = 0; i < npages; ++i)
> +     for (i = 0; i < npages;) {
> +             unsigned int order = 0;
> +
>               pages[i] = migrate_pfn_to_page(src[i]);
> +             if (pages[i])
> +                     order = folio_order(page_folio(pages[i]));
> +
> +             i += NR_PAGES(order);
> +     }
>  
>       err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
>       if (err)
> @@ -1098,7 +1130,8 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
>               .vma            = vas,
>               .pgmap_owner    = page_pgmap(page)->owner,
>               .flags          = MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
> -             MIGRATE_VMA_SELECT_DEVICE_COHERENT,
> +                               MIGRATE_VMA_SELECT_DEVICE_COHERENT |
> +                               MIGRATE_VMA_SELECT_COMPOUND,
>               .fault_page     = page,
>       };
>       struct drm_pagemap_migrate_details mdetails = {};
> @@ -1164,8 +1197,15 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
>       if (err)
>               goto err_finalize;
>  
> -     for (i = 0; i < npages; ++i)
> +     for (i = 0; i < npages;) {
> +             unsigned int order = 0;
> +
>               pages[i] = migrate_pfn_to_page(migrate.src[i]);
> +             if (pages[i])
> +                     order = folio_order(page_folio(pages[i]));
> +
> +             i += NR_PAGES(order);
> +     }
>  
>       err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
>       if (err)
> @@ -1223,9 +1263,22 @@ static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf)
>       return err ? VM_FAULT_SIGBUS : 0;
>  }
>  
> +static void drm_pagemap_folio_split(struct folio *orig_folio, struct folio *new_folio)
> +{
> +     struct drm_pagemap_zdd *zdd;
> +
> +     if (!new_folio)
> +             return;
> +
> +     new_folio->pgmap = orig_folio->pgmap;
> +     zdd = folio_zone_device_data(orig_folio);
> +     folio_set_zone_device_data(new_folio, drm_pagemap_zdd_get(zdd));
> +}
> +
>  static const struct dev_pagemap_ops drm_pagemap_pagemap_ops = {
>       .folio_free = drm_pagemap_folio_free,
>       .migrate_to_ram = drm_pagemap_migrate_to_ram,
> +     .folio_split = drm_pagemap_folio_split,
>  };
>  
>  /**
> -- 
> 2.43.0
> 
