Alistair Popple wrote:
> Zone device pages are used to represent various type of device memory
> managed by device drivers. Currently compound zone device pages are
> not supported. This is because MEMORY_DEVICE_FS_DAX pages are the only
> user of higher order zone device pages and have their own page
> reference counting.
> 
> A future change will unify FS DAX reference counting with normal page
> reference counting rules and remove the special FS DAX reference
> counting. Supporting that requires compound zone device pages.
> 
> Supporting compound zone device pages requires compound_head() to
> distinguish between head and tail pages whilst still preserving the
> special struct page fields that are specific to zone device pages.
> 
> A tail page is distinguished by having bit zero being set in
> page->compound_head, with the remaining bits pointing to the head
> page. For zone device pages page->compound_head is shared with
> page->pgmap.
> 
> The page->pgmap field is common to all pages within a memory section.
> Therefore pgmap is the same for both head and tail pages and can be
> moved into the folio and we can use the standard scheme to find
> compound_head from a tail page.
> 
> Signed-off-by: Alistair Popple <apop...@nvidia.com>
> Reviewed-by: Jason Gunthorpe <j...@nvidia.com>
> 
> ---
> 
> Changes since v1:
> 
>  - Move pgmap to the folio as suggested by Matthew Wilcox
> ---
>  drivers/gpu/drm/nouveau/nouveau_dmem.c |  3 ++-
>  drivers/pci/p2pdma.c                   |  6 +++---
>  include/linux/memremap.h               |  6 +++---
>  include/linux/migrate.h                |  4 ++--
>  include/linux/mm_types.h               |  9 +++++++--
>  include/linux/mmzone.h                 |  8 +++++++-
>  lib/test_hmm.c                         |  3 ++-
>  mm/hmm.c                               |  2 +-
>  mm/memory.c                            |  4 +++-
>  mm/memremap.c                          | 14 +++++++-------
>  mm/migrate_device.c                    |  7 +++++--
>  mm/mm_init.c                           |  2 +-
>  12 files changed, 43 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
> b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> index 6fb65b0..58d308c 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> @@ -88,7 +88,8 @@ struct nouveau_dmem {
>  
>  static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page)
>  {
> -     return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap);
> +     return container_of(page_dev_pagemap(page), struct nouveau_dmem_chunk,

page_dev_pagemap() feels like a mouthful. I would be ok with
page_pgmap() since that is the most common idenifier for struct
struct dev_pagemap instances.

> +                         pagemap);
>  }
>  
>  static struct nouveau_drm *page_to_drm(struct page *page)
> diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
> index 210b9f4..a58f2c1 100644
> --- a/drivers/pci/p2pdma.c
> +++ b/drivers/pci/p2pdma.c
> @@ -199,7 +199,7 @@ static const struct attribute_group p2pmem_group = {
>  
>  static void p2pdma_page_free(struct page *page)
>  {
> -     struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap);
> +     struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_dev_pagemap(page));
>       /* safe to dereference while a reference is held to the percpu ref */
>       struct pci_p2pdma *p2pdma =
>               rcu_dereference_protected(pgmap->provider->p2pdma, 1);
> @@ -1022,8 +1022,8 @@ enum pci_p2pdma_map_type
>  pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device 
> *dev,
>                      struct scatterlist *sg)
>  {
> -     if (state->pgmap != sg_page(sg)->pgmap) {
> -             state->pgmap = sg_page(sg)->pgmap;
> +     if (state->pgmap != page_dev_pagemap(sg_page(sg))) {
> +             state->pgmap = page_dev_pagemap(sg_page(sg));
>               state->map = pci_p2pdma_map_type(state->pgmap, dev);
>               state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
>       }
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index 3f7143a..14273e6 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -161,7 +161,7 @@ static inline bool is_device_private_page(const struct 
> page *page)
>  {
>       return IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
>               is_zone_device_page(page) &&
> -             page->pgmap->type == MEMORY_DEVICE_PRIVATE;
> +             page_dev_pagemap(page)->type == MEMORY_DEVICE_PRIVATE;
>  }
>  
>  static inline bool folio_is_device_private(const struct folio *folio)
> @@ -173,13 +173,13 @@ static inline bool is_pci_p2pdma_page(const struct page 
> *page)
>  {
>       return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
>               is_zone_device_page(page) &&
> -             page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
> +             page_dev_pagemap(page)->type == MEMORY_DEVICE_PCI_P2PDMA;
>  }
>  
>  static inline bool is_device_coherent_page(const struct page *page)
>  {
>       return is_zone_device_page(page) &&
> -             page->pgmap->type == MEMORY_DEVICE_COHERENT;
> +             page_dev_pagemap(page)->type == MEMORY_DEVICE_COHERENT;
>  }
>  
>  static inline bool folio_is_device_coherent(const struct folio *folio)
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index 002e49b..9a85a82 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -207,8 +207,8 @@ struct migrate_vma {
>       unsigned long           end;
>  
>       /*
> -      * Set to the owner value also stored in page->pgmap->owner for
> -      * migrating out of device private memory. The flags also need to
> +      * Set to the owner value also stored in page_dev_pagemap(page)->owner
> +      * for migrating out of device private memory. The flags also need to
>        * be set to MIGRATE_VMA_SELECT_DEVICE_PRIVATE.
>        * The caller should always set this field when using mmu notifier
>        * callbacks to avoid device MMU invalidations for device private
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 6e3bdf8..c2f1d53 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -129,8 +129,11 @@ struct page {
>                       unsigned long compound_head;    /* Bit zero is set */
>               };
>               struct {        /* ZONE_DEVICE pages */
> -                     /** @pgmap: Points to the hosting device page map. */
> -                     struct dev_pagemap *pgmap;
> +                     /*
> +                      * The first word is used for compound_head or folio
> +                      * pgmap
> +                      */
> +                     void *_unused;

I would feel better with "_unused_pgmap_compound_head", similar to how
_unused_slab_obj_exts in 'struct foio' indicates the placeholer
contents.

>                       void *zone_device_data;
>                       /*
>                        * ZONE_DEVICE private pages are counted as being
> @@ -299,6 +302,7 @@ typedef struct {
>   * @_refcount: Do not access this member directly.  Use folio_ref_count()
>   *    to find how many references there are to this folio.
>   * @memcg_data: Memory Control Group data.
> + * @pgmap: Metadata for ZONE_DEVICE mappings
>   * @virtual: Virtual address in the kernel direct map.
>   * @_last_cpupid: IDs of last CPU and last process that accessed the folio.
>   * @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
> @@ -337,6 +341,7 @@ struct folio {
>       /* private: */
>                               };
>       /* public: */
> +                     struct dev_pagemap *pgmap;
>                       };
>                       struct address_space *mapping;
>                       pgoff_t index;
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 17506e4..e191434 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1134,6 +1134,12 @@ static inline bool is_zone_device_page(const struct 
> page *page)
>       return page_zonenum(page) == ZONE_DEVICE;
>  }
>  
> +static inline struct dev_pagemap *page_dev_pagemap(const struct page *page)
> +{
> +     WARN_ON(!is_zone_device_page(page));

VM_WARN_ON()?

With the above fixups:

Reviewed-by: Dan Williams <dan.j.willi...@intel.com>

Reply via email to