Re: [PATCH v5 01/10] mm: add zone device coherent type memory support

2022-01-31 Thread Alistair Popple
Looks good, feel free to add:

Reviewed-by: Alistair Popple 

On Saturday, 29 January 2022 7:08:16 AM AEDT Alex Sierra wrote:
> Device memory that is cache coherent from device and CPU point of view.
> This is used on platforms that have an advanced system bus (like CAPI
> or CXL). Any page of a process can be migrated to such memory. However,
> no one should be allowed to pin such memory so that it can always be
> evicted.
> 
> Signed-off-by: Alex Sierra 
> Acked-by: Felix Kuehling 
> ---
> v4:
> - use the same system entry path for coherent device pages at
> migrate_vma_insert_page.
> 
> - Add coherent device type support for try_to_migrate /
> try_to_migrate_one.
> ---
>  include/linux/memremap.h |  8 +++
>  include/linux/mm.h   | 16 ++
>  mm/memcontrol.c  |  6 +++---
>  mm/memory-failure.c  |  8 +--
>  mm/memremap.c| 14 -
>  mm/migrate.c | 45 
>  mm/rmap.c|  5 +++--
>  7 files changed, 71 insertions(+), 31 deletions(-)
> 
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index 1fafcc38acba..727b8c789193 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -39,6 +39,13 @@ struct vmem_altmap {
>   * A more complete discussion of unaddressable memory may be found in
>   * include/linux/hmm.h and Documentation/vm/hmm.rst.
>   *
> + * MEMORY_DEVICE_COHERENT:
> + * Device memory that is cache coherent from device and CPU point of view. 
> This
> + * is used on platforms that have an advanced system bus (like CAPI or CXL). 
> A
> + * driver can hotplug the device memory using ZONE_DEVICE and with that 
> memory
> + * type. Any page of a process can be migrated to such memory. However no one
> + * should be allowed to pin such memory so that it can always be evicted.
> + *
>   * MEMORY_DEVICE_FS_DAX:
>   * Host memory that has similar access semantics as System RAM i.e. DMA
>   * coherent and supports page pinning. In support of coordinating page
> @@ -59,6 +66,7 @@ struct vmem_altmap {
>  enum memory_type {
>   /* 0 is reserved to catch uninitialized type fields */
>   MEMORY_DEVICE_PRIVATE = 1,
> + MEMORY_DEVICE_COHERENT,
>   MEMORY_DEVICE_FS_DAX,
>   MEMORY_DEVICE_GENERIC,
>   MEMORY_DEVICE_PCI_P2PDMA,
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index e1a84b1e6787..0c61bf40edef 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1106,6 +1106,7 @@ static inline bool page_is_devmap_managed(struct page 
> *page)
>   return false;
>   switch (page->pgmap->type) {
>   case MEMORY_DEVICE_PRIVATE:
> + case MEMORY_DEVICE_COHERENT:
>   case MEMORY_DEVICE_FS_DAX:
>   return true;
>   default:
> @@ -1135,6 +1136,21 @@ static inline bool is_device_private_page(const struct 
> page *page)
>   page->pgmap->type == MEMORY_DEVICE_PRIVATE;
>  }
>  
> +static inline bool is_device_coherent_page(const struct page *page)
> +{
> + return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
> + is_zone_device_page(page) &&
> + page->pgmap->type == MEMORY_DEVICE_COHERENT;
> +}
> +
> +static inline bool is_dev_private_or_coherent_page(const struct page *page)
> +{
> + return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
> + is_zone_device_page(page) &&
> + (page->pgmap->type == MEMORY_DEVICE_PRIVATE ||
> + page->pgmap->type == MEMORY_DEVICE_COHERENT);
> +}
> +
>  static inline bool is_pci_p2pdma_page(const struct page *page)
>  {
>   return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 09d342c7cbd0..0882b5b2a857 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5691,8 +5691,8 @@ static int mem_cgroup_move_account(struct page *page,
>   *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
>   * target for charge migration. if @target is not NULL, the entry is 
> stored
>   * in target->ent.
> - *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is 
> MEMORY_DEVICE_PRIVATE
> - * (so ZONE_DEVICE page and thus not on the lru).
> + *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is device memory and
> + *   thus not on the lru.
>   * For now we such page is charge like a regular page would be as for all
>   * intent and purposes it is just special memory taking the place of a
>   * regular page.
> @@ -5726,7 +5726,7 @@ static enum mc_target_type get_mctgt_type(struct 
> vm_area_struct *vma,
>*/
>   if (page_memcg(page) == mc.from) {
>   ret = MC_TARGET_PAGE;
> - if (is_device_private_page(page))
> + if (is_dev_private_or_coherent_page(page))
>   ret = MC_TARGET_DEVICE;
>   if (target)
>   target->page = page;
> diff --git 

[PATCH v5 01/10] mm: add zone device coherent type memory support

2022-01-28 Thread Alex Sierra
Device memory that is cache coherent from device and CPU point of view.
This is used on platforms that have an advanced system bus (like CAPI
or CXL). Any page of a process can be migrated to such memory. However,
no one should be allowed to pin such memory so that it can always be
evicted.

Signed-off-by: Alex Sierra 
Acked-by: Felix Kuehling 
---
v4:
- use the same system entry path for coherent device pages at
migrate_vma_insert_page.

- Add coherent device type support for try_to_migrate /
try_to_migrate_one.
---
 include/linux/memremap.h |  8 +++
 include/linux/mm.h   | 16 ++
 mm/memcontrol.c  |  6 +++---
 mm/memory-failure.c  |  8 +--
 mm/memremap.c| 14 -
 mm/migrate.c | 45 
 mm/rmap.c|  5 +++--
 7 files changed, 71 insertions(+), 31 deletions(-)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 1fafcc38acba..727b8c789193 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -39,6 +39,13 @@ struct vmem_altmap {
  * A more complete discussion of unaddressable memory may be found in
  * include/linux/hmm.h and Documentation/vm/hmm.rst.
  *
+ * MEMORY_DEVICE_COHERENT:
+ * Device memory that is cache coherent from device and CPU point of view. This
+ * is used on platforms that have an advanced system bus (like CAPI or CXL). A
+ * driver can hotplug the device memory using ZONE_DEVICE and with that memory
+ * type. Any page of a process can be migrated to such memory. However no one
+ * should be allowed to pin such memory so that it can always be evicted.
+ *
  * MEMORY_DEVICE_FS_DAX:
  * Host memory that has similar access semantics as System RAM i.e. DMA
  * coherent and supports page pinning. In support of coordinating page
@@ -59,6 +66,7 @@ struct vmem_altmap {
 enum memory_type {
/* 0 is reserved to catch uninitialized type fields */
MEMORY_DEVICE_PRIVATE = 1,
+   MEMORY_DEVICE_COHERENT,
MEMORY_DEVICE_FS_DAX,
MEMORY_DEVICE_GENERIC,
MEMORY_DEVICE_PCI_P2PDMA,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e1a84b1e6787..0c61bf40edef 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1106,6 +1106,7 @@ static inline bool page_is_devmap_managed(struct page 
*page)
return false;
switch (page->pgmap->type) {
case MEMORY_DEVICE_PRIVATE:
+   case MEMORY_DEVICE_COHERENT:
case MEMORY_DEVICE_FS_DAX:
return true;
default:
@@ -1135,6 +1136,21 @@ static inline bool is_device_private_page(const struct 
page *page)
page->pgmap->type == MEMORY_DEVICE_PRIVATE;
 }
 
+static inline bool is_device_coherent_page(const struct page *page)
+{
+   return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
+   is_zone_device_page(page) &&
+   page->pgmap->type == MEMORY_DEVICE_COHERENT;
+}
+
+static inline bool is_dev_private_or_coherent_page(const struct page *page)
+{
+   return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
+   is_zone_device_page(page) &&
+   (page->pgmap->type == MEMORY_DEVICE_PRIVATE ||
+   page->pgmap->type == MEMORY_DEVICE_COHERENT);
+}
+
 static inline bool is_pci_p2pdma_page(const struct page *page)
 {
return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 09d342c7cbd0..0882b5b2a857 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5691,8 +5691,8 @@ static int mem_cgroup_move_account(struct page *page,
  *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
  * target for charge migration. if @target is not NULL, the entry is stored
  * in target->ent.
- *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is 
MEMORY_DEVICE_PRIVATE
- * (so ZONE_DEVICE page and thus not on the lru).
+ *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is device memory and
+ *   thus not on the lru.
  * For now we such page is charge like a regular page would be as for all
  * intent and purposes it is just special memory taking the place of a
  * regular page.
@@ -5726,7 +5726,7 @@ static enum mc_target_type get_mctgt_type(struct 
vm_area_struct *vma,
 */
if (page_memcg(page) == mc.from) {
ret = MC_TARGET_PAGE;
-   if (is_device_private_page(page))
+   if (is_dev_private_or_coherent_page(page))
ret = MC_TARGET_DEVICE;
if (target)
target->page = page;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 14ae5c18e776..e83740f7f05e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1611,12 +1611,16 @@ static int memory_failure_dev_pagemap(unsigned long 
pfn, int flags,
goto unlock;
}
 
-   if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+