When a ZONE_DEVICE page's refcount reaches 1 it means the page is free:
nobody holds a reference on it anymore (only the device to which the
memory belongs does). Add a callback and call it when that happens, so
device drivers can implement their own free page management.

Signed-off-by: Jérôme Glisse <jgli...@redhat.com>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: Ross Zwisler <ross.zwis...@linux.intel.com>
---
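[ For review only, not part of this patch: a minimal sketch of how a driver
  might use the new page_free callback to recycle pages onto its own free
  list.  The names my_dev, my_dev_free_list_add and mdev->ref are made up
  for the illustration. ]

    static void my_dev_page_free(struct page *page, void *data)
    {
            struct my_dev *mdev = data;

            /*
             * Refcount is 1 here: no other user holds a reference, so
             * the page can go back onto the driver's free list.
             */
            my_dev_free_list_add(mdev, page);
    }

    /* at probe time, pass the callback and its private data */
    addr = devm_memremap_pages(dev, &res, &mdev->ref, NULL,
                               my_dev_page_free, mdev);
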
 drivers/dax/pmem.c                |  3 ++-
 drivers/nvdimm/pmem.c             |  5 +++--
 include/linux/memremap.h          | 17 ++++++++++++++---
 kernel/memremap.c                 | 14 +++++++++++++-
 tools/testing/nvdimm/test/iomap.c |  2 +-
 5 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 033f49b3..66af7b1 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -111,7 +111,8 @@ static int dax_pmem_probe(struct device *dev)
        if (rc)
                return rc;
 
-       addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
+       addr = devm_memremap_pages(dev, &res, &dax_pmem->ref,
+                                  altmap, NULL, NULL);
        if (IS_ERR(addr))
                return PTR_ERR(addr);
 
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8755317..f2f1904 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -282,7 +282,7 @@ static int pmem_attach_disk(struct device *dev,
        pmem->pfn_flags = PFN_DEV;
        if (is_nd_pfn(dev)) {
                addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter,
-                               altmap);
+                                          altmap, NULL, NULL);
                pfn_sb = nd_pfn->pfn_sb;
                pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
                pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res);
@@ -291,7 +291,8 @@ static int pmem_attach_disk(struct device *dev,
                res->start += pmem->data_offset;
        } else if (pmem_should_map_pages(dev)) {
                addr = devm_memremap_pages(dev, &nsio->res,
-                               &q->q_usage_counter, NULL);
+                                          &q->q_usage_counter,
+                                          NULL, NULL, NULL);
                pmem->pfn_flags |= PFN_MAP;
        } else
                addr = devm_memremap(dev, pmem->phys_addr,
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index aee8477..582561f 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -35,23 +35,31 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
 }
 #endif
 
+typedef void (*dev_page_free_t)(struct page *page, void *data);
+
 /**
  * struct dev_pagemap - metadata for ZONE_DEVICE mappings
+ * @page_free: free page callback, called when the page refcount reaches 1
  * @altmap: pre-allocated/reserved memory for vmemmap allocations
  * @res: physical address range covered by @ref
  * @ref: reference count that pins the devm_memremap_pages() mapping
  * @dev: host device of the mapping for debug
+ * @data: private data pointer for page_free
  */
 struct dev_pagemap {
+       dev_page_free_t page_free;
        struct vmem_altmap *altmap;
        const struct resource *res;
        struct percpu_ref *ref;
        struct device *dev;
+       void *data;
 };
 
 #ifdef CONFIG_ZONE_DEVICE
 void *devm_memremap_pages(struct device *dev, struct resource *res,
-               struct percpu_ref *ref, struct vmem_altmap *altmap);
+                         struct percpu_ref *ref, struct vmem_altmap *altmap,
+                         dev_page_free_t page_free,
+                         void *data);
 struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
 int devm_memunmap_pages(struct device *dev, void *start);
 
@@ -62,8 +70,11 @@ static inline bool dev_page_allow_migrate(const struct page *page)
 }
 #else
 static inline void *devm_memremap_pages(struct device *dev,
-               struct resource *res, struct percpu_ref *ref,
-               struct vmem_altmap *altmap)
+                                       struct resource *res,
+                                       struct percpu_ref *ref,
+                                       struct vmem_altmap *altmap,
+                                       dev_page_free_t page_free,
+                                       void *data)
 {
        /*
         * Fail attempts to call devm_memremap_pages() without
diff --git a/kernel/memremap.c b/kernel/memremap.c
index f0d4ea2..7e47e64 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -190,6 +190,12 @@ EXPORT_SYMBOL(get_zone_device_page);
 
 void put_zone_device_page(struct page *page)
 {
+       /*
+        * If refcount is 1 the page is free and its refcount is stable, as
+        * nobody else holds a reference on it.
+        */
+       if (page->pgmap->page_free && page_count(page) == 1)
+               page->pgmap->page_free(page, page->pgmap->data);
        put_dev_pagemap(page->pgmap);
 }
 EXPORT_SYMBOL(put_zone_device_page);
@@ -270,6 +276,8 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
  * @res: "host memory" address range
  * @ref: a live per-cpu reference count
  * @altmap: optional descriptor for allocating the memmap from @res
+ * @page_free: callback called when the page refcount reaches 1, i.e. it is free
+ * @data: private data pointer for page_free
  *
  * Notes:
  * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
@@ -280,7 +288,9 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
  *    this is not enforced.
  */
 void *devm_memremap_pages(struct device *dev, struct resource *res,
-               struct percpu_ref *ref, struct vmem_altmap *altmap)
+                         struct percpu_ref *ref, struct vmem_altmap *altmap,
+                         dev_page_free_t page_free,
+                         void *data)
 {
        resource_size_t key, align_start, align_size, align_end;
        pgprot_t pgprot = PAGE_KERNEL;
@@ -322,6 +332,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
        }
        pgmap->ref = ref;
        pgmap->res = &page_map->res;
+       pgmap->page_free = page_free;
+       pgmap->data = data;
 
        mutex_lock(&pgmap_lock);
        error = 0;
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 64cae1a..9992a7c 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -112,7 +112,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
 
        if (nfit_res)
                return nfit_res->buf + offset - nfit_res->res.start;
-       return devm_memremap_pages(dev, res, ref, altmap);
+       return devm_memremap_pages(dev, res, ref, altmap, NULL, NULL);
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);
 
-- 
2.4.3
