When registering VTL0 memory via MSHV_ADD_VTL0_MEMORY, the kernel
computes pgmap->vmemmap_shift as the number of trailing zeros in the
OR of start_pfn and last_pfn, intending to use the largest compound
page order both endpoints are aligned to.

However, this value is not clamped to MAX_FOLIO_ORDER. Since the shift is
taken from the OR of both endpoints, a sufficiently aligned range (e.g.
physical range [0x800000000000, 0x800080000000), i.e. start_pfn=0x800000000
and last_pfn=0x800080000, whose OR has 19 trailing zeros) can produce a
shift larger than what memremap_pages() accepts, triggering a WARN and
returning -EINVAL:

  WARNING: ... memremap_pages+0x512/0x650
  requested folio size unsupported

The MAX_FOLIO_ORDER check was added by
commit 646b67d57589 ("mm/memremap: reject unreasonable folio/compound
page sizes in memremap_pages()").

Fix this by clamping vmemmap_shift to MAX_FOLIO_ORDER, so that for such
highly aligned ranges we request the largest order the kernel supports
rather than an out-of-range value.

Also fix the error path to propagate the actual error code from
devm_memremap_pages() instead of hard-coding -EFAULT, which was
masking the real -EINVAL return.

Fixes: 7bfe3b8ea6e3 ("Drivers: hv: Introduce mshv_vtl driver")
Cc: [email protected]
Signed-off-by: Naman Jain <[email protected]>
---
Changes since v1:
https://lore.kernel.org/all/[email protected]/
Addressed Michael's comments:
* remove MAX_FOLIO_ORDER value related text in commit msg
* Change the commit summary to keep the "mshv_vtl:" prefix
* Add comments regarding last_pfn to avoid confusion
* use min instead of min_t
---
 drivers/hv/mshv_vtl_main.c | 12 +++++++++---
 include/uapi/linux/mshv.h  |  2 +-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c
index 5856975f32e1..c19400701467 100644
--- a/drivers/hv/mshv_vtl_main.c
+++ b/drivers/hv/mshv_vtl_main.c
@@ -386,7 +386,6 @@ static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl 
*vtl, void __user *arg)
 
        if (copy_from_user(&vtl0_mem, arg, sizeof(vtl0_mem)))
                return -EFAULT;
-       /* vtl0_mem.last_pfn is excluded in the pagemap range for VTL0 as per 
design */
        if (vtl0_mem.last_pfn <= vtl0_mem.start_pfn) {
                dev_err(vtl->module_dev, "range start pfn (%llx) > end pfn 
(%llx)\n",
                        vtl0_mem.start_pfn, vtl0_mem.last_pfn);
@@ -397,6 +396,10 @@ static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl 
*vtl, void __user *arg)
        if (!pgmap)
                return -ENOMEM;
 
+       /*
+        * vtl0_mem.last_pfn is excluded in the pagemap range for VTL0 as per 
design.
+        * last_pfn is not reserved or wasted, and reflects 'start_pfn + size' 
of pagemap range.
+        */
        pgmap->ranges[0].start = PFN_PHYS(vtl0_mem.start_pfn);
        pgmap->ranges[0].end = PFN_PHYS(vtl0_mem.last_pfn) - 1;
        pgmap->nr_range = 1;
@@ -405,8 +408,11 @@ static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl 
*vtl, void __user *arg)
        /*
         * Determine the highest page order that can be used for the given 
memory range.
         * This works best when the range is aligned; i.e. both the start and 
the length.
+        * Clamp to MAX_FOLIO_ORDER to avoid a WARN in memremap_pages() when 
the range
+        * alignment exceeds the maximum supported folio order for this kernel 
config.
         */
-       pgmap->vmemmap_shift = count_trailing_zeros(vtl0_mem.start_pfn | 
vtl0_mem.last_pfn);
+       pgmap->vmemmap_shift = min(count_trailing_zeros(vtl0_mem.start_pfn | 
vtl0_mem.last_pfn),
+                                  MAX_FOLIO_ORDER);
        dev_dbg(vtl->module_dev,
                "Add VTL0 memory: start: 0x%llx, end_pfn: 0x%llx, page order: 
%lu\n",
                vtl0_mem.start_pfn, vtl0_mem.last_pfn, pgmap->vmemmap_shift);
@@ -415,7 +421,7 @@ static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl 
*vtl, void __user *arg)
        if (IS_ERR(addr)) {
                dev_err(vtl->module_dev, "devm_memremap_pages error: %ld\n", 
PTR_ERR(addr));
                kfree(pgmap);
-               return -EFAULT;
+               return PTR_ERR(addr);
        }
 
        /* Don't free pgmap, since it has to stick around until the memory
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index e0645a34b55b..32ff92b6342b 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -357,7 +357,7 @@ struct mshv_vtl_sint_post_msg {
 
 struct mshv_vtl_ram_disposition {
        __u64 start_pfn;
-       __u64 last_pfn;
+       __u64 last_pfn; /* last_pfn is excluded from the range [start_pfn, 
last_pfn) */
 };
 
 struct mshv_vtl_set_poll_file {
-- 
2.43.0


Reply via email to