From: Stanislav Kinsburskii <[email protected]> Sent: Monday, 
March 30, 2026 1:05 PM
> 
> Map any populated pages into the hypervisor upfront when creating a
> movable region, rather than waiting for faults. Previously, movable
> regions were created with all pages marked as HV_MAP_GPA_NO_ACCESS
> regardless of whether the userspace mapping contained populated pages.
> 
> This guarantees that if the caller passes a populated mapping, those
> present pages will be mapped into the hypervisor immediately during
> region creation instead of being faulted in later.
> 
> Signed-off-by: Stanislav Kinsburskii <[email protected]>
> ---
>  drivers/hv/mshv_regions.c   |   65 
> ++++++++++++++++++++++++++++++++-----------
>  drivers/hv/mshv_root.h      |    1 +
>  drivers/hv/mshv_root_main.c |   10 +------
>  3 files changed, 50 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c
> index 133ec7771812..28d3f488d89f 100644
> --- a/drivers/hv/mshv_regions.c
> +++ b/drivers/hv/mshv_regions.c
> @@ -519,7 +519,8 @@ int mshv_region_get(struct mshv_mem_region *region)
>  static int mshv_region_hmm_fault_and_lock(struct mshv_mem_region *region,
>                                         unsigned long start,
>                                         unsigned long end,
> -                                       unsigned long *pfns)
> +                                       unsigned long *pfns,
> +                                       bool do_fault)
>  {
>       struct hmm_range range = {
>               .notifier = &region->mreg_mni,
> @@ -540,9 +541,12 @@ static int mshv_region_hmm_fault_and_lock(struct 
> mshv_mem_region *region,
>               range.hmm_pfns = pfns;
>               range.start = start;
>               range.end = min(vma->vm_end, end);
> -             range.default_flags = HMM_PFN_REQ_FAULT;
> -             if (vma->vm_flags & VM_WRITE)
> -                     range.default_flags |= HMM_PFN_REQ_WRITE;
> +             range.default_flags = 0;
> +             if (do_fault) {
> +                     range.default_flags = HMM_PFN_REQ_FAULT;
> +                     if (vma->vm_flags & VM_WRITE)
> +                             range.default_flags |= HMM_PFN_REQ_WRITE;
> +             }
> 
>               ret = hmm_range_fault(&range);
>               if (ret)
> @@ -567,26 +571,40 @@ static int mshv_region_hmm_fault_and_lock(struct 
> mshv_mem_region *region,
>  }
> 
>  /**
> - * mshv_region_range_fault - Handle memory range faults for a given region.
> - * @region: Pointer to the memory region structure.
> - * @pfn_offset: Offset of the page within the region.
> - * @pfn_count: Number of pages to handle.
> + * mshv_region_collect_and_map - Collect PFNs for a user range and map them
> + * @region    : memory region being processed
> + * @pfn_offset: PFNs offset within the region
> + * @pfn_count : number of PFNs to process
> + * @do_fault  : if true, fault in missing pages;
> + *              if false, collect only present pages
>   *
> - * This function resolves memory faults for a specified range of pages
> - * within a memory region. It uses HMM (Heterogeneous Memory Management)
> - * to fault in the required pages and updates the region's page array.
> + * Collects PFNs for the specified portion of @region from the
> + * corresponding userspace VMA and maps them into the hypervisor. The

Actually, this should be "userspace VMAs" (i.e., plural), since the range may span multiple VMAs (the helper loops, clamping each pass to vma->vm_end).

> + * behavior depends on @do_fault:
>   *
> - * Return: 0 on success, negative error code on failure.
> + * - true: Fault in missing pages from userspace, ensuring all pages in the
> + *   range are present. Used for on-demand page population.
> + * - false: Collect PFNs only for pages already present in userspace,
> + *   leaving missing pages as invalid PFN markers.
> + *   Used for initial region setup.
> + *
> + * Collected PFNs are stored in region->mreg_pfns[] with HMM bookkeeping
> + * flags cleared, then the range is mapped into the hypervisor. Present
> + * PFNs get mapped with region access permissions; missing PFNs (zero
> + * entries) get mapped with no-access permissions.

Hmmm. The missing PFNs are simply skipped, and the corresponding entries
in mreg_pfns[] are not updated. Is each such entry known to already be
set to MSHV_INVALID_PFN? When mapping a new movable region, that appears
to be the case. I'm less sure about the mshv_region_range_fault() path,
though mshv_region_invalidate_pfns() does initialize entries that way
when they are invalidated. I'd add a comment at that point in the code
documenting this assumption, as it took me a while to figure out.

Also, does "zero entries" refer to the PFN values that hmm_range_fault()
returns via mshv_region_hmm_fault_and_lock() for non-present pages?
The phrase "zero entries" is confusing here — please clarify what a
"zero entry" actually is.

> + *
> + * Return: 0 on success, negative errno on failure.
>   */
> -static int mshv_region_range_fault(struct mshv_mem_region *region,
> -                                u64 pfn_offset, u64 pfn_count)
> +static int mshv_region_collect_and_map(struct mshv_mem_region *region,
> +                                    u64 pfn_offset, u64 pfn_count,
> +                                    bool do_fault)
>  {
>       unsigned long start, end;
>       unsigned long *pfns;
>       int ret;
>       u64 i;
> 
> -     pfns = kmalloc_array(pfn_count, sizeof(*pfns), GFP_KERNEL);
> +     pfns = vmalloc_array(pfn_count, sizeof(unsigned long));
>       if (!pfns)
>               return -ENOMEM;
> 
> @@ -595,7 +613,7 @@ static int mshv_region_range_fault(struct mshv_mem_region 
> *region,
> 
>       do {
>               ret = mshv_region_hmm_fault_and_lock(region, start, end,
> -                                                  pfns);
> +                                                  pfns, do_fault);
>       } while (ret == -EBUSY);
> 
>       if (ret)
> @@ -613,10 +631,17 @@ static int mshv_region_range_fault(struct 
> mshv_mem_region *region,
> 
>       mutex_unlock(&region->mreg_mutex);
>  out:
> -     kfree(pfns);
> +     vfree(pfns);
>       return ret;
>  }
> 
> +static int mshv_region_range_fault(struct mshv_mem_region *region,
> +                                u64 pfn_offset, u64 pfn_count)
> +{
> +     return mshv_region_collect_and_map(region, pfn_offset, pfn_count,
> +                                        true);
> +}
> +
>  bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn)
>  {
>       u64 pfn_offset, pfn_count;
> @@ -800,3 +825,9 @@ int mshv_map_pinned_region(struct mshv_mem_region
> *region)
>  err_out:
>       return ret;
>  }
> +
> +int mshv_map_movable_region(struct mshv_mem_region *region)
> +{
> +     return mshv_region_collect_and_map(region, 0, region->nr_pfns,
> +                                        false);
> +}
> diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
> index d2e65a137bf4..02c1c11f701c 100644
> --- a/drivers/hv/mshv_root.h
> +++ b/drivers/hv/mshv_root.h
> @@ -374,5 +374,6 @@ bool mshv_region_handle_gfn_fault(struct mshv_mem_region
> *region, u64 gfn);
>  void mshv_region_movable_fini(struct mshv_mem_region *region);
>  bool mshv_region_movable_init(struct mshv_mem_region *region);
>  int mshv_map_pinned_region(struct mshv_mem_region *region);
> +int mshv_map_movable_region(struct mshv_mem_region *region);
> 
>  #endif /* _MSHV_ROOT_H_ */
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index c393b5144e0b..91dab2a3bc92 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -1299,15 +1299,7 @@ mshv_map_user_memory(struct mshv_partition
> *partition,
>               ret = mshv_map_pinned_region(region);
>               break;
>       case MSHV_REGION_TYPE_MEM_MOVABLE:
> -             /*
> -              * For movable memory regions, remap with no access to let
> -              * the hypervisor track dirty pages, enabling pre-copy live
> -              * migration.
> -              */
> -             ret = hv_call_map_ram_pfns(partition->pt_id,
> -                                        region->start_gfn,
> -                                        region->nr_pfns,
> -                                        HV_MAP_GPA_NO_ACCESS, NULL);
> +             ret = mshv_map_movable_region(region);
>               break;
>       case MSHV_REGION_TYPE_MMIO:
>               ret = hv_call_map_mmio_pfns(partition->pt_id,
> 
> 


Reply via email to