On Wed, 2025-09-24 at 16:22 +0100, "Roy, Patrick" wrote:

[...]

> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 55b8d739779f..b7129c4868c5 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -4,6 +4,9 @@
>  #include <linux/kvm_host.h>
>  #include <linux/pagemap.h>
>  #include <linux/anon_inodes.h>
> +#include <linux/set_memory.h>
> +
> +#include <asm/tlbflush.h>
>  
>  #include "kvm_mm.h"
>  
> @@ -42,6 +45,44 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo
>       return 0;
>  }
>  
> +#define KVM_GMEM_FOLIO_NO_DIRECT_MAP BIT(0)
> +
> +static bool kvm_gmem_folio_no_direct_map(struct folio *folio)
> +{
> +     return ((u64) folio->private) & KVM_GMEM_FOLIO_NO_DIRECT_MAP;
> +}
> +
> +static int kvm_gmem_folio_zap_direct_map(struct folio *folio)
> +{
> +     if (kvm_gmem_folio_no_direct_map(folio))
> +             return 0;
> +
> +     int r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
> +                                      false);
> +
> +     if (!r) {
> +             unsigned long addr = (unsigned long) folio_address(folio);
> +             folio->private = (void *) ((u64) folio->private & KVM_GMEM_FOLIO_NO_DIRECT_MAP);
> +             flush_tlb_kernel_range(addr, addr + folio_size(folio));
> +     }
> +
> +     return r;
> +}

No idea how I managed to mess this function up so completely, but it
should be more like

static int kvm_gmem_folio_zap_direct_map(struct folio *folio)
{
        int r = 0;
        unsigned long addr = (unsigned long) folio_address(folio);
        u64 gmem_flags = (u64) folio_inode(folio)->i_private;

        if (kvm_gmem_folio_no_direct_map(folio) || !(gmem_flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP))
                goto out;

        r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio), false);

        if (r)
                goto out;

        folio->private = (void *) KVM_GMEM_FOLIO_NO_DIRECT_MAP;
        flush_tlb_kernel_range(addr, addr + folio_size(folio));

out:
        return r;
}

The version I sent (a) does not respect the flags passed to guest_memfd at
creation time, and (b) does not actually set the bit in folio->private,
since it only ANDs the existing value with the flag.
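
For clarity, here is a standalone userspace snippet (illustrative only, not
kernel code) showing why the old AND can never turn the bit on, while OR --
or the plain assignment in the fixed version -- does:

#include <stdint.h>
#include <stdio.h>

#define KVM_GMEM_FOLIO_NO_DIRECT_MAP (1ULL << 0)

int main(void)
{
        uint64_t private = 0;   /* folio->private starts out with the bit clear */

        /* old code: AND with the mask -- a clear bit stays clear */
        uint64_t broken = private & KVM_GMEM_FOLIO_NO_DIRECT_MAP;

        /* fixed code: OR (or direct assignment) actually sets the bit */
        uint64_t fixed = private | KVM_GMEM_FOLIO_NO_DIRECT_MAP;

        printf("broken=0x%llx fixed=0x%llx\n",
               (unsigned long long)broken, (unsigned long long)fixed);
        return 0;
}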

> +static void kvm_gmem_folio_restore_direct_map(struct folio *folio)
> +{
> +     /*
> +      * Direct map restoration cannot fail, as the only error condition
> +      * for direct map manipulation is failure to allocate page tables
> +      * when splitting huge pages, but this split would have already
> +      * happened in set_direct_map_invalid_noflush() in kvm_gmem_folio_zap_direct_map().
> +      * Thus set_direct_map_valid_noflush() here only updates prot bits.
> +      */
> +     if (kvm_gmem_folio_no_direct_map(folio))
> +             set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
> +                                      true);
> +}
> +
>  static inline void kvm_gmem_mark_prepared(struct folio *folio)
>  {
>       folio_mark_uptodate(folio);
> @@ -324,13 +365,14 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
>       struct inode *inode = file_inode(vmf->vma->vm_file);
>       struct folio *folio;
>       vm_fault_t ret = VM_FAULT_LOCKED;
> +     int err;
>  
>       if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
>               return VM_FAULT_SIGBUS;
>  
>       folio = kvm_gmem_get_folio(inode, vmf->pgoff);
>       if (IS_ERR(folio)) {
> -             int err = PTR_ERR(folio);
> +             err = PTR_ERR(folio);
>  
>               if (err == -EAGAIN)
>                       return VM_FAULT_RETRY;
> @@ -348,6 +390,13 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
>               kvm_gmem_mark_prepared(folio);
>       }
>  
> +     err = kvm_gmem_folio_zap_direct_map(folio);
> +
> +     if (err) {
> +             ret = vmf_error(err);
> +             goto out_folio;
> +     }
> +
>       vmf->page = folio_file_page(folio, vmf->pgoff);
>  
>  out_folio:
> @@ -435,6 +484,8 @@ static void kvm_gmem_free_folio(struct folio *folio)
>       kvm_pfn_t pfn = page_to_pfn(page);
>       int order = folio_order(folio);
>  
> +     kvm_gmem_folio_restore_direct_map(folio);
> +
>       kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
>  }
>  
> @@ -499,6 +550,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>       /* Unmovable mappings are supposed to be marked unevictable as well. */
>       WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>  
> +     if (flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP)
> +             mapping_set_no_direct_map(inode->i_mapping);
> +
>       kvm_get_kvm(kvm);
>       gmem->kvm = kvm;
>       xa_init(&gmem->bindings);
> @@ -523,6 +577,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
>       if (kvm_arch_supports_gmem_mmap(kvm))
>               valid_flags |= GUEST_MEMFD_FLAG_MMAP;
>  
> +     if (kvm_arch_gmem_supports_no_direct_map())
> +             valid_flags |= GUEST_MEMFD_FLAG_NO_DIRECT_MAP;
> +
>       if (flags & ~valid_flags)
>               return -EINVAL;
>  
> @@ -687,6 +744,8 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
>       if (!is_prepared)
>               r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
>  
> +     kvm_gmem_folio_zap_direct_map(folio);
> +
>       folio_unlock(folio);
>  
>       if (!r)

[...]
