On 12/5/25 17:58, Kalyazin, Nikita wrote:
> +static int kvm_gmem_folio_zap_direct_map(struct folio *folio)
> +{
> + int r = 0;
> + unsigned long addr = (unsigned long) folio_address(folio);
> + u64 gmem_flags = GMEM_I(folio_inode(folio))->flags;
> +
> + if (kvm_gmem_folio_no_direct_map(folio) || !(gmem_flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP))
> + goto out;
> +
> + r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
> + false);
> +
> + if (r)
> + goto out;
> +
> + folio->private = (void *) KVM_GMEM_FOLIO_NO_DIRECT_MAP;
With Dave's suggestion on patch 1/13 to have folio_zap_direct_map(), setting
this folio->private flag between the zap and the TLB flush wouldn't be
possible, but it's not an issue to set it before the zap, right?
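Roughly what I have in mind (just a sketch, assuming folio_zap_direct_map()
ends up taking the folio, doing the invalidate plus TLB flush internally, and
returning 0 on success):

static int kvm_gmem_folio_zap_direct_map(struct folio *folio)
{
	u64 gmem_flags = GMEM_I(folio_inode(folio))->flags;
	int r = 0;

	if (kvm_gmem_folio_no_direct_map(folio) ||
	    !(gmem_flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP))
		goto out;

	/* mark the folio before zapping ... */
	folio->private = (void *) KVM_GMEM_FOLIO_NO_DIRECT_MAP;

	/* ... the helper pairs the invalidate with the TLB flush internally */
	r = folio_zap_direct_map(folio);
	if (r)
		folio->private = NULL;	/* undo the mark if the zap failed */
out:
	return r;
}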
> + flush_tlb_kernel_range(addr, addr + folio_size(folio));
> +
> +out:
> + return r;
> +}
> +
> +static void kvm_gmem_folio_restore_direct_map(struct folio *folio)
> +{
> + /*
> + * Direct map restoration cannot fail, as the only error condition
> + * for direct map manipulation is failure to allocate page tables
> + * when splitting huge pages, but this split would have already
> + * happened in set_direct_map_invalid_noflush() in kvm_gmem_folio_zap_direct_map().
> + * Thus set_direct_map_valid_noflush() here only updates prot bits.
> + */
> + if (kvm_gmem_folio_no_direct_map(folio))
> + set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
> + true);
I think you're missing clearing KVM_GMEM_FOLIO_NO_DIRECT_MAP from
folio->private here, which means that if kvm_gmem_folio_zap_direct_map() is
called on this folio again in the future, it will do nothing?
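I.e. something along these lines (a sketch only, assuming folio->private
carries nothing besides this flag here):

static void kvm_gmem_folio_restore_direct_map(struct folio *folio)
{
	if (kvm_gmem_folio_no_direct_map(folio)) {
		set_direct_map_valid_noflush(folio_page(folio, 0),
					     folio_nr_pages(folio), true);
		/* clear the marker so a later zap isn't silently skipped */
		folio->private = NULL;
	}
}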
> +}
> +
> static inline void kvm_gmem_mark_prepared(struct folio *folio)
> {
> folio_mark_uptodate(folio);
> @@ -398,6 +444,7 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
> struct inode *inode = file_inode(vmf->vma->vm_file);
> struct folio *folio;
> vm_fault_t ret = VM_FAULT_LOCKED;
> + int err;
>
> if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
> return VM_FAULT_SIGBUS;
> @@ -423,6 +470,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
> kvm_gmem_mark_prepared(folio);
> }
>
> + err = kvm_gmem_folio_zap_direct_map(folio);
> + if (err) {
> + ret = vmf_error(err);
> + goto out_folio;
> + }
> +
> vmf->page = folio_file_page(folio, vmf->pgoff);
>
> out_folio:
> @@ -533,6 +586,8 @@ static void kvm_gmem_free_folio(struct folio *folio)
> kvm_pfn_t pfn = page_to_pfn(page);
> int order = folio_order(folio);
>
> + kvm_gmem_folio_restore_direct_map(folio);
> +
> kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
> }
>
> @@ -596,6 +651,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> /* Unmovable mappings are supposed to be marked unevictable as well. */
> WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>
> + if (flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP)
> + mapping_set_no_direct_map(inode->i_mapping);
> +
> GMEM_I(inode)->flags = flags;
>
> file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, &kvm_gmem_fops);
> @@ -807,6 +865,8 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
> if (!is_prepared)
> r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
>
> + kvm_gmem_folio_zap_direct_map(folio);
> +
> folio_unlock(folio);
>
> if (!r)