"Kalyazin, Nikita" <[email protected]> writes: > From: Patrick Roy <[email protected]> > > Add AS_NO_DIRECT_MAP for mappings where direct map entries of folios are > set to not present. Currently, mappings that match this description are > secretmem mappings (memfd_secret()). Later, some guest_memfd > configurations will also fall into this category. > > Reject this new type of mappings in all locations that currently reject > secretmem mappings, on the assumption that if secretmem mappings are > rejected somewhere, it is precisely because of an inability to deal with > folios without direct map entries, and then make memfd_secret() use > AS_NO_DIRECT_MAP on its address_space to drop its special > vma_is_secretmem()/secretmem_mapping() checks. > > Use a new flag instead of overloading AS_INACCESSIBLE (which is already > set by guest_memfd) because not all guest_memfd mappings will end up > being direct map removed (e.g. in pKVM setups, parts of guest_memfd that > can be mapped to userspace should also be GUP-able, and generally not > have restrictions on who can access it). 
> > Acked-by: Mike Rapoport (Microsoft) <[email protected]> > Acked-by: David Hildenbrand (Red Hat) <[email protected]> > Signed-off-by: Patrick Roy <[email protected]> > Acked-by: Vlastimil Babka <[email protected]> > Signed-off-by: Nikita Kalyazin <[email protected]> > --- > include/linux/pagemap.h | 16 ++++++++++++++++ > include/linux/secretmem.h | 18 ------------------ > lib/buildid.c | 4 ++-- > mm/gup.c | 10 +++++----- > mm/mlock.c | 2 +- > mm/secretmem.c | 8 ++------ > 6 files changed, 26 insertions(+), 32 deletions(-) > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > index 31a848485ad9..6ce7301d474a 100644 > --- a/include/linux/pagemap.h > +++ b/include/linux/pagemap.h > @@ -210,6 +210,7 @@ enum mapping_flags { > AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, > AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't > account usage to user cgroups */ > + AS_NO_DIRECT_MAP = 11, /* Folios in the mapping are not in the direct > map */ > /* Bits 16-25 are used for FOLIO_ORDER */ > AS_FOLIO_ORDER_BITS = 5, > AS_FOLIO_ORDER_MIN = 16, > @@ -345,6 +346,21 @@ static inline bool > mapping_writeback_may_deadlock_on_reclaim(const struct addres > return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); > } > > +static inline void mapping_set_no_direct_map(struct address_space *mapping) > +{ > + set_bit(AS_NO_DIRECT_MAP, &mapping->flags); > +} > + > +static inline bool mapping_no_direct_map(const struct address_space *mapping) > +{ > + return test_bit(AS_NO_DIRECT_MAP, &mapping->flags); > +} > + > +static inline bool vma_has_no_direct_map(const struct vm_area_struct *vma) > +{ > + return vma->vm_file && mapping_no_direct_map(vma->vm_file->f_mapping); > +} > + > static inline gfp_t mapping_gfp_mask(const struct address_space *mapping) > { > return mapping->gfp_mask; > diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h > index e918f96881f5..0ae1fb057b3d 100644 > --- a/include/linux/secretmem.h > +++ 
b/include/linux/secretmem.h > @@ -4,28 +4,10 @@ > > #ifdef CONFIG_SECRETMEM > > -extern const struct address_space_operations secretmem_aops; > - > -static inline bool secretmem_mapping(struct address_space *mapping) > -{ > - return mapping->a_ops == &secretmem_aops; > -} > - > -bool vma_is_secretmem(struct vm_area_struct *vma); > bool secretmem_active(void); > > #else > > -static inline bool vma_is_secretmem(struct vm_area_struct *vma) > -{ > - return false; > -} > - > -static inline bool secretmem_mapping(struct address_space *mapping) > -{ > - return false; > -} > - > static inline bool secretmem_active(void) > { > return false; > diff --git a/lib/buildid.c b/lib/buildid.c > index aaf61dfc0919..b78fe5797e9c 100644 > --- a/lib/buildid.c > +++ b/lib/buildid.c > @@ -46,8 +46,8 @@ static int freader_get_folio(struct freader *r, loff_t > file_off) > > freader_put_folio(r); > > - /* reject secretmem folios created with memfd_secret() */ > - if (secretmem_mapping(r->file->f_mapping)) > + /* reject folios without direct map entries (e.g. 
from memfd_secret() > or guest_memfd()) */ > + if (mapping_no_direct_map(r->file->f_mapping)) > return -EFAULT; > > r->folio = filemap_get_folio(r->file->f_mapping, file_off >> > PAGE_SHIFT); > diff --git a/mm/gup.c b/mm/gup.c > index 9cad53acbc99..11461a54b3ae 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -11,7 +11,6 @@ > #include <linux/rmap.h> > #include <linux/swap.h> > #include <linux/swapops.h> > -#include <linux/secretmem.h> > > #include <linux/sched/signal.h> > #include <linux/rwsem.h> > @@ -1216,7 +1215,7 @@ static int check_vma_flags(struct vm_area_struct *vma, > unsigned long gup_flags) > if ((gup_flags & FOLL_SPLIT_PMD) && is_vm_hugetlb_page(vma)) > return -EOPNOTSUPP; > > - if (vma_is_secretmem(vma)) > + if (vma_has_no_direct_map(vma)) > return -EFAULT; > > if (write) { > @@ -2724,7 +2723,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked); > * This call assumes the caller has pinned the folio, that the lowest page > table > * level still points to this folio, and that interrupts have been disabled. > * > - * GUP-fast must reject all secretmem folios. > + * GUP-fast must reject all folios without direct map entries (such as > secretmem). > * > * Writing to pinned file-backed dirty tracked folios is inherently > problematic > * (see comment describing the writable_file_mapping_allowed() function). We > @@ -2753,7 +2752,7 @@ static bool gup_fast_folio_allowed(struct folio *folio, > unsigned int flags) > if (WARN_ON_ONCE(folio_test_slab(folio))) > return false; > > - /* hugetlb neither requires dirty-tracking nor can be secretmem. */ > + /* hugetlb neither requires dirty-tracking nor can be without direct > map. */ > if (folio_test_hugetlb(folio)) > return true; > > @@ -2791,8 +2790,9 @@ static bool gup_fast_folio_allowed(struct folio *folio, > unsigned int flags) > * At this point, we know the mapping is non-null and points to an > * address_space object. 
> */ > - if (secretmem_mapping(mapping)) > + if (mapping_no_direct_map(mapping)) > return false; > + > /* The only remaining allowed file system is shmem. */ > return !reject_file_backed || shmem_mapping(mapping); > } > diff --git a/mm/mlock.c b/mm/mlock.c > index 2f699c3497a5..a6f4b3df4f3f 100644 > --- a/mm/mlock.c > +++ b/mm/mlock.c > @@ -474,7 +474,7 @@ static int mlock_fixup(struct vma_iterator *vmi, struct > vm_area_struct *vma, > > if (newflags == oldflags || (oldflags & VM_SPECIAL) || > is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || > - vma_is_dax(vma) || vma_is_secretmem(vma) || (oldflags & > VM_DROPPABLE)) > + vma_is_dax(vma) || vma_has_no_direct_map(vma) || (oldflags & > VM_DROPPABLE)) > /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ > goto out; > > diff --git a/mm/secretmem.c b/mm/secretmem.c > index edf111e0a1bb..560cdbe1fe5d 100644 > --- a/mm/secretmem.c > +++ b/mm/secretmem.c > @@ -134,11 +134,6 @@ static int secretmem_mmap_prepare(struct vm_area_desc > *desc) > return 0; > } > > -bool vma_is_secretmem(struct vm_area_struct *vma) > -{ > - return vma->vm_ops == &secretmem_vm_ops; > -} > - > static const struct file_operations secretmem_fops = { > .release = secretmem_release, > .mmap_prepare = secretmem_mmap_prepare, > @@ -156,7 +151,7 @@ static void secretmem_free_folio(struct folio *folio) > folio_zero_segment(folio, 0, folio_size(folio)); > } > > -const struct address_space_operations secretmem_aops = { > +static const struct address_space_operations secretmem_aops = { > .dirty_folio = noop_dirty_folio, > .free_folio = secretmem_free_folio, > .migrate_folio = secretmem_migrate_folio, > @@ -205,6 +200,7 @@ static struct file *secretmem_file_create(unsigned long > flags) > > mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); > mapping_set_unevictable(inode->i_mapping); > + mapping_set_no_direct_map(inode->i_mapping); > > inode->i_op = &secretmem_iops; > inode->i_mapping->a_ops = &secretmem_aops; > -- > 2.50.1
Thanks also for the cleanups!

Reviewed-by: Ackerley Tng <[email protected]>
