Re: [PATCH v3 hmm 03/11] mm/mmu_notifiers: add a get/put scheme for the registration

2019-08-14 Thread Jason Gunthorpe
On Wed, Aug 14, 2019 at 02:20:31PM -0700, Ralph Campbell wrote:
> 
> On 8/6/19 4:15 PM, Jason Gunthorpe wrote:
> > From: Jason Gunthorpe 
> > 
> > Many places in the kernel have a flow where userspace will create some
> > object and that object will need to connect to the subsystem's
> > mmu_notifier subscription for the duration of its lifetime.
> > 
> > In this case the subsystem is usually tracking multiple mm_structs and it
> > is difficult to keep track of what struct mmu_notifier's have been
> > allocated for what mm's.
> > 
> > Since this has been open coded in a variety of exciting ways, provide core
> > functionality to do this safely.
> > 
> > This approach uses the strct mmu_notifier_ops * as a key to determine if
> 
> s/strct/struct

Yes, thanks for all of this, I like having comments, but I'm a
terrible proofreader :(

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 hmm 03/11] mm/mmu_notifiers: add a get/put scheme for the registration

2019-08-14 Thread Ralph Campbell



On 8/6/19 4:15 PM, Jason Gunthorpe wrote:

From: Jason Gunthorpe 

Many places in the kernel have a flow where userspace will create some
object and that object will need to connect to the subsystem's
mmu_notifier subscription for the duration of its lifetime.

In this case the subsystem is usually tracking multiple mm_structs and it
is difficult to keep track of what struct mmu_notifier's have been
allocated for what mm's.

Since this has been open coded in a variety of exciting ways, provide core
functionality to do this safely.

This approach uses the strct mmu_notifier_ops * as a key to determine if


s/strct/struct


the subsystem has a notifier registered on the mm or not. If there is a
registration then the existing notifier struct is returned, otherwise the
ops->alloc_notifier() is used to create a new per-subsystem notifier for
the mm.

The destroy side incorporates an async call_srcu based destruction which
will avoid bugs in the callers such as commit 6d7c3cde93c1 ("mm/hmm: fix
use after free with struct hmm in the mmu notifiers").

Since we are inside the mmu notifier core, locking is fairly simple: the
allocation uses the same approach as for mmu_notifier_mm, the write side
of the mmap_sem makes everything deterministic and we only need to do
hlist_add_head_rcu() under the mm_take_all_locks(). The new users count
and the discoverability in the hlist is fully serialized by the
mmu_notifier_mm->lock.

Co-developed-by: Christoph Hellwig 
Signed-off-by: Christoph Hellwig 
Signed-off-by: Jason Gunthorpe 


Reviewed-by: Ralph Campbell 


---
  include/linux/mmu_notifier.h |  35 
  mm/mmu_notifier.c| 156 +--
  2 files changed, 185 insertions(+), 6 deletions(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index b6c004bd9f6ad9..31aa971315a142 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -211,6 +211,19 @@ struct mmu_notifier_ops {
 */
void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
 unsigned long start, unsigned long end);
+
+   /*
+* These callbacks are used with the get/put interface to manage the
+* lifetime of the mmu_notifier memory. alloc_notifier() returns a new
+* notifier for use with the mm.
+*
+* free_notifier() is only called after the mmu_notifier has been
+* fully put, calls to any ops callback are prevented and no ops
+* callbacks are currently running. It is called from a SRCU callback
+* and cannot sleep.
+*/
+   struct mmu_notifier *(*alloc_notifier)(struct mm_struct *mm);
+   void (*free_notifier)(struct mmu_notifier *mn);
  };
  
  /*

@@ -227,6 +240,9 @@ struct mmu_notifier_ops {
  struct mmu_notifier {
struct hlist_node hlist;
const struct mmu_notifier_ops *ops;
+   struct mm_struct *mm;
+   struct rcu_head rcu;
+   unsigned int users;
  };
  
  static inline int mm_has_notifiers(struct mm_struct *mm)

@@ -234,6 +250,21 @@ static inline int mm_has_notifiers(struct mm_struct *mm)
return unlikely(mm->mmu_notifier_mm);
  }
  
+struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,

+struct mm_struct *mm);
+static inline struct mmu_notifier *
+mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm)
+{
+   struct mmu_notifier *ret;
+
+   down_write(&mm->mmap_sem);
+   ret = mmu_notifier_get_locked(ops, mm);
+   up_write(&mm->mmap_sem);
+   return ret;
+}
+void mmu_notifier_put(struct mmu_notifier *mn);
+void mmu_notifier_synchronize(void);
+
  extern int mmu_notifier_register(struct mmu_notifier *mn,
 struct mm_struct *mm);
  extern int __mmu_notifier_register(struct mmu_notifier *mn,
@@ -581,6 +612,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
  #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
  #define set_pte_at_notify set_pte_at
  
+static inline void mmu_notifier_synchronize(void)

+{
+}
+
  #endif /* CONFIG_MMU_NOTIFIER */
  
  #endif /* _LINUX_MMU_NOTIFIER_H */

diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 696810f632ade1..4a770b5211b71d 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -248,6 +248,9 @@ int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
lockdep_assert_held_write(&mm->mmap_sem);
BUG_ON(atomic_read(&mm->mm_users) <= 0);
  
+	mn->mm = mm;

+   mn->users = 1;
+
if (!mm->mmu_notifier_mm) {
/*
 * kmalloc cannot be called under mm_take_all_locks(), but we
@@ -295,18 +298,24 @@ int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
  }
  EXPORT_SYMBOL_GPL(__mmu_notifier_register);
  
-/*

+/**
+ * mmu_notifier_register - Register a notifier on a mm
+ * @mn: The notifier

[PATCH v3 hmm 03/11] mm/mmu_notifiers: add a get/put scheme for the registration

2019-08-06 Thread Jason Gunthorpe
From: Jason Gunthorpe 

Many places in the kernel have a flow where userspace will create some
object and that object will need to connect to the subsystem's
mmu_notifier subscription for the duration of its lifetime.

In this case the subsystem is usually tracking multiple mm_structs and it
is difficult to keep track of what struct mmu_notifier's have been
allocated for what mm's.

Since this has been open coded in a variety of exciting ways, provide core
functionality to do this safely.

This approach uses the strct mmu_notifier_ops * as a key to determine if
the subsystem has a notifier registered on the mm or not. If there is a
registration then the existing notifier struct is returned, otherwise the
ops->alloc_notifier() is used to create a new per-subsystem notifier for
the mm.

The destroy side incorporates an async call_srcu based destruction which
will avoid bugs in the callers such as commit 6d7c3cde93c1 ("mm/hmm: fix
use after free with struct hmm in the mmu notifiers").

Since we are inside the mmu notifier core, locking is fairly simple: the
allocation uses the same approach as for mmu_notifier_mm, the write side
of the mmap_sem makes everything deterministic and we only need to do
hlist_add_head_rcu() under the mm_take_all_locks(). The new users count
and the discoverability in the hlist is fully serialized by the
mmu_notifier_mm->lock.

Co-developed-by: Christoph Hellwig 
Signed-off-by: Christoph Hellwig 
Signed-off-by: Jason Gunthorpe 
---
 include/linux/mmu_notifier.h |  35 
 mm/mmu_notifier.c| 156 +--
 2 files changed, 185 insertions(+), 6 deletions(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index b6c004bd9f6ad9..31aa971315a142 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -211,6 +211,19 @@ struct mmu_notifier_ops {
 */
void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
 unsigned long start, unsigned long end);
+
+   /*
+* These callbacks are used with the get/put interface to manage the
+* lifetime of the mmu_notifier memory. alloc_notifier() returns a new
+* notifier for use with the mm.
+*
+* free_notifier() is only called after the mmu_notifier has been
+* fully put, calls to any ops callback are prevented and no ops
+* callbacks are currently running. It is called from a SRCU callback
+* and cannot sleep.
+*/
+   struct mmu_notifier *(*alloc_notifier)(struct mm_struct *mm);
+   void (*free_notifier)(struct mmu_notifier *mn);
 };
 
 /*
@@ -227,6 +240,9 @@ struct mmu_notifier_ops {
 struct mmu_notifier {
struct hlist_node hlist;
const struct mmu_notifier_ops *ops;
+   struct mm_struct *mm;
+   struct rcu_head rcu;
+   unsigned int users;
 };
 
 static inline int mm_has_notifiers(struct mm_struct *mm)
@@ -234,6 +250,21 @@ static inline int mm_has_notifiers(struct mm_struct *mm)
return unlikely(mm->mmu_notifier_mm);
 }
 
+struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
+struct mm_struct *mm);
+static inline struct mmu_notifier *
+mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm)
+{
+   struct mmu_notifier *ret;
+
+   down_write(&mm->mmap_sem);
+   ret = mmu_notifier_get_locked(ops, mm);
+   up_write(&mm->mmap_sem);
+   return ret;
+}
+void mmu_notifier_put(struct mmu_notifier *mn);
+void mmu_notifier_synchronize(void);
+
 extern int mmu_notifier_register(struct mmu_notifier *mn,
 struct mm_struct *mm);
 extern int __mmu_notifier_register(struct mmu_notifier *mn,
@@ -581,6 +612,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
 #define set_pte_at_notify set_pte_at
 
+static inline void mmu_notifier_synchronize(void)
+{
+}
+
 #endif /* CONFIG_MMU_NOTIFIER */
 
 #endif /* _LINUX_MMU_NOTIFIER_H */
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 696810f632ade1..4a770b5211b71d 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -248,6 +248,9 @@ int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
lockdep_assert_held_write(&mm->mmap_sem);
BUG_ON(atomic_read(&mm->mm_users) <= 0);
 
+   mn->mm = mm;
+   mn->users = 1;
+
if (!mm->mmu_notifier_mm) {
/*
 * kmalloc cannot be called under mm_take_all_locks(), but we
@@ -295,18 +298,24 @@ int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(__mmu_notifier_register);
 
-/*
+/**
+ * mmu_notifier_register - Register a notifier on a mm
+ * @mn: The notifier to attach
+ * @mm: The mm to attach the notifier to
+ *
  * Must not hold mmap_sem nor any other VM related lock when ca