The only safe way for non core/arch code to use follow_pfn() is
together with an mmu_notifier subscription. follow_pfn() is already
marked as _GPL and the kerneldoc explains this restriction.

This patch here enforces all this by adding a mmu_notifier argument
and verifying that it is registered for the correct mm_struct.

Motivated by discussions with Christoph Hellwig and Jason Gunthorpe.

Signed-off-by: Daniel Vetter <daniel.vet...@intel.com>
Cc: Christoph Hellwig <h...@infradead.org>
Cc: Jason Gunthorpe <j...@ziepe.ca>
Cc: Kees Cook <keesc...@chromium.org>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: John Hubbard <jhubb...@nvidia.com>
Cc: Jérôme Glisse <jgli...@redhat.com>
Cc: Jan Kara <j...@suse.cz>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: linux...@kvack.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-samsung-...@vger.kernel.org
Cc: linux-me...@vger.kernel.org
Cc: k...@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vet...@ffwll.ch>
---
 include/linux/mm.h  |  3 ++-
 mm/memory.c         | 39 ++++++++++++++++++++++++++-------------
 mm/nommu.c          | 23 ++++++++++++++++++-----
 virt/kvm/kvm_main.c |  4 ++--
 4 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index aa0087feab24..14453f366efd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1651,6 +1651,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct 
vm_area_struct *start_vma,
                unsigned long start, unsigned long end);
 
 struct mmu_notifier_range;
+struct mmu_notifier;
 
 void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
                unsigned long end, unsigned long floor, unsigned long ceiling);
@@ -1660,7 +1661,7 @@ int follow_pte_pmd(struct mm_struct *mm, unsigned long 
address,
                   struct mmu_notifier_range *range,
                   pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
-       unsigned long *pfn);
+       unsigned long *pfn, struct mmu_notifier *subscription);
 int unsafe_follow_pfn(struct vm_area_struct *vma, unsigned long address,
                      unsigned long *pfn);
 int follow_phys(struct vm_area_struct *vma, unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index 0db0c5e233fd..51fc0507663a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4789,11 +4789,30 @@ int follow_pte_pmd(struct mm_struct *mm, unsigned long 
address,
 }
 EXPORT_SYMBOL(follow_pte_pmd);
 
+static int __follow_pfn(struct vm_area_struct *vma, unsigned long address,
+                       unsigned long *pfn)
+{
+       int ret = -EINVAL;
+       spinlock_t *ptl;
+       pte_t *ptep;
+
+       if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+               return ret;
+
+       ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
+       if (ret)
+               return ret;
+       *pfn = pte_pfn(*ptep);
+       pte_unmap_unlock(ptep, ptl);
+       return 0;
+}
+
 /**
  * follow_pfn - look up PFN at a user virtual address
  * @vma: memory mapping
  * @address: user virtual address
  * @pfn: location to store found PFN
+ * @subscription: mmu_notifier subscription for the mm @vma is part of
  *
  * Only IO mappings and raw PFN mappings are allowed. Note that callers must
  * ensure coherency with pte updates by using a &mmu_notifier to follow 
updates.
@@ -4805,21 +4824,15 @@ EXPORT_SYMBOL(follow_pte_pmd);
  * Return: zero and the pfn at @pfn on success, -ve otherwise.
  */
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
-       unsigned long *pfn)
+       unsigned long *pfn, struct mmu_notifier *subscription)
 {
-       int ret = -EINVAL;
-       spinlock_t *ptl;
-       pte_t *ptep;
+       if (WARN_ON(!subscription->mm))
+               return -EINVAL;
 
-       if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
-               return ret;
+       if (WARN_ON(subscription->mm != vma->vm_mm))
+               return -EINVAL;
 
-       ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
-       if (ret)
-               return ret;
-       *pfn = pte_pfn(*ptep);
-       pte_unmap_unlock(ptep, ptl);
-       return 0;
+       return __follow_pfn(vma, address, pfn);
 }
 EXPORT_SYMBOL_GPL(follow_pfn);
 
@@ -4844,7 +4857,7 @@ int unsafe_follow_pfn(struct vm_area_struct *vma, 
unsigned long address,
        WARN_ONCE(1, "unsafe follow_pfn usage\n");
        add_taint(TAINT_USER, LOCKDEP_STILL_OK);
 
-       return follow_pfn(vma, address, pfn);
+       return __follow_pfn(vma, address, pfn);
 }
 EXPORT_SYMBOL(unsafe_follow_pfn);
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 79fc98a6c94a..2a6b46fe1906 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -111,24 +111,37 @@ unsigned int kobjsize(const void *objp)
        return page_size(page);
 }
 
+static int __follow_pfn(struct vm_area_struct *vma, unsigned long address,
+                       unsigned long *pfn)
+{
+       if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+               return -EINVAL;
+
+       *pfn = address >> PAGE_SHIFT;
+       return 0;
+}
+
 /**
  * follow_pfn - look up PFN at a user virtual address
  * @vma: memory mapping
  * @address: user virtual address
  * @pfn: location to store found PFN
+ * @subscription: mmu_notifier subscription for the mm @vma is part of
  *
  * Only IO mappings and raw PFN mappings are allowed.
  *
  * Returns zero and the pfn at @pfn on success, -ve otherwise.
  */
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
-       unsigned long *pfn)
+       unsigned long *pfn, struct mmu_notifier *subscription)
 {
-       if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+       if (WARN_ON(!subscription->mm))
                return -EINVAL;
 
-       *pfn = address >> PAGE_SHIFT;
-       return 0;
+       if (WARN_ON(subscription->mm != vma->vm_mm))
+               return -EINVAL;
+
+       return __follow_pfn(vma, address, pfn);
 }
 EXPORT_SYMBOL_GPL(follow_pfn);
 
@@ -153,7 +166,7 @@ int unsafe_follow_pfn(struct vm_area_struct *vma, unsigned 
long address,
        WARN_ONCE(1, "unsafe follow_pfn usage\n");
        add_taint(TAINT_USER, LOCKDEP_STILL_OK);
 
-       return follow_pfn(vma, address, pfn);
+       return __follow_pfn(vma, address, pfn);
 }
 EXPORT_SYMBOL(unsafe_follow_pfn);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 417f3d470c3e..6f6786524eff 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1891,7 +1891,7 @@ static int hva_to_pfn_remapped(struct kvm *kvm, struct 
vm_area_struct *vma,
        unsigned long pfn;
        int r;
 
-       r = follow_pfn(vma, addr, &pfn);
+       r = follow_pfn(vma, addr, &pfn, &kvm->mmu_notifier);
        if (r) {
                /*
                 * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
@@ -1906,7 +1906,7 @@ static int hva_to_pfn_remapped(struct kvm *kvm, struct 
vm_area_struct *vma,
                if (r)
                        return r;
 
-               r = follow_pfn(vma, addr, &pfn);
+               r = follow_pfn(vma, addr, &pfn, &kvm->mmu_notifier);
                if (r)
                        return r;
 
-- 
2.29.2

Reply via email to