This starts from passing cp_flags into hugetlb_change_protection() so hugetlb
will be able to handle MM_CP_UFFD_WP[_RESOLVE] requests.

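huge_pte_clear_uffd_wp() is introduced so that the uffd-wp bit can be cleared
from a present huge pte when a write protection is resolved.  Migration
entries of huge pages are handled as well, using the pte_swp_*() helpers, so
that UFFDIO_WRITEPROTECT also works while a huge page is being migrated.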

Signed-off-by: Peter Xu <pet...@redhat.com>
---
 include/asm-generic/hugetlb.h |  5 +++++
 include/linux/hugetlb.h       |  6 ++++--
 mm/hugetlb.c                  | 13 ++++++++++++-
 mm/mprotect.c                 |  3 ++-
 mm/userfaultfd.c              |  8 ++++++++
 5 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 548212eccbd6..181cdc3297e7 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -32,6 +32,11 @@ static inline pte_t huge_pte_mkuffd_wp(pte_t pte)
        return pte_mkuffd_wp(pte);
 }
 
+static inline pte_t huge_pte_clear_uffd_wp(pte_t pte)
+{
+       return pte_clear_uffd_wp(pte);
+}
+
 static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
 {
        return pte_modify(pte, newprot);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ef8d2b8427b1..92710600596e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -190,7 +190,8 @@ struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
 int pmd_huge(pmd_t pmd);
 int pud_huge(pud_t pud);
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
-               unsigned long address, unsigned long end, pgprot_t newprot);
+               unsigned long address, unsigned long end, pgprot_t newprot,
+               unsigned long cp_flags);
 
 bool is_hugetlb_entry_migration(pte_t pte);
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
@@ -352,7 +353,8 @@ static inline void move_hugetlb_state(struct page *oldpage,
 
 static inline unsigned long hugetlb_change_protection(
                        struct vm_area_struct *vma, unsigned long address,
-                       unsigned long end, pgprot_t newprot)
+                       unsigned long end, pgprot_t newprot,
+                       unsigned long cp_flags)
 {
        return 0;
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f0e55b341ebd..fd3e87517e10 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5063,7 +5063,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
-               unsigned long address, unsigned long end, pgprot_t newprot)
+               unsigned long address, unsigned long end,
+               pgprot_t newprot, unsigned long cp_flags)
 {
        struct mm_struct *mm = vma->vm_mm;
        unsigned long start = address;
@@ -5073,6 +5074,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
        unsigned long pages = 0;
        bool shared_pmd = false;
        struct mmu_notifier_range range;
+       bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
+       bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
 
        /*
         * In the case of shared PMDs, the area to flush could be beyond
@@ -5113,6 +5116,10 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 
                                make_migration_entry_read(&entry);
                                newpte = swp_entry_to_pte(entry);
+                               if (uffd_wp)
+                                       newpte = pte_swp_mkuffd_wp(newpte);
+                               else if (uffd_wp_resolve)
+                                       newpte = pte_swp_clear_uffd_wp(newpte);
                                set_huge_swap_pte_at(mm, address, ptep,
                                                     newpte, huge_page_size(h));
                                pages++;
@@ -5126,6 +5133,10 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                        old_pte = huge_ptep_modify_prot_start(vma, address, ptep);
                        pte = pte_mkhuge(huge_pte_modify(old_pte, newprot));
                        pte = arch_make_huge_pte(pte, vma, NULL, 0);
+                       if (uffd_wp)
+                               pte = huge_pte_mkuffd_wp(huge_pte_wrprotect(pte));
+                       else if (uffd_wp_resolve)
+                               pte = huge_pte_clear_uffd_wp(pte);
                        huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
                        pages++;
                }
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 51c954afa406..fe5a5b96a61f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -416,7 +416,8 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
        BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL);
 
        if (is_vm_hugetlb_page(vma))
-               pages = hugetlb_change_protection(vma, start, end, newprot);
+               pages = hugetlb_change_protection(vma, start, end, newprot,
+                                                 cp_flags);
        else
                pages = change_protection_range(vma, start, end, newprot,
                                                cp_flags);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 78471ae3d25c..01170197a3d7 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -654,6 +654,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
                        unsigned long len, bool enable_wp, bool *mmap_changing)
 {
        struct vm_area_struct *dst_vma;
+       unsigned long page_mask;
        pgprot_t newprot;
        int err;
 
@@ -690,6 +691,13 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
        if (!vma_is_anonymous(dst_vma))
                goto out_unlock;
 
+       if (is_vm_hugetlb_page(dst_vma)) {
+               err = -EINVAL;
+               page_mask = vma_kernel_pagesize(dst_vma) - 1;
+               if ((start & page_mask) || (len & page_mask))
+                       goto out_unlock;
+       }
+
        if (enable_wp)
                newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
        else
-- 
2.26.2

Reply via email to