On 26 Apr  9:19, David Hildenbrand wrote:
> A couple of points:
> 
> a) Don't use page_mapcount(). Either use folio_mapcount(), but likely
> you want to check PageAnonExclusive.
> 
> b) If you're not following the can_follow_write_pte/_pmd model, you are
> doing something wrong :)
> 
> c) The code was heavily changed in mm/mm-unstable. It was merged with
> the common code.
> 
> Likely, in mm/mm-unstable, the existing can_follow_write_pte and
> can_follow_write_pmd checks will already cover what you want in most cases.
> 
> We'd need a can_follow_write_pud() to cover follow_huge_pud() and
> (unfortunately) something similar to handle follow_hugepd() as well.
> 
> Copy-pasting what we do in can_follow_write_pte() and adjusting for
> different PTE types is the right thing to do. Maybe now it's time to factor
> out the common checks into a separate helper.

I tried to get the hugepd stuff right, but this was the first I'd heard
about it :-) Afaict follow_huge_pmd() and friends were already DTRT.
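
For context, the caller this is all about is the ptrace-style write.
Roughly what access_remote_vm() boils down to (my sketch, not part of
the patch; the get_user_pages_remote() signature here is the current
one and has changed across releases):

#include <linux/mm.h>
#include <linux/highmem.h>

/*
 * Write one byte into another process through a !VM_WRITE private
 * mapping, relying on FOLL_FORCE to break COW for us. This is the
 * path that used to hit the hugetlb bail-out in check_vma_flags().
 */
static int poke_tracee(struct mm_struct *mm, unsigned long addr, u8 val)
{
	unsigned int gup_flags = FOLL_WRITE | FOLL_FORCE;
	struct page *page;
	long ret;

	mmap_read_lock(mm);
	ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL);
	mmap_read_unlock(mm);
	if (ret != 1)
		return -EFAULT;

	memcpy_to_page(page, offset_in_page(addr), (char *)&val, 1);
	set_page_dirty_lock(page);
	put_page(page);
	return 0;
}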

diff --git a/mm/gup.c b/mm/gup.c
index 2f7baf96f65..9df97ea555f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -517,6 +517,34 @@ static int record_subpages(struct page *page, unsigned long sz,
 }
 #endif /* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */
 
+/* Common code for can_follow_write_* */
+static inline bool can_follow_write_common(struct page *page,
+                                          struct vm_area_struct *vma,
+                                          unsigned int flags)
+{
+       /* Maybe FOLL_FORCE is set to override it? */
+       if (!(flags & FOLL_FORCE))
+               return false;
+
+       /* But FOLL_FORCE has no effect on shared mappings */
+       if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+               return false;
+
+       /* ... or read-only private ones */
+       if (!(vma->vm_flags & VM_MAYWRITE))
+               return false;
+
+       /* ... or already writable ones that just need to take a write fault */
+       if (vma->vm_flags & VM_WRITE)
+               return false;
+
+       /*
+        * See can_change_pte_writable(): we broke COW and could map the page
+        * writable if we have an exclusive anonymous page ...
+        */
+       return page && PageAnon(page) && PageAnonExclusive(page);
+}
+
 #ifdef CONFIG_ARCH_HAS_HUGEPD
 static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
                                      unsigned long sz)
@@ -525,6 +553,17 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
        return (__boundary - 1 < end - 1) ? __boundary : end;
 }
 
+static inline bool can_follow_write_hugepd(pte_t pte, struct page *page,
+                                          struct vm_area_struct *vma,
+                                          unsigned int flags)
+{
+       /* If the pte is writable, we can write to the page. */
+       if (pte_access_permitted(pte, flags & FOLL_WRITE))
+               return true;
+
+       return can_follow_write_common(page, vma, flags);
+}
+
 static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
                unsigned long end, unsigned int flags, struct page **pages,
                int *nr)
@@ -541,13 +580,13 @@ static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 
        pte = huge_ptep_get(ptep);
 
-       if (!pte_access_permitted(pte, flags & FOLL_WRITE))
-               return 0;
-
        /* hugepages are never "special" */
        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 
        page = pte_page(pte);
+       if (!can_follow_write_hugepd(pte, page, vma, flags))
+               return 0;
+
        refs = record_subpages(page, sz, addr, end, pages + *nr);
 
        folio = try_grab_folio(page, refs, flags);
@@ -668,7 +707,23 @@ static struct page *no_page_table(struct vm_area_struct *vma,
        return NULL;
 }
 
 #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
+/* FOLL_FORCE can write to even unwritable PUDs in COW mappings. */
+static inline bool can_follow_write_pud(pud_t pud, struct page *page,
+                                       struct vm_area_struct *vma,
+                                       unsigned int flags)
+{
+       /* If the pud is writable, we can write to the page. */
+       if (pud_write(pud))
+               return true;
+
+       if (!can_follow_write_common(page, vma, flags))
+               return false;
+
+       /* ... and a write-fault isn't required for other reasons. */
+       return !vma_soft_dirty_enabled(vma) || pud_soft_dirty(pud);
+}
+
 static struct page *follow_huge_pud(struct vm_area_struct *vma,
                                    unsigned long addr, pud_t *pudp,
                                    int flags, struct follow_page_context *ctx)
@@ -681,7 +736,8 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
 
        assert_spin_locked(pud_lockptr(mm, pudp));
 
-       if ((flags & FOLL_WRITE) && !pud_write(pud))
+       if ((flags & FOLL_WRITE) &&
+           !can_follow_write_pud(pud, pfn_to_page(pfn), vma, flags))
                return NULL;
 
        if (!pud_present(pud))
@@ -733,27 +789,7 @@ static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
        if (pmd_write(pmd))
                return true;
 
-       /* Maybe FOLL_FORCE is set to override it? */
-       if (!(flags & FOLL_FORCE))
-               return false;
-
-       /* But FOLL_FORCE has no effect on shared mappings */
-       if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
-               return false;
-
-       /* ... or read-only private ones */
-       if (!(vma->vm_flags & VM_MAYWRITE))
-               return false;
-
-       /* ... or already writable ones that just need to take a write fault */
-       if (vma->vm_flags & VM_WRITE)
-               return false;
-
-       /*
-        * See can_change_pte_writable(): we broke COW and could map the page
-        * writable if we have an exclusive anonymous page ...
-        */
-       if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+       if (!can_follow_write_common(page, vma, flags))
                return false;
 
        /* ... and a write-fault isn't required for other reasons. */
@@ -854,27 +890,7 @@ static inline bool can_follow_write_pte(pte_t pte, struct page *page,
        if (pte_write(pte))
                return true;
 
-       /* Maybe FOLL_FORCE is set to override it? */
-       if (!(flags & FOLL_FORCE))
-               return false;
-
-       /* But FOLL_FORCE has no effect on shared mappings */
-       if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
-               return false;
-
-       /* ... or read-only private ones */
-       if (!(vma->vm_flags & VM_MAYWRITE))
-               return false;
-
-       /* ... or already writable ones that just need to take a write fault */
-       if (vma->vm_flags & VM_WRITE)
-               return false;
-
-       /*
-        * See can_change_pte_writable(): we broke COW and could map the page
-        * writable if we have an exclusive anonymous page ...
-        */
-       if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+       if (!can_follow_write_common(page, vma, flags))
                return false;
 
        /* ... and a write-fault isn't required for other reasons. */
@@ -1374,9 +1390,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
                if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) {
                        if (!(gup_flags & FOLL_FORCE))
                                return -EFAULT;
-                       /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */
-                       if (is_vm_hugetlb_page(vma))
-                               return -EFAULT;
+
                        /*
                         * We used to let the write,force case do COW in a
                         * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 417fc5cdb6e..099a71ee028 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5320,6 +5320,13 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
                update_mmu_cache(vma, address, ptep);
 }
 
+static void set_huge_ptep_maybe_writable(struct vm_area_struct *vma,
+                                        unsigned long address, pte_t *ptep)
+{
+       if (vma->vm_flags & VM_WRITE)
+               set_huge_ptep_writable(vma, address, ptep);
+}
+
 bool is_hugetlb_entry_migration(pte_t pte)
 {
        swp_entry_t swp;
@@ -5942,13 +5949,6 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
        if (!unshare && huge_pte_uffd_wp(pte))
                return 0;
 
-       /*
-        * hugetlb does not support FOLL_FORCE-style write faults that keep the
-        * PTE mapped R/O such as maybe_mkwrite() would do.
-        */
-       if (WARN_ON_ONCE(!unshare && !(vma->vm_flags & VM_WRITE)))
-               return VM_FAULT_SIGSEGV;
-
        /* Let's take out MAP_SHARED mappings first. */
        if (vma->vm_flags & VM_MAYSHARE) {
                set_huge_ptep_writable(vma, vmf->address, vmf->pte);
@@ -5970,7 +5970,7 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
                        SetPageAnonExclusive(&old_folio->page);
                }
                if (likely(!unshare))
-                       set_huge_ptep_writable(vma, vmf->address, vmf->pte);
+                       set_huge_ptep_maybe_writable(vma, vmf->address, vmf->pte);
 
                delayacct_wpcopy_end();
                return 0;
@@ -6076,7 +6076,8 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
        spin_lock(vmf->ptl);
        vmf->pte = hugetlb_walk(vma, vmf->address, huge_page_size(h));
        if (likely(vmf->pte && pte_same(huge_ptep_get(vmf->pte), pte))) {
-               pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
+               const bool writable = !unshare && (vma->vm_flags & VM_WRITE);
+               pte_t newpte = make_huge_pte(vma, &new_folio->page, writable);
 
                /* Break COW or unshare */
                huge_ptep_clear_flush(vma, vmf->address, vmf->pte);
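
Fwiw, something along these lines exercises the new path from
userspace (my sketch, untested as posted; assumes a 2M default
hugepage size and at least one preallocated hugepage, e.g.
"echo 1 > /proc/sys/vm/nr_hugepages"): the tracer pokes into the
child's read-only private hugetlb mapping, which needs
FOLL_FORCE|FOLL_WRITE and is expected to fail without this patch.

#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	size_t len = 2 * 1024 * 1024;	/* assumes 2M default hugepage size */
	/* PROT_READ only: a direct store from the child would segfault */
	char *map = mmap(NULL, len, PROT_READ,
			 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	pid_t pid;

	if (map == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	pid = fork();
	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		/* child observes whatever the tracer poked in below */
		printf("child sees: %#lx\n", *(unsigned long *)map);
		return 0;
	}

	waitpid(pid, NULL, 0);
	/* write through the read-only mapping: FOLL_FORCE|FOLL_WRITE */
	if (ptrace(PTRACE_POKEDATA, pid, map, (void *)0x42) != 0)
		perror("POKEDATA");
	ptrace(PTRACE_CONT, pid, NULL, NULL);
	waitpid(pid, NULL, 0);
	return 0;
}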


-- 
Guillaume Morin <guilla...@morinfr.org>
