Currently, both HugeTLB and sparse-vmemmap have their own logic to get
or allocate the shared tail page for vmemmap optimization. The HugeTLB
version handles runtime concurrency using cmpxchg, while the
sparse-vmemmap version (used only at boot time) is simpler: it
publishes the page with a plain store and does not handle races.

This patch unifies them into a single function in mm/sparse-vmemmap.c.

A new function, vmemmap_shared_tail_page(), is introduced: it returns
the shared page frame used to map the tail vmemmap pages of a compound
page, allocating and initializing that page on first use.

Furthermore, vmemmap_alloc_block_zero() is used as a safe allocation
method for both situations:

1. It calls alloc_pages_node() (via vmemmap_alloc_block()) when slab is
   available.

2. It falls back to bootmem allocation during early boot.

Together, these make vmemmap_shared_tail_page() suitable for use in both
early boot (sparse-vmemmap init) and runtime (HugeTLB HVO) contexts.

This reduces code duplication and ensures consistent behavior.

Signed-off-by: Muchun Song <[email protected]>
---
 include/linux/mm.h   |  1 +
 mm/hugetlb_vmemmap.c | 28 +---------------------------
 mm/sparse-vmemmap.c  | 42 +++++++++++++++++++++---------------------
 3 files changed, 23 insertions(+), 48 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 93e447468131..15841829b7eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4880,6 +4880,7 @@ int vmemmap_populate(unsigned long start, unsigned long 
end, int node,
 void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node,
                          unsigned long headsize);
 void vmemmap_populate_print_last(void);
+struct page *vmemmap_shared_tail_page(unsigned int order, struct zone *zone);
 #ifdef CONFIG_MEMORY_HOTPLUG
 void vmemmap_free(unsigned long start, unsigned long end,
                struct vmem_altmap *altmap);
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index a190b9b94346..a7ea98fcc18e 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -493,32 +493,6 @@ static bool vmemmap_should_optimize_folio(const struct 
hstate *h, struct folio *
        return true;
 }
 
-static struct page *vmemmap_get_tail(unsigned int order, struct zone *zone)
-{
-       const unsigned int idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
-       struct page *tail, *p;
-       int node = zone_to_nid(zone);
-
-       tail = READ_ONCE(zone->vmemmap_tails[idx]);
-       if (likely(tail))
-               return tail;
-
-       tail = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
-       if (!tail)
-               return NULL;
-
-       p = page_to_virt(tail);
-       for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
-               init_compound_tail(p + i, NULL, order, zone);
-
-       if (cmpxchg(&zone->vmemmap_tails[idx], NULL, tail)) {
-               __free_page(tail);
-               tail = READ_ONCE(zone->vmemmap_tails[idx]);
-       }
-
-       return tail;
-}
-
 static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
                                            struct folio *folio,
                                            struct list_head *vmemmap_pages,
@@ -535,7 +509,7 @@ static int __hugetlb_vmemmap_optimize_folio(const struct 
hstate *h,
                return ret;
 
        nid = folio_nid(folio);
-       vmemmap_tail = vmemmap_get_tail(h->order, folio_zone(folio));
+       vmemmap_tail = vmemmap_shared_tail_page(h->order, folio_zone(folio));
        if (!vmemmap_tail)
                return -ENOMEM;
 
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index c35d912a1fef..309d935fb05e 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -143,8 +143,6 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
                        start, end - 1);
 }
 
-static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone 
*zone);
-
 static pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, 
int node,
                                              struct vmem_altmap *altmap,
                                              unsigned long ptpfn)
@@ -160,8 +158,8 @@ static pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, 
unsigned long addr, in
                        unsigned long pfn = page_to_pfn((struct page *)addr);
                        const struct mem_section *ms = __pfn_to_section(pfn);
 
-                       page = vmemmap_get_tail(section_order(ms),
-                                               pfn_to_zone(pfn, node));
+                       page = vmemmap_shared_tail_page(section_order(ms),
+                                                       pfn_to_zone(pfn, node));
                        if (!page)
                                return NULL;
                        ptpfn = page_to_pfn(page);
@@ -338,32 +336,34 @@ void vmemmap_wrprotect_hvo(unsigned long addr, unsigned 
long end,
        }
 }
 
-static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone 
*zone)
+struct page *vmemmap_shared_tail_page(unsigned int order, struct zone *zone)
 {
-       struct page *p, *tail;
-       unsigned int idx;
-       int node = zone_to_nid(zone);
+       void *addr;
+       struct page *page;
+       unsigned int idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
 
-       if (WARN_ON_ONCE(order < OPTIMIZABLE_FOLIO_MIN_ORDER))
-               return NULL;
-       if (WARN_ON_ONCE(order > MAX_FOLIO_ORDER))
+       if (WARN_ON_ONCE(idx >= ARRAY_SIZE(zone->vmemmap_tails)))
                return NULL;
 
-       idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
-       tail = zone->vmemmap_tails[idx];
-       if (tail)
-               return tail;
+       page = READ_ONCE(zone->vmemmap_tails[idx]);
+       if (likely(page))
+               return page;
 
-       p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
-       if (!p)
+       addr = vmemmap_alloc_block_zero(PAGE_SIZE, zone_to_nid(zone));
+       if (!addr)
                return NULL;
+
        for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
-               init_compound_tail(p + i, NULL, order, zone);
+               init_compound_tail((struct page *)addr + i, NULL, order, zone);
 
-       tail = virt_to_page(p);
-       zone->vmemmap_tails[idx] = tail;
+       page = virt_to_page(addr);
+       if (cmpxchg(&zone->vmemmap_tails[idx], NULL, page) != NULL) {
+               VM_BUG_ON(!slab_is_available());
+               __free_page(page);
+               page = READ_ONCE(zone->vmemmap_tails[idx]);
+       }
 
-       return tail;
+       return page;
 }
 
 void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
-- 
2.20.1


Reply via email to