Currently, memmap_init_range() unconditionally initializes all struct pages within a section. However, when HugeTLB Vmemmap Optimization (HVO) is enabled, shared vmemmap tail pages are allocated during the vmemmap population phase (e.g., via vmemmap_get_tail()). These shared tail pages are intentionally left uninitialized at that time, because the subsequent memmap_init() pass over the same range would simply overwrite them.
If memmap_init_range() continues to initialize these shared tail pages, it will overwrite the carefully constructed HVO mappings and metadata. This forces subsystems like HugeTLB to implement workarounds (like re-initializing or compensating for the overwritten data in their own init routines, as seen in hugetlb_vmemmap_init()). Therefore, the primary motivation of this patch is to prevent memmap_init_range() from incorrectly overwriting the shared vmemmap tail pages. By detecting if a page is an optimizable compound vmemmap page (using the newly introduced section order), we can safely skip its redundant initialization. As a significant side-effect, skipping the initialization of these shared tail pages also saves substantial CPU cycles during the early boot stage. Signed-off-by: Muchun Song <[email protected]> --- mm/internal.h | 11 +++++++++++ mm/mm_init.c | 19 +++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index a8acabcd1d93..1060d7c07f5b 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1011,6 +1011,17 @@ static inline void sparse_init_subsection_map(void) } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ +static inline bool vmemmap_page_optimizable(const struct page *page) +{ + unsigned long pfn = page_to_pfn(page); + unsigned int order = section_order(__pfn_to_section(pfn)); + + if (!is_power_of_2(sizeof(struct page))) + return false; + + return (pfn & ((1L << order) - 1)) >= OPTIMIZED_FOLIO_VMEMMAP_PAGE_STRUCTS; +} + #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* diff --git a/mm/mm_init.c b/mm/mm_init.c index 977a837b7ef6..7f5b326e9298 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -676,12 +676,13 @@ static inline void fixup_hashdist(void) {} static __meminit void pageblock_migratetype_init_range(unsigned long pfn, unsigned long nr_pages, - int migratetype) + int migratetype, + bool isolate) { unsigned long end = pfn + nr_pages; for (pfn = pageblock_align(pfn); pfn < end; pfn += 
pageblock_nr_pages) { - init_pageblock_migratetype(pfn_to_page(pfn), migratetype, false); + init_pageblock_migratetype(pfn_to_page(pfn), migratetype, isolate); cond_resched(); } } @@ -912,6 +913,16 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone } page = pfn_to_page(pfn); + if (vmemmap_page_optimizable(page)) { + struct mem_section *ms = __pfn_to_section(pfn); + unsigned long start = pfn; + + pfn = min(ALIGN(start, 1L << section_order(ms)), end_pfn); + pageblock_migratetype_init_range(start, pfn - start, migratetype, + isolate_pageblock); + continue; + } + __init_single_page(page, pfn, zone, nid); if (context == MEMINIT_HOTPLUG) { #ifdef CONFIG_ZONE_DEVICE @@ -1138,7 +1149,7 @@ void __ref memmap_init_zone_device(struct zone *zone, * Please note that MEMINIT_HOTPLUG path doesn't clear memmap * because this is done early in section_activate() */ - pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE); + pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false); pr_debug("%s initialised %lu pages in %ums\n", __func__, nr_pages, jiffies_to_msecs(jiffies - start)); @@ -1963,7 +1974,7 @@ static void __init deferred_free_pages(unsigned long pfn, if (!nr_pages) return; - pageblock_migratetype_init_range(pfn, nr_pages, MIGRATE_MOVABLE); + pageblock_migratetype_init_range(pfn, nr_pages, MIGRATE_MOVABLE, false); page = pfn_to_page(pfn); -- 2.20.1
