Teach the sparse-vmemmap population code to use the compound page order recorded in the section metadata when deciding whether a vmemmap page can be optimized.
With this information, the common sparse-vmemmap population path can allocate or reuse shared tail vmemmap pages directly instead of relying on HugeTLB/DAX-specific handling. This centralizes vmemmap optimization logic in the sparse-vmemmap code, based on section metadata, and prepares for sharing the same mechanism across different users of vmemmap optimization, including HugeTLB and DAX. Signed-off-by: Muchun Song <[email protected]> --- include/linux/mmzone.h | 2 +- mm/internal.h | 3 ++ mm/sparse-vmemmap.c | 89 +++++++++++++++++++++++++----------------- mm/sparse.c | 34 +++++++++++++++- 4 files changed, 89 insertions(+), 39 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0974205abd3d..bf4c40818b63 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1147,7 +1147,7 @@ struct zone { /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; -#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP +#ifdef CONFIG_SPARSEMEM_VMEMMAP struct page *vmemmap_tails[NR_OPTIMIZABLE_FOLIO_ORDERS]; #endif } ____cacheline_internodealigned_in_smp; diff --git a/mm/internal.h b/mm/internal.h index 1f1c07eb70e2..2defdef1aedf 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -995,6 +995,9 @@ static inline void __section_mark_present(struct mem_section *ms, ms->section_mem_map |= SECTION_MARKED_PRESENT; } + +int section_nr_vmemmap_pages(unsigned long pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, struct dev_pagemap *pgmap); #else static inline void sparse_init(void) {} #endif /* CONFIG_SPARSEMEM */ diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 94964363d95c..69ae40692e41 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -139,17 +139,49 @@ void __meminit vmemmap_verify(pte_t *pte, int node, start, end - 1); } +static struct zone __meminit *pfn_to_zone(unsigned long pfn, int nid) +{ + pg_data_t *pgdat = NODE_DATA(nid); + + for (enum zone_type zone_type 
= 0; zone_type < MAX_NR_ZONES; zone_type++) { + struct zone *zone = &pgdat->node_zones[zone_type]; + + if (zone_spans_pfn(zone, pfn)) + return zone; + } + + return NULL; +} + +static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *zone); + static pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, struct vmem_altmap *altmap, unsigned long ptpfn) { pte_t *pte = pte_offset_kernel(pmd, addr); + if (pte_none(ptep_get(pte))) { pte_t entry; - void *p; + + if (vmemmap_page_optimizable((struct page *)addr) && + ptpfn == (unsigned long)-1) { + struct page *page; + unsigned long pfn = page_to_pfn((struct page *)addr); + const struct mem_section *ms = __pfn_to_section(pfn); + struct zone *zone = pfn_to_zone(pfn, node); + + if (WARN_ON_ONCE(!zone)) + return NULL; + page = vmemmap_get_tail(section_order(ms), zone); + if (!page) + return NULL; + ptpfn = page_to_pfn(page); + } if (ptpfn == (unsigned long)-1) { - p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap); + void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap); + if (!p) return NULL; ptpfn = PHYS_PFN(__pa(p)); @@ -168,7 +200,8 @@ static pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, in } entry = pfn_pte(ptpfn, PAGE_KERNEL); set_pte_at(&init_mm, addr, pte, entry); - } + } else if (WARN_ON_ONCE(vmemmap_page_optimizable((struct page *)addr))) + return NULL; return pte; } @@ -311,7 +344,6 @@ void vmemmap_wrprotect_hvo(unsigned long addr, unsigned long end, } } -#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *zone) { struct page *p, *tail; @@ -340,6 +372,7 @@ static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone * return tail; } +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP int __meminit vmemmap_populate_hvo(unsigned long addr, unsigned long end, unsigned int order, struct zone *zone, unsigned long headsize) @@ -388,6 +421,9 @@ int __meminit 
vmemmap_populate_hugepages(unsigned long start, unsigned long end, pmd_t *pmd; for (addr = start; addr < end; addr = next) { + unsigned long pfn = page_to_pfn((struct page *)addr); + struct mem_section *ms = __pfn_to_section(pfn); + next = pmd_addr_end(addr, end); pgd = vmemmap_pgd_populate(addr, node); @@ -403,7 +439,7 @@ int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, return -ENOMEM; pmd = pmd_offset(pud, addr); - if (pmd_none(pmdp_get(pmd))) { + if (pmd_none(pmdp_get(pmd)) && !section_vmemmap_optimizable(ms)) { void *p; p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); @@ -421,8 +457,19 @@ int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, */ return -ENOMEM; } - } else if (vmemmap_check_pmd(pmd, node, addr, next)) + } else if (vmemmap_check_pmd(pmd, node, addr, next)) { + const struct mem_section *start_ms; + unsigned long align = max(1UL << section_order(ms), PAGES_PER_SECTION); + + /* HVO-covered sections must not use PMD mappings. */ + start_ms = __pfn_to_section(ALIGN_DOWN(pfn, align)); + if (!IS_ALIGNED(pfn, align) && section_vmemmap_optimizable(start_ms)) + return -ENOTSUPP; + + /* PMD mappings end HVO coverage for this section. */ + section_set_order(ms, 0); continue; + } if (vmemmap_populate_basepages(addr, next, node, altmap)) return -ENOMEM; } @@ -626,36 +673,6 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) } } -static int __meminit section_nr_vmemmap_pages(unsigned long pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, struct dev_pagemap *pgmap) -{ - const struct mem_section *ms = __pfn_to_section(pfn); - const unsigned int order = pgmap ? 
pgmap->vmemmap_shift : section_order(ms); - const unsigned long pages_per_compound = 1UL << order; - unsigned int vmemmap_pages = OPTIMIZED_FOLIO_VMEMMAP_PAGES; - - VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SUBSECTION)); - VM_WARN_ON_ONCE(nr_pages > PAGES_PER_SECTION); - - if (vmemmap_can_optimize(altmap, pgmap)) - vmemmap_pages = VMEMMAP_RESERVE_NR; - - if (!vmemmap_can_optimize(altmap, pgmap) && !section_vmemmap_optimizable(ms)) - return DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE); - - if (order < PFN_SECTION_SHIFT) { - VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, pages_per_compound)); - return vmemmap_pages * nr_pages / pages_per_compound; - } - - VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION)); - - if (IS_ALIGNED(pfn, pages_per_compound)) - return vmemmap_pages; - - return 0; -} - static struct page * __meminit populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap) diff --git a/mm/sparse.c b/mm/sparse.c index 9457a4d6a6fc..3e96478a63e0 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -284,6 +284,36 @@ static void __init sparse_usage_fini(void) sparse_usagebuf = sparse_usagebuf_end = NULL; } +int __meminit section_nr_vmemmap_pages(unsigned long pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, struct dev_pagemap *pgmap) +{ + const struct mem_section *ms = __pfn_to_section(pfn); + const unsigned int order = pgmap ? 
pgmap->vmemmap_shift : section_order(ms); + const unsigned long pages_per_compound = 1UL << order; + unsigned int vmemmap_pages = OPTIMIZED_FOLIO_VMEMMAP_PAGES; + + VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SUBSECTION)); + VM_WARN_ON_ONCE(nr_pages > PAGES_PER_SECTION); + + if (vmemmap_can_optimize(altmap, pgmap)) + vmemmap_pages = VMEMMAP_RESERVE_NR; + + if (!vmemmap_can_optimize(altmap, pgmap) && !section_vmemmap_optimizable(ms)) + return DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE); + + if (order < PFN_SECTION_SHIFT) { + VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, pages_per_compound)); + return vmemmap_pages * nr_pages / pages_per_compound; + } + + VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION)); + + if (IS_ALIGNED(pfn, pages_per_compound)) + return vmemmap_pages; + + return 0; +} + /* * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end) * And number of present sections in this node is map_count. @@ -314,8 +344,8 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, nid, NULL, NULL); if (!map) panic("Failed to allocate memmap for section %lu\n", pnum); - memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page), - PAGE_SIZE)); + memmap_boot_pages_add(section_nr_vmemmap_pages(pfn, PAGES_PER_SECTION, + NULL, NULL)); sparse_init_early_section(nid, map, pnum, 0); } } -- 2.54.0
