With hugetlb reservation now moved after free_area_init(), zone information is available during bootmem huge page allocation. This allows us to identify and handle cross-zone gigantic pages more precisely.
During alloc_bootmem(), pages that intersect multiple zones are added to the head of huge_boot_pages[nid] list (without ZONES_VALID flag), while pages with valid zones are added to the tail (with ZONES_VALID flag). After allocation completes, hugetlb_free_cross_zone_pages() iterates through the list and frees those cross-zone pages (entries without HUGE_BOOTMEM_ZONES_VALID flag). The count of freed pages is subtracted from the allocated count to ensure the final number reflects only valid huge pages. This applies to both per-node allocation path and the global gigantic allocation path, simplifying the code by avoiding cross-zone checks at later stages. Signed-off-by: Muchun Song <[email protected]> --- mm/hugetlb.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d6ea11113f1d..238495fd04e4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3049,6 +3049,11 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, return ERR_PTR(-ENOSPC); } +static bool __init hugetlb_bootmem_page_earlycma(struct huge_bootmem_page *m) +{ + return m->flags & HUGE_BOOTMEM_CMA; +} + static __init void *alloc_bootmem(struct hstate *h, int nid, bool node_exact) { struct huge_bootmem_page *m; @@ -3092,7 +3097,14 @@ static __init void *alloc_bootmem(struct hstate *h, int nid, bool node_exact) * is not up yet. 
*/ INIT_LIST_HEAD(&m->list); - list_add(&m->list, &huge_boot_pages[listnode]); + if (pfn_range_intersects_zones(listnode, PHYS_PFN(virt_to_phys(m)), + pages_per_huge_page(h))) { + VM_BUG_ON(hugetlb_bootmem_page_earlycma(m)); + list_add(&m->list, &huge_boot_pages[listnode]); + } else { + list_add_tail(&m->list, &huge_boot_pages[listnode]); + m->flags |= HUGE_BOOTMEM_ZONES_VALID; + } m->hstate = h; } @@ -3186,11 +3198,6 @@ static bool __init hugetlb_bootmem_page_prehvo(struct huge_bootmem_page *m) return m->flags & HUGE_BOOTMEM_HVO; } -static bool __init hugetlb_bootmem_page_earlycma(struct huge_bootmem_page *m) -{ - return m->flags & HUGE_BOOTMEM_CMA; -} - /* * memblock-allocated pageblocks might not have the migrate type set * if marked with the 'noinit' flag. Set it to the default (MIGRATE_MOVABLE) @@ -3393,6 +3400,34 @@ static void __init gather_bootmem_prealloc(void) padata_do_multithreaded(&job); } +static unsigned long __init hugetlb_free_cross_zone_pages(struct hstate *h, int nid) +{ + unsigned long freed = 0; + struct huge_bootmem_page *m, *tmp; + + if (!hstate_is_gigantic(h)) + return freed; + + list_for_each_entry_safe(m, tmp, &huge_boot_pages[nid], list) { + if (m->flags & HUGE_BOOTMEM_ZONES_VALID) + break; + + list_del(&m->list); + memblock_free(m, huge_page_size(h)); + freed++; + } + + if (freed) { + char buf[32]; + + string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, sizeof(buf)); + pr_warn("HugeTLB: freeing %lu cross-zone hugepage of page size %s failed node%d.\n", + freed, buf, nid); + } + + return freed; +} + static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid) { unsigned long i; @@ -3423,6 +3458,8 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid) cond_resched(); } + i -= hugetlb_free_cross_zone_pages(h, nid); + if (!list_empty(&folio_list)) prep_and_add_allocated_folios(h, &folio_list); @@ -3496,6 +3533,7 @@ static void __init hugetlb_pages_alloc_boot_node(unsigned long start, 
unsigned l static unsigned long __init hugetlb_gigantic_pages_alloc_boot(struct hstate *h) { + int nid; unsigned long i; for (i = 0; i < h->max_huge_pages; ++i) { @@ -3504,6 +3542,9 @@ static unsigned long __init hugetlb_gigantic_pages_alloc_boot(struct hstate *h) cond_resched(); } + for_each_node(nid) + i -= hugetlb_free_cross_zone_pages(h, nid); + return i; } -- 2.20.1
