The routine set_max_huge_pages reduces the number of hugetlb pages by
calling free_pool_huge_page in a loop.  It does this as long as
persistent_huge_pages() is above a calculated min_count value.  However,
the loop can conditionally drop hugetlb_lock, and in some circumstances
free_pool_huge_page can drop hugetlb_lock.  If the lock is dropped,
counters could change and the calculated min_count value may no longer
be valid.
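For context, below is a minimal user-space sketch of the pattern the
fix relies on (illustrative only; pool_lock, min_pool_count, free_one_page
and shrink_pool are hypothetical stand-ins, not the kernel code): any
bound derived from shared counters must be recomputed after a point
where the lock protecting those counters may have been dropped.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-ins for h->resv_huge_pages, h->nr_huge_pages, h->free_huge_pages. */
static unsigned long resv_pages = 2, nr_pages = 10, free_pages = 6;

/* Mirrors the min_count calculation: max(count, resv + nr - free). */
static unsigned long min_pool_count(unsigned long count)
{
	unsigned long min_count = resv_pages + nr_pages - free_pages;

	return count > min_count ? count : min_count;
}

/* Like free_pool_huge_page, this may drop and re-acquire the lock. */
static int free_one_page(void)
{
	if (free_pages == 0)
		return 0;

	pthread_mutex_unlock(&pool_lock);
	/* ...another thread could change the counters here... */
	pthread_mutex_lock(&pool_lock);

	free_pages--;
	nr_pages--;
	return 1;
}

static void shrink_pool(unsigned long count)
{
	pthread_mutex_lock(&pool_lock);

	unsigned long min_count = min_pool_count(count);

	while (min_count < nr_pages) {
		if (!free_one_page())
			break;
		/* The lock may have been dropped above: recompute the bound. */
		min_count = min_pool_count(count);
	}

	pthread_mutex_unlock(&pool_lock);
}

int main(void)
{
	shrink_pool(4);
	printf("pool now holds %lu pages (%lu free)\n", nr_pages, free_pages);
	return 0;
}

The recomputation inside the loop corresponds to the min_hp_count()
helper introduced by the patch below.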
The routine try_to_free_low has the same issue.  Recalculate min_count
in each loop iteration as hugetlb_lock may have been dropped.

Signed-off-by: Mike Kravetz <mike.krav...@oracle.com>
---
 mm/hugetlb.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d5be25f910e8..c537274c2a38 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2521,11 +2521,20 @@ static void __init report_hugepages(void)
 	}
 }
 
+static inline unsigned long min_hp_count(struct hstate *h, unsigned long count)
+{
+	unsigned long min_count;
+
+	min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
+	return max(count, min_count);
+}
+
 #ifdef CONFIG_HIGHMEM
 static void try_to_free_low(struct hstate *h, unsigned long count,
 						nodemask_t *nodes_allowed)
 {
 	int i;
+	unsigned long min_count = min_hp_count(h, count);
 
 	if (hstate_is_gigantic(h))
 		return;
@@ -2534,7 +2543,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
 		struct page *page, *next;
 		struct list_head *freel = &h->hugepage_freelists[i];
 		list_for_each_entry_safe(page, next, freel, lru) {
-			if (count >= h->nr_huge_pages)
+			if (min_count >= h->nr_huge_pages)
 				return;
 			if (PageHighMem(page))
 				continue;
@@ -2542,6 +2551,12 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
 			update_and_free_page(h, page);
 			h->free_huge_pages--;
 			h->free_huge_pages_node[page_to_nid(page)]--;
+
+			/*
+			 * update_and_free_page could have dropped lock so
+			 * recompute min_count.
+			 */
+			min_count = min_hp_count(h, count);
 		}
 	}
 }
@@ -2695,13 +2710,15 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
 	 * and won't grow the pool anywhere else. Not until one of the
 	 * sysctls are changed, or the surplus pages go out of use.
 	 */
-	min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
-	min_count = max(count, min_count);
-	try_to_free_low(h, min_count, nodes_allowed);
+	min_count = min_hp_count(h, count);
+	try_to_free_low(h, count, nodes_allowed);
 	while (min_count < persistent_huge_pages(h)) {
 		if (!free_pool_huge_page(h, nodes_allowed, 0))
 			break;
 		cond_resched_lock(&hugetlb_lock);
+
+		/* Recompute min_count in case hugetlb_lock was dropped */
+		min_count = min_hp_count(h, count);
 	}
 	while (count < persistent_huge_pages(h)) {
 		if (!adjust_pool_surplus(h, nodes_allowed, 1))
-- 
2.30.2