free_pool_huge_page was called with hugetlb_lock held. It would remove a hugetlb page, and then free the corresponding pages to the lower level allocators such as buddy. free_pool_huge_page was called in a loop to remove hugetlb pages, and these loops could hold the hugetlb_lock for a considerable time.
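For context, the pre-patch shape of such a loop (as in set_max_huge_pages() below) was roughly the following. This is a simplified sketch, not the literal kernel code; the surrounding lock/unlock is shown only for illustration:

	/*
	 * Pre-patch sketch: free_pool_huge_page() removed a page from
	 * the pool *and* freed it to the lower level (e.g. buddy)
	 * allocator, with the caller holding hugetlb_lock throughout.
	 */
	spin_lock(&hugetlb_lock);
	while (min_count < persistent_huge_pages(h)) {
		if (!free_pool_huge_page(h, nodes_allowed, 0))
			break;
		/* only briefly drops the lock when a reschedule is due */
		cond_resched_lock(&hugetlb_lock);
	}
	spin_unlock(&hugetlb_lock);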
Create new routine remove_pool_huge_page to replace free_pool_huge_page. remove_pool_huge_page will remove the hugetlb page and must be called with the hugetlb_lock held. It returns the removed page; the caller is then responsible for freeing the page to the lower level allocators. The hugetlb_lock is dropped before freeing to these allocators, which results in shorter lock hold times.

Signed-off-by: Mike Kravetz <mike.krav...@oracle.com>
---
 mm/hugetlb.c | 53 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 30 insertions(+), 23 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3028cf10d504..f60a24e326c2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1184,7 +1184,7 @@ static int hstate_next_node_to_alloc(struct hstate *h,
 }
 
 /*
- * helper for free_pool_huge_page() - return the previously saved
+ * helper for remove_pool_huge_page() - return the previously saved
  * node ["this node"] from which to free a huge page. Advance the
  * next node id whether or not we find a free huge page to free so
  * that the next attempt to free addresses the next node.
@@ -1699,16 +1699,18 @@ static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 }
 
 /*
- * Free huge page from pool from next node to free.
- * Attempt to keep persistent huge pages more or less
- * balanced over allowed nodes.
+ * Remove huge page from pool from next node to free. Attempt to keep
+ * persistent huge pages more or less balanced over allowed nodes.
+ * This routine only 'removes' the hugetlb page. The caller must make
+ * an additional call to free the page to low level allocators.
  * Called with hugetlb_lock locked.
  */
-static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
-						 bool acct_surplus)
+static struct page *remove_pool_huge_page(struct hstate *h,
+						nodemask_t *nodes_allowed,
+						 bool acct_surplus)
 {
 	int nr_nodes, node;
-	int ret = 0;
+	struct page *page = NULL;
 
 	for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
 		/*
@@ -1717,23 +1719,14 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 		 */
 		if ((!acct_surplus || h->surplus_huge_pages_node[node]) &&
 		    !list_empty(&h->hugepage_freelists[node])) {
-			struct page *page =
-				list_entry(h->hugepage_freelists[node].next,
+			page = list_entry(h->hugepage_freelists[node].next,
 					  struct page, lru);
 			remove_hugetlb_page(h, page, acct_surplus);
-			/*
-			 * unlock/lock around update_and_free_page is temporary
-			 * and will be removed with subsequent patch.
-			 */
-			spin_unlock(&hugetlb_lock);
-			update_and_free_page(h, page);
-			spin_lock(&hugetlb_lock);
-			ret = 1;
 			break;
 		}
 	}
 
-	return ret;
+	return page;
 }
 
 /*
@@ -2064,6 +2057,7 @@ static void return_unused_surplus_pages(struct hstate *h,
 					unsigned long unused_resv_pages)
 {
 	unsigned long nr_pages;
+	struct page *page;
 
 	/* Cannot return gigantic pages currently */
 	if (hstate_is_gigantic(h))
@@ -2080,7 +2074,7 @@ static void return_unused_surplus_pages(struct hstate *h,
 	 * evenly across all nodes with memory. Iterate across these nodes
 	 * until we can no longer free unreserved surplus pages. This occurs
 	 * when the nodes with surplus pages have no free pages.
-	 * free_pool_huge_page() will balance the freed pages across the
+	 * remove_pool_huge_page() will balance the freed pages across the
	 * on-line nodes with memory and will handle the hstate accounting.
	 *
	 * Note that we decrement resv_huge_pages as we free the pages.  If
@@ -2090,9 +2084,15 @@ static void return_unused_surplus_pages(struct hstate *h,
 	while (nr_pages--) {
 		h->resv_huge_pages--;
 		unused_resv_pages--;
-		if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
+		page = remove_pool_huge_page(h, &node_states[N_MEMORY], 1);
+		if (!page)
 			goto out;
-		cond_resched_lock(&hugetlb_lock);
+
+		/* Drop lock and free page to buddy as it could sleep */
+		spin_unlock(&hugetlb_lock);
+		update_and_free_page(h, page);
+		cond_resched();
+		spin_lock(&hugetlb_lock);
 	}
 
 out:
@@ -2631,6 +2631,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
 			      nodemask_t *nodes_allowed)
 {
 	unsigned long min_count, ret;
+	struct page *page;
 	NODEMASK_ALLOC(nodemask_t, node_alloc_noretry, GFP_KERNEL);
 
 	/*
@@ -2740,9 +2741,15 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
 	min_count = min_hp_count(h, count);
 	try_to_free_low(h, count, nodes_allowed);
 	while (min_count < persistent_huge_pages(h)) {
-		if (!free_pool_huge_page(h, nodes_allowed, 0))
+		page = remove_pool_huge_page(h, nodes_allowed, 0);
+		if (!page)
 			break;
-		cond_resched_lock(&hugetlb_lock);
+
+		/* Drop lock as free routines may sleep */
+		spin_unlock(&hugetlb_lock);
+		update_and_free_page(h, page);
+		cond_resched();
+		spin_lock(&hugetlb_lock);
 
 		/* Recompute min_count in case hugetlb_lock was dropped */
 		min_count = min_hp_count(h, count);
-- 
2.30.2
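
As a usage note, the caller-side contract of the new helper reduces to the pattern below. This is a simplified sketch assembled from the hunks above; the bare for (;;) loop framing is illustrative only (the real call sites use their own loop conditions):

	/*
	 * Sketch of the remove_pool_huge_page() contract: call with
	 * hugetlb_lock held; on success the page has already been
	 * removed from the pool with hstate accounting updated, and
	 * the caller must free it to the low level allocators with
	 * the lock dropped.
	 */
	spin_lock(&hugetlb_lock);
	for (;;) {
		struct page *page;

		page = remove_pool_huge_page(h, nodes_allowed, acct_surplus);
		if (!page)	/* no eligible page on any allowed node */
			break;

		/* Drop lock as free routines may sleep */
		spin_unlock(&hugetlb_lock);
		update_and_free_page(h, page);
		cond_resched();
		spin_lock(&hugetlb_lock);
	}
	spin_unlock(&hugetlb_lock);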