On 11/02/2022 at 17:41, Zi Yan wrote:
> From: Zi Yan <z...@nvidia.com>
> 
> alloc_contig_range() worked at MAX_ORDER-1 granularity to avoid merging
> pageblocks with different migratetypes. It might unnecessarily convert
> extra pageblocks at the beginning and at the end of the range. Change
> alloc_contig_range() to work at pageblock granularity.
> 
> Special handling is needed for free pages and in-use pages across the
> boundaries of the range specified by alloc_contig_range(), because these
> partially isolated pages cause free page accounting issues. The free
> pages will be split and freed into separate migratetype lists; the
> in-use pages will be migrated and then the freed pages will be handled.
> 
> Signed-off-by: Zi Yan <z...@nvidia.com>
> ---
>   include/linux/page-isolation.h |   2 +-
>   mm/internal.h                  |   3 +
>   mm/memory_hotplug.c            |   3 +-
>   mm/page_alloc.c                | 235 +++++++++++++++++++++++++--------
>   mm/page_isolation.c            |  33 ++++-
>   5 files changed, 211 insertions(+), 65 deletions(-)
> 
> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
> index 4ef7be6def83..78ff940cc169 100644
> --- a/include/linux/page-isolation.h
> +++ b/include/linux/page-isolation.h
> @@ -54,7 +54,7 @@ int move_freepages_block(struct zone *zone, struct page *page,
>    */
>   int
>   start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> -                      unsigned migratetype, int flags);
> +                      unsigned migratetype, int flags, gfp_t gfp_flags);
>   
>   /*
>    * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
> diff --git a/mm/internal.h b/mm/internal.h
> index 0d240e876831..509cbdc25992 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -319,6 +319,9 @@ isolate_freepages_range(struct compact_control *cc,
>   int
>   isolate_migratepages_range(struct compact_control *cc,
>                          unsigned long low_pfn, unsigned long end_pfn);
> +
> +int
> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
>   #endif
>   int find_suitable_fallback(struct free_area *area, unsigned int order,
>                       int migratetype, bool only_stealable, bool *can_steal);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index ce68098832aa..82406d2f3e46 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1863,7 +1863,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
>       /* set above range as isolated */
>       ret = start_isolate_page_range(start_pfn, end_pfn,
>                                      MIGRATE_MOVABLE,
> -                                    MEMORY_OFFLINE | REPORT_FAILURE);
> +                                    MEMORY_OFFLINE | REPORT_FAILURE,
> +                                    GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
>       if (ret) {
>               reason = "failure to isolate range";
>               goto failed_removal_pcplists_disabled;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 62ef78f3d771..7a4fa21aea5c 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -8985,7 +8985,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
>   #endif
>   
>   /* [start, end) must belong to a single zone. */
> -static int __alloc_contig_migrate_range(struct compact_control *cc,
> +int __alloc_contig_migrate_range(struct compact_control *cc,
>                                       unsigned long start, unsigned long end)
>   {
>       /* This function is based on compact_zone() from compaction.c. */
> @@ -9043,6 +9043,167 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
>       return 0;
>   }
>   
> +/**
> + * split_free_page() -- split a free page at split_pfn_offset
> + * @free_page:               the original free page
> + * @order:           the order of the page
> + * @split_pfn_offset:        split offset within the page
> + *
> + * It is used when the free page crosses two pageblocks with different migratetypes
> + * at split_pfn_offset within the page. The split free page will be put into
> + * separate migratetype lists afterwards. Otherwise, the function achieves
> + * nothing.
> + */
> +static inline void split_free_page(struct page *free_page,
> +                             int order, unsigned long split_pfn_offset)
> +{
> +     struct zone *zone = page_zone(free_page);
> +     unsigned long free_page_pfn = page_to_pfn(free_page);
> +     unsigned long pfn;
> +     unsigned long flags;
> +     int free_page_order;
> +
> +     spin_lock_irqsave(&zone->lock, flags);
> +     del_page_from_free_list(free_page, zone, order);
> +     for (pfn = free_page_pfn;
> +          pfn < free_page_pfn + (1UL << order);) {
> +             int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
> +
> +             free_page_order = order_base_2(split_pfn_offset);
> +             __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
> +                             mt, FPI_NONE);
> +             pfn += 1UL << free_page_order;
> +             split_pfn_offset -= (1UL << free_page_order);
> +             /* we have done the first part, now switch to second part */
> +             if (split_pfn_offset == 0)
> +                     split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
> +     }
> +     spin_unlock_irqrestore(&zone->lock, flags);
> +}
> +
> +/**
> + * isolate_single_pageblock() -- tries to isolate a pageblock that might be
> + * within a free or in-use page.
> + * @boundary_pfn:            pageblock-aligned pfn that a page might cross
> + * @gfp_flags:                       GFP flags used for migrating pages
> + * @isolate_before_boundary: isolate the pageblock before (1) or after (0)
> + *                           the boundary_pfn
> + *
> + * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
> + * pageblock. When not all pageblocks within a page are isolated at the same
> + * time, free page accounting can go wrong. For example, in the case of
> + * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pageblocks.
> + * [        MAX_ORDER-1          ]
> + * [  pageblock0  |  pageblock1  ]
> + * When either pageblock is isolated, if it is a free page, the page is not
> + * split into separate migratetype lists as it is supposed to be; if it is an
> + * in-use page and freed later, __free_one_page() does not split the free page
> + * either. The function handles this by splitting the free page or migrating
> + * the in-use page and then splitting the free page.
> + */
> +int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
> +                     int isolate_before_boundary)

Do you need such a big param name?

See 
https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#naming

isolate_before_boundary could probably be shorter.

And it should be a bool, by the way.
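
Something like this, maybe (untested, just to illustrate the bool
suggestion; "isolate_before" is only an example name):

int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
			     bool isolate_before);

with the two callers in start_isolate_page_range() then passing
true/false instead of 1/0:

	ret = isolate_single_pageblock(isolate_start, gfp_flags, false);
	ret = isolate_single_pageblock(isolate_end, gfp_flags, true);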

> +{
> +     unsigned char saved_mt;
> +     /*
> +      * scan at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) aligned range to
> +      * avoid isolating pageblocks belonging to a bigger free or in-use page
> +      */
> +     unsigned long start_pfn = pfn_max_align_down(boundary_pfn);
> +     unsigned long isolated_pageblock_pfn;

Variable name too long.
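A shorter name (e.g. isol_pfn, just a suggestion) would also keep the
lines below under 80 columns:

	saved_mt = get_pageblock_migratetype(pfn_to_page(isol_pfn));
	set_pageblock_migratetype(pfn_to_page(isol_pfn), MIGRATE_ISOLATE);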

> +     unsigned long pfn;
> +
> +     VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
> +
> +     if (isolate_before_boundary)
> +             isolated_pageblock_pfn = boundary_pfn - pageblock_nr_pages;
> +     else
> +             isolated_pageblock_pfn = boundary_pfn;
> +
> +     saved_mt = get_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn));
> +     set_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn), MIGRATE_ISOLATE);
> +
> +     for (pfn = start_pfn; pfn < boundary_pfn;) {

This loop is a bit long and deep. Isn't there a way to put what's in
"if (PageHuge(page) || PageTransCompound(page))" into a sub-function?

See 
https://www.kernel.org/doc/html/latest/process/coding-style.html?highlight=style#functions
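
Something along these lines, perhaps (completely untested, the helper
name and prototype are only illustrative):

static int isolate_compound_page(struct page *page, unsigned long boundary_pfn,
				 gfp_t gfp_flags, unsigned long *pfn)
{
	unsigned long nr_pages = compound_nr(page);
	int order = compound_order(page);
	struct page *head = compound_head(page);
	unsigned long head_pfn = page_to_pfn(head);
	struct compact_control cc = {
		.nr_migratepages = 0,
		.order = -1,
		.zone = page_zone(head),
		.mode = MIGRATE_SYNC,
		.ignore_skip_hint = true,
		.no_set_skip_hint = true,
		.gfp_mask = current_gfp_context(gfp_flags),
		.alloc_contig = true,
	};
	int ret;

	/* the compound page does not cross boundary_pfn, just skip over it */
	if (head_pfn + nr_pages < boundary_pfn) {
		*pfn += nr_pages;
		return 0;
	}

	INIT_LIST_HEAD(&cc.migratepages);
	ret = __alloc_contig_migrate_range(&cc, head_pfn, head_pfn + nr_pages);
	if (ret)
		return -EBUSY;

	/*
	 * Walk back to the head of the now-free buddy page so the caller's
	 * free page handling splits it; cap the starting order for hugetlb
	 * pages bigger than MAX_ORDER-1.
	 */
	if (order > MAX_ORDER - 1)
		order = 0;
	while (!PageBuddy(pfn_to_page(*pfn))) {
		order++;
		*pfn &= ~0UL << order;
	}
	return 0;
}

The PageHuge()/PageTransCompound() branch of the loop would then shrink
to something like:

		if (PageHuge(page) || PageTransCompound(page)) {
			if (isolate_compound_page(page, boundary_pfn,
						  gfp_flags, &pfn)) {
				/* restore the original migratetype */
				set_pageblock_migratetype(
					pfn_to_page(isolated_pageblock_pfn),
					saved_mt);
				return -EBUSY;
			}
			continue;
		}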

> +             struct page *page = pfn_to_page(pfn);
> +
> +             /*
> +              * start_pfn is max(MAX_ORDER_NR_PAGES, pageblock_nr_pages)
> +              * aligned, so if there are any free pages in [start_pfn, boundary_pfn),
> +              * their head pages will always be in the range.
> +              */
> +             if (PageBuddy(page)) {
> +                     int order = buddy_order(page);
> +
> +                     if (pfn + (1UL << order) > boundary_pfn)
> +                             split_free_page(page, order, boundary_pfn - pfn);
> +                     pfn += (1UL << order);
> +                     continue;
> +             }
> +             /*
> +              * migrate compound pages then let the free page handling code
> +              * above do the rest
> +              */
> +             if (PageHuge(page) || PageTransCompound(page)) {
> +                     unsigned long nr_pages = compound_nr(page);
> +                     int order = compound_order(page);
> +                     struct page *head = compound_head(page);
> +                     unsigned long head_pfn = page_to_pfn(head);
> +
> +                     if (head_pfn + nr_pages >= boundary_pfn) {
> +                             int ret;
> +                             struct compact_control cc = {
> +                                     .nr_migratepages = 0,
> +                                     .order = -1,
> +                                     .zone = page_zone(pfn_to_page(head_pfn)),
> +                                     .mode = MIGRATE_SYNC,
> +                                     .ignore_skip_hint = true,
> +                                     .no_set_skip_hint = true,
> +                                     .gfp_mask = current_gfp_context(gfp_flags),
> +                                     .alloc_contig = true,
> +                             };
> +
> +                             INIT_LIST_HEAD(&cc.migratepages);
> +
> +                             ret = __alloc_contig_migrate_range(&cc, head_pfn,
> +                                                     head_pfn + nr_pages);
> +
> +                             if (ret) {
> +                                     /* restore the original migratetype */
> +                                     set_pageblock_migratetype(
> +                                             pfn_to_page(isolated_pageblock_pfn),
> +                                             saved_mt);
> +                                     return -EBUSY;
> +                             }
> +                             /*
> +                              * reset pfn, let the free page handling code
> +                              * above split the free page to the right
> +                              * migratetype list.
> +                              *
> +                              * head_pfn is not used here as a hugetlb page
> +                              * order can be bigger than MAX_ORDER-1, but
> +                              * after it is freed, the free page order is not.
> +                              * Use pfn within the range to find the head of
> +                              * the free page and reset order to 0 if a hugetlb
> +                              * page with >MAX_ORDER-1 order is encountered.
> +                              */
> +                             if (order > MAX_ORDER-1)
> +                                     order = 0;
> +                             while (!PageBuddy(pfn_to_page(pfn))) {
> +                                     order++;
> +                                     pfn &= ~0UL << order;
> +                             }
> +                             continue;
> +                     }
> +                     pfn += nr_pages;
> +                     continue;
> +             }
> +
> +             pfn++;
> +     }
> +     return 0;
> +}
> +
> +
>   /**
>    * alloc_contig_range() -- tries to allocate given range of pages
>    * @start:  start PFN to allocate
> @@ -9067,8 +9228,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
>   int alloc_contig_range(unsigned long start, unsigned long end,
>                      unsigned migratetype, gfp_t gfp_mask)
>   {
> -     unsigned long outer_start, outer_end;
> -     unsigned int order;
> +     unsigned long outer_end;
> +     unsigned long alloc_start = ALIGN_DOWN(start, pageblock_nr_pages);
> +     unsigned long alloc_end = ALIGN(end, pageblock_nr_pages);
>       int ret = 0;
>   
>       struct compact_control cc = {
> @@ -9087,14 +9249,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>        * What we do here is we mark all pageblocks in range as
>        * MIGRATE_ISOLATE.  Because pageblock and max order pages may
>        * have different sizes, and due to the way page allocator
> -      * work, we align the range to biggest of the two pages so
> -      * that page allocator won't try to merge buddies from
> -      * different pageblocks and change MIGRATE_ISOLATE to some
> -      * other migration type.
> +      * work, start_isolate_page_range() has special handling for this.
>        *
>        * Once the pageblocks are marked as MIGRATE_ISOLATE, we
>        * migrate the pages from an unaligned range (ie. pages that
> -      * we are interested in).  This will put all the pages in
> +      * we are interested in). This will put all the pages in
>        * range back to page allocator as MIGRATE_ISOLATE.
>        *
>        * When this is done, we take the pages in range from page
> @@ -9107,9 +9266,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>        * put back to page allocator so that buddy can use them.
>        */
>   
> -     ret = start_isolate_page_range(start, end, migratetype, 0);
> +     ret = start_isolate_page_range(start, end, migratetype, 0, gfp_mask);
>       if (ret)
> -             return ret;
> +             goto done;
>   
>       drain_all_pages(cc.zone);
>   
> @@ -9128,68 +9287,28 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>               goto done;
>       ret = 0;
>   
> -     /*
> -      * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
> -      * aligned blocks that are marked as MIGRATE_ISOLATE.  What's
> -      * more, all pages in [start, end) are free in page allocator.
> -      * What we are going to do is to allocate all pages from
> -      * [start, end) (that is remove them from page allocator).
> -      *
> -      * The only problem is that pages at the beginning and at the
> -      * end of interesting range may be not aligned with pages that
> -      * page allocator holds, ie. they can be part of higher order
> -      * pages.  Because of this, we reserve the bigger range and
> -      * once this is done free the pages we are not interested in.
> -      *
> -      * We don't have to hold zone->lock here because the pages are
> -      * isolated thus they won't get removed from buddy.
> -      */
> -
> -     order = 0;
> -     outer_start = start;
> -     while (!PageBuddy(pfn_to_page(outer_start))) {
> -             if (++order >= MAX_ORDER) {
> -                     outer_start = start;
> -                     break;
> -             }
> -             outer_start &= ~0UL << order;
> -     }
> -
> -     if (outer_start != start) {
> -             order = buddy_order(pfn_to_page(outer_start));
> -
> -             /*
> -              * outer_start page could be small order buddy page and
> -              * it doesn't include start page. Adjust outer_start
> -              * in this case to report failed page properly
> -              * on tracepoint in test_pages_isolated()
> -              */
> -             if (outer_start + (1UL << order) <= start)
> -                     outer_start = start;
> -     }
> -
>       /* Make sure the range is really isolated. */
> -     if (test_pages_isolated(outer_start, end, 0)) {
> +     if (test_pages_isolated(alloc_start, alloc_end, 0)) {
>               ret = -EBUSY;
>               goto done;
>       }
>   
>       /* Grab isolated pages from freelists. */
> -     outer_end = isolate_freepages_range(&cc, outer_start, end);
> +     outer_end = isolate_freepages_range(&cc, alloc_start, alloc_end);
>       if (!outer_end) {
>               ret = -EBUSY;
>               goto done;
>       }
>   
>       /* Free head and tail (if any) */
> -     if (start != outer_start)
> -             free_contig_range(outer_start, start - outer_start);
> -     if (end != outer_end)
> -             free_contig_range(end, outer_end - end);
> +     if (start != alloc_start)
> +             free_contig_range(alloc_start, start - alloc_start);
> +     if (end != alloc_end)
> +             free_contig_range(end, alloc_end - end);
>   
>   done:
> -     undo_isolate_page_range(pfn_max_align_down(start),
> -                             pfn_max_align_up(end), migratetype);
> +     undo_isolate_page_range(alloc_start,
> +                             alloc_end, migratetype);
>       return ret;
>   }
>   EXPORT_SYMBOL(alloc_contig_range);
> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> index 64d093ab83ec..0256d5e1032c 100644
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -285,6 +285,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>    *                                   and PageOffline() pages.
>    *                  REPORT_FAILURE - report details about the failure to
>    *                  isolate the range
> + * @gfp_flags:               GFP flags used for migrating pages that sit across the
> + *                   range boundaries.
>    *
>    * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
>    * the range will never be allocated. Any free pages and pages freed in the
> @@ -293,6 +295,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>    * pages in the range finally, the caller have to free all pages in the range.
>    * test_page_isolated() can be used for test it.
>    *
> + * The function first tries to isolate the pageblocks at the beginning and end
> + * of the range, since there might be pages across the range boundaries.
> + * Afterwards, it isolates the rest of the range.
> + *
>    * There is no high level synchronization mechanism that prevents two threads
>    * from trying to isolate overlapping ranges. If this happens, one thread
>    * will notice pageblocks in the overlapping range already set to isolate.
> @@ -313,21 +319,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>    * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
>    */
>   int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> -                          unsigned migratetype, int flags)
> +                          unsigned migratetype, int flags, gfp_t gfp_flags)
>   {
>       unsigned long pfn;
>       struct page *page;
> +     /* isolation is done at page block granularity */
> +     unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
> +     unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
> +     int ret;
>   
> -     unsigned long isolate_start = pfn_max_align_down(start_pfn);
> -     unsigned long isolate_end = pfn_max_align_up(end_pfn);
> +     /* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
> +     ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
> +     if (ret)
> +             return ret;
> +
> +     /* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
> +     ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
> +     if (ret) {
> +             unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
> +             return ret;
> +     }
>   
> -     for (pfn = isolate_start;
> -          pfn < isolate_end;
> +     /* skip isolated pageblocks at the beginning and end */
> +     for (pfn = isolate_start + pageblock_nr_pages;
> +          pfn < isolate_end - pageblock_nr_pages;
>            pfn += pageblock_nr_pages) {
>               page = __first_valid_page(pfn, pageblock_nr_pages);
>               if (page && set_migratetype_isolate(page, migratetype, flags,
>                                       start_pfn, end_pfn)) {
>                     undo_isolate_page_range(isolate_start, pfn, migratetype);
> +                     unset_migratetype_isolate(
> +                             pfn_to_page(isolate_end - pageblock_nr_pages),
> +                             migratetype);
>                       return -EBUSY;
>               }
>       }
