From: Zi Yan <z...@nvidia.com>

During the process of generating physically contiguous memory, it is
possible that we want to move a THP to a place with 512 base pages.
Exchange pages does not yet implement exchanging a THP with 512 base
pages. Instead, we can split the THP and exchange the 512 base pages.
This increases the chance of creating a large contiguous region.
A split THP could be promoted back after all 512 pages are moved to the
destination or if none of its subpages is moved.
In-place THP promotion will be introduced later in this patch series.

Signed-off-by: Zi Yan <z...@nvidia.com>
---
 mm/internal.h   |   4 ++
 mm/mem_defrag.c | 155 +++++++++++++++++++++++++++++++++++++-----------
 mm/page_alloc.c |  45 ++++++++++++++
 3 files changed, 168 insertions(+), 36 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 4fe8d1a4d7bb..70a6ef603e5b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -574,6 +574,10 @@ void expand(struct zone *zone, struct page *page,
        int low, int high, struct free_area *area,
        int migratetype);
 
+int expand_free_page(struct zone *zone, struct page *buddy_head,
+       struct page *page, int buddy_order, int page_order,
+       struct free_area *area, int migratetype);
+
 void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
                                                        unsigned int 
alloc_flags);
 
diff --git a/mm/mem_defrag.c b/mm/mem_defrag.c
index 414909e1c19c..4d458b125c95 100644
--- a/mm/mem_defrag.c
+++ b/mm/mem_defrag.c
@@ -643,6 +643,15 @@ static void exchange_free(struct page *freepage, unsigned 
long data)
                head->num_freepages++;
 }
 
+static bool page_can_migrate(struct page *page)
+{
+       if (PageAnon(page))
+               return true;
+       if (page_mapping(page))
+               return true;
+       return false;
+}
+
 int defrag_address_range(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long start_addr, unsigned long end_addr,
                struct page *anchor_page, unsigned long page_vaddr,
@@ -655,6 +664,7 @@ int defrag_address_range(struct mm_struct *mm, struct 
vm_area_struct *vma,
        int not_present = 0;
        bool src_thp = false;
 
+restart:
        for (scan_address = start_addr; scan_address < end_addr;
                 scan_address += page_size) {
                struct page *scan_page;
@@ -683,6 +693,8 @@ int defrag_address_range(struct mm_struct *mm, struct 
vm_area_struct *vma,
                if ((scan_page == compound_head(scan_page)) &&
                        PageTransHuge(scan_page) && !PageHuge(scan_page))
                        src_thp = true;
+               else
+                       src_thp = false;
 
                /* Allow THPs  */
                if (PageCompound(scan_page) && !src_thp) {
@@ -720,13 +732,17 @@ int defrag_address_range(struct mm_struct *mm, struct 
vm_area_struct *vma,
                        }
 
 retry_defrag:
-                       /* migrate */
-                       if (PageBuddy(dest_page)) {
+                               /* free pages */
+                       if (page_count(dest_page) == 0 && dest_page->mapping == 
NULL) {
+                               int buddy_page_order = 0;
+                               unsigned long pfn = page_to_pfn(dest_page);
+                               unsigned long buddy_pfn;
+                               struct page *buddy = dest_page;
                                struct zone *zone = page_zone(dest_page);
                                spinlock_t *zone_lock = &zone->lock;
                                unsigned long zone_lock_flags;
                                unsigned long free_page_order = 0;
-                               int err = 0;
+                               int err = 0, expand_err = 0;
                                struct exchange_alloc_head exchange_alloc_head 
= {0};
                                int migratetype = 
get_pageblock_migratetype(dest_page);
 
@@ -734,32 +750,77 @@ int defrag_address_range(struct mm_struct *mm, struct 
vm_area_struct *vma,
                                INIT_LIST_HEAD(&exchange_alloc_head.freelist);
                                
INIT_LIST_HEAD(&exchange_alloc_head.migratepage_list);
 
-                               count_vm_events(MEM_DEFRAG_DST_FREE_PAGES, 
1<<scan_page_order);
+                               /* not managed pages  */
+                               if (!dest_page->flags) {
+                                       failed += 1;
+                                       defrag_stats->dst_out_of_bound_failed 
+= 1;
 
+                                       defrag_stats->not_defrag_vpn = 
scan_address + page_size;
+                                       goto quit_defrag;
+                               }
+                               /* spill order-0 pages to buddy allocator from 
pcplist */
+                               if (!PageBuddy(dest_page) && !page_drained) {
+                                       drain_all_pages(zone);
+                                       page_drained = 1;
+                                       goto retry_defrag;
+                               }
                                /* lock page_zone(dest_page)->lock  */
                                spin_lock_irqsave(zone_lock, zone_lock_flags);
 
-                               if (!PageBuddy(dest_page)) {
+                               while (!PageBuddy(buddy) && buddy_page_order < 
MAX_ORDER) {
+                                       buddy_pfn = pfn & 
~((1<<buddy_page_order) - 1);
+                                       buddy = dest_page - (pfn - buddy_pfn);
+                                       buddy_page_order++;
+                               }
+                               if (!PageBuddy(buddy)) {
                                        err = -EINVAL;
                                        goto freepage_isolate_fail;
                                }
 
-                               free_page_order = page_order(dest_page);
+                               count_vm_events(MEM_DEFRAG_DST_FREE_PAGES, 
1<<scan_page_order);
 
-                               /* fail early if not enough free pages */
-                               if (free_page_order < scan_page_order) {
+                               free_page_order = page_order(buddy);
+
+                               /* caught some transient-state page */
+                               if (free_page_order < buddy_page_order) {
                                        err = -ENOMEM;
                                        goto freepage_isolate_fail;
                                }
 
+                               /* fail early if not enough free pages */
+                               if (free_page_order < scan_page_order) {
+                                       int ret;
+
+                                       spin_unlock_irqrestore(zone_lock, 
zone_lock_flags);
+
+                                       if (is_huge_zero_page(scan_page)) {
+                                               err = -ENOMEM;
+                                               goto 
freepage_isolate_fail_unlocked;
+                                       }
+                                       get_page(scan_page);
+                                       lock_page(scan_page);
+                                       ret = split_huge_page(scan_page);
+                                       unlock_page(scan_page);
+                                       put_page(scan_page);
+                                       if (ret) {
+                                               err = -ENOMEM;
+                                               goto 
freepage_isolate_fail_unlocked;
+                                       } else {
+                                               goto restart;
+                                       }
+                               }
+
                                /* __isolate_free_page()  */
-                               err = isolate_free_page_no_wmark(dest_page, 
free_page_order);
+                               err = isolate_free_page_no_wmark(buddy, 
free_page_order);
                                if (!err)
                                        goto freepage_isolate_fail;
 
-                               expand(zone, dest_page, scan_page_order, 
free_page_order,
+                               expand_err = expand_free_page(zone, buddy, 
dest_page,
+                                       free_page_order, scan_page_order,
                                        &(zone->free_area[free_page_order]),
                                        migratetype);
+                               if (expand_err)
+                                       goto freepage_isolate_fail;
 
                                if (!is_migrate_isolate(migratetype))
                                        __mod_zone_freepage_state(zone, -(1UL 
<< scan_page_order),
@@ -778,7 +839,7 @@ int defrag_address_range(struct mm_struct *mm, struct 
vm_area_struct *vma,
 
 freepage_isolate_fail:
                                spin_unlock_irqrestore(zone_lock, 
zone_lock_flags);
-
+freepage_isolate_fail_unlocked:
                                if (err < 0) {
                                        failed += (page_size/PAGE_SIZE);
                                        defrag_stats->dst_isolate_free_failed 
+= (page_size/PAGE_SIZE);
@@ -844,6 +905,8 @@ int defrag_address_range(struct mm_struct *mm, struct 
vm_area_struct *vma,
                                if ((dest_page == compound_head(dest_page)) &&
                                        PageTransHuge(dest_page) && 
!PageHuge(dest_page))
                                        dst_thp = true;
+                               else
+                                       dst_thp = false;
 
                                if (PageCompound(dest_page) && !dst_thp) {
                                        failed += 
get_contig_page_size(dest_page);
@@ -854,37 +917,56 @@ int defrag_address_range(struct mm_struct *mm, struct 
vm_area_struct *vma,
                                }
 
                                if (src_thp != dst_thp) {
-                                       failed += 
get_contig_page_size(scan_page);
-                                       if (src_thp && !dst_thp)
-                                               
defrag_stats->src_thp_dst_not_failed +=
-                                                       page_size/PAGE_SIZE;
-                                       else /* !src_thp && dst_thp */
-                                               
defrag_stats->dst_thp_src_not_failed +=
-                                                       page_size/PAGE_SIZE;
+                                       if (src_thp && !dst_thp) {
+                                               int ret;
+
+                                               if 
(!page_can_migrate(dest_page)) {
+                                                       failed += 
get_contig_page_size(scan_page);
+                                                       
defrag_stats->not_defrag_vpn = scan_address + page_size;
+                                                       goto quit_defrag;
+                                               }
 
+                                               get_page(scan_page);
+                                               lock_page(scan_page);
+                                               if (!PageCompound(scan_page) || 
is_huge_zero_page(scan_page)) {
+                                                       ret = 0;
+                                                       src_thp = false;
+                                                       goto split_src_done;
+                                               }
+                                               ret = 
split_huge_page(scan_page);
+split_src_done:
+                                               unlock_page(scan_page);
+                                               put_page(scan_page);
+                                               if (ret)
+                                                       
defrag_stats->src_thp_dst_not_failed += page_size/PAGE_SIZE;
+                                               else
+                                                       goto restart;
+                                       } else {/* !src_thp && dst_thp */
+                                               int ret;
+
+                                               get_page(dest_page);
+                                               lock_page(dest_page);
+                                               if (!PageCompound(dest_page) || 
is_huge_zero_page(dest_page)) {
+                                                       ret = 0;
+                                                       dst_thp = false;
+                                                       goto split_dst_done;
+                                               }
+                                               ret = 
split_huge_page(dest_page);
+split_dst_done:
+                                               unlock_page(dest_page);
+                                               put_page(dest_page);
+                                               if (ret)
+                                                       
defrag_stats->dst_thp_src_not_failed += page_size/PAGE_SIZE;
+                                               else
+                                                       goto retry_defrag;
+                                       }
+
+                                       failed += 
get_contig_page_size(scan_page);
                                        defrag_stats->not_defrag_vpn = 
scan_address + page_size;
                                        goto quit_defrag;
                                        /*continue;*/
                                }
 
-                               /* free page on pcplist */
-                               if (page_count(dest_page) == 0) {
-                                       /* not managed pages  */
-                                       if (!dest_page->flags) {
-                                               failed += 1;
-                                               
defrag_stats->dst_out_of_bound_failed += 1;
-
-                                               defrag_stats->not_defrag_vpn = 
scan_address + page_size;
-                                               goto quit_defrag;
-                                       }
-                                       /* spill order-0 pages to buddy 
allocator from pcplist */
-                                       if (!page_drained) {
-                                               drain_all_pages(NULL);
-                                               page_drained = 1;
-                                               goto retry_defrag;
-                                       }
-                               }
-
                                if (PageAnon(dest_page)) {
                                        
count_vm_events(MEM_DEFRAG_DST_ANON_PAGES,
                                                        1<<scan_page_order);
@@ -895,6 +977,7 @@ int defrag_address_range(struct mm_struct *mm, struct 
vm_area_struct *vma,
                                                                
1<<scan_page_order);
                                                failed += 1<<scan_page_order;
                                                defrag_stats->dst_anon_failed 
+= 1<<scan_page_order;
+                                               /*print_page_stats(dest_page, 
"anonymous page");*/
                                        }
                                } else if (page_mapping(dest_page)) {
                                        
count_vm_events(MEM_DEFRAG_DST_FILE_PAGES,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a35605e0924a..9ba2cdc320f2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1855,6 +1855,51 @@ inline void expand(struct zone *zone, struct page *page,
        }
 }
 
+inline int expand_free_page(struct zone *zone, struct page *buddy_head,
+       struct page *page, int buddy_order, int page_order, struct free_area 
*area,
+       int migratetype)
+{
+       unsigned long size = 1 << buddy_order;
+
+       if (!(page >= buddy_head && page < (buddy_head + (1<<buddy_order)))) {
+               int mapcount = PageSlab(buddy_head) ? 0 : 
page_mapcount(buddy_head);
+
+               mapcount = PageSlab(page) ? 0 : page_mapcount(page);
+               __free_one_page(buddy_head, page_to_pfn(buddy_head), zone, 
buddy_order,
+                               migratetype);
+               return -EINVAL;
+       }
+
+       while (buddy_order > page_order) {
+               struct page *page_to_free;
+
+               area--;
+               buddy_order--;
+               size >>= 1;
+
+               if (page < (buddy_head + size))
+                       page_to_free = buddy_head + size;
+               else {
+                       page_to_free = buddy_head;
+                       buddy_head = buddy_head + size;
+               }
+
+               /*
+                * Mark as guard pages (or page), that will allow to
+                * merge back to allocator when buddy will be freed.
+                * Corresponding page table entries will not be touched,
+                * pages will stay not present in virtual address space
+                */
+               if (set_page_guard(zone, page_to_free, buddy_order, 
migratetype))
+                       continue;
+
+               list_add(&page_to_free->lru, &area->free_list[migratetype]);
+               area->nr_free++;
+               set_page_order(page_to_free, buddy_order);
+       }
+       return 0;
+}
+
 static void check_new_page_bad(struct page *page)
 {
        const char *bad_reason = NULL;
-- 
2.20.1

Reply via email to