It is still possible that we pin movable CMA pages if there are isolation
errors and cma_page_list stays empty when we check again.

Check for isolation errors, and return success only when there are no
isolation errors, and cma_page_list is empty after checking.

Because isolation errors are transient, we retry indefinitely.

Fixes: 9a4e9f3b2d73 ("mm: update get_user_pages_longterm to migrate pages allocated from CMA region")
Signed-off-by: Pavel Tatashin <pasha.tatas...@soleen.com>
Reviewed-by: Jason Gunthorpe <j...@nvidia.com>
---
 mm/gup.c | 60 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index eb8c39953d53..b1f6d56182b3 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1547,8 +1547,8 @@ static long check_and_migrate_cma_pages(struct mm_struct *mm,
                                        struct vm_area_struct **vmas,
                                        unsigned int gup_flags)
 {
-       unsigned long i;
-       bool drain_allow = true;
+       unsigned long i, isolation_error_count;
+       bool drain_allow;
        LIST_HEAD(cma_page_list);
        long ret = nr_pages;
        struct page *prev_head, *head;
@@ -1559,6 +1559,8 @@ static long check_and_migrate_cma_pages(struct mm_struct *mm,
 
 check_again:
        prev_head = NULL;
+       isolation_error_count = 0;
+       drain_allow = true;
        for (i = 0; i < nr_pages; i++) {
                head = compound_head(pages[i]);
                if (head == prev_head)
@@ -1570,25 +1572,35 @@ static long check_and_migrate_cma_pages(struct mm_struct *mm,
                 * of the CMA zone if possible.
                 */
                if (is_migrate_cma_page(head)) {
-                       if (PageHuge(head))
-                               isolate_huge_page(head, &cma_page_list);
-                       else {
+                       if (PageHuge(head)) {
+                               if (!isolate_huge_page(head, &cma_page_list))
+                                       isolation_error_count++;
+                       } else {
                                if (!PageLRU(head) && drain_allow) {
                                        lru_add_drain_all();
                                        drain_allow = false;
                                }
 
-                               if (!isolate_lru_page(head)) {
-                                       list_add_tail(&head->lru, &cma_page_list);
-                                       mod_node_page_state(page_pgdat(head),
-                                                           NR_ISOLATED_ANON +
-                                                           page_is_file_lru(head),
-                                                           thp_nr_pages(head));
+                               if (isolate_lru_page(head)) {
+                                       isolation_error_count++;
+                                       continue;
                                }
+                               list_add_tail(&head->lru, &cma_page_list);
+                               mod_node_page_state(page_pgdat(head),
+                                                   NR_ISOLATED_ANON +
+                                                   page_is_file_lru(head),
+                                                   thp_nr_pages(head));
                        }
                }
        }
 
+       /*
+        * If the list is empty and there were no isolation errors, all pages
+        * are already in the correct zone.
+        */
+       if (list_empty(&cma_page_list) && !isolation_error_count)
+               return ret;
+
        if (!list_empty(&cma_page_list)) {
                /*
                 * drop the above get_user_pages reference.
@@ -1608,23 +1620,19 @@ static long check_and_migrate_cma_pages(struct mm_struct *mm,
                        return ret > 0 ? -ENOMEM : ret;
                }
 
-               /*
-                * We did migrate all the pages, Try to get the page references
-                * again migrating any new CMA pages which we failed to isolate
-                * earlier.
-                */
-               ret = __get_user_pages_locked(mm, start, nr_pages,
-                                                  pages, vmas, NULL,
-                                                  gup_flags);
-
-               if (ret > 0) {
-                       nr_pages = ret;
-                       drain_allow = true;
-                       goto check_again;
-               }
+               /* We unpinned pages before migration, pin them again */
+               ret = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
+                                             NULL, gup_flags);
+               if (ret <= 0)
+                       return ret;
+               nr_pages = ret;
        }
 
-       return ret;
+       /*
+        * check again because pages were unpinned, and we also might have
+        * had isolation errors and need more pages to migrate.
+        */
+       goto check_again;
 }
 #else
 static long check_and_migrate_cma_pages(struct mm_struct *mm,
-- 
2.25.1

Reply via email to