What we have works, but it involves tricky "account_head" handling, and
more trips around the shmem_recovery_populate() loop than I'm comfortable with.

Tighten it all up with a MIGRATE_SHMEM_RECOVERY mode, and a
shmem_recovery_migrate_page() callout from migrate_page_move_mapping(),
so that the migrated page can be made PageTeam immediately (see the
sketch after the diffstat below).

This allows the SHMEM_RETRY_HUGE_PAGE hugehint to be reintroduced,
for what little that's worth (see the sketch at the end of the diff).

Signed-off-by: Hugh Dickins <hu...@google.com>
---
 include/linux/migrate_mode.h   |    2 
 include/linux/shmem_fs.h       |    6 +
 include/trace/events/migrate.h |    3 
 mm/migrate.c                   |   17 ++++-
 mm/shmem.c                     |   99 ++++++++++++-------------------
 5 files changed, 62 insertions(+), 65 deletions(-)
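
For review convenience, a condensed sketch of how the pieces fit together
(not part of the patch; error handling and locals elided).  The recovery
pass hands migrate_pages() the team head as its private argument, and
migrate_page_move_mapping() calls back into shmem while tree_lock is held
and the old page's refcount is frozen, so the destination page can be made
PageTeam before anyone else sees it:

	/* mm/shmem.c: shmem_recovery_populate()'s migration pass */
	nr = migrate_pages(&migrate, shmem_get_recovery_page,
			shmem_put_recovery_page, (unsigned long)head,
			MIGRATE_SHMEM_RECOVERY, MR_SHMEM_RECOVERY);

	/* mm/migrate.c: in migrate_page_move_mapping(), under tree_lock,
	 * refcount frozen to 0: join the team now, or back out cleanly
	 * if the team was disbanded in the meantime.
	 */
	if (mode == MIGRATE_SHMEM_RECOVERY) {
		if (!shmem_recovery_migrate_page(newpage, page)) {
			page_ref_unfreeze(page, expected_count);
			spin_unlock_irq(&mapping->tree_lock);
			return -ENOMEM;	/* quit migrate_pages() immediately */
		}
	} else
		get_page(newpage);	/* add cache reference */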

--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -6,11 +6,13 @@
  *     on most operations but not ->writepage as the potential stall time
  *     is too significant
  * MIGRATE_SYNC will block when migrating pages
+ * MIGRATE_SHMEM_RECOVERY is a MIGRATE_SYNC variant specific to huge tmpfs recovery.
  */
 enum migrate_mode {
        MIGRATE_ASYNC,
        MIGRATE_SYNC_LIGHT,
        MIGRATE_SYNC,
+       MIGRATE_SHMEM_RECOVERY,
 };
 
 #endif         /* MIGRATE_MODE_H_INCLUDED */
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -85,6 +85,7 @@ static inline long shmem_fcntl(struct fi
 #endif /* CONFIG_TMPFS */
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SHMEM)
+extern bool shmem_recovery_migrate_page(struct page *new, struct page *page);
 # ifdef CONFIG_SYSCTL
 struct ctl_table;
 extern int shmem_huge, shmem_huge_min, shmem_huge_max;
@@ -92,6 +93,11 @@ extern int shmem_huge_recoveries;
 extern int shmem_huge_sysctl(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp, loff_t *ppos);
 # endif /* CONFIG_SYSCTL */
+#else
+static inline bool shmem_recovery_migrate_page(struct page *new, struct page *p)
+{
+       return true;    /* Never called: true will optimize out the fallback */
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SHMEM */
 
 #endif
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -9,7 +9,8 @@
 #define MIGRATE_MODE                                           \
        EM( MIGRATE_ASYNC,      "MIGRATE_ASYNC")                \
        EM( MIGRATE_SYNC_LIGHT, "MIGRATE_SYNC_LIGHT")           \
-       EMe(MIGRATE_SYNC,       "MIGRATE_SYNC")
+       EM( MIGRATE_SYNC,       "MIGRATE_SYNC")                 \
+       EMe(MIGRATE_SHMEM_RECOVERY, "MIGRATE_SHMEM_RECOVERY")
 
 
 #define MIGRATE_REASON                                         \
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -23,6 +23,7 @@
 #include <linux/pagevec.h>
 #include <linux/ksm.h>
 #include <linux/rmap.h>
+#include <linux/shmem_fs.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
@@ -371,6 +372,15 @@ int migrate_page_move_mapping(struct add
                return -EAGAIN;
        }
 
+       if (mode == MIGRATE_SHMEM_RECOVERY) {
+               if (!shmem_recovery_migrate_page(newpage, page)) {
+                       page_ref_unfreeze(page, expected_count);
+                       spin_unlock_irq(&mapping->tree_lock);
+                       return -ENOMEM; /* quit migrate_pages() immediately */
+               }
+       } else
+               get_page(newpage);      /* add cache reference */
+
        /*
         * Now we know that no one else is looking at the page:
         * no turning back from here.
@@ -380,7 +390,6 @@ int migrate_page_move_mapping(struct add
        if (PageSwapBacked(page))
                __SetPageSwapBacked(newpage);
 
-       get_page(newpage);      /* add cache reference */
        if (PageSwapCache(page)) {
                SetPageSwapCache(newpage);
                set_page_private(newpage, page_private(page));
@@ -786,7 +795,7 @@ static int move_to_new_page(struct page
 }
 
 static int __unmap_and_move(struct page *page, struct page *newpage,
-               int force, enum migrate_mode mode, enum migrate_reason reason)
+                               int force, enum migrate_mode mode)
 {
        int rc = -EAGAIN;
        int page_was_mapped = 0;
@@ -821,7 +830,7 @@ static int __unmap_and_move(struct page
         * already in use, on lru, with data newly written for that offset.
         * We can only be sure of this check once we have the page locked.
         */
-       if (reason == MR_SHMEM_RECOVERY && !page->mapping) {
+       if (mode == MIGRATE_SHMEM_RECOVERY && !page->mapping) {
                rc = -ENOMEM;   /* quit migrate_pages() immediately */
                goto out_unlock;
        }
@@ -973,7 +982,7 @@ static ICE_noinline int unmap_and_move(n
                        goto out;
        }
 
-       rc = __unmap_and_move(page, newpage, force, mode, reason);
+       rc = __unmap_and_move(page, newpage, force, mode);
        if (rc == MIGRATEPAGE_SUCCESS) {
                put_new_page = NULL;
                set_page_owner_migrate_reason(newpage, reason);
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -306,6 +306,7 @@ static bool shmem_confirm_swap(struct ad
 /* hugehint values: NULL to choose a small page always */
 #define SHMEM_ALLOC_SMALL_PAGE ((struct page *)1)
 #define SHMEM_ALLOC_HUGE_PAGE  ((struct page *)2)
+#define SHMEM_RETRY_HUGE_PAGE  ((struct page *)3)
 /* otherwise hugehint is the hugeteam page to be used */
 
 /* tag for shrinker to locate unfilled hugepages */
@@ -368,20 +369,6 @@ restart:
                        put_page(page);
                return SHMEM_ALLOC_SMALL_PAGE;
        }
-       if (PageSwapBacked(page)) {
-               if (speculative)
-                       put_page(page);
-               /*
-                * This is very often a case of two tasks racing to instantiate
-                * the same hole in the huge page, and we don't particularly
-                * want to allocate a small page.  But holepunch racing with
-                * recovery migration, in between migrating to the page and
-                * marking it team, can leave a PageSwapBacked NULL mapping
-                * page here which we should avoid, and this is the easiest
-                * way to handle all the cases correctly.
-                */
-               return SHMEM_ALLOC_SMALL_PAGE;
-       }
        return page;
 }
 
@@ -784,7 +771,6 @@ struct recovery {
        struct inode *inode;
        struct page *page;
        pgoff_t head_index;
-       struct page *migrated_head;
        bool exposed_team;
 };
 
@@ -988,8 +974,7 @@ static void shmem_recovery_swapin(struct
 static struct page *shmem_get_recovery_page(struct page *page,
                                        unsigned long private, int **result)
 {
-       struct recovery *recovery = (struct recovery *)private;
-       struct page *head = recovery->page;
+       struct page *head = (struct page *)private;
        struct page *newpage = head + (page->index & (HPAGE_PMD_NR-1));
 
        /* Increment refcount to match other routes through recovery_populate */
@@ -999,19 +984,33 @@ static struct page *shmem_get_recovery_p
                put_page(newpage);
                return NULL;
        }
-       /* Note when migrating to head: tricky case because already PageTeam */
-       if (newpage == head)
-               recovery->migrated_head = head;
        return newpage;
 }
 
-static void shmem_put_recovery_page(struct page *newpage, unsigned long private)
+/*
+ * shmem_recovery_migrate_page() is called from the heart of page migration's
+ * migrate_page_move_mapping(): with interrupts disabled, mapping->tree_lock
+ * held, page's reference count frozen to 0, and no other reason to turn back.
+ */
+bool shmem_recovery_migrate_page(struct page *newpage, struct page *page)
 {
-       struct recovery *recovery = (struct recovery *)private;
+       struct page *head = newpage - (page->index & (HPAGE_PMD_NR-1));
+
+       if (!PageTeam(head))
+               return false;
+       if (newpage != head) {
+               /* Needs to be initialized before shmem_added_to_hugeteam() */
+               atomic_long_set(&newpage->team_usage, TEAM_LRU_WEIGHT_ONE);
+               SetPageTeam(newpage);
+               newpage->mapping = page->mapping;
+               newpage->index = page->index;
+       }
+       shmem_added_to_hugeteam(newpage, page_zone(newpage), NULL);
+       return true;
+}
 
-       /* Must reset migrated_head if in the end it was not used */
-       if (recovery->migrated_head == newpage)
-               recovery->migrated_head = NULL;
+static void shmem_put_recovery_page(struct page *newpage, unsigned long private)
+{
        /* Decrement refcount again if newpage was not used */
        put_page(newpage);
 }
@@ -1024,9 +1023,7 @@ static int shmem_recovery_populate(struc
        struct zone *zone = page_zone(head);
        pgoff_t index;
        bool drained_all = false;
-       bool account_head = false;
-       int migratable;
-       int unmigratable;
+       int unmigratable = 0;
        struct page *team;
        struct page *endteam = head + HPAGE_PMD_NR;
        struct page *page;
@@ -1039,12 +1036,9 @@ static int shmem_recovery_populate(struc
 
        shmem_recovery_swapin(recovery, head);
 again:
-       migratable = 0;
-       unmigratable = 0;
        index = recovery->head_index;
        for (team = head; team < endteam && !error; index++, team++) {
-               if (PageTeam(team) && PageUptodate(team) && PageDirty(team) &&
-                   !account_head)
+               if (PageTeam(team) && PageUptodate(team) && PageDirty(team))
                        continue;
 
                page = team;    /* used as hint if not yet instantiated */
@@ -1070,8 +1064,7 @@ again:
                         */
                        if (page != team)
                                error = -ENOENT;
-                       if (error || !account_head)
-                               goto unlock;
+                       goto unlock;
                }
 
                if (PageSwapBacked(team) && page != team) {
@@ -1098,8 +1091,6 @@ again:
                        SetPageTeam(head);
                        head->mapping = mapping;
                        head->index = index;
-                       if (page == head)
-                               account_head = true;
                }
 
                /* Eviction or truncation or hole-punch already disbanded? */
@@ -1132,12 +1123,9 @@ again:
                                                        TEAM_LRU_WEIGHT_ONE);
                                        SetPageTeam(page);
                                }
-                               if (page != head || account_head) {
-                                       shmem_added_to_hugeteam(page, zone,
-                                                               NULL);
-                                       put_page(page);
-                                       shr_stats(page_teamed);
-                               }
+                               shmem_added_to_hugeteam(page, zone, NULL);
+                               put_page(page);
+                               shr_stats(page_teamed);
                        }
                        spin_unlock_irq(&mapping->tree_lock);
                        if (page_mapped(page)) {
@@ -1145,16 +1133,13 @@ again:
                                page_remove_rmap(page, false);
                                preempt_enable();
                        }
-                       account_head = false;
                } else {
-                       VM_BUG_ON(account_head);
                        if (!PageLRU(page))
                                lru_add_drain();
                        if (isolate_lru_page(page) == 0) {
                                inc_zone_page_state(page, NR_ISOLATED_ANON);
                                list_add_tail(&page->lru, &migrate);
                                shr_stats(page_migrate);
-                               migratable++;
                        } else {
                                shr_stats(page_off_lru);
                                unmigratable++;
@@ -1169,12 +1154,9 @@ unlock:
        if (!list_empty(&migrate)) {
                lru_add_drain(); /* not necessary but may help debugging */
                if (!error) {
-                       VM_BUG_ON(recovery->page != head);
-                       recovery->migrated_head = NULL;
                        nr = migrate_pages(&migrate, shmem_get_recovery_page,
-                               shmem_put_recovery_page, (unsigned long)
-                               recovery, MIGRATE_SYNC, MR_SHMEM_RECOVERY);
-                       account_head = !!recovery->migrated_head;
+                               shmem_put_recovery_page, (unsigned long)head,
+                               MIGRATE_SHMEM_RECOVERY, MR_SHMEM_RECOVERY);
                        if (nr < 0) {
                                /*
                                 * If migrate_pages() returned error (-ENOMEM)
@@ -1189,7 +1171,6 @@ unlock:
                        if (nr > 0) {
                                shr_stats_add(page_unmigrated, nr);
                                unmigratable += nr;
-                               migratable -= nr;
                        }
                }
                putback_movable_pages(&migrate);
@@ -1208,10 +1189,6 @@ unlock:
                        shr_stats(recov_retried);
                        goto again;
                }
-               if (migratable) {
-                       /* Make another pass to SetPageTeam on them */
-                       goto again;
-               }
        }
 
        lock_page(head);
@@ -2687,11 +2664,9 @@ static struct page *shmem_alloc_page(gfp
                         * add_to_page_cache has the tree_lock.
                         */
                        lock_page(page);
-                       if (!PageSwapBacked(page) && PageTeam(head))
-                               goto out;
-                       unlock_page(page);
-                       put_page(page);
-                       *hugehint = SHMEM_ALLOC_SMALL_PAGE;
+                       if (PageSwapBacked(page) || !PageTeam(head))
+                               *hugehint = SHMEM_RETRY_HUGE_PAGE;
+                       goto out;
                }
        }
 
@@ -2991,6 +2966,10 @@ repeat:
                        error = -ENOMEM;
                        goto decused;
                }
+               if (hugehint == SHMEM_RETRY_HUGE_PAGE) {
+                       error = -EEXIST;
+                       goto decused;
+               }
                if (sgp == SGP_WRITE)
                        __SetPageReferenced(page);
 memcg:
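
And a condensed sketch of the reintroduced SHMEM_RETRY_HUGE_PAGE path,
from the mm/shmem.c hunks above: when the team page is transiently
unusable (racing instantiation, or recovery migration which has filled
the slot but not yet marked it PageTeam), shmem_alloc_page() now asks
the caller to retry instead of splintering to a small page, and
shmem_getpage_gfp() maps that onto its existing -EEXIST retry handling:

	/* shmem_alloc_page(): team page locked, but not safe to use yet */
	if (PageSwapBacked(page) || !PageTeam(head))
		*hugehint = SHMEM_RETRY_HUGE_PAGE;

	/* shmem_getpage_gfp(): undo the block charge and go round again */
	if (hugehint == SHMEM_RETRY_HUGE_PAGE) {
		error = -EEXIST;
		goto decused;
	}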
