set_hwpoison_free_buddy_page() can fail, in which case the target page
is not actually isolated. Report -EBUSY in that case so that userspace
learns of the failure and gets a chance to retry.

Also, for consistency, this patch moves the set_hwpoison_free_buddy_page()
call from unmap_and_move() to __soft_offline_page().

Fixes: 6bc9b56433b7 ("mm: fix race on soft-offlining free huge pages")
Signed-off-by: Naoya Horiguchi <n-horigu...@ah.jp.nec.com>
---
 mm/memory-failure.c | 15 ++++++++++++---
 mm/migrate.c        |  9 ---------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git v4.19-mmotm-2018-10-30-16-08/mm/memory-failure.c v4.19-mmotm-2018-10-30-16-08_patched/mm/memory-failure.c
index 9f09bf3..11e283e 100644
--- v4.19-mmotm-2018-10-30-16-08/mm/memory-failure.c
+++ v4.19-mmotm-2018-10-30-16-08_patched/mm/memory-failure.c
@@ -1719,14 +1719,18 @@ static int soft_offline_huge_page(struct page *page, int flags)
                /*
                 * We set PG_hwpoison only when the migration source hugepage
                 * was successfully dissolved, because otherwise hwpoisoned
-                * hugepage remains on free hugepage list, then userspace will
-                * find it as SIGBUS by allocation failure. That's not expected
-                * in soft-offlining.
+                * hugepage remains on free hugepage list. The allocator ignores
+                * such a hwpoisoned page so it's never allocated, but it could
+                * kill a process because of no-memory rather than hwpoison.
+                * Soft-offline never impacts the userspace, so this is
+                * undesired.
                 */
                ret = dissolve_free_huge_page(page);
                if (!ret) {
                        if (set_hwpoison_free_buddy_page(page))
                                num_poisoned_pages_inc();
+                       else
+                               ret = -EBUSY;
                }
        }
        return ret;
@@ -1804,6 +1808,11 @@ static int __soft_offline_page(struct page *page, int flags)
                                pfn, ret, page->flags, &page->flags);
                        if (ret > 0)
                                ret = -EIO;
+               } else {
+                       if (set_hwpoison_free_buddy_page(page))
+                               num_poisoned_pages_inc();
+                       else
+                               ret = -EBUSY;
                }
        } else {
                pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx (%pGp)\n",
diff --git v4.19-mmotm-2018-10-30-16-08/mm/migrate.c v4.19-mmotm-2018-10-30-16-08_patched/mm/migrate.c
index f7e4bfd..1742372 100644
--- v4.19-mmotm-2018-10-30-16-08/mm/migrate.c
+++ v4.19-mmotm-2018-10-30-16-08_patched/mm/migrate.c
@@ -1199,15 +1199,6 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
         */
        if (rc == MIGRATEPAGE_SUCCESS) {
                put_page(page);
-               if (reason == MR_MEMORY_FAILURE) {
-                       /*
-                        * Set PG_HWPoison on just freed page
-                        * intentionally. Although it's rather weird,
-                        * it's how HWPoison flag works at the moment.
-                        */
-                       if (set_hwpoison_free_buddy_page(page))
-                               num_poisoned_pages_inc();
-               }
        } else {
                if (rc != -EAGAIN) {
                        if (likely(!__PageMovable(page))) {
-- 
2.7.0

Reply via email to