When MPOL_MF_STRICT was specified and an existing page was already
on a node that does not follow the policy, mbind() should return -EIO.
But commit 6f4576e3687b ("mempolicy: apply page table walker on
queue_pages_range()") broke the rule.

And, commit c8633798497c ("mm: mempolicy: mbind and migrate_pages
support thp migration") didn't return the correct value for THP mbind()
too.

If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure it reaches
queue_pages_to_pte_range() or queue_pages_pmd() to check if an existing
page was already on a node that does not follow the policy.  And,
non-migratable vma may be used, return -EIO too if MPOL_MF_MOVE or
MPOL_MF_MOVE_ALL was specified.

Tested with 
https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind/mbind02.c

Fixes: 6f4576e3687b ("mempolicy: apply page table walker on 
queue_pages_range()")
Reported-by: Cyril Hrubis <chru...@suse.cz>
Cc: Vlastimil Babka <vba...@suse.cz>
Cc: sta...@vger.kernel.org
Suggested-by: Kirill A. Shutemov <kir...@shutemov.name>
Signed-off-by: Yang Shi <yang....@linux.alibaba.com>
Signed-off-by: Oscar Salvador <osalva...@suse.de>
---
 mm/mempolicy.c | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index abe7a67..401c817 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -447,6 +447,13 @@ static inline bool queue_pages_required(struct page *page,
        return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
 }
 
+/*
+ * The queue_pages_pmd() may have three kind of return value.
+ * 1 - pages are placed on he right node or queued successfully.
+ * 0 - THP get split.
+ * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
+ *        page was already on a node that does not follow the policy.
+ */
 static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
 {
@@ -456,7 +463,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, 
unsigned long addr,
        unsigned long flags;
 
        if (unlikely(is_pmd_migration_entry(*pmd))) {
-               ret = 1;
+               ret = -EIO;
                goto unlock;
        }
        page = pmd_page(*pmd);
@@ -473,8 +480,15 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, 
unsigned long addr,
        ret = 1;
        flags = qp->flags;
        /* go to thp migration */
-       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+               if (!vma_migratable(walk->vma)) {
+                       ret = -EIO;
+                       goto unlock;
+               }
+
                migrate_page_add(page, qp->pagelist, flags);
+       } else
+               ret = -EIO;
 unlock:
        spin_unlock(ptl);
 out:
@@ -499,8 +513,10 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long 
addr,
        ptl = pmd_trans_huge_lock(pmd, vma);
        if (ptl) {
                ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
-               if (ret)
+               if (ret > 0)
                        return 0;
+               else if (ret < 0)
+                       return ret;
        }
 
        if (pmd_trans_unstable(pmd))
@@ -521,11 +537,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned 
long addr,
                        continue;
                if (!queue_pages_required(page, qp))
                        continue;
-               migrate_page_add(page, qp->pagelist, flags);
+               if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+                       if (!vma_migratable(vma))
+                               break;
+                       migrate_page_add(page, qp->pagelist, flags);
+               } else
+                       break;
        }
        pte_unmap_unlock(pte - 1, ptl);
        cond_resched();
-       return 0;
+       return addr != end ? -EIO : 0;
 }
 
 static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
@@ -595,7 +616,12 @@ static int queue_pages_test_walk(unsigned long start, 
unsigned long end,
        unsigned long endvma = vma->vm_end;
        unsigned long flags = qp->flags;
 
-       if (!vma_migratable(vma))
+       /*
+        * Need check MPOL_MF_STRICT to return -EIO if possible
+        * regardless of vma_migratable
+        */ 
+       if (!vma_migratable(vma) &&
+           !(flags & MPOL_MF_STRICT))
                return 1;
 
        if (endvma > end)
@@ -622,7 +648,7 @@ static int queue_pages_test_walk(unsigned long start, 
unsigned long end,
        }
 
        /* queue pages from current vma */
-       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+       if (flags & MPOL_MF_VALID)
                return 0;
        return 1;
 }
-- 
1.8.3.1

Reply via email to