On 09/22, Oleg Nesterov wrote:
>
> On 09/21, Peter Xu wrote:
> >
> > @@ -859,6 +989,25 @@ static int copy_pte_range(struct mm_struct *dst_mm, 
> > struct mm_struct *src_mm,
> >                         spin_needbreak(src_ptl) || spin_needbreak(dst_ptl))
> >                             break;
> >             }
> > +
> > +           if (unlikely(data.cow_new_page)) {
> > +                   /*
> > +                    * If cow_new_page set, we must be at the 2nd round of
> > +                    * a previous COPY_MM_BREAK_COW.  Try to arm the new
> > +                    * page now.  Note that in all cases page_break_cow()
> > +                    * will properly release the objects in copy_mm_data.
> > +                    */
> > +                   WARN_ON_ONCE(copy_ret != COPY_MM_BREAK_COW);
> > +                   if (pte_install_copied_page(dst_mm, new, src_pte,
> > +                                               dst_pte, addr, rss,
> > +                                               &data)) {
> > +                           /* We installed the pte successfully; move on */
> > +                           progress++;
> > +                           continue;
>
> I'm afraid I misread this patch too ;)
>
> But it seems to me in this case the main loop can really "leak"
> COPY_MM_BREAK_COW. Suppose the the next 31 pte's are pte_none() and
> need_resched() is true.
>
> No?

If yes, perhaps we can simplify the copy_ret/cow_new_page logic and make
it more explicit?

Something like below, on top of this patch...

Oleg.


--- x/mm/memory.c
+++ x/mm/memory.c
@@ -704,17 +704,6 @@
        };
 };
 
-static inline void page_release_cow(struct copy_mm_data *data)
-{
-       /* The old page should only be released in page_duplicate() */
-       WARN_ON_ONCE(data->cow_old_page);
-
-       if (data->cow_new_page) {
-               put_page(data->cow_new_page);
-               data->cow_new_page = NULL;
-       }
-}
-
 /*
  * Duplicate the page for this PTE.  Returns zero if page copied (so we need to
  * retry on the same PTE again to arm the copied page very soon), or negative
@@ -925,7 +914,7 @@
 
        if (!pte_same(*src_pte, data->cow_oldpte)) {
                /* PTE has changed under us.  Release the page and retry */
-               page_release_cow(data);
+               put_page(data->cow_new_page);
                return false;
        }
 
@@ -936,12 +925,6 @@
        set_pte_at(dst_mm, addr, dst_pte, entry);
        rss[mm_counter(new_page)]++;
 
-       /*
-        * Manually clear the new page pointer since we've moved ownership to
-        * the newly armed PTE.
-        */
-       data->cow_new_page = NULL;
-
        return true;
 }
 
@@ -958,16 +941,12 @@
        struct copy_mm_data data;
 
 again:
-       /* We don't reset this for COPY_MM_BREAK_COW */
-       memset(&data, 0, sizeof(data));
-
-again_break_cow:
        init_rss_vec(rss);
 
        dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
        if (!dst_pte) {
-               /* Guarantee that the new page is released if there is */
-               page_release_cow(&data);
+               if (unlikely(copy_ret == COPY_MM_BREAK_COW))
+                       put_page(data.cow_new_page);
                return -ENOMEM;
        }
        src_pte = pte_offset_map(src_pmd, addr);
@@ -978,6 +957,22 @@
        arch_enter_lazy_mmu_mode();
 
        progress = 0;
+       if (unlikely(copy_ret == COPY_MM_BREAK_COW)) {
+               /*
+                * Note that in all cases pte_install_copied_page()
+                * will properly release the objects in copy_mm_data.
+                */
+               copy_ret = COPY_MM_DONE;
+               if (pte_install_copied_page(dst_mm, new, src_pte,
+                                           dst_pte, addr, rss,
+                                           &data)) {
+                       /* We installed the pte successfully; move on */
+                       progress++;
+                       goto next;
+               }
+               /* PTE changed.  Retry this pte (falls through) */
+       }
+
        do {
                /*
                 * We are holding two locks at this point - either of them
@@ -990,24 +985,6 @@
                                break;
                }
 
-               if (unlikely(data.cow_new_page)) {
-                       /*
-                        * If cow_new_page set, we must be at the 2nd round of
-                        * a previous COPY_MM_BREAK_COW.  Try to arm the new
-                        * page now.  Note that in all cases page_break_cow()
-                        * will properly release the objects in copy_mm_data.
-                        */
-                       WARN_ON_ONCE(copy_ret != COPY_MM_BREAK_COW);
-                       if (pte_install_copied_page(dst_mm, new, src_pte,
-                                                   dst_pte, addr, rss,
-                                                   &data)) {
-                               /* We installed the pte successfully; move on */
-                               progress++;
-                               continue;
-                       }
-                       /* PTE changed.  Retry this pte (falls through) */
-               }
-
                if (pte_none(*src_pte)) {
                        progress++;
                        continue;
@@ -1017,6 +994,7 @@
                if (copy_ret != COPY_MM_DONE)
                        break;
                progress += 8;
+next:
        } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
        arch_leave_lazy_mmu_mode();
@@ -1030,13 +1008,14 @@
        case COPY_MM_SWAP_CONT:
                if (add_swap_count_continuation(data.entry, GFP_KERNEL) < 0)
                        return -ENOMEM;
-               break;
+               copy_ret = COPY_MM_DONE;
+               goto again;
        case COPY_MM_BREAK_COW:
                /* Do accounting onto parent mm directly */
                ret = page_duplicate(src_mm, vma, addr, &data);
                if (ret)
                        return ret;
-               goto again_break_cow;
+               goto again;
        case COPY_MM_DONE:
                /* This means we're all good. */
                break;

Reply via email to