On Sat, Sep 21, 2019 at 09:50:54PM +0800, Jia He wrote:
> @@ -2151,21 +2163,53 @@ static inline void cow_user_page(struct page *dst, 
> struct page *src, unsigned lo
>        * fails, we just zero-fill it. Live with it.
>        */
>       if (unlikely(!src)) {
> -             void *kaddr = kmap_atomic(dst);
> -             void __user *uaddr = (void __user *)(va & PAGE_MASK);
> +             void *kaddr;
> +             pte_t entry;
> +             void __user *uaddr = (void __user *)(addr & PAGE_MASK);
>  
> +             /* On architectures with software "accessed" bits, we would
> +              * take a double page fault, so mark it accessed here.
> +              */

Nitpick: please follow the kernel coding style for multi-line comments
(above and for the rest of the patch):

                /*
                 * Your multi-line comment.
                 */

> +             if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
> +                     vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr,
> +                                                    &vmf->ptl);
> +                     if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
> +                             entry = pte_mkyoung(vmf->orig_pte);
> +                             if (ptep_set_access_flags(vma, addr,
> +                                                       vmf->pte, entry, 0))
> +                                     update_mmu_cache(vma, addr, vmf->pte);
> +                     } else {
> +                             /* Other thread has already handled the fault
> +                              * and we don't need to do anything. If it's
> +                              * not the case, the fault will be triggered
> +                              * again on the same address.
> +                              */
> +                             pte_unmap_unlock(vmf->pte, vmf->ptl);
> +                             return false;
> +                     }
> +                     pte_unmap_unlock(vmf->pte, vmf->ptl);
> +             }

Another nit, you could rewrite this block slightly to avoid too much
indentation. Something like (untested):

                if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
                        vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr,
                                                       &vmf->ptl);
                        if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) {
                                /*
                                 * Other thread has already handled the fault
                                 * and we don't need to do anything. If it's
                                 * not the case, the fault will be triggered
                                 * again on the same address.
                                 */
                                pte_unmap_unlock(vmf->pte, vmf->ptl);
                                return false;
                        }
                        entry = pte_mkyoung(vmf->orig_pte);
                        if (ptep_set_access_flags(vma, addr,
                                                  vmf->pte, entry, 0))
                                update_mmu_cache(vma, addr, vmf->pte);
                        pte_unmap_unlock(vmf->pte, vmf->ptl);
                }

> +
> +             kaddr = kmap_atomic(dst);

Since you moved the kmap_atomic() here, could the above
arch_faults_on_old_pte() run in a preemptible context? I suggested
adding a WARN_ON in patch 2 to be sure.

>               /*
>                * This really shouldn't fail, because the page is there
>                * in the page tables. But it might just be unreadable,
>                * in which case we just give up and fill the result with
>                * zeroes.
>                */
> -             if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
> +             if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
> +                     /* Give a warn in case there can be some obscure
> +                      * use-case
> +                      */
> +                     WARN_ON_ONCE(1);

That's more of a question for the mm guys: at this point we do the
copying with the ptl released; is there anything else that could have
made the pte old in the meantime? I think unuse_pte() is only called on
anonymous vmas, so it shouldn't be the case here.

>                       clear_page(kaddr);
> +             }
>               kunmap_atomic(kaddr);
>               flush_dcache_page(dst);
>       } else
> -             copy_user_highpage(dst, src, va, vma);
> +             copy_user_highpage(dst, src, addr, vma);
> +
> +     return true;
>  }

-- 
Catalin

Reply via email to