Author: lcapitulino
Date: Tue Feb 13 13:37:06 2007
New Revision: 120366

Added:
   packages/updates/2007.0/kernel-2.6/current/PATCHES/patches/CA78_zeromap_may_find_a_pte.patch
Modified:
   packages/updates/2007.0/kernel-2.6/current/SPECS/kernel-2.6.spec

Log:
read_zero_pagealigned() locking fix (thanks to Hugh Dickins <[EMAIL PROTECTED]>)


Added: packages/updates/2007.0/kernel-2.6/current/PATCHES/patches/CA78_zeromap_may_find_a_pte.patch
==============================================================================
--- (empty file)
+++ packages/updates/2007.0/kernel-2.6/current/PATCHES/patches/CA78_zeromap_may_find_a_pte.patch    Tue Feb 13 13:37:06 2007
@@ -0,0 +1,133 @@
+Ramiro Voicu hits the BUG_ON(!pte_none(*pte)) in zeromap_pte_range:
+kernel bugzilla 7645.  Right: read_zero_pagealigned uses down_read of
+mmap_sem, but another thread's racing read of /dev/zero, or a normal
+fault, can easily set that pte again, in between zap_page_range and
+zeromap_page_range getting there.  It's been wrong ever since 2.4.3.
+
+The simple fix is to use down_write instead, but that would serialize
+reads of /dev/zero more than at present: perhaps some app would be
+badly affected.  So instead let zeromap_page_range return the error
+instead of BUG_ON, and read_zero_pagealigned break to the slower
+clear_user loop in that case - there's no need to optimize for it.
+
+Use -EEXIST for when a pte is found: BUG_ON in mmap_zero (the other
+user of zeromap_page_range), though it really isn't interesting there.
+And since mmap_zero wants -EAGAIN for out-of-memory, the zeromaps
+better return that than -ENOMEM.
+
+Signed-off-by: Hugh Dickins <[EMAIL PROTECTED]>
+
+---
+ drivers/char/mem.c |   12 ++++++++----
+ mm/memory.c        |   32 +++++++++++++++++++++-----------
+ 2 files changed, 29 insertions(+), 15 deletions(-)
+
+--- linux-2.6.17.orig/drivers/char/mem.c
++++ linux-2.6.17/drivers/char/mem.c
+@@ -521,7 +521,8 @@ static inline size_t read_zero_pagealign
+                       count = size;
+ 
+               zap_page_range(vma, addr, count, NULL);
+-              zeromap_page_range(vma, addr, count, PAGE_COPY);
++              if (zeromap_page_range(vma, addr, count, PAGE_COPY))
++                      break;
+ 
+               size -= count;
+               buf += count;
+@@ -588,11 +589,14 @@ out:
+ 
+ static int mmap_zero(struct file * file, struct vm_area_struct * vma)
+ {
++      int err;
++
+       if (vma->vm_flags & VM_SHARED)
+               return shmem_zero_setup(vma);
+-      if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
+-              return -EAGAIN;
+-      return 0;
++      err = zeromap_page_range(vma, vma->vm_start,
++                      vma->vm_end - vma->vm_start, vma->vm_page_prot);
++      BUG_ON(err == -EEXIST);
++      return err;
+ }
+ #else /* CONFIG_MMU */
+ static ssize_t read_zero(struct file * file, char * buf, 
+--- linux-2.6.17.orig/mm/memory.c
++++ linux-2.6.17/mm/memory.c
+@@ -1091,21 +1091,27 @@ static int zeromap_pte_range(struct mm_s
+ {
+       pte_t *pte;
+       spinlock_t *ptl;
++      int err = 0;
+ 
+       pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+       if (!pte)
+-              return -ENOMEM;
++              return -EAGAIN;
+       do {
+               struct page *page = ZERO_PAGE(addr);
+               pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
++
++              if (unlikely(!pte_none(*pte))) {
++                      err = -EEXIST;
++                      pte++;
++                      break;
++              }
+               page_cache_get(page);
+               page_add_file_rmap(page);
+               inc_mm_counter(mm, file_rss);
+-              BUG_ON(!pte_none(*pte));
+               set_pte_at(mm, addr, pte, zero_pte);
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       pte_unmap_unlock(pte - 1, ptl);
+-      return 0;
++      return err;
+ }
+ 
+ static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
+@@ -1113,16 +1119,18 @@ static inline int zeromap_pmd_range(stru
+ {
+       pmd_t *pmd;
+       unsigned long next;
++      int err;
+ 
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+-              return -ENOMEM;
++              return -EAGAIN;
+       do {
+               next = pmd_addr_end(addr, end);
+-              if (zeromap_pte_range(mm, pmd, addr, next, prot))
+-                      return -ENOMEM;
++              err = zeromap_pte_range(mm, pmd, addr, next, prot);
++              if (err)
++                      break;
+       } while (pmd++, addr = next, addr != end);
+-      return 0;
++      return err;
+ }
+ 
+ static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+@@ -1130,16 +1138,18 @@ static inline int zeromap_pud_range(stru
+ {
+       pud_t *pud;
+       unsigned long next;
++      int err;
+ 
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+-              return -ENOMEM;
++              return -EAGAIN;
+       do {
+               next = pud_addr_end(addr, end);
+-              if (zeromap_pmd_range(mm, pud, addr, next, prot))
+-                      return -ENOMEM;
++              err = zeromap_pmd_range(mm, pud, addr, next, prot);
++              if (err)
++                      break;
+       } while (pud++, addr = next, addr != end);
+-      return 0;
++      return err;
+ }
+ 
+ int zeromap_page_range(struct vm_area_struct *vma,

Modified: packages/updates/2007.0/kernel-2.6/current/SPECS/kernel-2.6.spec
==============================================================================
--- packages/updates/2007.0/kernel-2.6/current/SPECS/kernel-2.6.spec    (original)
+++ packages/updates/2007.0/kernel-2.6/current/SPECS/kernel-2.6.spec    Tue Feb 13 13:37:06 2007
@@ -1041,6 +1041,8 @@
     - atiixp.c: sb600 ide only has one channel (#28505 - thanks to
       Wolke <[EMAIL PROTECTED]>)
     - PCI: ATI sb600 sata quirk (#28363 - thanks to Wolke <[EMAIL PROTECTED]>)
+    - read_zero_pagealigned() locking fix (thanks to Hugh Dickins
+      <[EMAIL PROTECTED]>)
 
 * Fri Feb 02 2007 Luiz Capitulino <[EMAIL PROTECTED]> 2.6.17-10mdv2007.0
   o Gwenole Beauchesne <[EMAIL PROTECTED]>

Reply via email to