Below is the description I sent a couple of days back in a mail
about the stale data issue on IA-64.
Seth, Rohit <> wrote on Tuesday, March 15, 2005 9:44 AM:
> Recently on IA-64, we have found an issue where stale data could be
> used by apps. The sequence of operations, involving a few mprotects
> from user space (glibc), goes like this:
>
> 1- The text region of an executable is mmapped using
> PROT_READ|PROT_EXEC. As a result, a shared page is allocated to the user.
>
> 2- The user then requests that the text region be mprotected with
> PROT_READ|PROT_WRITE. The kernel removes the execute permission and
> leaves the read permission on the text region.
>
> 3- A subsequent write operation by the user results in a page fault
> and eventually in a COW break. The user gets a new private copy of the
> page. At this point the kernel marks the new page for a deferred flush.
>
> 4- The user then requests that the text region be mprotected back to
> PROT_READ|PROT_EXEC. The mprotect support code in the kernel flushes
> the caches, updates the PTEs and then flushes the TLBs. However, after
> updating the PTEs with the new permissions, we don't let the arch
> specific code know about the new mappings (through an update_mmu_cache
> like routine). IA-64 typically uses update_mmu_cache to check the
> deferred flush flag (the one set in step 3) to maintain cache
> coherency lazily (the local I and D caches on IA-64 are incoherent).
>
> DavidM suggested that we would need to add a hook in the function
> change_pte_range in mm/mprotect.c. This would let the architecture
> specific code look at the new PTEs to decide if it needs to update
> any other architectural/kernel state based on the updated (new
> permissions) PTE values.
>
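To make the quoted sequence concrete, here is a minimal user-space
sketch of the four steps (illustrative only: error handling is omitted
and the choice of /bin/ls as the mapped executable is arbitrary):

	#include <fcntl.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		long pagesz = sysconf(_SC_PAGESIZE);
		int fd = open("/bin/ls", O_RDONLY);	/* any executable */

		/* 1- map a text page PROT_READ|PROT_EXEC; a shared page
		 * backs the mapping */
		char *text = mmap(NULL, pagesz, PROT_READ | PROT_EXEC,
				  MAP_PRIVATE, fd, 0);

		/* 2- drop exec, add write */
		mprotect(text, pagesz, PROT_READ | PROT_WRITE);

		/* 3- the write faults and breaks COW; the kernel marks
		 * the new private page for a deferred flush */
		memset(text, 0, 16);

		/* 4- restore exec; without the new hook the deferred
		 * flush never happens on this path, so executing from
		 * this page may fetch stale instructions from the
		 * incoherent local i-cache */
		mprotect(text, pagesz, PROT_READ | PROT_EXEC);

		close(fd);
		return 0;
	}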
We have added a new hook, lazy_mmu_prot_update(pte_t), that gets called
when the protection bits in PTEs change. This hook gives architecture
specific code an opportunity to take whatever action is needed. On IA-64
it will be used to lazily make the I and D caches coherent.
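For reference, the IA-64 side (the renamed update_mmu_cache shown in
the arch/ia64/mm/init.c hunk at the end of this patch) amounts to
roughly the sketch below. The hunk is truncated here, so the
flush_icache_range/set_bit tail is reconstructed from the existing
update_mmu_cache logic rather than quoted verbatim:

	void
	lazy_mmu_prot_update (pte_t pte)
	{
		unsigned long addr;
		struct page *page;

		if (!pte_exec(pte))
			return;		/* not an executable page... */

		page = pte_page(pte);
		addr = (unsigned long) page_address(page);

		if (test_bit(PG_arch_1, &page->flags))
			return;		/* i-cache already coherent */

		/* lazily make the local i-cache coherent with the d-cache */
		flush_icache_range(addr, addr + PAGE_SIZE);
		set_bit(PG_arch_1, &page->flags);	/* mark page clean */
	}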
Signed-off-by: David Mosberger <[EMAIL PROTECTED]>
Signed-off-by: Rohit Seth <[EMAIL PROTECTED]>
Validated on IA-64 and x86 platforms.
diff -Naru linux-2.6.11/include/asm-generic/pgtable.h linux-2.6.11.new/include/asm-generic/pgtable.h
--- linux-2.6.11/include/asm-generic/pgtable.h	2005-03-01 23:37:54.000000000 -0800
+++ linux-2.6.11.new/include/asm-generic/pgtable.h	2005-03-17 21:12:21.181174495 -0800
@@ -134,4 +134,7 @@
#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
#endif
+#ifndef __HAVE_ARCH_LAZY_MMU_UPDATE
+#define lazy_mmu_prot_update(pte) do { } while (0)
+#endif
#endif /* _ASM_GENERIC_PGTABLE_H */
diff -Naru linux-2.6.11/include/asm-ia64/pgtable.h linux-2.6.11.new/include/asm-ia64/pgtable.h
--- linux-2.6.11/include/asm-ia64/pgtable.h	2005-03-01 23:37:53.000000000 -0800
+++ linux-2.6.11.new/include/asm-ia64/pgtable.h	2005-03-18 02:41:24.202417018 -0800
@@ -420,6 +420,7 @@
{
return pte_val(a) == pte_val(b);
}
+#define update_mmu_cache(vma,address,pte) do { } while (0)
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern void paging_init (void);
@@ -482,7 +483,7 @@
* information. However, we use this routine to take care of any (delayed) i-cache
* flushing that may be necessary.
*/
-extern void update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte);
+extern void lazy_mmu_prot_update (pte_t pte);
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
/*
@@ -561,6 +562,7 @@
#define __HAVE_ARCH_PTEP_MKDIRTY
#define __HAVE_ARCH_PTE_SAME
#define __HAVE_ARCH_PGD_OFFSET_GATE
+#define __HAVE_ARCH_LAZY_MMU_UPDATE
#include <asm-generic/pgtable.h>
#include <asm-generic/pgtable-nopud.h>
diff -Naru linux-2.6.11/mm/memory.c linux-2.6.11.new/mm/memory.c
--- linux-2.6.11/mm/memory.c 2005-03-01 23:38:08.000000000 -0800
+++ linux-2.6.11.new/mm/memory.c 2005-03-18 01:49:05.498353905 -0800
@@ -1252,6 +1252,7 @@
vma);
ptep_establish(vma, address, page_table, entry);
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
}
/*
@@ -1304,6 +1305,7 @@
vma);
ptep_set_access_flags(vma, address, page_table, entry, 1);
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
return VM_FAULT_MINOR;
@@ -1829,6 +1831,7 @@
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, entry);
+ lazy_mmu_prot_update(entry);
spin_unlock(&mm->page_table_lock);
out:
return VM_FAULT_MINOR;
@@ -1956,6 +1959,7 @@
/* no need to invalidate: a not-present page shouldn't be cached */
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
spin_unlock(&mm->page_table_lock);
out:
return ret;
@@ -2050,6 +2054,7 @@
entry = pte_mkyoung(entry);
ptep_set_access_flags(vma, address, pte, entry, write_access);
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
pte_unmap(pte);
spin_unlock(&mm->page_table_lock);
return VM_FAULT_MINOR;
--- linux-2.6.11/arch/ia64/hp/common/sba_iommu.c	2005-03-01 23:37:49.000000000 -0800
+++ linux-2.6.11.new/arch/ia64/hp/common/sba_iommu.c	2005-03-18 02:55:29.661391037 -0800
@@ -761,7 +761,7 @@
#ifdef ENABLE_MARK_CLEAN
/**
* Since DMA is i-cache coherent, any (complete) pages that were written via
- * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
* flush them when they get mapped into an executable vm-area.
*/
static void
--- linux-2.6.11/arch/ia64/lib/swiotlb.c	2005-03-01 23:38:17.000000000 -0800
+++ linux-2.6.11.new/arch/ia64/lib/swiotlb.c	2005-03-18 02:55:58.480726621 -0800
@@ -444,7 +444,7 @@
/*
* Since DMA is i-cache coherent, any (complete) pages that were written via
- * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
* flush them when they get mapped into an executable vm-area.
*/
static void
--- linux-2.6.11/mm/mprotect.c 2005-03-01 23:38:37.000000000 -0800
+++ linux-2.6.11.new/mm/mprotect.c 2005-03-18 16:31:47.874658241 -0800
@@ -52,8 +52,9 @@
* bits by wiping the pte and then setting the new pte
* into place.
*/
- entry = ptep_get_and_clear(pte);
- set_pte(pte, pte_modify(entry, newprot));
+ entry = pte_modify(ptep_get_and_clear(pte), newprot);
+ set_pte(pte, entry);
+ lazy_mmu_prot_update(entry);
}
address += PAGE_SIZE;
pte++;
--- linux-2.6.11/Documentation/cachetlb.txt	2005-03-01 23:37:59.000000000 -0800
+++ linux-2.6.11.new/Documentation/cachetlb.txt	2005-03-18 19:07:29.565950055 -0800
@@ -142,6 +142,11 @@
The ia64 sn2 platform is one example of a platform
that uses this interface.
+8) void lazy_mmu_prot_update (pte_t pte)
+ This interface is called whenever the protection on
+ any user PTE changes. This interface provides a notification
+ to architecture specific code to take appropriate action.
+
Next, we have the cache flushing interfaces. In general, when Linux
is changing an existing virtual-->physical mapping to a new value,
--- linux-2.6.11/arch/ia64/mm/init.c 2005-03-01 23:38:07.000000000 -0800
+++ linux-2.6.11.new/arch/ia64/mm/init.c	2005-03-18 18:42:58.915577446 -0800
@@ -76,7 +76,7 @@
}
void
-update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte)
+lazy_mmu_prot_update (pte_t pte)
{
unsigned long addr;
struct page *page;
@@ -85,7 +85,7 @@
return; /* not an executable page... */
page = pte_page(pte);
-	/* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */
+
addr = (unsigned long) page_address(page);
if (test_bit(PG_arch_1, &page->flags))