Re: [PATCH] mmu notifiers #v3
On Tue, 22 Jan 2008, Andrea Arcangeli wrote: > > Then I will have to update KVM so that it will free the kvm structure > after waiting a quiescent point to avoid kernel crashing memory > corruption after applying your changes to the mmu notifier. It may not be suitable (I've not looked into your needs), but consider SLAB_DESTROY_BY_RCU: it might give you the easiest way to do that. Hugh -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mmu notifiers #v3
On Tue, 22 Jan 2008, Peter Zijlstra wrote: > I think we can get rid of this rwlock as I think this will seriously > hurt larger machines. Correct. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mmu notifiers #v3
On Tue, Jan 22, 2008 at 08:28:47PM +0100, Peter Zijlstra wrote: > I think we can get rid of this rwlock as I think this will seriously > hurt larger machines. Yep, I initially considered it, nevertheless given you solved part of the complication I can add it now ;). The only technical reason for not using RCU is if certain users of the notifiers are registering and unregistering at high frequency through objects that may need to be freed quickly. I can tell the KVM usage of the mmu notifiers is sure fine to use RCU. Then I will have to update KVM so that it will free the kvm structure after waiting a quiescent point to avoid kernel crashing memory corruption after applying your changes to the mmu notifier. Thanks! -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mmu notifiers #v3
On Mon, 2008-01-21 at 13:52 +0100, Andrea Arcangeli wrote: > diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h > new file mode 100644 > --- /dev/null > +++ b/include/linux/mmu_notifier.h > @@ -0,0 +1,79 @@ > +#ifndef _LINUX_MMU_NOTIFIER_H > +#define _LINUX_MMU_NOTIFIER_H > + > +#include <linux/list.h> > +#include <linux/spinlock.h> > + > +#ifdef CONFIG_MMU_NOTIFIER > + > +struct mmu_notifier; > + > +struct mmu_notifier_ops { > + void (*release)(struct mmu_notifier *mn, > + struct mm_struct *mm); > + void (*age_page)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long address); > + void (*invalidate_page)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long address); > + void (*invalidate_range)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long start, unsigned long end); > +}; > + > +struct mmu_notifier_head { > + struct hlist_head head; > + rwlock_t lock; spinlock_t lock; I think we can get rid of this rwlock as I think this will seriously hurt larger machines. > +}; > + > +struct mmu_notifier { > + struct hlist_node hlist; > + const struct mmu_notifier_ops *ops; > +}; > + > +#include <linux/mm_types.h> > + > +extern void mmu_notifier_register(struct mmu_notifier *mn, > + struct mm_struct *mm); > +extern void mmu_notifier_unregister(struct mmu_notifier *mn, > + struct mm_struct *mm); > +extern void mmu_notifier_release(struct mm_struct *mm); > + > +static inline void mmu_notifier_head_init(struct mmu_notifier_head *mnh) > +{ > + INIT_HLIST_HEAD(&mnh->head); > + rwlock_init(&mnh->lock); > +} > + > +#define mmu_notifier(function, mm, args...) 
\ > + do {\ > + struct mmu_notifier *__mn; \ > + struct hlist_node *__n; \ > + \ > + if (unlikely(!hlist_empty(&(mm)->mmu_notifier.head))) { \ > + read_lock(&(mm)->mmu_notifier.lock);\ rcu_read_lock(); > + hlist_for_each_entry(__mn, __n, \ hlist_for_each_entry_rcu > + &(mm)->mmu_notifier.head, \ > + hlist) \ > + if (__mn->ops->function)\ > + __mn->ops->function(__mn, \ > + mm, \ > + args); \ > + read_unlock(&(mm)->mmu_notifier.lock); \ rcu_read_unlock(); > + } \ > + } while (0) > + > +#else /* CONFIG_MMU_NOTIFIER */ > + > +#define mmu_notifier_register(mn, mm) do {} while(0) > +#define mmu_notifier_unregister(mn, mm) do {} while (0) > +#define mmu_notifier_release(mm) do {} while (0) > +#define mmu_notifier_head_init(mmh) do {} while (0) > + > +#define mmu_notifier(function, mm, args...) \ > + do { } while (0) > + > +#endif /* CONFIG_MMU_NOTIFIER */ > + > +#endif /* _LINUX_MMU_NOTIFIER_H */ > diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c > new file mode 100644 > --- /dev/null > +++ b/mm/mmu_notifier.c > @@ -0,0 +1,44 @@ > +/* > + * linux/mm/mmu_notifier.c > + * > + * Copyright (C) 2008 Qumranet, Inc. > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. 
> + */ > + > +#include <linux/mmu_notifier.h> > +#include <linux/module.h> > + > +void mmu_notifier_release(struct mm_struct *mm) > +{ > + struct mmu_notifier *mn; > + struct hlist_node *n, *tmp; > + > + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { > + read_lock(&mm->mmu_notifier.lock); rcu_read_lock(); > + hlist_for_each_entry_safe(mn, n, tmp, hlist_for_each_entry_safe_rcu > + &mm->mmu_notifier.head, hlist) { > + if (mn->ops->release) > + mn->ops->release(mn, mm); > + hlist_del(&mn->hlist); hlist_del_rcu > + } > + read_unlock(&mm->mmu_notifier.lock); rcu_read_unlock(); > + } > +} > + > +void mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) > +{ > + write_lock(&mm->mmu_notifier.lock); spin_lock > + hlist_add_head(&mn->hlist, &mm->mmu_notifier.head); hlist_add_head_rcu > + write_unlock(&mm->mmu_notifier.lock);
Re: [kvm-devel] [PATCH] mmu notifiers #v3
On Tue, Jan 22, 2008 at 04:12:34PM +0200, Avi Kivity wrote: > Andrea Arcangeli wrote: >> diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h >> --- a/include/asm-generic/pgtable.h >> +++ b/include/asm-generic/pgtable.h >> @@ -44,8 +44,10 @@ >> ({ \ >> int __young;\ >> __young = ptep_test_and_clear_young(__vma, __address, __ptep); \ >> -if (__young)\ >> +if (__young) { \ >> flush_tlb_page(__vma, __address); \ >> +mmu_notifier(age_page, (__vma)->vm_mm, __address); \ >> +} \ >> __young;\ >> }) >> > > I think that unconditionally doing > > __young |= mmu_notifier(test_and_clear_young, ...); > > allows hardware with accessed bits more control over what is going on. Agreed, likely it'll have to be mmu_notifier_age_page(). -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [kvm-devel] [PATCH] mmu notifiers #v3
Andrea Arcangeli wrote: diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -44,8 +44,10 @@ ({ \ int __young;\ __young = ptep_test_and_clear_young(__vma, __address, __ptep); \ - if (__young)\ + if (__young) { \ flush_tlb_page(__vma, __address); \ + mmu_notifier(age_page, (__vma)->vm_mm, __address); \ + } \ __young;\ }) I think that unconditionally doing __young |= mmu_notifier(test_and_clear_young, ...); allows hardware with accessed bits more control over what is going on. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [kvm-devel] [PATCH] mmu notifiers #v3
Andrea Arcangeli wrote: diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -44,8 +44,10 @@ ({ \ int __young;\ __young = ptep_test_and_clear_young(__vma, __address, __ptep); \ - if (__young)\ + if (__young) { \ flush_tlb_page(__vma, __address); \ + mmu_notifier(age_page, (__vma)->vm_mm, __address); \ + } \ __young;\ }) I think that unconditionally doing __young |= mmu_notifier(test_and_clear_young, ...); allows hardware with accessed bits more control over what is going on. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [kvm-devel] [PATCH] mmu notifiers #v3
On Tue, Jan 22, 2008 at 04:12:34PM +0200, Avi Kivity wrote: > Andrea Arcangeli wrote: >> diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h >> --- a/include/asm-generic/pgtable.h >> +++ b/include/asm-generic/pgtable.h >> @@ -44,8 +44,10 @@ >> ({ \ >> int __young;\ >> __young = ptep_test_and_clear_young(__vma, __address, __ptep); \ >> -if (__young)\ >> +if (__young) { \ >> flush_tlb_page(__vma, __address); \ >> +mmu_notifier(age_page, (__vma)->vm_mm, __address); \ >> +} \ >> __young;\ >> }) >> > > I think that unconditionally doing > > __young |= mmu_notifier(test_and_clear_young, ...); > > allows hardware with accessed bits more control over what is going on. Agreed, likely it'll have to be mmu_notifier_age_page(). -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mmu notifiers #v3
On Mon, 2008-01-21 at 13:52 +0100, Andrea Arcangeli wrote: > diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h > new file mode 100644 > --- /dev/null > +++ b/include/linux/mmu_notifier.h > @@ -0,0 +1,79 @@ > +#ifndef _LINUX_MMU_NOTIFIER_H > +#define _LINUX_MMU_NOTIFIER_H > + > +#include <linux/list.h> > +#include <linux/spinlock.h> > + > +#ifdef CONFIG_MMU_NOTIFIER > + > +struct mmu_notifier; > + > +struct mmu_notifier_ops { > + void (*release)(struct mmu_notifier *mn, > + struct mm_struct *mm); > + void (*age_page)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long address); > + void (*invalidate_page)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long address); > + void (*invalidate_range)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long start, unsigned long end); > +}; > + > +struct mmu_notifier_head { > + struct hlist_head head; > + rwlock_t lock; spinlock_t lock; I think we can get rid of this rwlock as I think this will seriously hurt larger machines. > +}; > + > +struct mmu_notifier { > + struct hlist_node hlist; > + const struct mmu_notifier_ops *ops; > +}; > + > +#include <linux/mm_types.h> > + > +extern void mmu_notifier_register(struct mmu_notifier *mn, > + struct mm_struct *mm); > +extern void mmu_notifier_unregister(struct mmu_notifier *mn, > + struct mm_struct *mm); > +extern void mmu_notifier_release(struct mm_struct *mm); > + > +static inline void mmu_notifier_head_init(struct mmu_notifier_head *mnh) > +{ > + INIT_HLIST_HEAD(&mnh->head); > + rwlock_init(&mnh->lock); > +} > + > +#define mmu_notifier(function, mm, args...) 
\ > + do {\ > + struct mmu_notifier *__mn; \ > + struct hlist_node *__n; \ > + \ > + if (unlikely(!hlist_empty(&(mm)->mmu_notifier.head))) { \ > + read_lock(&(mm)->mmu_notifier.lock);\ rcu_read_lock(); > + hlist_for_each_entry(__mn, __n, \ hlist_for_each_entry_rcu > + &(mm)->mmu_notifier.head, \ > + hlist) \ > + if (__mn->ops->function)\ > + __mn->ops->function(__mn, \ > + mm, \ > + args); \ > + read_unlock(&(mm)->mmu_notifier.lock); \ rcu_read_unlock(); > + } \ > + } while (0) > + > +#else /* CONFIG_MMU_NOTIFIER */ > + > +#define mmu_notifier_register(mn, mm) do {} while(0) > +#define mmu_notifier_unregister(mn, mm) do {} while (0) > +#define mmu_notifier_release(mm) do {} while (0) > +#define mmu_notifier_head_init(mmh) do {} while (0) > + > +#define mmu_notifier(function, mm, args...) \ > + do { } while (0) > + > +#endif /* CONFIG_MMU_NOTIFIER */ > + > +#endif /* _LINUX_MMU_NOTIFIER_H */ > diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c > new file mode 100644 > --- /dev/null > +++ b/mm/mmu_notifier.c > @@ -0,0 +1,44 @@ > +/* > + * linux/mm/mmu_notifier.c > + * > + * Copyright (C) 2008 Qumranet, Inc. > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + */ > + > +#include <linux/mmu_notifier.h> > +#include <linux/module.h> > + > +void mmu_notifier_release(struct mm_struct *mm) > +{ > + struct mmu_notifier *mn; > + struct hlist_node *n, *tmp; > + > + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { > + read_lock(&mm->mmu_notifier.lock); rcu_read_lock(); > + hlist_for_each_entry_safe(mn, n, tmp, hlist_for_each_entry_safe_rcu > + &mm->mmu_notifier.head, hlist) { > + if (mn->ops->release) > + mn->ops->release(mn, mm); > + hlist_del(&mn->hlist); hlist_del_rcu > + } > + read_unlock(&mm->mmu_notifier.lock); rcu_read_unlock(); > + } > +} > + > +void mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) > +{ > + write_lock(&mm->mmu_notifier.lock); spin_lock > + hlist_add_head(&mn->hlist, &mm->mmu_notifier.head); hlist_add_head_rcu > + write_unlock(&mm->mmu_notifier.lock); spin_unlock > +}
Re: [PATCH] mmu notifiers #v3
On Tue, 22 Jan 2008, Peter Zijlstra wrote: > I think we can get rid of this rwlock as I think this will seriously > hurt larger machines. Correct. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mmu notifiers #v3
On Tue, Jan 22, 2008 at 08:28:47PM +0100, Peter Zijlstra wrote: > I think we can get rid of this rwlock as I think this will seriously > hurt larger machines. Yep, I initially considered it, nevertheless given you solved part of the complication I can add it now ;). The only technical reason for not using RCU is if certain users of the notifiers are registering and unregistering at high frequency through objects that may need to be freed quickly. I can tell the KVM usage of the mmu notifiers is sure fine to use RCU. Then I will have to update KVM so that it will free the kvm structure after waiting a quiescent point to avoid kernel crashing memory corruption after applying your changes to the mmu notifier. Thanks! -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mmu notifiers #v3
On Tue, 22 Jan 2008, Andrea Arcangeli wrote: > > Then I will have to update KVM so that it will free the kvm structure > after waiting a quiescent point to avoid kernel crashing memory > corruption after applying your changes to the mmu notifier. It may not be suitable (I've not looked into your needs), but consider SLAB_DESTROY_BY_RCU: it might give you the easiest way to do that. Hugh -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mmu notifiers #v3
On Mon, 21 Jan 2008 13:52:04 +0100 Andrea Arcangeli <[EMAIL PROTECTED]> wrote: > Signed-off-by: Andrea Arcangeli <[EMAIL PROTECTED]> Reviewed-by: Rik van Riel <[EMAIL PROTECTED]> -- All rights reversed. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] mmu notifiers #v3
On Thu, Jan 17, 2008 at 08:32:52PM +0100, Andrea Arcangeli wrote: > To make this work we still need notification from the VM about memory > pressure [..] Ok I thought some more at the aging issue of the hot kvm pages (to prevent the guest-OS very-hot working set to be swapped out). So I now hooked a age_page mmu notifier in the page_referenced mkold path. This way when the linux pte is marked old, we can also drop the spte. This way we give the guest-OS a whole round scan of the inactive list in order to generate a vmexit minor fault by touching the hot page. The very-lightweight vmexit will call into follow_page again that I accordingly changed to mark the pte young (which is nicer because it truly simulates what a regular access through the virtual address would do). For direct-io it makes no difference and this way the next time page_referenced runs it'll find the pte young again and it'll mark the pte old again and in turn it'll call ->age_page again that will drop the spte again, etc... So the working set will be sticky in ram and it won't generate spurious swapouts (this is the theory at least). It works well in practice so far but I don't have hard numbers myself (I just implemented what I think is a quite effective aging strategy to do a not random page replacement on the very hot guest-OS working set). In absence of memory pressure (or with little pressure) there will be no age_page calls at all and the spte cache can grow freely without any vmexit. This provides peak performance in absence of memory pressure. This keeps the VM aging decision in the VM instead of having an lru of sptes to collect. The lru of sptes to collect would still be interesting for the shrinker method though (similar to dcache/inode lru etc..). This update also adds some locking so multiple subsystems can register/unregister for the notifiers at any time (something that had to be handled by design with external serialization before and effectively it was a bit fragile). 
BTW, when MMU_NOTIFIER=n the kernel compile spawns a warning in memory.c about two unused variables, not sure if it worth hiding it given I suppose most people will have MMU_NOTIFIER=y. One easy way to avoid the warning is to move the mmu_notifier call out of line and to have one function per notifier (which was suggested by Christoph already as an icache optimization). But this implementation keeps the patch smaller and quicker to improve for now... I'd like to know if this could be possibly merged soon and what I need to change to make this happen. Thanks! The kvm side of this can be found here: http://marc.info/?l=kvm-devel&m=120091930324366&w=2 http://marc.info/?l=kvm-devel&m=120091906724000&w=2 http://marc.info/?l=kvm-devel&m=120091939024572&w=2 Signed-off-by: Andrea Arcangeli <[EMAIL PROTECTED]> diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -44,8 +44,10 @@ ({ \ int __young;\ __young = ptep_test_and_clear_young(__vma, __address, __ptep); \ - if (__young)\ + if (__young) { \ flush_tlb_page(__vma, __address); \ + mmu_notifier(age_page, (__vma)->vm_mm, __address); \ + } \ __young;\ }) #endif @@ -86,6 +88,7 @@ do { \ pte_t __pte;\ __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep); \ flush_tlb_page(__vma, __address); \ + mmu_notifier(invalidate_page, (__vma)->vm_mm, __address); \ __pte; \ }) #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -10,6 +10,7 @@ #include <linux/rbtree.h> #include <linux/rwsem.h> #include <linux/completion.h> +#include <linux/mmu_notifier.h> #include <asm/page.h> #include <asm/mmu.h> @@ -219,6 +220,10 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; struct kioctx *ioctx_list; + +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_notifier_head mmu_notifier; /* MMU notifier list */ +#endif }; #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h new file mode 100644 --- /dev/null +++ 
b/include/linux/mmu_notifier.h @@ -0,0 +1,79 @@ +#ifndef _LINUX_MMU_NOTIFIER_H +#define _LINUX_MMU_NOTIFIER_H + +#include <linux/list.h> +#include <linux/spinlock.h> + +#ifdef CONFIG_MMU_NOTIFIER + +struct mmu_notifier; + +struct mmu_notifier_ops { + void
[PATCH] mmu notifiers #v3
On Thu, Jan 17, 2008 at 08:32:52PM +0100, Andrea Arcangeli wrote: > To make this work we still need notification from the VM about memory > pressure [..] Ok I thought some more at the aging issue of the hot kvm pages (to prevent the guest-OS very-hot working set to be swapped out). So I now hooked a age_page mmu notifier in the page_referenced mkold path. This way when the linux pte is marked old, we can also drop the spte. This way we give the guest-OS a whole round scan of the inactive list in order to generate a vmexit minor fault by touching the hot page. The very-lightweight vmexit will call into follow_page again that I accordingly changed to mark the pte young (which is nicer because it truly simulates what a regular access through the virtual address would do). For direct-io it makes no difference and this way the next time page_referenced runs it'll find the pte young again and it'll mark the pte old again and in turn it'll call ->age_page again that will drop the spte again, etc... So the working set will be sticky in ram and it won't generate spurious swapouts (this is the theory at least). It works well in practice so far but I don't have hard numbers myself (I just implemented what I think is a quite effective aging strategy to do a not random page replacement on the very hot guest-OS working set). In absence of memory pressure (or with little pressure) there will be no age_page calls at all and the spte cache can grow freely without any vmexit. This provides peak performance in absence of memory pressure. This keeps the VM aging decision in the VM instead of having an lru of sptes to collect. The lru of sptes to collect would still be interesting for the shrinker method though (similar to dcache/inode lru etc..). This update also adds some locking so multiple subsystems can register/unregister for the notifiers at any time (something that had to be handled by design with external serialization before and effectively it was a bit fragile). 
BTW, when MMU_NOTIFIER=n the kernel compile spawns a warning in memory.c about two unused variables, not sure if it worth hiding it given I suppose most people will have MMU_NOTIFIER=y. One easy way to avoid the warning is to move the mmu_notifier call out of line and to have one function per notifier (which was suggested by Christoph already as an icache optimization). But this implementation keeps the patch smaller and quicker to improve for now... I'd like to know if this could be possibly merged soon and what I need to change to make this happen. Thanks! The kvm side of this can be found here: http://marc.info/?l=kvm-devel&m=120091930324366&w=2 http://marc.info/?l=kvm-devel&m=120091906724000&w=2 http://marc.info/?l=kvm-devel&m=120091939024572&w=2 Signed-off-by: Andrea Arcangeli <[EMAIL PROTECTED]> diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -44,8 +44,10 @@ ({ \ int __young;\ __young = ptep_test_and_clear_young(__vma, __address, __ptep); \ - if (__young)\ + if (__young) { \ flush_tlb_page(__vma, __address); \ + mmu_notifier(age_page, (__vma)->vm_mm, __address); \ + } \ __young;\ }) #endif @@ -86,6 +88,7 @@ do { \ pte_t __pte;\ __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep); \ flush_tlb_page(__vma, __address); \ + mmu_notifier(invalidate_page, (__vma)->vm_mm, __address); \ __pte; \ }) #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -10,6 +10,7 @@ #include <linux/rbtree.h> #include <linux/rwsem.h> #include <linux/completion.h> +#include <linux/mmu_notifier.h> #include <asm/page.h> #include <asm/mmu.h> @@ -219,6 +220,10 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; struct kioctx *ioctx_list; + +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_notifier_head mmu_notifier; /* MMU notifier list */ +#endif }; #endif /* _LINUX_MM_TYPES_H */ diff --git 
a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h new file mode 100644 --- /dev/null +++ b/include/linux/mmu_notifier.h @@ -0,0 +1,79 @@ +#ifndef _LINUX_MMU_NOTIFIER_H +#define _LINUX_MMU_NOTIFIER_H + +#include <linux/list.h> +#include
Re: [PATCH] mmu notifiers #v3
On Mon, 21 Jan 2008 13:52:04 +0100 Andrea Arcangeli <[EMAIL PROTECTED]> wrote: > Signed-off-by: Andrea Arcangeli <[EMAIL PROTECTED]> Reviewed-by: Rik van Riel <[EMAIL PROTECTED]> -- All rights reversed. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/