Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe
On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote: > The new KSM NUMA merge_across_nodes knob introduces a problem, when it's > set to non-default 0: if a KSM page is migrated to a different NUMA node, > how do we migrate its stable node to the right tree? And what if that > collides with an existing stable node? > > ksm_migrate_page() can do no more than it's already doing, updating > stable_node->kpfn: the stable tree itself cannot be manipulated without > holding ksm_thread_mutex. So accept that a stable tree may temporarily > indicate a page belonging to the wrong NUMA node, leave updating until > the next pass of ksmd, just be careful not to merge other pages on to a How you not to merge other pages on to a misplaced page? I don't see it. > misplaced page. Note nid of holding tree in stable_node, and recognize > that it will not always match nid of kpfn. > > A misplaced KSM page is discovered, either when ksm_do_scan() next comes > around to one of its rmap_items (we now have to go to cmp_and_merge_page > even on pages in a stable tree), or when stable_tree_search() arrives at > a matching node for another page, and this node page is found misplaced. > > In each case, move the misplaced stable_node to a list of migrate_nodes > (and use the address of migrate_nodes as magic by which to identify them): > we don't need them in a tree. If stable_tree_search() finds no match for > a page, but it's currently exiled to this list, then slot its stable_node > right there into the tree, bringing all of its mappings with it; otherwise > they get migrated one by one to the original page of the colliding node. > stable_tree_search() is now modelled more like stable_tree_insert(), > in order to handle these insertions of migrated nodes. When node will be removed from migrate_nodes list and insert to stable tree? 
> > remove_node_from_stable_tree(), remove_all_stable_nodes() and > ksm_check_stable_tree() have to handle the migrate_nodes list as well as > the stable tree itself. Less obviously, we do need to prune the list of > stale entries from time to time (scan_get_next_rmap_item() does it once > each full scan): > whereas stale nodes in the stable tree get naturally > pruned as searches try to brush past them, these migrate_nodes may get > forgotten and accumulate. Hard to understand this description. Could you explain it? :) > Signed-off-by: Hugh Dickins What will happen if page node of an unstable tree migrate to a new numa node? Also need to handle colliding? > --- > mm/ksm.c | 164 +++-- > 1 file changed, 134 insertions(+), 30 deletions(-) > > --- mmotm.orig/mm/ksm.c 2013-01-25 14:37:03.832206218 -0800 > +++ mmotm/mm/ksm.c2013-01-25 14:37:06.880206290 -0800 > @@ -122,13 +122,25 @@ struct ksm_scan { > /** > * struct stable_node - node of the stable rbtree > * @node: rb node of this ksm page in the stable tree > + * @head: (overlaying parent) _nodes indicates temporarily on that > list > + * @list: linked into migrate_nodes, pending placement in the proper node > tree > * @hlist: hlist head of rmap_items using this ksm page > - * @kpfn: page frame number of this ksm page > + * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong > nid) > + * @nid: NUMA node id of stable tree in which linked (may not match kpfn) > */ > struct stable_node { > - struct rb_node node; > + union { > + struct rb_node node;/* when node of stable tree */ > + struct {/* when listed for migration */ > + struct list_head *head; > + struct list_head list; > + }; > + }; > struct hlist_head hlist; > unsigned long kpfn; > +#ifdef CONFIG_NUMA > + int nid; > +#endif > }; > > /** > @@ -169,6 +181,9 @@ struct rmap_item { > static struct rb_root root_unstable_tree[MAX_NUMNODES]; > static struct rb_root root_stable_tree[MAX_NUMNODES]; > > +/* Recently migrated nodes of stable tree, pending 
proper placement */ > +static LIST_HEAD(migrate_nodes); > + > #define MM_SLOTS_HASH_BITS 10 > static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); > > @@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru > hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm); > } > > -static inline int in_stable_tree(struct rmap_item *rmap_item) > -{ > - return rmap_item->address & STABLE_FLAG; > -} > - > /* > * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's > * page tables after it has passed through ksm_exit() - which, if necessary, > @@ -476,7 +486,6 @@ static void remove_node_from_stable_tree > { > struct rmap_item *rmap_item; > struct hlist_node *hlist; > - int nid; > > hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { > if (rmap_item->hlist.next) > @@ -488,8 +497,11 @@ static void remove_node_from_stable_tree >
Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe
On Sun, 27 Jan 2013, Simon Jeons wrote: > On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote: > > @@ -1344,10 +1401,29 @@ static void cmp_and_merge_page(struct pa > > unsigned int checksum; > > int err; > > > > - remove_rmap_item_from_tree(rmap_item); > > + stable_node = page_stable_node(page); > > + if (stable_node) { > > + if (stable_node->head != &migrate_nodes && > > + get_kpfn_nid(stable_node->kpfn) != NUMA(stable_node->nid)) { > > + rb_erase(&stable_node->node, > > + &root_stable_tree[NUMA(stable_node->nid)]); > > + stable_node->head = &migrate_nodes; > > + list_add(&stable_node->list, stable_node->head); > > Why list add &stable_node->list to stable_node->head? stable_node->head > is used for queue what? Read that as list_add(&stable_node->list, &migrate_nodes) if you prefer. stable_node->head (overlaying stable_node->node.__rb_parent_color, which would never point to migrate_nodes as an rb_node) &migrate_nodes is used as "magic" to show that that rb_node is currently saved on this list, rather than linked into the stable tree itself. We could do some #define MIGRATE_NODES_MAGIC 0xwhatever and put that in head instead. > > @@ -1464,6 +1540,27 @@ static struct rmap_item *scan_get_next_r > > */ > > lru_add_drain_all(); > > > > + /* > > +* Whereas stale stable_nodes on the stable_tree itself > > +* get pruned in the regular course of stable_tree_search(), > > Which kinds of stable_nodes can be treated as stale? I just see remove > rmap_item in stable_tree_search() and scan_get_next_rmap_item(). See get_ksm_page(). -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe
On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote: > The new KSM NUMA merge_across_nodes knob introduces a problem, when it's > set to non-default 0: if a KSM page is migrated to a different NUMA node, > how do we migrate its stable node to the right tree? And what if that > collides with an existing stable node? > > ksm_migrate_page() can do no more than it's already doing, updating > stable_node->kpfn: the stable tree itself cannot be manipulated without > holding ksm_thread_mutex. So accept that a stable tree may temporarily > indicate a page belonging to the wrong NUMA node, leave updating until > the next pass of ksmd, just be careful not to merge other pages on to a > misplaced page. Note nid of holding tree in stable_node, and recognize > that it will not always match nid of kpfn. > > A misplaced KSM page is discovered, either when ksm_do_scan() next comes > around to one of its rmap_items (we now have to go to cmp_and_merge_page > even on pages in a stable tree), or when stable_tree_search() arrives at > a matching node for another page, and this node page is found misplaced. > > In each case, move the misplaced stable_node to a list of migrate_nodes > (and use the address of migrate_nodes as magic by which to identify them): > we don't need them in a tree. If stable_tree_search() finds no match for > a page, but it's currently exiled to this list, then slot its stable_node > right there into the tree, bringing all of its mappings with it; otherwise > they get migrated one by one to the original page of the colliding node. > stable_tree_search() is now modelled more like stable_tree_insert(), > in order to handle these insertions of migrated nodes. > > remove_node_from_stable_tree(), remove_all_stable_nodes() and > ksm_check_stable_tree() have to handle the migrate_nodes list as well as > the stable tree itself. 
Less obviously, we do need to prune the list of > stale entries from time to time (scan_get_next_rmap_item() does it once > each full scan): whereas stale nodes in the stable tree get naturally > pruned as searches try to brush past them, these migrate_nodes may get > forgotten and accumulate. > > Signed-off-by: Hugh Dickins > --- > mm/ksm.c | 164 +++-- > 1 file changed, 134 insertions(+), 30 deletions(-) > > --- mmotm.orig/mm/ksm.c 2013-01-25 14:37:03.832206218 -0800 > +++ mmotm/mm/ksm.c 2013-01-25 14:37:06.880206290 -0800 > @@ -122,13 +122,25 @@ struct ksm_scan { > /** > * struct stable_node - node of the stable rbtree > * @node: rb node of this ksm page in the stable tree > + * @head: (overlaying parent) &migrate_nodes indicates temporarily on that > list > + * @list: linked into migrate_nodes, pending placement in the proper node > tree > * @hlist: hlist head of rmap_items using this ksm page > - * @kpfn: page frame number of this ksm page > + * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong > nid) > + * @nid: NUMA node id of stable tree in which linked (may not match kpfn) > */ > struct stable_node { > - struct rb_node node; > + union { > + struct rb_node node; /* when node of stable tree */ > + struct { /* when listed for migration */ > + struct list_head *head; > + struct list_head list; > + }; > + }; > struct hlist_head hlist; > unsigned long kpfn; > +#ifdef CONFIG_NUMA > + int nid; > +#endif > }; > > /** > @@ -169,6 +181,9 @@ struct rmap_item { > static struct rb_root root_unstable_tree[MAX_NUMNODES]; > static struct rb_root root_stable_tree[MAX_NUMNODES]; > > +/* Recently migrated nodes of stable tree, pending proper placement */ > +static LIST_HEAD(migrate_nodes); > + > #define MM_SLOTS_HASH_BITS 10 > static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); > > @@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru > hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm); > } > > -static inline int in_stable_tree(struct rmap_item
*rmap_item) > -{ > - return rmap_item->address & STABLE_FLAG; > -} > - > /* > * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's > * page tables after it has passed through ksm_exit() - which, if necessary, > @@ -476,7 +486,6 @@ static void remove_node_from_stable_tree > { > struct rmap_item *rmap_item; > struct hlist_node *hlist; > - int nid; > > hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { > if (rmap_item->hlist.next) > @@ -488,8 +497,11 @@ static void remove_node_from_stable_tree > cond_resched(); > } > > - nid = get_kpfn_nid(stable_node->kpfn); > - rb_erase(&stable_node->node, &root_stable_tree[nid]); > + if (stable_node->head == &migrate_nodes) > + list_del(&stable_node->list); > + else > + rb_erase(&stable_node->node, > + &root_stable_tree[NUMA(stable_node->nid)]);
Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe
On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote: The new KSM NUMA merge_across_nodes knob introduces a problem, when it's set to non-default 0: if a KSM page is migrated to a different NUMA node, how do we migrate its stable node to the right tree? And what if that collides with an existing stable node? ksm_migrate_page() can do no more than it's already doing, updating stable_node-kpfn: the stable tree itself cannot be manipulated without holding ksm_thread_mutex. So accept that a stable tree may temporarily indicate a page belonging to the wrong NUMA node, leave updating until the next pass of ksmd, just be careful not to merge other pages on to a misplaced page. Note nid of holding tree in stable_node, and recognize that it will not always match nid of kpfn. A misplaced KSM page is discovered, either when ksm_do_scan() next comes around to one of its rmap_items (we now have to go to cmp_and_merge_page even on pages in a stable tree), or when stable_tree_search() arrives at a matching node for another page, and this node page is found misplaced. In each case, move the misplaced stable_node to a list of migrate_nodes (and use the address of migrate_nodes as magic by which to identify them): we don't need them in a tree. If stable_tree_search() finds no match for a page, but it's currently exiled to this list, then slot its stable_node right there into the tree, bringing all of its mappings with it; otherwise they get migrated one by one to the original page of the colliding node. stable_tree_search() is now modelled more like stable_tree_insert(), in order to handle these insertions of migrated nodes. remove_node_from_stable_tree(), remove_all_stable_nodes() and ksm_check_stable_tree() have to handle the migrate_nodes list as well as the stable tree itself. 
Less obviously, we do need to prune the list of stale entries from time to time (scan_get_next_rmap_item() does it once each full scan): whereas stale nodes in the stable tree get naturally pruned as searches try to brush past them, these migrate_nodes may get forgotten and accumulate. Signed-off-by: Hugh Dickins <hu...@google.com> --- mm/ksm.c | 164 +++-- 1 file changed, 134 insertions(+), 30 deletions(-) --- mmotm.orig/mm/ksm.c 2013-01-25 14:37:03.832206218 -0800 +++ mmotm/mm/ksm.c 2013-01-25 14:37:06.880206290 -0800 @@ -122,13 +122,25 @@ struct ksm_scan { /** * struct stable_node - node of the stable rbtree * @node: rb node of this ksm page in the stable tree + * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list + * @list: linked into migrate_nodes, pending placement in the proper node tree * @hlist: hlist head of rmap_items using this ksm page - * @kpfn: page frame number of this ksm page + * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid) + * @nid: NUMA node id of stable tree in which linked (may not match kpfn) */ struct stable_node { - struct rb_node node; + union { + struct rb_node node; /* when node of stable tree */ + struct { /* when listed for migration */ + struct list_head *head; + struct list_head list; + }; + }; struct hlist_head hlist; unsigned long kpfn; +#ifdef CONFIG_NUMA + int nid; +#endif }; /** @@ -169,6 +181,9 @@ struct rmap_item { static struct rb_root root_unstable_tree[MAX_NUMNODES]; static struct rb_root root_stable_tree[MAX_NUMNODES]; +/* Recently migrated nodes of stable tree, pending proper placement */ +static LIST_HEAD(migrate_nodes); + #define MM_SLOTS_HASH_BITS 10 static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); @@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm); } -static inline int in_stable_tree(struct rmap_item *rmap_item) -{ - return rmap_item->address & STABLE_FLAG; -} - /* * ksmd, and
unmerge_and_remove_all_rmap_items(), must not touch an mm's * page tables after it has passed through ksm_exit() - which, if necessary, @@ -476,7 +486,6 @@ static void remove_node_from_stable_tree { struct rmap_item *rmap_item; struct hlist_node *hlist; - int nid; hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { if (rmap_item->hlist.next) @@ -488,8 +497,11 @@ static void remove_node_from_stable_tree cond_resched(); } - nid = get_kpfn_nid(stable_node->kpfn); - rb_erase(&stable_node->node, &root_stable_tree[nid]); + if (stable_node->head == &migrate_nodes) + list_del(&stable_node->list); + else + rb_erase(&stable_node->node, + &root_stable_tree[NUMA(stable_node->nid)]); free_stable_node(stable_node); } @@ -712,6
Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe
On Sun, 27 Jan 2013, Simon Jeons wrote: On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote: @@ -1344,10 +1401,29 @@ static void cmp_and_merge_page(struct pa unsigned int checksum; int err; - remove_rmap_item_from_tree(rmap_item); + stable_node = page_stable_node(page); + if (stable_node) { + if (stable_node->head != &migrate_nodes && + get_kpfn_nid(stable_node->kpfn) != NUMA(stable_node->nid)) { + rb_erase(&stable_node->node, + &root_stable_tree[NUMA(stable_node->nid)]); + stable_node->head = &migrate_nodes; + list_add(&stable_node->list, stable_node->head); Why list add &stable_node->list to stable_node->head? stable_node->head is used for queue what? Read that as list_add(&stable_node->list, &migrate_nodes) if you prefer. stable_node->head (overlaying stable_node->node.__rb_parent_color, which would never point to migrate_nodes as an rb_node) &migrate_nodes is used as "magic" to show that that rb_node is currently saved on this list, rather than linked into the stable tree itself. We could do some #define MIGRATE_NODES_MAGIC 0xwhatever and put that in head instead. @@ -1464,6 +1540,27 @@ static struct rmap_item *scan_get_next_r */ lru_add_drain_all(); + /* +* Whereas stale stable_nodes on the stable_tree itself +* get pruned in the regular course of stable_tree_search(), Which kinds of stable_nodes can be treated as stale? I just see remove rmap_item in stable_tree_search() and scan_get_next_rmap_item(). See get_ksm_page(). -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe
On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote: The new KSM NUMA merge_across_nodes knob introduces a problem, when it's set to non-default 0: if a KSM page is migrated to a different NUMA node, how do we migrate its stable node to the right tree? And what if that collides with an existing stable node? ksm_migrate_page() can do no more than it's already doing, updating stable_node-kpfn: the stable tree itself cannot be manipulated without holding ksm_thread_mutex. So accept that a stable tree may temporarily indicate a page belonging to the wrong NUMA node, leave updating until the next pass of ksmd, just be careful not to merge other pages on to a How you not to merge other pages on to a misplaced page? I don't see it. misplaced page. Note nid of holding tree in stable_node, and recognize that it will not always match nid of kpfn. A misplaced KSM page is discovered, either when ksm_do_scan() next comes around to one of its rmap_items (we now have to go to cmp_and_merge_page even on pages in a stable tree), or when stable_tree_search() arrives at a matching node for another page, and this node page is found misplaced. In each case, move the misplaced stable_node to a list of migrate_nodes (and use the address of migrate_nodes as magic by which to identify them): we don't need them in a tree. If stable_tree_search() finds no match for a page, but it's currently exiled to this list, then slot its stable_node right there into the tree, bringing all of its mappings with it; otherwise they get migrated one by one to the original page of the colliding node. stable_tree_search() is now modelled more like stable_tree_insert(), in order to handle these insertions of migrated nodes. When node will be removed from migrate_nodes list and insert to stable tree? remove_node_from_stable_tree(), remove_all_stable_nodes() and ksm_check_stable_tree() have to handle the migrate_nodes list as well as the stable tree itself. 
Less obviously, we do need to prune the list of stale entries from time to time (scan_get_next_rmap_item() does it once each full scan): whereas stale nodes in the stable tree get naturally pruned as searches try to brush past them, these migrate_nodes may get forgotten and accumulate. Hard to understand this description. Could you explain it? :) Signed-off-by: Hugh Dickins hu...@google.com What will happen if page node of an unstable tree migrate to a new numa node? Also need to handle colliding? --- mm/ksm.c | 164 +++-- 1 file changed, 134 insertions(+), 30 deletions(-) --- mmotm.orig/mm/ksm.c 2013-01-25 14:37:03.832206218 -0800 +++ mmotm/mm/ksm.c2013-01-25 14:37:06.880206290 -0800 @@ -122,13 +122,25 @@ struct ksm_scan { /** * struct stable_node - node of the stable rbtree * @node: rb node of this ksm page in the stable tree + * @head: (overlaying parent) migrate_nodes indicates temporarily on that list + * @list: linked into migrate_nodes, pending placement in the proper node tree * @hlist: hlist head of rmap_items using this ksm page - * @kpfn: page frame number of this ksm page + * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid) + * @nid: NUMA node id of stable tree in which linked (may not match kpfn) */ struct stable_node { - struct rb_node node; + union { + struct rb_node node;/* when node of stable tree */ + struct {/* when listed for migration */ + struct list_head *head; + struct list_head list; + }; + }; struct hlist_head hlist; unsigned long kpfn; +#ifdef CONFIG_NUMA + int nid; +#endif }; /** @@ -169,6 +181,9 @@ struct rmap_item { static struct rb_root root_unstable_tree[MAX_NUMNODES]; static struct rb_root root_stable_tree[MAX_NUMNODES]; +/* Recently migrated nodes of stable tree, pending proper placement */ +static LIST_HEAD(migrate_nodes); + #define MM_SLOTS_HASH_BITS 10 static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); @@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru hash_add(mm_slots_hash, 
&mm_slot->link, (unsigned long)mm); } -static inline int in_stable_tree(struct rmap_item *rmap_item) -{ - return rmap_item->address & STABLE_FLAG; -} - /* * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's * page tables after it has passed through ksm_exit() - which, if necessary, @@ -476,7 +486,6 @@ static void remove_node_from_stable_tree { struct rmap_item *rmap_item; struct hlist_node *hlist; - int nid; hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { if (rmap_item->hlist.next) @@ -488,8 +497,11 @@ static void remove_node_from_stable_tree cond_resched(); } - nid =
[PATCH 8/11] ksm: make !merge_across_nodes migration safe
The new KSM NUMA merge_across_nodes knob introduces a problem, when it's set to non-default 0: if a KSM page is migrated to a different NUMA node, how do we migrate its stable node to the right tree? And what if that collides with an existing stable node? ksm_migrate_page() can do no more than it's already doing, updating stable_node->kpfn: the stable tree itself cannot be manipulated without holding ksm_thread_mutex. So accept that a stable tree may temporarily indicate a page belonging to the wrong NUMA node, leave updating until the next pass of ksmd, just be careful not to merge other pages on to a misplaced page. Note nid of holding tree in stable_node, and recognize that it will not always match nid of kpfn. A misplaced KSM page is discovered, either when ksm_do_scan() next comes around to one of its rmap_items (we now have to go to cmp_and_merge_page even on pages in a stable tree), or when stable_tree_search() arrives at a matching node for another page, and this node page is found misplaced. In each case, move the misplaced stable_node to a list of migrate_nodes (and use the address of migrate_nodes as magic by which to identify them): we don't need them in a tree. If stable_tree_search() finds no match for a page, but it's currently exiled to this list, then slot its stable_node right there into the tree, bringing all of its mappings with it; otherwise they get migrated one by one to the original page of the colliding node. stable_tree_search() is now modelled more like stable_tree_insert(), in order to handle these insertions of migrated nodes. remove_node_from_stable_tree(), remove_all_stable_nodes() and ksm_check_stable_tree() have to handle the migrate_nodes list as well as the stable tree itself. 
Less obviously, we do need to prune the list of stale entries from time to time (scan_get_next_rmap_item() does it once each full scan): whereas stale nodes in the stable tree get naturally pruned as searches try to brush past them, these migrate_nodes may get forgotten and accumulate. Signed-off-by: Hugh Dickins --- mm/ksm.c | 164 +++-- 1 file changed, 134 insertions(+), 30 deletions(-) --- mmotm.orig/mm/ksm.c 2013-01-25 14:37:03.832206218 -0800 +++ mmotm/mm/ksm.c 2013-01-25 14:37:06.880206290 -0800 @@ -122,13 +122,25 @@ struct ksm_scan { /** * struct stable_node - node of the stable rbtree * @node: rb node of this ksm page in the stable tree + * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list + * @list: linked into migrate_nodes, pending placement in the proper node tree * @hlist: hlist head of rmap_items using this ksm page - * @kpfn: page frame number of this ksm page + * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid) + * @nid: NUMA node id of stable tree in which linked (may not match kpfn) */ struct stable_node { - struct rb_node node; + union { + struct rb_node node; /* when node of stable tree */ + struct { /* when listed for migration */ + struct list_head *head; + struct list_head list; + }; + }; struct hlist_head hlist; unsigned long kpfn; +#ifdef CONFIG_NUMA + int nid; +#endif }; /** @@ -169,6 +181,9 @@ struct rmap_item { static struct rb_root root_unstable_tree[MAX_NUMNODES]; static struct rb_root root_stable_tree[MAX_NUMNODES]; +/* Recently migrated nodes of stable tree, pending proper placement */ +static LIST_HEAD(migrate_nodes); + #define MM_SLOTS_HASH_BITS 10 static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); @@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm); } -static inline int in_stable_tree(struct rmap_item *rmap_item) -{ - return rmap_item->address & STABLE_FLAG; -} - /* * ksmd, and unmerge_and_remove_all_rmap_items(),
must not touch an mm's * page tables after it has passed through ksm_exit() - which, if necessary, @@ -476,7 +486,6 @@ static void remove_node_from_stable_tree { struct rmap_item *rmap_item; struct hlist_node *hlist; - int nid; hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { if (rmap_item->hlist.next) @@ -488,8 +497,11 @@ static void remove_node_from_stable_tree cond_resched(); } - nid = get_kpfn_nid(stable_node->kpfn); - rb_erase(&stable_node->node, &root_stable_tree[nid]); + if (stable_node->head == &migrate_nodes) + list_del(&stable_node->list); + else + rb_erase(&stable_node->node, + &root_stable_tree[NUMA(stable_node->nid)]); free_stable_node(stable_node); } @@ -712,6 +724,7 @@ static int remove_stable_node(struct sta static int remove_all_stable_nodes(void) { struct stable_node *stable_node; + struct list_head *this, *next;
[PATCH 8/11] ksm: make !merge_across_nodes migration safe
The new KSM NUMA merge_across_nodes knob introduces a problem, when it's set to non-default 0: if a KSM page is migrated to a different NUMA node, how do we migrate its stable node to the right tree? And what if that collides with an existing stable node? ksm_migrate_page() can do no more than it's already doing, updating stable_node-kpfn: the stable tree itself cannot be manipulated without holding ksm_thread_mutex. So accept that a stable tree may temporarily indicate a page belonging to the wrong NUMA node, leave updating until the next pass of ksmd, just be careful not to merge other pages on to a misplaced page. Note nid of holding tree in stable_node, and recognize that it will not always match nid of kpfn. A misplaced KSM page is discovered, either when ksm_do_scan() next comes around to one of its rmap_items (we now have to go to cmp_and_merge_page even on pages in a stable tree), or when stable_tree_search() arrives at a matching node for another page, and this node page is found misplaced. In each case, move the misplaced stable_node to a list of migrate_nodes (and use the address of migrate_nodes as magic by which to identify them): we don't need them in a tree. If stable_tree_search() finds no match for a page, but it's currently exiled to this list, then slot its stable_node right there into the tree, bringing all of its mappings with it; otherwise they get migrated one by one to the original page of the colliding node. stable_tree_search() is now modelled more like stable_tree_insert(), in order to handle these insertions of migrated nodes. remove_node_from_stable_tree(), remove_all_stable_nodes() and ksm_check_stable_tree() have to handle the migrate_nodes list as well as the stable tree itself. 
Less obviously, we do need to prune the list of stale entries from time to time (scan_get_next_rmap_item() does it once each full scan): whereas stale nodes in the stable tree get naturally pruned as searches try to brush past them, these migrate_nodes may get forgotten and accumulate. Signed-off-by: Hugh Dickins <hu...@google.com> --- mm/ksm.c | 164 +++-- 1 file changed, 134 insertions(+), 30 deletions(-) --- mmotm.orig/mm/ksm.c 2013-01-25 14:37:03.832206218 -0800 +++ mmotm/mm/ksm.c 2013-01-25 14:37:06.880206290 -0800 @@ -122,13 +122,25 @@ struct ksm_scan { /** * struct stable_node - node of the stable rbtree * @node: rb node of this ksm page in the stable tree + * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list + * @list: linked into migrate_nodes, pending placement in the proper node tree * @hlist: hlist head of rmap_items using this ksm page - * @kpfn: page frame number of this ksm page + * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid) + * @nid: NUMA node id of stable tree in which linked (may not match kpfn) */ struct stable_node { - struct rb_node node; + union { + struct rb_node node; /* when node of stable tree */ + struct { /* when listed for migration */ + struct list_head *head; + struct list_head list; + }; + }; struct hlist_head hlist; unsigned long kpfn; +#ifdef CONFIG_NUMA + int nid; +#endif }; /** @@ -169,6 +181,9 @@ struct rmap_item { static struct rb_root root_unstable_tree[MAX_NUMNODES]; static struct rb_root root_stable_tree[MAX_NUMNODES]; +/* Recently migrated nodes of stable tree, pending proper placement */ +static LIST_HEAD(migrate_nodes); + #define MM_SLOTS_HASH_BITS 10 static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); @@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm); } -static inline int in_stable_tree(struct rmap_item *rmap_item) -{ - return rmap_item->address & STABLE_FLAG; -} - /* * ksmd, and
unmerge_and_remove_all_rmap_items(), must not touch an mm's * page tables after it has passed through ksm_exit() - which, if necessary, @@ -476,7 +486,6 @@ static void remove_node_from_stable_tree { struct rmap_item *rmap_item; struct hlist_node *hlist; - int nid; hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { if (rmap_item->hlist.next) @@ -488,8 +497,11 @@ static void remove_node_from_stable_tree cond_resched(); } - nid = get_kpfn_nid(stable_node->kpfn); - rb_erase(&stable_node->node, &root_stable_tree[nid]); + if (stable_node->head == &migrate_nodes) + list_del(&stable_node->list); + else + rb_erase(&stable_node->node, + &root_stable_tree[NUMA(stable_node->nid)]); free_stable_node(stable_node); } @@ -712,6 +724,7 @@ static int remove_stable_node(struct sta static int remove_all_stable_nodes(void) { struct stable_node