Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe

2013-01-27 Thread Simon Jeons
On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote:
> The new KSM NUMA merge_across_nodes knob introduces a problem, when it's
> set to non-default 0: if a KSM page is migrated to a different NUMA node,
> how do we migrate its stable node to the right tree?  And what if that
> collides with an existing stable node?
> 
> ksm_migrate_page() can do no more than it's already doing, updating
> stable_node->kpfn: the stable tree itself cannot be manipulated without
> holding ksm_thread_mutex.  So accept that a stable tree may temporarily
> indicate a page belonging to the wrong NUMA node, leave updating until
> the next pass of ksmd, just be careful not to merge other pages on to a

How do you avoid merging other pages on to a misplaced page? I don't see it.

> misplaced page.  Note nid of holding tree in stable_node, and recognize
> that it will not always match nid of kpfn.
> 
> A misplaced KSM page is discovered, either when ksm_do_scan() next comes
> around to one of its rmap_items (we now have to go to cmp_and_merge_page
> even on pages in a stable tree), or when stable_tree_search() arrives at
> a matching node for another page, and this node page is found misplaced.
> 
> In each case, move the misplaced stable_node to a list of migrate_nodes
> (and use the address of migrate_nodes as magic by which to identify them):
> we don't need them in a tree.  If stable_tree_search() finds no match for
> a page, but it's currently exiled to this list, then slot its stable_node
> right there into the tree, bringing all of its mappings with it; otherwise
> they get migrated one by one to the original page of the colliding node.
> stable_tree_search() is now modelled more like stable_tree_insert(),
> in order to handle these insertions of migrated nodes.

When will a node be removed from the migrate_nodes list and inserted into
the stable tree?

> 
> remove_node_from_stable_tree(), remove_all_stable_nodes() and
> ksm_check_stable_tree() have to handle the migrate_nodes list as well as
> the stable tree itself.  Less obviously, we do need to prune the list of
> stale entries from time to time (scan_get_next_rmap_item() does it once
> each full scan):

>  whereas stale nodes in the stable tree get naturally
> pruned as searches try to brush past them, these migrate_nodes may get
> forgotten and accumulate.

Hard to understand this description. Could you explain it? :)

> Signed-off-by: Hugh Dickins 

What will happen if the page of an unstable tree node migrates to a new NUMA
node? Do collisions need to be handled there as well?

> ---
>  mm/ksm.c |  164 +++--
>  1 file changed, 134 insertions(+), 30 deletions(-)
> 
> --- mmotm.orig/mm/ksm.c   2013-01-25 14:37:03.832206218 -0800
> +++ mmotm/mm/ksm.c2013-01-25 14:37:06.880206290 -0800
> @@ -122,13 +122,25 @@ struct ksm_scan {
>  /**
>   * struct stable_node - node of the stable rbtree
>   * @node: rb node of this ksm page in the stable tree
> + * @head: (overlaying parent) &migrate_nodes indicates temporarily on that
> list
> + * @list: linked into migrate_nodes, pending placement in the proper node 
> tree
>   * @hlist: hlist head of rmap_items using this ksm page
> - * @kpfn: page frame number of this ksm page
> + * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong 
> nid)
> + * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
>   */
>  struct stable_node {
> - struct rb_node node;
> + union {
> + struct rb_node node;/* when node of stable tree */
> + struct {/* when listed for migration */
> + struct list_head *head;
> + struct list_head list;
> + };
> + };
>   struct hlist_head hlist;
>   unsigned long kpfn;
> +#ifdef CONFIG_NUMA
> + int nid;
> +#endif
>  };
>  
>  /**
> @@ -169,6 +181,9 @@ struct rmap_item {
>  static struct rb_root root_unstable_tree[MAX_NUMNODES];
>  static struct rb_root root_stable_tree[MAX_NUMNODES];
>  
> +/* Recently migrated nodes of stable tree, pending proper placement */
> +static LIST_HEAD(migrate_nodes);
> +
>  #define MM_SLOTS_HASH_BITS 10
>  static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
>  
> @@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru
>   hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
>  }
>  
> -static inline int in_stable_tree(struct rmap_item *rmap_item)
> -{
> - return rmap_item->address & STABLE_FLAG;
> -}
> -
>  /*
>   * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
>   * page tables after it has passed through ksm_exit() - which, if necessary,
> @@ -476,7 +486,6 @@ static void remove_node_from_stable_tree
>  {
>   struct rmap_item *rmap_item;
>   struct hlist_node *hlist;
> - int nid;
>  
>   hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
>   if (rmap_item->hlist.next)
> @@ -488,8 +497,11 @@ static void remove_node_from_stable_tree
>  

Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe

2013-01-27 Thread Hugh Dickins
On Sun, 27 Jan 2013, Simon Jeons wrote:
> On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote:
> > @@ -1344,10 +1401,29 @@ static void cmp_and_merge_page(struct pa
> > unsigned int checksum;
> > int err;
> >  
> > -   remove_rmap_item_from_tree(rmap_item);
> > +   stable_node = page_stable_node(page);
> > +   if (stable_node) {
> > +   if (stable_node->head != &migrate_nodes &&
> > +   get_kpfn_nid(stable_node->kpfn) != NUMA(stable_node->nid)) {
> > +   rb_erase(&stable_node->node,
> > +   &root_stable_tree[NUMA(stable_node->nid)]);
> > +   stable_node->head = &migrate_nodes;
> > +   list_add(&stable_node->list, stable_node->head);
> 
> > Why list_add() &stable_node->list to stable_node->head? What is
> > stable_node->head used to queue?

Read that as list_add(&stable_node->list, &migrate_nodes) if you prefer.
stable_node->head (overlaying stable_node->node.__rb_parent_color, which
would never point to migrate_nodes as an rb_node) &migrate_nodes is used
as "magic" to show that that rb_node is currently saved on this list,
rather than linked into the stable tree itself.  We could do some
#define MIGRATE_NODES_MAGIC 0xwhatever and put that in head instead.

> > @@ -1464,6 +1540,27 @@ static struct rmap_item *scan_get_next_r
> >  */
> > lru_add_drain_all();
> >  
> > +   /*
> > +* Whereas stale stable_nodes on the stable_tree itself
> > +* get pruned in the regular course of stable_tree_search(),
> 
> Which kinds of stable_nodes can be treated as stale? I just see remove
> rmap_item in stable_tree_search() and scan_get_next_rmap_item().

See get_ksm_page().
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe

2013-01-27 Thread Simon Jeons
On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote:
> The new KSM NUMA merge_across_nodes knob introduces a problem, when it's
> set to non-default 0: if a KSM page is migrated to a different NUMA node,
> how do we migrate its stable node to the right tree?  And what if that
> collides with an existing stable node?
> 
> ksm_migrate_page() can do no more than it's already doing, updating
> stable_node->kpfn: the stable tree itself cannot be manipulated without
> holding ksm_thread_mutex.  So accept that a stable tree may temporarily
> indicate a page belonging to the wrong NUMA node, leave updating until
> the next pass of ksmd, just be careful not to merge other pages on to a
> misplaced page.  Note nid of holding tree in stable_node, and recognize
> that it will not always match nid of kpfn.
> 
> A misplaced KSM page is discovered, either when ksm_do_scan() next comes
> around to one of its rmap_items (we now have to go to cmp_and_merge_page
> even on pages in a stable tree), or when stable_tree_search() arrives at
> a matching node for another page, and this node page is found misplaced.
> 
> In each case, move the misplaced stable_node to a list of migrate_nodes
> (and use the address of migrate_nodes as magic by which to identify them):
> we don't need them in a tree.  If stable_tree_search() finds no match for
> a page, but it's currently exiled to this list, then slot its stable_node
> right there into the tree, bringing all of its mappings with it; otherwise
> they get migrated one by one to the original page of the colliding node.
> stable_tree_search() is now modelled more like stable_tree_insert(),
> in order to handle these insertions of migrated nodes.
> 
> remove_node_from_stable_tree(), remove_all_stable_nodes() and
> ksm_check_stable_tree() have to handle the migrate_nodes list as well as
> the stable tree itself.  Less obviously, we do need to prune the list of
> stale entries from time to time (scan_get_next_rmap_item() does it once
> each full scan): whereas stale nodes in the stable tree get naturally
> pruned as searches try to brush past them, these migrate_nodes may get
> forgotten and accumulate.
> 
> Signed-off-by: Hugh Dickins 
> ---
>  mm/ksm.c |  164 +++--
>  1 file changed, 134 insertions(+), 30 deletions(-)
> 
> --- mmotm.orig/mm/ksm.c   2013-01-25 14:37:03.832206218 -0800
> +++ mmotm/mm/ksm.c2013-01-25 14:37:06.880206290 -0800
> @@ -122,13 +122,25 @@ struct ksm_scan {
>  /**
>   * struct stable_node - node of the stable rbtree
>   * @node: rb node of this ksm page in the stable tree
> + * @head: (overlaying parent) &migrate_nodes indicates temporarily on that
> list
> + * @list: linked into migrate_nodes, pending placement in the proper node 
> tree
>   * @hlist: hlist head of rmap_items using this ksm page
> - * @kpfn: page frame number of this ksm page
> + * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong 
> nid)
> + * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
>   */
>  struct stable_node {
> - struct rb_node node;
> + union {
> + struct rb_node node;/* when node of stable tree */
> + struct {/* when listed for migration */
> + struct list_head *head;
> + struct list_head list;
> + };
> + };
>   struct hlist_head hlist;
>   unsigned long kpfn;
> +#ifdef CONFIG_NUMA
> + int nid;
> +#endif
>  };
>  
>  /**
> @@ -169,6 +181,9 @@ struct rmap_item {
>  static struct rb_root root_unstable_tree[MAX_NUMNODES];
>  static struct rb_root root_stable_tree[MAX_NUMNODES];
>  
> +/* Recently migrated nodes of stable tree, pending proper placement */
> +static LIST_HEAD(migrate_nodes);
> +
>  #define MM_SLOTS_HASH_BITS 10
>  static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
>  
> @@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru
>   hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
>  }
>  
> -static inline int in_stable_tree(struct rmap_item *rmap_item)
> -{
> - return rmap_item->address & STABLE_FLAG;
> -}
> -
>  /*
>   * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
>   * page tables after it has passed through ksm_exit() - which, if necessary,
> @@ -476,7 +486,6 @@ static void remove_node_from_stable_tree
>  {
>   struct rmap_item *rmap_item;
>   struct hlist_node *hlist;
> - int nid;
>  
>   hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
>   if (rmap_item->hlist.next)
> @@ -488,8 +497,11 @@ static void remove_node_from_stable_tree
>   cond_resched();
>   }
>  
> - nid = get_kpfn_nid(stable_node->kpfn);
> - rb_erase(&stable_node->node, &root_stable_tree[nid]);
> + if (stable_node->head == &migrate_nodes)
> + list_del(&stable_node->list);
> + else
> + rb_erase(&stable_node->node,
> +  &root_stable_tree[NUMA(stable_node->nid)]);

Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe

2013-01-27 Thread Simon Jeons
On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote:
 The new KSM NUMA merge_across_nodes knob introduces a problem, when it's
 set to non-default 0: if a KSM page is migrated to a different NUMA node,
 how do we migrate its stable node to the right tree?  And what if that
 collides with an existing stable node?
 
 ksm_migrate_page() can do no more than it's already doing, updating
 stable_node->kpfn: the stable tree itself cannot be manipulated without
 holding ksm_thread_mutex.  So accept that a stable tree may temporarily
 indicate a page belonging to the wrong NUMA node, leave updating until
 the next pass of ksmd, just be careful not to merge other pages on to a
 misplaced page.  Note nid of holding tree in stable_node, and recognize
 that it will not always match nid of kpfn.
 
 A misplaced KSM page is discovered, either when ksm_do_scan() next comes
 around to one of its rmap_items (we now have to go to cmp_and_merge_page
 even on pages in a stable tree), or when stable_tree_search() arrives at
 a matching node for another page, and this node page is found misplaced.
 
 In each case, move the misplaced stable_node to a list of migrate_nodes
 (and use the address of migrate_nodes as magic by which to identify them):
 we don't need them in a tree.  If stable_tree_search() finds no match for
 a page, but it's currently exiled to this list, then slot its stable_node
 right there into the tree, bringing all of its mappings with it; otherwise
 they get migrated one by one to the original page of the colliding node.
 stable_tree_search() is now modelled more like stable_tree_insert(),
 in order to handle these insertions of migrated nodes.
 
 remove_node_from_stable_tree(), remove_all_stable_nodes() and
 ksm_check_stable_tree() have to handle the migrate_nodes list as well as
 the stable tree itself.  Less obviously, we do need to prune the list of
 stale entries from time to time (scan_get_next_rmap_item() does it once
 each full scan): whereas stale nodes in the stable tree get naturally
 pruned as searches try to brush past them, these migrate_nodes may get
 forgotten and accumulate.
 
 Signed-off-by: Hugh Dickins hu...@google.com
 ---
  mm/ksm.c |  164 +++--
  1 file changed, 134 insertions(+), 30 deletions(-)
 
 --- mmotm.orig/mm/ksm.c   2013-01-25 14:37:03.832206218 -0800
 +++ mmotm/mm/ksm.c2013-01-25 14:37:06.880206290 -0800
 @@ -122,13 +122,25 @@ struct ksm_scan {
  /**
   * struct stable_node - node of the stable rbtree
   * @node: rb node of this ksm page in the stable tree
 + * @head: (overlaying parent) &migrate_nodes indicates temporarily on that
 list
 + * @list: linked into migrate_nodes, pending placement in the proper node 
 tree
   * @hlist: hlist head of rmap_items using this ksm page
 - * @kpfn: page frame number of this ksm page
 + * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong 
 nid)
 + * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
   */
  struct stable_node {
 - struct rb_node node;
 + union {
 + struct rb_node node;/* when node of stable tree */
 + struct {/* when listed for migration */
 + struct list_head *head;
 + struct list_head list;
 + };
 + };
   struct hlist_head hlist;
   unsigned long kpfn;
 +#ifdef CONFIG_NUMA
 + int nid;
 +#endif
  };
  
  /**
 @@ -169,6 +181,9 @@ struct rmap_item {
  static struct rb_root root_unstable_tree[MAX_NUMNODES];
  static struct rb_root root_stable_tree[MAX_NUMNODES];
  
 +/* Recently migrated nodes of stable tree, pending proper placement */
 +static LIST_HEAD(migrate_nodes);
 +
  #define MM_SLOTS_HASH_BITS 10
  static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
  
 @@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru
   hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
  }
  
 -static inline int in_stable_tree(struct rmap_item *rmap_item)
 -{
 - return rmap_item->address & STABLE_FLAG;
 -}
 -
  /*
   * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
   * page tables after it has passed through ksm_exit() - which, if necessary,
 @@ -476,7 +486,6 @@ static void remove_node_from_stable_tree
  {
   struct rmap_item *rmap_item;
   struct hlist_node *hlist;
 - int nid;
  
   hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
   if (rmap_item->hlist.next)
 @@ -488,8 +497,11 @@ static void remove_node_from_stable_tree
   cond_resched();
   }
  
 - nid = get_kpfn_nid(stable_node->kpfn);
 - rb_erase(&stable_node->node, &root_stable_tree[nid]);
 + if (stable_node->head == &migrate_nodes)
 + list_del(&stable_node->list);
 + else
 + rb_erase(&stable_node->node,
 +  &root_stable_tree[NUMA(stable_node->nid)]);
   free_stable_node(stable_node);
  }
  
 @@ -712,6 

Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe

2013-01-27 Thread Hugh Dickins
On Sun, 27 Jan 2013, Simon Jeons wrote:
 On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote:
  @@ -1344,10 +1401,29 @@ static void cmp_and_merge_page(struct pa
  unsigned int checksum;
  int err;
   
  -   remove_rmap_item_from_tree(rmap_item);
  +   stable_node = page_stable_node(page);
  +   if (stable_node) {
  +   if (stable_node->head != &migrate_nodes &&
  +   get_kpfn_nid(stable_node->kpfn) != NUMA(stable_node->nid)) {
  +   rb_erase(&stable_node->node,
  +   &root_stable_tree[NUMA(stable_node->nid)]);
  +   stable_node->head = &migrate_nodes;
  +   list_add(&stable_node->list, stable_node->head);
 
 Why list_add() &stable_node->list to stable_node->head? What is
 stable_node->head used to queue?

Read that as list_add(&stable_node->list, &migrate_nodes) if you prefer.
stable_node->head (overlaying stable_node->node.__rb_parent_color, which
would never point to migrate_nodes as an rb_node) &migrate_nodes is used
as "magic" to show that that rb_node is currently saved on this list,
rather than linked into the stable tree itself.  We could do some
#define MIGRATE_NODES_MAGIC 0xwhatever and put that in head instead.

  @@ -1464,6 +1540,27 @@ static struct rmap_item *scan_get_next_r
   */
  lru_add_drain_all();
   
  +   /*
  +* Whereas stale stable_nodes on the stable_tree itself
  +* get pruned in the regular course of stable_tree_search(),
 
 Which kinds of stable_nodes can be treated as stale? I just see remove
 rmap_item in stable_tree_search() and scan_get_next_rmap_item().

See get_ksm_page().
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 8/11] ksm: make !merge_across_nodes migration safe

2013-01-27 Thread Simon Jeons
On Fri, 2013-01-25 at 18:05 -0800, Hugh Dickins wrote:
 The new KSM NUMA merge_across_nodes knob introduces a problem, when it's
 set to non-default 0: if a KSM page is migrated to a different NUMA node,
 how do we migrate its stable node to the right tree?  And what if that
 collides with an existing stable node?
 
 ksm_migrate_page() can do no more than it's already doing, updating
 stable_node->kpfn: the stable tree itself cannot be manipulated without
 holding ksm_thread_mutex.  So accept that a stable tree may temporarily
 indicate a page belonging to the wrong NUMA node, leave updating until
 the next pass of ksmd, just be careful not to merge other pages on to a

How do you avoid merging other pages on to a misplaced page? I don't see it.

 misplaced page.  Note nid of holding tree in stable_node, and recognize
 that it will not always match nid of kpfn.
 
 A misplaced KSM page is discovered, either when ksm_do_scan() next comes
 around to one of its rmap_items (we now have to go to cmp_and_merge_page
 even on pages in a stable tree), or when stable_tree_search() arrives at
 a matching node for another page, and this node page is found misplaced.
 
 In each case, move the misplaced stable_node to a list of migrate_nodes
 (and use the address of migrate_nodes as magic by which to identify them):
 we don't need them in a tree.  If stable_tree_search() finds no match for
 a page, but it's currently exiled to this list, then slot its stable_node
 right there into the tree, bringing all of its mappings with it; otherwise
 they get migrated one by one to the original page of the colliding node.
 stable_tree_search() is now modelled more like stable_tree_insert(),
 in order to handle these insertions of migrated nodes.

When will a node be removed from the migrate_nodes list and inserted into
the stable tree?

 
 remove_node_from_stable_tree(), remove_all_stable_nodes() and
 ksm_check_stable_tree() have to handle the migrate_nodes list as well as
 the stable tree itself.  Less obviously, we do need to prune the list of
 stale entries from time to time (scan_get_next_rmap_item() does it once
 each full scan):

  whereas stale nodes in the stable tree get naturally
 pruned as searches try to brush past them, these migrate_nodes may get
 forgotten and accumulate.

Hard to understand this description. Could you explain it? :)

 Signed-off-by: Hugh Dickins hu...@google.com

What will happen if the page of an unstable tree node migrates to a new NUMA
node? Do collisions need to be handled there as well?

 ---
  mm/ksm.c |  164 +++--
  1 file changed, 134 insertions(+), 30 deletions(-)
 
 --- mmotm.orig/mm/ksm.c   2013-01-25 14:37:03.832206218 -0800
 +++ mmotm/mm/ksm.c2013-01-25 14:37:06.880206290 -0800
 @@ -122,13 +122,25 @@ struct ksm_scan {
  /**
   * struct stable_node - node of the stable rbtree
   * @node: rb node of this ksm page in the stable tree
 + * @head: (overlaying parent) &migrate_nodes indicates temporarily on that
 list
 + * @list: linked into migrate_nodes, pending placement in the proper node 
 tree
   * @hlist: hlist head of rmap_items using this ksm page
 - * @kpfn: page frame number of this ksm page
 + * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong 
 nid)
 + * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
   */
  struct stable_node {
 - struct rb_node node;
 + union {
 + struct rb_node node;/* when node of stable tree */
 + struct {/* when listed for migration */
 + struct list_head *head;
 + struct list_head list;
 + };
 + };
   struct hlist_head hlist;
   unsigned long kpfn;
 +#ifdef CONFIG_NUMA
 + int nid;
 +#endif
  };
  
  /**
 @@ -169,6 +181,9 @@ struct rmap_item {
  static struct rb_root root_unstable_tree[MAX_NUMNODES];
  static struct rb_root root_stable_tree[MAX_NUMNODES];
  
 +/* Recently migrated nodes of stable tree, pending proper placement */
 +static LIST_HEAD(migrate_nodes);
 +
  #define MM_SLOTS_HASH_BITS 10
  static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
  
 @@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru
   hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
  }
  
 -static inline int in_stable_tree(struct rmap_item *rmap_item)
 -{
 - return rmap_item->address & STABLE_FLAG;
 -}
 -
  /*
   * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
   * page tables after it has passed through ksm_exit() - which, if necessary,
 @@ -476,7 +486,6 @@ static void remove_node_from_stable_tree
  {
   struct rmap_item *rmap_item;
   struct hlist_node *hlist;
 - int nid;
  
   hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
   if (rmap_item->hlist.next)
 @@ -488,8 +497,11 @@ static void remove_node_from_stable_tree
   cond_resched();
   }
  
 - nid = 

[PATCH 8/11] ksm: make !merge_across_nodes migration safe

2013-01-25 Thread Hugh Dickins
The new KSM NUMA merge_across_nodes knob introduces a problem, when it's
set to non-default 0: if a KSM page is migrated to a different NUMA node,
how do we migrate its stable node to the right tree?  And what if that
collides with an existing stable node?

ksm_migrate_page() can do no more than it's already doing, updating
stable_node->kpfn: the stable tree itself cannot be manipulated without
holding ksm_thread_mutex.  So accept that a stable tree may temporarily
indicate a page belonging to the wrong NUMA node, leave updating until
the next pass of ksmd, just be careful not to merge other pages on to a
misplaced page.  Note nid of holding tree in stable_node, and recognize
that it will not always match nid of kpfn.

A misplaced KSM page is discovered, either when ksm_do_scan() next comes
around to one of its rmap_items (we now have to go to cmp_and_merge_page
even on pages in a stable tree), or when stable_tree_search() arrives at
a matching node for another page, and this node page is found misplaced.

In each case, move the misplaced stable_node to a list of migrate_nodes
(and use the address of migrate_nodes as magic by which to identify them):
we don't need them in a tree.  If stable_tree_search() finds no match for
a page, but it's currently exiled to this list, then slot its stable_node
right there into the tree, bringing all of its mappings with it; otherwise
they get migrated one by one to the original page of the colliding node.
stable_tree_search() is now modelled more like stable_tree_insert(),
in order to handle these insertions of migrated nodes.

remove_node_from_stable_tree(), remove_all_stable_nodes() and
ksm_check_stable_tree() have to handle the migrate_nodes list as well as
the stable tree itself.  Less obviously, we do need to prune the list of
stale entries from time to time (scan_get_next_rmap_item() does it once
each full scan): whereas stale nodes in the stable tree get naturally
pruned as searches try to brush past them, these migrate_nodes may get
forgotten and accumulate.

Signed-off-by: Hugh Dickins 
---
 mm/ksm.c |  164 +++--
 1 file changed, 134 insertions(+), 30 deletions(-)

--- mmotm.orig/mm/ksm.c 2013-01-25 14:37:03.832206218 -0800
+++ mmotm/mm/ksm.c  2013-01-25 14:37:06.880206290 -0800
@@ -122,13 +122,25 @@ struct ksm_scan {
 /**
  * struct stable_node - node of the stable rbtree
  * @node: rb node of this ksm page in the stable tree
+ * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list
+ * @list: linked into migrate_nodes, pending placement in the proper node tree
  * @hlist: hlist head of rmap_items using this ksm page
- * @kpfn: page frame number of this ksm page
+ * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid)
+ * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
  */
 struct stable_node {
-   struct rb_node node;
+   union {
+   struct rb_node node;/* when node of stable tree */
+   struct {/* when listed for migration */
+   struct list_head *head;
+   struct list_head list;
+   };
+   };
struct hlist_head hlist;
unsigned long kpfn;
+#ifdef CONFIG_NUMA
+   int nid;
+#endif
 };
 
 /**
@@ -169,6 +181,9 @@ struct rmap_item {
 static struct rb_root root_unstable_tree[MAX_NUMNODES];
 static struct rb_root root_stable_tree[MAX_NUMNODES];
 
+/* Recently migrated nodes of stable tree, pending proper placement */
+static LIST_HEAD(migrate_nodes);
+
 #define MM_SLOTS_HASH_BITS 10
 static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
 
@@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru
hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
 }
 
-static inline int in_stable_tree(struct rmap_item *rmap_item)
-{
-   return rmap_item->address & STABLE_FLAG;
-}
-
 /*
  * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
  * page tables after it has passed through ksm_exit() - which, if necessary,
@@ -476,7 +486,6 @@ static void remove_node_from_stable_tree
 {
struct rmap_item *rmap_item;
struct hlist_node *hlist;
-   int nid;
 
hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
if (rmap_item->hlist.next)
@@ -488,8 +497,11 @@ static void remove_node_from_stable_tree
cond_resched();
}
 
-   nid = get_kpfn_nid(stable_node->kpfn);
-   rb_erase(&stable_node->node, &root_stable_tree[nid]);
+   if (stable_node->head == &migrate_nodes)
+   list_del(&stable_node->list);
+   else
+   rb_erase(&stable_node->node,
+   &root_stable_tree[NUMA(stable_node->nid)]);
free_stable_node(stable_node);
 }
 
@@ -712,6 +724,7 @@ static int remove_stable_node(struct sta
 static int remove_all_stable_nodes(void)
 {
struct stable_node *stable_node;
+   struct list_head *this, *next;
  

[PATCH 8/11] ksm: make !merge_across_nodes migration safe

2013-01-25 Thread Hugh Dickins
The new KSM NUMA merge_across_nodes knob introduces a problem, when it's
set to non-default 0: if a KSM page is migrated to a different NUMA node,
how do we migrate its stable node to the right tree?  And what if that
collides with an existing stable node?

ksm_migrate_page() can do no more than it's already doing, updating
stable_node->kpfn: the stable tree itself cannot be manipulated without
holding ksm_thread_mutex.  So accept that a stable tree may temporarily
indicate a page belonging to the wrong NUMA node, leave updating until
the next pass of ksmd, just be careful not to merge other pages on to a
misplaced page.  Note nid of holding tree in stable_node, and recognize
that it will not always match nid of kpfn.

A misplaced KSM page is discovered, either when ksm_do_scan() next comes
around to one of its rmap_items (we now have to go to cmp_and_merge_page
even on pages in a stable tree), or when stable_tree_search() arrives at
a matching node for another page, and this node page is found misplaced.

In each case, move the misplaced stable_node to a list of migrate_nodes
(and use the address of migrate_nodes as magic by which to identify them):
we don't need them in a tree.  If stable_tree_search() finds no match for
a page, but it's currently exiled to this list, then slot its stable_node
right there into the tree, bringing all of its mappings with it; otherwise
they get migrated one by one to the original page of the colliding node.
stable_tree_search() is now modelled more like stable_tree_insert(),
in order to handle these insertions of migrated nodes.

remove_node_from_stable_tree(), remove_all_stable_nodes() and
ksm_check_stable_tree() have to handle the migrate_nodes list as well as
the stable tree itself.  Less obviously, we do need to prune the list of
stale entries from time to time (scan_get_next_rmap_item() does it once
each full scan): whereas stale nodes in the stable tree get naturally
pruned as searches try to brush past them, these migrate_nodes may get
forgotten and accumulate.

Signed-off-by: Hugh Dickins hu...@google.com
---
 mm/ksm.c |  164 +++--
 1 file changed, 134 insertions(+), 30 deletions(-)

--- mmotm.orig/mm/ksm.c 2013-01-25 14:37:03.832206218 -0800
+++ mmotm/mm/ksm.c  2013-01-25 14:37:06.880206290 -0800
@@ -122,13 +122,25 @@ struct ksm_scan {
 /**
  * struct stable_node - node of the stable rbtree
  * @node: rb node of this ksm page in the stable tree
+ * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list
+ * @list: linked into migrate_nodes, pending placement in the proper node tree
  * @hlist: hlist head of rmap_items using this ksm page
- * @kpfn: page frame number of this ksm page
+ * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid)
+ * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
  */
 struct stable_node {
-   struct rb_node node;
+   union {
+   struct rb_node node;/* when node of stable tree */
+   struct {/* when listed for migration */
+   struct list_head *head;
+   struct list_head list;
+   };
+   };
struct hlist_head hlist;
unsigned long kpfn;
+#ifdef CONFIG_NUMA
+   int nid;
+#endif
 };
 
 /**
@@ -169,6 +181,9 @@ struct rmap_item {
 static struct rb_root root_unstable_tree[MAX_NUMNODES];
 static struct rb_root root_stable_tree[MAX_NUMNODES];
 
+/* Recently migrated nodes of stable tree, pending proper placement */
+static LIST_HEAD(migrate_nodes);
+
 #define MM_SLOTS_HASH_BITS 10
 static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
 
@@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(stru
hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
 }
 
-static inline int in_stable_tree(struct rmap_item *rmap_item)
-{
-   return rmap_item->address & STABLE_FLAG;
-}
-
 /*
  * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
  * page tables after it has passed through ksm_exit() - which, if necessary,
@@ -476,7 +486,6 @@ static void remove_node_from_stable_tree
 {
struct rmap_item *rmap_item;
struct hlist_node *hlist;
-   int nid;
 
hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
if (rmap_item->hlist.next)
@@ -488,8 +497,11 @@ static void remove_node_from_stable_tree
cond_resched();
}
 
-   nid = get_kpfn_nid(stable_node->kpfn);
-   rb_erase(&stable_node->node, &root_stable_tree[nid]);
+   if (stable_node->head == &migrate_nodes)
+   list_del(&stable_node->list);
+   else
+   rb_erase(&stable_node->node,
+   &root_stable_tree[NUMA(stable_node->nid)]);
free_stable_node(stable_node);
 }
 
@@ -712,6 +724,7 @@ static int remove_stable_node(struct sta
 static int remove_all_stable_nodes(void)
 {
struct stable_node