[PATCH][for -mm] per-zone and reclaim enhancements for memory controller take 3 [10/10] per-zone-lock for cgroup

2007-11-26  KAMEZAWA Hiroyuki
Now, the LRU is per-zone.

Then, lru_lock can be (should be) per-zone, too.
This patch implements a per-zone lru_lock.

lru_lock is placed into the mem_cgroup_per_zone struct.

lock can be accessed by
   mz = mem_cgroup_zoneinfo(mem_cgroup, node, zone);
   &mz->lru_lock

   or
   mz = page_cgroup_zoneinfo(page_cgroup);
   &mz->lru_lock


Signed-off-by: KAMEZAWA hiroyuki <[EMAIL PROTECTED]>
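For illustration only, here is a minimal sketch (not part of the patch) of how callers are expected to use the new per-zone lock; the helper name example_move_lists is hypothetical and simply mirrors the pattern used throughout the diff below:

    /*
     * Hypothetical helper: every per-cgroup LRU update now nests inside the
     * zone's lru_lock instead of the single per-mem_cgroup lock.
     */
    static void example_move_lists(struct page_cgroup *pc, bool active)
    {
            struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
            unsigned long flags;

            /* irq-safe variant: the lock is also taken from uncharge context */
            spin_lock_irqsave(&mz->lru_lock, flags);
            __mem_cgroup_move_lists(pc, active);
            spin_unlock_irqrestore(&mz->lru_lock, flags);
    }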

 mm/memcontrol.c |   71 ++--
 1 file changed, 44 insertions(+), 27 deletions(-)

Index: linux-2.6.24-rc3-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.24-rc3-mm1.orig/mm/memcontrol.c   2007-11-27 11:24:16.000000000 +0900
+++ linux-2.6.24-rc3-mm1/mm/memcontrol.c        2007-11-27 11:24:22.000000000 +0900
@@ -89,6 +89,10 @@
 };
 
 struct mem_cgroup_per_zone {
+   /*
+    * spin_lock to protect the per cgroup LRU
+    */
+   spinlock_t  lru_lock;
struct list_head    active_list;
struct list_head    inactive_list;
unsigned long count[NR_MEM_CGROUP_ZSTAT];
@@ -126,10 +130,7 @@
 * per zone LRU lists.
 */
struct mem_cgroup_lru_info info;
-   /*
-    * spin_lock to protect the per cgroup LRU
-    */
-   spinlock_t lru_lock;
+
unsigned long control_type; /* control RSS or RSS+Pagecache */
int prev_priority;  /* for recording reclaim priority */
/*
@@ -410,15 +411,16 @@
  */
 void mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
-   struct mem_cgroup *mem;
+   struct mem_cgroup_per_zone *mz;
+   unsigned long flags;
+
if (!pc)
return;
 
-   mem = pc->mem_cgroup;
-
-   spin_lock(&mem->lru_lock);
+   mz = page_cgroup_zoneinfo(pc);
+   spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_move_lists(pc, active);
-   spin_unlock(&mem->lru_lock);
+   spin_unlock_irqrestore(&mz->lru_lock, flags);
 }
 
 /*
@@ -528,7 +530,7 @@
src = &mz->inactive_list;
 
 
-   spin_lock(&mem_cont->lru_lock);
+   spin_lock(&mz->lru_lock);
scan = 0;
list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
if (scan >= nr_to_scan)
@@ -558,7 +560,7 @@
}
 
list_splice(&pc_list, src);
-   spin_unlock(&mem_cont->lru_lock);
+   spin_unlock(&mz->lru_lock);
 
*scanned = scan;
return nr_taken;
@@ -577,6 +579,7 @@
struct page_cgroup *pc;
unsigned long flags;
unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+   struct mem_cgroup_per_zone *mz;
 
/*
 * Should page_cgroup's go to their own slab?
@@ -688,10 +691,11 @@
goto retry;
}
 
-   spin_lock_irqsave(&mem->lru_lock, flags);
+   mz = page_cgroup_zoneinfo(pc);
+   spin_lock_irqsave(&mz->lru_lock, flags);
/* Update statistics vector */
__mem_cgroup_add_list(pc);
-   spin_unlock_irqrestore(&mem->lru_lock, flags);
+   spin_unlock_irqrestore(&mz->lru_lock, flags);
 
 done:
return 0;
@@ -733,6 +737,7 @@
 void mem_cgroup_uncharge(struct page_cgroup *pc)
 {
struct mem_cgroup *mem;
+   struct mem_cgroup_per_zone *mz;
struct page *page;
unsigned long flags;
 
@@ -745,6 +750,7 @@
 
if (atomic_dec_and_test(&pc->ref_cnt)) {
page = pc->page;
+   mz = page_cgroup_zoneinfo(pc);
/*
 * get page->cgroup and clear it under lock.
 * force_empty can drop page->cgroup without checking refcnt.
@@ -753,9 +759,9 @@
mem = pc->mem_cgroup;
css_put(&mem->css);
res_counter_uncharge(&mem->res, PAGE_SIZE);
-   spin_lock_irqsave(&mem->lru_lock, flags);
+   spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_remove_list(pc);
-   spin_unlock_irqrestore(&mem->lru_lock, flags);
+   spin_unlock_irqrestore(&mz->lru_lock, flags);
kfree(pc);
}
}
@@ -794,24 +800,29 @@
struct page_cgroup *pc;
struct mem_cgroup *mem;
unsigned long flags;
+   struct mem_cgroup_per_zone *mz;
 retry:
pc = page_get_page_cgroup(page);
if (!pc)
return;
mem = pc->mem_cgroup;
+   mz = page_cgroup_zoneinfo(pc);
if (clear_page_cgroup(page, pc) != pc)
goto retry;
-
-   spin_lock_irqsave(&mem->lru_lock, flags);
+   spin_lock_irqsave(&mz->lru_lock, flags);
 
__mem_cgroup_remove_list(pc);
+   spin_unlock_irqrestore(&mz->lru_lock, flags);
+
pc->page = newpage;
lock_page_cgroup(newpage);
page_assign_page_cgroup(newpage, pc);
unlock_page_cgroup(newpage);
-   __mem_cgroup_add_list(pc);
 
-   spin_unlock_irqrestore(&mem->lru_lock, flags);
+   mz = page_cgroup_zoneinfo(pc);
+   spin_lock_irqsave(&mz->lru_lock, flags);
+   __mem_cgroup_add_list(pc);
+   spin_unlock_irqrestore(&mz->lru_lock, flags);
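A side note on the migration hunk just above: the old code held a single mem->lru_lock across the whole move, while the new code drops the per-zone lock after removing pc and looks the zone info up again once pc->page points at newpage, because the old page and the new page may live in different zones and therefore use different locks. A minimal sketch of that ordering, for illustration only (the helper name example_migrate_pc is hypothetical, and the page_cgroup locking around the reassignment is omitted):

    static void example_migrate_pc(struct page_cgroup *pc, struct page *newpage)
    {
            struct mem_cgroup_per_zone *mz;
            unsigned long flags;

            mz = page_cgroup_zoneinfo(pc);          /* zone of the old page */
            spin_lock_irqsave(&mz->lru_lock, flags);
            __mem_cgroup_remove_list(pc);
            spin_unlock_irqrestore(&mz->lru_lock, flags);

            pc->page = newpage;                     /* re-point pc at the new page */

            mz = page_cgroup_zoneinfo(pc);          /* may be a different zone now */
            spin_lock_irqsave(&mz->lru_lock, flags);
            __mem_cgroup_add_list(pc);
            spin_unlock_irqrestore(&mz->lru_lock, flags);
    }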
