Hi Maeda,

Here is a patch that should fix the hang problem you reported. Could you
try it?
Before applying this patch (fix_kernbench_pb.patch), please first revert the
previous one I sent you (reclaim_mapped_pages.patch).
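
In short, the patch tracks scan work per class and per zone: every shrink
pass accumulates pages_scanned in the ckrm_zone, and ckrm_shrink_class()
gives up on a zone (all_unreclaimable) once it has scanned more than four
times the pages its LRU lists hold. When every zone of the class has been
given up on, CLS_CONGESTION is set on the class and the allocator blocked
in __alloc_pages() fails the allocation (returns NULL) instead of waiting
on blk_congestion_wait() forever; freeing pages back into a zone resets
the bookkeeping. Here is a standalone user-space sketch of just the
give-up heuristic (struct and numbers are illustrative, not the kernel
types):

        /*
         * Model of the give-up check added to ckrm_shrink_class().
         * Plain C; compiles with any C compiler.
         */
        #include <stdio.h>

        struct czone_model {
                unsigned long nr_active;        /* pages on active list */
                unsigned long nr_inactive;      /* pages on inactive list */
                unsigned long pages_scanned;    /* scan work since last free */
                int all_unreclaimable;
        };

        /* After scanning 4x the LRU size without any page coming back
         * via free_pages_bulk(), declare the zone unreclaimable. */
        static int czone_gave_up(struct czone_model *cz)
        {
                if (cz->pages_scanned >
                    (cz->nr_active + cz->nr_inactive) * 4)
                        cz->all_unreclaimable = 1;
                return cz->all_unreclaimable;
        }

        int main(void)
        {
                struct czone_model cz = { 100, 300, 1700, 0 };

                /* 1700 > (100 + 300) * 4 == 1600: zone is given up on */
                printf("all_unreclaimable = %d\n", czone_gave_up(&cz));
                return 0;
        }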

Sometimes on my machine, under memory pressure in a class, kswapd is no
longer woken up. The patch "fix_shrink_atlimit_pb.patch" fixes this
problem.
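
That fix also converts cls->shrink_count from a plain int to an atomic_t,
presumably because ckrm_shrink_atlimit() can be entered concurrently and a
plain increment can lose updates, so the counter never behaves as the
ckrm_mem_shrink_count threshold logic expects. A minimal user-space
illustration of that lost-update race, using C11 atomics in place of the
kernel's atomic_t (all names here are illustrative; build with
"cc -pthread"):

        #include <stdatomic.h>
        #include <pthread.h>
        #include <stdio.h>

        #define THREADS 8
        #define LOOPS   100000

        static int plain_count;          /* like the old int shrink_count */
        static atomic_int atomic_count;  /* like the new atomic_t version */

        static void *shrinker(void *arg)
        {
                (void)arg;
                for (int i = 0; i < LOOPS; i++) {
                        plain_count++;                          /* racy */
                        atomic_fetch_add(&atomic_count, 1);     /* safe */
                }
                return NULL;
        }

        int main(void)
        {
                pthread_t t[THREADS];

                for (int i = 0; i < THREADS; i++)
                        pthread_create(&t[i], NULL, shrinker, NULL);
                for (int i = 0; i < THREADS; i++)
                        pthread_join(t[i], NULL);

                /* plain_count usually falls short of the expected total */
                printf("expected %d, plain %d, atomic %d\n",
                       THREADS * LOOPS, plain_count,
                       atomic_load(&atomic_count));
                return 0;
        }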

Thanks for your help,

   Valérie


diff -ruNp a/include/linux/ckrm_mem.h b/include/linux/ckrm_mem.h
--- a/include/linux/ckrm_mem.h	2006-02-01 15:42:13.000000000 +0100
+++ b/include/linux/ckrm_mem.h	2006-02-10 14:53:55.000000000 +0100
@@ -34,6 +34,8 @@ struct ckrm_zone {
 	unsigned long nr_inactive;
 	unsigned long active_over;
 	unsigned long inactive_over;
+	unsigned long pages_scanned;
+	int all_unreclaimable;
 
 	struct list_head guar_list;	/* list of all over guar classes */
 	struct zone *zone;
@@ -74,6 +76,7 @@ struct ckrm_mem_res {
 };
 
 #define CLS_AT_LIMIT		(1)
+#define CLS_CONGESTION		(2)
 
 extern struct ckrm_res_ctlr mem_rcbs;
 extern struct ckrm_mem_res *ckrm_mem_root_class;
diff -ruNp a/include/linux/ckrm_mem_inline.h b/include/linux/ckrm_mem_inline.h
--- a/include/linux/ckrm_mem_inline.h	2006-02-01 15:42:13.000000000 +0100
+++ b/include/linux/ckrm_mem_inline.h	2006-02-08 11:24:09.000000000 +0100
@@ -274,18 +274,20 @@ ckrm_zone_add_inactive(struct ckrm_zone 
 }
 
 static inline void
-ckrm_zone_sub_active(struct ckrm_zone *czone, int cnt)
+ckrm_zone_sub_active(struct ckrm_zone *czone, int cnt, int pgscanned)
 {
 	czone->nr_active -= cnt;
+	czone->pages_scanned += pgscanned;
 	sub_use_count(czone->memcls, 0, ckrm_czone_idx(czone), cnt);
 	while (cnt--)
 		kref_put(&czone->memcls->nr_users, memclass_release);
 }
 
 static inline void
-ckrm_zone_sub_inactive(struct ckrm_zone *czone, int cnt)
+ckrm_zone_sub_inactive(struct ckrm_zone *czone, int cnt, int pgscanned)
 {
 	czone->nr_inactive -= cnt;
+	czone->pages_scanned += pgscanned;
 	sub_use_count(czone->memcls, 0, ckrm_czone_idx(czone), cnt);
 	while (cnt--)
 		kref_put(&czone->memcls->nr_users, memclass_release);
@@ -394,8 +396,8 @@ static inline void ckrm_mem_dec_inactive
 
 #define ckrm_zone_add_active(a, b)	do {} while (0)
 #define ckrm_zone_add_inactive(a, b)	do {} while (0)
-#define ckrm_zone_sub_active(a, b)	do {} while (0)
-#define ckrm_zone_sub_inactive(a, b)	do {} while (0)
+#define ckrm_zone_sub_active(a, b, c)	do {} while (0)
+#define ckrm_zone_sub_inactive(a, b, c)	do {} while (0)
 #define set_page_ckrmzone(a, b)		do {} while (0)
 
 #define ckrm_class_limit_ok(a)						(1)
diff -ruNp a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c	2006-02-01 15:44:37.000000000 +0100
+++ b/mm/page_alloc.c	2006-02-10 11:06:12.000000000 +0100
@@ -379,6 +379,9 @@ free_pages_bulk(struct zone *zone, int c
 	unsigned long flags;
 	struct page *page = NULL;
 	int ret = 0;
+#ifdef CONFIG_CKRM_RES_MEM
+	struct ckrm_zone *czone;
+#endif
 
 	spin_lock_irqsave(&zone->lock, flags);
 	zone->all_unreclaimable = 0;
@@ -388,6 +391,14 @@ free_pages_bulk(struct zone *zone, int c
 		/* have to delete it as __free_pages_bulk list manipulates */
 		list_del(&page->lru);
 		__free_pages_bulk(page, zone, order);
+#ifdef CONFIG_CKRM_RES_MEM
+		if ((czone = page_ckrmzone(page))) {
+			czone->pages_scanned = 0;
+			czone->all_unreclaimable = 0;
+			if (czone->memcls)
+				clear_bit(CLS_CONGESTION, &czone->memcls->flags);
+		}
+#endif
 		ret++;
 	}
 	spin_unlock_irqrestore(&zone->lock, flags);
@@ -871,16 +882,36 @@ __alloc_pages(gfp_t gfp_mask, unsigned i
 	int do_retry;
 	int alloc_flags;
 	int did_some_progress;
+#ifdef CONFIG_CKRM_RES_MEM
+	struct ckrm_mem_res *cls = ckrm_task_memclass(p);
+	struct zone *zone;
+#endif
 
 	might_sleep_if(wait);
 
-	if (!in_interrupt() && !ckrm_class_limit_ok(ckrm_task_memclass(p))
+#ifdef CONFIG_CKRM_RES_MEM
+	if (!in_interrupt() && !ckrm_class_limit_ok(cls)
 			&& wait) {
 		/* take a nap, let kswapd refresh zone */
 		blk_congestion_wait(WRITE, HZ/50);
-		while (!ckrm_class_limit_ok(ckrm_task_memclass(p)))
+		while (!ckrm_class_limit_ok(cls)) {
 			blk_congestion_wait(WRITE, HZ/50);
+			if (test_and_clear_bit(CLS_CONGESTION, &cls->flags)) {
+				int czindex = 0;
+				for_each_zone(zone) {
+					struct ckrm_zone *czone;
+					if (zone->present_pages == 0)
+						continue;
+					czone = &cls->ckrm_zone[czindex];
+					czone->pages_scanned = 0;
+					czone->all_unreclaimable = 0;
+					czindex++;
+				}
+				return NULL;
+			}
+		}
 	}
+#endif
 
 restart:
 	z = zonelist->zones;  /* the list of zones suitable for gfp_mask */
diff -ruNp a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c	2006-02-01 15:54:01.000000000 +0100
+++ b/mm/vmscan.c	2006-02-08 15:20:11.000000000 +0100
@@ -621,7 +621,6 @@ static int isolate_lru_pages(int nr_to_s
 			continue;
 		} else {
 			list_add(&page->lru, dst);
-			set_page_ckrmzone(page, NULL);
 			nr_taken++;
 		}
 	}
@@ -665,7 +664,7 @@ static void shrink_cache(struct zone *zo
 					     inactive_list,
 					     &page_list, &nr_scan);
 		zone->nr_inactive -= nr_taken;
-		ckrm_zone_sub_inactive(ckrm_zone, nr_taken);
+		ckrm_zone_sub_inactive(ckrm_zone, nr_taken, nr_scan);
 		zone->pages_scanned += nr_scan;
 		spin_unlock_irq(&zone->lru_lock);
 
@@ -701,7 +700,6 @@ static void shrink_cache(struct zone *zo
 				zone->nr_inactive++;
 				list_add(&page->lru, inactive_list);
 			}
-			set_page_ckrmzone(page, ckrm_zone);
 			if (!pagevec_add(&pvec, page)) {
 				spin_unlock_irq(&zone->lru_lock);
 				__pagevec_release(&pvec);
@@ -766,7 +764,7 @@ refill_inactive_zone(struct zone *zone, 
 				    &l_hold, &pgscanned);
 	zone->pages_scanned += pgscanned;
 	zone->nr_active -= pgmoved;
-	ckrm_zone_sub_active(ckrm_zone, pgmoved);
+	ckrm_zone_sub_active(ckrm_zone, pgmoved, pgscanned);
 	spin_unlock_irq(&zone->lru_lock);
 
 	/*
@@ -800,6 +798,11 @@ refill_inactive_zone(struct zone *zone, 
 	if (swap_tendency >= 100)
 		reclaim_mapped = 1;
 
+#ifdef CONFIG_CKRM_RES_MEM
+	if (ckrm_zone->pages_scanned > ckrm_zone->nr_active + ckrm_zone->nr_inactive)
+		reclaim_mapped = 1;
+#endif
+
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -826,7 +829,6 @@ refill_inactive_zone(struct zone *zone, 
 		if (!TestClearPageActive(page))
 			BUG();
 		list_move(&page->lru, inactive_list);
-		set_page_ckrmzone(page, ckrm_zone);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			zone->nr_inactive += pgmoved;
@@ -857,7 +859,6 @@ refill_inactive_zone(struct zone *zone, 
 			BUG();
 		BUG_ON(!PageActive(page));
 		list_move(&page->lru, active_list);
-		set_page_ckrmzone(page, ckrm_zone);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			zone->nr_active += pgmoved;
@@ -906,6 +907,7 @@ ckrm_shrink_class(struct ckrm_mem_res *c
 	struct scan_control sc;
 	struct zone *zone;
 	int czindex = 0, cnt, act_credit = 0, inact_credit = 0;
+	int all_zones_unreclaim = 1;
 
 	sc.nr_mapped = read_page_state(nr_mapped);
 	sc.nr_scanned = 0;
@@ -927,6 +929,10 @@ ckrm_shrink_class(struct ckrm_mem_res *c
 		czone = &cls->ckrm_zone[czindex];
 		BUG_ON(czone->zone != zone);
 
+		if (czone->all_unreclaimable) {
+			czindex++;
+			continue;
+		}
 		zone->temp_priority = zone->prev_priority;
 		zone->prev_priority = sc.priority;
 
@@ -960,10 +966,20 @@ ckrm_shrink_class(struct ckrm_mem_res *c
 		if (sc.ckrm_active || sc.ckrm_inactive) {
 			sc.nr_to_reclaim = sc.ckrm_inactive;
 			shrink_ckrmzone(czone, &sc);
+			if (czone->pages_scanned >
+				(czone->nr_active + czone->nr_inactive) * 4)
+				czone->all_unreclaimable = 1;
+			else
+				all_zones_unreclaim = 0;
 		}
+		else
+			czone->all_unreclaimable = 1;
 		zone->prev_priority = zone->temp_priority;
 		czindex++;
 	}
+	if (all_zones_unreclaim)
+		/* kswapd is getting into trouble */
+		set_bit(CLS_CONGESTION, &cls->flags);
 }
 
 static void
diff -ruNp a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c	2006-02-01 15:54:01.000000000 +0100
+++ b/mm/vmscan.c	2006-02-01 15:53:43.000000000 +0100
@@ -800,6 +800,9 @@ refill_inactive_zone(struct zone *zone, 
 	if (swap_tendency >= 100)
 		reclaim_mapped = 1;
 
+	if (!ckrm_class_limit_ok(ckrm_zone->memcls))
+		reclaim_mapped = 1;
+
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
diff -ruNp a/include/linux/ckrm_mem.h b/include/linux/ckrm_mem.h
--- a/include/linux/ckrm_mem.h	2006-02-10 14:53:55.000000000 +0100
+++ b/include/linux/ckrm_mem.h	2006-02-10 14:58:16.000000000 +0100
@@ -70,7 +70,7 @@ struct ckrm_mem_res {
  	struct list_head shrink_list;	/* list of classes that are near
 				 	 * limit and need to be shrunk
 					 */
-	int shrink_count;
+	atomic_t shrink_count;
 	unsigned long last_shrink;
 	struct ckrm_zone ckrm_zone[0];	/* must be the last element */
 };
diff -ruNp a/kernel/ckrm/ckrm_memctlr.c b/kernel/ckrm/ckrm_memctlr.c
--- a/kernel/ckrm/ckrm_memctlr.c	2006-02-10 14:53:28.000000000 +0100
+++ b/kernel/ckrm/ckrm_memctlr.c	2006-02-10 14:58:33.000000000 +0100
@@ -448,10 +448,10 @@ ckrm_shrink_atlimit(struct ckrm_mem_res 
 	if (time_after(jiffies, cls->last_shrink +
 				ckrm_mem_shrink_interval * HZ)) {
 		cls->last_shrink = jiffies;
-		cls->shrink_count = 0;
+		atomic_set(&cls->shrink_count, 0);
 	}
-	cls->shrink_count++;
-	if (cls->shrink_count > ckrm_mem_shrink_count) {
+	atomic_inc(&cls->shrink_count);
+	if (atomic_read(&cls->shrink_count) > ckrm_mem_shrink_count) {
 		clear_bit(CLS_AT_LIMIT, &cls->flags);
 		return;
 	}
