The attached vmfixes-B2 patch adds the following fixes/cleanups:

vmscan.c:

 - check for __GFP_WAIT, not __GFP_IO, when yielding the CPU. This
   fixes GFP_BUFFER deadlocks. In fact, since no caller of
   do_try_to_free_pages() can expect that function not to block, we do
   not test for __GFP_WAIT either. [GFP_KSWAPD is the only caller
   without __GFP_WAIT set.]

 - do shrink_[d|i]cache_memory() even if !__GFP_IO. This improves
   balance.

 - push the __GFP_IO test into shm_swap().

 - after shm_swap(), do not test for !count but for count <= 0: count
   could go negative if the shrink_ functions are ever changed to
   return more than 1, and we could then get into an infinite loop.
   The same applies after swap_out() and refill_inactive_scan(). There
   is no performance penalty; a test for zero is exchanged for a test
   of the sign. (A standalone sketch of the hazard follows this
   summary.)

 - kmem_cache_reap() is done within refill_inactive(), so it is
   unnecessary to call it at the beginning of do_try_to_free_pages().
   Moved to the else branch. (I saw kmem_cache_reap() show up in
   profiles.)

 - (small code-style cleanup.)

page_alloc.c:

 - in __alloc_pages(), the infinite allocation loop now yields the CPU
   if necessary. This prevents a potential lockup on UP, and even on
   SMP it can prevent livelocks. (I saw this happen.) (A userspace
   sketch of this pattern follows the patch.)

mm.h:

 - made the GFP_ flag definitions easier to parse for humans :-)

 - removed the shrink_mmap() prototype; the function does not exist
   anymore.

shm.c:

 - the trivial test for __GFP_IO.

swap_state.c, filemap.c:

 - (shrink_mmap does not exist anymore; it is refill_inactive now.)

(The patch applies and compiles cleanly, and is tested under the
various VM loads I use.)

	Ingo
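As an aside, here is a minimal standalone sketch of the sign-test
hazard described above (not part of the patch; shrink_demo() is a
made-up stand-in for a shrink_ function that reports more than one
unit of progress):

#include <stdio.h>

/* Hypothetical shrinker: pretend it freed three pages at once. */
static int shrink_demo(void)
{
	return 3;
}

int main(void)
{
	int count = 2;

	/* A bulk shrinker can push count below zero in one step. */
	count -= shrink_demo();		/* count is now -1 */

	/*
	 * "if (!--count)" only fires when count passes through exactly
	 * zero; from -1 it steps to -2, -3, ... and a loop guarded by
	 * it would spin forever. "if (--count <= 0)" catches the
	 * overshoot immediately.
	 */
	if (--count <= 0)
		printf("overshoot caught, count = %d\n", count);

	return 0;
}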
--- linux/mm/vmscan.c.orig Sun Sep 24 11:41:38 2000 +++ linux/mm/vmscan.c Sun Sep 24 12:20:27 2000 @@ -119,7 +119,7 @@ * our scan. * * Basically, this just makes it possible for us to do - * some real work in the future in "shrink_mmap()". + * some real work in the future in "refill_inactive()". */ if (!pte_dirty(pte)) { flush_cache_page(vma, address); @@ -159,7 +159,7 @@ * NOTE NOTE NOTE! This should just set a * dirty bit in 'page', and just drop the * pte. All the hard work would be done by - * shrink_mmap(). + * refill_inactive(). * * That would get rid of a lot of problems. */ @@ -891,7 +891,7 @@ do { made_progress = 0; - if (current->need_resched && (gfp_mask & __GFP_IO)) { + if (current->need_resched) { __set_current_state(TASK_RUNNING); schedule(); } @@ -899,34 +899,32 @@ while (refill_inactive_scan(priority, 1) || swap_out(priority, gfp_mask, idle_time)) { made_progress = 1; - if (!--count) + if (--count <= 0) goto done; } - /* Try to get rid of some shared memory pages.. */ - if (gfp_mask & __GFP_IO) { - /* - * don't be too light against the d/i cache since - * shrink_mmap() almost never fail when there's - * really plenty of memory free. - */ - count -= shrink_dcache_memory(priority, gfp_mask); - count -= shrink_icache_memory(priority, gfp_mask); - /* - * Not currently working, see fixme in shrink_?cache_memory - * In the inner funtions there is a comment: - * "To help debugging, a zero exit status indicates - * all slabs were released." (-arca?) - * lets handle it in a primitive but working way... - * if (count <= 0) - * goto done; - */ + /* + * don't be too light against the d/i cache since + * refill_inactive() almost never fail when there's + * really plenty of memory free. + */ + count -= shrink_dcache_memory(priority, gfp_mask); + count -= shrink_icache_memory(priority, gfp_mask); + /* + * Not currently working, see fixme in shrink_?cache_memory + * In the inner funtions there is a comment: + * "To help debugging, a zero exit status indicates + * all slabs were released." (-arca?) + * lets handle it in a primitive but working way... + * if (count <= 0) + * goto done; + */ - while (shm_swap(priority, gfp_mask)) { - made_progress = 1; - if (!--count) - goto done; - } + /* Try to get rid of some shared memory pages.. */ + while (shm_swap(priority, gfp_mask)) { + made_progress = 1; + if (--count <= 0) + goto done; } /* @@ -934,7 +932,7 @@ */ while (swap_out(priority, gfp_mask, 0)) { made_progress = 1; - if (!--count) + if (--count <= 0) goto done; } @@ -955,9 +953,9 @@ priority--; } while (priority >= 0); - /* Always end on a shrink_mmap.., may sleep... */ + /* Always end on a refill_inactive.., may sleep... */ while (refill_inactive_scan(0, 1)) { - if (!--count) + if (--count <= 0) goto done; } @@ -970,11 +968,6 @@ int ret = 0; /* - * First, reclaim unused slab cache memory. - */ - kmem_cache_reap(gfp_mask); - - /* * If we're low on free pages, move pages from the * inactive_dirty list to the inactive_clean list. * @@ -992,13 +985,14 @@ * the inode and dentry cache whenever we do this. */ if (free_shortage() || inactive_shortage()) { - if (gfp_mask & __GFP_IO) { - ret += shrink_dcache_memory(6, gfp_mask); - ret += shrink_icache_memory(6, gfp_mask); - } - + ret += shrink_dcache_memory(6, gfp_mask); + ret += shrink_icache_memory(6, gfp_mask); ret += refill_inactive(gfp_mask, user); } else { + /* + * Reclaim unused slab cache memory. 
+ */ + kmem_cache_reap(gfp_mask); ret = 1; } @@ -1153,9 +1147,8 @@ { int ret = 1; - if (gfp_mask & __GFP_WAIT) { + if (gfp_mask & __GFP_WAIT) ret = do_try_to_free_pages(gfp_mask, 1); - } return ret; } --- linux/mm/page_alloc.c.orig Sun Sep 24 11:44:59 2000 +++ linux/mm/page_alloc.c Sun Sep 24 11:52:00 2000 @@ -444,6 +444,13 @@ * processes, etc). */ if (gfp_mask & __GFP_WAIT) { + /* + * Give other processes a chance to run: + */ + if (current->need_resched) { + __set_current_state(TASK_RUNNING); + schedule(); + } try_to_free_pages(gfp_mask); memory_pressure++; goto try_again; --- linux/mm/filemap.c.orig Sun Sep 24 12:20:35 2000 +++ linux/mm/filemap.c Sun Sep 24 12:20:48 2000 @@ -1925,10 +1925,10 @@ * Application no longer needs these pages. If the pages are dirty, * it's OK to just throw them away. The app will be more careful about * data it wants to keep. Be sure to free swap resources too. The - * zap_page_range call sets things up for shrink_mmap to actually free + * zap_page_range call sets things up for refill_inactive to actually free * these pages later if no one else has touched them in the meantime, * although we could add these pages to a global reuse list for - * shrink_mmap to pick up before reclaiming other pages. + * refill_inactive to pick up before reclaiming other pages. * * NB: This interface discards data rather than pushes it out to swap, * as some implementations do. This has performance implications for --- linux/mm/swap_state.c.orig Sun Sep 24 12:21:02 2000 +++ linux/mm/swap_state.c Sun Sep 24 12:21:13 2000 @@ -166,7 +166,7 @@ return 0; /* * Though the "found" page was in the swap cache an instant - * earlier, it might have been removed by shrink_mmap etc. + * earlier, it might have been removed by refill_inactive etc. * Re search ... Since find_lock_page grabs a reference on * the page, it can not be reused for anything else, namely * it can not be associated with another swaphandle, so it --- linux/include/linux/mm.h.orig Sun Sep 24 11:46:37 2000 +++ linux/include/linux/mm.h Sun Sep 24 12:21:54 2000 @@ -441,7 +441,6 @@ /* filemap.c */ extern void remove_inode_page(struct page *); extern unsigned long page_unuse(struct page *); -extern int shrink_mmap(int, int); extern void truncate_inode_pages(struct address_space *, loff_t); /* generic vm_area_ops exported for stackable file systems */ @@ -469,11 +468,11 @@ #define GFP_BUFFER (__GFP_HIGH | __GFP_WAIT) #define GFP_ATOMIC (__GFP_HIGH) -#define GFP_USER (__GFP_WAIT | __GFP_IO) -#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) +#define GFP_USER ( __GFP_WAIT | __GFP_IO) +#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) #define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO) #define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO) -#define GFP_KSWAPD (__GFP_IO) +#define GFP_KSWAPD ( __GFP_IO) /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some platforms, used as appropriate on others */ --- linux/ipc/shm.c.orig Sun Sep 24 11:45:16 2000 +++ linux/ipc/shm.c Sun Sep 24 11:53:59 2000 @@ -1536,6 +1536,12 @@ int counter; struct page * page_map; + /* + * Push this inside: + */ + if (!(gfp_mask & __GFP_IO)) + return 0; + zshm_swap(prio, gfp_mask); counter = shm_rss >> prio; if (!counter)
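For readers unfamiliar with the yield-before-retry pattern added to
__alloc_pages(), here is a rough userspace analogue (again, not part
of the patch): sched_yield() stands in for the kernel's
need_resched/schedule() pair, and try_alloc()/reclaim() are
hypothetical stubs.

#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical allocator that fails the first few attempts. */
static void *try_alloc(size_t size)
{
	static int pressure = 3;
	return pressure-- > 0 ? NULL : malloc(size);
}

/* Hypothetical reclaim step; the kernel frees pages here instead. */
static void reclaim(void)
{
}

static void *alloc_retry(size_t size)
{
	void *p;

	while ((p = try_alloc(size)) == NULL) {
		/*
		 * Give other processes a chance to run before
		 * retrying, so a tight allocation loop cannot
		 * monopolize the CPU (the UP lockup / SMP livelock
		 * described above).
		 */
		sched_yield();
		reclaim();
	}
	return p;
}

int main(void)
{
	free(alloc_retry(64));
	printf("allocation eventually succeeded\n");
	return 0;
}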