On Tue, 24 Apr 2001, Linus Torvalds wrote:
> Basically, I don't want to mix synchronous and asynchronous
> interfaces. Everything should be asynchronous by default, and waiting
> should be explicit.
The following patch changes all swap IO functions to be asynchronous by
default and changes the callers to wait when needed (except
rw_swap_page_base which will block writers if there are too many in flight
swap IOs).
Ingo's find_get_swapcache_page() does not wait on locked pages anymore,
which is now up to the callers.
time make -j32 test with 4 CPUs machine, 128M ram and 128M swap:
pre5
----
228.04user 28.14system 5:16.52elapsed 80%CPU (0avgtext+0avgdata
0maxresident)k
0inputs+0outputs (525113major+678617minor)pagefaults 0swaps
pre5 + attached patch
--------------------
227.18user 25.49system 3:40.53elapsed 114%CPU (0avgtext+0avgdata
0maxresident)k
0inputs+0outputs (495387major+644924minor)pagefaults 0swaps
Comments?
diff -Nur linux.orig/include/linux/pagemap.h linux/include/linux/pagemap.h
--- linux.orig/include/linux/pagemap.h Wed Apr 25 00:51:36 2001
+++ linux/include/linux/pagemap.h Wed Apr 25 00:53:04 2001
@@ -77,7 +77,12 @@
unsigned long index, struct page **hash);
extern void lock_page(struct page *page);
#define find_lock_page(mapping, index) \
- __find_lock_page(mapping, index, page_hash(mapping, index))
+ __find_lock_page(mapping, index, page_hash(mapping, index))
+
+extern struct page * __find_get_swapcache_page (struct address_space * mapping,
+ unsigned long index, struct page **hash);
+#define find_get_swapcache_page(mapping, index) \
+ __find_get_swapcache_page(mapping, index, page_hash(mapping, index))
extern void __add_page_to_hash_queue(struct page * page, struct page **p);
diff -Nur linux.orig/include/linux/swap.h linux/include/linux/swap.h
--- linux.orig/include/linux/swap.h Wed Apr 25 00:51:36 2001
+++ linux/include/linux/swap.h Wed Apr 25 00:53:04 2001
@@ -111,8 +111,8 @@
extern int try_to_free_pages(unsigned int gfp_mask);
/* linux/mm/page_io.c */
-extern void rw_swap_page(int, struct page *, int);
-extern void rw_swap_page_nolock(int, swp_entry_t, char *, int);
+extern void rw_swap_page(int, struct page *);
+extern void rw_swap_page_nolock(int, swp_entry_t, char *);
/* linux/mm/page_alloc.c */
@@ -121,8 +121,7 @@
extern void add_to_swap_cache(struct page *, swp_entry_t);
extern int swap_check_entry(unsigned long);
extern struct page * lookup_swap_cache(swp_entry_t);
-extern struct page * read_swap_cache_async(swp_entry_t, int);
-#define read_swap_cache(entry) read_swap_cache_async(entry, 1);
+extern struct page * read_swap_cache_async(swp_entry_t);
/* linux/mm/oom_kill.c */
extern int out_of_memory(void);
diff -Nur linux.orig/mm/filemap.c linux/mm/filemap.c
--- linux.orig/mm/filemap.c Wed Apr 25 00:51:35 2001
+++ linux/mm/filemap.c Wed Apr 25 00:53:04 2001
@@ -678,6 +678,34 @@
}
/*
+ * Find a swapcache page (and get a reference) or return NULL.
+ * The SwapCache check is protected by the pagecache lock.
+ */
+struct page * __find_get_swapcache_page(struct address_space *mapping,
+ unsigned long offset, struct page **hash)
+{
+ struct page *page;
+
+ /*
+ * We need the LRU lock to protect against page_launder().
+ */
+
+ spin_lock(&pagecache_lock);
+ page = __find_page_nolock(mapping, offset, *hash);
+ if (page) {
+ spin_lock(&pagemap_lru_lock);
+ if (PageSwapCache(page))
+ page_cache_get(page);
+ else
+ page = NULL;
+ spin_unlock(&pagemap_lru_lock);
+ }
+ spin_unlock(&pagecache_lock);
+
+ return page;
+}
+
+/*
* Same as the above, but lock the page too, verifying that
* it's still valid once we own it.
*/
diff -Nur linux.orig/mm/memory.c linux/mm/memory.c
--- linux.orig/mm/memory.c Wed Apr 25 00:51:35 2001
+++ linux/mm/memory.c Wed Apr 25 00:53:04 2001
@@ -1040,7 +1040,7 @@
break;
}
/* Ok, do the async read-ahead now */
- new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset),
0);
+ new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset));
if (new_page != NULL)
page_cache_release(new_page);
swap_free(SWP_ENTRY(SWP_TYPE(entry), offset));
@@ -1063,13 +1063,13 @@
if (!page) {
lock_kernel();
swapin_readahead(entry);
- page = read_swap_cache(entry);
+ page = read_swap_cache_async(entry);
unlock_kernel();
if (!page) {
spin_lock(&mm->page_table_lock);
return -1;
}
-
+ wait_on_page(page);
flush_page_to_ram(page);
flush_icache_page(vma, page);
}
diff -Nur linux.orig/mm/page_io.c linux/mm/page_io.c
--- linux.orig/mm/page_io.c Wed Apr 25 00:51:35 2001
+++ linux/mm/page_io.c Wed Apr 25 00:53:04 2001
@@ -33,7 +33,7 @@
* that shared pages stay shared while being swapped.
*/
-static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page, int wait)
+static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page)
{
unsigned long offset;
int zones[PAGE_SIZE/512];
@@ -41,6 +41,7 @@
kdev_t dev = 0;
int block_size;
struct inode *swapf = 0;
+ int wait = 0;
/* Don't allow too many pending pages in flight.. */
if ((rw == WRITE) && atomic_read(&nr_async_pages) >
@@ -104,7 +105,7 @@
* - it's marked as being swap-cache
* - it's associated with the swap inode
*/
-void rw_swap_page(int rw, struct page *page, int wait)
+void rw_swap_page(int rw, struct page *page)
{
swp_entry_t entry;
@@ -116,7 +117,7 @@
PAGE_BUG(page);
if (page->mapping != &swapper_space)
PAGE_BUG(page);
- if (!rw_swap_page_base(rw, entry, page, wait))
+ if (!rw_swap_page_base(rw, entry, page))
UnlockPage(page);
}
@@ -125,7 +126,7 @@
* Therefore we can't use it. Later when we can remove the need for the
* lock map and we can reduce the number of functions exported.
*/
-void rw_swap_page_nolock(int rw, swp_entry_t entry, char *buf, int wait)
+void rw_swap_page_nolock(int rw, swp_entry_t entry, char *buf)
{
struct page *page = virt_to_page(buf);
@@ -137,7 +138,8 @@
PAGE_BUG(page);
/* needs sync_page to wait I/O completation */
page->mapping = &swapper_space;
- if (!rw_swap_page_base(rw, entry, page, wait))
+ if (!rw_swap_page_base(rw, entry, page))
UnlockPage(page);
+ wait_on_page(page);
page->mapping = NULL;
}
diff -Nur linux.orig/mm/shmem.c linux/mm/shmem.c
--- linux.orig/mm/shmem.c Wed Apr 25 00:51:35 2001
+++ linux/mm/shmem.c Wed Apr 25 00:53:04 2001
@@ -124,6 +124,7 @@
*ptr = (swp_entry_t){0};
freed++;
if ((page = lookup_swap_cache(entry)) != NULL) {
+ wait_on_page(page);
delete_from_swap_cache(page);
page_cache_release(page);
}
@@ -329,10 +330,11 @@
spin_unlock (&info->lock);
lock_kernel();
swapin_readahead(*entry);
- page = read_swap_cache(*entry);
+ page = read_swap_cache_async(*entry);
unlock_kernel();
if (!page)
return ERR_PTR(-ENOMEM);
+ wait_on_page(page);
if (!Page_Uptodate(page)) {
page_cache_release(page);
return ERR_PTR(-EIO);
diff -Nur linux.orig/mm/swap_state.c linux/mm/swap_state.c
--- linux.orig/mm/swap_state.c Wed Apr 25 00:51:35 2001
+++ linux/mm/swap_state.c Wed Apr 25 00:53:04 2001
@@ -34,7 +34,7 @@
return 0;
in_use:
- rw_swap_page(WRITE, page, 0);
+ rw_swap_page(WRITE, page);
return 0;
}
@@ -163,37 +163,18 @@
/*
* Right now the pagecache is 32-bit only. But it's a 32 bit index. =)
*/
-repeat:
- found = find_lock_page(&swapper_space, entry.val);
+ found = find_get_swapcache_page(&swapper_space, entry.val);
if (!found)
return 0;
- /*
- * Though the "found" page was in the swap cache an instant
- * earlier, it might have been removed by refill_inactive etc.
- * Re search ... Since find_lock_page grabs a reference on
- * the page, it can not be reused for anything else, namely
- * it can not be associated with another swaphandle, so it
- * is enough to check whether the page is still in the scache.
- */
- if (!PageSwapCache(found)) {
- UnlockPage(found);
- page_cache_release(found);
- goto repeat;
- }
+ if (!PageSwapCache(found))
+ BUG();
if (found->mapping != &swapper_space)
- goto out_bad;
+ BUG();
#ifdef SWAP_CACHE_INFO
swap_cache_find_success++;
#endif
- UnlockPage(found);
return found;
}
-
-out_bad:
- printk (KERN_ERR "VM: Found a non-swapper swap page!\n");
- UnlockPage(found);
- page_cache_release(found);
- return 0;
}
/*
@@ -205,7 +186,7 @@
* the swap entry is no longer in use.
*/
-struct page * read_swap_cache_async(swp_entry_t entry, int wait)
+struct page * read_swap_cache_async(swp_entry_t entry)
{
struct page *found_page = 0, *new_page;
unsigned long new_page_addr;
@@ -238,7 +219,7 @@
*/
lock_page(new_page);
add_to_swap_cache(new_page, entry);
- rw_swap_page(READ, new_page, wait);
+ rw_swap_page(READ, new_page);
return new_page;
out_free_page:
diff -Nur linux.orig/mm/swapfile.c linux/mm/swapfile.c
--- linux.orig/mm/swapfile.c Wed Apr 25 00:51:35 2001
+++ linux/mm/swapfile.c Wed Apr 25 00:53:24 2001
@@ -369,13 +369,15 @@
/* Get a page for the entry, using the existing swap
cache page if there is one. Otherwise, get a clean
page and read the swap into it. */
- page = read_swap_cache(entry);
+ page = read_swap_cache_async(entry);
if (!page) {
swap_free(entry);
return -ENOMEM;
}
+ lock_page(page);
if (PageSwapCache(page))
- delete_from_swap_cache(page);
+ delete_from_swap_cache_nolock(page);
+ UnlockPage(page);
read_lock(&tasklist_lock);
for_each_task(p)
unuse_process(p->mm, entry, page);
@@ -650,7 +652,7 @@
}
lock_page(virt_to_page(swap_header));
- rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header, 1);
+ rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header);
if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
swap_header_version = 1;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/