Okay, in light of all the discussions we had - I reworked the patch and decided to call it nobh_write_page() support (thanks to Shaggy). This is for filesystems which don't need buffer heads and do not use page->private for anything else.
Here is the patch, without handling the "confused" case. Depending on how folks view this, I would add support to handle infinite recursion (for the "confused" case). Andrew, what do you think? Thanks, Badari
diff -Narup -X dontdiff linux-2.6.10/fs/buffer.c linux-2.6.10.nobh/fs/buffer.c --- linux-2.6.10/fs/buffer.c 2004-12-24 13:34:58.000000000 -0800 +++ linux-2.6.10.nobh/fs/buffer.c 2005-02-16 16:51:20.708492872 -0800 @@ -39,6 +39,7 @@ #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/bitops.h> +#include <linux/mpage.h> static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static void invalidate_bh_lrus(void); @@ -2492,6 +2493,55 @@ int nobh_commit_write(struct file *file, EXPORT_SYMBOL(nobh_commit_write); /* + * nobh_write_page() - based on block_full_write_page() except + * that it tries to operate without attaching bufferheads to + * the page. + */ +int nobh_write_page(struct page *page, get_block_t *get_block, + struct writeback_control *wbc) +{ + struct inode * const inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset; + void *kaddr; + + /* Is the page fully inside i_size? */ + if (page->index < end_index) { + return mpage_writepage(page, get_block, wbc); + } + + /* Is the page fully outside i_size? (truncate in progress) */ + offset = i_size & (PAGE_CACHE_SIZE-1); + if (page->index >= end_index+1 || !offset) { + /* + * The page may have dirty, unmapped buffers. For example, + * they may have been added in ext3_writepage(). Make them + * freeable here, so the page does not leak. + */ +#if 0 + /* I am not really sure, if we need this */ + do_invalidatepage(page, 0); +#endif + unlock_page(page); + return 0; /* don't care */ + } + + /* + * The page straddles i_size. It must be zeroed out on each and every + * writepage invocation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." 
+ */ + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + return mpage_writepage(page, get_block, wbc); +} +EXPORT_SYMBOL(nobh_write_page); + +/* * This function assumes that ->prepare_write() uses nobh_prepare_write(). */ int nobh_truncate_page(struct address_space *mapping, loff_t from) diff -Narup -X dontdiff linux-2.6.10/fs/ext2/inode.c linux-2.6.10.nobh/fs/ext2/inode.c --- linux-2.6.10/fs/ext2/inode.c 2004-12-24 13:33:51.000000000 -0800 +++ linux-2.6.10.nobh/fs/ext2/inode.c 2005-02-16 16:27:32.000000000 -0800 @@ -626,6 +626,12 @@ ext2_nobh_prepare_write(struct file *fil return nobh_prepare_write(page,from,to,ext2_get_block); } +static int ext2_nobh_writepage(struct page *page, + struct writeback_control *wbc) +{ + return nobh_write_page(page, ext2_get_block, wbc); +} + static sector_t ext2_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,ext2_get_block); @@ -675,7 +681,7 @@ struct address_space_operations ext2_aop struct address_space_operations ext2_nobh_aops = { .readpage = ext2_readpage, .readpages = ext2_readpages, - .writepage = ext2_writepage, + .writepage = ext2_nobh_writepage, .sync_page = block_sync_page, .prepare_write = ext2_nobh_prepare_write, .commit_write = nobh_commit_write, diff -Narup -X dontdiff linux-2.6.10/fs/jfs/inode.c linux-2.6.10.nobh/fs/jfs/inode.c --- linux-2.6.10/fs/jfs/inode.c 2004-12-24 13:33:48.000000000 -0800 +++ linux-2.6.10.nobh/fs/jfs/inode.c 2005-02-16 16:27:42.000000000 -0800 @@ -281,7 +281,7 @@ static int jfs_get_block(struct inode *i static int jfs_writepage(struct page *page, struct writeback_control *wbc) { - return block_write_full_page(page, jfs_get_block, wbc); + return nobh_write_page(page, jfs_get_block, wbc); } static int jfs_writepages(struct address_space *mapping, diff -Narup -X dontdiff linux-2.6.10/fs/mpage.c linux-2.6.10.nobh/fs/mpage.c --- 
linux-2.6.10/fs/mpage.c 2004-12-24 13:34:26.000000000 -0800 +++ linux-2.6.10.nobh/fs/mpage.c 2005-02-16 16:23:19.000000000 -0800 @@ -386,7 +386,7 @@ EXPORT_SYMBOL(mpage_readpage); * just allocate full-size (16-page) BIOs. */ static struct bio * -mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, +__mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, sector_t *last_block_in_bio, int *ret, struct writeback_control *wbc) { struct address_space *mapping = page->mapping; @@ -706,7 +706,7 @@ retry: &mapping->flags); } } else { - bio = mpage_writepage(bio, page, get_block, + bio = __mpage_writepage(bio, page, get_block, &last_block_in_bio, &ret, wbc); } if (ret || (--(wbc->nr_to_write) <= 0)) @@ -734,4 +734,21 @@ retry: mpage_bio_submit(WRITE, bio); return ret; } + +int +mpage_writepage(struct page *page, get_block_t get_block, + struct writeback_control *wbc) +{ + int ret = 0; + struct bio *bio = NULL; + sector_t last_block_in_bio = 0; + + bio = __mpage_writepage(bio, page, get_block, + &last_block_in_bio, &ret, wbc); + if (bio) + mpage_bio_submit(WRITE, bio); + + return ret; +} + EXPORT_SYMBOL(mpage_writepages); diff -Narup -X dontdiff linux-2.6.10/include/linux/buffer_head.h linux-2.6.10.nobh/include/linux/buffer_head.h --- linux-2.6.10/include/linux/buffer_head.h 2004-12-24 13:33:49.000000000 -0800 +++ linux-2.6.10.nobh/include/linux/buffer_head.h 2005-02-16 16:22:51.000000000 -0800 @@ -203,6 +203,9 @@ int file_fsync(struct file *, struct den int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*); int nobh_commit_write(struct file *, struct page *, unsigned, unsigned); int nobh_truncate_page(struct address_space *, loff_t); +int nobh_write_page(struct page *page, get_block_t *get_block, + struct writeback_control *wbc); + /* * inline definitions diff -Narup -X dontdiff linux-2.6.10/include/linux/mpage.h linux-2.6.10.nobh/include/linux/mpage.h --- linux-2.6.10/include/linux/mpage.h 2004-12-24 
13:34:32.000000000 -0800 +++ linux-2.6.10.nobh/include/linux/mpage.h 2005-02-15 16:18:50.000000000 -0800 @@ -17,6 +17,8 @@ int mpage_readpages(struct address_space int mpage_readpage(struct page *page, get_block_t get_block); int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); +int mpage_writepage(struct page *page, get_block_t *get_block, + struct writeback_control *wbc); static inline int generic_writepages(struct address_space *mapping, struct writeback_control *wbc)