On Mon, Feb 14, 2005 at 03:02:56PM -0500, Sonny Rao wrote:
> On Fri, Feb 11, 2005 at 04:09:46PM -0800, Badari Pulavarty wrote:
> > On Fri, 2005-02-11 at 15:58, Andrew Morton wrote:
> > > Badari Pulavarty <[EMAIL PROTECTED]> wrote:
> > > >
> > > > Due to lack of interesting suggestions to solve
> > > > mpage_writepages() -> ext3_writeback_writepage() problem,
> > > > I fixed it in the dumbest possible way.
> > >
> > > I've actually forgotten what the problem was. It was 100 patches ago :(
> >
> > The problem was
> > ext3_writeback_writepages() -> mpage_writepages() could
> > call back ext3_writeback_writepage() in the "confused" case.
> > ext3_writeback_writepage() could end up doing nothing, since the
> > we already have a journal handle.
> >
> > I added a writepage_helper to handle this case.
> >
> > >
> > > > Let me know, what you think.
> > >
> > >
> > > If it works, let's get some benchmark numbers so we can decide whether it
> > > justifies more development?
> > >
> >
> > Yep. I will get some numbers to see ..
> >
>
> I'm helping Badari collecting data on this...
>
> My setup is a P4 2.0Ghz booted with 1GB of RAM and 1 cpu attached via
> Fiber to a seven disk raid0 array with write-caching turned off
> (write-cacheing can skew numbers significantly if you aren't careful
> to let the cache drain between runs, etc.)
>
> The test is a single-threaded sequential overwrite of a 20GB data set
> divided into 512MB files which are selected randomly and overwritten.
>
> All of these numbers represent the average of three five-minute runs.
>
> All ext3 tests are in writeback mode
>
> FS Throughput Cpu Utilizaiton
> -- ---------- ---------------
> Ext3 78 MB/sec 75.9 %
> Ext3 + wpages 85 MB/sec 74.7 %
>
> Just for comparison:
>
> Ext2 88.5 MB/sec 74.2 %
> Ext2 + nobh 89.6 MB/sec 71.7 %
>
> JFS 94.8 MB/sec 85.6 %
> XFS 100 MB/sec 95.5 %
>
>
> So, Badari's writepages patch improves performance on this particular
> setup by almost 10 %
>
>
> I can rerun with more processors/ram, or different disk configurations
> if anyone is interested.
>
> Sonny
One other detail,
I had to fix the patch, for some reason it was malformed ?
I've attached my version of the patch.
Sonny
diff -Naurp linux-2.6.10-orig/fs/ext3/inode.c linux-2.6.10/fs/ext3/inode.c
--- linux-2.6.10-orig/fs/ext3/inode.c 2005-01-21 00:51:27.000000000 -0600
+++ linux-2.6.10/fs/ext3/inode.c 2005-02-14 11:36:52.387332295 -0600
@@ -856,6 +856,12 @@ get_block:
return ret;
}
+static int ext3_writepages_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh, int create)
+{
+ return ext3_direct_io_get_blocks(inode, iblock, 1, bh, create);
+}
+
/*
* `handle' can be NULL if create is zero
*/
@@ -1321,6 +1327,44 @@ out_fail:
return ret;
}
+static int ext3_writeback_writepage_helper(struct page *page,
+ struct writeback_control *wbc)
+{
+ return block_write_full_page(page, ext3_get_block, wbc);
+}
+
+static int
+ext3_writeback_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = mapping->host;
+ handle_t *handle = NULL;
+ int err, ret = 0;
+
+ if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+ return ret;
+
+ handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ return ret;
+ }
+
+ ret = __mpage_writepages(mapping, wbc, ext3_writepages_get_block,
+ ext3_writeback_writepage_helper);
+
+ /*
+ * Need to reaquire the handle since ext3_writepages_get_block()
+ * can restart the handle
+ */
+ handle = journal_current_handle();
+
+ err = ext3_journal_stop(handle);
+ if (!ret)
+ ret = err;
+ return ret;
+}
+
static int ext3_writeback_writepage(struct page *page,
struct writeback_control *wbc)
{
@@ -1552,6 +1596,7 @@ static struct address_space_operations e
.readpage = ext3_readpage,
.readpages = ext3_readpages,
.writepage = ext3_writeback_writepage,
+ .writepages = ext3_writeback_writepages,
.sync_page = block_sync_page,
.prepare_write = ext3_prepare_write,
.commit_write = ext3_writeback_commit_write,
diff -Naurp linux-2.6.10-orig/fs/mpage.c linux-2.6.10/fs/mpage.c
--- linux-2.6.10-orig/fs/mpage.c 2004-10-18 16:53:43.000000000 -0500
+++ linux-2.6.10/fs/mpage.c 2005-02-14 11:36:52.383332918 -0600
@@ -387,7 +387,8 @@ EXPORT_SYMBOL(mpage_readpage);
*/
static struct bio *
mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
- sector_t *last_block_in_bio, int *ret, struct writeback_control *wbc)
+ sector_t *last_block_in_bio, int *ret, struct writeback_control *wbc,
+ writepage_t writepage_helper)
{
struct address_space *mapping = page->mapping;
struct inode *inode = page->mapping->host;
@@ -580,7 +581,7 @@ alloc_new:
confused:
if (bio)
bio = mpage_bio_submit(WRITE, bio);
- *ret = page->mapping->a_ops->writepage(page, wbc);
+ *ret = writepage_helper(page, wbc);
/*
* The caller has a ref on the inode, so *mapping is stable
*/
@@ -619,6 +620,15 @@ int
mpage_writepages(struct address_space *mapping,
struct writeback_control *wbc, get_block_t get_block)
{
+ return __mpage_writepages(mapping, wbc, get_block,
+ mapping->a_ops->writepage);
+}
+
+int
+__mpage_writepages(struct address_space *mapping,
+ struct writeback_control *wbc, get_block_t get_block,
+ writepage_t writepage_helper)
+{
struct backing_dev_info *bdi = mapping->backing_dev_info;
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
@@ -707,7 +717,8 @@ retry:
}
} else {
bio = mpage_writepage(bio, page, get_block,
- &last_block_in_bio, &ret, wbc);
+ &last_block_in_bio, &ret, wbc,
+ writepage_helper);
}
if (ret || (--(wbc->nr_to_write) <= 0))
done = 1;
@@ -735,3 +746,4 @@ retry:
return ret;
}
EXPORT_SYMBOL(mpage_writepages);
+EXPORT_SYMBOL(__mpage_writepages);
diff -Naurp linux-2.6.10-orig/include/linux/fs.h linux-2.6.10/include/linux/fs.h
--- linux-2.6.10-orig/include/linux/fs.h 2005-01-21 00:51:40.000000000
-0600
+++ linux-2.6.10/include/linux/fs.h 2005-02-14 11:36:19.214499355 -0600
@@ -27,6 +27,7 @@ struct poll_table_struct;
struct kstatfs;
struct vm_area_struct;
struct vfsmount;
+struct writeback_control;
/*
* It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -244,6 +245,7 @@ typedef int (get_blocks_t)(struct inode
struct buffer_head *bh_result, int create);
typedef void (dio_iodone_t)(struct inode *inode, loff_t offset,
ssize_t bytes, void *private);
+typedef int (writepage_t)(struct page *page, struct writeback_control *wbc);
/*
* Attribute flags. These should be or-ed together to figure out what
diff -Naurp linux-2.6.10-orig/include/linux/mpage.h
linux-2.6.10/include/linux/mpage.h
--- linux-2.6.10-orig/include/linux/mpage.h 2004-10-18 16:53:51.000000000
-0500
+++ linux-2.6.10/include/linux/mpage.h 2005-02-14 11:36:52.381333229 -0600
@@ -17,6 +17,9 @@ int mpage_readpages(struct address_space
int mpage_readpage(struct page *page, get_block_t get_block);
int mpage_writepages(struct address_space *mapping,
struct writeback_control *wbc, get_block_t get_block);
+int __mpage_writepages(struct address_space *mapping,
+ struct writeback_control *wbc, get_block_t get_block,
+ writepage_t writepage);
static inline int
generic_writepages(struct address_space *mapping, struct writeback_control
*wbc)