Re: [PATCH] nfs: fix congestion control -v3
On Sat, 20 Jan 2007, Peter Zijlstra wrote: > Subject: nfs: fix congestion control I am not sure if its too valuable since I have limited experience with NFS but it looks fine to me. Acked-by: Christoph Lameter <[EMAIL PROTECTED]> - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] nfs: fix congestion control -v3
On Sat, 2007-01-20 at 08:01 +0100, Peter Zijlstra wrote: > Subject: nfs: fix congestion control > > The current NFS client congestion logic is severly broken, it marks the > backing > device congested during each nfs_writepages() call but doesn't mirror this in > nfs_writepage() which makes for deadlocks. Also it implements its own > waitqueue. > > Replace this by a more regular congestion implementation that puts a cap on > the > number of active writeback pages and uses the bdi congestion waitqueue. > > Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]> > Cc: Trond Myklebust <[EMAIL PROTECTED]> > --- > fs/nfs/super.c |4 - > fs/nfs/sysctl.c |8 +++ > fs/nfs/write.c | 116 > > include/linux/backing-dev.h |1 > include/linux/nfs_fs.h |1 > include/linux/nfs_fs_sb.h |1 > mm/backing-dev.c| 16 ++ > 7 files changed, 104 insertions(+), 43 deletions(-) > > Index: linux-2.6-git/fs/nfs/write.c > === > --- linux-2.6-git.orig/fs/nfs/write.c 2007-01-20 07:20:10.0 +0100 > +++ linux-2.6-git/fs/nfs/write.c 2007-01-20 07:20:12.0 +0100 > @@ -89,8 +89,6 @@ static struct kmem_cache *nfs_wdata_cach > static mempool_t *nfs_wdata_mempool; > static mempool_t *nfs_commit_mempool; > > -static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); > - > struct nfs_write_data *nfs_commit_alloc(void) > { > struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); > @@ -245,6 +243,39 @@ static int wb_priority(struct writeback_ > } > > /* > + * NFS congestion control > + */ > + > +int nfs_congestion_kb; > + > +#define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10)) > +#define NFS_CONGESTION_OFF_THRESH\ > + (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) > + > +static inline void nfs_set_page_writeback(struct page *page) > +{ > + if (!test_set_page_writeback(page)) { > + struct inode *inode = page->mapping->host; > + struct nfs_server *nfss = NFS_SERVER(inode); > + > + if (atomic_inc_return(&nfss->writeback) > > NFS_CONGESTION_ON_THRESH) > + set_bdi_congested(&nfss->backing_dev_info, WRITE); > + } > +} > + > +static inline void nfs_end_page_writeback(struct page *page) > +{ > + struct inode *inode = page->mapping->host; > + struct nfs_server *nfss = NFS_SERVER(inode); > + > + end_page_writeback(page); > + if (atomic_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { > + clear_bdi_congested(&nfss->backing_dev_info, WRITE); > + congestion_end(WRITE); > + } > +} > + > +/* > * Find an associated nfs write request, and prepare to flush it out > * Returns 1 if there was no write request, or if the request was > * already tagged by nfs_set_page_dirty.Returns 0 if the request > @@ -281,7 +312,7 @@ static int nfs_page_mark_flush(struct pa > spin_unlock(req_lock); > if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { > nfs_mark_request_dirty(req); > - set_page_writeback(page); > + nfs_set_page_writeback(page); > } > ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); > nfs_unlock_request(req); > @@ -336,13 +367,8 @@ int nfs_writepage(struct page *page, str > return err; > } > > -/* > - * Note: causes nfs_update_request() to block on the assumption > - *that the writeback is generated due to memory pressure. > - */ > int nfs_writepages(struct address_space *mapping, struct writeback_control > *wbc) > { > - struct backing_dev_info *bdi = mapping->backing_dev_info; > struct inode *inode = mapping->host; > int err; > > @@ -351,11 +377,6 @@ int nfs_writepages(struct address_space > err = generic_writepages(mapping, wbc); > if (err) > return err; > - while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) { > - if (wbc->nonblocking) > - return 0; > - nfs_wait_on_write_congestion(mapping, 0); > - } > err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); > if (err < 0) > goto out; > @@ -369,9 +390,6 @@ int nfs_writepages(struct address_space > if (err > 0) > err = 0; > out: > - clear_bit(BDI_write_congested, &bdi->state); > - wake_up_all(&nfs_write_congestion); > - congestion_end(WRITE); > return err; > } > > @@ -401,7 +419,7 @@ static int nfs_inode_add_request(struct > } > > /* > - * Insert a write request into an inode > + * Remove a write request from an inode > */ > static void nfs_inode_remove_request(struct nfs_page *req) > { > @@ -585,8 +603,8 @@ static inline int nfs_scan_commit(struct > > static int nfs_wait_on_write_congestion(struct address_space *mapping, int > int
[PATCH] nfs: fix congestion control -v3
Subject: nfs: fix congestion control The current NFS client congestion logic is severly broken, it marks the backing device congested during each nfs_writepages() call but doesn't mirror this in nfs_writepage() which makes for deadlocks. Also it implements its own waitqueue. Replace this by a more regular congestion implementation that puts a cap on the number of active writeback pages and uses the bdi congestion waitqueue. Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]> Cc: Trond Myklebust <[EMAIL PROTECTED]> --- fs/nfs/super.c |4 - fs/nfs/sysctl.c |8 +++ fs/nfs/write.c | 116 include/linux/backing-dev.h |1 include/linux/nfs_fs.h |1 include/linux/nfs_fs_sb.h |1 mm/backing-dev.c| 16 ++ 7 files changed, 104 insertions(+), 43 deletions(-) Index: linux-2.6-git/fs/nfs/write.c === --- linux-2.6-git.orig/fs/nfs/write.c 2007-01-20 07:20:10.0 +0100 +++ linux-2.6-git/fs/nfs/write.c2007-01-20 07:20:12.0 +0100 @@ -89,8 +89,6 @@ static struct kmem_cache *nfs_wdata_cach static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; -static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); - struct nfs_write_data *nfs_commit_alloc(void) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); @@ -245,6 +243,39 @@ static int wb_priority(struct writeback_ } /* + * NFS congestion control + */ + +int nfs_congestion_kb; + +#define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10)) +#define NFS_CONGESTION_OFF_THRESH \ + (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) + +static inline void nfs_set_page_writeback(struct page *page) +{ + if (!test_set_page_writeback(page)) { + struct inode *inode = page->mapping->host; + struct nfs_server *nfss = NFS_SERVER(inode); + + if (atomic_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH) + set_bdi_congested(&nfss->backing_dev_info, WRITE); + } +} + +static inline void nfs_end_page_writeback(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct nfs_server *nfss = NFS_SERVER(inode); + + end_page_writeback(page); + if (atomic_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { + clear_bdi_congested(&nfss->backing_dev_info, WRITE); + congestion_end(WRITE); + } +} + +/* * Find an associated nfs write request, and prepare to flush it out * Returns 1 if there was no write request, or if the request was * already tagged by nfs_set_page_dirty.Returns 0 if the request @@ -281,7 +312,7 @@ static int nfs_page_mark_flush(struct pa spin_unlock(req_lock); if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { nfs_mark_request_dirty(req); - set_page_writeback(page); + nfs_set_page_writeback(page); } ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); nfs_unlock_request(req); @@ -336,13 +367,8 @@ int nfs_writepage(struct page *page, str return err; } -/* - * Note: causes nfs_update_request() to block on the assumption - * that the writeback is generated due to memory pressure. - */ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct backing_dev_info *bdi = mapping->backing_dev_info; struct inode *inode = mapping->host; int err; @@ -351,11 +377,6 @@ int nfs_writepages(struct address_space err = generic_writepages(mapping, wbc); if (err) return err; - while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) { - if (wbc->nonblocking) - return 0; - nfs_wait_on_write_congestion(mapping, 0); - } err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); if (err < 0) goto out; @@ -369,9 +390,6 @@ int nfs_writepages(struct address_space if (err > 0) err = 0; out: - clear_bit(BDI_write_congested, &bdi->state); - wake_up_all(&nfs_write_congestion); - congestion_end(WRITE); return err; } @@ -401,7 +419,7 @@ static int nfs_inode_add_request(struct } /* - * Insert a write request into an inode + * Remove a write request from an inode */ static void nfs_inode_remove_request(struct nfs_page *req) { @@ -585,8 +603,8 @@ static inline int nfs_scan_commit(struct static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) { + struct inode *inode = mapping->host; struct backing_dev_info *bdi = mapping->backing_dev_info; - DEFINE_WAIT(wait); int ret = 0; might_sleep(); @@ -594,31 +612,27 @@ static int nfs_wait_on_writ