I'm cleaning up various bits and pieces of the buffer layer. Part of this is the removal of inode.i_dirty_data_buffers, writeout_one_page(), waitfor_one_page() and generic_buffer_fdatasync().
i_dirty_data_buffers is the problem. It's no longer used with my buffer-layer changes - all dirty buffers have their pages marked dirty and they are no longer attached to i_dirty_data_buffers. If you want to write back all of an inode's pages, you run filemap_fdatasync against i_mapping. But there's a snag with JFS. It locks the page which the metapage owns, so filemap_fdatasync deadlocks. It's not clear to me that the pages actually need to be locked. This would never have protected them from I/O, because their buffers are on the dirty buffer LRU and are eligible for writeback at any time. So what I've done is to not lock those pages. ->count is elevated, so they won't be disappearing. It works OK, but the deadlock problem is preventing much testing. My current patch series is at http://www.zip.com.au/~akpm/linux/patches/2.5/2.5.9/ Below is the JFS part. I'd really appreciate some help in sorting this out. Thanks. --- linux-2.5.9/fs/jfs/file.c Sun Apr 14 15:45:07 2002 +++ 25/fs/jfs/file.c Wed Apr 24 00:49:44 2002 @@ -33,8 +33,6 @@ int jfs_fsync(struct file *file, struct struct inode *inode = dentry->d_inode; int rc = 0; - rc = fsync_inode_data_buffers(inode); - if (!(inode->i_state & I_DIRTY)) return rc; if (datasync || !(inode->i_state & I_DIRTY_DATASYNC)) --- linux-2.5.9/fs/jfs/jfs_dmap.c Sun Apr 14 15:45:07 2002 +++ 25/fs/jfs/jfs_dmap.c Wed Apr 24 00:49:44 2002 @@ -325,7 +325,8 @@ int dbSync(struct inode *ipbmap) /* * write out dirty pages of bmap */ - fsync_inode_data_buffers(ipbmap); + filemap_fdatasync(ipbmap->i_mapping); + filemap_fdatawait(ipbmap->i_mapping); ipbmap->i_state |= I_DIRTY; diWriteSpecial(ipbmap); --- linux-2.5.9/fs/jfs/jfs_imap.c Sun Apr 14 15:45:07 2002 +++ 25/fs/jfs/jfs_imap.c Wed Apr 24 00:49:44 2002 @@ -282,7 +282,8 @@ int diSync(struct inode *ipimap) /* * write out dirty pages of imap */ - fsync_inode_data_buffers(ipimap); + filemap_fdatasync(ipimap->i_mapping); + filemap_fdatawait(ipimap->i_mapping); diWriteSpecial(ipimap); @@ -607,7 
+608,8 @@ void diFreeSpecial(struct inode *ip) jERROR(1, ("diFreeSpecial called with NULL ip!\n")); return; } - fsync_inode_data_buffers(ip); + filemap_fdatasync(ip->i_mapping); + filemap_fdatawait(ip->i_mapping); truncate_inode_pages(ip->i_mapping, 0); iput(ip); } --- linux-2.5.9/fs/jfs/jfs_logmgr.c Sun Apr 14 15:45:07 2002 +++ 25/fs/jfs/jfs_logmgr.c Wed Apr 24 00:49:44 2002 @@ -966,9 +966,21 @@ int lmLogSync(log_t * log, int nosyncwai * We need to make sure all of the "written" metapages * actually make it to disk */ - fsync_inode_data_buffers(sbi->ipbmap); - fsync_inode_data_buffers(sbi->ipimap); - fsync_inode_data_buffers(sbi->direct_inode); + /* + * NOTE! It's more efficient to perform the three + * fdatasyncs and then the three fdatawaits, rather + * than sync/wait/sync/wait/sync/wait. If there are + * no ordering requirements here, then it's recommended. + * [EMAIL PROTECTED] + */ + filemap_fdatasync(sbi->ipbmap->i_mapping); + filemap_fdatawait(sbi->ipbmap->i_mapping); + + filemap_fdatasync(sbi->ipimap->i_mapping); + filemap_fdatawait(sbi->ipimap->i_mapping); + + filemap_fdatasync(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); lrd.logtid = 0; lrd.backchain = 0; --- linux-2.5.9/fs/jfs/jfs_metapage.c Mon Apr 22 16:41:03 2002 +++ 25/fs/jfs/jfs_metapage.c Wed Apr 24 00:49:44 2002 @@ -349,6 +349,9 @@ metapage_t *__get_metapage(struct inode page_index = lblock >> l2BlocksPerPage; page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize; + /* + * AKPM: s/PAGE_SIZE/PAGE_CACHE_SIZE/ ? 
+ */ if ((page_offset + size) > PAGE_SIZE) { spin_unlock(&meta_lock); jERROR(1, ("MetaData crosses page boundary!!\n")); @@ -394,8 +397,10 @@ metapage_t *__get_metapage(struct inode __free_metapage(mp); spin_unlock(&meta_lock); return NULL; - } else + } else { INCREMENT(mpStat.pagealloc); + unlock_page(mp->page); + } } else { jFYI(1, ("__get_metapage: Calling read_cache_page\n")); @@ -412,7 +417,6 @@ metapage_t *__get_metapage(struct inode return NULL; } else INCREMENT(mpStat.pagealloc); - lock_page(mp->page); } mp->data = (void *) (kmap(mp->page) + page_offset); } @@ -459,6 +463,7 @@ static void __write_metapage(metapage_t page_offset = (mp->index - (page_index << l2BlocksPerPage)) << l2bsize; + lock_page(mp->page); rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset, page_offset + mp->logical_size); @@ -466,6 +471,7 @@ static void __write_metapage(metapage_t jERROR(1, ("prepare_write return %d!\n", rc)); ClearPageUptodate(mp->page); kunmap(mp->page); + unlock_page(mp->page); clear_bit(META_dirty, &mp->flag); return; } @@ -476,6 +482,7 @@ static void __write_metapage(metapage_t jERROR(1, ("commit_write returned %d\n", rc)); } + unlock_page(mp->page); clear_bit(META_dirty, &mp->flag); jFYI(1, ("__write_metapage done\n")); @@ -489,12 +496,10 @@ static inline void sync_metapage(metapag lock_page(page); /* we're done with this page - no need to check for errors */ - if (page_has_buffers(page)) { - writeout_one_page(page); - waitfor_one_page(page); - } - - UnlockPage(page); + if (page_has_buffers(page)) + write_one_page(page, 1); + else + unlock_page(page); page_cache_release(page); } @@ -527,7 +532,6 @@ void release_metapage(metapage_t * mp) mp->data = 0; if (test_bit(META_dirty, &mp->flag)) __write_metapage(mp); - UnlockPage(mp->page); if (test_bit(META_sync, &mp->flag)) { sync_metapage(mp); clear_bit(META_sync, &mp->flag); @@ -536,7 +540,7 @@ void release_metapage(metapage_t * mp) if (test_bit(META_discard, &mp->flag)) { lock_page(mp->page); 
block_flushpage(mp->page, 0); - UnlockPage(mp->page); + unlock_page(mp->page); } page_cache_release(mp->page); @@ -587,13 +591,15 @@ void invalidate_metapages(struct inode * /* * If in the metapage cache, we've got the page locked */ + lock_page(mp->page); block_flushpage(mp->page, 0); + unlock_page(mp->page); } else { spin_unlock(&meta_lock); page = find_lock_page(mapping, lblock>>l2BlocksPerPage); if (page) { block_flushpage(page, 0); - UnlockPage(page); + unlock_page(page); } } } @@ -610,7 +616,6 @@ void invalidate_inode_metapages(struct i clear_bit(META_dirty, &mp->flag); set_bit(META_discard, &mp->flag); kunmap(mp->page); - UnlockPage(mp->page); page_cache_release(mp->page); INCREMENT(mpStat.pagefree); mp->data = 0; --- linux-2.5.9/fs/jfs/jfs_txnmgr.c Sun Apr 14 15:45:07 2002 +++ 25/fs/jfs/jfs_txnmgr.c Wed Apr 24 00:49:44 2002 @@ -1163,8 +1163,10 @@ int txCommit(tid_t tid, /* transaction * committing transactions and use i_sem instead. */ if ((!S_ISDIR(ip->i_mode)) - && (tblk->flag & COMMIT_DELETE) == 0) - fsync_inode_data_buffers(ip); + && (tblk->flag & COMMIT_DELETE) == 0) { + filemap_fdatasync(ip->i_mapping); + filemap_fdatawait(ip->i_mapping); + } /* * Mark inode as not dirty. It will still be on the dirty --- linux-2.5.9/fs/jfs/namei.c Sun Apr 14 15:45:07 2002 +++ 25/fs/jfs/namei.c Wed Apr 24 00:49:45 2002 @@ -969,7 +969,7 @@ int jfs_symlink(struct inode *dip, struc memcpy(mp->data, name, copy_size); flush_metapage(mp); #if 0 - mark_buffer_uptodate(bp, 1); + set_buffer_uptodate(bp); mark_buffer_dirty(bp, 1); if (IS_SYNC(dip)) { ll_rw_block(WRITE, 1, &bp); --- linux-2.5.9/fs/jfs/super.c Sun Apr 14 15:45:07 2002 +++ 25/fs/jfs/super.c Wed Apr 24 00:49:44 2002 @@ -151,7 +151,12 @@ static void jfs_put_super(struct super_b * We need to clean out the direct_inode pages since this inode * is not in the inode hash. */ - fsync_inode_data_buffers(sbi->direct_inode); + /* + * Is this right? Should we writeback sbi->direct_mapping instead? 
+ * - [EMAIL PROTECTED] + */ + filemap_fdatasync(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); truncate_inode_pages(sbi->direct_mapping, 0); iput(sbi->direct_inode); sbi->direct_inode = NULL; @@ -337,7 +342,8 @@ out_no_rw: jERROR(1, ("jfs_umount failed with return code %d\n", rc)); } out_mount_failed: - fsync_inode_data_buffers(sbi->direct_inode); + filemap_fdatasync(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); truncate_inode_pages(sbi->direct_mapping, 0); make_bad_inode(sbi->direct_inode); iput(sbi->direct_inode); _______________________________________________ Jfs-discussion mailing list [EMAIL PROTECTED] http://www-124.ibm.com/developerworks/oss/mailman/listinfo/jfs-discussion