On Fri, 2005-03-04 at 16:31, Andrew Morton wrote:
> Andrew Morton <[EMAIL PROTECTED]> wrote:
> >
> > page reclaim can come in, grab the page lock and
> > whip the page off the mapping.
> 
> No it can't - we hold an additional ref on the page, so reclaim will back
> off.  Still, it feels a bit flakey.
> 
> And we're not supposed to take lock_page() inside journal_start, because
> that's a ranking violation.  Probably i_sem will prevent an actual deadlock
> occurring, however.
> 

Here is the new patch, which deals with the locking issue at the cost of more code :(

Thanks,
Badari


diff -Naurp -Xdontdiff linux-2.6.11/fs/ext3/inode.c linux-2.6.11.new/fs/ext3/inode.c
--- linux-2.6.11/fs/ext3/inode.c	2005-03-04 16:43:22.536143072 -0800
+++ linux-2.6.11.new/fs/ext3/inode.c	2005-03-04 17:43:16.719744112 -0800
@@ -20,6 +20,7 @@
  * 	([EMAIL PROTECTED])
  *
  *  Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
+ *  Add "nobh" support for ext3 writeback mode - [EMAIL PROTECTED]
  */
 
 #include <linux/module.h>
@@ -1016,7 +1017,10 @@ retry:
 		ret = PTR_ERR(handle);
 		goto out;
 	}
-	ret = block_prepare_write(page, from, to, ext3_get_block);
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_prepare_write(page, from, to, ext3_get_block);
+	else
+		ret = block_prepare_write(page, from, to, ext3_get_block);
 	if (ret)
 		goto prepare_write_failed;
 
@@ -1100,7 +1104,12 @@ static int ext3_writeback_commit_write(s
 	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 	if (new_i_size > EXT3_I(inode)->i_disksize)
 		EXT3_I(inode)->i_disksize = new_i_size;
-	ret = generic_commit_write(file, page, from, to);
+
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_commit_write(file, page, from, to);
+	else
+		ret = generic_commit_write(file, page, from, to);
+
 	ret2 = ext3_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
@@ -1385,7 +1394,11 @@ static int ext3_writeback_writepage(stru
 		goto out_fail;
 	}
 
-	ret = block_write_full_page(page, ext3_get_block, wbc);
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_writepage(page, ext3_get_block, wbc);
+	else
+		ret = block_write_full_page(page, ext3_get_block, wbc);
+
 	err = ext3_journal_stop(handle);
 	if (!ret)
 		ret = err;
@@ -1646,13 +1659,46 @@ static int ext3_block_truncate_page(hand
 	unsigned blocksize, iblock, length, pos;
 	struct inode *inode = mapping->host;
 	struct buffer_head *bh;
-	int err;
+	int err = 0;
 	void *kaddr;
 
 	blocksize = inode->i_sb->s_blocksize;
 	length = blocksize - (offset & (blocksize - 1));
 	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
+	if (test_opt(inode->i_sb, NOBH) && !page_has_buffers(page)) {
+		if (!PageUptodate(page)) {
+			struct buffer_head map_bh;
+			bh = &map_bh;
+			bh->b_state = 0;
+			clear_buffer_mapped(bh);
+			ext3_get_block(inode, iblock, bh, 0);
+			if (!buffer_mapped(bh)) 
+				goto unlock;
+			err = -EIO;
+			set_bh_page(bh, page, 0);
+			bh->b_this_page = 0;
+			bh->b_size = 1 << inode->i_blkbits;
+			ll_rw_block(READ, 1, &bh);
+			wait_on_buffer(bh);
+			if (!buffer_uptodate(bh))
+				goto unlock;
+			SetPageMappedToDisk(page);
+		}
+		if (err == 0 && PageUptodate(page)) {
+			kaddr = kmap_atomic(page, KM_USER0);
+			memset(kaddr + offset, 0, length);
+			flush_dcache_page(page);
+			kunmap_atomic(kaddr, KM_USER0);
+			set_page_dirty(page);
+			goto unlock;
+		}
+		/* 
+		 * Well, we tried to work without buffers and failed.
+		 * Fall back to creating buffers.
+		 */
+	}
+	
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
 
diff -Naurp -Xdontdiff linux-2.6.11/fs/ext3/super.c linux-2.6.11.new/fs/ext3/super.c
--- linux-2.6.11/fs/ext3/super.c	2005-03-01 23:38:38.000000000 -0800
+++ linux-2.6.11.new/fs/ext3/super.c	2005-03-04 16:45:22.038975880 -0800
@@ -576,7 +576,7 @@ enum {
 	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
 	Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
 	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
-	Opt_reservation, Opt_noreservation, Opt_noload,
+	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
 	Opt_commit, Opt_journal_update, Opt_journal_inum,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -611,6 +611,7 @@ static match_table_t tokens = {
 	{Opt_reservation, "reservation"},
 	{Opt_noreservation, "noreservation"},
 	{Opt_noload, "noload"},
+	{Opt_nobh, "nobh"},
 	{Opt_commit, "commit=%u"},
 	{Opt_journal_update, "journal=update"},
 	{Opt_journal_inum, "journal=%u"},
@@ -924,6 +925,9 @@ clear_qf_name:
 			match_int(&args[0], &option);
 			*n_blocks_count = option;
 			break;
+		case Opt_nobh:
+			set_opt(sbi->s_mount_opt, NOBH);
+			break;
 		default:
 			printk (KERN_ERR
 				"EXT3-fs: Unrecognized mount option \"%s\" "
@@ -1563,6 +1567,19 @@ static int ext3_fill_super (struct super
 		break;
 	}
 
+	if (test_opt(sb, NOBH)) {
+		if (sb->s_blocksize_bits != PAGE_CACHE_SHIFT) {
+			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option "
+				"since filesystem blocksize doesn't match "
+				"pagesize\n");
+			clear_opt(sbi->s_mount_opt, NOBH);
+		}
+		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
+			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
+				"its supported only with writeback mode\n");
+			clear_opt(sbi->s_mount_opt, NOBH);
+		}
+	}
 	/*
 	 * The journal_load will have done any necessary log recovery,
 	 * so we can safely mount the rest of the filesystem now.
diff -Naurp -Xdontdiff linux-2.6.11/include/linux/ext3_fs.h linux-2.6.11.new/include/linux/ext3_fs.h
--- linux-2.6.11/include/linux/ext3_fs.h	2005-03-01 23:38:10.000000000 -0800
+++ linux-2.6.11.new/include/linux/ext3_fs.h	2005-03-04 16:46:29.101780784 -0800
@@ -357,6 +357,7 @@ struct ext3_inode {
 #define EXT3_MOUNT_POSIX_ACL		0x08000	/* POSIX Access Control Lists */
 #define EXT3_MOUNT_RESERVATION		0x10000	/* Preallocation */
 #define EXT3_MOUNT_BARRIER		0x20000 /* Use block barriers */
+#define EXT3_MOUNT_NOBH			0x40000 /* No bufferheads */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H

Reply via email to