Lars Marowsky-Bree <[EMAIL PROTECTED]> wrote: > > On 2005-03-04T01:44:06, Junfeng Yang <[EMAIL PROTECTED]> wrote: > > > > That would be a bug. Please send the e2fsck output. > > > > Here is the trace > > > > 1. file system is made with sbin/mkfs.ext2 -F -b 1024 /dev/hda9 60 > > and mounted with -o sync,dirsync > > > > 1. operations FiSC did: > > > > creat(/mnt/sbd0/0001) > > write(/mnt/sbd0/0001) > > rename(/mnt/sbd0/0001, /mnt/sbd0/0002) > > mkdir(/mnt/sbd0/0003) > > > > 2. FiSC "crashed" the test machine after mkdir returns. Crashed > > disk image can be downloaded at: http://fisc.stanford.edu/bug2/crash.img.bz2 > > I've run into similar issues. For example, a "touch foo" also isn't > synchronous with -o sync, but stays entirely in the cache. Andrea tells > me this is expected behaviour, so I've given up on this one... >
Why is that expected behaviour? I have vague memories which agree with that, but I cannot remember the reasoning. >From a quick parse, ext2 seems to be full of MS_SYNCHRONOUS holes, and there might be some O_SYNC ones there as well. Problem is, it's subtle because we try to defer I/O until the last stage, to avoid doing extra I/O. So this wild scattergun patch probably does extra work and possibly extra I/O all over the place, but I'd be interested if Junfeng could give it a quick test. It's against 2.6.11. A real patch would take some painstaking work. diff -puN fs/ext2/balloc.c~ext2-sync-fix fs/ext2/balloc.c --- 25/fs/ext2/balloc.c~ext2-sync-fix 2005-03-04 02:49:00.000000000 -0800 +++ 25-akpm/fs/ext2/balloc.c 2005-03-04 02:49:00.000000000 -0800 @@ -139,8 +139,9 @@ static void release_blocks(struct super_ } } -static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no, - struct ext2_group_desc *desc, struct buffer_head *bh, int count) +static int group_reserve_blocks(struct super_block *sb, + struct ext2_sb_info *sbi, int group_no, struct ext2_group_desc *desc, + struct buffer_head *bh, int count) { unsigned free_blocks; @@ -154,6 +155,8 @@ static int group_reserve_blocks(struct e desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count); spin_unlock(sb_bgl_lock(sbi, group_no)); mark_buffer_dirty(bh); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(bh); return count; } @@ -170,6 +173,8 @@ static void group_release_blocks(struct spin_unlock(sb_bgl_lock(sbi, group_no)); sb->s_dirt = 1; mark_buffer_dirty(bh); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(bh); } } @@ -377,7 +382,7 @@ int ext2_new_block(struct inode *inode, goto io_error; } - group_alloc = group_reserve_blocks(sbi, group_no, desc, + group_alloc = group_reserve_blocks(sb, sbi, group_no, desc, gdp_bh, es_alloc); if (group_alloc) { ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % @@ -413,7 +418,7 @@ retry: desc = ext2_get_group_desc(sb, group_no, &gdp_bh); if (!desc) goto io_error; - group_alloc = group_reserve_blocks(sbi, group_no, desc, + group_alloc = group_reserve_blocks(sb, sbi, group_no, desc, gdp_bh, es_alloc); } if (!group_alloc) { diff -puN fs/ext2/ialloc.c~ext2-sync-fix fs/ext2/ialloc.c --- 25/fs/ext2/ialloc.c~ext2-sync-fix 2005-03-04 02:49:00.000000000 -0800 +++ 25-akpm/fs/ext2/ialloc.c 2005-03-04 02:54:13.000000000 -0800 @@ -86,6 +86,8 @@ static void ext2_release_inode(struct su percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter); sb->s_dirt = 1; mark_buffer_dirty(bh); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(bh); } /* @@ -563,6 +565,8 @@ got: sb->s_dirt = 1; mark_buffer_dirty(bh2); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(bh2); inode->i_uid = current->fsuid; if (test_opt (sb, GRPID)) inode->i_gid = dir->i_gid; @@ -614,7 +618,7 @@ got: DQUOT_FREE_INODE(inode); goto fail2; } - mark_inode_dirty(inode); + ext2_mark_inode_dirty(inode); ext2_debug("allocating inode %lu\n", inode->i_ino); ext2_preread_inode(inode); return inode; diff -puN fs/ext2/super.c~ext2-sync-fix fs/ext2/super.c --- 25/fs/ext2/super.c~ext2-sync-fix 2005-03-04 02:49:00.000000000 -0800 +++ 25-akpm/fs/ext2/super.c 2005-03-04 02:49:00.000000000 -0800 @@ -1097,6 +1097,8 @@ static ssize_t ext2_quota_write(struct s set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(bh); brelse(bh); offset = 0; towrite -= tocopy; @@ -1110,8 +1112,8 @@ out: i_size_write(inode, off+len-towrite); inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); up(&inode->i_sem); + ext2_mark_inode_dirty(inode); return len - towrite; } diff -puN fs/ext2/xattr.c~ext2-sync-fix fs/ext2/xattr.c --- 25/fs/ext2/xattr.c~ext2-sync-fix 2005-03-04 02:49:00.000000000 -0800 +++ 25-akpm/fs/ext2/xattr.c 2005-03-04 02:49:00.000000000 -0800 @@ -348,6 +348,8 @@ static void ext2_xattr_update_super_bloc sb->s_dirt = 1; mark_buffer_dirty(EXT2_SB(sb)->s_sbh); unlock_super(sb); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(EXT2_SB(sb)->s_sbh); } /* diff -puN fs/ext2/dir.c~ext2-sync-fix fs/ext2/dir.c --- 25/fs/ext2/dir.c~ext2-sync-fix 2005-03-04 02:49:00.000000000 -0800 +++ 25-akpm/fs/ext2/dir.c 2005-03-04 02:49:00.000000000 -0800 @@ -428,7 +428,7 @@ void ext2_set_link(struct inode *dir, st ext2_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; - mark_inode_dirty(dir); + ext2_mark_inode_dirty(dir); } /* @@ -518,7 +518,7 @@ got_it: err = ext2_commit_chunk(page, from, to); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; - mark_inode_dirty(dir); + ext2_mark_inode_dirty(dir); /* OFFSET_CACHE */ out_put: ext2_put_page(page); @@ -566,7 +566,7 @@ int ext2_delete_entry (struct ext2_dir_e err = ext2_commit_chunk(page, from, to); inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; - mark_inode_dirty(inode); + ext2_mark_inode_dirty(inode); out: ext2_put_page(page); return err; diff -puN fs/ext2/inode.c~ext2-sync-fix fs/ext2/inode.c --- 25/fs/ext2/inode.c~ext2-sync-fix 2005-03-04 02:49:00.000000000 -0800 +++ 25-akpm/fs/ext2/inode.c 2005-03-04 02:49:00.000000000 -0800 @@ -41,6 +41,17 @@ MODULE_LICENSE("GPL"); static int ext2_update_inode(struct inode * inode, int do_sync); /* + * dirty an ext2 inode and sync it if needed + */ +int ext2_mark_inode_dirty(struct inode *inode) +{ + mark_inode_dirty(inode); + if (inode_needs_sync(inode)) + return ext2_update_inode(inode, 1); + return 0; +} + +/* * Test whether an inode is a fast symlink. */ static inline int ext2_inode_is_fast_symlink(struct inode *inode) @@ -60,8 +71,7 @@ void ext2_delete_inode (struct inode * i if (is_bad_inode(inode)) goto no_delete; EXT2_I(inode)->i_dtime = get_seconds(); - mark_inode_dirty(inode); - ext2_update_inode(inode, inode_needs_sync(inode)); + ext2_mark_inode_dirty(inode); inode->i_size = 0; if (inode->i_blocks) diff -puN fs/ext2/acl.c~ext2-sync-fix fs/ext2/acl.c diff -puN fs/ext2/ioctl.c~ext2-sync-fix fs/ext2/ioctl.c --- 25/fs/ext2/ioctl.c~ext2-sync-fix 2005-03-04 02:49:00.000000000 -0800 +++ 25-akpm/fs/ext2/ioctl.c 2005-03-04 02:49:00.000000000 -0800 @@ -60,7 +60,7 @@ int ext2_ioctl (struct inode * inode, st ext2_set_inode_flags(inode); inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); + ext2_mark_inode_dirty(inode); return 0; } case EXT2_IOC_GETVERSION: @@ -73,7 +73,7 @@ int ext2_ioctl (struct inode * inode, st if (get_user(inode->i_generation, (int __user *) arg)) return -EFAULT; inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); + ext2_mark_inode_dirty(inode); return 0; default: return -ENOTTY; diff -puN fs/ext2/namei.c~ext2-sync-fix fs/ext2/namei.c --- 25/fs/ext2/namei.c~ext2-sync-fix 2005-03-04 02:49:00.000000000 -0800 +++ 25-akpm/fs/ext2/namei.c 2005-03-04 02:55:15.000000000 -0800 @@ -132,7 +132,7 @@ static int ext2_create (struct inode * d inode->i_mapping->a_ops = &ext2_nobh_aops; else inode->i_mapping->a_ops = &ext2_aops; - mark_inode_dirty(inode); + ext2_mark_inode_dirty(inode); err = ext2_add_nondir(dentry, inode); } return err; @@ -153,7 +153,7 @@ static int ext2_mknod (struct inode * di #ifdef CONFIG_EXT2_FS_XATTR inode->i_op = &ext2_special_inode_operations; #endif - mark_inode_dirty(inode); + ext2_mark_inode_dirty(inode); err = ext2_add_nondir(dentry, inode); } return err; @@ -191,7 +191,7 @@ static int ext2_symlink (struct inode * memcpy((char*)(EXT2_I(inode)->i_data),symname,l); inode->i_size = l-1; } - mark_inode_dirty(inode); + ext2_mark_inode_dirty(inode); err = ext2_add_nondir(dentry, inode); out: diff -puN fs/ext2/ext2.h~ext2-sync-fix fs/ext2/ext2.h --- 25/fs/ext2/ext2.h~ext2-sync-fix 2005-03-04 02:49:00.000000000 -0800 +++ 25-akpm/fs/ext2/ext2.h 2005-03-04 02:49:00.000000000 -0800 @@ -116,6 +116,7 @@ extern unsigned long ext2_count_free (st /* inode.c */ extern void ext2_read_inode (struct inode *); extern int ext2_write_inode (struct inode *, int); +int ext2_mark_inode_dirty(struct inode *inode); extern void ext2_delete_inode (struct inode *); extern int ext2_sync_inode (struct inode *); extern void ext2_discard_prealloc (struct inode *); _ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/