Re: [f2fs-dev] [PATCH] f2fs: fix to recover inode's uid/gid during POR
On Fri, Sep 21, 2018 at 02:20:49PM +0800, Chao Yu wrote: > No problem, I just plan to. > > Out of curiosity, are you filter some key words like 'recover' in your > email client to find this so quickly? No, I just do a quick glanceof lkml every morning, and this just caught my eye. ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Re: [f2fs-dev] [PATCH] f2fs: fix to recover inode's uid/gid during POR
On Thu, Sep 20, 2018 at 05:41:30PM +0800, Chao Yu wrote: > Step to reproduce this bug: > 1. logon as root > 2. mount -t f2fs /dev/sdd /mnt; > 3. touch /mnt/file; > 4. chown system /mnt/file; chgrp system /mnt/file; > 5. xfs_io -f /mnt/file -c "fsync"; > 6. godown /mnt; > 7. umount /mnt; > 8. mount -t f2fs /dev/sdd /mnt; Please wire this up for xfstests (as a generic test). ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Re: [f2fs-dev] [PATCH] f2fs: fix to recover inode's uid/gid during POR
Hi Christoph, On 2018/9/21 14:06, Christoph Hellwig wrote: > On Thu, Sep 20, 2018 at 05:41:30PM +0800, Chao Yu wrote: >> Step to reproduce this bug: >> 1. logon as root >> 2. mount -t f2fs /dev/sdd /mnt; >> 3. touch /mnt/file; >> 4. chown system /mnt/file; chgrp system /mnt/file; >> 5. xfs_io -f /mnt/file -c "fsync"; >> 6. godown /mnt; >> 7. umount /mnt; >> 8. mount -t f2fs /dev/sdd /mnt; > > Please wire this up for xfstests (as a generic test). No problem, I just plan to. Out of curiosity, are you filter some key words like 'recover' in your email client to find this so quickly? Thanks, > > ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Re: [f2fs-dev] [PATCH 4/5] f2fs-tools: introduce sb checksum
On 2018/9/21 5:38, Jaegeuk Kim wrote: > On 09/20, Junling Zheng wrote: >> Hi, Jaegeuk >> >> On 2018/9/20 7:35, Jaegeuk Kim wrote: >>> On 09/19, Junling Zheng wrote: This patch introduced crc for superblock. Signed-off-by: Junling Zheng --- fsck/mount.c | 33 + fsck/resize.c | 12 ++-- include/f2fs_fs.h | 6 +- mkfs/f2fs_format.c | 8 4 files changed, 52 insertions(+), 7 deletions(-) diff --git a/fsck/mount.c b/fsck/mount.c index 74ff7c6..9019921 100644 --- a/fsck/mount.c +++ b/fsck/mount.c @@ -340,6 +340,7 @@ void print_raw_sb_info(struct f2fs_super_block *sb) DISP_u32(sb, node_ino); DISP_u32(sb, meta_ino); DISP_u32(sb, cp_payload); + DISP_u32(sb, crc); DISP("%-.256s", sb, version); printf("\n"); } @@ -467,6 +468,9 @@ void print_sb_state(struct f2fs_super_block *sb) if (f & cpu_to_le32(F2FS_FEATURE_LOST_FOUND)) { MSG(0, "%s", " lost_found"); } + if (f & cpu_to_le32(F2FS_FEATURE_SB_CHKSUM)) { + MSG(0, "%s", " sb_checksum"); + } MSG(0, "\n"); MSG(0, "Info: superblock encrypt level = %d, salt = ", sb->encryption_level); @@ -479,10 +483,20 @@ void update_superblock(struct f2fs_super_block *sb, int sb_mask) { int index, ret; u_int8_t *buf; + u32 old_crc, new_crc; buf = calloc(BLOCK_SZ, 1); ASSERT(buf); + if (get_sb(feature) & F2FS_FEATURE_SB_CHKSUM) { + old_crc = get_sb(crc); + new_crc = f2fs_cal_crc32(F2FS_SUPER_MAGIC, sb, + SB_CHKSUM_OFFSET); + set_sb(crc, new_crc); + MSG(1, "Info: SB CRC is updated (0x%x -> 0x%x)\n", + old_crc, new_crc); + } + memcpy(buf + F2FS_SUPER_OFFSET, sb, sizeof(*sb)); for (index = 0; index < SB_ADDR_MAX; index++) { if ((1 << index) & sb_mask) { @@ -575,10 +589,29 @@ static inline int sanity_check_area_boundary(struct f2fs_super_block *sb, return 0; } +static int verify_sb_chksum(struct f2fs_super_block *sb) +{ + if (SB_CHKSUM_OFFSET != get_sb(checksum_offset)) { + MSG(0, "\tInvalid SB CRC offset: %u\n", + get_sb(checksum_offset)); + return -1; + } + if (f2fs_crc_valid(get_sb(crc), sb, + get_sb(checksum_offset))) { + MSG(0, "\tInvalid SB CRC: 0x%x\n", get_sb(crc)); + return -1; + } + return 0; +} + int sanity_check_raw_super(struct f2fs_super_block *sb, enum SB_ADDR sb_addr) { unsigned int blocksize; + if ((get_sb(feature) & F2FS_FEATURE_SB_CHKSUM) && + verify_sb_chksum(sb)) + return -1; + if (F2FS_SUPER_MAGIC != get_sb(magic)) return -1; diff --git a/fsck/resize.c b/fsck/resize.c index 5161a1f..3462165 100644 --- a/fsck/resize.c +++ b/fsck/resize.c @@ -603,9 +603,6 @@ static int f2fs_resize_grow(struct f2fs_sb_info *sbi) } } - print_raw_sb_info(sb); - print_raw_sb_info(new_sb); >>> >>> It'd be worth to keep this to show the previous states. >>> >> >> Here, I just want to move the printing of sb and new_sb to the place >> behind update_superblock(), where the crc in sb will be updated. > > We'd better to see the changes, no? > Yeah, we print sb and new_sb here just for seeing the changes of superblock. However, the crc in new_sb will not be updated until calling update_superblock(), so if we keep the current printing location, we can't get the changes. Thanks, >> - old_main_blkaddr = get_sb(main_blkaddr); new_main_blkaddr = get_newsb(main_blkaddr); offset = new_main_blkaddr - old_main_blkaddr; @@ -629,6 +626,9 @@ static int f2fs_resize_grow(struct f2fs_sb_info *sbi) migrate_sit(sbi, new_sb, offset_seg); rebuild_checkpoint(sbi, new_sb, offset_seg); update_superblock(new_sb, SB_ALL); + print_raw_sb_info(sb); + print_raw_sb_info(new_sb); + return 0; } @@ -658,9 +658,6 @@ static int f2fs_resize_shrink(struct f2fs_sb_info *sbi) } } - print_raw_sb_info(sb); - print_raw_sb_info(new_sb); >>> >>> Ditto. >>> - old_main_blkaddr = get_sb(main_blkaddr); new_main_blkaddr = get_newsb(main_blkaddr); offset = old_main_blkaddr - new_main_blkaddr; @@ -690,6 +687,9 @@ static int f2fs_resize_shrink(struct f2fs_sb_info *sbi) /* move whole data region */ >
[f2fs-dev] [Bug 200871] F2FS experiences data loss (entry is completely lost) when an I/O failure occurs.
https://bugzilla.kernel.org/show_bug.cgi?id=200871 --- Comment #9 from Chao Yu (c...@kernel.org) --- Sorry for putting it off, I just struggled to fix quota things in these days, let me try to reproduce this issue. -- You are receiving this mail because: You are watching the assignee of the bug. ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Re: [f2fs-dev] [Bug 200871] F2FS experiences data loss (entry is completely lost) when an I/O failure occurs.
Dear all, I was kindly wondering if there are updates on this issue. Thanks a lot in advance. Kind regards, Stathis Maneas > On Sep 5, 2018, at 5:42 PM, bugzilla-dae...@bugzilla.kernel.org wrote: > > https://bugzilla.kernel.org/show_bug.cgi?id=200871 > > --- Comment #8 from Stathis Maneas (sman...@cs.toronto.edu) --- > Created attachment 278337 > --> https://bugzilla.kernel.org/attachment.cgi?id=278337&action=edit > Simple C file that updates the permissions of a file. > > -- > You are receiving this mail because: > You are watching the assignee of the bug. > -- > Check out the vibrant tech community on one of the world's most > engaging tech sites, Slashdot.org! http://sdm.link/slashdot > ___ > Linux-f2fs-devel mailing list > Linux-f2fs-devel@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
[f2fs-dev] [Bug 200773] An issue was discovered in the Linux kernel through 4.17.3. There is a NULL pointer dereference in get_checkpoint_version() in fs/f2fs/checkpoint.c when mounting crafted f2fs i
https://bugzilla.kernel.org/show_bug.cgi?id=200773 Chao Yu (c...@kernel.org) changed: What|Removed |Added CC||datadan...@163.com --- Comment #3 from Chao Yu (c...@kernel.org) --- Hi Shuaibing, Can you confirm this issue is fixed? -- You are receiving this mail because: You are watching the assignee of the bug. ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
[f2fs-dev] [Bug 200465] null ptr dereference in fscrypt_do_page_crypto() when operating a file on a corrupted f2fs image
https://bugzilla.kernel.org/show_bug.cgi?id=200465 Chao Yu (c...@kernel.org) changed: What|Removed |Added Status|ASSIGNED|NEEDINFO --- Comment #5 from Chao Yu (c...@kernel.org) --- (In reply to Wen Xu from comment #4) > (In reply to Chao Yu from comment #3) > > Steve, > > > > I figure out that patch to solve issue which I encounter with image > attached > > by Wen Xu, the bug can be triggered with below scripts: > > - mount image /mnt/f2fs/ > > - cd /mnt/f2fs/foo/bar/ > > - ls -l > > > > After applying that patch, the problem was gone. > > > > But when the bug triggeres, related call stack is not the same as reported > > one, also I can't reproduce reported call stack with the method provided > > from Wen Xu. > > > > I guess the right producing way is adding master key for encrypted file, > I'd > > like to confirm with Wen Xu. > > Hi Chao, > > Sorry for a late reply! Eh the first thing is that I never did anything like > adding master key for encrypted file. Second, I feel I pasted wrong > (mismatched) kernel message/PoC...but unfortunately I do not have a local > copy on my laptop now. Hi Wen, Oops, if you got another similar kernel message, please let me know. BTW, let me tag status of this issue as NEEDINFO -- You are receiving this mail because: You are watching the assignee of the bug. ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Re: [f2fs-dev] [PATCH 1/2 v3] f2fs: report ENOENT correct in f2fs_rename
On 2018/9/21 5:35, Jaegeuk Kim wrote: > This fixes wrong error report in f2fs_rename. > > Signed-off-by: Jaegeuk Kim Reviewed-by: Chao Yu Thanks, ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Re: [f2fs-dev] [PATCH 2/2 v2] f2fs: avoid f2fs_bug_on if f2fs_get_meta_page_nofail got EIO
On 2018/9/21 5:46, Jaegeuk Kim wrote: > On 09/20, Chao Yu wrote: >> On 2018/9/19 1:56, Jaegeuk Kim wrote: >>> This patch avoids BUG_ON when f2fs_get_meta_page_nofail got EIO during >>> xfstests/generic/475. >>> >>> Signed-off-by: Jaegeuk Kim >>> --- >>> Change log from v1: >>> - propagate errno >>> - drop sum_pages >>> >>> fs/f2fs/checkpoint.c | 10 +- >>> fs/f2fs/f2fs.h | 2 +- >>> fs/f2fs/gc.c | 9 + >>> fs/f2fs/node.c | 32 >>> fs/f2fs/recovery.c | 2 ++ >>> fs/f2fs/segment.c| 3 +++ >>> 6 files changed, 44 insertions(+), 14 deletions(-) >>> >>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c >>> index 01e0d8f5bbbe..1ddf332ce4b2 100644 >>> --- a/fs/f2fs/checkpoint.c >>> +++ b/fs/f2fs/checkpoint.c >>> @@ -119,11 +119,8 @@ struct page *f2fs_get_meta_page_nofail(struct >>> f2fs_sb_info *sbi, pgoff_t index) >>> if (PTR_ERR(page) == -EIO && >>> ++count <= DEFAULT_RETRY_IO_COUNT) >>> goto retry; >>> - >>> f2fs_stop_checkpoint(sbi, false); >>> - f2fs_bug_on(sbi, 1); >>> } >>> - >>> return page; >>> } >>> >>> @@ -1512,7 +1509,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, >>> struct cp_control *cpc) >>> ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); >>> >>> /* write cached NAT/SIT entries to NAT/SIT area */ >>> - f2fs_flush_nat_entries(sbi, cpc); >>> + err = f2fs_flush_nat_entries(sbi, cpc); >>> + if (err) >>> + goto stop; >> >> To make sure, if partial NAT pages become dirty, flush them back to device >> outside checkpoint() is not harmful, right? > > I think so. Oh, one more case, now we use NAT #0, if partial NAT pages were writebacked, then we start to use NAT #1, later, in another checkpoint(), we update those NAT pages again, we will write them to NAT #0, which is belong to last valid checkpoint(), it's may cause corrupted checkpoint in scenario of SPO. So it's harmfull, right? Thanks, > >> >>> + >>> f2fs_flush_sit_entries(sbi, cpc); >>> >>> /* unlock all the fs_lock[] in do_checkpoint() */ >>> @@ -1521,7 +1521,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, >>> struct cp_control *cpc) >>> f2fs_release_discard_addrs(sbi); >>> else >>> f2fs_clear_prefree_segments(sbi, cpc); >>> - >>> +stop: >>> unblock_operations(sbi); >>> stat_inc_cp_count(sbi->stat_info); >>> >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h >>> index a0c868127a7c..29021dbc8f2a 100644 >>> --- a/fs/f2fs/f2fs.h >>> +++ b/fs/f2fs/f2fs.h >>> @@ -2895,7 +2895,7 @@ int f2fs_recover_xattr_data(struct inode *inode, >>> struct page *page); >>> int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); >>> int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, >>> unsigned int segno, struct f2fs_summary_block *sum); >>> -void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control >>> *cpc); >>> +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control >>> *cpc); >>> int f2fs_build_node_manager(struct f2fs_sb_info *sbi); >>> void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi); >>> int __init f2fs_create_node_manager_caches(void); >>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c >>> index 4bcc8a59fdef..c7d14282dbf4 100644 >>> --- a/fs/f2fs/gc.c >>> +++ b/fs/f2fs/gc.c >>> @@ -1070,6 +1070,15 @@ static int do_garbage_collect(struct f2fs_sb_info >>> *sbi, >>> /* reference all summary page */ >>> while (segno < end_segno) { >>> sum_page = f2fs_get_sum_page(sbi, segno++); >>> + if (IS_ERR(sum_page)) { >>> + end_segno = segno - 1; >>> + for (segno = start_segno; segno < end_segno; segno++) { >>> + sum_page = find_get_page(META_MAPPING(sbi), >>> + GET_SUM_BLOCK(sbi, segno)); >>> + f2fs_put_page(sum_page, 0); >> >> find_get_page() will add one more reference on page, so we need to call >> f2fs_put_page(sum_page, 0) twice. > > Done. > >> >> Thanks, >> >>> + } >>> + return PTR_ERR(sum_page); >>> + } >>> unlock_page(sum_page); >>> } >>> >>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c >>> index fa2381c0bc47..79b6fee354f7 100644 >>> --- a/fs/f2fs/node.c >>> +++ b/fs/f2fs/node.c >>> @@ -126,6 +126,8 @@ static struct page *get_next_nat_page(struct >>> f2fs_sb_info *sbi, nid_t nid) >>> >>> /* get current nat block page with lock */ >>> src_page = get_current_nat_page(sbi, nid); >>> + if (IS_ERR(src_page)) >>> + return src_page; >>> dst_page = f2fs_grab_meta_page(sbi, dst_off); >>> f2fs_bug_on(sbi, PageDirty(src_page)); >>> >>> @@ -2265,15 +2267,19 @@ static int __f2fs_build_free_nids(struct >>> f2fs_sb_info *sbi, >>>
[f2fs-dev] [PATCH] f2fs: update i_size after DIO completion
This is related to ee70daaba82d ("xfs: update i_size after unwritten conversion in dio completion") If we update i_size during dio_write, dio_read can read out stale data, which breaks xfstests/465. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 15 +++ fs/f2fs/f2fs.h | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aac426109489..5a80654499c9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -881,7 +881,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) struct f2fs_summary sum; struct node_info ni; block_t old_blkaddr; - pgoff_t fofs; blkcnt_t count = 1; int err; @@ -910,12 +909,10 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) old_blkaddr, old_blkaddr); f2fs_set_data_blkaddr(dn); - /* update i_size */ - fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + - dn->ofs_in_node; - if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT)) - f2fs_i_size_write(dn->inode, - ((loff_t)(fofs + 1) << PAGE_SHIFT)); + /* +* i_size will be updated by direct_IO. Otherwise, we'll get stale +* data from unwritten block via dio_read. +*/ return 0; } @@ -1081,6 +1078,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, last_ofs_in_node = dn.ofs_in_node; } } else { + WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO && + flag != F2FS_GET_BLOCK_DIO); err = __allocate_data_block(&dn, map->m_seg_type); if (!err) @@ -1260,7 +1259,7 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DEFAULT, NULL, + F2FS_GET_BLOCK_DIO, NULL, f2fs_rw_hint_to_seg_type( inode->i_write_hint)); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b5c1f8e3d62f..50af57c6ecf3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -602,6 +602,7 @@ enum { F2FS_GET_BLOCK_DEFAULT, F2FS_GET_BLOCK_FIEMAP, F2FS_GET_BLOCK_BMAP, + F2FS_GET_BLOCK_DIO, F2FS_GET_BLOCK_PRE_DIO, F2FS_GET_BLOCK_PRE_AIO, F2FS_GET_BLOCK_PRECACHE, -- 2.17.0.441.gb46fe60e1d-goog ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Re: [f2fs-dev] [PATCH 2/2 v3] f2fs: avoid f2fs_bug_on if f2fs_get_meta_page_nofail got EIO
This patch avoids BUG_ON when f2fs_get_meta_page_nofail got EIO during xfstests/generic/475. Signed-off-by: Jaegeuk Kim --- Change log from v2: - fix sum_page error report - fix missing put_page for sum_page fs/f2fs/checkpoint.c | 10 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 12 fs/f2fs/node.c | 32 fs/f2fs/recovery.c | 2 ++ fs/f2fs/segment.c| 3 +++ 6 files changed, 47 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 01e0d8f5bbbe..1ddf332ce4b2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -119,11 +119,8 @@ struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index) if (PTR_ERR(page) == -EIO && ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; - f2fs_stop_checkpoint(sbi, false); - f2fs_bug_on(sbi, 1); } - return page; } @@ -1512,7 +1509,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); /* write cached NAT/SIT entries to NAT/SIT area */ - f2fs_flush_nat_entries(sbi, cpc); + err = f2fs_flush_nat_entries(sbi, cpc); + if (err) + goto stop; + f2fs_flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ @@ -1521,7 +1521,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_release_discard_addrs(sbi); else f2fs_clear_prefree_segments(sbi, cpc); - +stop: unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b18e8a44eea1..caf3b755503c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2919,7 +2919,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page); int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); -void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); int f2fs_build_node_manager(struct f2fs_sb_info *sbi); void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi); int __init f2fs_create_node_manager_caches(void); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 519af97e1f3f..e7c69a143bee 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1070,6 +1070,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, /* reference all summary page */ while (segno < end_segno) { sum_page = f2fs_get_sum_page(sbi, segno++); + if (IS_ERR(sum_page)) { + int err = PTR_ERR(sum_page); + + end_segno = segno - 1; + for (segno = start_segno; segno < end_segno; segno++) { + sum_page = find_get_page(META_MAPPING(sbi), + GET_SUM_BLOCK(sbi, segno)); + f2fs_put_page(sum_page, 0); + f2fs_put_page(sum_page, 0); + } + return err; + } unlock_page(sum_page); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index fa2381c0bc47..79b6fee354f7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -126,6 +126,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) /* get current nat block page with lock */ src_page = get_current_nat_page(sbi, nid); + if (IS_ERR(src_page)) + return src_page; dst_page = f2fs_grab_meta_page(sbi, dst_off); f2fs_bug_on(sbi, PageDirty(src_page)); @@ -2265,15 +2267,19 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, nm_i->nat_block_bitmap)) { struct page *page = get_current_nat_page(sbi, nid); - ret = scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + } else { + ret = scan_nat_page(sbi, page, nid); + f2fs_put_page(page, 1); + } if (ret) { up_read(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, !mount); f2fs_msg(sbi->sb, KERN_ERR, "NAT is corrupt, run fsck to fix it"); - return -EINVAL; + return
Re: [f2fs-dev] [PATCH 2/2 v2] f2fs: avoid f2fs_bug_on if f2fs_get_meta_page_nofail got EIO
On 09/20, Chao Yu wrote: > On 2018/9/19 1:56, Jaegeuk Kim wrote: > > This patch avoids BUG_ON when f2fs_get_meta_page_nofail got EIO during > > xfstests/generic/475. > > > > Signed-off-by: Jaegeuk Kim > > --- > > Change log from v1: > > - propagate errno > > - drop sum_pages > > > > fs/f2fs/checkpoint.c | 10 +- > > fs/f2fs/f2fs.h | 2 +- > > fs/f2fs/gc.c | 9 + > > fs/f2fs/node.c | 32 > > fs/f2fs/recovery.c | 2 ++ > > fs/f2fs/segment.c| 3 +++ > > 6 files changed, 44 insertions(+), 14 deletions(-) > > > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c > > index 01e0d8f5bbbe..1ddf332ce4b2 100644 > > --- a/fs/f2fs/checkpoint.c > > +++ b/fs/f2fs/checkpoint.c > > @@ -119,11 +119,8 @@ struct page *f2fs_get_meta_page_nofail(struct > > f2fs_sb_info *sbi, pgoff_t index) > > if (PTR_ERR(page) == -EIO && > > ++count <= DEFAULT_RETRY_IO_COUNT) > > goto retry; > > - > > f2fs_stop_checkpoint(sbi, false); > > - f2fs_bug_on(sbi, 1); > > } > > - > > return page; > > } > > > > @@ -1512,7 +1509,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, > > struct cp_control *cpc) > > ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); > > > > /* write cached NAT/SIT entries to NAT/SIT area */ > > - f2fs_flush_nat_entries(sbi, cpc); > > + err = f2fs_flush_nat_entries(sbi, cpc); > > + if (err) > > + goto stop; > > To make sure, if partial NAT pages become dirty, flush them back to device > outside checkpoint() is not harmful, right? I think so. > > > + > > f2fs_flush_sit_entries(sbi, cpc); > > > > /* unlock all the fs_lock[] in do_checkpoint() */ > > @@ -1521,7 +1521,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, > > struct cp_control *cpc) > > f2fs_release_discard_addrs(sbi); > > else > > f2fs_clear_prefree_segments(sbi, cpc); > > - > > +stop: > > unblock_operations(sbi); > > stat_inc_cp_count(sbi->stat_info); > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > > index a0c868127a7c..29021dbc8f2a 100644 > > --- a/fs/f2fs/f2fs.h > > +++ b/fs/f2fs/f2fs.h > > @@ -2895,7 +2895,7 @@ int f2fs_recover_xattr_data(struct inode *inode, > > struct page *page); > > int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); > > int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, > > unsigned int segno, struct f2fs_summary_block *sum); > > -void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control > > *cpc); > > +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control > > *cpc); > > int f2fs_build_node_manager(struct f2fs_sb_info *sbi); > > void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi); > > int __init f2fs_create_node_manager_caches(void); > > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c > > index 4bcc8a59fdef..c7d14282dbf4 100644 > > --- a/fs/f2fs/gc.c > > +++ b/fs/f2fs/gc.c > > @@ -1070,6 +1070,15 @@ static int do_garbage_collect(struct f2fs_sb_info > > *sbi, > > /* reference all summary page */ > > while (segno < end_segno) { > > sum_page = f2fs_get_sum_page(sbi, segno++); > > + if (IS_ERR(sum_page)) { > > + end_segno = segno - 1; > > + for (segno = start_segno; segno < end_segno; segno++) { > > + sum_page = find_get_page(META_MAPPING(sbi), > > + GET_SUM_BLOCK(sbi, segno)); > > + f2fs_put_page(sum_page, 0); > > find_get_page() will add one more reference on page, so we need to call > f2fs_put_page(sum_page, 0) twice. Done. > > Thanks, > > > + } > > + return PTR_ERR(sum_page); > > + } > > unlock_page(sum_page); > > } > > > > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c > > index fa2381c0bc47..79b6fee354f7 100644 > > --- a/fs/f2fs/node.c > > +++ b/fs/f2fs/node.c > > @@ -126,6 +126,8 @@ static struct page *get_next_nat_page(struct > > f2fs_sb_info *sbi, nid_t nid) > > > > /* get current nat block page with lock */ > > src_page = get_current_nat_page(sbi, nid); > > + if (IS_ERR(src_page)) > > + return src_page; > > dst_page = f2fs_grab_meta_page(sbi, dst_off); > > f2fs_bug_on(sbi, PageDirty(src_page)); > > > > @@ -2265,15 +2267,19 @@ static int __f2fs_build_free_nids(struct > > f2fs_sb_info *sbi, > > nm_i->nat_block_bitmap)) { > > struct page *page = get_current_nat_page(sbi, nid); > > > > - ret = scan_nat_page(sbi, page, nid); > > - f2fs_put_page(page, 1); > > + if (IS_ERR(page)) { > > + ret = PTR_ERR(page); > > + } else { > > + ret = sc
Re: [f2fs-dev] [PATCH] f2fs: fix quota info to adjust recovered data
On 09/20, Chao Yu wrote: > On 2018/9/20 6:38, Jaegeuk Kim wrote: > > On 09/19, Chao Yu wrote: > >> On 2018/9/19 0:45, Jaegeuk Kim wrote: > >>> On 09/18, Chao Yu wrote: > On 2018/9/18 10:05, Jaegeuk Kim wrote: > > On 09/18, Chao Yu wrote: > >> On 2018/9/18 9:19, Jaegeuk Kim wrote: > >>> On 09/13, Chao Yu wrote: > On 2018/9/13 3:54, Jaegeuk Kim wrote: > > On 09/12, Chao Yu wrote: > >> On 2018/9/12 9:40, Chao Yu wrote: > >>> On 2018/9/12 9:25, Jaegeuk Kim wrote: > On 09/12, Chao Yu wrote: > > On 2018/9/12 8:27, Jaegeuk Kim wrote: > >> On 09/11, Jaegeuk Kim wrote: > >>> On 09/12, Chao Yu wrote: > On 2018/9/12 4:15, Jaegeuk Kim wrote: > > fsck.f2fs is able to recover the quota structure, since > > roll-forward recovery > > can recover it based on previous user information. > > I didn't get it, both fsck and kernel recover quota file > based all inodes' > uid/gid/prjid, if {x}id didn't change, wouldn't those two > recovery result be the > same? > >>> > >>> I thought that, but had to add this, since I was encountering > >>> quota errors right > >>> after getting some files recovered. And, I thought it'd make > >>> it more safe to do > >>> fsck after roll-forward recovery. > >>> > >>> Anyway, let me test again without this patch for a while. > >> > >> Hmm, I just got a fsck failure right after some files > >> recovered. > > > > To make sure, do you test with "f2fs: guarantee journalled > > quota data by > > checkpoint"? if not, I think there is no guarantee that f2fs > > can recover > > quote info into correct quote file, because, in last > > checkpoint, quota file > > may was corrupted/inconsistent. Right? > >>> > >>> Oh, I forget to mention that, I add a patch to fsck to let it > >>> noticing > >>> CP_QUOTA_NEED_FSCK_FLAG flag, and by default, fsck will fix > >>> corrupted quote > >>> file if the flag is set, but w/o this flag, quota file is still > >>> corrupted > >>> detected by fsck, I guess there is bug in v8. > >> > >> In v8, there are two cases we didn't guarantee quota file's > >> consistence: > >> 1. flush time in block_operation exceed a threshold. > >> 2. dquot subsystem error occurs. > >> > >> For above case, fsck should repair the quota file by default. > > > > Okay, I got another failure and it seems CP_QUOTA_NEED_FSCK_FLAG > > was not set > > during the recovery. So, we have something missing in the recovery > > in terms > > of quota updates. > > Yeah, I checked the code, just found one suspected place: > > find_fsync_dnodes() > - f2fs_recover_inode_page > - inc_valid_node_count > - dquot_reserve_block dquot info is not initialized now > - add_fsync_inode > - dquot_initialize > > I think we should reserve block for inode block after > dquot_initialize(), can > you confirm this? > >>> > >>> Let me test this. > >>> > >>> >From b90260bc577fe87570b1ef7b134554a8295b1f6c Mon Sep 17 00:00:00 > >>> >2001 > >>> From: Jaegeuk Kim > >>> Date: Mon, 17 Sep 2018 18:14:41 -0700 > >>> Subject: [PATCH] f2fs: count inode block for recovered files > >>> > >>> If a new file is recovered, we missed to reserve its inode block. > >> > >> I remember, in order to keep line with other filesystem, unlike > >> on-disk, we > >> have to keep backward compatibilty, in memory we don't account block > >> number > >> for f2fs' inode block, but only account inode number for it, so here > >> like > >> we did in inc_valid_node_count(), we don't need to do this. > > > > Okay, I just hit the error again w/o your patch. Another one coming to > > my mind > > is that caused by uid/gid change during recovery. Let me try out your > > patch. > > I guess we should update dquot and inode's uid/gid atomically under > lock_op() in f2fs_setattr() to prevent corruption on sys quota file. > > v9 can pass all xfstest cases and por_fsstress case w/ sys quota file > enabled, but w/ normal quota file, I got one regression reported by > generic/232, I fixed in v10, will do some tests and release it later. > > Note that, my fsck can fix corrupted quota file automatically once > CP_QUOTA_NEED_FSCK_FLAG is set. > >>> > >>
Re: [f2fs-dev] [PATCH] f2fs: fix to recover inode's uid/gid during POR
On 09/20, Chao Yu wrote: > Step to reproduce this bug: > 1. logon as root > 2. mount -t f2fs /dev/sdd /mnt; > 3. touch /mnt/file; > 4. chown system /mnt/file; chgrp system /mnt/file; > 5. xfs_io -f /mnt/file -c "fsync"; > 6. godown /mnt; > 7. umount /mnt; > 8. mount -t f2fs /dev/sdd /mnt; > > After step 8) we will expect file's uid/gid are all system, but during > recovery, these two fields were not been recovered, fix it. Hmm, I tried this before, but wasn't enough to fix everything. :( > > Signed-off-by: Chao Yu > --- > fs/f2fs/recovery.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c > index ba678efe7cad..da9f59b9044e 100644 > --- a/fs/f2fs/recovery.c > +++ b/fs/f2fs/recovery.c > @@ -209,6 +209,8 @@ static void recover_inode(struct inode *inode, struct > page *page) > char *name; > > inode->i_mode = le16_to_cpu(raw->i_mode); > + i_uid_write(inode, le32_to_cpu(raw->i_uid)); > + i_gid_write(inode, le32_to_cpu(raw->i_gid)); > f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); > inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); > inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); > -- > 2.18.0.rc1 ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Re: [f2fs-dev] [PATCH 4/5] f2fs-tools: introduce sb checksum
On 09/20, Junling Zheng wrote: > Hi, Jaegeuk > > On 2018/9/20 7:35, Jaegeuk Kim wrote: > > On 09/19, Junling Zheng wrote: > >> This patch introduced crc for superblock. > >> > >> Signed-off-by: Junling Zheng > >> --- > >> fsck/mount.c | 33 + > >> fsck/resize.c | 12 ++-- > >> include/f2fs_fs.h | 6 +- > >> mkfs/f2fs_format.c | 8 > >> 4 files changed, 52 insertions(+), 7 deletions(-) > >> > >> diff --git a/fsck/mount.c b/fsck/mount.c > >> index 74ff7c6..9019921 100644 > >> --- a/fsck/mount.c > >> +++ b/fsck/mount.c > >> @@ -340,6 +340,7 @@ void print_raw_sb_info(struct f2fs_super_block *sb) > >>DISP_u32(sb, node_ino); > >>DISP_u32(sb, meta_ino); > >>DISP_u32(sb, cp_payload); > >> + DISP_u32(sb, crc); > >>DISP("%-.256s", sb, version); > >>printf("\n"); > >> } > >> @@ -467,6 +468,9 @@ void print_sb_state(struct f2fs_super_block *sb) > >>if (f & cpu_to_le32(F2FS_FEATURE_LOST_FOUND)) { > >>MSG(0, "%s", " lost_found"); > >>} > >> + if (f & cpu_to_le32(F2FS_FEATURE_SB_CHKSUM)) { > >> + MSG(0, "%s", " sb_checksum"); > >> + } > >>MSG(0, "\n"); > >>MSG(0, "Info: superblock encrypt level = %d, salt = ", > >>sb->encryption_level); > >> @@ -479,10 +483,20 @@ void update_superblock(struct f2fs_super_block *sb, > >> int sb_mask) > >> { > >>int index, ret; > >>u_int8_t *buf; > >> + u32 old_crc, new_crc; > >> > >>buf = calloc(BLOCK_SZ, 1); > >>ASSERT(buf); > >> > >> + if (get_sb(feature) & F2FS_FEATURE_SB_CHKSUM) { > >> + old_crc = get_sb(crc); > >> + new_crc = f2fs_cal_crc32(F2FS_SUPER_MAGIC, sb, > >> + SB_CHKSUM_OFFSET); > >> + set_sb(crc, new_crc); > >> + MSG(1, "Info: SB CRC is updated (0x%x -> 0x%x)\n", > >> + old_crc, new_crc); > >> + } > >> + > >>memcpy(buf + F2FS_SUPER_OFFSET, sb, sizeof(*sb)); > >>for (index = 0; index < SB_ADDR_MAX; index++) { > >>if ((1 << index) & sb_mask) { > >> @@ -575,10 +589,29 @@ static inline int sanity_check_area_boundary(struct > >> f2fs_super_block *sb, > >>return 0; > >> } > >> > >> +static int verify_sb_chksum(struct f2fs_super_block *sb) > >> +{ > >> + if (SB_CHKSUM_OFFSET != get_sb(checksum_offset)) { > >> + MSG(0, "\tInvalid SB CRC offset: %u\n", > >> + get_sb(checksum_offset)); > >> + return -1; > >> + } > >> + if (f2fs_crc_valid(get_sb(crc), sb, > >> + get_sb(checksum_offset))) { > >> + MSG(0, "\tInvalid SB CRC: 0x%x\n", get_sb(crc)); > >> + return -1; > >> + } > >> + return 0; > >> +} > >> + > >> int sanity_check_raw_super(struct f2fs_super_block *sb, enum SB_ADDR > >> sb_addr) > >> { > >>unsigned int blocksize; > >> > >> + if ((get_sb(feature) & F2FS_FEATURE_SB_CHKSUM) && > >> + verify_sb_chksum(sb)) > >> + return -1; > >> + > >>if (F2FS_SUPER_MAGIC != get_sb(magic)) > >>return -1; > >> > >> diff --git a/fsck/resize.c b/fsck/resize.c > >> index 5161a1f..3462165 100644 > >> --- a/fsck/resize.c > >> +++ b/fsck/resize.c > >> @@ -603,9 +603,6 @@ static int f2fs_resize_grow(struct f2fs_sb_info *sbi) > >>} > >>} > >> > >> - print_raw_sb_info(sb); > >> - print_raw_sb_info(new_sb); > > > > It'd be worth to keep this to show the previous states. > > > > Here, I just want to move the printing of sb and new_sb to the place > behind update_superblock(), where the crc in sb will be updated. We'd better to see the changes, no? > > >> - > >>old_main_blkaddr = get_sb(main_blkaddr); > >>new_main_blkaddr = get_newsb(main_blkaddr); > >>offset = new_main_blkaddr - old_main_blkaddr; > >> @@ -629,6 +626,9 @@ static int f2fs_resize_grow(struct f2fs_sb_info *sbi) > >>migrate_sit(sbi, new_sb, offset_seg); > >>rebuild_checkpoint(sbi, new_sb, offset_seg); > >>update_superblock(new_sb, SB_ALL); > >> + print_raw_sb_info(sb); > >> + print_raw_sb_info(new_sb); > >> + > >>return 0; > >> } > >> > >> @@ -658,9 +658,6 @@ static int f2fs_resize_shrink(struct f2fs_sb_info *sbi) > >>} > >>} > >> > >> - print_raw_sb_info(sb); > >> - print_raw_sb_info(new_sb); > > > > Ditto. > > > >> - > >>old_main_blkaddr = get_sb(main_blkaddr); > >>new_main_blkaddr = get_newsb(main_blkaddr); > >>offset = old_main_blkaddr - new_main_blkaddr; > >> @@ -690,6 +687,9 @@ static int f2fs_resize_shrink(struct f2fs_sb_info *sbi) > >>/* move whole data region */ > >>//if (err) > >>// migrate_main(sbi, offset); > >> + print_raw_sb_info(sb); > >> + print_raw_sb_info(new_sb); > >> + > >>return 0; > >> } > >> > >> diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h > >> index 38a0da4..f80632a 100644 > >> --- a/include/f2fs_fs.h >
Re: [f2fs-dev] [PATCH 1/2 v3] f2fs: report ENOENT correct in f2fs_rename
This fixes wrong error report in f2fs_rename. Signed-off-by: Jaegeuk Kim --- Change log from v2: - remove needless err assignment - relocate err precisely fs/f2fs/namei.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 98d3ab7c3ce6..abbad9610fad 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -833,7 +833,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; bool is_old_inline = f2fs_has_inline_dentry(old_dir); - int err = -ENOENT; + int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; @@ -860,6 +860,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -1025,7 +1026,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; struct f2fs_dir_entry *old_entry, *new_entry; int old_nlink = 0, new_nlink = 0; - int err = -ENOENT; + int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; @@ -1049,6 +1050,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out; + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) -- 2.17.0.441.gb46fe60e1d-goog ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
[f2fs-dev] [Bug 200465] null ptr dereference in fscrypt_do_page_crypto() when operating a file on a corrupted f2fs image
https://bugzilla.kernel.org/show_bug.cgi?id=200465 --- Comment #4 from Wen Xu (wen...@gatech.edu) --- (In reply to Chao Yu from comment #3) > Steve, > > I figure out that patch to solve issue which I encounter with image attached > by Wen Xu, the bug can be triggered with below scripts: > - mount image /mnt/f2fs/ > - cd /mnt/f2fs/foo/bar/ > - ls -l > > After applying that patch, the problem was gone. > > But when the bug triggeres, related call stack is not the same as reported > one, also I can't reproduce reported call stack with the method provided > from Wen Xu. > > I guess the right producing way is adding master key for encrypted file, I'd > like to confirm with Wen Xu. Hi Chao, Sorry for a late reply! Eh the first thing is that I never did anything like adding master key for encrypted file. Second, I feel I pasted wrong (mismatched) kernel message/PoC...but unfortunately I do not have a local copy on my laptop now. -- You are receiving this mail because: You are watching the assignee of the bug. ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
[f2fs-dev] [PATCH v3] f2fs: allow out-place-update for direct IO in LFS mode
From: Chao Yu Normally, DIO uses in-pllace-update, but in LFS mode, f2fs doesn't allow triggering any in-place-update writes, so we fallback direct write to buffered write, result in bad performance of large size write. This patch adds to support triggering out-place-update for direct IO to enhance its performance. Note that it needs to exclude direct read IO during direct write, since new data writing to new block address will no be valid until write finished. storage: zram time xfs_io -f -d /mnt/f2fs/file -c "pwrite 0 1073741824" -c "fsync" Before: real0m13.061s user0m0.327s sys 0m12.486s After: real0m6.448s user0m0.228s sys 0m6.212s Signed-off-by: Chao Yu --- v3: - don't set FI_UPDATE_WRITE if we do OPU for DIO pointed out by Sahitya Tummala. fs/f2fs/data.c | 44 +++- fs/f2fs/f2fs.h | 45 + fs/f2fs/file.c | 3 ++- 3 files changed, 78 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b96f8588d565..38d5baa1c35d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -894,7 +894,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) dn->data_blkaddr = datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node); - if (dn->data_blkaddr == NEW_ADDR) + if (dn->data_blkaddr != NULL_ADDR) goto alloc; if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count @@ -950,7 +950,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) if (direct_io) { map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); - flag = f2fs_force_buffered_io(inode, WRITE) ? + flag = f2fs_force_buffered_io(inode, iocb, from) ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; goto map_blocks; @@ -1069,7 +1069,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, goto sync_out; } - if (!is_valid_data_blkaddr(sbi, blkaddr)) { + if (is_valid_data_blkaddr(sbi, blkaddr)) { + /* use out-place-update for driect IO under LFS mode */ + if (test_opt(sbi, LFS) && create && + flag == F2FS_GET_BLOCK_DEFAULT) { + err = __allocate_data_block(&dn, map->m_seg_type); + if (!err) + set_inode_flag(inode, FI_APPEND_WRITE); + } + } else { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -2493,36 +2501,53 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; int rw = iov_iter_rw(iter); int err; enum rw_hint hint = iocb->ki_hint; int whint_mode = F2FS_OPTION(sbi).whint_mode; + bool do_opu; err = check_direct_IO(inode, iter, offset); if (err) return err < 0 ? err : 0; - if (f2fs_force_buffered_io(inode, rw)) + if (f2fs_force_buffered_io(inode, iocb, iter)) return 0; + do_opu = allow_outplace_dio(inode, iocb, iter); + trace_f2fs_direct_IO_enter(inode, offset, count, rw); if (rw == WRITE && whint_mode == WHINT_MODE_OFF) iocb->ki_hint = WRITE_LIFE_NOT_SET; - if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) { - if (iocb->ki_flags & IOCB_NOWAIT) { + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!down_read_trylock(&fi->i_gc_rwsem[rw])) { + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) { + up_read(&fi->i_gc_rwsem[rw]); iocb->ki_hint = hint; err = -EAGAIN; goto out; } - down_read(&F2FS_I(inode)->i_gc_rwsem[rw]); + } else { + down_read(&fi->i_gc_rwsem[rw]); + if (do_opu) + down_read(&fi->i_gc_rwsem[READ]); } err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); - up_read(&F2FS_I(inode)->i_gc_rwsem[rw]); + + if (do_opu) + up_read(&fi->i_gc_rwsem[READ]); + + up_read(&fi->i_gc_rwsem[rw]); if (rw == WRITE) { if (whint_mode == WHINT_MODE
Re: [f2fs-dev] [PATCH v2] f2fs: allow out-place-update for direct IO in LFS mode
On 2018/9/20 19:25, Sahitya Tummala wrote: > On Thu, Sep 20, 2018 at 04:57:18PM +0800, Chao Yu wrote: >> Normally, DIO uses in-pllace-update, but in LFS mode, f2fs doesn't >> allow triggering any in-place-update writes, so we fallback direct >> write to buffered write, result in bad performance of large size >> write. >> >> This patch adds to support triggering out-place-update for direct IO >> to enhance its performance. >> >> Note that it needs to exclude direct read IO during direct write, >> since new data writing to new block address will no be valid until >> write finished. >> >> storage: zram >> >> time xfs_io -f -d /mnt/f2fs/file -c "pwrite 0 1073741824" -c "fsync" >> >> Before: >> real 0m13.061s >> user 0m0.327s >> sys 0m12.486s >> >> After: >> real 0m6.448s >> user 0m0.228s >> sys 0m6.212s >> >> Signed-off-by: Chao Yu >> --- >> v2: >> - don't use direct IO for block zoned device. >> fs/f2fs/data.c | 41 + >> fs/f2fs/f2fs.h | 45 + >> fs/f2fs/file.c | 3 ++- >> 3 files changed, 76 insertions(+), 13 deletions(-) >> >> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c >> index b96f8588d565..e709f0fbb7a8 100644 >> --- a/fs/f2fs/data.c >> +++ b/fs/f2fs/data.c >> @@ -894,7 +894,7 @@ static int __allocate_data_block(struct dnode_of_data >> *dn, int seg_type) >> >> dn->data_blkaddr = datablock_addr(dn->inode, >> dn->node_page, dn->ofs_in_node); >> -if (dn->data_blkaddr == NEW_ADDR) >> +if (dn->data_blkaddr != NULL_ADDR) >> goto alloc; >> >> if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count >> @@ -950,7 +950,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct >> iov_iter *from) >> >> if (direct_io) { >> map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); >> -flag = f2fs_force_buffered_io(inode, WRITE) ? >> +flag = f2fs_force_buffered_io(inode, iocb, from) ? >> F2FS_GET_BLOCK_PRE_AIO : >> F2FS_GET_BLOCK_PRE_DIO; >> goto map_blocks; >> @@ -1069,7 +1069,15 @@ int f2fs_map_blocks(struct inode *inode, struct >> f2fs_map_blocks *map, >> goto sync_out; >> } >> >> -if (!is_valid_data_blkaddr(sbi, blkaddr)) { >> +if (is_valid_data_blkaddr(sbi, blkaddr)) { >> +/* use out-place-update for driect IO under LFS mode */ >> +if (test_opt(sbi, LFS) && create && >> +flag == F2FS_GET_BLOCK_DEFAULT) { >> +err = __allocate_data_block(&dn, map->m_seg_type); >> +if (!err) >> +set_inode_flag(inode, FI_APPEND_WRITE); >> +} >> +} else { >> if (create) { >> if (unlikely(f2fs_cp_error(sbi))) { >> err = -EIO; >> @@ -2493,36 +2501,53 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, >> struct iov_iter *iter) >> struct address_space *mapping = iocb->ki_filp->f_mapping; >> struct inode *inode = mapping->host; >> struct f2fs_sb_info *sbi = F2FS_I_SB(inode); >> +struct f2fs_inode_info *fi = F2FS_I(inode); >> size_t count = iov_iter_count(iter); >> loff_t offset = iocb->ki_pos; >> int rw = iov_iter_rw(iter); >> int err; >> enum rw_hint hint = iocb->ki_hint; >> int whint_mode = F2FS_OPTION(sbi).whint_mode; >> +bool lock_read; >> >> err = check_direct_IO(inode, iter, offset); >> if (err) >> return err < 0 ? err : 0; >> >> -if (f2fs_force_buffered_io(inode, rw)) >> +if (f2fs_force_buffered_io(inode, iocb, iter)) >> return 0; >> >> +lock_read = allow_outplace_dio(inode, iocb, iter); >> + >> trace_f2fs_direct_IO_enter(inode, offset, count, rw); >> >> if (rw == WRITE && whint_mode == WHINT_MODE_OFF) >> iocb->ki_hint = WRITE_LIFE_NOT_SET; >> >> -if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) { >> -if (iocb->ki_flags & IOCB_NOWAIT) { >> +if (iocb->ki_flags & IOCB_NOWAIT) { >> +if (!down_read_trylock(&fi->i_gc_rwsem[rw])) { >> +iocb->ki_hint = hint; >> +err = -EAGAIN; >> +goto out; >> +} >> +if (lock_read && !down_read_trylock(&fi->i_gc_rwsem[READ])) { >> +up_read(&fi->i_gc_rwsem[rw]); >> iocb->ki_hint = hint; >> err = -EAGAIN; >> goto out; >> } >> -down_read(&F2FS_I(inode)->i_gc_rwsem[rw]); >> +} else { >> +down_read(&fi->i_gc_rwsem[rw]); >> +if (lock_read) >> +down_read(&fi->i_gc_rwsem[READ]); >> } >> >> err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); >> -up_read(&F2FS_I(
[f2fs-dev] [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
From: Chao Yu For journalled quota mode, let checkpoint to flush dquot dirty data and quota file data to guarntee persistence of all quota sysfile in last checkpoint, by this way, we can avoid corrupting quota sysfile when encountering SPO. The implementation is as below: 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is cached dquot metadata changes in quota subsystem, and later checkpoint should: a) flush dquot metadata into quota file. b) flush quota file to storage to keep file usage be consistent. 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota operation failed due to -EIO or -ENOSPC, so later, a) checkpoint will skip syncing dquot metadata. b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a hint for fsck repairing. 3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota data updating is very heavy, it may cause hungtask in block_operation(). To avoid this, if our retry time exceed threshold, let's just skip flushing and retry in next checkpoint(). Signed-off-by: Weichao Guo Signed-off-by: Chao Yu --- v11: - transfer quota data if fsynced inode's i_{u,g}id changed during recovery. fs/f2fs/checkpoint.c| 56 +-- fs/f2fs/data.c | 18 -- fs/f2fs/f2fs.h | 50 ++--- fs/f2fs/file.c | 31 --- fs/f2fs/inline.c| 4 +- fs/f2fs/inode.c | 11 +++- fs/f2fs/namei.c | 4 -- fs/f2fs/recovery.c | 43 +- fs/f2fs/super.c | 120 include/linux/f2fs_fs.h | 1 + 10 files changed, 289 insertions(+), 49 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d312d2829d5a..d624d7983197 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1083,6 +1083,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) ckpt->next_free_nid = cpu_to_le32(last_nid); } +static bool __need_flush_quota(struct f2fs_sb_info *sbi) +{ + if (!is_journalled_quota(sbi)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) + return true; + if (get_pages(sbi, F2FS_DIRTY_QDATA)) + return true; + return false; +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -1094,12 +1109,30 @@ static int block_operations(struct f2fs_sb_info *sbi) .for_reclaim = 0, }; struct blk_plug plug; - int err = 0; + int err = 0, cnt = 0; blk_start_plug(&plug); -retry_flush_dents: +retry_flush_quotas: + if (__need_flush_quota(sbi)) { + if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { + set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + f2fs_lock_all(sbi); + goto retry_flush_dents; + } + clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + + f2fs_quota_sync(sbi->sb, -1); + } + f2fs_lock_all(sbi); + if (__need_flush_quota(sbi)) { + f2fs_unlock_all(sbi); + cond_resched(); + goto retry_flush_quotas; + } + +retry_flush_dents: /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); @@ -1107,7 +1140,7 @@ static int block_operations(struct f2fs_sb_info *sbi) if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } /* @@ -1116,6 +1149,12 @@ static int block_operations(struct f2fs_sb_info *sbi) */ down_write(&sbi->node_change); + if (__need_flush_quota(sbi)) { + up_write(&sbi->node_change); + f2fs_unlock_all(sbi); + goto retry_flush_quotas; + } + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1123,7 +1162,7 @@ static int block_operations(struct f2fs_sb_info *sbi) if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } retry_flush_nodes: @@ -1214,6 +1253,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + else + __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) +
Re: [f2fs-dev] [PATCH v2] f2fs: allow out-place-update for direct IO in LFS mode
On Thu, Sep 20, 2018 at 04:57:18PM +0800, Chao Yu wrote: > Normally, DIO uses in-pllace-update, but in LFS mode, f2fs doesn't > allow triggering any in-place-update writes, so we fallback direct > write to buffered write, result in bad performance of large size > write. > > This patch adds to support triggering out-place-update for direct IO > to enhance its performance. > > Note that it needs to exclude direct read IO during direct write, > since new data writing to new block address will no be valid until > write finished. > > storage: zram > > time xfs_io -f -d /mnt/f2fs/file -c "pwrite 0 1073741824" -c "fsync" > > Before: > real 0m13.061s > user 0m0.327s > sys 0m12.486s > > After: > real 0m6.448s > user 0m0.228s > sys 0m6.212s > > Signed-off-by: Chao Yu > --- > v2: > - don't use direct IO for block zoned device. > fs/f2fs/data.c | 41 + > fs/f2fs/f2fs.h | 45 + > fs/f2fs/file.c | 3 ++- > 3 files changed, 76 insertions(+), 13 deletions(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index b96f8588d565..e709f0fbb7a8 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -894,7 +894,7 @@ static int __allocate_data_block(struct dnode_of_data > *dn, int seg_type) > > dn->data_blkaddr = datablock_addr(dn->inode, > dn->node_page, dn->ofs_in_node); > - if (dn->data_blkaddr == NEW_ADDR) > + if (dn->data_blkaddr != NULL_ADDR) > goto alloc; > > if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count > @@ -950,7 +950,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct > iov_iter *from) > > if (direct_io) { > map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); > - flag = f2fs_force_buffered_io(inode, WRITE) ? > + flag = f2fs_force_buffered_io(inode, iocb, from) ? > F2FS_GET_BLOCK_PRE_AIO : > F2FS_GET_BLOCK_PRE_DIO; > goto map_blocks; > @@ -1069,7 +1069,15 @@ int f2fs_map_blocks(struct inode *inode, struct > f2fs_map_blocks *map, > goto sync_out; > } > > - if (!is_valid_data_blkaddr(sbi, blkaddr)) { > + if (is_valid_data_blkaddr(sbi, blkaddr)) { > + /* use out-place-update for driect IO under LFS mode */ > + if (test_opt(sbi, LFS) && create && > + flag == F2FS_GET_BLOCK_DEFAULT) { > + err = __allocate_data_block(&dn, map->m_seg_type); > + if (!err) > + set_inode_flag(inode, FI_APPEND_WRITE); > + } > + } else { > if (create) { > if (unlikely(f2fs_cp_error(sbi))) { > err = -EIO; > @@ -2493,36 +2501,53 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, > struct iov_iter *iter) > struct address_space *mapping = iocb->ki_filp->f_mapping; > struct inode *inode = mapping->host; > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > + struct f2fs_inode_info *fi = F2FS_I(inode); > size_t count = iov_iter_count(iter); > loff_t offset = iocb->ki_pos; > int rw = iov_iter_rw(iter); > int err; > enum rw_hint hint = iocb->ki_hint; > int whint_mode = F2FS_OPTION(sbi).whint_mode; > + bool lock_read; > > err = check_direct_IO(inode, iter, offset); > if (err) > return err < 0 ? err : 0; > > - if (f2fs_force_buffered_io(inode, rw)) > + if (f2fs_force_buffered_io(inode, iocb, iter)) > return 0; > > + lock_read = allow_outplace_dio(inode, iocb, iter); > + > trace_f2fs_direct_IO_enter(inode, offset, count, rw); > > if (rw == WRITE && whint_mode == WHINT_MODE_OFF) > iocb->ki_hint = WRITE_LIFE_NOT_SET; > > - if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) { > - if (iocb->ki_flags & IOCB_NOWAIT) { > + if (iocb->ki_flags & IOCB_NOWAIT) { > + if (!down_read_trylock(&fi->i_gc_rwsem[rw])) { > + iocb->ki_hint = hint; > + err = -EAGAIN; > + goto out; > + } > + if (lock_read && !down_read_trylock(&fi->i_gc_rwsem[READ])) { > + up_read(&fi->i_gc_rwsem[rw]); > iocb->ki_hint = hint; > err = -EAGAIN; > goto out; > } > - down_read(&F2FS_I(inode)->i_gc_rwsem[rw]); > + } else { > + down_read(&fi->i_gc_rwsem[rw]); > + if (lock_read) > + down_read(&fi->i_gc_rwsem[READ]); > } > > err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); > - up_read(&F2FS_I(inode)->i_gc_rwsem[rw]); > + > + if (lock_read) > + up_read(&fi->i_gc_rws
Re: [f2fs-dev] [PATCH] f2fs: fix quota info to adjust recovered data
On 2018/9/20 6:38, Jaegeuk Kim wrote: > On 09/19, Chao Yu wrote: >> On 2018/9/19 0:45, Jaegeuk Kim wrote: >>> On 09/18, Chao Yu wrote: On 2018/9/18 10:05, Jaegeuk Kim wrote: > On 09/18, Chao Yu wrote: >> On 2018/9/18 9:19, Jaegeuk Kim wrote: >>> On 09/13, Chao Yu wrote: On 2018/9/13 3:54, Jaegeuk Kim wrote: > On 09/12, Chao Yu wrote: >> On 2018/9/12 9:40, Chao Yu wrote: >>> On 2018/9/12 9:25, Jaegeuk Kim wrote: On 09/12, Chao Yu wrote: > On 2018/9/12 8:27, Jaegeuk Kim wrote: >> On 09/11, Jaegeuk Kim wrote: >>> On 09/12, Chao Yu wrote: On 2018/9/12 4:15, Jaegeuk Kim wrote: > fsck.f2fs is able to recover the quota structure, since > roll-forward recovery > can recover it based on previous user information. I didn't get it, both fsck and kernel recover quota file based all inodes' uid/gid/prjid, if {x}id didn't change, wouldn't those two recovery result be the same? >>> >>> I thought that, but had to add this, since I was encountering >>> quota errors right >>> after getting some files recovered. And, I thought it'd make it >>> more safe to do >>> fsck after roll-forward recovery. >>> >>> Anyway, let me test again without this patch for a while. >> >> Hmm, I just got a fsck failure right after some files recovered. > > To make sure, do you test with "f2fs: guarantee journalled quota > data by > checkpoint"? if not, I think there is no guarantee that f2fs can > recover > quote info into correct quote file, because, in last checkpoint, > quota file > may was corrupted/inconsistent. Right? >>> >>> Oh, I forget to mention that, I add a patch to fsck to let it >>> noticing >>> CP_QUOTA_NEED_FSCK_FLAG flag, and by default, fsck will fix >>> corrupted quote >>> file if the flag is set, but w/o this flag, quota file is still >>> corrupted >>> detected by fsck, I guess there is bug in v8. >> >> In v8, there are two cases we didn't guarantee quota file's >> consistence: >> 1. flush time in block_operation exceed a threshold. >> 2. dquot subsystem error occurs. >> >> For above case, fsck should repair the quota file by default. > > Okay, I got another failure and it seems CP_QUOTA_NEED_FSCK_FLAG was > not set > during the recovery. So, we have something missing in the recovery in > terms > of quota updates. Yeah, I checked the code, just found one suspected place: find_fsync_dnodes() - f2fs_recover_inode_page - inc_valid_node_count - dquot_reserve_block dquot info is not initialized now - add_fsync_inode - dquot_initialize I think we should reserve block for inode block after dquot_initialize(), can you confirm this? >>> >>> Let me test this. >>> >>> >From b90260bc577fe87570b1ef7b134554a8295b1f6c Mon Sep 17 00:00:00 2001 >>> From: Jaegeuk Kim >>> Date: Mon, 17 Sep 2018 18:14:41 -0700 >>> Subject: [PATCH] f2fs: count inode block for recovered files >>> >>> If a new file is recovered, we missed to reserve its inode block. >> >> I remember, in order to keep line with other filesystem, unlike on-disk, >> we >> have to keep backward compatibilty, in memory we don't account block >> number >> for f2fs' inode block, but only account inode number for it, so here like >> we did in inc_valid_node_count(), we don't need to do this. > > Okay, I just hit the error again w/o your patch. Another one coming to my > mind > is that caused by uid/gid change during recovery. Let me try out your > patch. I guess we should update dquot and inode's uid/gid atomically under lock_op() in f2fs_setattr() to prevent corruption on sys quota file. v9 can pass all xfstest cases and por_fsstress case w/ sys quota file enabled, but w/ normal quota file, I got one regression reported by generic/232, I fixed in v10, will do some tests and release it later. Note that, my fsck can fix corrupted quota file automatically once CP_QUOTA_NEED_FSCK_FLAG is set. >>> >>> I hit failures again with your v9 w/ sysfile quota and modified fsck to >>> detect >> >> That's strange, in my environment, before v9, I always encounter corrupted >> quota sysfile after step 9), after v9, I never hit failure again. >> >> 1) enable fault injection >> 2) run fsstress
[f2fs-dev] [PATCH] f2fs: fix to recover inode's uid/gid during POR
Step to reproduce this bug: 1. logon as root 2. mount -t f2fs /dev/sdd /mnt; 3. touch /mnt/file; 4. chown system /mnt/file; chgrp system /mnt/file; 5. xfs_io -f /mnt/file -c "fsync"; 6. godown /mnt; 7. umount /mnt; 8. mount -t f2fs /dev/sdd /mnt; After step 8) we will expect file's uid/gid are all system, but during recovery, these two fields were not been recovered, fix it. Signed-off-by: Chao Yu --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ba678efe7cad..da9f59b9044e 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -209,6 +209,8 @@ static void recover_inode(struct inode *inode, struct page *page) char *name; inode->i_mode = le16_to_cpu(raw->i_mode); + i_uid_write(inode, le32_to_cpu(raw->i_uid)); + i_gid_write(inode, le32_to_cpu(raw->i_gid)); f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); -- 2.18.0.rc1 ___ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
[f2fs-dev] [PATCH v2] f2fs: allow out-place-update for direct IO in LFS mode
Normally, DIO uses in-pllace-update, but in LFS mode, f2fs doesn't allow triggering any in-place-update writes, so we fallback direct write to buffered write, result in bad performance of large size write. This patch adds to support triggering out-place-update for direct IO to enhance its performance. Note that it needs to exclude direct read IO during direct write, since new data writing to new block address will no be valid until write finished. storage: zram time xfs_io -f -d /mnt/f2fs/file -c "pwrite 0 1073741824" -c "fsync" Before: real0m13.061s user0m0.327s sys 0m12.486s After: real0m6.448s user0m0.228s sys 0m6.212s Signed-off-by: Chao Yu --- v2: - don't use direct IO for block zoned device. fs/f2fs/data.c | 41 + fs/f2fs/f2fs.h | 45 + fs/f2fs/file.c | 3 ++- 3 files changed, 76 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b96f8588d565..e709f0fbb7a8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -894,7 +894,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) dn->data_blkaddr = datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node); - if (dn->data_blkaddr == NEW_ADDR) + if (dn->data_blkaddr != NULL_ADDR) goto alloc; if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count @@ -950,7 +950,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) if (direct_io) { map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); - flag = f2fs_force_buffered_io(inode, WRITE) ? + flag = f2fs_force_buffered_io(inode, iocb, from) ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; goto map_blocks; @@ -1069,7 +1069,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, goto sync_out; } - if (!is_valid_data_blkaddr(sbi, blkaddr)) { + if (is_valid_data_blkaddr(sbi, blkaddr)) { + /* use out-place-update for driect IO under LFS mode */ + if (test_opt(sbi, LFS) && create && + flag == F2FS_GET_BLOCK_DEFAULT) { + err = __allocate_data_block(&dn, map->m_seg_type); + if (!err) + set_inode_flag(inode, FI_APPEND_WRITE); + } + } else { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -2493,36 +2501,53 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; int rw = iov_iter_rw(iter); int err; enum rw_hint hint = iocb->ki_hint; int whint_mode = F2FS_OPTION(sbi).whint_mode; + bool lock_read; err = check_direct_IO(inode, iter, offset); if (err) return err < 0 ? err : 0; - if (f2fs_force_buffered_io(inode, rw)) + if (f2fs_force_buffered_io(inode, iocb, iter)) return 0; + lock_read = allow_outplace_dio(inode, iocb, iter); + trace_f2fs_direct_IO_enter(inode, offset, count, rw); if (rw == WRITE && whint_mode == WHINT_MODE_OFF) iocb->ki_hint = WRITE_LIFE_NOT_SET; - if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) { - if (iocb->ki_flags & IOCB_NOWAIT) { + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!down_read_trylock(&fi->i_gc_rwsem[rw])) { + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + if (lock_read && !down_read_trylock(&fi->i_gc_rwsem[READ])) { + up_read(&fi->i_gc_rwsem[rw]); iocb->ki_hint = hint; err = -EAGAIN; goto out; } - down_read(&F2FS_I(inode)->i_gc_rwsem[rw]); + } else { + down_read(&fi->i_gc_rwsem[rw]); + if (lock_read) + down_read(&fi->i_gc_rwsem[READ]); } err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); - up_read(&F2FS_I(inode)->i_gc_rwsem[rw]); + + if (lock_read) + up_read(&fi->i_gc_rwsem[READ]); + + up_read(&fi->i_gc_rwsem[rw]); if (rw == WRITE) { if (whint_mode == WHINT_MODE_OFF) diff --git a/fs/f2fs/f2fs.h b/fs/