Re: [f2fs-dev] [PATCH v2] f2fs: check blkaddr more accurately before issuing a bio

2018-03-05 Thread Sahitya Tummala
On Mon, Mar 05, 2018 at 03:07:06PM +0800, Yunlei He wrote:
> This patch checks blkaddr more accurately before issuing a
> write or read bio.
> 
> Signed-off-by: Yunlei He 
> ---
>  fs/f2fs/checkpoint.c |  2 ++
>  fs/f2fs/data.c   |  5 +++--
>  fs/f2fs/f2fs.h   |  1 +
>  fs/f2fs/segment.h| 29 +
>  4 files changed, 31 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 5b2db75..bf77946 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -68,6 +68,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info 
> *sbi, pgoff_t index,
>   .old_blkaddr = index,
>   .new_blkaddr = index,
>   .encrypted_page = NULL,
> + .is_meta = is_meta,
>   };
>  
>   if (unlikely(!is_meta))
> @@ -162,6 +163,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t 
> start, int nrpages,
>   .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
>   .encrypted_page = NULL,
>   .in_list = false,
> + .is_meta = (type != META_POR),
>   };
>   struct blk_plug plug;
>  
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 6c3c978..de036e8 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -383,6 +383,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
>   struct page *page = fio->encrypted_page ?
>   fio->encrypted_page : fio->page;
>  
> + verify_block_addr(fio, fio->new_blkaddr);
>   trace_f2fs_submit_page_bio(page, fio);
>   f2fs_trace_ios(fio, 0);
>  
> @@ -428,8 +429,8 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio)
>   }
>  
>   if (fio->old_blkaddr != NEW_ADDR)
> - verify_block_addr(sbi, fio->old_blkaddr);
> - verify_block_addr(sbi, fio->new_blkaddr);
> + verify_block_addr(fio, fio->old_blkaddr);
> + verify_block_addr(fio, fio->new_blkaddr);
>  
>   bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
>  
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index f6dc706..842adea 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -980,6 +980,7 @@ struct f2fs_io_info {
>   bool submitted; /* indicate IO submission */
>   int need_lock;  /* indicate we need to lock cp_rwsem */
>   bool in_list;   /* indicate fio is in io_list */
> + bool is_meta;   /* indicate borrow meta inode mapping or not */
>   enum iostat_type io_type;   /* io type */
>   struct writeback_control *io_wbc; /* writeback control */
>  };
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index dbb774aa..dabe400 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -632,10 +632,31 @@ static inline void check_seg_range(struct f2fs_sb_info 
> *sbi, unsigned int segno)
>   f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
>  }
>  
> -static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t 
> blk_addr)
> -{
> - BUG_ON(blk_addr < SEG0_BLKADDR(sbi)
> - || blk_addr >= MAX_BLKADDR(sbi));
> +static inline void verify_block_addr(struct f2fs_io_info *fio, block_t 
> blk_addr)
> +{
> + struct f2fs_sb_info *sbi = fio->sbi;
> + enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
> + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> + block_t seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
> + block_t main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
> + block_t max_blkaddr = (le32_to_cpu(raw_super->segment_count)
> + << sbi->log_blocks_per_seg) + seg0_blkaddr;

Why not use the existing macros - MAIN_BLKADDR, SEG0_BLKADDR and MAX_BLKADDR?
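
E.g. something like the below (an untested sketch, assuming those macros expand
to the same seg0/main/max boundaries derived from the raw super block above):

static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);

	/* meta-area range: reads only when the fio really targets the meta
	 * inode mapping, writes whenever the bio type is META */
	if (btype == META && (!is_read_io(fio->op) || fio->is_meta))
		BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
			blk_addr >= MAIN_BLKADDR(sbi));
	else
		BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
			blk_addr >= MAX_BLKADDR(sbi));
}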

> +
> + if (is_read_io(fio->op)) {
> + if (btype == META && fio->is_meta)
> + BUG_ON(blk_addr < seg0_blkaddr
> + || blk_addr >= main_blkaddr);
> + else
> + BUG_ON(blk_addr < main_blkaddr
> + || blk_addr >= max_blkaddr);
> + } else {
> + if (btype == META)
> + BUG_ON(blk_addr < seg0_blkaddr
> + || blk_addr >= main_blkaddr);
> + else
> + BUG_ON(blk_addr < main_blkaddr
> + || blk_addr >= max_blkaddr);
> + }
>  }
>  
>  /*
> -- 
> 1.9.1
> 
> 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.


Re: [f2fs-dev] [PATCH v4] f2fs: support in-memory inode checksum when checking consistency

2018-03-13 Thread Sahitya Tummala
On Thu, Mar 08, 2018 at 05:10:40AM +0800, Weichao Guo wrote:
> @@ -159,8 +162,12 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, 
> struct page *page)
>   struct f2fs_inode *ri;
>   __u32 provided, calculated;
>  
> +#ifdef CONFIG_F2FS_CHECK_FS
> + if (!f2fs_enable_inode_chksum(sbi, page))
> +#else
>   if (!f2fs_enable_inode_chksum(sbi, page) ||
>   PageDirty(page) || PageWriteback(page))

I see that f2fs_inode_chksum_set() is called only if CONFIG_F2FS_CHECK_FS is
enabled. So in this #else case, if the sb has inode checksum enabled but
PageDirty() or PageWriteback() is not set, then we may proceed below and do
the comparison between the provided and calculated checksums, and it may fail,
resulting in a checksum invalid error?

> +#endif
>   return true;
>  
>   ri = &F2FS_NODE(page)->i;
> @@ -445,6 +452,9 @@ void update_inode(struct inode *inode, struct page 
> *node_page)
>   if (inode->i_nlink == 0)
>   clear_inline_node(node_page);
>  
> +#ifdef CONFIG_F2FS_CHECK_FS
> + f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
> +#endif
>  }
>  
>  void update_inode_page(struct inode *inode)
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 177c438..2adeb74 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1113,8 +1113,10 @@ static int read_node_page(struct page *page, int 
> op_flags)
>   .encrypted_page = NULL,
>   };
>  
> - if (PageUptodate(page))
> + if (PageUptodate(page)) {
> + f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
>   return LOCKED_PAGE;
> + }
>  
>   get_node_info(sbi, page->index, &ni);
>  
> diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> index 081ef0d..b6015b7 100644
> --- a/fs/f2fs/node.h
> +++ b/fs/f2fs/node.h
> @@ -278,6 +278,10 @@ static inline void fill_node_footer(struct page *page, 
> nid_t nid,
>   /* should remain old flag bits such as COLD_BIT_SHIFT */
>   rn->footer.flag = cpu_to_le32((ofs << OFFSET_BIT_SHIFT) |
>   (old_flag & OFFSET_BIT_MASK));
> +#ifdef CONFIG_F2FS_CHECK_FS
> + if (IN_INODE(page))
> + f2fs_inode_chksum_set(F2FS_P_SB(page), page);
> +#endif
>  }
>  
>  static inline void copy_node_footer(struct page *dst, struct page *src)
> @@ -285,6 +289,10 @@ static inline void copy_node_footer(struct page *dst, 
> struct page *src)
>   struct f2fs_node *src_rn = F2FS_NODE(src);
>   struct f2fs_node *dst_rn = F2FS_NODE(dst);
>   memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer));
> +#ifdef CONFIG_F2FS_CHECK_FS
> + if (IN_INODE(dst))
> + f2fs_inode_chksum_set(F2FS_P_SB(dst), dst);
> +#endif
>  }
>  
>  static inline void fill_node_footer_blkaddr(struct page *page, block_t 
> blkaddr)
> @@ -298,6 +306,10 @@ static inline void fill_node_footer_blkaddr(struct page 
> *page, block_t blkaddr)
>  
>   rn->footer.cp_ver = cpu_to_le64(cp_ver);
>   rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
> +#ifdef CONFIG_F2FS_CHECK_FS
> + if (IN_INODE(page))
> + f2fs_inode_chksum_set(F2FS_P_SB(page), page);
> +#endif
>  }
>  
>  static inline bool is_recoverable_dnode(struct page *page)
> @@ -364,6 +376,10 @@ static inline int set_nid(struct page *p, int off, nid_t 
> nid, bool i)
>   rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
>   else
>   rn->in.nid[off] = cpu_to_le32(nid);
> +#ifdef CONFIG_F2FS_CHECK_FS
> + if (IN_INODE(p))
> + f2fs_inode_chksum_set(F2FS_P_SB(p), p);
> +#endif
>   return set_page_dirty(p);
>  }
>  
> @@ -432,6 +448,10 @@ static inline void set_cold_node(struct inode *inode, 
> struct page *page)
>   else
>   flag |= (0x1 << COLD_BIT_SHIFT);
>   rn->footer.flag = cpu_to_le32(flag);
> +#ifdef CONFIG_F2FS_CHECK_FS
> + if (IN_INODE(page))
> + f2fs_inode_chksum_set(F2FS_I_SB(inode), page);
> +#endif
>  }
>  
>  static inline void set_mark(struct page *page, int mark, int type)
> @@ -443,6 +463,10 @@ static inline void set_mark(struct page *page, int mark, 
> int type)
>   else
>   flag &= ~(0x1 << type);
>   rn->footer.flag = cpu_to_le32(flag);
> +#ifdef CONFIG_F2FS_CHECK_FS
> + if (IN_INODE(page))
> + f2fs_inode_chksum_set(F2FS_P_SB(page), page);
> +#endif
>  }
>  #define set_dentry_mark(page, mark)  set_mark(page, mark, DENT_BIT_SHIFT)
>  #define set_fsync_mark(page, mark)   set_mark(page, mark, FSYNC_BIT_SHIFT)
> diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
> index ae2dfa7..572bc17 100644
> --- a/fs/f2fs/xattr.c
> +++ b/fs/f2fs/xattr.c
> @@ -424,6 +424,9 @@ static inline int write_all_xattrs(struct inode *inode, 
> __u32 hsize,
>   return err;
>   }
>   memcpy(inline_addr, txattr_addr, inline_size);
> +#ifdef CONFIG_F2FS_CHECK_FS
> + f2fs_inode_chksum_set(sbi, ipage ? ipage : in_page

Re: [f2fs-dev] [PATCH v4] f2fs: support in-memory inode checksum when checking consistency

2018-03-13 Thread Sahitya Tummala
On Tue, Mar 13, 2018 at 05:19:51PM +0800, guoweichao wrote:
> 
> 
> On 2018/3/13 16:49, Sahitya Tummala wrote:
> > On Thu, Mar 08, 2018 at 05:10:40AM +0800, Weichao Guo wrote:
> >> @@ -159,8 +162,12 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info 
> >> *sbi, struct page *page)
> >>struct f2fs_inode *ri;
> >>__u32 provided, calculated;
> >>  
> >> +#ifdef CONFIG_F2FS_CHECK_FS
> >> +  if (!f2fs_enable_inode_chksum(sbi, page))
> >> +#else
> >>if (!f2fs_enable_inode_chksum(sbi, page) ||
> >>PageDirty(page) || PageWriteback(page))
> > 
> > I see that f2fs_inode_chksum_set() is set only if CONFIG_F2FS_CHECK_FS is
> f2fs_inode_chksum_set is also called in allocate_data_block when the fs writes 
> back the inode to disk.

yes, I got it now. Thanks for providing the clarification.

> > enabled. So in this #else case, if sb has inode checksum enabled but
> > PageDirty() or PageWriteback() is not set, then we may proceed below and do
> So when the inode is read from disk, e.g. PageDirty / PageWriteback is not 
> set,
> the checksum verify process should be ok.
> > the comparison between provided and calculated check sum and it may fail
> > resulting into checksum invalid error?
> > 

--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.



[f2fs-dev] [PATCH] f2fs: Fix deadlock in shutdown ioctl

2018-05-10 Thread Sahitya Tummala
f2fs_ioc_shutdown() ioctl gets stuck in the below path
when going down with full sync (F2FS_GOING_DOWN_FULLSYNC)
option.

__switch_to+0x90/0xc4
percpu_down_write+0x8c/0xc0
freeze_super+0xec/0x1e4
freeze_bdev+0xc4/0xcc
f2fs_ioctl+0xc0c/0x1ce0
f2fs_compat_ioctl+0x98/0x1f0

Fix this by not holding write access during this ioctl.

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/file.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index b926df7..2c2e61b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1835,10 +1835,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned 
long arg)
if (get_user(in, (__u32 __user *)arg))
return -EFAULT;
 
-   ret = mnt_want_write_file(filp);
-   if (ret)
-   return ret;
-
switch (in) {
case F2FS_GOING_DOWN_FULLSYNC:
sb = freeze_bdev(sb->s_bdev);
@@ -1878,7 +1874,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned 
long arg)
 
f2fs_update_time(sbi, REQ_TIME);
 out:
-   mnt_drop_write_file(filp);
return ret;
 }
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




Re: [f2fs-dev] [PATCH] f2fs: Fix deadlock in shutdown ioctl

2018-05-15 Thread Sahitya Tummala
On Mon, May 14, 2018 at 11:39:42AM +0800, Chao Yu wrote:
> On 2018/5/10 21:20, Sahitya Tummala wrote:
> > f2fs_ioc_shutdown() ioctl gets stuck in the below path
> > when going down with full sync (F2FS_GOING_DOWN_FULLSYNC)
> > option.
> > 
> > __switch_to+0x90/0xc4
> > percpu_down_write+0x8c/0xc0
> > freeze_super+0xec/0x1e4
> > freeze_bdev+0xc4/0xcc
> > f2fs_ioctl+0xc0c/0x1ce0
> > f2fs_compat_ioctl+0x98/0x1f0
> > 
> > Fix this by not holding write access during this ioctl.
> 
> I think we can just remove lock coverage for F2FS_GOING_DOWN_FULLSYNC path, 
> for
> other path, we need to keep as it is.
>

Thanks, I thought about it too but then I checked that XFS shutdown ioctl is
not taking any lock for this ioctl. Hence, I followed the same in F2FS.
Do you know why XFS is not taking any lock? Is it really needed in shutdown
ioctl?

--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.



Re: [f2fs-dev] [PATCH] f2fs: Fix deadlock in shutdown ioctl

2018-05-16 Thread Sahitya Tummala
On Wed, May 16, 2018 at 02:48:42PM +0800, Chao Yu wrote:
> > 
> > Thanks, I thought about it too but then I checked that XFS shutdown ioctl is
> > not taking any lock for this ioctl. Hence, I followed the same in F2FS.
> > Do you know why XFS is not taking any lock? 
> 
> I don't know. :(

No problem :)

> 
> > Is it really needed in shutdown ioctl?
> 
> IMO, yes, we should keep freeze and remount be aware of the shutdown 
> operation.

Thanks for your inputs. I will send the updated patch set.

--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.



[f2fs-dev] [PATCH v2] f2fs: Fix deadlock in shutdown ioctl

2018-05-17 Thread Sahitya Tummala
f2fs_ioc_shutdown() ioctl gets stuck in the below path
when issued with F2FS_GOING_DOWN_FULLSYNC option.

__switch_to+0x90/0xc4
percpu_down_write+0x8c/0xc0
freeze_super+0xec/0x1e4
freeze_bdev+0xc4/0xcc
f2fs_ioctl+0xc0c/0x1ce0
f2fs_compat_ioctl+0x98/0x1f0

Signed-off-by: Sahitya Tummala 
---
v2:
remove lock coverage for only F2FS_GOING_DOWN_FULLSYNC case as suggested by 
Chao.

 fs/f2fs/file.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6b94f19..5a132c9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1857,6 +1857,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned 
long arg)
 
switch (in) {
case F2FS_GOING_DOWN_FULLSYNC:
+   mnt_drop_write_file(filp);
sb = freeze_bdev(sb->s_bdev);
if (IS_ERR(sb)) {
ret = PTR_ERR(sb);
@@ -1894,7 +1895,8 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned 
long arg)
 
f2fs_update_time(sbi, REQ_TIME);
 out:
-   mnt_drop_write_file(filp);
+   if (in != F2FS_GOING_DOWN_FULLSYNC)
+   mnt_drop_write_file(filp);
return ret;
 }
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




[f2fs-dev] [PATCH v3] f2fs: Fix deadlock in shutdown ioctl

2018-05-18 Thread Sahitya Tummala
f2fs_ioc_shutdown() ioctl gets stuck in the below path
when issued with F2FS_GOING_DOWN_FULLSYNC option.

__switch_to+0x90/0xc4
percpu_down_write+0x8c/0xc0
freeze_super+0xec/0x1e4
freeze_bdev+0xc4/0xcc
f2fs_ioctl+0xc0c/0x1ce0
f2fs_compat_ioctl+0x98/0x1f0

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/file.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6b94f19..5d99fd1 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1851,9 +1851,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned 
long arg)
if (get_user(in, (__u32 __user *)arg))
return -EFAULT;
 
-   ret = mnt_want_write_file(filp);
-   if (ret)
-   return ret;
+   if (in != F2FS_GOING_DOWN_FULLSYNC) {
+   ret = mnt_want_write_file(filp);
+   if (ret)
+   return ret;
+   }
 
switch (in) {
case F2FS_GOING_DOWN_FULLSYNC:
@@ -1894,7 +1896,8 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned 
long arg)
 
f2fs_update_time(sbi, REQ_TIME);
 out:
-   mnt_drop_write_file(filp);
+   if (in != F2FS_GOING_DOWN_FULLSYNC)
+   mnt_drop_write_file(filp);
return ret;
 }
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




Re: [f2fs-dev] [PATCH v2] f2fs: Fix deadlock in shutdown ioctl

2018-05-18 Thread Sahitya Tummala
On Thu, May 17, 2018 at 06:08:26PM -0700, Jaegeuk Kim wrote:
> 
> if (in == F2FS_GOING_DOWN_FULLSYNC) {
>   sb = freeze_bdev();
>   if (IS_ERR(sb))
>   return PTR_ERR(sb);
>   if (unlikely(!sb))
>   return -EINVAL;
> }
> 
> ret = mnt_want_write_file();

It will get stuck/blocked here, as freeze_bdev() now holds the write
lock for all the levels including SB_FREEZE_WRITE. Since freeze_bdev()
already holds the write lock, I think f2fs_stop_checkpoint() should be safe
even without mnt_want_write_file().
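
Roughly, the ordering would be (a sketch reconstructed from the stack trace
above, not verbatim kernel code):

	freeze_bdev(sb->s_bdev)
	  freeze_super(sb)
	    sb_wait_write(sb, SB_FREEZE_WRITE)	/* takes the write side */

	mnt_want_write_file(filp)
	  sb_start_write(sb)			/* read side of the same
						   s_writers level, so it
						   blocks until thaw_bdev() */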

I have posted v3 as per Chao's comments to exclude mnt_want_write_file()
for F2FS_GOING_DOWN_FULLSYNC case.
Please check and let me know if there are any further comments.

> ...
> switch() {
> case F2FS_GOING_DOWN_FULLSYNC:
>   f2fs_stop_checkpoint();
>   thaw_bdev();
>   break;
> ...
> }
> 
> drop:
> ...
> 
> 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.



[f2fs-dev] deadlock during writeback when using f2fs filesystem

2018-06-01 Thread Sahitya Tummala
Hi,

We are observing a deadlock scenario during FS writeback under low-memory
condition with F2FS filesystem.

Here is the callstack of this scenario -

shrink_inactive_list()
shrink_node_memcg.isra.74()
shrink_node()
shrink_zones(inline)
do_try_to_free_pages(inline)
try_to_free_pages()
__perform_reclaim(inline)
__alloc_pages_direct_reclaim(inline)
__alloc_pages_slowpath(inline)
no_zone()
__alloc_pages(inline)
__alloc_pages_node(inline)
alloc_pages_node(inline)
__page_cache_alloc(inline)
pagecache_get_page()
find_or_create_page(inline)
grab_cache_page(inline)
f2fs_grab_cache_page(inline)
__get_node_page.part.32()
__get_node_page(inline)
get_node_page()
update_inode_page()
f2fs_write_inode()
write_inode(inline)
__writeback_single_inode()
writeback_sb_inodes()
__writeback_inodes_wb()
wb_writeback()
wb_do_writeback(inline)
wb_workfn()

The writeback thread is entering the direct reclaim path due to low memory and
is getting stuck in shrink_inactive_list(), as shrink_inactive_list() is in
turn waiting for writeback to happen for the dirty pages present in the
inactive list.

Do you think we can use GFP_NOWAIT for the node mapping gfp_mask so that we
can avoid the direct reclaim path in the writeback context? As we may now see
allocation failures with this flag, do you see any risk or issue in using it
w.r.t. F2FS and writeback?
I would appreciate your suggestions on this.

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 89c838b..d3daf3b 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -316,7 +316,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned 
long ino)
 make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_node_aops;
-   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_NODE_MAPPING);
} else if (ino == F2FS_META_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_meta_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 58aecb6..bb985cd 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -47,6 +47,7 @@
 /* This flag is used by node and meta inodes, and by recovery */
 #define GFP_F2FS_ZERO  (GFP_NOFS | __GFP_ZERO)
 #define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM)
+#define GFP_F2FS_NODE_MAPPING  (GFP_NOWAIT | __GFP_IO | __GFP_ZERO)
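
To illustrate what the callers would have to tolerate with this flag, here is
a rough sketch (a hypothetical helper for illustration only, not the existing
f2fs code path; error handling simplified):

static int f2fs_write_inode_nowait(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *node_page;

	/* with GFP_NOWAIT in the node mapping, the page cache allocation
	 * may fail instead of entering direct reclaim */
	node_page = f2fs_grab_cache_page(NODE_MAPPING(sbi), inode->i_ino, false);
	if (!node_page)
		return -ENOMEM;	/* writeback can redirty the inode and retry */

	/* ... copy the in-memory inode into the node page and dirty it ... */
	f2fs_put_page(node_page, 1);
	return 0;
}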

Thanks,
Sahitya.
-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.



Re: [f2fs-dev] deadlock during writeback when using f2fs filesystem

2018-06-01 Thread Sahitya Tummala
On Fri, Jun 01, 2018 at 12:26:09PM +0200, Michal Hocko wrote:
> On Fri 01-06-18 15:02:35, Sahitya Tummala wrote:
> > Hi,
> > 
> > We are observing a deadlock scenario during FS writeback under low-memory
> > condition with F2FS filesystem.
> > 
> > Here is the callstack of this scenario -
> > 
> > shrink_inactive_list()
> > shrink_node_memcg.isra.74()
> > shrink_node()
> > shrink_zones(inline)
> > do_try_to_free_pages(inline)
> > try_to_free_pages()
> > __perform_reclaim(inline)
> > __alloc_pages_direct_reclaim(inline)
> > __alloc_pages_slowpath(inline)
> > no_zone()
> > __alloc_pages(inline)
> > __alloc_pages_node(inline)
> > alloc_pages_node(inline)
> > __page_cache_alloc(inline)
> > pagecache_get_page()
> > find_or_create_page(inline)
> > grab_cache_page(inline)
> > f2fs_grab_cache_page(inline)
> > __get_node_page.part.32()
> > __get_node_page(inline)
> > get_node_page()
> > update_inode_page()
> > f2fs_write_inode()
> > write_inode(inline)
> > __writeback_single_inode()
> > writeback_sb_inodes()
> > __writeback_inodes_wb()
> > wb_writeback()
> > wb_do_writeback(inline)
> > wb_workfn()
> > 
> > The writeback thread is entering into the direct reclaim path due to 
> > low-memory and is
> > getting stuck in shrink_inactive_list(), as shrink_inactive_list() is 
> > inturn waiting for
> > writeback to happen for the dirty pages present in the inactive list.
> 
> shrink_page_list waits only for writeback pages when we are in the memcg
> reclaim. The above seems to be the global reclaim though. Moreover
> GFP_F2FS_ZERO is GFP_NOFS so we are not waiting for writeback pages at
> all. Are you sure the above is really a deadlock?
> 

Let me correct my statement. It could be more of a livelock scenario.

The direct reclaim path is not doing any writeback here, so the GFP_NOFS doesn't
make any difference. In this case, the direct reclaim has to reclaim ~32 pages,
which it picks up from the tail of the list. All of those tail pages are dirty
and since direct reclaim path can't do any writeback, it just loops picking and
skipping them.

> > Do you think we can use GFP_NOWAIT for node mapping gfp_mask so that we can 
> > avoid direct
> > reclaim path in the writeback context? As we may now see allocation 
> > failures with this flag,
> > do you see any risk or issue in using it w.r.t F2FS FS and writeback?
> > Appreciate your suggestions on this.
> > 
> > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> > index 89c838b..d3daf3b 100644
> > --- a/fs/f2fs/inode.c
> > +++ b/fs/f2fs/inode.c
> > @@ -316,7 +316,7 @@ struct inode *f2fs_iget(struct super_block *sb, 
> > unsigned long ino)
> >  make_now:
> > if (ino == F2FS_NODE_INO(sbi)) {
> > inode->i_mapping->a_ops = &f2fs_node_aops;
> > -   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > +   mapping_set_gfp_mask(inode->i_mapping, 
> > GFP_F2FS_NODE_MAPPING);
> > } else if (ino == F2FS_META_INO(sbi)) {
> > inode->i_mapping->a_ops = &f2fs_meta_aops;
> > mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> > index 58aecb6..bb985cd 100644
> > --- a/include/linux/f2fs_fs.h
> > +++ b/include/linux/f2fs_fs.h
> > @@ -47,6 +47,7 @@
> >  /* This flag is used by node and meta inodes, and by recovery */
> >  #define GFP_F2FS_ZERO  (GFP_NOFS | __GFP_ZERO)
> >  #define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM)
> > +#define GFP_F2FS_NODE_MAPPING  (GFP_NOWAIT | __GFP_IO | __GFP_ZERO)
> > 
> > Thanks,
> > Sahitya.
> > -- 
> > --
> > Sent by a consultant of the Qualcomm Innovation Center, Inc.
> > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
> 
> -- 
> Michal Hocko
> SUSE Labs

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.



[f2fs-dev] [PATCH] fsck: Fix data loss issue observed during SPO

2019-07-04 Thread Sahitya Tummala
With the commit <979b25727800> ("fsck.f2fs: check validity of nat journal"),
there is a serious data loss issue observed during sudden power off tests.

The reason is the incorrect update of journal->n_nats in that patch.
When fsck encounters a nat journal entry with an invalid blkaddr/nid, it
updates journal->n_nats with the index of the first such entry and then
continues the for loop that scans all nats in the journal in
f2fs_init_nid_bitmap(). But the loop does not actually continue and does not
update nm_i->nid_bitmap for the remaining entries, as it now fails the for
loop condition i < nats_in_cursum(journal).

There could be several other valid entries in the journal after this
invalid entry. Due to this invalid journal->n_nats, build_nat_area_bitmap()
cannot be built properly for the valid nat journal entries. This further
results in the fsck asserts/errors below and causes data loss due to an
incorrect fsck fix.
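
For reference, the relevant loop in f2fs_init_nid_bitmap() roughly looks like
the below (paraphrased from fsck/mount.c; only the structure matters here):

	for (i = 0; i < nats_in_cursum(journal); i++) {
		block_t addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);

		if (!IS_VALID_BLK_ADDR(sbi, addr)) {
			/* shrinking n_nats here makes the loop condition
			 * i < nats_in_cursum(journal) false on the next
			 * iteration, so all later (possibly valid) journal
			 * entries are silently dropped */
			journal->n_nats = cpu_to_le16(i);
			c.fix_on = 1;
			continue;
		}
		/* same pattern for IS_VALID_NID(); the nid bitmap is only
		 * updated for the entries that survive both checks */
	}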

[ASSERT] (sanity_check_nid: 372)  --> nid[0xfb01] ino is 0\x0a
[FIX] (__chk_dentries:1438)  --> Unlink [0xfb01] - 
_JeNcl242yB3Apz2MW,VOh6WmjsVht1W len[0x1c], type[0x1]\x0a
[ASSERT] (sanity_check_nid: 372)  --> nid[0xfa2d] ino is 0\x0a
[FIX] (__chk_dentries:1438)  --> Unlink [0xfa2d] - 
_++vN7axccjjGNI,TQEVmcOemY1E len[0x14], type[0x1]\x0a
...
NID[0xba5] is unreachable\x0a
NID[0xfa9] is unreachable\x0a
...
[FIX] (nullify_nat_entry:2089)  --> Remove nid [0xba5] in NAT\x0a
[FIX] (nullify_nat_entry:2089)  --> Remove nid [0xfa9] in NAT\x0a
...

Fixes: 979b25727800 ("fsck.f2fs: check validity of nat journal")
Signed-off-by: Sahitya Tummala 
---
 fsck/mount.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fsck/mount.c b/fsck/mount.c
index 60e0e4a..1b93f49 100644
--- a/fsck/mount.c
+++ b/fsck/mount.c
@@ -1160,7 +1160,6 @@ static int f2fs_init_nid_bitmap(struct f2fs_sb_info *sbi)
addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
if (!IS_VALID_BLK_ADDR(sbi, addr)) {
MSG(0, "\tError: f2fs_init_nid_bitmap: addr(%u) is 
invalid!!!\n", addr);
-   journal->n_nats = cpu_to_le16(i);
c.fix_on = 1;
continue;
}
@@ -1168,7 +1167,6 @@ static int f2fs_init_nid_bitmap(struct f2fs_sb_info *sbi)
nid = le32_to_cpu(nid_in_journal(journal, i));
if (!IS_VALID_NID(sbi, nid)) {
MSG(0, "\tError: f2fs_init_nid_bitmap: nid(%u) is 
invalid!!!\n", nid);
-   journal->n_nats = cpu_to_le16(i);
c.fix_on = 1;
continue;
}
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.





Re: [f2fs-dev] [PATCH] fsck: Fix data loss issue observed during SPO

2019-07-05 Thread Sahitya Tummala
Hi Chao,

On Fri, Jul 05, 2019 at 06:11:47PM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> On 2019/7/5 13:01, Sahitya Tummala wrote:
> > With the commit <979b25727800> ("fsck.f2fs: check validity of nat journal"),
> > there is a serious data loss issue observed during sudden power off tests.
> 
> In that patch, since journal's data is untrusty, I just truncated n_nats to 
> last
> valid one to fix original reported issue.

The original issue will be fixed even without updating/truncating the n_nats
as we still have sanity checks on nid and blkaddr, right?

> 
> Actually, the problem here is we don't know which data is correct and which 
> data
> is corrupted in nat or sit journal, so even we skip broken entry without
> modifying journal->n_{nats,sits}, latter entry may still contain broken 
> data(but
> it may pass validation check), and those entries may cause potential data
> loss/corruption...
> 

This issue was reported by a customer in the following test scenario -

Update the password in Android and then force a crash. It is verified that
all the files related to the password data have already been fsync()'d.

After some iterations (3-4) of this test, it is found that the password-related
files are lost, causing the phone to fail to boot up.

Without this patch ("fsck.f2fs: check validity of nat journal"), they don't see
such an issue. It means that the data present in the journal seems to be valid,
at least in this case, and truncating it is causing the loss of valid data/files.

> It looks no matter how we change the codes, we still may face data
> loos/corruption issues.
> 

Yes, maybe in some cases, the data loss is still expected, but there are
chances that data may be valid as well, which is confirmed by the test case 
above.

> IMO, we need to find out the root cause of data corruption, So my question is
> that why sudden power off tests can make nat journal being corrupted? I doubt
> that in-place update method on checkpoint area of fsck may cause this... any
> other thoughts?
> 

In case of sudden power off, I have printed all the journal entries in the
NAT and I see several entries with valid blkaddr and nid as below -

[   21.007649] fsck.f2fs: withfix: 0: addr 1c373 nid 45b\x0a
[   21.014399] fsck.f2fs: withfix: 1: addr 1c2c1 nid 1b5\x0a
[   21.021151] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 2/37 
is invalid!!!\x0a
[   21.030860] fsck.f2fs: withfix: 3: addr 1c1f7 nid 19b2\x0a
[   21.037674] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 4/37 
is invalid!!!\x0a
[   21.047353] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 5/37 
is invalid!!!\x0a
[   21.057062] fsck.f2fs: withfix: 6: addr 1c377 nid ac3\x0a
[   21.063841] fsck.f2fs: withfix: 7: addr 1c378 nid ac9\x0a
[   21.070634] fsck.f2fs: withfix: 8: addr 1b2fe nid 807\x0a
[   21.077356] fsck.f2fs: withfix: 9: addr 17aeb nid 805\x0a
[   21.084102] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 10/37 
is invalid!!!\x0a
[   21.093891] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 11/37 
is invalid!!!\x0a
[   21.103662] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 12/37 
is invalid!!!\x0a
[   21.113450] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 13/37 
is invalid!!!\x0a
[   21.123227] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 14/37 
is invalid!!!\x0a
[   21.133022] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 15/37 
is invalid!!!\x0a
[   21.142792] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 16/37 
is invalid!!!\x0a
[   21.152580] fsck.f2fs: withfix: 17: addr 1c37f nid ff4\x0a
[   21.159415] fsck.f2fs: withfix: 18: addr 1c36c nid ff9\x0a
[   21.166229] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 19/37 
is invalid!!!\x0a
[   21.176039] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 20/37 
is invalid!!!\x0a
[   21.185831] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 21/37 
is invalid!!!\x0a
[   21.195631] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 22/37 
is invalid!!!\x0a
[   21.205394] fsck.f2fs: withfix: 23: addr 1c2f3 nid 61c\x0a
[   21.212257] fsck.f2fs: withfix: 24: addr 1c391 nid 641\x0a
[   21.219093] fsck.f2fs: withfix: 25: addr 1c386 nid 33b\x0a
[   21.225905] fsck.f2fs: withfix: 26: addr 1c356 nid 258\x0a
[   21.232767] fsck.f2fs: withfix: 27: addr 1c2f2 nid 382\x0a
[   21.239601] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 28/37 
is invalid!!!\x0a
[   21.249398] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 29/37 
is invalid!!!\x0a
[   21.259160] fsck.f2fs: \x09Error: f2fs_init_nid_bitmap: addr(0) i/nats 30/37 
is invalid!!!\x0a
[   21.268970] fsck.f2fs: withfix: 31: addr 1ac03 nid 1ccd\x0a
[   21.275874] fsck.f2fs: withfix: 32: addr 1c395 nid a57\x0a
[   21.282709] fsck.f2fs: withfix: 33: addr 1c392 nid 92e\x0a
[   21.28

Re: [f2fs-dev] [PATCH] fsck: Fix data loss issue observed during SPO

2019-07-05 Thread Sahitya Tummala
Hi Chao,

The journal doesn't look to be corrupted, but only block_addr
in f2fs_nat_entry is 0, whereas the nid and version seem to be valid.
These could be just truncated nodes, right?
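
(For reference, the on-disk layout of each entry being dumped below, from
include/linux/f2fs_fs.h; the nid itself comes from the separate
nid_in_journal() array:)

struct f2fs_nat_entry {
	__u8 version;		/* latest version of the cached nat entry */
	__le32 ino;		/* inode number */
	__le32 block_addr;	/* block address */
} __packed;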

Here is the full dump of nat journal entries after an SPO (this is not an
issue case, but just a normal case with SPO) -

[   21.121325] fsck.f2fs: 0: addr 0 nid 5c5a ver 1\x0a
[   21.127509] fsck.f2fs: 1: addr 0 nid 4c1d ver 1\x0a
[   21.133760] fsck.f2fs: 2: addr 12075 nid 444f ver 0\x0a
[   21.140325] fsck.f2fs: 3: addr 0 nid 4307 ver 0\x0a
[   21.146510] fsck.f2fs: 4: addr 0 nid 4059 ver 1\x0a
[   21.152745] fsck.f2fs: 5: addr 0 nid 3e69 ver 1\x0a
[   21.158944] fsck.f2fs: 6: addr 0 nid 3cf9 ver 1\x0a
[   21.165128] fsck.f2fs: 7: addr 0 nid 3b7b ver 1\x0a
[   21.171362] fsck.f2fs: 8: addr 18f65 nid 34e2 ver 0\x0a
[   21.177903] fsck.f2fs: 9: addr 12126 nid 2c87 ver 0\x0a
[   21.184466] fsck.f2fs: 10: addr 0 nid 2a0a ver 1\x0a
[   21.190795] fsck.f2fs: 11: addr 0 nid 2565 ver 1\x0a
[   21.197070] fsck.f2fs: 12: addr 0 nid 1f48 ver 0\x0a
[   21.203369] fsck.f2fs: 13: addr 155bf nid 1d76 ver 0\x0a
[   21.210042] fsck.f2fs: 14: addr 19538 nid 18da ver 0\x0a
[   21.216675] fsck.f2fs: 15: addr 0 nid 157f ver 0\x0a
[   21.223037] fsck.f2fs: 16: addr d117 nid ac3 ver 0\x0a
[   21.229547] fsck.f2fs: 17: addr 11e48 nid 805 ver 0\x0a
[   21.236088] fsck.f2fs: 18: addr d112 nid 61c ver 0\x0a
[   21.242585] fsck.f2fs: 19: addr d00b nid 45b ver 0\x0a
[   21.249086] fsck.f2fs: 20: addr d111 nid 30d ver 0\x0a
[   21.255544] fsck.f2fs: 21: addr d110 nid 1b5 ver 0\x0a
[   21.262025] fsck.f2fs: 22: addr 0 nid 5e41 ver 1\x0a
[   21.268342] fsck.f2fs: 23: addr 0 nid 5e46 ver 0\x0a
[   21.274618] fsck.f2fs: 24: addr 0 nid 5a34 ver 1\x0a
[   21.280916] fsck.f2fs: 25: addr 0 nid 59fa ver 1\x0a
[   21.287192] fsck.f2fs: 26: addr 0 nid 57c3 ver 1\x0a
[   21.293526] fsck.f2fs: 27: addr 0 nid 5776 ver 1\x0a
[   21.299816] fsck.f2fs: 28: addr 0 nid 4f20 ver 1\x0a
[   21.306091] fsck.f2fs: 29: addr 0 nid 4ff7 ver 0\x0a
[   21.312416] fsck.f2fs: 30: addr 0 nid 47e1 ver 1\x0a
[   21.318715] fsck.f2fs: 31: addr 0 nid 4880 ver 1\x0a
[   21.324990] fsck.f2fs: 32: addr d118 nid ff4 ver 0\x0a
[   21.331498] fsck.f2fs: 33: addr d10c nid ff9 ver 0\x0a
[   21.337957] fsck.f2fs: 34: addr 0 nid 49ed ver 1\x0a
[   21.344290] fsck.f2fs: 35: addr 0 nid 497e ver 0\x0a
[   21.350612] fsck.f2fs: 36: addr fc5a nid 49d2 ver 0\x0a

Thanks,

On Fri, Jul 05, 2019 at 04:33:57PM +0530, Sahitya Tummala wrote:
> Hi Chao,
> 
> On Fri, Jul 05, 2019 at 06:11:47PM +0800, Chao Yu wrote:
> > Hi Sahitya,
> > 
> > On 2019/7/5 13:01, Sahitya Tummala wrote:
> > > With the commit <979b25727800> ("fsck.f2fs: check validity of nat 
> > > journal"),
> > > there is a serious data loss issue observed during sudden power off tests.
> > 
> > In that patch, since journal's data is untrusty, I just truncated n_nats to 
> > last
> > valid one to fix original reported issue.
> 
> The original issue will be fixed even without updating/truncating the n_nats
> as we still have sanity checks on nid and blkaddr, right?
> 
> > 
> > Actually, the problem here is we don't know which data is correct and which 
> > data
> > is corrupted in nat or sit journal, so even we skip broken entry without
> > modifying journal->n_{nats,sits}, latter entry may still contain broken 
> > data(but
> > it may pass validation check), and those entries may cause potential data
> > loss/corruption...
> > 
> 
> This issue was reported by a customer in the following test scenario -
> 
> Update the password in Android and then force crash. It is verified that
> all the files related to password data has fsync() already.
> 
> After some iterations (3-4) of this test, it is found password related files 
> are
> lost, failing the phone to bootup.
> 
> Without this patch ("fsck.f2fs: check validity of nat journal"), they don't 
> see
> such issue. It means that the data present in journal seems to be valid 
> atleast
> in this case and truncating is causing to loose the valid data/files.
> 
> > It looks no matter how we change the codes, we still may face data
> > loos/corruption issues.
> > 
> 
> Yes, maybe in some cases, the data loss is still expected, but there are
> chances that data may be valid as well, which is confirmed by the test case 
> above.
> 
> > IMO, we need to find out the root cause of data corruption, So my question 
> > is
> > that why sudden power off tests can make nat journal being corrupted? I 
> > doubt
> > that in-place update method on checkpoint area of fsck may cause this... any
> > other thoughts?
> > 
> 
> In case of sudden power off, I have printed the all the journal entries in the
&

Re: [f2fs-dev] [PATCH] fsck: Fix data loss issue observed during SPO

2019-07-05 Thread Sahitya Tummala
Hi Chao,

On Sat, Jul 06, 2019 at 09:14:59AM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> On 2019-7-5 21:48, Sahitya Tummala wrote:
> > Hi Chao,
> > 
> > The journal doesn't look to be corrupted, but only block_addr
> > in f2fs_nat_entry is 0 whereas nid and version seems to be valid.
> > These could be just truncated nodes, right?
> 
> Yes, but that should not fail the condition of journal check and result in
> journal size truncation, right?
> 

Yes, the patch fails for this condition as well and results in
truncation of the journal, due to the check addr < SM_I(sbi)->main_blkaddr
in IS_VALID_BLK_ADDR().

Do you think we can correct it this way?

diff --git a/fsck/f2fs.h b/fsck/f2fs.h
index 417ca0b..00fb740 100644
--- a/fsck/f2fs.h
+++ b/fsck/f2fs.h
@@ -364,8 +364,8 @@ static inline bool IS_VALID_NID(struct f2fs_sb_info *sbi, 
u32 nid)

 static inline bool IS_VALID_BLK_ADDR(struct f2fs_sb_info *sbi, u32 addr)
 {
-   if (addr >= le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count) ||
-   addr < SM_I(sbi)->main_blkaddr) {
+   if (addr && (addr >= le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count) ||
+   addr < SM_I(sbi)->main_blkaddr)) {
DBG(1, "block addr [0x%x]\n", addr);
return 0;
}

Thanks,

> Thanks,
> 
> > 
> > Here is the full dump of nat journal entries after a SPO (this is not a 
> > issue case, but just a normal case with SPO) -
> > 
> > [   21.121325] fsck.f2fs: 0: addr 0 nid 5c5a ver 1\x0a
> > [   21.127509] fsck.f2fs: 1: addr 0 nid 4c1d ver 1\x0a
> > [   21.133760] fsck.f2fs: 2: addr 12075 nid 444f ver 0\x0a
> > [   21.140325] fsck.f2fs: 3: addr 0 nid 4307 ver 0\x0a
> > [   21.146510] fsck.f2fs: 4: addr 0 nid 4059 ver 1\x0a
> > [   21.152745] fsck.f2fs: 5: addr 0 nid 3e69 ver 1\x0a
> > [   21.158944] fsck.f2fs: 6: addr 0 nid 3cf9 ver 1\x0a
> > [   21.165128] fsck.f2fs: 7: addr 0 nid 3b7b ver 1\x0a
> > [   21.171362] fsck.f2fs: 8: addr 18f65 nid 34e2 ver 0\x0a
> > [   21.177903] fsck.f2fs: 9: addr 12126 nid 2c87 ver 0\x0a
> > [   21.184466] fsck.f2fs: 10: addr 0 nid 2a0a ver 1\x0a
> > [   21.190795] fsck.f2fs: 11: addr 0 nid 2565 ver 1\x0a
> > [   21.197070] fsck.f2fs: 12: addr 0 nid 1f48 ver 0\x0a
> > [   21.203369] fsck.f2fs: 13: addr 155bf nid 1d76 ver 0\x0a
> > [   21.210042] fsck.f2fs: 14: addr 19538 nid 18da ver 0\x0a
> > [   21.216675] fsck.f2fs: 15: addr 0 nid 157f ver 0\x0a
> > [   21.223037] fsck.f2fs: 16: addr d117 nid ac3 ver 0\x0a
> > [   21.229547] fsck.f2fs: 17: addr 11e48 nid 805 ver 0\x0a
> > [   21.236088] fsck.f2fs: 18: addr d112 nid 61c ver 0\x0a
> > [   21.242585] fsck.f2fs: 19: addr d00b nid 45b ver 0\x0a
> > [   21.249086] fsck.f2fs: 20: addr d111 nid 30d ver 0\x0a
> > [   21.255544] fsck.f2fs: 21: addr d110 nid 1b5 ver 0\x0a
> > [   21.262025] fsck.f2fs: 22: addr 0 nid 5e41 ver 1\x0a
> > [   21.268342] fsck.f2fs: 23: addr 0 nid 5e46 ver 0\x0a
> > [   21.274618] fsck.f2fs: 24: addr 0 nid 5a34 ver 1\x0a
> > [   21.280916] fsck.f2fs: 25: addr 0 nid 59fa ver 1\x0a
> > [   21.287192] fsck.f2fs: 26: addr 0 nid 57c3 ver 1\x0a
> > [   21.293526] fsck.f2fs: 27: addr 0 nid 5776 ver 1\x0a
> > [   21.299816] fsck.f2fs: 28: addr 0 nid 4f20 ver 1\x0a
> > [   21.306091] fsck.f2fs: 29: addr 0 nid 4ff7 ver 0\x0a
> > [   21.312416] fsck.f2fs: 30: addr 0 nid 47e1 ver 1\x0a
> > [   21.318715] fsck.f2fs: 31: addr 0 nid 4880 ver 1\x0a
> > [   21.324990] fsck.f2fs: 32: addr d118 nid ff4 ver 0\x0a
> > [   21.331498] fsck.f2fs: 33: addr d10c nid ff9 ver 0\x0a
> > [   21.337957] fsck.f2fs: 34: addr 0 nid 49ed ver 1\x0a
> > [   21.344290] fsck.f2fs: 35: addr 0 nid 497e ver 0\x0a
> > [   21.350612] fsck.f2fs: 36: addr fc5a nid 49d2 ver 0\x0a
> > 
> > Thanks,
> > 
> > On Fri, Jul 05, 2019 at 04:33:57PM +0530, Sahitya Tummala wrote:
> >> Hi Chao,
> >>
> >> On Fri, Jul 05, 2019 at 06:11:47PM +0800, Chao Yu wrote:
> >>> Hi Sahitya,
> >>>
> >>> On 2019/7/5 13:01, Sahitya Tummala wrote:
> >>>> With the commit <979b25727800> ("fsck.f2fs: check validity of nat 
> >>>> journal"),
> >>>> there is a serious data loss issue observed during sudden power off 
> >>>> tests.
> >>>
> >>> In that patch, since journal's data is untrusty, I just truncated n_nats 
> >>> to last
> >>> valid one to fix original reported issue.
> >>
> >> The original issue will be fixed even without updating/truncating the 
> >> n_nats
> >> as we still

Re: [f2fs-dev] [PATCH] fsck: Fix data loss issue observed during SPO

2019-07-05 Thread Sahitya Tummala
Hi Chao,

I just realized that there is a patch from you to fix this already.

commit 92ea71c711681a8f575d09d57d5ceaebd6644fcc
Author: Chao Yu 
Date:   Wed Jan 9 10:48:25 2019 +0800

fsck.f2fs: fix incorrect boundary of IS_VALID_NID()

nid should never equal to max_nid, fix it.

Signed-off-by: Chao Yu 
Signed-off-by: Jaegeuk Kim 

In my code base, I did not have this patch but only had
"fsck.f2fs: check validity of nat journal", thus resulting in
data loss issues. I will now verify with this patch included.

Thanks for your support on this issue.

Thanks,
Sahitya.

On Sat, Jul 06, 2019 at 07:01:12AM +0530, Sahitya Tummala wrote:
> Hi Chao,
> 
> On Sat, Jul 06, 2019 at 09:14:59AM +0800, Chao Yu wrote:
> > Hi Sahitya,
> > 
> > On 2019-7-5 21:48, Sahitya Tummala wrote:
> > > Hi Chao,
> > > 
> > > The journal doesn't look to be corrupted, but only block_addr
> > > in f2fs_nat_entry is 0 whereas nid and version seems to be valid.
> > > These could be just truncated nodes, right?
> > 
> > Yes, but that should not fail the condition of journal check and result in
> > journal size truncation, right?
> > 
> 
> Yes, the patch fails for this condition as well and results into
> truncation of journal, due to this check  addr < SM_I(sbi)->main_blkaddr
> in IS_VALID_BLK_ADDR().
> 
> Do you think we can correct it this way?
> 
> diff --git a/fsck/f2fs.h b/fsck/f2fs.h
> index 417ca0b..00fb740 100644
> --- a/fsck/f2fs.h
> +++ b/fsck/f2fs.h
> @@ -364,8 +364,8 @@ static inline bool IS_VALID_NID(struct f2fs_sb_info *sbi, 
> u32 nid)
> 
>  static inline bool IS_VALID_BLK_ADDR(struct f2fs_sb_info *sbi, u32 addr)
>  {
> -   if (addr >= le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count) ||
> -   addr < SM_I(sbi)->main_blkaddr) {
> +   if (addr && (addr >= le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count) ||
> +   addr < SM_I(sbi)->main_blkaddr)) {
> DBG(1, "block addr [0x%x]\n", addr);
> return 0;
> }
> 
> Thanks,
> 
> > Thanks,
> > 
> > > 
> > > Here is the full dump of nat journal entries after a SPO (this is not a 
> > > issue case, but just a normal case with SPO) -
> > > 
> > > [   21.121325] fsck.f2fs: 0: addr 0 nid 5c5a ver 1\x0a
> > > [   21.127509] fsck.f2fs: 1: addr 0 nid 4c1d ver 1\x0a
> > > [   21.133760] fsck.f2fs: 2: addr 12075 nid 444f ver 0\x0a
> > > [   21.140325] fsck.f2fs: 3: addr 0 nid 4307 ver 0\x0a
> > > [   21.146510] fsck.f2fs: 4: addr 0 nid 4059 ver 1\x0a
> > > [   21.152745] fsck.f2fs: 5: addr 0 nid 3e69 ver 1\x0a
> > > [   21.158944] fsck.f2fs: 6: addr 0 nid 3cf9 ver 1\x0a
> > > [   21.165128] fsck.f2fs: 7: addr 0 nid 3b7b ver 1\x0a
> > > [   21.171362] fsck.f2fs: 8: addr 18f65 nid 34e2 ver 0\x0a
> > > [   21.177903] fsck.f2fs: 9: addr 12126 nid 2c87 ver 0\x0a
> > > [   21.184466] fsck.f2fs: 10: addr 0 nid 2a0a ver 1\x0a
> > > [   21.190795] fsck.f2fs: 11: addr 0 nid 2565 ver 1\x0a
> > > [   21.197070] fsck.f2fs: 12: addr 0 nid 1f48 ver 0\x0a
> > > [   21.203369] fsck.f2fs: 13: addr 155bf nid 1d76 ver 0\x0a
> > > [   21.210042] fsck.f2fs: 14: addr 19538 nid 18da ver 0\x0a
> > > [   21.216675] fsck.f2fs: 15: addr 0 nid 157f ver 0\x0a
> > > [   21.223037] fsck.f2fs: 16: addr d117 nid ac3 ver 0\x0a
> > > [   21.229547] fsck.f2fs: 17: addr 11e48 nid 805 ver 0\x0a
> > > [   21.236088] fsck.f2fs: 18: addr d112 nid 61c ver 0\x0a
> > > [   21.242585] fsck.f2fs: 19: addr d00b nid 45b ver 0\x0a
> > > [   21.249086] fsck.f2fs: 20: addr d111 nid 30d ver 0\x0a
> > > [   21.255544] fsck.f2fs: 21: addr d110 nid 1b5 ver 0\x0a
> > > [   21.262025] fsck.f2fs: 22: addr 0 nid 5e41 ver 1\x0a
> > > [   21.268342] fsck.f2fs: 23: addr 0 nid 5e46 ver 0\x0a
> > > [   21.274618] fsck.f2fs: 24: addr 0 nid 5a34 ver 1\x0a
> > > [   21.280916] fsck.f2fs: 25: addr 0 nid 59fa ver 1\x0a
> > > [   21.287192] fsck.f2fs: 26: addr 0 nid 57c3 ver 1\x0a
> > > [   21.293526] fsck.f2fs: 27: addr 0 nid 5776 ver 1\x0a
> > > [   21.299816] fsck.f2fs: 28: addr 0 nid 4f20 ver 1\x0a
> > > [   21.306091] fsck.f2fs: 29: addr 0 nid 4ff7 ver 0\x0a
> > > [   21.312416] fsck.f2fs: 30: addr 0 nid 47e1 ver 1\x0a
> > > [   21.318715] fsck.f2fs: 31: addr 0 nid 4880 ver 1\x0a
> > > [   21.324990] fsck.f2fs: 32: addr d118 nid ff4 ver 0\x0a
> > > [   21.331498] fsck.f2fs: 33: addr d10c nid ff9 ver 0\x0a
> > > [   21.337957] fsck.f2fs: 34: addr 

[PATCH] f2fs: Fix indefinite loop in f2fs_gc()

2019-07-28 Thread Sahitya Tummala
Policy - foreground GC, LFS mode and greedy GC mode.

Under this policy, f2fs_gc() loops forever as it doesn't have
enough free segments to proceed, and thus it keeps calling gc_more
for the same victim segment. This can happen if the selected victim
segment could not be GC'd due to a failed blkaddr validity check, i.e.
is_alive() returns false for the blocks set in the current validity map.

Fix this by not resetting sbi->cur_victim_sec to NULL_SEGNO when
the selected segment could not be GC'd. This helps to select another
segment for GC and thus to make forward progress with GC.

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/gc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 8974672..7bbcc4a 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1303,7 +1303,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
round++;
}
 
-   if (gc_type == FG_GC)
+   if (gc_type == FG_GC && seg_freed)
sbi->cur_victim_sec = NULL_SEGNO;
 
if (sync)
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



Re: [f2fs-dev] [PATCH] f2fs: Fix indefinite loop in f2fs_gc()

2019-07-29 Thread Sahitya Tummala
Hi Chao,

On Tue, Jul 30, 2019 at 12:00:45AM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> On 2019-7-29 13:20, Sahitya Tummala wrote:
> > Policy - foreground GC, LFS mode and greedy GC mode.
> > 
> > Under this policy, f2fs_gc() loops forever to GC as it doesn't have
> > enough free segements to proceed and thus it keeps calling gc_more
> > for the same victim segment.  This can happen if the selected victim
> > segment could not be GC'd due to failed blkaddr validity check i.e.
> > is_alive() returns false for the blocks set in current validity map.
> > 
> > Fix this by not resetting the sbi->cur_victim_sec to NULL_SEGNO, when
> > the segment selected could not be GC'd. This helps to select another
> > segment for GC and thus helps to proceed forward with GC.
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> >  fs/f2fs/gc.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index 8974672..7bbcc4a 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -1303,7 +1303,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
> > round++;
> > }
> >  
> > -   if (gc_type == FG_GC)
> > +   if (gc_type == FG_GC && seg_freed)
> > sbi->cur_victim_sec = NULL_SEGNO;
> 
> In some cases, we may remain last victim in sbi->cur_victim_sec, and jump out 
> of
> GC cycle, then SSR can skip the last victim due to sec_usage_check()...
> 

I see. I have a few questions on how to fix this issue. Please share your
comments.

1. Do you think the scenario described is valid? It happens rarely and is not
very easy to reproduce. From the dumps, I see that only one block is set as
valid in the sentry->cur_valid_map, for which the summary block check
is_alive() could return false. As only one block is set as valid, chances are
that it can always be selected as the victim by get_victim_by_default() under
FG_GC.

2. What are the possible scenarios where summary block check is_alive() could
fail for a segment?

3. How does GC handle such segments?

Thanks,

> Thanks,
> 
> >  
> > if (sync)
> > 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.




Re: [f2fs-dev] [PATCH] f2fs: Fix indefinite loop in f2fs_gc()

2019-07-30 Thread Sahitya Tummala
Hi Chao,

On Tue, Jul 30, 2019 at 08:35:46PM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> On 2019/7/30 12:36, Sahitya Tummala wrote:
> > Hi Chao,
> > 
> > On Tue, Jul 30, 2019 at 12:00:45AM +0800, Chao Yu wrote:
> >> Hi Sahitya,
> >>
> >> On 2019-7-29 13:20, Sahitya Tummala wrote:
> >>> Policy - foreground GC, LFS mode and greedy GC mode.
> >>>
> >>> Under this policy, f2fs_gc() loops forever to GC as it doesn't have
> >>> enough free segements to proceed and thus it keeps calling gc_more
> >>> for the same victim segment.  This can happen if the selected victim
> >>> segment could not be GC'd due to failed blkaddr validity check i.e.
> >>> is_alive() returns false for the blocks set in current validity map.
> >>>
> >>> Fix this by not resetting the sbi->cur_victim_sec to NULL_SEGNO, when
> >>> the segment selected could not be GC'd. This helps to select another
> >>> segment for GC and thus helps to proceed forward with GC.
> >>>
> >>> Signed-off-by: Sahitya Tummala 
> >>> ---
> >>>  fs/f2fs/gc.c | 2 +-
> >>>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>>
> >>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> >>> index 8974672..7bbcc4a 100644
> >>> --- a/fs/f2fs/gc.c
> >>> +++ b/fs/f2fs/gc.c
> >>> @@ -1303,7 +1303,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
> >>>   round++;
> >>>   }
> >>>  
> >>> - if (gc_type == FG_GC)
> >>> + if (gc_type == FG_GC && seg_freed)
> >>>   sbi->cur_victim_sec = NULL_SEGNO;
> >>
> >> In some cases, we may remain last victim in sbi->cur_victim_sec, and jump 
> >> out of
> >> GC cycle, then SSR can skip the last victim due to sec_usage_check()...
> >>
> > 
> > I see. I have a few questions on how to fix this issue. Please share your
> > comments.
> > 
> > 1. Do you think the scenario described is valid? It happens rarely, not very
> 
> IIRC, we suffered endless gc loop due to there is valid block belong to an
> opened atomic write file. (because we will skip directly once we hit atomic 
> file)
> 
> For your case, I'm not sure that would happen, did you look into is_alive(), 
> why
> will it fail? block address not match? If so, it looks like summary info and
> dnode block and nat entry are inconsistent.

Yes, from the ramdumps, I could see that block address is not matching and
hence, is_alive() could fail in the issue scenario. Have you observed any such
cases before? What could be the reason for this mismatch?

Thanks,

> 
> > easy to reproduce.  From the dumps, I see that only block is set as valid in
> > the sentry->cur_valid_map for which I see that summary block check 
> > is_alive()
> > could return false. As only one block is set as valid, chances are there it
> > can be always selected as the victim by get_victim_by_default() under FG_GC.
> > 
> > 2. What are the possible scenarios where summary block check is_alive() 
> > could
> > fail for a segment?
> 
> I guess, maybe after check_valid_map(), the block is been truncated before
> is_alive(). If so the victim should be prefree directly instead of being
> selected again...
> 
> > 
> > 3. How does GC handle such segments?
> 
> I think that's not a normal case, or I'm missing something.
> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >> Thanks,
> >>
> >>>  
> >>>   if (sync)
> >>>
> > 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.




[PATCH v2] f2fs: Fix indefinite loop in f2fs_gc()

2019-08-06 Thread Sahitya Tummala
Policy - Foreground GC, LFS and greedy GC mode.

Under this policy, f2fs_gc() loops forever as it doesn't have
enough free segments to proceed, and thus it keeps calling gc_more
for the same victim segment. This can happen if the selected victim
segment could not be GC'd due to a failed blkaddr validity check, i.e.
is_alive() returns false for the blocks set in the current validity map.

Fix this by keeping track of such invalid segments and skipping them
during victim selection in get_victim_by_default(), to avoid an endless
GC loop under such error scenarios.

Signed-off-by: Sahitya Tummala 
---
v2: fix as per Chao's suggestion to handle this error case

 fs/f2fs/gc.c  | 15 ++-
 fs/f2fs/segment.c |  5 +
 fs/f2fs/segment.h |  3 +++
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 8974672..321a78a 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -382,6 +382,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
nsearched++;
}
 
+   /*
+* skip selecting the invalid segno (that is failed due to block
+* validity check failed during GC) to avoid endless GC loop in
+* such cases.
+*/
+   if (test_bit(segno, sm->invalid_segmap))
+   goto next;
+
secno = GET_SEC_FROM_SEG(sbi, segno);
 
if (sec_usage_check(sbi, secno))
@@ -975,6 +983,7 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct 
f2fs_summary *sum,
int off;
int phase = 0;
int submitted = 0;
+   struct sit_info *sit_i = SIT_I(sbi);
 
start_addr = START_BLOCK(sbi, segno);
 
@@ -1008,8 +1017,12 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, 
struct f2fs_summary *sum,
}
 
/* Get an inode by ino with checking validity */
-   if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
+   if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs)) {
+   if (!test_and_set_bit(segno, sit_i->invalid_segmap))
+   f2fs_err(sbi, "invalid blkaddr %u in seg %u is 
found\n",
+   start_addr + off, segno);
continue;
+   }
 
if (phase == 2) {
f2fs_ra_node_page(sbi, dni.ino);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a661ac3..d45a1d3 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -4017,6 +4017,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
return -ENOMEM;
 #endif
 
+   sit_i->invalid_segmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
+   if (!sit_i->invalid_segmap)
+   return -ENOMEM;
+
/* init SIT information */
sit_i->s_ops = &default_salloc_ops;
 
@@ -4518,6 +4522,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
 #ifdef CONFIG_F2FS_CHECK_FS
kvfree(sit_i->sit_bitmap_mir);
 #endif
+   kvfree(sit_i->invalid_segmap);
kvfree(sit_i);
 }
 
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index b746028..bc5dbe8 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -246,6 +246,9 @@ struct sit_info {
unsigned long long min_mtime;   /* min. modification time */
unsigned long long max_mtime;   /* max. modification time */
 
+   /* list of segments to be ignored by GC in case of errors */
+   unsigned long *invalid_segmap;
+
unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
 };
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



Re: [PATCH v2] f2fs: Fix indefinite loop in f2fs_gc()

2019-08-06 Thread Sahitya Tummala
Hi Chao,

On Wed, Aug 07, 2019 at 10:04:16AM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> On 2019/8/6 19:19, Sahitya Tummala wrote:
> > Policy - Foreground GC, LFS and greedy GC mode.
> > 
> > Under this policy, f2fs_gc() loops forever to GC as it doesn't have
> > enough free segments to proceed and thus it keeps calling gc_more
> > for the same victim segment.  This can happen if the selected victim
> > segment could not be GC'd due to failed blkaddr validity check i.e.
> > is_alive() returns false for the blocks set in current validity map.
> > 
> > Fix this by keeping track of such invalid segments and skip those
> > segments for selection in get_victim_by_default() to avoid endless
> > GC loop under such error scenarios.
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> > v2: fix as per Chao's suggestion to handle this error case
> > 
> >  fs/f2fs/gc.c  | 15 ++-
> >  fs/f2fs/segment.c |  5 +
> >  fs/f2fs/segment.h |  3 +++
> >  3 files changed, 22 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index 8974672..321a78a 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -382,6 +382,14 @@ static int get_victim_by_default(struct f2fs_sb_info 
> > *sbi,
> > nsearched++;
> > }
> >  
> > +   /*
> > +* skip selecting the invalid segno (that is failed due to block
> > +* validity check failed during GC) to avoid endless GC loop in
> > +* such cases.
> > +*/
> > +   if (test_bit(segno, sm->invalid_segmap))
> > +   goto next;
> > +
> > secno = GET_SEC_FROM_SEG(sbi, segno);
> >  
> > if (sec_usage_check(sbi, secno))
> > @@ -975,6 +983,7 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, 
> > struct f2fs_summary *sum,
> > int off;
> > int phase = 0;
> > int submitted = 0;
> > +   struct sit_info *sit_i = SIT_I(sbi);
> >  
> > start_addr = START_BLOCK(sbi, segno);
> >  
> > @@ -1008,8 +1017,12 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, 
> > struct f2fs_summary *sum,
> > }
> >  
> > /* Get an inode by ino with checking validity */
> > -   if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
> > +   if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs)) {
> > +   if (!test_and_set_bit(segno, sit_i->invalid_segmap))
> > +   f2fs_err(sbi, "invalid blkaddr %u in seg %u is 
> > found\n",
> > +   start_addr + off, segno);
> 
> Oh, there is some normal cases in is_alive(), such as f2fs_get_node_page() or
> f2fs_get_node_info() failure due to no memory, we should bypass such cases. I

Oh, yes, I have missed this point.

> guess something like this:
> 
> if (source_blkaddr != blkaddr) {
>   if (unlikely(check_valid_map(sbi, segno, off))) {

check_valid_map() is validated before is_alive(). So I think this check again
may not be needed. What do you think?

>   if (!test_and_set_bit(segno, sit_i->invalid_segmap)) {
>   f2fs_err(sbi, "invalid blkaddr %u in seg %u is found\n",
>   start_addr + off, segno);
>   set_sbi_flag(sbi, SBI_NEED_FSCK);
>   }
>   }
>   return false;
> }
> 
> I think this will be safe to call check_valid_map(), because there should be 
> no
> race in between is_alive() and update_sit_entry() from all paths due to node
> page lock dependence.
> 
> One more concern is should we use this under CONFIG_F2FS_CHECK_FS? If there is
> actually such a bug can cause data inconsistency, we'd better find the root
> cause in debug version.
> 

Yes, I agree with you. I will include this under CONFIG_F2FS_CHECK_FS.

Thanks,

> Thanks,
> 
> > continue;
> > +   }
> >  
> > if (phase == 2) {
> > f2fs_ra_node_page(sbi, dni.ino);
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index a661ac3..d45a1d3 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -4017,6 +4017,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
> > return -ENOMEM;
> >  #endif
> >  
> > +   sit_i->invalid_segmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
> > +   if (!sit_i->inv

Re: [PATCH v2] f2fs: Fix indefinite loop in f2fs_gc()

2019-08-06 Thread Sahitya Tummala
On Wed, Aug 07, 2019 at 11:37:22AM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> On 2019/8/7 11:24, Sahitya Tummala wrote:
> > Hi Chao,
> > 
> > On Wed, Aug 07, 2019 at 10:04:16AM +0800, Chao Yu wrote:
> >> Hi Sahitya,
> >>
> >> On 2019/8/6 19:19, Sahitya Tummala wrote:
> >>> Policy - Foreground GC, LFS and greedy GC mode.
> >>>
> >>> Under this policy, f2fs_gc() loops forever to GC as it doesn't have
> >>> enough free segments to proceed and thus it keeps calling gc_more
> >>> for the same victim segment.  This can happen if the selected victim
> >>> segment could not be GC'd due to failed blkaddr validity check i.e.
> >>> is_alive() returns false for the blocks set in current validity map.
> >>>
> >>> Fix this by keeping track of such invalid segments and skip those
> >>> segments for selection in get_victim_by_default() to avoid endless
> >>> GC loop under such error scenarios.
> >>>
> >>> Signed-off-by: Sahitya Tummala 
> >>> ---
> >>> v2: fix as per Chao's suggestion to handle this error case
> >>>
> >>>  fs/f2fs/gc.c  | 15 ++-
> >>>  fs/f2fs/segment.c |  5 +
> >>>  fs/f2fs/segment.h |  3 +++
> >>>  3 files changed, 22 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> >>> index 8974672..321a78a 100644
> >>> --- a/fs/f2fs/gc.c
> >>> +++ b/fs/f2fs/gc.c
> >>> @@ -382,6 +382,14 @@ static int get_victim_by_default(struct f2fs_sb_info 
> >>> *sbi,
> >>>   nsearched++;
> >>>   }
> >>>  
> >>> + /*
> >>> +  * skip selecting the invalid segno (that is failed due to block
> >>> +  * validity check failed during GC) to avoid endless GC loop in
> >>> +  * such cases.
> >>> +  */
> >>> + if (test_bit(segno, sm->invalid_segmap))
> >>> + goto next;
> >>> +
> >>>   secno = GET_SEC_FROM_SEG(sbi, segno);
> >>>  
> >>>   if (sec_usage_check(sbi, secno))
> >>> @@ -975,6 +983,7 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, 
> >>> struct f2fs_summary *sum,
> >>>   int off;
> >>>   int phase = 0;
> >>>   int submitted = 0;
> >>> + struct sit_info *sit_i = SIT_I(sbi);
> >>>  
> >>>   start_addr = START_BLOCK(sbi, segno);
> >>>  
> >>> @@ -1008,8 +1017,12 @@ static int gc_data_segment(struct f2fs_sb_info 
> >>> *sbi, struct f2fs_summary *sum,
> >>>   }
> >>>  
> >>>   /* Get an inode by ino with checking validity */
> >>> - if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
> >>> + if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs)) {
> >>> + if (!test_and_set_bit(segno, sit_i->invalid_segmap))
> >>> + f2fs_err(sbi, "invalid blkaddr %u in seg %u is 
> >>> found\n",
> >>> + start_addr + off, segno);
> >>
> >> Oh, there is some normal cases in is_alive(), such as f2fs_get_node_page() 
> >> or
> >> f2fs_get_node_info() failure due to no memory, we should bypass such 
> >> cases. I
> > 
> > Oh, yes, I have missed this point.
> > 
> >> guess something like this:
> >>
> >> if (source_blkaddr != blkaddr) {
> >>if (unlikely(check_valid_map(sbi, segno, off))) {
> > 
> > check_valid_map() is validated before is_alive(). So I think this check 
> > again
> > may not be needed. What do you think?
> 
> > race in between is_alive() and update_sit_entry()
> 
> There will be a race case:
> 
> gc_data_segment   f2fs_truncate_data_blocks_range
> check_valid_map
>   f2fs_invalidate_blocks
>   update_sit_entry
>   f2fs_test_and_clear_bit(, se->cur_valid_map);
>   unlock_page(node_page)
> is_alive
> lock_page(node_page)
> blkaddr should be NULL and not equal to source_blkaddr, I think this is a 
> normal
> case, right?
> 

Got it, thanks for the clarification.

> Thanks,
> 
> > 
> >>if (!test_and

[PATCH v3] f2fs: Fix indefinite loop in f2fs_gc()

2019-08-07 Thread Sahitya Tummala
Policy - Foreground GC, LFS and greedy GC mode.

Under this policy, f2fs_gc() loops forever to GC as it doesn't have
enough free segments to proceed and thus it keeps calling gc_more
for the same victim segment.  This can happen if the selected victim
segment could not be GC'd due to failed blkaddr validity check i.e.
is_alive() returns false for the blocks set in current validity map.

Fix this by keeping track of such invalid segments and skip those
segments for selection in get_victim_by_default() to avoid endless
GC loop under such error scenarios.

Signed-off-by: Sahitya Tummala 
---
v3: address Chao's comments and also add logic to clear invalid_segmap

 fs/f2fs/gc.c  | 25 +++--
 fs/f2fs/segment.c | 10 +-
 fs/f2fs/segment.h |  3 +++
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 8974672..f7b9602 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -382,6 +382,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
nsearched++;
}
 
+   /*
+* skip selecting the invalid segno (that is failed due to block
+* validity check failure during GC) to avoid endless GC loop in
+* such cases.
+*/
+   if (test_bit(segno, sm->invalid_segmap))
+   goto next;
+
secno = GET_SEC_FROM_SEG(sbi, segno);
 
if (sec_usage_check(sbi, secno))
@@ -602,8 +610,13 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct 
f2fs_summary *sum,
 {
struct page *node_page;
nid_t nid;
-   unsigned int ofs_in_node;
+   unsigned int ofs_in_node, segno;
block_t source_blkaddr;
+   unsigned long offset;
+   struct sit_info *sit_i = SIT_I(sbi);
+
+   segno = GET_SEGNO(sbi, blkaddr);
+   offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
 
nid = le32_to_cpu(sum->nid);
ofs_in_node = le16_to_cpu(sum->ofs_in_node);
@@ -627,8 +640,16 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct 
f2fs_summary *sum,
source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
f2fs_put_page(node_page, 1);
 
-   if (source_blkaddr != blkaddr)
+   if (source_blkaddr != blkaddr) {
+   if (unlikely(check_valid_map(sbi, segno, offset))) {
+   if (!test_and_set_bit(segno, sit_i->invalid_segmap)) {
+   f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n",
+   blkaddr, source_blkaddr, segno);
+   f2fs_bug_on(sbi, 1);
+   }
+   }
return false;
+   }
return true;
 }
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a661ac3..c3ba9e7 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -806,6 +806,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info 
*sbi, unsigned int segno,
enum dirty_type dirty_type)
 {
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+   struct sit_info *sit_i = SIT_I(sbi);
 
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
dirty_i->nr_dirty[dirty_type]--;
@@ -817,9 +818,11 @@ static void __remove_dirty_segment(struct f2fs_sb_info 
*sbi, unsigned int segno,
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]--;
 
-   if (get_valid_blocks(sbi, segno, true) == 0)
+   if (get_valid_blocks(sbi, segno, true) == 0) {
clear_bit(GET_SEC_FROM_SEG(sbi, segno),
dirty_i->victim_secmap);
+   clear_bit(segno, sit_i->invalid_segmap);
+   }
}
 }
 
@@ -4017,6 +4020,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
return -ENOMEM;
 #endif
 
+   sit_i->invalid_segmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
+   if (!sit_i->invalid_segmap)
+   return -ENOMEM;
+
/* init SIT information */
sit_i->s_ops = &default_salloc_ops;
 
@@ -4518,6 +4525,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
 #ifdef CONFIG_F2FS_CHECK_FS
kvfree(sit_i->sit_bitmap_mir);
 #endif
+   kvfree(sit_i->invalid_segmap);
kvfree(sit_i);
 }
 
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index b746028..3918155c 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -246,6 +246,9 @@ struct sit_info {
unsigned long long min_mtime;   /* min. modification time */
unsigned long long max_mtime;   /* max. modification time */
 
+   /* bitmap of segments to be ignored by GC in case of errors */
+   unsigned long *invalid_segmap;

Re: [PATCH v3] f2fs: Fix indefinite loop in f2fs_gc()

2019-08-07 Thread Sahitya Tummala
Hi Chao,

On Wed, Aug 07, 2019 at 05:29:24PM +0800, Chao Yu wrote:
> On 2019/8/7 16:52, Sahitya Tummala wrote:
> > Policy - Foreground GC, LFS and greedy GC mode.
> > 
> > Under this policy, f2fs_gc() loops forever to GC as it doesn't have
> > enough free segments to proceed and thus it keeps calling gc_more
> > for the same victim segment.  This can happen if the selected victim
> > segment could not be GC'd due to failed blkaddr validity check i.e.
> > is_alive() returns false for the blocks set in current validity map.
> > 
> > Fix this by keeping track of such invalid segments and skip those
> > segments for selection in get_victim_by_default() to avoid endless
> > GC loop under such error scenarios.
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> > v3: address Chao's comments and also add logic to clear invalid_segmap
> 
> Hi Sahitya,
> 
> I meant we could cover all invalid_segmap related code with CONFIG_F2FS_CHECK_FS
> in upstream code, like we did for sit_info.sit_bitmap_mir. In private code
> (qualconn or others), if this issue happens frequently, we can enable it by
> default before it is fixed.
> 
> How do you think?
> 
Sure, we can do it that way.

> Btw, still no fsck log on broken image?
>
I have requested customers to provide this log next time when the issue is
reproduced again. I will update you once I get the log.

Thanks,

> Thanks,
> 
> > 
> >  fs/f2fs/gc.c  | 25 +++--
> >  fs/f2fs/segment.c | 10 +-
> >  fs/f2fs/segment.h |  3 +++
> >  3 files changed, 35 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index 8974672..f7b9602 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -382,6 +382,14 @@ static int get_victim_by_default(struct f2fs_sb_info 
> > *sbi,
> > nsearched++;
> > }
> >  
> > +   /*
> > +* skip selecting the invalid segno (that is failed due to block
> > +* validity check failure during GC) to avoid endless GC loop in
> > +* such cases.
> > +*/
> > +   if (test_bit(segno, sm->invalid_segmap))
> > +   goto next;
> > +
> > secno = GET_SEC_FROM_SEG(sbi, segno);
> >  
> > if (sec_usage_check(sbi, secno))
> > @@ -602,8 +610,13 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct 
> > f2fs_summary *sum,
> >  {
> > struct page *node_page;
> > nid_t nid;
> > -   unsigned int ofs_in_node;
> > +   unsigned int ofs_in_node, segno;
> > block_t source_blkaddr;
> > +   unsigned long offset;
> > +   struct sit_info *sit_i = SIT_I(sbi);
> > +
> > +   segno = GET_SEGNO(sbi, blkaddr);
> > +   offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
> >  
> > nid = le32_to_cpu(sum->nid);
> > ofs_in_node = le16_to_cpu(sum->ofs_in_node);
> > @@ -627,8 +640,16 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct 
> > f2fs_summary *sum,
> > source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
> > f2fs_put_page(node_page, 1);
> >  
> > -   if (source_blkaddr != blkaddr)
> > +   if (source_blkaddr != blkaddr) {
> > +   if (unlikely(check_valid_map(sbi, segno, offset))) {
> > +   if (!test_and_set_bit(segno, sit_i->invalid_segmap)) {
> > +   f2fs_err(sbi, "mismatched blkaddr %u 
> > (source_blkaddr %u) in seg %u\n",
> > +   blkaddr, source_blkaddr, segno);
> > +   f2fs_bug_on(sbi, 1);
> > +   }
> > +   }
> > return false;
> > +   }
> > return true;
> >  }
> >  
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index a661ac3..c3ba9e7 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -806,6 +806,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info 
> > *sbi, unsigned int segno,
> > enum dirty_type dirty_type)
> >  {
> > struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> > +   struct sit_info *sit_i = SIT_I(sbi);
> >  
> > if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
> > dirty_i->nr_dirty[dirty_type]--;
> > @@ -817,9 +818,11 @@ static void __remove_dirty_segment(struct f2fs_sb_info 
> > *sbi, unsigned int segno,
> > if (test_and_clear_bit(segno, dirty_i->dir

[PATCH v4] f2fs: Fix indefinite loop in f2fs_gc()

2019-08-07 Thread Sahitya Tummala
Policy - Foreground GC, LFS and greedy GC mode.

Under this policy, f2fs_gc() loops forever to GC as it doesn't have
enough free segments to proceed and thus it keeps calling gc_more
for the same victim segment.  This can happen if the selected victim
segment could not be GC'd due to failed blkaddr validity check i.e.
is_alive() returns false for the blocks set in current validity map.

Fix this by keeping track of such invalid segments and skip those
segments for selection in get_victim_by_default() to avoid endless
GC loop under such error scenarios. Currently, add this logic under
CONFIG_F2FS_CHECK_FS to be able to root-cause the issue in the debug
version.

Signed-off-by: Sahitya Tummala 
---
v4: Cover all logic with CONFIG_F2FS_CHECK_FS

 fs/f2fs/gc.c  | 31 +--
 fs/f2fs/segment.c | 14 +-
 fs/f2fs/segment.h |  3 +++
 3 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 8974672..cbcacbd 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -382,6 +382,16 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
nsearched++;
}
 
+#ifdef CONFIG_F2FS_CHECK_FS
+   /*
+* skip selecting the invalid segno (that is failed due to block
+* validity check failure during GC) to avoid endless GC loop in
+* such cases.
+*/
+   if (test_bit(segno, sm->invalid_segmap))
+   goto next;
+#endif
+
secno = GET_SEC_FROM_SEG(sbi, segno);
 
if (sec_usage_check(sbi, secno))
@@ -602,8 +612,15 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct 
f2fs_summary *sum,
 {
struct page *node_page;
nid_t nid;
-   unsigned int ofs_in_node;
+   unsigned int ofs_in_node, segno;
block_t source_blkaddr;
+   unsigned long offset;
+#ifdef CONFIG_F2FS_CHECK_FS
+   struct sit_info *sit_i = SIT_I(sbi);
+#endif
+
+   segno = GET_SEGNO(sbi, blkaddr);
+   offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
 
nid = le32_to_cpu(sum->nid);
ofs_in_node = le16_to_cpu(sum->ofs_in_node);
@@ -627,8 +644,18 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct 
f2fs_summary *sum,
source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
f2fs_put_page(node_page, 1);
 
-   if (source_blkaddr != blkaddr)
+   if (source_blkaddr != blkaddr) {
+#ifdef CONFIG_F2FS_CHECK_FS
+   if (unlikely(check_valid_map(sbi, segno, offset))) {
+   if (!test_and_set_bit(segno, sit_i->invalid_segmap)) {
+   f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n",
+   blkaddr, source_blkaddr, segno);
+   f2fs_bug_on(sbi, 1);
+   }
+   }
+#endif
return false;
+   }
return true;
 }
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a661ac3..ee795b1 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -806,6 +806,9 @@ static void __remove_dirty_segment(struct f2fs_sb_info 
*sbi, unsigned int segno,
enum dirty_type dirty_type)
 {
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+#ifdef CONFIG_F2FS_CHECK_FS
+   struct sit_info *sit_i = SIT_I(sbi);
+#endif
 
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
dirty_i->nr_dirty[dirty_type]--;
@@ -817,9 +820,13 @@ static void __remove_dirty_segment(struct f2fs_sb_info 
*sbi, unsigned int segno,
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]--;
 
-   if (get_valid_blocks(sbi, segno, true) == 0)
+   if (get_valid_blocks(sbi, segno, true) == 0) {
clear_bit(GET_SEC_FROM_SEG(sbi, segno),
dirty_i->victim_secmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+   clear_bit(segno, sit_i->invalid_segmap);
+#endif
+   }
}
 }
 
@@ -4015,6 +4022,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
if (!sit_i->sit_bitmap_mir)
return -ENOMEM;
+
+   sit_i->invalid_segmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
+   if (!sit_i->invalid_segmap)
+   return -ENOMEM;
 #endif
 
/* init SIT information */
@@ -4517,6 +4528,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
kvfree(sit_i->sit_bitmap);
 #ifdef CONFIG_F2FS_CHECK_FS
kvfree(sit_i->sit_bitmap_mir);
+   kvfree(sit_i->invalid_segmap);
 #endif
kvfree(sit_i);
 }
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index b746028..9370d53 

[PATCH] f2fs: add a condition to detect overflow in f2fs_ioc_gc_range()

2019-09-16 Thread Sahitya Tummala
end = range.start + range.len;

If range.start/range.len is a very large value, then end can overflow
in this operation. This results in a crash in get_valid_blocks() when
accessing the invalid range.start segno.

This issue is reported in ioctl fuzz testing.
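
Below is a minimal, standalone sketch of the wrap-around this check guards
against (an editorial illustration, not part of the patch; block_t here is
just a 32-bit stand-in and the sample addresses are made up):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t block_t;	/* stand-in for the f2fs block address type */

/* reject a GC range whose end wrapped around or falls outside [main, max) */
static int gc_range_is_valid(block_t start, block_t len,
			     block_t main_blkaddr, block_t max_blkaddr)
{
	block_t end = start + len;	/* may wrap for huge start/len */

	if (end < start)		/* wrap-around detected */
		return 0;
	if (start < main_blkaddr || end >= max_blkaddr)
		return 0;
	return 1;
}

int main(void)
{
	/* huge len wraps end to a small value; must be rejected */
	printf("%d\n", gc_range_is_valid(0x1000, 0xFFFFFFF0u, 0x200, 0x400000));
	/* sane range inside [main, max) passes */
	printf("%d\n", gc_range_is_valid(0x1000, 0x2000, 0x200, 0x400000));
	return 0;
}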

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/file.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5474aaa..c2b4767 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2123,9 +2123,8 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned 
long arg)
return -EROFS;
 
end = range.start + range.len;
-   if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) {
+   if (end < range.start || range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi))
return -EINVAL;
-   }
 
ret = mnt_want_write_file(filp);
if (ret)
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



Re: [f2fs-dev] [PATCH v4] f2fs: Fix indefinite loop in f2fs_gc()

2019-09-16 Thread Sahitya Tummala
Hi Chao,

On Fri, Sep 06, 2019 at 07:00:32PM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> Luckily, I can reproduce this issue with generic/269, and have sent another
> patch for the issue, could you please check that one?
> 

Thanks for the fix. The issue has not been reproduced here yet, so I could not
make much progress on the customer case.

thanks,

> Thanks,
> 
> On 2019/8/7 22:06, Chao Yu wrote:
> > On 2019-8-7 21:40, Sahitya Tummala wrote:
> >> Policy - Foreground GC, LFS and greedy GC mode.
> >>
> >> Under this policy, f2fs_gc() loops forever to GC as it doesn't have
> >> enough free segments to proceed and thus it keeps calling gc_more
> >> for the same victim segment.  This can happen if the selected victim
> >> segment could not be GC'd due to failed blkaddr validity check i.e.
> >> is_alive() returns false for the blocks set in current validity map.
> >>
> >> Fix this by keeping track of such invalid segments and skip those
> >> segments for selection in get_victim_by_default() to avoid endless
> >> GC loop under such error scenarios. Currently, add this logic under
> >> CONFIG_F2FS_CHECK_FS to be able to root cause the issue in debug
> >> version.
> >>
> >> Signed-off-by: Sahitya Tummala 
> >> ---
> >> v4: Cover all logic with CONFIG_F2FS_CHECK_FS
> >>
> >>  fs/f2fs/gc.c  | 31 +--
> >>  fs/f2fs/segment.c | 14 +-
> >>  fs/f2fs/segment.h |  3 +++
> >>  3 files changed, 45 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> >> index 8974672..cbcacbd 100644
> >> --- a/fs/f2fs/gc.c
> >> +++ b/fs/f2fs/gc.c
> >> @@ -382,6 +382,16 @@ static int get_victim_by_default(struct f2fs_sb_info 
> >> *sbi,
> >>nsearched++;
> >>}
> >>  
> >> +#ifdef CONFIG_F2FS_CHECK_FS
> >> +  /*
> >> +   * skip selecting the invalid segno (that is failed due to block
> >> +   * validity check failure during GC) to avoid endless GC loop in
> >> +   * such cases.
> >> +   */
> >> +  if (test_bit(segno, sm->invalid_segmap))
> >> +  goto next;
> >> +#endif
> >> +
> >>secno = GET_SEC_FROM_SEG(sbi, segno);
> >>  
> >>if (sec_usage_check(sbi, secno))
> >> @@ -602,8 +612,15 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct 
> >> f2fs_summary *sum,
> >>  {
> >>struct page *node_page;
> >>nid_t nid;
> >> -  unsigned int ofs_in_node;
> >> +  unsigned int ofs_in_node, segno;
> >>block_t source_blkaddr;
> >> +  unsigned long offset;
> >> +#ifdef CONFIG_F2FS_CHECK_FS
> >> +  struct sit_info *sit_i = SIT_I(sbi);
> >> +#endif
> >> +
> >> +  segno = GET_SEGNO(sbi, blkaddr);
> >> +  offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
> >>  
> >>nid = le32_to_cpu(sum->nid);
> >>ofs_in_node = le16_to_cpu(sum->ofs_in_node);
> >> @@ -627,8 +644,18 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct 
> >> f2fs_summary *sum,
> >>source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
> >>f2fs_put_page(node_page, 1);
> >>  
> >> -  if (source_blkaddr != blkaddr)
> >> +  if (source_blkaddr != blkaddr) {
> >> +#ifdef CONFIG_F2FS_CHECK_FS
> > 
> > unsigned int segno = GET_SEGNO(sbi, blkaddr);
> > unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
> > 
> > Should be local, otherwise it looks good to me, I think Jaegeuk can help to 
> > fix
> > this while merging.
> > 
> > Reviewed-by: Chao Yu 
> > 
> > Thanks,
> > 
> >> +  if (unlikely(check_valid_map(sbi, segno, offset))) {
> >> +  if (!test_and_set_bit(segno, sit_i->invalid_segmap)) {
> >> +  f2fs_err(sbi, "mismatched blkaddr %u 
> >> (source_blkaddr %u) in seg %u\n",
> >> +  blkaddr, source_blkaddr, segno);
> >> +  f2fs_bug_on(sbi, 1);
> >> +  }
> >> +  }
> >> +#endif
> >>return false;
> >> +  }
> >>return true;
> >>  }
> >>  
> >> diff --git a/fs/f2fs/segment.c b/fs/f2fs/s

[f2fs-dev] [PATCH] f2fs: Fix deadlock under storage almost full/dirty condition

2019-11-08 Thread Sahitya Tummala
There could be a potential deadlock when the storage capacity
is almost full and there aren't enough free segments available, due
to which FG_GC is needed in the atomic commit ioctl as shown in
the below callstack -

schedule_timeout
io_schedule_timeout
congestion_wait
f2fs_drop_inmem_pages_all
f2fs_gc
f2fs_balance_fs
__write_node_page
f2fs_fsync_node_pages
f2fs_do_sync_file
f2fs_ioctl

If this inode doesn't have i_gc_failures[GC_FAILURE_ATOMIC] set,
then f2fs_drop_inmem_pages_all() waits forever for this atomic inode
to be dropped. And the rest of the system is stuck waiting for the
sbi->gc_mutex lock, which is acquired by f2fs_balance_fs() in the
stack above.

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/segment.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index da830fc..335ec09 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -300,7 +300,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, 
bool gc_failure)
 
if (inode) {
if (gc_failure) {
-   if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
+   if (fi->i_gc_failures[GC_FAILURE_ATOMIC] ||
+   F2FS_I(inode)->inmem_task == current)
goto drop;
goto skip;
}
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.





Re: [f2fs-dev] [PATCH] f2fs: Fix deadlock under storage almost full/dirty condition

2019-11-10 Thread Sahitya Tummala
Hi Chao,

On Mon, Nov 11, 2019 at 10:51:10AM +0800, Chao Yu wrote:
> On 2019/11/8 19:03, Sahitya Tummala wrote:
> > There could be a potential deadlock when the storage capacity
> > is almost full and there aren't enough free segments available, due
> > to which FG_GC is needed in the atomic commit ioctl as shown in
> > the below callstack -
> > 
> > schedule_timeout
> > io_schedule_timeout
> > congestion_wait
> > f2fs_drop_inmem_pages_all
> > f2fs_gc
> > f2fs_balance_fs
> > __write_node_page
> > f2fs_fsync_node_pages
> > f2fs_do_sync_file
> > f2fs_ioctl
> > 
> > If this inode doesn't have i_gc_failures[GC_FAILURE_ATOMIC] set,
> > then it waits forever in f2fs_drop_inmem_pages_all(), for this
> > atomic inode to be dropped. And the rest of the system is stuck
> > waiting for sbi->gc_mutex lock, which is acquired by f2fs_balance_fs()
> > in the stack above.
> 
> I think the root cause of this issue is there is potential infinite loop in
> f2fs_drop_inmem_pages_all() for the case of gc_failure is true, because once 
> the
> first inode in inode_list[ATOMIC_FILE] list didn't suffer gc failure, we will
> skip dropping its in-memory cache and calling iput(), and traverse the list
> again, most possibly there is the same inode in the head of that list.
> 

I thought we were expecting those atomic updates (without any gc failures) to be
committed by doing congestion_wait() and thus retrying again. Hence, I only
fixed the case where we end up waiting for the commit in the atomic commit path
itself, which would be a deadlock.

> Could you please check below fix:
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 7bf7b0194944..8a3a35b42a37 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1395,6 +1395,7 @@ struct f2fs_sb_info {
>   unsigned int gc_mode;   /* current GC state */
>   unsigned int next_victim_seg[2];/* next segment in victim 
> section */
>   /* for skip statistic */
> + unsigned int atomic_files;  /* # of opened atomic file */
>   unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
>   unsigned long long skipped_gc_rwsem;/* FG_GC only */
> 
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index ecd063239642..79f4b348951a 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -2047,6 +2047,7 @@ static int f2fs_ioc_start_atomic_write(struct file 
> *filp)
>   spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
>   if (list_empty(&fi->inmem_ilist))
>   list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
> + sbi->atomic_files++;
>   spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
> 
>   /* add inode in inmem_list first and set atomic_file */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 8b977bbd6822..6aa0bb693697 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi,
> bool gc_failure)
>   struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
>   struct inode *inode;
>   struct f2fs_inode_info *fi;
> + unsigned int count = sbi->atomic_files;

If the sbi->atomic_files decrements just after this, then the below exit 
condition
may not work. In that case, looped will never be >= count.

> + unsigned int looped = 0;
>  next:
>   spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
>   if (list_empty(head)) {
> @@ -296,22 +298,29 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi,
> bool gc_failure)
>   }
>   fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
>   inode = igrab(&fi->vfs_inode);
> + if (inode)
> + list_move_tail(&fi->inmem_ilist, head);
>   spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
> 
>   if (inode) {
>   if (gc_failure) {
> - if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
> - goto drop;
> - goto skip;
> + if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
> + goto skip;
>   }
> -drop:
>   set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
>   f2fs_drop_inmem_pages(inode);
> +skip:
>   iput(inode);

Does this result in f2fs_evict_inode() being called in this context for this inode?

thanks,

>   }
> -skip:
> +
>   congestion_wait(BLK_RW_ASYNC, HZ/50);
>   cond_resched();
> +
> + if (gc_failure) {
> + if (++looped >= count)
> +

Re: [f2fs-dev] [PATCH] f2fs: Fix deadlock under storage almost full/dirty condition

2019-11-10 Thread Sahitya Tummala
Hi Chao,

On Mon, Nov 11, 2019 at 02:28:47PM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> On 2019/11/11 11:40, Sahitya Tummala wrote:
> > Hi Chao,
> > 
> > On Mon, Nov 11, 2019 at 10:51:10AM +0800, Chao Yu wrote:
> >> On 2019/11/8 19:03, Sahitya Tummala wrote:
> >>> There could be a potential deadlock when the storage capacity
> >>> is almost full and there aren't enough free segments available, due
> >>> to which FG_GC is needed in the atomic commit ioctl as shown in
> >>> the below callstack -
> >>>
> >>> schedule_timeout
> >>> io_schedule_timeout
> >>> congestion_wait
> >>> f2fs_drop_inmem_pages_all
> >>> f2fs_gc
> >>> f2fs_balance_fs
> >>> __write_node_page
> >>> f2fs_fsync_node_pages
> >>> f2fs_do_sync_file
> >>> f2fs_ioctl
> >>>
> >>> If this inode doesn't have i_gc_failures[GC_FAILURE_ATOMIC] set,
> >>> then it waits forever in f2fs_drop_inmem_pages_all(), for this
> >>> atomic inode to be dropped. And the rest of the system is stuck
> >>> waiting for sbi->gc_mutex lock, which is acquired by f2fs_balance_fs()
> >>> in the stack above.
> >>
> >> I think the root cause of this issue is there is potential infinite loop in
> >> f2fs_drop_inmem_pages_all() for the case of gc_failure is true, because 
> >> once the
> >> first inode in inode_list[ATOMIC_FILE] list didn't suffer gc failure, we 
> >> will
> >> skip dropping its in-memory cache and calling iput(), and traverse the list
> >> again, most possibly there is the same inode in the head of that list.
> >>
> > 
> > I thought we are expecting for those atomic updates (without any gc 
> > failures) to be
> > committed by doing congestion_wait() and thus retrying again. Hence, I just
> 
> Nope, we only need to drop the inodes which encountered gc failures, and keep
> the rest of the inodes.
> 
> > fixed only if we are ending up waiting for commit to happen in the atomic
> > commit path itself, which will be a deadlock.
> 
> Looking into the call stack you provided, I don't think it's correct to drop
> such an inode, as its dirty pages should be committed before
> f2fs_fsync_node_pages(), so calling f2fs_drop_inmem_pages() won't release any
> inmem pages, and won't help the looped GC caused by skipping due to inmem pages.
> 
> And then I figure out below fix...
> 

Thanks for the explanation.
The fix below looks good to me.

Thanks,
Sahitya.

> > 
> >> Could you please check below fix:
> >>
> >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >> index 7bf7b0194944..8a3a35b42a37 100644
> >> --- a/fs/f2fs/f2fs.h
> >> +++ b/fs/f2fs/f2fs.h
> >> @@ -1395,6 +1395,7 @@ struct f2fs_sb_info {
> >>unsigned int gc_mode;   /* current GC state */
> >>unsigned int next_victim_seg[2];/* next segment in victim 
> >> section */
> >>/* for skip statistic */
> >> +  unsigned int atomic_files;  /* # of opened atomic file */
> >>unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
> >>unsigned long long skipped_gc_rwsem;/* FG_GC only */
> >>
> >> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >> index ecd063239642..79f4b348951a 100644
> >> --- a/fs/f2fs/file.c
> >> +++ b/fs/f2fs/file.c
> >> @@ -2047,6 +2047,7 @@ static int f2fs_ioc_start_atomic_write(struct file 
> >> *filp)
> >>spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
> >>if (list_empty(&fi->inmem_ilist))
> >>list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
> >> +  sbi->atomic_files++;
> >>spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
> >>
> >>/* add inode in inmem_list first and set atomic_file */
> >> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >> index 8b977bbd6822..6aa0bb693697 100644
> >> --- a/fs/f2fs/segment.c
> >> +++ b/fs/f2fs/segment.c
> >> @@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info 
> >> *sbi,
> >> bool gc_failure)
> >>struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
> >>struct inode *inode;
> >>struct f2fs_inode_info *fi;
> >> +  unsigned int count = sbi->atomic_files;
> > 
> > If the sbi->atomic_files decrements just after this, then the below exit 
> >

[f2fs-dev] [PATCH] f2fs: Fix deadlock in f2fs_gc() context during atomic files handling

2019-11-13 Thread Sahitya Tummala
The FS got stuck in the below stack when the storage is in an almost
full/dirty condition (when FG_GC is being done).

schedule_timeout
io_schedule_timeout
congestion_wait
f2fs_drop_inmem_pages_all
f2fs_gc
f2fs_balance_fs
__write_node_page
f2fs_fsync_node_pages
f2fs_do_sync_file
f2fs_ioctl

The root cause for this issue is there is a potential infinite loop
in f2fs_drop_inmem_pages_all() for the case where gc_failure is true
and when there is an inode whose i_gc_failures[GC_FAILURE_ATOMIC] is
not set. Fix this by keeping track of the total atomic files
currently opened and using that to exit from this condition.
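
As an editorial illustration (not part of the patch), the termination argument
can be sketched in plain userspace C: snapshot the number of atomic files once,
visit one list entry per pass (the kernel moves it to the tail), and stop after
that many passes, so the loop ends even if no inode can be dropped. All names
and values below are made up:

#include <stdio.h>

int main(void)
{
	/* 1 = inode saw a GC failure (can be dropped), 0 = keep waiting on it */
	int gc_failed[] = { 0, 1, 0, 0 };
	int count = 4;		/* snapshot of sbi->atomic_files */
	int looped = 0;
	int head = 0;		/* head of inode_list[ATOMIC_FILE] */

	for (;;) {
		/* kernel: list_first_entry() + list_move_tail() under the lock */
		int ino = head;

		head = (head + 1) % count;
		if (gc_failed[ino])
			printf("drop inmem pages of inode %d\n", ino);
		else
			printf("skip inode %d (no GC failure yet)\n", ino);

		/* every entry has been visited once: stop instead of looping forever */
		if (++looped >= count)
			break;
	}
	return 0;
}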

Fix-suggested-by: Chao Yu 
Signed-off-by: Chao Yu 
Signed-off-by: Sahitya Tummala 
---
v2:
- change fix as per Chao's suggestion
- decrement sbi->atomic_files protected under sbi->inode_lock[ATOMIC_FILE] and
  only when atomic flag is cleared for the first time, otherwise, the count
  goes to an invalid/high value as f2fs_drop_inmem_pages() can be called from
  two contexts at the same time.
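
A small standalone sketch of that v2 rule (illustrative only; the names mirror
the patch but this is not kernel code): the flag test and the decrement sit
under one lock, so a second drop call for the same inode leaves the counter
untouched.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;	/* sbi->inode_lock[ATOMIC_FILE] */
static int atomic_files = 1;	/* one atomic file currently open */
static int fi_atomic_file = 1;	/* per-inode FI_ATOMIC_FILE flag */

static void drop_inmem_pages(void)
{
	pthread_mutex_lock(&inode_lock);
	if (fi_atomic_file) {		/* only the first caller clears the flag ... */
		fi_atomic_file = 0;
		atomic_files--;		/* ... so the counter is decremented exactly once */
	}
	pthread_mutex_unlock(&inode_lock);
}

int main(void)
{
	drop_inmem_pages();
	drop_inmem_pages();	/* a second call from another context: no double decrement */
	printf("atomic_files = %d\n", atomic_files);	/* prints 0, never -1 */
	return 0;
}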

 fs/f2fs/f2fs.h|  1 +
 fs/f2fs/file.c|  1 +
 fs/f2fs/segment.c | 21 +++--
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c681f51..e04a665 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1297,6 +1297,7 @@ struct f2fs_sb_info {
unsigned int gc_mode;   /* current GC state */
unsigned int next_victim_seg[2];/* next segment in victim 
section */
/* for skip statistic */
+   unsigned int atomic_files;  /* # of opened atomic file */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
unsigned long long skipped_gc_rwsem;/* FG_GC only */
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f6c038e..22c4949 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1919,6 +1919,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (list_empty(&fi->inmem_ilist))
list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
+   sbi->atomic_files++;
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
 
/* add inode in inmem_list first and set atomic_file */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index da830fc..0b7a33b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, 
bool gc_failure)
struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
struct inode *inode;
struct f2fs_inode_info *fi;
+   unsigned int count = sbi->atomic_files;
+   unsigned int looped = 0;
 next:
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (list_empty(head)) {
@@ -296,22 +298,26 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, 
bool gc_failure)
}
fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
inode = igrab(&fi->vfs_inode);
+   if (inode)
+   list_move_tail(&fi->inmem_ilist, head);
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
 
if (inode) {
if (gc_failure) {
-   if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
-   goto drop;
-   goto skip;
+   if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
+   goto skip;
}
-drop:
set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
f2fs_drop_inmem_pages(inode);
+skip:
iput(inode);
}
-skip:
congestion_wait(BLK_RW_ASYNC, HZ/50);
cond_resched();
+   if (gc_failure) {
+   if (++looped >= count)
+   return;
+   }
goto next;
 }
 
@@ -327,13 +333,16 @@ void f2fs_drop_inmem_pages(struct inode *inode)
mutex_unlock(&fi->inmem_lock);
}
 
-   clear_inode_flag(inode, FI_ATOMIC_FILE);
fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
stat_dec_atomic_write(inode);
 
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (!list_empty(&fi->inmem_ilist))
list_del_init(&fi->inmem_ilist);
+   if (f2fs_is_atomic_file(inode)) {
+   clear_inode_flag(inode, FI_ATOMIC_FILE);
+   sbi->atomic_files--;
+   }
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
 }
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.





Re: [f2fs-dev] [PATCH] f2fs: Fix deadlock in f2fs_gc() context during atomic files handling

2019-11-25 Thread Sahitya Tummala
Hi Jaegeuk,

On Fri, Nov 22, 2019 at 08:53:28AM -0800, Jaegeuk Kim wrote:
> On 11/13, Sahitya Tummala wrote:
> > The FS got stuck in the below stack when the storage is almost
> > full/dirty condition (when FG_GC is being done).
> > 
> > schedule_timeout
> > io_schedule_timeout
> > congestion_wait
> > f2fs_drop_inmem_pages_all
> > f2fs_gc
> > f2fs_balance_fs
> > __write_node_page
> > f2fs_fsync_node_pages
> > f2fs_do_sync_file
> > f2fs_ioctl
> > 
> > The root cause for this issue is there is a potential infinite loop
> > in f2fs_drop_inmem_pages_all() for the case where gc_failure is true
> > and when there is an inode whose i_gc_failures[GC_FAILURE_ATOMIC] is
> > not set. Fix this by keeping track of the total atomic files
> > currently opened and using that to exit from this condition.
> > 
> > Fix-suggested-by: Chao Yu 
> > Signed-off-by: Chao Yu 
> > Signed-off-by: Sahitya Tummala 
> > ---
> > v2:
> > - change fix as per Chao's suggestion
> > - decrement sbi->atomic_files protected under sbi->inode_lock[ATOMIC_FILE] 
> > and
> >   only when atomic flag is cleared for the first time, otherwise, the count
> >   goes to an invalid/high value as f2fs_drop_inmem_pages() can be called 
> > from
> >   two contexts at the same time.
> > 
> >  fs/f2fs/f2fs.h|  1 +
> >  fs/f2fs/file.c|  1 +
> >  fs/f2fs/segment.c | 21 +++--
> >  3 files changed, 17 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index c681f51..e04a665 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1297,6 +1297,7 @@ struct f2fs_sb_info {
> > unsigned int gc_mode;   /* current GC state */
> > unsigned int next_victim_seg[2];/* next segment in victim 
> > section */
> > /* for skip statistic */
> > +   unsigned int atomic_files;  /* # of opened atomic file */
> > unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
> > unsigned long long skipped_gc_rwsem;/* FG_GC only */
> >  
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index f6c038e..22c4949 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -1919,6 +1919,7 @@ static int f2fs_ioc_start_atomic_write(struct file 
> > *filp)
> > spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
> > if (list_empty(&fi->inmem_ilist))
> > list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
> > +   sbi->atomic_files++;
> > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
> >  
> > /* add inode in inmem_list first and set atomic_file */
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index da830fc..0b7a33b 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info 
> > *sbi, bool gc_failure)
> > struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
> > struct inode *inode;
> > struct f2fs_inode_info *fi;
> > +   unsigned int count = sbi->atomic_files;
> > +   unsigned int looped = 0;
> >  next:
> > spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
> > if (list_empty(head)) {
> > @@ -296,22 +298,26 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info 
> > *sbi, bool gc_failure)
> > }
> > fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
> > inode = igrab(&fi->vfs_inode);
> > +   if (inode)
> > +   list_move_tail(&fi->inmem_ilist, head);
> > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
> >  
> > if (inode) {
> > if (gc_failure) {
> > -   if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
> > -   goto drop;
> > -   goto skip;
> > +   if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
> > +   goto skip;
> > }
> > -drop:
> > set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
> > f2fs_drop_inmem_pages(inode);
> > +skip:
> > iput(inode);
> > }
> > -skip:
> > congestion_wait(BLK_RW_ASYNC, HZ/50);
> > cond_resched();
> > +   if (gc_failure) {
> > +   if (++looped >= count)
> 
> There is a race condition when handling sbi->atomic_files?
> 
There is no concern here in this function w.

[f2fs-dev] [PATCH] f2fs: cleanup duplicate stats for atomic files

2019-12-04 Thread Sahitya Tummala
Remove the duplicate sbi->aw_cnt stats counter that tracks
the number of atomic files currently opened (it also shows an
incorrect value sometimes). Use the more reliable sbi->atomic_files
counter in the stats instead.

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/debug.c   | 3 +--
 fs/f2fs/f2fs.h| 7 +--
 fs/f2fs/file.c| 1 -
 fs/f2fs/segment.c | 1 -
 4 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 9b0bedd..0e87813 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -56,7 +56,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->nquota_files = sbi->nquota_files;
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
-   si->aw_cnt = atomic_read(&sbi->aw_cnt);
+   si->aw_cnt = sbi->atomic_files;
si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
@@ -495,7 +495,6 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
for (i = META_CP; i < META_MAX; i++)
atomic_set(&sbi->meta_count[i], 0);
 
-   atomic_set(&sbi->aw_cnt, 0);
atomic_set(&sbi->vw_cnt, 0);
atomic_set(&sbi->max_aw_cnt, 0);
atomic_set(&sbi->max_vw_cnt, 0);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5a888a0..26a61e8 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1327,7 +1327,6 @@ struct f2fs_sb_info {
atomic_t inline_xattr;  /* # of inline_xattr inodes */
atomic_t inline_inode;  /* # of inline_data inodes */
atomic_t inline_dir;/* # of inline_dentry inodes */
-   atomic_t aw_cnt;/* # of atomic writes */
atomic_t vw_cnt;/* # of volatile writes */
atomic_t max_aw_cnt;/* max # of atomic writes */
atomic_t max_vw_cnt;/* max # of volatile writes */
@@ -3386,13 +3385,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct 
f2fs_sb_info *sbi)
((sbi)->block_count[(curseg)->alloc_type]++)
 #define stat_inc_inplace_blocks(sbi)   \
(atomic_inc(&(sbi)->inplace_count))
-#define stat_inc_atomic_write(inode)   \
-   (atomic_inc(&F2FS_I_SB(inode)->aw_cnt))
-#define stat_dec_atomic_write(inode)   \
-   (atomic_dec(&F2FS_I_SB(inode)->aw_cnt))
 #define stat_update_max_atomic_write(inode)\
do {\
-   int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt);   \
+   int cur = F2FS_I_SB(inode)->atomic_files;   \
int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt);   \
if (cur > max)  \
atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 85af112..dfe6efe 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1935,7 +1935,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
F2FS_I(inode)->inmem_task = current;
-   stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
 out:
inode_unlock(inode);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 56e8144..c0917d5 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -334,7 +334,6 @@ void f2fs_drop_inmem_pages(struct inode *inode)
}
 
fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
-   stat_dec_atomic_write(inode);
 
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (!list_empty(&fi->inmem_ilist))
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.





[f2fs-dev] [PATCH] f2fs: show the CP_PAUSE reason in checkpoint traces

2020-01-02 Thread Sahitya Tummala
Remove the duplicate CP_UMOUNT enum and add the new CP_PAUSE
enum to show the checkpoint reason in the trace prints.

Signed-off-by: Sahitya Tummala 
---
 include/trace/events/f2fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 1796ff9..3a17252 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -49,6 +49,7 @@
 TRACE_DEFINE_ENUM(CP_RECOVERY);
 TRACE_DEFINE_ENUM(CP_DISCARD);
 TRACE_DEFINE_ENUM(CP_TRIMMED);
+TRACE_DEFINE_ENUM(CP_PAUSE);
 
 #define show_block_type(type)  \
__print_symbolic(type,  \
@@ -124,7 +125,7 @@
{ CP_SYNC,  "Sync" },   \
{ CP_RECOVERY,  "Recovery" },   \
{ CP_DISCARD,   "Discard" },\
-   { CP_UMOUNT,"Umount" }, \
+   { CP_PAUSE, "Pause" },  \
{ CP_TRIMMED,   "Trimmed" })
 
 #define show_fsync_cpreason(type)  \
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




[f2fs-dev] [PATCH] f2fs: fix the panic in do_checkpoint()

2020-02-12 Thread Sahitya Tummala
There could be a scenario where f2fs_sync_meta_pages() will not
ensure that all F2FS_DIRTY_META pages are submitted for IO. This
results in the below panic in do_checkpoint() -

f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
!f2fs_cp_error(sbi));

This can happen in a low-memory condition, where shrinker could
also be doing the writepage operation (stack shown below)
at the same time when checkpoint is running on another core.

schedule
down_write
f2fs_submit_page_write -> by this time, this page in page cache is tagged
as PAGECACHE_TAG_WRITEBACK and PAGECACHE_TAG_DIRTY
is cleared, due to which f2fs_sync_meta_pages()
cannot sync this page in do_checkpoint() path.
f2fs_do_write_meta_page
__f2fs_write_meta_page
f2fs_write_meta_page
shrink_page_list
shrink_inactive_list
shrink_node_memcg
shrink_node
kswapd
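
To illustrate why waiting works where the assertion fired (an editorial sketch,
not part of the patch): the thread below stands in for the kswapd/shrinker
writepage path that already holds some dirty meta pages, and the main thread
stands in for do_checkpoint(), which polls the remaining dirty-meta count until
it drains instead of asserting that it is already zero. In the kernel the wait
is done with prepare_to_wait()/io_schedule_timeout() on sbi->cp_wait; the names
and timings here are illustrative only.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int dirty_meta = 3;	/* pages the shrinker already took for writeback */

/* stands in for kswapd/shrinker finishing its in-flight meta writes */
static void *shrinker_writeback(void *arg)
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&lock);
		if (dirty_meta == 0) {
			pthread_mutex_unlock(&lock);
			return NULL;
		}
		dirty_meta--;			/* one more page submitted for IO */
		pthread_mutex_unlock(&lock);
		usleep(1000);
	}
}

int main(void)
{
	pthread_t t;
	int remaining;

	pthread_create(&t, NULL, shrinker_writeback, NULL);

	/* do_checkpoint(): wait for the count to drain rather than BUG_ON() */
	for (;;) {
		pthread_mutex_lock(&lock);
		remaining = dirty_meta;
		pthread_mutex_unlock(&lock);
		if (remaining == 0)
			break;
		usleep(1000);		/* stands in for io_schedule_timeout(HZ/50) */
	}

	pthread_join(t, NULL);
	printf("all dirty meta pages submitted for IO\n");
	return 0;
}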

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/checkpoint.c | 16 
 fs/f2fs/f2fs.h   |  2 +-
 fs/f2fs/super.c  |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ffdaba0..2b651a3 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1250,14 +1250,14 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
 }
 
-void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
 {
DEFINE_WAIT(wait);
 
for (;;) {
prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
 
-   if (!get_pages(sbi, F2FS_WB_CP_DATA))
+   if (!get_pages(sbi, type))
break;
 
if (unlikely(f2fs_cp_error(sbi)))
@@ -1384,8 +1384,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct 
cp_control *cpc)
 
/* Flush all the NAT/SIT pages */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
-   f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
-   !f2fs_cp_error(sbi));
+   /* Wait for all dirty meta pages to be submitted for IO */
+   f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META);
 
/*
 * modify checkpoint
@@ -1493,11 +1493,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
 
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
-   f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
-   !f2fs_cp_error(sbi));
+   /* Wait for all dirty meta pages to be submitted for IO */
+   f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META);
 
/* wait for previous submitted meta pages writeback */
-   f2fs_wait_on_all_pages_writeback(sbi);
+   f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
 
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
@@ -1506,7 +1506,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct 
cp_control *cpc)
 
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
-   f2fs_wait_on_all_pages_writeback(sbi);
+   f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
 
/*
 * invalidate intermediate page cache borrowed from meta inode
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5a888a0..b0e0535 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3196,7 +3196,7 @@ bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t 
ino,
 void f2fs_update_dirty_page(struct inode *inode, struct page *page);
 void f2fs_remove_dirty_inode(struct inode *inode);
 int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
-void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi);
+void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type);
 int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
 int __init f2fs_create_checkpoint_caches(void);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 5111e1f..084633b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1105,7 +1105,7 @@ static void f2fs_put_super(struct super_block *sb)
/* our cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);
 
-   f2fs_wait_on_all_pages_writeback(sbi);
+   f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
 
f2fs_bug_on(sbi, sbi->fsync_node_num);
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




[f2fs-dev] [PATCH V2] f2fs: fix the panic in do_checkpoint()

2020-02-16 Thread Sahitya Tummala
There could be a scenario where f2fs_sync_meta_pages() will not
ensure that all F2FS_DIRTY_META pages are submitted for IO. This
results in the below panic in do_checkpoint() -

f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
!f2fs_cp_error(sbi));

This can happen in a low-memory condition, where shrinker could
also be doing the writepage operation (stack shown below)
at the same time when checkpoint is running on another core.

schedule
down_write
f2fs_submit_page_write -> by this time, this page in page cache is tagged
as PAGECACHE_TAG_WRITEBACK and PAGECACHE_TAG_DIRTY
is cleared, due to which f2fs_sync_meta_pages()
cannot sync this page in do_checkpoint() path.
f2fs_do_write_meta_page
__f2fs_write_meta_page
f2fs_write_meta_page
shrink_page_list
shrink_inactive_list
shrink_node_memcg
shrink_node
kswapd

Signed-off-by: Sahitya Tummala 
---
v2:
- changed the io_schedule_timeout to HZ/50.

 fs/f2fs/checkpoint.c | 18 +-
 fs/f2fs/f2fs.h   |  2 +-
 fs/f2fs/super.c  |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ffdaba0..d5601cc 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1250,20 +1250,20 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
 }
 
-void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
 {
DEFINE_WAIT(wait);
 
for (;;) {
prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
 
-   if (!get_pages(sbi, F2FS_WB_CP_DATA))
+   if (!get_pages(sbi, type))
break;
 
if (unlikely(f2fs_cp_error(sbi)))
break;
 
-   io_schedule_timeout(5*HZ);
+   io_schedule_timeout(HZ/50);
}
finish_wait(&sbi->cp_wait, &wait);
 }
@@ -1384,8 +1384,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct 
cp_control *cpc)
 
/* Flush all the NAT/SIT pages */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
-   f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
-   !f2fs_cp_error(sbi));
+   /* Wait for all dirty meta pages to be submitted for IO */
+   f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META);
 
/*
 * modify checkpoint
@@ -1493,11 +1493,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
 
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
-   f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
-   !f2fs_cp_error(sbi));
+   /* Wait for all dirty meta pages to be submitted for IO */
+   f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META);
 
/* wait for previous submitted meta pages writeback */
-   f2fs_wait_on_all_pages_writeback(sbi);
+   f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
 
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
@@ -1506,7 +1506,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct 
cp_control *cpc)
 
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
-   f2fs_wait_on_all_pages_writeback(sbi);
+   f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
 
/*
 * invalidate intermediate page cache borrowed from meta inode
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5a888a0..b0e0535 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3196,7 +3196,7 @@ bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t 
ino,
 void f2fs_update_dirty_page(struct inode *inode, struct page *page);
 void f2fs_remove_dirty_inode(struct inode *inode);
 int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
-void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi);
+void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type);
 int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
 int __init f2fs_create_checkpoint_caches(void);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 5111e1f..084633b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1105,7 +1105,7 @@ static void f2fs_put_super(struct super_block *sb)
/* our cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);
 
-   f2fs_wait_on_all_pages_writeback(sbi);
+   f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
 
f2fs_bug_on(sbi, sbi->fsync_node_num);
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.

[f2fs-dev] [PATCH v3] f2fs: fix the panic in do_checkpoint()

2020-02-17 Thread Sahitya Tummala
There could be a scenario where f2fs_sync_meta_pages() does not
ensure that all F2FS_DIRTY_META pages are submitted for IO, thus
resulting in the below panic in do_checkpoint() -

f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
!f2fs_cp_error(sbi));

This can happen in a low-memory condition, where shrinker could
also be doing the writepage operation (stack shown below)
at the same time when checkpoint is running on another core.

schedule
down_write
f2fs_submit_page_write -> by this time, this page in page cache is tagged
as PAGECACHE_TAG_WRITEBACK and PAGECACHE_TAG_DIRTY
is cleared, due to which f2fs_sync_meta_pages()
cannot sync this page in do_checkpoint() path.
f2fs_do_write_meta_page
__f2fs_write_meta_page
f2fs_write_meta_page
shrink_page_list
shrink_inactive_list
shrink_node_memcg
shrink_node
kswapd

Signed-off-by: Sahitya Tummala 
---
v3: Just rebase on dev branch of Jaegeuk's tree.

 fs/f2fs/checkpoint.c | 18 +-
 fs/f2fs/f2fs.h   |  2 +-
 fs/f2fs/super.c  |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 44e84ac..751815c 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1250,20 +1250,20 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
 }
 
-void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
 {
DEFINE_WAIT(wait);
 
for (;;) {
prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
 
-   if (!get_pages(sbi, F2FS_WB_CP_DATA))
+   if (!get_pages(sbi, type))
break;
 
if (unlikely(f2fs_cp_error(sbi)))
break;
 
-   io_schedule_timeout(5*HZ);
+   io_schedule_timeout(HZ/50);
}
finish_wait(&sbi->cp_wait, &wait);
 }
@@ -1384,8 +1384,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct 
cp_control *cpc)
 
/* Flush all the NAT/SIT pages */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
-   f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
-   !f2fs_cp_error(sbi));
+   /* Wait for all dirty meta pages to be submitted for IO */
+   f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META);
 
/*
 * modify checkpoint
@@ -1493,11 +1493,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
 
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
-   f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
-   !f2fs_cp_error(sbi));
+   /* Wait for all dirty meta pages to be submitted for IO */
+   f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META);
 
/* wait for previous submitted meta pages writeback */
-   f2fs_wait_on_all_pages_writeback(sbi);
+   f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
 
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
@@ -1506,7 +1506,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct 
cp_control *cpc)
 
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
-   f2fs_wait_on_all_pages_writeback(sbi);
+   f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
 
/*
 * invalidate intermediate page cache borrowed from meta inode which are
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5355be6..d39f5de 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3308,7 +3308,7 @@ bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t 
ino,
 void f2fs_update_dirty_page(struct inode *inode, struct page *page);
 void f2fs_remove_dirty_inode(struct inode *inode);
 int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
-void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi);
+void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type);
 int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
 int __init f2fs_create_checkpoint_caches(void);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 65a7a43..686f540 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1172,7 +1172,7 @@ static void f2fs_put_super(struct super_block *sb)
/* our cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);
 
-   f2fs_wait_on_all_pages_writeback(sbi);
+   f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
 
f2fs_bug_on(sbi, sbi->fsync_node_num);
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.

[f2fs-dev] [PATCH 1/2] f2fs: Fix mount failure due to SPO after a successful online resize FS

2020-02-27 Thread Sahitya Tummala
Even though online resize completes successfully, an SPO immediately
after resize still causes the below error on the next mount.

[   11.294650] F2FS-fs (sda8): Wrong user_block_count: 2233856
[   11.300272] F2FS-fs (sda8): Failed to get valid F2FS checkpoint

This is because, after the FS metadata is updated in update_fs_metadata(),
if SBI_IS_DIRTY is not set, then no CP will be done to reflect
the new user_block_count.
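
For context, the reason the new user_block_count never reaches the disk is
that a sync-triggered checkpoint is skipped when nothing is marked dirty. A
condensed paraphrase of that early exit (field and flag names follow f2fs,
but this is a sketch, not the literal f2fs_write_checkpoint() code):

/* Condensed paraphrase of the early exit that makes SBI_IS_DIRTY matter;
 * do_real_checkpoint() is a hypothetical stand-in for the rest of the path. */
static int write_checkpoint_sketch(struct f2fs_sb_info *sbi,
				   struct cp_control *cpc)
{
	/*
	 * A CP_SYNC checkpoint (what f2fs_sync_fs() requests) is skipped
	 * outright when SBI_IS_DIRTY is not set, so the resized
	 * user_block_count stays only in memory until the next real CP.
	 */
	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && (cpc->reason & CP_SYNC))
		return 0;

	return do_real_checkpoint(sbi, cpc);
}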

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/gc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index a92fa49..a14a75f 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1577,6 +1577,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
block_count)
 
update_fs_metadata(sbi, -secs);
clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+   set_sbi_flag(sbi, SBI_IS_DIRTY);
err = f2fs_sync_fs(sbi->sb, 1);
if (err) {
update_fs_metadata(sbi, secs);
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




[f2fs-dev] [PATCH 2/2] f2fs: Add a new CP flag to help fsck fix resize SPO issues

2020-02-27 Thread Sahitya Tummala
Add and set a new CP flag, CP_RESIZEFS_FLAG, during
online FS resize to help fsck fix the metadata mismatch
that may happen due to an SPO during resize, where the SB
got updated but the CP data couldn't be written yet.

fsck errors -
Info: CKPT version = 6ed7bccb
Wrong user_block_count(2233856)
[f2fs_do_mount:3365] Checkpoint is polluted

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/checkpoint.c| 8 ++--
 include/linux/f2fs_fs.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index fdd7f3d..0bd4cdb 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1301,10 +1301,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
else
__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
 
-   if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
-   is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
+   if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
 
+   if (is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
+   __set_ckpt_flags(ckpt, CP_RESIZEFS_FLAG);
+   else
+   __clear_ckpt_flags(ckpt, CP_RESIZEFS_FLAG);
+
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
__set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
else
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index ac3f488..3c383dd 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -125,6 +125,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_RESIZEFS_FLAG   0x4000
 #define CP_DISABLED_QUICK_FLAG 0x2000
 #define CP_DISABLED_FLAG   0x1000
 #define CP_QUOTA_NEED_FSCK_FLAG0x0800
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




[f2fs-dev] [PATCH] fsck.f2fs: allow fsck to fix issues with online resize due to SPO

2020-02-27 Thread Sahitya Tummala
Add support for the new CP flag CP_RESIZEFS_FLAG set during online
FS resize. If an SPO happens after the SB is updated but the CP isn't,
then allow fsck to fix it.

fsck errors without this fix -
Info: CKPT version = 6ed7bccb
Wrong user_block_count(2233856)
[f2fs_do_mount:3365] Checkpoint is polluted

the subsequent mount failure without this fix -
[   11.294650] F2FS-fs (sda8): Wrong user_block_count: 2233856
[   11.300272] F2FS-fs (sda8): Failed to get valid F2FS checkpoint
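
As a rough, self-contained illustration of the bound fsck checks and the value
the fix below substitutes (the segment geometry here is hypothetical; only the
stale 2233856 value is taken from the log above):

#include <stdio.h>

int main(void)
{
	unsigned int segment_count_main = 4200;    /* hypothetical, from the updated SB */
	unsigned int overprov_segments  = 150;     /* hypothetical, from the CP         */
	unsigned int log_blocks_per_seg = 9;       /* 512 blocks per segment            */
	unsigned int stale_user_blocks  = 2233856; /* old value left behind in the CP   */

	unsigned int capacity = segment_count_main << log_blocks_per_seg;

	if (stale_user_blocks >= capacity)
		printf("Wrong user_block_count(%u), main area holds only %u blocks\n",
		       stale_user_blocks, capacity);

	/* What a fix would write back: main-area blocks minus overprovision. */
	unsigned int fixed = (segment_count_main - overprov_segments)
				<< log_blocks_per_seg;
	printf("Fix wrong user_block_count in CP: (%u) -> (%u)\n",
	       stale_user_blocks, fixed);
	return 0;
}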

Signed-off-by: Sahitya Tummala 
---
 fsck/mount.c  | 30 +++---
 include/f2fs_fs.h |  1 +
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/fsck/mount.c b/fsck/mount.c
index e4ba048..374c9c4 100644
--- a/fsck/mount.c
+++ b/fsck/mount.c
@@ -1128,6 +1128,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int total, fsmeta;
struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
+   unsigned int flag = get_cp(ckpt_flags);
unsigned int ovp_segments, reserved_segments;
unsigned int main_segs, blocks_per_seg;
unsigned int sit_segs, nat_segs;
@@ -1164,7 +1165,30 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
log_blocks_per_seg = get_sb(log_blocks_per_seg);
if (!user_block_count || user_block_count >=
segment_count_main << log_blocks_per_seg) {
-   MSG(0, "\tWrong user_block_count(%u)\n", user_block_count);
+   if (flag & CP_RESIZEFS_FLAG) {
+   u32 valid_user_block_cnt;
+   u32 seg_cnt_main = get_sb(segment_count) -
+   (get_sb(segment_count_ckpt) +
+get_sb(segment_count_sit) +
+get_sb(segment_count_nat) +
+get_sb(segment_count_ssa));
+
+   /* validate segment_count_main in sb first */
+   if (seg_cnt_main != get_sb(segment_count_main)) {
+   MSG(0, "inconsistent segment counts found\n");
+   return 1;
+   }
+   valid_user_block_cnt = ((get_sb(segment_count_main) -
+   get_cp(overprov_segment_count)) * c.blks_per_seg);
+   MSG(0, "Info: Fix wrong user_block_count in CP: (%u) -> (%u)\n",
+   user_block_count, valid_user_block_cnt);
+   set_cp(user_block_count, valid_user_block_cnt);
+   c.fix_on = 1;
+   c.bug_on = 1;
+   } else {
+   MSG(0, "\tWrong user_block_count(%u)\n", user_block_count);
+   return 1;
+   }
return 1;
}
 
@@ -3361,6 +3385,8 @@ int f2fs_do_mount(struct f2fs_sb_info *sbi)
return -1;
}
 
+   c.bug_on = 0;
+
if (sanity_check_ckpt(sbi)) {
ERR_MSG("Checkpoint is polluted\n");
return -1;
@@ -3380,8 +3406,6 @@ int f2fs_do_mount(struct f2fs_sb_info *sbi)
c.fix_on = 1;
}
 
-   c.bug_on = 0;
-
if (tune_sb_features(sbi))
return -1;
 
diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h
index af31bc5..265f50c 100644
--- a/include/f2fs_fs.h
+++ b/include/f2fs_fs.h
@@ -678,6 +678,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_RESIZEFS_FLAG0x4000
 #define CP_DISABLED_FLAG   0x1000
 #define CP_QUOTA_NEED_FSCK_FLAG0x0800
 #define CP_LARGE_NAT_BITMAP_FLAG   0x0400
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




Re: [f2fs-dev] [PATCH 1/2] f2fs: Fix mount failure due to SPO after a successful online resize FS

2020-03-01 Thread Sahitya Tummala
Hi Chao,

On Fri, Feb 28, 2020 at 04:35:37PM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> Good catch.
> 
> On 2020/2/27 18:39, Sahitya Tummala wrote:
> > Even though online resize is successfully done, a SPO immediately
> > after resize, still causes below error in the next mount.
> > 
> > [   11.294650] F2FS-fs (sda8): Wrong user_block_count: 2233856
> > [   11.300272] F2FS-fs (sda8): Failed to get valid F2FS checkpoint
> > 
> > This is because after FS metadata is updated in update_fs_metadata()
> > if the SBI_IS_DIRTY is not dirty, then CP will not be done to reflect
> > the new user_block_count.
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> >  fs/f2fs/gc.c | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index a92fa49..a14a75f 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -1577,6 +1577,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
> > block_count)
> >  
> > update_fs_metadata(sbi, -secs);
> > clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
> 
> Need a barrier here to keep order in between above code and 
> set_sbi_flag(DIRTY)?

I don't think a barrier will help here. Let us say there is another context
doing CP already; then it races with update_fs_metadata(), so it may or may not
see the resize updates, and it will also clear the SBI_IS_DIRTY flag set by
resize (even with a barrier).

I think we need to synchronize this with CP context, so that these resize 
changes
will be reflected properly. Please see the new diff below and help with the 
review.

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index a14a75f..5554af8 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1467,6 +1467,7 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, 
int secs)
long long user_block_count =
le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);

+   clear_sbi_flag(sbi, SBI_IS_DIRTY);
SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
@@ -1575,9 +1576,12 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
block_count)
goto out;
}

+   mutex_lock(&sbi->cp_mutex);
update_fs_metadata(sbi, -secs);
clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
set_sbi_flag(sbi, SBI_IS_DIRTY);
+   mutex_unlock(&sbi->cp_mutex);
+
err = f2fs_sync_fs(sbi->sb, 1);
if (err) {
update_fs_metadata(sbi, secs);

thanks,

> 
> > +   set_sbi_flag(sbi, SBI_IS_DIRTY);
> > err = f2fs_sync_fs(sbi->sb, 1);
> > if (err) {
> > update_fs_metadata(sbi, secs);
> 
> Do we need to add clear_sbi_flag(, SBI_IS_DIRTY) into update_fs_metadata(), 
> so above
> path can be covered as well?
> 
> Thanks,
> 
> > 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.




Re: [f2fs-dev] [PATCH 1/2] f2fs: Fix mount failure due to SPO after a successful online resize FS

2020-03-03 Thread Sahitya Tummala
Hi Chao,

On Tue, Mar 03, 2020 at 08:06:21PM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> On 2020/3/2 12:39, Sahitya Tummala wrote:
> > Hi Chao,
> > 
> > On Fri, Feb 28, 2020 at 04:35:37PM +0800, Chao Yu wrote:
> >> Hi Sahitya,
> >>
> >> Good catch.
> >>
> >> On 2020/2/27 18:39, Sahitya Tummala wrote:
> >>> Even though online resize is successfully done, a SPO immediately
> >>> after resize, still causes below error in the next mount.
> >>>
> >>> [   11.294650] F2FS-fs (sda8): Wrong user_block_count: 2233856
> >>> [   11.300272] F2FS-fs (sda8): Failed to get valid F2FS checkpoint
> >>>
> >>> This is because after FS metadata is updated in update_fs_metadata()
> >>> if the SBI_IS_DIRTY is not dirty, then CP will not be done to reflect
> >>> the new user_block_count.
> >>>
> >>> Signed-off-by: Sahitya Tummala 
> >>> ---
> >>>  fs/f2fs/gc.c | 1 +
> >>>  1 file changed, 1 insertion(+)
> >>>
> >>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> >>> index a92fa49..a14a75f 100644
> >>> --- a/fs/f2fs/gc.c
> >>> +++ b/fs/f2fs/gc.c
> >>> @@ -1577,6 +1577,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
> >>> block_count)
> >>>  
> >>>   update_fs_metadata(sbi, -secs);
> >>>   clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
> >>
> >> Need a barrier here to keep order in between above code and 
> >> set_sbi_flag(DIRTY)?
> > 
> > I don't think a barrier will help here. Let us say there is a another 
> > context
> > doing CP already, then it races with update_fs_metadata(), so it may or may 
> > not
> > see the resize updates and it will also clear the SBI_IS_DIRTY flag set by 
> > resize
> > (even with a barrier).
> 
> I agreed, actually, we didn't consider race condition in between CP and
> update_fs_metadata(), it should be fixed.
> 
> > 
> > I think we need to synchronize this with CP context, so that these resize 
> > changes
> > will be reflected properly. Please see the new diff below and help with the 
> > review.
> > 
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index a14a75f..5554af8 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -1467,6 +1467,7 @@ static void update_fs_metadata(struct f2fs_sb_info 
> > *sbi, int secs)
> > long long user_block_count =
> > 
> > le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);
> > 
> > +   clear_sbi_flag(sbi, SBI_IS_DIRTY);
> 
> Why clear dirty flag here?

Yes, it is not required. I will remove it.

> 
> And why not use cp_mutex to protect update_fs_metadata() in error path of
> f2fs_sync_fs() below?

Yes, will add a lock there too.

Thanks,

> 
> > SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
> > MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
> > FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
> > @@ -1575,9 +1576,12 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
> > block_count)
> > goto out;
> > }
> > 
> > +   mutex_lock(&sbi->cp_mutex);
> > update_fs_metadata(sbi, -secs);
> > clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
> > set_sbi_flag(sbi, SBI_IS_DIRTY);
> > +   mutex_unlock(&sbi->cp_mutex);
> > +
> > err = f2fs_sync_fs(sbi->sb, 1);
> > if (err) {
> > update_fs_metadata(sbi, secs);
> 
> ^^
> 
> In addition, I found that we missed to use sb_lock to protect f2fs_super_block
> fields update, will submit a patch for that.
> 
> Thanks,
> 
> > 
> > thanks,
> > 
> >>
> >>> + set_sbi_flag(sbi, SBI_IS_DIRTY);
> >>>   err = f2fs_sync_fs(sbi->sb, 1);
> >>>   if (err) {
> >>>   update_fs_metadata(sbi, secs);
> >>
> >> Do we need to add clear_sbi_flag(, SBI_IS_DIRTY) into 
> >> update_fs_metadata(), so above
> >> path can be covered as well?
> >>
> >> Thanks,
> >>
> >>>
> > 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.




[f2fs-dev] [PATCH V2 2/2] f2fs: Add a new CP flag to help fsck fix resize SPO issues

2020-03-03 Thread Sahitya Tummala
Add and set a new CP flag, CP_RESIZEFS_FLAG, during
online FS resize to help fsck fix the metadata mismatch
that may happen due to an SPO during resize, where the SB
got updated but the CP data couldn't be written yet.

fsck errors -
Info: CKPT version = 6ed7bccb
Wrong user_block_count(2233856)
[f2fs_do_mount:3365] Checkpoint is polluted

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/checkpoint.c| 8 ++--
 include/linux/f2fs_fs.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index fdd7f3d..0bd4cdb 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1301,10 +1301,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
else
__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
 
-   if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
-   is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
+   if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
 
+   if (is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
+   __set_ckpt_flags(ckpt, CP_RESIZEFS_FLAG);
+   else
+   __clear_ckpt_flags(ckpt, CP_RESIZEFS_FLAG);
+
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
__set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
else
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index ac3f488..3c383dd 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -125,6 +125,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_RESIZEFS_FLAG   0x4000
 #define CP_DISABLED_QUICK_FLAG 0x2000
 #define CP_DISABLED_FLAG   0x1000
 #define CP_QUOTA_NEED_FSCK_FLAG0x0800
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




Re: [f2fs-dev] [PATCH V2 2/2] f2fs: Add a new CP flag to help fsck fix resize SPO issues

2020-03-05 Thread Sahitya Tummala
Hi Chao,

On Fri, Mar 06, 2020 at 09:19:39AM +0800, Chao Yu wrote:
> On 2020/3/3 22:29, Sahitya Tummala wrote:
> > Add and set a new CP flag CP_RESIZEFS_FLAG during
> > online resize FS to help fsck fix the metadata mismatch
> > that may happen due to SPO during resize, where SB
> > got updated but CP data couldn't be written yet.
> > 
> > fsck errors -
> > Info: CKPT version = 6ed7bccb
> > Wrong user_block_count(2233856)
> > [f2fs_do_mount:3365] Checkpoint is polluted
> 
> I'm not against this patch, however without this change, could
> fsck have any chance to repair old image?

Sure, I will update the fsck patch to handle it.

thanks,
> 
> > 
> > Signed-off-by: Sahitya Tummala 
> 
> Reviewed-by: Chao Yu 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.




Re: [f2fs-dev] [PATCH] f2fs: fix long latency due to discard during umount

2020-03-12 Thread Sahitya Tummala
On Thu, Mar 12, 2020 at 06:45:35PM -0700, Jaegeuk Kim wrote:
> On 03/13, Sahitya Tummala wrote:
> > On Thu, Mar 12, 2020 at 10:02:42AM -0700, Jaegeuk Kim wrote:
> > > On 03/12, Sahitya Tummala wrote:
> > > > F2FS already has a default timeout of 5 secs for discards that
> > > > can be issued during umount, but it can take more than the 5 sec
> > > > timeout if the underlying UFS device queue is already full and there
> > > > are no more available free tags to be used. In that case, submit_bio()
> > > > will wait for the already queued discard requests to complete to get
> > > > a free tag, which can potentially take way more than 5 sec.
> > > > 
> > > > Fix this by submitting the discard requests with REQ_NOWAIT
> > > > flags during umount. This will return -EAGAIN for UFS queue/tag full
> > > > scenario without waiting in the context of submit_bio(). The FS can
> > > > then handle these requests by retrying again within the stipulated
> > > > discard timeout period to avoid long latencies.
> > > > 
> > > > Signed-off-by: Sahitya Tummala 
> > > > ---
> > > >  fs/f2fs/segment.c | 14 +-
> > > >  1 file changed, 13 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > > > index fb3e531..a06bbac 100644
> > > > --- a/fs/f2fs/segment.c
> > > > +++ b/fs/f2fs/segment.c
> > > > @@ -1124,10 +1124,13 @@ static int __submit_discard_cmd(struct 
> > > > f2fs_sb_info *sbi,
> > > > struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> > > > struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) 
> > > > ?
> > > > &(dcc->fstrim_list) : 
> > > > &(dcc->wait_list);
> > > > -   int flag = dpolicy->sync ? REQ_SYNC : 0;
> > > > +   int flag;
> > > > block_t lstart, start, len, total_len;
> > > > int err = 0;
> > > >  
> > > > +   flag = dpolicy->sync ? REQ_SYNC : 0;
> > > > +   flag |= dpolicy->type == DPOLICY_UMOUNT ? REQ_NOWAIT : 0;
> > > > +
> > > > if (dc->state != D_PREP)
> > > > return 0;
> > > >  
> > > > @@ -1203,6 +1206,11 @@ static int __submit_discard_cmd(struct 
> > > > f2fs_sb_info *sbi,
> > > > bio->bi_end_io = f2fs_submit_discard_endio;
> > > > bio->bi_opf |= flag;
> > > > submit_bio(bio);
> > > > +   if ((flag & REQ_NOWAIT) && (dc->error == -EAGAIN)) {
> > > > +   dc->state = D_PREP;
> > > > +   err = dc->error;
> > > > +   break;
> > > > +   }
> > > >  
> > > > atomic_inc(&dcc->issued_discard);
> > > >  
> > > > @@ -1510,6 +1518,10 @@ static int __issue_discard_cmd(struct 
> > > > f2fs_sb_info *sbi,
> > > > }
> > > >  
> > > > __submit_discard_cmd(sbi, dpolicy, dc, &issued);
> > > > +   if (dc->error == -EAGAIN) {
> > > > +   congestion_wait(BLK_RW_ASYNC, HZ/50);
> > > 
> > >   --> need to be 
> > > DEFAULT_IO_TIMEOUT
> > 
> > Yes, i will update it.
> > 
> > > 
> > > > +   __relocate_discard_cmd(dcc, dc);
> > > 
> > > It seems we need to submit bio first, and then move dc to wait_list, if 
> > > there's
> > > no error, in __submit_discard_cmd().
> > 
> > Yes, that is not changed and it still happens for the failed request
> > that is re-queued here too when it gets submitted again later.
> > 
> > I am requeuing the discard request failed with -EAGAIN error back to 
> > dcc->pend_list[] from wait_list. It will call submit_bio() for this request
> > and also move to wait_list when it calls __submit_discard_cmd() again next
> > time. Please let me know if I am missing anything?
> 
> This patch has no problem, but I'm thinking that __submit_discard_cmd() needs
> to return with any values by assumption where the waiting list should have
> submitted comman

Re: [f2fs-dev] [PATCH] f2fs: fix long latency due to discard during umount

2020-03-15 Thread Sahitya Tummala
Hi Chao,

On Mon, Mar 16, 2020 at 08:52:25AM +0800, Chao Yu wrote:
> On 2020/3/13 19:08, Sahitya Tummala wrote:
> > On Fri, Mar 13, 2020 at 02:30:55PM +0800, Chao Yu wrote:
> >> On 2020/3/13 11:39, Sahitya Tummala wrote:
> >>> On Fri, Mar 13, 2020 at 10:20:04AM +0800, Chao Yu wrote:
> >>>> On 2020/3/12 19:14, Sahitya Tummala wrote:
> >>>>> F2FS already has a default timeout of 5 secs for discards that
> >>>>> can be issued during umount, but it can take more than the 5 sec
> >>>>> timeout if the underlying UFS device queue is already full and there
> >>>>> are no more available free tags to be used. In that case, submit_bio()
> >>>>> will wait for the already queued discard requests to complete to get
> >>>>> a free tag, which can potentially take way more than 5 sec.
> >>>>>
> >>>>> Fix this by submitting the discard requests with REQ_NOWAIT
> >>>>> flags during umount. This will return -EAGAIN for UFS queue/tag full
> >>>>> scenario without waiting in the context of submit_bio(). The FS can
> >>>>> then handle these requests by retrying again within the stipulated
> >>>>> discard timeout period to avoid long latencies.
> >>>>>
> >>>>> Signed-off-by: Sahitya Tummala 
> >>>>> ---
> >>>>>  fs/f2fs/segment.c | 14 +-
> >>>>>  1 file changed, 13 insertions(+), 1 deletion(-)
> >>>>>
> >>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>>> index fb3e531..a06bbac 100644
> >>>>> --- a/fs/f2fs/segment.c
> >>>>> +++ b/fs/f2fs/segment.c
> >>>>> @@ -1124,10 +1124,13 @@ static int __submit_discard_cmd(struct 
> >>>>> f2fs_sb_info *sbi,
> >>>>> struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> >>>>> struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) 
> >>>>> ?
> >>>>> &(dcc->fstrim_list) : 
> >>>>> &(dcc->wait_list);
> >>>>> -   int flag = dpolicy->sync ? REQ_SYNC : 0;
> >>>>> +   int flag;
> >>>>> block_t lstart, start, len, total_len;
> >>>>> int err = 0;
> >>>>>  
> >>>>> +   flag = dpolicy->sync ? REQ_SYNC : 0;
> >>>>> +   flag |= dpolicy->type == DPOLICY_UMOUNT ? REQ_NOWAIT : 0;
> >>>>> +
> >>>>> if (dc->state != D_PREP)
> >>>>> return 0;
> >>>>>  
> >>>>> @@ -1203,6 +1206,11 @@ static int __submit_discard_cmd(struct 
> >>>>> f2fs_sb_info *sbi,
> >>>>> bio->bi_end_io = f2fs_submit_discard_endio;
> >>>>> bio->bi_opf |= flag;
> >>>>> submit_bio(bio);
> >>>>> +   if ((flag & REQ_NOWAIT) && (dc->error == -EAGAIN)) {
> >>>>
> >>>> If we want to update dc->state, we need to cover it with dc->lock.
> >>>
> >>> Sure, will update it.
> >>>
> >>>>
> >>>>> +   dc->state = D_PREP;
> >>>>
> >>>> BTW, one dc can be referenced by multiple bios, so dc->state could be 
> >>>> updated to
> >>>> D_DONE later by f2fs_submit_discard_endio(), however we just relocate it 
> >>>> to
> >>>> pending list... which is inconsistent status.
> >>>
> >>> In that case dc->bio_ref will reflect it and until it becomes 0, the 
> >>> dc->state
> >>> will not be updated to D_DONE in f2fs_submit_discard_endio()?
> >>
> >> __submit_discard_cmd()
> >>  lock()
> >>  dc->state = D_SUBMIT;
> >>  dc->bio_ref++;
> >>  unlock()
> >> ...
> >>  submit_bio()
> >>f2fs_submit_discard_endio()
> >> dc->error = -EAGAIN;
> >> lock()
> >> dc->bio_ref--;
> >>
> >>  dc->state = D_PREP;
> >>
> >> dc->state = D_DONE;
> >> unlock()
> >>
> &

Re: [f2fs-dev] [PATCH V2] fsck.f2fs: allow fsck to fix issues with online resize due to SPO

2020-03-19 Thread Sahitya Tummala
Hi Chao,

On Mon, Mar 16, 2020 at 11:19:11AM +0800, Chao Yu wrote:
> On 2020/3/6 11:52, Sahitya Tummala wrote:
> > Add support for new CP flag CP_RESIZEFS_FLAG set during online
> > resize FS. If SPO happens after SB is updated but CP isn't, then
> > allow fsck to fix it.
> > 
> > fsck errors without this fix -
> > Info: CKPT version = 6ed7bccb
> > Wrong user_block_count(2233856)
> > [f2fs_do_mount:3365] Checkpoint is polluted
> > 
> > the subsequent mount failure without this fix -
> > [   11.294650] F2FS-fs (sda8): Wrong user_block_count: 2233856
> > [   11.300272] F2FS-fs (sda8): Failed to get valid F2FS checkpoint
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> > v2:
> > - fix even if CP_FSCK_FLAG is set for backward compatibility
> > - update print_cp_state()
> > 
> >  fsck/mount.c  | 34 +++---
> >  include/f2fs_fs.h |  1 +
> >  2 files changed, 32 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fsck/mount.c b/fsck/mount.c
> > index e4ba048..8d32e41 100644
> > --- a/fsck/mount.c
> > +++ b/fsck/mount.c
> > @@ -429,6 +429,8 @@ void print_cp_state(u32 flag)
> > MSG(0, "%s", " orphan_inodes");
> > if (flag & CP_DISABLED_FLAG)
> > MSG(0, "%s", " disabled");
> > +   if (flag & CP_RESIZEFS_FLAG)
> > +   MSG(0, "%s", " resizefs");
> > if (flag & CP_UMOUNT_FLAG)
> > MSG(0, "%s", " unmount");
> > else
> > @@ -1128,6 +1130,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
> > unsigned int total, fsmeta;
> > struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
> > struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
> > +   unsigned int flag = get_cp(ckpt_flags);
> > unsigned int ovp_segments, reserved_segments;
> > unsigned int main_segs, blocks_per_seg;
> > unsigned int sit_segs, nat_segs;
> > @@ -1164,7 +1167,32 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
> > log_blocks_per_seg = get_sb(log_blocks_per_seg);
> > if (!user_block_count || user_block_count >=
> > segment_count_main << log_blocks_per_seg) {
> > -   MSG(0, "\tWrong user_block_count(%u)\n", user_block_count);
> > +   if (flag & (CP_FSCK_FLAG | CP_RESIZEFS_FLAG)) {
> > +   u32 valid_user_block_cnt;
> > +   u32 seg_cnt_main = get_sb(segment_count) -
> > +   (get_sb(segment_count_ckpt) +
> > +get_sb(segment_count_sit) +
> > +get_sb(segment_count_nat) +
> > +get_sb(segment_count_ssa));
> > +
> > +   /* validate segment_count_main in sb first */
> > +   if (seg_cnt_main != get_sb(segment_count_main)) {
> > +   MSG(0, "\tWrong user_block_count(%u) and 
> > inconsistent segment_cnt_main %u\n",
> > +   user_block_count,
> > +   segment_count_main << 
> > log_blocks_per_seg);
> > +   return 1;
> > +   }
> > +   valid_user_block_cnt = ((get_sb(segment_count_main) -
> > +   get_cp(overprov_segment_count)) 
> > * c.blks_per_seg);
> > +   MSG(0, "\tInfo: Fix wrong user_block_count in CP: (%u) 
> > -> (%u)\n",
> > +   user_block_count, valid_user_block_cnt);
> 
> By default, we should only fix such bug if c.fix_on is true, something
> like this:
> 
> ASSERT_MSG("\tWrong user_block_count(%u)\n", user_block_count);
> 
> if (!c.fix_on)
>   return 1;
> 
> valid_user_block_cnt = ((get_sb(segment_count_main) -
>   get_cp(overprov_segment_count)) * c.blks_per_seg);
> 
> MSG(0, "\tInfo: Fix wrong user_block_count in CP: (%u) -> (%u)\n",
>   user_block_count, valid_user_block_cnt);
> 
Since this is a fatal error that fails the basic mount itself, I thought it
should be fixed by fsck by default, independent of the -f option. Can we do
so for such critical bugs?

Thanks,

> Thanks,
> 
> 
> > +   set_cp(user_block_count, valid_user_block_cnt);
> > +   c.fix_on = 1;
> > +   c.bug_on = 1;
> > +   } else {
> > +  

Re: [f2fs-dev] [PATCH V2] fsck.f2fs: allow fsck to fix issues with online resize due to SPO

2020-03-19 Thread Sahitya Tummala
On Thu, Mar 19, 2020 at 06:47:07PM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> On 2020/3/19 17:15, Sahitya Tummala wrote:
> > Hi Chao,
> > 
> > On Mon, Mar 16, 2020 at 11:19:11AM +0800, Chao Yu wrote:
> >> On 2020/3/6 11:52, Sahitya Tummala wrote:
> >>> Add support for new CP flag CP_RESIZEFS_FLAG set during online
> >>> resize FS. If SPO happens after SB is updated but CP isn't, then
> >>> allow fsck to fix it.
> >>>
> >>> fsck errors without this fix -
> >>> Info: CKPT version = 6ed7bccb
> >>> Wrong user_block_count(2233856)
> >>> [f2fs_do_mount:3365] Checkpoint is polluted
> >>>
> >>> the subsequent mount failure without this fix -
> >>> [   11.294650] F2FS-fs (sda8): Wrong user_block_count: 2233856
> >>> [   11.300272] F2FS-fs (sda8): Failed to get valid F2FS checkpoint
> >>>
> >>> Signed-off-by: Sahitya Tummala 
> >>> ---
> >>> v2:
> >>> - fix even if CP_FSCK_FLAG is set for backward compatibility
> >>> - update print_cp_state()
> >>>
> >>>  fsck/mount.c  | 34 +++---
> >>>  include/f2fs_fs.h |  1 +
> >>>  2 files changed, 32 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/fsck/mount.c b/fsck/mount.c
> >>> index e4ba048..8d32e41 100644
> >>> --- a/fsck/mount.c
> >>> +++ b/fsck/mount.c
> >>> @@ -429,6 +429,8 @@ void print_cp_state(u32 flag)
> >>>   MSG(0, "%s", " orphan_inodes");
> >>>   if (flag & CP_DISABLED_FLAG)
> >>>   MSG(0, "%s", " disabled");
> >>> + if (flag & CP_RESIZEFS_FLAG)
> >>> + MSG(0, "%s", " resizefs");
> >>>   if (flag & CP_UMOUNT_FLAG)
> >>>   MSG(0, "%s", " unmount");
> >>>   else
> >>> @@ -1128,6 +1130,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
> >>>   unsigned int total, fsmeta;
> >>>   struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
> >>>   struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
> >>> + unsigned int flag = get_cp(ckpt_flags);
> >>>   unsigned int ovp_segments, reserved_segments;
> >>>   unsigned int main_segs, blocks_per_seg;
> >>>   unsigned int sit_segs, nat_segs;
> >>> @@ -1164,7 +1167,32 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
> >>>   log_blocks_per_seg = get_sb(log_blocks_per_seg);
> >>>   if (!user_block_count || user_block_count >=
> >>>   segment_count_main << log_blocks_per_seg) {
> >>> - MSG(0, "\tWrong user_block_count(%u)\n", user_block_count);
> >>> + if (flag & (CP_FSCK_FLAG | CP_RESIZEFS_FLAG)) {
> >>> + u32 valid_user_block_cnt;
> >>> + u32 seg_cnt_main = get_sb(segment_count) -
> >>> + (get_sb(segment_count_ckpt) +
> >>> +  get_sb(segment_count_sit) +
> >>> +  get_sb(segment_count_nat) +
> >>> +  get_sb(segment_count_ssa));
> >>> +
> >>> + /* validate segment_count_main in sb first */
> >>> + if (seg_cnt_main != get_sb(segment_count_main)) {
> >>> + MSG(0, "\tWrong user_block_count(%u) and 
> >>> inconsistent segment_cnt_main %u\n",
> >>> + user_block_count,
> >>> + segment_count_main << 
> >>> log_blocks_per_seg);
> >>> + return 1;
> >>> + }
> >>> + valid_user_block_cnt = ((get_sb(segment_count_main) -
> >>> + get_cp(overprov_segment_count)) 
> >>> * c.blks_per_seg);
> >>> + MSG(0, "\tInfo: Fix wrong user_block_count in CP: (%u) 
> >>> -> (%u)\n",
> >>> + user_block_count, valid_user_block_cnt);
> >>
> >> By default, we should only fix such bug if c.fix_on is true, something
> >> like this:
> >>
> >> ASSERT_MSG("\tWrong user_block_count(%u)\n", user_block_count);
> >>
&g

Re: [f2fs-dev] [PATCH v3] fsck.f2fs: allow fsck to fix issues with online resize due to SPO

2020-03-23 Thread Sahitya Tummala
On Mon, Mar 23, 2020 at 09:54:20AM +0800, Chao Yu wrote:
> On 2020/3/20 11:53, Sahitya Tummala wrote:
> > Add support for new CP flag CP_RESIZEFS_FLAG set during online
> > resize FS. If SPO happens after SB is updated but CP isn't, then
> > allow fsck to fix it.
> > 
> > The fsck errors without this fix -
> > Info: CKPT version = 6ed7bccb
> > Wrong user_block_count(2233856)
> > [f2fs_do_mount:3365] Checkpoint is polluted
> > 
> > The subsequent mount failure without this fix -
> > [   11.294650] F2FS-fs (sda8): Wrong user_block_count: 2233856
> > [   11.300272] F2FS-fs (sda8): Failed to get valid F2FS checkpoint
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> > v3:
> > - allow fsck to fix based on -f option
> > 
> >  fsck/mount.c  | 34 ++
> >  include/f2fs_fs.h |  1 +
> >  2 files changed, 31 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fsck/mount.c b/fsck/mount.c
> > index e4ba048..1e3731f 100644
> > --- a/fsck/mount.c
> > +++ b/fsck/mount.c
> > @@ -429,6 +429,8 @@ void print_cp_state(u32 flag)
> > MSG(0, "%s", " orphan_inodes");
> > if (flag & CP_DISABLED_FLAG)
> > MSG(0, "%s", " disabled");
> > +   if (flag & CP_RESIZEFS_FLAG)
> > +   MSG(0, "%s", " resizefs");
> > if (flag & CP_UMOUNT_FLAG)
> > MSG(0, "%s", " unmount");
> > else
> > @@ -1128,6 +1130,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
> > unsigned int total, fsmeta;
> > struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
> > struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
> > +   unsigned int flag = get_cp(ckpt_flags);
> > unsigned int ovp_segments, reserved_segments;
> > unsigned int main_segs, blocks_per_seg;
> > unsigned int sit_segs, nat_segs;
> > @@ -1164,8 +1167,31 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
> > log_blocks_per_seg = get_sb(log_blocks_per_seg);
> > if (!user_block_count || user_block_count >=
> > segment_count_main << log_blocks_per_seg) {
> > -   MSG(0, "\tWrong user_block_count(%u)\n", user_block_count);
> > -   return 1;
> > +   ASSERT_MSG("\tWrong user_block_count(%u)\n", user_block_count);
> > +   if (!c.fix_on)
> > +   return 1;
> 
> It looks fix_on will be revised to 1 on several CP status when user uses
> -a or -p? we should consider this as well?

Sure, let me add it.

Thanks,

> 
>   if (!c.fix_on && (c.auto_fix || c.preen_mode)) {
>   u32 flag = get_cp(ckpt_flags);
> 
>   if (flag & CP_FSCK_FLAG ||
>   flag & CP_QUOTA_NEED_FSCK_FLAG ||
>   (exist_qf_ino(sb) && (flag & CP_ERROR_FLAG))) {
>   c.fix_on = 1;
> 
> > +
> > +   if (flag & (CP_FSCK_FLAG | CP_RESIZEFS_FLAG)) {
> > +   u32 valid_user_block_cnt;
> > +   u32 seg_cnt_main = get_sb(segment_count) -
> > +   (get_sb(segment_count_ckpt) +
> > +get_sb(segment_count_sit) +
> > +get_sb(segment_count_nat) +
> > +get_sb(segment_count_ssa));
> > +
> > +   /* validate segment_count_main in sb first */
> > +   if (seg_cnt_main != get_sb(segment_count_main)) {
> > +   MSG(0, "Inconsistent segment_cnt_main %u in 
> > sb\n",
> > +   segment_count_main << 
> > log_blocks_per_seg);
> > +   return 1;
> > +   }
> > +   valid_user_block_cnt = ((get_sb(segment_count_main) -
> > +   get_cp(overprov_segment_count)) 
> > * c.blks_per_seg);
> > +   MSG(0, "Info: Fix wrong user_block_count in CP: (%u) -> 
> > (%u)\n",
> > +   user_block_count, valid_user_block_cnt);
> > +   set_cp(user_block_count, valid_user_block_cnt);
> > +   c.bug_on = 1;
> > +   }
> > }
> >  
> > main_segs = get_sb(segment_count_main);
> > @@ -3361,6 +3387,8 @@ int f2fs_do_mount(struct f2fs_sb_info *sbi)
> >  

[f2fs-dev] [PATCH v4] fsck.f2fs: allow fsck to fix issues with online resize due to SPO

2020-03-23 Thread Sahitya Tummala
Add support for the new CP flag CP_RESIZEFS_FLAG set during online
FS resize. If an SPO happens after the SB is updated but the CP isn't,
then allow fsck to fix it.

The fsck errors without this fix -
Info: CKPT version = 6ed7bccb
Wrong user_block_count(2233856)
[f2fs_do_mount:3365] Checkpoint is polluted

The subsequent mount failure without this fix -
[   11.294650] F2FS-fs (sda8): Wrong user_block_count: 2233856
[   11.300272] F2FS-fs (sda8): Failed to get valid F2FS checkpoint

Signed-off-by: Sahitya Tummala 
---
v4:
- add conditions to allow fix for -a or -p option as well

 fsck/mount.c  | 62 +--
 include/f2fs_fs.h |  1 +
 2 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/fsck/mount.c b/fsck/mount.c
index e4ba048..387957f 100644
--- a/fsck/mount.c
+++ b/fsck/mount.c
@@ -429,6 +429,8 @@ void print_cp_state(u32 flag)
MSG(0, "%s", " orphan_inodes");
if (flag & CP_DISABLED_FLAG)
MSG(0, "%s", " disabled");
+   if (flag & CP_RESIZEFS_FLAG)
+   MSG(0, "%s", " resizefs");
if (flag & CP_UMOUNT_FLAG)
MSG(0, "%s", " unmount");
else
@@ -1123,11 +1125,26 @@ fail_no_cp:
return -EINVAL;
 }
 
+static int f2fs_chk_fix_on_state(struct f2fs_super_block *sb, u32 flag)
+{
+   if (!c.fix_on && (c.auto_fix || c.preen_mode)) {
+   if (flag & CP_FSCK_FLAG ||
+   flag & CP_QUOTA_NEED_FSCK_FLAG ||
+   (exist_qf_ino(sb) && (flag & CP_ERROR_FLAG))) {
+   c.fix_on = 1;
+   } else if (!c.preen_mode) {
+   print_cp_state(flag);
+   }
+   }
+   return c.fix_on;
+}
+
 int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 {
unsigned int total, fsmeta;
struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
+   unsigned int flag = get_cp(ckpt_flags);
unsigned int ovp_segments, reserved_segments;
unsigned int main_segs, blocks_per_seg;
unsigned int sit_segs, nat_segs;
@@ -1164,8 +1181,31 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
log_blocks_per_seg = get_sb(log_blocks_per_seg);
if (!user_block_count || user_block_count >=
segment_count_main << log_blocks_per_seg) {
-   MSG(0, "\tWrong user_block_count(%u)\n", user_block_count);
-   return 1;
+   ASSERT_MSG("\tWrong user_block_count(%u)\n", user_block_count);
+   if (!f2fs_chk_fix_on_state(sb, flag))
+   return 1;
+
+   if (flag & (CP_FSCK_FLAG | CP_RESIZEFS_FLAG)) {
+   u32 valid_user_block_cnt;
+   u32 seg_cnt_main = get_sb(segment_count) -
+   (get_sb(segment_count_ckpt) +
+get_sb(segment_count_sit) +
+get_sb(segment_count_nat) +
+get_sb(segment_count_ssa));
+
+   /* validate segment_count_main in sb first */
+   if (seg_cnt_main != get_sb(segment_count_main)) {
+   MSG(0, "Inconsistent segment_cnt_main %u in 
sb\n",
+   segment_count_main << 
log_blocks_per_seg);
+   return 1;
+   }
+   valid_user_block_cnt = ((get_sb(segment_count_main) -
+   get_cp(overprov_segment_count)) 
* c.blks_per_seg);
+   MSG(0, "Info: Fix wrong user_block_count in CP: (%u) -> 
(%u)\n",
+   user_block_count, valid_user_block_cnt);
+   set_cp(user_block_count, valid_user_block_cnt);
+   c.bug_on = 1;
+   }
}
 
main_segs = get_sb(segment_count_main);
@@ -3361,6 +3401,8 @@ int f2fs_do_mount(struct f2fs_sb_info *sbi)
return -1;
}
 
+   c.bug_on = 0;
+
if (sanity_check_ckpt(sbi)) {
ERR_MSG("Checkpoint is polluted\n");
return -1;
@@ -3380,8 +3422,6 @@ int f2fs_do_mount(struct f2fs_sb_info *sbi)
c.fix_on = 1;
}
 
-   c.bug_on = 0;
-
if (tune_sb_features(sbi))
return -1;
 
@@ -3411,18 +3451,8 @@ int f2fs_do_mount(struct f2fs_sb_info *sbi)
return -1;
}
 
-   if (!c.fix_on && (c.auto_fix || c.preen_mode)) {
-   u32 flag = get_cp(ckpt_flags);
-
-   if (flag & CP_FSCK_FLAG ||

Re: [f2fs-dev] [PATCH v4] fsck.f2fs: allow fsck to fix issues with online resize due to SPO

2020-03-24 Thread Sahitya Tummala
On Tue, Mar 24, 2020 at 02:19:46PM +0800, Chao Yu wrote:
> On 2020/3/23 18:58, Sahitya Tummala wrote:
> > Add support for new CP flag CP_RESIZEFS_FLAG set during online
> > resize FS. If SPO happens after SB is updated but CP isn't, then
> > allow fsck to fix it.
> > 
> > The fsck errors without this fix -
> > Info: CKPT version = 6ed7bccb
> > Wrong user_block_count(2233856)
> > [f2fs_do_mount:3365] Checkpoint is polluted
> > 
> > The subsequent mount failure without this fix -
> > [   11.294650] F2FS-fs (sda8): Wrong user_block_count: 2233856
> > [   11.300272] F2FS-fs (sda8): Failed to get valid F2FS checkpoint
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> > v4:
> > - add conditions to allow fix for -a or -p option as well
> > 
> >  fsck/mount.c  | 62 
> > +--
> >  include/f2fs_fs.h |  1 +
> >  2 files changed, 47 insertions(+), 16 deletions(-)
> > 
> > diff --git a/fsck/mount.c b/fsck/mount.c
> > index e4ba048..387957f 100644
> > --- a/fsck/mount.c
> > +++ b/fsck/mount.c
> > @@ -429,6 +429,8 @@ void print_cp_state(u32 flag)
> > MSG(0, "%s", " orphan_inodes");
> > if (flag & CP_DISABLED_FLAG)
> > MSG(0, "%s", " disabled");
> > +   if (flag & CP_RESIZEFS_FLAG)
> > +   MSG(0, "%s", " resizefs");
> > if (flag & CP_UMOUNT_FLAG)
> > MSG(0, "%s", " unmount");
> > else
> > @@ -1123,11 +1125,26 @@ fail_no_cp:
> > return -EINVAL;
> >  }
> >  
> > +static int f2fs_chk_fix_on_state(struct f2fs_super_block *sb, u32 flag)
> > +{
> > +   if (!c.fix_on && (c.auto_fix || c.preen_mode)) {
> > +   if (flag & CP_FSCK_FLAG ||
> > +   flag & CP_QUOTA_NEED_FSCK_FLAG ||
> > +   (exist_qf_ino(sb) && (flag & CP_ERROR_FLAG))) {
> > +   c.fix_on = 1;
> > +   } else if (!c.preen_mode) {
> > +   print_cp_state(flag);
> > +   }
> > +   }
> > +   return c.fix_on;
> > +}
> > +
> >  int sanity_check_ckpt(struct f2fs_sb_info *sbi)
> >  {
> > unsigned int total, fsmeta;
> > struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
> > struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
> > +   unsigned int flag = get_cp(ckpt_flags);
> > unsigned int ovp_segments, reserved_segments;
> > unsigned int main_segs, blocks_per_seg;
> > unsigned int sit_segs, nat_segs;
> > @@ -1164,8 +1181,31 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
> > log_blocks_per_seg = get_sb(log_blocks_per_seg);
> > if (!user_block_count || user_block_count >=
> > segment_count_main << log_blocks_per_seg) {
> > -   MSG(0, "\tWrong user_block_count(%u)\n", user_block_count);
> > -   return 1;
> > +   ASSERT_MSG("\tWrong user_block_count(%u)\n", user_block_count);
> > +   if (!f2fs_chk_fix_on_state(sb, flag))
> > +   return 1;
> > +
> > +   if (flag & (CP_FSCK_FLAG | CP_RESIZEFS_FLAG)) {
> > +   u32 valid_user_block_cnt;
> > +   u32 seg_cnt_main = get_sb(segment_count) -
> > +   (get_sb(segment_count_ckpt) +
> > +get_sb(segment_count_sit) +
> > +get_sb(segment_count_nat) +
> > +get_sb(segment_count_ssa));
> > +
> > +   /* validate segment_count_main in sb first */
> > +   if (seg_cnt_main != get_sb(segment_count_main)) {
> > +   MSG(0, "Inconsistent segment_cnt_main %u in 
> > sb\n",
> > +   segment_count_main << 
> > log_blocks_per_seg);
> > +   return 1;
> > +   }
> > +   valid_user_block_cnt = ((get_sb(segment_count_main) -
> > +   get_cp(overprov_segment_count)) 
> > * c.blks_per_seg);
> > +   MSG(0, "Info: Fix wrong user_block_count in CP: (%u) -> 
> > (%u)\n",
> > +   user_block_count, valid_user_block_cnt);
> > +   set_cp(user_block_count, valid_user_block_cnt);

[f2fs-dev] [PATCH] f2fs: prevent meta updates while checkpoint is in progress

2020-03-26 Thread Sahitya Tummala
allocate_segment_for_resize() can cause meta page updates if
it needs to change the current node/data segments for resizing.
Stop these meta updates when a checkpoint is already in progress
to prevent inconsistent CP data.
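
For background on why wrapping the allocation in f2fs_lock_op()/f2fs_unlock_op()
(see the diff below) excludes a running checkpoint: filesystem operations take
cp_rwsem as readers while the checkpoint path takes it as a writer. A simplified
sketch of that relationship (names follow f2fs, with _sketch suffixes added;
this is an approximation, not the literal source):

/* Simplified sketch of the cp_rwsem usage behind f2fs_lock_op(). */
static inline void f2fs_lock_op_sketch(struct f2fs_sb_info *sbi)
{
	down_read(&sbi->cp_rwsem);	/* many FS ops may hold this at once */
}

static inline void f2fs_unlock_op_sketch(struct f2fs_sb_info *sbi)
{
	up_read(&sbi->cp_rwsem);
}

static void block_operations_sketch(struct f2fs_sb_info *sbi)
{
	/* checkpoint start: waits until every in-flight op has dropped
	 * cp_rwsem, so meta updates done under f2fs_lock_op() cannot
	 * interleave with the CP being written */
	down_write(&sbi->cp_rwsem);
}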

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/gc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 5bca560..6122bad 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1399,8 +1399,10 @@ static int free_segment_range(struct f2fs_sb_info *sbi, 
unsigned int start,
int err = 0;
 
/* Move out cursegs from the target range */
+   f2fs_lock_op(sbi);
for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
allocate_segment_for_resize(sbi, type, start, end);
+   f2fs_unlock_op(sbi);
 
/* do GC to move out valid blocks in the range */
for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




Re: [f2fs-dev] [PATCH] f2fs: fix long latency due to discard during umount

2020-03-26 Thread Sahitya Tummala
Hi Chao,

On Thu, Mar 26, 2020 at 05:00:18PM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> On 2020/3/18 12:44, Sahitya Tummala wrote:
> > F2FS already has a default timeout of 5 secs for discards that
> > can be issued during umount, but it can take more than the 5 sec
> > timeout if the underlying UFS device queue is already full and there
> > are no more available free tags to be used. In that case, submit_bio()
> > will wait for the already queued discard requests to complete to get
> > a free tag, which can potentially take way more than 5 sec.
> > 
> > Fix this by submitting the discard requests with REQ_NOWAIT
> > flags during umount. This will return -EAGAIN for UFS queue/tag full
> > scenario without waiting in the context of submit_bio(). The FS can
> > then handle these requests by retrying again within the stipulated
> > discard timeout period to avoid long latencies.
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> > v2:
> > - Handle the case where a dc can have multiple bios associated with it
> > 
> >  fs/f2fs/f2fs.h|  1 +
> >  fs/f2fs/segment.c | 83 
> > ---
> >  2 files changed, 74 insertions(+), 10 deletions(-)
> > 
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 12a197e..67b8dcc 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -340,6 +340,7 @@ struct discard_cmd_control {
> > struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */
> > struct list_head wait_list; /* store on-flushing entries */
> > struct list_head fstrim_list;   /* in-flight discard from 
> > fstrim */
> > +   struct list_head retry_list;/* list of cmds to retry */
> > wait_queue_head_t discard_wait_queue;   /* waiting queue for wake-up */
> > unsigned int discard_wake;  /* to wake up discard thread */
> > struct mutex cmd_lock;
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index fb3e531..4162c76 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -1029,13 +1029,16 @@ static void f2fs_submit_discard_endio(struct bio 
> > *bio)
> > struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
> > unsigned long flags;
> >  
> > -   dc->error = blk_status_to_errno(bio->bi_status);
> > -
> > spin_lock_irqsave(&dc->lock, flags);
> > +   if (!dc->error)
> > +   dc->error = blk_status_to_errno(bio->bi_status);
> > +
> > dc->bio_ref--;
> > -   if (!dc->bio_ref && dc->state == D_SUBMIT) {
> > -   dc->state = D_DONE;
> > -   complete_all(&dc->wait);
> > +   if (!dc->bio_ref) {
> > +   if (dc->error || dc->state == D_SUBMIT) {
> > +   dc->state = D_DONE;
> > +   complete_all(&dc->wait);
> > +   }
> > }
> > spin_unlock_irqrestore(&dc->lock, flags);
> > bio_put(bio);
> > @@ -1124,10 +1127,13 @@ static int __submit_discard_cmd(struct f2fs_sb_info 
> > *sbi,
> > struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> > struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
> > &(dcc->fstrim_list) : &(dcc->wait_list);
> > -   int flag = dpolicy->sync ? REQ_SYNC : 0;
> > -   block_t lstart, start, len, total_len;
> > +   int flag;
> > +   block_t lstart, start, len, total_len, orig_len;
> > int err = 0;
> >  
> > +   flag = dpolicy->sync ? REQ_SYNC : 0;
> > +   flag |= dpolicy->type == DPOLICY_UMOUNT ? REQ_NOWAIT : 0;
> > +
> > if (dc->state != D_PREP)
> > return 0;
> >  
> > @@ -1139,7 +1145,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info 
> > *sbi,
> > lstart = dc->lstart;
> > start = dc->start;
> > len = dc->len;
> > -   total_len = len;
> > +   orig_len = total_len = len;
> >  
> > dc->len = 0;
> >  
> > @@ -1203,6 +1209,14 @@ static int __submit_discard_cmd(struct f2fs_sb_info 
> > *sbi,
> > bio->bi_end_io = f2fs_submit_discard_endio;
> > bio->bi_opf |= flag;
> > submit_bio(bio);
> > +   if (flag & REQ_NOWAIT) {
> > +   if (dc->error == -EAGAIN) {
> > +   dc->len = orig_len;
> > +   list_move(&dc->l

Re: [f2fs-dev] [PATCH] f2fs: fix long latency due to discard during umount

2020-03-29 Thread Sahitya Tummala
Hi Chao,

On Fri, Mar 27, 2020 at 08:35:42AM +0530, Sahitya Tummala wrote:
> On Fri, Mar 27, 2020 at 09:51:43AM +0800, Chao Yu wrote:
> > 
> > With this patch, most of xfstest cases cost 5 * n second longer than before.
> > 
> > E.g. generic/003, during umount(), we looped into retrying one bio
> > submission.
> > 
> > [61279.829724] F2FS-fs (zram1): Found nat_bits in checkpoint
> > [61279.885337] F2FS-fs (zram1): Mounted with checkpoint version = 5cf3cb8e
> > [61281.912832] submit discard bio start [23555,1]
> > [61281.912835] f2fs_submit_discard_endio [23555,1] err:-11
> > [61281.912836] submit discard bio end [23555,1]
> > [61281.912836] move dc to retry list [23555,1]
> > 
> > ...
> > 
> > [61286.881212] submit discard bio start [23555,1]
> > [61286.881217] f2fs_submit_discard_endio [23555,1] err:-11
> > [61286.881223] submit discard bio end [23555,1]
> > [61286.881224] move dc to retry list [23555,1]
> > [61286.905198] submit discard bio start [23555,1]
> > [61286.905203] f2fs_submit_discard_endio [23555,1] err:-11
> > [61286.905205] submit discard bio end [23555,1]
> > [61286.905206] move dc to retry list [23555,1]
> > [61286.929157] F2FS-fs (zram1): Issue discard(23555, 23555, 1) failed, ret: 
> > -11
> > 
> > Could you take a look at this issue?
> 
> Let me check and get back on this.

I found the issue. A dc with multiple bios gets requeued again and again
when one of its bios gets an -EAGAIN error. Even the successfully completed
bios get requeued again, resulting in long latency. I have fixed it by
splitting the dc in that case, so that only the leftover bios are requeued
into a new dc and retried later within the 5 sec timeout.
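
Roughly, the idea reads like the sketch below (simplified; queue_pending_discard()
is a hypothetical helper, not an f2fs API, and the posted v3 patch differs in
detail):

/* Sketch only: when the tail of a multi-bio discard_cmd gets -EAGAIN, carve
 * the unsubmitted range off into a fresh pending command so the bios that
 * already completed are not issued again on retry. */
static void split_and_requeue_tail(struct f2fs_sb_info *sbi,
				   struct discard_cmd *dc,
				   block_t submitted_len)
{
	block_t left = dc->len - submitted_len;

	if (!left)
		return;

	/* new pending command covering only the range that never reached
	 * the device; it gets retried within the discard timeout */
	queue_pending_discard(sbi, dc->bdev,
			      dc->lstart + submitted_len,
			      dc->start + submitted_len, left);

	/* the original command now covers just what was actually submitted,
	 * so its completion only waits on bios that really went out */
	dc->len = submitted_len;
}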

Please help review the v3 I posted, and if it looks good, could you test the
earlier regression scenario with it again to check the result?

thanks,

> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> > > 
> > > Thanks,
> > > 
> > >> Thanks,
> > >>
> > >>> +   break;
> > >>> +   }
> > >>> +   }
> > >>>  
> > >>> atomic_inc(&dcc->issued_discard);
> > >>>  
> > >>> @@ -1463,6 +1477,40 @@ static unsigned int 
> > >>> __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
> > >>> return issued;
> > >>>  }
> > >>>  
> > >>> +static bool __should_discard_retry(struct f2fs_sb_info *sbi,
> > > s> > +struct discard_policy *dpolicy)
> > >>> +{
> > >>> +   struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> > >>> +   struct discard_cmd *dc, *tmp;
> > >>> +   bool retry = false;
> > >>> +   unsigned long flags;
> > >>> +
> > >>> +   if (dpolicy->type != DPOLICY_UMOUNT)
> > >>> +   f2fs_bug_on(sbi, 1);
> > >>> +
> > >>> +   mutex_lock(&dcc->cmd_lock);
> > >>> +   list_for_each_entry_safe(dc, tmp, &(dcc->retry_list), list) {
> > >>> +   if (dpolicy->timeout != 0 &&
> > >>> +   f2fs_time_over(sbi, dpolicy->timeout)) {
> > >>> +   retry = false;
> > >>> +   break;
> > >>> +   }
> > >>> +
> > >>> +   spin_lock_irqsave(&dc->lock, flags);
> > >>> +   if (!dc->bio_ref) {
> > >>> +   dc->state = D_PREP;
> > >>> +   dc->error = 0;
> > >>> +   reinit_completion(&dc->wait);
> > >>> +   __relocate_discard_cmd(dcc, dc);
> > >>> +   retry = true;
> > >>> +   }
> > >>> +   spin_unlock_irqrestore(&dc->lock, flags);
> > >>> +   }
> > >>> +   mutex_unlock(&dcc->cmd_lock);
> > >>> +
> > >>> +   return retry;
> > >>> +}
> > >>> +
> > >>>  static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
> > >>> struct discard_policy *dpolicy)
> > >>>  {
> > >>> @@ -1470,12 +1518,13 @@ sta

Re: [f2fs-dev] [PATCH] f2fs: prevent meta updates while checkpoint is in progress

2020-03-30 Thread Sahitya Tummala
On Sat, Mar 28, 2020 at 04:38:00PM +0800, Chao Yu wrote:
> Hi all,
> 
> On 2020/3/28 3:24, Jaegeuk Kim wrote:
> > Hi Sahitya,
> > 
> > On 03/26, Sahitya Tummala wrote:
> >> allocate_segment_for_resize() can cause metapage updates if
> >> it requires to change the current node/data segments for resizing.
> >> Stop these meta updates when there is a checkpoint already
> >> in progress to prevent inconsistent CP data.
> > 
> > Doesn't freeze|thaw_bdev(sbi->sb->s_bdev); work for you?
> 
> That can avoid foreground ops racing? rather than background ops like
> balance_fs() from kworker?
> 

Yes, that can only prevent foreground ops, but not the background ops
invoked in the context of the kworker thread.

> BTW, I found that {freeze,thaw}_bdev is not enough to freeze all
> foreground fs ops, it needs to use {freeze,thaw}_super instead.
> 

Yes, I agree.

Thanks,

> ---
>  fs/f2fs/gc.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 26248c8936db..acdc8b99b543 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1538,7 +1538,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
> block_count)
>   return -EINVAL;
>   }
> 
> - freeze_bdev(sbi->sb->s_bdev);
> + freeze_super(sbi->sb);
> 
>   shrunk_blocks = old_block_count - block_count;
>   secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
> @@ -1551,7 +1551,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
> block_count)
>   sbi->user_block_count -= shrunk_blocks;
>   spin_unlock(&sbi->stat_lock);
>   if (err) {
> - thaw_bdev(sbi->sb->s_bdev, sbi->sb);
> + thaw_super(sbi->sb);
>   return err;
>   }
> 
> @@ -1613,6 +1613,6 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
> block_count)
>   }
>   clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
>   mutex_unlock(&sbi->resize_mutex);
> - thaw_bdev(sbi->sb->s_bdev, sbi->sb);
> + thaw_super(sbi->sb);
>   return err;
>  }
> -- 
> 2.18.0.rc1
> 
> > 
> >>
> >> Signed-off-by: Sahitya Tummala 
> >> ---
> >>  fs/f2fs/gc.c | 2 ++
> >>  1 file changed, 2 insertions(+)
> >>
> >> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> >> index 5bca560..6122bad 100644
> >> --- a/fs/f2fs/gc.c
> >> +++ b/fs/f2fs/gc.c
> >> @@ -1399,8 +1399,10 @@ static int free_segment_range(struct f2fs_sb_info 
> >> *sbi, unsigned int start,
> >>int err = 0;
> >>  
> >>/* Move out cursegs from the target range */
> >> +  f2fs_lock_op(sbi);
> >>for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
> >>allocate_segment_for_resize(sbi, type, start, end);
> >> +  f2fs_unlock_op(sbi);
> >>  
> >>/* do GC to move out valid blocks in the range */
> >>for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
> >> -- 
> >> Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, 
> >> Inc.
> >> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
> >> Foundation Collaborative Project.
> > .
> > 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.




Re: [f2fs-dev] [PATCH] f2fs: fix long latency due to discard during umount

2020-03-30 Thread Sahitya Tummala
Hi Chao,

On Mon, Mar 30, 2020 at 06:16:40PM +0800, Chao Yu wrote:
> On 2020/3/30 16:38, Chao Yu wrote:
> > Hi Sahitya,
> > 
> > Bad news, :( I guess we didn't catch the root cause, as after applying v3,
> > I still can reproduce this issue:
> > 
> > generic/003 10s ...  30s
> 
> I use zram as backend device of fstest,
> 
> Call Trace:
>  dump_stack+0x66/0x8b
>  f2fs_submit_discard_endio+0x88/0xa0 [f2fs]
>  generic_make_request_checks+0x70/0x5f0
>  generic_make_request+0x3e/0x2e0
>  submit_bio+0x72/0x140
>  __submit_discard_cmd.isra.50+0x4a8/0x710 [f2fs]
>  __issue_discard_cmd+0x171/0x3a0 [f2fs]
> 
> Does this mean zram uses single queue, so we may always fail to submit 
> 'nowait'
> IO due to below condition:
> 
>   /*
>* Non-mq queues do not honor REQ_NOWAIT, so complete a bio
>* with BLK_STS_AGAIN status in order to catch -EAGAIN and
>* to give a chance to the caller to repeat request gracefully.
>*/
>   if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) {
>   status = BLK_STS_AGAIN;
>   goto end_io;
>   }
> 

Yes, I have also just figured out that this is the reason. But most real block
device drivers support MQ. Can we then fix this case by checking for MQ support
before enabling REQ_NOWAIT, as below? Please share your comments.

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index cda7935..e7e2ffe 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1131,7 +1131,9 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,

flag = dpolicy->sync ? REQ_SYNC : 0;
-   flag |= dpolicy->type == DPOLICY_UMOUNT ? REQ_NOWAIT : 0;
+
+   if (sbi->sb->s_bdev->bd_queue && queue_is_mq(sbi->sb->s_bdev->bd_queue))
+   flag |= dpolicy->type == DPOLICY_UMOUNT ? REQ_NOWAIT : 0;

if (dc->state != D_PREP)
    return 0;

Thanks,

> 
> 
> > 
> > Thanks,
> > 
> > On 2020/3/30 14:53, Sahitya Tummala wrote:
> >> Hi Chao,
> >>
> >> On Fri, Mar 27, 2020 at 08:35:42AM +0530, Sahitya Tummala wrote:
> >>> On Fri, Mar 27, 2020 at 09:51:43AM +0800, Chao Yu wrote:
> >>>>
> >>>> With this patch, most of xfstest cases cost 5 * n second longer than 
> >>>> before.
> >>>>
> >>>> E.g. generic/003, during umount(), we looped into retrying one bio
> >>>> submission.
> >>>>
> >>>> [61279.829724] F2FS-fs (zram1): Found nat_bits in checkpoint
> >>>> [61279.885337] F2FS-fs (zram1): Mounted with checkpoint version = 
> >>>> 5cf3cb8e
> >>>> [61281.912832] submit discard bio start [23555,1]
> >>>> [61281.912835] f2fs_submit_discard_endio [23555,1] err:-11
> >>>> [61281.912836] submit discard bio end [23555,1]
> >>>> [61281.912836] move dc to retry list [23555,1]
> >>>>
> >>>> ...
> >>>>
> >>>> [61286.881212] submit discard bio start [23555,1]
> >>>> [61286.881217] f2fs_submit_discard_endio [23555,1] err:-11
> >>>> [61286.881223] submit discard bio end [23555,1]
> >>>> [61286.881224] move dc to retry list [23555,1]
> >>>> [61286.905198] submit discard bio start [23555,1]
> >>>> [61286.905203] f2fs_submit_discard_endio [23555,1] err:-11
> >>>> [61286.905205] submit discard bio end [23555,1]
> >>>> [61286.905206] move dc to retry list [23555,1]
> >>>> [61286.929157] F2FS-fs (zram1): Issue discard(23555, 23555, 1) failed, 
> >>>> ret: -11
> >>>>
> >>>> Could you take a look at this issue?
> >>>
> >>> Let me check and get back on this.
> >>
> >> I found the issue. The dc with multiple bios is getting requeued again and
> >> again in case if one of its bio gets -EAGAIN error. Even the successfully
> >> completed bios are getting requeued again resulting into long latency.
> >> I have fixed it by splitting the dc in such case so that we can requeue 
> >> only
> >> the leftover bios into a new dc and retry that later within the 5 sec 
> >> timeout.
> >>
> >> Please help to review v3 posted and if it looks good, I would like to 
> >> request
> >> you to test the earlier regression scenario with it to check the result 
> >> again?
> >>
> >> thanks,
> >>
> >>>
> >>> Thanks,
> >>>
> >>>>
> >>>> Thanks,

Re: [f2fs-dev] [PATCH] f2fs: fix long latency due to discard during umount

2020-03-30 Thread Sahitya Tummala


Sure, Chao. Let us put this patch on hold for now then.

Jaegeuk,

Please drop this patch from your tree for the time being, as it needs
further discussion.

Thanks,

On Tue, Mar 31, 2020 at 09:46:30AM +0800, Chao Yu wrote:
> Hi Sahitya,
> 
> On 2020/3/30 18:51, Sahitya Tummala wrote:
> > Hi Chao,
> > 
> > On Mon, Mar 30, 2020 at 06:16:40PM +0800, Chao Yu wrote:
> >> On 2020/3/30 16:38, Chao Yu wrote:
> >>> Hi Sahitya,
> >>>
> >>> Bad news, :( I guess we didn't catch the root cause, as after applying v3,
> >>> I still can reproduce this issue:
> >>>
> >>> generic/003 10s ...  30s
> >>
> >> I use zram as backend device of fstest,
> >>
> >> Call Trace:
> >>  dump_stack+0x66/0x8b
> >>  f2fs_submit_discard_endio+0x88/0xa0 [f2fs]
> >>  generic_make_request_checks+0x70/0x5f0
> >>  generic_make_request+0x3e/0x2e0
> >>  submit_bio+0x72/0x140
> >>  __submit_discard_cmd.isra.50+0x4a8/0x710 [f2fs]
> >>  __issue_discard_cmd+0x171/0x3a0 [f2fs]
> >>
> >> Does this mean zram uses single queue, so we may always fail to submit 
> >> 'nowait'
> >> IO due to below condition:
> >>
> >>/*
> >> * Non-mq queues do not honor REQ_NOWAIT, so complete a bio
> >> * with BLK_STS_AGAIN status in order to catch -EAGAIN and
> >> * to give a chance to the caller to repeat request gracefully.
> >> */
> >>if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) {
> >>status = BLK_STS_AGAIN;
> >>goto end_io;
> >>}
> >>
> > 
> > Yes, I have also just figured out that as the reason. But most of the real 
> > block
> > devic drivers support MQ. Can we thus fix this case by checking for MQ 
> > status
> > before enabling REQ_NOWAIT as below? Please share your comments.
> > 
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index cda7935..e7e2ffe 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -1131,7 +1131,9 @@ static int __submit_discard_cmd(struct f2fs_sb_info 
> > *sbi,
> > 
> > flag = dpolicy->sync ? REQ_SYNC : 0;
> > -   flag |= dpolicy->type == DPOLICY_UMOUNT ? REQ_NOWAIT : 0;
> > +
> > +   if (sbi->sb->s_bdev->bd_queue && 
> > queue_is_mq(sbi->sb->s_bdev->bd_queue))
> > +   flag |= dpolicy->type == DPOLICY_UMOUNT ? REQ_NOWAIT : 0;
> 
> IMO, it's too tight to couple with block layer logic? however, I don't have
> any better idea about the solution.
> 
> Anyway, I guess we can Cc to Jan and block mailing list for comments to see
> whether there is a better solution.
> 
> Thoughts?
> 
> Thanks,
> 
> > 
> > if (dc->state != D_PREP)
> > return 0;
> > 
> > Thanks,
> > 
> >>
> >>
> >>>
> >>> Thanks,
> >>>
> >>> On 2020/3/30 14:53, Sahitya Tummala wrote:
> >>>> Hi Chao,
> >>>>
> >>>> On Fri, Mar 27, 2020 at 08:35:42AM +0530, Sahitya Tummala wrote:
> >>>>> On Fri, Mar 27, 2020 at 09:51:43AM +0800, Chao Yu wrote:
> >>>>>>
> >>>>>> With this patch, most of xfstest cases cost 5 * n second longer than 
> >>>>>> before.
> >>>>>>
> >>>>>> E.g. generic/003, during umount(), we looped into retrying one bio
> >>>>>> submission.
> >>>>>>
> >>>>>> [61279.829724] F2FS-fs (zram1): Found nat_bits in checkpoint
> >>>>>> [61279.885337] F2FS-fs (zram1): Mounted with checkpoint version = 
> >>>>>> 5cf3cb8e
> >>>>>> [61281.912832] submit discard bio start [23555,1]
> >>>>>> [61281.912835] f2fs_submit_discard_endio [23555,1] err:-11
> >>>>>> [61281.912836] submit discard bio end [23555,1]
> >>>>>> [61281.912836] move dc to retry list [23555,1]
> >>>>>>
> >>>>>> ...
> >>>>>>
> >>>>>> [61286.881212] submit discard bio start [23555,1]
> >>>>>> [61286.881217] f2fs_submit_discard_endio [23555,1] err:-11
> >>>>>> [61286.881223] submit discard bio end [23555,1]
> >>>>>> [61286.881224] move dc to retry list [23555,1]
> >>>>>> [61286.905198] submit d

Re: [f2fs-dev] [PATCH] f2fs: prevent meta updates while checkpoint is in progress

2020-03-31 Thread Sahitya Tummala
On Mon, Mar 30, 2020 at 08:54:19PM -0700, Jaegeuk Kim wrote:
> On 03/26, Sahitya Tummala wrote:
> > allocate_segment_for_resize() can cause metapage updates if
> > it requires to change the current node/data segments for resizing.
> > Stop these meta updates when there is a checkpoint already
> > in progress to prevent inconsistent CP data.
> 
> I'd prefer to use f2fs_lock_op() in bigger coverage.

Do you mean to cover the entire free_segment_range() function within
f2fs_lock_op()? Please clarify.

Thanks,

> 
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> >  fs/f2fs/gc.c | 2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index 5bca560..6122bad 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -1399,8 +1399,10 @@ static int free_segment_range(struct f2fs_sb_info 
> > *sbi, unsigned int start,
> > int err = 0;
> >  
> > /* Move out cursegs from the target range */
> > +   f2fs_lock_op(sbi);
> > for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
> > allocate_segment_for_resize(sbi, type, start, end);
> > +   f2fs_unlock_op(sbi);
> >  
> > /* do GC to move out valid blocks in the range */
> > for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
> > -- 
> > Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, 
> > Inc.
> > Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
> > Foundation Collaborative Project.

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.




Re: [f2fs-dev] [PATCH] f2fs: prevent meta updates while checkpoint is in progress

2020-03-31 Thread Sahitya Tummala
Hi Jaegeuk,

Got it.
The diff below looks good to me.
Would you like me to test it and post a patch for this?

Thanks,

On Tue, Mar 31, 2020 at 11:43:07AM -0700, Jaegeuk Kim wrote:
> On 03/31, Sahitya Tummala wrote:
> > On Mon, Mar 30, 2020 at 08:54:19PM -0700, Jaegeuk Kim wrote:
> > > On 03/26, Sahitya Tummala wrote:
> > > > allocate_segment_for_resize() can cause metapage updates if
> > > > it requires to change the current node/data segments for resizing.
> > > > Stop these meta updates when there is a checkpoint already
> > > > in progress to prevent inconsistent CP data.
> > > 
> > > I'd prefer to use f2fs_lock_op() in bigger coverage.
> > 
> > Do you mean to cover the entire free_segment_range() function within
> > f2fs_lock_op()? Please clarify.
> 
> I didn't test tho, something like this?
> 
> ---
>  fs/f2fs/checkpoint.c|  6 --
>  fs/f2fs/f2fs.h  |  2 +-
>  fs/f2fs/gc.c| 28 ++--
>  fs/f2fs/super.c |  1 -
>  include/trace/events/f2fs.h |  4 +++-
>  5 files changed, 22 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 852890b72d6ac..531995192b714 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1553,7 +1553,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, 
> struct cp_control *cpc)
>   return 0;
>   f2fs_warn(sbi, "Start checkpoint disabled!");
>   }
> - mutex_lock(&sbi->cp_mutex);
> + if (cpc->reason != CP_RESIZE)
> + mutex_lock(&sbi->cp_mutex);
>  
>   if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
>   ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
> @@ -1622,7 +1623,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, 
> struct cp_control *cpc)
>   f2fs_update_time(sbi, CP_TIME);
>   trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
>  out:
> - mutex_unlock(&sbi->cp_mutex);
> + if (cpc->reason != CP_RESIZE)
> + mutex_unlock(&sbi->cp_mutex);
>   return err;
>  }
>  
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index c84442eefc56d..7c98dca3ec1d6 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -193,6 +193,7 @@ enum {
>  #define  CP_DISCARD  0x0010
>  #define CP_TRIMMED   0x0020
>  #define CP_PAUSE 0x0040
> +#define CP_RESIZE0x0080
>  
>  #define MAX_DISCARD_BLOCKS(sbi)  BLKS_PER_SEC(sbi)
>  #define DEF_MAX_DISCARD_REQUEST  8   /* issue 8 discards per 
> round */
> @@ -1417,7 +1418,6 @@ struct f2fs_sb_info {
>   unsigned int segs_per_sec;  /* segments per section */
>   unsigned int secs_per_zone; /* sections per zone */
>   unsigned int total_sections;/* total section count */
> - struct mutex resize_mutex;  /* for resize exclusion */
>   unsigned int total_node_count;  /* total node block count */
>   unsigned int total_valid_node_count;/* valid node block count */
>   loff_t max_file_blocks; /* max block index of file */
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 26248c8936db0..1e5a06fda09d3 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1402,8 +1402,9 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
>  static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
>   unsigned int end)
>  {
> - int type;
>   unsigned int segno, next_inuse;
> + struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
> + int type;
>   int err = 0;
>  
>   /* Move out cursegs from the target range */
> @@ -1417,16 +1418,14 @@ static int free_segment_range(struct f2fs_sb_info 
> *sbi, unsigned int start,
>   .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
>   };
>  
> - down_write(&sbi->gc_lock);
>   do_garbage_collect(sbi, segno, &gc_list, FG_GC);
> - up_write(&sbi->gc_lock);
>   put_gc_inode(&gc_list);
>  
>   if (get_valid_blocks(sbi, segno, true))
>   return -EAGAIN;
>   }
>  
> - err = f2fs_sync_fs(sbi->sb, 1);
> + err = f2fs_write_checkpoint(sbi, &cpc);
>   if (err)
>   return err;
>  
> @@ -1502,6 +1501,7 @@ static void update_fs_metadata(struct f2fs_sb_info 
> *sbi, int secs)
>  int f2fs_resize

Re: [f2fs-dev] [PATCH v3] f2fs: fix long latency due to discard during umount

2020-04-01 Thread Sahitya Tummala
Hi Jaegeuk,

On Tue, Mar 31, 2020 at 11:46:55AM -0700, Jaegeuk Kim wrote:
> On 03/30, Sahitya Tummala wrote:
> > F2FS already has a default timeout of 5 secs for discards that
> > can be issued during umount, but it can take more than the 5 sec
> > timeout if the underlying UFS device queue is already full and there
> > are no more available free tags to be used. In that case, submit_bio()
> > will wait for the already queued discard requests to complete to get
> > a free tag, which can potentially take way more than 5 sec.
> > 
> > Fix this by submitting the discard requests with REQ_NOWAIT
> > flags during umount. This will return -EAGAIN for UFS queue/tag full
> > scenario without waiting in the context of submit_bio(). The FS can
> > then handle these requests by retrying again within the stipulated
> > discard timeout period to avoid long latencies.
> 
> Sorry, Sahitya, but, do we really need to do like this? How about just
> controlling # of outstanding discarding bios in __issue_discard_cmd()?

Do you mean something like this?

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 1a62b27..860dd43 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1099,7 +1099,7 @@ static void __init_discard_policy(struct f2fs_sb_info 
*sbi,
} else if (discard_type == DPOLICY_FSTRIM) {
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_UMOUNT) {
-   dpolicy->max_requests = UINT_MAX;
+   dpolicy->max_requests = 30;
dpolicy->io_aware = false;
/* we need to issue all to keep CP_TRIMMED_FLAG */
dpolicy->granularity = 1;
@@ -1470,12 +1470,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
-   int i, issued = 0;
+   int i, issued;
bool io_interrupted = false;

if (dpolicy->timeout != 0)
f2fs_update_time(sbi, dpolicy->timeout);

+retry:
+   issued = 0;
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
if (dpolicy->timeout != 0 &&
f2fs_time_over(sbi, dpolicy->timeout))
@@ -1522,6 +1524,11 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
break;
}

+   if (dpolicy->type == DPOLICY_UMOUNT && issued) {
+   __wait_all_discard_cmd(sbi, dpolicy);
+   goto retry;
+   }
+
if (!issued && io_interrupted)
issued = -1;

Thanks,

> 
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> > v3:
> > -Handle the regression reported by Chao with v2.
> > -simplify the logic to split the dc with multiple bios incase any bio 
> > returns
> >  EAGAIN and retry those new dc within 5 sec timeout.
> > 
> >  fs/f2fs/segment.c | 65 
> > +++
> >  1 file changed, 51 insertions(+), 14 deletions(-)
> > 
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index fb3e531..55d18c7 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -1029,13 +1029,16 @@ static void f2fs_submit_discard_endio(struct bio 
> > *bio)
> > struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
> > unsigned long flags;
> >  
> > -   dc->error = blk_status_to_errno(bio->bi_status);
> > -
> > spin_lock_irqsave(&dc->lock, flags);
> > +   if (!dc->error)
> > +   dc->error = blk_status_to_errno(bio->bi_status);
> > +
> > dc->bio_ref--;
> > -   if (!dc->bio_ref && dc->state == D_SUBMIT) {
> > -   dc->state = D_DONE;
> > -   complete_all(&dc->wait);
> > +   if (!dc->bio_ref) {
> > +   if (dc->error || dc->state == D_SUBMIT) {
> > +   dc->state = D_DONE;
> > +   complete_all(&dc->wait);
> > +   }
> > }
> > spin_unlock_irqrestore(&dc->lock, flags);
> > bio_put(bio);
> > @@ -1124,10 +1127,13 @@ static int __submit_discard_cmd(struct f2fs_sb_info 
> > *sbi,
> > struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> > struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
> > &(dcc->fstrim_list) : &(dcc->wait_list);
> > -   int flag = dpolicy->sync ? REQ_SYNC : 0;
> > +   int flag;
> > block_t lstart, start, len, total_len;
> > int err = 0;
> >  
> > 

Re: [f2fs-dev] [PATCH] f2fs: prevent meta updates while checkpoint is in progress

2020-04-14 Thread Sahitya Tummala
On Mon, Apr 13, 2020 at 10:42:37AM -0700, Jaegeuk Kim wrote:
> On 04/03, Jaegeuk Kim wrote:
> > On 04/03, Jaegeuk Kim wrote:
> > > On 04/01, Sahitya Tummala wrote:
> > > > Hi Jaegeuk,
> > > > 
> > > > Got it.
> > > > The diff below looks good to me.
> > > > Would you like me to test it and put a patch for this?
> > > 
> > > Sahitya, Chao,
> > > 
> > > Could you please take a look at this patch and test intensively?
> > > 
> > > Thanks,
> v3:
>  - fix gc_lock
> 
> From d10c09dfedc7a10cef7dd34493ddbd7c27889033 Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim 
> Date: Tue, 31 Mar 2020 11:43:07 -0700
> Subject: [PATCH] f2fs: refactor resize_fs to avoid meta updates in progress
> 
> Sahitya raised an issue:
> - prevent meta updates while checkpoint is in progress
> 
> allocate_segment_for_resize() can cause metapage updates if
> it requires to change the current node/data segments for resizing.
> Stop these meta updates when there is a checkpoint already
> in progress to prevent inconsistent CP data.
> 
> Signed-off-by: Sahitya Tummala 
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/checkpoint.c|   6 ++-
>  fs/f2fs/f2fs.h  |   2 +-
>  fs/f2fs/file.c  |   5 +-
>  fs/f2fs/gc.c| 105 
>  fs/f2fs/super.c |   1 -
>  include/trace/events/f2fs.h |   4 +-
>  6 files changed, 67 insertions(+), 56 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 6be357c8e0020..dcb3a15574c99 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1554,7 +1554,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, 
> struct cp_control *cpc)
>   return 0;
>   f2fs_warn(sbi, "Start checkpoint disabled!");
>   }
> - mutex_lock(&sbi->cp_mutex);
> + if (cpc->reason != CP_RESIZE)
> + mutex_lock(&sbi->cp_mutex);
>  
>   if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
>   ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
> @@ -1623,7 +1624,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, 
> struct cp_control *cpc)
>   f2fs_update_time(sbi, CP_TIME);
>   trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
>  out:
> - mutex_unlock(&sbi->cp_mutex);
> + if (cpc->reason != CP_RESIZE)
> + mutex_unlock(&sbi->cp_mutex);
>   return err;
>  }
>  
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 1241a397bf53c..e8e26ab723eba 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -194,6 +194,7 @@ enum {
>  #define  CP_DISCARD  0x0010
>  #define CP_TRIMMED   0x0020
>  #define CP_PAUSE 0x0040
> +#define CP_RESIZE0x0080
>  
>  #define MAX_DISCARD_BLOCKS(sbi)  BLKS_PER_SEC(sbi)
>  #define DEF_MAX_DISCARD_REQUEST  8   /* issue 8 discards per 
> round */
> @@ -1422,7 +1423,6 @@ struct f2fs_sb_info {
>   unsigned int segs_per_sec;  /* segments per section */
>   unsigned int secs_per_zone; /* sections per zone */
>   unsigned int total_sections;/* total section count */
> - struct mutex resize_mutex;  /* for resize exclusion */
>   unsigned int total_node_count;  /* total node block count */
>   unsigned int total_valid_node_count;/* valid node block count */
>   loff_t max_file_blocks; /* max block index of file */
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index f06b029c00d8d..0514fab8d2eb8 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -3313,7 +3313,6 @@ static int f2fs_ioc_resize_fs(struct file *filp, 
> unsigned long arg)
>  {
>   struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
>   __u64 block_count;
> - int ret;
>  
>   if (!capable(CAP_SYS_ADMIN))
>   return -EPERM;
> @@ -3325,9 +3324,7 @@ static int f2fs_ioc_resize_fs(struct file *filp, 
> unsigned long arg)
>  sizeof(block_count)))
>   return -EFAULT;
>  
> - ret = f2fs_resize_fs(sbi, block_count);
> -
> - return ret;
> + return f2fs_resize_fs(sbi, block_count);
>  }
>  
>  static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 26248c8936db0..3d003397252b8 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1399,12 +1399,29 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *s

[f2fs-dev] [PATCH] f2fs: show the fsync_mode=nobarrier mount option

2018-07-01 Thread Sahitya Tummala
This patch shows the fsync_mode=nobarrier mount option in
f2fs_show_options().

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/super.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3995e92..12e3d2d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1343,6 +1343,8 @@ static int f2fs_show_options(struct seq_file *seq, struct 
dentry *root)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
seq_printf(seq, ",fsync_mode=%s", "strict");
+   else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER)
+   seq_printf(seq, ",fsync_mode=%s", "nobarrier");
return 0;
 }
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




Re: [f2fs-dev] [PATCH] f2fs: avoid wrong decrypted data from disk

2018-08-29 Thread Sahitya Tummala
On Mon, Aug 27, 2018 at 03:52:26PM -0700, Jaegeuk Kim wrote:
> 1. Create a file in an encrypted directory
> 2. Do GC & drop caches
> 3. Read stale data before its bio for metapage was not issued yet
> 
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/data.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 382c1ef9a9e4..c3557fd4a0bd 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1550,6 +1550,13 @@ static int f2fs_mpage_readpages(struct address_space 
> *mapping,
>   bio = NULL;
>   }
>   if (bio == NULL) {
> + /*
> +  * If the page is under writeback, we need to wait for
> +  * its completion to see the correct decrypted data.
> +  */
> + if (unlikely(f2fs_encrypted_file(inode)))
> + f2fs_wait_on_block_writeback(F2FS_I_SB(inode), 
> block_nr);
> +

I am not sure if this really helps the case.

When the data is being moved by GC, writeback is set on the encrypted page,
which belongs to the meta mapping. But before that writeback completes, the
read will happen through the original file, whose corresponding page will not
have any writeback set, right?

>   bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
>   is_readahead ? REQ_RAHEAD : 0);
>   if (IS_ERR(bio)) {
> -- 
> 2.17.0.441.gb46fe60e1d-goog
> 
> 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.



Re: [f2fs-dev] [PATCH] f2fs: avoid wrong decrypted data from disk

2018-08-30 Thread Sahitya Tummala
On Thu, Aug 30, 2018 at 11:59:03AM +0530, Sahitya Tummala wrote:
> On Mon, Aug 27, 2018 at 03:52:26PM -0700, Jaegeuk Kim wrote:
> > 1. Create a file in an encrypted directory
> > 2. Do GC & drop caches
> > 3. Read stale data before its bio for metapage was not issued yet
> > 
> > Signed-off-by: Jaegeuk Kim 
> > ---
> >  fs/f2fs/data.c | 7 +++
> >  1 file changed, 7 insertions(+)
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 382c1ef9a9e4..c3557fd4a0bd 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1550,6 +1550,13 @@ static int f2fs_mpage_readpages(struct address_space 
> > *mapping,
> > bio = NULL;
> > }
> > if (bio == NULL) {
> > +   /*
> > +* If the page is under writeback, we need to wait for
> > +* its completion to see the correct decrypted data.
> > +*/
> > +   if (unlikely(f2fs_encrypted_file(inode)))
> > +   f2fs_wait_on_block_writeback(F2FS_I_SB(inode), 
> > block_nr);
> > +
> 
> I am not sure if this really helps the case.
> 
> When the data is being moved by GC, the writeback is set on the encrypted page
> which belongs to meta mapping. But before that writeback could complete, the 
> read
> will happen on the original file where it's corresponding page will not have 
> any
> writeback set, right?

Never mind, got it. I tested with your latest v3 patch and it fixes the
problem. Thanks.
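
For reference, the reason this also covers a read through the original file is
that the wait is keyed by block address in the meta inode's mapping rather than
by the file's own page. Roughly, from memory of the current code, so treat this
as a sketch rather than the authoritative source:

void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct page *cpage;

	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
		return;

	/* GC writes the encrypted page into the meta mapping, indexed by blkaddr */
	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
	if (cpage) {
		f2fs_wait_on_page_writeback(cpage, DATA, true);
		f2fs_put_page(cpage, 1);
	}
}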

> 
> > bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
> > is_readahead ? REQ_RAHEAD : 0);
> > if (IS_ERR(bio)) {
> > -- 
> > 2.17.0.441.gb46fe60e1d-goog
> > 
> > 
> 
> -- 
> --
> Sent by a consultant of the Qualcomm Innovation Center, Inc.
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
> 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.



[f2fs-dev] [PATCH] f2fs: fix unnecessary periodic wakeup of discard thread when dev is busy

2018-08-31 Thread Sahitya Tummala
When the device is busy, the discard thread's wake-up timeout can be aligned
with the exact time it needs to wait for the device to come out of the busy
state. This helps to avoid unnecessary periodic wakeups and thus saves some
power.

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/segment.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 8bcbb50..df14030 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1379,6 +1379,8 @@ static int issue_discard_thread(void *data)
struct discard_policy dpolicy;
unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
int issued;
+   unsigned long interval = sbi->interval_time[REQ_TIME] * HZ;
+   long delta;
 
set_freezable();
 
@@ -1410,7 +1412,11 @@ static int issue_discard_thread(void *data)
__wait_all_discard_cmd(sbi, &dpolicy);
wait_ms = dpolicy.min_interval;
} else if (issued == -1){
-   wait_ms = dpolicy.mid_interval;
+   delta = (sbi->last_time[REQ_TIME] + interval) - jiffies;
+   if (delta > 0)
+   wait_ms = jiffies_to_msecs(delta);
+   else
+   wait_ms = dpolicy.mid_interval;
} else {
wait_ms = dpolicy.max_interval;
}
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.




Re: [f2fs-dev] [PATCH] f2fs: fix unnecessary periodic wakeup of discard thread when dev is busy

2018-09-02 Thread Sahitya Tummala
On Sun, Sep 02, 2018 at 04:52:40PM +0800, Chao Yu wrote:
> On 2018/8/31 17:39, Sahitya Tummala wrote:
> > When dev is busy, discard thread wake up timeout can be aligned with the
> > exact time that it needs to wait for dev to come out of busy. This helps
> > to avoid unnecessary periodic wakeups and thus save some power.
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> >  fs/f2fs/segment.c | 8 +++-
> >  1 file changed, 7 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 8bcbb50..df14030 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -1379,6 +1379,8 @@ static int issue_discard_thread(void *data)
> > struct discard_policy dpolicy;
> > unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
> > int issued;
> > +   unsigned long interval = sbi->interval_time[REQ_TIME] * HZ;
> > +   long delta;
> >  
> > set_freezable();
> >  
> > @@ -1410,7 +1412,11 @@ static int issue_discard_thread(void *data)
> > __wait_all_discard_cmd(sbi, &dpolicy);
> > wait_ms = dpolicy.min_interval;
> > } else if (issued == -1){
> > -   wait_ms = dpolicy.mid_interval;
> > +   delta = (sbi->last_time[REQ_TIME] + interval) - jiffies;
> 
> I agree that we need to consider power consumption. One more consideration is
> that discard thread may need different submission frequency comparing to 
> garbage
> collection thread, maybe a little fast, would it be better to split
> sbi->interval_time[REQ_TIME] according to gc/discard type.
> 
> How do you think?
> 
> Thanks,
> 

Thanks for the review.

You mean when the GC type is urgent? I see that in that case the discard policy
is changed to DPOLICY_FORCE, which sets dpolicy->io_aware to false, and hence it
cannot fall into this (issued == -1) case at all.

> > +   if (delta > 0)
> > +   wait_ms = jiffies_to_msecs(delta);
> > +   else
> > +   wait_ms = dpolicy.mid_interval;
> > } else {
> > wait_ms = dpolicy.max_interval;
> > }
> > 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.



[f2fs-dev] [PATCH] f2fs: add new idle interval timing for discard and gc paths

2018-09-09 Thread Sahitya Tummala
This helps to control the frequency of discard and GC request submission
independently, based on the need. The sleep timing of the GC thread is now
aligned with this idle time when the device is busy, to avoid unnecessary
periodic wakeups.

Suggested-by: Chao Yu 
Signed-off-by: Sahitya Tummala 
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 17 -
 fs/f2fs/f2fs.h  | 31 +++
 fs/f2fs/gc.c|  6 --
 fs/f2fs/segment.c   | 14 +-
 fs/f2fs/super.c |  2 ++
 fs/f2fs/sysfs.c |  5 +
 6 files changed, 59 insertions(+), 16 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index 94a24ae..3ac4177 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -121,7 +121,22 @@ What:  /sys/fs/f2fs//idle_interval
 Date:  January 2016
 Contact:   "Jaegeuk Kim" 
 Description:
-Controls the idle timing.
+Controls the idle timing for all paths other than
+discard and gc path.
+
+What:  /sys/fs/f2fs//discard_idle_interval
+Date:  September 2018
+Contact:   "Chao Yu" 
+Contact:   "Sahitya Tummala" 
+Description:
+Controls the idle timing for discard path.
+
+What:  /sys/fs/f2fs//gc_idle_interval
+Date:  September 2018
+Contact:   "Chao Yu" 
+Contact:   "Sahitya Tummala" 
+Description:
+Controls the idle timing for gc path.
 
 What:  /sys/fs/f2fs//iostat_enable
 Date:  August 2017
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index abf9256..6070681 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1093,6 +1093,8 @@ enum {
 enum {
CP_TIME,
REQ_TIME,
+   DISCARD_TIME,
+   GC_TIME,
MAX_TIME,
 };
 
@@ -1347,14 +1349,35 @@ static inline void f2fs_update_time(struct f2fs_sb_info 
*sbi, int type)
sbi->last_time[type] = jiffies;
 }
 
-static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
+static inline bool f2fs_time_over_cp(struct f2fs_sb_info *sbi)
+{
+   unsigned long interval = sbi->interval_time[CP_TIME] * HZ;
+
+   return time_after(jiffies, sbi->last_time[CP_TIME] + interval);
+}
+
+static inline bool f2fs_time_over_req(struct f2fs_sb_info *sbi, int type)
+{
+   unsigned long interval = sbi->interval_time[type] * HZ;
+
+   return time_after(jiffies, sbi->last_time[REQ_TIME] + interval);
+}
+
+static inline unsigned int f2fs_get_wait_time(struct f2fs_sb_info *sbi,
+   int type)
 {
unsigned long interval = sbi->interval_time[type] * HZ;
+   unsigned int wait_ms = 0;
+   long delta;
+
+   delta = (sbi->last_time[REQ_TIME] + interval) - jiffies;
+   if (delta > 0)
+   wait_ms = jiffies_to_msecs(delta);
 
-   return time_after(jiffies, sbi->last_time[type] + interval);
+   return wait_ms;
 }
 
-static inline bool is_idle(struct f2fs_sb_info *sbi)
+static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
 {
struct block_device *bdev = sbi->sb->s_bdev;
struct request_queue *q = bdev_get_queue(bdev);
@@ -1363,7 +1386,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi)
if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
return false;
 
-   return f2fs_time_over(sbi, REQ_TIME);
+   return f2fs_time_over_req(sbi, type);
 }
 
 /*
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 5c8d004..c0bafea 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -83,8 +83,10 @@ static int gc_thread_func(void *data)
if (!mutex_trylock(&sbi->gc_mutex))
goto next;
 
-   if (!is_idle(sbi)) {
-   increase_sleep_time(gc_th, &wait_ms);
+   if (!is_idle(sbi, GC_TIME)) {
+   wait_ms = f2fs_get_wait_time(sbi, GC_TIME);
+   if (!wait_ms)
+   increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
goto next;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c5024f8..f8a6a7b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -511,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
else
f2fs_build_free_nids(sbi, false, false);
 
-   if (!is_idle(sbi) &&
+   if (!is_idle(sbi, REQ_TIME) &&
(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
 
@@ -521,7 +521,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
excess_prefree_segs(sbi) ||

[f2fs-dev] [PATCH v2] f2fs: add new idle interval timing for discard and gc paths

2018-09-09 Thread Sahitya Tummala
This helps to control the frequency of discard and GC request submission
independently, based on the need. The sleep timing of the GC thread is now
aligned with this idle time when the device is busy, to avoid unnecessary
periodic wakeups.

Suggested-by: Chao Yu 
Signed-off-by: Sahitya Tummala 
---
v2:
-fix __issue_discard_cmd_orderly() path

 Documentation/ABI/testing/sysfs-fs-f2fs | 17 -
 fs/f2fs/f2fs.h  | 31 +++
 fs/f2fs/gc.c|  6 --
 fs/f2fs/segment.c   | 16 ++--
 fs/f2fs/super.c |  2 ++
 fs/f2fs/sysfs.c |  5 +
 6 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index 94a24ae..3ac4177 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -121,7 +121,22 @@ What:  /sys/fs/f2fs//idle_interval
 Date:  January 2016
 Contact:   "Jaegeuk Kim" 
 Description:
-Controls the idle timing.
+Controls the idle timing for all paths other than
+discard and gc path.
+
+What:  /sys/fs/f2fs//discard_idle_interval
+Date:  September 2018
+Contact:   "Chao Yu" 
+Contact:   "Sahitya Tummala" 
+Description:
+Controls the idle timing for discard path.
+
+What:  /sys/fs/f2fs//gc_idle_interval
+Date:  September 2018
+Contact:   "Chao Yu" 
+Contact:   "Sahitya Tummala" 
+Description:
+Controls the idle timing for gc path.
 
 What:  /sys/fs/f2fs//iostat_enable
 Date:  August 2017
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index abf9256..6070681 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1093,6 +1093,8 @@ enum {
 enum {
CP_TIME,
REQ_TIME,
+   DISCARD_TIME,
+   GC_TIME,
MAX_TIME,
 };
 
@@ -1347,14 +1349,35 @@ static inline void f2fs_update_time(struct f2fs_sb_info 
*sbi, int type)
sbi->last_time[type] = jiffies;
 }
 
-static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
+static inline bool f2fs_time_over_cp(struct f2fs_sb_info *sbi)
+{
+   unsigned long interval = sbi->interval_time[CP_TIME] * HZ;
+
+   return time_after(jiffies, sbi->last_time[CP_TIME] + interval);
+}
+
+static inline bool f2fs_time_over_req(struct f2fs_sb_info *sbi, int type)
+{
+   unsigned long interval = sbi->interval_time[type] * HZ;
+
+   return time_after(jiffies, sbi->last_time[REQ_TIME] + interval);
+}
+
+static inline unsigned int f2fs_get_wait_time(struct f2fs_sb_info *sbi,
+   int type)
 {
unsigned long interval = sbi->interval_time[type] * HZ;
+   unsigned int wait_ms = 0;
+   long delta;
+
+   delta = (sbi->last_time[REQ_TIME] + interval) - jiffies;
+   if (delta > 0)
+   wait_ms = jiffies_to_msecs(delta);
 
-   return time_after(jiffies, sbi->last_time[type] + interval);
+   return wait_ms;
 }
 
-static inline bool is_idle(struct f2fs_sb_info *sbi)
+static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
 {
struct block_device *bdev = sbi->sb->s_bdev;
struct request_queue *q = bdev_get_queue(bdev);
@@ -1363,7 +1386,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi)
if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
return false;
 
-   return f2fs_time_over(sbi, REQ_TIME);
+   return f2fs_time_over_req(sbi, type);
 }
 
 /*
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 5c8d004..c0bafea 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -83,8 +83,10 @@ static int gc_thread_func(void *data)
if (!mutex_trylock(&sbi->gc_mutex))
goto next;
 
-   if (!is_idle(sbi)) {
-   increase_sleep_time(gc_th, &wait_ms);
+   if (!is_idle(sbi, GC_TIME)) {
+   wait_ms = f2fs_get_wait_time(sbi, GC_TIME);
+   if (!wait_ms)
+   increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
goto next;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c5024f8..2d15733 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -511,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
else
f2fs_build_free_nids(sbi, false, false);
 
-   if (!is_idle(sbi) &&
+   if (!is_idle(sbi, REQ_TIME) &&
(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
 
@@ -521,7 +521,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 

Re: [f2fs-dev] [PATCH v2] f2fs: add new idle interval timing for discard and gc paths

2018-09-12 Thread Sahitya Tummala
On Tue, Sep 11, 2018 at 03:09:58PM -0700, Jaegeuk Kim wrote:
> On 09/11, Chao Yu wrote:
> > On 2018/9/10 11:47, Sahitya Tummala wrote:
> > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > index abf9256..6070681 100644
> > > --- a/fs/f2fs/f2fs.h
> > > +++ b/fs/f2fs/f2fs.h
> > > @@ -1093,6 +1093,8 @@ enum {
> > >  enum {
> > >   CP_TIME,
> > >   REQ_TIME,
> > > + DISCARD_TIME,
> > > + GC_TIME,
> > >   MAX_TIME,
> > >  };
> > >  
> > > @@ -1347,14 +1349,35 @@ static inline void f2fs_update_time(struct 
> > > f2fs_sb_info *sbi, int type)
> > >   sbi->last_time[type] = jiffies;
> > >  }
> > >  
> > > -static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
> > > +static inline bool f2fs_time_over_cp(struct f2fs_sb_info *sbi)
> 
> I don't see why we need this separately.

Yes, not really required. I will update it.

> 
> > > +{
> > > + unsigned long interval = sbi->interval_time[CP_TIME] * HZ;
> > > +
> > > + return time_after(jiffies, sbi->last_time[CP_TIME] + interval);
> > > +}
> > > +
> > > +static inline bool f2fs_time_over_req(struct f2fs_sb_info *sbi, int type)
> > > +{
> > > + unsigned long interval = sbi->interval_time[type] * HZ;
> > > +
> > > + return time_after(jiffies, sbi->last_time[REQ_TIME] + interval);
> > > +}
> > > +
> > > +static inline unsigned int f2fs_get_wait_time(struct f2fs_sb_info *sbi,
> > > + int type)
> 
> f2fs_time_to_wait()?

Sure.

> > > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > > index 5c8d004..c0bafea 100644
> > > --- a/fs/f2fs/gc.c
> > > +++ b/fs/f2fs/gc.c
> > > @@ -83,8 +83,10 @@ static int gc_thread_func(void *data)
> > >   if (!mutex_trylock(&sbi->gc_mutex))
> > >   goto next;
> > >  
> > > - if (!is_idle(sbi)) {
> > > - increase_sleep_time(gc_th, &wait_ms);
> > > + if (!is_idle(sbi, GC_TIME)) {
> > > + wait_ms = f2fs_get_wait_time(sbi, GC_TIME);
> > 
> > It seems this patch changes the method of increasing wait_ms here, if 
> > device is
> > busy, we may wake up GC thread earlier than before, not sure we should do 
> > this.
> > 
> > To Jaegeuk, how do you think of this?
> 
> Yes, please let us discuss this in another patch.

Sure, I will submit this in another patch for discussion.

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.




[f2fs-dev] [PATCH v3] f2fs: add new idle interval timing for discard and gc paths

2018-09-17 Thread Sahitya Tummala
This helps to control the frequency of submission of discard and
GC requests independently, based on the need.

Suggested-by: Chao Yu 
Signed-off-by: Sahitya Tummala 
---
v3:
-don't change gc thread wait_ms in this patch
-Use the existing function f2fs_time_over() to handle this
-change f2fs_get_wait_time() to f2fs_time_to_wait()

 Documentation/ABI/testing/sysfs-fs-f2fs | 17 -
 fs/f2fs/f2fs.h  | 28 +---
 fs/f2fs/gc.c|  2 +-
 fs/f2fs/segment.c   | 14 +-
 fs/f2fs/super.c |  2 ++
 fs/f2fs/sysfs.c |  5 +
 6 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index 94a24ae..3ac4177 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -121,7 +121,22 @@ What:  /sys/fs/f2fs//idle_interval
 Date:  January 2016
 Contact:   "Jaegeuk Kim" 
 Description:
-Controls the idle timing.
+Controls the idle timing for all paths other than
+discard and gc path.
+
+What:  /sys/fs/f2fs//discard_idle_interval
+Date:  September 2018
+Contact:   "Chao Yu" 
+Contact:   "Sahitya Tummala" 
+Description:
+Controls the idle timing for discard path.
+
+What:  /sys/fs/f2fs//gc_idle_interval
+Date:  September 2018
+Contact:   "Chao Yu" 
+Contact:   "Sahitya Tummala" 
+Description:
+Controls the idle timing for gc path.
 
 What:  /sys/fs/f2fs//iostat_enable
 Date:  August 2017
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 88b8d50..41e00d3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1095,6 +1095,8 @@ enum {
 enum {
CP_TIME,
REQ_TIME,
+   DISCARD_TIME,
+   GC_TIME,
MAX_TIME,
 };
 
@@ -1352,11 +1354,31 @@ static inline void f2fs_update_time(struct f2fs_sb_info 
*sbi, int type)
 static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
 {
unsigned long interval = sbi->interval_time[type] * HZ;
+   unsigned long last_time;
 
-   return time_after(jiffies, sbi->last_time[type] + interval);
+   if (type == CP_TIME)
+   last_time = sbi->last_time[CP_TIME];
+   else
+   last_time = sbi->last_time[REQ_TIME];
+
+   return time_after(jiffies, last_time + interval);
+}
+
+static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi,
+   int type)
+{
+   unsigned long interval = sbi->interval_time[type] * HZ;
+   unsigned int wait_ms = 0;
+   long delta;
+
+   delta = (sbi->last_time[REQ_TIME] + interval) - jiffies;
+   if (delta > 0)
+   wait_ms = jiffies_to_msecs(delta);
+
+   return wait_ms;
 }
 
-static inline bool is_idle(struct f2fs_sb_info *sbi)
+static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
 {
struct block_device *bdev = sbi->sb->s_bdev;
struct request_queue *q = bdev_get_queue(bdev);
@@ -1365,7 +1387,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi)
if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
return false;
 
-   return f2fs_time_over(sbi, REQ_TIME);
+   return f2fs_time_over(sbi, type);
 }
 
 /*
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 5c8d004..49e2328 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -83,7 +83,7 @@ static int gc_thread_func(void *data)
if (!mutex_trylock(&sbi->gc_mutex))
goto next;
 
-   if (!is_idle(sbi)) {
+   if (!is_idle(sbi, GC_TIME)) {
increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
goto next;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 187c848..67cf7e4 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -511,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
else
f2fs_build_free_nids(sbi, false, false);
 
-   if (!is_idle(sbi) &&
+   if (!is_idle(sbi, REQ_TIME) &&
(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
 
@@ -1311,7 +1311,7 @@ static unsigned int __issue_discard_cmd_orderly(struct 
f2fs_sb_info *sbi,
if (dc->state != D_PREP)
goto next;
 
-   if (dpolicy->io_aware && !is_idle(sbi)) {
+   if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
io_interrupted = true;
break;
}
@@ -1371,7 +1371,7 @@ static int __issu

Re: [f2fs-dev] [PATCH v3] f2fs: add new idle interval timing for discard and gc paths

2018-09-17 Thread Sahitya Tummala
On Mon, Sep 17, 2018 at 06:30:39PM -0700, Jaegeuk Kim wrote:
> On 09/17, Sahitya Tummala wrote:
> > This helps to control the frequency of submission of discard and
> > GC requests independently, based on the need.
> > 
> > Suggested-by: Chao Yu 
> > Signed-off-by: Sahitya Tummala 
> > ---
> > v3:
> > -don't change gc thread wait_ms in this patch
> > -Use the existing function f2fs_time_over() to handle this
> > -change f2fs_get_wait_time() to f2fs_time_to_wait()
> > 
> >  Documentation/ABI/testing/sysfs-fs-f2fs | 17 -
> >  fs/f2fs/f2fs.h  | 28 +---
> >  fs/f2fs/gc.c|  2 +-
> >  fs/f2fs/segment.c   | 14 +-
> >  fs/f2fs/super.c |  2 ++
> >  fs/f2fs/sysfs.c |  5 +
> >  6 files changed, 54 insertions(+), 14 deletions(-)
> > 
> > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
> > b/Documentation/ABI/testing/sysfs-fs-f2fs
> > index 94a24ae..3ac4177 100644
> > --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> > @@ -121,7 +121,22 @@ What:  /sys/fs/f2fs//idle_interval
> >  Date:  January 2016
> >  Contact:   "Jaegeuk Kim" 
> >  Description:
> > -Controls the idle timing.
> > +Controls the idle timing for all paths other than
> > +discard and gc path.
> > +
> > +What:  /sys/fs/f2fs//discard_idle_interval
> > +Date:  September 2018
> > +Contact:   "Chao Yu" 
> > +Contact:   "Sahitya Tummala" 
> > +Description:
> > +Controls the idle timing for discard path.
> > +
> > +What:  /sys/fs/f2fs//gc_idle_interval
> > +Date:  September 2018
> > +Contact:   "Chao Yu" 
> > +Contact:   "Sahitya Tummala" 
> > +Description:
> > +Controls the idle timing for gc path.
> >  
> >  What:  /sys/fs/f2fs//iostat_enable
> >  Date:  August 2017
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 88b8d50..41e00d3 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1095,6 +1095,8 @@ enum {
> >  enum {
> > CP_TIME,
> > REQ_TIME,
> > +   DISCARD_TIME,
> > +   GC_TIME,
> > MAX_TIME,
> >  };
> >  
> > @@ -1352,11 +1354,31 @@ static inline void f2fs_update_time(struct 
> > f2fs_sb_info *sbi, int type)
> >  static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
> >  {
> > unsigned long interval = sbi->interval_time[type] * HZ;
> > +   unsigned long last_time;
> >  
> > -   return time_after(jiffies, sbi->last_time[type] + interval);
> > +   if (type == CP_TIME)
> > +   last_time = sbi->last_time[CP_TIME];
> > +   else
> > +   last_time = sbi->last_time[REQ_TIME];
> 
> Why can't we use just this?
> 
>   return time_after(jiffies, sbi->last_time[type] + interval);
> 

There are no sbi->last_time[DISCARD_TIME] and sbi->last_time[GC_TIME] entries in
use. Only sbi->interval_time[DISCARD_TIME] and sbi->interval_time[GC_TIME] are
defined, to determine the idle interval since the last request time separately
for these two paths.
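
If we did want per-type timestamps, one minimal way to keep them in sync, just
as a sketch of the idea and assuming DISCARD_TIME and GC_TIME should simply
follow REQ_TIME, would be to refresh them whenever REQ_TIME is refreshed:

static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
{
	unsigned long now = jiffies;

	sbi->last_time[type] = now;

	/* DISCARD_TIME and GC_TIME are driven by the same request activity */
	if (type == REQ_TIME) {
		sbi->last_time[DISCARD_TIME] = now;
		sbi->last_time[GC_TIME] = now;
	}
}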

> > +
> > +   return time_after(jiffies, last_time + interval);
> > +}
> > +
> > +static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi,
> > +   int type)
> > +{
> > +   unsigned long interval = sbi->interval_time[type] * HZ;
> > +   unsigned int wait_ms = 0;
> > +   long delta;
> > +
> > +   delta = (sbi->last_time[REQ_TIME] + interval) - jiffies;
> > +   if (delta > 0)
> > +   wait_ms = jiffies_to_msecs(delta);
> > +
> > +   return wait_ms;
> >  }
> >  
> > -static inline bool is_idle(struct f2fs_sb_info *sbi)
> > +static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
> >  {
> > struct block_device *bdev = sbi->sb->s_bdev;
> > struct request_queue *q = bdev_get_queue(bdev);
> > @@ -1365,7 +1387,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi)
> > if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
> > return false;
> >  
> > -   return f2fs_time_over(sbi, REQ_TIME);
> > +   return f2fs_time_over(sbi, type);
> >  }
> >  
&

Re: [f2fs-dev] [PATCH v3] f2fs: add new idle interval timing for discard and gc paths

2018-09-18 Thread Sahitya Tummala
On Tue, Sep 18, 2018 at 09:39:54AM -0700, Jaegeuk Kim wrote:
> On 09/18, Sahitya Tummala wrote:
> > On Mon, Sep 17, 2018 at 06:30:39PM -0700, Jaegeuk Kim wrote:
> > > On 09/17, Sahitya Tummala wrote:
> > > > This helps to control the frequency of submission of discard and
> > > > GC requests independently, based on the need.
> > > > 
> > > > Suggested-by: Chao Yu 
> > > > Signed-off-by: Sahitya Tummala 
> > > > ---
> > > > v3:
> > > > -don't change gc thread wait_ms in this patch
> > > > -Use the existing function f2fs_time_over() to handle this
> > > > -change f2fs_get_wait_time() to f2fs_time_to_wait()
> > > > 
> > > >  Documentation/ABI/testing/sysfs-fs-f2fs | 17 -
> > > >  fs/f2fs/f2fs.h  | 28 
> > > > +---
> > > >  fs/f2fs/gc.c|  2 +-
> > > >  fs/f2fs/segment.c   | 14 +-
> > > >  fs/f2fs/super.c |  2 ++
> > > >  fs/f2fs/sysfs.c |  5 +
> > > >  6 files changed, 54 insertions(+), 14 deletions(-)
> > > > 
> > > > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
> > > > b/Documentation/ABI/testing/sysfs-fs-f2fs
> > > > index 94a24ae..3ac4177 100644
> > > > --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> > > > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> > > > @@ -121,7 +121,22 @@ What:  
> > > > /sys/fs/f2fs//idle_interval
> > > >  Date:  January 2016
> > > >  Contact:   "Jaegeuk Kim" 
> > > >  Description:
> > > > -Controls the idle timing.
> > > > +Controls the idle timing for all paths other than
> > > > +discard and gc path.
> > > > +
> > > > +What:  /sys/fs/f2fs//discard_idle_interval
> > > > +Date:  September 2018
> > > > +Contact:   "Chao Yu" 
> > > > +Contact:   "Sahitya Tummala" 
> > > > +Description:
> > > > +Controls the idle timing for discard path.
> > > > +
> > > > +What:  /sys/fs/f2fs//gc_idle_interval
> > > > +Date:  September 2018
> > > > +Contact:   "Chao Yu" 
> > > > +Contact:   "Sahitya Tummala" 
> > > > +Description:
> > > > +Controls the idle timing for gc path.
> > > >  
> > > >  What:  /sys/fs/f2fs//iostat_enable
> > > >  Date:  August 2017
> > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > index 88b8d50..41e00d3 100644
> > > > --- a/fs/f2fs/f2fs.h
> > > > +++ b/fs/f2fs/f2fs.h
> > > > @@ -1095,6 +1095,8 @@ enum {
> > > >  enum {
> > > > CP_TIME,
> > > > REQ_TIME,
> > > > +   DISCARD_TIME,
> > > > +   GC_TIME,
> > > > MAX_TIME,
> > > >  };
> > > >  
> > > > @@ -1352,11 +1354,31 @@ static inline void f2fs_update_time(struct 
> > > > f2fs_sb_info *sbi, int type)
> > > >  static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
> > > >  {
> > > > unsigned long interval = sbi->interval_time[type] * HZ;
> > > > +   unsigned long last_time;
> > > >  
> > > > -   return time_after(jiffies, sbi->last_time[type] + interval);
> > > > +   if (type == CP_TIME)
> > > > +   last_time = sbi->last_time[CP_TIME];
> > > > +   else
> > > > +   last_time = sbi->last_time[REQ_TIME];
> > > 
> > > Why can't we use just this?
> > > 
> > >   return time_after(jiffies, sbi->last_time[type] + interval);
> > > 
> > 
> > There is no sbi->last_time[discard_time] and sbi->last_time[gc_time]. Only
> > sbi->interval_time[discard_time] and sbi->interval_time[gc_time] are defined
> > to determine the idle interval time since the last request time, separately
> > for these two paths.
> 
> If you add enums above, sbi will have last_time slots for DISCARD_TIME/GC_TIME.
> Doesn't it make sense to add the below in f2fs_update_time(), if you want to
> introduce two additional types?
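
(The suggested change is cut off in the archive; as a reference sketch, this is
what the f2fs_update_time() change looks like in the v4 patch later in this
thread, using the DISCARD_TIME/GC_TIME enums from the quoted diff:)

static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
{
	unsigned long now = jiffies;

	sbi->last_time[type] = now;

	/* DISCARD_TIME and GC_TIME idleness is measured from the last request */
	if (type == REQ_TIME) {
		sbi->last_time[DISCARD_TIME] = now;
		sbi->last_time[GC_TIME] = now;
	}
}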

Re: [f2fs-dev] [PATCH 1/2] f2fs: report ENOENT correct in f2fs_rename

2018-09-18 Thread Sahitya Tummala
On Mon, Sep 17, 2018 at 07:18:04PM -0700, Jaegeuk Kim wrote:
> This fixes wrong error report in f2fs_rename.

I think f2fs_cross_rename() path also needs the same fix?

> 
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/namei.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
> index 98d3ab7c3ce6..d653be777529 100644
> --- a/fs/f2fs/namei.c
> +++ b/fs/f2fs/namei.c
> @@ -833,7 +833,7 @@ static int f2fs_rename(struct inode *old_dir, struct 
> dentry *old_dentry,
>   struct f2fs_dir_entry *old_entry;
>   struct f2fs_dir_entry *new_entry;
>   bool is_old_inline = f2fs_has_inline_dentry(old_dir);
> - int err = -ENOENT;
> + int err;
>  
>   if (unlikely(f2fs_cp_error(sbi)))
>   return -EIO;
> @@ -854,6 +854,7 @@ static int f2fs_rename(struct inode *old_dir, struct 
> dentry *old_dentry,
>   if (err)
>   goto out;
>  
> + err = -ENOENT;
>   if (new_inode) {
>   err = dquot_initialize(new_inode);
>   if (err)
> -- 
> 2.17.0.441.gb46fe60e1d-goog
> 
> 
> 
> ___
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v4] f2fs: add new idle interval timing for discard and gc paths

2018-09-19 Thread Sahitya Tummala
This helps to control the frequency of submission of discard and
GC requests independently, based on the need.

Suggested-by: Chao Yu 
Signed-off-by: Sahitya Tummala 
---
v4:
- initialize and use sbi->last_time[discard_time], sbi->last_time[gc_time]

 Documentation/ABI/testing/sysfs-fs-f2fs | 17 -
 fs/f2fs/f2fs.h  | 30 +++---
 fs/f2fs/gc.c|  2 +-
 fs/f2fs/segment.c   | 14 +-
 fs/f2fs/super.c |  2 ++
 fs/f2fs/sysfs.c |  5 +
 6 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index 94a24ae..3ac4177 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -121,7 +121,22 @@ What:  /sys/fs/f2fs//idle_interval
 Date:  January 2016
 Contact:   "Jaegeuk Kim" 
 Description:
-Controls the idle timing.
+Controls the idle timing for all paths other than
+discard and gc path.
+
+What:  /sys/fs/f2fs//discard_idle_interval
+Date:  September 2018
+Contact:   "Chao Yu" 
+Contact:   "Sahitya Tummala" 
+Description:
+Controls the idle timing for discard path.
+
+What:  /sys/fs/f2fs//gc_idle_interval
+Date:  September 2018
+Contact:   "Chao Yu" 
+Contact:   "Sahitya Tummala" 
+Description:
+Controls the idle timing for gc path.
 
 What:  /sys/fs/f2fs//iostat_enable
 Date:  August 2017
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 88b8d50..c47b7d2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1095,6 +1095,8 @@ enum {
 enum {
CP_TIME,
REQ_TIME,
+   DISCARD_TIME,
+   GC_TIME,
MAX_TIME,
 };
 
@@ -1346,7 +1348,15 @@ static inline bool time_to_inject(struct f2fs_sb_info 
*sbi, int type)
 
 static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
 {
-   sbi->last_time[type] = jiffies;
+   unsigned long now = jiffies;
+
+   sbi->last_time[type] = now;
+
+   /* DISCARD_TIME and GC_TIME are based on REQ_TIME */
+   if (type == REQ_TIME) {
+   sbi->last_time[DISCARD_TIME] = now;
+   sbi->last_time[GC_TIME] = now;
+   }
 }
 
 static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
@@ -1356,7 +1366,21 @@ static inline bool f2fs_time_over(struct f2fs_sb_info 
*sbi, int type)
return time_after(jiffies, sbi->last_time[type] + interval);
 }
 
-static inline bool is_idle(struct f2fs_sb_info *sbi)
+static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi,
+   int type)
+{
+   unsigned long interval = sbi->interval_time[type] * HZ;
+   unsigned int wait_ms = 0;
+   long delta;
+
+   delta = (sbi->last_time[type] + interval) - jiffies;
+   if (delta > 0)
+   wait_ms = jiffies_to_msecs(delta);
+
+   return wait_ms;
+}
+
+static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
 {
struct block_device *bdev = sbi->sb->s_bdev;
struct request_queue *q = bdev_get_queue(bdev);
@@ -1365,7 +1389,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi)
if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
return false;
 
-   return f2fs_time_over(sbi, REQ_TIME);
+   return f2fs_time_over(sbi, type);
 }
 
 /*
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 5c8d004..49e2328 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -83,7 +83,7 @@ static int gc_thread_func(void *data)
if (!mutex_trylock(&sbi->gc_mutex))
goto next;
 
-   if (!is_idle(sbi)) {
+   if (!is_idle(sbi, GC_TIME)) {
increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
goto next;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 187c848..67cf7e4 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -511,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
else
f2fs_build_free_nids(sbi, false, false);
 
-   if (!is_idle(sbi) &&
+   if (!is_idle(sbi, REQ_TIME) &&
(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
 
@@ -1311,7 +1311,7 @@ static unsigned int __issue_discard_cmd_orderly(struct 
f2fs_sb_info *sbi,
if (dc->state != D_PREP)
goto next;
 
-   if (dpolicy->io_aware && !is_idle(sbi)) {
+   if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
  

Re: [f2fs-dev] [PATCH v2] f2fs: allow out-place-update for direct IO in LFS mode

2018-09-20 Thread Sahitya Tummala
On Thu, Sep 20, 2018 at 04:57:18PM +0800, Chao Yu wrote:
> Normally, DIO uses in-place-update, but in LFS mode, f2fs doesn't
> allow triggering any in-place-update writes, so we fall back from direct
> write to buffered write, resulting in bad performance for large size
> writes.
> 
> This patch adds to support triggering out-place-update for direct IO
> to enhance its performance.
> 
> Note that it needs to exclude direct read IO during direct write,
> since new data written to a new block address will not be valid until
> the write finishes.
> 
> storage: zram
> 
> time xfs_io -f -d /mnt/f2fs/file -c "pwrite 0 1073741824" -c "fsync"
> 
> Before:
> real  0m13.061s
> user  0m0.327s
> sys   0m12.486s
> 
> After:
> real  0m6.448s
> user  0m0.228s
> sys   0m6.212s
> 
> Signed-off-by: Chao Yu 
> ---
> v2:
> - don't use direct IO for block zoned device.
>  fs/f2fs/data.c | 41 +
>  fs/f2fs/f2fs.h | 45 +
>  fs/f2fs/file.c |  3 ++-
>  3 files changed, 76 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index b96f8588d565..e709f0fbb7a8 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -894,7 +894,7 @@ static int __allocate_data_block(struct dnode_of_data 
> *dn, int seg_type)
>  
>   dn->data_blkaddr = datablock_addr(dn->inode,
>   dn->node_page, dn->ofs_in_node);
> - if (dn->data_blkaddr == NEW_ADDR)
> + if (dn->data_blkaddr != NULL_ADDR)
>   goto alloc;
>  
>   if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count
> @@ -950,7 +950,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct 
> iov_iter *from)
>  
>   if (direct_io) {
>   map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
> - flag = f2fs_force_buffered_io(inode, WRITE) ?
> + flag = f2fs_force_buffered_io(inode, iocb, from) ?
>   F2FS_GET_BLOCK_PRE_AIO :
>   F2FS_GET_BLOCK_PRE_DIO;
>   goto map_blocks;
> @@ -1069,7 +1069,15 @@ int f2fs_map_blocks(struct inode *inode, struct 
> f2fs_map_blocks *map,
>   goto sync_out;
>   }
>  
> - if (!is_valid_data_blkaddr(sbi, blkaddr)) {
> + if (is_valid_data_blkaddr(sbi, blkaddr)) {
> + /* use out-place-update for direct IO under LFS mode */
> + if (test_opt(sbi, LFS) && create &&
> + flag == F2FS_GET_BLOCK_DEFAULT) {
> + err = __allocate_data_block(&dn, map->m_seg_type);
> + if (!err)
> + set_inode_flag(inode, FI_APPEND_WRITE);
> + }
> + } else {
>   if (create) {
>   if (unlikely(f2fs_cp_error(sbi))) {
>   err = -EIO;
> @@ -2493,36 +2501,53 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, 
> struct iov_iter *iter)
>   struct address_space *mapping = iocb->ki_filp->f_mapping;
>   struct inode *inode = mapping->host;
>   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + struct f2fs_inode_info *fi = F2FS_I(inode);
>   size_t count = iov_iter_count(iter);
>   loff_t offset = iocb->ki_pos;
>   int rw = iov_iter_rw(iter);
>   int err;
>   enum rw_hint hint = iocb->ki_hint;
>   int whint_mode = F2FS_OPTION(sbi).whint_mode;
> + bool lock_read;
>  
>   err = check_direct_IO(inode, iter, offset);
>   if (err)
>   return err < 0 ? err : 0;
>  
> - if (f2fs_force_buffered_io(inode, rw))
> + if (f2fs_force_buffered_io(inode, iocb, iter))
>   return 0;
>  
> + lock_read = allow_outplace_dio(inode, iocb, iter);
> +
>   trace_f2fs_direct_IO_enter(inode, offset, count, rw);
>  
>   if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
>   iocb->ki_hint = WRITE_LIFE_NOT_SET;
>  
> - if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) {
> - if (iocb->ki_flags & IOCB_NOWAIT) {
> + if (iocb->ki_flags & IOCB_NOWAIT) {
> + if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
> + iocb->ki_hint = hint;
> + err = -EAGAIN;
> + goto out;
> + }
> + if (lock_read && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
> + up_read(&fi->i_gc_rwsem[rw]);
>   iocb->ki_hint = hint;
>   err = -EAGAIN;
>   goto out;
>   }
> - down_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
> + } else {
> + down_read(&fi->i_gc_rwsem[rw]);
> + if (lock_read)
> + down_read(&fi->i_gc_rwsem[READ]);
>   }
>  
>   err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
> - up_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
> +
> + if (lock_read)
> + up_read(&fi->i_gc_rws

Re: [f2fs-dev] [PATCH v4] f2fs: allow out-place-update for direct IO in LFS mode

2018-09-26 Thread Sahitya Tummala
On Fri, Sep 21, 2018 at 09:12:22PM +0800, Chao Yu wrote:
> From: Chao Yu 
> 
> Normally, DIO uses in-place-update, but in LFS mode, f2fs doesn't
> allow triggering any in-place-update writes, so we fall back from direct
> write to buffered write, resulting in bad performance for large size
> writes.
> 
> This patch adds to support triggering out-place-update for direct IO
> to enhance its performance.
> 
> Note that it needs to exclude direct read IO during direct write,
> since new data written to a new block address will not be valid until
> the write finishes.
> 
> storage: zram
> 
> time xfs_io -f -d /mnt/f2fs/file -c "pwrite 0 1073741824" -c "fsync"
> 
> Before:
> real  0m13.061s
> user  0m0.327s
> sys   0m12.486s
> 
> After:
> real  0m6.448s
> user  0m0.228s
> sys   0m6.212s
> 
> Signed-off-by: Chao Yu 
> ---
> v4:
> - correct parameter in f2fs_sb_has_blkzoned()
>  fs/f2fs/data.c | 44 +++-
>  fs/f2fs/f2fs.h | 45 +
>  fs/f2fs/file.c |  3 ++-
>  3 files changed, 78 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index b96f8588d565..38d5baa1c35d 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -894,7 +894,7 @@ static int __allocate_data_block(struct dnode_of_data 
> *dn, int seg_type)
>  
>   dn->data_blkaddr = datablock_addr(dn->inode,
>   dn->node_page, dn->ofs_in_node);
> - if (dn->data_blkaddr == NEW_ADDR)
> + if (dn->data_blkaddr != NULL_ADDR)
>   goto alloc;
>  
>   if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count
> @@ -950,7 +950,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct 
> iov_iter *from)
>  
>   if (direct_io) {
>   map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
> - flag = f2fs_force_buffered_io(inode, WRITE) ?
> + flag = f2fs_force_buffered_io(inode, iocb, from) ?
>   F2FS_GET_BLOCK_PRE_AIO :
>   F2FS_GET_BLOCK_PRE_DIO;
>   goto map_blocks;
> @@ -1069,7 +1069,15 @@ int f2fs_map_blocks(struct inode *inode, struct 
> f2fs_map_blocks *map,
>   goto sync_out;
>   }
>  
> - if (!is_valid_data_blkaddr(sbi, blkaddr)) {
> + if (is_valid_data_blkaddr(sbi, blkaddr)) {
> + /* use out-place-update for direct IO under LFS mode */
> + if (test_opt(sbi, LFS) && create &&
> + flag == F2FS_GET_BLOCK_DEFAULT) {

One of the recent patches from Jaegeuk, 0a4daae5ffea ("f2fs: update i_size after
DIO completion"), added a new flag for DIO - F2FS_GET_BLOCK_DIO. I think this
patch needs to be updated accordingly.
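
As a rough sketch of that adjustment (an assumption, not necessarily the fix
that was merged), the out-place-update check in f2fs_map_blocks() would also
need to match the new F2FS_GET_BLOCK_DIO flag:

	if (test_opt(sbi, LFS) && create &&
			(flag == F2FS_GET_BLOCK_DEFAULT ||
			 flag == F2FS_GET_BLOCK_DIO)) {
		/* sketch: allocate a new block (OPU) for LFS-mode DIO writes */
		err = __allocate_data_block(&dn, map->m_seg_type);
		if (!err)
			set_inode_flag(inode, FI_APPEND_WRITE);
	}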

> + err = __allocate_data_block(&dn, map->m_seg_type);
> + if (!err)
> + set_inode_flag(inode, FI_APPEND_WRITE);
> + }
> + } else {
>   if (create) {
>   if (unlikely(f2fs_cp_error(sbi))) {
>   err = -EIO;
> @@ -2493,36 +2501,53 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, 
> struct iov_iter *iter)
>   struct address_space *mapping = iocb->ki_filp->f_mapping;
>   struct inode *inode = mapping->host;
>   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + struct f2fs_inode_info *fi = F2FS_I(inode);
>   size_t count = iov_iter_count(iter);
>   loff_t offset = iocb->ki_pos;
>   int rw = iov_iter_rw(iter);
>   int err;
>   enum rw_hint hint = iocb->ki_hint;
>   int whint_mode = F2FS_OPTION(sbi).whint_mode;
> + bool do_opu;
>  
>   err = check_direct_IO(inode, iter, offset);
>   if (err)
>   return err < 0 ? err : 0;
>  
> - if (f2fs_force_buffered_io(inode, rw))
> + if (f2fs_force_buffered_io(inode, iocb, iter))
>   return 0;
>  
> + do_opu = allow_outplace_dio(inode, iocb, iter);
> +
>   trace_f2fs_direct_IO_enter(inode, offset, count, rw);
>  
>   if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
>   iocb->ki_hint = WRITE_LIFE_NOT_SET;
>  
> - if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) {
> - if (iocb->ki_flags & IOCB_NOWAIT) {
> + if (iocb->ki_flags & IOCB_NOWAIT) {
> + if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
> + iocb->ki_hint = hint;
> + err = -EAGAIN;
> + goto out;
> + }
> + if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
> + up_read(&fi->i_gc_rwsem[rw]);
>   iocb->ki_hint = hint;
>   err = -EAGAIN;
>   goto out;
>   }
> - down_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
> + } else {
> + down_read(&fi->i_gc_rwsem[rw]);
> + if (do_opu)
> + down_read(&fi->i_gc_rwse

Re: [f2fs-dev] [PATCH RESEND v2] Revert: "f2fs: check last page index in cached bio to decide submission"

2018-09-27 Thread Sahitya Tummala
On Wed, Sep 26, 2018 at 10:44:56PM +0800, Chao Yu wrote:
> From: Chao Yu 
> 
> There is one case that we can leave bio in f2fs, result in hanging
> page writeback waiter.
> 
> Thread A  Thread B
> - f2fs_write_cache_pages
>  - f2fs_submit_page_write
>  page #0 cached in bio #0 of cold log
>  - f2fs_submit_page_write
>  page #1 cached in bio #1 of warm log
>   - f2fs_write_cache_pages
>- f2fs_submit_page_write
>bio is full, submit bio #1 contain 
> page #1
>  - f2fs_submit_merged_write_cond(, page #1)
>  fail to submit bio #0 due to page #1 is not in any cached bios.
> 
> Signed-off-by: Chao Yu 
> ---
> v2:
> - rebase to dev-test
>  fs/f2fs/checkpoint.c |  2 +-
>  fs/f2fs/data.c   | 38 +++---
>  fs/f2fs/f2fs.h   |  4 ++--
>  fs/f2fs/node.c   | 12 ++--
>  fs/f2fs/segment.c| 11 +--
>  5 files changed, 33 insertions(+), 34 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index d624d7983197..2f63b362ce63 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -280,7 +280,7 @@ static int __f2fs_write_meta_page(struct page *page,
>  
>   if (wbc->for_reclaim)
>   f2fs_submit_merged_write_cond(sbi, page->mapping->host,
> - 0, page->index, META);
> + page, 0, META);
>  
>   unlock_page(page);
>  
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index be69b6ac6870..b03f9d163175 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -322,8 +322,8 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>   io->bio = NULL;
>  }
>  
> -static bool __has_merged_page(struct f2fs_bio_info *io,
> - struct inode *inode, nid_t ino, pgoff_t idx)
> +static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
> + struct page *page, nid_t ino)
>  {
>   struct bio_vec *bvec;
>   struct page *target;
> @@ -332,7 +332,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io,
>   if (!io->bio)
>   return false;
>  
> - if (!inode && !ino)
> + if (!inode && !page && !ino)
>   return true;
>  
>   bio_for_each_segment_all(bvec, io->bio, i) {
> @@ -342,11 +342,10 @@ static bool __has_merged_page(struct f2fs_bio_info *io,
>   else
>   target = fscrypt_control_page(bvec->bv_page);
>  
> - if (idx != target->index)
> - continue;
> -
>   if (inode && inode == target->mapping->host)
>   return true;
> + if (page && page == target)
> + return true;

If both inode and page are passed, then I think we should check for page first,
followed by the inode check. What do you think?
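
As a rough sketch, the suggested ordering would look like this (the same checks
as in the hunk above, with the page comparison moved first):

	if (page && page == target)
		return true;
	if (inode && inode == target->mapping->host)
		return true;
	if (ino && ino == ino_of_node(target))
		return true;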

>   if (ino && ino == ino_of_node(target))
>   return true;
>   }
> @@ -355,7 +354,8 @@ static bool __has_merged_page(struct f2fs_bio_info *io,
>  }
>  
>  static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
> - nid_t ino, pgoff_t idx, enum page_type type)
> + struct page *page, nid_t ino,
> + enum page_type type)
>  {
>   enum page_type btype = PAGE_TYPE_OF_BIO(type);
>   enum temp_type temp;
> @@ -366,7 +366,7 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, 
> struct inode *inode,
>   io = sbi->write_io[btype] + temp;
>  
>   down_read(&io->io_rwsem);
> - ret = __has_merged_page(io, inode, ino, idx);
> + ret = __has_merged_page(io, inode, page, ino);
>   up_read(&io->io_rwsem);
>  
>   /* TODO: use HOT temp only for meta pages now. */
> @@ -397,12 +397,12 @@ static void __f2fs_submit_merged_write(struct 
> f2fs_sb_info *sbi,
>  }
>  
>  static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
> - struct inode *inode, nid_t ino, pgoff_t idx,
> - enum page_type type, bool force)
> + struct inode *inode, struct page *page,
> + nid_t ino, enum page_type type, bool force)
>  {
>   enum temp_type temp;
>  
> - if (!force && !has_merged_page(sbi, inode, ino, idx, type))
> + if (!force && !has_merged_page(sbi, inode, page, ino, type))
>   return;
>  
>   for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
> @@ -421,10 +421,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, 
> enum page_type type)
>  }
>  
>  void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
> - struct inode *inode, nid_t ino, p

Re: [f2fs-dev] [PATCH v3] f2fs: submit cached bio to avoid endless PageWriteback

2018-09-30 Thread Sahitya Tummala
On Wed, Sep 26, 2018 at 12:20:38PM +0800, Chao Yu wrote:

Hi Chao, Jaegeuk,

Is there any further any conclusion on this thread?

I think we still need this patch in addition to another patch from Chao -
"Revert: "f2fs: check last page index in cached bio to decide submission""
to make sure it covers the encrypted data block path as well.

Thanks,
Sahitya.

> On 2018/9/26 11:32, Jaegeuk Kim wrote:
> > On 09/26, Chao Yu wrote:
> >> On 2018/9/26 9:42, Jaegeuk Kim wrote:
> >>> On 09/26, Chao Yu wrote:
>  On 2018/9/26 8:20, Jaegeuk Kim wrote:
> > On 09/21, Chao Yu wrote:
> >> On 2018/9/18 10:14, Chao Yu wrote:
> >>> On 2018/9/18 10:02, Jaegeuk Kim wrote:
>  On 09/18, Chao Yu wrote:
> > On 2018/9/18 9:37, Jaegeuk Kim wrote:
> >> On 09/18, Chao Yu wrote:
> >>> On 2018/9/18 9:04, Jaegeuk Kim wrote:
>  On 09/13, Chao Yu wrote:
> > From: Chao Yu 
> >
> > When migrating encrypted block from background GC thread, we only add
> > them into f2fs inner bio cache, but forget to submit the cached bio, it
> > may cause potential deadlock when we are waiting page writebacked, fix
> > it.
> >
> > Signed-off-by: Chao Yu 
> > ---
> > v3:
> > clean up codes suggested by Jaegeuk.
> >  fs/f2fs/f2fs.h |  2 +-
> >  fs/f2fs/gc.c   | 71 
> > +++---
> >  fs/f2fs/node.c | 13 ++---
> >  3 files changed, 61 insertions(+), 25 deletions(-)
> >
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index b676b82312e0..917b2ca76aac 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -2869,7 +2869,7 @@ struct page *f2fs_new_node_page(struct 
> > dnode_of_data *dn, unsigned int ofs);
> >  void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
> >  struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, 
> > pgoff_t nid);
> >  struct page *f2fs_get_node_page_ra(struct page *parent, int 
> > start);
> > -void f2fs_move_node_page(struct page *node_page, int gc_type);
> > +int f2fs_move_node_page(struct page *node_page, int gc_type);
> >  int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct 
> > inode *inode,
> > struct writeback_control *wbc, bool 
> > atomic,
> > unsigned int *seq_id);
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index a4c1a419611d..f57622cfe058 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -461,7 +461,7 @@ static int check_valid_map(struct 
> > f2fs_sb_info *sbi,
> >   * On validity, copy that node with cold status, otherwise 
> > (invalid node)
> >   * ignore that.
> >   */
> > -static void gc_node_segment(struct f2fs_sb_info *sbi,
> > +static int gc_node_segment(struct f2fs_sb_info *sbi,
> > struct f2fs_summary *sum, unsigned int segno, 
> > int gc_type)
> >  {
> > struct f2fs_summary *entry;
> > @@ -469,6 +469,7 @@ static void gc_node_segment(struct 
> > f2fs_sb_info *sbi,
> > int off;
> > int phase = 0;
> > bool fggc = (gc_type == FG_GC);
> > +   int submitted = 0;
> >  
> > start_addr = START_BLOCK(sbi, segno);
> >  
> > @@ -482,10 +483,11 @@ static void gc_node_segment(struct 
> > f2fs_sb_info *sbi,
> > nid_t nid = le32_to_cpu(entry->nid);
> > struct page *node_page;
> > struct node_info ni;
> > +   int err;
> >  
> > /* stop BG_GC if there is not enough free 
> > sections. */
> > if (gc_type == BG_GC && 
> > has_not_enough_free_secs(sbi, 0, 0))
> > -   return;
> > +   return submitted;
> >  
> > if (check_valid_map(sbi, segno, off) == 0)
> > continue;
> > @@ -522,7 +524,9 @@ static void gc_node_segment(struct 
> > f2fs_sb_info *sbi,
> > continue;
> > }
> >  
> > -   f2fs_move_node_page(node_page, gc_type);
> > +  

[f2fs-dev] [PATCH 2/2] f2fs: update REQ_TIME in f2fs_cross_rename()

2018-10-04 Thread Sahitya Tummala
Update REQ_TIME in the missing path - f2fs_cross_rename().

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/namei.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a146327..e37af23 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1154,6 +1154,8 @@ static int f2fs_cross_rename(struct inode *old_dir, 
struct dentry *old_dentry,
 
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
f2fs_sync_fs(sbi->sb, 1);
+
+   f2fs_update_time(sbi, REQ_TIME);
return 0;
 out_new_dir:
if (new_dir_entry) {
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 1/2] f2fs: do not update REQ_TIME in case of error conditions

2018-10-04 Thread Sahitya Tummala
REQ_TIME should be updated only in success cases, as is done at all other
places in the file system.

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/dir.c  | 2 +-
 fs/f2fs/file.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c77a580..e02db5d 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -655,9 +655,9 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
f2fs_put_page(page, 1);
 
clear_inode_flag(inode, FI_NEW_INODE);
+   f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
 fail:
up_write(&F2FS_I(inode)->i_sem);
-   f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return err;
 }
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index b1aaa73..543c742 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -106,6 +106,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
SetPageUptodate(page);
 
f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE);
+   f2fs_update_time(sbi, REQ_TIME);
 
trace_f2fs_vm_page_mkwrite(page, DATA);
 out_sem:
@@ -114,7 +115,6 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_balance_fs(sbi, dn.node_changed);
 
sb_end_pagefault(inode->i_sb);
-   f2fs_update_time(sbi, REQ_TIME);
 err:
return block_page_mkwrite_return(err);
 }
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH] f2fs: fix data corruption issue with hardware encryption

2018-10-09 Thread Sahitya Tummala
Direct IO can be used in case of hardware encryption. The following
scenario results in a data corruption issue in this path -

Thread A -                          Thread B -
-> write file#1 in direct IO
                                    -> GC gets kicked in
                                    -> GC submitted bio on meta mapping
                                       for file#1, but pending completion
-> write file#1 again with new data
   in direct IO
                                    -> GC bio gets completed now
                                    -> GC writes old data to the new
                                       location and thus file#1 is
                                       corrupted.

Fix this by submitting and waiting for pending io on meta mapping
for direct IO case in f2fs_map_blocks().

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/data.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9ef6f1f..7b2fef0 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1028,6 +1028,12 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
map->m_pblk = ei.blk + pgofs - ei.fofs;
map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
map->m_flags = F2FS_MAP_MAPPED;
+   /* for HW encryption, but to avoid potential issue in future */
+   if (flag == F2FS_GET_BLOCK_DIO) {
+   blkaddr = map->m_pblk;
+   for (; blkaddr < map->m_pblk + map->m_len; blkaddr++)
+   f2fs_wait_on_block_writeback(sbi, blkaddr);
+   }
if (map->m_next_extent)
*map->m_next_extent = pgofs + map->m_len;
goto out;
@@ -1188,6 +1194,12 @@ int f2fs_map_blocks(struct inode *inode, struct 
f2fs_map_blocks *map,
goto next_dnode;
 
 sync_out:
+   /* for hardware encryption, but to avoid potential issue in future */
+   if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
+   blkaddr = map->m_pblk;
+   for (; blkaddr < map->m_pblk + map->m_len; blkaddr++)
+   f2fs_wait_on_block_writeback(sbi, blkaddr);
+   }
if (flag == F2FS_GET_BLOCK_PRECACHE) {
if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk;
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: fix data corruption issue with hardware encryption

2018-10-10 Thread Sahitya Tummala
On Wed, Oct 10, 2018 at 02:34:02PM -0700, Jaegeuk Kim wrote:
> On 10/10, Sahitya Tummala wrote:
> > Direct IO can be used in case of hardware encryption. The following
> > scenario results into data corruption issue in this path -
> > 
> > Thread A -                          Thread B -
> > -> write file#1 in direct IO
> >                                     -> GC gets kicked in
> >                                     -> GC submitted bio on meta mapping
> >                                        for file#1, but pending completion
> > -> write file#1 again with new data
> >    in direct IO
> >                                     -> GC bio gets completed now
> >                                     -> GC writes old data to the new
> >                                        location and thus file#1 is
> >                                        corrupted.
> > 
> > Fix this by submitting and waiting for pending io on meta mapping
> > for direct IO case in f2fs_map_blocks().
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> >  fs/f2fs/data.c | 12 
> >  1 file changed, 12 insertions(+)
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 9ef6f1f..7b2fef0 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1028,6 +1028,12 @@ int f2fs_map_blocks(struct inode *inode, struct 
> > f2fs_map_blocks *map,
> > map->m_pblk = ei.blk + pgofs - ei.fofs;
> > map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
> > map->m_flags = F2FS_MAP_MAPPED;
> > +   /* for HW encryption, but to avoid potential issue in future */
> > +   if (flag == F2FS_GET_BLOCK_DIO) {
> > +   blkaddr = map->m_pblk;
> > +   for (; blkaddr < map->m_pblk + map->m_len; blkaddr++)
> > +   f2fs_wait_on_block_writeback(sbi, blkaddr);
> 
> Do we need this? IIRC, DIO would give create=1.

Yes, we need it. When we are overwriting an existing file, DIO calls
f2fs_map_blocks() with create=0. From the DIO code, I see that this happens
because blockdev_direct_IO() passes the dio flag DIO_SKIP_HOLES, and then the
code below in get_more_blocks() sets create=0 for that case.

create = dio->op == REQ_OP_WRITE;
if (dio->flags & DIO_SKIP_HOLES) {
if (fs_startblk <= ((i_size_read(dio->inode) - 1) >>
i_blkbits))
create = 0;
}

ret = (*sdio->get_block)(dio->inode, fs_startblk,
map_bh, create);

> 
> > +   }
> > if (map->m_next_extent)
> > *map->m_next_extent = pgofs + map->m_len;
> > goto out;
> > @@ -1188,6 +1194,12 @@ int f2fs_map_blocks(struct inode *inode, struct 
> > f2fs_map_blocks *map,
> > goto next_dnode;
> >  
> >  sync_out:
> > +   /* for hardware encryption, but to avoid potential issue in future */
> > +   if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
> > +   blkaddr = map->m_pblk;
> > +   for (; blkaddr < map->m_pblk + map->m_len; blkaddr++)
> > +   f2fs_wait_on_block_writeback(sbi, blkaddr);
> > +   }
> > if (flag == F2FS_GET_BLOCK_PRECACHE) {
> > if (map->m_flags & F2FS_MAP_MAPPED) {
> > unsigned int ofs = start_pgofs - map->m_lblk;
> > -- 
> > Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, 
> > Inc.
> > Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
> > Foundation Collaborative Project.

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: fix data corruption issue with hardware encryption

2018-10-10 Thread Sahitya Tummala
On Wed, Oct 10, 2018 at 08:05:44PM -0700, Jaegeuk Kim wrote:
> On 10/10, Jaegeuk Kim wrote:
> > On 10/11, Sahitya Tummala wrote:
> > > On Wed, Oct 10, 2018 at 02:34:02PM -0700, Jaegeuk Kim wrote:
> > > > On 10/10, Sahitya Tummala wrote:
> > > > > Direct IO can be used in case of hardware encryption. The following
> > > > > scenario results into data corruption issue in this path -
> > > > > 
> > > > > Thread A -                          Thread B -
> > > > > -> write file#1 in direct IO
> > > > >                                     -> GC gets kicked in
> > > > >                                     -> GC submitted bio on meta mapping
> > > > >                                        for file#1, but pending completion
> > > > > -> write file#1 again with new data
> > > > >    in direct IO
> > > > >                                     -> GC bio gets completed now
> > > > >                                     -> GC writes old data to the new
> > > > >                                        location and thus file#1 is
> > > > >                                        corrupted.
> > > > > 
> > > > > Fix this by submitting and waiting for pending io on meta mapping
> > > > > for direct IO case in f2fs_map_blocks().
> > > > > 
> > > > > Signed-off-by: Sahitya Tummala 
> > > > > ---
> > > > >  fs/f2fs/data.c | 12 
> > > > >  1 file changed, 12 insertions(+)
> > > > > 
> > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > > > index 9ef6f1f..7b2fef0 100644
> > > > > --- a/fs/f2fs/data.c
> > > > > +++ b/fs/f2fs/data.c
> > > > > @@ -1028,6 +1028,12 @@ int f2fs_map_blocks(struct inode *inode, 
> > > > > struct f2fs_map_blocks *map,
> > > > >   map->m_pblk = ei.blk + pgofs - ei.fofs;
> > > > >   map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - 
> > > > > pgofs);
> > > > >   map->m_flags = F2FS_MAP_MAPPED;
> > > > > + /* for HW encryption, but to avoid potential issue in 
> > > > > future */
> > > > > + if (flag == F2FS_GET_BLOCK_DIO) {
> > > > > + blkaddr = map->m_pblk;
> > > > > + for (; blkaddr < map->m_pblk + map->m_len; 
> > > > > blkaddr++)
> > > > > + f2fs_wait_on_block_writeback(sbi, 
> > > > > blkaddr);
> > > > 
> > > > Do we need this? IIRC, DIO would give create=1.
> > > 
> > > Yes, we need it. When we are overwriting an existing file, DIO calls
> > > f2fs_map_blocks() with create=0. From the DIO code, I see that this 
> > > happens
> > > because blockdev_direct_IO() passes this dio flag DIO_SKIP_HOLES. And then
> > > in get_more_blocks(), below code updates create=0, when we are overwriting
> > > an existing file.
> > > 
> > > create = dio->op == REQ_OP_WRITE;
> > > if (dio->flags & DIO_SKIP_HOLES) {
> > > if (fs_startblk <= ((i_size_read(dio->inode) - 1) 
> > > >>
> > > i_blkbits))
> > > create = 0;
> > > }
> > > 
> > > ret = (*sdio->get_block)(dio->inode, fs_startblk,
> > > map_bh, create);
> > > 
> > 
> > Got it.
> > How about this?
> > 
> 
> Sorry, this is v2.

Looks good to me. Thanks for updating it :)

> 
> From b78dd7b2e0317be18716b9496269e9792829f63e Mon Sep 17 00:00:00 2001
> From: Sahitya Tummala 
> Date: Wed, 10 Oct 2018 10:56:22 +0530
> Subject: [PATCH] f2fs: fix data corruption issue with hardware encryption
> 
> Direct IO can be used in case of hardware encryption. The following
> scenario results into data corruption issue in this path -
> 
> Thread A -  Thread B-
> -> write file#1 in direct IO
> -> GC gets kicked in
> -> GC submitted bio on meta mapping
>  for file#1, but pending 

[f2fs-dev] [PATCH 2/2] f2fs: fix memory leak of quota files extent tree and it's nodes

2018-11-22 Thread Sahitya Tummala
When there is a failure in f2fs_fill_super() after the quota is
enabled, f2fs_quota_off_umount() is called in the error handling.
Then sbi is freed up and f2fs_fill_super() retries again.
But f2fs_quota_off_umount() doesn't guarantee that the quota files'
extent trees/nodes are removed/freed; it just adds them to
sbi->zombie_list if those files are still referenced. In the next retry,
quota is enabled again with new extent trees and nodes, leaking the
previously allocated memory.

Fix this by cleaning up the sbi->zombie_list before freeing sbi and
before the next retry.

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/extent_cache.c | 21 +
 fs/f2fs/f2fs.h |  1 +
 fs/f2fs/super.c|  5 +
 3 files changed, 27 insertions(+)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 763ba83..c2bcd88 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -629,6 +629,27 @@ static void f2fs_update_extent_tree_range(struct inode 
*inode,
f2fs_mark_inode_dirty_sync(inode, true);
 }
 
+void f2fs_cleanup_zombie_list(struct f2fs_sb_info *sbi)
+{
+   struct extent_tree *et, *next;
+
+   mutex_lock(&sbi->extent_tree_lock);
+   list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
+   if (atomic_read(&et->node_cnt)) {
+   write_lock(&et->lock);
+   __free_extent_tree(sbi, et);
+   write_unlock(&et->lock);
+   }
+   f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
+   list_del_init(&et->list);
+   radix_tree_delete(&sbi->extent_tree_root, et->ino);
+   kmem_cache_free(extent_tree_slab, et);
+   atomic_dec(&sbi->total_ext_tree);
+   atomic_dec(&sbi->total_zombie_tree);
+   }
+   mutex_unlock(&sbi->extent_tree_lock);
+}
+
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 {
struct extent_tree *et, *next;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index db8a919..6807815 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3419,6 +3419,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data 
*dn,
 void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi);
 int __init f2fs_create_extent_cache(void);
 void f2fs_destroy_extent_cache(void);
+void f2fs_cleanup_zombie_list(struct f2fs_sb_info *sbi);
 
 /*
  * sysfs.c
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f41ac43..521fe3f 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3023,6 +3023,11 @@ void f2fs_cleanup_extent_cache(struct f2fs_sb_info *sbi)
 
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list)
f2fs_destroy_extent_tree(inode, true);
+
+   f2fs_cleanup_zombie_list(sbi);
+
+   f2fs_bug_on(sbi, !list_empty(&sbi->zombie_list));
+   f2fs_bug_on(sbi, !list_empty(&sbi->extent_list));
 }
 
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 1/2] f2fs: fix sbi->extent_list corruption issue

2018-11-22 Thread Sahitya Tummala
When there is a failure in f2fs_fill_super() after/during
the recovery of fsync'd nodes, it frees the current sbi and
retries again. This time the mount is successful, but the files
that got recovered before the retry still hold their extent trees,
whose extent node lists are corrupted since sbi and sbi->extent_list
are freed up. The list_del corruption issue is observed when the
file system is getting unmounted and those recovered files' extent
nodes are being freed up in the below context.

list_del corruption. prev->next should be fff1e1ef5480, but was (null)
<...>
kernel BUG at kernel/msm-4.14/lib/list_debug.c:53!
task: fff1f46f2280 task.stack: ff8008068000
lr : __list_del_entry_valid+0x94/0xb4
pc : __list_del_entry_valid+0x94/0xb4
<...>
Call trace:
__list_del_entry_valid+0x94/0xb4
__release_extent_node+0xb0/0x114
__free_extent_tree+0x58/0x7c
f2fs_shrink_extent_tree+0xdc/0x3b0
f2fs_leave_shrinker+0x28/0x7c
f2fs_put_super+0xfc/0x1e0
generic_shutdown_super+0x70/0xf4
kill_block_super+0x2c/0x5c
kill_f2fs_super+0x44/0x50
deactivate_locked_super+0x60/0x8c
deactivate_super+0x68/0x74
cleanup_mnt+0x40/0x78
__cleanup_mnt+0x1c/0x28
task_work_run+0x48/0xd0
do_notify_resume+0x678/0xe98
work_pending+0x8/0x14

Fix this by cleaning up the extent trees of those recovered files
before freeing up sbi and before the next retry.

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/extent_cache.c |  6 +-
 fs/f2fs/f2fs.h |  2 +-
 fs/f2fs/inode.c|  2 +-
 fs/f2fs/super.c| 10 ++
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 1cb0fcc..763ba83 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -743,7 +743,7 @@ void f2fs_drop_extent_tree(struct inode *inode)
f2fs_mark_inode_dirty_sync(inode, true);
 }
 
-void f2fs_destroy_extent_tree(struct inode *inode)
+void f2fs_destroy_extent_tree(struct inode *inode, bool force)
 {
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
@@ -752,6 +752,9 @@ void f2fs_destroy_extent_tree(struct inode *inode)
if (!et)
return;
 
+   if (force)
+   goto destroy_et;
+
if (inode->i_nlink && !is_bad_inode(inode) &&
atomic_read(&et->node_cnt)) {
mutex_lock(&sbi->extent_tree_lock);
@@ -761,6 +764,7 @@ void f2fs_destroy_extent_tree(struct inode *inode)
return;
}
 
+destroy_et:
/* free all extent info belong to this extent tree */
node_cnt = f2fs_destroy_extent_node(inode);
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1e03197..db8a919 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3410,7 +3410,7 @@ bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info 
*sbi,
 bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
 void f2fs_drop_extent_tree(struct inode *inode);
 unsigned int f2fs_destroy_extent_node(struct inode *inode);
-void f2fs_destroy_extent_tree(struct inode *inode);
+void f2fs_destroy_extent_tree(struct inode *inode, bool force);
 bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei);
 void f2fs_update_extent_cache(struct dnode_of_data *dn);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 91ceee0..39e3ade3 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -649,7 +649,7 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_bug_on(sbi, get_dirty_pages(inode));
f2fs_remove_dirty_inode(inode);
 
-   f2fs_destroy_extent_tree(inode);
+   f2fs_destroy_extent_tree(inode, false);
 
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index af58b2c..f41ac43 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3016,6 +3016,15 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info 
*sbi)
sbi->readdir_ra = 1;
 }
 
+void f2fs_cleanup_extent_cache(struct f2fs_sb_info *sbi)
+{
+   struct super_block *sb = sbi->sb;
+   struct inode *inode, *next;
+
+   list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list)
+   f2fs_destroy_extent_tree(inode, true);
+}
+
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 {
struct f2fs_sb_info *sbi;
@@ -3402,6 +3411,7 @@ static int f2fs_fill_super(struct super_block *sb, void 
*data, int silent)
 * falls into an infinite loop in f2fs_sync_meta_pages().
 */
truncate_inode_pages_final(META_MAPPING(sbi));
+   f2fs_cleanup_extent_cache(sbi);
f2fs_unregister_sysfs(sbi);
 free_root_inode:
dput(sb->s_root);
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, In

Re: [f2fs-dev] [PATCH 1/2] f2fs: fix sbi->extent_list corruption issue

2018-11-22 Thread Sahitya Tummala
On Thu, Nov 22, 2018 at 04:11:07AM -0800, Jaegeuk Kim wrote:
> On 11/22, Chao Yu wrote:
> > On 2018/11/22 18:59, Sahitya Tummala wrote:
> > > When there is a failure in f2fs_fill_super() after/during
> > > the recovery of fsync'd nodes, it frees the current sbi and
> > > retries again. This time the mount is successful, but the files
> > > that got recovered before retry, still holds the extent tree,
> > > whose extent nodes list is corrupted since sbi and sbi->extent_list
> > > is freed up. The list_del corruption issue is observed when the
> > > file system is getting unmounted and when those recoverd files extent
> > > node is being freed up in the below context.
> > > 
> > > list_del corruption. prev->next should be fff1e1ef5480, but was (null)
> > > <...>
> > > kernel BUG at kernel/msm-4.14/lib/list_debug.c:53!
> > > task: fff1f46f2280 task.stack: ff8008068000
> > > lr : __list_del_entry_valid+0x94/0xb4
> > > pc : __list_del_entry_valid+0x94/0xb4
> > > <...>
> > > Call trace:
> > > __list_del_entry_valid+0x94/0xb4
> > > __release_extent_node+0xb0/0x114
> > > __free_extent_tree+0x58/0x7c
> > > f2fs_shrink_extent_tree+0xdc/0x3b0
> > > f2fs_leave_shrinker+0x28/0x7c
> > > f2fs_put_super+0xfc/0x1e0
> > > generic_shutdown_super+0x70/0xf4
> > > kill_block_super+0x2c/0x5c
> > > kill_f2fs_super+0x44/0x50
> > > deactivate_locked_super+0x60/0x8c
> > > deactivate_super+0x68/0x74
> > > cleanup_mnt+0x40/0x78
> > > __cleanup_mnt+0x1c/0x28
> > > task_work_run+0x48/0xd0
> > > do_notify_resume+0x678/0xe98
> > > work_pending+0x8/0x14
> > > 
> > > Fix this by cleaning up the extent tree of those recovered files
> > > before freeing up sbi and before next retry.
> > 
> > Would it be more clear to call shrink_dcache_sb earlier to invalid all
> > inodes and call f2fs_shrink_extent_tree release cached entries and trees in
> > error path?
> 
> Agreed.
> 
I have tried doing shrink_dcache_sb() earlier but that doesn't call
f2fs_shrink_extent_tree(). So I have moved f2fs_join_shrinker() earlier and
tried calling f2fs_leave_shrinker() in the error path. That helps to clean up
the cached extent nodes. However, I see that the extent tree is left intact for
those recovered files, which should not be a problem as it gets freed as part
of the next umount/rm. Only one small problem I see with this is - during
rm/umount, when those previously recovered files are being evicted, the extent
tree memory gets freed, but the counter sbi->total_ext_tree becomes invalid as
these recovered files are not present in the current sbi->extent_tree_root. So
I have come up with this patch below to fix this. Let me know if this looks good?

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 1cb0fcc..3e4801e 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -654,9 +654,9 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int nr_shrink)
}
f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
list_del_init(&et->list);
-   radix_tree_delete(&sbi->extent_tree_root, et->ino);
+   if (radix_tree_delete(&sbi->extent_tree_root, et->ino))
+   atomic_dec(&sbi->total_ext_tree);
kmem_cache_free(extent_tree_slab, et);
-   atomic_dec(&sbi->total_ext_tree);
atomic_dec(&sbi->total_zombie_tree);
tree_cnt++;
 
@@ -767,7 +767,8 @@ void f2fs_destroy_extent_tree(struct inode *inode)
/* delete extent tree entry in radix tree */
mutex_lock(&sbi->extent_tree_lock);
f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
-   radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
+   if (radix_tree_delete(&sbi->extent_tree_root, inode->i_ino))
+   atomic_dec(&sbi->total_ext_tree);
kmem_cache_free(extent_tree_slab, et);
atomic_dec(&sbi->total_ext_tree);
mutex_unlock(&sbi->extent_tree_lock);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index af58b2c..3e5588f 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3295,6 +3295,7 @@ static int f2fs_fill_super(struct super_block *sb, void 
*data, int silent)
if (err)
goto free_root_inode;
 
+   f2fs_join_shrinker(sbi);
 #ifdef CONFIG_QUOTA
/* Enable quota usage during mount */
if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
@@ -3379,8 +3380,6 @@ static int f2fs_fill_super(struct super_block *sb, void 
*data, int silent

[f2fs-dev] [PATCH] f2fs: fix to allow node segment for GC by ioctl path

2018-11-22 Thread Sahitya Tummala
Allow node type segments also to be GC'd via f2fs ioctls
F2FS_IOC_GARBAGE_COLLECT and F2FS_IOC_GARBAGE_COLLECT_RANGE.

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/gc.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index a07241f..e4689c6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -323,8 +323,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
p.min_cost = get_max_cost(sbi, &p);
 
if (*result != NULL_SEGNO) {
-   if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
-   get_valid_blocks(sbi, *result, false) &&
+   if (get_valid_blocks(sbi, *result, false) &&
!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
p.min_segno = *result;
goto out;
@@ -404,11 +403,12 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
}
*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
 
+   }
+out:
+   if (p.min_segno != NULL_SEGNO)
trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
sbi->cur_victim_sec,
prefree_segments(sbi), free_segments(sbi));
-   }
-out:
mutex_unlock(&dirty_i->seglist_lock);
 
return (p.min_segno == NULL_SEGNO) ? 0 : 1;
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH 1/2] f2fs: fix sbi->extent_list corruption issue

2018-11-23 Thread Sahitya Tummala
On Fri, Nov 23, 2018 at 05:52:16PM +0800, Chao Yu wrote:
> On 2018/11/23 11:42, Sahitya Tummala wrote:
> > On Thu, Nov 22, 2018 at 04:11:07AM -0800, Jaegeuk Kim wrote:
> >> On 11/22, Chao Yu wrote:
> >>> On 2018/11/22 18:59, Sahitya Tummala wrote:
> >>>> When there is a failure in f2fs_fill_super() after/during
> >>>> the recovery of fsync'd nodes, it frees the current sbi and
> >>>> retries again. This time the mount is successful, but the files
> >>>> that got recovered before retry, still holds the extent tree,
> >>>> whose extent nodes list is corrupted since sbi and sbi->extent_list
> >>>> is freed up. The list_del corruption issue is observed when the
> >>>> file system is getting unmounted and when those recoverd files extent
> >>>> node is being freed up in the below context.
> >>>>
> >>>> list_del corruption. prev->next should be fff1e1ef5480, but was 
> >>>> (null)
> >>>> <...>
> >>>> kernel BUG at kernel/msm-4.14/lib/list_debug.c:53!
> >>>> task: fff1f46f2280 task.stack: ff8008068000
> >>>> lr : __list_del_entry_valid+0x94/0xb4
> >>>> pc : __list_del_entry_valid+0x94/0xb4
> >>>> <...>
> >>>> Call trace:
> >>>> __list_del_entry_valid+0x94/0xb4
> >>>> __release_extent_node+0xb0/0x114
> >>>> __free_extent_tree+0x58/0x7c
> >>>> f2fs_shrink_extent_tree+0xdc/0x3b0
> >>>> f2fs_leave_shrinker+0x28/0x7c
> >>>> f2fs_put_super+0xfc/0x1e0
> >>>> generic_shutdown_super+0x70/0xf4
> >>>> kill_block_super+0x2c/0x5c
> >>>> kill_f2fs_super+0x44/0x50
> >>>> deactivate_locked_super+0x60/0x8c
> >>>> deactivate_super+0x68/0x74
> >>>> cleanup_mnt+0x40/0x78
> >>>> __cleanup_mnt+0x1c/0x28
> >>>> task_work_run+0x48/0xd0
> >>>> do_notify_resume+0x678/0xe98
> >>>> work_pending+0x8/0x14
> >>>>
> >>>> Fix this by cleaning up the extent tree of those recovered files
> >>>> before freeing up sbi and before next retry.
> >>>
> >>> Would it be more clear to call shrink_dcache_sb earlier to invalid all
> >>> inodes and call f2fs_shrink_extent_tree release cached entries and trees 
> >>> in
> >>> error path?
> >>
> >> Agreed.
> >>
> > I have tried doing shrink_dcache_sb() earlier but that doesn't call
> > f2fs_shrink_extent_tree(). So I have moved f2fs_join_shrinker() earlier and 
> > tried calling f2fs_leave_shrinker() in the error path. That helps to clean 
> > up
> > the cached extent nodes. However, I see that extent tree is left intact for
> 
> I didn't get it, you mean, in error path, after we call shrink_dcache_sb &
> f2fs_leave_shrinker, for those recovered files, their extent nodes were
> evicted, but their extent trees are still in cache?
> 

Yes, only the extent tree is present, with zero extent nodes, as
f2fs_leave_shrinker() only clears the extent nodes from
sbi->extent_list.

> > those recovered files, which should not be a problem as it gets freed as 
> > part
> > of next umount/rm. Only one small problem I see with this is - during 
> > rm/umount when
> > those previoulsy recovered files are being evicted, extent tree memory gets
> > free'd but the counter sbi->total_ext_tree gets invalid as these recovered
> > files are not present as part of current sbi->extent_tree_root. So i have 
> > come
> > up with this patch below to fix this. Let me know if this looks good?
> > 
> > diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
> > index 1cb0fcc..3e4801e 100644
> > --- a/fs/f2fs/extent_cache.c
> > +++ b/fs/f2fs/extent_cache.c
> > @@ -654,9 +654,9 @@ unsigned int f2fs_shrink_extent_tree(struct 
> > f2fs_sb_info *sbi, int nr_shrink)
> > }
> > f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
> > list_del_init(&et->list);
> > -   radix_tree_delete(&sbi->extent_tree_root, et->ino);
> > +   if (radix_tree_delete(&sbi->extent_tree_root, et->ino))
> > +   atomic_dec(&sbi->total_ext_tree);
> > kmem_cache_free(extent_tree_slab, et);
> > -   atomic_dec(&sbi->total_ext_tree);
> > atomic_dec(&sbi->total_zombie_tree);
> > tr

Re: [f2fs-dev] [PATCH 1/2] f2fs: fix sbi->extent_list corruption issue

2018-11-25 Thread Sahitya Tummala
On Sat, Nov 24, 2018 at 05:36:53PM +0800, Chao Yu wrote:
> On 2018/11/23 18:19, Sahitya Tummala wrote:
> > On Fri, Nov 23, 2018 at 05:52:16PM +0800, Chao Yu wrote:
> >> On 2018/11/23 11:42, Sahitya Tummala wrote:
> >>> On Thu, Nov 22, 2018 at 04:11:07AM -0800, Jaegeuk Kim wrote:
> >>>> On 11/22, Chao Yu wrote:
> >>>>> On 2018/11/22 18:59, Sahitya Tummala wrote:
> >>>>>> When there is a failure in f2fs_fill_super() after/during
> >>>>>> the recovery of fsync'd nodes, it frees the current sbi and
> >>>>>> retries again. This time the mount is successful, but the files
> >>>>>> that got recovered before retry, still holds the extent tree,
> >>>>>> whose extent nodes list is corrupted since sbi and sbi->extent_list
> >>>>>> is freed up. The list_del corruption issue is observed when the
> >>>>>> file system is getting unmounted and when those recoverd files extent
> >>>>>> node is being freed up in the below context.
> >>>>>>
> >>>>>> list_del corruption. prev->next should be fff1e1ef5480, but was 
> >>>>>> (null)
> >>>>>> <...>
> >>>>>> kernel BUG at kernel/msm-4.14/lib/list_debug.c:53!
> >>>>>> task: fff1f46f2280 task.stack: ff8008068000
> >>>>>> lr : __list_del_entry_valid+0x94/0xb4
> >>>>>> pc : __list_del_entry_valid+0x94/0xb4
> >>>>>> <...>
> >>>>>> Call trace:
> >>>>>> __list_del_entry_valid+0x94/0xb4
> >>>>>> __release_extent_node+0xb0/0x114
> >>>>>> __free_extent_tree+0x58/0x7c
> >>>>>> f2fs_shrink_extent_tree+0xdc/0x3b0
> >>>>>> f2fs_leave_shrinker+0x28/0x7c
> >>>>>> f2fs_put_super+0xfc/0x1e0
> >>>>>> generic_shutdown_super+0x70/0xf4
> >>>>>> kill_block_super+0x2c/0x5c
> >>>>>> kill_f2fs_super+0x44/0x50
> >>>>>> deactivate_locked_super+0x60/0x8c
> >>>>>> deactivate_super+0x68/0x74
> >>>>>> cleanup_mnt+0x40/0x78
> >>>>>> __cleanup_mnt+0x1c/0x28
> >>>>>> task_work_run+0x48/0xd0
> >>>>>> do_notify_resume+0x678/0xe98
> >>>>>> work_pending+0x8/0x14
> >>>>>>
> >>>>>> Fix this by cleaning up the extent tree of those recovered files
> >>>>>> before freeing up sbi and before next retry.
> >>>>>
> >>>>> Would it be more clear to call shrink_dcache_sb earlier to invalid all
> >>>>> inodes and call f2fs_shrink_extent_tree release cached entries and 
> >>>>> trees in
> >>>>> error path?
> >>>>
> >>>> Agreed.
> >>>>
> >>> I have tried doing shrink_dcache_sb() earlier but that doesn't call
> >>> f2fs_shrink_extent_tree(). So I have moved f2fs_join_shrinker() earlier 
> >>> and 
> >>> tried calling f2fs_leave_shrinker() in the error path. That helps to 
> >>> clean up
> >>> the cached extent nodes. However, I see that extent tree is left intact 
> >>> for
> >>
> >> I didn't get it, you mean, in error path, after we call shrink_dcache_sb &
> >> f2fs_leave_shrinker, for those recovered files, their extent nodes were
> >> evicted, but their extent trees are still in cache?
> >>
> > 
> > Yes, only extent tree is present with zero extent nodes as
> > f2fs_leave_shrinker() is only clearing the exntent nodes from
> > sbi->extent_list.
> 
> Oh, recovered inodes are in cache because we didn't call evict_inodes, so
> they still reference their extent trees...
> 
> How about calling evict_inodes after shrink_dcache_sb?
> 

Thanks Chao for your suggestion. Yes, calling evict_inodes() is working fine.
But before doing evict_inodes(), we should also do sync_filesystem(), otherwise
we hit this f2fs_bug_on() in f2fs_evict_inode():
	if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
		f2fs_bug_on(sbi, is_inode_flag_set(inode,
					FI_DIRTY_INODE));

I will post the updated patchset for review.
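
For reference, a minimal sketch of that cleanup ordering in the
f2fs_fill_super() error path (the helper name f2fs_cleanup_inodes() is
hypothetical here; __count_extent_cache() is the helper declared in the v2
patch below):

	static void f2fs_cleanup_inodes(struct f2fs_sb_info *sbi)
	{
		struct super_block *sb = sbi->sb;

		/* flush dirty inodes first, else f2fs_evict_inode() trips the
		 * FI_DIRTY_INODE f2fs_bug_on() shown above */
		sync_filesystem(sb);
		/* drop dentries pinning the recovered inodes */
		shrink_dcache_sb(sb);
		/* evict them, destroying their per-inode extent trees */
		evict_inodes(sb);
		/* release whatever is still cached in the global extent cache */
		f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
	}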

Thanks.

> Thanks,
> 
> > 
> >>> those recovered files, which should not be a problem as it gets freed as 
> >>> part
> >>> of next umount/rm. Only o

Re: [f2fs-dev] [PATCH] f2fs: fix to allow node segment for GC by ioctl path

2018-11-25 Thread Sahitya Tummala
On Sat, Nov 24, 2018 at 06:23:55PM +0800, Chao Yu wrote:
> On 2018/11/23 13:12, Sahitya Tummala wrote:
> > Allow node type segments also to be GC'd via f2fs ioctls
> > F2FS_IOC_GARBAGE_COLLECT and F2FS_IOC_GARBAGE_COLLECT_RANGE.
> 
> IIRC, only F2FS_IOC_GARBAGE_COLLECT_RANGE pass a valid segment number via
> @result parameter in get_victim_by_default(), so in commit message, it
> needs to remove F2FS_IOC_GARBAGE_COLLECT, right?
> 

Yes Chao, my mistake. Thanks for pointing it out. I will update it
and send the patch for review.

> Thanks,
> 
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> >  fs/f2fs/gc.c | 8 
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index a07241f..e4689c6 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -323,8 +323,7 @@ static int get_victim_by_default(struct f2fs_sb_info 
> > *sbi,
> > p.min_cost = get_max_cost(sbi, &p);
> >  
> > if (*result != NULL_SEGNO) {
> > -   if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
> > -   get_valid_blocks(sbi, *result, false) &&
> > +   if (get_valid_blocks(sbi, *result, false) &&
> > !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
> > p.min_segno = *result;
> > goto out;
> > @@ -404,11 +403,12 @@ static int get_victim_by_default(struct f2fs_sb_info 
> > *sbi,
> > }
> > *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
> >  
> > +   }
> > +out:
> > +   if (p.min_segno != NULL_SEGNO)
> > trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
> > sbi->cur_victim_sec,
> > prefree_segments(sbi), free_segments(sbi));
> > -   }
> > -out:
> > mutex_unlock(&dirty_i->seglist_lock);
> >  
> > return (p.min_segno == NULL_SEGNO) ? 0 : 1;
> > 
> 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v2] f2fs: fix sbi->extent_list corruption issue

2018-11-25 Thread Sahitya Tummala
When there is a failure in f2fs_fill_super() after/during
the recovery of fsync'd nodes, it frees the current sbi and
retries again. This time the mount is successful, but the files
that got recovered before the retry still hold the extent tree,
whose extent nodes list is corrupted since sbi and sbi->extent_list
have been freed up. The list_del corruption issue is observed when the
file system is being unmounted and those recovered files' extent
nodes are being freed up in the below context.

list_del corruption. prev->next should be fff1e1ef5480, but was (null)
<...>
kernel BUG at kernel/msm-4.14/lib/list_debug.c:53!
task: fff1f46f2280 task.stack: ff8008068000
lr : __list_del_entry_valid+0x94/0xb4
pc : __list_del_entry_valid+0x94/0xb4
<...>
Call trace:
__list_del_entry_valid+0x94/0xb4
__release_extent_node+0xb0/0x114
__free_extent_tree+0x58/0x7c
f2fs_shrink_extent_tree+0xdc/0x3b0
f2fs_leave_shrinker+0x28/0x7c
f2fs_put_super+0xfc/0x1e0
generic_shutdown_super+0x70/0xf4
kill_block_super+0x2c/0x5c
kill_f2fs_super+0x44/0x50
deactivate_locked_super+0x60/0x8c
deactivate_super+0x68/0x74
cleanup_mnt+0x40/0x78
__cleanup_mnt+0x1c/0x28
task_work_run+0x48/0xd0
do_notify_resume+0x678/0xe98
work_pending+0x8/0x14

Fix this by cleaning up the inodes, extent trees and extent nodes of those
recovered files before freeing up sbi and before the next retry.

Signed-off-by: Sahitya Tummala 
---
v2:
-call evict_inodes() and f2fs_shrink_extent_tree() to clean up inodes

 fs/f2fs/f2fs.h     |  1 +
 fs/f2fs/shrinker.c |  2 +-
 fs/f2fs/super.c    | 13 ++++++++++++-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1e03197..aaee63b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3407,6 +3407,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
struct rb_root_cached *root);
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
+unsigned long __count_extent_cache(struct f2fs_sb_info *sbi);
 bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
 void f2fs_drop_extent_tree(struct inode *inode);
 unsigned int f2fs_destroy_extent_node(struct inode *inode);
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 9e13db9..7e3c13b 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -30,7 +30,7 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
return count > 0 ? count : 0;
 }
 
-static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
+unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
 {
return atomic_read(&sbi->total_zombie_tree) +
atomic_read(&sbi->total_ext_node);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index af58b2c..769e7b1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3016,6 +3016,16 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
sbi->readdir_ra = 1;
 }
 
+static void f2fs_cleanup_inodes(struct f2fs_sb_info *sbi)
+{
+   struct super_block *sb = sbi->sb;
+
+   sync_filesystem(sb);
+   shrink_dcache_sb(sb);
+   evict_inodes(sb);
+   f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
+}
+
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 {
struct f2fs_sb_info *sbi;
@@ -3402,6 +3412,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 * falls into an infinite loop in f2fs_sync_meta_pages().
 */
truncate_inode_pages_final(META_MAPPING(sbi));
+   /* cleanup recovery and quota inodes */
+   f2fs_cleanup_inodes(sbi);
f2fs_unregister_sysfs(sbi);
 free_root_inode:
dput(sb->s_root);
@@ -3445,7 +3457,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* give only one another chance */
if (retry) {
retry = false;
-   shrink_dcache_sb(sb);
goto try_onemore;
}
return err;
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v2] f2fs: fix to allow node segment for GC by ioctl path

2018-11-25 Thread Sahitya Tummala
Allow node-type segments also to be GC'd via the f2fs ioctl
F2FS_IOC_GARBAGE_COLLECT_RANGE.

Signed-off-by: Sahitya Tummala 
---
v2:
update commit message

 fs/f2fs/gc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index a07241f..e4689c6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -323,8 +323,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
p.min_cost = get_max_cost(sbi, &p);
 
if (*result != NULL_SEGNO) {
-   if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
-   get_valid_blocks(sbi, *result, false) &&
+   if (get_valid_blocks(sbi, *result, false) &&
!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
p.min_segno = *result;
goto out;
@@ -404,11 +403,12 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
}
*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
 
+   }
+out:
+   if (p.min_segno != NULL_SEGNO)
trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
sbi->cur_victim_sec,
prefree_segments(sbi), free_segments(sbi));
-   }
-out:
mutex_unlock(&dirty_i->seglist_lock);
 
return (p.min_segno == NULL_SEGNO) ? 0 : 1;
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v3 1/2] f2fs: fix to allow node segment for GC by ioctl path

2018-11-26 Thread Sahitya Tummala
Allow node-type segments also to be GC'd via the f2fs ioctl
F2FS_IOC_GARBAGE_COLLECT_RANGE.

Signed-off-by: Sahitya Tummala 
---
v3:
separate the trace print change from this patch

 fs/f2fs/gc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index a07241f..d720551 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -323,8 +323,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
p.min_cost = get_max_cost(sbi, &p);
 
if (*result != NULL_SEGNO) {
-   if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
-   get_valid_blocks(sbi, *result, false) &&
+   if (get_valid_blocks(sbi, *result, false) &&
!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
p.min_segno = *result;
goto out;
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v3 2/2] f2fs: adjust trace print in f2fs_get_victim() to cover all paths

2018-11-26 Thread Sahitya Tummala
Adjust the trace print in f2fs_get_victim() to cover GC done by
F2FS_IOC_GARBAGE_COLLECT_RANGE.

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/gc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d720551..e4689c6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -403,11 +403,12 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
}
*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
 
+   }
+out:
+   if (p.min_segno != NULL_SEGNO)
trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
sbi->cur_victim_sec,
prefree_segments(sbi), free_segments(sbi));
-   }
-out:
mutex_unlock(&dirty_i->seglist_lock);
 
return (p.min_segno == NULL_SEGNO) ? 0 : 1;
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v2] f2fs: fix sbi->extent_list corruption issue

2018-11-28 Thread Sahitya Tummala


On Tue, Nov 27, 2018 at 09:42:39AM +0800, Chao Yu wrote:
> On 2018/11/27 8:30, Jaegeuk Kim wrote:
> > On 11/26, Sahitya Tummala wrote:
> >> When there is a failure in f2fs_fill_super() after/during
> >> the recovery of fsync'd nodes, it frees the current sbi and
> >> retries again. This time the mount is successful, but the files
> >> that got recovered before retry, still holds the extent tree,
> >> whose extent nodes list is corrupted since sbi and sbi->extent_list
> >> is freed up. The list_del corruption issue is observed when the
> >> file system is getting unmounted and when those recoverd files extent
> >> node is being freed up in the below context.
> >>
> >> list_del corruption. prev->next should be fff1e1ef5480, but was (null)
> >> <...>
> >> kernel BUG at kernel/msm-4.14/lib/list_debug.c:53!
> >> task: fff1f46f2280 task.stack: ff8008068000
> >> lr : __list_del_entry_valid+0x94/0xb4
> >> pc : __list_del_entry_valid+0x94/0xb4
> >> <...>
> >> Call trace:
> >> __list_del_entry_valid+0x94/0xb4
> >> __release_extent_node+0xb0/0x114
> >> __free_extent_tree+0x58/0x7c
> >> f2fs_shrink_extent_tree+0xdc/0x3b0
> >> f2fs_leave_shrinker+0x28/0x7c
> >> f2fs_put_super+0xfc/0x1e0
> >> generic_shutdown_super+0x70/0xf4
> >> kill_block_super+0x2c/0x5c
> >> kill_f2fs_super+0x44/0x50
> >> deactivate_locked_super+0x60/0x8c
> >> deactivate_super+0x68/0x74
> >> cleanup_mnt+0x40/0x78
> >> __cleanup_mnt+0x1c/0x28
> >> task_work_run+0x48/0xd0
> >> do_notify_resume+0x678/0xe98
> >> work_pending+0x8/0x14
> >>
> >> Fix this by cleaning up inodes, extent tree and nodes of those
> >> recovered files before freeing up sbi and before next retry.
> >>
> >> Signed-off-by: Sahitya Tummala 
> >> ---
> >> v2:
> >> -call evict_inodes() and f2fs_shrink_extent_tree() to cleanup inodes
> >>
> >>  fs/f2fs/f2fs.h |  1 +
> >>  fs/f2fs/shrinker.c |  2 +-
> >>  fs/f2fs/super.c| 13 -
> >>  3 files changed, 14 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >> index 1e03197..aaee63b 100644
> >> --- a/fs/f2fs/f2fs.h
> >> +++ b/fs/f2fs/f2fs.h
> >> @@ -3407,6 +3407,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct 
> >> rb_root_cached *root,
> >>  bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
> >>struct rb_root_cached *root);
> >>  unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int 
> >> nr_shrink);
> >> +unsigned long __count_extent_cache(struct f2fs_sb_info *sbi);
> >>  bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent 
> >> *i_ext);
> >>  void f2fs_drop_extent_tree(struct inode *inode);
> >>  unsigned int f2fs_destroy_extent_node(struct inode *inode);
> >> diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
> >> index 9e13db9..7e3c13b 100644
> >> --- a/fs/f2fs/shrinker.c
> >> +++ b/fs/f2fs/shrinker.c
> >> @@ -30,7 +30,7 @@ static unsigned long __count_free_nids(struct 
> >> f2fs_sb_info *sbi)
> >>return count > 0 ? count : 0;
> >>  }
> >>  
> >> -static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
> >> +unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
> >>  {
> >>return atomic_read(&sbi->total_zombie_tree) +
> >>atomic_read(&sbi->total_ext_node);
> >> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >> index af58b2c..769e7b1 100644
> >> --- a/fs/f2fs/super.c
> >> +++ b/fs/f2fs/super.c
> >> @@ -3016,6 +3016,16 @@ static void f2fs_tuning_parameters(struct 
> >> f2fs_sb_info *sbi)
> >>sbi->readdir_ra = 1;
> >>  }
> >>  
> >> +static void f2fs_cleanup_inodes(struct f2fs_sb_info *sbi)
> >> +{
> >> +  struct super_block *sb = sbi->sb;
> >> +
> >> +  sync_filesystem(sb);
> > 
> > This writes another checkpoint, which would not be what this retrial 
> > intended.
> 
> Actually, checkpoint will not be triggered due to SBI_POR_DOING flag check
> as below:
> 
> int f2fs_sync_fs(struct super_block *sb, int sync)
> {
> ...
>   if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
>   return -EAGAIN;
> ...
> }

Re: [f2fs-dev] [PATCH v2] f2fs: fix sbi->extent_list corruption issue

2018-12-11 Thread Sahitya Tummala
On Fri, Dec 07, 2018 at 05:47:31PM +0800, Chao Yu wrote:
> On 2018/12/1 4:33, Jaegeuk Kim wrote:
> > On 11/29, Sahitya Tummala wrote:
> >>
> >> On Tue, Nov 27, 2018 at 09:42:39AM +0800, Chao Yu wrote:
> >>> On 2018/11/27 8:30, Jaegeuk Kim wrote:
> >>>> On 11/26, Sahitya Tummala wrote:
> >>>>> When there is a failure in f2fs_fill_super() after/during
> >>>>> the recovery of fsync'd nodes, it frees the current sbi and
> >>>>> retries again. This time the mount is successful, but the files
> >>>>> that got recovered before retry, still holds the extent tree,
> >>>>> whose extent nodes list is corrupted since sbi and sbi->extent_list
> >>>>> is freed up. The list_del corruption issue is observed when the
> >>>>> file system is getting unmounted and when those recoverd files extent
> >>>>> node is being freed up in the below context.
> >>>>>
> >>>>> list_del corruption. prev->next should be fff1e1ef5480, but was 
> >>>>> (null)
> >>>>> <...>
> >>>>> kernel BUG at kernel/msm-4.14/lib/list_debug.c:53!
> >>>>> task: fff1f46f2280 task.stack: ff8008068000
> >>>>> lr : __list_del_entry_valid+0x94/0xb4
> >>>>> pc : __list_del_entry_valid+0x94/0xb4
> >>>>> <...>
> >>>>> Call trace:
> >>>>> __list_del_entry_valid+0x94/0xb4
> >>>>> __release_extent_node+0xb0/0x114
> >>>>> __free_extent_tree+0x58/0x7c
> >>>>> f2fs_shrink_extent_tree+0xdc/0x3b0
> >>>>> f2fs_leave_shrinker+0x28/0x7c
> >>>>> f2fs_put_super+0xfc/0x1e0
> >>>>> generic_shutdown_super+0x70/0xf4
> >>>>> kill_block_super+0x2c/0x5c
> >>>>> kill_f2fs_super+0x44/0x50
> >>>>> deactivate_locked_super+0x60/0x8c
> >>>>> deactivate_super+0x68/0x74
> >>>>> cleanup_mnt+0x40/0x78
> >>>>> __cleanup_mnt+0x1c/0x28
> >>>>> task_work_run+0x48/0xd0
> >>>>> do_notify_resume+0x678/0xe98
> >>>>> work_pending+0x8/0x14
> >>>>>
> >>>>> Fix this by cleaning up inodes, extent tree and nodes of those
> >>>>> recovered files before freeing up sbi and before next retry.
> >>>>>
> >>>>> Signed-off-by: Sahitya Tummala 
> >>>>> ---
> >>>>> v2:
> >>>>> -call evict_inodes() and f2fs_shrink_extent_tree() to cleanup inodes
> >>>>>
> >>>>>  fs/f2fs/f2fs.h |  1 +
> >>>>>  fs/f2fs/shrinker.c |  2 +-
> >>>>>  fs/f2fs/super.c| 13 -
> >>>>>  3 files changed, 14 insertions(+), 2 deletions(-)
> >>>>>
> >>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>>> index 1e03197..aaee63b 100644
> >>>>> --- a/fs/f2fs/f2fs.h
> >>>>> +++ b/fs/f2fs/f2fs.h
> >>>>> @@ -3407,6 +3407,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct 
> >>>>> rb_root_cached *root,
> >>>>>  bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
> >>>>> struct rb_root_cached 
> >>>>> *root);
> >>>>>  unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int 
> >>>>> nr_shrink);
> >>>>> +unsigned long __count_extent_cache(struct f2fs_sb_info *sbi);
> >>>>>  bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent 
> >>>>> *i_ext);
> >>>>>  void f2fs_drop_extent_tree(struct inode *inode);
> >>>>>  unsigned int f2fs_destroy_extent_node(struct inode *inode);
> >>>>> diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
> >>>>> index 9e13db9..7e3c13b 100644
> >>>>> --- a/fs/f2fs/shrinker.c
> >>>>> +++ b/fs/f2fs/shrinker.c
> >>>>> @@ -30,7 +30,7 @@ static unsigned long __count_free_nids(struct 
> >>>>> f2fs_sb_info *sbi)
> >>>>> return count > 0 ? count : 0;
> >>>>>  }
> >>>>>  
> >>>>> -static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
> >>>>> +unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)

Re: [f2fs-dev] [PATCH v2] f2fs: fix sbi->extent_list corruption issue

2018-12-13 Thread Sahitya Tummala
On Wed, Dec 12, 2018 at 11:36:08AM +0800, Chao Yu wrote:
> On 2018/12/12 11:17, Sahitya Tummala wrote:
> > On Fri, Dec 07, 2018 at 05:47:31PM +0800, Chao Yu wrote:
> >> On 2018/12/1 4:33, Jaegeuk Kim wrote:
> >>> On 11/29, Sahitya Tummala wrote:
> >>>>
> >>>> On Tue, Nov 27, 2018 at 09:42:39AM +0800, Chao Yu wrote:
> >>>>> On 2018/11/27 8:30, Jaegeuk Kim wrote:
> >>>>>> On 11/26, Sahitya Tummala wrote:
> >>>>>>> When there is a failure in f2fs_fill_super() after/during
> >>>>>>> the recovery of fsync'd nodes, it frees the current sbi and
> >>>>>>> retries again. This time the mount is successful, but the files
> >>>>>>> that got recovered before retry, still holds the extent tree,
> >>>>>>> whose extent nodes list is corrupted since sbi and sbi->extent_list
> >>>>>>> is freed up. The list_del corruption issue is observed when the
> >>>>>>> file system is getting unmounted and when those recoverd files extent
> >>>>>>> node is being freed up in the below context.
> >>>>>>>
> >>>>>>> list_del corruption. prev->next should be fff1e1ef5480, but was 
> >>>>>>> (null)
> >>>>>>> <...>
> >>>>>>> kernel BUG at kernel/msm-4.14/lib/list_debug.c:53!
> >>>>>>> task: fff1f46f2280 task.stack: ff8008068000
> >>>>>>> lr : __list_del_entry_valid+0x94/0xb4
> >>>>>>> pc : __list_del_entry_valid+0x94/0xb4
> >>>>>>> <...>
> >>>>>>> Call trace:
> >>>>>>> __list_del_entry_valid+0x94/0xb4
> >>>>>>> __release_extent_node+0xb0/0x114
> >>>>>>> __free_extent_tree+0x58/0x7c
> >>>>>>> f2fs_shrink_extent_tree+0xdc/0x3b0
> >>>>>>> f2fs_leave_shrinker+0x28/0x7c
> >>>>>>> f2fs_put_super+0xfc/0x1e0
> >>>>>>> generic_shutdown_super+0x70/0xf4
> >>>>>>> kill_block_super+0x2c/0x5c
> >>>>>>> kill_f2fs_super+0x44/0x50
> >>>>>>> deactivate_locked_super+0x60/0x8c
> >>>>>>> deactivate_super+0x68/0x74
> >>>>>>> cleanup_mnt+0x40/0x78
> >>>>>>> __cleanup_mnt+0x1c/0x28
> >>>>>>> task_work_run+0x48/0xd0
> >>>>>>> do_notify_resume+0x678/0xe98
> >>>>>>> work_pending+0x8/0x14
> >>>>>>>
> >>>>>>> Fix this by cleaning up inodes, extent tree and nodes of those
> >>>>>>> recovered files before freeing up sbi and before next retry.
> >>>>>>>
> >>>>>>> Signed-off-by: Sahitya Tummala 
> >>>>>>> ---
> >>>>>>> v2:
> >>>>>>> -call evict_inodes() and f2fs_shrink_extent_tree() to cleanup inodes
> >>>>>>>
> >>>>>>>  fs/f2fs/f2fs.h |  1 +
> >>>>>>>  fs/f2fs/shrinker.c |  2 +-
> >>>>>>>  fs/f2fs/super.c| 13 -
> >>>>>>>  3 files changed, 14 insertions(+), 2 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>>>>> index 1e03197..aaee63b 100644
> >>>>>>> --- a/fs/f2fs/f2fs.h
> >>>>>>> +++ b/fs/f2fs/f2fs.h
> >>>>>>> @@ -3407,6 +3407,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct 
> >>>>>>> rb_root_cached *root,
> >>>>>>>  bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
> >>>>>>>   struct rb_root_cached 
> >>>>>>> *root);
> >>>>>>>  unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int 
> >>>>>>> nr_shrink);
> >>>>>>> +unsigned long __count_extent_cache(struct f2fs_sb_info *sbi);
> >>>>>>>  bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent 
> >>>>>>> *i_ext);
> >>>>>>>  void f2fs_drop_extent_tree(struct inode *inode);
> >>>>>>>  unsigned int f2fs_destroy_extent_node(struct inode *inode);
> &

[f2fs-dev] [PATCH v3] f2fs: fix sbi->extent_list corruption issue

2018-12-18 Thread Sahitya Tummala
When there is a failure in f2fs_fill_super() after/during
the recovery of fsync'd nodes, it frees the current sbi and
retries again. This time the mount is successful, but the files
that got recovered before the retry still hold the extent tree,
whose extent nodes list is corrupted since sbi and sbi->extent_list
have been freed up. The list_del corruption issue is observed when the
file system is being unmounted and those recovered files' extent
nodes are being freed up in the below context.

list_del corruption. prev->next should be fff1e1ef5480, but was (null)
<...>
kernel BUG at kernel/msm-4.14/lib/list_debug.c:53!
lr : __list_del_entry_valid+0x94/0xb4
pc : __list_del_entry_valid+0x94/0xb4
<...>
Call trace:
__list_del_entry_valid+0x94/0xb4
__release_extent_node+0xb0/0x114
__free_extent_tree+0x58/0x7c
f2fs_shrink_extent_tree+0xdc/0x3b0
f2fs_leave_shrinker+0x28/0x7c
f2fs_put_super+0xfc/0x1e0
generic_shutdown_super+0x70/0xf4
kill_block_super+0x2c/0x5c
kill_f2fs_super+0x44/0x50
deactivate_locked_super+0x60/0x8c
deactivate_super+0x68/0x74
cleanup_mnt+0x40/0x78
__cleanup_mnt+0x1c/0x28
task_work_run+0x48/0xd0
do_notify_resume+0x678/0xe98
work_pending+0x8/0x14

Fix this by not creating extents for those recovered files if the shrinker is
not registered yet. Once the mount is successful and the shrinker is
registered, those files can have extents again.

Signed-off-by: Sahitya Tummala 
---
v3:
-do not create extents in the first place for those recovered files,
instead of cleaning them up later via sync/evict_inodes.
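
The mount-time ordering this relies on looks roughly like the sketch below (a
simplified assumption; the exact call sites in f2fs_fill_super() may differ
between trees):

/*
 * f2fs_fill_super()
 *     INIT_LIST_HEAD(&sbi->s_list);      // s_list empty:
 *                                        // f2fs_may_extent_tree() returns false
 *     ...
 *     f2fs_recover_fsync_data(sbi, ...); // recovered inodes get no extent trees
 *     ...
 *     f2fs_join_shrinker(sbi);           // list_add() -> extents allowed again
 *
 * f2fs_put_super() / fill_super() error path
 *     f2fs_leave_shrinker(sbi);          // list_del_init() keeps the
 *                                        // list_empty() check valid afterwards
 */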

 fs/f2fs/f2fs.h     | 11 ++++++++++-
 fs/f2fs/shrinker.c |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7cec897..1380f07 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2695,10 +2695,19 @@ static inline bool is_dot_dotdot(const struct qstr *str)
 
 static inline bool f2fs_may_extent_tree(struct inode *inode)
 {
-   if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) ||
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+   if (!test_opt(sbi, EXTENT_CACHE) ||
is_inode_flag_set(inode, FI_NO_EXTENT))
return false;
 
+   /*
+* for recovered files during mount do not create extents
+* if shrinker is not registered.
+*/
+   if (list_empty(&sbi->s_list))
+   return false;
+
return S_ISREG(inode->i_mode);
 }
 
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 9e13db9..a467aca 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -135,6 +135,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
 
spin_lock(&f2fs_list_lock);
-   list_del(&sbi->s_list);
+   list_del_init(&sbi->s_list);
spin_unlock(&f2fs_list_lock);
 }
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH] f2fs: fix use-after-free issue with sbi->stat_info

2018-12-24 Thread Sahitya Tummala
iput() on sbi->node_inode can update sbi->stat_info
in the below context, if f2fs_write_checkpoint()
has failed with an error.

f2fs_balance_fs_bg+0x1ac/0x1ec
f2fs_write_node_pages+0x4c/0x260
do_writepages+0x80/0xbc
__writeback_single_inode+0xdc/0x4ac
writeback_single_inode+0x9c/0x144
write_inode_now+0xc4/0xec
iput+0x194/0x22c
f2fs_put_super+0x11c/0x1e8
generic_shutdown_super+0x70/0xf4
kill_block_super+0x2c/0x5c
kill_f2fs_super+0x44/0x50
deactivate_locked_super+0x60/0x8c
deactivate_super+0x68/0x74
cleanup_mnt+0x40/0x78

Fix this by moving f2fs_destroy_stats() further below iput().

Signed-off-by: Sahitya Tummala 
---
 fs/f2fs/super.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index e184ad4e..df41a3a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1058,9 +1058,6 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_write_checkpoint(sbi, &cpc);
}
 
-   /* f2fs_write_checkpoint can update stat informaion */
-   f2fs_destroy_stats(sbi);
-
/*
 * normally superblock is clean, so we need to release this.
 * In addition, EIO will skip do checkpoint, we need this as well.
@@ -1080,6 +1077,12 @@ static void f2fs_put_super(struct super_block *sb)
iput(sbi->node_inode);
iput(sbi->meta_inode);
 
+   /*
+* iput() can update stat information, if f2fs_write_checkpoint()
+* above failed with error.
+*/
+   f2fs_destroy_stats(sbi);
+
/* destroy f2fs internal modules */
f2fs_destroy_node_manager(sbi);
f2fs_destroy_segment_manager(sbi);
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: fix use-after-free issue with sbi->stat_info

2018-12-25 Thread Sahitya Tummala
On Tue, Dec 25, 2018 at 11:06:45AM +0800, Chao Yu wrote:
> On 2018/12/24 21:06, Sahitya Tummala wrote:
> > iput() on sbi->node_inode can update sbi->stat_info
> > in the below context, if the f2fs_write_checkpoint()
> > has failed with error.
> > 
> > f2fs_balance_fs_bg+0x1ac/0x1ec
> > f2fs_write_node_pages+0x4c/0x260
> > do_writepages+0x80/0xbc
> > __writeback_single_inode+0xdc/0x4ac
> > writeback_single_inode+0x9c/0x144
> > write_inode_now+0xc4/0xec
> > iput+0x194/0x22c
> > f2fs_put_super+0x11c/0x1e8
> > generic_shutdown_super+0x70/0xf4
> > kill_block_super+0x2c/0x5c
> > kill_f2fs_super+0x44/0x50
> > deactivate_locked_super+0x60/0x8c
> > deactivate_super+0x68/0x74
> > cleanup_mnt+0x40/0x78
> > 
> > Fix this by moving f2fs_destroy_stats() further below iput().
> > 
> > Signed-off-by: Sahitya Tummala 
> > ---
> >  fs/f2fs/super.c | 9 ++---
> >  1 file changed, 6 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index e184ad4e..df41a3a 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -1058,9 +1058,6 @@ static void f2fs_put_super(struct super_block *sb)
> > f2fs_write_checkpoint(sbi, &cpc);
> > }
> >  
> > -   /* f2fs_write_checkpoint can update stat informaion */
> > -   f2fs_destroy_stats(sbi);
> 
> The code order in the error path of fill_super() is almost the same as the
> one in put_super(); could you please check that as well?
> 

Thanks Chao, for pointing it out. Yes, the same issue exists in the error path
of fill_super() as well. I will fix it.
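
Roughly, that follow-up change would reorder the fill_super() error path along
these lines (label names and surrounding calls are an assumption sketched from
the put_super() ordering above; the real error path in fs/f2fs/super.c may
differ in detail):

free_node_inode:
	truncate_inode_pages_final(NODE_MAPPING(sbi));
	iput(sbi->node_inode);     /* may run f2fs_balance_fs_bg() and touch stat_info */
free_stats:
	f2fs_destroy_stats(sbi);   /* destroy stats only after node_inode is put */
free_nm:
	f2fs_destroy_node_manager(sbi);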

> Thanks,
> 
> > -
> > /*
> >  * normally superblock is clean, so we need to release this.
> >  * In addition, EIO will skip do checkpoint, we need this as well.
> > @@ -1080,6 +1077,12 @@ static void f2fs_put_super(struct super_block *sb)
> > iput(sbi->node_inode);
> > iput(sbi->meta_inode);
> >  
> > +   /*
> > +* iput() can update stat information, if f2fs_write_checkpoint()
> > +* above failed with error.
> > +*/
> > +   f2fs_destroy_stats(sbi);
> > +
> > /* destroy f2fs internal modules */
> > f2fs_destroy_node_manager(sbi);
> > f2fs_destroy_segment_manager(sbi);
> > 
> 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

