Re: [f2fs-dev] [RFC PATCH v2] fsck.f2fs: write checkpoint out of place first

2018-08-20 Thread guoweichao
Hi Jaegeuk,

Is your test using a kernel include some unstable patches like "f2fs: guarantee 
journalled quota data by checkpoint"?
I am planing to reproduce the problem with dev branch. Is that OK? Any hints 
for reproducing?

Thank,
Weichao

On 2018/8/18 2:32, Jaegeuk Kim wrote:
> Hi Weichao,
> 
> This is corrupting the checkpoint showing dangling nids when running my test
> where injects faults with shutdown loop.
> 
> Thanks,
> 
> On 07/28, Chao Yu wrote:
>> On 2018/7/27 4:54, Weichao Guo wrote:
>>> We may encounter both checkpoints invalid in such a case:
>>> 1. write checkpoint A, B, C;
>>> 2. sudden power-cut during write checkpoint D;
>>> 3. fsck changes the total block count of checkpoint C;
>>> 4. sudden power-cut during fsck write checkpoint C in place
>>>
>>>  -   -
>>> |  ver C  | |  ver D  |
>>> |   ...   | |   ...   |
>>> | content | | content |
>>> |   ...   | |   ...   |
>>> |  ver C  | | |
>>> |  ver A  | |  ver B  |
>>>  -   -
>>>
>>> As the total # of checkpoint C is changed, an old cp block
>>> like ver A or an invalid cp block may be referenced.
>>> To avoid both checkpoints invalid, and considering fsck should
>>> not update the checkpoint version, fsck could write checkpoint
>>> out of place first and then write checkpoint in place. This
>>> makes sure the file system is fixed by fsck and at least one
>>> of the two checkpoints is valid.
>>>
>>> Signed-off-by: Weichao Guo 
>>> ---
>>>  fsck/defrag.c |  2 +-
>>>  fsck/fsck.c   | 14 +-
>>>  fsck/fsck.h   |  1 +
>>>  fsck/mount.c  | 16 ++--
>>>  fsck/sload.c  |  2 +-
>>>  5 files changed, 30 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/fsck/defrag.c b/fsck/defrag.c
>>> index bea0293..9fc932f 100644
>>> --- a/fsck/defrag.c
>>> +++ b/fsck/defrag.c
>>> @@ -96,7 +96,7 @@ int f2fs_defragment(struct f2fs_sb_info *sbi, u64 from, 
>>> u64 len, u64 to, int lef
>>> /* flush dirty sit entries */
>>> flush_sit_entries(sbi);
>>>  
>>> -   write_checkpoint(sbi);
>>> +   __write_checkpoint(sbi);
>>>  
>>> return 0;
>>>  }
>>> diff --git a/fsck/fsck.c b/fsck/fsck.c
>>> index 91c8529..f2ff4bc 100644
>>> --- a/fsck/fsck.c
>>> +++ b/fsck/fsck.c
>>> @@ -1943,7 +1943,7 @@ static void flush_curseg_sit_entries(struct 
>>> f2fs_sb_info *sbi)
>>> free(sit_blk);
>>>  }
>>>  
>>> -static void fix_checkpoint(struct f2fs_sb_info *sbi)
>>> +static void __fix_checkpoint(struct f2fs_sb_info *sbi)
>>>  {
>>> struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
>>> struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
>>> @@ -2004,6 +2004,18 @@ static void fix_checkpoint(struct f2fs_sb_info *sbi)
>>> write_nat_bits(sbi, sb, cp, sbi->cur_cp);
>>>  }
>>>  
>>> +static void fix_checkpoint(struct f2fs_sb_info *sbi)
>>> +{
>>> +   int i = 0;
>>> +
>>> +   for (i = 0; i < 2; i++) {
>>> +   /* write checkpoint out of place first */
>>> +   sbi->cur_cp = sbi->cur_cp % 2 + 1;
>>> +   __fix_checkpoint(sbi);
>>> +   f2fs_fsync_device();
>>
>> It needs to check return value here.
>>
>> We can add below codes in the end of __fix_checkpoint()?
>>
>>  ret = f2fs_fsync_device();
>>  ASSERT(ret >= 0);
>>
>>> +   }
>>> +}
>>> +
>>>  int check_curseg_offset(struct f2fs_sb_info *sbi)
>>>  {
>>> int i;
>>> diff --git a/fsck/fsck.h b/fsck/fsck.h
>>> index 8e133fa..068dd34 100644
>>> --- a/fsck/fsck.h
>>> +++ b/fsck/fsck.h
>>> @@ -175,6 +175,7 @@ extern void flush_sit_entries(struct f2fs_sb_info *);
>>>  extern void move_curseg_info(struct f2fs_sb_info *, u64);
>>>  extern void write_curseg_info(struct f2fs_sb_info *);
>>>  extern int find_next_free_block(struct f2fs_sb_info *, u64 *, int, int);
>>> +extern void __write_checkpoint(struct f2fs_sb_info *);
>>>  extern void write_checkpoint(struct f2fs_sb_info *);
>>>  extern void update_data_blkaddr(struct f2fs_sb_info *, nid_t, u16, 
>>> block_t);
>>>  extern void update_nat_blkaddr(struct f2fs_sb_info *, nid_t, nid_t, 
>>> block_t);
>>> diff --git a/fsck/mount.c b/fsck/mount.c
>>> index e5574c5..8a29421 100644
>>> --- a/fsck/mount.c
>>> +++ b/fsck/mount.c
>>> @@ -1856,7 +1856,7 @@ void flush_journal_entries(struct f2fs_sb_info *sbi)
>>> int n_sits = flush_sit_journal_entries(sbi);
>>>  
>>> if (n_nats || n_sits)
>>> -   write_checkpoint(sbi);
>>> +   __write_checkpoint(sbi);
>>>  }
>>>  
>>>  void flush_sit_entries(struct f2fs_sb_info *sbi)
>>> @@ -2079,7 +2079,7 @@ void nullify_nat_entry(struct f2fs_sb_info *sbi, u32 
>>> nid)
>>> free(nat_block);
>>>  }
>>>  
>>> -void write_checkpoint(struct f2fs_sb_info *sbi)
>>> +void __write_checkpoint(struct f2fs_sb_info *sbi)
>>>  {
>>> struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
>>> struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
>>> @@ -2144,6 +2144,18 @@ void write_checkpoint(struct f2fs_sb_info *sbi)
>>> ASSERT(ret >= 0);
>>>  }
>>>  
>>> +void 

Re: [f2fs-dev] [PATCH v3] fsck.f2fs: write checkpoint out of place first

2018-07-30 Thread guoweichao



On 2018/7/30 14:41, Chao Yu wrote:
> Hi Weichao,
> 
> On 2018/7/30 18:46, Weichao Guo wrote:
> 
> Could you check and adjust your server's date, it looks like we are receiving
> patch from future. ;)
> 
Oh, I didn't notice that. It will not happen any more.

Thanks,

>> We may encounter both checkpoints invalid in such a case:
>> 1. write checkpoint A, B, C;
>> 2. sudden power-cut during write checkpoint D;
>> 3. fsck changes the total block count of checkpoint C;
>> 4. sudden power-cut during fsck write checkpoint C in place
>>
>>  -   -
>> |  ver C  | |  ver D  |
>> |   ...   | |   ...   |
>> | content | | content |
>> |   ...   | |   ...   |
>> |  ver C  | | |
>> |  ver A  | |  ver B  |
>>  -   -
>>
>> As the total # of checkpoint C is changed, an old cp block
>> like ver A or an invalid cp block may be referenced.
>> To avoid both checkpoints invalid, and considering fsck should
>> not update the checkpoint version, fsck could write checkpoint
>> out of place first and then write checkpoint in place. This
>> makes sure the file system is fixed by fsck and at least one
>> of the two checkpoints is valid.
>>
>> Signed-off-by: Weichao Guo 
> Reviewed-by: Chao Yu 
> 
> Thanks,
> 
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH RFC] fsck.f2fs: write checkpoint out of place first

2018-07-26 Thread guoweichao



On 2018/7/26 19:52, Chao Yu wrote:
> On 2018/7/24 23:28, Weichao Guo wrote:
>> We may encounter both checkpoints invalid in such a case:
>> 1. write checkpoint A, B, C;
>> 2. sudden power-cut during write checkpoint D;
>> 3. fsck changes the total block count of checkpoint C;
>> 4. sudden power-cut during fsck write checkpoint C in place
>>
>>  -   -
>> |  ver C  | |  ver D  |
>> |   ...   | |   ...   |
>> | content | | content |
>> |   ...   | |   ...   |
>> |  ver C  | | |
>> |  ver A  | |  ver B  |
>>  -   -
>>
>> As the total # of checkpoint C is changed, an old cp block
>> like ver A or an invalid cp block may be referenced.
>> To avoid both checkpoints invalid, and considering fsck should
>> not update the checkpoint version, fsck could write checkpoint
>> out of place first and then write checkpoint in place. This
>> makes sure the file system is fixed by fsck and at least one
>> of the two checkpoints is valid.
>>
>> Signed-off-by: Weichao Guo 
>> ---
>>  fsck/fsck.c  | 13 -
>>  fsck/mount.c | 13 -
>>  2 files changed, 24 insertions(+), 2 deletions(-)
>>
>> diff --git a/fsck/fsck.c b/fsck/fsck.c
>> index 91c8529..28320d5 100644
>> --- a/fsck/fsck.c
>> +++ b/fsck/fsck.c
>> @@ -1954,6 +1954,7 @@ static void fix_checkpoint(struct f2fs_sb_info *sbi)
>>  u32 i;
>>  int ret;
>>  u_int32_t crc = 0;
>> +int phase = 0;
>>  
>>  if (is_set_ckpt_flags(cp, CP_ORPHAN_PRESENT_FLAG)) {
>>  orphan_blks = __start_sum_addr(sbi) - 1;
>> @@ -1975,9 +1976,11 @@ static void fix_checkpoint(struct f2fs_sb_info *sbi)
>>  *((__le32 *)((unsigned char *)cp + CHECKSUM_OFFSET)) = cpu_to_le32(crc);
>>  
>>  cp_blk_no = get_sb(cp_blkaddr);
>> -if (sbi->cur_cp == 2)
>> +/* write checkpoint with current version out of place first */
>> +if (sbi->cur_cp == 1)
>>  cp_blk_no += 1 << get_sb(log_blocks_per_seg);
>>  
>> +next_step:
>>  ret = dev_write_block(cp, cp_blk_no++);
>>  ASSERT(ret >= 0);
>>  
>> @@ -2002,6 +2005,14 @@ static void fix_checkpoint(struct f2fs_sb_info *sbi)
>>  /* Write nat bits */
>>  if (flags & CP_NAT_BITS_FLAG)
>>  write_nat_bits(sbi, sb, cp, sbi->cur_cp);
>> +
>> +if (phase == 0) {
>> +cp_blk_no = get_sb(cp_blkaddr);
>> +if (sbi->cur_cp == 2)
>> +cp_blk_no += 1 << get_sb(log_blocks_per_seg);
>> +phase++;
>> +goto next_step;
> 
> Before goes to second round, we need to keep all cached data being persistent,
> so additional fsync() is needed?
Yes.
> 
> Since we are trying to implement OPU style checkpoint() in f2fs-tools, can you
> check whether all caller of {fix,write}_checkpoint() need such logic?
OK, I will resend a revised version of this patch.

Thanks,

> 
> Thanks,
> 
>> +}
>>  }
>>  
>>  int check_curseg_offset(struct f2fs_sb_info *sbi)
>> diff --git a/fsck/mount.c b/fsck/mount.c
>> index e5574c5..8d7a9bb 100644
>> --- a/fsck/mount.c
>> +++ b/fsck/mount.c
>> @@ -2088,6 +2088,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi)
>>  u32 flags = CP_UMOUNT_FLAG;
>>  int i, ret;
>>  u_int32_t crc = 0;
>> +int phase = 0;
>>  
>>  if (is_set_ckpt_flags(cp, CP_ORPHAN_PRESENT_FLAG)) {
>>  orphan_blks = __start_sum_addr(sbi) - 1;
>> @@ -2105,9 +2106,11 @@ void write_checkpoint(struct f2fs_sb_info *sbi)
>>  *((__le32 *)((unsigned char *)cp + CHECKSUM_OFFSET)) = cpu_to_le32(crc);
>>  
>>  cp_blk_no = get_sb(cp_blkaddr);
>> -if (sbi->cur_cp == 2)
>> +/* write checkpoint with current version out of place first */
>> +if (sbi->cur_cp == 1)
>>  cp_blk_no += 1 << get_sb(log_blocks_per_seg);
>>  
>> +next_step:
>>  /* write the first cp */
>>  ret = dev_write_block(cp, cp_blk_no++);
>>  ASSERT(ret >= 0);
>> @@ -2142,6 +2145,14 @@ void write_checkpoint(struct f2fs_sb_info *sbi)
>>  /* write the last cp */
>>  ret = dev_write_block(cp, cp_blk_no++);
>>  ASSERT(ret >= 0);
>> +
>> +if (phase == 0) {
>> +cp_blk_no = get_sb(cp_blkaddr);
>> +if (sbi->cur_cp == 2)
>> +cp_blk_no += 1 << get_sb(log_blocks_per_seg);
>> +phase++;
>> +goto next_step;
>> +}
>>  }
>>  
>>  void build_nat_area_bitmap(struct f2fs_sb_info *sbi)
>>
> 
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v4] f2fs: support in-memory inode checksum when checking consistency

2018-03-13 Thread guoweichao


On 2018/3/13 16:49, Sahitya Tummala wrote:
> On Thu, Mar 08, 2018 at 05:10:40AM +0800, Weichao Guo wrote:
>> @@ -159,8 +162,12 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, 
>> struct page *page)
>>  struct f2fs_inode *ri;
>>  __u32 provided, calculated;
>>  
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +if (!f2fs_enable_inode_chksum(sbi, page))
>> +#else
>>  if (!f2fs_enable_inode_chksum(sbi, page) ||
>>  PageDirty(page) || PageWriteback(page))
> 
> I see that f2fs_inode_chksum_set() is set only if CONFIG_F2FS_CHECK_FS is
f2fs_inode_chksum_set is also called in allocate_data_block when fs write back 
inode to disk.
> enabled. So in this #else case, if sb has inode checksum enabled but
> PageDirty() or PageWriteback() is not set, then we may proceed below and do
So when the inode is read from disk, e.g. PageDirty / PageWriteback is not set,
the checksum verify process should be ok.
> the comparison between provided and calculated check sum and it may fail
> resulting into checksum invalid error?
> 
>> +#endif
>>  return true;
>>  
>>  ri = _NODE(page)->i;
>> @@ -445,6 +452,9 @@ void update_inode(struct inode *inode, struct page 
>> *node_page)
>>  if (inode->i_nlink == 0)
>>  clear_inline_node(node_page);
>>  
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
>> +#endif
>>  }
>>  
>>  void update_inode_page(struct inode *inode)
>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>> index 177c438..2adeb74 100644
>> --- a/fs/f2fs/node.c
>> +++ b/fs/f2fs/node.c
>> @@ -1113,8 +1113,10 @@ static int read_node_page(struct page *page, int 
>> op_flags)
>>  .encrypted_page = NULL,
>>  };
>>  
>> -if (PageUptodate(page))
>> +if (PageUptodate(page)) {
>> +f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
>>  return LOCKED_PAGE;
>> +}
>>  
>>  get_node_info(sbi, page->index, );
>>  
>> diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
>> index 081ef0d..b6015b7 100644
>> --- a/fs/f2fs/node.h
>> +++ b/fs/f2fs/node.h
>> @@ -278,6 +278,10 @@ static inline void fill_node_footer(struct page *page, 
>> nid_t nid,
>>  /* should remain old flag bits such as COLD_BIT_SHIFT */
>>  rn->footer.flag = cpu_to_le32((ofs << OFFSET_BIT_SHIFT) |
>>  (old_flag & OFFSET_BIT_MASK));
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +if (IN_INODE(page))
>> +f2fs_inode_chksum_set(F2FS_P_SB(page), page);
>> +#endif
>>  }
>>  
>>  static inline void copy_node_footer(struct page *dst, struct page *src)
>> @@ -285,6 +289,10 @@ static inline void copy_node_footer(struct page *dst, 
>> struct page *src)
>>  struct f2fs_node *src_rn = F2FS_NODE(src);
>>  struct f2fs_node *dst_rn = F2FS_NODE(dst);
>>  memcpy(_rn->footer, _rn->footer, sizeof(struct node_footer));
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +if (IN_INODE(dst))
>> +f2fs_inode_chksum_set(F2FS_P_SB(dst), dst);
>> +#endif
>>  }
>>  
>>  static inline void fill_node_footer_blkaddr(struct page *page, block_t 
>> blkaddr)
>> @@ -298,6 +306,10 @@ static inline void fill_node_footer_blkaddr(struct page 
>> *page, block_t blkaddr)
>>  
>>  rn->footer.cp_ver = cpu_to_le64(cp_ver);
>>  rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +if (IN_INODE(page))
>> +f2fs_inode_chksum_set(F2FS_P_SB(page), page);
>> +#endif
>>  }
>>  
>>  static inline bool is_recoverable_dnode(struct page *page)
>> @@ -364,6 +376,10 @@ static inline int set_nid(struct page *p, int off, 
>> nid_t nid, bool i)
>>  rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
>>  else
>>  rn->in.nid[off] = cpu_to_le32(nid);
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +if (IN_INODE(p))
>> +f2fs_inode_chksum_set(F2FS_P_SB(p), p);
>> +#endif
>>  return set_page_dirty(p);
>>  }
>>  
>> @@ -432,6 +448,10 @@ static inline void set_cold_node(struct inode *inode, 
>> struct page *page)
>>  else
>>  flag |= (0x1 << COLD_BIT_SHIFT);
>>  rn->footer.flag = cpu_to_le32(flag);
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +if (IN_INODE(page))
>> +f2fs_inode_chksum_set(F2FS_I_SB(inode), page);
>> +#endif
>>  }
>>  
>>  static inline void set_mark(struct page *page, int mark, int type)
>> @@ -443,6 +463,10 @@ static inline void set_mark(struct page *page, int 
>> mark, int type)
>>  else
>>  flag &= ~(0x1 << type);
>>  rn->footer.flag = cpu_to_le32(flag);
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +if (IN_INODE(page))
>> +f2fs_inode_chksum_set(F2FS_P_SB(page), page);
>> +#endif
>>  }
>>  #define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
>>  #define set_fsync_mark(page, mark)  set_mark(page, mark, FSYNC_BIT_SHIFT)
>> diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
>> index ae2dfa7..572bc17 100644
>> --- a/fs/f2fs/xattr.c
>> +++ b/fs/f2fs/xattr.c
>> 

Re: [f2fs-dev] [PATCH v5] f2fs: support in-memory inode checksum when checking consistency

2018-03-08 Thread guoweichao


On 2018/3/9 13:49, Chao Yu wrote:
> On 2018/3/9 5:31, Weichao Guo wrote:
>> Only enable in-memory inode checksum to protect metadata blocks
>> from in-memory scribbles when checking consistency, which has no
>> performance requirements.
> 
> Signed-off-by?
Sorry for this mistake.
I will send a new version of this patch.
Thanks,
> 
> Thanks,
> 
>> ---
>>  fs/f2fs/inode.c |  7 +++
>>  fs/f2fs/node.c  | 10 +-
>>  2 files changed, 16 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>> index 205add3..e405b28 100644
>> --- a/fs/f2fs/inode.c
>> +++ b/fs/f2fs/inode.c
>> @@ -159,8 +159,12 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, 
>> struct page *page)
>>  struct f2fs_inode *ri;
>>  __u32 provided, calculated;
>>  
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +if (!f2fs_enable_inode_chksum(sbi, page))
>> +#else
>>  if (!f2fs_enable_inode_chksum(sbi, page) ||
>>  PageDirty(page) || PageWriteback(page))
>> +#endif
>>  return true;
>>  
>>  ri = _NODE(page)->i;
>> @@ -445,6 +449,9 @@ void update_inode(struct inode *inode, struct page 
>> *node_page)
>>  if (inode->i_nlink == 0)
>>  clear_inline_node(node_page);
>>  
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
>> +#endif
>>  }
>>  
>>  void update_inode_page(struct inode *inode)
>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>> index 177c438..6107935 100644
>> --- a/fs/f2fs/node.c
>> +++ b/fs/f2fs/node.c
>> @@ -1113,8 +1113,12 @@ static int read_node_page(struct page *page, int 
>> op_flags)
>>  .encrypted_page = NULL,
>>  };
>>  
>> -if (PageUptodate(page))
>> +if (PageUptodate(page)) {
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
>> +#endif
>>  return LOCKED_PAGE;
>> +}
>>  
>>  get_node_info(sbi, page->index, );
>>  
>> @@ -1731,6 +1735,10 @@ static int f2fs_set_node_page_dirty(struct page *page)
>>  
>>  if (!PageUptodate(page))
>>  SetPageUptodate(page);
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +if (IS_INODE(page))
>> +f2fs_inode_chksum_set(F2FS_P_SB(page), page);
>> +#endif
>>  if (!PageDirty(page)) {
>>  f2fs_set_page_dirty_nobuffers(page);
>>  inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
>>
> 
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v4] f2fs: support in-memory inode checksum when checking consistency

2018-03-08 Thread guoweichao
Hi Chao,

On 2018/3/8 17:11, Chao Yu wrote:
> Hi Weichao,
> 
> On 2018/3/8 15:56, guoweichao wrote:
>> Hi Chao,
>>
>> On 2018/3/8 14:15, Chao Yu wrote:
>>> On 2018/3/8 5:10, Weichao Guo wrote:
>>>> Only enable in-memory inode checksum to protect metadata blocks
>>>> from in-memory scribbles when checking consistency, which has no
>>>> performance requirements.
>>>
>>> Once we modify the node page of inode, we will call set_page_dirty, not 
>>> sure,
>>> can you check that we can just call f2fs_inode_chksum_set in set_page_dirty?
>>>
>> In most cases, we call set_page_dirty after modifying the node page of inode.
>> But in the case of update_inode, set_page_dirty is called before modifying 
>> the node page.
> 
> IIRC, we can not change that order, but we can add addition
> f2fs_inode_chksum_set in the end of update_inode.
> 
>> And in fsync_node_pages, if the node page of inode is already dirtied, 
>> set_page_dirty will
>> be skipped.
I reviewed this case: we call __write_node_page after set dentry/fsync flag in 
node footer of
the inode page, and no need to update inode checksum here. It's safe.

Another case is f2fs_do_tmpfile, when init_inode_metadata, if inode is not in 
FI_NEW_INODE state,
cold bit in the node footer is set and there is no set_page_dirty followed.
IMO, the inode should be FI_NEW_INODE in init_inode_metada when creating temp 
file.
So we don't have to worry about it.

Thanks,
> 
> We can handle that exception by adding f2fs_inode_chksum_set there too.
> 
> Thanks,
> 
>>
>> IMO, calling f2fs_inode_chksum_set in set_page_dirty is more clear and 
>> reasonable.
>> Can we add or move set_page_dirty in the exceptional cases?
>>
>> Thank,
>>
>>> Thanks,
>>>
>>>
>>> .
>>>
>>
>>
>> .
>>
> 
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v3] f2fs: in-memory inode checksum when checking consistency

2018-02-28 Thread guoweichao


On 2018/2/28 17:58, Chao Yu wrote:
> On 2018/2/27 3:28, Weichao Guo wrote:
>> Enable in-memory inode checksum to protect metadata blocks
>> from in-memory scribbles only when checking consistency with
>> no performance requirements.
>>
>> Signed-off-by: Weichao Guo 
>> ---
>>  fs/f2fs/inode.c | 7 +++
>>  fs/f2fs/node.c  | 4 +++-
>>  2 files changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>> index 205add3d0f3a..b04129d23d21 100644
>> --- a/fs/f2fs/inode.c
>> +++ b/fs/f2fs/inode.c
>> @@ -159,8 +159,12 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, 
>> struct page *page)
>>  struct f2fs_inode *ri;
>>  __u32 provided, calculated;
>>  
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +if (!f2fs_enable_inode_chksum(sbi, page))
>> +#else
>>  if (!f2fs_enable_inode_chksum(sbi, page) ||
>>  PageDirty(page) || PageWriteback(page))
>> +#endif
>>  return true;
>>  
>>  ri = _NODE(page)->i;
>> @@ -464,6 +468,9 @@ void update_inode_page(struct inode *inode)
>>  return;
>>  }
>>  update_inode(inode, node_page);
>> +#ifdef CONFIG_F2FS_CHECK_FS
>> +f2fs_inode_chksum_set(sbi, node_page);
>> +#endif
> 
>> This gives a panic easily by fsstress. Could you please check it again?
> 
> Sorry, I missed some cases, it looks that we need to cover updating place of 
> all
> fields like i_addr, inline_xxx, footer with f2fs_inode_chksum_set.
> 
Sorry for that , I will do more test in the following.
Thanks,

> Thanks,
> 
>>  f2fs_put_page(node_page, 1);
>>  }
>>  
>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>> index 177c438e4a56..2adeb74ae055 100644
>> --- a/fs/f2fs/node.c
>> +++ b/fs/f2fs/node.c
>> @@ -1113,8 +1113,10 @@ static int read_node_page(struct page *page, int 
>> op_flags)
>>  .encrypted_page = NULL,
>>  };
>>  
>> -if (PageUptodate(page))
>> +if (PageUptodate(page)) {
>> +f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
>>  return LOCKED_PAGE;
>> +}
>>  
>>  get_node_info(sbi, page->index, );
>>  
>>
> 
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: report cp block corrupted & add a backup of cp2

2018-02-28 Thread guoweichao
Hi Jaegeuk,

On 2018/2/28 12:57, Jaegeuk Kim wrote:
> On 02/26, Weichao Guo wrote:
>> There is a potential inconsistent metadata case due to a cp block
>> crc invalid in the latest checkpoint caused by hardware issues:
>> 1) write nodes into segment x;
>> 2) write checkpoint A;
>> 3) remove nodes in segment x;
>> 4) write checkpoint B;
>> 5) issue discard or write datas into segment x;
>> 6) sudden power-cut;
>> 7) use checkpoint A after reboot as checkpoint B is invalid
>>
>> This inconsistency may be found after several reboots long time
>> later and the kernel log about cp block crc invalid has disappeared.
>> This makes the root cause of the inconsistency is hard to locate.
>> Let us separate such other part issues from f2fs logical bugs in
>> debug version, and try to validate the checkpoint with the backup
>> when cp2 is corrupted.
> 
> How can you guarantee it happened caused by real power-cut during checkpoint?
it is caused by system panic or SPO before or during checkpoint and a cp block
corrupted when rebooting. 
> Does it really make sense to write addition block at every checkpoint?
My purpose of adding a backup is to distinguish a real corrupted cp block
and other blocks in checkpoint when crc invalid is reported.
Anyway, adding a third CP block is a little tricky. Forget it.

IMO, protecting the whole checkpoint with crc and parity methods should be
an enhancement for F2FS stability.

Thanks,
Weichao
> 
>>
>> Signed-off-by: Weichao Guo 
>> ---
>>  fs/f2fs/checkpoint.c | 27 +++
>>  1 file changed, 23 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index 512dca8..81c4acd 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -761,15 +761,29 @@ static struct page *validate_checkpoint(struct 
>> f2fs_sb_info *sbi,
>>  
>>  err = get_checkpoint_version(sbi, cp_addr, _block,
>>  _page_1, version);
>> -if (err)
>> +if (err) {
>> +f2fs_msg(sbi->sb, KERN_WARNING,
>> +"corrupted cp1 at blk_addr: 0x%x", cp_addr);
>> +f2fs_bug_on(sbi, 1);
>>  goto invalid_cp1;
>> +}
>>  pre_version = *version;
>>  
>>  cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
>>  err = get_checkpoint_version(sbi, cp_addr, _block,
>>  _page_2, version);
>> -if (err)
>> -goto invalid_cp2;
>> +if (err) {
>> +err = get_checkpoint_version(sbi, cp_addr + 1, _block,
>> +_page_2, version);
>> +if (err) {
>> +goto invalid_cp2;
>> +}
>> +else if (*version == pre_version) {
>> +f2fs_msg(sbi->sb, KERN_WARNING,
>> +"corrupted cp2 at blk_addr: 0x%x", cp_addr);
>> +f2fs_bug_on(sbi, 1);
>> +}
>> +}
>>  cur_version = *version;
>>  
>>  if (cur_version == pre_version) {
>> @@ -1302,7 +1316,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
>> struct cp_control *cpc)
>>  }
>>  
>>  /* writeout checkpoint block */
>> -update_meta_page(sbi, ckpt, start_blk);
>> +update_meta_page(sbi, ckpt, start_blk++);
>> +/* writeout a backup of cp2 if available */
>> +if (!enabled_nat_bits(sbi, cpc) ||
>> +(ckpt->cp_pack_total_block_count + 1 <=
>> +sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks))
>> +update_meta_page(sbi, ckpt, start_blk);
>>  
>>  /* wait for previous submitted node/meta pages writeback */
>>  wait_on_all_pages_writeback(sbi);
>> -- 
>> 2.10.1
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v3] f2fs: report cp block corrupted

2018-02-12 Thread guoweichao


On 2018/2/13 2:03, Jaegeuk Kim wrote:
> On 02/12, guoweichao wrote:
>>
>>
>> On 2018/2/12 11:40, Jaegeuk Kim wrote:
>>> On 02/12, guoweichao wrote:
>>>>
>>>>
>>>> On 2018/2/12 10:32, Jaegeuk Kim wrote:
>>>>> On 02/12, guoweichao wrote:
>>>>>> Hi Jaegeuk,
>>>>>>
>>>>>> On 2018/2/12 7:32, Jaegeuk Kim wrote:
>>>>>>> On 02/06, Weichao Guo wrote:
>>>>>>>> There is a potential inconsistent metadata case due to a cp block
>>>>>>>> crc invalid in the latest checkpoint caused by hardware issues:
>>>>>>>> 1) write nodes into segment x;
>>>>>>>> 2) write checkpoint A;
>>>>>>>> 3) remove nodes in segment x;
>>>>>>>> 4) write checkpoint B;
>>>>>>>> 5) issue discard or write datas into segment x;
>>>>>>>> 6) sudden power-cut;
>>>>>>>> 7) use checkpoint A after reboot as checkpoint B is invalid
>>>>>>>>
>>>>>>>> This inconsistency may be found after several reboots long time later
>>>>>>>> and the kernel log about cp block crc invalid has disappeared. This
>>>>>>>> makes the root cause of the inconsistency is hard to locate. Let us
>>>>>>>> separate such other part issues from f2fs logical bugs in debug 
>>>>>>>> version.
>>>>>>>>
>>>>>>>> Signed-off-by: Weichao Guo <guoweic...@huawei.com>
>>>>>>>> ---
>>>>>>>>  fs/f2fs/checkpoint.c | 8 ++--
>>>>>>>>  1 file changed, 6 insertions(+), 2 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>>>> index 8b0945b..16ba96a 100644
>>>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>>>> @@ -737,13 +737,17 @@ static int get_checkpoint_version(struct 
>>>>>>>> f2fs_sb_info *sbi, block_t cp_addr,
>>>>>>>>crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
>>>>>>>>if (crc_offset > (blk_size - sizeof(__le32))) {
>>>>>>>>f2fs_msg(sbi->sb, KERN_WARNING,
>>>>>>>> -  "invalid crc_offset: %zu", crc_offset);
>>>>>>>> +  "invalid crc_offset: %zu at blk_addr: 0x%x",
>>>>>>>> +  crc_offset, cp_addr);
>>>>>>>> +  f2fs_bug_on(sbi, 1);
>>>>>>>
>>>>>>> I don't think we can use bug_on here, since we're easily getting this 
>>>>>>> when
>>>>>>> power-cut happened in the middle of checkpoint pack writes, which is an 
>>>>>>> expected
>>>>>>> behavior. Hmm, we need to consider another way to detect that.
>>>>>> We only check CP block crc here. The two CP blocks may have different CP 
>>>>>> versions when
>>>>>> power-cut happened, but their crc value should be valid. IMO, this patch 
>>>>>> will trigger a
>>>>>> bug_on only when some external issues cause CP block crc invalid as one 
>>>>>> 4K page is
>>>>>> persisted atomically.
>>>>>
>>>>> Huh? This checks crc_offset, not crc? Unfortunately, my simple fault 
>>>>> injection
>>>>> test gave this bug_on within a day. The below bug_on seems what you're 
>>>>> saying
>>>>> about tho.
>>>> oh sorry, I didn't notice the code line carefully. But which fault 
>>>> injection trigger
>>>> this bug_on? The crc_offset is also parts of the CP block, it seems 
>>>> power-cut happened
>>>> in middle of writing checkpoint should not produce an invalid crc_offset.
>>>
>>> The second cp block can have stale data used by previous part of checkpoint.
>> Yes, but in all CP packs, a cp block crc_offset should always be 
>> CHECKSUM_OFFSET
>> which is configured at mkfs, right? I'm still not figure out why crc_offset 
>> invalid
>> in the stale data of previous part of checkpoint?
> 
> The second block position can be varied by some ckpt flags, so that we could 
> get
> it from previous summary block for example.
Got it. How about adding a magic number at a specific position of CP block? In 
such a way,
We can distinguish a broken CP block from other type of blocks in a checkpoint.

Thanks,
> 
> Thanks,
> 
>>
>> Thanks,
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>>>
>>>>>>> Thanks,
>>>>>>>
>>>>>>>>return -EINVAL;
>>>>>>>>}
>>>>>>>>  
>>>>>>>>crc = cur_cp_crc(*cp_block);
>>>>>>>>if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
>>>>>>>> -  f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
>>>>>>>> +  f2fs_msg(sbi->sb, KERN_WARNING,
>>>>>>>> +  "invalid crc value at blk_addr: 0x%x", cp_addr);
>>>>>>>> +  f2fs_bug_on(sbi, 1);
>>>>>>>>return -EINVAL;
>>>>>>>>}
>>>>>>>>  
>>>>>>>> -- 
>>>>>>>> 2.10.1
>>>>>>>
>>>>>>> .
>>>>>>>
>>>>>
>>>>> .
>>>>>
>>>
>>> .
>>>
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v3] f2fs: report cp block corrupted

2018-02-11 Thread guoweichao


On 2018/2/12 11:40, Jaegeuk Kim wrote:
> On 02/12, guoweichao wrote:
>>
>>
>> On 2018/2/12 10:32, Jaegeuk Kim wrote:
>>> On 02/12, guoweichao wrote:
>>>> Hi Jaegeuk,
>>>>
>>>> On 2018/2/12 7:32, Jaegeuk Kim wrote:
>>>>> On 02/06, Weichao Guo wrote:
>>>>>> There is a potential inconsistent metadata case due to a cp block
>>>>>> crc invalid in the latest checkpoint caused by hardware issues:
>>>>>> 1) write nodes into segment x;
>>>>>> 2) write checkpoint A;
>>>>>> 3) remove nodes in segment x;
>>>>>> 4) write checkpoint B;
>>>>>> 5) issue discard or write datas into segment x;
>>>>>> 6) sudden power-cut;
>>>>>> 7) use checkpoint A after reboot as checkpoint B is invalid
>>>>>>
>>>>>> This inconsistency may be found after several reboots long time later
>>>>>> and the kernel log about cp block crc invalid has disappeared. This
>>>>>> makes the root cause of the inconsistency is hard to locate. Let us
>>>>>> separate such other part issues from f2fs logical bugs in debug version.
>>>>>>
>>>>>> Signed-off-by: Weichao Guo <guoweic...@huawei.com>
>>>>>> ---
>>>>>>  fs/f2fs/checkpoint.c | 8 ++--
>>>>>>  1 file changed, 6 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>> index 8b0945b..16ba96a 100644
>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>> @@ -737,13 +737,17 @@ static int get_checkpoint_version(struct 
>>>>>> f2fs_sb_info *sbi, block_t cp_addr,
>>>>>>  crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
>>>>>>  if (crc_offset > (blk_size - sizeof(__le32))) {
>>>>>>  f2fs_msg(sbi->sb, KERN_WARNING,
>>>>>> -"invalid crc_offset: %zu", crc_offset);
>>>>>> +"invalid crc_offset: %zu at blk_addr: 0x%x",
>>>>>> +crc_offset, cp_addr);
>>>>>> +f2fs_bug_on(sbi, 1);
>>>>>
>>>>> I don't think we can use bug_on here, since we're easily getting this when
>>>>> power-cut happened in the middle of checkpoint pack writes, which is an 
>>>>> expected
>>>>> behavior. Hmm, we need to consider another way to detect that.
>>>> We only check CP block crc here. The two CP blocks may have different CP 
>>>> versions when
>>>> power-cut happened, but their crc value should be valid. IMO, this patch 
>>>> will trigger a
>>>> bug_on only when some external issues cause CP block crc invalid as one 4K 
>>>> page is
>>>> persisted atomically.
>>>
>>> Huh? This checks crc_offset, not crc? Unfortunately, my simple fault 
>>> injection
>>> test gave this bug_on within a day. The below bug_on seems what you're 
>>> saying
>>> about tho.
>> oh sorry, I didn't notice the code line carefully. But which fault injection 
>> trigger
>> this bug_on? The crc_offset is also parts of the CP block, it seems 
>> power-cut happened
>> in middle of writing checkpoint should not produce an invalid crc_offset.
> 
> The second cp block can have stale data used by previous part of checkpoint.
Yes, but in all CP packs, a cp block crc_offset should always be CHECKSUM_OFFSET
which is configured at mkfs, right? I'm still not figure out why crc_offset 
invalid
in the stale data of previous part of checkpoint?

Thanks,
> 
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>>
>>>>> Thanks,
>>>>>
>>>>>>  return -EINVAL;
>>>>>>  }
>>>>>>  
>>>>>>  crc = cur_cp_crc(*cp_block);
>>>>>>  if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
>>>>>> -f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
>>>>>> +f2fs_msg(sbi->sb, KERN_WARNING,
>>>>>> +"invalid crc value at blk_addr: 0x%x", cp_addr);
>>>>>> +f2fs_bug_on(sbi, 1);
>>>>>>  return -EINVAL;
>>>>>>  }
>>>>>>  
>>>>>> -- 
>>>>>> 2.10.1
>>>>>
>>>>> .
>>>>>
>>>
>>> .
>>>
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v3] f2fs: report cp block corrupted

2018-02-11 Thread guoweichao


On 2018/2/12 10:32, Jaegeuk Kim wrote:
> On 02/12, guoweichao wrote:
>> Hi Jaegeuk,
>>
>> On 2018/2/12 7:32, Jaegeuk Kim wrote:
>>> On 02/06, Weichao Guo wrote:
>>>> There is a potential inconsistent metadata case due to a cp block
>>>> crc invalid in the latest checkpoint caused by hardware issues:
>>>> 1) write nodes into segment x;
>>>> 2) write checkpoint A;
>>>> 3) remove nodes in segment x;
>>>> 4) write checkpoint B;
>>>> 5) issue discard or write datas into segment x;
>>>> 6) sudden power-cut;
>>>> 7) use checkpoint A after reboot as checkpoint B is invalid
>>>>
>>>> This inconsistency may be found after several reboots long time later
>>>> and the kernel log about cp block crc invalid has disappeared. This
>>>> makes the root cause of the inconsistency is hard to locate. Let us
>>>> separate such other part issues from f2fs logical bugs in debug version.
>>>>
>>>> Signed-off-by: Weichao Guo <guoweic...@huawei.com>
>>>> ---
>>>>  fs/f2fs/checkpoint.c | 8 ++--
>>>>  1 file changed, 6 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>> index 8b0945b..16ba96a 100644
>>>> --- a/fs/f2fs/checkpoint.c
>>>> +++ b/fs/f2fs/checkpoint.c
>>>> @@ -737,13 +737,17 @@ static int get_checkpoint_version(struct 
>>>> f2fs_sb_info *sbi, block_t cp_addr,
>>>>crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
>>>>if (crc_offset > (blk_size - sizeof(__le32))) {
>>>>f2fs_msg(sbi->sb, KERN_WARNING,
>>>> -  "invalid crc_offset: %zu", crc_offset);
>>>> +  "invalid crc_offset: %zu at blk_addr: 0x%x",
>>>> +  crc_offset, cp_addr);
>>>> +  f2fs_bug_on(sbi, 1);
>>>
>>> I don't think we can use bug_on here, since we're easily getting this when
>>> power-cut happened in the middle of checkpoint pack writes, which is an 
>>> expected
>>> behavior. Hmm, we need to consider another way to detect that.
>> We only check CP block crc here. The two CP blocks may have different CP 
>> versions when
>> power-cut happened, but their crc value should be valid. IMO, this patch 
>> will trigger a
>> bug_on only when some external issues cause CP block crc invalid as one 4K 
>> page is
>> persisted atomically.
> 
> Huh? This checks crc_offset, not crc? Unfortunately, my simple fault injection
> test gave this bug_on within a day. The below bug_on seems what you're saying
> about tho.
oh sorry, I didn't notice the code line carefully. But which fault injection 
trigger
this bug_on? The crc_offset is also parts of the CP block, it seems power-cut 
happened
in middle of writing checkpoint should not produce an invalid crc_offset.

Thanks,

> 
>>
>> Thanks,
>>>
>>> Thanks,
>>>
>>>>return -EINVAL;
>>>>}
>>>>  
>>>>crc = cur_cp_crc(*cp_block);
>>>>if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
>>>> -  f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
>>>> +  f2fs_msg(sbi->sb, KERN_WARNING,
>>>> +  "invalid crc value at blk_addr: 0x%x", cp_addr);
>>>> +  f2fs_bug_on(sbi, 1);
>>>>return -EINVAL;
>>>>}
>>>>  
>>>> -- 
>>>> 2.10.1
>>>
>>> .
>>>
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v3] f2fs: report cp block corrupted

2018-02-11 Thread guoweichao
Hi Jaegeuk,

On 2018/2/12 7:32, Jaegeuk Kim wrote:
> On 02/06, Weichao Guo wrote:
>> There is a potential inconsistent metadata case due to a cp block
>> crc invalid in the latest checkpoint caused by hardware issues:
>> 1) write nodes into segment x;
>> 2) write checkpoint A;
>> 3) remove nodes in segment x;
>> 4) write checkpoint B;
>> 5) issue discard or write datas into segment x;
>> 6) sudden power-cut;
>> 7) use checkpoint A after reboot as checkpoint B is invalid
>>
>> This inconsistency may be found after several reboots long time later
>> and the kernel log about cp block crc invalid has disappeared. This
>> makes the root cause of the inconsistency is hard to locate. Let us
>> separate such other part issues from f2fs logical bugs in debug version.
>>
>> Signed-off-by: Weichao Guo 
>> ---
>>  fs/f2fs/checkpoint.c | 8 ++--
>>  1 file changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index 8b0945b..16ba96a 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -737,13 +737,17 @@ static int get_checkpoint_version(struct f2fs_sb_info 
>> *sbi, block_t cp_addr,
>>  crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
>>  if (crc_offset > (blk_size - sizeof(__le32))) {
>>  f2fs_msg(sbi->sb, KERN_WARNING,
>> -"invalid crc_offset: %zu", crc_offset);
>> +"invalid crc_offset: %zu at blk_addr: 0x%x",
>> +crc_offset, cp_addr);
>> +f2fs_bug_on(sbi, 1);
> 
> I don't think we can use bug_on here, since we're easily getting this when
> power-cut happened in the middle of checkpoint pack writes, which is an 
> expected
> behavior. Hmm, we need to consider another way to detect that.
We only check CP block crc here. The two CP blocks may have different CP 
versions when
power-cut happened, but their crc value should be valid. IMO, this patch will 
trigger a
bug_on only when some external issues cause CP block crc invalid as one 4K page 
is
persisted atomically.

Thanks,
> 
> Thanks,
> 
>>  return -EINVAL;
>>  }
>>  
>>  crc = cur_cp_crc(*cp_block);
>>  if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
>> -f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
>> +f2fs_msg(sbi->sb, KERN_WARNING,
>> +"invalid crc value at blk_addr: 0x%x", cp_addr);
>> +f2fs_bug_on(sbi, 1);
>>  return -EINVAL;
>>  }
>>  
>> -- 
>> 2.10.1
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v2] f2fs: report cp block corrupted

2018-02-04 Thread guoweichao


On 2018/2/5 15:41, Chao Yu wrote:
> On 2018/2/5 18:18, Weichao Guo wrote:
>> There is a potential inconsistent metadata case due to a cp block
>> crc invalid in the latest checkpoint caused by hardware issues:
>> 1) write nodes into segment x;
>> 2) write checkpoint A;
>> 3) remove nodes in segment x;
>> 4) write checkpoint B;
>> 5) issue discard or write datas into segment x;
>> 6) sudden power-cut;
>> 7) use checkpoint A after reboot as checkpoint B is invalid
>>
>> This inconsistency may be found after several reboots long time later
>> and the kernel log about cp block crc invalid has disappeared. This
>> makes the root cause of the inconsistency is hard to locate. Let us
>> separate such other part issues from f2fs logical bugs in debug version.
> 
> Need a Signed-off here?
My mistake, I've resend another version.

Thanks,
> 
> Anyway,
> 
> Reviewed-by: Chao Yu 
> 
> Thanks,
> 
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: no need to flush NAT bits if no enough space

2018-02-04 Thread guoweichao


On 2018/2/5 15:42, Chao Yu wrote:
> On 2018/2/5 22:30, Weichao Guo wrote:
>> NAT bits are saved at spare space in CP pack. Flushed NAT bits
>> may be overwritten by the CP pack if there is no enough space
>> for NAT bits. And NAT bits will be obsolesced at next mount time
>> if crc|cp_ver not matched. So just skip to flush NAT bits in such
>> needless cases.
>>
>> Signed-off-by: Weichao Guo 
>> ---
>>  fs/f2fs/checkpoint.c | 4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index 0eaafb8..bdc03c9 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -1248,7 +1248,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
>> struct cp_control *cpc)
>>  start_blk = __start_cp_next_addr(sbi);
>>  
>>  /* write nat bits */
>> -if (enabled_nat_bits(sbi, cpc)) {
>> +if (enabled_nat_bits(sbi, cpc) &&
>> +le32_to_cpu(ckpt->cp_pack_total_block_count) <=
>> +sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) {
> 
> Have checked this condition in update_ckpt_flags, right?
Oh,yes. It's my mistake.

Thanks,
> 
> thanks,
> 
>>  __u64 cp_ver = cur_cp_version(ckpt);
>>  block_t blk;
>>  
>>
> 
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: no need to flush NAT bits if no enough space

2018-02-04 Thread guoweichao

On 2018/2/5 22:30, Weichao Guo wrote:
> NAT bits are saved at spare space in CP pack. Flushed NAT bits
> may be overwritten by the CP pack if there is no enough space
> for NAT bits. And NAT bits will be obsolesced at next mount time
Sorry, there is a typo here. 'obsolesced' should be obsolete.

Thanks,

> if crc|cp_ver not matched. So just skip to flush NAT bits in such
> needless cases.
> 
> Signed-off-by: Weichao Guo 
> ---
>  fs/f2fs/checkpoint.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 0eaafb8..bdc03c9 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1248,7 +1248,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
> struct cp_control *cpc)
>   start_blk = __start_cp_next_addr(sbi);
>  
>   /* write nat bits */
> - if (enabled_nat_bits(sbi, cpc)) {
> + if (enabled_nat_bits(sbi, cpc) &&
> + le32_to_cpu(ckpt->cp_pack_total_block_count) <=
> + sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) {
>   __u64 cp_ver = cur_cp_version(ckpt);
>   block_t blk;
>  
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: report cp block corrupted

2018-02-04 Thread guoweichao

Hi Chao,
On 2018/2/4 23:16, Chao Yu wrote:
> On 2018/2/3 20:12, Weichao Guo wrote:
>> There is a potential inconsistent metadata case due to a cp block
>> crc invalid in the latest checkpoint caused by hardware issues:
>> 1) write nodes into segment x;
>> 2) write checkpoint A;
>> 3) remove nodes in segment x;
>> 4) write checkpoint B;
>> 5) issue discard or write datas into segment x;
>> 6) sudden power-cut;
>> 7) use checkpoint A after reboot as checkpoint B is invalid
>>
>> This inconsistency may be found after several reboots long time later
>> and the kernel log about cp block crc invalid has disappeared. This
>> makes the root cause of the inconsistency is hard to locate. Let us
>> separate such other part issues from f2fs logical bugs in debug version.
>>
>> Signed-off-by: Weichao Guo 
>> ---
>>  fs/f2fs/checkpoint.c | 15 ++-
>>  1 file changed, 10 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index 512dca8..15baba75 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -730,6 +730,7 @@ static int get_checkpoint_version(struct f2fs_sb_info 
>> *sbi, block_t cp_addr,
>>  unsigned long blk_size = sbi->blocksize;
>>  size_t crc_offset = 0;
>>  __u32 crc = 0;
>> +int err = 0;
>>
>>  *cp_page = get_meta_page(sbi, cp_addr);
>>  *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
>> @@ -737,18 +738,22 @@ static int get_checkpoint_version(struct f2fs_sb_info 
>> *sbi, block_t cp_addr,
>>  crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
>>  if (crc_offset > (blk_size - sizeof(__le32))) {
>>  f2fs_msg(sbi->sb, KERN_WARNING,
>> -"invalid crc_offset: %zu", crc_offset);
>> -return -EINVAL;
>> +"invalid crc_offset: %zu at blk_addr: 0x%x",
>> +crc_offset, cp_addr);
>> +err = -EINVAL;
>> +f2fs_bug_on(sbi, 1);
> 
> return -EINVAL?

At first, I just added f2fs_bug_on before return -EINVAL. But the pclint check 
tools report
unreachable code warnings. It seems that this suggestion should be ignored. I 
will resend a
new version of this patch.

Thanks,
> 
>>  }
>>
>>  crc = cur_cp_crc(*cp_block);
>>  if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
>> -f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
>> -return -EINVAL;
>> +f2fs_msg(sbi->sb, KERN_WARNING,
>> +"invalid crc value at blk_addr: 0x%x", cp_addr);
>> +err = -EINVAL;
>> +f2fs_bug_on(sbi, 1);
> 
> Ditto,
> 
> Thanks,
> 
>>  }
>>
>>  *version = cur_cp_version(*cp_block);
>> -return 0;
>> +return err;
>>  }
>>
>>  static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
>> --
>> 2.10.1
>>
>>
>> --
>> Check out the vibrant tech community on one of the world's most
>> engaging tech sites, Slashdot.org! http://sdm.link/slashdot
>> ___
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH RFC] f2fs: add PRE2 to mark segments free to one checkpoint but obsolete to the other

2018-01-22 Thread guoweichao


On 2018/1/22 14:19, Chao Yu wrote:
> On 2018/1/22 10:40, guoweichao wrote:
>> Hi Chao,
>>
>> On 2018/1/21 10:34, Chao Yu wrote:
>>> Hi Weichao,
>>>
>>> On 2018/1/20 23:50, guoweichao wrote:
>>>> Hi Chao,
>>>>
>>>> Yes, it is exactly what I mean.
>>>> It seems that F2FS has no responsibility to cover hardware problems.
>>>> However, file systems usually choose redundancy for super block fault 
>>>> tolerance.
>>>> So I think we actually have considered some external errors when designing 
>>>> a file system.
>>>> Our dual checkpoint mechanism is mainly designed to keep at least one 
>>>> stable
>>>> CP pack while creating a new one in case of SPO. And it has fault tolerant 
>>>> effects.
>>>> As CP pack is also very critical to F2FS, why not make checkpoint more 
>>>> robustness
>>>
>>> I think you're trying to change basic design of A/B upgrade system to A/B/C 
>>> one,
>>> which can keep always two checkpoint valid. There will be no simple 
>>> modification
>>> to implement that one, in where we should cover not only prefree case but 
>>> also
>>> SSR case.
>>>
>> Nope, I do not intent to add another checkpoint. My main idea is reusing the 
>> invalid blocks
>> after two checkpoints are recorded them as invalid. We could mark or record 
>> the blocks that
>> are free to one checkpoint and valid to the other one as PARTIAL_FREE. And 
>> when set prefree
>> segments as free or use invalid blocks in SSR mode, just igore PARTIAL_FREE 
>> blocks. If there
>> is no other blocks can be used, just write a new checkpoint.
> 
> Actually, your implementation looks like a variant of A/B/C upgrade system
> (or A/B upgrade system), as checkpoint C (or inflight A/B) uses main area
> separated from checkpoint A/B, and its inflight dirty metadata will persist
> into checkpoint A or B since we haven't actually allocated checkpoint C
> area during mkfs.
> 
> Anyway, I don't think we have a strong reason to implement this one.
> 
>>
>>> IMO, the biggest problem there is available space, since in checkpoint C, 
>>> we can
>>> only use invalid blocks in both checkpoint A and B, so in some cases there 
>>> will
>>> almost be no valid space we can use during allocation, result in frequently
>>> checkpoint.
>>>
>>> IMO, what we can do is trying to keep last valid checkpoint being integrity 
>>> as
>>> possible as we can. One way is that we can add mirror or parity for the
>>> checkpoint which can help to do recovery once checkpoint is corrupted. At
>>> least, I hope that with it in debug version we can help hardware staff to 
>>> fix
>>> their issue instead of wasting much time to troubleshoot filesystem issue.
>>>
>> Yes, I agree. Adding a backup for the latest checkpoint in debug version is 
>> OK. Sometimes,
> 
> Parity will be better due to light overhead?
Yes, backup may also cause disk layout changes. How about adding a parity block 
for the blocks
in a checkpoint similar to RAID-5 or other erasure code methods?

Thanks,

> 
> Thanks,
> 
>> hardware issues are hard to uncover. We should try to separate them from 
>> F2FS.
>>
>> Thanks,
>>
>>> Thanks,
>>>
>>>> with simple modification in current design and little overhead except for 
>>>> FG_GC.
>>>> Of cause, keep unchanged is OK. I just want to discuss this proposal. :)
>>>>
>>>> Thanks,
>>>> *From:*Chao Yu
>>>> *To:*guoweichao,jaeg...@kernel.org,
>>>> *Cc:*linux-f2fs-devel@lists.sourceforge.net,linux-fsde...@vger.kernel.org,heyunlei,
>>>> *Date:*2018-01-20 15:43:23
>>>> *Subject:*Re: [PATCH RFC] f2fs: add PRE2 to mark segments free to one 
>>>> checkpoint but obsolete to the other
>>>>
>>>> Hi Weichao,
>>>>
>>>> On 2018/1/20 7:29, Weichao Guo wrote:
>>>>> Currently, we set prefree segments as free ones after writing
>>>>> a checkpoint, then believe the segments could be used safely.
>>>>> However, if a sudden power-off coming and the newer checkpoint
>>>>> corrupted due to hardware issues at the same time, we will try
>>>>> to use the old checkpoint and may face an inconsistent file
>>>>> system status.
>>>>
>>>> IIUC, you mean:
>>>>
>>>> 1. write nodes into seg

Re: [f2fs-dev] [PATCH RFC] f2fs: add PRE2 to mark segments free to one checkpoint but obsolete to the other

2018-01-21 Thread guoweichao
Hi Xiang,

On 2018/1/21 11:27, Gao Xiang wrote:
> Hi Weichao,
> 
> 
> On 2018/1/21 0:02, guoweichao wrote:
>> Hi Xiang,
>>
>> it's not related to SPOR. Just consider the case given by Chao Yu.
>>
> 
> (It seems this email was not sent successfully, I resend it just for 
> reference only)
> Oh I see, I have considered the scenario what Chao said before.
> 
> 1. write data into segment x;
> 2. write checkpoint A;
> 3. remove data in segment x;
> 4. write checkpoint B;
> 5. issue discard or write data into segment x;
> 6. sudden power-cut
> 
> Since f2fs is designed for double backup, 5) and 6), I think, actually 
> belongs to checkpoint C.
> and when we are in checkpoint C, checkpoint A becomes unsafe because the 
> latest checkpoint is B.
> and I think in that case we cannot prevent data writeback or something to 
> pollute checkpoint A.
We do not have to prevent data writeback, we could just delay to reuse the 
invalid blocks which are
valid to the older checkpoint.
> 
> However, node segments (another metadata) would be special,
> but I have no idea whether introducing PRE2 would make all cases safe or not.
By adding a PRE2 status, we could set prefree node segments in two steps: 
PRE->PRE2->FREE, and therefore
make sure using free segments are safe to both chepoints. I forgot to consider 
SSR at first, but the main
idea is similar: avoiding to use invalid blocks that are not invalid to the 
older checkpoint, writing a
new checkpoint if there is no other invalid blocks.
> 
> In addition, if some data segments change between checkpoint A and C,
> some weird data that it didn't have (new data or data from other files) would 
> be gotten when switching back to checkpoint A.
Data blocks damage do not affect file system consistency.

Anyway, this is a hardware related problem. We could just keep unchanged and 
try to exposure hardware issues.

Thanks,
> 
> 
> Thanks,
> 
>> Thanks,
>> *From:*Gao Xiang
>> *To:*guoweichao,
>> *Cc:*linux-f2fs-devel@lists.sourceforge.net,heyunlei,
>> *Date:*2018-01-20 11:49:22
>> *Subject:*Re: [PATCH RFC] f2fs: add PRE2 to mark segments free to one 
>> checkpoint but obsolete to the other
>>
>> Hi Weichao,
>>
>>
>> On 2018/1/19 23:47, guoweichao wrote:
>> > A critical case is using the free segments as data segments which
>> > are previously node segments to the old checkpoint. With fault injecting
>> > of the newer CP pack, fsck can find errors when checking the sanity of nid.
>> Sorry to interrupt because I'm just curious about this scenario and the
>> detail.
>>
>> As far as I know, if the whole blocks in a segment become invalid,
>> the segment will become PREFREE, and then if a checkpoint is followed,
>> we can reuse this segment or
>> discard the whole segment safely after this checkpoint was done
>> (I think It makes sure that this segment is certainly FREE and not
>> reused in this checkpoint).
>>
>> If the segment in the old checkpoint is a node segment, and node blocks
>> in the segment are all invalid until the new checkpoint.
>> It seems no danger to reuse the FREE node segment as a data segment in
>> the next checkpoint?
>>
>> or something related to SPOR? In my mind f2fs-tools ignores POR node
>> chain...
>>
>> Thanks,
>> > On 2018/1/20 7:29, Weichao Guo wrote:
>> >> Currently, we set prefree segments as free ones after writing
>> >> a checkpoint, then believe the segments could be used safely.
>> >> However, if a sudden power-off coming and the newer checkpoint
>> >> corrupted due to hardware issues at the same time, we will try
>> >> to use the old checkpoint and may face an inconsistent file
>> >> system status.
>> >>
>> >> How about add an PRE2 status for prefree segments, and make
>> >> sure the segments could be used safely to both checkpoints?
>> >> Or any better solutions? Or this is not a problem?
>> >>
>> >> Look forward to your comments!
>> >>
>> >> Signed-off-by: Weichao Guo <guoweic...@huawei.com>
>> >> ---
>> >>   fs/f2fs/gc.c  | 11 +--
>> >>   fs/f2fs/segment.c | 21 ++---
>> >>   fs/f2fs/segment.h |  6 ++
>> >>   3 files changed, 33 insertions(+), 5 deletions(-)
>> >>
>> >> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> >> index 33e7969..153e3ea 100644
>> >> --- a/fs/f2fs/gc.c
>> >> +++ b/fs/f2fs/gc.c
>> >> @@ -1030,7 +1030,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool syn

Re: [f2fs-dev] [PATCH RFC] f2fs: add PRE2 to mark segments free to one checkpoint but obsolete to the other

2018-01-21 Thread guoweichao
Hi Chao,

On 2018/1/21 10:34, Chao Yu wrote:
> Hi Weichao,
> 
> On 2018/1/20 23:50, guoweichao wrote:
>> Hi Chao,
>>
>> Yes, it is exactly what I mean.
>> It seems that F2FS has no responsibility to cover hardware problems.
>> However, file systems usually choose redundancy for super block fault 
>> tolerance.
>> So I think we actually have considered some external errors when designing a 
>> file system.
>> Our dual checkpoint mechanism is mainly designed to keep at least one stable
>> CP pack while creating a new one in case of SPO. And it has fault tolerant 
>> effects.
>> As CP pack is also very critical to F2FS, why not make checkpoint more 
>> robustness
> 
> I think you're trying to change basic design of A/B upgrade system to A/B/C 
> one,
> which can keep always two checkpoint valid. There will be no simple 
> modification
> to implement that one, in where we should cover not only prefree case but also
> SSR case.
> 
Nope, I do not intent to add another checkpoint. My main idea is reusing the 
invalid blocks
after two checkpoints are recorded them as invalid. We could mark or record the 
blocks that
are free to one checkpoint and valid to the other one as PARTIAL_FREE. And when 
set prefree
segments as free or use invalid blocks in SSR mode, just igore PARTIAL_FREE 
blocks. If there
is no other blocks can be used, just write a new checkpoint.

> IMO, the biggest problem there is available space, since in checkpoint C, we 
> can
> only use invalid blocks in both checkpoint A and B, so in some cases there 
> will
> almost be no valid space we can use during allocation, result in frequently
> checkpoint.
> 
> IMO, what we can do is trying to keep last valid checkpoint being integrity as
> possible as we can. One way is that we can add mirror or parity for the
> checkpoint which can help to do recovery once checkpoint is corrupted. At
> least, I hope that with it in debug version we can help hardware staff to fix
> their issue instead of wasting much time to troubleshoot filesystem issue.
> 
Yes, I agree. Adding a backup for the latest checkpoint in debug version is OK. 
Sometimes,
hardware issues are hard to uncover. We should try to separate them from F2FS.

Thanks,

> Thanks,
> 
>> with simple modification in current design and little overhead except for 
>> FG_GC.
>> Of cause, keep unchanged is OK. I just want to discuss this proposal. :)
>>
>> Thanks,
>> *From:*Chao Yu
>> *To:*guoweichao,jaeg...@kernel.org,
>> *Cc:*linux-f2fs-devel@lists.sourceforge.net,linux-fsde...@vger.kernel.org,heyunlei,
>> *Date:*2018-01-20 15:43:23
>> *Subject:*Re: [PATCH RFC] f2fs: add PRE2 to mark segments free to one 
>> checkpoint but obsolete to the other
>>
>> Hi Weichao,
>>
>> On 2018/1/20 7:29, Weichao Guo wrote:
>>> Currently, we set prefree segments as free ones after writing
>>> a checkpoint, then believe the segments could be used safely.
>>> However, if a sudden power-off coming and the newer checkpoint
>>> corrupted due to hardware issues at the same time, we will try
>>> to use the old checkpoint and may face an inconsistent file
>>> system status.
>>
>> IIUC, you mean:
>>
>> 1. write nodes into segment x;
>> 2. write checkpoint A;
>> 3. remove nodes in segment x;
>> 4. write checkpoint B;
>> 5. issue discard or write datas into segment x;
>> 6. sudden power-cut
>>
>> But after reboot, we found checkpoint B is corrupted due to hardware, and
>> then start to use checkpoint A, but nodes in segment x recorded as valid
>> data in checkpoint A has been overcovered in step 5), so we will encounter
>> inconsistent meta data, right?
>>
>> Thanks,
>>
>>>
>>> How about add an PRE2 status for prefree segments, and make
>>> sure the segments could be used safely to both checkpoints?
>>> Or any better solutions? Or this is not a problem?
>>>
>>> Look forward to your comments!
>>>
>>> Signed-off-by: Weichao Guo <guoweic...@huawei.com>
>>> ---
>>>  fs/f2fs/gc.c  | 11 +--
>>>  fs/f2fs/segment.c | 21 ++---
>>>  fs/f2fs/segment.h |  6 ++
>>>  3 files changed, 33 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>> index 33e7969..153e3ea 100644
>>> --- a/fs/f2fs/gc.c
>>> +++ b/fs/f2fs/gc.c
>>> @@ -1030,7 +1030,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>>>* threshold, we can make them free by checkpoint. Then, we
>>>* secure free segments w

Re: [f2fs-dev] [PATCH RFC] f2fs: add PRE2 to mark segments free to one checkpoint but obsolete to the other

2018-01-19 Thread guoweichao

A critical case is using the free segments as data segments which
are previously node segments to the old checkpoint. With fault injecting
of the newer CP pack, fsck can find errors when checking the sanity of nid.

On 2018/1/20 7:29, Weichao Guo wrote:
> Currently, we set prefree segments as free ones after writing
> a checkpoint, then believe the segments could be used safely.
> However, if a sudden power-off coming and the newer checkpoint
> corrupted due to hardware issues at the same time, we will try
> to use the old checkpoint and may face an inconsistent file
> system status.
> 
> How about add an PRE2 status for prefree segments, and make
> sure the segments could be used safely to both checkpoints?
> Or any better solutions? Or this is not a problem?
> 
> Look forward to your comments!
> 
> Signed-off-by: Weichao Guo 
> ---
>  fs/f2fs/gc.c  | 11 +--
>  fs/f2fs/segment.c | 21 ++---
>  fs/f2fs/segment.h |  6 ++
>  3 files changed, 33 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 33e7969..153e3ea 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1030,7 +1030,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>* threshold, we can make them free by checkpoint. Then, we
>* secure free segments which doesn't need fggc any more.
>*/
> - if (prefree_segments(sbi)) {
> + if (prefree_segments(sbi) || prefree2_segments(sbi)) {
> + ret = write_checkpoint(sbi, );
> + if (ret)
> + goto stop;
> + }
> + if (has_not_enough_free_secs(sbi, 0, 0) && 
> prefree2_segments(sbi)) {
>   ret = write_checkpoint(sbi, );
>   if (ret)
>   goto stop;
> @@ -1063,8 +1068,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>   goto gc_more;
>   }
>  
> - if (gc_type == FG_GC)
> + if (gc_type == FG_GC) {
> + ret = write_checkpoint(sbi, );
>   ret = write_checkpoint(sbi, );
> + }
>   }
>  stop:
>   SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 2e8e054d..9dec445 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1606,7 +1606,7 @@ static void set_prefree_as_free_segments(struct 
> f2fs_sb_info *sbi)
>   unsigned int segno;
>  
>   mutex_lock(_i->seglist_lock);
> - for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
> + for_each_set_bit(segno, dirty_i->dirty_segmap[PRE2], MAIN_SEGS(sbi))
>   __set_test_and_free(sbi, segno);
>   mutex_unlock(_i->seglist_lock);
>  }
> @@ -1617,13 +1617,17 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, 
> struct cp_control *cpc)
>   struct list_head *head = >entry_list;
>   struct discard_entry *entry, *this;
>   struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> - unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
> + unsigned long *prefree_map;
>   unsigned int start = 0, end = -1;
>   unsigned int secno, start_segno;
>   bool force = (cpc->reason & CP_DISCARD);
> + int phase = 0;
> + enum dirty_type dirty_type = PRE2;
>  
>   mutex_lock(_i->seglist_lock);
>  
> +next_step:
> + prefree_map = dirty_i->dirty_segmap[dirty_type];
>   while (1) {
>   int i;
>   start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
> @@ -1635,7 +1639,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, 
> struct cp_control *cpc)
>   for (i = start; i < end; i++)
>   clear_bit(i, prefree_map);
>  
> - dirty_i->nr_dirty[PRE] -= end - start;
> + dirty_i->nr_dirty[dirty_type] -= end - start;
>  
>   if (!test_opt(sbi, DISCARD))
>   continue;
> @@ -1663,6 +1667,16 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, 
> struct cp_control *cpc)
>   else
>   end = start - 1;
>   }
> + if (phase == 0) {
> + /* status change: PRE -> PRE2 */
> + for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], 
> MAIN_SEGS(sbi))
> + if (!test_and_set_bit(segno, prefree_map))
> + dirty_i->nr_dirty[PRE2]++;
> +
> + phase = 1;
> + dirty_type = PRE;
> + goto next_step;
> + }
>   mutex_unlock(_i->seglist_lock);
>  
>   /* send small discards */
> @@ -2245,6 +2259,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int 
> type)
>  
>   mutex_lock(_i->seglist_lock);
>   __remove_dirty_segment(sbi, new_segno, PRE);
> + __remove_dirty_segment(sbi, new_segno, PRE2);
>   __remove_dirty_segment(sbi, new_segno, DIRTY);
>   

Re: [f2fs-dev] [PATCH] f2fs: no need to punch hole far beyond the file size

2017-10-13 Thread guoweichao


On 2017/10/13 20:43, Chao Yu wrote:
> 
> On 2017/10/14 0:59, Weichao Guo wrote:
>> Punching hole with a length which far larger than file size will cause 
>> a long time looping in truncate_hole after lock_op, it can be 
>> reproduced as the following:
>>
>> $ echo "abc" > file
>> $ xfs_io -c "fpunch 0 HUGE_LENGTH" file
>>
>> This may cause other I/Os blocked a long time. Let's fix this case by 
>> setting the hole to end after the page that contains i_size.
> You know, we can reserve blocks across EOF through fallocate(fd, 
> FALLOC_FL_KEEP_SIZE, ofs, len), so this patch will cause f2fs losing ability 
> of punching preallocated blocks across EOF.
> 
Oh, I see. Let me prepare another patch.
> How about below change:
> 
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cfee75bf88d9..91fd2ef7221b 
> 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -847,7 +847,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, 
> pgoff_t pg_end)
>   err = get_dnode_of_data(, pg_start, LOOKUP_NODE);
>   if (err) {
>   if (err == -ENOENT) {
> - pg_start++;
> + pg_start = get_next_page_offset(, pg_start);
>   continue;
>   }
>   return err;
> 
> Thanks,
> 
>>
>> Signed-off-by: Weichao Guo 
>> ---
>>  fs/f2fs/file.c | 8 
>>  1 file changed, 8 insertions(+)
>>
>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 517e112..1b44d26 
>> 100644
>> --- a/fs/f2fs/file.c
>> +++ b/fs/f2fs/file.c
>> @@ -873,6 +873,14 @@ static int punch_hole(struct inode *inode, loff_t 
>> offset, loff_t len)
>>  if (ret)
>>  return ret;
>>  
>> +/*
>> + * If the hole extends beyond i_size, set the hole
>> + * to end after the page that contains i_size
>> + */
>> +if (offset + len > inode->i_size)
>> +len = inode->i_size + PAGE_CACHE_SIZE -
>> +(inode->i_size & (PAGE_CACHE_SIZE - 1)) - offset;
>> +
>>  pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
>>  pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
>>  
>>


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v2] f2fs: convert inline data for direct I/O & FI_NO_PREALLOC

2017-09-29 Thread guoweichao
From: Weichao Guo 

In FI_NO_PREALLOC cases, direct I/O path may allocate blocks for an
inode but keep its inline data flag. This inconsistency may trigger
vfs clear_inode nrpages bug_on when evicting the inode. We should
convert inline data first in this case.

Signed-off-by: Weichao Guo 
---
 fs/f2fs/data.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 36b5352..3793ba4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -833,6 +833,13 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct 
iov_iter *from)
struct f2fs_map_blocks map;
int err = 0;
 
+   /* convert inline data for Direct I/O*/
+   if (iocb->ki_flags & IOCB_DIRECT) {
+   err = f2fs_convert_inline_inode(inode);
+   if (err)
+   return err;
+   }
+
if (is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
 
@@ -845,15 +852,11 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct 
iov_iter *from)
 
map.m_next_pgofs = NULL;
 
-   if (iocb->ki_flags & IOCB_DIRECT) {
-   err = f2fs_convert_inline_inode(inode);
-   if (err)
-   return err;
+   if (iocb->ki_flags & IOCB_DIRECT)
return f2fs_map_blocks(inode, , 1,
__force_buffered_io(inode, WRITE) ?
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO);
-   }
if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
-- 
2.10.1


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: convert inline data for direct I/O & FI_NO_PREALLOC

2017-09-29 Thread guoweichao
Sorry, made some typos in this patch. Ignore it.

On 2017/9/29 22:25, guoweic...@huawei.com wrote:
> From: Weichao Guo 
> 
> In FI_NO_PREALLOC cases, direct I/O path may allocate blocks for an
> inode but keep its inline data flag. This inconsistency may trigger
> vfs clear_inode nrpages bug_on when evicting the inode. We should
> convert inline data first in this case.
> 
> Signed-off-by: Weichao Guo 
> ---
>  fs/f2fs/data.c | 13 -
>  1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 36b5352..64c9cfc 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -833,6 +833,13 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct 
> iov_iter *from)
>   struct f2fs_map_blocks map;
>   int err = 0;
>  
> + /* convert inline data for Direct I/O*/
> + if (iocb->ki_flags & IOCB_DIRECT) {
> + ret = f2fs_convert_inline_inode(inode);
> + if (ret)
> + return ret;
> + }
> +
>   if (is_inode_flag_set(inode, FI_NO_PREALLOC))
>   return 0;
>  
> @@ -845,15 +852,11 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct 
> iov_iter *from)
>  
>   map.m_next_pgofs = NULL;
>  
> - if (iocb->ki_flags & IOCB_DIRECT) {
> - err = f2fs_convert_inline_inode(inode);
> - if (err)
> - return err;
> + if (iocb->ki_flags & IOCB_DIRECT)
>   return f2fs_map_blocks(inode, , 1,
>   __force_buffered_io(inode, WRITE) ?
>   F2FS_GET_BLOCK_PRE_AIO :
>   F2FS_GET_BLOCK_PRE_DIO);
> - }
>   if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
>   err = f2fs_convert_inline_inode(inode);
>   if (err)
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH] f2fs: convert inline data for direct I/O & FI_NO_PREALLOC

2017-09-29 Thread guoweichao
From: Weichao Guo 

In FI_NO_PREALLOC cases, direct I/O path may allocate blocks for an
inode but keep its inline data flag. This inconsistency may trigger
vfs clear_inode nrpages bug_on when evicting the inode. We should
convert inline data first in this case.

Signed-off-by: Weichao Guo 
---
 fs/f2fs/data.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 36b5352..64c9cfc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -833,6 +833,13 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct 
iov_iter *from)
struct f2fs_map_blocks map;
int err = 0;
 
+   /* convert inline data for Direct I/O*/
+   if (iocb->ki_flags & IOCB_DIRECT) {
+   ret = f2fs_convert_inline_inode(inode);
+   if (ret)
+   return ret;
+   }
+
if (is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
 
@@ -845,15 +852,11 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct 
iov_iter *from)
 
map.m_next_pgofs = NULL;
 
-   if (iocb->ki_flags & IOCB_DIRECT) {
-   err = f2fs_convert_inline_inode(inode);
-   if (err)
-   return err;
+   if (iocb->ki_flags & IOCB_DIRECT)
return f2fs_map_blocks(inode, , 1,
__force_buffered_io(inode, WRITE) ?
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO);
-   }
if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
-- 
2.10.1


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: direct I/O -> buffered I/O for FI_NO_PREALLOC & FI_INLINE_DATA

2017-09-28 Thread guoweichao
Sure, I will prepare another patch to fix this case.

On 2017/9/28 22:23, Chao Yu wrote:
> On 2017/9/29 5:47, guoweic...@huawei.com wrote:
>> From: Weichao Guo 
>>
>> In FI_NO_PREALLOC cases, direct I/O path may allocate blocks for an inode but
>> keep its inline data flag. This inconsistency may trigger vfs clear_inode
>> nrpages bug_on when evicting the inode. Let buffered I/O handle this case.
> 
> Good catch!
> 
> Could we just convert inline data by force in f2fs_preallocate_blocks for this
> case? it can avoid write fallback from direct IO to buffered IO.
> 
> Thanks,
> 
> 
>>
>> Signed-off-by: Weichao Guo 
>> ---
>>  fs/f2fs/data.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index 36b5352..039f6d8 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -822,7 +822,7 @@ static int __allocate_data_block(struct dnode_of_data 
>> *dn)
>>  
>>  static inline bool __force_buffered_io(struct inode *inode, int rw)
>>  {
>> -return (f2fs_encrypted_file(inode) ||
>> +return (f2fs_encrypted_file(inode) || f2fs_has_inline_data(inode) ||
>>  (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
>>  F2FS_I_SB(inode)->s_ndevs);
>>  }
>>
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] 答复: [PATCH] f2fs: make sure f2fs_gc returns consistent errno

2017-05-10 Thread guoweichao
Agree, I will send a new verison of this patch.

-邮件原件-
发件人: Chao Yu [mailto:c...@kernel.org] 
发送时间: 2017年5月9日 23:26
收件人: guoweichao; jaeg...@kernel.org
抄送: linux-f2fs-devel@lists.sourceforge.net
主题: Re: [f2fs-dev] [PATCH] f2fs: make sure f2fs_gc returns consistent errno

On 2017/5/9 19:37, Weichao Guo wrote:
> By default, f2fs_gc returns -EINVAL in general error cases, e.g., no 
> victim was selected. However, the default errno may be overwritten in two 
> cases:
> gc_more and BG_GC -> FG_GC. We should return consistent errno in such cases.

How about clean up this patch as below, which always set return value before 
jumping to 'stop' tag.

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ac2f74e40eea..6906a284c98d 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -957,7 +957,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,  {
int gc_type = sync ? FG_GC : BG_GC;
int sec_freed = 0;
-   int ret = -EINVAL;
+   int ret;
struct cp_control cpc;
unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
@@ -967,8 +967,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,

cpc.reason = __get_cp_reason(sbi);
 gc_more:
-   if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
+   if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
+   ret = -EINVAL;
goto stop;
+   }
if (unlikely(f2fs_cp_error(sbi))) {
ret = -EIO;
goto stop;
@@ -989,6 +991,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
gc_type = FG_GC;
}

+   ret = -EINVAL;
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
if (gc_type == BG_GC && !background)
goto stop;

Thanks,

> 
> Signed-off-by: Weichao Guo <guoweic...@huawei.com>
> ---
>  fs/f2fs/gc.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 0265221..32c9463 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -965,6 +965,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>  
>   cpc.reason = __get_cp_reason(sbi);
>  gc_more:
> + ret = -EINVAL;
>   if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
>   goto stop;
>   if (unlikely(f2fs_cp_error(sbi))) {
> @@ -982,6 +983,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>   ret = write_checkpoint(sbi, );
>   if (ret)
>   goto stop;
> + else
> + ret = -EINVAL;
>   }
>   if (has_not_enough_free_secs(sbi, 0, 0))
>   gc_type = FG_GC;
> 
--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: skip checkpoint if having a dirty segment but no prefree at BG_GC -> FG_GC

2017-02-25 Thread guoweichao
Hi Jaegeuk,

I regard no enough free sections as a precondition when talking about
BG_GC -> FG_GC. I mean that for both case a) and b) I mentioned has no enough
free sections implicitly. 

On 2017/2/25 2:49, Jaegeuk Kim wrote:
> Hi Weichao,
> 
> On 02/25, Weichao Guo wrote:
>> When turning to FG_GC from BG_GC, we need to write checkpoint in 2 cases:
>> * a) BG_GC have made some progress, e.g.: some prefree segments.
>> * b) There is no victim and no prefree segment.
> 
> You missed
>   * c) has_not_enough_free_secs() introduced by
>   6e17bfbc75a5cb ("f2fs: fix to overcome inline_data floods")
As we have enabled SSR for warm node(5b6c6be2d8 ("f2fs: use SSR for warm node 
as well")),
I think inline data floods should not be a problem in most cases.
> 
> And, Yunlong pointed that we can't find a case to avoid write_checkpoint()
> mostly due to c) condition.
As inline data floods is an extreme case, and there is little possibility 
caused panic
for inline data floods now, there should be lots of chance to skip checkpoint. 
I mean
that we can make some accurate inline data floods checking before writing 
checkpoint.
> 
> Thanks,
> 
>>
>> For case a), previously, we also check if there is a dirty segment for
>> infering blocks moving in last BG_GC. But dirty segments do not always
>> indicate that, BG_GC may just start and do not move any blocks at all.
>> Futhermore, skipping checkpoint if there are some dirty segments but no
>> prefree segments is OK.
> 
> 
>>
>> Signed-off-by: Weichao Guo 
>> ---
>>  fs/f2fs/gc.c | 7 ++-
>>  1 file changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> index 6c996e3..30d206a 100644
>> --- a/fs/f2fs/gc.c
>> +++ b/fs/f2fs/gc.c
>> @@ -958,7 +958,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool 
>> background)
>>   * enough free sections, we should flush dent/node blocks and do
>>   * garbage collections.
>>   */
>> -ret = write_checkpoint(sbi, );
>> +if (prefree_segments(sbi))
>> +ret = write_checkpoint(sbi, );
>> +else if (!__get_victim(sbi, , gc_type) {
>> +segno = NULL_SEGNO;
>> +ret = write_checkpoint(sbi, );
>> +}
>>  if (ret)
>>  goto stop;
>>  } else if (gc_type == BG_GC && !background) {
>> -- 
>> 2.10.1
> 
> .
> 

Thanks,
Weichao


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v2] F2FS customized migrate_page callback

2016-09-19 Thread guoweichao


On 2016/9/19 22:48, Chao Yu wrote:
> On 2016/9/20 5:03, Weichao Guo wrote:
>> This patch improves the migration of dirty pages and allows migrating atomic
>> written pages that F2FS uses in Page Cache. Instead of the fallback releasing
>> page path, it provides better performance for memory compaction, CMA and 
>> other
>> users of memory page migrating. For dirty pages, there is no need to write 
>> back
>> first when migrating. For an atomic written page before committing, we can
>> migrate the page and update the related 'inmem_pages' list at the same time.
>>
>> Signed-off-by: Weichao Guo 
>> ---
>>  fs/f2fs/checkpoint.c |  3 +++
>>  fs/f2fs/data.c   | 56 
>> 
>>  fs/f2fs/f2fs.h   |  4 
>>  fs/f2fs/node.c   |  3 +++
>>  4 files changed, 66 insertions(+)
>>
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index df56a43..a366521 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -388,6 +388,9 @@ const struct address_space_operations f2fs_meta_aops = {
>>  .set_page_dirty = f2fs_set_meta_page_dirty,
>>  .invalidatepage = f2fs_invalidate_page,
>>  .releasepage= f2fs_release_page,
>> +#ifdef CONFIG_MIGRATION
>> +.migratepage= f2fs_migrate_page,
>> +#endif
>>  };
>>  
>>  static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index 528c3c0..eb0d63f 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -1882,6 +1882,59 @@ static sector_t f2fs_bmap(struct address_space 
>> *mapping, sector_t block)
>>  return generic_block_bmap(mapping, block, get_data_block_bmap);
>>  }
>>  
>> +#ifdef CONFIG_MIGRATION
>> +
>> +#include 
>> +
>> +int f2fs_migrate_page(struct address_space *mapping,
>> +struct page *newpage, struct page *page, enum migrate_mode mode)
>> +{
>> +int rc, extra_count;
>> +struct f2fs_inode_info *fi = F2FS_I(mapping->host);
>> +bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);
>> +
>> +BUG_ON(PageWriteback(page));
>> +
>> +/* migrating an atomic written page is safe with the inmem_lock hold */
>> +if (atomic_written && !mutex_trylock(>inmem_lock))
>> +return -EAGAIN;
>> +
>> +/*
>> + * A reference is expected if PagePrivate set when move mapping,
>> + * however F2FS breaks this for maintaining dirty page counts when
>> + * truncating pages. So here adjusting the 'extra_count' make it work.
>> + */
>> +extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
>> +rc = migrate_page_move_mapping(mapping, newpage,
>> +page, NULL, mode, extra_count);
>> +if (rc != MIGRATEPAGE_SUCCESS) {
>> +if (atomic_written)
>> +mutex_unlock(>inmem_lock);
>> +return rc;
>> +}
>> +
>> +if (atomic_written) {
>> +struct inmem_pages *cur;
>> +list_for_each_entry(cur, >inmem_pages, list)
>> +if (cur->page == page) {
>> +cur->page = newpage;
>> +break;
>> +}
>> +mutex_unlock(>inmem_lock);
>> +put_page(page);
>> +get_page(newpage);
> 
> Should we also do put_page(oldpage) & get_page(newpage) for 
> non-atomic-written page?
> 
No, the 'page_count' of the old/new page for Page Cache has been maintained in 
'migrate_page_move_mapping'. As atomic-written pages have an additional 
reference, we should update their 'page_count' here.
> Thanks,
> 
>> +}
>> +
>> +if (PagePrivate(page))
>> +SetPagePrivate(newpage);
>> +set_page_private(newpage, page_private(page));
>> +
>> +migrate_page_copy(newpage, page);
>> +
>> +return MIGRATEPAGE_SUCCESS;
>> +}
>> +#endif
>> +
>>  const struct address_space_operations f2fs_dblock_aops = {
>>  .readpage   = f2fs_read_data_page,
>>  .readpages  = f2fs_read_data_pages,
>> @@ -1894,4 +1947,7 @@ const struct address_space_operations f2fs_dblock_aops 
>> = {
>>  .releasepage= f2fs_release_page,
>>  .direct_IO  = f2fs_direct_IO,
>>  .bmap   = f2fs_bmap,
>> +#ifdef CONFIG_MIGRATION
>> +.migratepage= f2fs_migrate_page,
>> +#endif
>>  };
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 5d2aa6a..b951482 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -2126,6 +2126,10 @@ int f2fs_fiemap(struct inode *inode, struct 
>> fiemap_extent_info *, u64, u64);
>>  void f2fs_set_page_dirty_nobuffers(struct page *);
>>  void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
>>  int f2fs_release_page(struct page *, gfp_t);
>> +#ifdef CONFIG_MIGRATION
>> +int f2fs_migrate_page(struct address_space *mapping,
>> +struct page *newpage, struct page *page, enum migrate_mode mode);
>> +#endif
>>  
>>  /*
>>   * gc.c
>> diff --git a/fs/f2fs/node.c