[f2fs-dev] [PATCH RFC v5] f2fs: flush cp pack except cp pack 2 page at first

2018-02-09 Thread Gao Xiang via Linux-f2fs-devel
Previously, we attempt to flush the whole cp pack in a single bio,
however, when suddenly powering off at this time, we could get into
an extreme scenario that cp pack 1 page and cp pack 2 page are updated
and latest, but payload or current summaries are still partially
outdated. (see reliable write in the UFS specification)

This patch submits the whole cp pack except cp pack 2 page at first,
and then writes the cp pack 2 page with an extra independent
bio with pre-io barrier.

Signed-off-by: Gao Xiang 
Reviewed-by: Chao Yu 
---
Change log from v4:
  - remove redundant "filemap_fdatawait_range"
  - filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
  - filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);
  - move f2fs_flush_device_cache to a more suitable position
  - wait_on_all_pages_writeback after commit_checkpoint
  - since we remove lots of redundant code, I think it's acceptable
  - and it will ensure one checkpoint safety.
Change log from v3:
  - further review comments are applied from Jaegeuk and Chao
  - Tested on this patch (without multiple-device): mount, boot Android with 
f2fs userdata and make fragment
  - If any problem with this patch or I miss something, please kindly share 
your comments, thanks :)
Change log from v2:
  - Apply the review comments from Chao
Change log from v1:
  - Apply the review comments from Chao
  - time data from "finish block_ops" to " finish checkpoint" (tested on ARM64 
with TOSHIBA 128GB UFS):
 Before patch: 0.002273  0.001973  0.002789  0.005159  0.002050
 After patch: 0.002502  0.001624  0.002487  0.003049  0.002696

 fs/f2fs/checkpoint.c | 69 ++--
 1 file changed, 46 insertions(+), 23 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 512dca8..4e352cf 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1162,6 +1162,39 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
+static void commit_checkpoint(struct f2fs_sb_info *sbi,
+   void *src, block_t blk_addr)
+{
+   struct writeback_control wbc = {
+   .for_reclaim = 0,
+   };
+
+   /*
+* pagevec_lookup_tag and lock_page again will take
+* some extra time. Therefore, update_meta_pages and
+* sync_meta_pages are combined in this function.
+*/
+   struct page *page = grab_meta_page(sbi, blk_addr);
+   int err;
+
+   memcpy(page_address(page), src, PAGE_SIZE);
+   set_page_dirty(page);
+
+   f2fs_wait_on_page_writeback(page, META, true);
+   f2fs_bug_on(sbi, PageWriteback(page));
+   if (unlikely(!clear_page_dirty_for_io(page)))
+   f2fs_bug_on(sbi, 1);
+
+   /* writeout cp pack 2 page */
+   err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
+   f2fs_bug_on(sbi, err);
+
+   f2fs_put_page(page, 0);
+
+   /* submit checkpoint (with barrier if NOBARRIER is not set) */
+   f2fs_submit_merged_write(sbi, META_FLUSH);
+}
+
 static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1264,16 +1297,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
}
}
 
-   /* need to wait for end_io results */
-   wait_on_all_pages_writeback(sbi);
-   if (unlikely(f2fs_cp_error(sbi)))
-   return -EIO;
-
-   /* flush all device cache */
-   err = f2fs_flush_device_cache(sbi);
-   if (err)
-   return err;
-
/* write out checkpoint buffer at block 0 */
update_meta_page(sbi, ckpt, start_blk++);
 
@@ -1301,26 +1324,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
start_blk += NR_CURSEG_NODE_TYPE;
}
 
-   /* writeout checkpoint block */
-   update_meta_page(sbi, ckpt, start_blk);
+   /* update user_block_counts */
+   sbi->last_valid_block_count = sbi->total_valid_block_count;
+   percpu_counter_set(&sbi->alloc_valid_block_count, 0);
 
-   /* wait for previous submitted node/meta pages writeback */
+   /* Here, we have one bio having CP pack except cp pack 2 page */
+   sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+
+   /* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi);
 
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
 
-   filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
-   filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);
-
-   /* update user_block_counts */
-   sbi->last_valid_block_count = sbi->total_valid_block_count;
-   percpu_counter_set(&sbi->alloc_valid_block_count, 0);
-
-   /* Here, we only have one bio having CP pack */
-   sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO);
+  

Re: [f2fs-dev] [PATCH 2/2] f2fs: support {d,id,did,x}node checksum

2018-02-09 Thread Chao Yu
On 2018/2/10 9:41, Jaegeuk Kim wrote:
> On 02/01, Chao Yu wrote:
>>
>>
>> On 2018/2/1 6:15, Jaegeuk Kim wrote:
>>> On 01/31, Chao Yu wrote:
 On 2018/1/31 10:02, Jaegeuk Kim wrote:
> What if we want to add more entries in addition to node_checksum? Do we 
> have
> to add a new feature flag at every time? How about adding a layout value 
> instead

 Hmm.. for previous implementation, IMO, we'd better add a new feature flag 
 at
 every time, otherwise, w/ extra_nsize only, in current image, we can know a
 valid range of extended area in node block, but we don't know which
 fields/features are valid/enabled or not.

 One more thing is that if we can add one feature flag for each field, we 
 got one
 more chance to disable it dynamically.

> of extra_nsize? For example, layout #1 means node_checksum with 
> extra_nsize=X?
>
>
> What does 1017 mean? We need to make this structure more flexibly for new

 Yes, using raw 1017 is not appropriate here.

> entries. Like this?
>   union {
>   struct node_v1;
>   struct node_v2;
>   struct node_v3;
>   ...
>   struct direct_node dn;
>   struct indirect_node in;
>   };
>   };
>
>   struct node_v1 {
>   __le32 data[DEF_ADDRS_PER_BLOCK - V1_NSIZE=1];
>   __le32 node_checksum;
>   }
>
>   struct node_v2 {
>   __le32 data[DEF_ADDRS_PER_BLOCK - V2_NSIZE=500];

 Hmm.. If we only need to add one more 4 bytes field in struct node_v2, but
 V2_NSIZE is defined as fixed 500, there must be 492 bytes wasted.

 Or we can define V2_NSIZE as 8, but if there comes more and more extended
 fields, node version count can be a large number, it results in complicated
 version recognization and handling.

 One more question is how can we control which fields are valid or not in
 comp[Vx_NSIZE]?


 Anyway, what I'm thinking is maybe we can restructure layout of node block 
 like
 the one used by f2fs_inode:

 struct f2fs_node {
union {
struct f2fs_inode i;
union {
struct {
__le32 node_checksum;
__le32 feature_field_1;
__le32 feature_field_2;

__le32 addr[];

};
struct direct_node dn;
struct indirect_node in;
};
};
struct node_footer footer;
 } __packed;

 Moving all extended fields to the head of f2fs_node, so we don't have to 
 use
 macro to indicate actual size of addr.
>>>
>>> Thinking what'd be the best way. My concern is, once getting more entries, 
>>> we
>>
>> OK, I think we need more discussion.. ;)
>>
>>> can't set each of features individually. Like the second entry should have
>>
>> Oh, that will be hard. If we have to avoid that, we have to tag in somewhere
>> e.g. f2fs_inode::i_flags2 to indicate which new field in f2fs_node is valid, 
>> for
>> example:
>>
>> #define F2FS_NODE_CHECKSUM   0x0001
>> #define F2FS_NODE_FIELD1 0x0002
>> #define F2FS_NODE_FIELD2 0x0004
>>
>>  union {
>>  struct {
>>  __le32 node_checksum;
>>  __le32 field_1;
>>  __le32 field_2;
>>  
>>  __le32 addr[];
>>  };
>>  struct direct_node dn;
>>  struct indirect_node in;
>>  };
>>
>> f2fs_inode::i_flags2 = F2FS_NODE_CHECKSUM | F2FS_NODE_FIELD1
>> indicates that f2fs_node::node_checksum and f2fs_node::field_1 are valid;
>>
>> f2fs_inode::i_flags2 = F2FS_NODE_FIELD1 | F2FS_NODE_FIELD2
>> indicates that f2fs_node::field_1 and f2fs_node::field_2 are valid.
> 
> So, that's why I thought we may need a sort of each formats.

Hmm.. if we have two new added fields, there are (2 << 2) combinations
of all formats, as:

struct original {
__le32 data[DEF_ADDRS_PER_BLOCK];
}

struct node_v1 {
__le32 data[DEF_ADDRS_PER_BLOCK - V1_NSIZE=1];
__le32 field_1;
}

struct node_v2 {
__le32 data[DEF_ADDRS_PER_BLOCK - V2_NSIZE=1];
__le32 field_2;
}

struct node_v2 {
__le32 data[DEF_ADDRS_PER_BLOCK - V3_NSIZE=2];
__le32 field_1;
__le32 field_2;
}

If we add more new fields, the node version will increase sharply due
to there is (n << 2) combination with n fields. Right? Any thoughts to
reduce maintaining overhead on those node versions structures?

Thanks,

> 
>>
>> Any thoughts?
>>
>> Thanks,
>>
>>> enabled node_checksum, which we may not want to do.
>>>

 Th

Re: [f2fs-dev] [RFC PATCH 0/2] f2fs: introduce lost+found feature

2018-02-09 Thread Jaegeuk Kim
On 02/06, Sheng Yong wrote:
> This patchset introduces lost+found feature in f2fs. If the feature is
> enabled, f2fs should avoid to encrypting root directory.

In that case, we need to add test_dummy_encryption likewise ext4.

> 
> For more information, please check the mail "f2fs-tools: introduce lost+
> found feature".
> 
> Thanks,
> Sheng
> 
> Sheng Yong (2):
>   f2fs: clean up f2fs_sb_has_xxx functions
>   f2fs: introduce lost+found feature
> 
>  fs/f2fs/data.c|  2 +-
>  fs/f2fs/f2fs.h| 53 +++--
>  fs/f2fs/file.c|  6 +++---
>  fs/f2fs/segment.c |  4 ++--
>  fs/f2fs/super.c   | 26 +++---
>  fs/f2fs/sysfs.c   |  4 ++--
>  6 files changed, 42 insertions(+), 53 deletions(-)
> 
> -- 
> 2.11.0

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [RFC PATCH 2/5] f2fs-tools: init f2fs_configuration as 0

2018-02-09 Thread Jaegeuk Kim
On 02/08, Chao Yu wrote:
> On 2018/2/6 12:31, Sheng Yong wrote:
> > Signed-off-by: Sheng Yong 
> 
> Reviewed-by: Chao Yu 

Sheng Yong,

I only merged this patch in this set. ;)

> 
> Thanks,

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH] f2fs: handle quota for orphan inodes

2018-02-09 Thread Jaegeuk Kim
This is to fix missing dquot_initialize for orphan inodes.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/checkpoint.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 8b0945ba284d..e3bf753a47be 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -569,13 +569,8 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, 
nid_t ino)
struct node_info ni;
int err = acquire_orphan_inode(sbi);
 
-   if (err) {
-   set_sbi_flag(sbi, SBI_NEED_FSCK);
-   f2fs_msg(sbi->sb, KERN_WARNING,
-   "%s: orphan failed (ino=%x), run fsck to fix.",
-   __func__, ino);
-   return err;
-   }
+   if (err)
+   goto err_out;
 
__add_ino_entry(sbi, ino, 0, ORPHAN_INO);
 
@@ -589,6 +584,11 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, 
nid_t ino)
return PTR_ERR(inode);
}
 
+   err = dquot_initialize(inode);
+   if (err)
+   goto err_out;
+
+   dquot_initialize(inode);
clear_nlink(inode);
 
/* truncate all the data during iput */
@@ -598,14 +598,18 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, 
nid_t ino)
 
/* ENOMEM was fully retried in f2fs_evict_inode. */
if (ni.blk_addr != NULL_ADDR) {
-   set_sbi_flag(sbi, SBI_NEED_FSCK);
-   f2fs_msg(sbi->sb, KERN_WARNING,
-   "%s: orphan failed (ino=%x) by kernel, retry mount.",
-   __func__, ino);
-   return -EIO;
+   err = -EIO;
+   goto err_out;
}
__remove_ino_entry(sbi, ino, ORPHAN_INO);
return 0;
+
+err_out:
+   set_sbi_flag(sbi, SBI_NEED_FSCK);
+   f2fs_msg(sbi->sb, KERN_WARNING,
+   "%s: orphan failed (ino=%x), run fsck to fix.",
+   __func__, ino);
+   return err;
 }
 
 int recover_orphan_inodes(struct f2fs_sb_info *sbi)
-- 
2.15.0.531.g2ccb3012c9-goog


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH RFC v4] f2fs: flush cp pack except cp pack 2 page at first

2018-02-09 Thread Jaegeuk Kim
On 02/01, Gao Xiang wrote:
> Hi Jaegeuk and Chao,
> 
> On 2018/2/1 6:28, Jaegeuk Kim wrote:
> > On 01/31, Chao Yu wrote:
> > > On 2018/1/31 14:39, Gaoxiang (OS) wrote:
> > > > Previously, we attempt to flush the whole cp pack in a single bio,
> > > > however, when suddenly powering off at this time, we could get into
> > > > an extreme scenario that cp pack 1 page and cp pack 2 page are updated
> > > > and latest, but payload or current summaries are still partially
> > > > outdated. (see reliable write in the UFS specification)
> > > > 
> > > > This patch submits the whole cp pack except cp pack 2 page at first,
> > > > and then writes the cp pack 2 page with an extra independent
> > > > bio with pre-io barrier.
> > > > 
> > > > Signed-off-by: Gao Xiang 
> > > > Reviewed-by: Chao Yu 
> > > > ---
> > > > Change log from v3:
> > > >- further review comments are applied from Jaegeuk and Chao
> > > >- Tested on this patch (without multiple-device): mount, boot 
> > > > Android with f2fs userdata and make fragment
> > > >- If any problem with this patch or I miss something, please kindly 
> > > > share your comments, thanks :)
> > > > Change log from v2:
> > > >- Apply the review comments from Chao
> > > > Change log from v1:
> > > >- Apply the review comments from Chao
> > > >- time data from "finish block_ops" to " finish checkpoint" (tested 
> > > > on ARM64 with TOSHIBA 128GB UFS):
> > > >   Before patch: 0.002273  0.001973  0.002789  0.005159  0.002050
> > > >   After patch: 0.002502  0.001624  0.002487  0.003049  0.002696
> > > >   fs/f2fs/checkpoint.c | 67 
> > > > 
> > > >   1 file changed, 46 insertions(+), 21 deletions(-)
> > > > 
> > > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > > index 14d2fed..916dc72 100644
> > > > --- a/fs/f2fs/checkpoint.c
> > > > +++ b/fs/f2fs/checkpoint.c
> > > > @@ -1158,6 +1158,39 @@ static void update_ckpt_flags(struct 
> > > > f2fs_sb_info *sbi, struct cp_control *cpc)
> > > > spin_unlock_irqrestore(&sbi->cp_lock, flags);
> > > >   }
> > > > +static void commit_checkpoint(struct f2fs_sb_info *sbi,
> > > > +   void *src, block_t blk_addr)
> > > > +{
> > > > +   struct writeback_control wbc = {
> > > > +   .for_reclaim = 0,
> > > > +   };
> > > > +
> > > > +   /*
> > > > +* pagevec_lookup_tag and lock_page again will take
> > > > +* some extra time. Therefore, update_meta_pages and
> > > > +* sync_meta_pages are combined in this function.
> > > > +*/
> > > > +   struct page *page = grab_meta_page(sbi, blk_addr);
> > > > +   int err;
> > > > +
> > > > +   memcpy(page_address(page), src, PAGE_SIZE);
> > > > +   set_page_dirty(page);
> > > > +
> > > > +   f2fs_wait_on_page_writeback(page, META, true);
> > > > +   f2fs_bug_on(sbi, PageWriteback(page));
> > > > +   if (unlikely(!clear_page_dirty_for_io(page)))
> > > > +   f2fs_bug_on(sbi, 1);
> > > > +
> > > > +   /* writeout cp pack 2 page */
> > > > +   err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
> > > > +   f2fs_bug_on(sbi, err);
> > > > +
> > > > +   f2fs_put_page(page, 0);
> > > > +
> > > > +   /* submit checkpoint (with barrier if NOBARRIER is not set) */
> > > > +   f2fs_submit_merged_write(sbi, META_FLUSH);
> > > > +}
> > > > +
> > > >   static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control 
> > > > *cpc)
> > > >   {
> > > > struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> > > > @@ -1260,16 +1293,6 @@ static int do_checkpoint(struct f2fs_sb_info 
> > > > *sbi, struct cp_control *cpc)
> > > > }
> > > > }
> > > > -   /* need to wait for end_io results */
> > > > -   wait_on_all_pages_writeback(sbi);
> > > > -   if (unlikely(f2fs_cp_error(sbi)))
> > > > -   return -EIO;
> > > > -
> > > > -   /* flush all device cache */
> > > > -   err = f2fs_flush_device_cache(sbi);
> > > > -   if (err)
> > > > -   return err;
> > > > -
> > > > /* write out checkpoint buffer at block 0 */
> > > > update_meta_page(sbi, ckpt, start_blk++);
> > > > @@ -1297,15 +1320,6 @@ static int do_checkpoint(struct f2fs_sb_info 
> > > > *sbi, struct cp_control *cpc)
> > > > start_blk += NR_CURSEG_NODE_TYPE;
> > > > }
> > > > -   /* writeout checkpoint block */
> > > > -   update_meta_page(sbi, ckpt, start_blk);
> > > > -
> > > > -   /* wait for previous submitted node/meta pages writeback */
> > > > -   wait_on_all_pages_writeback(sbi);
> > > > -
> > > > -   if (unlikely(f2fs_cp_error(sbi)))
> > > > -   return -EIO;
> > > > -
> > > > filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
> > > > filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);
> > 
> >   - remove
> > 
> You mean remove
> filemap_fdatawait_range(NODE_MA

Re: [f2fs-dev] [PATCH 2/2] f2fs: support {d,id,did,x}node checksum

2018-02-09 Thread Jaegeuk Kim
On 02/01, Chao Yu wrote:
> 
> 
> On 2018/2/1 6:15, Jaegeuk Kim wrote:
> > On 01/31, Chao Yu wrote:
> >> On 2018/1/31 10:02, Jaegeuk Kim wrote:
> >>> What if we want to add more entries in addition to node_checksum? Do we 
> >>> have
> >>> to add a new feature flag at every time? How about adding a layout value 
> >>> instead
> >>
> >> Hmm.. for previous implementation, IMO, we'd better add a new feature flag 
> >> at
> >> every time, otherwise, w/ extra_nsize only, in current image, we can know a
> >> valid range of extended area in node block, but we don't know which
> >> fields/features are valid/enabled or not.
> >>
> >> One more thing is that if we can add one feature flag for each field, we 
> >> got one
> >> more chance to disable it dynamically.
> >>
> >>> of extra_nsize? For example, layout #1 means node_checksum with 
> >>> extra_nsize=X?
> >>>
> >>>
> >>> What does 1017 mean? We need to make this structure more flexibly for new
> >>
> >> Yes, using raw 1017 is not appropriate here.
> >>
> >>> entries. Like this?
> >>>   union {
> >>>   struct node_v1;
> >>>   struct node_v2;
> >>>   struct node_v3;
> >>>   ...
> >>>   struct direct_node dn;
> >>>   struct indirect_node in;
> >>>   };
> >>>   };
> >>>
> >>>   struct node_v1 {
> >>>   __le32 data[DEF_ADDRS_PER_BLOCK - V1_NSIZE=1];
> >>>   __le32 node_checksum;
> >>>   }
> >>>
> >>>   struct node_v2 {
> >>>   __le32 data[DEF_ADDRS_PER_BLOCK - V2_NSIZE=500];
> >>
> >> Hmm.. If we only need to add one more 4 bytes field in struct node_v2, but
> >> V2_NSIZE is defined as fixed 500, there must be 492 bytes wasted.
> >>
> >> Or we can define V2_NSIZE as 8, but if there comes more and more extended
> >> fields, node version count can be a large number, it results in complicated
> >> version recognization and handling.
> >>
> >> One more question is how can we control which fields are valid or not in
> >> comp[Vx_NSIZE]?
> >>
> >>
> >> Anyway, what I'm thinking is maybe we can restructure layout of node block 
> >> like
> >> the one used by f2fs_inode:
> >>
> >> struct f2fs_node {
> >>union {
> >>struct f2fs_inode i;
> >>union {
> >>struct {
> >>__le32 node_checksum;
> >>__le32 feature_field_1;
> >>__le32 feature_field_2;
> >>
> >>__le32 addr[];
> >>
> >>};
> >>struct direct_node dn;
> >>struct indirect_node in;
> >>};
> >>};
> >>struct node_footer footer;
> >> } __packed;
> >>
> >> Moving all extended fields to the head of f2fs_node, so we don't have to 
> >> use
> >> macro to indicate actual size of addr.
> > 
> > Thinking what'd be the best way. My concern is, once getting more entries, 
> > we
> 
> OK, I think we need more discussion.. ;)
> 
> > can't set each of features individually. Like the second entry should have
> 
> Oh, that will be hard. If we have to avoid that, we have to tag in somewhere
> e.g. f2fs_inode::i_flags2 to indicate which new field in f2fs_node is valid, 
> for
> example:
> 
> #define F2FS_NODE_CHECKSUM0x0001
> #define F2FS_NODE_FIELD1  0x0002
> #define F2FS_NODE_FIELD2  0x0004
> 
>   union {
>   struct {
>   __le32 node_checksum;
>   __le32 field_1;
>   __le32 field_2;
>   
>   __le32 addr[];
>   };
>   struct direct_node dn;
>   struct indirect_node in;
>   };
> 
> f2fs_inode::i_flags2 = F2FS_NODE_CHECKSUM | F2FS_NODE_FIELD1
> indicates that f2fs_node::node_checksum and f2fs_node::field_1 are valid;
> 
> f2fs_inode::i_flags2 = F2FS_NODE_FIELD1 | F2FS_NODE_FIELD2
> indicates that f2fs_node::field_1 and f2fs_node::field_2 are valid.

So, that's why I thought we may need a sort of each formats.

> 
> Any thoughts?
> 
> Thanks,
> 
> > enabled node_checksum, which we may not want to do.
> > 
> >>
> >> Thanks,
> >>
> >>>   __le32 comp[V2_NSIZE];
> >>>   }
> >>>   ...
> >>>
>  +};
>  +struct direct_node dn;
>  +struct indirect_node in;
>  +};
>   };
>   struct node_footer footer;
>   } __packed;
>  -- 
>  2.15.0.55.gc2ece9dc4de6
> >>>
> >>> .
> >>>

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/

Re: [f2fs-dev] [PATCH v2] f2fs: introduce "strict_fsync" for posix standard fsync

2018-02-09 Thread Jaegeuk Kim
On 02/02, Junling Zheng wrote:
> Commit "0a007b97aad6"(f2fs: recover directory operations by fsync)
> fixed xfstest generic/342 case, but it also increased the written
> data and caused the performance degradation. In most cases, there's
> no need to do so heavily fsync actually.
> 
> So we introduce a new mount option "strict_fsync" to control the
> policy of fsync. It's set by default, and means that fsync follows
> POSIX semantics. And "nostrict_fsync" means that the behaviour is
> in line with xfs, ext4 and btrfs, where generic/342 will pass.

How about adding "fsync=%s" to give another chance for fsync policies?

Thanks,

> 
> Signed-off-by: Junling Zheng 
> Reviewed-by: Chao Yu 
> ---
>  Documentation/filesystems/f2fs.txt |  4 
>  fs/f2fs/dir.c  |  3 ++-
>  fs/f2fs/f2fs.h |  1 +
>  fs/f2fs/file.c |  3 ++-
>  fs/f2fs/namei.c|  9 ++---
>  fs/f2fs/super.c| 13 +
>  6 files changed, 28 insertions(+), 5 deletions(-)
> 
> diff --git a/Documentation/filesystems/f2fs.txt 
> b/Documentation/filesystems/f2fs.txt
> index 0caf7da0a532..c484ce8d1f4c 100644
> --- a/Documentation/filesystems/f2fs.txt
> +++ b/Documentation/filesystems/f2fs.txt
> @@ -180,6 +180,10 @@ whint_mode=%s  Control which write hints are 
> passed down to block
> down hints. In "user-based" mode, f2fs tries to pass
> down hints given by users. And in "fs-based" mode, 
> f2fs
> passes down hints with its policy.
> +{,no}strict_fsync  Control the policy of fsync. Set "strict_fsync" by 
> default,
> +   which means that fsync will follow POSIX semantics. 
> Use
> +   "nostrict_fsync" if you expect fsync to behave in 
> line with
> +   xfs, ext4 and btrfs, where xfstest generic/342 will 
> pass.
>  
>  
> 
>  DEBUGFS ENTRIES
> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> index f00b5ed8c011..7487b7e77a36 100644
> --- a/fs/f2fs/dir.c
> +++ b/fs/f2fs/dir.c
> @@ -713,7 +713,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, 
> struct page *page,
>  
>   f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
>  
> - add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO);
> + if (!test_opt(F2FS_I_SB(dir), STRICT_FSYNC))
> + add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO);
>  
>   if (f2fs_has_inline_dentry(dir))
>   return f2fs_delete_inline_entry(dentry, page, dir, inode);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index dbe87c7a266e..8cf914d12f17 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -97,6 +97,7 @@ extern char *fault_name[FAULT_MAX];
>  #define F2FS_MOUNT_QUOTA 0x0040
>  #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x0080
>  #define F2FS_MOUNT_RESERVE_ROOT  0x0100
> +#define F2FS_MOUNT_STRICT_FSYNC  0x0200
>  
>  #define clear_opt(sbi, option)   ((sbi)->mount_opt.opt &= 
> ~F2FS_MOUNT_##option)
>  #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 672a542e5464..9b39254f5b48 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -165,7 +165,8 @@ static inline enum cp_reason_type 
> need_do_checkpoint(struct inode *inode)
>   cp_reason = CP_FASTBOOT_MODE;
>   else if (sbi->active_logs == 2)
>   cp_reason = CP_SPEC_LOG_NUM;
> - else if (need_dentry_mark(sbi, inode->i_ino) &&
> + else if (!test_opt(sbi, STRICT_FSYNC) &&
> + need_dentry_mark(sbi, inode->i_ino) &&
>   exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO))
>   cp_reason = CP_RECOVER_DIR;
>  
> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
> index c4c94c7e9f4f..ef86ae327f91 100644
> --- a/fs/f2fs/namei.c
> +++ b/fs/f2fs/namei.c
> @@ -936,7 +936,8 @@ static int f2fs_rename(struct inode *old_dir, struct 
> dentry *old_dentry,
>   }
>   f2fs_i_links_write(old_dir, false);
>   }
> - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
> + if (!test_opt(sbi, STRICT_FSYNC))
> + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
>  
>   f2fs_unlock_op(sbi);
>  
> @@ -1091,8 +1092,10 @@ static int f2fs_cross_rename(struct inode *old_dir, 
> struct dentry *old_dentry,
>   }
>   f2fs_mark_inode_dirty_sync(new_dir, false);
>  
> - add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO);
> - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
> + if (!test_opt(sbi, STRICT_FSYNC)) {
> + add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO);
> + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
> + }
>  
>   f2fs_unlock_op(sbi);
>  
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 7966cf7bfb8

Re: [f2fs-dev] [PATCH] f2fs: add fi->commit_lock to protect commit GCed pages

2018-02-09 Thread Yunlong Song

The problem is that you can not find a proper value of the threshold
time, when f2fs_gc select the GCed data page of the atomic file (which
has atomic started but not atomic committed yet), then f2fs_gc will
run into loop, and all the f2fs ops will be blocked in f2fs_balane_fs.
If the threshold time is set larger (e.g. 5s? Then all the f2fs ops will
block 5s, which will cause unexpected bad result of user experience).
And if the threshold time is set smaller (e.g. 500ms? Then the atomic
ops will probably fail frequently). BTW, some more patches are needed
to notify the atomic ops itself that it has run time out, and should
handle the inmem pages

Back to these two patches, why not use them to separate inmem pages
and GCed data pages in such a simple way.

On 2018/2/9 21:38, Chao Yu wrote:

On 2018/2/9 21:29, Yunlong Song wrote:

Back to the problem, if we skip out, then the f2fs_gc will go
into dead loop if the apps only atomic start but never atomic


That's another issue, which I have suggest to set a threshold time
to release atomic/volatile pages by balance_fs_bg.

Thanks,


commit. The main aim of my two patches is to remove the skip
action to avoid the dead loop.

On 2018/2/9 21:26, Chao Yu wrote:

On 2018/2/9 20:56, Yunlong Song wrote:

As what I point in last mail, if the atomic file is not committed
yet, gc_data_segment will register_inmem_page the GCed data pages.


We will skip GCing that page as below check:

- move_data_{page,block}
   - f2fs_is_atomic_file()
 skip out;

No?

Thanks,


This will cause these data pages written twice, the first write
happens in move_data_page->do_write_data_page, and the second
write happens in later __commit_inmem_pages->do_write_data_page.

On 2018/2/9 20:44, Chao Yu wrote:

On 2018/2/8 11:11, Yunlong Song wrote:

Then the GCed data pages are totally mixed with the inmem atomic pages,


If we add dio_rwsem, GC flow is exclude with atomic write flow. There
will be not race case to mix GCed page into atomic pages.

Or you mean:

  - gc_data_segment
   - move_data_page
- f2fs_is_atomic_file
- f2fs_ioc_start_atomic_write
- set_inode_flag(inode, FI_ATOMIC_FILE);
- f2fs_set_data_page_dirty
 - register_inmem_page

In this case, GCed page can be mixed into database transaction, but could
it cause any problem except break rule of isolation for transaction.


this will cause the atomic commit ops write the GCed data pages twice
(the first write happens in GC).

How about using the early two patches to separate the inmem data pages
and GCed data pages, and use dio_rwsem instead of this patch to fix the
dnode page problem (dnode page commited but data page are not committed
for the GCed page)?


Could we fix the race case first, based on that fixing, and then find the
place that we can improve?




On 2018/2/7 20:16, Chao Yu wrote:

On 2018/2/6 11:49, Yunlong Song wrote:

This patch adds fi->commit_lock to avoid the case that GCed node pages
are committed but GCed data pages are not committed. This can avoid the
db file run into inconsistent state when sudden-power-off happens if
data pages of atomic file is allowed to be GCed before.


do_fsync:GC:
- mutex_lock(&fi->commit_lock);
   - lock_page()
- mutex_lock(&fi->commit_lock);
 - lock_page()


Well, please consider lock dependency & code complexity, IMO, reuse
fi->dio_rwsem[WRITE] will be enough as below:

---
 fs/f2fs/file.c | 3 +++
 fs/f2fs/gc.c   | 5 -
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 672a542e5464..1bdc11feb8d0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)

 inode_lock(inode);

+down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
 if (f2fs_is_volatile_file(inode))
 goto err_out;

@@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
 }
 err_out:
+up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
 inode_unlock(inode);
 mnt_drop_write_file(filp);
 return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b9d93fd532a9..e49416283563 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t 
bidx,
 if (!check_valid_map(F2FS_I_SB(inode), segno, off))
 goto out;

-if (f2fs_is_atomic_file(inode))
-goto out;


Seems that we need this check.


-
 if (f2fs_is_pinned_file(inode)) {
 f2fs_pin_file_control(inode, true);
 goto out;
@@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t 
bidx, int gc_type,
 if (!check_valid_map(F2FS_I_SB(inode), segno, off))
 g

Re: [f2fs-dev] [PATCH] f2fs: add fi->commit_lock to protect commit GCed pages

2018-02-09 Thread Chao Yu
On 2018/2/9 21:29, Yunlong Song wrote:
> Back to the problem, if we skip out, then the f2fs_gc will go
> into dead loop if the apps only atomic start but never atomic

That's another issue, which I have suggest to set a threshold time
to release atomic/volatile pages by balance_fs_bg.

Thanks,

> commit. The main aim of my two patches is to remove the skip
> action to avoid the dead loop.
> 
> On 2018/2/9 21:26, Chao Yu wrote:
>> On 2018/2/9 20:56, Yunlong Song wrote:
>>> As what I point in last mail, if the atomic file is not committed
>>> yet, gc_data_segment will register_inmem_page the GCed data pages.
>>
>> We will skip GCing that page as below check:
>>
>> - move_data_{page,block}
>>   - f2fs_is_atomic_file()
>>     skip out;
>>
>> No?
>>
>> Thanks,
>>
>>> This will cause these data pages written twice, the first write
>>> happens in move_data_page->do_write_data_page, and the second
>>> write happens in later __commit_inmem_pages->do_write_data_page.
>>>
>>> On 2018/2/9 20:44, Chao Yu wrote:
 On 2018/2/8 11:11, Yunlong Song wrote:
> Then the GCed data pages are totally mixed with the inmem atomic pages,

 If we add dio_rwsem, GC flow is exclude with atomic write flow. There
 will be not race case to mix GCed page into atomic pages.

 Or you mean:

  - gc_data_segment
   - move_data_page
    - f2fs_is_atomic_file
 - f2fs_ioc_start_atomic_write
    - set_inode_flag(inode, FI_ATOMIC_FILE);
    - f2fs_set_data_page_dirty
     - register_inmem_page

 In this case, GCed page can be mixed into database transaction, but could
 it cause any problem except break rule of isolation for transaction.

> this will cause the atomic commit ops write the GCed data pages twice
> (the first write happens in GC).
>
> How about using the early two patches to separate the inmem data pages
> and GCed data pages, and use dio_rwsem instead of this patch to fix the
> dnode page problem (dnode page commited but data page are not committed
> for the GCed page)?

 Could we fix the race case first, based on that fixing, and then find the
 place that we can improve?

>
>
> On 2018/2/7 20:16, Chao Yu wrote:
>> On 2018/2/6 11:49, Yunlong Song wrote:
>>> This patch adds fi->commit_lock to avoid the case that GCed node pages
>>> are committed but GCed data pages are not committed. This can avoid the
>>> db file run into inconsistent state when sudden-power-off happens if
>>> data pages of atomic file is allowed to be GCed before.
>>
>> do_fsync:    GC:
>> - mutex_lock(&fi->commit_lock);
>>   - lock_page()
>>    - mutex_lock(&fi->commit_lock);
>>     - lock_page()
>>
>>
>> Well, please consider lock dependency & code complexity, IMO, reuse
>> fi->dio_rwsem[WRITE] will be enough as below:
>>
>> ---
>>     fs/f2fs/file.c | 3 +++
>>     fs/f2fs/gc.c   | 5 -
>>     2 files changed, 3 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>> index 672a542e5464..1bdc11feb8d0 100644
>> --- a/fs/f2fs/file.c
>> +++ b/fs/f2fs/file.c
>> @@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct 
>> file *filp)
>>
>>     inode_lock(inode);
>>
>> +    down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>> +
>>     if (f2fs_is_volatile_file(inode))
>>     goto err_out;
>>
>> @@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct 
>> file *filp)
>>     ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
>>     }
>>     err_out:
>> +    up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>     inode_unlock(inode);
>>     mnt_drop_write_file(filp);
>>     return ret;
>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> index b9d93fd532a9..e49416283563 100644
>> --- a/fs/f2fs/gc.c
>> +++ b/fs/f2fs/gc.c
>> @@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, 
>> block_t bidx,
>>     if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>     goto out;
>>
>> -    if (f2fs_is_atomic_file(inode))
>> -    goto out;

 Seems that we need this check.

>> -
>>     if (f2fs_is_pinned_file(inode)) {
>>     f2fs_pin_file_control(inode, true);
>>     goto out;
>> @@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, 
>> block_t bidx, int gc_type,
>>     if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>     goto out;
>>
>> -    if (f2fs_is_atomic_file(inode))
>> -    goto out;

 Ditto.

 Thanks,

>>     if (f2fs_is_pinned_file(in

Re: [f2fs-dev] [PATCH] f2fs: remove unneed reada during build free nids

2018-02-09 Thread Chao Yu
On 2018/2/9 11:58, Yunlei He wrote:
> This patch remove unneed reada during build free nids.
> If few nids left, three will introduce a lot of no hit
> read io.

I guess it is due to be lack of commit c1fe3e981440 ("Revert "f2fs:
reuse nids more aggressively""), before that commit, we will update
.next_scan_nid randomly instead of updating it ascendingly, then
during __build_free_nids we may readahead NAT pages starting from
random position.

Could try that commit to check IO again?

Thanks,

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: add fi->commit_lock to protect commit GCed pages

2018-02-09 Thread Yunlong Song

Back to the problem, if we skip out, then the f2fs_gc will go
into dead loop if the apps only atomic start but never atomic
commit. The main aim of my two patches is to remove the skip
action to avoid the dead loop.

On 2018/2/9 21:26, Chao Yu wrote:

On 2018/2/9 20:56, Yunlong Song wrote:

As what I point in last mail, if the atomic file is not committed
yet, gc_data_segment will register_inmem_page the GCed data pages.


We will skip GCing that page as below check:

- move_data_{page,block}
  - f2fs_is_atomic_file()
skip out;

No?

Thanks,


This will cause these data pages written twice, the first write
happens in move_data_page->do_write_data_page, and the second
write happens in later __commit_inmem_pages->do_write_data_page.

On 2018/2/9 20:44, Chao Yu wrote:

On 2018/2/8 11:11, Yunlong Song wrote:

Then the GCed data pages are totally mixed with the inmem atomic pages,


If we add dio_rwsem, GC flow is exclude with atomic write flow. There
will be not race case to mix GCed page into atomic pages.

Or you mean:

 - gc_data_segment
  - move_data_page
   - f2fs_is_atomic_file
- f2fs_ioc_start_atomic_write
   - set_inode_flag(inode, FI_ATOMIC_FILE);
   - f2fs_set_data_page_dirty
- register_inmem_page

In this case, GCed page can be mixed into database transaction, but could
it cause any problem except break rule of isolation for transaction.


this will cause the atomic commit ops write the GCed data pages twice
(the first write happens in GC).

How about using the early two patches to separate the inmem data pages
and GCed data pages, and use dio_rwsem instead of this patch to fix the
dnode page problem (dnode page commited but data page are not committed
for the GCed page)?


Could we fix the race case first, based on that fixing, and then find the
place that we can improve?




On 2018/2/7 20:16, Chao Yu wrote:

On 2018/2/6 11:49, Yunlong Song wrote:

This patch adds fi->commit_lock to avoid the case that GCed node pages
are committed but GCed data pages are not committed. This can avoid the
db file run into inconsistent state when sudden-power-off happens if
data pages of atomic file is allowed to be GCed before.


do_fsync:GC:
- mutex_lock(&fi->commit_lock);
  - lock_page()
   - mutex_lock(&fi->commit_lock);
- lock_page()


Well, please consider lock dependency & code complexity, IMO, reuse
fi->dio_rwsem[WRITE] will be enough as below:

---
fs/f2fs/file.c | 3 +++
fs/f2fs/gc.c   | 5 -
2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 672a542e5464..1bdc11feb8d0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)

inode_lock(inode);

+down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
if (f2fs_is_volatile_file(inode))
goto err_out;

@@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
}
err_out:
+up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b9d93fd532a9..e49416283563 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t 
bidx,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;

-if (f2fs_is_atomic_file(inode))
-goto out;


Seems that we need this check.


-
if (f2fs_is_pinned_file(inode)) {
f2fs_pin_file_control(inode, true);
goto out;
@@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t 
bidx, int gc_type,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;

-if (f2fs_is_atomic_file(inode))
-goto out;


Ditto.

Thanks,


if (f2fs_is_pinned_file(inode)) {
if (gc_type == FG_GC)
f2fs_pin_file_control(inode, true);





.





.



--
Thanks,
Yunlong Song


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: add fi->commit_lock to protect commit GCed pages

2018-02-09 Thread Chao Yu
On 2018/2/9 20:56, Yunlong Song wrote:
> As what I point in last mail, if the atomic file is not committed
> yet, gc_data_segment will register_inmem_page the GCed data pages.

We will skip GCing that page as below check:

- move_data_{page,block}
 - f2fs_is_atomic_file()
   skip out;

No?

Thanks,

> This will cause these data pages written twice, the first write
> happens in move_data_page->do_write_data_page, and the second
> write happens in later __commit_inmem_pages->do_write_data_page.
> 
> On 2018/2/9 20:44, Chao Yu wrote:
>> On 2018/2/8 11:11, Yunlong Song wrote:
>>> Then the GCed data pages are totally mixed with the inmem atomic pages,
>>
>> If we add dio_rwsem, GC flow is exclude with atomic write flow. There
>> will be not race case to mix GCed page into atomic pages.
>>
>> Or you mean:
>>
>>     - gc_data_segment
>>  - move_data_page
>>   - f2fs_is_atomic_file
>> - f2fs_ioc_start_atomic_write
>>   - set_inode_flag(inode, FI_ATOMIC_FILE);
>>   - f2fs_set_data_page_dirty
>>    - register_inmem_page
>>
>> In this case, GCed page can be mixed into database transaction, but could
>> it cause any problem except break rule of isolation for transaction.
>>
>>> this will cause the atomic commit ops write the GCed data pages twice
>>> (the first write happens in GC).
>>>
>>> How about using the early two patches to separate the inmem data pages
>>> and GCed data pages, and use dio_rwsem instead of this patch to fix the
>>> dnode page problem (dnode page commited but data page are not committed
>>> for the GCed page)?
>>
>> Could we fix the race case first, based on that fixing, and then find the
>> place that we can improve?
>>
>>>
>>>
>>> On 2018/2/7 20:16, Chao Yu wrote:
 On 2018/2/6 11:49, Yunlong Song wrote:
> This patch adds fi->commit_lock to avoid the case that GCed node pages
> are committed but GCed data pages are not committed. This can avoid the
> db file run into inconsistent state when sudden-power-off happens if
> data pages of atomic file is allowed to be GCed before.

 do_fsync:    GC:
 - mutex_lock(&fi->commit_lock);
  - lock_page()
   - mutex_lock(&fi->commit_lock);
    - lock_page()


 Well, please consider lock dependency & code complexity, IMO, reuse
 fi->dio_rwsem[WRITE] will be enough as below:

 ---
    fs/f2fs/file.c | 3 +++
    fs/f2fs/gc.c   | 5 -
    2 files changed, 3 insertions(+), 5 deletions(-)

 diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
 index 672a542e5464..1bdc11feb8d0 100644
 --- a/fs/f2fs/file.c
 +++ b/fs/f2fs/file.c
 @@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file 
 *filp)

    inode_lock(inode);

 +    down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
 +
    if (f2fs_is_volatile_file(inode))
    goto err_out;

 @@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file 
 *filp)
    ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
    }
    err_out:
 +    up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
    inode_unlock(inode);
    mnt_drop_write_file(filp);
    return ret;
 diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
 index b9d93fd532a9..e49416283563 100644
 --- a/fs/f2fs/gc.c
 +++ b/fs/f2fs/gc.c
 @@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, 
 block_t bidx,
    if (!check_valid_map(F2FS_I_SB(inode), segno, off))
    goto out;

 -    if (f2fs_is_atomic_file(inode))
 -    goto out;
>>
>> Seems that we need this check.
>>
 -
    if (f2fs_is_pinned_file(inode)) {
    f2fs_pin_file_control(inode, true);
    goto out;
 @@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, 
 block_t bidx, int gc_type,
    if (!check_valid_map(F2FS_I_SB(inode), segno, off))
    goto out;

 -    if (f2fs_is_atomic_file(inode))
 -    goto out;
>>
>> Ditto.
>>
>> Thanks,
>>
    if (f2fs_is_pinned_file(inode)) {
    if (gc_type == FG_GC)
    f2fs_pin_file_control(inode, true);

>>>
>>
>> .
>>
> 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] dump.f2fs: correct the seg type in ssa_dump

2018-02-09 Thread Chao Yu
On 2018/2/6 16:47, Junling Zheng wrote:
> Fix the mixed using of "ret" in ssa_dump.
> 
> Signed-off-by: Junling Zheng 

Reviewed-by: Chao Yu 

Thanks,

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [RFC PATCH 4/5] mkfs.f2fs: create lost+found directory

2018-02-09 Thread Chao Yu
Hi Sheng Yong,

On 2018/2/9 11:21, Sheng Yong wrote:
> Hi, Chao
> 
> On 2018/2/8 23:08, Chao Yu wrote:
>> On 2018/2/6 12:31, Sheng Yong wrote:
>>> This patch introduces a new feature F2FS_FEATURE_LOST_FOUND. It can be
>>> switched on by indicating a new option `lost+found' with -O. If
>>
>> Not sure, do we need to change this option to 'lost_found' to follow other
>> generic -O options?
> 
> I'm also not sure about this, since there is already a `lost_found' in fsck :(

Change 'lost_found' to 'lost+found' in fsck to unify the output path?

>> Do not need to update this whole slot?
> 
> What do you mean by updating the whole slot? Two slots used by "lost+found"
> are filled.

'lost+found' takes two slots, but we need only fill all contents in first slot
and filename & bitmap of these two slots, as in both kernel/userspace sides,
we will skip updating & travelling second and later slots during add_link & 
lookup.
Let's keep consistent with the behavior.

Thanks,

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [RFC PATCH 1/5] mkfs.f2fs: introduce mkfs parameters in f2fs_configuration

2018-02-09 Thread Chao Yu
Hi Sheng Yong,

On 2018/2/9 11:21, Sheng Yong wrote:
> Hi, Chao
> 
> Add Hyojun.
> 
> On 2018/2/8 21:30, Chao Yu wrote:
>> On 2018/2/6 12:31, Sheng Yong wrote:
>>>   /* only root inode was written before truncating dnodes */
>>>   last_inode_pos = start_inode_pos +
>>> -    c.cur_seg[CURSEG_HOT_NODE] * c.blks_per_seg + quota_inum;
>>> +    c.cur_seg[CURSEG_HOT_NODE] * c.blks_per_seg + c.quota_inum;
>>
>> - f2fs_create_root_dir
>>   - f2fs_write_root_inode
>>    - discard_obsolete_dnode
>>  access c.quota_inum
>>   - f2fs_write_qf_inode
>>     update c.quota_inum
>>
>> Should c.quota_inum be initialized more early?
> 
> I think we should only count quota inodes if thye are already initialized.
> Otherwise, if quota_ino is not enabled, there is only one inode (root) in
> CURSEG_HOT_NODE, and two blocks next to root inode will not be discarded.

IMO, we need to move discard_obsolete_dnode in to f2fs_create_root_dir as below:

- f2fs_create_root_dir
 - f2fs_write_root_inode
 - f2fs_write_qf_inode
 - discard_obsolete_dnode

So the last obsolete position can be calculated correctly.

Thanks,

> 
> I'm a bit confused about the `offset' scale in discard_obsolete_dnode.
> It seems `if (offset >= start_inode_pos || offset <= last_inode_pos)'
> will always be true?
> 
> Hi, Hyojun, could you please also have a look at this, thanks :)
> 
> diff --git a/mkfs/f2fs_format.c b/mkfs/f2fs_format.c
> index 0fc8b30..6c9ae22 100644
> --- a/mkfs/f2fs_format.c
> +++ b/mkfs/f2fs_format.c
> @@ -963,7 +963,7 @@ static int discard_obsolete_dnode(struct f2fs_node 
> *raw_node, u_int64_t offset)
>     }
>     offset = next_blkaddr;
>     /* should avoid recursive chain due to stale data */
> -   if (offset >= start_inode_pos || offset <= last_inode_pos)
> +   if (offset <= last_inode_pos)
>     break;
>     } while (1);
> 
> thanks,
> Sheng
>>
>> Thanks,
>>
>> .
>>
> 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: add fi->commit_lock to protect commit GCed pages

2018-02-09 Thread Yunlong Song

As what I point in last mail, if the atomic file is not committed
yet, gc_data_segment will register_inmem_page the GCed data pages.
This will cause these data pages written twice, the first write
happens in move_data_page->do_write_data_page, and the second
write happens in later __commit_inmem_pages->do_write_data_page.

On 2018/2/9 20:44, Chao Yu wrote:

On 2018/2/8 11:11, Yunlong Song wrote:

Then the GCed data pages are totally mixed with the inmem atomic pages,


If we add dio_rwsem, GC flow is exclude with atomic write flow. There
will be not race case to mix GCed page into atomic pages.

Or you mean:

- gc_data_segment
 - move_data_page
  - f2fs_is_atomic_file
- f2fs_ioc_start_atomic_write
  - set_inode_flag(inode, FI_ATOMIC_FILE);
  - f2fs_set_data_page_dirty
   - register_inmem_page

In this case, GCed page can be mixed into database transaction, but could
it cause any problem except break rule of isolation for transaction.


this will cause the atomic commit ops write the GCed data pages twice
(the first write happens in GC).

How about using the early two patches to separate the inmem data pages
and GCed data pages, and use dio_rwsem instead of this patch to fix the
dnode page problem (dnode page commited but data page are not committed
for the GCed page)?


Could we fix the race case first, based on that fixing, and then find the
place that we can improve?




On 2018/2/7 20:16, Chao Yu wrote:

On 2018/2/6 11:49, Yunlong Song wrote:

This patch adds fi->commit_lock to avoid the case that GCed node pages
are committed but GCed data pages are not committed. This can avoid the
db file run into inconsistent state when sudden-power-off happens if
data pages of atomic file is allowed to be GCed before.


do_fsync:GC:
- mutex_lock(&fi->commit_lock);
 - lock_page()
  - mutex_lock(&fi->commit_lock);
   - lock_page()


Well, please consider lock dependency & code complexity, IMO, reuse
fi->dio_rwsem[WRITE] will be enough as below:

---
   fs/f2fs/file.c | 3 +++
   fs/f2fs/gc.c   | 5 -
   2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 672a542e5464..1bdc11feb8d0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)

   inode_lock(inode);

+down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
   if (f2fs_is_volatile_file(inode))
   goto err_out;

@@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
   ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
   }
   err_out:
+up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
   inode_unlock(inode);
   mnt_drop_write_file(filp);
   return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b9d93fd532a9..e49416283563 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t 
bidx,
   if (!check_valid_map(F2FS_I_SB(inode), segno, off))
   goto out;

-if (f2fs_is_atomic_file(inode))
-goto out;


Seems that we need this check.


-
   if (f2fs_is_pinned_file(inode)) {
   f2fs_pin_file_control(inode, true);
   goto out;
@@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t 
bidx, int gc_type,
   if (!check_valid_map(F2FS_I_SB(inode), segno, off))
   goto out;

-if (f2fs_is_atomic_file(inode))
-goto out;


Ditto.

Thanks,


   if (f2fs_is_pinned_file(inode)) {
   if (gc_type == FG_GC)
   f2fs_pin_file_control(inode, true);





.



--
Thanks,
Yunlong Song


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: add fi->commit_lock to protect commit GCed pages

2018-02-09 Thread Chao Yu
On 2018/2/8 11:11, Yunlong Song wrote:
> Then the GCed data pages are totally mixed with the inmem atomic pages,

If we add dio_rwsem, GC flow is exclude with atomic write flow. There
will be not race case to mix GCed page into atomic pages.

Or you mean:

- gc_data_segment
 - move_data_page
  - f2fs_is_atomic_file
- f2fs_ioc_start_atomic_write
 - set_inode_flag(inode, FI_ATOMIC_FILE);
  - f2fs_set_data_page_dirty
   - register_inmem_page

In this case, GCed page can be mixed into database transaction, but could
it cause any problem except break rule of isolation for transaction.

> this will cause the atomic commit ops write the GCed data pages twice
> (the first write happens in GC).
> 
> How about using the early two patches to separate the inmem data pages
> and GCed data pages, and use dio_rwsem instead of this patch to fix the
> dnode page problem (dnode page commited but data page are not committed
> for the GCed page)?

Could we fix the race case first, based on that fixing, and then find the
place that we can improve?

> 
> 
> On 2018/2/7 20:16, Chao Yu wrote:
>> On 2018/2/6 11:49, Yunlong Song wrote:
>>> This patch adds fi->commit_lock to avoid the case that GCed node pages
>>> are committed but GCed data pages are not committed. This can avoid the
>>> db file run into inconsistent state when sudden-power-off happens if
>>> data pages of atomic file is allowed to be GCed before.
>>
>> do_fsync:    GC:
>> - mutex_lock(&fi->commit_lock);
>>     - lock_page()
>>  - mutex_lock(&fi->commit_lock);
>>   - lock_page()
>>
>>
>> Well, please consider lock dependency & code complexity, IMO, reuse
>> fi->dio_rwsem[WRITE] will be enough as below:
>>
>> ---
>>   fs/f2fs/file.c | 3 +++
>>   fs/f2fs/gc.c   | 5 -
>>   2 files changed, 3 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>> index 672a542e5464..1bdc11feb8d0 100644
>> --- a/fs/f2fs/file.c
>> +++ b/fs/f2fs/file.c
>> @@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file 
>> *filp)
>>
>>   inode_lock(inode);
>>
>> +    down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>> +
>>   if (f2fs_is_volatile_file(inode))
>>   goto err_out;
>>
>> @@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file 
>> *filp)
>>   ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
>>   }
>>   err_out:
>> +    up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>   inode_unlock(inode);
>>   mnt_drop_write_file(filp);
>>   return ret;
>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> index b9d93fd532a9..e49416283563 100644
>> --- a/fs/f2fs/gc.c
>> +++ b/fs/f2fs/gc.c
>> @@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t 
>> bidx,
>>   if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>   goto out;
>>
>> -    if (f2fs_is_atomic_file(inode))
>> -    goto out;

Seems that we need this check.

>> -
>>   if (f2fs_is_pinned_file(inode)) {
>>   f2fs_pin_file_control(inode, true);
>>   goto out;
>> @@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t 
>> bidx, int gc_type,
>>   if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>   goto out;
>>
>> -    if (f2fs_is_atomic_file(inode))
>> -    goto out;

Ditto.

Thanks,

>>   if (f2fs_is_pinned_file(inode)) {
>>   if (gc_type == FG_GC)
>>   f2fs_pin_file_control(inode, true);
>>
> 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel