Re: [f2fs-dev] [PATCH] f2fs:modify the entering condition for f2fs_migrate_blocks()

2024-05-16 Thread Chao Yu

On 2024/5/15 16:24, Liao Yuanhong wrote:

Currently, when we allocate a swap file on zoned UFS, the file will be
created on conventional UFS. If the swap file size is not aligned with the
zone size, the last extent will enter f2fs_migrate_blocks(), resulting in
significant additional I/O overhead and prolonged lock occupancy. In most
cases, this is unnecessary, because on conventional UFS, as long as the
start block of the swap file is aligned with the zone, it is sequentially
aligned. To circumvent this issue, we have altered the conditions for
entering f2fs_migrate_blocks(). Now, if the start block of the last extent
is aligned with the start of a zone, we avoid entering
f2fs_migrate_blocks().


Hi,

Is it possible to pin the swapfile, fallocate it aligned to the zone size,
and then run mkswap and swapon?

Thanks,



Signed-off-by: Liao Yuanhong 
Signed-off-by: Wu Bo 
---
  fs/f2fs/data.c | 23 +--
  1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 50ceb25b3..4d58fb6c2 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3925,10 +3925,12 @@ static int check_swap_activate(struct swap_info_struct 
*sis,
 block_t pblock;
 block_t lowest_pblock = -1;
 block_t highest_pblock = 0;
+   block_t blk_start;
 int nr_extents = 0;
 unsigned int nr_pblocks;
 unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
 unsigned int not_aligned = 0;
+   unsigned int cur_sec;
 int ret = 0;

 /*
@@ -3965,23 +3967,39 @@ static int check_swap_activate(struct swap_info_struct 
*sis,
 pblock = map.m_pblk;
 nr_pblocks = map.m_len;

-   if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec ||
+   blk_start = pblock - SM_I(sbi)->main_blkaddr;
+
+   if (blk_start % blks_per_sec ||
 nr_pblocks % blks_per_sec ||
 !f2fs_valid_pinned_area(sbi, pblock)) {
 bool last_extent = false;

 not_aligned++;

+   cur_sec = (blk_start + nr_pblocks) / BLKS_PER_SEC(sbi);
 nr_pblocks = roundup(nr_pblocks, blks_per_sec);
-   if (cur_lblock + nr_pblocks > sis->max)
+   if (cur_lblock + nr_pblocks > sis->max) {
 nr_pblocks -= blks_per_sec;

+   /* the start address is aligned to section */
+   if (!(blk_start % blks_per_sec))
+   last_extent = true;
+   }
+
 /* this extent is last one */
 if (!nr_pblocks) {
 nr_pblocks = last_lblock - cur_lblock;
 last_extent = true;
 }

+   /*
+* the last extent which located on conventional UFS 
doesn't
+* need migrate
+*/
+   if (last_extent && f2fs_sb_has_blkzoned(sbi) &&
+   cur_sec < GET_SEC_FROM_SEG(sbi, 
first_zoned_segno(sbi)))
+   goto next;
+
 ret = f2fs_migrate_blocks(inode, cur_lblock,
 nr_pblocks);
 if (ret) {
@@ -3994,6 +4012,7 @@ static int check_swap_activate(struct swap_info_struct 
*sis,
 goto retry;
 }

+next:
 if (cur_lblock + nr_pblocks >= sis->max)
 nr_pblocks = sis->max - cur_lblock;

--
2.25.1




___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs:modify the entering condition for f2fs_migrate_blocks()

2024-05-16 Thread Wu Bo via Linux-f2fs-devel
On Thu, May 16, 2024 at 04:06:34PM +0800, Chao Yu wrote:
> On 2024/5/15 16:24, Liao Yuanhong wrote:
> > Currently, when we allocate a swap file on zoned UFS, the file will be
> > created on conventional UFS. If the swap file size is not aligned with the
> > zone size, the last extent will enter f2fs_migrate_blocks(), resulting in
> > significant additional I/O overhead and prolonged lock occupancy. In most
> > cases, this is unnecessary, because on conventional UFS, as long as the
> > start block of the swap file is aligned with the zone, it is sequentially
> > aligned. To circumvent this issue, we have altered the conditions for
> > entering f2fs_migrate_blocks(). Now, if the start block of the last extent
> > is aligned with the start of a zone, we avoid entering
> > f2fs_migrate_blocks().
> 
> Hi,
> 
> Is it possible to pin the swapfile, fallocate it aligned to the zone size,
> and then run mkswap and swapon?

Users can't tell whether the device is a zoned device. And if f2fs fallocates
to align the size to the zone size, wouldn't too much space be wasted? Some
zoned devices have a very large zone size (>1G).

> 
> Thanks,
> 
> > 
> > Signed-off-by: Liao Yuanhong 
> > Signed-off-by: Wu Bo 
> > ---
> >   fs/f2fs/data.c | 23 +--
> >   1 file changed, 21 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 50ceb25b3..4d58fb6c2 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -3925,10 +3925,12 @@ static int check_swap_activate(struct 
> > swap_info_struct *sis,
> >  block_t pblock;
> >  block_t lowest_pblock = -1;
> >  block_t highest_pblock = 0;
> > +   block_t blk_start;
> >  int nr_extents = 0;
> >  unsigned int nr_pblocks;
> >  unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
> >  unsigned int not_aligned = 0;
> > +   unsigned int cur_sec;
> >  int ret = 0;
> > 
> >  /*
> > @@ -3965,23 +3967,39 @@ static int check_swap_activate(struct 
> > swap_info_struct *sis,
> >  pblock = map.m_pblk;
> >  nr_pblocks = map.m_len;
> > 
> > -   if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec ||
> > +   blk_start = pblock - SM_I(sbi)->main_blkaddr;
> > +
> > +   if (blk_start % blks_per_sec ||
> >  nr_pblocks % blks_per_sec ||
> >  !f2fs_valid_pinned_area(sbi, pblock)) {
> >  bool last_extent = false;
> > 
> >  not_aligned++;
> > 
> > +   cur_sec = (blk_start + nr_pblocks) / 
> > BLKS_PER_SEC(sbi);
> >  nr_pblocks = roundup(nr_pblocks, blks_per_sec);
> > -   if (cur_lblock + nr_pblocks > sis->max)
> > +   if (cur_lblock + nr_pblocks > sis->max) {
> >  nr_pblocks -= blks_per_sec;
> > 
> > +   /* the start address is aligned to section 
> > */
> > +   if (!(blk_start % blks_per_sec))
> > +   last_extent = true;
> > +   }
> > +
> >  /* this extent is last one */
> >  if (!nr_pblocks) {
> >  nr_pblocks = last_lblock - cur_lblock;
> >  last_extent = true;
> >  }
> > 
> > +   /*
> > +* the last extent which located on conventional 
> > UFS doesn't
> > +* need migrate
> > +*/
> > +   if (last_extent && f2fs_sb_has_blkzoned(sbi) &&
> > +   cur_sec < GET_SEC_FROM_SEG(sbi, 
> > first_zoned_segno(sbi)))
> > +   goto next;
> > +
> >  ret = f2fs_migrate_blocks(inode, cur_lblock,
> >  nr_pblocks);
> >  if (ret) {
> > @@ -3994,6 +4012,7 @@ static int check_swap_activate(struct 
> > swap_info_struct *sis,
> >  goto retry;
> >  }
> > 
> > +next:
> >  if (cur_lblock + nr_pblocks >= sis->max)
> >  nr_pblocks = sis->max - cur_lblock;
> > 
> > --
> > 2.25.1
> > 
> 
> 
> ___
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: fix panic in f2fs_put_super

2024-05-16 Thread Chao Yu

On 2024/5/16 16:55, sunshijie wrote:

When thread A calls kill_f2fs_super, it first executes
sbi->node_inode = NULL;
Thread A may then submit a bio from within iput(sbi->meta_inode)
and enter the D (uninterruptible sleep) state.
When the bio submitted by thread A completes, f2fs_write_end_io is called
and may trigger a null-ptr-deref in NODE_MAPPING.


I didn't get it. If there is no cp_err, f2fs_write_checkpoint() in
f2fs_put_super() will flush all dirty pages of node_inode; if there is
cp_err, the flow below ensures that all dirty pages are truncated, and
there is a sanity check on all types of dirty pages.

/* our cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);

f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);

if (err || f2fs_cp_error(sbi)) {
truncate_inode_pages_final(NODE_MAPPING(sbi));
truncate_inode_pages_final(META_MAPPING(sbi));
}

for (i = 0; i < NR_COUNT_TYPE; i++) {
if (!get_pages(sbi, i))
continue;
f2fs_err(sbi, "detect filesystem reference count leak during "
"umount, type: %d, count: %lld", i, get_pages(sbi, i));
f2fs_bug_on(sbi, 1);
}

So, is there any case in which a dirty page of node_inode is missed by
f2fs_put_super()?

Thanks,



Thread A  IRQ context
- f2fs_put_super
  - sbi->node_inode = NULL;
  - iput(sbi->meta_inode);
   - iput_final
- write_inode_now
 - writeback_single_inode
  - __writeback_single_inode
   - filemap_fdatawait
- filemap_fdatawait_range
 - __kcfi_typeid_free_transhuge_page
  - __filemap_fdatawait_range
   - wait_on_page_writeback
- folio_wait_writeback
 - folio_wait_bit
  - folio_wait_bit_common
   - io_schedule

                                    - __handle_irq_event_percpu
                                     - ufs_qcom_mcq_esi_handler
                                      - ufshcd_mcq_poll_cqe_nolock
                                       - ufshcd_compl_one_cqe
                                        - scsi_done
                                         - scsi_done_internal
                                          - blk_mq_complete_request
                                           - scsi_complete
                                            - scsi_finish_command
                                             - scsi_io_completion
                                              - scsi_end_request
                                               - blk_update_request
                                                - bio_endio
                                                 - f2fs_write_end_io
                                                  - NODE_MAPPING(sbi)

Signed-off-by: sunshijie 
---
  fs/f2fs/super.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index adffc9b80a9c..aeb085e11f9a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1641,12 +1641,12 @@ static void f2fs_put_super(struct super_block *sb)
  
  	f2fs_destroy_compress_inode(sbi);
  
-	iput(sbi->node_inode);

-   sbi->node_inode = NULL;
-
iput(sbi->meta_inode);
sbi->meta_inode = NULL;
  
+	iput(sbi->node_inode);

+   sbi->node_inode = NULL;
+
mutex_unlock(&sbi->umount_mutex);
  
  	/*



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH] f2fs: fix panic in f2fs_put_super

2024-05-16 Thread sunshijie via Linux-f2fs-devel
When thread A calls kill_f2fs_super, it first executes
sbi->node_inode = NULL;
Thread A may then submit a bio from within iput(sbi->meta_inode)
and enter the D (uninterruptible sleep) state.
When the bio submitted by thread A completes, f2fs_write_end_io is called
and may trigger a null-ptr-deref in NODE_MAPPING.

Thread A  IRQ context
- f2fs_put_super
 - sbi->node_inode = NULL;
 - iput(sbi->meta_inode);
  - iput_final
   - write_inode_now
- writeback_single_inode
 - __writeback_single_inode
  - filemap_fdatawait
   - filemap_fdatawait_range
- __kcfi_typeid_free_transhuge_page
 - __filemap_fdatawait_range
  - wait_on_page_writeback
   - folio_wait_writeback
- folio_wait_bit
 - folio_wait_bit_common
  - io_schedule

                                    - __handle_irq_event_percpu
                                     - ufs_qcom_mcq_esi_handler
                                      - ufshcd_mcq_poll_cqe_nolock
                                       - ufshcd_compl_one_cqe
                                        - scsi_done
                                         - scsi_done_internal
                                          - blk_mq_complete_request
                                           - scsi_complete
                                            - scsi_finish_command
                                             - scsi_io_completion
                                              - scsi_end_request
                                               - blk_update_request
                                                - bio_endio
                                                 - f2fs_write_end_io
                                                  - NODE_MAPPING(sbi)

Signed-off-by: sunshijie 
---
 fs/f2fs/super.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index adffc9b80a9c..aeb085e11f9a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1641,12 +1641,12 @@ static void f2fs_put_super(struct super_block *sb)
 
f2fs_destroy_compress_inode(sbi);
 
-   iput(sbi->node_inode);
-   sbi->node_inode = NULL;
-
iput(sbi->meta_inode);
sbi->meta_inode = NULL;
 
+   iput(sbi->node_inode);
+   sbi->node_inode = NULL;
+
mutex_unlock(&sbi->umount_mutex);
 
/*
-- 
2.34.1



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH] f2fs: fix panic in f2fs_put_super

2024-05-16 Thread sunshijie via Linux-f2fs-devel
When thread A calls kill_f2fs_super, it may submit a bio from within
iput(sbi->node_inode);
Thread A then enters the D (uninterruptible sleep) state. Meanwhile, thread B
may also call kill_f2fs_super and execute sbi->node_inode = NULL;
When the bio submitted by thread A completes, f2fs_write_end_io is called
and may trigger a null-ptr-deref in NODE_MAPPING.

Thread A  Thread B  
   IRQ context
- f2fs_put_super
 - iput(sbi->node_inode)
  - iput_final
   - write_inode_now
- writeback_single_inode
 - __writeback_single_inode
  - filemap_fdatawait
   - filemap_fdatawait_range
- __kcfi_typeid_free_transhuge_page
 - __filemap_fdatawait_range
  - wait_on_page_writeback
   - folio_wait_writeback
- folio_wait_bit
 - folio_wait_bit_common
  - io_schedule
  - f2fs_put_super
   - sbi->node_inode = NULL

   - __handle_irq_event_percpu

- ufs_qcom_mcq_esi_handler

 - ufshcd_mcq_poll_cqe_nolock

  - ufshcd_compl_one_cqe

   - scsi_done

- scsi_done_internal

 - blk_mq_complete_request

  - scsi_complete

   - scsi_finish_command

- scsi_io_completion

 - scsi_end_request

  - blk_update_request

   - bio_endio

- f2fs_write_end_io

 - NODE_MAPPING(sbi)

Signed-off-by: sunshijie 
---
 fs/f2fs/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a4bc26dfdb1a..adffc9b80a9c 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1618,7 +1618,6 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_release_ino_entry(sbi, true);
 
f2fs_leave_shrinker(sbi);
-   mutex_unlock(&sbi->umount_mutex);
 
/* our cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);
@@ -1648,6 +1647,8 @@ static void f2fs_put_super(struct super_block *sb)
iput(sbi->meta_inode);
sbi->meta_inode = NULL;
 
+   mutex_unlock(&sbi->umount_mutex);
+
/*
 * iput() can update stat information, if f2fs_write_checkpoint()
 * above failed with error.
-- 
2.34.1



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH] f2fs: fix panic in f2fs_put_super

2024-05-16 Thread sunshijie via Linux-f2fs-devel
When thread A calls kill_f2fs_super, it first executes
sbi->node_inode = NULL;
Thread A may then submit a bio from within iput(sbi->meta_inode)
and enter the D (uninterruptible sleep) state.
When the bio submitted by thread A completes, f2fs_write_end_io is called
and may trigger a null-ptr-deref in NODE_MAPPING.

Thread A  IRQ context
- f2fs_put_super
 - sbi->node_inode = NULL;
 - iput(sbi->meta_inode);
  - iput_final
   - write_inode_now
- writeback_single_inode
 - __writeback_single_inode
  - filemap_fdatawait
   - filemap_fdatawait_range
- __kcfi_typeid_free_transhuge_page
 - __filemap_fdatawait_range
  - wait_on_page_writeback
   - folio_wait_writeback
- folio_wait_bit
 - folio_wait_bit_common
  - io_schedule

                                    - __handle_irq_event_percpu
                                     - ufs_qcom_mcq_esi_handler
                                      - ufshcd_mcq_poll_cqe_nolock
                                       - ufshcd_compl_one_cqe
                                        - scsi_done
                                         - scsi_done_internal
                                          - blk_mq_complete_request
                                           - scsi_complete
                                            - scsi_finish_command
                                             - scsi_io_completion
                                              - scsi_end_request
                                               - blk_update_request
                                                - bio_endio
                                                 - f2fs_write_end_io
                                                  - NODE_MAPPING(sbi)

Signed-off-by: sunshijie 
---
 fs/f2fs/super.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index adffc9b80a9c..62d4f229f601 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1642,9 +1642,9 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_destroy_compress_inode(sbi);
 
iput(sbi->node_inode);
-   sbi->node_inode = NULL;
-
iput(sbi->meta_inode);
+
+   sbi->node_inode = NULL;
sbi->meta_inode = NULL;
 
mutex_unlock(&sbi->umount_mutex);
-- 
2.34.1



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] 答复: [External Mail][PATCH] f2fs: fix panic in f2fs_put_super

2024-05-16 Thread Chao Yu

On 2024/5/16 18:15, 孙士杰 wrote:

I didn't get it. If there is no cp_err, f2fs_write_checkpoint() in
f2fs_put_super() will flush all dirty pages of node_inode; if there is
cp_err, the flow below ensures that all dirty pages are truncated, and
there is a sanity check on all types of dirty pages.

===》
I understand what you mean. Would it be better to modify it this way instead?
Please help check it, thank you.


Hi, let's figure out the root cause first?

Thanks,



--
*发件人:* sunshijie 
*发送时间:* 2024年5月16日 18:13:38
*收件人:* jaeg...@kernel.org; c...@kernel.org; 
linux-f2fs-devel@lists.sourceforge.net; linux-ker...@vger.kernel.org
*抄送:* 孙士杰
*主题:* [External Mail][PATCH] f2fs: fix panic in f2fs_put_super
[外部邮件] 此邮件来源于小米公司外部,请谨慎处理。若对邮件安全性存疑,请将邮件转发给mi...@xiaomi.com进行反馈

When thread A calls kill_f2fs_super, it first executes
sbi->node_inode = NULL;
Thread A may then submit a bio from within iput(sbi->meta_inode)
and enter the D (uninterruptible sleep) state.
When the bio submitted by thread A completes, f2fs_write_end_io is called
and may trigger a null-ptr-deref in NODE_MAPPING.

Thread A  IRQ context
- f2fs_put_super
  - sbi->node_inode = NULL;
  - iput(sbi->meta_inode);
   - iput_final
    - write_inode_now
     - writeback_single_inode
  - __writeback_single_inode
   - filemap_fdatawait
    - filemap_fdatawait_range
     - __kcfi_typeid_free_transhuge_page
  - __filemap_fdatawait_range
   - wait_on_page_writeback
    - folio_wait_writeback
     - folio_wait_bit
  - folio_wait_bit_common
   - io_schedule

                                    - __handle_irq_event_percpu
                                     - ufs_qcom_mcq_esi_handler
                                      - ufshcd_mcq_poll_cqe_nolock
                                       - ufshcd_compl_one_cqe
                                        - scsi_done
                                         - scsi_done_internal
                                          - blk_mq_complete_request
                                           - scsi_complete
                                            - scsi_finish_command
                                             - scsi_io_completion
                                              - scsi_end_request
                                               - blk_update_request
                                                - bio_endio
                                                 - f2fs_write_end_io
                                                  - NODE_MAPPING(sbi)

Signed-off-by: sunshijie 
---
  fs/f2fs/super.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index adffc9b80a9c..62d4f229f601 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1642,9 +1642,9 @@ static void f2fs_put_super(struct super_block *sb)
     f2fs_destroy_compress_inode(sbi);

     iput(sbi->node_inode);
-   sbi->node_inode = NULL;
-
     iput(sbi->meta_inode);
+
+   sbi->node_inode = NULL;
     sbi->meta_inode = NULL;

     mutex_unlock(&sbi->umount_mutex);
--
2.34.1

#/**本邮件及其附件含有小米公司的保密信息,仅限于发送给上面地址中列出的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本邮件!
 This e-mail and its attachments contain confidential information from XIAOMI, 
which is intended only for the person or entity whose address is listed above. 
Any use of the information contained herein in any way (including, but not 
limited to, total or partial disclosure, reproduction, or dissemination) by 
persons other than the intended recipient(s) is prohibited. If you receive this 
e-mail in error, please notify the sender by phone or email immediately and 
delete it!*