Hi,

在 2026/4/15 3:03, Benjamin Marzinski 写道:
> returning DM_MAPIO_REQUEUE from the target map() function only requeues
> the bio during noflush suspends. During regular operations or during
> flushing suspends, it fails the bio. Failing the bio during flushing
> suspends is the correct behavior here. We cannot handle the bio, and we
> cannot suspend while it is outstanding. But during normal operations,
> we should not push the bio back to dm. Instead, wait for the reshape
> to be resumed.
>
> Signed-off-by: Benjamin Marzinski <[email protected]>
> ---
>
> Yang Xiuwei, if you are still able to see I/O errors during LVM testing,
> does this patch fix them?
>
>   drivers/md/dm-raid.c | 7 +++++++
>   drivers/md/md.h      | 1 +
>   drivers/md/raid5.c   | 6 ++++--
>   3 files changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
> index 4bacdc499984..cac61d57e7e2 100644
> --- a/drivers/md/dm-raid.c
> +++ b/drivers/md/dm-raid.c
> @@ -3831,6 +3831,7 @@ static void raid_presuspend(struct dm_target *ti)
>        * resume, raid_postsuspend() is too late.
>        */
>       set_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
> +     WRITE_ONCE(mddev->dm_suspending, 1);
>   
>       if (!reshape_interrupted(mddev))
>               return;
> @@ -3847,6 +3848,9 @@ static void raid_presuspend(struct dm_target *ti)
>   static void raid_presuspend_undo(struct dm_target *ti)
>   {
>       struct raid_set *rs = ti->private;
> +     struct mddev *mddev = &rs->md;
> +
> +     WRITE_ONCE(mddev->dm_suspending, 0);
>   
>       clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
>   }
> @@ -3854,6 +3858,7 @@ static void raid_presuspend_undo(struct dm_target *ti)
>   static void raid_postsuspend(struct dm_target *ti)
>   {
>       struct raid_set *rs = ti->private;
> +     struct mddev *mddev = &rs->md;
>   
>       if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
>               /*
> @@ -3864,6 +3869,8 @@ static void raid_postsuspend(struct dm_target *ti)
>               mddev_suspend(&rs->md, false);
>               rs->md.ro = MD_RDONLY;
>       }
> +     WRITE_ONCE(mddev->dm_suspending, 0);
> +
>   }
>   
>   static void attempt_restore_of_faulty_devices(struct raid_set *rs)
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index ac84289664cd..e8d7332c5cb9 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -463,6 +463,7 @@ struct mddev {
>       int                             delta_disks, new_level, new_layout;
>       int                             new_chunk_sectors;
>       int                             reshape_backwards;
> +     int                             dm_suspending;

This patch looks fine; however, can you also optimize it by using a new
flag instead of a new int field?

>   
>       struct md_thread __rcu          *thread;        /* management thread */
>       struct md_thread __rcu          *sync_thread;   /* doing resync or 
> reconstruct */
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 8854e024f311..d528263f92a3 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -6042,8 +6042,10 @@ static enum stripe_result make_stripe_request(struct 
> mddev *mddev,
>       raid5_release_stripe(sh);
>   out:
>       if (ret == STRIPE_SCHEDULE_AND_RETRY && reshape_interrupted(mddev)) {
> -             bi->bi_status = BLK_STS_RESOURCE;
> -             ret = STRIPE_WAIT_RESHAPE;
> +             if (!mddev_is_dm(mddev) || READ_ONCE(mddev->dm_suspending)) {
> +                     bi->bi_status = BLK_STS_RESOURCE;
> +                     ret = STRIPE_WAIT_RESHAPE;
> +             }
>               pr_err_ratelimited("dm-raid456: io across reshape position 
> while reshape can't make progress");
>       }
>       return ret;

-- 
Thanks,
Kuai

Reply via email to