Returning DM_MAPIO_REQUEUE from the target map() function only requeues the bio during noflush suspends. During regular operations or during flushing suspends, it fails the bio. Failing the bio during flushing suspends is the correct behavior here: we cannot handle the bio, and we cannot suspend while it is outstanding. But during normal operations, we should not push the bio back to dm. Instead, wait for the reshape to be resumed.
Signed-off-by: Benjamin Marzinski <[email protected]> --- Yang Xiuwei, if you are still able to see I/O errors during LVM testing, does this patch fix them? drivers/md/dm-raid.c | 7 +++++++ drivers/md/md.h | 1 + drivers/md/raid5.c | 6 ++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 4bacdc499984..cac61d57e7e2 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3831,6 +3831,7 @@ static void raid_presuspend(struct dm_target *ti) * resume, raid_postsuspend() is too late. */ set_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); + WRITE_ONCE(mddev->dm_suspending, 1); if (!reshape_interrupted(mddev)) return; @@ -3847,6 +3848,9 @@ static void raid_presuspend(struct dm_target *ti) static void raid_presuspend_undo(struct dm_target *ti) { struct raid_set *rs = ti->private; + struct mddev *mddev = &rs->md; + + WRITE_ONCE(mddev->dm_suspending, 0); clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); } @@ -3854,6 +3858,7 @@ static void raid_presuspend_undo(struct dm_target *ti) static void raid_postsuspend(struct dm_target *ti) { struct raid_set *rs = ti->private; + struct mddev *mddev = &rs->md; if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { /* @@ -3864,6 +3869,8 @@ static void raid_postsuspend(struct dm_target *ti) mddev_suspend(&rs->md, false); rs->md.ro = MD_RDONLY; } + WRITE_ONCE(mddev->dm_suspending, 0); + } static void attempt_restore_of_faulty_devices(struct raid_set *rs) diff --git a/drivers/md/md.h b/drivers/md/md.h index ac84289664cd..e8d7332c5cb9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -463,6 +463,7 @@ struct mddev { int delta_disks, new_level, new_layout; int new_chunk_sectors; int reshape_backwards; + int dm_suspending; struct md_thread __rcu *thread; /* management thread */ struct md_thread __rcu *sync_thread; /* doing resync or reconstruct */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8854e024f311..d528263f92a3 100644 --- 
a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6042,8 +6042,10 @@ static enum stripe_result make_stripe_request(struct mddev *mddev, raid5_release_stripe(sh); out: if (ret == STRIPE_SCHEDULE_AND_RETRY && reshape_interrupted(mddev)) { - bi->bi_status = BLK_STS_RESOURCE; - ret = STRIPE_WAIT_RESHAPE; + if (!mddev_is_dm(mddev) || READ_ONCE(mddev->dm_suspending)) { + bi->bi_status = BLK_STS_RESOURCE; + ret = STRIPE_WAIT_RESHAPE; + } pr_err_ratelimited("dm-raid456: io across reshape position while reshape can't make progress"); } return ret; -- 2.50.1
