Returning DM_MAPIO_REQUEUE from the target map() function only requeues
the bio during noflush suspends. During regular operations or during
flushing suspends, it fails the bio. Failing the bio during flushing
suspends is the correct behavior here. We cannot handle the bio, and we
cannot suspend while it is outstanding. But during normal operations,
we should not push the bio back to dm. Instead, wait for the reshape
to be resumed.

Signed-off-by: Benjamin Marzinski <[email protected]>
---

Yang Xiuwei, if you are still able to see I/O errors during LVM testing,
does this patch fix them?

 drivers/md/dm-raid.c | 7 +++++++
 drivers/md/md.h      | 1 +
 drivers/md/raid5.c   | 6 ++++--
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 4bacdc499984..cac61d57e7e2 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3831,6 +3831,7 @@ static void raid_presuspend(struct dm_target *ti)
         * resume, raid_postsuspend() is too late.
         */
        set_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
+       WRITE_ONCE(mddev->dm_suspending, 1);
 
        if (!reshape_interrupted(mddev))
                return;
@@ -3847,6 +3848,9 @@ static void raid_presuspend(struct dm_target *ti)
 static void raid_presuspend_undo(struct dm_target *ti)
 {
        struct raid_set *rs = ti->private;
+       struct mddev *mddev = &rs->md;
+
+       WRITE_ONCE(mddev->dm_suspending, 0);
 
        clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
 }
@@ -3854,6 +3858,7 @@ static void raid_presuspend_undo(struct dm_target *ti)
 static void raid_postsuspend(struct dm_target *ti)
 {
        struct raid_set *rs = ti->private;
+       struct mddev *mddev = &rs->md;
 
        if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
                /*
@@ -3864,6 +3869,8 @@ static void raid_postsuspend(struct dm_target *ti)
                mddev_suspend(&rs->md, false);
                rs->md.ro = MD_RDONLY;
        }
+       WRITE_ONCE(mddev->dm_suspending, 0);
+
 }
 
 static void attempt_restore_of_faulty_devices(struct raid_set *rs)
diff --git a/drivers/md/md.h b/drivers/md/md.h
index ac84289664cd..e8d7332c5cb9 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -463,6 +463,7 @@ struct mddev {
        int                             delta_disks, new_level, new_layout;
        int                             new_chunk_sectors;
        int                             reshape_backwards;
+       int                             dm_suspending;
 
        struct md_thread __rcu          *thread;        /* management thread */
        struct md_thread __rcu          *sync_thread;   /* doing resync or reconstruct */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8854e024f311..d528263f92a3 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6042,8 +6042,10 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
        raid5_release_stripe(sh);
 out:
        if (ret == STRIPE_SCHEDULE_AND_RETRY && reshape_interrupted(mddev)) {
-               bi->bi_status = BLK_STS_RESOURCE;
-               ret = STRIPE_WAIT_RESHAPE;
+               if (!mddev_is_dm(mddev) || READ_ONCE(mddev->dm_suspending)) {
+                       bi->bi_status = BLK_STS_RESOURCE;
+                       ret = STRIPE_WAIT_RESHAPE;
+               }
                pr_err_ratelimited("dm-raid456: io across reshape position while reshape can't make progress");
        }
        return ret;
-- 
2.50.1


Reply via email to