This fills out our blk_holder_ops with freeze and thaw callbacks, for shutting down IO (generally during a system suspend).
This is implemented completely differently than on other filesystems since we have a low-level synchronization object which conveniently works well for us - bch_dev.io_ref, normally used for guarding against a device being offlined while in use. bch2_dev_get_ioref() now checks if a freeze is in progress if it fails to get ca->io_ref, and sleeps until the freeze is over and ca->io_ref is alive again. We also need a bit of synchronization for freeze/suspend vs. device online/offline, done with the new bch_dev.io_ref_statechange_lock. Signed-off-by: Kent Overstreet <[email protected]> --- fs/bcachefs/bcachefs.h | 3 ++ fs/bcachefs/journal_io.c | 2 +- fs/bcachefs/sb-members.c | 49 ++++++++++++++++++++++ fs/bcachefs/sb-members.h | 20 +-------- fs/bcachefs/super.c | 87 +++++++++++++++++++++++++++++++++++++--- 5 files changed, 136 insertions(+), 25 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index d2c3f59a668f..d03aa62907ad 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -526,6 +526,9 @@ struct bch_dev { struct completion ref_completion; struct percpu_ref io_ref; struct completion io_ref_completion; + struct mutex io_ref_statechange_lock; + unsigned frozen; + wait_queue_head_t frozen_wait; struct bch_fs *fs; diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index a510755a8364..6979fef5c128 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1769,7 +1769,7 @@ static CLOSURE_CALLBACK(journal_write_submit) struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE); if (!ca) { /* XXX: fix this */ - bch_err(c, "missing device for journal write\n"); + bch_err(c, "missing device for journal write"); continue; } diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 116131f95815..2363367cb32d 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -9,6 +9,55 @@ #include "sb-members.h" #include "super-io.h" +/* + * Use of bch2_dev_get_ioref() is subject to deadlocks if used 
incorrectly, and + * we cannot write asserts for correct usage, so: pay attention, because this is + * where we implement freeze. + * + * Waiting on an outstanding freeze to complete will indirectly wait on all + * other outstanding io_refs to be released. That means: + * + * - Don't use bch2_dev_get_ioref() if you already have an io_ref, use + * percpu_ref_get(). Since dev_get_ioref() has tryget() semantics, that's what + * you should be doing anyways. + * + * - All io_refs must be released without blocking on locks that might be held + * while calling dev_get_ioref(). This is easy to obey since we generally + * release io_refs from endio functions. + * + */ +struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev, int rw) +{ + might_sleep(); +again: + rcu_read_lock(); + struct bch_dev *ca = bch2_dev_rcu(c, dev); + if (likely(ca)) { + if (unlikely(!percpu_ref_tryget(&ca->io_ref))) { + smp_mb(); + if (ca->frozen) { + bch2_dev_get(ca); + rcu_read_unlock(); + + wait_event(ca->frozen_wait, !ca->frozen); + bch2_dev_put(ca); + goto again; + } + ca = NULL; + } + } + rcu_read_unlock(); + + if (ca && + (ca->mi.state == BCH_MEMBER_STATE_rw || + (ca->mi.state == BCH_MEMBER_STATE_ro && rw == READ))) + return ca; + + if (ca) + percpu_ref_put(&ca->io_ref); + return NULL; +} + void bch2_dev_missing(struct bch_fs *c, unsigned dev) { if (dev != BCH_SB_MEMBER_INVALID) diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index df91b02ce575..b3359ee63b0e 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -281,25 +281,7 @@ static inline struct bch_dev *bch2_dev_iterate(struct bch_fs *c, struct bch_dev return bch2_dev_tryget(c, dev_idx); } -static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev, int rw) -{ - might_sleep(); - - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu(c, dev); - if (ca && !percpu_ref_tryget(&ca->io_ref)) - ca = NULL; - rcu_read_unlock(); - - if (ca && - (ca->mi.state == BCH_MEMBER_STATE_rw 
|| - (ca->mi.state == BCH_MEMBER_STATE_ro && rw == READ))) - return ca; - - if (ca) - percpu_ref_put(&ca->io_ref); - return NULL; -} +struct bch_dev *bch2_dev_get_ioref(struct bch_fs *, unsigned, int); /* XXX kill, move to struct bch_fs */ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 05a2dc5ef513..dfdeab7d847c 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1236,6 +1236,22 @@ static void bch2_dev_free(struct bch_dev *ca) kobject_put(&ca->kobj); } +static void bch2_dev_io_ref_stop(struct bch_dev *ca) +{ + lockdep_assert_held(&ca->io_ref_statechange_lock); + + reinit_completion(&ca->io_ref_completion); + percpu_ref_kill(&ca->io_ref); + wait_for_completion(&ca->io_ref_completion); +} + +static void bch2_dev_io_ref_start(struct bch_dev *ca) +{ + lockdep_assert_held(&ca->io_ref_statechange_lock); + + percpu_ref_reinit(&ca->io_ref); +} + static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca) { @@ -1246,13 +1262,14 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca) __bch2_dev_read_only(c, ca); - reinit_completion(&ca->io_ref_completion); - percpu_ref_kill(&ca->io_ref); - wait_for_completion(&ca->io_ref_completion); - bch2_dev_unlink(ca); + mutex_lock(&ca->io_ref_statechange_lock); + bch2_dev_io_ref_stop(ca); + bch2_free_super(&ca->disk_sb); + mutex_unlock(&ca->io_ref_statechange_lock); + bch2_dev_journal_exit(ca); } @@ -1334,6 +1351,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, kobject_init(&ca->kobj, &bch2_dev_ktype); init_completion(&ca->ref_completion); init_completion(&ca->io_ref_completion); + mutex_init(&ca->io_ref_statechange_lock); + init_waitqueue_head(&ca->frozen_wait); INIT_WORK(&ca->io_error_work, bch2_io_error_work); @@ -1428,6 +1447,8 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) if (ret) return ret; + mutex_lock(&ca->io_ref_statechange_lock); + /* Commit: */ ca->disk_sb 
= *sb; memset(sb, 0, sizeof(*sb)); @@ -1441,7 +1462,9 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) ca->dev = ca->disk_sb.bdev->bd_dev; - percpu_ref_reinit(&ca->io_ref); + if (!ca->frozen) + bch2_dev_io_ref_start(ca); + mutex_unlock(&ca->io_ref_statechange_lock); return 0; } @@ -2115,9 +2138,63 @@ static void bch2_fs_bdev_sync(struct block_device *bdev) bch2_ro_ref_put(c); } +static int bch2_fs_bdev_freeze(struct block_device *bdev) +{ + int ret = -EINVAL; + struct bch_fs *c = bdev_get_fs(bdev); + if (!c) + return ret; + + struct bch_dev *ca = bdev_to_bch_dev(c, bdev); + if (!ca) + goto err; + + mutex_lock(&ca->io_ref_statechange_lock); + ca->frozen++; + smp_mb(); + bch2_dev_io_ref_stop(ca); + mutex_unlock(&ca->io_ref_statechange_lock); + + ret = sync_blockdev(bdev); + + bch2_dev_put(ca); +err: + bch2_ro_ref_put(c); + return ret; +} + +static int bch2_fs_bdev_thaw(struct block_device *bdev) +{ + int ret = -EINVAL; + struct bch_fs *c = bdev_get_fs(bdev); + if (!c) + return ret; + + struct bch_dev *ca = bdev_to_bch_dev(c, bdev); + if (!ca) + goto err; + + mutex_lock(&ca->io_ref_statechange_lock); + if (ca->disk_sb.bdev && + ca->frozen == 1) + bch2_dev_io_ref_start(ca); + --ca->frozen; + wake_up(&ca->frozen_wait); + mutex_unlock(&ca->io_ref_statechange_lock); + + ret = 0; + + bch2_dev_put(ca); +err: + bch2_ro_ref_put(c); + return ret; +} + const struct blk_holder_ops bch2_sb_handle_bdev_ops = { .mark_dead = bch2_fs_bdev_mark_dead, .sync = bch2_fs_bdev_sync, + .freeze = bch2_fs_bdev_freeze, + .thaw = bch2_fs_bdev_thaw, }; /* Filesystem open: */ -- 2.47.2
