We're going to strictly enforce that extents match the IO path options, as defined by the filesystem/inode options. That means that when we call bch2_bkey_set_needs_rebalance(), we need to pass it the opts we read from the inode in that same transaction.
Signed-off-by: Kent Overstreet <[email protected]> --- fs/bcachefs/data_update.c | 6 +++++- fs/bcachefs/inode.c | 28 ++++++++++++++++++--------- fs/bcachefs/inode.h | 2 +- fs/bcachefs/io_misc.c | 8 +------- fs/bcachefs/io_write.c | 40 ++++++++++++++++++++++++++++----------- fs/bcachefs/reflink.c | 16 +++++----------- 6 files changed, 60 insertions(+), 40 deletions(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 968850da0d23..43d318ff488e 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -11,6 +11,7 @@ #include "ec.h" #include "error.h" #include "extents.h" +#include "inode.h" #include "io_write.h" #include "keylist.h" #include "move.h" @@ -428,13 +429,16 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, goto out; } + struct bch_inode_opts opts; + ret = bch2_trans_log_str(trans, bch2_data_update_type_strs[m->type]) ?: bch2_trans_log_bkey(trans, m->btree_id, 0, m->k.k) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, bkey_start_pos(&insert->k)) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, insert->k.p) ?: - bch2_bkey_set_needs_rebalance(c, &op->opts, insert) ?: + bch2_inum_snapshot_opts_get(trans, k.k->p.inode, k.k->p.snapshot, &opts) ?: + bch2_bkey_set_needs_rebalance(c, &opts, insert) ?: bch2_trans_update(trans, &iter, insert, BTREE_UPDATE_internal_snapshot_node); if (ret) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index c1d673374e02..d1ec33edcc0b 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -369,9 +369,9 @@ int __bch2_inode_peek(struct btree_trans *trans, } int bch2_inode_find_by_inum_snapshot(struct btree_trans *trans, - u64 inode_nr, u32 snapshot, - struct bch_inode_unpacked *inode, - unsigned flags) + u64 inode_nr, u32 snapshot, + struct bch_inode_unpacked *inode, + unsigned flags) { CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, SPOS(0, inode_nr, snapshot), flags); struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); @@ 
-1244,15 +1244,25 @@ void bch2_inode_opts_get_inode(struct bch_fs *c, bch2_io_opts_fixups(ret); } -int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_inode_opts *opts) +int bch2_inum_snapshot_opts_get(struct btree_trans *trans, + u64 inum, u32 snapshot, + struct bch_inode_opts *opts) { - struct bch_inode_unpacked inode; - int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode)); + if (inum) { + struct bch_inode_unpacked inode; + int ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0); + if (ret) + return ret; - if (ret) - return ret; + bch2_inode_opts_get_inode(trans->c, &inode, opts); + } else { + /* + * data_update_index_update may call us for reflink btree extent + * updates, inum will be 0 + */ - bch2_inode_opts_get_inode(trans->c, &inode, opts); + bch2_inode_opts_get(trans->c, opts); + } return 0; } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 12e0a104c196..63b7088811fb 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -290,7 +290,7 @@ void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *); struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *); void bch2_inode_opts_get_inode(struct bch_fs *, struct bch_inode_unpacked *, struct bch_inode_opts *); -int bch2_inum_opts_get(struct btree_trans *, subvol_inum, struct bch_inode_opts *); +int bch2_inum_snapshot_opts_get(struct btree_trans *, u64, u32, struct bch_inode_opts *); int bch2_inode_set_casefold(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, unsigned); diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 5e03574059e0..6d204b980f76 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -373,7 +373,6 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, struct btree_iter iter; struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), 
le64_to_cpu(op->v.inum) }; - struct bch_inode_opts opts; u64 dst_offset = le64_to_cpu(op->v.dst_offset); u64 src_offset = le64_to_cpu(op->v.src_offset); s64 shift = dst_offset - src_offset; @@ -384,10 +383,6 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, bool warn_errors = i_sectors_delta != NULL; int ret = 0; - ret = bch2_inum_opts_get(trans, inum, &opts); - if (ret) - return ret; - /* * check for missing subvolume before fpunch, as in resume we don't want * it to be a fatal error @@ -476,8 +471,7 @@ case LOGGED_OP_FINSERT_shift_extents: op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); - ret = bch2_bkey_set_needs_rebalance(c, &opts, copy) ?: - bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: + ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 1d83dcc9731e..a0cb5d2dd0f8 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -205,7 +205,8 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, struct btree_iter *extent_iter, u64 new_i_size, - s64 i_sectors_delta) + s64 i_sectors_delta, + struct bch_inode_unpacked *inode_u) { /* * Crazy performance optimization: @@ -227,7 +228,13 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, BTREE_ITER_intent| BTREE_ITER_cached); struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); + + /* + * XXX: we currently need to unpack the inode on every write because we + * need the current io_opts, for transactional consistency - inode_v4? 
+ */ + int ret = bkey_err(k) ?: + bch2_inode_unpack(k, inode_u); if (unlikely(ret)) return ret; @@ -305,6 +312,7 @@ int bch2_extent_update(struct btree_trans *trans, s64 *i_sectors_delta_total, bool check_enospc) { + struct bch_fs *c = trans->c; struct bpos next_pos; bool usage_increasing; s64 i_sectors_delta = 0, disk_sectors_delta = 0; @@ -335,7 +343,7 @@ int bch2_extent_update(struct btree_trans *trans, if (disk_res && disk_sectors_delta > (s64) disk_res->sectors) { - ret = bch2_disk_reservation_add(trans->c, disk_res, + ret = bch2_disk_reservation_add(c, disk_res, disk_sectors_delta - disk_res->sectors, !check_enospc || !usage_increasing ? BCH_DISK_RESERVATION_NOFAIL : 0); @@ -349,9 +357,14 @@ int bch2_extent_update(struct btree_trans *trans, * aren't changing - for fsync to work properly; fsync relies on * inode->bi_journal_seq which is updated by the trigger code: */ + struct bch_inode_unpacked inode; + struct bch_inode_opts opts; + ret = bch2_extent_update_i_size_sectors(trans, iter, min(k->k.p.offset << 9, new_i_size), - i_sectors_delta) ?: + i_sectors_delta, &inode) ?: + (bch2_inode_opts_get_inode(c, &inode, &opts), + bch2_bkey_set_needs_rebalance(c, &opts, k)) ?: bch2_trans_update(trans, iter, k, 0) ?: bch2_trans_commit(trans, disk_res, NULL, BCH_TRANS_COMMIT_no_check_rw| @@ -792,10 +805,6 @@ static void init_append_extent(struct bch_write_op *op, bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size, op->flags & BCH_WRITE_cached); - - if (!(op->flags & BCH_WRITE_move)) - bch2_bkey_set_needs_rebalance(op->c, &op->opts, &e->k_i); - bch2_keylist_push(&op->insert_keys); } @@ -1225,6 +1234,7 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, return 0; } + struct bch_fs *c = trans->c; struct bkey_i *new = bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance)); int ret = PTR_ERR_OR_ZERO(new); @@ -1239,8 +1249,6 @@ static int bch2_nocow_write_convert_one_unwritten(struct 
btree_trans *trans, bkey_for_each_ptr(ptrs, ptr) ptr->unwritten = 0; - bch2_bkey_set_needs_rebalance(op->c, &op->opts, new); - /* * Note that we're not calling bch2_subvol_get_snapshot() in this path - * that was done when we kicked off the write, and here it's important @@ -1248,8 +1256,18 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, * since been created. The write is still outstanding, so we're ok * w.r.t. snapshot atomicity: */ + + /* + * For transactional consistency, set_needs_rebalance() has to be called + * with the io_opts from the btree in the same transaction: + */ + struct bch_inode_unpacked inode; + struct bch_inode_opts opts; + return bch2_extent_update_i_size_sectors(trans, iter, - min(new->k.p.offset << 9, new_i_size), 0) ?: + min(new->k.p.offset << 9, new_i_size), 0, &inode) ?: + (bch2_inode_opts_get_inode(c, &inode, &opts), + bch2_bkey_set_needs_rebalance(c, &opts, new)) ?: bch2_trans_update(trans, iter, new, BTREE_UPDATE_internal_snapshot_node); } diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 55ad8ab7a148..5e62eddf30ba 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -589,7 +589,6 @@ s64 bch2_remap_range(struct bch_fs *c, struct bpos dst_start = POS(dst_inum.inum, dst_offset); struct bpos src_start = POS(src_inum.inum, src_offset); struct bpos dst_end = dst_start, src_end = src_start; - struct bch_inode_opts opts; struct bpos src_want; u64 dst_done = 0; u32 dst_snapshot, src_snapshot; @@ -609,10 +608,6 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_init(&new_src); CLASS(btree_trans, trans)(c); - ret = bch2_inum_opts_get(trans, src_inum, &opts); - if (ret) - goto err; - bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start, BTREE_ITER_intent); bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start, @@ -709,11 +704,10 @@ s64 bch2_remap_range(struct bch_fs *c, min(src_k.k->p.offset - src_want.offset, dst_end.offset - dst_iter.pos.offset)); - ret = 
bch2_bkey_set_needs_rebalance(c, &opts, new_dst.k) ?: - bch2_extent_update(trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, - new_i_size, i_sectors_delta, - true); + ret = bch2_extent_update(trans, dst_inum, &dst_iter, + new_dst.k, &disk_res, + new_i_size, i_sectors_delta, + true); bch2_disk_reservation_put(c, &disk_res); } bch2_trans_iter_exit(&dst_iter); @@ -744,7 +738,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_iter_exit(&inode_iter); } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); -err: + bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); -- 2.50.1
