On 04.07.2012 15:38, Alexander Block wrote: > This patch introduces uuids for subvolumes. Each > subvolume has it's own uuid. In case it was snapshotted, > it also contains parent_uuid. In case it was received, > it also contains received_uuid. > > It also introduces subvolume ctime/otime/stime/rtime. The > first two are comparable to the times found in inodes. otime > is the origin/creation time and ctime is the change time. > stime/rtime are only valid on received subvolumes. > stime is the time of the subvolume when it was > sent. rtime is the time of the subvolume when it was > received. > > Additionally to the times, we have a transid for each > time. They are updated at the same place as the times. > > btrfs receive uses stransid and rtransid to find out > if a received subvolume changed in the meantime. > > If an older kernel mounts a filesystem with the > extented fields, all fields become invalid. The next > mount with a new kernel will detect this and reset the > fields. > > Signed-off-by: Alexander Block <abloc...@googlemail.com> > --- > fs/btrfs/ctree.h | 43 ++++++++++++++++++++++ > fs/btrfs/disk-io.c | 2 + > fs/btrfs/inode.c | 4 ++ > fs/btrfs/ioctl.c | 96 > ++++++++++++++++++++++++++++++++++++++++++++++-- > fs/btrfs/ioctl.h | 13 +++++++ > fs/btrfs/root-tree.c | 92 +++++++++++++++++++++++++++++++++++++++++++--- > fs/btrfs/transaction.c | 17 +++++++++ > 7 files changed, 258 insertions(+), 9 deletions(-) > > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h > index 8cfde93..2bd5df8 100644 > --- a/fs/btrfs/ctree.h > +++ b/fs/btrfs/ctree.h > @@ -709,6 +709,35 @@ struct btrfs_root_item { > struct btrfs_disk_key drop_progress; > u8 drop_level; > u8 level; > + > + /* > + * The following fields appear after subvol_uuids+subvol_times > + * were introduced. > + */ > + > + /* > + * This generation number is used to test if the new fields are valid > + * and up to date while reading the root item. 
Everytime the root item > + * is written out, the "generation" field is copied into this field. If > + * anyone ever mounted the fs with an older kernel, we will have > + * mismatching generation values here and thus must invalidate the > + * new fields. See btrfs_update_root and btrfs_find_last_root for > + * details. > + * the offset of generation_v2 is also used as the start for the memset > + * when invalidating the fields. > + */ > + __le64 generation_v2; > + u8 uuid[BTRFS_UUID_SIZE]; > + u8 parent_uuid[BTRFS_UUID_SIZE]; > + u8 received_uuid[BTRFS_UUID_SIZE]; > + __le64 ctransid; /* updated when an inode changes */ > + __le64 otransid; /* trans when created */ > + __le64 stransid; /* trans when sent. non-zero for received subvol */ > + __le64 rtransid; /* trans when received. non-zero for received subvol */ > + struct btrfs_timespec ctime; > + struct btrfs_timespec otime; > + struct btrfs_timespec stime; > + struct btrfs_timespec rtime; > } __attribute__ ((__packed__)); > > /* > @@ -1416,6 +1445,8 @@ struct btrfs_root { > dev_t anon_dev; > > int force_cow; > + > + spinlock_t root_times_lock; > }; > > struct btrfs_ioctl_defrag_range_args { > @@ -2189,6 +2220,16 @@ BTRFS_SETGET_STACK_FUNCS(root_used, struct > btrfs_root_item, bytes_used, 64); > BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); > BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, > last_snapshot, 64); > +BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item, > + generation_v2, 64); > +BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item, > + ctransid, 64); > +BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item, > + otransid, 64); > +BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item, > + stransid, 64); > +BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, > + rtransid, 64); > > static inline bool btrfs_root_readonly(struct btrfs_root *root) > { > @@ -2829,6 +2870,8 @@ int 
btrfs_find_orphan_roots(struct btrfs_root > *tree_root); > void btrfs_set_root_node(struct btrfs_root_item *item, > struct extent_buffer *node); > void btrfs_check_and_init_root_item(struct btrfs_root_item *item); > +void btrfs_update_root_times(struct btrfs_trans_handle *trans, > + struct btrfs_root *root); > > /* dir-item.c */ > int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > index 7b845ff..d3b49ad 100644 > --- a/fs/btrfs/disk-io.c > +++ b/fs/btrfs/disk-io.c > @@ -1182,6 +1182,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, > u32 sectorsize, > root->defrag_running = 0; > root->root_key.objectid = objectid; > root->anon_dev = 0; > + > + spin_lock_init(&root->root_times_lock); > } > > static int __must_check find_and_setup_root(struct btrfs_root *tree_root, > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 139be17..0f6a65d 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -2734,6 +2734,8 @@ noinline int btrfs_update_inode(struct > btrfs_trans_handle *trans, > */ > if (!btrfs_is_free_space_inode(root, inode) > && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { > + btrfs_update_root_times(trans, root); > + > ret = btrfs_delayed_update_inode(trans, root, inode); > if (!ret) > btrfs_set_inode_last_trans(trans, inode); > @@ -4728,6 +4730,8 @@ static struct inode *btrfs_new_inode(struct > btrfs_trans_handle *trans, > trace_btrfs_inode_new(inode); > btrfs_set_inode_last_trans(trans, inode); > > + btrfs_update_root_times(trans, root); > + > return inode; > fail: > if (dir) > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > index 7011871..8d258cb 100644 > --- a/fs/btrfs/ioctl.c > +++ b/fs/btrfs/ioctl.c > @@ -41,6 +41,7 @@ > #include <linux/vmalloc.h> > #include <linux/slab.h> > #include <linux/blkdev.h> > +#include <linux/uuid.h> > #include "compat.h" > #include "ctree.h" > #include "disk-io.h" > @@ -346,11 +347,13 @@ static noinline int create_subvol(struct 
btrfs_root > *root, > struct btrfs_root *new_root; > struct dentry *parent = dentry->d_parent; > struct inode *dir; > + struct timespec cur_time = CURRENT_TIME; > int ret; > int err; > u64 objectid; > u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; > u64 index = 0; > + uuid_le new_uuid; > > ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); > if (ret) > @@ -389,8 +392,9 @@ static noinline int create_subvol(struct btrfs_root *root, > BTRFS_UUID_SIZE); > btrfs_mark_buffer_dirty(leaf); > > + memset(&root_item, 0, sizeof(root_item)); > + > inode_item = &root_item.inode; > - memset(inode_item, 0, sizeof(*inode_item)); > inode_item->generation = cpu_to_le64(1); > inode_item->size = cpu_to_le64(3); > inode_item->nlink = cpu_to_le32(1); > @@ -408,8 +412,15 @@ static noinline int create_subvol(struct btrfs_root > *root, > btrfs_set_root_used(&root_item, leaf->len); > btrfs_set_root_last_snapshot(&root_item, 0); > > - memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); > - root_item.drop_level = 0; > + btrfs_set_root_generation_v2(&root_item, > + btrfs_root_generation(&root_item)); > + uuid_le_gen(&new_uuid); > + memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); > + root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); > + root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec); > + root_item.ctime = root_item.otime; > + btrfs_set_root_ctransid(&root_item, trans->transid); > + btrfs_set_root_otransid(&root_item, trans->transid); > > btrfs_tree_unlock(leaf); > free_extent_buffer(leaf); > @@ -3395,6 +3406,83 @@ out: > return ret; > } > > +static long btrfs_ioctl_set_received_subvol(struct file *file, > + void __user *arg) > +{ > + struct btrfs_ioctl_received_subvol_args *sa = NULL; > + struct inode *inode = fdentry(file)->d_inode; > + struct btrfs_root *root = BTRFS_I(inode)->root; > + struct btrfs_root_item *root_item = &root->root_item; > + struct btrfs_trans_handle *trans; > + int ret = 0; > + > + ret = mnt_want_write_file(file); > + if (ret < 
0) > + return ret; > + > + down_write(&root->fs_info->subvol_sem); > + > + if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { > + ret = -EINVAL; > + goto out; > + } > + > + if (btrfs_root_readonly(root)) { > + ret = -EROFS; > + goto out; > + } > + > + if (!inode_owner_or_capable(inode)) { > + ret = -EACCES; > + goto out; > + } > + > + sa = memdup_user(arg, sizeof(*sa)); > + if (IS_ERR(sa)) { > + ret = PTR_ERR(sa); > + sa = NULL; > + goto out; > + } > + > + trans = btrfs_start_transaction(root, 1); > + if (IS_ERR(trans)) { > + ret = PTR_ERR(trans); > + trans = NULL; > + goto out; > + } > + > + sa->rtransid = trans->transid; > + sa->rtime = CURRENT_TIME; > + > + memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); > + btrfs_set_root_stransid(root_item, sa->stransid); > + btrfs_set_root_rtransid(root_item, sa->rtransid); > + root_item->stime.sec = cpu_to_le64(sa->stime.tv_sec); > + root_item->stime.nsec = cpu_to_le64(sa->stime.tv_nsec); > + root_item->rtime.sec = cpu_to_le64(sa->rtime.tv_sec); > + root_item->rtime.nsec = cpu_to_le64(sa->rtime.tv_nsec); > + > + ret = btrfs_update_root(trans, root->fs_info->tree_root, > + &root->root_key, &root->root_item); > + if (ret < 0) { > + goto out;
Are you leaking the trans handle here? On this error path the transaction is never ended before jumping to out. > + } else { > + ret = btrfs_commit_transaction(trans, root); > + if (ret < 0) > + goto out; > + } > + > + ret = copy_to_user(arg, sa, sizeof(*sa)); > + if (ret) > + ret = -EFAULT; > + > +out: > + kfree(sa); > + up_write(&root->fs_info->subvol_sem); > + mnt_drop_write_file(file); > + return ret; > +} > + > long btrfs_ioctl(struct file *file, unsigned int > cmd, unsigned long arg) > { > @@ -3477,6 +3565,8 @@ long btrfs_ioctl(struct file *file, unsigned int > return btrfs_ioctl_balance_ctl(root, arg); > case BTRFS_IOC_BALANCE_PROGRESS: > return btrfs_ioctl_balance_progress(root, argp); > + case BTRFS_IOC_SET_RECEIVED_SUBVOL: > + return btrfs_ioctl_set_received_subvol(file, argp); > case BTRFS_IOC_GET_DEV_STATS: > return btrfs_ioctl_get_dev_stats(root, argp, 0); > case BTRFS_IOC_GET_AND_RESET_DEV_STATS: > diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h > index e440aa6..c9e3fac 100644 > --- a/fs/btrfs/ioctl.h > +++ b/fs/btrfs/ioctl.h > @@ -295,6 +295,15 @@ struct btrfs_ioctl_get_dev_stats { > __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ > }; > > +struct btrfs_ioctl_received_subvol_args { > + char uuid[BTRFS_UUID_SIZE]; /* in */ > + __u64 stransid; /* in */ > + __u64 rtransid; /* out */ > + struct timespec stime; /* in */ > + struct timespec rtime; /* out */ > + __u64 reserved[16]; What is this reserved field used for? I don't see a mechanism that could be used to signal that there is useful information here, other than using a different ioctl. 
> +}; > + > #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ > struct btrfs_ioctl_vol_args) > #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ > @@ -359,6 +368,10 @@ struct btrfs_ioctl_get_dev_stats { > struct btrfs_ioctl_ino_path_args) > #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ > struct btrfs_ioctl_ino_path_args) > + > +#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ > + struct btrfs_ioctl_received_subvol_args) > + > #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ > struct btrfs_ioctl_get_dev_stats) > #define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ > diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c > index 24fb8ce..17d638e 100644 > --- a/fs/btrfs/root-tree.c > +++ b/fs/btrfs/root-tree.c > @@ -16,6 +16,7 @@ > * Boston, MA 021110-1307, USA. > */ > > +#include <linux/uuid.h> > #include "ctree.h" > #include "transaction.h" > #include "disk-io.h" > @@ -25,6 +26,9 @@ > * lookup the root with the highest offset for a given objectid. The key we > do > * find is copied into 'key'. If we find something return 0, otherwise 1, < > 0 > * on error. > + * We also check if the root was once mounted with an older kernel. If we > detect > + * this, the new fields coming after 'level' get overwritten with zeros so to > + * invalidate the fields. ... "This is detected by a mismatch of the 2 generation fields" ... or something like that. 
> */ > int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, > struct btrfs_root_item *item, struct btrfs_key *key) > @@ -35,6 +39,9 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 > objectid, > struct extent_buffer *l; > int ret; > int slot; > + int len; > + int need_reset = 0; > + uuid_le uuid; > > search_key.objectid = objectid; > search_key.type = BTRFS_ROOT_ITEM_KEY; > @@ -60,11 +67,36 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 > objectid, > ret = 1; > goto out; > } > - if (item) > + if (item) { > + len = btrfs_item_size_nr(l, slot); > read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), > - sizeof(*item)); > + min_t(int, len, (int)sizeof(*item))); > + if (len < sizeof(*item)) > + need_reset = 1; > + if (!need_reset && btrfs_root_generation(item) > + != btrfs_root_generation_v2(item)) { > + if (btrfs_root_generation_v2(item) != 0) { > + printk(KERN_WARNING "btrfs: mismatching " > + "generation and generation_v2 " > + "found in root item. This root " > + "was probably mounted with an " > + "older kernel. 
Resetting all " > + "new fields.\n"); > + } > + need_reset = 1; > + } > + if (need_reset) { > + memset(&item->generation_v2, 0, > + sizeof(*item) - offsetof(struct btrfs_root_item, > + generation_v2)); > + > + uuid_le_gen(&uuid); > + memcpy(item->uuid, uuid.b, BTRFS_UUID_SIZE); > + } > + } > if (key) > memcpy(key, &found_key, sizeof(found_key)); > + > ret = 0; > out: > btrfs_free_path(path); > @@ -91,16 +123,15 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, > struct btrfs_root > int ret; > int slot; > unsigned long ptr; > + int old_len; > > path = btrfs_alloc_path(); > if (!path) > return -ENOMEM; > > ret = btrfs_search_slot(trans, root, key, path, 0, 1); > - if (ret < 0) { > - btrfs_abort_transaction(trans, root, ret); > - goto out; > - } > + if (ret < 0) > + goto out_abort; > > if (ret != 0) { > btrfs_print_leaf(root, path->nodes[0]); > @@ -113,11 +144,47 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, > struct btrfs_root > l = path->nodes[0]; > slot = path->slots[0]; > ptr = btrfs_item_ptr_offset(l, slot); > + old_len = btrfs_item_size_nr(l, slot); > + > + /* > + * If this is the first time we update the root item which originated > + * from an older kernel, we need to enlarge the item size to make room > + * for the added fields. > + */ > + if (old_len < sizeof(*item)) { > + btrfs_release_path(path); > + ret = btrfs_search_slot(trans, root, key, path, > + -1, 1); > + if (ret < 0) > + goto out_abort; > + ret = btrfs_del_item(trans, root, path); > + if (ret < 0) > + goto out_abort; > + btrfs_release_path(path); > + ret = btrfs_insert_empty_item(trans, root, path, > + key, sizeof(*item)); > + if (ret < 0) > + goto out_abort; > + l = path->nodes[0]; > + slot = path->slots[0]; > + ptr = btrfs_item_ptr_offset(l, slot); > + } > + > + /* > + * Update generation_v2 so at the next mount we know the new root > + * fields are valid. 
> + */ > + btrfs_set_root_generation_v2(item, btrfs_root_generation(item)); > + > write_extent_buffer(l, item, ptr, sizeof(*item)); > btrfs_mark_buffer_dirty(path->nodes[0]); > out: > btrfs_free_path(path); > return ret; > + > +out_abort: > + btrfs_abort_transaction(trans, root, ret); > + goto out; > } > > int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root > *root, > @@ -454,3 +521,16 @@ void btrfs_check_and_init_root_item(struct > btrfs_root_item *root_item) > root_item->byte_limit = 0; > } > } > + > +void btrfs_update_root_times(struct btrfs_trans_handle *trans, > + struct btrfs_root *root) > +{ > + struct btrfs_root_item *item = &root->root_item; > + struct timespec ct = CURRENT_TIME; > + > + spin_lock(&root->root_times_lock); > + item->ctransid = trans->transid; > + item->ctime.sec = cpu_to_le64(ct.tv_sec); > + item->ctime.nsec = cpu_to_le64(ct.tv_nsec); > + spin_unlock(&root->root_times_lock); > +} > diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c > index b72b068..a21f308 100644 > --- a/fs/btrfs/transaction.c > +++ b/fs/btrfs/transaction.c > @@ -22,6 +22,7 @@ > #include <linux/writeback.h> > #include <linux/pagemap.h> > #include <linux/blkdev.h> > +#include <linux/uuid.h> > #include "ctree.h" > #include "disk-io.h" > #include "transaction.h" > @@ -926,11 +927,13 @@ static noinline int create_pending_snapshot(struct > btrfs_trans_handle *trans, > struct dentry *dentry; > struct extent_buffer *tmp; > struct extent_buffer *old; > + struct timespec cur_time = CURRENT_TIME; > int ret; > u64 to_reserve = 0; > u64 index = 0; > u64 objectid; > u64 root_flags; > + uuid_le new_uuid; > > rsv = trans->block_rsv; > > @@ -1016,6 +1019,20 @@ static noinline int create_pending_snapshot(struct > btrfs_trans_handle *trans, > root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; > btrfs_set_root_flags(new_root_item, root_flags); > > + btrfs_set_root_generation_v2(new_root_item, > + trans->transid); > + uuid_le_gen(&new_uuid); > + 
memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE); > + memcpy(new_root_item->parent_uuid, root->root_item.uuid, > + BTRFS_UUID_SIZE); > + new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); > + new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec); > + btrfs_set_root_otransid(new_root_item, trans->transid); > + memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); > + memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); > + btrfs_set_root_stransid(new_root_item, 0); > + btrfs_set_root_rtransid(new_root_item, 0); > + > old = btrfs_lock_root_node(root); > ret = btrfs_cow_block(trans, root, old, NULL, 0, &old); > if (ret) { -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html