On 04.07.2012 15:38, Alexander Block wrote:
> This patch introduces uuids for subvolumes. Each
> subvolume has its own uuid. In case it was snapshotted,
> it also contains parent_uuid. In case it was received,
> it also contains received_uuid.
> 
> It also introduces subvolume ctime/otime/stime/rtime. The
> first two are comparable to the times found in inodes. otime
> is the origin/creation time and ctime is the change time.
> stime/rtime are only valid on received subvolumes.
> stime is the time of the subvolume when it was
> sent. rtime is the time of the subvolume when it was
> received.
> 
> In addition to the times, we have a transid for each
> time. They are updated at the same place as the times.
> 
> btrfs receive uses stransid and rtransid to find out
> if a received subvolume changed in the meantime.
> 
> If an older kernel mounts a filesystem with the
> extended fields, all fields become invalid. The next
> mount with a new kernel will detect this and reset the
> fields.
> 
> Signed-off-by: Alexander Block <abloc...@googlemail.com>
> ---
>  fs/btrfs/ctree.h       |   43 ++++++++++++++++++++++
>  fs/btrfs/disk-io.c     |    2 +
>  fs/btrfs/inode.c       |    4 ++
>  fs/btrfs/ioctl.c       |   96 
> ++++++++++++++++++++++++++++++++++++++++++++++--
>  fs/btrfs/ioctl.h       |   13 +++++++
>  fs/btrfs/root-tree.c   |   92 +++++++++++++++++++++++++++++++++++++++++++---
>  fs/btrfs/transaction.c |   17 +++++++++
>  7 files changed, 258 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 8cfde93..2bd5df8 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -709,6 +709,35 @@ struct btrfs_root_item {
>       struct btrfs_disk_key drop_progress;
>       u8 drop_level;
>       u8 level;
> +
> +     /*
> +      * The following fields appear after subvol_uuids+subvol_times
> +      * were introduced.
> +      */
> +
> +     /*
> +      * This generation number is used to test if the new fields are valid
> +      * and up to date while reading the root item. Everytime the root item
> +      * is written out, the "generation" field is copied into this field. If
> +      * anyone ever mounted the fs with an older kernel, we will have
> +      * mismatching generation values here and thus must invalidate the
> +      * new fields. See btrfs_update_root and btrfs_find_last_root for
> +      * details.
> +      * the offset of generation_v2 is also used as the start for the memset
> +      * when invalidating the fields.
> +      */
> +     __le64 generation_v2;
> +     u8 uuid[BTRFS_UUID_SIZE];
> +     u8 parent_uuid[BTRFS_UUID_SIZE];
> +     u8 received_uuid[BTRFS_UUID_SIZE];
> +     __le64 ctransid; /* updated when an inode changes */
> +     __le64 otransid; /* trans when created */
> +     __le64 stransid; /* trans when sent. non-zero for received subvol */
> +     __le64 rtransid; /* trans when received. non-zero for received subvol */
> +     struct btrfs_timespec ctime;
> +     struct btrfs_timespec otime;
> +     struct btrfs_timespec stime;
> +     struct btrfs_timespec rtime;
>  } __attribute__ ((__packed__));
>  
>  /*
> @@ -1416,6 +1445,8 @@ struct btrfs_root {
>       dev_t anon_dev;
>  
>       int force_cow;
> +
> +     spinlock_t root_times_lock;
>  };
>  
>  struct btrfs_ioctl_defrag_range_args {
> @@ -2189,6 +2220,16 @@ BTRFS_SETGET_STACK_FUNCS(root_used, struct 
> btrfs_root_item, bytes_used, 64);
>  BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
>  BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
>                        last_snapshot, 64);
> +BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item,
> +                      generation_v2, 64);
> +BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item,
> +                      ctransid, 64);
> +BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item,
> +                      otransid, 64);
> +BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item,
> +                      stransid, 64);
> +BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
> +                      rtransid, 64);
>  
>  static inline bool btrfs_root_readonly(struct btrfs_root *root)
>  {
> @@ -2829,6 +2870,8 @@ int btrfs_find_orphan_roots(struct btrfs_root 
> *tree_root);
>  void btrfs_set_root_node(struct btrfs_root_item *item,
>                        struct extent_buffer *node);
>  void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
> +void btrfs_update_root_times(struct btrfs_trans_handle *trans,
> +                          struct btrfs_root *root);
>  
>  /* dir-item.c */
>  int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 7b845ff..d3b49ad 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -1182,6 +1182,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, 
> u32 sectorsize,
>       root->defrag_running = 0;
>       root->root_key.objectid = objectid;
>       root->anon_dev = 0;
> +
> +     spin_lock_init(&root->root_times_lock);
>  }
>  
>  static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 139be17..0f6a65d 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2734,6 +2734,8 @@ noinline int btrfs_update_inode(struct 
> btrfs_trans_handle *trans,
>        */
>       if (!btrfs_is_free_space_inode(root, inode)
>           && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
> +             btrfs_update_root_times(trans, root);
> +
>               ret = btrfs_delayed_update_inode(trans, root, inode);
>               if (!ret)
>                       btrfs_set_inode_last_trans(trans, inode);
> @@ -4728,6 +4730,8 @@ static struct inode *btrfs_new_inode(struct 
> btrfs_trans_handle *trans,
>       trace_btrfs_inode_new(inode);
>       btrfs_set_inode_last_trans(trans, inode);
>  
> +     btrfs_update_root_times(trans, root);
> +
>       return inode;
>  fail:
>       if (dir)
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 7011871..8d258cb 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -41,6 +41,7 @@
>  #include <linux/vmalloc.h>
>  #include <linux/slab.h>
>  #include <linux/blkdev.h>
> +#include <linux/uuid.h>
>  #include "compat.h"
>  #include "ctree.h"
>  #include "disk-io.h"
> @@ -346,11 +347,13 @@ static noinline int create_subvol(struct btrfs_root 
> *root,
>       struct btrfs_root *new_root;
>       struct dentry *parent = dentry->d_parent;
>       struct inode *dir;
> +     struct timespec cur_time = CURRENT_TIME;
>       int ret;
>       int err;
>       u64 objectid;
>       u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
>       u64 index = 0;
> +     uuid_le new_uuid;
>  
>       ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
>       if (ret)
> @@ -389,8 +392,9 @@ static noinline int create_subvol(struct btrfs_root *root,
>                           BTRFS_UUID_SIZE);
>       btrfs_mark_buffer_dirty(leaf);
>  
> +     memset(&root_item, 0, sizeof(root_item));
> +
>       inode_item = &root_item.inode;
> -     memset(inode_item, 0, sizeof(*inode_item));
>       inode_item->generation = cpu_to_le64(1);
>       inode_item->size = cpu_to_le64(3);
>       inode_item->nlink = cpu_to_le32(1);
> @@ -408,8 +412,15 @@ static noinline int create_subvol(struct btrfs_root 
> *root,
>       btrfs_set_root_used(&root_item, leaf->len);
>       btrfs_set_root_last_snapshot(&root_item, 0);
>  
> -     memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
> -     root_item.drop_level = 0;
> +     btrfs_set_root_generation_v2(&root_item,
> +                     btrfs_root_generation(&root_item));
> +     uuid_le_gen(&new_uuid);
> +     memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
> +     root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
> +     root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec);
> +     root_item.ctime = root_item.otime;
> +     btrfs_set_root_ctransid(&root_item, trans->transid);
> +     btrfs_set_root_otransid(&root_item, trans->transid);
>  
>       btrfs_tree_unlock(leaf);
>       free_extent_buffer(leaf);
> @@ -3395,6 +3406,83 @@ out:
>       return ret;
>  }
>  
> +static long btrfs_ioctl_set_received_subvol(struct file *file,
> +                                         void __user *arg)
> +{
> +     struct btrfs_ioctl_received_subvol_args *sa = NULL;
> +     struct inode *inode = fdentry(file)->d_inode;
> +     struct btrfs_root *root = BTRFS_I(inode)->root;
> +     struct btrfs_root_item *root_item = &root->root_item;
> +     struct btrfs_trans_handle *trans;
> +     int ret = 0;
> +
> +     ret = mnt_want_write_file(file);
> +     if (ret < 0)
> +             return ret;
> +
> +     down_write(&root->fs_info->subvol_sem);
> +
> +     if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
> +             ret = -EINVAL;
> +             goto out;
> +     }
> +
> +     if (btrfs_root_readonly(root)) {
> +             ret = -EROFS;
> +             goto out;
> +     }
> +
> +     if (!inode_owner_or_capable(inode)) {
> +             ret = -EACCES;
> +             goto out;
> +     }
> +
> +     sa = memdup_user(arg, sizeof(*sa));
> +     if (IS_ERR(sa)) {
> +             ret = PTR_ERR(sa);
> +             sa = NULL;
> +             goto out;
> +     }
> +
> +     trans = btrfs_start_transaction(root, 1);
> +     if (IS_ERR(trans)) {
> +             ret = PTR_ERR(trans);
> +             trans = NULL;
> +             goto out;
> +     }
> +
> +     sa->rtransid = trans->transid;
> +     sa->rtime = CURRENT_TIME;
> +
> +     memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
> +     btrfs_set_root_stransid(root_item, sa->stransid);
> +     btrfs_set_root_rtransid(root_item, sa->rtransid);
> +     root_item->stime.sec = cpu_to_le64(sa->stime.tv_sec);
> +     root_item->stime.nsec = cpu_to_le64(sa->stime.tv_nsec);
> +     root_item->rtime.sec = cpu_to_le64(sa->rtime.tv_sec);
> +     root_item->rtime.nsec = cpu_to_le64(sa->rtime.tv_nsec);
> +
> +     ret = btrfs_update_root(trans, root->fs_info->tree_root,
> +                             &root->root_key, &root->root_item);
> +     if (ret < 0) {
> +             goto out;

Are you leaking the trans handle here? This error path jumps to "out",
which never ends the transaction.

> +     } else {
> +             ret = btrfs_commit_transaction(trans, root);
> +             if (ret < 0)
> +                     goto out;
> +     }
> +
> +     ret = copy_to_user(arg, sa, sizeof(*sa));
> +     if (ret)
> +             ret = -EFAULT;
> +
> +out:
> +     kfree(sa);
> +     up_write(&root->fs_info->subvol_sem);
> +     mnt_drop_write_file(file);
> +     return ret;
> +}
> +
>  long btrfs_ioctl(struct file *file, unsigned int
>               cmd, unsigned long arg)
>  {
> @@ -3477,6 +3565,8 @@ long btrfs_ioctl(struct file *file, unsigned int
>               return btrfs_ioctl_balance_ctl(root, arg);
>       case BTRFS_IOC_BALANCE_PROGRESS:
>               return btrfs_ioctl_balance_progress(root, argp);
> +     case BTRFS_IOC_SET_RECEIVED_SUBVOL:
> +             return btrfs_ioctl_set_received_subvol(file, argp);
>       case BTRFS_IOC_GET_DEV_STATS:
>               return btrfs_ioctl_get_dev_stats(root, argp, 0);
>       case BTRFS_IOC_GET_AND_RESET_DEV_STATS:
> diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
> index e440aa6..c9e3fac 100644
> --- a/fs/btrfs/ioctl.h
> +++ b/fs/btrfs/ioctl.h
> @@ -295,6 +295,15 @@ struct btrfs_ioctl_get_dev_stats {
>       __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
>  };
>  
> +struct btrfs_ioctl_received_subvol_args {
> +     char    uuid[BTRFS_UUID_SIZE];  /* in */
> +     __u64   stransid;               /* in */
> +     __u64   rtransid;               /* out */
> +     struct timespec stime;          /* in */
> +     struct timespec rtime;          /* out */
> +     __u64   reserved[16];

What is this reserved field used for? I don't see a mechanism that could
be used to signal that there is useful information here, other than
using a different ioctl.

> +};
> +
>  #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
>                                  struct btrfs_ioctl_vol_args)
>  #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
> @@ -359,6 +368,10 @@ struct btrfs_ioctl_get_dev_stats {
>                                       struct btrfs_ioctl_ino_path_args)
>  #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
>                                       struct btrfs_ioctl_ino_path_args)
> +
> +#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
> +                             struct btrfs_ioctl_received_subvol_args)
> +
>  #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
>                                     struct btrfs_ioctl_get_dev_stats)
>  #define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \
> diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
> index 24fb8ce..17d638e 100644
> --- a/fs/btrfs/root-tree.c
> +++ b/fs/btrfs/root-tree.c
> @@ -16,6 +16,7 @@
>   * Boston, MA 021110-1307, USA.
>   */
>  
> +#include <linux/uuid.h>
>  #include "ctree.h"
>  #include "transaction.h"
>  #include "disk-io.h"
> @@ -25,6 +26,9 @@
>   * lookup the root with the highest offset for a given objectid.  The key we 
> do
>   * find is copied into 'key'.  If we find something return 0, otherwise 1, < > 0
>   * on error.
> + * We also check if the root was once mounted with an older kernel. If we 
> detect
> + * this, the new fields coming after 'level' get overwritten with zeros so to
> + * invalidate the fields.

... "This is detected by a mismatch of the 2 generation fields" ... or something
like that.

>   */
>  int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
>                       struct btrfs_root_item *item, struct btrfs_key *key)
> @@ -35,6 +39,9 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 
> objectid,
>       struct extent_buffer *l;
>       int ret;
>       int slot;
> +     int len;
> +     int need_reset = 0;
> +     uuid_le uuid;
>  
>       search_key.objectid = objectid;
>       search_key.type = BTRFS_ROOT_ITEM_KEY;
> @@ -60,11 +67,36 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 
> objectid,
>               ret = 1;
>               goto out;
>       }
> -     if (item)
> +     if (item) {
> +             len = btrfs_item_size_nr(l, slot);
>               read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
> -                                sizeof(*item));
> +                             min_t(int, len, (int)sizeof(*item)));
> +             if (len < sizeof(*item))
> +                     need_reset = 1;
> +             if (!need_reset && btrfs_root_generation(item)
> +                     != btrfs_root_generation_v2(item)) {
> +                     if (btrfs_root_generation_v2(item) != 0) {
> +                             printk(KERN_WARNING "btrfs: mismatching "
> +                                             "generation and generation_v2 "
> +                                             "found in root item. This root "
> +                                             "was probably mounted with an "
> +                                             "older kernel. Resetting all "
> +                                             "new fields.\n");
> +                     }
> +                     need_reset = 1;
> +             }
> +             if (need_reset) {
> +                     memset(&item->generation_v2, 0,
> +                             sizeof(*item) - offsetof(struct btrfs_root_item,
> +                                             generation_v2));
> +
> +                     uuid_le_gen(&uuid);
> +                     memcpy(item->uuid, uuid.b, BTRFS_UUID_SIZE);
> +             }
> +     }
>       if (key)
>               memcpy(key, &found_key, sizeof(found_key));
> +
>       ret = 0;
>  out:
>       btrfs_free_path(path);
> @@ -91,16 +123,15 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, 
> struct btrfs_root
>       int ret;
>       int slot;
>       unsigned long ptr;
> +     int old_len;
>  
>       path = btrfs_alloc_path();
>       if (!path)
>               return -ENOMEM;
>  
>       ret = btrfs_search_slot(trans, root, key, path, 0, 1);
> -     if (ret < 0) {
> -             btrfs_abort_transaction(trans, root, ret);
> -             goto out;
> -     }
> +     if (ret < 0)
> +             goto out_abort;
>  
>       if (ret != 0) {
>               btrfs_print_leaf(root, path->nodes[0]);
> @@ -113,11 +144,47 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, 
> struct btrfs_root
>       l = path->nodes[0];
>       slot = path->slots[0];
>       ptr = btrfs_item_ptr_offset(l, slot);
> +     old_len = btrfs_item_size_nr(l, slot);
> +
> +     /*
> +      * If this is the first time we update the root item which originated
> +      * from an older kernel, we need to enlarge the item size to make room
> +      * for the added fields.
> +      */
> +     if (old_len < sizeof(*item)) {
> +             btrfs_release_path(path);
> +             ret = btrfs_search_slot(trans, root, key, path,
> +                             -1, 1);
> +             if (ret < 0)
> +                     goto out_abort;
> +             ret = btrfs_del_item(trans, root, path);
> +             if (ret < 0)
> +                     goto out_abort;
> +             btrfs_release_path(path);
> +             ret = btrfs_insert_empty_item(trans, root, path,
> +                             key, sizeof(*item));
> +             if (ret < 0)
> +                     goto out_abort;
> +             l = path->nodes[0];
> +             slot = path->slots[0];
> +             ptr = btrfs_item_ptr_offset(l, slot);
> +     }
> +
> +     /*
> +      * Update generation_v2 so at the next mount we know the new root
> +      * fields are valid.
> +      */
> +     btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
> +
>       write_extent_buffer(l, item, ptr, sizeof(*item));
>       btrfs_mark_buffer_dirty(path->nodes[0]);
>  out:
>       btrfs_free_path(path);
>       return ret;
> +
> +out_abort:
> +     btrfs_abort_transaction(trans, root, ret);
> +     goto out;
>  }
>  
>  int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root 
> *root,
> @@ -454,3 +521,16 @@ void btrfs_check_and_init_root_item(struct 
> btrfs_root_item *root_item)
>               root_item->byte_limit = 0;
>       }
>  }
> +
> +void btrfs_update_root_times(struct btrfs_trans_handle *trans,
> +                          struct btrfs_root *root)
> +{
> +     struct btrfs_root_item *item = &root->root_item;
> +     struct timespec ct = CURRENT_TIME;
> +
> +     spin_lock(&root->root_times_lock);
> +     item->ctransid = trans->transid;
> +     item->ctime.sec = cpu_to_le64(ct.tv_sec);
> +     item->ctime.nsec = cpu_to_le64(ct.tv_nsec);
> +     spin_unlock(&root->root_times_lock);
> +}
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index b72b068..a21f308 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -22,6 +22,7 @@
>  #include <linux/writeback.h>
>  #include <linux/pagemap.h>
>  #include <linux/blkdev.h>
> +#include <linux/uuid.h>
>  #include "ctree.h"
>  #include "disk-io.h"
>  #include "transaction.h"
> @@ -926,11 +927,13 @@ static noinline int create_pending_snapshot(struct 
> btrfs_trans_handle *trans,
>       struct dentry *dentry;
>       struct extent_buffer *tmp;
>       struct extent_buffer *old;
> +     struct timespec cur_time = CURRENT_TIME;
>       int ret;
>       u64 to_reserve = 0;
>       u64 index = 0;
>       u64 objectid;
>       u64 root_flags;
> +     uuid_le new_uuid;
>  
>       rsv = trans->block_rsv;
>  
> @@ -1016,6 +1019,20 @@ static noinline int create_pending_snapshot(struct 
> btrfs_trans_handle *trans,
>               root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
>       btrfs_set_root_flags(new_root_item, root_flags);
>  
> +     btrfs_set_root_generation_v2(new_root_item,
> +                     trans->transid);
> +     uuid_le_gen(&new_uuid);
> +     memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
> +     memcpy(new_root_item->parent_uuid, root->root_item.uuid,
> +                     BTRFS_UUID_SIZE);
> +     new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
> +     new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
> +     btrfs_set_root_otransid(new_root_item, trans->transid);
> +     memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
> +     memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
> +     btrfs_set_root_stransid(new_root_item, 0);
> +     btrfs_set_root_rtransid(new_root_item, 0);
> +
>       old = btrfs_lock_root_node(root);
>       ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
>       if (ret) {

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to