Hi Wang,

On 01.05.2013 09:29, Wang Shilong wrote:
> Hello Jan,
> 
>> If qgroup tracking is out of sync, a rescan operation can be started. It
>> iterates the complete extent tree and recalculates all qgroup tracking data.
>> This is an expensive operation and should not be used unless required.
>>
>> A filesystem under rescan can still be umounted. The rescan continues on the
>> next mount.  Status information is provided with a separate ioctl while a
>> rescan operation is in progress.
>>
>> Signed-off-by: Jan Schmidt <list.bt...@jan-o-sch.net>
>> ---
>> fs/btrfs/ctree.h           |   17 ++-
>> fs/btrfs/disk-io.c         |    5 +
>> fs/btrfs/ioctl.c           |   83 ++++++++++--
>> fs/btrfs/qgroup.c          |  318 ++++++++++++++++++++++++++++++++++++++++++--
>> include/uapi/linux/btrfs.h |   12 ++-
>> 5 files changed, 400 insertions(+), 35 deletions(-)
>>
>> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
>> index 412c306..e4f28a6 100644
>> --- a/fs/btrfs/ctree.h
>> +++ b/fs/btrfs/ctree.h
>> @@ -1021,9 +1021,9 @@ struct btrfs_block_group_item {
>>  */
>> #define BTRFS_QGROUP_STATUS_FLAG_ON          (1ULL << 0)
>> /*
>> - * SCANNING is set during the initialization phase
>> + * RESCAN is set during the initialization phase
>>  */
>> -#define BTRFS_QGROUP_STATUS_FLAG_SCANNING   (1ULL << 1)
>> +#define BTRFS_QGROUP_STATUS_FLAG_RESCAN             (1ULL << 1)
>> /*
>>  * Some qgroup entries are known to be out of date,
>>  * either because the configuration has changed in a way that
>> @@ -1052,7 +1052,7 @@ struct btrfs_qgroup_status_item {
>>       * only used during scanning to record the progress
>>       * of the scan. It contains a logical address
>>       */
>> -    __le64 scan;
>> +    __le64 rescan;
>> } __attribute__ ((__packed__));
>>
>> struct btrfs_qgroup_info_item {
>> @@ -1603,6 +1603,11 @@ struct btrfs_fs_info {
>>      /* used by btrfs_qgroup_record_ref for an efficient tree traversal */
>>      u64 qgroup_seq;
>>
>> +    /* qgroup rescan items */
>> +    struct mutex qgroup_rescan_lock; /* protects the progress item */
>> +    struct btrfs_key qgroup_rescan_progress;
>> +    struct btrfs_workers qgroup_rescan_workers;
>> +
>>      /* filesystem state */
>>      unsigned long fs_state;
>>
>> @@ -2888,8 +2893,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_version, struct 
>> btrfs_qgroup_status_item,
>>                 version, 64);
>> BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
>>                 flags, 64);
>> -BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
>> -               scan, 64);
>> +BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
>> +               rescan, 64);
>>
>> /* btrfs_qgroup_info_item */
>> BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
>> @@ -3834,7 +3839,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle 
>> *trans,
>>                     struct btrfs_fs_info *fs_info);
>> int btrfs_quota_disable(struct btrfs_trans_handle *trans,
>>                      struct btrfs_fs_info *fs_info);
>> -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
>> +int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
>> int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
>>                            struct btrfs_fs_info *fs_info, u64 src, u64 dst);
>> int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>> index 7717363..63e9348 100644
>> --- a/fs/btrfs/disk-io.c
>> +++ b/fs/btrfs/disk-io.c
>> @@ -2010,6 +2010,7 @@ static void btrfs_stop_all_workers(struct 
>> btrfs_fs_info *fs_info)
>>      btrfs_stop_workers(&fs_info->caching_workers);
>>      btrfs_stop_workers(&fs_info->readahead_workers);
>>      btrfs_stop_workers(&fs_info->flush_workers);
>> +    btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
>> }
>>
>> /* helper to cleanup tree roots */
>> @@ -2301,6 +2302,7 @@ int open_ctree(struct super_block *sb,
>>      fs_info->qgroup_seq = 1;
>>      fs_info->quota_enabled = 0;
>>      fs_info->pending_quota_state = 0;
>> +    mutex_init(&fs_info->qgroup_rescan_lock);
>>
>>      btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
>>      btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
>> @@ -2529,6 +2531,8 @@ int open_ctree(struct super_block *sb,
>>      btrfs_init_workers(&fs_info->readahead_workers, "readahead",
>>                         fs_info->thread_pool_size,
>>                         &fs_info->generic_worker);
>> +    btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1,
>> +                       &fs_info->generic_worker);
>>
>>      /*
>>       * endios are largely parallel and should have a very
>> @@ -2563,6 +2567,7 @@ int open_ctree(struct super_block *sb,
>>      ret |= btrfs_start_workers(&fs_info->caching_workers);
>>      ret |= btrfs_start_workers(&fs_info->readahead_workers);
>>      ret |= btrfs_start_workers(&fs_info->flush_workers);
>> +    ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
>>      if (ret) {
>>              err = -ENOMEM;
>>              goto fail_sb_buffer;
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index d0af96a..5e93bb8 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -3701,12 +3701,10 @@ static long btrfs_ioctl_quota_ctl(struct file *file, 
>> void __user *arg)
>>      }
>>
>>      down_write(&root->fs_info->subvol_sem);
>> -    if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
>> -            trans = btrfs_start_transaction(root->fs_info->tree_root, 2);
>> -            if (IS_ERR(trans)) {
>> -                    ret = PTR_ERR(trans);
>> -                    goto out;
>> -            }
>> +    trans = btrfs_start_transaction(root->fs_info->tree_root, 2);
>> +    if (IS_ERR(trans)) {
>> +            ret = PTR_ERR(trans);
>> +            goto out;
>>      }
>>
>>      switch (sa->cmd) {
>> @@ -3716,9 +3714,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, 
>> void __user *arg)
>>      case BTRFS_QUOTA_CTL_DISABLE:
>>              ret = btrfs_quota_disable(trans, root->fs_info);
>>              break;
>> -    case BTRFS_QUOTA_CTL_RESCAN:
>> -            ret = btrfs_quota_rescan(root->fs_info);
>> -            break;
>>      default:
>>              ret = -EINVAL;
>>              break;
>> @@ -3727,11 +3722,9 @@ static long btrfs_ioctl_quota_ctl(struct file *file, 
>> void __user *arg)
>>      if (copy_to_user(arg, sa, sizeof(*sa)))
>>              ret = -EFAULT;
>>
>> -    if (trans) {
>> -            err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
>> -            if (err && !ret)
>> -                    ret = err;
>> -    }
>> +    err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
>> +    if (err && !ret)
>> +            ret = err;
>> out:
>>      kfree(sa);
>>      up_write(&root->fs_info->subvol_sem);
>> @@ -3886,6 +3879,64 @@ drop_write:
>>      return ret;
>> }
>>
>> +static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
>> +{
>> +    struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
>> +    struct btrfs_ioctl_quota_rescan_args *qsa;
>> +    int ret;
>> +
>> +    if (!capable(CAP_SYS_ADMIN))
>> +            return -EPERM;
>> +
>> +    ret = mnt_want_write_file(file);
>> +    if (ret)
>> +            return ret;
>> +
>> +    qsa = memdup_user(arg, sizeof(*qsa));
>> +    if (IS_ERR(qsa)) {
>> +            ret = PTR_ERR(qsa);
>> +            goto drop_write;
>> +    }
>> +
>> +    if (qsa->flags) {
>> +            ret = -EINVAL;
>> +            goto out;
>> +    }
>> +
>> +    ret = btrfs_qgroup_rescan(root->fs_info);
>> +
>> +out:
>> +    kfree(qsa);
>> +drop_write:
>> +    mnt_drop_write_file(file);
>> +    return ret;
>> +}
>> +
>> +static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
>> +{
>> +    struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
>> +    struct btrfs_ioctl_quota_rescan_args *qsa;
>> +    int ret = 0;
>> +
>> +    if (!capable(CAP_SYS_ADMIN))
>> +            return -EPERM;
>> +
>> +    qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
>> +    if (!qsa)
>> +            return -ENOMEM;
>> +
>> +    if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
>> +            qsa->flags = 1;
>> +            qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
>> +    }
>> +
>> +    if (copy_to_user(arg, qsa, sizeof(*qsa)))
>> +            ret = -EFAULT;
>> +
>> +    kfree(qsa);
>> +    return ret;
>> +}
>> +
>> static long btrfs_ioctl_set_received_subvol(struct file *file,
>>                                          void __user *arg)
>> {
>> @@ -4124,6 +4175,10 @@ long btrfs_ioctl(struct file *file, unsigned int
>>              return btrfs_ioctl_qgroup_create(file, argp);
>>      case BTRFS_IOC_QGROUP_LIMIT:
>>              return btrfs_ioctl_qgroup_limit(file, argp);
>> +    case BTRFS_IOC_QUOTA_RESCAN:
>> +            return btrfs_ioctl_quota_rescan(file, argp);
>> +    case BTRFS_IOC_QUOTA_RESCAN_STATUS:
>> +            return btrfs_ioctl_quota_rescan_status(file, argp);
>>      case BTRFS_IOC_DEV_REPLACE:
>>              return btrfs_ioctl_dev_replace(root, argp);
>>      case BTRFS_IOC_GET_FSLABEL:
>> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
>> index c50e5a5..664d457 100644
>> --- a/fs/btrfs/qgroup.c
>> +++ b/fs/btrfs/qgroup.c
>> @@ -31,13 +31,13 @@
>> #include "locking.h"
>> #include "ulist.h"
>> #include "backref.h"
>> +#include "extent_io.h"
>>
>> /* TODO XXX FIXME
>>  *  - subvol delete -> delete when ref goes to 0? delete limits also?
>>  *  - reorganize keys
>>  *  - compressed
>>  *  - sync
>> - *  - rescan
>>  *  - copy also limits on subvol creation
>>  *  - limit
>>  *  - caches fuer ulists
>> @@ -98,6 +98,14 @@ struct btrfs_qgroup_list {
>>      struct btrfs_qgroup *member;
>> };
>>
>> +struct qgroup_rescan {
>> +    struct btrfs_work       work;
>> +    struct btrfs_fs_info    *fs_info;
>> +};
>> +
>> +static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
>> +                            struct qgroup_rescan *qscan);
>> +
>> /* must be called with qgroup_ioctl_lock held */
>> static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
>>                                         u64 qgroupid)
>> @@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info 
>> *fs_info)
>>                      }
>>                      fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
>>                                                                        ptr);
>> -                    /* FIXME read scan element */
>> +                    fs_info->qgroup_rescan_progress.objectid =
>> +                                    btrfs_qgroup_status_rescan(l, ptr);
>> +                    if (fs_info->qgroup_flags &
>> +                        BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
>> +                            struct qgroup_rescan *qscan =
>> +                                    kmalloc(sizeof(*qscan), GFP_NOFS);
>> +                            if (!qscan) {
>> +                                    ret = -ENOMEM;
>> +                                    goto out;
>> +                            }
>> +                            fs_info->qgroup_rescan_progress.type = 0;
>> +                            fs_info->qgroup_rescan_progress.offset = 0;
>> +                            qgroup_rescan_start(fs_info, qscan);
>> +                    }
>>                      goto next1;
>>              }
>>
>> @@ -719,7 +740,8 @@ static int update_qgroup_status_item(struct 
>> btrfs_trans_handle *trans,
>>      ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
>>      btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
>>      btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
>> -    /* XXX scan */
>> +    btrfs_set_qgroup_status_rescan(l, ptr,
>> +                            fs_info->qgroup_rescan_progress.objectid);
>>
>>      btrfs_mark_buffer_dirty(l);
>>
>> @@ -830,7 +852,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>>      fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
>>                              BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
>>      btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
>> -    btrfs_set_qgroup_status_scan(leaf, ptr, 0);
>> +    btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
>>
>>      btrfs_mark_buffer_dirty(leaf);
>>
>> @@ -944,10 +966,11 @@ out:
>>      return ret;
>> }
>>
>> -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
>> +static void qgroup_dirty(struct btrfs_fs_info *fs_info,
>> +                     struct btrfs_qgroup *qgroup)
>> {
>> -    /* FIXME */
>> -    return 0;
>> +    if (list_empty(&qgroup->dirty))
>> +            list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
>> }
>>
>> int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
>> @@ -1155,13 +1178,6 @@ out:
>>      return ret;
>> }
>>
>> -static void qgroup_dirty(struct btrfs_fs_info *fs_info,
>> -                     struct btrfs_qgroup *qgroup)
>> -{
>> -    if (list_empty(&qgroup->dirty))
>> -            list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
>> -}
>> -
>> /*
>>  * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
>>  * the modification into a list that's later used by btrfs_end_transaction to
>> @@ -1388,6 +1404,15 @@ int btrfs_qgroup_account_ref(struct 
>> btrfs_trans_handle *trans,
>>              BUG();
>>      }
>>
>> +    mutex_lock(&fs_info->qgroup_rescan_lock);
>> +    if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
>> +            if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
>> +                    mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +                    return 0;
>> +            }
>> +    }
>> +    mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +
>>      /*
>>       * the delayed ref sequence number we pass depends on the direction of
>>       * the operation. for add operations, we pass (node->seq - 1) to skip
>> @@ -1401,7 +1426,15 @@ int btrfs_qgroup_account_ref(struct 
>> btrfs_trans_handle *trans,
>>      if (ret < 0)
>>              return ret;
>>
>> +    mutex_lock(&fs_info->qgroup_rescan_lock);
>>      spin_lock(&fs_info->qgroup_lock);
>> +    if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
>> +            if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
>> +                    ret = 0;
>> +                    goto unlock;
>> +            }
>> +    }
>> +
>>      quota_root = fs_info->quota_root;
>>      if (!quota_root)
>>              goto unlock;
>> @@ -1443,6 +1476,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle 
>> *trans,
>>
>> unlock:
>>      spin_unlock(&fs_info->qgroup_lock);
>> +    mutex_unlock(&fs_info->qgroup_rescan_lock);
> 
> 
> Why do you hold qgroup_rescan_lock when doing qgroup accounting here?
> I can understand that we hold qgroup_rescan_lock when we update
> qgroup_flag (at first in qgroup_account_ref()).
> However, is it necessary that we hold qgroup_rescan_lock while we are
> doing qgroup accounting steps 1, 2 and 3?
> 
> Or am I missing something here?

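We need the lock for the check added above. This check needs the mutex
lock, while the three accounting steps need a spin lock (which was not
modified by my patch). We cannot call mutex_unlock while holding a spin
lock, because mutex_unlock might schedule. So the mutex has to be taken
before the spin lock and can only be released after the spin lock has
been dropped, which is why it is held across the accounting steps.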

Thanks,
-Jan

> Thanks,
> Wang
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to