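Add a mount option, 'chunk_width_limit=X', which when set forces the
chunk allocator to use at most X devices when allocating a chunk. This
may help reduce the seek penalties seen in filesystems with large
numbers of devices.

A value of 0 or less leaves the chunk width unlimited. The limit is not
applied to system chunks or to profiles that already define a maximum
device count. It is ignored if it exceeds BTRFS_MAX_DEVS for the chunk
tree, and ignored with a warning if it is smaller than the minimum
number of devices the profile requires.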

Signed-off-by: Andrew Armenia <and...@asquaredlabs.com>
---
 fs/btrfs/ctree.h   |  3 +++
 fs/btrfs/super.c   | 22 +++++++++++++++++++++-
 fs/btrfs/volumes.c | 26 ++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 101c3cf..27b6f8f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -754,6 +754,9 @@ struct btrfs_fs_info {
        unsigned long pending_changes;
        unsigned long compress_type:4;
        int commit_interval;
+
+       int chunk_width_limit;
+
        /*
         * It is a suggestive number, the read side is safe even it gets a
         * wrong number because we will write out the data into a regular
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4e59a91..3da5220 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -300,7 +300,7 @@ enum {
        Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
        Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
        Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_usebackuproot,
-       Opt_nologreplay, Opt_norecovery,
+       Opt_nologreplay, Opt_norecovery, Opt_width_limit,
 #ifdef CONFIG_BTRFS_DEBUG
        Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
 #endif
@@ -360,6 +360,7 @@ static const match_table_t tokens = {
        {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
        {Opt_fatal_errors, "fatal_errors=%s"},
        {Opt_commit_interval, "commit=%d"},
+       {Opt_width_limit, "chunk_width_limit=%d"},
 #ifdef CONFIG_BTRFS_DEBUG
        {Opt_fragment_data, "fragment=data"},
        {Opt_fragment_metadata, "fragment=metadata"},
@@ -782,6 +783,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options,
                                info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
                        }
                        break;
+               case Opt_width_limit:
+                       intarg = 0;
+                       ret = match_int(&args[0], &intarg);
+                       if (ret < 0) {
+                               btrfs_err(root->fs_info, "invalid chunk width limit");
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       /* zero or a negative value disables the limit */
+                       if (intarg > 0) {
+                               info->chunk_width_limit = intarg;
+                       } else {
+                               btrfs_info(root->fs_info, "chunk width is unlimited");
+                               info->chunk_width_limit = 0;
+                       }
+                       break;
 #ifdef CONFIG_BTRFS_DEBUG
                case Opt_fragment_all:
                        btrfs_info(root->fs_info, "fragmenting all space");
@@ -1207,6 +1224,9 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
        if (info->thread_pool_size !=  min_t(unsigned long,
                                             num_online_cpus() + 2, 8))
                seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
+       if (info->chunk_width_limit != 0)
+               seq_printf(seq, ",chunk_width_limit=%d",
+                       info->chunk_width_limit);
        if (btrfs_test_opt(root, COMPRESS)) {
                if (info->compress_type == BTRFS_COMPRESS_ZLIB)
                        compress_type = "zlib";
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bdc6256..6d0d35d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4558,6 +4558,32 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        devs_increment = btrfs_raid_array[index].devs_increment;
        ncopies = btrfs_raid_array[index].ncopies;
 
+       /*
+        * If we have a statically-configured chunk width, and the type doesn't
+        * specify one, go ahead and use the statically-configured max instead.
+        *
+        * If the static value is greater than the BTRFS_MAX_DEVS for the
+        * chunk tree, we ignore it.
+        *
+        * Also, we ignore the static value for system chunks.
+        */
+       if (
+               devs_max == 0 && info->chunk_width_limit != 0
+               && !(type & BTRFS_BLOCK_GROUP_SYSTEM)
+               && info->chunk_width_limit <= BTRFS_MAX_DEVS(info->chunk_root)
+       ) {
+               if (info->chunk_width_limit >= devs_min) {
+                       devs_max = info->chunk_width_limit;
+               } else {
+                       /* limit is below devs_min: warn, leave devs_max as is */
+                       btrfs_warn(info,
+                               "can't satisfy max chunk width of %d; "
+                               "minimum %d devices needed",
+                               info->chunk_width_limit, devs_min
+                       );
+               }
+       }
+
        if (type & BTRFS_BLOCK_GROUP_DATA) {
                max_stripe_size = SZ_1G;
                max_chunk_size = 10 * max_stripe_size;
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to