This is only a preparatory patch. The parameter is in-memory only for
now. A persistent on-disk structure can be added on top of this once
we have consensus.
Signed-off-by: Anand Jain <anand.j...@oracle.com>
---
This patch, together with an experimental sequential chunk layout hint
(patch not yet sent), helped me get consistent performance numbers for
read_policy pid. Because the chunk layout hint is not set at mkfs time,
a balance is needed after setting the desired chunk layout hint.
fs/btrfs/ctree.h | 3 ++
fs/btrfs/disk-io.c | 3 ++
fs/btrfs/sysfs.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/volumes.c | 4 +-
fs/btrfs/volumes.h | 10 +++++
5 files changed, 117 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3bc00aed13b2..c37bd2d7f5d4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -993,6 +993,9 @@ struct btrfs_fs_info {
spinlock_t eb_leak_lock;
struct list_head allocated_ebs;
#endif
+
+ int chunk_layout_data;
+ int chunk_layout_metadata;
};
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c2576c5fe62e..c81f95339a35 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2890,6 +2890,9 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
fs_info->swapfile_pins = RB_ROOT;
fs_info->send_in_progress = 0;
+
+ fs_info->chunk_layout_data = BTRFS_CHUNK_LAYOUT_SIZE;
+ fs_info->chunk_layout_metadata = BTRFS_CHUNK_LAYOUT_SIZE;
}
static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block *sb)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 30e1cfcaa925..788784b1ed44 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -967,6 +967,102 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
}
BTRFS_ATTR_RW(, read_policy, btrfs_read_policy_show, btrfs_read_policy_store);
+static const char * const btrfs_chunk_layout_name[] = { "size" };
+
+static ssize_t btrfs_chunk_layout_data_show(struct kobject *kobj,
+ struct kobj_attribute *a, char *buf)
+{
+ struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ ssize_t ret = 0;
+ int i;
+
+ for (i = 0; i < BTRFS_NR_CHUNK_LAYOUT; i++) {
+ if (fs_info->chunk_layout_data == i)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s[%s]",
+ (ret == 0 ? "" : " "),
+ btrfs_chunk_layout_name[i]);
+ else
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
+ (ret == 0 ? "" : " "),
+ btrfs_chunk_layout_name[i]);
+ }
+
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+
+ return ret;
+}
+
+static ssize_t btrfs_chunk_layout_data_store(struct kobject *kobj,
+ struct kobj_attribute *a,
+ const char *buf, size_t len)
+{
+ struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ int i;
+
+ for (i = 0; i < BTRFS_NR_CHUNK_LAYOUT; i++) {
+ if (strmatch(buf, btrfs_chunk_layout_name[i])) {
+ if (i != fs_info->chunk_layout_data) {
+ fs_info->chunk_layout_data = i;
+ btrfs_info(fs_info, "chunk_layout_data set to '%s'",
+ btrfs_chunk_layout_name[i]);
+ }
+ return len;
+ }
+ }
+
+ return -EINVAL;
+}
+BTRFS_ATTR_RW(, chunk_layout_data, btrfs_chunk_layout_data_show,
+ btrfs_chunk_layout_data_store);
+
+static ssize_t btrfs_chunk_layout_metadata_show(struct kobject *kobj,
+ struct kobj_attribute *a,
+ char *buf)
+{
+ struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ ssize_t ret = 0;
+ int i;
+
+ for (i = 0; i < BTRFS_NR_CHUNK_LAYOUT; i++) {
+ if (fs_info->chunk_layout_metadata == i)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s[%s]",
+ (ret == 0 ? "" : " "),
+ btrfs_chunk_layout_name[i]);
+ else
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
+ (ret == 0 ? "" : " "),
+ btrfs_chunk_layout_name[i]);
+ }
+
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+
+ return ret;
+}
+
+static ssize_t btrfs_chunk_layout_metadata_store(struct kobject *kobj,
+ struct kobj_attribute *a,
+ const char *buf, size_t len)
+{
+ struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ int i;
+
+ for (i = 0; i < BTRFS_NR_CHUNK_LAYOUT; i++) {
+ if (strmatch(buf, btrfs_chunk_layout_name[i])) {
+ if (i != fs_info->chunk_layout_metadata) {
+ fs_info->chunk_layout_metadata = i;
+ btrfs_info(fs_info,
+ "chunk_layout_metadata set to '%s'",
+ btrfs_chunk_layout_name[i]);
+ }
+ return len;
+ }
+ }
+
+ return -EINVAL;
+}
+BTRFS_ATTR_RW(, chunk_layout_metadata, btrfs_chunk_layout_metadata_show,
+ btrfs_chunk_layout_metadata_store);
+
static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, label),
BTRFS_ATTR_PTR(, nodesize),
@@ -978,6 +1074,8 @@ static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, exclusive_operation),
BTRFS_ATTR_PTR(, generation),
BTRFS_ATTR_PTR(, read_policy),
+ BTRFS_ATTR_PTR(, chunk_layout_data),
+ BTRFS_ATTR_PTR(, chunk_layout_metadata),
NULL,
};
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d1ba160ef73b..2223c4263d4a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5097,7 +5097,9 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
ctl->ndevs = ndevs;
/*
- * now sort the devices by hole size / available space
+ * Now sort the devices by hole size / available space.
+ * This sort helps to pick device(s) with larger space.
+ * That is BTRFS_CHUNK_LAYOUT_SIZE.
*/
sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
btrfs_cmp_device_info, NULL);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d0a90dc7fc03..b514d09f4ba8 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -218,6 +218,16 @@ enum btrfs_chunk_allocation_policy {
BTRFS_CHUNK_ALLOC_ZONED,
};
+/*
+ * If we have more than the required number of devices for striping,
+ * chunk_layout lets us know which device to use.
+ */
+enum btrfs_chunk_layout {
+ /* Use in the order of the size of the unallocated space on the device */
+ BTRFS_CHUNK_LAYOUT_SIZE,
+ BTRFS_NR_CHUNK_LAYOUT,
+};
+
/*
* Read policies for mirrored block group profiles, read picks the stripe based
* on these policies.