The function call chain __btrfs_map_block()->find_live_mirror() uses the thread's %pid to determine the %mirror_num for a read when mirror_num=0 is passed in the argument.
This pid-based mirror_num selection has the following disadvantages: A single-process large read IO will read from only one disk. In the worst case, all processes reading from the FS could have either odd or even pids, so the read IO gets skewed. There is no deterministic way of knowing/controlling which copy will be used for reading. Performance may vary for a given multi-process workload run at different times. So we need other types of readmirror policies. This patch introduces a framework so that we can add more policies, and converts the existing %pid behavior into a configurable parameter using the property interface. For example: btrfs property set /btrfs readmirror pid (select the pid policy); btrfs property set /btrfs readmirror "" (reset to the default policy). Signed-off-by: Anand Jain <anand.j...@oracle.com> --- fs/btrfs/props.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.c | 11 ++++++++++- fs/btrfs/volumes.h | 7 +++++++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 1a13f10a6ef5..776cdf099f93 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -11,6 +11,7 @@ #include "ctree.h" #include "xattr.h" #include "compression.h" +#include "volumes.h" #define BTRFS_PROP_HANDLERS_HT_BITS 8 static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS); @@ -326,6 +327,45 @@ static const char *prop_compression_extract(struct inode *inode) return NULL; } +static int prop_readmirror_validate(struct inode *inode, const char *value, + size_t len) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + + if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID) + return -EINVAL; + + if (!len) + return 0; + + if (!strncmp("pid", value, 3)) + return 0; + + return -EINVAL; +} + +static int prop_readmirror_apply(struct inode *inode, const char *value, + size_t len) +{ + struct btrfs_fs_devices *fs_devices = btrfs_sb(inode->i_sb)->fs_devices; + + if (!value) + fs_devices->readmirror_policy = BTRFS_READMIRROR_DEFAULT; + else if 
(!strncmp("pid", value, 3)) + fs_devices->readmirror_policy = BTRFS_READMIRROR_PID; + + return 0; +} + +static const char *prop_readmirror_extract(struct inode *inode) +{ + /* + * readmirror policy is applied for the whole FS, inheritance is not + * applicable. + */ + return NULL; +} + static struct prop_handler prop_handlers[] = { { .xattr_name = XATTR_BTRFS_PREFIX "compression", @@ -334,6 +374,13 @@ static const char *prop_compression_extract(struct inode *inode) .extract = prop_compression_extract, .inheritable = 1 }, + { + .xattr_name = XATTR_BTRFS_PREFIX "readmirror", + .validate = prop_readmirror_validate, + .apply = prop_readmirror_apply, + .extract = prop_readmirror_extract, + .inheritable = 0 + }, }; static int inherit_props(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9024eee889b9..e5072d46e181 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5562,7 +5562,16 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, else num_stripes = map->num_stripes; - preferred_mirror = first + current->pid % num_stripes; + switch(fs_info->fs_devices->readmirror_policy) { + case BTRFS_READMIRROR_PID: + /* fall through */ + case BTRFS_READMIRROR_DEFAULT: + /* fall through */ + default: + /* readmirror as per thread pid */ + preferred_mirror = first + current->pid % num_stripes; + break; + } if (dev_replace_is_ongoing && fs_info->dev_replace.cont_reading_from_srcdev_mode == diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 3ad9d58d1b66..27dce9242b55 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -208,6 +208,11 @@ struct btrfs_device { BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes); BTRFS_DEVICE_GETSET_FUNCS(bytes_used); +enum btrfs_readmirror_policy { + BTRFS_READMIRROR_DEFAULT, + BTRFS_READMIRROR_PID, +}; + struct btrfs_fs_devices { u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ u8 metadata_uuid[BTRFS_FSID_SIZE]; @@ -254,6 +259,8 @@ struct btrfs_fs_devices { struct kobject fsid_kobj; 
struct kobject *device_dir_kobj; struct completion kobj_unregister; + + int readmirror_policy; }; #define BTRFS_BIO_INLINE_CSUM_SIZE 64 -- 1.8.3.1