Changing max_sectors_kb on a live map is dangerous. When I/O occurs while the
max_sectors_kb value of a map is larger than that of any of its paths, I/O
errors like this may result:
blk_insert_cloned_request: over max size limit. (127 > 126)
This situation must be strictly avoided. The kernel makes sure that the
map's limits match those of the paths when the map is created. But setting
max_sectors_kb on path devices before reloading is dangerous, even if we
read the value from the map's max_sectors_kb beforehand. The reason for
this is that the sysfs value max_sectors_kb is the kernel's max_sectors
divided by 2, and user space has no way to figure out if the kernel-internal
value is odd or even. Thus by writing back the value just read to
max_sectors_kb, one might actually decrease the kernel value by one.
The only safe way to set max_sectors_kb on a live map would be to suspend
it, flushing all outstanding IO, then the path max_sectors_kb, reload,
and resume.
Since commit 8fd4868 ("libmultipath: don't set max_sectors_kb on reloads"),
we don't set the configured max_sectors_kb any more. But as shown above,
this is not safe. So really only set max_sectors_kb when creating a map.
Users who have stacked block devices on top of multipath, as described in the
commit message of 8fd4868 must make sure that the max_sectors_kb setting is
correct when the multipath map is first created. Decreasing the map's
max_sectors_kb value (without touching the paths) should actually be possible
in this situation, because stacked block devices are usually bio-based, and
bio-based IO (in contrast to request-based) can be split if the lower-level
device has can't handle the size of the I/O.
Signed-off-by: Martin Wilck <[email protected]>
---
libmultipath/configure.c | 38 ++++----------------------------------
1 file changed, 4 insertions(+), 34 deletions(-)
diff --git a/libmultipath/configure.c b/libmultipath/configure.c
index 13602f3..8cbb2a8 100644
--- a/libmultipath/configure.c
+++ b/libmultipath/configure.c
@@ -566,46 +566,18 @@ trigger_paths_udev_change(struct multipath *mpp, bool
is_mpath)
mpp->needs_paths_uevent = 0;
}
-static int
-sysfs_set_max_sectors_kb(struct multipath *mpp, int is_reload)
+static int sysfs_set_max_sectors_kb(struct multipath *mpp)
{
struct pathgroup * pgp;
struct path *pp;
char buff[11];
ssize_t len;
int i, j, ret, err = 0;
- struct udev_device *udd;
- int max_sectors_kb;
if (mpp->max_sectors_kb == MAX_SECTORS_KB_UNDEF)
return 0;
- max_sectors_kb = mpp->max_sectors_kb;
- if (is_reload) {
- if (!has_dm_info(mpp) &&
- dm_get_info(mpp->alias, &mpp->dmi) != 0) {
- condlog(1, "failed to get dm info for %s", mpp->alias);
- return 1;
- }
- udd = get_udev_for_mpp(mpp);
- if (!udd) {
- condlog(1, "failed to get udev device to set
max_sectors_kb for %s", mpp->alias);
- return 1;
- }
- ret = sysfs_attr_get_value(udd, "queue/max_sectors_kb", buff,
- sizeof(buff));
- udev_device_unref(udd);
- if (!sysfs_attr_value_ok(ret, sizeof(buff))) {
- condlog(1, "failed to get current max_sectors_kb from
%s", mpp->alias);
- return 1;
- }
- if (sscanf(buff, "%u\n", &max_sectors_kb) != 1) {
- condlog(1, "can't parse current max_sectors_kb from %s",
- mpp->alias);
- return 1;
- }
- }
- snprintf(buff, 11, "%d", max_sectors_kb);
- len = strlen(buff);
+
+ len = snprintf(buff, sizeof(buff), "%d", mpp->max_sectors_kb);
vector_foreach_slot (mpp->pg, pgp, i) {
vector_foreach_slot(pgp->paths, pp, j) {
@@ -923,7 +895,7 @@ int domap(struct multipath *mpp, char *params, int
is_daemon)
return DOMAP_RETRY;
}
- sysfs_set_max_sectors_kb(mpp, 0);
+ sysfs_set_max_sectors_kb(mpp);
if (is_daemon && mpp->ghost_delay > 0 &&
count_active_paths(mpp) &&
pathcount(mpp, PATH_UP) == 0)
mpp->ghost_delay_tick = mpp->ghost_delay;
@@ -934,7 +906,6 @@ int domap(struct multipath *mpp, char *params, int
is_daemon)
case ACT_RELOAD:
case ACT_RELOAD_RENAME:
- sysfs_set_max_sectors_kb(mpp, 1);
if (mpp->ghost_delay_tick > 0 && pathcount(mpp, PATH_UP))
mpp->ghost_delay_tick = 0;
r = dm_addmap_reload(mpp, params, 0);
@@ -942,7 +913,6 @@ int domap(struct multipath *mpp, char *params, int
is_daemon)
case ACT_RESIZE:
case ACT_RESIZE_RENAME:
- sysfs_set_max_sectors_kb(mpp, 1);
if (mpp->ghost_delay_tick > 0 && pathcount(mpp, PATH_UP))
mpp->ghost_delay_tick = 0;
r = dm_addmap_reload(mpp, params, 1);
--
2.44.0