On Thu, 2025-07-10 at 14:11 -0400, Benjamin Marzinski wrote:
> When a reservation key is changed from one non-zero value to another,
> libmpathpersist checks if the old key is still holding the
> reservation,
> and preempts it if it is. This was only safe if two nodes never share
> the same key. If a node uses the same key as another node that is
> holding the reservation, and switches keys so that it no longer
> matches,
> it will end up preempting the reservation. This is clearly unexpected
> behavior, and it can come about by a simple accident of registering a
> device with the wrong key, and then immediately fixing it.
>
> To handle this, add code to track if a device is the reservation
> holder
> to multipathd. multipathd now has three new commands "getprhold",
> "setprhold", and "unsetprhold". These commands work like the
> equivalent
> *prstatus commands. libmpathpersist calls setprhold on RESERVE
> commands
> and PREEMPT commands when the preempted key is holding the
> reservation
> and unsetprhold on RELEASE commands. Also, calling unsetprflag causes
> prhold to be unset as well, so CLEAR commands and REGISTER commands
> with
> a 0x0 service action key will also unset prhold. libmpathpersist()
> will
> also unset prhold if it notices that the device cannot be holding a
> reservation in preempt_missing_path().
>
> When a new multipath device is created, its initial prhold state is
> PR_UNKNOWN until it checks the current reservation, just like with
> prflag. If multipathd ever finds that a device's registration has
> been
> preempted or cleared in update_map_pr(), it unsets prhold, just like
> with prflag.
>
> Now, before libmpathpersist preempts a reservation when changing keys
> it also checks if multipathd thinks that the device is holding
> the reservation. If it does not, then libmpathpersist won't preempt
> the key. It will assume that another node is holding the reservation
> with the same key.
>
> I should note that this safety check only stops a node not holding
> the
> reservation from preempting the node holding the reservation. If the
> node holding the reservation changes its key, but it fails to change
> the
> resevation key, because that path is down or gone, it will still
> issue
> the preempt to move the reservation to a usable path, even if another
> node is using the same key. This will remove the registrations for
> that
> other node. It also will not work correctly if multipathd stops
> tracking
> a device for some reason. It's only a best-effort safety check.
>
> Signed-off-by: Benjamin Marzinski <[email protected]>
> ---
> libmpathpersist/mpath_persist_int.c | 87 ++++++++++++++++++++++++---
> --
> libmpathpersist/mpath_updatepr.c | 19 ++++++-
> libmpathpersist/mpathpr.h | 2 +
> libmultipath/structs.h | 1 +
> multipathd/callbacks.c | 3 +
> multipathd/cli.c | 4 +-
> multipathd/cli.h | 3 +
> multipathd/cli_handlers.c | 48 ++++++++++++++++
> multipathd/main.c | 33 ++++++++++-
> 9 files changed, 182 insertions(+), 18 deletions(-)
>
Two questions all the way down.
> diff --git a/libmpathpersist/mpath_persist_int.c
> b/libmpathpersist/mpath_persist_int.c
> index ae0defc2..91f0d018 100644
> --- a/libmpathpersist/mpath_persist_int.c
> +++ b/libmpathpersist/mpath_persist_int.c
> @@ -260,6 +260,10 @@ static int mpath_prout_common(struct multipath
> *mpp,int rq_servact, int rq_scope
> * holding the reservation on a path that couldn't get its key
> updated,
> * either because it is down or no longer part of the multipath
> device,
> * you need to preempt the reservation to a usable path with the new
> key
> + *
> + * Also, it's possible that the reservation was preempted, and the
> device
> + * is being re-registered. If it appears that is the case, clear
> + * mpp->prhold in multipathd.
> */
> void preempt_missing_path(struct multipath *mpp, uint8_t *key,
> uint8_t *sa_key,
> int noisy)
> @@ -272,12 +276,19 @@ void preempt_missing_path(struct multipath
> *mpp, uint8_t *key, uint8_t *sa_key,
> int status;
>
> /*
> - * If you previously didn't have a key registered or you
> didn't
> - * switch to a different key, there's no need to preempt.
> Also, you
> - * can't preempt if you no longer have a registered key
> + * If you previously didn't have a key registered, you can't
> + * be holding the reservation. Also, you can't preempt if
> you
> + * no longer have a registered key
> */
> - if (memcmp(key, zero, 8) == 0 || memcmp(sa_key, zero, 8) ==
> 0 ||
> - memcmp(key, sa_key, 8) == 0)
> + if (memcmp(key, zero, 8) == 0 || memcmp(sa_key, zero, 8) ==
> 0) {
> + update_prhold(mpp->alias, false);
> + return;
> + }
> + /*
> + * If you didn't switch to a different key, there is no need
> to
> + * preempt.
> + */
> + if (memcmp(key, sa_key, 8) == 0)
> return;
>
> status = mpath_prin_activepath(mpp, MPATH_PRIN_RRES_SA,
> &resp, noisy);
> @@ -286,13 +297,29 @@ void preempt_missing_path(struct multipath
> *mpp, uint8_t *key, uint8_t *sa_key,
> return;
> }
> /* If there is no reservation, there's nothing to preempt */
> - if (!resp.prin_descriptor.prin_readresv.additional_length)
> + if (!resp.prin_descriptor.prin_readresv.additional_length) {
> + update_prhold(mpp->alias, false);
> return;
> + }
> /*
> * If there reservation is not held by the old key, you
> don't
> * want to preempt it
> */
> - if (memcmp(key, resp.prin_descriptor.prin_readresv.key, 8)
> != 0)
> + if (memcmp(key, resp.prin_descriptor.prin_readresv.key, 8)
> != 0) {
> + /*
> + * If reseravation key doesn't match either the old
> or
> + * the new key, then clear prhold.
> + */
> + if (memcmp(sa_key,
> resp.prin_descriptor.prin_readresv.key, 8) != 0)
> + update_prhold(mpp->alias, false);
> + return;
> + }
> +
> + /*
> + * If multipathd doesn't think it is holding the
> reservation, don't
> + * preempt it
> + */
> + if (get_prhold(mpp->alias) != PR_SET)
> return;
> /* Assume this key is being held by an inaccessable path on
> this
> * node. libmpathpersist has never worked if multiple nodes
> share
> @@ -640,19 +667,38 @@ fail_resume:
> return (status == MPATH_PR_RETRYABLE_ERROR) ? MPATH_PR_OTHER
> : status;
> }
>
> +static int reservation_key_matches(struct multipath *mpp, uint8_t
> *key,
> + int noisy)
> +{
> + struct prin_resp resp = {0};
> + int status;
> +
> + status = mpath_prin_activepath(mpp, MPATH_PRIN_RRES_SA,
> &resp, noisy);
> + if (status != MPATH_PR_SUCCESS) {
> + condlog(0, "%s: pr in read reservation command
> failed.",
> + mpp->wwid);
> + return YNU_UNDEF;
> + }
> + if (!resp.prin_descriptor.prin_readresv.additional_length)
> + return YNU_NO;
> + if (memcmp(key, resp.prin_descriptor.prin_readresv.key, 8)
> == 0)
> + return YNU_YES;
> + return YNU_NO;
> +}
> +
> /*
> * for MPATH_PROUT_REG_IGN_SA, we use the ignored paramp->key to
> store the
> - * currently registered key.
> + * currently registered key for use in preempt_missing_path(), but
> only if
> + * the key is holding the reservation.
> */
> static void set_ignored_key(struct multipath *mpp, uint8_t *key)
> {
> memset(key, 0, 8);
> if (!get_be64(mpp->reservation_key))
> return;
> - if (get_prflag(mpp->alias) == PR_UNSET)
> + if (get_prhold(mpp->alias) == PR_UNSET)
> return;
> - update_map_pr(mpp, NULL);
> - if (mpp->prflag != PR_SET)
> + if (reservation_key_matches(mpp, key, 0) == YNU_NO)
> return;
> memcpy(key, &mpp->reservation_key, 8);
> }
> @@ -665,6 +711,7 @@ int do_mpath_persistent_reserve_out(vector curmp,
> vector pathvec, int fd,
> int ret;
> uint64_t prkey;
> struct config *conf;
> + bool preempting_reservation = false;
>
> ret = mpath_get_map(curmp, pathvec, fd, &mpp);
> if (ret != MPATH_PR_SUCCESS)
> @@ -715,9 +762,12 @@ int do_mpath_persistent_reserve_out(vector
> curmp, vector pathvec, int fd,
> case MPATH_PROUT_REG_IGN_SA:
> ret= mpath_prout_reg(mpp, rq_servact, rq_scope,
> rq_type, paramp, noisy);
> break;
> - case MPATH_PROUT_RES_SA :
> - case MPATH_PROUT_PREE_SA :
> - case MPATH_PROUT_PREE_AB_SA :
> + case MPATH_PROUT_PREE_SA:
> + case MPATH_PROUT_PREE_AB_SA:
> + if (reservation_key_matches(mpp, paramp->sa_key,
> noisy) == YNU_YES)
> + preempting_reservation = true;
> + /* fallthrough */
> + case MPATH_PROUT_RES_SA:
> case MPATH_PROUT_CLEAR_SA:
> ret = mpath_prout_common(mpp, rq_servact, rq_scope,
> rq_type,
> paramp, noisy, NULL);
> @@ -744,6 +794,15 @@ int do_mpath_persistent_reserve_out(vector
> curmp, vector pathvec, int fd,
> case MPATH_PROUT_CLEAR_SA:
> update_prflag(mpp->alias, 0);
> update_prkey(mpp->alias, 0);
> + break;
> + case MPATH_PROUT_RES_SA:
> + case MPATH_PROUT_REL_SA:
> + update_prhold(mpp->alias, (rq_servact ==
> MPATH_PROUT_RES_SA));
> + break;
> + case MPATH_PROUT_PREE_SA:
> + case MPATH_PROUT_PREE_AB_SA:
> + if (preempting_reservation)
> + update_prhold(mpp->alias, 1);
> }
> return ret;
> }
> diff --git a/libmpathpersist/mpath_updatepr.c
> b/libmpathpersist/mpath_updatepr.c
> index c8fbe4a2..dd8dd48e 100644
> --- a/libmpathpersist/mpath_updatepr.c
> +++ b/libmpathpersist/mpath_updatepr.c
> @@ -75,13 +75,13 @@ static int do_update_pr(char *alias, char *cmd,
> char *key)
> return ret;
> }
>
> -int get_prflag(char *mapname)
> +static int do_get_pr(char *mapname, const char *cmd)
> {
> char str[256];
> char *reply;
> int prflag;
>
> - snprintf(str, sizeof(str), "getprstatus map %s", mapname);
> + snprintf(str, sizeof(str), "%s map %s", cmd, mapname);
> reply = do_pr(mapname, str);
> if (!reply)
> prflag = PR_UNKNOWN;
> @@ -96,11 +96,26 @@ int get_prflag(char *mapname)
> return prflag;
> }
>
> +int get_prflag(char *mapname)
> +{
> + return do_get_pr(mapname, "getprstatus");
> +}
> +
> +int get_prhold(char *mapname)
> +{
> + return do_get_pr(mapname, "getprhold");
> +}
> +
> int update_prflag(char *mapname, int set) {
> return do_update_pr(mapname, (set)? "setprstatus" :
> "unsetprstatus",
> NULL);
> }
>
> +int update_prhold(char *mapname, bool set)
> +{
> + return do_update_pr(mapname, (set) ? "setprhold" :
> "unsetprhold", NULL);
> +}
> +
> int update_prkey_flags(char *mapname, uint64_t prkey, uint8_t
> sa_flags) {
> char str[256];
>
> diff --git a/libmpathpersist/mpathpr.h b/libmpathpersist/mpathpr.h
> index 912957e5..99d6b82f 100644
> --- a/libmpathpersist/mpathpr.h
> +++ b/libmpathpersist/mpathpr.h
> @@ -9,6 +9,8 @@
> int update_prflag(char *mapname, int set);
> int update_prkey_flags(char *mapname, uint64_t prkey, uint8_t
> sa_flags);
> int get_prflag(char *mapname);
> +int get_prhold(char *mapname);
> +int update_prhold(char *mapname, bool set);
> #define update_prkey(mapname, prkey) update_prkey_flags(mapname,
> prkey, 0)
>
> #endif
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index 6c427d6f..97093675 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -522,6 +522,7 @@ struct multipath {
> struct be64 reservation_key;
> uint8_t sa_flags;
> int prflag;
> + int prhold;
> int all_tg_pt;
> struct gen_multipath generic_mp;
> bool fpin_must_reload;
> diff --git a/multipathd/callbacks.c b/multipathd/callbacks.c
> index fb87b280..b6b57f45 100644
> --- a/multipathd/callbacks.c
> +++ b/multipathd/callbacks.c
> @@ -69,4 +69,7 @@ void init_handler_callbacks(void)
> set_handler_callback(VRB_UNSETMARGINAL | Q1_PATH,
> HANDLER(cli_unset_marginal));
> set_handler_callback(VRB_UNSETMARGINAL | Q1_MAP,
> HANDLER(cli_unset_all_marginal));
> + set_handler_callback(VRB_GETPRHOLD | Q1_MAP,
> HANDLER(cli_getprhold));
> + set_handler_callback(VRB_SETPRHOLD | Q1_MAP,
> HANDLER(cli_setprhold));
> + set_handler_callback(VRB_UNSETPRHOLD | Q1_MAP,
> HANDLER(cli_unsetprhold));
> }
> diff --git a/multipathd/cli.c b/multipathd/cli.c
> index 34588290..d0e6cebc 100644
> --- a/multipathd/cli.c
> +++ b/multipathd/cli.c
> @@ -224,7 +224,9 @@ load_keys (void)
> r += add_key(keys, "setmarginal", VRB_SETMARGINAL, 0);
> r += add_key(keys, "unsetmarginal", VRB_UNSETMARGINAL, 0);
> r += add_key(keys, "all", KEY_ALL, 0);
> -
> + r += add_key(keys, "getprhold", VRB_GETPRHOLD, 0);
> + r += add_key(keys, "setprhold", VRB_SETPRHOLD, 0);
> + r += add_key(keys, "unsetprhold", VRB_UNSETPRHOLD, 0);
>
> if (r) {
> free_keys(keys);
> diff --git a/multipathd/cli.h b/multipathd/cli.h
> index 9fa50145..5a943a45 100644
> --- a/multipathd/cli.h
> +++ b/multipathd/cli.h
> @@ -38,6 +38,9 @@ enum {
> VRB_UNSETMARGINAL = 23,
> VRB_SHUTDOWN = 24,
> VRB_QUIT = 25,
> + VRB_GETPRHOLD = 26,
> + VRB_SETPRHOLD = 27,
> + VRB_UNSETPRHOLD = 28,
>
> /* Qualifiers, values must be different from verbs */
> KEY_PATH = 65,
> diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c
> index a92b07ce..34910c9b 100644
> --- a/multipathd/cli_handlers.c
> +++ b/multipathd/cli_handlers.c
> @@ -1314,6 +1314,10 @@ cli_unsetprstatus(void * v, struct strbuf
> *reply, void * data)
> mpp->prflag = PR_UNSET;
> condlog(2, "%s: prflag unset", param);
> }
> + if (mpp->prhold != PR_UNSET) {
> + mpp->prhold = PR_UNSET;
> + condlog(2, "%s: prhold unset (by clearing prflag)",
> param);
> + }
>
> return 0;
> }
> @@ -1401,6 +1405,50 @@ cli_setprkey(void * v, struct strbuf *reply,
> void * data)
> return ret;
> }
>
> +static int do_prhold(struct vectors *vecs, char *param, int prhold)
> +{
> + struct multipath *mpp = find_mp_by_str(vecs->mpvec, param);
> +
> + if (!mpp)
> + return -ENODEV;
> +
> + if (mpp->prhold != prhold) {
> + mpp->prhold = prhold;
> + condlog(2, "%s: prhold %s", param, pr_str[prhold]);
> + }
> +
> + return 0;
> +}
> +
> +static int cli_setprhold(void *v, struct strbuf *reply, void *data)
> +{
> + return do_prhold((struct vectors *)data, get_keyparam(v,
> KEY_MAP),
> + PR_SET);
> +}
> +
> +static int cli_unsetprhold(void *v, struct strbuf *reply, void
> *data)
> +{
> + return do_prhold((struct vectors *)data, get_keyparam(v,
> KEY_MAP),
> + PR_UNSET);
> +}
> +
> +static int cli_getprhold(void *v, struct strbuf *reply, void *data)
> +{
> + struct multipath *mpp;
> + struct vectors *vecs = (struct vectors *)data;
> + char *param = get_keyparam(v, KEY_MAP);
> +
> + param = convert_dev(param, 0);
> +
> + mpp = find_mp_by_str(vecs->mpvec, param);
> + if (!mpp)
> + return -ENODEV;
> +
> + append_strbuf_str(reply, pr_str[mpp->prhold]);
> + condlog(3, "%s: reply = %s", param, get_strbuf_str(reply));
> + return 0;
> +}
> +
> static int cli_set_marginal(void * v, struct strbuf *reply, void *
> data)
> {
> struct vectors * vecs = (struct vectors *)data;
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 582af880..d22425f6 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -4222,6 +4222,32 @@ main (int argc, char *argv[])
> return (child(NULL));
> }
>
> +static void check_prhold(struct multipath *mpp, struct path *pp)
> +{
> + struct prin_resp resp = {0};
> + int status;
> +
> + if (mpp->prflag == PR_UNSET) {
> + mpp->prhold = PR_UNSET;
> + return;
> + }
> + if (mpp->prflag != PR_SET || mpp->prhold != PR_UNKNOWN)
> + return;
How can this situation come to pass? prflag must be PR_UNKNOWN.
Shouldn't we reset prhold to PR_UNKNOWN as well?
> +
> + status = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RRES_SA,
> &resp, 0);
> + if (status != MPATH_PR_SUCCESS) {
> + condlog (0, "%s: pr in read reservation command
> failed: %d",
> + mpp->wwid, status);
> + return;
> + }
> + mpp->prhold = PR_UNSET;
> + if (!resp.prin_descriptor.prin_readresv.additional_length)
> + return;
> +
> + if (memcmp(&mpp->reservation_key,
> resp.prin_descriptor.prin_readresv.key, 8) == 0)
> + mpp->prhold = PR_SET;
> +}
> +
> static void mpath_pr_event_handle(struct path *pp)
> {
> struct multipath *mpp = pp->mpp;
> @@ -4233,8 +4259,13 @@ static void mpath_pr_event_handle(struct path
> *pp)
> return;
> }
>
> - if (update_map_pr(mpp, pp) != MPATH_PR_SUCCESS)
> + if (update_map_pr(mpp, pp) != MPATH_PR_SUCCESS) {
> + if (mpp->prflag == PR_UNSET)
> + mpp->prhold = PR_UNSET;
Perhaps we should move setting prhold to update_map_pr()?
Martin
> return;
> + }
> +
> + check_prhold(mpp, pp);
>
> if (mpp->prflag != PR_SET)
> return;