On Thu, Mar 12, 2026 at 9:32 AM Martin Wilck <[email protected]> wrote:
>
> Hi Brian,
>
> On Wed, 2026-03-11 at 17:16 -0700, Brian Bunker wrote:
> > Add an 'alua' path checker that uses RTPG to check path state, and an
> > 'alua_cached' prioritizer that uses the checker's cached ALUA state.
> >
> > The checker maps ALUA Asymmetric Access States to path states:
> > - AAS_OPTIMIZED, AAS_NON_OPTIMIZED -> PATH_UP
> > - AAS_STANDBY -> PATH_GHOST
> > - AAS_TRANSITIONING -> PATH_PENDING
> > - AAS_UNAVAILABLE, AAS_OFFLINE -> PATH_DOWN
> >
> > The alua_cached prioritizer first checks for cached state from the
> > alua
> > checker, then falls back to sysfs, then to issuing RTPG directly.
> > This
> > maintains backward compatibility with other checker configurations.
> >
> > When detect_checker is enabled, the alua checker is auto-selected for
> > devices with TPGS support. When the alua checker is in use,
> > detect_prio
> > auto-selects the alua_cached prioritizer.
> >
> > Signed-off-by: Brian Bunker <[email protected]>
>
> Thanks! Just a few remarks for now.
>
> Martin
>
> > ---
> > libmultipath/Makefile | 5 +
> > libmultipath/checkers.c | 1 +
> > libmultipath/checkers.h | 1 +
> > libmultipath/checkers/Makefile | 3 +-
> > libmultipath/checkers/alua.c | 426
> > ++++++++++++++++++++++++
> > libmultipath/checkers/alua.h | 15 +
> > libmultipath/prio.c | 1 +
> > libmultipath/prio.h | 1 +
> > libmultipath/prioritizers/Makefile | 1 +
> > libmultipath/prioritizers/alua_cached.c | 224 +++++++++++++
> > libmultipath/prioritizers/alua_rtpg.c | 12 +-
> > libmultipath/prioritizers/alua_rtpg.h | 1 +
> > libmultipath/prioritizers/sysfs.c | 7 +-
> > libmultipath/propsel.c | 20 +-
> > libmultipath/structs.c | 2 +
> > libmultipath/structs.h | 3 +
> > 16 files changed, 716 insertions(+), 7 deletions(-)
> > create mode 100644 libmultipath/checkers/alua.c
> > create mode 100644 libmultipath/checkers/alua.h
> > create mode 100644 libmultipath/prioritizers/alua_cached.c
> >
> > diff --git a/libmultipath/Makefile b/libmultipath/Makefile
> > index 85767ab4..379f4d02 100644
> > --- a/libmultipath/Makefile
> > +++ b/libmultipath/Makefile
> > @@ -38,6 +38,11 @@ nvme-lib.o: nvme-lib.c nvme-ioctl.c nvme-ioctl.h
> > dict.o: dict.c
> > $(Q)$(CC) $(CPPFLAGS) $(CFLAGS) -Wno-unused-parameter -c -o
> > $@ $<
> >
> > +# checkers/alua.o needs to find headers in parent directory
> > +checkers/alua.o: checkers/alua.c
> > + @echo building $@ because of $?
> > + $(Q)$(CC) $(CPPFLAGS) $(CFLAGS) -I. -c -o $@ $<
> > +
> > make_static = $(shell sed '/^static/!s/^\([a-z]\{1,\} \)/static \1/'
> > <$1 >$2)
> >
> > nvme-ioctl.c: nvme/nvme-ioctl.c
> > diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
> > index bb6ad1ee..21aa52d4 100644
> > --- a/libmultipath/checkers.c
> > +++ b/libmultipath/checkers.c
> > @@ -461,6 +461,7 @@ int init_checkers(void)
> > EMC_CLARIION,
> > READSECTOR0,
> > CCISS_TUR,
> > + ALUA_RTPG,
> > };
> > unsigned int i;
> >
> > diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
> > index a969e7d1..2905f3fc 100644
> > --- a/libmultipath/checkers.h
> > +++ b/libmultipath/checkers.h
> > @@ -98,6 +98,7 @@ enum path_check_state {
> > #define EMC_CLARIION "emc_clariion"
> > #define READSECTOR0 "readsector0"
> > #define CCISS_TUR "cciss_tur"
> > +#define ALUA "alua"
> > #define NONE "none"
> > #define INVALID "invalid"
> >
> > diff --git a/libmultipath/checkers/Makefile
> > b/libmultipath/checkers/Makefile
> > index 6f7cfb95..dfdd13a1 100644
> > --- a/libmultipath/checkers/Makefile
> > +++ b/libmultipath/checkers/Makefile
> > @@ -17,7 +17,8 @@ LIBS= \
> > libcheckdirectio.so \
> > libcheckemc_clariion.so \
> > libcheckhp_sw.so \
> > - libcheckrdac.so
> > + libcheckrdac.so \
> > + libcheckalua.so
> >
> > all: $(LIBS)
> >
> > diff --git a/libmultipath/checkers/alua.c
> > b/libmultipath/checkers/alua.c
> > new file mode 100644
> > index 00000000..cb3d7000
> > --- /dev/null
> > +++ b/libmultipath/checkers/alua.c
> > @@ -0,0 +1,426 @@
> > +/*
> > + * ALUA Path Checker
> > + *
> > + * Copyright (c) 2024
> > + * This file is released under the GPL.
>
> Please add a proper license header.
I will do that.
>
> > + */
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <sys/types.h>
> > +#include <sys/stat.h>
> > +#include <sys/sysmacros.h>
> > +#include <unistd.h>
> > +#include <fcntl.h>
> > +#include <errno.h>
> > +#include <pthread.h>
> > +#include <urcu.h>
> > +#include <urcu/uatomic.h>
> > +
> > +#include "checkers.h"
> > +#include "debug.h"
> > +#include "structs.h"
> > +#include "prio.h"
> > +#include "util.h"
> > +#include "time-util.h"
> > +#include "../prioritizers/alua.h"
> > +#include "../prioritizers/alua_rtpg.h"
> > +#include "alua.h"
> > +
> > +#define MAX_NR_TIMEOUTS 1
> > +
> > +enum {
> > + MSG_ALUA_RTPG_NOT_SUPPORTED = CHECKER_FIRST_MSGID,
> > + MSG_ALUA_RTPG_TRANSITIONING,
> > + MSG_ALUA_RUNNING,
> > + MSG_ALUA_TIMEOUT,
> > +};
> > +
> > +#define IDX_(x) (MSG_ ## x - CHECKER_FIRST_MSGID)
> > +const char *libcheck_msgtable[] = {
> > + [IDX_(ALUA_RTPG_NOT_SUPPORTED)] = " ALUA not supported",
> > + [IDX_(ALUA_RTPG_TRANSITIONING)] = " path is transitioning",
> > + [IDX_(ALUA_RUNNING)] = " still running",
> > + [IDX_(ALUA_TIMEOUT)] = " timed out",
> > + NULL,
> > +};
> > +
> > +struct alua_checker_context {
> > + dev_t devt;
> > + int state;
> > + int running; /* uatomic access only */
> > + int fd;
> > + unsigned int timeout;
> > + time_t time;
> > + pthread_t thread;
> > + pthread_mutex_t lock;
> > + pthread_cond_t active;
> > + int holders; /* uatomic access only */
> > + int msgid;
> > + struct checker_context ctx;
> > + unsigned int nr_timeouts;
> > + bool checked_state;
> > + /* ALUA-specific cached data for prioritizer */
> > + int aas; /* Asymmetric Access State */
> > + int tpg; /* Target Port Group */
> > + time_t timestamp; /* When this data was collected */
> > + /* Data needed for RTPG in thread */
> > + struct path *pp; /* Path pointer - only valid during check
> > */
> > +};
>
> Instead of duplicating the threading implementation of the TUR checker
> here we should abstract it out and use the common framework by both
> checkers (and others, too, maybe). This is a thing I've wanted to do
> for a long time. Maybe it's time to start doing it now.
I considered that sort of re-work, but I was concerned that changes
adding to the existing checker might muddy the waters. There are certainly
changes where helper functions could be extracted for both checkers
to leverage them.
>
>
>
>
> > +
> > +int libcheck_init(struct checker *c)
> > +{
> > + struct alua_checker_context *ct;
> > + struct stat sb;
> > +
> > + ct = malloc(sizeof(struct alua_checker_context));
> > + if (!ct)
> > + return 1;
> > + memset(ct, 0, sizeof(struct alua_checker_context));
> > +
> > + ct->state = PATH_UNCHECKED;
> > + ct->fd = -1;
> > + ct->aas = -1;
> > + ct->tpg = -1;
> > + uatomic_set(&ct->holders, 1);
> > + pthread_cond_init_mono(&ct->active);
> > + pthread_mutex_init(&ct->lock, NULL);
> > + if (fstat(c->fd, &sb) == 0)
> > + ct->devt = sb.st_rdev;
> > + ct->ctx.cls = c->cls;
> > + c->context = ct;
> > +
> > + return 0;
> > +}
> > +
> > +static void cleanup_context(struct alua_checker_context *ct)
> > +{
> > + pthread_mutex_destroy(&ct->lock);
> > + pthread_cond_destroy(&ct->active);
> > + free(ct);
> > +}
> > +
> > +void libcheck_free(struct checker *c)
> > +{
> > + if (c->context) {
> > + struct alua_checker_context *ct = c->context;
> > + int holders;
> > + int running;
> > +
> > + running = uatomic_xchg(&ct->running, 0);
> > + if (running)
> > + pthread_cancel(ct->thread);
> > + ct->thread = 0;
> > + holders = uatomic_sub_return(&ct->holders, 1);
> > + if (!holders)
> > + cleanup_context(ct);
> > + c->context = NULL;
> > + }
> > + return;
> > +}
> > +
> > +/*
> > + * Map ALUA Asymmetric Access State to path state
> > + */
> > +static int alua_state_to_path_state(int aas, short *msgid)
> > +{
> > + switch (aas & 0x0f) {
> > + case AAS_OPTIMIZED:
> > + case AAS_NON_OPTIMIZED:
> > + *msgid = CHECKER_MSGID_UP;
> > + return PATH_UP;
> > + case AAS_STANDBY:
> > + *msgid = CHECKER_MSGID_GHOST;
> > + return PATH_GHOST;
> > + case AAS_TRANSITIONING:
> > + *msgid = MSG_ALUA_RTPG_TRANSITIONING;
> > + return PATH_PENDING;
> > + case AAS_UNAVAILABLE:
> > + case AAS_OFFLINE:
> > + default:
> > + *msgid = CHECKER_MSGID_DOWN;
> > + return PATH_DOWN;
> > + }
> > +}
> > +
> > +/*
> > + * Main ALUA check function - uses alua_rtpg.c library.
> > + * This is called either in sync mode or from the async thread.
> > + */
> > +static int
> > +alua_check(struct path *pp, int *aas_out, short *msgid)
> > +{
> > + int aas;
> > +
> > + if (pp->tpg_id == GROUP_ID_UNDEF) {
> > + *msgid = CHECKER_MSGID_DOWN;
> > + return PATH_DOWN;
> > + }
> > +
> > + aas = get_asymmetric_access_state(pp, pp->tpg_id);
> > + if (aas < 0) {
> > + if (aas == -RTPG_LUN_DISCONNECTED) {
> > + *msgid = CHECKER_MSGID_DISCONNECTED;
> > + return PATH_DISCONNECTED;
> > + }
> > + *msgid = CHECKER_MSGID_DOWN;
> > + return PATH_DOWN;
> > + }
> > +
> > + *aas_out = aas;
> > + return alua_state_to_path_state(aas, msgid);
> > +}
> > +
> > +#define alua_thread_cleanup_push(ct)
> > pthread_cleanup_push(cleanup_func, ct)
> > +#define alua_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
> > +
> > +static void cleanup_func(void *data)
> > +{
> > + int holders;
> > + struct alua_checker_context *ct = data;
> > +
> > + holders = uatomic_sub_return(&ct->holders, 1);
> > + if (!holders)
> > + cleanup_context(ct);
> > +}
> > +
> > +void *libcheck_thread(struct checker_context *ctx)
> > +{
> > + struct alua_checker_context *ct =
> > + container_of(ctx, struct alua_checker_context, ctx);
> > + int state, running;
> > + short msgid;
> > + int aas;
> > +
> > + /* This thread can be canceled, so setup clean up */
> > + alua_thread_cleanup_push(ct);
> > +
> > + condlog(4, "%d:%d : alua checker starting up", major(ct-
> > >devt),
> > + minor(ct->devt));
> > +
> > + state = alua_check(ct->pp, &aas, &msgid);
> > + pthread_testcancel();
> > +
> > + /* ALUA checker done */
> > + pthread_mutex_lock(&ct->lock);
> > + ct->state = state;
> > + ct->msgid = msgid;
> > + ct->aas = aas;
> > + ct->tpg = ct->pp->tpg_id;
> > + ct->timestamp = time(NULL);
> > + /* Update path-level cache for prioritizer use */
> > + ct->pp->alua_state = aas;
> > + ct->pp->alua_timestamp = ct->timestamp;
> > + pthread_cond_signal(&ct->active);
> > + pthread_mutex_unlock(&ct->lock);
> > +
> > + condlog(4, "%d:%d : alua checker finished, state %s",
> > major(ct->devt),
> > + minor(ct->devt), checker_state_name(state));
> > +
> > + running = uatomic_xchg(&ct->running, 0);
> > + if (!running)
> > + pause();
> > +
> > + alua_thread_cleanup_pop(ct);
> > +
> > + return ((void *)0);
> > +}
> > +
> > +static void alua_set_async_timeout(struct checker *c)
> > +{
> > + struct alua_checker_context *ct = c->context;
> > + struct timespec now;
> > +
> > + get_monotonic_time(&now);
> > + ct->time = now.tv_sec + c->timeout;
> > +}
> > +
> > +static int alua_check_async_timeout(struct checker *c)
> > +{
> > + struct alua_checker_context *ct = c->context;
> > + struct timespec now;
> > +
> > + get_monotonic_time(&now);
> > + return (now.tv_sec > ct->time);
> > +}
> > +
> > +static int check_pending(struct checker *c)
> > +{
> > + struct alua_checker_context *ct = c->context;
> > + int alua_status = PATH_PENDING;
> > +
> > + pthread_mutex_lock(&ct->lock);
> > + if (ct->state != PATH_PENDING || ct->msgid !=
> > MSG_ALUA_RUNNING) {
> > + alua_status = ct->state;
> > + c->msgid = ct->msgid;
> > + }
> > + pthread_mutex_unlock(&ct->lock);
> > + if (alua_status == PATH_PENDING && c->msgid ==
> > MSG_ALUA_RUNNING) {
> > + condlog(4, "%d:%d : alua checker still running",
> > + major(ct->devt), minor(ct->devt));
> > + } else {
> > + int running = uatomic_xchg(&ct->running, 0);
> > + if (running)
> > + pthread_cancel(ct->thread);
> > + ct->thread = 0;
> > + }
> > +
> > + ct->checked_state = true;
> > + return alua_status;
> > +}
> > +
> > +bool libcheck_need_wait(struct checker *c)
> > +{
> > + struct alua_checker_context *ct = c->context;
> > + return (ct && ct->thread && uatomic_read(&ct->running) != 0
> > &&
> > + !ct->checked_state);
> > +}
> > +
> > +int libcheck_pending(struct checker *c)
> > +{
> > + struct alua_checker_context *ct = c->context;
> > +
> > + /* The if path checker isn't running, just return the
> > exiting value. */
> > + if (!ct || !ct->thread)
> > + return c->path_state;
> > +
> > + return check_pending(c);
> > +}
> > +
> > +int libcheck_check(struct checker *c)
> > +{
> > + struct alua_checker_context *ct = c->context;
> > + struct path *pp = container_of(c, struct path, checker);
> > + pthread_attr_t attr;
> > + int alua_status = PATH_PENDING, r;
> > + int aas;
> > +
> > + if (!ct)
> > + return PATH_UNCHECKED;
> > +
> > + if (checker_is_sync(c)) {
> > + alua_status = alua_check(pp, &aas, &c->msgid);
> > + ct->tpg = pp->tpg_id;
> > + ct->aas = aas;
> > + ct->timestamp = time(NULL);
> > + /* Update path-level cache for prioritizer use */
> > + pp->alua_state = aas;
> > + pp->alua_timestamp = ct->timestamp;
> > + return alua_status;
> > + }
> > +
> > + /* Async mode */
> > + if (ct->thread) {
> > + ct->checked_state = true;
> > + if (alua_check_async_timeout(c)) {
> > + int running = uatomic_xchg(&ct->running, 0);
> > + if (running) {
> > + pthread_cancel(ct->thread);
> > + condlog(3, "%d:%d : alua checker
> > timeout",
> > + major(ct->devt), minor(ct-
> > >devt));
> > + c->msgid = MSG_ALUA_TIMEOUT;
> > + alua_status = PATH_TIMEOUT;
> > + } else {
> > + pthread_mutex_lock(&ct->lock);
> > + alua_status = ct->state;
> > + c->msgid = ct->msgid;
> > + pthread_mutex_unlock(&ct->lock);
> > + }
> > + ct->thread = 0;
> > + } else if (uatomic_read(&ct->running) != 0) {
> > + /*
> > + * ct->running may be stale - the thread
> > sets
> > + * msgid/state under lock, then clears
> > running
> > + * after releasing it. Check msgid under
> > lock
> > + * to get the truth. Only MSG_ALUA_RUNNING
> > means
> > + * no result yet.
> > + */
> > + pthread_mutex_lock(&ct->lock);
> > + if (ct->msgid != MSG_ALUA_RUNNING) {
> > + alua_status = ct->state;
> > + c->msgid = ct->msgid;
> > + ct->thread = 0;
> > + }
> > + pthread_mutex_unlock(&ct->lock);
> > + } else {
> > + /* ALUA checker done */
> > + ct->thread = 0;
> > + pthread_mutex_lock(&ct->lock);
> > + alua_status = ct->state;
> > + c->msgid = ct->msgid;
> > + pthread_mutex_unlock(&ct->lock);
> > + }
> > + } else {
> > + if (uatomic_read(&ct->holders) > 1) {
> > + /* The thread has been cancelled but hasn't
> > quit. */
> > + if (ct->nr_timeouts <= MAX_NR_TIMEOUTS) {
> > + if (ct->nr_timeouts ==
> > MAX_NR_TIMEOUTS) {
> > + condlog(2, "%d:%d : waiting
> > for stalled alua thread to finish",
> > + major(ct->devt),
> > minor(ct->devt));
> > + }
> > + ct->nr_timeouts++;
> > + c->msgid = MSG_ALUA_TIMEOUT;
> > + return PATH_TIMEOUT;
> > + }
> > + ct->nr_timeouts++;
> > + /*
> > + * Start a new thread while the old one is
> > stalled.
> > + * We have to prevent it from interfering
> > with the new
> > + * thread. We create a new context and leave
> > the old
> > + * one with the stale thread, hoping it will
> > clean up
> > + * eventually.
> > + */
> > + condlog(3, "%d:%d : alua thread not
> > responding",
> > + major(ct->devt), minor(ct->devt));
> > +
> > + if (libcheck_init(c) != 0) {
> > + c->msgid = CHECKER_MSGID_DOWN;
> > + return PATH_UNCHECKED;
> > + }
> > + ((struct alua_checker_context *)c->context)-
> > >nr_timeouts = ct->nr_timeouts;
> > +
> > + if (!uatomic_sub_return(&ct->holders, 1)) {
> > + /* It did terminate, eventually */
> > + cleanup_context(ct);
> > + ((struct alua_checker_context *)c-
> > >context)->nr_timeouts = 0;
> > + }
> > +
> > + ct = c->context;
> > + } else
> > + ct->nr_timeouts = 0;
> > +
> > + /* Start new ALUA checker */
> > + pthread_mutex_lock(&ct->lock);
> > + alua_status = ct->state = PATH_PENDING;
> > + c->msgid = ct->msgid = MSG_ALUA_RUNNING;
> > + pthread_mutex_unlock(&ct->lock);
> > + ct->fd = c->fd;
> > + ct->timeout = c->timeout;
> > + ct->pp = pp;
> > + ct->checked_state = false;
> > + uatomic_add(&ct->holders, 1);
> > + uatomic_set(&ct->running, 1);
> > + alua_set_async_timeout(c);
> > + setup_thread_attr(&attr, 32 * 1024, 1);
> > + r = start_checker_thread(&ct->thread, &attr, &ct-
> > >ctx);
> > + pthread_attr_destroy(&attr);
> > + if (r) {
> > + uatomic_sub(&ct->holders, 1);
> > + uatomic_set(&ct->running, 0);
> > + ct->thread = 0;
> > + condlog(3, "%d:%d : failed to start alua
> > thread, using"
> > + " sync mode", major(ct->devt),
> > minor(ct->devt));
> > + alua_status = alua_check(pp, &aas, &c-
> > >msgid);
> > + ct->tpg = pp->tpg_id;
> > + ct->aas = aas;
> > + ct->timestamp = time(NULL);
> > + /* Update path-level cache for prioritizer
> > use */
> > + pp->alua_state = aas;
> > + pp->alua_timestamp = ct->timestamp;
> > + return alua_status;
> > + }
> > + }
> > +
> > + return alua_status;
> > +}
> > +
> > diff --git a/libmultipath/checkers/alua.h
> > b/libmultipath/checkers/alua.h
> > new file mode 100644
> > index 00000000..b53cef66
> > --- /dev/null
> > +++ b/libmultipath/checkers/alua.h
> > @@ -0,0 +1,15 @@
> > +/*
> > + * ALUA Path Checker Header
> > + *
> > + * This file is released under the GPL.
>
> Please add a proper license header.
>
> > + */
> > +#ifndef _ALUA_CHECKER_H
> > +#define _ALUA_CHECKER_H
> > +
> > +/* ALUA checker caches state in struct path for prioritizer use:
> > + * pp->alua_state - cached AAS value (-1 if not cached)
> > + * pp->alua_timestamp - when the state was last updated
> > + */
> > +
> > +#endif /* _ALUA_CHECKER_H */
> > +
>
> Only comments in this file?
I will look at this. It was likely leftover.
>
>
> > diff --git a/libmultipath/prio.c b/libmultipath/prio.c
> > index 24f825bd..2d59d04e 100644
> > --- a/libmultipath/prio.c
> > +++ b/libmultipath/prio.c
> > @@ -29,6 +29,7 @@ int init_prio(void)
> > #ifdef LOAD_ALL_SHARED_LIBS
> > static const char *const all_prios[] = {
> > PRIO_ALUA,
> > + PRIO_ALUA_CACHED,
> > PRIO_CONST,
> > PRIO_DATACORE,
> > PRIO_EMC,
> > diff --git a/libmultipath/prio.h b/libmultipath/prio.h
> > index 119b75f2..b54ece0d 100644
> > --- a/libmultipath/prio.h
> > +++ b/libmultipath/prio.h
> > @@ -17,6 +17,7 @@ struct path;
> > * Known prioritizers for use in hwtable.c
> > */
> > #define PRIO_ALUA "alua"
> > +#define PRIO_ALUA_CACHED "alua_cached"
> > #define PRIO_CONST "const"
> > #define PRIO_DATACORE "datacore"
> > #define PRIO_EMC "emc"
> > diff --git a/libmultipath/prioritizers/Makefile
> > b/libmultipath/prioritizers/Makefile
> > index ff2524c2..201c52c0 100644
> > --- a/libmultipath/prioritizers/Makefile
> > +++ b/libmultipath/prioritizers/Makefile
> > @@ -13,6 +13,7 @@ LIBDEPS = -lmultipath -lmpathutil -lm -lpthread -
> > lrt
> > # If you add or remove a prioritizer also update
> > multipath/multipath.conf.5
> > LIBS = \
> > libprioalua.so \
> > + libprioalua_cached.so \
> > libprioconst.so \
> > libpriodatacore.so \
> > libprioemc.so \
>
> I think the below should go into a separate patch.
Can they be separated? The prioritizer's logic relies
on the checker populating values for its decision.
>
> > diff --git a/libmultipath/prioritizers/alua_cached.c
> > b/libmultipath/prioritizers/alua_cached.c
> > new file mode 100644
> > index 00000000..2ba5b2e7
> > --- /dev/null
> > +++ b/libmultipath/prioritizers/alua_cached.c
> > @@ -0,0 +1,224 @@
> > +/*
> > + * (C) Copyright IBM Corp. 2004, 2005 All Rights Reserved.
> > + * Modified 2024 to support cached ALUA state from checker
> > + *
> > + * ALUA prioritizer that can use cached state from alua checker
> > + * to avoid duplicate RTPG commands.
> > + *
> > + * This file is released under the GPL.
>
> Please add a proper license header.
>
> > + */
> > +#include <stdio.h>
> > +#include <string.h>
> > +
> > +#include <time.h>
> > +
> > +#include "debug.h"
> > +#include "prio.h"
> > +#include "structs.h"
> > +#include "checkers.h"
> > +#include "discovery.h"
> > +
> > +#include "alua.h"
> > +
> > +#define ALUA_PRIO_NOT_SUPPORTED 1
> > +#define ALUA_PRIO_RTPG_FAILED 2
> > +#define ALUA_PRIO_GETAAS_FAILED 3
> > +#define ALUA_PRIO_TPGS_FAILED 4
> > +#define ALUA_PRIO_NO_INFORMATION 5
> > +#define ALUA_PRIO_CACHED_STALE 6
> > +
> > +/* Maximum age of cached ALUA state in seconds */
> > +#define ALUA_CACHE_MAX_AGE_SECS 5
> > +
> > +static const char * aas_string[] = {
> > + [AAS_OPTIMIZED] = "active/optimized",
> > + [AAS_NON_OPTIMIZED] = "active/non-optimized",
> > + [AAS_STANDBY] = "standby",
> > + [AAS_UNAVAILABLE] = "unavailable",
> > + [AAS_LBA_DEPENDENT] = "logical block dependent",
> > + [AAS_RESERVED] = "ARRAY BUG: invalid TPGs state!",
> > + [AAS_OFFLINE] = "offline",
> > + [AAS_TRANSITIONING] = "transitioning between states",
> > +};
>
> We shouldn't duplicate the definition of these constants.
This was somewhat intentional though not efficient. I will
do better with leveraging in the next patch.
>
> > +
> > +static const char *aas_print_string(int rc)
> > +{
> > + rc &= 0x7f;
> > +
> > + if (rc & 0x70)
> > + return aas_string[AAS_RESERVED];
> > + rc &= 0x0f;
> > + if (rc > AAS_RESERVED && rc < AAS_OFFLINE)
> > + return aas_string[AAS_RESERVED];
> > + else
> > + return aas_string[rc];
> > +}
> > +
> > +/*
> > + * Try to get ALUA info with optimization:
> > + * 1. If using alua checker: use its cached state (it just polled!)
> > + * 2. Try sysfs (kernel maintains this, no I/O needed)
> > + * 3. Fall back to issuing RTPG directly
> > + *
> > + * Rationale: The checker is our active poller. If we're using alua
> > + * and its cache is fresh, we trust that data first. If the cache
> > misses
> > + * (stale or empty), we try sysfs before issuing RTPG. This provides
> > a
> > + * three-tier optimization: checker cache → sysfs → RTPG.
> > + */
> > +/* Maximum length of sysfs access_state string */
> > +#define ALUA_ACCESS_STATE_SIZE 32
> > +
> > +int
> > +get_alua_info_cached(struct path * pp, bool *used_cache)
> > +{
> > + int rc;
> > + int tpg;
> > + bool diff_tpg;
> > + char buff[ALUA_ACCESS_STATE_SIZE];
> > +
> > + *used_cache = false;
> > +
> > + /* First: if using alua checker, use its fresh data from
> > path structure */
> > + if (checker_selected(&pp->checker)) {
> > + const char *chk_name = checker_name(&pp->checker);
> > +
> > + if (chk_name && strcmp(chk_name, "alua") == 0 &&
> > + pp->alua_state >= 0) {
> > + time_t now = time(NULL);
> > + time_t age = now - pp->alua_timestamp;
> > +
> > + if (pp->alua_timestamp > 0 && age <=
> > ALUA_CACHE_MAX_AGE_SECS) {
> > + condlog(4, "%s: using cached ALUA
> > state from checker (fresh poll)", pp->dev);
> > + *used_cache = true;
> > + return pp->alua_state;
> > + }
> > + condlog(4, "%s: cached ALUA state not
> > available or stale, trying sysfs",
> > + pp->dev);
> > + /* Cache miss - try sysfs before issuing
> > RTPG */
> > + }
> > + }
> > +
> > + /* Second: try sysfs (if cache missed or not using alua
> > checker) */
> > + rc = sysfs_get_asymmetric_access_state(pp, buff,
> > sizeof(buff));
> > + if (rc >= 0) {
> > + /* Parse sysfs state string to ALUA state value */
> > + int aas = -1;
> > + int priopath = rc; /* rc contains preferred bit */
> > +
> > + if (!strncmp(buff, "active/optimized", 16))
> > + aas = AAS_OPTIMIZED;
> > + else if (!strncmp(buff, "active/non-optimized", 20))
> > + aas = AAS_NON_OPTIMIZED;
> > + else if (!strncmp(buff, "lba-dependent", 13))
> > + aas = AAS_LBA_DEPENDENT;
> > + else if (!strncmp(buff, "standby", 7))
> > + aas = AAS_STANDBY;
> > +
> > + if (aas >= 0) {
> > + condlog(4, "%s: using sysfs ALUA state (no
> > I/O)", pp->dev);
> > + *used_cache = true;
> > + return (priopath ? 0x80 : 0) | aas;
> > + }
> > + }
> > +
> > + /* Last resort: issue RTPG directly */
> > + tpg = get_target_port_group(pp);
> > + if (tpg < 0) {
> > + rc = get_target_port_group_support(pp);
> > + if (rc < 0)
> > + return -ALUA_PRIO_TPGS_FAILED;
> > + if (rc == TPGS_NONE)
> > + return -ALUA_PRIO_NOT_SUPPORTED;
> > + return -ALUA_PRIO_RTPG_FAILED;
> > + }
> > + diff_tpg = (pp->tpg_id != GROUP_ID_UNDEF && pp->tpg_id !=
> > tpg);
> > + pp->tpg_id = tpg;
> > + condlog((diff_tpg) ? 2 : 4, "%s: reported target port group
> > is %i",
> > + pp->dev, tpg);
> > + rc = get_asymmetric_access_state(pp, tpg);
> > + if (rc < 0) {
> > + condlog(2, "%s: get_asymmetric_access_state returned
> > %d",
> > + __func__, rc);
> > + return -ALUA_PRIO_GETAAS_FAILED;
> > + }
> > +
> > + condlog(3, "%s: aas = %02x [%s]%s", pp->dev, rc,
> > aas_print_string(rc),
> > + (rc & 0x80) ? " [preferred]" : "");
> > + return rc;
> > +}
> > +
> > +int get_exclusive_pref_arg(char *args)
> > +{
> > + char *ptr;
> > +
> > + if (args == NULL)
> > + return 0;
> > + ptr = strstr(args, "exclusive_pref_bit");
> > + if (!ptr)
> > + return 0;
> > + if (ptr[18] != '\0' && ptr[18] != ' ' && ptr[18] != '\t')
> > + return 0;
> > + if (ptr != args && ptr[-1] != ' ' && ptr[-1] != '\t')
> > + return 0;
> > + return 1;
> > +}
> > +
> > +int getprio (struct path * pp, char * args)
> > +{
> > + int rc;
> > + int aas;
> > + int priopath;
> > + int exclusive_pref;
> > + bool used_cache = false;
> > +
> > + if (pp->fd < 0)
> > + return -ALUA_PRIO_NO_INFORMATION;
> > +
> > + exclusive_pref = get_exclusive_pref_arg(args);
> > + rc = get_alua_info_cached(pp, &used_cache);
> > +
> > + if (used_cache) {
> > + condlog(4, "%s: priority calculated from cached ALUA
> > state (no RTPG issued)",
> > + pp->dev);
> > + }
> > +
> > + if (rc >= 0) {
> > + aas = (rc & 0x0f);
> > + priopath = (rc & 0x80);
> > + switch(aas) {
> > + case AAS_OPTIMIZED:
> > + rc = 50;
> > + break;
> > + case AAS_NON_OPTIMIZED:
> > + rc = 10;
> > + break;
> > + case AAS_LBA_DEPENDENT:
> > + rc = 5;
> > + break;
> > + case AAS_STANDBY:
> > + rc = 1;
> > + break;
> > + default:
> > + rc = 0;
> > + }
> > + if (priopath && (aas != AAS_OPTIMIZED ||
> > exclusive_pref))
> > + rc += 80;
> > + } else {
> > + switch(-rc) {
> > + case ALUA_PRIO_NOT_SUPPORTED:
> > + condlog(0, "%s: alua not supported", pp-
> > >dev);
> > + break;
> > + case ALUA_PRIO_RTPG_FAILED:
> > + condlog(0, "%s: couldn't get target port
> > group", pp->dev);
> > + break;
> > + case ALUA_PRIO_GETAAS_FAILED:
> > + condlog(0, "%s: couldn't get asymmetric
> > access state", pp->dev);
> > + break;
> > + case ALUA_PRIO_TPGS_FAILED:
> > + condlog(3, "%s: couldn't get supported alua
> > states", pp->dev);
> > + break;
> > + }
> > + }
> > + return rc;
> > +}
> > +
> > diff --git a/libmultipath/prioritizers/alua_rtpg.c
> > b/libmultipath/prioritizers/alua_rtpg.c
> > index 053cccb7..5da06b50 100644
> > --- a/libmultipath/prioritizers/alua_rtpg.c
> > +++ b/libmultipath/prioritizers/alua_rtpg.c
> > @@ -75,6 +75,7 @@ enum scsi_disposition {
> > SCSI_GOOD = 0,
> > SCSI_ERROR,
> > SCSI_RETRY,
> > + SCSI_DISCONNECTED,
> > };
> >
> > static int
> > @@ -124,6 +125,12 @@ scsi_error(struct sg_io_hdr *hdr, int opcode)
> > PRINT_DEBUG("alua: SCSI error for command %02x: status %02x,
> > sense %02x/%02x/%02x",
> > opcode, hdr->status, sense_key, asc, ascq);
> >
> > + /* Check for disconnected LUN (unmapped at target) */
> > + if (sense_key == 0x5 && asc == 0x25 && ascq == 0x00) {
> > + /* ILLEGAL REQUEST / LOGICAL UNIT NOT SUPPORTED */
> > + return SCSI_DISCONNECTED;
> > + }
> > +
> > if (sense_key == UNIT_ATTENTION || sense_key == NOT_READY)
> > return SCSI_RETRY;
> > else
> > @@ -325,7 +332,10 @@ retry:
> > }
> >
> > rc = scsi_error(&hdr, OPERATION_CODE_RTPG);
> > - if (rc == SCSI_ERROR) {
> > + if (rc == SCSI_DISCONNECTED) {
> > + PRINT_DEBUG("do_rtpg: LUN disconnected (unmapped at
> > target)!");
> > + return -RTPG_LUN_DISCONNECTED;
> > + } else if (rc == SCSI_ERROR) {
> > PRINT_DEBUG("do_rtpg: SCSI error!");
> > return -RTPG_RTPG_FAILED;
> > } else if (rc == SCSI_RETRY) {
> > diff --git a/libmultipath/prioritizers/alua_rtpg.h
> > b/libmultipath/prioritizers/alua_rtpg.h
> > index ff6ec0ef..030ca58d 100644
> > --- a/libmultipath/prioritizers/alua_rtpg.h
> > +++ b/libmultipath/prioritizers/alua_rtpg.h
> > @@ -21,6 +21,7 @@
> > #define RTPG_NO_TPG_IDENTIFIER 2
> > #define RTPG_RTPG_FAILED 3
> > #define RTPG_TPG_NOT_FOUND 4
> > +#define RTPG_LUN_DISCONNECTED 5
> >
> > int get_target_port_group_support(const struct path *pp);
> > int get_target_port_group(const struct path *pp);
> > diff --git a/libmultipath/prioritizers/sysfs.c
> > b/libmultipath/prioritizers/sysfs.c
> > index 5e8adc05..ab058ea9 100644
> > --- a/libmultipath/prioritizers/sysfs.c
> > +++ b/libmultipath/prioritizers/sysfs.c
> > @@ -10,6 +10,9 @@
> > #include "discovery.h"
> > #include "prio.h"
> >
> > +/* Maximum length of sysfs access_state string */
> > +#define ALUA_ACCESS_STATE_SIZE 32
> > +
> > static const struct {
> > unsigned char value;
> > char *name;
> > @@ -39,11 +42,11 @@ int get_exclusive_pref_arg(char *args)
> > int getprio (struct path * pp, char *args)
> > {
> > int prio = 0, rc, i;
> > - char buff[512];
> > + char buff[ALUA_ACCESS_STATE_SIZE];
> > int exclusive_pref;
> >
> > exclusive_pref = get_exclusive_pref_arg(args);
> > - rc = sysfs_get_asymmetric_access_state(pp, buff, 512);
> > + rc = sysfs_get_asymmetric_access_state(pp, buff,
> > sizeof(buff));
> > if (rc < 0)
> > return PRIO_UNDEF;
> > prio = 0;
> > diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
> > index 56895c4b..e421abba 100644
> > --- a/libmultipath/propsel.c
> > +++ b/libmultipath/propsel.c
> > @@ -262,6 +262,7 @@ verify_alua_prio(struct multipath *mp)
> > if (!prio_selected(&pp->prio))
> > continue;
> > if (strncmp(name, PRIO_ALUA, PRIO_NAME_LEN) &&
> > + strncmp(name, PRIO_ALUA_CACHED, PRIO_NAME_LEN)
> > &&
> > strncmp(name, PRIO_SYSFS, PRIO_NAME_LEN))
> > return false;
> > assume_alua = true;
> > @@ -690,7 +691,12 @@ int select_checker(struct config *conf, struct
> > path *pp)
> > }
> > (void)path_get_tpgs(pp);
> > if (pp->tpgs != TPGS_NONE && pp->tpgs != TPGS_UNDEF)
> > {
> > - ckr_name = TUR;
> > + /*
> > + * For ALUA devices (any TPGS support), use
> > alua
> > + * checker which combines path checking with
> > ALUA state
> > + * caching for optimal performance.
> > + */
> > + ckr_name = ALUA;
> > goto out;
> > }
> > }
> > @@ -767,8 +773,15 @@ void detect_prio(struct path *pp)
> > if ((tpgs == TPGS_EXPLICIT || !check_rdac(pp)) &&
> > sysfs_get_asymmetric_access_state(pp, buff, 512)
> > >= 0)
> > default_prio = PRIO_SYSFS;
> > - else
> > + else {
> > default_prio = PRIO_ALUA;
> > + /* If using alua checker, use cached
> > prioritizer */
> > + if (checker_selected(&pp->checker)) {
> > + const char *chk_name =
> > checker_name(&pp->checker);
> > + if (chk_name && strcmp(chk_name,
> > ALUA) == 0)
> > + default_prio =
> > PRIO_ALUA_CACHED;
> > + }
> > + }
> > break;
> > default:
> > return;
> > @@ -831,7 +844,8 @@ out:
> > /*
> > * fetch tpgs mode for alua, if its not already obtained
> > */
> > - if (!strncmp(prio_name(p), PRIO_ALUA, PRIO_NAME_LEN)) {
> > + if (!strncmp(prio_name(p), PRIO_ALUA, PRIO_NAME_LEN) ||
> > + !strncmp(prio_name(p), PRIO_ALUA_CACHED, PRIO_NAME_LEN))
> > {
> > int tpgs = path_get_tpgs(pp);
> >
> > if (tpgs == TPGS_NONE) {
> > diff --git a/libmultipath/structs.c b/libmultipath/structs.c
> > index f4413127..6483a568 100644
> > --- a/libmultipath/structs.c
> > +++ b/libmultipath/structs.c
> > @@ -126,6 +126,8 @@ alloc_path (void)
> > pp->fd = -1;
> > pp->tpgs = TPGS_UNDEF;
> > pp->tpg_id = GROUP_ID_UNDEF;
> > + pp->alua_state = -1;
> > + pp->alua_timestamp = 0;
> > pp->priority = PRIO_UNDEF;
> > pp->checkint = CHECKINT_UNDEF;
> > checker_clear(&pp->checker);
> > diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> > index 9adedde2..c4bddee9 100644
> > --- a/libmultipath/structs.h
> > +++ b/libmultipath/structs.h
> > @@ -444,6 +444,9 @@ struct path {
> > vector hwe;
> > struct gen_path generic_path;
> > int tpg_id;
> > + /* Cached ALUA state from checker for prioritizer use */
> > + int alua_state; /* AAS value, -1 if not
> > cached */
> > + time_t alua_timestamp; /* When alua_state was last updated
> > */
> > enum ioctl_info_states ioctl_info;
> > };
> >
I will put together a 2nd version which addresses these comments.
Thanks,
Brian
--
Brian Bunker
PURE Storage, Inc.
[email protected]