On Thu, Mar 12, 2026 at 9:32 AM Martin Wilck <[email protected]> wrote:
>
> Hi Brian,
>
> On Wed, 2026-03-11 at 17:16 -0700, Brian Bunker wrote:
> > Add an 'alua' path checker that uses RTPG to check path state, and an
> > 'alua_cached' prioritizer that uses the checker's cached ALUA state.
> >
> > The checker maps ALUA Asymmetric Access States to path states:
> >   - AAS_OPTIMIZED, AAS_NON_OPTIMIZED -> PATH_UP
> >   - AAS_STANDBY -> PATH_GHOST
> >   - AAS_TRANSITIONING -> PATH_PENDING
> >   - AAS_UNAVAILABLE, AAS_OFFLINE -> PATH_DOWN
> >
> > The alua_cached prioritizer first checks for cached state from the
> > alua
> > checker, then falls back to sysfs, then to issuing RTPG directly.
> > This
> > maintains backward compatibility with other checker configurations.
> >
> > When detect_checker is enabled, the alua checker is auto-selected for
> > devices with TPGS support. When the alua checker is in use,
> > detect_prio
> > auto-selects the alua_cached prioritizer.
> >
> > Signed-off-by: Brian Bunker <[email protected]>
>
> Thanks! Just a few remarks for now.
>
> Martin
>
> > ---
> >  libmultipath/Makefile                   |   5 +
> >  libmultipath/checkers.c                 |   1 +
> >  libmultipath/checkers.h                 |   1 +
> >  libmultipath/checkers/Makefile          |   3 +-
> >  libmultipath/checkers/alua.c            | 426
> > ++++++++++++++++++++++++
> >  libmultipath/checkers/alua.h            |  15 +
> >  libmultipath/prio.c                     |   1 +
> >  libmultipath/prio.h                     |   1 +
> >  libmultipath/prioritizers/Makefile      |   1 +
> >  libmultipath/prioritizers/alua_cached.c | 224 +++++++++++++
> >  libmultipath/prioritizers/alua_rtpg.c   |  12 +-
> >  libmultipath/prioritizers/alua_rtpg.h   |   1 +
> >  libmultipath/prioritizers/sysfs.c       |   7 +-
> >  libmultipath/propsel.c                  |  20 +-
> >  libmultipath/structs.c                  |   2 +
> >  libmultipath/structs.h                  |   3 +
> >  16 files changed, 716 insertions(+), 7 deletions(-)
> >  create mode 100644 libmultipath/checkers/alua.c
> >  create mode 100644 libmultipath/checkers/alua.h
> >  create mode 100644 libmultipath/prioritizers/alua_cached.c
> >
> > diff --git a/libmultipath/Makefile b/libmultipath/Makefile
> > index 85767ab4..379f4d02 100644
> > --- a/libmultipath/Makefile
> > +++ b/libmultipath/Makefile
> > @@ -38,6 +38,11 @@ nvme-lib.o: nvme-lib.c nvme-ioctl.c nvme-ioctl.h
> >  dict.o:      dict.c
> >       $(Q)$(CC) $(CPPFLAGS) $(CFLAGS) -Wno-unused-parameter -c -o
> > $@ $<
> >
> > +# checkers/alua.o needs to find headers in parent directory
> > +checkers/alua.o: checkers/alua.c
> > +     @echo building $@ because of $?
> > +     $(Q)$(CC) $(CPPFLAGS) $(CFLAGS) -I. -c -o $@ $<
> > +
> >  make_static = $(shell sed '/^static/!s/^\([a-z]\{1,\} \)/static \1/'
> > <$1 >$2)
> >
> >  nvme-ioctl.c: nvme/nvme-ioctl.c
> > diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
> > index bb6ad1ee..21aa52d4 100644
> > --- a/libmultipath/checkers.c
> > +++ b/libmultipath/checkers.c
> > @@ -461,6 +461,7 @@ int init_checkers(void)
> >               EMC_CLARIION,
> >               READSECTOR0,
> >               CCISS_TUR,
> > +             ALUA_RTPG,
> >       };
> >       unsigned int i;
> >
> > diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
> > index a969e7d1..2905f3fc 100644
> > --- a/libmultipath/checkers.h
> > +++ b/libmultipath/checkers.h
> > @@ -98,6 +98,7 @@ enum path_check_state {
> >  #define EMC_CLARIION "emc_clariion"
> >  #define READSECTOR0  "readsector0"
> >  #define CCISS_TUR    "cciss_tur"
> > +#define ALUA         "alua"
> >  #define NONE         "none"
> >  #define INVALID      "invalid"
> >
> > diff --git a/libmultipath/checkers/Makefile
> > b/libmultipath/checkers/Makefile
> > index 6f7cfb95..dfdd13a1 100644
> > --- a/libmultipath/checkers/Makefile
> > +++ b/libmultipath/checkers/Makefile
> > @@ -17,7 +17,8 @@ LIBS= \
> >       libcheckdirectio.so \
> >       libcheckemc_clariion.so \
> >       libcheckhp_sw.so \
> > -     libcheckrdac.so
> > +     libcheckrdac.so \
> > +     libcheckalua.so
> >
> >  all: $(LIBS)
> >
> > diff --git a/libmultipath/checkers/alua.c
> > b/libmultipath/checkers/alua.c
> > new file mode 100644
> > index 00000000..cb3d7000
> > --- /dev/null
> > +++ b/libmultipath/checkers/alua.c
> > @@ -0,0 +1,426 @@
> > +/*
> > + * ALUA Path Checker
> > + *
> > + * Copyright (c) 2024
> > + * This file is released under the GPL.
>
> Please add a proper license header.
I will do that.
>
> > + */
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <sys/types.h>
> > +#include <sys/stat.h>
> > +#include <sys/sysmacros.h>
> > +#include <unistd.h>
> > +#include <fcntl.h>
> > +#include <errno.h>
> > +#include <pthread.h>
> > +#include <urcu.h>
> > +#include <urcu/uatomic.h>
> > +
> > +#include "checkers.h"
> > +#include "debug.h"
> > +#include "structs.h"
> > +#include "prio.h"
> > +#include "util.h"
> > +#include "time-util.h"
> > +#include "../prioritizers/alua.h"
> > +#include "../prioritizers/alua_rtpg.h"
> > +#include "alua.h"
> > +
> > +#define MAX_NR_TIMEOUTS 1
> > +
> > +enum {
> > +     MSG_ALUA_RTPG_NOT_SUPPORTED = CHECKER_FIRST_MSGID,
> > +     MSG_ALUA_RTPG_TRANSITIONING,
> > +     MSG_ALUA_RUNNING,
> > +     MSG_ALUA_TIMEOUT,
> > +};
> > +
> > +#define IDX_(x) (MSG_ ## x - CHECKER_FIRST_MSGID)
> > +const char *libcheck_msgtable[] = {
> > +     [IDX_(ALUA_RTPG_NOT_SUPPORTED)] = " ALUA not supported",
> > +     [IDX_(ALUA_RTPG_TRANSITIONING)] = " path is transitioning",
> > +     [IDX_(ALUA_RUNNING)] = " still running",
> > +     [IDX_(ALUA_TIMEOUT)] = " timed out",
> > +     NULL,
> > +};
> > +
> > +struct alua_checker_context {
> > +     dev_t devt;
> > +     int state;
> > +     int running; /* uatomic access only */
> > +     int fd;
> > +     unsigned int timeout;
> > +     time_t time;
> > +     pthread_t thread;
> > +     pthread_mutex_t lock;
> > +     pthread_cond_t active;
> > +     int holders; /* uatomic access only */
> > +     int msgid;
> > +     struct checker_context ctx;
> > +     unsigned int nr_timeouts;
> > +     bool checked_state;
> > +     /* ALUA-specific cached data for prioritizer */
> > +     int aas;           /* Asymmetric Access State */
> > +     int tpg;           /* Target Port Group */
> > +     time_t timestamp;  /* When this data was collected */
> > +     /* Data needed for RTPG in thread */
> > +     struct path *pp;   /* Path pointer - only valid during check
> > */
> > +};
>
> Instead of duplicating the threading implementation of the TUR checker
> here we should abstract it out and use the common framework by both
> checkers (and others, too, maybe). This is a thing I've wanted to do
> for a long time. Maybe it's time to start doing it now.
I considered that sort of re-work, but I was concerned that changes
adding to the existing checker might muddy the waters. There are certainly
changes where helper functions could be extracted for both checkers
to leverage them.
>
>
>
>
> > +
> > +int libcheck_init(struct checker *c)
> > +{
> > +     struct alua_checker_context *ct;
> > +     struct stat sb;
> > +
> > +     ct = malloc(sizeof(struct alua_checker_context));
> > +     if (!ct)
> > +             return 1;
> > +     memset(ct, 0, sizeof(struct alua_checker_context));
> > +
> > +     ct->state = PATH_UNCHECKED;
> > +     ct->fd = -1;
> > +     ct->aas = -1;
> > +     ct->tpg = -1;
> > +     uatomic_set(&ct->holders, 1);
> > +     pthread_cond_init_mono(&ct->active);
> > +     pthread_mutex_init(&ct->lock, NULL);
> > +     if (fstat(c->fd, &sb) == 0)
> > +             ct->devt = sb.st_rdev;
> > +     ct->ctx.cls = c->cls;
> > +     c->context = ct;
> > +
> > +     return 0;
> > +}
> > +
> > +static void cleanup_context(struct alua_checker_context *ct)
> > +{
> > +     pthread_mutex_destroy(&ct->lock);
> > +     pthread_cond_destroy(&ct->active);
> > +     free(ct);
> > +}
> > +
> > +void libcheck_free(struct checker *c)
> > +{
> > +     if (c->context) {
> > +             struct alua_checker_context *ct = c->context;
> > +             int holders;
> > +             int running;
> > +
> > +             running = uatomic_xchg(&ct->running, 0);
> > +             if (running)
> > +                     pthread_cancel(ct->thread);
> > +             ct->thread = 0;
> > +             holders = uatomic_sub_return(&ct->holders, 1);
> > +             if (!holders)
> > +                     cleanup_context(ct);
> > +             c->context = NULL;
> > +     }
> > +     return;
> > +}
> > +
> > +/*
> > + * Map ALUA Asymmetric Access State to path state
> > + */
> > +static int alua_state_to_path_state(int aas, short *msgid)
> > +{
> > +     switch (aas & 0x0f) {
> > +     case AAS_OPTIMIZED:
> > +     case AAS_NON_OPTIMIZED:
> > +             *msgid = CHECKER_MSGID_UP;
> > +             return PATH_UP;
> > +     case AAS_STANDBY:
> > +             *msgid = CHECKER_MSGID_GHOST;
> > +             return PATH_GHOST;
> > +     case AAS_TRANSITIONING:
> > +             *msgid = MSG_ALUA_RTPG_TRANSITIONING;
> > +             return PATH_PENDING;
> > +     case AAS_UNAVAILABLE:
> > +     case AAS_OFFLINE:
> > +     default:
> > +             *msgid = CHECKER_MSGID_DOWN;
> > +             return PATH_DOWN;
> > +     }
> > +}
> > +
> > +/*
> > + * Main ALUA check function - uses alua_rtpg.c library.
> > + * This is called either in sync mode or from the async thread.
> > + */
> > +static int
> > +alua_check(struct path *pp, int *aas_out, short *msgid)
> > +{
> > +     int aas;
> > +
> > +     if (pp->tpg_id == GROUP_ID_UNDEF) {
> > +             *msgid = CHECKER_MSGID_DOWN;
> > +             return PATH_DOWN;
> > +     }
> > +
> > +     aas = get_asymmetric_access_state(pp, pp->tpg_id);
> > +     if (aas < 0) {
> > +             if (aas == -RTPG_LUN_DISCONNECTED) {
> > +                     *msgid = CHECKER_MSGID_DISCONNECTED;
> > +                     return PATH_DISCONNECTED;
> > +             }
> > +             *msgid = CHECKER_MSGID_DOWN;
> > +             return PATH_DOWN;
> > +     }
> > +
> > +     *aas_out = aas;
> > +     return alua_state_to_path_state(aas, msgid);
> > +}
> > +
> > +#define alua_thread_cleanup_push(ct)
> > pthread_cleanup_push(cleanup_func, ct)
> > +#define alua_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
> > +
> > +static void cleanup_func(void *data)
> > +{
> > +     int holders;
> > +     struct alua_checker_context *ct = data;
> > +
> > +     holders = uatomic_sub_return(&ct->holders, 1);
> > +     if (!holders)
> > +             cleanup_context(ct);
> > +}
> > +
> > +void *libcheck_thread(struct checker_context *ctx)
> > +{
> > +     struct alua_checker_context *ct =
> > +             container_of(ctx, struct alua_checker_context, ctx);
> > +     int state, running;
> > +     short msgid;
> > +     int aas;
> > +
> > +     /* This thread can be canceled, so setup clean up */
> > +     alua_thread_cleanup_push(ct);
> > +
> > +     condlog(4, "%d:%d : alua checker starting up", major(ct-
> > >devt),
> > +             minor(ct->devt));
> > +
> > +     state = alua_check(ct->pp, &aas, &msgid);
> > +     pthread_testcancel();
> > +
> > +     /* ALUA checker done */
> > +     pthread_mutex_lock(&ct->lock);
> > +     ct->state = state;
> > +     ct->msgid = msgid;
> > +     ct->aas = aas;
> > +     ct->tpg = ct->pp->tpg_id;
> > +     ct->timestamp = time(NULL);
> > +     /* Update path-level cache for prioritizer use */
> > +     ct->pp->alua_state = aas;
> > +     ct->pp->alua_timestamp = ct->timestamp;
> > +     pthread_cond_signal(&ct->active);
> > +     pthread_mutex_unlock(&ct->lock);
> > +
> > +     condlog(4, "%d:%d : alua checker finished, state %s",
> > major(ct->devt),
> > +             minor(ct->devt), checker_state_name(state));
> > +
> > +     running = uatomic_xchg(&ct->running, 0);
> > +     if (!running)
> > +             pause();
> > +
> > +     alua_thread_cleanup_pop(ct);
> > +
> > +     return ((void *)0);
> > +}
> > +
> > +static void alua_set_async_timeout(struct checker *c)
> > +{
> > +     struct alua_checker_context *ct = c->context;
> > +     struct timespec now;
> > +
> > +     get_monotonic_time(&now);
> > +     ct->time = now.tv_sec + c->timeout;
> > +}
> > +
> > +static int alua_check_async_timeout(struct checker *c)
> > +{
> > +     struct alua_checker_context *ct = c->context;
> > +     struct timespec now;
> > +
> > +     get_monotonic_time(&now);
> > +     return (now.tv_sec > ct->time);
> > +}
> > +
> > +static int check_pending(struct checker *c)
> > +{
> > +     struct alua_checker_context *ct = c->context;
> > +     int alua_status = PATH_PENDING;
> > +
> > +     pthread_mutex_lock(&ct->lock);
> > +     if (ct->state != PATH_PENDING || ct->msgid !=
> > MSG_ALUA_RUNNING) {
> > +             alua_status = ct->state;
> > +             c->msgid = ct->msgid;
> > +     }
> > +     pthread_mutex_unlock(&ct->lock);
> > +     if (alua_status == PATH_PENDING && c->msgid ==
> > MSG_ALUA_RUNNING) {
> > +             condlog(4, "%d:%d : alua checker still running",
> > +                     major(ct->devt), minor(ct->devt));
> > +     } else {
> > +             int running = uatomic_xchg(&ct->running, 0);
> > +             if (running)
> > +                     pthread_cancel(ct->thread);
> > +             ct->thread = 0;
> > +     }
> > +
> > +     ct->checked_state = true;
> > +     return alua_status;
> > +}
> > +
> > +bool libcheck_need_wait(struct checker *c)
> > +{
> > +     struct alua_checker_context *ct = c->context;
> > +     return (ct && ct->thread && uatomic_read(&ct->running) != 0
> > &&
> > +             !ct->checked_state);
> > +}
> > +
> > +int libcheck_pending(struct checker *c)
> > +{
> > +     struct alua_checker_context *ct = c->context;
> > +
> > +     /* The if path checker isn't running, just return the
> > exiting value. */
> > +     if (!ct || !ct->thread)
> > +             return c->path_state;
> > +
> > +     return check_pending(c);
> > +}
> > +
> > +int libcheck_check(struct checker *c)
> > +{
> > +     struct alua_checker_context *ct = c->context;
> > +     struct path *pp = container_of(c, struct path, checker);
> > +     pthread_attr_t attr;
> > +     int alua_status = PATH_PENDING, r;
> > +     int aas;
> > +
> > +     if (!ct)
> > +             return PATH_UNCHECKED;
> > +
> > +     if (checker_is_sync(c)) {
> > +             alua_status = alua_check(pp, &aas, &c->msgid);
> > +             ct->tpg = pp->tpg_id;
> > +             ct->aas = aas;
> > +             ct->timestamp = time(NULL);
> > +             /* Update path-level cache for prioritizer use */
> > +             pp->alua_state = aas;
> > +             pp->alua_timestamp = ct->timestamp;
> > +             return alua_status;
> > +     }
> > +
> > +     /* Async mode */
> > +     if (ct->thread) {
> > +             ct->checked_state = true;
> > +             if (alua_check_async_timeout(c)) {
> > +                     int running = uatomic_xchg(&ct->running, 0);
> > +                     if (running) {
> > +                             pthread_cancel(ct->thread);
> > +                             condlog(3, "%d:%d : alua checker
> > timeout",
> > +                                     major(ct->devt), minor(ct-
> > >devt));
> > +                             c->msgid = MSG_ALUA_TIMEOUT;
> > +                             alua_status = PATH_TIMEOUT;
> > +                     } else {
> > +                             pthread_mutex_lock(&ct->lock);
> > +                             alua_status = ct->state;
> > +                             c->msgid = ct->msgid;
> > +                             pthread_mutex_unlock(&ct->lock);
> > +                     }
> > +                     ct->thread = 0;
> > +             } else if (uatomic_read(&ct->running) != 0) {
> > +                     /*
> > +                      * ct->running may be stale - the thread
> > sets
> > +                      * msgid/state under lock, then clears
> > running
> > +                      * after releasing it. Check msgid under
> > lock
> > +                      * to get the truth. Only MSG_ALUA_RUNNING
> > means
> > +                      * no result yet.
> > +                      */
> > +                     pthread_mutex_lock(&ct->lock);
> > +                     if (ct->msgid != MSG_ALUA_RUNNING) {
> > +                             alua_status = ct->state;
> > +                             c->msgid = ct->msgid;
> > +                             ct->thread = 0;
> > +                     }
> > +                     pthread_mutex_unlock(&ct->lock);
> > +             } else {
> > +                     /* ALUA checker done */
> > +                     ct->thread = 0;
> > +                     pthread_mutex_lock(&ct->lock);
> > +                     alua_status = ct->state;
> > +                     c->msgid = ct->msgid;
> > +                     pthread_mutex_unlock(&ct->lock);
> > +             }
> > +     } else {
> > +             if (uatomic_read(&ct->holders) > 1) {
> > +                     /* The thread has been cancelled but hasn't
> > quit. */
> > +                     if (ct->nr_timeouts <= MAX_NR_TIMEOUTS) {
> > +                             if (ct->nr_timeouts ==
> > MAX_NR_TIMEOUTS) {
> > +                                     condlog(2, "%d:%d : waiting
> > for stalled alua thread to finish",
> > +                                             major(ct->devt),
> > minor(ct->devt));
> > +                             }
> > +                             ct->nr_timeouts++;
> > +                             c->msgid = MSG_ALUA_TIMEOUT;
> > +                             return PATH_TIMEOUT;
> > +                     }
> > +                     ct->nr_timeouts++;
> > +                     /*
> > +                      * Start a new thread while the old one is
> > stalled.
> > +                      * We have to prevent it from interfering
> > with the new
> > +                      * thread. We create a new context and leave
> > the old
> > +                      * one with the stale thread, hoping it will
> > clean up
> > +                      * eventually.
> > +                      */
> > +                     condlog(3, "%d:%d : alua thread not
> > responding",
> > +                             major(ct->devt), minor(ct->devt));
> > +
> > +                     if (libcheck_init(c) != 0) {
> > +                             c->msgid = CHECKER_MSGID_DOWN;
> > +                             return PATH_UNCHECKED;
> > +                     }
> > +                     ((struct alua_checker_context *)c->context)-
> > >nr_timeouts = ct->nr_timeouts;
> > +
> > +                     if (!uatomic_sub_return(&ct->holders, 1)) {
> > +                             /* It did terminate, eventually */
> > +                             cleanup_context(ct);
> > +                             ((struct alua_checker_context *)c-
> > >context)->nr_timeouts = 0;
> > +                     }
> > +
> > +                     ct = c->context;
> > +             } else
> > +                     ct->nr_timeouts = 0;
> > +
> > +             /* Start new ALUA checker */
> > +             pthread_mutex_lock(&ct->lock);
> > +             alua_status = ct->state = PATH_PENDING;
> > +             c->msgid = ct->msgid = MSG_ALUA_RUNNING;
> > +             pthread_mutex_unlock(&ct->lock);
> > +             ct->fd = c->fd;
> > +             ct->timeout = c->timeout;
> > +             ct->pp = pp;
> > +             ct->checked_state = false;
> > +             uatomic_add(&ct->holders, 1);
> > +             uatomic_set(&ct->running, 1);
> > +             alua_set_async_timeout(c);
> > +             setup_thread_attr(&attr, 32 * 1024, 1);
> > +             r = start_checker_thread(&ct->thread, &attr, &ct-
> > >ctx);
> > +             pthread_attr_destroy(&attr);
> > +             if (r) {
> > +                     uatomic_sub(&ct->holders, 1);
> > +                     uatomic_set(&ct->running, 0);
> > +                     ct->thread = 0;
> > +                     condlog(3, "%d:%d : failed to start alua
> > thread, using"
> > +                             " sync mode", major(ct->devt),
> > minor(ct->devt));
> > +                     alua_status = alua_check(pp, &aas, &c-
> > >msgid);
> > +                     ct->tpg = pp->tpg_id;
> > +                     ct->aas = aas;
> > +                     ct->timestamp = time(NULL);
> > +                     /* Update path-level cache for prioritizer
> > use */
> > +                     pp->alua_state = aas;
> > +                     pp->alua_timestamp = ct->timestamp;
> > +                     return alua_status;
> > +             }
> > +     }
> > +
> > +     return alua_status;
> > +}
> > +
> > diff --git a/libmultipath/checkers/alua.h
> > b/libmultipath/checkers/alua.h
> > new file mode 100644
> > index 00000000..b53cef66
> > --- /dev/null
> > +++ b/libmultipath/checkers/alua.h
> > @@ -0,0 +1,15 @@
> > +/*
> > + * ALUA Path Checker Header
> > + *
> > + * This file is released under the GPL.
>
> Please add a proper license header.
>
> > + */
> > +#ifndef _ALUA_CHECKER_H
> > +#define _ALUA_CHECKER_H
> > +
> > +/* ALUA checker caches state in struct path for prioritizer use:
> > + *   pp->alua_state     - cached AAS value (-1 if not cached)
> > + *   pp->alua_timestamp - when the state was last updated
> > + */
> > +
> > +#endif /* _ALUA_CHECKER_H */
> > +
>
> Only comments in this file?
I will look at this. It was likely leftover.
>
>
> > diff --git a/libmultipath/prio.c b/libmultipath/prio.c
> > index 24f825bd..2d59d04e 100644
> > --- a/libmultipath/prio.c
> > +++ b/libmultipath/prio.c
> > @@ -29,6 +29,7 @@ int init_prio(void)
> >  #ifdef LOAD_ALL_SHARED_LIBS
> >       static const char *const all_prios[] = {
> >               PRIO_ALUA,
> > +             PRIO_ALUA_CACHED,
> >               PRIO_CONST,
> >               PRIO_DATACORE,
> >               PRIO_EMC,
> > diff --git a/libmultipath/prio.h b/libmultipath/prio.h
> > index 119b75f2..b54ece0d 100644
> > --- a/libmultipath/prio.h
> > +++ b/libmultipath/prio.h
> > @@ -17,6 +17,7 @@ struct path;
> >   * Known prioritizers for use in hwtable.c
> >   */
> >  #define PRIO_ALUA            "alua"
> > +#define PRIO_ALUA_CACHED     "alua_cached"
> >  #define PRIO_CONST           "const"
> >  #define PRIO_DATACORE                "datacore"
> >  #define PRIO_EMC             "emc"
> > diff --git a/libmultipath/prioritizers/Makefile
> > b/libmultipath/prioritizers/Makefile
> > index ff2524c2..201c52c0 100644
> > --- a/libmultipath/prioritizers/Makefile
> > +++ b/libmultipath/prioritizers/Makefile
> > @@ -13,6 +13,7 @@ LIBDEPS = -lmultipath -lmpathutil -lm -lpthread -
> > lrt
> >  # If you add or remove a prioritizer also update
> > multipath/multipath.conf.5
> >  LIBS = \
> >       libprioalua.so \
> > +     libprioalua_cached.so \
> >       libprioconst.so \
> >       libpriodatacore.so \
> >       libprioemc.so \
>
> I think the below should go into a separate patch.
Can they be separated? The prioritizer's logic relies
on the checker populating values for its decision.
>
> > diff --git a/libmultipath/prioritizers/alua_cached.c
> > b/libmultipath/prioritizers/alua_cached.c
> > new file mode 100644
> > index 00000000..2ba5b2e7
> > --- /dev/null
> > +++ b/libmultipath/prioritizers/alua_cached.c
> > @@ -0,0 +1,224 @@
> > +/*
> > + * (C) Copyright IBM Corp. 2004, 2005   All Rights Reserved.
> > + * Modified 2024 to support cached ALUA state from checker
> > + *
> > + * ALUA prioritizer that can use cached state from alua checker
> > + * to avoid duplicate RTPG commands.
> > + *
> > + * This file is released under the GPL.
>
> Please add a proper license header.
>
> > + */
> > +#include <stdio.h>
> > +#include <string.h>
> > +
> > +#include <time.h>
> > +
> > +#include "debug.h"
> > +#include "prio.h"
> > +#include "structs.h"
> > +#include "checkers.h"
> > +#include "discovery.h"
> > +
> > +#include "alua.h"
> > +
> > +#define ALUA_PRIO_NOT_SUPPORTED                      1
> > +#define ALUA_PRIO_RTPG_FAILED                        2
> > +#define ALUA_PRIO_GETAAS_FAILED                      3
> > +#define ALUA_PRIO_TPGS_FAILED                        4
> > +#define ALUA_PRIO_NO_INFORMATION             5
> > +#define ALUA_PRIO_CACHED_STALE                       6
> > +
> > +/* Maximum age of cached ALUA state in seconds */
> > +#define ALUA_CACHE_MAX_AGE_SECS                      5
> > +
> > +static const char * aas_string[] = {
> > +     [AAS_OPTIMIZED]         = "active/optimized",
> > +     [AAS_NON_OPTIMIZED]     = "active/non-optimized",
> > +     [AAS_STANDBY]           = "standby",
> > +     [AAS_UNAVAILABLE]       = "unavailable",
> > +     [AAS_LBA_DEPENDENT]     = "logical block dependent",
> > +     [AAS_RESERVED]          = "ARRAY BUG: invalid TPGs state!",
> > +     [AAS_OFFLINE]           = "offline",
> > +     [AAS_TRANSITIONING]     = "transitioning between states",
> > +};
>
> We shouldn't duplicate the definition of these constants.
This was somewhat intentional though not efficient. I will
do better with leveraging in the next patch.
>
> > +
> > +static const char *aas_print_string(int rc)
> > +{
> > +     rc &= 0x7f;
> > +
> > +     if (rc & 0x70)
> > +             return aas_string[AAS_RESERVED];
> > +     rc &= 0x0f;
> > +     if (rc > AAS_RESERVED && rc < AAS_OFFLINE)
> > +             return aas_string[AAS_RESERVED];
> > +     else
> > +             return aas_string[rc];
> > +}
> > +
> > +/*
> > + * Try to get ALUA info with optimization:
> > + * 1. If using alua checker: use its cached state (it just polled!)
> > + * 2. Try sysfs (kernel maintains this, no I/O needed)
> > + * 3. Fall back to issuing RTPG directly
> > + *
> > + * Rationale: The checker is our active poller. If we're using alua
> > + * and its cache is fresh, we trust that data first. If the cache
> > misses
> > + * (stale or empty), we try sysfs before issuing RTPG. This provides
> > a
> > + * three-tier optimization: checker cache → sysfs → RTPG.
> > + */
> > +/* Maximum length of sysfs access_state string */
> > +#define ALUA_ACCESS_STATE_SIZE 32
> > +
> > +int
> > +get_alua_info_cached(struct path * pp, bool *used_cache)
> > +{
> > +     int     rc;
> > +     int     tpg;
> > +     bool    diff_tpg;
> > +     char    buff[ALUA_ACCESS_STATE_SIZE];
> > +
> > +     *used_cache = false;
> > +
> > +     /* First: if using alua checker, use its fresh data from
> > path structure */
> > +     if (checker_selected(&pp->checker)) {
> > +             const char *chk_name = checker_name(&pp->checker);
> > +
> > +             if (chk_name && strcmp(chk_name, "alua") == 0 &&
> > +                 pp->alua_state >= 0) {
> > +                     time_t now = time(NULL);
> > +                     time_t age = now - pp->alua_timestamp;
> > +
> > +                     if (pp->alua_timestamp > 0 && age <=
> > ALUA_CACHE_MAX_AGE_SECS) {
> > +                             condlog(4, "%s: using cached ALUA
> > state from checker (fresh poll)", pp->dev);
> > +                             *used_cache = true;
> > +                             return pp->alua_state;
> > +                     }
> > +                     condlog(4, "%s: cached ALUA state not
> > available or stale, trying sysfs",
> > +                             pp->dev);
> > +                     /* Cache miss - try sysfs before issuing
> > RTPG */
> > +             }
> > +     }
> > +
> > +     /* Second: try sysfs (if cache missed or not using alua
> > checker) */
> > +     rc = sysfs_get_asymmetric_access_state(pp, buff,
> > sizeof(buff));
> > +     if (rc >= 0) {
> > +             /* Parse sysfs state string to ALUA state value */
> > +             int aas = -1;
> > +             int priopath = rc; /* rc contains preferred bit */
> > +
> > +             if (!strncmp(buff, "active/optimized", 16))
> > +                     aas = AAS_OPTIMIZED;
> > +             else if (!strncmp(buff, "active/non-optimized", 20))
> > +                     aas = AAS_NON_OPTIMIZED;
> > +             else if (!strncmp(buff, "lba-dependent", 13))
> > +                     aas = AAS_LBA_DEPENDENT;
> > +             else if (!strncmp(buff, "standby", 7))
> > +                     aas = AAS_STANDBY;
> > +
> > +             if (aas >= 0) {
> > +                     condlog(4, "%s: using sysfs ALUA state (no
> > I/O)", pp->dev);
> > +                     *used_cache = true;
> > +                     return (priopath ? 0x80 : 0) | aas;
> > +             }
> > +     }
> > +
> > +     /* Last resort: issue RTPG directly */
> > +     tpg = get_target_port_group(pp);
> > +     if (tpg < 0) {
> > +             rc = get_target_port_group_support(pp);
> > +             if (rc < 0)
> > +                     return -ALUA_PRIO_TPGS_FAILED;
> > +             if (rc == TPGS_NONE)
> > +                     return -ALUA_PRIO_NOT_SUPPORTED;
> > +             return -ALUA_PRIO_RTPG_FAILED;
> > +     }
> > +     diff_tpg = (pp->tpg_id != GROUP_ID_UNDEF && pp->tpg_id !=
> > tpg);
> > +     pp->tpg_id = tpg;
> > +     condlog((diff_tpg) ? 2 : 4, "%s: reported target port group
> > is %i",
> > +             pp->dev, tpg);
> > +     rc = get_asymmetric_access_state(pp, tpg);
> > +     if (rc < 0) {
> > +             condlog(2, "%s: get_asymmetric_access_state returned
> > %d",
> > +                     __func__, rc);
> > +             return -ALUA_PRIO_GETAAS_FAILED;
> > +     }
> > +
> > +     condlog(3, "%s: aas = %02x [%s]%s", pp->dev, rc,
> > aas_print_string(rc),
> > +             (rc & 0x80) ? " [preferred]" : "");
> > +     return rc;
> > +}
> > +
> > +int get_exclusive_pref_arg(char *args)
> > +{
> > +     char *ptr;
> > +
> > +     if (args == NULL)
> > +             return 0;
> > +     ptr = strstr(args, "exclusive_pref_bit");
> > +     if (!ptr)
> > +             return 0;
> > +     if (ptr[18] != '\0' && ptr[18] != ' ' && ptr[18] != '\t')
> > +             return 0;
> > +     if (ptr != args && ptr[-1] != ' ' && ptr[-1] != '\t')
> > +             return 0;
> > +     return 1;
> > +}
> > +
> > +int getprio (struct path * pp, char * args)
> > +{
> > +     int rc;
> > +     int aas;
> > +     int priopath;
> > +     int exclusive_pref;
> > +     bool used_cache = false;
> > +
> > +     if (pp->fd < 0)
> > +             return -ALUA_PRIO_NO_INFORMATION;
> > +
> > +     exclusive_pref = get_exclusive_pref_arg(args);
> > +     rc = get_alua_info_cached(pp, &used_cache);
> > +
> > +     if (used_cache) {
> > +             condlog(4, "%s: priority calculated from cached ALUA
> > state (no RTPG issued)",
> > +                     pp->dev);
> > +     }
> > +
> > +     if (rc >= 0) {
> > +             aas = (rc & 0x0f);
> > +             priopath = (rc & 0x80);
> > +             switch(aas) {
> > +             case AAS_OPTIMIZED:
> > +                     rc = 50;
> > +                     break;
> > +             case AAS_NON_OPTIMIZED:
> > +                     rc = 10;
> > +                     break;
> > +             case AAS_LBA_DEPENDENT:
> > +                     rc = 5;
> > +                     break;
> > +             case AAS_STANDBY:
> > +                     rc = 1;
> > +                     break;
> > +             default:
> > +                     rc = 0;
> > +             }
> > +             if (priopath && (aas != AAS_OPTIMIZED ||
> > exclusive_pref))
> > +                     rc += 80;
> > +     } else {
> > +             switch(-rc) {
> > +             case ALUA_PRIO_NOT_SUPPORTED:
> > +                     condlog(0, "%s: alua not supported", pp-
> > >dev);
> > +                     break;
> > +             case ALUA_PRIO_RTPG_FAILED:
> > +                     condlog(0, "%s: couldn't get target port
> > group", pp->dev);
> > +                     break;
> > +             case ALUA_PRIO_GETAAS_FAILED:
> > +                     condlog(0, "%s: couldn't get asymmetric
> > access state", pp->dev);
> > +                     break;
> > +             case ALUA_PRIO_TPGS_FAILED:
> > +                     condlog(3, "%s: couldn't get supported alua
> > states", pp->dev);
> > +                     break;
> > +             }
> > +     }
> > +     return rc;
> > +}
> > +
> > diff --git a/libmultipath/prioritizers/alua_rtpg.c
> > b/libmultipath/prioritizers/alua_rtpg.c
> > index 053cccb7..5da06b50 100644
> > --- a/libmultipath/prioritizers/alua_rtpg.c
> > +++ b/libmultipath/prioritizers/alua_rtpg.c
> > @@ -75,6 +75,7 @@ enum scsi_disposition {
> >       SCSI_GOOD = 0,
> >       SCSI_ERROR,
> >       SCSI_RETRY,
> > +     SCSI_DISCONNECTED,
> >  };
> >
> >  static int
> > @@ -124,6 +125,12 @@ scsi_error(struct sg_io_hdr *hdr, int opcode)
> >       PRINT_DEBUG("alua: SCSI error for command %02x: status %02x,
> > sense %02x/%02x/%02x",
> >                   opcode, hdr->status, sense_key, asc, ascq);
> >
> > +     /* Check for disconnected LUN (unmapped at target) */
> > +     if (sense_key == 0x5 && asc == 0x25 && ascq == 0x00) {
> > +             /* ILLEGAL REQUEST / LOGICAL UNIT NOT SUPPORTED */
> > +             return SCSI_DISCONNECTED;
> > +     }
> > +
> >       if (sense_key == UNIT_ATTENTION || sense_key == NOT_READY)
> >               return SCSI_RETRY;
> >       else
> > @@ -325,7 +332,10 @@ retry:
> >       }
> >
> >       rc = scsi_error(&hdr, OPERATION_CODE_RTPG);
> > -     if (rc == SCSI_ERROR) {
> > +     if (rc == SCSI_DISCONNECTED) {
> > +             PRINT_DEBUG("do_rtpg: LUN disconnected (unmapped at
> > target)!");
> > +             return -RTPG_LUN_DISCONNECTED;
> > +     } else if (rc == SCSI_ERROR) {
> >               PRINT_DEBUG("do_rtpg: SCSI error!");
> >               return -RTPG_RTPG_FAILED;
> >       } else if (rc == SCSI_RETRY) {
> > diff --git a/libmultipath/prioritizers/alua_rtpg.h
> > b/libmultipath/prioritizers/alua_rtpg.h
> > index ff6ec0ef..030ca58d 100644
> > --- a/libmultipath/prioritizers/alua_rtpg.h
> > +++ b/libmultipath/prioritizers/alua_rtpg.h
> > @@ -21,6 +21,7 @@
> >  #define RTPG_NO_TPG_IDENTIFIER                       2
> >  #define RTPG_RTPG_FAILED                     3
> >  #define RTPG_TPG_NOT_FOUND                   4
> > +#define RTPG_LUN_DISCONNECTED                        5
> >
> >  int get_target_port_group_support(const struct path *pp);
> >  int get_target_port_group(const struct path *pp);
> > diff --git a/libmultipath/prioritizers/sysfs.c
> > b/libmultipath/prioritizers/sysfs.c
> > index 5e8adc05..ab058ea9 100644
> > --- a/libmultipath/prioritizers/sysfs.c
> > +++ b/libmultipath/prioritizers/sysfs.c
> > @@ -10,6 +10,9 @@
> >  #include "discovery.h"
> >  #include "prio.h"
> >
> > +/* Maximum length of sysfs access_state string */
> > +#define ALUA_ACCESS_STATE_SIZE 32
> > +
> >  static const struct {
> >       unsigned char value;
> >       char *name;
> > @@ -39,11 +42,11 @@ int get_exclusive_pref_arg(char *args)
> >  int getprio (struct path * pp, char *args)
> >  {
> >       int prio = 0, rc, i;
> > -     char buff[512];
> > +     char buff[ALUA_ACCESS_STATE_SIZE];
> >       int exclusive_pref;
> >
> >       exclusive_pref = get_exclusive_pref_arg(args);
> > -     rc = sysfs_get_asymmetric_access_state(pp, buff, 512);
> > +     rc = sysfs_get_asymmetric_access_state(pp, buff,
> > sizeof(buff));
> >       if (rc < 0)
> >               return PRIO_UNDEF;
> >       prio = 0;
> > diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
> > index 56895c4b..e421abba 100644
> > --- a/libmultipath/propsel.c
> > +++ b/libmultipath/propsel.c
> > @@ -262,6 +262,7 @@ verify_alua_prio(struct multipath *mp)
> >               if (!prio_selected(&pp->prio))
> >                       continue;
> >               if (strncmp(name, PRIO_ALUA, PRIO_NAME_LEN) &&
> > +                 strncmp(name, PRIO_ALUA_CACHED, PRIO_NAME_LEN)
> > &&
> >                   strncmp(name, PRIO_SYSFS, PRIO_NAME_LEN))
> >                        return false;
> >               assume_alua = true;
> > @@ -690,7 +691,12 @@ int select_checker(struct config *conf, struct
> > path *pp)
> >               }
> >               (void)path_get_tpgs(pp);
> >               if (pp->tpgs != TPGS_NONE && pp->tpgs != TPGS_UNDEF)
> > {
> > -                     ckr_name = TUR;
> > +                     /*
> > +                      * For ALUA devices (any TPGS support), use
> > alua
> > +                      * checker which combines path checking with
> > ALUA state
> > +                      * caching for optimal performance.
> > +                      */
> > +                     ckr_name = ALUA;
> >                       goto out;
> >               }
> >       }
> > @@ -767,8 +773,15 @@ void detect_prio(struct path *pp)
> >               if ((tpgs == TPGS_EXPLICIT || !check_rdac(pp)) &&
> >                   sysfs_get_asymmetric_access_state(pp, buff, 512)
> > >= 0)
> >                       default_prio = PRIO_SYSFS;
> > -             else
> > +             else {
> >                       default_prio = PRIO_ALUA;
> > +                     /* If using alua checker, use cached
> > prioritizer */
> > +                     if (checker_selected(&pp->checker)) {
> > +                             const char *chk_name =
> > checker_name(&pp->checker);
> > +                             if (chk_name && strcmp(chk_name,
> > ALUA) == 0)
> > +                                     default_prio =
> > PRIO_ALUA_CACHED;
> > +                     }
> > +             }
> >               break;
> >       default:
> >               return;
> > @@ -831,7 +844,8 @@ out:
> >       /*
> >        * fetch tpgs mode for alua, if its not already obtained
> >        */
> > -     if (!strncmp(prio_name(p), PRIO_ALUA, PRIO_NAME_LEN)) {
> > +     if (!strncmp(prio_name(p), PRIO_ALUA, PRIO_NAME_LEN) ||
> > +         !strncmp(prio_name(p), PRIO_ALUA_CACHED, PRIO_NAME_LEN))
> > {
> >               int tpgs = path_get_tpgs(pp);
> >
> >               if (tpgs == TPGS_NONE) {
> > diff --git a/libmultipath/structs.c b/libmultipath/structs.c
> > index f4413127..6483a568 100644
> > --- a/libmultipath/structs.c
> > +++ b/libmultipath/structs.c
> > @@ -126,6 +126,8 @@ alloc_path (void)
> >               pp->fd = -1;
> >               pp->tpgs = TPGS_UNDEF;
> >               pp->tpg_id = GROUP_ID_UNDEF;
> > +             pp->alua_state = -1;
> > +             pp->alua_timestamp = 0;
> >               pp->priority = PRIO_UNDEF;
> >               pp->checkint = CHECKINT_UNDEF;
> >               checker_clear(&pp->checker);
> > diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> > index 9adedde2..c4bddee9 100644
> > --- a/libmultipath/structs.h
> > +++ b/libmultipath/structs.h
> > @@ -444,6 +444,9 @@ struct path {
> >       vector hwe;
> >       struct gen_path generic_path;
> >       int tpg_id;
> > +     /* Cached ALUA state from checker for prioritizer use */
> > +     int alua_state;         /* AAS value, -1 if not
> > cached */
> > +     time_t alua_timestamp;  /* When alua_state was last updated
> > */
> >       enum ioctl_info_states ioctl_info;
> >  };
> >
I will put together a 2nd version which addresses these comments.
Thanks,
Brian
-- 
Brian Bunker
PURE Storage, Inc.
[email protected]

Reply via email to