Hi Brian,

On Wed, 2026-03-25 at 08:19 -0700, Brian Bunker wrote:
> Add an 'alua' path checker that uses RTPG to check path state. The
> checker maps ALUA Asymmetric Access States to path states:
>   - AAS_OPTIMIZED, AAS_NON_OPTIMIZED -> PATH_UP
>   - AAS_STANDBY -> PATH_GHOST
>   - AAS_TRANSITIONING -> PATH_PENDING
>   - AAS_UNAVAILABLE, AAS_OFFLINE -> PATH_DOWN
> 
> As part of this change, the common async threading infrastructure is
> extracted from the TUR checker into a new async_checker base class.
> Both TUR and ALUA checkers now extend this base, eliminating code
> duplication and establishing a pattern for future async checkers.
> 
> Two fields are added to struct path (alua_state, alua_timestamp) to
> cache ALUA state for use by prioritizers.
> 
> When detect_checker is enabled, the alua checker is auto-selected for
> devices with TPGS support. Explicitly configured checkers in the
> hardware table take precedence over auto-detection.
> 
> Signed-off-by: Brian Bunker <[email protected]>
> Signed-off-by: Krishna Kant <[email protected]>
> ---
>  libmultipath/Makefile                 |   2 +-
>  libmultipath/async_checker.c          | 249 +++++++++++++++++++++
>  libmultipath/async_checker.h          | 123 +++++++++++
>  libmultipath/checkers.c               |   1 +
>  libmultipath/checkers.h               |   1 +
>  libmultipath/checkers/Makefile        |   3 +-
>  libmultipath/checkers/alua.c          | 266 +++++++++++++++++++++++
>  libmultipath/checkers/tur.c           | 302 ++++--------------------
> --
>  libmultipath/libmultipath.version     |   8 +
>  libmultipath/prioritizers/alua.c      |  30 ---
>  libmultipath/prioritizers/alua.h      |   8 +
>  libmultipath/prioritizers/alua_rtpg.c |  62 +++++-
>  libmultipath/prioritizers/alua_rtpg.h |   4 +
>  libmultipath/propsel.c                |  16 +-
>  libmultipath/structs.c                |   2 +
>  libmultipath/structs.h                |   3 +
>  multipath/multipath.conf.5.in         |  23 +-
>  17 files changed, 795 insertions(+), 308 deletions(-)
>  create mode 100644 libmultipath/async_checker.c
>  create mode 100644 libmultipath/async_checker.h
>  create mode 100644 libmultipath/checkers/alua.c
> 
> [...]

>  
> diff --git a/libmultipath/checkers/alua.c
> b/libmultipath/checkers/alua.c
> new file mode 100644
> index 00000000..bff1bed6
> --- /dev/null
> +++ b/libmultipath/checkers/alua.c
> @@ -0,0 +1,266 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * ALUA Path Checker - uses RTPG to check path state
> + *
> + * Based on the TUR checker by Christophe Varoqui.
> + *
> + * Copyright (c) 2026 Brian Bunker <[email protected]>
> + * Copyright (c) 2026 Krishna Kant <[email protected]>
> + */
> +#include <errno.h>
> +#include <pthread.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/sysmacros.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +#include <urcu/uatomic.h>
> +
> +#include "async_checker.h"
> +#include "checkers.h"
> +#include "debug.h"
> +#include "prio.h"
> +#include "structs.h"
> +#include "time-util.h"
> +#include "../prioritizers/alua.h"
> +#include "../prioritizers/alua_rtpg.h"
> +
> +enum {
> +     MSG_ALUA_RTPG_NOT_SUPPORTED = CHECKER_FIRST_MSGID,
> +     MSG_ALUA_RTPG_TRANSITIONING,
> +     MSG_ALUA_RUNNING,
> +     MSG_ALUA_TIMEOUT,
> +};
> +
> +#define IDX_(x) (MSG_ ## x - CHECKER_FIRST_MSGID)
> +const char *libcheck_msgtable[] = {
> +     [IDX_(ALUA_RTPG_NOT_SUPPORTED)] = " ALUA not supported",
> +     [IDX_(ALUA_RTPG_TRANSITIONING)] = " path is transitioning",
> +     [IDX_(ALUA_RUNNING)] = " still running",
> +     [IDX_(ALUA_TIMEOUT)] = " timed out",
> +     NULL,
> +};
> +
> +struct alua_checker_context {
> +     struct async_checker_context base;      /* Must be first */
> +     /* ALUA-specific cached data for prioritizer */
> +     int aas;           /* Asymmetric Access State */
> +     int tpg;           /* Target Port Group */
> +     time_t timestamp;  /* When this data was collected */
> +     /* Data needed for RTPG in thread - copied before thread
> starts */
> +     unsigned int timeout_ms;
> +};
> +
> +int libcheck_init(struct checker *c)
> +{
> +     int ret = async_checker_init(c, sizeof(struct
> alua_checker_context));
> +     if (ret == 0) {
> +             struct alua_checker_context *ct = c->context;
> +             ct->aas = -1;
> +             ct->tpg = -1;
> +     }
> +     return ret;
> +}
> +
> +void libcheck_free(struct checker *c)
> +{
> +     async_checker_free(c);
> +}
> +
> +/*
> + * Map ALUA Asymmetric Access State to path state
> + */
> +static int alua_state_to_path_state(int aas, short *msgid)
> +{
> +     switch (aas & 0x0f) {
> +     case AAS_OPTIMIZED:
> +     case AAS_NON_OPTIMIZED:
> +             *msgid = CHECKER_MSGID_UP;
> +             return PATH_UP;
> +     case AAS_STANDBY:
> +             *msgid = CHECKER_MSGID_GHOST;
> +             return PATH_GHOST;
> +     case AAS_TRANSITIONING:
> +             *msgid = MSG_ALUA_RTPG_TRANSITIONING;
> +             return PATH_PENDING;
> +     case AAS_UNAVAILABLE:
> +     case AAS_OFFLINE:
> +     default:
> +             *msgid = CHECKER_MSGID_DOWN;
> +             return PATH_DOWN;
> +     }
> +}
> +
> +/*
> + * Main ALUA check function - uses alua_rtpg.c library.
> + * This is called either in sync mode or from the async thread.
> + */
> +static int
> +alua_check(struct path *pp, int *aas_out, short *msgid)
> +{
> +     int aas;
> +
> +     /* Discover TPG ID if not already set */
> +     if (pp->tpg_id == GROUP_ID_UNDEF) {
> +             int tpg = get_target_port_group(pp);
> +             if (tpg < 0) {
> +                     *msgid = CHECKER_MSGID_DOWN;
> +                     return PATH_DOWN;
> +             }
> +             pp->tpg_id = tpg;
> +     }
> +
> +     aas = get_asymmetric_access_state(pp, pp->tpg_id);
> +     if (aas < 0) {
> +             if (aas == -RTPG_LUN_DISCONNECTED) {
> +                     *msgid = CHECKER_MSGID_DISCONNECTED;
> +                     return PATH_DISCONNECTED;
> +             }
> +             *msgid = CHECKER_MSGID_DOWN;
> +             return PATH_DOWN;
> +     }
> +
> +     *aas_out = aas;
> +     return alua_state_to_path_state(aas, msgid);
> +}
> +
> +#define alua_thread_cleanup_push(ct) \
> +     pthread_cleanup_push(async_checker_cleanup_func, &(ct)-
> >base)
> +#define alua_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
> +
> +/*
> + * Async thread function.
> + * IMPORTANT: Do not access struct path from here - it may be freed.
> + * Use only data copied to context before thread start: fd, tpg,
> timeout_ms.
> + */
> +void *libcheck_thread(struct checker_context *ctx)
> +{
> +     struct alua_checker_context *ct =
> +             container_of(ctx, struct alua_checker_context,
> base.ctx);
> +     struct timespec ts;
> +     int state;
> +     short msgid;
> +     int aas;
> +
> +     /* This thread can be canceled, so setup clean up */
> +     alua_thread_cleanup_push(ct);
> +
> +     condlog(4, "%d:%d : alua checker starting up", major(ct-
> >base.devt),
> +             minor(ct->base.devt));
> +
> +     /* Use fd-based check - path pointer is not safe here */
> +     aas = get_asymmetric_access_state_fd(ct->base.fd, ct->tpg,
> +                                          ct->timeout_ms);
> +     pthread_testcancel();
> +
> +     if (aas < 0) {
> +             if (aas == -RTPG_LUN_DISCONNECTED) {
> +                     msgid = CHECKER_MSGID_DISCONNECTED;
> +                     state = PATH_DISCONNECTED;
> +             } else {
> +                     msgid = CHECKER_MSGID_DOWN;
> +                     state = PATH_DOWN;
> +             }
> +             aas = -1;
> +     } else {
> +             state = alua_state_to_path_state(aas, &msgid);
> +     }
> +
> +     /* Store results in context - will be copied to path by
> libcheck_check */
> +     get_monotonic_time(&ts);
> +     pthread_mutex_lock(&ct->base.lock);
> +     ct->base.state = state;
> +     ct->base.msgid = msgid;
> +     ct->aas = aas;
> +     ct->timestamp = ts.tv_sec;
> +     pthread_cond_signal(&ct->base.active);
> +     pthread_mutex_unlock(&ct->base.lock);
> +
> +     condlog(4, "%d:%d : alua checker finished, state %s",
> major(ct->base.devt),
> +             minor(ct->base.devt), checker_state_name(state));
> +
> +     if (!uatomic_xchg(&ct->base.running, 0))
> +             pause();
> +
> +     alua_thread_cleanup_pop(ct);
> +
> +     return ((void *)0);
> +}
> +
> +bool libcheck_need_wait(struct checker *c)
> +{
> +     return async_checker_need_wait(c);
> +}
> +
> +int libcheck_pending(struct checker *c)
> +{
> +     return async_checker_pending(c, MSG_ALUA_RUNNING);
> +}
> +
> +/*
> + * Sync mode callback for async_checker_check.
> + * Safe to access path here since we're in the main thread.
> + */
> +static int alua_check_callback(struct checker *c,
> +                            struct async_checker_context *base_ct
> +                                     __attribute__((unused)),
> +                            short *msgid)
> +{
> +     struct path *pp = container_of(c, struct path, checker);
> +     struct timespec ts;
> +     int aas = -1;
> +     int state;
> +
> +     state = alua_check(pp, &aas, msgid);
> +
> +     /* Cache ALUA state for prioritizer */
> +     get_monotonic_time(&ts);
> +     pp->alua_state = aas;
> +     pp->alua_timestamp = ts.tv_sec;
> +
> +     return state;
> +}
> +
> +int libcheck_check(struct checker *c)
> +{
> +     struct alua_checker_context *ct = c->context;
> +     struct path *pp = container_of(c, struct path, checker);

I just had another look at this patch as I'm working on the checkers
myself, and I have to say that this is something you absolutely cannot
do, especially not when the checker thread is supposed to run
asynchronously. There's a reason that we pass "struct checker *" to the
checker threads and not "struct path *", and the code above is a hack
to work around this design decision.

When you write a checker, you need to think carefully what information
you actually need, and copy that from "struct path" to the checker
context. Currently, for all checkers except emc_clariion, nothing is
needed except pp->fd and pp->timeout. I understand that the alua
checker needs more context, but that doesn't mean you can simply pass
the entire data structure to the checker, and I believe that an  ALUA
can be written so that it needs just a few more fields (if any).

For the "caching" functionality for the prioritizer, I understand that
you need to store the alua_state property in "struct path". So far I
haven't figured out how to do this properly. I could imagine something
like a "checker_private" field in "struct path" which could be handled
like this:
    
union checker_private {
    long alua_data;
}

struct path {
...
    union checker_private c_private; 

    state = checker_get_state(c, &pp->c_private)

and checker_get_state() would do something like

int checker_get_state(struct checker *c, 
                      union checker_private *private)
{
     ...
     if (c->set_private_data)
          c->set_private_data(private);
}

("private" is perhaps not the best name for this if it's to be shared
with the prioritizer but you get the idea).

Let me know if this would be sufficient for your needs.

Btw, I'm still working on my approach for the async checker mechanism,
which will be quite different from what we had before, and based on my
recent "runner" patch set. I'll try to design it such that porting your
code to that framework will be easy. (That's why I just had another
look at your code).

Regards
Martin

Reply via email to