On Thu, Nov 05, 2020 at 12:49:52PM +0100, mwi...@suse.com wrote: > From: Martin Wilck <mwi...@suse.com> > > The multipathd tur checker thread is designed to be able to finish at > any time, even after the tur checker itself has been freed. The > multipathd shutdown code makes sure all the checkers have been freed > before freeing the checker_class and calling dlclose() to unload the > DSO, but this doesn't guarantee that the checker threads have finished. > If one hasn't, the DSO will get unloaded while the thread still running > code from it, causing a segfault. > > This patch fixes the issue by further incrementing the DSO's refcount > for every running thread. To avoid race conditions leading to segfaults, > the thread's entrypoint must be in libmultipath, not in the DSO itself. > Therefore we add a new optional checker method, libcheck_thread(). > Checkers defining this method may create a detached thread with > entrypoint checker_thread_entry(), which will call the DSO's > libcheck_thread and take care of the refcount handling.
I can't make this segfault. So that looks good, but it does need libmultipath.version updated to include checker_thread_entry() -Ben > > Reported-by: Benjamin Marzinski <bmarz...@redhat.com> > Signed-off-by: Martin Wilck <mwi...@suse.com> > --- > libmultipath/checkers.c | 55 ++++++++++++++++++++++++++++++++----- > libmultipath/checkers.h | 17 ++++++++++++ > libmultipath/checkers/tur.c | 7 ++--- > 3 files changed, 67 insertions(+), 12 deletions(-) > > diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c > index f7ddd53..7d10f2a 100644 > --- a/libmultipath/checkers.c > +++ b/libmultipath/checkers.c > @@ -3,6 +3,9 @@ > #include <stddef.h> > #include <dlfcn.h> > #include <sys/stat.h> > +#include <pthread.h> > +#include <urcu.h> > +#include <urcu/uatomic.h> > > #include "debug.h" > #include "checkers.h" > @@ -19,6 +22,7 @@ struct checker_class { > int (*mp_init)(struct checker *); /* to allocate the mpcontext */ > void (*free)(struct checker *); /* to free the context */ > void (*reset)(void); /* to reset the global variables */ > + void *(*thread)(void *); /* async thread entry point */ > const char **msgtable; > short msgtable_size; > }; > @@ -54,19 +58,32 @@ static struct checker_class *alloc_checker_class(void) > c = MALLOC(sizeof(struct checker_class)); > if (c) { > INIT_LIST_HEAD(&c->node); > - c->refcount = 1; > + uatomic_set(&c->refcount, 1); > } > return c; > } > > +/* Use uatomic_{sub,add}_return() to ensure proper memory barriers */ > +static int checker_class_ref(struct checker_class *cls) > +{ > + return uatomic_add_return(&cls->refcount, 1); > +} > + > +static int checker_class_unref(struct checker_class *cls) > +{ > + return uatomic_sub_return(&cls->refcount, 1); > +} > + > void free_checker_class(struct checker_class *c) > { > + int cnt; > + > if (!c) > return; > - c->refcount--; > - if (c->refcount) { > - condlog(4, "%s checker refcount %d", > - c->name, c->refcount); > + cnt = checker_class_unref(c); > + if (cnt != 0) { > + condlog(cnt < 0 ? 1 : 4, "%s checker refcount %d", > + c->name, cnt); > return; > } > condlog(3, "unloading %s checker", c->name); > @@ -160,7 +177,8 @@ static struct checker_class *add_checker_class(const char > *multipath_dir, > > c->mp_init = (int (*)(struct checker *)) dlsym(c->handle, > "libcheck_mp_init"); > c->reset = (void (*)(void)) dlsym(c->handle, "libcheck_reset"); > - /* These 2 functions can be NULL. call dlerror() to clear out any > + c->thread = (void *(*)(void*)) dlsym(c->handle, "libcheck_thread"); > + /* These 3 functions can be NULL. call dlerror() to clear out any > * error string */ > dlerror(); > > @@ -346,6 +364,29 @@ bad_id: > return generic_msg[CHECKER_MSGID_NONE]; > } > > +static void checker_cleanup_thread(void *arg) > +{ > + struct checker_class *cls = arg; > + > + (void)checker_class_unref(cls); > + rcu_unregister_thread(); > +} > + > +void *checker_thread_entry(void *arg) > +{ > + struct checker *chk = arg; > + void *rv; > + > + assert(chk && chk->cls && chk->cls->thread); > + > + rcu_register_thread(); > + (void)checker_class_ref(chk->cls); > + pthread_cleanup_push(checker_cleanup_thread, chk->cls); > + rv = chk->cls->thread(chk->context); > + pthread_cleanup_pop(1); > + return rv; > +} > + > void checker_clear_message (struct checker *c) > { > if (!c) > @@ -370,7 +411,7 @@ void checker_get(const char *multipath_dir, struct > checker *dst, > if (!src) > return; > > - src->refcount++; > + (void)checker_class_ref(dst->cls); > } > > int init_checkers(const char *multipath_dir) > diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h > index 9d5f90b..01af02d 100644 > --- a/libmultipath/checkers.h > +++ b/libmultipath/checkers.h > @@ -148,6 +148,23 @@ void checker_set_async (struct checker *); > void checker_set_fd (struct checker *, int); > void checker_enable (struct checker *); > void checker_disable (struct checker *); > +/* > + * checker_thread_entry(): entry point for async path checker thread > + * > + * Path checkers that do I/O may hang forever. To avoid blocking, some > + * checkers therefore use asyncronous, detached threads for checking > + * the paths. These threads may continue hanging if multipathd is stopped. > + * In this case, we can't unload the checker DSO at exit. In order to > + * avoid race conditions and crashes, the entry point of the thread > + * needs to be in libmultipath, not in the DSO itself. > + * Checker threads must use this function as entry point. It will call > + * the DSO's "libcheck_thread" function with the checker context as > + * argument. When libcheck_thread() returns, it will clean up and > + * decrement the DSO's refcount. See the tur checker for a usage example. > + * > + * @param arg: pointer to struct checker, must have non-NULL cls->thread > + */ > +void *checker_thread_entry (void *); > int checker_check (struct checker *, int); > int checker_is_sync(const struct checker *); > const char *checker_name (const struct checker *); > diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c > index e886fcf..063c419 100644 > --- a/libmultipath/checkers/tur.c > +++ b/libmultipath/checkers/tur.c > @@ -15,7 +15,6 @@ > #include <errno.h> > #include <sys/time.h> > #include <pthread.h> > -#include <urcu.h> > #include <urcu/uatomic.h> > > #include "checkers.h" > @@ -204,7 +203,6 @@ static void cleanup_func(void *data) > holders = uatomic_sub_return(&ct->holders, 1); > if (!holders) > cleanup_context(ct); > - rcu_unregister_thread(); > } > > /* > @@ -251,7 +249,7 @@ static void tur_deep_sleep(const struct > tur_checker_context *ct) > #define tur_deep_sleep(x) do {} while (0) > #endif /* TUR_TEST_MAJOR */ > > -static void *tur_thread(void *ctx) > +void *libcheck_thread(void *ctx) > { > struct tur_checker_context *ct = ctx; > int state, running; > @@ -259,7 +257,6 @@ static void *tur_thread(void *ctx) > > /* This thread can be canceled, so setup clean up */ > tur_thread_cleanup_push(ct); > - rcu_register_thread(); > > condlog(4, "%d:%d : tur checker starting up", major(ct->devt), > minor(ct->devt)); > @@ -394,7 +391,7 @@ int libcheck_check(struct checker * c) > uatomic_set(&ct->running, 1); > tur_set_async_timeout(c); > setup_thread_attr(&attr, 32 * 1024, 1); > - r = pthread_create(&ct->thread, &attr, tur_thread, ct); > + r = pthread_create(&ct->thread, &attr, checker_thread_entry, c); > pthread_attr_destroy(&attr); > if (r) { > uatomic_sub(&ct->holders, 1); > -- > 2.29.0 -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel