On Wed, Dec 17, 2025 at 08:45:31PM -0500, Mathieu Desnoyers wrote:
> Integrate with the scheduler to migrate per-CPU slots to the backup slot
> on context switch. This ensures that the per-CPU slots won't be used by
> blocked or preempted tasks holding on to hazard pointers for a long time.
> 
> Signed-off-by: Mathieu Desnoyers <[email protected]>
> Cc: Nicholas Piggin <[email protected]>
> Cc: Michael Ellerman <[email protected]>
> Cc: Greg Kroah-Hartman <[email protected]>
> Cc: Sebastian Andrzej Siewior <[email protected]>
> Cc: "Paul E. McKenney" <[email protected]>
> Cc: Will Deacon <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Boqun Feng <[email protected]>
> Cc: Alan Stern <[email protected]>
> Cc: John Stultz <[email protected]>
> Cc: Neeraj Upadhyay <[email protected]>
> Cc: Linus Torvalds <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Boqun Feng <[email protected]>
> Cc: Frederic Weisbecker <[email protected]>
> Cc: Joel Fernandes <[email protected]>
> Cc: Josh Triplett <[email protected]>
> Cc: Uladzislau Rezki <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Cc: Lai Jiangshan <[email protected]>
> Cc: Zqiang <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Waiman Long <[email protected]>
> Cc: Mark Rutland <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: Vlastimil Babka <[email protected]>
> Cc: [email protected]
> Cc: Mateusz Guzik <[email protected]>
> Cc: Jonas Oberhauser <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> ---
>  include/linux/hazptr.h | 63 ++++++++++++++++++++++++++++++++++++++++--
>  include/linux/sched.h  |  4 +++
>  init/init_task.c       |  3 ++
>  kernel/Kconfig.preempt | 10 +++++++
>  kernel/fork.c          |  3 ++
>  kernel/sched/core.c    |  2 ++
>  6 files changed, 83 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/hazptr.h b/include/linux/hazptr.h
> index 70c066ddb0f5..10ac53a42a7a 100644
> --- a/include/linux/hazptr.h
> +++ b/include/linux/hazptr.h
> @@ -24,6 +24,7 @@
>  #include <linux/percpu.h>
>  #include <linux/types.h>
>  #include <linux/cleanup.h>
> +#include <linux/sched.h>
>  
>  /* 8 slots (each sizeof(void *)) fit in a single cache line. */
>  #define NR_HAZPTR_PERCPU_SLOTS       8
> @@ -46,6 +47,9 @@ struct hazptr_ctx {
>       struct hazptr_slot *slot;
>       /* Backup slot in case all per-CPU slots are used. */
>       struct hazptr_backup_slot backup_slot;
> +#ifdef CONFIG_PREEMPT_HAZPTR

I would suggest we make the CONFIG_PREEMPT_HAZPTR behavior always
enabled, hence no need for a config option. Do we have measurements of
the additional cost?
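
That is, something along these lines, keeping the field unconditional
(untested sketch, fields taken from the hunk above):

	struct hazptr_ctx {
		struct hazptr_slot *slot;
		/* Backup slot in case all per-CPU slots are used. */
		struct hazptr_backup_slot backup_slot;
		/* Always present: chained on current->hazptr_ctx_list. */
		struct list_head preempt_node;
	};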

> +     struct list_head preempt_node;
> +#endif
>  };
>  
>  struct hazptr_percpu_slots {
> @@ -98,6 +102,50 @@ bool hazptr_slot_is_backup(struct hazptr_ctx *ctx, struct hazptr_slot *slot)
>       return slot == &ctx->backup_slot.slot;
>  }
>  
> +#ifdef CONFIG_PREEMPT_HAZPTR
> +static inline
> +void hazptr_chain_task_ctx(struct hazptr_ctx *ctx)
> +{
> +     list_add(&ctx->preempt_node, &current->hazptr_ctx_list);
> +}
> +
> +static inline
> +void hazptr_unchain_task_ctx(struct hazptr_ctx *ctx)
> +{
> +     list_del(&ctx->preempt_node);
> +}
> +

I think you need to add interrupt disabling around chain/unchain because
of potential readers in interrupt context, and then I think you can
avoid the preempt disabling in hazptr_release(). Let's aim for
supporting readers in interrupt handlers, because at least lockdep needs
that.
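
Something like the following, perhaps (untested sketch, assuming the
irqsave guard from <linux/cleanup.h>/<linux/irqflags.h> is usable here):

	static inline
	void hazptr_chain_task_ctx(struct hazptr_ctx *ctx)
	{
		/* Keep readers in interrupt context from observing a
		 * half-updated list. */
		guard(irqsave)();
		list_add(&ctx->preempt_node, &current->hazptr_ctx_list);
	}

	static inline
	void hazptr_unchain_task_ctx(struct hazptr_ctx *ctx)
	{
		guard(irqsave)();
		list_del(&ctx->preempt_node);
	}

With interrupts disabled across the list update, an interrupt handler
cannot see the list mid-modification, which should make the extra
guard(preempt)() in hazptr_release() below unnecessary.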

Regards,
Boqun

> +static inline
> +void hazptr_note_context_switch(void)
> +{
> +     struct hazptr_ctx *ctx;
> +
> +     list_for_each_entry(ctx, &current->hazptr_ctx_list, preempt_node) {
> +             struct hazptr_slot *slot;
> +
> +             if (hazptr_slot_is_backup(ctx, ctx->slot))
> +                     continue;
> +             slot = hazptr_chain_backup_slot(ctx);
> +             /*
> +              * Move hazard pointer from per-CPU slot to backup slot.
> +              * This requires hazard pointer synchronize to iterate
> +              * on per-CPU slots with load-acquire before iterating
> +              * on the overflow list.
> +              */
> +             WRITE_ONCE(slot->addr, ctx->slot->addr);
> +             /*
> +              * store-release orders store to backup slot addr before
> +              * store to per-CPU slot addr.
> +              */
> +             smp_store_release(&ctx->slot->addr, NULL);
> +     }
> +}
> +#else
> +static inline void hazptr_chain_task_ctx(struct hazptr_ctx *ctx) { }
> +static inline void hazptr_unchain_task_ctx(struct hazptr_ctx *ctx) { }
> +static inline void hazptr_note_context_switch(void) { }
> +#endif
> +
>  /*
>   * hazptr_acquire: Load pointer at address and protect with hazard pointer.
>   *
> @@ -114,6 +162,7 @@ void *hazptr_acquire(struct hazptr_ctx *ctx, void * const * addr_p)
>       struct hazptr_slot *slot = NULL;
>       void *addr, *addr2;
>  
> +     ctx->slot = NULL;
>       /*
>        * Load @addr_p to know which address should be protected.
>        */
> @@ -121,7 +170,9 @@ void *hazptr_acquire(struct hazptr_ctx *ctx, void * const * addr_p)
>       for (;;) {
>               if (!addr)
>                       return NULL;
> +
>               guard(preempt)();
> +             hazptr_chain_task_ctx(ctx);
>               if (likely(!hazptr_slot_is_backup(ctx, slot))) {
>                       slot = hazptr_get_free_percpu_slot();
>                       /*
> @@ -140,8 +191,11 @@ void *hazptr_acquire(struct hazptr_ctx *ctx, void * const * addr_p)
>                * Re-load @addr_p after storing it to the hazard pointer slot.
>                */
>               addr2 = READ_ONCE(*addr_p);     /* Load A */
> -             if (likely(ptr_eq(addr2, addr)))
> +             if (likely(ptr_eq(addr2, addr))) {
> +                     ctx->slot = slot;
> +                     /* Success. Break loop, enable preemption and return. */
>                       break;
> +             }
>               /*
>                * If @addr_p content has changed since the first load,
>                * release the hazard pointer and try again.
> @@ -150,11 +204,14 @@ void *hazptr_acquire(struct hazptr_ctx *ctx, void * const * addr_p)
>               if (!addr2) {
>                       if (hazptr_slot_is_backup(ctx, slot))
>                               hazptr_unchain_backup_slot(ctx);
> +                     hazptr_unchain_task_ctx(ctx);
> +                     /* Loaded NULL. Enable preemption and return NULL. */
>                       return NULL;
>               }
>               addr = addr2;
> +             hazptr_unchain_task_ctx(ctx);
> +             /* Enable preemption and retry. */
>       }
> -     ctx->slot = slot;
>       /*
>        * Use addr2 loaded from the second READ_ONCE() to preserve
>        * address dependency ordering.
> @@ -170,11 +227,13 @@ void hazptr_release(struct hazptr_ctx *ctx, void *addr)
>  
>       if (!addr)
>               return;
> +     guard(preempt)();
>       slot = ctx->slot;
>       WARN_ON_ONCE(slot->addr != addr);
>       smp_store_release(&slot->addr, NULL);
>       if (unlikely(hazptr_slot_is_backup(ctx, slot)))
>               hazptr_unchain_backup_slot(ctx);
> +     hazptr_unchain_task_ctx(ctx);
>  }
[...]