On Mon May 8, 2023 at 12:01 PM AEST, Rohan McLure wrote: > The idle_state entry in the PACA on PowerNV features a bit which is > atomically tested and set through ldarx/stdcx. to be used as a spinlock. > This lock then guards access to other bit fields of idle_state. KCSAN > cannot differentiate between any of these bitfield accesses as they all > are implemented by 8-byte store/load instructions, thus cores contending > on the bit-lock appear to data race with modifications to idle_state. > > Separate the bit-lock entry from the data guarded by the lock to avoid > the possibility of data races being detected by KCSAN. > > Suggested-by: Nicholas Piggin <npig...@gmail.com> > Signed-off-by: Rohan McLure <rmcl...@ibm.com> > --- > arch/powerpc/include/asm/paca.h | 1 + > arch/powerpc/platforms/powernv/idle.c | 20 +++++++++++--------- > 2 files changed, 12 insertions(+), 9 deletions(-) > > diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h > index da0377f46597..cb325938766a 100644 > --- a/arch/powerpc/include/asm/paca.h > +++ b/arch/powerpc/include/asm/paca.h > @@ -191,6 +191,7 @@ struct paca_struct { > #ifdef CONFIG_PPC_POWERNV > /* PowerNV idle fields */ > /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */ > + unsigned long idle_lock; /* A value of 1 means acquired */ > unsigned long idle_state; > union { > /* P7/P8 specific fields */ > diff --git a/arch/powerpc/platforms/powernv/idle.c > b/arch/powerpc/platforms/powernv/idle.c > index 841cb7f31f4f..97dbb7bc2b00 100644 > --- a/arch/powerpc/platforms/powernv/idle.c > +++ b/arch/powerpc/platforms/powernv/idle.c > @@ -246,9 +246,9 @@ static inline void atomic_lock_thread_idle(void) > { > int cpu = raw_smp_processor_id(); > int first = cpu_first_thread_sibling(cpu); > - unsigned long *state = &paca_ptrs[first]->idle_state; > + unsigned long *lock = &paca_ptrs[first]->idle_lock; > > - while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, > state))) > + while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock))) > barrier(); > } > > @@ -258,29 +258,31 @@ static inline void > atomic_unlock_and_stop_thread_idle(void) > int first = cpu_first_thread_sibling(cpu); > unsigned long thread = 1UL << cpu_thread_in_core(cpu); > unsigned long *state = &paca_ptrs[first]->idle_state; > + unsigned long *lock = &paca_ptrs[first]->idle_lock; > u64 s = READ_ONCE(*state); > u64 new, tmp; > > - BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT)); > + BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT)); > BUG_ON(s & thread); > > again: > - new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT; > + new = s | thread; > tmp = cmpxchg(state, s, new); > if (unlikely(tmp != s)) { > s = tmp; > goto again; > } > + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock);
Sigh, another atomic. It's in a slow path though so I won't get too upset. Would be nice to add a comment here and revert it when KCSCAN can be taught about this pattern though, so we don't lose it. > } > > static inline void atomic_unlock_thread_idle(void) > { > int cpu = raw_smp_processor_id(); > int first = cpu_first_thread_sibling(cpu); > - unsigned long *state = &paca_ptrs[first]->idle_state; > + unsigned long *lock = &paca_ptrs[first]->idle_lock; > > - BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state)); > - clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state); > + BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock)); > + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); > } > > /* P7 and P8 */ > @@ -380,9 +382,9 @@ static unsigned long power7_idle_insn(unsigned long type) > sprs.uamor = mfspr(SPRN_UAMOR); > } > > - local_paca->thread_idle_state = type; > + WRITE_ONCE(local_paca->thread_idle_state, type); > srr1 = isa206_idle_insn_mayloss(type); /* go idle */ > - local_paca->thread_idle_state = PNV_THREAD_RUNNING; > + WRITE_ONCE(local_paca->thread_idle_state, PNV_THREAD_RUNNING); Where is the thread_idle_state concurrency coming from? Thanks, Nick