Re: [GIT PULL] locking fixes
The pull request you sent on Sun, 11 Apr 2021 14:14:53 +0200: > git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git > locking-urgent-2021-04-11 has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/add6b92660b3dca65465d3bd7710b4b1338f34f0 Thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/prtracker.html
[GIT PULL] locking fixes
Linus, Please pull the latest locking/urgent git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-2021-04-11 # HEAD: 6d48b7912cc72275dc7c59ff961c8bac7ef66a92 lockdep: Address clang -Wformat warning printing for %hd Two minor fixes: one for a Clang warning, the other improves an ambiguous/confusing kernel log message. Thanks, Ingo --> Arnd Bergmann (1): lockdep: Address clang -Wformat warning printing for %hd Tetsuo Handa (1): lockdep: Add a missing initialization hint to the "INFO: Trying to register non-static key" message kernel/locking/lockdep.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c6d0c1dc6253..f160f1c97ca1 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -705,7 +705,7 @@ static void print_lock_name(struct lock_class *class) printk(KERN_CONT " ("); __print_lock_name(class); - printk(KERN_CONT "){%s}-{%hd:%hd}", usage, + printk(KERN_CONT "){%s}-{%d:%d}", usage, class->wait_type_outer ?: class->wait_type_inner, class->wait_type_inner); } @@ -930,7 +930,8 @@ static bool assign_lock_key(struct lockdep_map *lock) /* Debug-check: all keys must be persistent! */ debug_locks_off(); pr_err("INFO: trying to register non-static key.\n"); - pr_err("the code is fine but needs lockdep annotation.\n"); + pr_err("The code is fine but needs lockdep annotation, or maybe\n"); + pr_err("you didn't initialize this object before use?\n"); pr_err("turning off the locking correctness validator.\n"); dump_stack(); return false;
Re: [GIT PULL] locking fixes
The pull request you sent on Sun, 21 Mar 2021 11:53:43 +0100: > git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git > locking-urgent-2021-03-21 has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/5ba33b488a04a13268131b0b4748a7c6f3598693 Thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/prtracker.html
[GIT PULL] locking fixes
Linus, Please pull the latest locking/urgent git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-2021-03-21 # HEAD: 38c93587375053c5b9ef093f4a5ea754538cba32 static_call: Fix static_call_update() sanity check Locking fixes: - Get static calls & modules right. Hopefully. - WW mutex fixes Thanks, Ingo --> Peter Zijlstra (3): static_call: Fix static_call_set_init() static_call: Align static_call_is_init() patching condition static_call: Fix static_call_update() sanity check Waiman Long (2): locking/ww_mutex: Simplify use_ww_ctx & ww_ctx handling locking/ww_mutex: Fix acquire/release imbalance in ww_acquire_init()/ww_acquire_fini() include/linux/ww_mutex.h | 5 +++-- kernel/jump_label.c | 8 kernel/locking/mutex.c | 25 ++--- kernel/static_call.c | 42 -- 4 files changed, 49 insertions(+), 31 deletions(-) diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 850424e5d030..6ecf2a0220db 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -173,9 +173,10 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) */ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { -#ifdef CONFIG_DEBUG_MUTEXES +#ifdef CONFIG_DEBUG_LOCK_ALLOC mutex_release(&ctx->dep_map, _THIS_IP_); - +#endif +#ifdef CONFIG_DEBUG_MUTEXES DEBUG_LOCKS_WARN_ON(ctx->acquired); if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) /* diff --git a/kernel/jump_label.c b/kernel/jump_label.c index c6a39d662935..ba39fbb1f8e7 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -407,6 +407,14 @@ static bool jump_label_can_update(struct jump_entry *entry, bool init) return false; if (!kernel_text_address(jump_entry_code(entry))) { + /* +* This skips patching built-in __exit, which +* is part of init_section_contains() but is +* not part of kernel_text_address(). +* +* Skipping built-in __exit is fine since it +* will never be executed. +*/ WARN_ONCE(!jump_entry_is_init(entry), "can't patch jump_label at %pS", (void *)jump_entry_code(entry)); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index adb935090768..622ebdfcd083 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -626,7 +626,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) */ static __always_inline bool mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, - const bool use_ww_ctx, struct mutex_waiter *waiter) + struct mutex_waiter *waiter) { if (!waiter) { /* @@ -702,7 +702,7 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, #else static __always_inline bool mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, - const bool use_ww_ctx, struct mutex_waiter *waiter) + struct mutex_waiter *waiter) { return false; } @@ -922,6 +922,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct ww_mutex *ww; int ret; + if (!use_ww_ctx) + ww_ctx = NULL; + might_sleep(); #ifdef CONFIG_DEBUG_MUTEXES @@ -929,7 +932,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, #endif ww = container_of(lock, struct ww_mutex, base); - if (use_ww_ctx && ww_ctx) { + if (ww_ctx) { if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) return -EALREADY; @@ -946,10 +949,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); if (__mutex_trylock(lock) || - mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) { + mutex_optimistic_spin(lock, ww_ctx, NULL)) { /* got the lock, yay! */ lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx && ww_ctx) + if (ww_ctx) ww_mutex_set_context_fastpath(ww, ww_ctx); preempt_enable(); return 0; @@ -960,7 +963,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * After waiting to acquire the wait_lock, try again. */ if (__mutex_trylock(lock)) { - if (use_ww_ctx && ww_ctx) + if (ww_ctx) __ww_mutex_check_waiters(lock, ww_ctx); goto skip_wait; @@ -1013,7 +1016,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
Re: [GIT PULL] locking fixes
The pull request you sent on Sun, 27 Dec 2020 10:50:44 +0100: > git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git > locking-urgent-2020-12-27 has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/6be5f58215f1dcbd697a695ad5db9986c28c50c3 Thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/prtracker.html
[GIT PULL] locking fixes
Linus, Please pull the latest locking/urgent git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-2020-12-27 # HEAD: 91ea62d58bd661827c328a2c6c02a87fa4aae88b softirq: Avoid bad tracing / lockdep interaction Misc fixes/updates: - Fix static keys usage in module __init sections - Add separate MAINTAINERS entry for static branches/calls - Fix lockdep splat with CONFIG_PREEMPTIRQ_EVENTS=y tracing Thanks, Ingo --> Peter Zijlstra (3): jump_label: Fix usage in module __init jump_label/static_call: Add MAINTAINERS softirq: Avoid bad tracing / lockdep interaction MAINTAINERS | 16 kernel/jump_label.c | 8 +--- kernel/softirq.c| 2 +- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 281de213ef47..be02614ad28d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16675,6 +16675,22 @@ M: Ion Badulescu S: Odd Fixes F: drivers/net/ethernet/adaptec/starfire* +STATIC BRANCH/CALL +M: Peter Zijlstra +M: Josh Poimboeuf +M: Jason Baron +R: Steven Rostedt +R: Ard Biesheuvel +S: Supported +F: arch/*/include/asm/jump_label*.h +F: arch/*/include/asm/static_call*.h +F: arch/*/kernel/jump_label.c +F: arch/*/kernel/static_call.c +F: include/linux/jump_label*.h +F: include/linux/static_call*.h +F: kernel/jump_label.c +F: kernel/static_call.c + STEC S1220 SKD DRIVER M: Damien Le Moal L: linux-bl...@vger.kernel.org diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 015ef903ce8c..c6a39d662935 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -793,6 +793,7 @@ int jump_label_text_reserved(void *start, void *end) static void jump_label_update(struct static_key *key) { struct jump_entry *stop = __stop___jump_table; + bool init = system_state < SYSTEM_RUNNING; struct jump_entry *entry; #ifdef CONFIG_MODULES struct module *mod; @@ -804,15 +805,16 @@ static void jump_label_update(struct static_key *key) preempt_disable(); mod = __module_address((unsigned long)key); - if (mod) + if (mod) { stop = mod->jump_entries + mod->num_jump_entries; + init = mod->state == MODULE_STATE_COMING; + } preempt_enable(); #endif entry = static_key_entries(key); /* if there are no users, entry can be NULL */ if (entry) - __jump_label_update(key, entry, stop, - system_state < SYSTEM_RUNNING); + __jump_label_update(key, entry, stop, init); } #ifdef CONFIG_STATIC_KEYS_SELFTEST diff --git a/kernel/softirq.c b/kernel/softirq.c index 09229ad82209..0f1d3a32d53b 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -185,7 +185,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) * Keep preemption disabled until we are done with * softirq processing: */ - preempt_count_sub(cnt - 1); + __preempt_count_sub(cnt - 1); if (unlikely(!in_interrupt() && local_softirq_pending())) { /*
Re: [GIT PULL] locking fixes
The pull request you sent on Sat, 15 Aug 2020 13:13:23 +0200: > git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git > locking-urgent-2020-08-15 has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/eb1319af4119c4936e02c879295e9fd4d50bbe3a Thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/prtracker.html
[GIT PULL] locking fixes
Linus, Please pull the latest locking/urgent git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-2020-08-15 # HEAD: 405fa8ac89e7aaa87282df659e525992f2639e76 futex: Convert to use the preferred 'fallthrough' macro A documentation fix and a 'fallthrough' macro update. Thanks, Ingo --> Huang Shijie (1): Documentation/locking/locktypes: Fix a typo Miaohe Lin (1): futex: Convert to use the preferred 'fallthrough' macro Documentation/locking/locktypes.rst | 2 +- kernel/futex.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst index 1b577a8bf982..4cefed8048ca 100644 --- a/Documentation/locking/locktypes.rst +++ b/Documentation/locking/locktypes.rst @@ -10,7 +10,7 @@ Introduction The kernel provides a variety of locking primitives which can be divided -into two categories: +into three categories: - Sleeping locks - CPU local locks diff --git a/kernel/futex.c b/kernel/futex.c index 61e8153e6c76..a5876694a60e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3744,12 +3744,12 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; - /* fall through */ + fallthrough; case FUTEX_WAIT_BITSET: return futex_wait(uaddr, flags, val, timeout, val3); case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; - /* fall through */ + fallthrough; case FUTEX_WAKE_BITSET: return futex_wake(uaddr, flags, val, val3); case FUTEX_REQUEUE:
Re: [GIT PULL] locking fixes
* Linus Torvalds wrote: > On Sat, Apr 20, 2019 at 12:30 AM Ingo Molnar wrote: > > > > A lockdep warning fix and a script execution fix when atomics are > > generated. > > Hmm. I've pulled this, but looking at it, I think it's worth noting > something... > > > diff --git a/scripts/atomic/gen-atomics.sh b/scripts/atomic/gen-atomics.sh > > index 27400b0cd732..000dc6437893 100644 > > --- a/scripts/atomic/gen-atomics.sh > > +++ b/scripts/atomic/gen-atomics.sh > > - ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header} > > + /bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} > > > ${LINUXDIR}/include/${header} > > /bin/sh ? > > Yes, that's what the hash-bang line says in the scripts themselves, > and thus what we used to do with the whole direct execution thing, so > it's clearly not _wrong_, but every single time when we manually do > the "run with shell" normally, we use $(CONFIG_SHELL)". > > So I get the feeling that we should likely do that here too. > > Of course, the gen-atomics script is (now) outside the normal build, > so maybe people just go "this is special, doesn't go through the > normal build process anyway, and thus might as well not follow the > common rules". Yeah, agreed that this is all a bit weird. The status quo right now is: - scripts/atomic/gen-atomics.sh is a completely standalone, external script which isn't even tied into any Makefile mechanism to build the kernel. - To generate the headers one has to explicitly call scripts/atomic/gen-atomics.sh, and it's not even executable, so the incantation is even more weird: $ . scripts/atomic/gen-atomics.sh So I agree that the UI of all this should be improved, I suspect we should do the following improvements: - make gen-atomics.sh executable - add a "make headers_gen_atomics" target to the main Makefile - call gen-atomics.sh via the build system and thus have access to $CONFIG_SHELL and such and don't have assumptions about the shell environment. Arguably /bin/sh tends to exist during the build, everywhere. What usually results in the use of CONFIG_SHELL isn't /bin/sh per se but specific shell variant assumptions such as /bin/bash and the resulting occasional Bashism in the scripts - there are systems with non-bash shells by default and so. Thanks, Ingo
Re: [GIT PULL] locking fixes
The pull request you sent on Sat, 20 Apr 2019 09:30:18 +0200: > git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git > locking-urgent-for-linus has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/de3af9a99013fdb0358bb990e9bed0172c65bba9 Thank you! -- Deet-doot-dot, I am a bot. https://korg.wiki.kernel.org/userdoc/prtracker
Re: [GIT PULL] locking fixes
On Sat, Apr 20, 2019 at 12:30 AM Ingo Molnar wrote: > > A lockdep warning fix and a script execution fix when atomics are > generated. Hmm. I've pulled this, but looking at it, I think it's worth noting something... > diff --git a/scripts/atomic/gen-atomics.sh b/scripts/atomic/gen-atomics.sh > index 27400b0cd732..000dc6437893 100644 > --- a/scripts/atomic/gen-atomics.sh > +++ b/scripts/atomic/gen-atomics.sh > - ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header} > + /bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} > > ${LINUXDIR}/include/${header} /bin/sh ? Yes, that's what the hash-bang line says in the scripts themselves, and thus what we used to do with the whole direct execution thing, so it's clearly not _wrong_, but every single time when we manually do the "run with shell" normally, we use $(CONFIG_SHELL)". So I get the feeling that we should likely do that here too. Of course, the gen-atomics script is (now) outside the normal build, so maybe people just go "this is special, doesn't go through the normal build process anyway, and thus might as well not follow the common rules". Linus
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: b50776ae011cfd26df3cc2b4af8b2dc3b683e553 locking/atomics: Don't assume that scripts are executable A lockdep warning fix and a script execution fix when atomics are generated. Thanks, Ingo --> Andrew Morton (1): locking/atomics: Don't assume that scripts are executable Bart Van Assche (1): locking/lockdep: Make lockdep_unregister_key() honor 'debug_locks' again kernel/locking/lockdep.c | 9 + scripts/atomic/gen-atomics.sh | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index e16766ff184b..e221be724fe8 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4907,8 +4907,9 @@ void lockdep_unregister_key(struct lock_class_key *key) return; raw_local_irq_save(flags); - arch_spin_lock(&lockdep_lock); - current->lockdep_recursion = 1; + if (!graph_lock()) + goto out_irq; + pf = get_pending_free(); hlist_for_each_entry_rcu(k, hash_head, hash_entry) { if (k == key) { @@ -4920,8 +4921,8 @@ void lockdep_unregister_key(struct lock_class_key *key) WARN_ON_ONCE(!found); __lockdep_free_key_range(pf, key, 1); call_rcu_zapped(pf); - current->lockdep_recursion = 0; - arch_spin_unlock(&lockdep_lock); + graph_unlock(); +out_irq: raw_local_irq_restore(flags); /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */ diff --git a/scripts/atomic/gen-atomics.sh b/scripts/atomic/gen-atomics.sh index 27400b0cd732..000dc6437893 100644 --- a/scripts/atomic/gen-atomics.sh +++ b/scripts/atomic/gen-atomics.sh @@ -13,7 +13,7 @@ gen-atomic-long.sh asm-generic/atomic-long.h gen-atomic-fallback.sh linux/atomic-fallback.h EOF while read script header; do - ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header} + /bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header} HASH="$(sha1sum ${LINUXDIR}/include/${header})" HASH="${HASH%% *}" printf "// %s\n" "${HASH}" >> ${LINUXDIR}/include/${header}
Re: [GIT pull] locking fixes for 5.1
The pull request you sent on Sun, 10 Mar 2019 12:33:47 +0100: > git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git > locking-urgent-for-linus has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/9e55f87c0e3b3db11f52834222f881094eb97205 Thank you! -- Deet-doot-dot, I am a bot. https://korg.wiki.kernel.org/userdoc/prtracker
[GIT pull] locking fixes for 5.1
Linus, please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus A few fixes for lockdep: - Initialize lockdep internal RCU head after initializing RCU - Prevent use after free in a alloc_workqueue() error handling path - Plug a memory leak in the workqueue core which fails to free a dynamically allocated lock name. - Make Clang happy Thanks, tglx --> Arnd Bergmann (1): locking/lockdep: Avoid a Clang warning Bart Van Assche (2): locking/lockdep: Only call init_rcu_head() after RCU has been initialized workqueue, lockdep: Fix an alloc_workqueue() error path Qian Cai (1): workqueue, lockdep: Fix a memory leak in wq->lock_name kernel/locking/lockdep.c | 19 ++- kernel/workqueue.c | 4 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 21cb81fe6359..34cdcbedda49 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -842,7 +842,9 @@ static bool class_lock_list_valid(struct lock_class *c, struct list_head *h) return true; } -static u16 chain_hlocks[]; +#ifdef CONFIG_PROVE_LOCKING +static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS]; +#endif static bool check_lock_chain_key(struct lock_chain *chain) { @@ -980,15 +982,22 @@ static inline void check_data_structures(void) { } */ static void init_data_structures_once(void) { - static bool initialization_happened; + static bool ds_initialized, rcu_head_initialized; int i; - if (likely(initialization_happened)) + if (likely(rcu_head_initialized)) + return; + + if (system_state >= SYSTEM_SCHEDULING) { + init_rcu_head(&delayed_free.rcu_head); + rcu_head_initialized = true; + } + + if (ds_initialized) return; - initialization_happened = true; + ds_initialized = true; - init_rcu_head(&delayed_free.rcu_head); INIT_LIST_HEAD(&delayed_free.pf[0].zapped); INIT_LIST_HEAD(&delayed_free.pf[1].zapped); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e163e7a7f5e5..e14da4f599ab 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3348,6 +3348,8 @@ static void wq_init_lockdep(struct workqueue_struct *wq) lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name); if (!lock_name) lock_name = wq->name; + + wq->lock_name = lock_name; lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0); } @@ -4194,6 +4196,8 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, return wq; err_free_wq: + wq_unregister_lockdep(wq); + wq_free_lockdep(wq); free_workqueue_attrs(wq->unbound_attrs); kfree(wq); return NULL;
Re: [GIT PULL] locking fixes
The pull request you sent on Sun, 10 Feb 2019 09:53:31 +0100: > git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git > locking-urgent-for-linus has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/d2a6aae99f5fa2f1e7e400bd2f17c1d42c50312a Thank you! -- Deet-doot-dot, I am a bot. https://korg.wiki.kernel.org/userdoc/prtracker
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 1a1fb985f2e2b85ec0d3dc2e519ee48389ec2434 futex: Handle early deadlock return correctly An rtmutex (PI-futex) deadlock scenario fix, plus a locking documentation fix. Thanks, Ingo --> Davidlohr Bueso (1): futex: Fix barrier comment Thomas Gleixner (1): futex: Handle early deadlock return correctly kernel/futex.c | 32 kernel/locking/rtmutex.c | 37 - 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index fdd312da0992..a0514e01c3eb 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2221,11 +2221,11 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) * decrement the counter at queue_unlock() when some error has * occurred and we don't end up adding the task to the list. */ - hb_waiters_inc(hb); + hb_waiters_inc(hb); /* implies smp_mb(); (A) */ q->lock_ptr = &hb->lock; - spin_lock(&hb->lock); /* implies smp_mb(); (A) */ + spin_lock(&hb->lock); return hb; } @@ -2861,35 +2861,39 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, * and BUG when futex_unlock_pi() interleaves with this. * * Therefore acquire wait_lock while holding hb->lock, but drop the -* latter before calling rt_mutex_start_proxy_lock(). This still fully -* serializes against futex_unlock_pi() as that does the exact same -* lock handoff sequence. +* latter before calling __rt_mutex_start_proxy_lock(). This +* interleaves with futex_unlock_pi() -- which does a similar lock +* handoff -- such that the latter can observe the futex_q::pi_state +* before __rt_mutex_start_proxy_lock() is done. */ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); spin_unlock(q.lock_ptr); + /* +* __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter +* such that futex_unlock_pi() is guaranteed to observe the waiter when +* it sees the futex_q::pi_state. +*/ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); if (ret) { if (ret == 1) ret = 0; - - spin_lock(q.lock_ptr); - goto no_block; + goto cleanup; } - if (unlikely(to)) hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); +cleanup: spin_lock(q.lock_ptr); /* -* If we failed to acquire the lock (signal/timeout), we must +* If we failed to acquire the lock (deadlock/signal/timeout), we must * first acquire the hb->lock before removing the lock from the -* rt_mutex waitqueue, such that we can keep the hb and rt_mutex -* wait lists consistent. +* rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait +* lists consistent. * * In particular; it is important that futex_unlock_pi() can not * observe this inconsistency. @@ -3013,6 +3017,10 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) * there is no point where we hold neither; and therefore * wake_futex_pi() must observe a state consistent with what we * observed. +* +* In particular; this forces __rt_mutex_start_proxy() to +* complete such that we're guaranteed to observe the +* rt_waiter. Also see the WARN in wake_futex_pi(). */ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 581edcc63c26..978d63a8261c 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1726,12 +1726,33 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, rt_mutex_set_owner(lock, NULL); } +/** + * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take + * @waiter:the pre-initialized rt_mutex_waiter + * @task: the task to prepare + * + * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock + * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. + * + * NOTE: does _NOT_ remove the @waiter on failure; must either call + * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this. + * + * Returns: + * 0 - task blocked on lock + * 1 - acquired t
Re: [GIT pull] locking fixes for 4.20
The pull request you sent on Sun, 11 Nov 2018 09:11:07 +0100: > git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git > locking-urgent-for-linus has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/1acf93ca6c53144f5ffd408f1820ef5431656eb7 Thank you! -- Deet-doot-dot, I am a bot. https://korg.wiki.kernel.org/userdoc/prtracker
[GIT pull] locking fixes for 4.20
Linus, please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus A single fix for a build fail with CONFIG_PROFILE_ALL_BRANCHES=y in the qspinlock code. Thanks, tglx --> Peter Zijlstra (1): x86/qspinlock: Fix compile error arch/x86/include/asm/qspinlock.h | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 87623c6b13db..bd5ac6cc37db 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -13,12 +13,15 @@ #define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock) { - u32 val = 0; - - if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c, -"I", _Q_PENDING_OFFSET)) - val |= _Q_PENDING_VAL; + u32 val; + /* +* We can't use GEN_BINARY_RMWcc() inside an if() stmt because asm goto +* and CONFIG_PROFILE_ALL_BRANCHES=y results in a label inside a +* statement expression, which GCC doesn't like. +*/ + val = GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c, + "I", _Q_PENDING_OFFSET) * _Q_PENDING_VAL; val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK; return val;
Re: [GIT PULL] locking fixes
On Fri, Oct 05, 2018 at 11:36:47AM +0200, Ingo Molnar wrote: > Greg, > > Please pull the latest locking-urgent-for-linus git tree from: > >git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git > locking-urgent-for-linus Now merged, thanks. greg k-h
[GIT PULL] locking fixes
Greg, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: e4a02ed2aaf447fa849e3254bfdb3b9b01e1e520 locking/ww_mutex: Fix runtime warning in the WW mutex selftest A fix in the ww_mutex self-test that produces a scary splat, plus an updates to the maintained-filed patters in MAINTAINER. Thanks, Ingo --> Guenter Roeck (1): locking/ww_mutex: Fix runtime warning in the WW mutex selftest Will Deacon (1): MAINTAINERS: Remove dead path from LOCKING PRIMITIVES entry MAINTAINERS| 1 - kernel/locking/test-ww_mutex.c | 10 ++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a255240d1452..3bd8913b2d78 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8598,7 +8598,6 @@ F:include/linux/spinlock*.h F: arch/*/include/asm/spinlock*.h F: include/linux/rwlock*.h F: include/linux/mutex*.h -F: arch/*/include/asm/mutex*.h F: include/linux/rwsem*.h F: arch/*/include/asm/rwsem.h F: include/linux/seqlock.h diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 0be047dbd897..65a3b7e55b9f 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -260,7 +260,7 @@ static void test_cycle_work(struct work_struct *work) { struct test_cycle *cycle = container_of(work, typeof(*cycle), work); struct ww_acquire_ctx ctx; - int err; + int err, erra = 0; ww_acquire_init(&ctx, &ww_class); ww_mutex_lock(&cycle->a_mutex, &ctx); @@ -270,17 +270,19 @@ static void test_cycle_work(struct work_struct *work) err = ww_mutex_lock(cycle->b_mutex, &ctx); if (err == -EDEADLK) { + err = 0; ww_mutex_unlock(&cycle->a_mutex); ww_mutex_lock_slow(cycle->b_mutex, &ctx); - err = ww_mutex_lock(&cycle->a_mutex, &ctx); + erra = ww_mutex_lock(&cycle->a_mutex, &ctx); } if (!err) ww_mutex_unlock(cycle->b_mutex); - ww_mutex_unlock(&cycle->a_mutex); + if (!erra) + ww_mutex_unlock(&cycle->a_mutex); ww_acquire_fini(&ctx); - cycle->result = err; + cycle->result = err ?: erra; } static int __test_cycle(unsigned int nthreads)
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 0b405c65ad459f5f4d3db1672246172bd19d946d locking/ww_mutex: Fix spelling mistake "cylic" -> "cyclic" Misc fixes: liblockdep fixes and ww_mutex fixes. Thanks, Ingo --> Ben Hutchings (1): locking/lockdep: Delete unnecessary #include Borislav Petkov (1): jump_label: Fix typo in warning message Colin Ian King (1): locking/ww_mutex: Fix spelling mistake "cylic" -> "cyclic" Sasha Levin (3): tools/lib/lockdep: Update Sasha Levin email to MSFT tools/lib/lockdep: Add empty nmi.h tools/lib/lockdep: Add dummy task_struct state member Thomas Hellstrom (1): locking/mutex: Fix mutex debug call and ww_mutex documentation MAINTAINERS| 2 +- kernel/jump_label.c| 2 +- kernel/locking/lockdep.c | 1 - kernel/locking/mutex.c | 3 +-- kernel/locking/test-ww_mutex.c | 2 +- tools/include/linux/lockdep.h | 3 +++ tools/include/linux/nmi.h | 0 7 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 tools/include/linux/nmi.h diff --git a/MAINTAINERS b/MAINTAINERS index d870cb57c887..f999786cfa90 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8300,7 +8300,7 @@ F:include/linux/libata.h F: Documentation/devicetree/bindings/ata/ LIBLOCKDEP -M: Sasha Levin +M: Sasha Levin S: Maintained F: tools/lib/lockdep/ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 01ebdf1f9f40..2e62503bea0d 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -678,7 +678,7 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, case MODULE_STATE_COMING: ret = jump_label_add_module(mod); if (ret) { - WARN(1, "Failed to allocatote memory: jump_label may not work properly.\n"); + WARN(1, "Failed to allocate memory: jump_label may not work properly.\n"); jump_label_del_module(mod); } break; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index e406c5fdb41e..dd13f865ad40 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -55,7 +55,6 @@ #include "lockdep_internals.h" -#include #define CREATE_TRACE_POINTS #include diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 1a81a1257b3f..3f8a35104285 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -389,7 +389,7 @@ static bool __ww_mutex_wound(struct mutex *lock, /* * wake_up_process() paired with set_current_state() * inserts sufficient barriers to make sure @owner either sees -* it's wounded in __ww_mutex_lock_check_stamp() or has a +* it's wounded in __ww_mutex_check_kill() or has a * wakeup pending to re-read the wounded state. */ if (owner != current) @@ -946,7 +946,6 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } debug_mutex_lock_common(lock, &waiter); - debug_mutex_add_waiter(lock, &waiter, current); lock_contended(&lock->dep_map, ip); diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 5b915b370d5a..0be047dbd897 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -324,7 +324,7 @@ static int __test_cycle(unsigned int nthreads) if (!cycle->result) continue; - pr_err("cylic deadlock not resolved, ret[%d/%d] = %d\n", + pr_err("cyclic deadlock not resolved, ret[%d/%d] = %d\n", n, nthreads, cycle->result); ret = -EINVAL; break; diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h index 6b0c36a58fcb..e56997288f2b 100644 --- a/tools/include/linux/lockdep.h +++ b/tools/include/linux/lockdep.h @@ -30,9 +30,12 @@ struct task_struct { struct held_lock held_locks[MAX_LOCK_DEPTH]; gfp_t lockdep_reclaim_gfp; int pid; + int state; char comm[17]; }; +#define TASK_RUNNING 0 + extern struct task_struct *__curr(void); #define current (__curr()) diff --git a/tools/include/linux/nmi.h b/tools/include/linux/nmi.h new file mode 100644 index ..e69de29bb2d1
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: c0dc373a780f4ec63e45a573b9551763abd8cd1a locking/pvqspinlock/x86: Use LOCK_PREFIX in __pv_queued_spin_unlock() assembly code A paravirt UP-patching fix, and an I2C MUX driver lockdep warning fix. Thanks, Ingo --> Peter Rosin (2): locking/rtmutex: Allow specifying a subclass for nested locking i2c/mux, locking/core: Annotate the nested rt_mutex usage Waiman Long (1): locking/pvqspinlock/x86: Use LOCK_PREFIX in __pv_queued_spin_unlock() assembly code arch/x86/include/asm/qspinlock_paravirt.h | 2 +- drivers/i2c/i2c-core-base.c | 2 +- drivers/i2c/i2c-mux.c | 4 ++-- include/linux/rtmutex.h | 7 +++ kernel/locking/rtmutex.c | 29 + 5 files changed, 36 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 9ef5ee03d2d7..159622ee0674 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -43,7 +43,7 @@ asm(".pushsection .text;" "push %rdx;" "mov $0x1,%eax;" "xor %edx,%edx;" - "lock cmpxchg %dl,(%rdi);" + LOCK_PREFIX "cmpxchg %dl,(%rdi);" "cmp $0x1,%al;" "jne .slowpath;" "pop %rdx;" diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 301285c54603..15c95aaa484c 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -624,7 +624,7 @@ static int i2c_check_addr_busy(struct i2c_adapter *adapter, int addr) static void i2c_adapter_lock_bus(struct i2c_adapter *adapter, unsigned int flags) { - rt_mutex_lock(&adapter->bus_lock); + rt_mutex_lock_nested(&adapter->bus_lock, i2c_adapter_depth(adapter)); } /** diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 300ab4b672e4..29646aa6132e 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -144,7 +144,7 @@ static void i2c_mux_lock_bus(struct i2c_adapter *adapter, unsigned int flags) struct i2c_mux_priv *priv = adapter->algo_data; struct i2c_adapter *parent = priv->muxc->parent; - rt_mutex_lock(&parent->mux_lock); + rt_mutex_lock_nested(&parent->mux_lock, i2c_adapter_depth(adapter)); if (!(flags & I2C_LOCK_ROOT_ADAPTER)) return; i2c_lock_bus(parent, flags); @@ -181,7 +181,7 @@ static void i2c_parent_lock_bus(struct i2c_adapter *adapter, struct i2c_mux_priv *priv = adapter->algo_data; struct i2c_adapter *parent = priv->muxc->parent; - rt_mutex_lock(&parent->mux_lock); + rt_mutex_lock_nested(&parent->mux_lock, i2c_adapter_depth(adapter)); i2c_lock_bus(parent, flags); } diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 1b92a28dd672..6fd615a0eea9 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -106,7 +106,14 @@ static inline int rt_mutex_is_locked(struct rt_mutex *lock) extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key); extern void rt_mutex_destroy(struct rt_mutex *lock); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass); +#define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0) +#else extern void rt_mutex_lock(struct rt_mutex *lock); +#define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock) +#endif + extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); extern int rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 4f014be7a4b8..2823d4163a37 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1465,6 +1465,29 @@ rt_mutex_fastunlock(struct rt_mutex *lock, rt_mutex_postunlock(&wake_q); } +static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass) +{ + might_sleep(); + + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/** + * rt_mutex_lock_nested - lock a rt_mutex + * + * @lock: the rt_mutex to be locked + * @subclass: the lockdep subclass + */ +void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) +{ + __rt_mutex_lock(lock, subclass); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); +#endif + +#ifndef CONFIG_DEBUG_LOCK_ALLOC /** * rt_mutex_lock - lock a rt_mutex * @@ -1472,12 +1495,10 @@ rt_mutex_fastunlock(struct rt_mutex *lock, */ void __sched rt_mutex_lock(struct rt_mutex *lock) { - might_sleep(); - -
[GIT pull] locking fixes for 4.17
Linus, please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus Two fixes to address shortcomings of the rwsem/percpu-rwsem lock debugging code which emits false positive warnings when the rwsem is anonymously locked and unlocked. Thanks, tglx --> Waiman Long (2): locking/rwsem: Add a new RWSEM_ANONYMOUSLY_OWNED flag locking/percpu-rwsem: Annotate rwsem ownership transfer by setting RWSEM_OWNER_UNKNOWN include/linux/percpu-rwsem.h | 6 +- include/linux/rwsem.h| 6 ++ kernel/locking/rwsem-xadd.c | 19 +-- kernel/locking/rwsem.c | 2 -- kernel/locking/rwsem.h | 30 +- 5 files changed, 41 insertions(+), 22 deletions(-) diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index b1f37a89e368..79b99d653e03 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -133,7 +133,7 @@ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, lock_release(&sem->rw_sem.dep_map, 1, ip); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER if (!read) - sem->rw_sem.owner = NULL; + sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN; #endif } @@ -141,6 +141,10 @@ static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip); +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER + if (!read) + sem->rw_sem.owner = current; +#endif } #endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 56707d5ff6ad..ab93b6eae696 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -44,6 +44,12 @@ struct rw_semaphore { #endif }; +/* + * Setting bit 0 of the owner field with other non-zero bits will indicate + * that the rwsem is writer-owned with an unknown owner. + */ +#define RWSEM_OWNER_UNKNOWN((struct task_struct *)-1L) + extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index e795908f3607..a90336779375 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -352,16 +352,15 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) struct task_struct *owner; bool ret = true; + BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN)); + if (need_resched()) return false; rcu_read_lock(); owner = READ_ONCE(sem->owner); - if (!rwsem_owner_is_writer(owner)) { - /* -* Don't spin if the rwsem is readers owned. -*/ - ret = !rwsem_owner_is_reader(owner); + if (!owner || !is_rwsem_owner_spinnable(owner)) { + ret = !owner; /* !owner is spinnable */ goto done; } @@ -382,11 +381,11 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner = READ_ONCE(sem->owner); - if (!rwsem_owner_is_writer(owner)) - goto out; + if (!is_rwsem_owner_spinnable(owner)) + return false; rcu_read_lock(); - while (sem->owner == owner) { + while (owner && (READ_ONCE(sem->owner) == owner)) { /* * Ensure we emit the owner->on_cpu, dereference _after_ * checking sem->owner still matches owner, if that fails, @@ -408,12 +407,12 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem) cpu_relax(); } rcu_read_unlock(); -out: + /* * If there is a new owner or the owner is not set, we continue * spinning. */ - return !rwsem_owner_is_reader(READ_ONCE(sem->owner)); + return is_rwsem_owner_spinnable(READ_ONCE(sem->owner)); } static bool rwsem_optimistic_spin(struct rw_semaphore *sem) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 30465a2f2b6c..bc1e507be9ff 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -221,5 +221,3 @@ void up_read_non_owner(struct rw_semaphore *sem) EXPORT_SYMBOL(up_read_non_owner); #endif - - diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index a17cba8d94bb..b9d0e72aa80f 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -1,20 +1,24 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * The owner field of the rw_semaphore structure will be set to - * RWSEM_READ_OWNED when a reader grabs the lock. A writer will clear + * RWSEM_READER_OWNED when a reader grabs the lock. A writer will
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 578ae447e7e5d78c90ac40a06406c1741f79ba96 jump_label: Disable jump labels in __exit code Two fixes: tighten up a jump-labels warning to not trigger on certain modules and fix confusing (and non-existent) mutex API documentation. Thanks, Ingo --> Josh Poimboeuf (1): jump_label: Disable jump labels in __exit code Matthew Wilcox (1): locking/mutex: Improve documentation include/linux/jump_label.h | 4 ++-- init/main.c| 2 +- kernel/jump_label.c| 7 --- kernel/locking/mutex.c | 37 ++--- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 2168cc6b8b30..b46b541c67c4 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -151,7 +151,7 @@ extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); -extern void jump_label_invalidate_init(void); +extern void jump_label_invalidate_initmem(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, @@ -199,7 +199,7 @@ static __always_inline void jump_label_init(void) static_key_initialized = true; } -static inline void jump_label_invalidate_init(void) {} +static inline void jump_label_invalidate_initmem(void) {} static __always_inline bool static_key_false(struct static_key *key) { diff --git a/init/main.c b/init/main.c index 969eaf140ef0..21efbf6ace93 100644 --- a/init/main.c +++ b/init/main.c @@ -1001,7 +1001,7 @@ static int __ref kernel_init(void *unused) /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); ftrace_free_init_mem(); - jump_label_invalidate_init(); + jump_label_invalidate_initmem(); free_initmem(); mark_readonly(); system_state = SYSTEM_RUNNING; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index e7214093dcd1..01ebdf1f9f40 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef HAVE_JUMP_LABEL @@ -421,15 +422,15 @@ void __init jump_label_init(void) cpus_read_unlock(); } -/* Disable any jump label entries in __init code */ -void __init jump_label_invalidate_init(void) +/* Disable any jump label entries in __init/__exit code */ +void __init jump_label_invalidate_initmem(void) { struct jump_entry *iter_start = __start___jump_table; struct jump_entry *iter_stop = __stop___jump_table; struct jump_entry *iter; for (iter = iter_start; iter < iter_stop; iter++) { - if (init_kernel_text(iter->code)) + if (init_section_contains((void *)(unsigned long)iter->code, 1)) iter->code = 0; } } diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 858a07590e39..2048359f33d2 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -1082,15 +1082,16 @@ static noinline int __sched __mutex_lock_interruptible_slowpath(struct mutex *lock); /** - * mutex_lock_interruptible - acquire the mutex, interruptible - * @lock: the mutex to be acquired + * mutex_lock_interruptible() - Acquire the mutex, interruptible by signals. + * @lock: The mutex to be acquired. * - * Lock the mutex like mutex_lock(), and return 0 if the mutex has - * been acquired or sleep until the mutex becomes available. If a - * signal arrives while waiting for the lock then this function - * returns -EINTR. + * Lock the mutex like mutex_lock(). If a signal is delivered while the + * process is sleeping, this function will return without acquiring the + * mutex. * - * This function is similar to (but not equivalent to) down_interruptible(). + * Context: Process context. + * Return: 0 if the lock was successfully acquired or %-EINTR if a + * signal arrived. */ int __sched mutex_lock_interruptible(struct mutex *lock) { @@ -1104,6 +1105,18 @@ int __sched mutex_lock_interruptible(struct mutex *lock) EXPORT_SYMBOL(mutex_lock_interruptible); +/** + * mutex_lock_killable() - Acquire the mutex, interruptible by fatal signals. + * @lock: The mutex to be acquired. + * + * Lock the mutex like mutex_lock(). If a signal which will be fatal to + * the current process is delivered while the process is sleeping, this + * function will return without acquiring the mutex. + * + * Context: Process context. + * Return: 0 if the lock was successfully acquired or %-EINTR if a + * fatal signal arrived. + */ int __sched mutex_lock_killable(struct mutex *lock) { might_sleep(); @@ -1115,6 +1128,16 @@ int __sched mutex_lock_killable(struct mutex *lo
[GIT pull] locking fixes for 4.16
Linus, please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus Three patches to fix memory ordering issues on ALPHA and a comment to clarify the usage scope of a mutex internal function. Thanks, tglx --> Andrea Parri (3): locking/xchg/alpha: Add unconditional memory barrier to cmpxchg() locking/xchg/alpha: Clean up barrier usage by using smp_mb() in place of __ASM__MB locking/xchg/alpha: Fix xchg() and cmpxchg() memory ordering bugs Peter Zijlstra (1): locking/mutex: Add comment to __mutex_owner() to deter usage arch/alpha/include/asm/cmpxchg.h | 6 -- arch/alpha/include/asm/xchg.h| 38 ++ include/linux/mutex.h| 5 + 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h index 46ebf14aed4e..8a2b331e43fe 100644 --- a/arch/alpha/include/asm/cmpxchg.h +++ b/arch/alpha/include/asm/cmpxchg.h @@ -6,7 +6,6 @@ * Atomic exchange routines. */ -#define __ASM__MB #define xchg(type, args...)__xchg ## type ## _local(args) #define cmpxchg(type, args...) __cmpxchg ## type ## _local(args) #include @@ -33,10 +32,6 @@ cmpxchg_local((ptr), (o), (n)); \ }) -#ifdef CONFIG_SMP -#undef __ASM__MB -#define __ASM__MB "\tmb\n" -#endif #undef xchg #undef cmpxchg #define xchg(type, args...)__xchg ##type(args) @@ -64,7 +59,6 @@ cmpxchg((ptr), (o), (n)); \ }) -#undef __ASM__MB #undef cmpxchg #endif /* _ALPHA_CMPXCHG_H */ diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h index 68dfb3cb7145..e2b59fac5257 100644 --- a/arch/alpha/include/asm/xchg.h +++ b/arch/alpha/include/asm/xchg.h @@ -12,6 +12,10 @@ * Atomic exchange. * Since it can be used to implement critical sections * it must clobber "memory" (also for interrupts in UP). + * + * The leading and the trailing memory barriers guarantee that these + * operations are fully ordered. + * */ static inline unsigned long @@ -19,6 +23,7 @@ xchg(_u8, volatile char *m, unsigned long val) { unsigned long ret, tmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %4,7,%3\n" " insbl %1,%4,%1\n" @@ -28,12 +33,12 @@ xchg(_u8, volatile char *m, unsigned long val) " or %1,%2,%2\n" " stq_c %2,0(%3)\n" " beq %2,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) : "r" ((long)m), "1" (val) : "memory"); + smp_mb(); return ret; } @@ -43,6 +48,7 @@ xchg(_u16, volatile short *m, unsigned long val) { unsigned long ret, tmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %4,7,%3\n" " inswl %1,%4,%1\n" @@ -52,12 +58,12 @@ xchg(_u16, volatile short *m, unsigned long val) " or %1,%2,%2\n" " stq_c %2,0(%3)\n" " beq %2,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) : "r" ((long)m), "1" (val) : "memory"); + smp_mb(); return ret; } @@ -67,17 +73,18 @@ xchg(_u32, volatile int *m, unsigned long val) { unsigned long dummy; + smp_mb(); __asm__ __volatile__( "1: ldl_l %0,%4\n" " bis $31,%3,%1\n" " stl_c %1,%2\n" " beq %1,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (val), "=&r" (dummy), "=m" (*m) : "rI" (val), "m" (*m) : "memory"); + smp_mb(); return val; } @@ -87,17 +94,18 @@ xchg(_u64, volatile long *m, unsigned long val) { unsigned long dummy; + smp_mb(); __asm__ __volatile__( "1: ldq_l %0,%4\n" " bis $31,%3,%1\n" " stq_c %1,%2\n" " beq %1,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (val), "=&r" (dummy), "=m" (*m) : "rI" (val), "m" (*m) : "memory"); + smp_mb(); return val; } @@ -128,10 +136,12 @@ xchg(, volatile void *ptr, unsigned long x, int size) * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. * - * The memory barrier should be placed in SMP only when we actually - * make the change. If we don't change anything (so if the returned - * prev is equal to old) then we aren't ac
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 2dd6fd2e999774041397f2a7da2e1d30b3a27c3a locking/semaphore: Update the file path in documentation This tree contains two qspinlock fixes and three documentation and comment fixes. Thanks, Ingo --> Juri Lelli (1): Documentation/locking/mutex-design: Update to reflect latest changes Tycho Andersen (1): locking/semaphore: Update the file path in documentation Will Deacon (3): locking/qspinlock: Ensure node is initialised before updating prev->next locking/qspinlock: Ensure node->count is updated before initialising node locking/atomic/bitops: Document and clarify ordering semantics for failed test_and_{}_bit() Documentation/atomic_bitops.txt| 7 - Documentation/locking/mutex-design.txt | 49 -- include/asm-generic/bitops/lock.h | 3 ++- include/linux/semaphore.h | 2 +- kernel/locking/qspinlock.c | 21 ++- 5 files changed, 41 insertions(+), 41 deletions(-) diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt index 5550bfdcce5f..be70b32c95d9 100644 --- a/Documentation/atomic_bitops.txt +++ b/Documentation/atomic_bitops.txt @@ -58,7 +58,12 @@ ORDERING - RMW operations that have a return value are fully ordered. -Except for test_and_set_bit_lock() which has ACQUIRE semantics and + - RMW operations that are conditional are unordered on FAILURE, + otherwise the above rules apply. In the case of test_and_{}_bit() operations, + if the bit in memory is unchanged by the operation then it is deemed to have + failed. + +Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and clear_bit_unlock() which has RELEASE semantics. Since a platform only has a single means of achieving atomic operations diff --git a/Documentation/locking/mutex-design.txt b/Documentation/locking/mutex-design.txt index 60c482df1a38..818aca19612f 100644 --- a/Documentation/locking/mutex-design.txt +++ b/Documentation/locking/mutex-design.txt @@ -21,37 +21,23 @@ Implementation -- Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h -and implemented in kernel/locking/mutex.c. These locks use a three -state atomic counter (->count) to represent the different possible -transitions that can occur during the lifetime of a lock: - - 1: unlocked - 0: locked, no waiters - negative: locked, with potential waiters - -In its most basic form it also includes a wait-queue and a spinlock -that serializes access to it. CONFIG_SMP systems can also include -a pointer to the lock task owner (->owner) as well as a spinner MCS -lock (->osq), both described below in (ii). +and implemented in kernel/locking/mutex.c. These locks use an atomic variable +(->owner) to keep track of the lock state during its lifetime. Field owner +actually contains 'struct task_struct *' to the current lock owner and it is +therefore NULL if not currently owned. Since task_struct pointers are aligned +at at least L1_CACHE_BYTES, low bits (3) are used to store extra state (e.g., +if waiter list is non-empty). In its most basic form it also includes a +wait-queue and a spinlock that serializes access to it. Furthermore, +CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described +below in (ii). When acquiring a mutex, there are three possible paths that can be taken, depending on the state of the lock: -(i) fastpath: tries to atomically acquire the lock by decrementing the -counter. If it was already taken by another task it goes to the next -possible path. This logic is architecture specific. On x86-64, the -locking fastpath is 2 instructions: - -0e10 : -e21: f0 ff 0block decl (%rbx) -e24: 79 08 jnse2e - - the unlocking fastpath is equally tight: - -0bc0 : -bc8: f0 ff 07lock incl (%rdi) -bcb: 7f 0a jg bd7 - +(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with +the current task. This only works in the uncontended case (cmpxchg() checks +against 0UL, so all 3 state bits above have to be 0). If the lock is +contended it goes to the next possible path. (ii) midpath: aka optimistic spinning, tries to spin for acquisition while the lock owner is running and there are no other tasks ready @@ -143,11 +129,10 @@ Interfaces Disadvantages - -Unlike its original design and purpose, 'struct mutex' is larger than -most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice -as large as 'struct semaphore' (24 bytes) and tied, along with rwsems, -for the largest lock in the kernel. Larger structure sizes mean more -CP
[GIT pull] locking: Fixes for 4.15
Linus, please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus Two final fixes for 4.15: - Repair the OWNER_DIED logic in the futex code which got wreckaged with the recent fix for a subtle race condition. - Prevent the hard lockup detector from triggering when dumping all held locks in the system. Thanks, tglx --> Peter Zijlstra (1): futex: Fix OWNER_DEAD fixup Tejun Heo (1): locking/lockdep: Avoid triggering hardlockup from debug_show_all_locks() kernel/futex.c | 6 +++--- kernel/locking/lockdep.c | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 8c5424dd5924..7f719d110908 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2311,9 +2311,6 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); oldowner = pi_state->owner; - /* Owner died? */ - if (!pi_state->owner) - newtid |= FUTEX_OWNER_DIED; /* * We are here because either: @@ -2374,6 +2371,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, } newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; + /* Owner died? */ + if (!pi_state->owner) + newtid |= FUTEX_OWNER_DIED; if (get_futex_value_locked(&uval, uaddr)) goto handle_fault; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 5fa1324a4f29..521659044719 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -4490,6 +4491,7 @@ void debug_show_all_locks(void) if (!unlock) if (read_trylock(&tasklist_lock)) unlock = 1; + touch_nmi_watchdog(); } while_each_thread(g, p); pr_warn("\n");
Re: [GIT PULL] locking fixes
On Mon, Jan 22, 2018 at 10:43:36AM +0100, Geert Uytterhoeven wrote: > > static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, > > + struct task_struct *argowner) > > { > > struct futex_pi_state *pi_state = q->pi_state; > > u32 uval, uninitialized_var(curval), newval; > > + struct task_struct *oldowner, *newowner; > > + u32 newtid; > > new tid is no longer initialized... > > > int ret; > > > > + lockdep_assert_held(q->lock_ptr); > > + > > raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); > > > > oldowner = pi_state->owner; > > @@ -2317,11 +2316,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, > > struct futex_q *q, > > newtid |= FUTEX_OWNER_DIED; > > ... leading to a compiler warning with gcc 4.1.2: > > warning: ‘newtid’ is used uninitialized in this function > > I guess newer compilers don't give the warning, as the result of the > assignment above is not used at all, and thus may be optimized away... > > > > > /* > > +* We are here because either: > > +* > > +* - we stole the lock and pi_state->owner needs updating to > > reflect > > +*that (@argowner == current), > > +* > > +* or: > > +* > > +* - someone stole our lock and we need to fix things to point to > > the > > +*new owner (@argowner == NULL). > > * > > +* Either way, we have to replace the TID in the user space > > variable. > > * This must be atomic as we have to preserve the owner died bit > > here. > > * > > * Note: We write the user space value _before_ changing the > > pi_state > > @@ -2334,6 +2339,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, > > struct futex_q *q, > > * in the PID check in lookup_pi_state. > > */ > > retry: > > + if (!argowner) { > > + if (oldowner != current) { > > + /* > > +* We raced against a concurrent self; things are > > +* already fixed up. Nothing to do. > > +*/ > > + ret = 0; > > + goto out_unlock; > > + } > > + > > + if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { > > + /* We got the lock after all, nothing to fix. */ > > + ret = 0; > > + goto out_unlock; > > + } > > + > > + /* > > +* Since we just failed the trylock; there must be an owner. > > +*/ > > + newowner = rt_mutex_owner(&pi_state->pi_mutex); > > + BUG_ON(!newowner); > > + } else { > > + WARN_ON_ONCE(argowner != current); > > + if (oldowner == current) { > > + /* > > +* We raced against a concurrent self; things are > > +* already fixed up. Nothing to do. > > +*/ > > + ret = 0; > > + goto out_unlock; > > + } > > + newowner = argowner; > > + } > > + > > + newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; > > ... since it is always overwritten here. > > Is that intentional? No, I think you actually spotted a bug there. We now can't set OWNER_DIED anymore, which is bad. I think the below fixes things, but let me go trawl through the various futex test things, because I think I've seen a unit test for this _somewhere_. --- kernel/futex.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 8c5424dd5924..7f719d110908 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2311,9 +2311,6 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); oldowner = pi_state->owner; - /* Owner died? */ - if (!pi_state->owner) - newtid |= FUTEX_OWNER_DIED; /* * We are here because either: @@ -2374,6 +2371,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, } newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; + /* Owner died? */ + if (!pi_state->owner) + newtid |= FUTEX_OWNER_DIED; if (get_futex_value_locked(&uval, uaddr)) goto handle_fault;
Re: [GIT PULL] locking fixes
Hi Ingo, Peter, On Wed, Jan 17, 2018 at 4:24 PM, Ingo Molnar wrote: > Please pull the latest locking-urgent-for-linus git tree from: > >git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git > locking-urgent-for-linus > ># HEAD: fbe0e839d1e22d88810f3ee3e2f1479be4c0aa4a futex: Prevent overflow > by strengthen input validation > > Two futex fixes: a input parameters robustness fix, and futex race fixes. > Peter Zijlstra (1): > futex: Avoid violating the 10th rule of futex > --- a/kernel/futex.c > +++ b/kernel/futex.c > @@ -2294,21 +2297,17 @@ static void unqueue_me_pi(struct futex_q *q) > spin_unlock(q->lock_ptr); > } > > -/* > - * Fixup the pi_state owner with the new owner. > - * > - * Must be called with hash bucket lock held and mm->sem held for non > - * private futexes. > - */ > static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, > - struct task_struct *newowner) > + struct task_struct *argowner) > { > - u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; > struct futex_pi_state *pi_state = q->pi_state; > u32 uval, uninitialized_var(curval), newval; > - struct task_struct *oldowner; > + struct task_struct *oldowner, *newowner; > + u32 newtid; new tid is no longer initialized... > int ret; > > + lockdep_assert_held(q->lock_ptr); > + > raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); > > oldowner = pi_state->owner; > @@ -2317,11 +2316,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, > struct futex_q *q, > newtid |= FUTEX_OWNER_DIED; ... leading to a compiler warning with gcc 4.1.2: warning: ‘newtid’ is used uninitialized in this function I guess newer compilers don't give the warning, as the result of the assignment above is not used at all, and thus may be optimized away... > > /* > -* We are here either because we stole the rtmutex from the > -* previous highest priority waiter or we are the highest priority > -* waiter but have failed to get the rtmutex the first time. > +* We are here because either: > +* > +* - we stole the lock and pi_state->owner needs updating to reflect > +*that (@argowner == current), > +* > +* or: > +* > +* - someone stole our lock and we need to fix things to point to the > +*new owner (@argowner == NULL). > * > -* We have to replace the newowner TID in the user space variable. > +* Either way, we have to replace the TID in the user space variable. > * This must be atomic as we have to preserve the owner died bit here. > * > * Note: We write the user space value _before_ changing the pi_state > @@ -2334,6 +2339,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, > struct futex_q *q, > * in the PID check in lookup_pi_state. > */ > retry: > + if (!argowner) { > + if (oldowner != current) { > + /* > +* We raced against a concurrent self; things are > +* already fixed up. Nothing to do. > +*/ > + ret = 0; > + goto out_unlock; > + } > + > + if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { > + /* We got the lock after all, nothing to fix. */ > + ret = 0; > + goto out_unlock; > + } > + > + /* > +* Since we just failed the trylock; there must be an owner. > +*/ > + newowner = rt_mutex_owner(&pi_state->pi_mutex); > + BUG_ON(!newowner); > + } else { > + WARN_ON_ONCE(argowner != current); > + if (oldowner == current) { > + /* > +* We raced against a concurrent self; things are > +* already fixed up. Nothing to do. > +*/ > + ret = 0; > + goto out_unlock; > + } > + newowner = argowner; > + } > + > + newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; ... since it is always overwritten here. Is that intentional? Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: fbe0e839d1e22d88810f3ee3e2f1479be4c0aa4a futex: Prevent overflow by strengthen input validation Two futex fixes: a input parameters robustness fix, and futex race fixes. Thanks, Ingo --> Li Jinyue (1): futex: Prevent overflow by strengthen input validation Peter Zijlstra (1): futex: Avoid violating the 10th rule of futex kernel/futex.c | 86 + kernel/locking/rtmutex.c| 26 + kernel/locking/rtmutex_common.h | 1 + 3 files changed, 90 insertions(+), 23 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 57d0b3657e16..8c5424dd5924 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1878,6 +1878,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_q *this, *next; DEFINE_WAKE_Q(wake_q); + if (nr_wake < 0 || nr_requeue < 0) + return -EINVAL; + /* * When PI not supported: return -ENOSYS if requeue_pi is true, * consequently the compiler knows requeue_pi is always false past @@ -2294,21 +2297,17 @@ static void unqueue_me_pi(struct futex_q *q) spin_unlock(q->lock_ptr); } -/* - * Fixup the pi_state owner with the new owner. - * - * Must be called with hash bucket lock held and mm->sem held for non - * private futexes. - */ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *newowner) + struct task_struct *argowner) { - u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; u32 uval, uninitialized_var(curval), newval; - struct task_struct *oldowner; + struct task_struct *oldowner, *newowner; + u32 newtid; int ret; + lockdep_assert_held(q->lock_ptr); + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); oldowner = pi_state->owner; @@ -2317,11 +2316,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, newtid |= FUTEX_OWNER_DIED; /* -* We are here either because we stole the rtmutex from the -* previous highest priority waiter or we are the highest priority -* waiter but have failed to get the rtmutex the first time. +* We are here because either: +* +* - we stole the lock and pi_state->owner needs updating to reflect +*that (@argowner == current), +* +* or: +* +* - someone stole our lock and we need to fix things to point to the +*new owner (@argowner == NULL). * -* We have to replace the newowner TID in the user space variable. +* Either way, we have to replace the TID in the user space variable. * This must be atomic as we have to preserve the owner died bit here. * * Note: We write the user space value _before_ changing the pi_state @@ -2334,6 +2339,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, * in the PID check in lookup_pi_state. */ retry: + if (!argowner) { + if (oldowner != current) { + /* +* We raced against a concurrent self; things are +* already fixed up. Nothing to do. +*/ + ret = 0; + goto out_unlock; + } + + if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { + /* We got the lock after all, nothing to fix. */ + ret = 0; + goto out_unlock; + } + + /* +* Since we just failed the trylock; there must be an owner. +*/ + newowner = rt_mutex_owner(&pi_state->pi_mutex); + BUG_ON(!newowner); + } else { + WARN_ON_ONCE(argowner != current); + if (oldowner == current) { + /* +* We raced against a concurrent self; things are +* already fixed up. Nothing to do. +*/ + ret = 0; + goto out_unlock; + } + newowner = argowner; + } + + newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; + if (get_futex_value_locked(&uval, uaddr)) goto handle_fault; @@ -2434,15 +2475,28 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) * Got the lock. We might not be the anticipated owner if we * did a lock-steal - fix up the PI-state in that
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: f328299e54a94998b31baf788d2b33d8122a4acb locking/refcounts: Remove stale comment from the ARCH_HAS_REFCOUNT Kconfig entry No functional effects intended: removes leftovers from recent lockdep and refcounts work. Thanks, Ingo --> David Sterba (1): locking/Documentation: Remove stale crossrelease_fullstack parameter Eric Biggers (1): locking/refcounts: Remove stale comment from the ARCH_HAS_REFCOUNT Kconfig entry Ingo Molnar (1): locking/lockdep: Remove cross-release leftovers Documentation/admin-guide/kernel-parameters.txt | 3 --- arch/x86/Kconfig| 1 - include/linux/completion.h | 1 - include/linux/irqflags.h| 4 include/linux/lockdep.h | 2 -- kernel/sched/completion.c | 5 - 6 files changed, 16 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index af7104aaffd9..a626465dd877 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -713,9 +713,6 @@ It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. - crossrelease_fullstack - [KNL] Allow to record full stack trace in cross-release - cryptomgr.notests [KNL] Disable crypto self-tests diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d4fc98c50378..ff4e9cd99854 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -55,7 +55,6 @@ config X86 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOVif X86_64 select ARCH_HAS_PMEM_APIif X86_64 - # Causing hangs/crashes, see the commit that added this change for details. select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_SET_MEMORY diff --git a/include/linux/completion.h b/include/linux/completion.h index 94a59ba7d422..519e94915d18 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -32,7 +32,6 @@ struct completion { #define init_completion(x) __init_completion(x) static inline void complete_acquire(struct completion *x) {} static inline void complete_release(struct completion *x) {} -static inline void complete_release_commit(struct completion *x) {} #define COMPLETION_INITIALIZER(work) \ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 46cb57d5eb13..1b3996ff3f16 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -27,22 +27,18 @@ # define trace_hardirq_enter() \ do { \ current->hardirq_context++; \ - crossrelease_hist_start(XHLOCK_HARD); \ } while (0) # define trace_hardirq_exit() \ do { \ current->hardirq_context--; \ - crossrelease_hist_end(XHLOCK_HARD); \ } while (0) # define lockdep_softirq_enter() \ do { \ current->softirq_context++; \ - crossrelease_hist_start(XHLOCK_SOFT); \ } while (0) # define lockdep_softirq_exit()\ do { \ current->softirq_context--; \ - crossrelease_hist_end(XHLOCK_SOFT); \ } while (0) # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 2e75dc34bff5..3251d9c0d313 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -475,8 +475,6 @@ enum xhlock_context_t { #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ { .name = (_name), .key = (void *)(_key), } -static inline void crossrelease_hist_start(enum xhlock_context_t c) {} -static inline void crossrelease_hist_end(enum xhlock_context_t c) {} static inline void lockdep_invariant_state(bool force) {} static inline void lockdep_init_task(struct task_struct *task) {} static inline void lockdep_free_task(struct task_struct *task) {} diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 2ddaec40956f..0926aef10dad 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -34,11 +34,6 @@ void complete(struct completion *x) spin_lock_irqsave(&x->wait.lock, flags); - /* -* Perform commit of crossrelease here. -*/ - complete_release_commit(x); - if (x->done != UINT_MAX) x->done++; __wake_u
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 92ccc262e485781ff4c0fb3b7c77a619282df49a tools/lib/lockdep: Add missing declaration of 'pr_cont()' Misc fixes: - Fix a S390 boot hang that was caused by the lock-break logic. Remove lock-break to begin with, as review suggested it was unreasonably fragile and our confidence in its continued good health is lower than our confidence in its removal. - Remove the lockdep cross-release checking code for now, because of unresolved false positive warnings. This should make lockdep work well everywhere again. - Get rid of the final (and single) ACCESS_ONCE() straggler and remove the API from v4.15. - Fix a liblockdep build warning. Thanks, Ingo --> Ingo Molnar (1): locking/lockdep: Remove the cross-release locking checks Mark Rutland (4): tools/perf: Convert ACCESS_ONCE() to READ_ONCE() tools/include: Remove ACCESS_ONCE() compiler.h: Remove ACCESS_ONCE() checkpatch: Remove ACCESS_ONCE() warning Mengting Zhang (1): tools/lib/lockdep: Add missing declaration of 'pr_cont()' Will Deacon (2): locking/core: Fix deadlock during boot on systems with GENERIC_LOCKBREAK locking/core: Remove break_lock field when CONFIG_GENERIC_LOCKBREAK=y Documentation/locking/crossrelease.txt | 874 - include/linux/compiler.h | 47 +- include/linux/completion.h | 45 -- include/linux/lockdep.h| 125 - include/linux/rwlock_types.h | 3 - include/linux/sched.h | 11 - include/linux/spinlock.h | 5 - include/linux/spinlock_types.h | 3 - kernel/locking/lockdep.c | 652 ++-- kernel/locking/spinlock.c | 13 +- lib/Kconfig.debug | 33 -- scripts/checkpatch.pl | 22 - tools/include/linux/compiler.h | 21 +- tools/include/linux/lockdep.h | 1 + tools/perf/util/mmap.h | 2 +- 15 files changed, 60 insertions(+), 1797 deletions(-) delete mode 100644 Documentation/locking/crossrelease.txt
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: b483cf3bc249d7af706390efa63d6671e80d1c09 locking/lockdep: Disable cross-release features for now Two lockdep fixes for bugs introduced by the cross-release dependency tracking feature - plus a commit that disables it because performance regressed in an absymal fashion on some systems. Thanks, Ingo --> Ingo Molnar (1): locking/lockdep: Disable cross-release features for now Peter Zijlstra (2): locking/lockdep: Fix stacktrace mess locking/selftest: Avoid false BUG report kernel/locking/lockdep.c | 48 lib/Kconfig.debug| 4 ++-- lib/locking-selftest.c | 2 ++ 3 files changed, 24 insertions(+), 30 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 44c8d0d17170..e36e652d996f 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1873,10 +1873,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, struct held_lock *next, int distance, struct stack_trace *trace, int (*save)(struct stack_trace *trace)) { + struct lock_list *uninitialized_var(target_entry); struct lock_list *entry; - int ret; struct lock_list this; - struct lock_list *uninitialized_var(target_entry); + int ret; /* * Prove that the new -> dependency would not @@ -1890,8 +1890,17 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, this.class = hlock_class(next); this.parent = NULL; ret = check_noncircular(&this, hlock_class(prev), &target_entry); - if (unlikely(!ret)) + if (unlikely(!ret)) { + if (!trace->entries) { + /* +* If @save fails here, the printing might trigger +* a WARN but because of the !nr_entries it should +* not do bad things. +*/ + save(trace); + } return print_circular_bug(&this, target_entry, next, prev, trace); + } else if (unlikely(ret < 0)) return print_bfs_bug(ret); @@ -1938,7 +1947,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, return print_bfs_bug(ret); - if (save && !save(trace)) + if (!trace->entries && !save(trace)) return 0; /* @@ -1958,20 +1967,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, if (!ret) return 0; - /* -* Debugging printouts: -*/ - if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { - graph_unlock(); - printk("\n new dependency: "); - print_lock_name(hlock_class(prev)); - printk(KERN_CONT " => "); - print_lock_name(hlock_class(next)); - printk(KERN_CONT "\n"); - dump_stack(); - if (!graph_lock()) - return 0; - } return 2; } @@ -1986,8 +1981,12 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) { int depth = curr->lockdep_depth; struct held_lock *hlock; - struct stack_trace trace; - int (*save)(struct stack_trace *trace) = save_trace; + struct stack_trace trace = { + .nr_entries = 0, + .max_entries = 0, + .entries = NULL, + .skip = 0, + }; /* * Debugging checks. @@ -2018,18 +2017,11 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) */ if (hlock->read != 2 && hlock->check) { int ret = check_prev_add(curr, hlock, next, -distance, &trace, save); +distance, &trace, save_trace); if (!ret) return 0; /* -* Stop saving stack_trace if save_trace() was -* called at least once: -*/ - if (save && ret == 2) - save = NULL; - - /* * Stop after the first non-trylock entry, * as non-trylock entries have added their * own direct dependencies already, so this diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2689b7c50c52..e2705843c524 100644 --- a/lib/Kconfig.debug +
[GIT pull] locking fixes for 4.14
Linus, please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus Two fixes for locking: - Plug a hole the pi_stat->owner serialization which was changed recently and failed to fixup two usage sites. - Prevent reordering of the rwsem_has_spinner() check vs. the decrement of rwsem count in up_write() which causes a missed wakeup. Thanks, tglx --> Peter Zijlstra (1): futex: Fix pi_state->owner serialization Prateek Sood (1): locking/rwsem-xadd: Fix missed wakeup due to reordering of load kernel/futex.c | 33 ++--- kernel/locking/rwsem-xadd.c | 27 +++ 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 3d38eaf05492..0518a0bfc746 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -821,8 +821,6 @@ static void get_pi_state(struct futex_pi_state *pi_state) /* * Drops a reference to the pi_state object and frees or caches it * when the last reference is gone. - * - * Must be called with the hb lock held. */ static void put_pi_state(struct futex_pi_state *pi_state) { @@ -837,16 +835,22 @@ static void put_pi_state(struct futex_pi_state *pi_state) * and has cleaned up the pi_state already */ if (pi_state->owner) { - raw_spin_lock_irq(&pi_state->owner->pi_lock); - list_del_init(&pi_state->list); - raw_spin_unlock_irq(&pi_state->owner->pi_lock); + struct task_struct *owner; - rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + owner = pi_state->owner; + if (owner) { + raw_spin_lock(&owner->pi_lock); + list_del_init(&pi_state->list); + raw_spin_unlock(&owner->pi_lock); + } + rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner); + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); } - if (current->pi_state_cache) + if (current->pi_state_cache) { kfree(pi_state); - else { + } else { /* * pi_state->list is already empty. * clear pi_state->owner. @@ -907,13 +911,14 @@ void exit_pi_state_list(struct task_struct *curr) raw_spin_unlock_irq(&curr->pi_lock); spin_lock(&hb->lock); - - raw_spin_lock_irq(&curr->pi_lock); + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + raw_spin_lock(&curr->pi_lock); /* * We dropped the pi-lock, so re-check whether this * task still owns the PI-state: */ if (head->next != next) { + raw_spin_unlock(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); continue; } @@ -922,9 +927,10 @@ void exit_pi_state_list(struct task_struct *curr) WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); pi_state->owner = NULL; - raw_spin_unlock_irq(&curr->pi_lock); + raw_spin_unlock(&curr->pi_lock); get_pi_state(pi_state); + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); rt_mutex_futex_unlock(&pi_state->pi_mutex); @@ -1208,6 +1214,10 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &p->pi_state_list); + /* +* Assignment without holding pi_state->pi_mutex.wait_lock is safe +* because there is no concurrency as the object is not published yet. +*/ pi_state->owner = p; raw_spin_unlock_irq(&p->pi_lock); @@ -2878,6 +2888,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); + /* drops pi_state->pi_mutex.wait_lock */ ret = wake_futex_pi(uaddr, uval, pi_state); put_pi_state(pi_state); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 02f660666ab8..1fefe6dcafd7 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -613,6 +613,33 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) DEFINE_WAKE_Q(wake_q); /* + * __rwsem_down_write_failed_common(sem) + * rwsem_optimistic_spin(sem) + * osq_unlock(sem->osq) + * ... + * atomic_long_add_return(&sem->count) + * + * - VS - + *
Re: [GIT PULL] locking fixes
On Wed, May 03, 2017 at 11:10:07PM -0700, Linus Torvalds wrote: > > > On May 3, 2017 22:40, "Peter Zijlstra" wrote: > > > These people are out-of-tree dubious licensed modules, right? I really > _really_ don't care about those. > > > Mainly Nvidia, I think. nvidia said it was for "new code" they were still developing, and that the license issue was on their side, and they fixed it up. They also agreed that the change was ok from their point of view. > But the point is, you broke people's working setups. > > We don't do that. We have never guaranteed kernel api stability, but yes, this is different from that. Moving these from a .h to .c caused the change, I asked for this as the original symbols were obviously GPL-only being in a .h file. However I understand your point, we don't want to have people any grumpier at us than normal :) > Perhaps equally importantly, you did it by marking *trivial* functions that > are > definitely meant for drivers as gpl-only. Which only demeans the whole concept > that we consider the gpl-only thing to be an "internal kernel function". These are really now low-level kernel functions, and not trivial ones given the long long email threads on how to get them all working properly. This was obviously something that took a lot of time to do. > So the change actually makes our explicitly stated arguments for why certain > functions are special less valid, and replaced it with a stupid grandstanding > and legally dubious "linking means it's a derived work" argument. There's no "linking" argument here, I didn't make that. But again, I understand the point, changing api "markings" like this "mid-stream" isn't the nicest thing to do. And hey, I'm trying to be "meaner" as I think someone once told me to be that way, so I recommended that Peter make this change :) I'll send a patch to change these back now... thanks, greg k-h
Re: [GIT PULL] locking fixes
On Wed, May 03, 2017 at 04:21:01PM -0700, Linus Torvalds wrote: > This is from last merge window, and the reason I react now is that > nobody noticed or cared until we had a release.. > > On Mon, Feb 27, 2017 at 11:57 PM, Ingo Molnar wrote: > > > > Peter Zijlstra (1): > > locking/refcounts: Out-of-line everything > > This one is all good generally, but it has one really stupid side > effect: it makes refcounting GPL-only. > > That's just silly. These are functions that atomically add and > subtract one. The only thing that making them GPL-only can possibly do > is to make people hack around it, and lose the overflow handling > debugging in the process. These people are out-of-tree dubious licensed modules, right? I really _really_ don't care about those. > It also breaks any kref uses. Which is what drivers etc are supposed to use. Greg KH had this to say: "As all of the previous kref functions were in a GPL-only header file, and included directly that way, they were already GPL-only symbols, so there really was no change here except now the linker checks them. If you have questions about using inline GPL-only functions from a .h file, in a non-GPL codebase, please consult your corporate lawyer to get clarification." https://lkml.kernel.org/r/20170308094810.gb30...@kroah.com > So that "move from inline to out-of-line" had a big subtle semantic > change that was probably not intentional, and certainly not > documented. I'll take the not documented bit.
Re: [GIT PULL] locking fixes
This is from last merge window, and the reason I react now is that nobody noticed or cared until we had a release.. On Mon, Feb 27, 2017 at 11:57 PM, Ingo Molnar wrote: > > Peter Zijlstra (1): > locking/refcounts: Out-of-line everything This one is all good generally, but it has one really stupid side effect: it makes refcounting GPL-only. That's just silly. These are functions that atomically add and subtract one. The only thing that making them GPL-only can possibly do is to make people hack around it, and lose the overflow handling debugging in the process. It also breaks any kref uses. Which is what drivers etc are supposed to use. So that "move from inline to out-of-line" had a big subtle semantic change that was probably not intentional, and certainly not documented. Linus
[GIT pull] locking fixes for 4.11
Linus, please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus Three fixes related to locking: - Fix a SIGKILL issue for RWSEM_GENERIC_SPINLOCK which has been fixed for the XCHGADD variant already - Plug a potential use after free in the futex code - Prevent leaking a held spinlock in an futex error handling code path Thanks, tglx --> Niklas Cassel (1): locking/rwsem: Fix down_write_killable() for CONFIG_RWSEM_GENERIC_SPINLOCK=y Peter Zijlstra (2): futex: Fix potential use-after-free in FUTEX_REQUEUE_PI futex: Add missing error handling to FUTEX_REQUEUE_PI kernel/futex.c | 22 +- kernel/locking/rwsem-spinlock.c | 16 +++- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 229a744b1781..45858ec73941 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2815,7 +2815,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, { struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; - struct rt_mutex *pi_mutex = NULL; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; @@ -2899,6 +2898,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) + rt_mutex_unlock(&q.pi_state->pi_mutex); /* * Drop the reference to the pi state which * the requeue_pi() code acquired for us. @@ -2907,6 +2908,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, spin_unlock(q.lock_ptr); } } else { + struct rt_mutex *pi_mutex; + /* * We have been woken up by futex_unlock_pi(), a timeout, or a * signal. futex_unlock_pi() will not destroy the lock_ptr nor @@ -2930,18 +2933,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (res) ret = (res < 0) ? res : 0; + /* +* If fixup_pi_state_owner() faulted and was unable to handle +* the fault, unlock the rt_mutex and return the fault to +* userspace. +*/ + if (ret && rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + /* Unqueue and drop the lock. */ unqueue_me_pi(&q); } - /* -* If fixup_pi_state_owner() faulted and was unable to handle the -* fault, unlock the rt_mutex and return the fault to userspace. -*/ - if (ret == -EFAULT) { - if (pi_mutex && rt_mutex_owner(pi_mutex) == current) - rt_mutex_unlock(pi_mutex); - } else if (ret == -EINTR) { + if (ret == -EINTR) { /* * We've already been requeued, but cannot restart by calling * futex_lock_pi() directly. We could restart this syscall, but diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 7bc24d477805..c65f7989f850 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -213,10 +213,9 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) */ if (sem->count == 0) break; - if (signal_pending_state(state, current)) { - ret = -EINTR; - goto out; - } + if (signal_pending_state(state, current)) + goto out_nolock; + set_current_state(state); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); schedule(); @@ -224,12 +223,19 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) } /* got the lock */ sem->count = -1; -out: list_del(&waiter.list); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return ret; + +out_nolock: + list_del(&waiter.list); + if (!list_empty(&sem->wait_list)) + __rwsem_do_wake(sem, 1); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return -EINTR; } void __sched __down_write(struct rw_semaphore *sem)
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 857811a37129f5d2ba162d7be3986eff44724014 locking/ww_mutex: Adjust the lock number for stress test - Change the new refcount_t warnings from WARN() to WARN_ONCE(), - two ww_mutex fixes, - plus a new lockdep self-consistency check for a bug that triggered in practice. Thanks, Ingo --> Boqun Feng (1): locking/ww_mutex: Adjust the lock number for stress test Chris Wilson (1): locking/ww_mutex: Replace cpu_relax() with cond_resched() for tests Ingo Molnar (1): locking/refcounts: Change WARN() to WARN_ONCE() Peter Zijlstra (1): locking/lockdep: Add nest_lock integrity test kernel/locking/lockdep.c | 11 +-- kernel/locking/test-ww_mutex.c | 6 +++--- lib/refcount.c | 14 +++--- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9812e5dd409e..c0ee8607c11e 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3260,10 +3260,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (depth) { hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { - if (hlock->references) + if (hlock->references) { + /* +* Check: unsigned int references:12, overflow. +*/ + if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1)) + return 0; + hlock->references++; - else + } else { hlock->references = 2; + } return 1; } diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index da6c9a34f62f..6b7abb334ca6 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -50,7 +50,7 @@ static void test_mutex_work(struct work_struct *work) if (mtx->flags & TEST_MTX_TRY) { while (!ww_mutex_trylock(&mtx->mutex)) - cpu_relax(); + cond_resched(); } else { ww_mutex_lock(&mtx->mutex, NULL); } @@ -88,7 +88,7 @@ static int __test_mutex(unsigned int flags) ret = -EINVAL; break; } - cpu_relax(); + cond_resched(); } while (time_before(jiffies, timeout)); } else { ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); @@ -627,7 +627,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); + ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); if (ret) return ret; diff --git a/lib/refcount.c b/lib/refcount.c index 1d33366189d1..aa09ad3c30b0 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -58,7 +58,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r) val = old; } - WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } @@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(refcount_add_not_zero); void refcount_add(unsigned int i, refcount_t *r) { - WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); + WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); } EXPORT_SYMBOL_GPL(refcount_add); @@ -97,7 +97,7 @@ bool refcount_inc_not_zero(refcount_t *r) val = old; } - WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(refcount_inc_not_zero); */ void refcount_inc(refcount_t *r) { - WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); + WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); } EXPORT_SYMBOL_GPL(refcount_inc); @@ -125,7 +125,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r) new = val - i; if (new > val) { - WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); retu
Re: [GIT PULL] locking fixes
On Mon, Feb 27, 2017 at 11:57 PM, Ingo Molnar wrote: > > Note that the uninlining allowed us to enable the underflow/overflow warnings > unconditionally and remove the debug Kconfig switch: this might trigger new > warnings in buggy code and turn crashes/use-after-free bugs into less harmful > memory leaks. I'm ok with this, but that WARN() really needs to be a WARN_ON_ONCE(). Because once an underflow (or overflow) is happening, it tends to _keep_ happening. And you may just have essentially DoS'ed the machine that is now spending all its time writing those logs to disk. Yes, yes, quiet independently of this we should limit WARN printouts (and do the reverse: turn a "once" to mean "once in a blue moon" rather than actually just once), but particularly for this kind of "never happens" thing, it really is better to just warn once. Linus
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 318b1dedcd39012624f466d281627553e9fa2570 locking/refcounts: Add missing kernel.h header to have UINT_MAX defined The main change is the uninlining of large refcount_t APIs, plus a header dependency fix. Note that the uninlining allowed us to enable the underflow/overflow warnings unconditionally and remove the debug Kconfig switch: this might trigger new warnings in buggy code and turn crashes/use-after-free bugs into less harmful memory leaks. Thanks, Ingo --> Elena Reshetova (1): locking/refcounts: Add missing kernel.h header to have UINT_MAX defined Peter Zijlstra (1): locking/refcounts: Out-of-line everything include/linux/refcount.h | 278 +++ lib/Kconfig.debug| 13 --- lib/Makefile | 2 +- lib/refcount.c | 267 + 4 files changed, 281 insertions(+), 279 deletions(-) create mode 100644 lib/refcount.c diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 600aadf9cca4..0023fee4bbbc 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -1,54 +1,10 @@ #ifndef _LINUX_REFCOUNT_H #define _LINUX_REFCOUNT_H -/* - * Variant of atomic_t specialized for reference counts. - * - * The interface matches the atomic_t interface (to aid in porting) but only - * provides the few functions one should use for reference counting. - * - * It differs in that the counter saturates at UINT_MAX and will not move once - * there. This avoids wrapping the counter and causing 'spurious' - * use-after-free issues. - * - * Memory ordering rules are slightly relaxed wrt regular atomic_t functions - * and provide only what is strictly required for refcounts. - * - * The increments are fully relaxed; these will not provide ordering. The - * rationale is that whatever is used to obtain the object we're increasing the - * reference count on will provide the ordering. For locked data structures, - * its the lock acquire, for RCU/lockless data structures its the dependent - * load. - * - * Do note that inc_not_zero() provides a control dependency which will order - * future stores against the inc, this ensures we'll never modify the object - * if we did not in fact acquire a reference. - * - * The decrements will provide release order, such that all the prior loads and - * stores will be issued before, it also provides a control dependency, which - * will order us against the subsequent free(). - * - * The control dependency is against the load of the cmpxchg (ll/sc) that - * succeeded. This means the stores aren't fully ordered, but this is fine - * because the 1->0 transition indicates no concurrency. - * - * Note that the allocator is responsible for ordering things between free() - * and alloc(). - * - */ - #include -#include #include #include - -#ifdef CONFIG_DEBUG_REFCOUNT -#define REFCOUNT_WARN(cond, str) WARN_ON(cond) -#define __refcount_check __must_check -#else -#define REFCOUNT_WARN(cond, str) (void)(cond) -#define __refcount_check -#endif +#include typedef struct refcount_struct { atomic_t refs; @@ -66,229 +22,21 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -static inline __refcount_check -bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ - unsigned int old, new, val = atomic_read(&r->refs); - - for (;;) { - if (!val) - return false; - - if (unlikely(val == UINT_MAX)) - return true; - - new = val + i; - if (new < val) - new = UINT_MAX; - old = atomic_cmpxchg_relaxed(&r->refs, val, new); - if (old == val) - break; - - val = old; - } - - REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); - - return true; -} - -static inline void refcount_add(unsigned int i, refcount_t *r) -{ - REFCOUNT_WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); -} - -/* - * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. - * - * Provides no memory ordering, it is assumed the caller has guaranteed the - * object memory to be stable (RCU, etc.). It does provide a control dependency - * and thereby orders future stores. See the comment on top. - */ -static inline __refcount_check -bool refcount_inc_not_zero(refcount_t *r) -{ - unsigned int old, new, val = atomic_read(&r->refs); - - for (;;) { - new = val + 1; - - if (!val) - return false; - - if (unlikely(!new)) - return true; - -
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: f943fe0faf27991d256e10b5a85f175385c64cdc lockdep: Fix report formatting Two rtmutex race fixes (which miraculously never triggered, that we know of), plus two lockdep printk formatting regression fixes. Thanks, Ingo --> Dmitry Vyukov (1): lockdep: Fix report formatting Michael Ellerman (1): locking/selftest: Fix output since KERN_CONT changes Thomas Gleixner (2): locking/rtmutex: Prevent dequeue vs. unlock race locking/rtmutex: Use READ_ONCE() in rt_mutex_owner() kernel/locking/lockdep.c| 111 +--- kernel/locking/rtmutex.c| 68 +++- kernel/locking/rtmutex_common.h | 5 +- lib/locking-selftest.c | 66 4 files changed, 159 insertions(+), 91 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 589d763a49b3..4d7ffc0a0d00 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -506,13 +506,13 @@ static void __print_lock_name(struct lock_class *class) name = class->name; if (!name) { name = __get_key_name(class->key, str); - printk("%s", name); + printk(KERN_CONT "%s", name); } else { - printk("%s", name); + printk(KERN_CONT "%s", name); if (class->name_version > 1) - printk("#%d", class->name_version); + printk(KERN_CONT "#%d", class->name_version); if (class->subclass) - printk("/%d", class->subclass); + printk(KERN_CONT "/%d", class->subclass); } } @@ -522,9 +522,9 @@ static void print_lock_name(struct lock_class *class) get_usage_chars(class, usage); - printk(" ("); + printk(KERN_CONT " ("); __print_lock_name(class); - printk("){%s}", usage); + printk(KERN_CONT "){%s}", usage); } static void print_lockdep_cache(struct lockdep_map *lock) @@ -536,7 +536,7 @@ static void print_lockdep_cache(struct lockdep_map *lock) if (!name) name = __get_key_name(lock->key->subkeys, str); - printk("%s", name); + printk(KERN_CONT "%s", name); } static void print_lock(struct held_lock *hlock) @@ -551,13 +551,13 @@ static void print_lock(struct held_lock *hlock) barrier(); if (!class_idx || (class_idx - 1) >= MAX_LOCKDEP_KEYS) { - printk("\n"); + printk(KERN_CONT "\n"); return; } print_lock_name(lock_classes + class_idx - 1); - printk(", at: "); - print_ip_sym(hlock->acquire_ip); + printk(KERN_CONT ", at: [<%p>] %pS\n", + (void *)hlock->acquire_ip, (void *)hlock->acquire_ip); } static void lockdep_print_held_locks(struct task_struct *curr) @@ -792,8 +792,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) printk("\nnew class %p: %s", class->key, class->name); if (class->name_version > 1) - printk("#%d", class->name_version); - printk("\n"); + printk(KERN_CONT "#%d", class->name_version); + printk(KERN_CONT "\n"); dump_stack(); if (!graph_lock()) { @@ -1071,7 +1071,7 @@ print_circular_bug_entry(struct lock_list *target, int depth) return 0; printk("\n-> #%u", depth); print_lock_name(target->class); - printk(":\n"); + printk(KERN_CONT ":\n"); print_stack_trace(&target->trace, 6); return 0; @@ -1102,11 +1102,11 @@ print_circular_lock_scenario(struct held_lock *src, if (parent != source) { printk("Chain exists of:\n "); __print_lock_name(source); - printk(" --> "); + printk(KERN_CONT " --> "); __print_lock_name(parent); - printk(" --> "); + printk(KERN_CONT " --> "); __print_lock_name(target); - printk("\n\n"); + printk(KERN_CONT "\n\n"); } printk(" Possible unsafe locking scenario:\n\n"); @@ -1114,16 +1114,16 @@ print_circular_lock_scenario(struct held_lock *src, printk(" \n"); printk(" lock("); __print_lock_name(target); - printk(");\n"); + printk(KERN_CONT ");\n"); printk(" lock("); __print_lock_name(parent); - printk(");\n"); + printk(KERN_CONT ");\n"); printk(" lock("); __print_lock_name(target); - printk(");\n"); + printk(KERN_CONT
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 5f43086bb9224987010460dcf3dee68fbd4f574d locking, fs/locks: Add missing file_sem locks Two fixes: - a file locks fix (missing critical section, bug introduced in this merge window) - an x86 down_write() stack frame annotation Thanks, Ingo --> Josh Poimboeuf (1): locking/rwsem/x86: Add stack frame dependency for down_write() Peter Zijlstra (1): locking, fs/locks: Add missing file_sem locks arch/x86/include/asm/rwsem.h | 6 -- fs/locks.c | 6 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 3d33a719f5c1..a34e0d4b957d 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -103,8 +103,10 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) ({ \ long tmp; \ struct rw_semaphore* ret; \ + register void *__sp asm(_ASM_SP); \ + \ asm volatile("# beginning down_write\n\t" \ -LOCK_PREFIX " xadd %1,(%3)\n\t" \ +LOCK_PREFIX " xadd %1,(%4)\n\t" \ /* adds 0x0001, returns the old value */ \ " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ /* was the active mask 0 before? */\ @@ -112,7 +114,7 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) " call " slow_path "\n" \ "1:\n" \ "# ending down_write" \ -: "+m" (sem->count), "=d" (tmp), "=a" (ret)\ +: "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \ : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ : "memory", "cc"); \ ret;\ diff --git a/fs/locks.c b/fs/locks.c index ce93b416b490..22c5b4aa4961 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1609,6 +1609,7 @@ int fcntl_getlease(struct file *filp) ctx = smp_load_acquire(&inode->i_flctx); if (ctx && !list_empty_careful(&ctx->flc_lease)) { + percpu_down_read_preempt_disable(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); list_for_each_entry(fl, &ctx->flc_lease, fl_list) { @@ -1618,6 +1619,8 @@ int fcntl_getlease(struct file *filp) break; } spin_unlock(&ctx->flc_lock); + percpu_up_read_preempt_enable(&file_rwsem); + locks_dispose_list(&dispose); } return type; @@ -2529,11 +2532,14 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx) if (list_empty(&ctx->flc_lease)) return; + percpu_down_read_preempt_disable(&file_rwsem); spin_lock(&ctx->flc_lock); list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) if (filp == fl->fl_file) lease_modify(fl, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); + percpu_up_read_preempt_enable(&file_rwsem); + locks_dispose_list(&dispose); }
[GIT pull] locking fixes for 4.8
Linus, please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus Two smallish fixes: - Use the proper asm constraint in the Super-H atomic_fetch_ops - A trivial typo fix in the Kconfig help text Thanks, tglx --> Peter Zijlstra (1): locking/atomic, arch/sh: Fix ATOMIC_FETCH_OP() Vivien Didelot (1): locking/hung_task: Fix typo in CONFIG_DETECT_HUNG_TASK help text arch/sh/include/asm/atomic-llsc.h | 2 +- lib/Kconfig.debug | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index caea2c45f6c2..1d159ce50f5a 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -60,7 +60,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ " movco.l %0, @%3 \n" \ " bf 1b \n" \ " synco \n" \ - : "=&z" (temp), "=&z" (res) \ + : "=&z" (temp), "=&r" (res) \ : "r" (i), "r" (&v->counter)\ : "t"); \ \ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2e2cca509231..cab7405f48d2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -821,7 +821,7 @@ config DETECT_HUNG_TASK help Say Y here to enable the kernel to detect "hung tasks", which are bugs that cause the task to be stuck in - uninterruptible "D" state indefinitiley. + uninterruptible "D" state indefinitely. When a hung task is detected, the kernel will print the current stack trace (which you should report), but the
Fw: [GIT PULL] locking fixes
Hello, Eric, Does Johannes's recent change to lockless_dereference() help with the sparse issue? Thanx, Paul - Forwarded message from Ingo Molnar - Date: Thu, 18 Aug 2016 22:34:21 +0200 From: Ingo Molnar To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, Peter Zijlstra , Thomas Gleixner , Andrew Morton , "Paul E. McKenney" Subject: [GIT PULL] locking fixes Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 112dc0c8069e5554e0ad29c58228f1e6ca49e13d locking/barriers: Suppress sparse warnings in lockless_dereference() Two lockless_dereference() related fixes. Thanks, Ingo --> Johannes Berg (2): Revert "drm/fb-helper: Reduce READ_ONCE(master) to lockless_dereference" locking/barriers: Suppress sparse warnings in lockless_dereference() drivers/gpu/drm/drm_fb_helper.c | 2 +- include/linux/compiler.h| 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index ce54e985d91b..0a06f9120b5a 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -464,7 +464,7 @@ static bool drm_fb_helper_is_bound(struct drm_fb_helper *fb_helper) /* Sometimes user space wants everything disabled, so don't steal the * display if there's a master. */ - if (lockless_dereference(dev->master)) + if (READ_ONCE(dev->master)) return false; drm_for_each_crtc(crtc, dev) { diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 1bb954842725..436aa4e42221 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -527,13 +527,13 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * object's lifetime is managed by something other than RCU. That * "something other" might be reference counting or simple immortality. * - * The seemingly unused void * variable is to validate @p is indeed a pointer - * type. All pointer types silently cast to void *. + * The seemingly unused size_t variable is to validate @p is indeed a pointer + * type by making sure it can be dereferenced. */ #define lockless_dereference(p) \ ({ \ typeof(p) _p1 = READ_ONCE(p); \ - __maybe_unused const void * const _p2 = _p1; \ + size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_p1); \ }) - End forwarded message -
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 112dc0c8069e5554e0ad29c58228f1e6ca49e13d locking/barriers: Suppress sparse warnings in lockless_dereference() Two lockless_dereference() related fixes. Thanks, Ingo --> Johannes Berg (2): Revert "drm/fb-helper: Reduce READ_ONCE(master) to lockless_dereference" locking/barriers: Suppress sparse warnings in lockless_dereference() drivers/gpu/drm/drm_fb_helper.c | 2 +- include/linux/compiler.h| 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index ce54e985d91b..0a06f9120b5a 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -464,7 +464,7 @@ static bool drm_fb_helper_is_bound(struct drm_fb_helper *fb_helper) /* Sometimes user space wants everything disabled, so don't steal the * display if there's a master. */ - if (lockless_dereference(dev->master)) + if (READ_ONCE(dev->master)) return false; drm_for_each_crtc(crtc, dev) { diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 1bb954842725..436aa4e42221 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -527,13 +527,13 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * object's lifetime is managed by something other than RCU. That * "something other" might be reference counting or simple immortality. * - * The seemingly unused void * variable is to validate @p is indeed a pointer - * type. All pointer types silently cast to void *. + * The seemingly unused size_t variable is to validate @p is indeed a pointer + * type by making sure it can be dereferenced. */ #define lockless_dereference(p) \ ({ \ typeof(p) _p1 = READ_ONCE(p); \ - __maybe_unused const void * const _p2 = _p1; \ + size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_p1); \ })
Re: Fw: [GIT PULL] locking fixes
On Thu, Aug 18, 2016 at 02:34:23PM -0700, Eric Dumazet wrote: > On Thu, 2016-08-18 at 14:02 -0700, Paul E. McKenney wrote: > > Hello, Eric, > > > > Does Johannes's recent change to lockless_dereference() help with > > the sparse issue? > > > > Thanx, Paul > > > > Hi Paul > > Yes indeed, this helps. > > Thanks ! Woo-hoo! ;-) Thanx, Paul
Re: Fw: [GIT PULL] locking fixes
On Thu, 2016-08-18 at 14:02 -0700, Paul E. McKenney wrote: > Hello, Eric, > > Does Johannes's recent change to lockless_dereference() help with > the sparse issue? > > Thanx, Paul > Hi Paul Yes indeed, this helps. Thanks ! > - Forwarded message from Ingo Molnar - > > Date: Thu, 18 Aug 2016 22:34:21 +0200 > From: Ingo Molnar > To: Linus Torvalds > Cc: linux-kernel@vger.kernel.org, Peter Zijlstra , > Thomas Gleixner , Andrew Morton > , "Paul E. McKenney" > Subject: [GIT PULL] locking fixes > > Linus, > > Please pull the latest locking-urgent-for-linus git tree from: > >git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git > locking-urgent-for-linus > ># HEAD: 112dc0c8069e5554e0ad29c58228f1e6ca49e13d locking/barriers: > Suppress sparse warnings in lockless_dereference() > > Two lockless_dereference() related fixes. > > Thanks, > > Ingo > > --> > Johannes Berg (2): > Revert "drm/fb-helper: Reduce READ_ONCE(master) to lockless_dereference" > locking/barriers: Suppress sparse warnings in lockless_dereference() > > > drivers/gpu/drm/drm_fb_helper.c | 2 +- > include/linux/compiler.h| 6 +++--- > 2 files changed, 4 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c > index ce54e985d91b..0a06f9120b5a 100644 > --- a/drivers/gpu/drm/drm_fb_helper.c > +++ b/drivers/gpu/drm/drm_fb_helper.c > @@ -464,7 +464,7 @@ static bool drm_fb_helper_is_bound(struct drm_fb_helper > *fb_helper) > > /* Sometimes user space wants everything disabled, so don't steal the >* display if there's a master. */ > - if (lockless_dereference(dev->master)) > + if (READ_ONCE(dev->master)) > return false; > > drm_for_each_crtc(crtc, dev) { > diff --git a/include/linux/compiler.h b/include/linux/compiler.h > index 1bb954842725..436aa4e42221 100644 > --- a/include/linux/compiler.h > +++ b/include/linux/compiler.h > @@ -527,13 +527,13 @@ static __always_inline void __write_once_size(volatile > void *p, void *res, int s > * object's lifetime is managed by something other than RCU. That > * "something other" might be reference counting or simple immortality. > * > - * The seemingly unused void * variable is to validate @p is indeed a pointer > - * type. All pointer types silently cast to void *. > + * The seemingly unused size_t variable is to validate @p is indeed a pointer > + * type by making sure it can be dereferenced. > */ > #define lockless_dereference(p) \ > ({ \ > typeof(p) _p1 = READ_ONCE(p); \ > - __maybe_unused const void * const _p2 = _p1; \ > + size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \ > smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ > (_p1); \ > }) > > > - End forwarded message - >
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: c2ace36b884de9330c4149064ae8d212d2e0d9ee locking/pvqspinlock: Fix a bug in qstat_read() Misc fixes: lockstat fix, futex fix on !MMU systems, big endian fix for qrwlocks and a race fix for pvqspinlocks. Thanks, Ingo --> Pan Xinhui (1): locking/pvqspinlock: Fix a bug in qstat_read() Thomas Gleixner (1): futex: Assume all mappings are private on !MMU systems Wanpeng Li (1): locking/pvqspinlock: Fix double hash race pan xinhui (1): locking/qrwlock: Fix write unlock bug on big endian systems include/asm-generic/qrwlock.h | 27 +-- kernel/futex.c | 23 ++- kernel/locking/qspinlock_paravirt.h | 2 +- kernel/locking/qspinlock_stat.h | 1 - 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index 54a8e65e18b6..7d026bf27713 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -25,7 +25,20 @@ #include /* - * Writer states & reader shift and bias + * Writer states & reader shift and bias. + * + * | +0 | +1 | +2 | +3 | + * +++++ + *LE | 78 | 56 | 34 | 12 | 0x12345678 + * +++++ + * | wr | rd | + * +++++ + * + * +++++ + *BE | 12 | 34 | 56 | 78 | 0x12345678 + * +++++ + * | rd | wr | + * +++++ */ #define_QW_WAITING 1 /* A writer is waiting */ #define_QW_LOCKED 0xff/* A writer holds the lock */ @@ -134,12 +147,22 @@ static inline void queued_read_unlock(struct qrwlock *lock) } /** + * __qrwlock_write_byte - retrieve the write byte address of a queue rwlock + * @lock : Pointer to queue rwlock structure + * Return: the write byte address of a queue rwlock + */ +static inline u8 *__qrwlock_write_byte(struct qrwlock *lock) +{ + return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); +} + +/** * queued_write_unlock - release write lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ static inline void queued_write_unlock(struct qrwlock *lock) { - smp_store_release((u8 *)&lock->cnts, 0); + smp_store_release(__qrwlock_write_byte(lock), 0); } /* diff --git a/kernel/futex.c b/kernel/futex.c index 33664f70e2d2..46cb3a301bc1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -179,7 +179,15 @@ int __read_mostly futex_cmpxchg_enabled; * Futex flags used to encode options to functions and preserve them across * restarts. */ -#define FLAGS_SHARED 0x01 +#ifdef CONFIG_MMU +# define FLAGS_SHARED 0x01 +#else +/* + * NOMMU does not have per process address space. Let the compiler optimize + * code away. + */ +# define FLAGS_SHARED 0x00 +#endif #define FLAGS_CLOCKRT 0x02 #define FLAGS_HAS_TIMEOUT 0x04 @@ -405,6 +413,16 @@ static void get_futex_key_refs(union futex_key *key) if (!key->both.ptr) return; + /* +* On MMU less systems futexes are always "private" as there is no per +* process address space. We need the smp wmb nevertheless - yes, +* arch/blackfin has MMU less SMP ... +*/ + if (!IS_ENABLED(CONFIG_MMU)) { + smp_mb(); /* explicit smp_mb(); (B) */ + return; + } + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: ihold(key->shared.inode); /* implies smp_mb(); (B) */ @@ -436,6 +454,9 @@ static void drop_futex_key_refs(union futex_key *key) return; } + if (!IS_ENABLED(CONFIG_MMU)) + return; + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: iput(key->shared.inode); diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 37649e69056c..8a99abf58080 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -450,7 +450,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) goto gotlock; } } - WRITE_ONCE(pn->state, vcpu_halted); + WRITE_ONCE(pn->state, vcpu_hashed); qstat_inc(qstat_pv_wait_head, true); qstat_inc(qstat_pv_wait_again, waitcnt); pv_wait(&l->locked, _Q_SLOW_VAL); diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index 22e025309845..b9d031516254 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -153,7 +15
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 077fa7aed17de5022e44bf07dbaf732078b7b5b2 futex: Calculate the futex key based on a tail page for file-based futexes Misc fixes: - a file-based futex fix - one more spin_unlock_wait() fix - a ww-mutex deadlock detection improvement/fix - and a raw_read_seqcount_latch() barrier fix. Thanks, Ingo --> Chris Wilson (1): locking/ww_mutex: Report recursive ww_mutex locking early Mel Gorman (1): futex: Calculate the futex key based on a tail page for file-based futexes Peter Zijlstra (2): locking/seqcount: Re-fix raw_read_seqcount_latch() locking/qspinlock: Fix spin_unlock_wait() some more include/asm-generic/qspinlock.h | 53 include/linux/seqlock.h | 7 +++-- kernel/futex.c | 14 +++--- kernel/locking/mutex.c | 9 --- kernel/locking/qspinlock.c | 60 + 5 files changed, 99 insertions(+), 44 deletions(-) diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 6bd05700d8c9..05f05f17a7c2 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -22,37 +22,33 @@ #include /** + * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock + * @lock : Pointer to queued spinlock structure + * + * There is a very slight possibility of live-lock if the lockers keep coming + * and the waiter is just unfortunate enough to not see any unlock state. + */ +#ifndef queued_spin_unlock_wait +extern void queued_spin_unlock_wait(struct qspinlock *lock); +#endif + +/** * queued_spin_is_locked - is the spinlock locked? * @lock: Pointer to queued spinlock structure * Return: 1 if it is locked, 0 otherwise */ +#ifndef queued_spin_is_locked static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { /* -* queued_spin_lock_slowpath() can ACQUIRE the lock before -* issuing the unordered store that sets _Q_LOCKED_VAL. -* -* See both smp_cond_acquire() sites for more detail. -* -* This however means that in code like: -* -* spin_lock(A) spin_lock(B) -* spin_unlock_wait(B)spin_is_locked(A) -* do_something() do_something() -* -* Both CPUs can end up running do_something() because the store -* setting _Q_LOCKED_VAL will pass through the loads in -* spin_unlock_wait() and/or spin_is_locked(). +* See queued_spin_unlock_wait(). * -* Avoid this by issuing a full memory barrier between the spin_lock() -* and the loads in spin_unlock_wait() and spin_is_locked(). -* -* Note that regular mutual exclusion doesn't care about this -* delayed store. +* Any !0 state indicates it is locked, even if _Q_LOCKED_VAL +* isn't immediately observable. */ - smp_mb(); - return atomic_read(&lock->val) & _Q_LOCKED_MASK; + return atomic_read(&lock->val); } +#endif /** * queued_spin_value_unlocked - is the spinlock structure unlocked? @@ -122,21 +118,6 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock) } #endif -/** - * queued_spin_unlock_wait - wait until current lock holder releases the lock - * @lock : Pointer to queued spinlock structure - * - * There is a very slight possibility of live-lock if the lockers keep coming - * and the waiter is just unfortunate enough to not see any unlock state. - */ -static inline void queued_spin_unlock_wait(struct qspinlock *lock) -{ - /* See queued_spin_is_locked() */ - smp_mb(); - while (atomic_read(&lock->val) & _Q_LOCKED_MASK) - cpu_relax(); -} - #ifndef virt_spin_lock static __always_inline bool virt_spin_lock(struct qspinlock *lock) { diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 7973a821ac58..ead97654c4e9 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -277,7 +277,10 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) static inline int raw_read_seqcount_latch(seqcount_t *s) { - return lockless_dereference(s)->sequence; + int seq = READ_ONCE(s->sequence); + /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ + smp_read_barrier_depends(); + return seq; } /** @@ -331,7 +334,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * unsigned seq, idx; * * do { - * seq = lockless_dereference(latch)->seq; + * seq = raw_read_seqcount_latch(&latch->seq); * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); diff --git a/kernel/futex.c b/kernel/futex.c inde
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 75dd602a5198a6e5f75534db52b6e6fbaabb33d1 lockdep: Fix lock_chain::base size Two lockdep fixes. Thanks, Ingo --> Boqun Feng (1): locking/lockdep: Fix ->irq_context calculation Peter Zijlstra (1): lockdep: Fix lock_chain::base size include/linux/lockdep.h | 8 +--- kernel/locking/lockdep.c | 37 ++--- kernel/locking/lockdep_proc.c | 2 ++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index d026b190c530..d10ef06971b5 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -196,9 +196,11 @@ struct lock_list { * We record lock dependency chains, so that we can cache them: */ struct lock_chain { - u8 irq_context; - u8 depth; - u16 base; + /* see BUILD_BUG_ON()s in lookup_chain_cache() */ + unsigned intirq_context : 2, + depth : 6, + base: 24; + /* 4 byte hole */ struct hlist_node entry; u64 chain_key; }; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index ed9410936a22..78c1c0ee6dc1 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2176,15 +2176,37 @@ static inline int lookup_chain_cache(struct task_struct *curr, chain->irq_context = hlock->irq_context; i = get_first_held_lock(curr, hlock); chain->depth = curr->lockdep_depth + 1 - i; + + BUILD_BUG_ON((1UL << 24) <= ARRAY_SIZE(chain_hlocks)); + BUILD_BUG_ON((1UL << 6) <= ARRAY_SIZE(curr->held_locks)); + BUILD_BUG_ON((1UL << 8*sizeof(chain_hlocks[0])) <= ARRAY_SIZE(lock_classes)); + if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { chain->base = nr_chain_hlocks; - nr_chain_hlocks += chain->depth; for (j = 0; j < chain->depth - 1; j++, i++) { int lock_id = curr->held_locks[i].class_idx - 1; chain_hlocks[chain->base + j] = lock_id; } chain_hlocks[chain->base + j] = class - lock_classes; } + + if (nr_chain_hlocks < MAX_LOCKDEP_CHAIN_HLOCKS) + nr_chain_hlocks += chain->depth; + +#ifdef CONFIG_DEBUG_LOCKDEP + /* +* Important for check_no_collision(). +*/ + if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) { + if (debug_locks_off_graph_unlock()) + return 0; + + print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!"); + dump_stack(); + return 0; + } +#endif + hlist_add_head_rcu(&chain->entry, hash_head); debug_atomic_inc(chain_lookup_misses); inc_chains(); @@ -2932,6 +2954,11 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) return 1; } +static inline unsigned int task_irq_context(struct task_struct *task) +{ + return 2 * !!task->hardirq_context + !!task->softirq_context; +} + static int separate_irq_context(struct task_struct *curr, struct held_lock *hlock) { @@ -2940,8 +2967,6 @@ static int separate_irq_context(struct task_struct *curr, /* * Keep track of points where we cross into an interrupt context: */ - hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + - curr->softirq_context; if (depth) { struct held_lock *prev_hlock; @@ -2973,6 +2998,11 @@ static inline int mark_irqflags(struct task_struct *curr, return 1; } +static inline unsigned int task_irq_context(struct task_struct *task) +{ + return 0; +} + static inline int separate_irq_context(struct task_struct *curr, struct held_lock *hlock) { @@ -3241,6 +3271,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->acquire_ip = ip; hlock->instance = lock; hlock->nest_lock = nest_lock; + hlock->irq_context = task_irq_context(curr); hlock->trylock = trylock; hlock->read = read; hlock->check = check; diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index dbb61a302548..a0f61effad25 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -141,6 +141,8 @@ static int lc_show(struct seq_file *m, void *v) int i; if (v == SEQ_START_TOKEN) { + if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS) + seq_printf
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: fba7cd681b6155e2d93e7862fcd6f970336b83c3 asm-generic/futex: Re-enable preemption in futex_atomic_cmpxchg_inatomic() Misc fixes: - pvqspinlocks: an instrumentation fix - futexes: a preempt-count vs. pagefault_disable decouple corner case fix - futexes: futex requeue plist race window fix - futexes: a futex UNLOCK_PI transaction fix for a corner case Thanks, Ingo --> Davidlohr Bueso (2): locking/pvqspinlock: Fix division by zero in qstat_read() futex: Acknowledge a new waiter in counter before plist Romain Perier (1): asm-generic/futex: Re-enable preemption in futex_atomic_cmpxchg_inatomic() Sebastian Andrzej Siewior (1): futex: Handle unlock_pi race gracefully include/asm-generic/futex.h | 8 ++-- kernel/futex.c | 27 +++ kernel/locking/qspinlock_stat.h | 8 +--- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index e56272c919b5..bf2d34c9d804 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -108,11 +108,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 val; preempt_disable(); - if (unlikely(get_user(val, uaddr) != 0)) + if (unlikely(get_user(val, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } - if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) + if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } *uval = val; preempt_enable(); diff --git a/kernel/futex.c b/kernel/futex.c index a5d2e74c89e0..c20f06f38ef3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1295,10 +1295,20 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, if (unlikely(should_fail_futex(true))) ret = -EFAULT; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { ret = -EFAULT; - else if (curval != uval) - ret = -EINVAL; + } else if (curval != uval) { + /* +* If a unconditional UNLOCK_PI operation (user space did not +* try the TID->0 transition) raced with a waiter setting the +* FUTEX_WAITERS flag between get_user() and locking the hash +* bucket lock, retry the operation. +*/ + if ((FUTEX_TID_MASK & curval) == uval) + ret = -EAGAIN; + else + ret = -EINVAL; + } if (ret) { raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); return ret; @@ -1525,8 +1535,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, if (likely(&hb1->chain != &hb2->chain)) { plist_del(&q->list, &hb1->chain); hb_waiters_dec(hb1); - plist_add(&q->list, &hb2->chain); hb_waiters_inc(hb2); + plist_add(&q->list, &hb2->chain); q->lock_ptr = &hb2->lock; } get_futex_key_refs(key2); @@ -2623,6 +2633,15 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) if (ret == -EFAULT) goto pi_faulted; /* +* A unconditional UNLOCK_PI op raced against a waiter +* setting the FUTEX_WAITERS bit. Try again. +*/ + if (ret == -EAGAIN) { + spin_unlock(&hb->lock); + put_futex_key(&key); + goto retry; + } + /* * wake_futex_pi has detected invalid state. Tell user * space. */ diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index eb2a2c9bc3fc..d734b7502001 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -136,10 +136,12 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf, } if (counter == qstat_pv_hash_hops) { - u64 frac; + u64 frac = 0; - frac = 100ULL * do_div(stat, kicks); - frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); + if (kicks) { + frac = 100ULL * do_div(stat, kicks); + frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); + } /* * Return a X.XX decimal number
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: f75d48644c56a31731d17fa693c8175328957e1d bitops: Do not default to __clear_bit() for __clear_bit_unlock() Documentation updates and a bitops ordering fix. Thanks, Ingo --> Paul E. McKenney (7): documentation: Fix control dependency and identical stores documentation: Fix memory-barriers.txt section references documentation: Remove obsolete reference to RCU-protected indexes documentation: Subsequent writes ordered by rcu_dereference() documentation: Distinguish between local and global transitivity documentation: Add alternative release-acquire outcome documentation: Transitivity is not cumulativity Peter Zijlstra (1): bitops: Do not default to __clear_bit() for __clear_bit_unlock() SeongJae Park (1): documentation: Clarify compiler store-fusion example Documentation/memory-barriers.txt | 141 +++--- include/asm-generic/bitops/lock.h | 14 ++-- 2 files changed, 123 insertions(+), 32 deletions(-) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 904ee42d078e..3729cbe60e41 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -232,7 +232,7 @@ GUARANTEES with memory references that are not protected by READ_ONCE() and WRITE_ONCE(). Without them, the compiler is within its rights to do all sorts of "creative" transformations, which are covered in - the Compiler Barrier section. + the COMPILER BARRIER section. (*) It _must_not_ be assumed that independent loads and stores will be issued in the order given. This means that for: @@ -555,6 +555,30 @@ To deal with this, a data dependency barrier or better must be inserted This enforces the occurrence of one of the two implications, and prevents the third possibility from arising. +A data-dependency barrier must also order against dependent writes: + + CPU 1 CPU 2 + === === + { A == 1, B == 2, C = 3, P == &A, Q == &C } + B = 4; + + WRITE_ONCE(P, &B); + Q = READ_ONCE(P); + + *Q = 5; + +The data-dependency barrier must order the read into Q with the store +into *Q. This prohibits this outcome: + + (Q == B) && (B == 4) + +Please note that this pattern should be rare. After all, the whole point +of dependency ordering is to -prevent- writes to the data structure, along +with the expensive cache misses associated with those writes. This pattern +can be used to record rare error conditions and the like, and the ordering +prevents such records from being lost. + + [!] Note that this extremely counterintuitive situation arises most easily on machines with split caches, so that, for example, one cache bank processes even-numbered cache lines and the other bank processes odd-numbered cache @@ -565,21 +589,6 @@ odd-numbered bank is idle, one can see the new value of the pointer P (&B), but the old value of the variable B (2). -Another example of where data dependency barriers might be required is where a -number is read from memory and then used to calculate the index for an array -access: - - CPU 1 CPU 2 - === === - { M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 } - M[1] = 4; - - WRITE_ONCE(P, 1); - Q = READ_ONCE(P); - - D = M[Q]; - - The data dependency barrier is very important to the RCU system, for example. See rcu_assign_pointer() and rcu_dereference() in include/linux/rcupdate.h. This permits the current target of an RCU'd @@ -800,9 +809,13 @@ site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html. use smp_rmb(), smp_wmb(), or, in the case of prior stores and later loads, smp_mb(). - (*) If both legs of the "if" statement begin with identical stores - to the same variable, a barrier() statement is required at the - beginning of each leg of the "if" statement. + (*) If both legs of the "if" statement begin with identical stores to + the same variable, then those stores must be ordered, either by + preceding both of them with smp_mb() or by using smp_store_release() + to carry out the stores. Please note that it is -not- sufficient + to use barrier() at beginning of each leg of the "if" statement, + as optimizing compilers do not necessarily respect barrier() + in this case. (*) Control dependencies require at least one run-time conditional between the prior load and the subsequent store, and this @@ -814,7 +827,7 @@ site: https://www.cl.cam
[GIT pull] locking fixes for 4.3
Linus, please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus A single bugfix for lockdep: - Preserve the pinning counter when rebuilding the lock stack. Thanks, tglx --> Peter Zijlstra (1): locking/lockdep: Fix hlock->pin_count reset on lock stack rebuilds kernel/locking/lockdep.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 8acfbf773e06..4e49cc4c9952 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3068,7 +3068,7 @@ static int __lock_is_held(struct lockdep_map *lock); static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, int hardirqs_off, struct lockdep_map *nest_lock, unsigned long ip, - int references) + int references, int pin_count) { struct task_struct *curr = current; struct lock_class *class = NULL; @@ -3157,7 +3157,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->waittime_stamp = 0; hlock->holdtime_stamp = lockstat_clock(); #endif - hlock->pin_count = 0; + hlock->pin_count = pin_count; if (check && !mark_irqflags(curr, hlock)) return 0; @@ -3343,7 +3343,7 @@ found_it: hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, hlock->nest_lock, hlock->acquire_ip, - hlock->references)) + hlock->references, hlock->pin_count)) return 0; } @@ -3433,7 +3433,7 @@ found_it: hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, hlock->nest_lock, hlock->acquire_ip, - hlock->references)) + hlock->references, hlock->pin_count)) return 0; } @@ -3583,7 +3583,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, current->lockdep_recursion = 1; trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); __lock_acquire(lock, subclass, trylock, read, check, - irqs_disabled_flags(flags), nest_lock, ip, 0); + irqs_disabled_flags(flags), nest_lock, ip, 0, 0); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 1975dbc276c6ab62230cf4f9df5ddc9ff0e0e473 locking/static_keys: Fix up the static keys documentation Spinlock performance regression fix, plus documentation fixes. Thanks, Ingo --> Jonathan Corbet (2): locking/static_keys: Fix a silly typo locking/static_keys: Fix up the static keys documentation Peter Zijlstra (2): locking/qspinlock/x86: Fix performance regression under unaccelerated VMs locking/qspinlock/x86: Only emit the test-and-set fallback when building guest support Documentation/static-keys.txt| 4 ++-- arch/x86/include/asm/qspinlock.h | 19 ++- include/asm-generic/qspinlock.h | 4 ++-- include/linux/jump_label.h | 10 -- kernel/locking/qspinlock.c | 2 +- 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index f4cb0b2d5cd7..477927becacb 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -15,8 +15,8 @@ static_key_false() DEFINE_STATIC_KEY_TRUE(key); DEFINE_STATIC_KEY_FALSE(key); -static_key_likely() -statick_key_unlikely() +static_branch_likely() +static_branch_unlikely() 0) Abstract diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 9d51fae1cba3..eaba08076030 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -39,18 +39,27 @@ static inline void queued_spin_unlock(struct qspinlock *lock) } #endif -#define virt_queued_spin_lock virt_queued_spin_lock - -static inline bool virt_queued_spin_lock(struct qspinlock *lock) +#ifdef CONFIG_PARAVIRT +#define virt_spin_lock virt_spin_lock +static inline bool virt_spin_lock(struct qspinlock *lock) { if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return false; - while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0) - cpu_relax(); + /* +* On hypervisors without PARAVIRT_SPINLOCKS support we fall +* back to a Test-and-Set spinlock, because fair locks have +* horrible lock 'holder' preemption issues. +*/ + + do { + while (atomic_read(&lock->val) != 0) + cpu_relax(); + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); return true; } +#endif /* CONFIG_PARAVIRT */ #include diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 83bfb87f5bf1..e2aadbc7151f 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -111,8 +111,8 @@ static inline void queued_spin_unlock_wait(struct qspinlock *lock) cpu_relax(); } -#ifndef virt_queued_spin_lock -static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock) +#ifndef virt_spin_lock +static __always_inline bool virt_spin_lock(struct qspinlock *lock) { return false; } diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7f653e8f6690..f1094238ab2a 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -21,8 +21,8 @@ * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); - * static_key_likely() - * statick_key_unlikely() + * static_branch_likely() + * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we @@ -45,12 +45,10 @@ * statement, setting the key to true requires us to patch in a jump * to the out-of-line of true branch. * - * In addtion to static_branch_{enable,disable}, we can also reference count + * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and - * static_branch_dec() as a 'make more false'. The inc()/dec() - * interface is meant to be used exclusively from the inc()/dec() for a given - * key. + * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 337c8818541d..87e9ce6a63c5 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -289,7 +289,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) if (pv_enabled()) goto queue; - if (virt_queued_spin_lock(lock)) + if (virt_spin_lock(lock)) return; /* -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info
[GIT PULL] locking fixes left over from v4.2
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 33fef662d20a8a98bafa6b2430b845def30f616a tools/liblockdep: Use the rbtree header provided by common tools headers Three liblockdep fixes left over from the v4.2 cycle. Thanks, Ingo --> Sasha Levin (3): tools: Restore export.h tools/liblockdep: Correct macro for WARN tools/liblockdep: Use the rbtree header provided by common tools headers tools/include/linux/export.h | 10 ++ tools/lib/lockdep/preload.c | 2 +- tools/lib/lockdep/uinclude/linux/kernel.h | 2 +- tools/lib/lockdep/uinclude/linux/rbtree.h | 1 - 4 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 tools/include/linux/export.h delete mode 100644 tools/lib/lockdep/uinclude/linux/rbtree.h diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h new file mode 100644 index ..d07e586b9ba0 --- /dev/null +++ b/tools/include/linux/export.h @@ -0,0 +1,10 @@ +#ifndef _TOOLS_LINUX_EXPORT_H_ +#define _TOOLS_LINUX_EXPORT_H_ + +#define EXPORT_SYMBOL(sym) +#define EXPORT_SYMBOL_GPL(sym) +#define EXPORT_SYMBOL_GPL_FUTURE(sym) +#define EXPORT_UNUSED_SYMBOL(sym) +#define EXPORT_UNUSED_SYMBOL_GPL(sym) + +#endif diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index 0b0112c80f22..21cdf869a01b 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -5,7 +5,7 @@ #include #include #include "include/liblockdep/mutex.h" -#include "../../../include/linux/rbtree.h" +#include "../../include/linux/rbtree.h" /** * struct lock_lookup - liblockdep's view of a single unique lock diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h index cd2cc59a5da7..276c7a8b2ed1 100644 --- a/tools/lib/lockdep/uinclude/linux/kernel.h +++ b/tools/lib/lockdep/uinclude/linux/kernel.h @@ -23,7 +23,7 @@ #define WARN_ON(x) (x) #define WARN_ON_ONCE(x) (x) #define likely(x) (x) -#define WARN(x, y, z) (x) +#define WARN(x, y...) (x) #define uninitialized_var(x) x #define __init #define noinline diff --git a/tools/lib/lockdep/uinclude/linux/rbtree.h b/tools/lib/lockdep/uinclude/linux/rbtree.h deleted file mode 100644 index 965901db4862.. --- a/tools/lib/lockdep/uinclude/linux/rbtree.h +++ /dev/null @@ -1 +0,0 @@ -#include "../../../include/linux/rbtree.h" -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 8053871d0f7f67c7efb7f226ef031f78877d6625 smp: Fix smp_call_function_single_async() locking Two fixes: your smp-call fix and a lockdep fix. Thanks, Ingo --> Linus Torvalds (1): smp: Fix smp_call_function_single_async() locking Peter Zijlstra (1): lockdep: Make print_lock() robust against concurrent release kernel/locking/lockdep.c | 16 +- kernel/smp.c | 78 +--- 2 files changed, 62 insertions(+), 32 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index ba77ab5f64dd..a0831e1b99f4 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -551,7 +551,21 @@ static void print_lockdep_cache(struct lockdep_map *lock) static void print_lock(struct held_lock *hlock) { - print_lock_name(hlock_class(hlock)); + /* +* We can be called locklessly through debug_show_all_locks() so be +* extra careful, the hlock might have been released and cleared. +*/ + unsigned int class_idx = hlock->class_idx; + + /* Don't re-read hlock->class_idx, can't use READ_ONCE() on bitfields: */ + barrier(); + + if (!class_idx || (class_idx - 1) >= MAX_LOCKDEP_KEYS) { + printk("\n"); + return; + } + + print_lock_name(lock_classes + class_idx - 1); printk(", at: "); print_ip_sym(hlock->acquire_ip); } diff --git a/kernel/smp.c b/kernel/smp.c index f38a1e692259..2aaac2c47683 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -19,7 +19,7 @@ enum { CSD_FLAG_LOCK = 0x01, - CSD_FLAG_WAIT = 0x02, + CSD_FLAG_SYNCHRONOUS= 0x02, }; struct call_function_data { @@ -107,7 +107,7 @@ void __init call_function_init(void) */ static void csd_lock_wait(struct call_single_data *csd) { - while (csd->flags & CSD_FLAG_LOCK) + while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK) cpu_relax(); } @@ -121,19 +121,17 @@ static void csd_lock(struct call_single_data *csd) * to ->flags with any subsequent assignments to other * fields of the specified call_single_data structure: */ - smp_mb(); + smp_wmb(); } static void csd_unlock(struct call_single_data *csd) { - WARN_ON((csd->flags & CSD_FLAG_WAIT) && !(csd->flags & CSD_FLAG_LOCK)); + WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); /* * ensure we're all done before releasing data: */ - smp_mb(); - - csd->flags &= ~CSD_FLAG_LOCK; + smp_store_release(&csd->flags, 0); } static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); @@ -144,13 +142,16 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); * ->func, ->info, and ->flags set. */ static int generic_exec_single(int cpu, struct call_single_data *csd, - smp_call_func_t func, void *info, int wait) + smp_call_func_t func, void *info) { - struct call_single_data csd_stack = { .flags = 0 }; - unsigned long flags; - - if (cpu == smp_processor_id()) { + unsigned long flags; + + /* +* We can unlock early even for the synchronous on-stack case, +* since we're doing this from the same CPU.. +*/ + csd_unlock(csd); local_irq_save(flags); func(info); local_irq_restore(flags); @@ -161,21 +162,9 @@ static int generic_exec_single(int cpu, struct call_single_data *csd, if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) return -ENXIO; - - if (!csd) { - csd = &csd_stack; - if (!wait) - csd = this_cpu_ptr(&csd_data); - } - - csd_lock(csd); - csd->func = func; csd->info = info; - if (wait) - csd->flags |= CSD_FLAG_WAIT; - /* * The list addition should be visible before sending the IPI * handler locks the list to pull the entry off it because of @@ -190,9 +179,6 @@ static int generic_exec_single(int cpu, struct call_single_data *csd, if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) arch_send_call_function_single_ipi(cpu); - if (wait) - csd_lock_wait(csd); - return 0; } @@ -250,8 +236,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) } llist_for_each_entry_safe(csd, csd_next, entry, llist) { - csd->func(csd->info); - csd_unlock(csd); + smp_call_func_t func = csd->func; + void *info = csd->info; + +
Re: [GIT PULL] locking fixes
Am 21.02.2015 um 02:51 schrieb Linus Torvalds: > So here's my try at fixing READ_ONCE() so that it is happy with 'const' > sources. > > It is entirely untested. Comments/testing? > > Christian, I guess I could just have forced a cast instead of the > union. I'd like you to take a look at this, because right now it's > holding up me pulling from Ingo. Sorry for the too late for rc1 answer, but I was traveling the last 4 days. Hmm, some autocasting feels better, but I could not come up with a proper solution that works for all cases (e.g. I tried "__auto_type __val = x" or "typeof(x * 0)" to make this lvalue and rvalue, but all variants failed in one or the other way). Unless I can come up with a better solution your union patch is probably the best way to go and rc1 seems to work. > > And Ingo, I think you need to add some kind of test for "horrible new > warnings". I think your pull request *worked*, but the tens of lines > of new warnings it generates is unacceptable, and will just cause me > to undo the pull if I notice in time (like I did this time). I was getting several complaints from the linux-next buildbots about new sparse warnings, compile warning and so on when doing this rework, e.g. commit c5b19946eb76c675 ("kernel: Fix sparse warning for ACCESS_ONCE") fixes two of those warnings. So I am somewhat surprised that I never saw this as I am also following the KVM list. turns out that arch/x86/kernel/kvm.c does not CC the kvm list in get_maintainers.pl. Maybe I should push something like that to Paolo/Marcelo. --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5574,6 +5574,7 @@ S:Supported F: Documentation/*/kvm*.txt F: Documentation/virtual/kvm/ F: arch/*/kvm/ +F: arch/x86/kernel/kvm.c F: arch/*/include/asm/kvm* F: include/linux/kvm* F: include/uapi/linux/kvm* Christian > > Linus > > On Fri, Feb 20, 2015 at 4:03 PM, Linus Torvalds > wrote: >> How does this work for you at all? >> >> On Fri, Feb 20, 2015 at 5:37 AM, Ingo Molnar wrote: >>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c >>> index 94f643484300..e354cc6446ab 100644 >>> --- a/arch/x86/kernel/kvm.c >>> +++ b/arch/x86/kernel/kvm.c >>> @@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, >>> __ticket_t ticket) >>> add_stats(RELEASED_SLOW, 1); >>> for_each_cpu(cpu, &waiting_cpus) { >>> const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, >>> cpu); >>> - if (ACCESS_ONCE(w->lock) == lock && >>> - ACCESS_ONCE(w->want) == ticket) { >>> + if (READ_ONCE(w->lock) == lock && >>> + READ_ONCE(w->want) == ticket) { >>> add_stats(RELEASED_SLOW_KICKED, 1); >>> kvm_kick_cpu(cpu); >>> break; >> >> I get horrible compile warnings from this, because of how 'w' is a >> pointer to a 'const' structure, which then causes things like >> >> include/linux/compiler.h:262:39: warning: passing argument 1 of >> ‘__read_once_size’ discards ‘const’ qualifier from pointer target type >> ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); >> __val; }) >> >> which is fairly hard to avoid (looks like it might need a union) >> >>Linus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [GIT PULL] locking fixes
* Ingo Molnar wrote: > Oh, crap, I just triggered the warnings with: > > x86 defconfig + 'make kvmconfig' + CONFIG_PARAVIRT_SPINLOCKS=y. > > My bad, mea culpa ... > > [...] > But you do allmodconfig tests and look at warnings so > there it probably showed up straight away? Yeah, so an allmodconfig 'make bzImage' build shows these - and only these warnings so they absolutely stand out. 'make modules' is noisier but still reviewable. I'll try to code up some robust automation to fix this problem on my side. Sorry about this! Thanks, Ingo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [GIT PULL] locking fixes
* Ingo Molnar wrote: > ( I've attached the build log that I saw before sending you > the pull request. ) > > The kbuild test robot which tests for warnings didn't > report one to me either. I tested a few other configs as > well: > > x86 defconfig (PARAVIRT=n):0 warnings > 'make kvmconfig' (PARAVIRT=y): 0 warnings > x86 bootable config (attached):0 warnings Oh, crap, I just triggered the warnings with: x86 defconfig + 'make kvmconfig' + CONFIG_PARAVIRT_SPINLOCKS=y. My bad, mea culpa ... I just tested your patch.diff and it fixed the warnings: Tested-by: Ingo Molnar Extra sad: I _did_ build and boot test paravirt spinlocks as well, but only within a test environment that does not show warnings (in general there's too many of them, even defconfig is rarely warnings free, although it is with the very latest kernel) and my burn-in test before the pull request didn't have paravirt spinlocks turned on. But you do allmodconfig tests and look at warnings so there it probably showed up straight away? Thanks, Ingo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [GIT PULL] locking fixes
So here's my try at fixing READ_ONCE() so that it is happy with 'const' sources. It is entirely untested. Comments/testing? Christian, I guess I could just have forced a cast instead of the union. I'd like you to take a look at this, because right now it's holding up me pulling from Ingo. And Ingo, I think you need to add some kind of test for "horrible new warnings". I think your pull request *worked*, but the tens of lines of new warnings it generates is unacceptable, and will just cause me to undo the pull if I notice in time (like I did this time). Linus On Fri, Feb 20, 2015 at 4:03 PM, Linus Torvalds wrote: > How does this work for you at all? > > On Fri, Feb 20, 2015 at 5:37 AM, Ingo Molnar wrote: >> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c >> index 94f643484300..e354cc6446ab 100644 >> --- a/arch/x86/kernel/kvm.c >> +++ b/arch/x86/kernel/kvm.c >> @@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, >> __ticket_t ticket) >> add_stats(RELEASED_SLOW, 1); >> for_each_cpu(cpu, &waiting_cpus) { >> const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, >> cpu); >> - if (ACCESS_ONCE(w->lock) == lock && >> - ACCESS_ONCE(w->want) == ticket) { >> + if (READ_ONCE(w->lock) == lock && >> + READ_ONCE(w->want) == ticket) { >> add_stats(RELEASED_SLOW_KICKED, 1); >> kvm_kick_cpu(cpu); >> break; > > I get horrible compile warnings from this, because of how 'w' is a > pointer to a 'const' structure, which then causes things like > > include/linux/compiler.h:262:39: warning: passing argument 1 of > ‘__read_once_size’ discards ‘const’ qualifier from pointer target type > ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; > }) > > which is fairly hard to avoid (looks like it might need a union) > >Linus commit 1cb20581b0a32673fa3d056829ca89db4fa0de09 Author: Linus Torvalds Date: Fri Feb 20 15:46:31 2015 -0800 kernel: make READ_ONCE() valid on const pointers There is certainly nothing wrong with using READ_ONCE() on a const pointer, but the helper function __read_once_size() would cause warnings because it would drop the 'const' qualifier. Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d1ec10a940ff..312cf710e26a 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -202,7 +202,7 @@ static __always_inline void data_access_exceeds_word_size(void) { } -static __always_inline void __read_once_size(volatile void *p, void *res, int size) +static __always_inline void __read_once_size(const volatile void *p, void *res, int size) { switch (size) { case 1: *(__u8 *)res = *(volatile __u8 *)p; break; @@ -259,7 +259,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s */ #define READ_ONCE(x) \ - ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; }) + ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof((x))); __u.__val; }) #define WRITE_ONCE(x, val) \ ({ typeof(x) __val; __val = val; __write_once_size(&x, &__val, sizeof(__val)); __val; })
Re: [GIT PULL] locking fixes
How does this work for you at all? On Fri, Feb 20, 2015 at 5:37 AM, Ingo Molnar wrote: > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > index 94f643484300..e354cc6446ab 100644 > --- a/arch/x86/kernel/kvm.c > +++ b/arch/x86/kernel/kvm.c > @@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, > __ticket_t ticket) > add_stats(RELEASED_SLOW, 1); > for_each_cpu(cpu, &waiting_cpus) { > const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, > cpu); > - if (ACCESS_ONCE(w->lock) == lock && > - ACCESS_ONCE(w->want) == ticket) { > + if (READ_ONCE(w->lock) == lock && > + READ_ONCE(w->want) == ticket) { > add_stats(RELEASED_SLOW_KICKED, 1); > kvm_kick_cpu(cpu); > break; I get horrible compile warnings from this, because of how 'w' is a pointer to a 'const' structure, which then causes things like include/linux/compiler.h:262:39: warning: passing argument 1 of ‘__read_once_size’ discards ‘const’ qualifier from pointer target type ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; }) which is fairly hard to avoid (looks like it might need a union) Linus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: d6abfdb2022368d8c6c4be3f11a06656601a6cc2 x86/spinlocks/paravirt: Fix memory corruption on unlock Two fixes: the paravirt spin_unlock() corruption/crash fix, and an rtmutex NULL dereference crash fix. Thanks, Ingo --> Raghavendra K T (1): x86/spinlocks/paravirt: Fix memory corruption on unlock Sebastian Andrzej Siewior (1): locking/rtmutex: Avoid a NULL pointer dereference on deadlock arch/x86/include/asm/spinlock.h | 94 - arch/x86/kernel/kvm.c | 13 -- arch/x86/xen/spinlock.c | 13 -- kernel/locking/rtmutex.c| 3 +- 4 files changed, 66 insertions(+), 57 deletions(-) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 625660f8a2fc..cf87de3fc390 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key); static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) { - set_bit(0, (volatile unsigned long *)&lock->tickets.tail); + set_bit(0, (volatile unsigned long *)&lock->tickets.head); } #else /* !CONFIG_PARAVIRT_SPINLOCKS */ @@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock, } #endif /* CONFIG_PARAVIRT_SPINLOCKS */ +static inline int __tickets_equal(__ticket_t one, __ticket_t two) +{ + return !((one ^ two) & ~TICKET_SLOWPATH_FLAG); +} + +static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock, + __ticket_t head) +{ + if (head & TICKET_SLOWPATH_FLAG) { + arch_spinlock_t old, new; + + old.tickets.head = head; + new.tickets.head = head & ~TICKET_SLOWPATH_FLAG; + old.tickets.tail = new.tickets.head + TICKET_LOCK_INC; + new.tickets.tail = old.tickets.tail; + + /* try to clear slowpath flag when there are no contenders */ + cmpxchg(&lock->head_tail, old.head_tail, new.head_tail); + } +} static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) { - return lock.tickets.head == lock.tickets.tail; + return __tickets_equal(lock.tickets.head, lock.tickets.tail); } /* @@ -87,18 +107,21 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock) if (likely(inc.head == inc.tail)) goto out; - inc.tail &= ~TICKET_SLOWPATH_FLAG; for (;;) { unsigned count = SPIN_THRESHOLD; do { - if (READ_ONCE(lock->tickets.head) == inc.tail) - goto out; + inc.head = READ_ONCE(lock->tickets.head); + if (__tickets_equal(inc.head, inc.tail)) + goto clear_slowpath; cpu_relax(); } while (--count); __ticket_lock_spinning(lock, inc.tail); } -out: barrier(); /* make sure nothing creeps before the lock is taken */ +clear_slowpath: + __ticket_check_and_clear_slowpath(lock, inc.head); +out: + barrier(); /* make sure nothing creeps before the lock is taken */ } static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) @@ -106,56 +129,30 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) arch_spinlock_t old, new; old.tickets = READ_ONCE(lock->tickets); - if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) + if (!__tickets_equal(old.tickets.head, old.tickets.tail)) return 0; new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT); + new.head_tail &= ~TICKET_SLOWPATH_FLAG; /* cmpxchg is a full barrier, so nothing can move before it */ return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; } -static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock, - arch_spinlock_t old) -{ - arch_spinlock_t new; - - BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS); - - /* Perform the unlock on the "before" copy */ - old.tickets.head += TICKET_LOCK_INC; - - /* Clear the slowpath flag */ - new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT); - - /* -* If the lock is uncontended, clear the flag - use cmpxchg in -* case it changes behind our back though. -*/ - if (new.tickets.head != new.tickets.tail || - cmpxchg(&lock->head_tail, old.head_tail, - new.head_tail) != old.head_tail) { - /* -
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 8e654dd68f95ac5f990ed21b2b31967dacbe563f Merge branch 'core/urgent' into locking/urgent, to collect all pending locking fixes A liblockdep fix and a mutex_unlock() mutex-debugging fix. Thanks, Ingo --> Chris Wilson (1): mutex: Always clear owner field upon mutex_unlock() Kirill Smelkov (1): tools/liblockdep: Fix debug_check thinko in mutex destroy kernel/locking/mutex-debug.c | 2 +- tools/lib/lockdep/preload.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 5cf6731b98e9..3ef3736002d8 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -80,13 +80,13 @@ void debug_mutex_unlock(struct mutex *lock) DEBUG_LOCKS_WARN_ON(lock->owner != current); DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); - mutex_clear_owner(lock); } /* * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug * mutexes so that we can do it here after we've verified state. */ + mutex_clear_owner(lock); atomic_set(&lock->count, 1); } diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index 6f803609e498..0b0112c80f22 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -317,7 +317,7 @@ int pthread_mutex_destroy(pthread_mutex_t *mutex) * * TODO: Hook into free() and add that check there as well. */ - debug_check_no_locks_freed(mutex, mutex + sizeof(*mutex)); + debug_check_no_locks_freed(mutex, sizeof(*mutex)); __del_lock(__get_lock(mutex)); return ll_pthread_mutex_destroy(mutex); } @@ -341,7 +341,7 @@ int pthread_rwlock_destroy(pthread_rwlock_t *rwlock) { try_init_preload(); - debug_check_no_locks_freed(rwlock, rwlock + sizeof(*rwlock)); + debug_check_no_locks_freed(rwlock, sizeof(*rwlock)); __del_lock(__get_lock(rwlock)); return ll_pthread_rwlock_destroy(rwlock); } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT PULL] locking fixes
Linus, Please pull the latest locking-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus # HEAD: 30a6b8031fe14031ab27c1fa3483cb9780e7f63c futex: Fix a race condition between REQUEUE_PI and task death This tree contains two futex fixes: one fixes a race condition, the other clarifies shared/private futex comments. Thanks, Ingo --> Brian Silverman (1): futex: Fix a race condition between REQUEUE_PI and task death Davidlohr Bueso (1): futex: Mention key referencing differences between shared and private futexes kernel/futex.c | 36 +--- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index f3a3a071283c..63678b573d61 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -143,9 +143,8 @@ * * Where (A) orders the waiters increment and the futex value read through * atomic operations (see hb_waiters_inc) and where (B) orders the write - * to futex and the waiters read -- this is done by the barriers in - * get_futex_key_refs(), through either ihold or atomic_inc, depending on the - * futex type. + * to futex and the waiters read -- this is done by the barriers for both + * shared and private futexes in get_futex_key_refs(). * * This yields the following case (where X:=waiters, Y:=futex): * @@ -344,13 +343,20 @@ static void get_futex_key_refs(union futex_key *key) futex_get_mm(key); /* implies MB (B) */ break; default: + /* +* Private futexes do not hold reference on an inode or +* mm, therefore the only purpose of calling get_futex_key_refs +* is because we need the barrier for the lockless waiter check. +*/ smp_mb(); /* explicit MB (B) */ } } /* * Drop a reference to the resource addressed by a key. - * The hash bucket spinlock must not be held. + * The hash bucket spinlock must not be held. This is + * a no-op for private futexes, see comment in the get + * counterpart. */ static void drop_futex_key_refs(union futex_key *key) { @@ -641,8 +647,14 @@ static struct futex_pi_state * alloc_pi_state(void) return pi_state; } +/* + * Must be called with the hb lock held. + */ static void free_pi_state(struct futex_pi_state *pi_state) { + if (!pi_state) + return; + if (!atomic_dec_and_test(&pi_state->refcount)) return; @@ -1521,15 +1533,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, } retry: - if (pi_state != NULL) { - /* -* We will have to lookup the pi_state again, so free this one -* to keep the accounting correct. -*/ - free_pi_state(pi_state); - pi_state = NULL; - } - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; @@ -1619,6 +1622,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, case 0: break; case -EFAULT: + free_pi_state(pi_state); + pi_state = NULL; double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); put_futex_key(&key2); @@ -1634,6 +1639,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, * exit to complete. * - The user space value changed. */ + free_pi_state(pi_state); + pi_state = NULL; double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); put_futex_key(&key2); @@ -1710,6 +1717,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, } out_unlock: + free_pi_state(pi_state); double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); @@ -1727,8 +1735,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, out_put_key1: put_futex_key(&key1); out: - if (pi_state != NULL) - free_pi_state(pi_state); return ret ? ret : task_count; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT pull] locking fixes for 3.16
Linus, please pull the latest locking-urgent-for-linus.patch git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-for-linus.patch Another three patches to make the rtmutex code more robust. That's the last urgent fallout from the big futex/rtmutex investigation. Thanks, tglx --> Thomas Gleixner (3): rtmutex: Handle deadlock detection smarter rtmutex: Detect changes in the pi lock chain rtmutex: Plug slow unlock race kernel/locking/rtmutex-debug.h | 5 + kernel/locking/rtmutex.c | 243 +++-- kernel/locking/rtmutex.h | 5 + 3 files changed, 218 insertions(+), 35 deletions(-) diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h index 14193d5..ab29b6a 100644 --- a/kernel/locking/rtmutex-debug.h +++ b/kernel/locking/rtmutex-debug.h @@ -31,3 +31,8 @@ static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, { return (waiter != NULL); } + +static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) +{ + debug_rt_mutex_print_deadlock(w); +} diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index a620d4d..fc60594 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -83,6 +83,47 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) owner = *p; } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); } + +/* + * Safe fastpath aware unlock: + * 1) Clear the waiters bit + * 2) Drop lock->wait_lock + * 3) Try to unlock the lock with cmpxchg + */ +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) + __releases(lock->wait_lock) +{ + struct task_struct *owner = rt_mutex_owner(lock); + + clear_rt_mutex_waiters(lock); + raw_spin_unlock(&lock->wait_lock); + /* +* If a new waiter comes in between the unlock and the cmpxchg +* we have two situations: +* +* unlock(wait_lock); +* lock(wait_lock); +* cmpxchg(p, owner, 0) == owner +* mark_rt_mutex_waiters(lock); +* acquire(lock); +* or: +* +* unlock(wait_lock); +* lock(wait_lock); +* mark_rt_mutex_waiters(lock); +* +* cmpxchg(p, owner, 0) != owner +* enqueue_waiter(); +* unlock(wait_lock); +* lock(wait_lock); +* wake waiter(); +* unlock(wait_lock); +* lock(wait_lock); +* acquire(lock); +*/ + return rt_mutex_cmpxchg(lock, owner, NULL); +} + #else # define rt_mutex_cmpxchg(l,c,n) (0) static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) @@ -90,6 +131,17 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) lock->owner = (struct task_struct *) ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); } + +/* + * Simple slow path only version: lock->owner is protected by lock->wait_lock. + */ +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) + __releases(lock->wait_lock) +{ + lock->owner = NULL; + raw_spin_unlock(&lock->wait_lock); + return true; +} #endif static inline int @@ -260,27 +312,36 @@ static void rt_mutex_adjust_prio(struct task_struct *task) */ int max_lock_depth = 1024; +static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) +{ + return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; +} + /* * Adjust the priority chain. Also used for deadlock detection. * Decreases task's usage by one - may thus free the task. * - * @task: the task owning the mutex (owner) for which a chain walk is probably - * needed + * @task: the task owning the mutex (owner) for which a chain walk is + * probably needed * @deadlock_detect: do we have to carry out deadlock detection? - * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck - *things for a task that has just got its priority adjusted, and - *is waiting on a mutex) + * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck + * things for a task that has just got its priority adjusted, and + * is waiting on a mutex) + * @next_lock: the mutex on which the owner of @orig_lock was blocked before + * we dropped its pi_lock. Is never dereferenced, only used for + * comparison to detect lock chain changes. * @orig_waiter: rt_mutex_waiter struct for the task that has just donated - * its priority to the mute
[GIT PULL] locking fixes
Linus, Please pull the latest core-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core-urgent-for-linus # HEAD: 65063ced73077b9add610b413d87bc6a51e40bab Merge branch 'liblockdep-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/sashal/linux into core/urgent liblockdep fixes and mutex debugging fixes. Thanks, Ingo --> Peter Zijlstra (1): locking/mutex: Fix debug_mutexes Sasha Levin (2): tools/liblockdep: Ignore asmlinkage and visible tools/liblockdep: Add proper versioning to the shared obj kernel/locking/mutex-debug.c | 19 +-- tools/lib/lockdep/Makefile | 15 --- tools/lib/lockdep/uinclude/linux/lockdep.h | 3 +++ 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index e1191c9..5cf6731 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -71,18 +71,17 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, void debug_mutex_unlock(struct mutex *lock) { - if (unlikely(!debug_locks)) - return; + if (likely(debug_locks)) { + DEBUG_LOCKS_WARN_ON(lock->magic != lock); - DEBUG_LOCKS_WARN_ON(lock->magic != lock); + if (!lock->owner) + DEBUG_LOCKS_WARN_ON(!lock->owner); + else + DEBUG_LOCKS_WARN_ON(lock->owner != current); - if (!lock->owner) - DEBUG_LOCKS_WARN_ON(!lock->owner); - else - DEBUG_LOCKS_WARN_ON(lock->owner != current); - - DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); - mutex_clear_owner(lock); + DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); + mutex_clear_owner(lock); + } /* * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index 07b0b75..cb09d3f 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -1,13 +1,8 @@ -# liblockdep version -LL_VERSION = 0 -LL_PATCHLEVEL = 0 -LL_EXTRAVERSION = 1 - # file format version FILE_VERSION = 1 MAKEFLAGS += --no-print-directory - +LIBLOCKDEP_VERSION=$(shell make -sC ../../.. kernelversion) # Makefiles suck: This macro sets a default value of $(2) for the # variable named by $(1), unless the variable has been set by @@ -98,7 +93,7 @@ export prefix libdir bindir src obj libdir_SQ = $(subst ','\'',$(libdir)) bindir_SQ = $(subst ','\'',$(bindir)) -LIB_FILE = liblockdep.a liblockdep.so +LIB_FILE = liblockdep.a liblockdep.so.$(LIBLOCKDEP_VERSION) BIN_FILE = lockdep CONFIG_INCLUDES = @@ -110,8 +105,6 @@ N = export Q VERBOSE -LIBLOCKDEP_VERSION = $(LL_VERSION).$(LL_PATCHLEVEL).$(LL_EXTRAVERSION) - INCLUDES = -I. -I/usr/local/include -I./uinclude -I./include $(CONFIG_INCLUDES) # Set compile option CFLAGS if not set elsewhere @@ -146,7 +139,7 @@ do_app_build = \ do_compile_shared_library =\ ($(print_shared_lib_compile)\ - $(CC) --shared $^ -o $@ -lpthread -ldl) + $(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -s $@ liblockdep.so)) do_build_static_lib = \ ($(print_static_lib_build) \ @@ -177,7 +170,7 @@ all: all_cmd all_cmd: $(CMD_TARGETS) -liblockdep.so: $(PEVENT_LIB_OBJS) +liblockdep.so.$(LIBLOCKDEP_VERSION): $(PEVENT_LIB_OBJS) $(Q)$(do_compile_shared_library) liblockdep.a: $(PEVENT_LIB_OBJS) diff --git a/tools/lib/lockdep/uinclude/linux/lockdep.h b/tools/lib/lockdep/uinclude/linux/lockdep.h index d0f5d6e..c1552c2 100644 --- a/tools/lib/lockdep/uinclude/linux/lockdep.h +++ b/tools/lib/lockdep/uinclude/linux/lockdep.h @@ -10,6 +10,9 @@ #define MAX_LOCK_DEPTH 2000UL +#define asmlinkage +#define __visible + #include "../../../include/linux/lockdep.h" struct task_struct { -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT PULL] locking fixes
Linus, Please pull the latest core-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core-urgent-for-linus # HEAD: 7a06c41cbec33c6dbe7eec575c61986122617408 sched_clock: Disable seqlock lockdep usage in sched_clock() Two fixes from lockdep coverage of seqlocks, which fix deadlocks on lockdep-enabled ARM systems. Thanks, Ingo --> John Stultz (2): seqlock: Use raw_ prefix instead of _no_lockdep sched_clock: Disable seqlock lockdep usage in sched_clock() arch/x86/vdso/vclock_gettime.c | 8 include/linux/seqlock.h| 27 +++ kernel/time/sched_clock.c | 6 +++--- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 2ada505..eb5d7a5 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -178,7 +178,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts) ts->tv_nsec = 0; do { - seq = read_seqcount_begin_no_lockdep(>od->seq); + seq = raw_read_seqcount_begin(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; ns = gtod->wall_time_snsec; @@ -198,7 +198,7 @@ notrace static int do_monotonic(struct timespec *ts) ts->tv_nsec = 0; do { - seq = read_seqcount_begin_no_lockdep(>od->seq); + seq = raw_read_seqcount_begin(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->monotonic_time_sec; ns = gtod->monotonic_time_snsec; @@ -214,7 +214,7 @@ notrace static int do_realtime_coarse(struct timespec *ts) { unsigned long seq; do { - seq = read_seqcount_begin_no_lockdep(>od->seq); + seq = raw_read_seqcount_begin(>od->seq); ts->tv_sec = gtod->wall_time_coarse.tv_sec; ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; } while (unlikely(read_seqcount_retry(>od->seq, seq))); @@ -225,7 +225,7 @@ notrace static int do_monotonic_coarse(struct timespec *ts) { unsigned long seq; do { - seq = read_seqcount_begin_no_lockdep(>od->seq); + seq = raw_read_seqcount_begin(>od->seq); ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; } while (unlikely(read_seqcount_retry(>od->seq, seq))); diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index cf87a24..535f158 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -117,15 +117,15 @@ repeat: } /** - * read_seqcount_begin_no_lockdep - start seq-read critical section w/o lockdep + * raw_read_seqcount_begin - start seq-read critical section w/o lockdep * @s: pointer to seqcount_t * Returns: count to be passed to read_seqcount_retry * - * read_seqcount_begin_no_lockdep opens a read critical section of the given + * raw_read_seqcount_begin opens a read critical section of the given * seqcount, but without any lockdep checking. Validity of the critical * section is tested by checking read_seqcount_retry function. */ -static inline unsigned read_seqcount_begin_no_lockdep(const seqcount_t *s) +static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) { unsigned ret = __read_seqcount_begin(s); smp_rmb(); @@ -144,7 +144,7 @@ static inline unsigned read_seqcount_begin_no_lockdep(const seqcount_t *s) static inline unsigned read_seqcount_begin(const seqcount_t *s) { seqcount_lockdep_reader_access(s); - return read_seqcount_begin_no_lockdep(s); + return raw_read_seqcount_begin(s); } /** @@ -206,14 +206,26 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) } + +static inline void raw_write_seqcount_begin(seqcount_t *s) +{ + s->sequence++; + smp_wmb(); +} + +static inline void raw_write_seqcount_end(seqcount_t *s) +{ + smp_wmb(); + s->sequence++; +} + /* * Sequence counter only version assumes that callers are using their * own mutexing. */ static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) { - s->sequence++; - smp_wmb(); + raw_write_seqcount_begin(s); seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); } @@ -225,8 +237,7 @@ static inline void write_seqcount_begin(seqcount_t *s) static inline void write_seqcount_end(seqcount_t *s) { seqcount_release(&s->dep_map, 1, _RET_IP_); - smp_wmb(); - s->sequence++; + raw_write_seqcount_end(s); } /** diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 68b7993..0abb364 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -74,7 +74,7 @@ unsigned long long notrace sched_clock(void)