lock->break_lock is set when a lock is contended, but cleared only in cond_resched_lock. Users of need_lockbreak (journal_commit_transaction, copy_pte_range, unmap_vmas) don't necessarily use cond_resched_lock on it.
So, if the lock has been contended at some time in the past, break_lock remains set thereafter, and the fastpath keeps dropping lock unnecessarily. Hanging the system if you make a change like I did, forever restarting a loop before making any progress. Should it be cleared when contending to lock, just the other side of the cpu_relax loop? No, that loop is preemptible, we don't want break_lock set all the while the contender has been preempted. It should be cleared when we unlock - any remaining contenders will quickly set it again. So cond_resched_lock's spin_unlock will clear it, no need for it to do that; and use need_lockbreak there too, preferring optimizer to #ifdefs. Or would you prefer the few need_lockbreak users to clear it in advance? Less overhead, more errorprone. Signed-off-by: Hugh Dickins <[EMAIL PROTECTED]> --- 2.6.11-bk7/kernel/sched.c 2005-03-11 13:33:09.000000000 +0000 +++ linux/kernel/sched.c 2005-03-11 17:46:50.000000000 +0000 @@ -3753,14 +3753,11 @@ EXPORT_SYMBOL(cond_resched); */ int cond_resched_lock(spinlock_t * lock) { -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) - if (lock->break_lock) { - lock->break_lock = 0; + if (need_lockbreak(lock)) { spin_unlock(lock); cpu_relax(); spin_lock(lock); } -#endif if (need_resched()) { _raw_spin_unlock(lock); preempt_enable_no_resched(); --- 2.6.11-bk7/kernel/spinlock.c 2005-03-02 07:38:52.000000000 +0000 +++ linux/kernel/spinlock.c 2005-03-11 17:46:50.000000000 +0000 @@ -163,6 +163,8 @@ void __lockfunc _write_lock(rwlock_t *lo EXPORT_SYMBOL(_write_lock); +#define _stop_breaking(lock) + #else /* CONFIG_PREEMPT: */ /* @@ -250,12 +252,19 @@ BUILD_LOCK_OPS(spin, spinlock); BUILD_LOCK_OPS(read, rwlock); BUILD_LOCK_OPS(write, rwlock); +#define _stop_breaking(lock) \ + do { \ + if ((lock)->break_lock) \ + (lock)->break_lock = 0; \ + } while (0) + #endif /* CONFIG_PREEMPT */ void __lockfunc _spin_unlock(spinlock_t *lock) { _raw_spin_unlock(lock); preempt_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_spin_unlock); @@ -263,6 +272,7 @@ void __lockfunc _write_unlock(rwlock_t * { _raw_write_unlock(lock); preempt_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_write_unlock); @@ -270,6 +280,7 @@ void __lockfunc _read_unlock(rwlock_t *l { _raw_read_unlock(lock); preempt_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_read_unlock); @@ -278,6 +289,7 @@ void __lockfunc _spin_unlock_irqrestore( _raw_spin_unlock(lock); local_irq_restore(flags); preempt_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_spin_unlock_irqrestore); @@ -286,6 +298,7 @@ void __lockfunc _spin_unlock_irq(spinloc _raw_spin_unlock(lock); local_irq_enable(); preempt_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_spin_unlock_irq); @@ -294,6 +307,7 @@ void __lockfunc _spin_unlock_bh(spinlock _raw_spin_unlock(lock); preempt_enable(); local_bh_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_spin_unlock_bh); @@ -302,6 +316,7 @@ void __lockfunc _read_unlock_irqrestore( _raw_read_unlock(lock); local_irq_restore(flags); preempt_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_read_unlock_irqrestore); @@ -310,6 +325,7 @@ void __lockfunc _read_unlock_irq(rwlock_ _raw_read_unlock(lock); local_irq_enable(); preempt_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_read_unlock_irq); @@ -318,6 +334,7 @@ void __lockfunc _read_unlock_bh(rwlock_t _raw_read_unlock(lock); preempt_enable(); local_bh_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_read_unlock_bh); @@ -326,6 +343,7 @@ void __lockfunc _write_unlock_irqrestore _raw_write_unlock(lock); local_irq_restore(flags); preempt_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_write_unlock_irqrestore); @@ -334,6 +352,7 @@ void __lockfunc _write_unlock_irq(rwlock _raw_write_unlock(lock); local_irq_enable(); preempt_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_write_unlock_irq); @@ -342,6 +361,7 @@ void __lockfunc _write_unlock_bh(rwlock_ _raw_write_unlock(lock); preempt_enable(); local_bh_enable(); + _stop_breaking(lock); } EXPORT_SYMBOL(_write_unlock_bh); - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/