Dear RT folks!

I'm pleased to announce the v3.14.2-rt3 patch set.

Changes since v3.14.2-rt2
- rwsem readers are no longer allowed to nest. A patch from Steven Rostedt.
- a few bugs in the hotplug code, introduced during the v3.14 port, were
  fixed by Mike Galbraith.
- Mike Galbraith sent a patch which might fix lazy preempt on x86_64.
  The patch is applied but my machine still explodes, therefore lazy
  preempt remains off on x86_64.
- Mike Galbraith sent a few patches to get CPU hotplug to work. This
  includes the new lg_global_trylock_relax() (sketched after the known
  issues below).
- A few push-downs of migrate_disable() (where migrate_disable() is
  called after rt_mutex_trylock()) have been reverted; see the sketch
  after this list. It seems hotplug is not too happy about this. A
  patch by Steven Rostedt and Mike Galbraith.
- There was a complaint about a backtrace from run_local_timers() in UP
  mode because a spin_do_trylock() failed. _This_ particular case was
  not an error. The optimization was for FULL_NO_HZ, which is pointless
  on UP because there is no spare CPU. Therefore the optimization is now
  disabled in UP mode and the backtrace is gone. Reported by Stanislav
  Meduna.
- The block-mq CPU notifier now uses a spinlock and runs at
  CPU_POST_DEAD time instead of CPU_DEAD. lockdep complained about
  taking the sleeping ctx->lock inside the raw lock
  (blk_mq_cpu_notify_lock), and CPU_DEAD runs with irqs off.

Known issues:

      - bcache is disabled.

      - lazy preempt on x86_64 leads to a crash with some load.

      - CPU hotplug works in general. Steven's test script, however,
        usually deadlocks on the second invocation.
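
As background for the hotplug bits above: the new
lg_global_trylock_relax() (part of the delta below, in
kernel/locking/lglock.c) does not block. It busy-spins on each per-CPU
lock with cpu_relax(), so it can be used from the stop-machine path on
an inactive CPU where we must not schedule. Stripped of the lockdep
annotation, it is roughly:

    void lg_global_trylock_relax(struct lglock *lg)
    {
            int i;

            for_each_possible_cpu(i) {
                    lg_lock_ptr *lock = per_cpu_ptr(lg->lock, i);

                    /* spin instead of sleeping; scheduling is not allowed here */
                    while (!__rt_spin_trylock(lock))
                            cpu_relax();
            }
    }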

The delta patch against v3.14.2-rt2 is appended below and can be found
here:
   
https://www.kernel.org/pub/linux/kernel/projects/rt/3.14/incr/patch-3.14.2-rt2-rt3.patch.xz

The RT patch against 3.14.2 can be found here:

   
https://www.kernel.org/pub/linux/kernel/projects/rt/3.14/patch-3.14.2-rt3.patch.xz

The split quilt queue is available at:

   
https://www.kernel.org/pub/linux/kernel/projects/rt/3.14/patches-3.14.2-rt3.tar.xz

Sebastian

diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 752fe56..1e649c4 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -94,7 +94,11 @@ static __always_inline bool __preempt_count_dec_and_test(void)
 {
        if (____preempt_count_dec_and_test())
                return true;
+#ifdef CONFIG_PREEMPT_LAZY
        return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+       return false;
+#endif
 }
 
 /*
@@ -102,8 +106,12 @@ static __always_inline bool __preempt_count_dec_and_test(void)
  */
 static __always_inline bool should_resched(void)
 {
+#ifdef CONFIG_PREEMPT_LAZY
        return unlikely(!__this_cpu_read_4(__preempt_count) || \
                        test_thread_flag(TIF_NEED_RESCHED_LAZY));
+#else
+       return unlikely(!__this_cpu_read_4(__preempt_count));
+#endif
 }
 
 #ifdef CONFIG_PREEMPT
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 7c8b356..5701b50 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -72,4 +72,5 @@ void common(void) {
 
        BLANK();
        DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+       DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
 }
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index fd2d976..6157ed6 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -365,19 +365,22 @@ ENTRY(resume_kernel)
 need_resched:
        # preempt count == 0 + NEED_RS set?
        cmpl $0,PER_CPU_VAR(__preempt_count)
+#ifndef CONFIG_PREEMPT_LAZY
+       jnz restore_all
+#else
        jz test_int_off
 
        # atleast preempt count == 0 ?
-       cmpl $_TIF_NEED_RESCHED,PER_CPU_VAR(__preempt_count)
+       cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
        jne restore_all
 
        cmpl $0,TI_preempt_lazy_count(%ebp)     # non-zero preempt_lazy_count ?
        jnz restore_all
 
-       testl $_TIF_NEED_RESCHED_LAZY, %ecx
+       testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp)
        jz restore_all
-
 test_int_off:
+#endif
        testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)    # interrupts off (exception path) ?
        jz restore_all
        call preempt_schedule_irq
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b650b43..d893814 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -658,8 +658,8 @@ GLOBAL(system_call_after_swapgs)
        /* Handle reschedules */
        /* edx: work, edi: workmask */
 sysret_careful:
-       bt $TIF_NEED_RESCHED,%edx
-       jnc sysret_signal
+       testl $_TIF_NEED_RESCHED_MASK,%edx
+       jz sysret_signal
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
        pushq_cfi %rdi
@@ -771,8 +771,8 @@ GLOBAL(int_with_check)
        /* First do a reschedule test. */
        /* edx: work, edi: workmask */
 int_careful:
-       bt $TIF_NEED_RESCHED,%edx
-       jnc  int_very_careful
+       testl $_TIF_NEED_RESCHED_MASK,%edx
+       jz  int_very_careful
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
        pushq_cfi %rdi
@@ -1071,8 +1071,8 @@ ENTRY(native_iret)
        /* edi: workmask, edx: work */
 retint_careful:
        CFI_RESTORE_STATE
-       bt    $TIF_NEED_RESCHED,%edx
-       jnc   retint_signal
+       testl $_TIF_NEED_RESCHED_MASK,%edx
+       jz   retint_signal
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
        pushq_cfi %rdi
@@ -1104,7 +1104,22 @@ ENTRY(native_iret)
        /* rcx:  threadinfo. interrupts off. */
 ENTRY(retint_kernel)
        cmpl $0,PER_CPU_VAR(__preempt_count)
+#ifndef CONFIG_PREEMPT_LAZY
        jnz  retint_restore_args
+#else
+       jz  check_int_off
+
+       # atleast preempt count == 0 ?
+       cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
+       jnz retint_restore_args
+
+       cmpl $0, TI_preempt_lazy_count(%rcx)
+       jnz retint_restore_args
+
+       bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
+       jnc  retint_restore_args
+check_int_off:
+#endif
        bt   $9,EFLAGS-ARGOFFSET(%rsp)  /* interrupts off? */
        jnc  retint_restore_args
        call preempt_schedule_irq
@@ -1540,7 +1555,7 @@ ENTRY(paranoid_exit)
        movq %rsp,%rdi                  /* &pt_regs */
        call sync_regs
        movq %rax,%rsp                  /* switch stack for scheduling */
-       testl $_TIF_NEED_RESCHED,%ebx
+       testl $_TIF_NEED_RESCHED_MASK,%ebx
        jnz paranoid_schedule
        movl %ebx,%edx                  /* arg3: thread flags */
        TRACE_IRQS_ON
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
index 136ef86..37acc3a 100644
--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -11,7 +11,7 @@
 #include "blk-mq.h"
 
 static LIST_HEAD(blk_mq_cpu_notify_list);
-static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
+static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
 
 static int blk_mq_main_cpu_notify(struct notifier_block *self,
                                  unsigned long action, void *hcpu)
@@ -19,12 +19,15 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
        unsigned int cpu = (unsigned long) hcpu;
        struct blk_mq_cpu_notifier *notify;
 
-       raw_spin_lock(&blk_mq_cpu_notify_lock);
+       if (action != CPU_POST_DEAD && action != CPU_POST_DEAD)
+               return NOTIFY_OK;
+
+       spin_lock(&blk_mq_cpu_notify_lock);
 
        list_for_each_entry(notify, &blk_mq_cpu_notify_list, list)
                notify->notify(notify->data, action, cpu);
 
-       raw_spin_unlock(&blk_mq_cpu_notify_lock);
+       spin_unlock(&blk_mq_cpu_notify_lock);
        return NOTIFY_OK;
 }
 
@@ -32,16 +35,16 @@ void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
 {
        BUG_ON(!notifier->notify);
 
-       raw_spin_lock(&blk_mq_cpu_notify_lock);
+       spin_lock(&blk_mq_cpu_notify_lock);
        list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
-       raw_spin_unlock(&blk_mq_cpu_notify_lock);
+       spin_unlock(&blk_mq_cpu_notify_lock);
 }
 
 void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
 {
-       raw_spin_lock(&blk_mq_cpu_notify_lock);
+       spin_lock(&blk_mq_cpu_notify_lock);
        list_del(&notifier->list);
-       raw_spin_unlock(&blk_mq_cpu_notify_lock);
+       spin_unlock(&blk_mq_cpu_notify_lock);
 }
 
 void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a5f25f9..5fb26f7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -48,9 +48,14 @@ static struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
        return __blk_mq_get_ctx(q, get_cpu_light());
 }
 
-static void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
+static void __blk_mq_put_ctx(struct blk_mq_ctx *ctx)
 {
        spin_unlock(&ctx->cpu_lock);
+}
+
+static void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
+{
+       __blk_mq_put_ctx(ctx);
        put_cpu_light();
 }
 
@@ -966,7 +971,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
        struct blk_mq_ctx *ctx;
        LIST_HEAD(tmp);
 
-       if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
+       if (action != CPU_POST_DEAD && action != CPU_POST_DEAD)
                return;
 
        /*
@@ -980,6 +985,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
                clear_bit(ctx->index_hw, hctx->ctx_map);
        }
        spin_unlock(&ctx->lock);
+       __blk_mq_put_ctx(ctx);
 
        if (list_empty(&tmp))
                return;
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 2b2204e..534b16e 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -74,4 +74,10 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu);
 void lg_global_lock(struct lglock *lg);
 void lg_global_unlock(struct lglock *lg);
 
+#ifndef CONFIG_PREEMPT_RT_FULL
+#define lg_global_trylock_relax(name)  lg_global_lock(name)
+#else
+void lg_global_trylock_relax(struct lglock *lg);
+#endif
+
 #endif
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 116af6a..5b2cdf4 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -126,8 +126,7 @@ do { \
 #define preempt_enable_notrace() \
 do { \
        barrier(); \
-       if (unlikely(__preempt_count_dec_and_test() || \
-                               test_thread_flag(TIF_NEED_RESCHED_LAZY))) \
+       if (unlikely(__preempt_count_dec_and_test())) \
                __preempt_schedule_context(); \
 } while (0)
 #else
diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h
index 924c2d2..0065b08 100644
--- a/include/linux/rwsem_rt.h
+++ b/include/linux/rwsem_rt.h
@@ -20,7 +20,6 @@
 
 struct rw_semaphore {
        struct rt_mutex         lock;
-       int                     read_depth;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        struct lockdep_map      dep_map;
 #endif
diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
index ac6f08b..c0d1367 100644
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -35,6 +35,7 @@ extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
  */
 extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
 extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
+extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
 
 #define spin_lock(lock)                                \
        do {                                    \
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 041fada..ce00329 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -649,7 +649,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
                /* CPU didn't die: tell everyone.  Can't complain. */
                smpboot_unpark_threads(cpu);
                cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
-               goto out_cancel;
+               goto out_release;
        }
        BUG_ON(cpu_online(cpu));
 
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
index f2356df..9397974 100644
--- a/kernel/locking/lglock.c
+++ b/kernel/locking/lglock.c
@@ -105,3 +105,28 @@ void lg_global_unlock(struct lglock *lg)
        preempt_enable_nort();
 }
 EXPORT_SYMBOL(lg_global_unlock);
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * HACK: If you use this, you get to keep the pieces.
+ * Used in queue_stop_cpus_work() when stop machinery
+ * is called from inactive CPU, so we can't schedule.
+ */
+# define lg_do_trylock_relax(l)                        \
+       do {                                    \
+               while (!__rt_spin_trylock(l))   \
+                       cpu_relax();            \
+       } while (0)
+
+void lg_global_trylock_relax(struct lglock *lg)
+{
+       int i;
+
+       lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
+       for_each_possible_cpu(i) {
+               lg_lock_ptr *lock;
+               lock = per_cpu_ptr(lg->lock, i);
+               lg_do_trylock_relax(lock);
+       }
+}
+#endif
diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c
index 5d17727..055a3df 100644
--- a/kernel/locking/rt.c
+++ b/kernel/locking/rt.c
@@ -180,12 +180,14 @@ EXPORT_SYMBOL(_mutex_unlock);
  */
 int __lockfunc rt_write_trylock(rwlock_t *rwlock)
 {
-       int ret = rt_mutex_trylock(&rwlock->lock);
+       int ret;
 
-       if (ret) {
+       migrate_disable();
+       ret = rt_mutex_trylock(&rwlock->lock);
+       if (ret)
                rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
-               migrate_disable();
-       }
+       else
+               migrate_enable();
 
        return ret;
 }
@@ -212,11 +214,13 @@ int __lockfunc rt_read_trylock(rwlock_t *rwlock)
         * write locked.
         */
        if (rt_mutex_owner(lock) != current) {
+               migrate_disable();
                ret = rt_mutex_trylock(lock);
-               if (ret) {
+               if (ret)
                        rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
-                       migrate_disable();
-               }
+               else
+                       migrate_enable();
+
        } else if (!rwlock->read_depth) {
                ret = 0;
        }
@@ -240,13 +244,14 @@ void __lockfunc rt_read_lock(rwlock_t *rwlock)
 {
        struct rt_mutex *lock = &rwlock->lock;
 
+
        /*
         * recursive read locks succeed when current owns the lock
         */
        if (rt_mutex_owner(lock) != current) {
-               rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
-               __rt_spin_lock(lock);
                migrate_disable();
+               rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
+               __rt_spin_lock(lock);
        }
        rwlock->read_depth++;
 }
@@ -316,10 +321,8 @@ EXPORT_SYMBOL(rt_up_write);
 
 void  rt_up_read(struct rw_semaphore *rwsem)
 {
-       if (--rwsem->read_depth == 0) {
-               rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
-               rt_mutex_unlock(&rwsem->lock);
-       }
+       rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
+       rt_mutex_unlock(&rwsem->lock);
 }
 EXPORT_SYMBOL(rt_up_read);
 
@@ -330,7 +333,6 @@ EXPORT_SYMBOL(rt_up_read);
 void  rt_downgrade_write(struct rw_semaphore *rwsem)
 {
        BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
-       rwsem->read_depth = 1;
 }
 EXPORT_SYMBOL(rt_downgrade_write);
 
@@ -367,37 +369,20 @@ void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
 
 int  rt_down_read_trylock(struct rw_semaphore *rwsem)
 {
-       struct rt_mutex *lock = &rwsem->lock;
-       int ret = 1;
-
-       /*
-        * recursive read locks succeed when current owns the rwsem,
-        * but not when read_depth == 0 which means that the rwsem is
-        * write locked.
-        */
-       if (rt_mutex_owner(lock) != current) {
-               ret = rt_mutex_trylock(&rwsem->lock);
-               if (ret)
-                       rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
-       } else if (!rwsem->read_depth) {
-               ret = 0;
-       }
+       int ret;
 
+       ret = rt_mutex_trylock(&rwsem->lock);
        if (ret)
-               rwsem->read_depth++;
+               rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
+
        return ret;
 }
 EXPORT_SYMBOL(rt_down_read_trylock);
 
 static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
 {
-       struct rt_mutex *lock = &rwsem->lock;
-
-       if (rt_mutex_owner(lock) != current) {
-               rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
-               rt_mutex_lock(&rwsem->lock);
-       }
-       rwsem->read_depth++;
+       rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
+       rt_mutex_lock(&rwsem->lock);
 }
 
 void  rt_down_read(struct rw_semaphore *rwsem)
@@ -422,7 +407,6 @@ void  __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
        debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
        lockdep_init_map(&rwsem->dep_map, name, key, 0);
 #endif
-       rwsem->read_depth = 0;
        rwsem->lock.save_state = 0;
 }
 EXPORT_SYMBOL(__rt_rwsem_init);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 42f4f28..5c5cc76 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1001,6 +1001,11 @@ void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
 }
 EXPORT_SYMBOL(rt_spin_unlock_wait);
 
+int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
+{
+       return rt_mutex_trylock(lock);
+}
+
 int __lockfunc rt_spin_trylock(spinlock_t *lock)
 {
        int ret = rt_mutex_trylock(&lock->lock);
@@ -1045,12 +1050,12 @@ int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
        /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
        if (atomic_add_unless(atomic, -1, 1))
                return 0;
+       migrate_disable();
        rt_spin_lock(lock);
-       if (atomic_dec_and_test(atomic)){
-               migrate_disable();
+       if (atomic_dec_and_test(atomic))
                return 1;
-       }
        rt_spin_unlock(lock);
+       migrate_enable();
        return 0;
 }
 EXPORT_SYMBOL(atomic_dec_and_spin_lock);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index aaae9f1..bcbae9c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -266,7 +266,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
        struct irq_cpu_stop_queue_work_info call_args;
        struct multi_stop_data msdata;
 
-       preempt_disable();
+       preempt_disable_nort();
        msdata = (struct multi_stop_data){
                .fn = fn,
                .data = arg,
@@ -299,7 +299,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
         * This relies on the stopper workqueues to be FIFO.
         */
        if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
-               preempt_enable();
+               preempt_enable_nort();
                return -ENOENT;
        }
 
@@ -313,7 +313,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
                                 &irq_cpu_stop_queue_work,
                                 &call_args, 1);
        lg_local_unlock(&stop_cpus_lock);
-       preempt_enable();
+       preempt_enable_nort();
 
        wait_for_stop_done(&done);
 
@@ -346,7 +346,7 @@ static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
 
 static void queue_stop_cpus_work(const struct cpumask *cpumask,
                                 cpu_stop_fn_t fn, void *arg,
-                                struct cpu_stop_done *done)
+                                struct cpu_stop_done *done, bool inactive)
 {
        struct cpu_stop_work *work;
        unsigned int cpu;
@@ -360,11 +360,13 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
        }
 
        /*
-        * Disable preemption while queueing to avoid getting
-        * preempted by a stopper which might wait for other stoppers
-        * to enter @fn which can lead to deadlock.
+        * Make sure that all work is queued on all cpus before
+        * any of the cpus can execute it.
         */
-       lg_global_lock(&stop_cpus_lock);
+       if (!inactive)
+               lg_global_lock(&stop_cpus_lock);
+       else
+               lg_global_trylock_relax(&stop_cpus_lock);
        for_each_cpu(cpu, cpumask)
                cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
        lg_global_unlock(&stop_cpus_lock);
@@ -376,7 +378,7 @@ static int __stop_cpus(const struct cpumask *cpumask,
        struct cpu_stop_done done;
 
        cpu_stop_init_done(&done, cpumask_weight(cpumask));
-       queue_stop_cpus_work(cpumask, fn, arg, &done);
+       queue_stop_cpus_work(cpumask, fn, arg, &done, false);
        wait_for_stop_done(&done);
        return done.executed ? done.ret : -ENOENT;
 }
@@ -572,6 +574,8 @@ static int __init cpu_stop_init(void)
                INIT_LIST_HEAD(&stopper->works);
        }
 
+       lg_lock_init(&stop_cpus_lock, "stop_cpus_lock");
+
        BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
        stop_machine_initialized = true;
        return 0;
@@ -667,11 +671,11 @@ int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
        set_state(&msdata, MULTI_STOP_PREPARE);
        cpu_stop_init_done(&done, num_active_cpus());
        queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
-                            &done);
+                            &done, true);
        ret = multi_cpu_stop(&msdata);
 
        /* Busy wait for completion. */
-       while (!atomic_read(&done.nr_todo))
+       while (atomic_read(&done.nr_todo))
                cpu_relax();
 
        mutex_unlock(&stop_cpus_mutex);
diff --git a/kernel/timer.c b/kernel/timer.c
index 54596b5..8750875 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1461,6 +1461,19 @@ void run_local_timers(void)
         * the timer softirq.
         */
 #ifdef CONFIG_PREEMPT_RT_FULL
+
+#ifndef CONFIG_SMP
+       /*
+        * The spin_do_trylock() later may fail as the lock may be hold before
+        * the interrupt arrived. The spin-lock debugging code will raise a
+        * warning if the try_lock fails on UP. Since this is only an
+        * optimization for the FULL_NO_HZ case (not to run the timer softirq on
+        * an nohz_full CPU) we don't really care and shedule the softirq.
+        */
+       raise_softirq(TIMER_SOFTIRQ);
+       return;
+#endif
+
        /* On RT, irq work runs from softirq */
        if (irq_work_needs_cpu()) {
                raise_softirq(TIMER_SOFTIRQ);
diff --git a/localversion-rt b/localversion-rt
index c3054d0..1445cd6 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt2
+-rt3