Below is the original message from Andrey Smirnov.
I would like to address the issue and submit the patch for it.
Please find the patch attached with this mail.
The issue is described below. It is probably a race in the scheduling of the notifier chain.
Built and tested on a Udoo board with the RT patch applied.
-Anand Moon
----------------------------------------------------------------------------
Hello everyone,
I am working on integrating PREEMPT-RT patches into 3.10.17 kernel BSP
release from Freescale which can be found at:
http://git.freescale.com/git/cgit.cgi/imx/linux-2.6-imx.git (tag
imx_3.10.17_1.0.0_ga)
and what I am finding is that if I select the "interactive" cpufreq
governor I get a kernel that occasionally spouts this:
BUG: scheduling while atomic: swapper/3/0/0x00000002
Modules linked in:
Preemption disabled at:[< (null)>] (null)
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 3.10.17-rt12-80705-g232293e-dirty #3
[<800139c4>] (unwind_backtrace+0x0/0xf8) from [<80011420>]
(show_stack+0x10/0x14)
[<80011420>] (show_stack+0x10/0x14) from [<805bfbbc>] (__schedule_bug+0x78/0x9c)
[<805bfbbc>] (__schedule_bug+0x78/0x9c) from [<805c431c>]
(__schedule+0x398/0x49c)
[<805c431c>] (__schedule+0x398/0x49c) from [<805c44d0>] (schedule+0x34/0xa0)
[<805c44d0>] (schedule+0x34/0xa0) from [<805c5250>]
(rt_spin_lock_slowlock+0xc0/0x258)
[<805c5250>] (rt_spin_lock_slowlock+0xc0/0x258) from [<80031d44>]
(lock_timer_base+0x2c/0x4c)
[<80031d44>] (lock_timer_base+0x2c/0x4c) from [<80032024>]
(mod_timer+0x60/0x1c0)
[<80032024>] (mod_timer+0x60/0x1c0) from [<803fe860>]
(cpufreq_interactive_idle_notifier+0xa4/0x13c)
[<803fe860>] (cpufreq_interactive_idle_notifier+0xa4/0x13c) from
[<80048444>] (notifier_call_chain+0x44/0x84)
[<80048444>] (notifier_call_chain+0x44/0x84) from [<80048754>]
(__atomic_notifier_call_chain+0x38/0x4c)
[<80048754>] (__atomic_notifier_call_chain+0x38/0x4c) from
[<80048780>] (atomic_notifier_call_chain+0x18/0x20)
[<80048780>] (atomic_notifier_call_chain+0x18/0x20) from [<80058df8>]
(cpu_startup_entry+0x68/0x1a4)
[....] [<80058df8>] (cpu_startup_entry+0x68/0x1a4) from [<105bc204>]
(0x105bc204)
and eventually crashes. After doing some digging I believe the
sequence of events leading to this is the following:
secondary_start_kernel() calls preempt_disable(), then
cpu_startup_entry(CPUHP_ONLINE), which results in a call to
cpufreq_interactive_idle_notifier() which in turn tries to use
mod_timer(). Internally, mod_timer() tries to acquire a spinlock, but
with the RT patches applied that spinlock becomes an rt_mutex, and the
attempt to acquire it results in a call to schedule(); that is when we
see the backtrace above.
Eventually I am hoping to disable any sort of frequency scaling or
power management on our system, but I am still curious to know whether this
is a known issue for which patches exist. Does anyone have any leads/suggestions?
Thank you,
Andrey Smirnov
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
index 9a6f64f..64f2784 100644
--- a/drivers/cpufreq/cpufreq_interactive.c
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -17,6 +17,7 @@
*
*/
+#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
@@ -29,7 +30,6 @@
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/kernel_stat.h>
-#include <linux/module.h>
#include <asm/cputime.h>
static atomic_t active_count = ATOMIC_INIT(0);
@@ -61,6 +61,8 @@ static cpumask_t down_cpumask;
static spinlock_t down_cpumask_lock;
static struct mutex set_speed_lock;
+#define MAX_RT_PRIO 100
+
/* Hi speed to bump to from lo speed when load burst (default max) */
static u64 hispeed_freq;
@@ -77,9 +79,7 @@ static unsigned long min_sample_time;
/*
* The sample rate of the timer used to increase frequency
*/
-#define DEFAULT_TIMER_RATE (50 * USEC_PER_MSEC)
-#define CPUFREQ_IRQ_LEN 60
-#define CPUFREQ_NOTE_LEN 120
+#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC)
static unsigned long timer_rate;
static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
@@ -133,8 +133,8 @@ static void cpufreq_interactive_timer(unsigned long data)
if (!idle_exit_time)
goto exit;
- delta_idle = (unsigned int)(now_idle - time_in_idle);
- delta_time = (unsigned int)(pcpu->timer_run_time - idle_exit_time);
+ delta_idle = (unsigned int) (now_idle - time_in_idle);
+ delta_time = (unsigned int) (pcpu->timer_run_time - idle_exit_time);
/*
* If timer ran less than 1ms after short-term sample started, retry.
@@ -147,9 +147,8 @@ static void cpufreq_interactive_timer(unsigned long data)
else
cpu_load = 100 * (delta_time - delta_idle) / delta_time;
- delta_idle = (unsigned int)(now_idle - pcpu->freq_change_time_in_idle);
- delta_time = (unsigned int)(pcpu->timer_run_time -
- pcpu->freq_change_time);
+ delta_idle = (unsigned int) (now_idle - pcpu->freq_change_time_in_idle);
+ delta_time = (unsigned int) (pcpu->timer_run_time -
pcpu->freq_change_time);
if ((delta_time == 0) || (delta_idle > delta_time))
load_since_change = 0;
@@ -250,10 +249,11 @@ static void cpufreq_interactive_idle_start(void)
&per_cpu(cpuinfo, smp_processor_id());
int pending;
- pcpu->idling = 1;
- smp_wmb();
if (!pcpu->governor_enabled)
return;
+
+ pcpu->idling = 1;
+ smp_wmb();
pending = timer_pending(&pcpu->cpu_timer);
if (pcpu->target_freq != pcpu->policy->min) {
@@ -619,10 +619,10 @@ static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
void *data)
{
switch (val) {
- case IDLE_START:
+ case SCHED_IDLE_START:
cpufreq_interactive_idle_start();
break;
- case IDLE_END:
+ case SCHED_IDLE_END:
cpufreq_interactive_idle_end();
break;
}
@@ -638,7 +638,7 @@ static int __init cpufreq_interactive_init(void)
{
unsigned int i;
struct cpufreq_interactive_cpuinfo *pcpu;
- struct sched_param param = { .sched_priority = 99 };
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
@@ -674,7 +674,7 @@ static int __init cpufreq_interactive_init(void)
spin_lock_init(&down_cpumask_lock);
mutex_init(&set_speed_lock);
- idle_notifier_register(&cpufreq_interactive_idle_nb);
+ sched_idle_notifier_register(&cpufreq_interactive_idle_nb);
return cpufreq_register_governor(&cpufreq_gov_interactive);
@@ -684,7 +684,7 @@ err_freeuptask:
}
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
-late_initcall(cpufreq_interactive_init);
+fs_initcall(cpufreq_interactive_init);
#else
module_init(cpufreq_interactive_init);
#endif
@@ -699,7 +699,7 @@ static void __exit cpufreq_interactive_exit(void)
module_exit(cpufreq_interactive_exit);
-MODULE_AUTHOR("Mike Chan <[email protected]>");
+MODULE_AUTHOR("Mike Chan <mike at android.com>");
MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
"Latency sensitive workloads");
MODULE_LICENSE("GPL");
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ecb2f9b..c247353 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1898,6 +1898,16 @@ extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#endif
+#define SCHED_IDLE_START 1
+#define SCHED_IDLE_END 2
+extern void sched_idle_notifier_register(struct notifier_block *nb);
+extern void sched_idle_notifier_unregister(struct notifier_block *nb);
+extern void sched_idle_notifier_call_chain(unsigned long val);
+extern void sched_idle_enter_condrcu(bool idle_uses_rcu);
+extern void sched_idle_exit_condrcu(bool idle_uses_rcu);
+static inline void sched_idle_enter(void) { sched_idle_enter_condrcu(0); }
+static inline void sched_idle_exit(void) { sched_idle_exit_condrcu(0); }
+
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
* An i/f to runtime opt-in for irq time accounting based off of sched_clock.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a060a09..2df783e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1828,6 +1828,44 @@ void wake_up_new_task(struct task_struct *p)
task_rq_unlock(rq, p, &flags);
}
+static ATOMIC_NOTIFIER_HEAD(sched_idle_notifier);
+
+void sched_idle_notifier_register(struct notifier_block *nb)
+{
+ atomic_notifier_chain_register(&sched_idle_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(sched_idle_notifier_register);
+
+void sched_idle_notifier_unregister(struct notifier_block *nb)
+{
+ atomic_notifier_chain_unregister(&sched_idle_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(sched_idle_notifier_unregister);
+
+void sched_idle_notifier_call_chain(unsigned long val)
+{
+ atomic_notifier_call_chain(&sched_idle_notifier, val, NULL);
+}
+EXPORT_SYMBOL_GPL(sched_idle_notifier_call_chain);
+
+void sched_idle_enter_condrcu(bool idle_uses_rcu)
+{
+ tick_nohz_idle_enter();
+ if (!idle_uses_rcu)
+ rcu_idle_enter();
+ sched_idle_notifier_call_chain(SCHED_IDLE_START);
+}
+EXPORT_SYMBOL_GPL(sched_idle_enter_condrcu);
+
+void sched_idle_exit_condrcu(bool idle_uses_rcu)
+{
+ sched_idle_notifier_call_chain(SCHED_IDLE_END);
+ if (!idle_uses_rcu)
+ rcu_idle_exit();
+ tick_nohz_idle_exit();
+}
+EXPORT_SYMBOL_GPL(sched_idle_exit_condrcu);
+
#ifdef CONFIG_PREEMPT_NOTIFIERS
/**
--
_______________________________________________
meta-freescale mailing list
[email protected]
https://lists.yoctoproject.org/listinfo/meta-freescale