On Wed, Aug 13, 2014 at 07:24:07PM +0200, Peter Zijlstra wrote: > On Wed, Aug 13, 2014 at 04:42:19PM +0200, Peter Zijlstra wrote: > > Auditing all idle functions will be somewhat of a pain, but its entirely > > doable. Looking at this stuff, it appears we can clean it up massively; > > see how the generic cpuidle code already has the broadcast logic in, so > > we can remove that from the drivers by setting the right flags. > > > > We can similarly pull out the leave_mm() call by adding a > > CPUIDLE_FLAG_TLB_FLUSH. At which point all we'd need to do is mark the > > intel_idle (and all other cpuidle_state::enter functions with __notrace. > > This removes the broadcast stuff from intel_idle.c; processor_idle.c hurts > my brain, but something similar should be possible. >
And this moves the leave_mm() bit to generic code. --- --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -101,14 +101,6 @@ static int intel_idle_cpu_init(int cpu); static struct cpuidle_state *cpuidle_state_table; /* - * Set this flag for states where the HW flushes the TLB for us - * and so we don't need cross-calls to keep it consistent. - * If this flag is set, SW flushes the TLB, so even if the - * HW doesn't do the flushing, this flag is safe to use. - */ -#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 - -/* * MWAIT takes an 8-bit "hint" in EAX "suggesting" * the C-state (top nibble) and sub-state (bottom nibble) * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. @@ -508,14 +500,6 @@ static int intel_idle(struct cpuidle_dev unsigned long ecx = 1; /* break on interrupt flag */ struct cpuidle_state *state = &drv->states[index]; unsigned long eax = flg2MWAIT(state->flags); - int cpu = smp_processor_id(); - - /* - * leave_mm() to avoid costly and often unnecessary wakeups - * for flushing the user TLB's associated with the active mm. - */ - if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) - leave_mm(cpu); mwait_idle_with_hints(eax, ecx); --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -56,6 +56,7 @@ struct cpuidle_state { #define CPUIDLE_FLAG_TIME_VALID (0x01) /* is residency time measurable? */ #define CPUIDLE_FLAG_COUPLED (0x02) /* state applies to multiple cpus */ #define CPUIDLE_FLAG_TIMER_STOP (0x04) /* timer is stopped on this state */ +#define CPUIDLE_FLAG_TLB_FLUSHED (0x08) /* TLBs are flushed on this state */ #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -79,7 +79,7 @@ static void cpuidle_idle_call(void) struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; - unsigned int broadcast; + unsigned int broadcast, flags; /* * Check if the idle task must be rescheduled. If it is the @@ -135,7 +135,16 @@ static void cpuidle_idle_call(void) goto exit_idle; } - broadcast = drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP; + flags = drv->states[next_state].flags; + + /* + * leave_mm() to avoid costly and often unnecessary wakeups + * for flushing the user TLB's associated with the active mm. + */ + if (flags & CPUIDLE_FLAG_TLB_FLUSHED) + leave_mm(dev->cpu); + + broadcast = flags & CPUIDLE_FLAG_TIMER_STOP; /* * Tell the time framework to switch to a broadcast timer
pgpnNyBK_S1xd.pgp
Description: PGP signature