On Wed, Aug 13, 2014 at 07:24:07PM +0200, Peter Zijlstra wrote: > On Wed, Aug 13, 2014 at 04:42:19PM +0200, Peter Zijlstra wrote: > > Auditing all idle functions will be somewhat of a pain, but its entirely > > doable. Looking at this stuff, it appears we can clean it up massively; > > see how the generic cpuidle code already has the broadcast logic in, so > > we can remove that from the drivers by setting the right flags. > > > > We can similarly pull out the leave_mm() call by adding a > > CPUIDLE_FLAG_TLB_FLUSH. At which point all we'd need to do is mark the > > intel_idle (and all other cpuidle_state::enter functions with __notrace. > > This removes the broadcast stuff from intel_idle.c; processor_idle.c hurts > my brain, but something similar should be possible.
Still hurts my brain; esp. acpi_idle_enter_bm() which calls a massive lot of code to disable busmastering, but maybe its doable, we need that __notrace detectoring. Also, someone needs to double check this; preferably LenB or Rafael who have seen this driver before. Teh pain. --- --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -171,32 +171,11 @@ static void lapic_timer_propagate_broadc (void *)pr, 1); } -/* Power(C) State timer broadcast control */ -static void lapic_timer_state_broadcast(struct acpi_processor *pr, - struct acpi_processor_cx *cx, - int broadcast) -{ - int state = cx - pr->power.states; - - if (state >= pr->power.timer_broadcast_on_state) { - unsigned long reason; - - reason = broadcast ? CLOCK_EVT_NOTIFY_BROADCAST_ENTER : - CLOCK_EVT_NOTIFY_BROADCAST_EXIT; - clockevents_notify(reason, &pr->id); - } -} - #else static void lapic_timer_check_state(int state, struct acpi_processor *pr, struct acpi_processor_cx *cstate) { } static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { } -static void lapic_timer_state_broadcast(struct acpi_processor *pr, - struct acpi_processor_cx *cx, - int broadcast) -{ -} #endif @@ -717,19 +696,10 @@ static inline void acpi_idle_do_entry(st static int acpi_idle_enter_c1(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - struct acpi_processor *pr; struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); - pr = __this_cpu_read(processors); - - if (unlikely(!pr)) - return -EINVAL; - - lapic_timer_state_broadcast(pr, cx, 1); acpi_idle_do_entry(cx); - lapic_timer_state_broadcast(pr, cx, 0); - return index; } @@ -785,22 +755,8 @@ static int acpi_idle_enter_simple(struct return acpi_idle_enter_c1(dev, drv, CPUIDLE_DRIVER_STATE_START); #endif - /* - * Must be done before busmaster disable as we might need to - * access HPET ! - */ - lapic_timer_state_broadcast(pr, cx, 1); - - if (cx->type == ACPI_STATE_C3) - ACPI_FLUSH_CPU_CACHE(); - - /* Tell the scheduler that we are going deep-idle: */ - sched_clock_idle_sleep_event(); acpi_idle_do_entry(cx); - sched_clock_idle_wakeup_event(0); - - lapic_timer_state_broadcast(pr, cx, 0); return index; } @@ -843,16 +799,6 @@ static int acpi_idle_enter_bm(struct cpu } } - acpi_unlazy_tlb(smp_processor_id()); - - /* Tell the scheduler that we are going deep-idle: */ - sched_clock_idle_sleep_event(); - /* - * Must be done before busmaster disable as we might need to - * access HPET ! - */ - lapic_timer_state_broadcast(pr, cx, 1); - /* * disable bus master * bm_check implies we need ARB_DIS @@ -884,9 +830,6 @@ static int acpi_idle_enter_bm(struct cpu raw_spin_unlock(&c3_lock); } - sched_clock_idle_wakeup_event(0); - - lapic_timer_state_broadcast(pr, cx, 0); return index; } @@ -989,6 +932,7 @@ static int acpi_processor_setup_cpuidle_ state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_c1; + state->flags |= CPUIDLE_FLAG_TIMER_STOP; state->enter_dead = acpi_idle_play_dead; drv->safe_state_index = count; break; @@ -996,15 +940,20 @@ static int acpi_processor_setup_cpuidle_ case ACPI_STATE_C2: state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_simple; + state->flags |= CPUIDLE_FLAG_TIMER_STOP; state->enter_dead = acpi_idle_play_dead; drv->safe_state_index = count; break; case ACPI_STATE_C3: state->flags |= CPUIDLE_FLAG_TIME_VALID; - state->enter = pr->flags.bm_check ? - acpi_idle_enter_bm : - acpi_idle_enter_simple; + state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; + state->flags |= CPUIDLE_FLAG_TIMER_STOP; + if (pr->flags.bm_check) { + state->enter = acpi_idle_enter_bm; + } else { + state->enter = acpi_idle_enter_simple; + } break; } --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -152,9 +152,11 @@ static void cpuidle_idle_call(void) * is used from another cpu as a broadcast timer, this call may * fail if it is not available */ - if (broadcast && - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu)) - goto use_default; + if (broadcast) { + if (clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu)) + goto use_default; + sched_clock_idle_sleep_event(); + } trace_cpu_idle_rcuidle(next_state, dev->cpu); @@ -167,8 +169,10 @@ static void cpuidle_idle_call(void) trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); - if (broadcast) + if (broadcast) { + sched_clock_idle_wakeup_event(0); clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + } /* * Give the governor an opportunity to reflect on the outcome
pgpQ3QoOEMGfj.pgp
Description: PGP signature