On Monday, October 20, 2014 06:25:39 PM Daniel Lezcano wrote:
> When the pmqos latency requirement is set to zero that means "poll in all the
> cases".
> 
> That is correctly implemented on x86 but not on the other archs.
> 
> As the code is currently written, if the latency request is zero, the governor
> will return zero, which corresponds, on x86, to the poll function, but on the
> other archs to the default idle function. For example, on ARM this is wait-for-
> interrupt with a latency of '1', thus violating the constraint.
> 
> In order to fix that, do the latency requirement check *before* calling the
> cpuidle framework in order to jump to the poll function without entering
> cpuidle. That has several benefits:
> 
>  1. It clarifies and unifies the code
>  2. It fixes the x86 vs other archs behavior
>  3. It factors out the call to the same function
>  4. It prevents entering the cpuidle framework, with its expensive cost
>     in calculation
> 
> As the latency_req is needed in all the cases, change the select API to take
> the latency_req as a parameter (it is only called when latency_req is not zero).
> 
> As a positive side effect, it introduces the latency constraint specified
> externally, so it is one more step toward the cpuidle/scheduler integration.
> 
> Signed-off-by: Daniel Lezcano <[email protected]>

I've discussed this series with Len and the patches look good to us.

I can apply them unless Peter prefers to take them through sched.  In that
case, Peter, please add my ACK to them.


> ---
>  drivers/cpuidle/cpuidle.c          |  5 +++--
>  drivers/cpuidle/governors/ladder.c |  9 +--------
>  drivers/cpuidle/governors/menu.c   |  8 ++------
>  include/linux/cpuidle.h            |  7 ++++---
>  kernel/sched/idle.c                | 18 ++++++++++++++----
>  5 files changed, 24 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index ee9df5e..372c36f 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -158,7 +158,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, 
> struct cpuidle_driver *drv,
>   *
>   * Returns the index of the idle state.
>   */
> -int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> +int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> +                int latency_req)
>  {
>       if (off || !initialized)
>               return -ENODEV;
> @@ -169,7 +170,7 @@ int cpuidle_select(struct cpuidle_driver *drv, struct 
> cpuidle_device *dev)
>       if (unlikely(use_deepest_state))
>               return cpuidle_find_deepest_state(drv, dev);
>  
> -     return cpuidle_curr_governor->select(drv, dev);
> +     return cpuidle_curr_governor->select(drv, dev, latency_req);
>  }
>  
>  /**
> diff --git a/drivers/cpuidle/governors/ladder.c 
> b/drivers/cpuidle/governors/ladder.c
> index 044ee0d..18f0da9 100644
> --- a/drivers/cpuidle/governors/ladder.c
> +++ b/drivers/cpuidle/governors/ladder.c
> @@ -64,18 +64,11 @@ static inline void ladder_do_selection(struct 
> ladder_device *ldev,
>   * @dev: the CPU
>   */
>  static int ladder_select_state(struct cpuidle_driver *drv,
> -                             struct cpuidle_device *dev)
> +                            struct cpuidle_device *dev, int latency_req)
>  {
>       struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
>       struct ladder_device_state *last_state;
>       int last_residency, last_idx = ldev->last_state_idx;
> -     int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
> -
> -     /* Special case when user has set very strict latency requirement */
> -     if (unlikely(latency_req == 0)) {
> -             ladder_do_selection(ldev, last_idx, 0);
> -             return 0;
> -     }
>  
>       last_state = &ldev->states[last_idx];
>  
> diff --git a/drivers/cpuidle/governors/menu.c 
> b/drivers/cpuidle/governors/menu.c
> index 34db2fb..96f8fb0 100644
> --- a/drivers/cpuidle/governors/menu.c
> +++ b/drivers/cpuidle/governors/menu.c
> @@ -287,10 +287,10 @@ again:
>   * @drv: cpuidle driver containing state data
>   * @dev: the CPU
>   */
> -static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device 
> *dev)
> +static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device 
> *dev,
> +                    int latency_req)
>  {
>       struct menu_device *data = &__get_cpu_var(menu_devices);
> -     int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
>       int i;
>       unsigned int interactivity_req;
>       unsigned long nr_iowaiters, cpu_load;
> @@ -302,10 +302,6 @@ static int menu_select(struct cpuidle_driver *drv, 
> struct cpuidle_device *dev)
>  
>       data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
>  
> -     /* Special case when user has set very strict latency requirement */
> -     if (unlikely(latency_req == 0))
> -             return 0;
> -
>       /* determine the expected residency time, round up */
>       data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
>  
> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
> index 25e0df6..fb465c1 100644
> --- a/include/linux/cpuidle.h
> +++ b/include/linux/cpuidle.h
> @@ -122,7 +122,7 @@ struct cpuidle_driver {
>  extern void disable_cpuidle(void);
>  
>  extern int cpuidle_select(struct cpuidle_driver *drv,
> -                       struct cpuidle_device *dev);
> +                       struct cpuidle_device *dev, int latency_req);
>  extern int cpuidle_enter(struct cpuidle_driver *drv,
>                        struct cpuidle_device *dev, int index);
>  extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
> @@ -150,7 +150,7 @@ extern struct cpuidle_driver 
> *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
>  #else
>  static inline void disable_cpuidle(void) { }
>  static inline int cpuidle_select(struct cpuidle_driver *drv,
> -                              struct cpuidle_device *dev)
> +                              struct cpuidle_device *dev, int latency_req)
>  {return -ENODEV; }
>  static inline int cpuidle_enter(struct cpuidle_driver *drv,
>                               struct cpuidle_device *dev, int index)
> @@ -205,7 +205,8 @@ struct cpuidle_governor {
>                                       struct cpuidle_device *dev);
>  
>       int  (*select)          (struct cpuidle_driver *drv,
> -                                     struct cpuidle_device *dev);
> +                              struct cpuidle_device *dev,
> +                              int latency_req);
>       void (*reflect)         (struct cpuidle_device *dev, int index);
>  
>       struct module           *owner;
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index 11e7bc4..25ba94d 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -5,6 +5,7 @@
>  #include <linux/cpu.h>
>  #include <linux/cpuidle.h>
>  #include <linux/tick.h>
> +#include <linux/pm_qos.h>
>  #include <linux/mm.h>
>  #include <linux/stackprotector.h>
>  
> @@ -74,7 +75,7 @@ void __weak arch_cpu_idle(void)
>   * set, and it returns with polling set.  If it ever stops polling, it
>   * must clear the polling bit.
>   */
> -static void cpuidle_idle_call(void)
> +static void cpuidle_idle_call(unsigned int latency_req)
>  {
>       struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
>       struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
> @@ -107,7 +108,7 @@ static void cpuidle_idle_call(void)
>        * Ask the cpuidle framework to choose a convenient idle state.
>        * Fall back to the default arch idle method on errors.
>        */
> -     next_state = cpuidle_select(drv, dev);
> +     next_state = cpuidle_select(drv, dev, latency_req);
>       if (next_state < 0) {
>  use_default:
>               /*
> @@ -182,6 +183,8 @@ exit_idle:
>   */
>  static void cpu_idle_loop(void)
>  {
> +     unsigned int latency_req;
> +
>       while (1) {
>               /*
>                * If the arch has a polling bit, we maintain an invariant:
> @@ -205,19 +208,26 @@ static void cpu_idle_loop(void)
>                       local_irq_disable();
>                       arch_cpu_idle_enter();
>  
> +                     latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
> +
>                       /*
>                        * In poll mode we reenable interrupts and spin.
>                        *
> +                      * If the latency req is zero, we don't want to
> +                      * enter any idle state and we jump to the poll
> +                      * function directly
> +                      *
>                        * Also if we detected in the wakeup from idle
>                        * path that the tick broadcast device expired
>                        * for us, we don't want to go deep idle as we
>                        * know that the IPI is going to arrive right
>                        * away
>                        */
> -                     if (cpu_idle_force_poll || 
> tick_check_broadcast_expired())
> +                     if (!latency_req || cpu_idle_force_poll ||
> +                         tick_check_broadcast_expired())
>                               cpu_idle_poll();
>                       else
> -                             cpuidle_idle_call();
> +                             cpuidle_idle_call(latency_req);
>  
>                       arch_cpu_idle_exit();
>               }
> 

-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to