On 04/20/2015 10:32 AM, Shreyas B. Prabhu wrote:
> Fastsleep is one of the idle states which the cpuidle subsystem currently
> uses on power8 machines. In this state L2 cache is brought down to a
> threshold voltage. Therefore when the core is in fastsleep, the
> communication between L2 and L3 needs to be fenced. But there is a bug
> in the current power8 chips surrounding this fencing.
> 
> OPAL provides a workaround which precludes the possibility of hitting
> this bug. But running with this workaround applied causes a checkstop
> if any correctable error in the L2 cache directory is detected. Hence
> OPAL also provides a way to undo the workaround.
> 
> In the existing implementation, the workaround is applied by the last
> thread of the core entering fastsleep and undone by the first thread
> waking up. But this has a performance cost. These OPAL calls account for
> roughly 4000 cycles every time the core has to enter or wake up from
> fastsleep.
> 
> This patch introduces a sysfs attribute (fastsleep_workaround_applyonce)
> to choose the behavior of this workaround.
> 
> By default, fastsleep_workaround_applyonce = 0. In this case, the
> workaround is applied/undone every time the core enters/exits fastsleep.
> 
> fastsleep_workaround_applyonce = 1. In this case the workaround is
> applied once on all the cores and never undone. This can be triggered by
> echo 1 > /sys/devices/system/cpu/fastsleep_workaround_applyonce
> 
> For simplicity, this attribute can be modified only once. That is, once
> fastsleep_workaround_applyonce is changed to 1, it cannot be reverted
> to the default state.
> 
> Signed-off-by: Shreyas B. Prabhu <shre...@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/opal-api.h            |   7 ++
>  arch/powerpc/include/asm/opal.h                |   1 +
>  arch/powerpc/platforms/powernv/idle.c          | 101 +++++++++++++++++++++++++
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
>  4 files changed, 110 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index 0321a90..a49e5fa 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -165,6 +165,13 @@
>  #define OPAL_PM_WINKLE_ENABLED               0x00040000
>  #define OPAL_PM_SLEEP_ENABLED_ER1    0x00080000 /* with workaround */
> 
> +/*
> + * OPAL_CONFIG_CPU_IDLE_STATE parameters
> + */
> +#define OPAL_CONFIG_IDLE_FASTSLEEP   1
> +#define OPAL_CONFIG_IDLE_UNDO                0
> +#define OPAL_CONFIG_IDLE_APPLY               1
> +
>  #ifndef __ASSEMBLY__
> 
>  /* Other enums */
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 042af1a..9a47813 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -186,6 +186,7 @@ int64_t opal_handle_hmi(void);
>  int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
>  int64_t opal_unregister_dump_region(uint32_t id);
>  int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
> +int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
>  int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
> pe_number);
>  int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
>               uint64_t msg_len);
> diff --git a/arch/powerpc/platforms/powernv/idle.c 
> b/arch/powerpc/platforms/powernv/idle.c
> index 104235a..f90cc86 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -13,6 +13,8 @@
>  #include <linux/mm.h>
>  #include <linux/slab.h>
>  #include <linux/of.h>
> +#include <linux/device.h>
> +#include <linux/cpu.h>
> 
>  #include <asm/firmware.h>
>  #include <asm/opal.h>
> @@ -136,6 +138,96 @@ u32 pnv_get_supported_cpuidle_states(void)
>  }
>  EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
> 
> +
> +static void pnv_fastsleep_workaround_apply(void *info)
> +
> +{
> +     int rc;
> +     int *err = info;
> +
> +     rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
> +                                     OPAL_CONFIG_IDLE_APPLY);
> +     if (rc)
> +             *err = 1;
> +}
> +
> +/*
> + * Used to store fastsleep workaround state
> + * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
> + * 1 - Workaround applied once, never undone.
> + */
> +static u8 fastsleep_workaround_applyonce;
> +
> +static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
> +             struct device_attribute *attr, char *buf)
> +{
> +     return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
> +}
> +
> +static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
> +             struct device_attribute *attr, const char *buf,
> +             size_t count)
> +{
> +     cpumask_t primary_thread_mask;
> +     int err;
> +     u8 val;
> +
> +     if (kstrtou8(buf, 0, &val) || val != 1)
> +             return -EINVAL;
> +
> +     if (fastsleep_workaround_applyonce == 1)
> +             return count;
> +
> +     /*
> +      * fastsleep_workaround_applyonce = 1 implies
> +      * fastsleep workaround needs to be left in 'applied' state on all
> +      * the cores. Do this by-
> +      * 1. Patching out the call to 'undo' workaround in fastsleep exit path
> +      * 2. Sending ipi to all the cores which have atleast one online thread
> +      * 3. Patching out the call to 'apply' workaround in fastsleep entry
> +      * path
> +      * There is no need to send ipi to cores which have all threads
> +      * offlined, as last thread of the core entering fastsleep or deeper
> +      * state would have applied workaround.
> +      */
> +     err = patch_instruction(
> +             (unsigned int *)pnv_fastsleep_workaround_at_exit,
> +             PPC_INST_NOP);
> +     if (err) {
> +             pr_err("fastsleep_workaround_applyonce change failed while 
> patching pnv_fastsleep_workaround_at_exit");
> +             goto fail;
> +     }
> +
> +     get_online_cpus();
> +     primary_thread_mask = cpu_online_cores_map();
> +     on_each_cpu_mask(&primary_thread_mask,
> +                             pnv_fastsleep_workaround_apply,
> +                             &err, 1);
> +     put_online_cpus();
> +     if (err) {
> +             pr_err("fastsleep_workaround_applyonce change failed while 
> running pnv_fastsleep_workaround_apply");
> +             goto fail;
> +     }
> +
> +     err = patch_instruction(
> +             (unsigned int *)pnv_fastsleep_workaround_at_entry,
> +             PPC_INST_NOP);
> +     if (err) {
> +             pr_err("fastsleep_workaround_applyonce change failed while 
> patching pnv_fastsleep_workaround_at_entry");
> +             goto fail;
> +     }
> +
> +     fastsleep_workaround_applyonce = 1;
> +
> +     return count;
> +fail:
> +     return -EIO;
> +}
> +
> +static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
> +                     show_fastsleep_workaround_applyonce,
> +                     store_fastsleep_workaround_applyonce);
> +
>  static int __init pnv_init_idle_states(void)
>  {
>       struct device_node *power_mgt;
> @@ -180,7 +272,16 @@ static int __init pnv_init_idle_states(void)
>               patch_instruction(
>                       (unsigned int *)pnv_fastsleep_workaround_at_exit,
>                       PPC_INST_NOP);
> +     } else {
> +             /*
> +              * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
> +              * workaround is needed to use fastsleep. Provide sysfs
> +              * control to choose how this workaround has to be applied.
> +              */
> +             device_create_file(cpu_subsys.dev_root,
> +                             &dev_attr_fastsleep_workaround_applyonce);
>       }
> +
>       pnv_alloc_idle_core_states();
>  out_free:
>       kfree(flags);
> diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
> b/arch/powerpc/platforms/powernv/opal-wrappers.S
> index a7ade94..bf15ead 100644
> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
> @@ -283,6 +283,7 @@ OPAL_CALL(opal_sensor_read,                       
> OPAL_SENSOR_READ);
>  OPAL_CALL(opal_get_param,                    OPAL_GET_PARAM);
>  OPAL_CALL(opal_set_param,                    OPAL_SET_PARAM);
>  OPAL_CALL(opal_handle_hmi,                   OPAL_HANDLE_HMI);
> +OPAL_CALL(opal_config_cpu_idle_state,                
> OPAL_CONFIG_CPU_IDLE_STATE);
>  OPAL_CALL(opal_slw_set_reg,                  OPAL_SLW_SET_REG);
>  OPAL_CALL(opal_register_dump_region,         OPAL_REGISTER_DUMP_REGION);
>  OPAL_CALL(opal_unregister_dump_region,               
> OPAL_UNREGISTER_DUMP_REGION);
> 

Reviewed-by: Preeti U Murthy <pre...@linux.vnet.ibm.com>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to