On Tue, 2014-11-25 at 16:47 +0530, Shreyas B. Prabhu wrote:

> diff --git a/arch/powerpc/kernel/cpu_setup_power.S 
> b/arch/powerpc/kernel/cpu_setup_power.S
> index 4673353..66874aa 100644
> --- a/arch/powerpc/kernel/cpu_setup_power.S
> +++ b/arch/powerpc/kernel/cpu_setup_power.S
> @@ -55,6 +55,8 @@ _GLOBAL(__setup_cpu_power8)
>       beqlr
>       li      r0,0
>       mtspr   SPRN_LPID,r0
> +     mtspr   SPRN_WORT,r0
> +     mtspr   SPRN_WORC,r0
>       mfspr   r3,SPRN_LPCR
>       ori     r3, r3, LPCR_PECEDH
>       bl      __init_LPCR
> @@ -75,6 +77,8 @@ _GLOBAL(__restore_cpu_power8)
>       li      r0,0
>       mtspr   SPRN_LPID,r0
>       mfspr   r3,SPRN_LPCR
> +     mtspr   SPRN_WORT,r0
> +     mtspr   SPRN_WORC,r0
>       ori     r3, r3, LPCR_PECEDH
>       bl      __init_LPCR
>       bl      __init_HFSCR

Clearing WORT and WORC might not be the best thing. We know the HW folks
have been trying to tune those values and we might need to preserve what
the boot FW has set.

Can you get in touch with them and double check what we should do here ?

> diff --git a/arch/powerpc/kernel/exceptions-64s.S 
> b/arch/powerpc/kernel/exceptions-64s.S
> index 3311c8d..c9897cb 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -112,6 +112,16 @@ BEGIN_FTR_SECTION
>  
>       cmpwi   cr1,r13,2
>  
> +     /* Check if last bit of HSPGR0 is set. This indicates whether we are
> +      * waking up from winkle */
> +     li      r3,1
> +     mfspr   r4,SPRN_HSPRG0
> +     and     r5,r4,r3
> +     cmpwi   cr4,r5,1        /* Store result in cr4 for later use */
> +
> +     andc    r4,r4,r3
> +     mtspr   SPRN_HSPRG0,r4
> +

There is an open question here whether adding a beq cr4,+8 after the
cmpwi (or a +4 after the andc) is worthwhile. Can you check ? (either
measure or talk to HW folks). Also we could write directly to r13...

>       GET_PACA(r13)
>       lbz     r0,PACA_THREAD_IDLE_STATE(r13)
>       cmpwi   cr2,r0,PNV_THREAD_NAP
> diff --git a/arch/powerpc/kernel/idle_power7.S 
> b/arch/powerpc/kernel/idle_power7.S
> index c1d590f..78c30b0 100644
> --- a/arch/powerpc/kernel/idle_power7.S
> +++ b/arch/powerpc/kernel/idle_power7.S
> @@ -19,8 +19,22 @@
>  #include <asm/kvm_book3s_asm.h>
>  #include <asm/opal.h>
>  #include <asm/cpuidle.h>
> +#include <asm/mmu-hash64.h>
>  
>  #undef DEBUG
> +/*
> + * Use unused space in the interrupt stack to save and restore
> + * registers for winkle support.
> + */
> +#define _SDR1        GPR3
> +#define _RPR GPR4
> +#define _SPURR       GPR5
> +#define _PURR        GPR6
> +#define _TSCR        GPR7
> +#define _DSCR        GPR8
> +#define _AMOR        GPR9
> +#define _PMC5        GPR10
> +#define _PMC6        GPR11

WORT/WORTC need saving restoring

>  /* Idle state entry routines */
>  
> @@ -153,32 +167,60 @@ lwarx_loop1:
>       b       common_enter
>  
>  last_thread:
> -     LOAD_REG_ADDR(r3, pnv_need_fastsleep_workaround)
> -     lbz     r3,0(r3)
> -     cmpwi   r3,1
> -     bne     common_enter
>       /*
>        * Last thread of the core entering sleep. Last thread needs to execute
>        * the hardware bug workaround code. Before that, set the lock bit to
>        * avoid the race of other threads waking up and undoing workaround
>        * before workaround is applied.
>        */
> +     LOAD_REG_ADDR(r3, pnv_need_fastsleep_workaround)
> +     lbz     r3,0(r3)
> +     cmpwi   r3,1
> +     bne     common_enter
> +
>       ori     r15,r15,PNV_CORE_IDLE_LOCK_BIT
>       stwcx.  r15,0,r14
>       bne-    lwarx_loop1
>  
>       /* Fast sleep workaround */
> +     mfcr    r16     /* Backup CR to a non-volatile register */
>       li      r3,1
>       li      r4,1
>       li      r0,OPAL_CONFIG_CPU_IDLE_STATE
>       bl      opal_call_realmode
> +     mtcr    r16     /* Restore CR */

Why isn't the above already in the previous patch ? Also see my comment
about using a non-volatile CR instead.

>       /* Clear Lock bit */
>       andi.   r15,r15,PNV_CORE_IDLE_THREAD_BITS
>       stw     r15,0(r14)
>  
> -common_enter: /* common code for all the threads entering sleep */
> +common_enter: /* common code for all the threads entering sleep or winkle*/
> +     bgt     cr1,enter_winkle
>       IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
> +enter_winkle:
> +     /*
> +      * Note all register i.e per-core, per-subcore or per-thread is saved
> +      * here since any thread in the core might wake up first
> +      */
> +     mfspr   r3,SPRN_SDR1
> +     std     r3,_SDR1(r1)
> +     mfspr   r3,SPRN_RPR
> +     std     r3,_RPR(r1)
> +     mfspr   r3,SPRN_SPURR
> +     std     r3,_SPURR(r1)
> +     mfspr   r3,SPRN_PURR
> +     std     r3,_PURR(r1)
> +     mfspr   r3,SPRN_TSCR
> +     std     r3,_TSCR(r1)
> +     mfspr   r3,SPRN_DSCR
> +     std     r3,_DSCR(r1)
> +     mfspr   r3,SPRN_AMOR
> +     std     r3,_AMOR(r1)
> +     mfspr   r3,SPRN_PMC5
> +     std     r3,_PMC5(r1)
> +     mfspr   r3,SPRN_PMC6
> +     std     r3,_PMC6(r1)
> +     IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
>  
>  _GLOBAL(power7_idle)
>       /* Now check if user or arch enabled NAP mode */
> @@ -201,6 +243,12 @@ _GLOBAL(power7_sleep)
>       b       power7_powersave_common
>       /* No return */
>  
> +_GLOBAL(power7_winkle)
> +     li      r3,PNV_THREAD_WINKLE
> +     li      r4,1
> +     b       power7_powersave_common
> +     /* No return */
> +
>  #define CHECK_HMI_INTERRUPT                                          \
>       mfspr   r0,SPRN_SRR1;                                           \
>  BEGIN_FTR_SECTION_NESTED(66);                                                
> \
> @@ -250,22 +298,54 @@ lwarx_loop2:
>        */
>       bne     lwarx_loop2
>  
> -     cmpwi   cr2,r15,0
> +     cmpwi   cr2,r15,0       /* Check if first in core */
> +     lbz     r4,PACA_SUBCORE_SIBLING_MASK(r13)
> +     and     r4,r4,r15
> +     cmpwi   cr3,r4,0        /* Check if first in subcore */
> +
> +     /*
> +      * At this stage
> +      * cr1 - 01 if waking up from sleep or winkle
> +      * cr2 - 10 if first thread to wakeup in core
> +      * cr3 - 10 if first thread to wakeup in subcore
> +      * cr4 - 10 if waking up from winkle
> +      */
> +
>       or      r15,r15,r7              /* Set thread bit */
>  
> -     beq     cr2,first_thread
> +     beq     cr3,first_thread_in_subcore
>  
> -     /* Not first thread in core to wake up */
> +     /* Not first thread in subcore to wake up */
>       stwcx.  r15,0,r14
>       bne-    lwarx_loop2
>       b       common_exit
>  
> -first_thread:
> -     /* First thread in core to wakeup */
> +first_thread_in_subcore:
> +     /* First thread in subcore to wakeup set the lock bit */
>       ori     r15,r15,PNV_CORE_IDLE_LOCK_BIT
>       stwcx.  r15,0,r14
>       bne-    lwarx_loop2
>  
> +     /*
> +      * If waking up from sleep, subcore state is not lost. Hence
> +      * skip subcore state restore
> +      */
> +     bne     cr4,subcore_state_restored
> +
> +     /* Restore per-subcore state */
> +     ld      r4,_SDR1(r1)
> +     mtspr   SPRN_SDR1,r4
> +     ld      r4,_RPR(r1)
> +     mtspr   SPRN_RPR,r4
> +     ld      r4,_AMOR(r1)
> +     mtspr   SPRN_AMOR,r4
> +
> +subcore_state_restored:
> +     /* Check if the thread is also the first thread in the core. If not,
> +      * skip to clear_lock */
> +     bne     cr2,clear_lock
> +
> +first_thread_in_core:
>       LOAD_REG_ADDR(r3, pnv_need_fastsleep_workaround)
>       lbz     r3,0(r3)
>       cmpwi   r3,1
> @@ -280,21 +360,71 @@ first_thread:
>       bl      opal_call_realmode
>       mtcr    r16     /* Restore CR */
>  
> -     /* Do timebase resync if we are waking up from sleep. Use cr1 value
> -      * set in exceptions-64s.S */
> +timebase_resync:
> +     /* Do timebase resync only if the core truly woke up from
> +      * sleep/winkle */
>       ble     cr1,clear_lock
>  
> -timebase_resync:
>       /* Time base re-sync */
> +     mfcr    r16     /* Backup CR into a non-volatile register */
>       li      r0,OPAL_RESYNC_TIMEBASE
>       bl      opal_call_realmode;
>       /* TODO: Check r3 for failure */
> +     mtcr    r16     /* Restore CR */
> +
> +     /*
> +      * If waking up from sleep, per core state is not lost, skip to
> +      * clear_lock.
> +      */
> +     bne     cr4,clear_lock
> +
> +     /* Restore per core state */
> +     ld      r4,_TSCR(r1)
> +     mtspr   SPRN_TSCR,r4
>  
>  clear_lock:
>       andi.   r15,r15,PNV_CORE_IDLE_THREAD_BITS
>       stw     r15,0(r14)
>  
>  common_exit:
> +     /* Common to all threads
> +      *
> +      * If waking up from sleep, hypervisor state is not lost. Hence
> +      * skip hypervisor state restore.
> +      */
> +     bne     cr4,hypervisor_state_restored
> +
> +     /* Waking up from winkle */
> +
> +     /* Restore per thread state */
> +     bl      __restore_cpu_power8
> +
> +     /* Restore SLB  from PACA */
> +     ld      r8,PACA_SLBSHADOWPTR(r13)
> +
> +     .rept   SLB_NUM_BOLTED
> +     li      r3, SLBSHADOW_SAVEAREA
> +     LDX_BE  r5, r8, r3
> +     addi    r3, r3, 8
> +     LDX_BE  r6, r8, r3
> +     andis.  r7,r5,SLB_ESID_V@h
> +     beq     1f
> +     slbmte  r6,r5
> +1:   addi    r8,r8,16
> +     .endr
> +
> +     ld      r4,_SPURR(r1)
> +     mtspr   SPRN_SPURR,r4
> +     ld      r4,_PURR(r1)
> +     mtspr   SPRN_PURR,r4
> +     ld      r4,_DSCR(r1)
> +     mtspr   SPRN_DSCR,r4
> +     ld      r4,_PMC5(r1)
> +     mtspr   SPRN_PMC5,r4
> +     ld      r4,_PMC6(r1)
> +     mtspr   SPRN_PMC6,r4
> +
> +hypervisor_state_restored:
>       li      r5,PNV_THREAD_RUNNING
>       stb     r5,PACA_THREAD_IDLE_STATE(r13)
>  
> diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
> b/arch/powerpc/platforms/powernv/opal-wrappers.S
> index b2aa93b..e1e91e0 100644
> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
> @@ -191,6 +191,7 @@ return_from_opal_call:
>  #ifdef __LITTLE_ENDIAN__
>       FIXUP_ENDIAN
>  #endif
> +     ld      r2,PACATOC(r13)
>       ld      r12,_LINK(r1)
>       mtlr    r12
>       blr
> @@ -284,6 +285,7 @@ OPAL_CALL(opal_sensor_read,                       
> OPAL_SENSOR_READ);
>  OPAL_CALL(opal_get_param,                    OPAL_GET_PARAM);
>  OPAL_CALL(opal_set_param,                    OPAL_SET_PARAM);
>  OPAL_CALL(opal_handle_hmi,                   OPAL_HANDLE_HMI);
> +OPAL_CALL(opal_slw_set_reg,                  OPAL_SLW_SET_REG);
>  OPAL_CALL(opal_register_dump_region,         OPAL_REGISTER_DUMP_REGION);
>  OPAL_CALL(opal_unregister_dump_region,               
> OPAL_UNREGISTER_DUMP_REGION);
>  OPAL_CALL(opal_pci_set_phb_cxl_mode,         OPAL_PCI_SET_PHB_CXL_MODE);
> diff --git a/arch/powerpc/platforms/powernv/setup.c 
> b/arch/powerpc/platforms/powernv/setup.c
> index 17fb98c..4a886a1 100644
> --- a/arch/powerpc/platforms/powernv/setup.c
> +++ b/arch/powerpc/platforms/powernv/setup.c
> @@ -40,6 +40,7 @@
>  #include <asm/cpuidle.h>
>  
>  #include "powernv.h"
> +#include "subcore.h"
>  
>  static void __init pnv_setup_arch(void)
>  {
> @@ -293,6 +294,74 @@ static void __init pnv_setup_machdep_rtas(void)
>  #endif /* CONFIG_PPC_POWERNV_RTAS */
>  
>  static u32 supported_cpuidle_states;
> +int pnv_save_sprs_for_winkle(void)
> +{
> +     int cpu;
> +     int rc;
> +
> +     /*
> +      * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross
> +      * all cpus at boot. Get these reg values of current cpu and use the
> +      * same accross all cpus.
> +      */
> +     uint64_t lpcr_val = mfspr(SPRN_LPCR);
> +     uint64_t hid0_val = mfspr(SPRN_HID0);
> +     uint64_t hid1_val = mfspr(SPRN_HID1);
> +     uint64_t hid4_val = mfspr(SPRN_HID4);
> +     uint64_t hid5_val = mfspr(SPRN_HID5);
> +     uint64_t hmeer_val = mfspr(SPRN_HMEER);
> +
> +     for_each_possible_cpu(cpu) {
> +             uint64_t pir = get_hard_smp_processor_id(cpu);
> +             uint64_t hsprg0_val = (uint64_t)&paca[cpu];
> +
> +             /*
> +              * HSPRG0 is used to store the cpu's pointer to paca. Hence last
> +              * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
> +              * with 63rd bit set, so that when a thread wakes up at 0x100 we
> +              * can use this bit to distinguish between fastsleep and
> +              * deep winkle.
> +              */
> +             hsprg0_val |= 1;
> +
> +             rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
> +             if (rc != 0)
> +                     return rc;
> +
> +             rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
> +             if (rc != 0)
> +                     return rc;
> +
> +             /* HIDs are per core registers */
> +             if (cpu_thread_in_core(cpu) == 0) {
> +
> +                     rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
> +                     if (rc != 0)
> +                             return rc;
> +
> +                     rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
> +                     if (rc != 0)
> +                             return rc;
> +
> +                     rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
> +                     if (rc != 0)
> +                             return rc;
> +
> +                     rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
> +                     if (rc != 0)
> +                             return rc;
> +
> +                     rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
> +                     if (rc != 0)
> +                             return rc;
> +
> +             }
> +
> +     }
> +
> +     return 0;
> +
> +}
>  
>  static void pnv_alloc_idle_core_states(void)
>  {
> @@ -324,6 +393,10 @@ static void pnv_alloc_idle_core_states(void)
>  
>               }
>       }
> +     update_subcore_sibling_mask();
> +     if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
> +             pnv_save_sprs_for_winkle();
> +
>  }
>  
>  u32 pnv_get_supported_cpuidle_states(void)
> diff --git a/arch/powerpc/platforms/powernv/smp.c 
> b/arch/powerpc/platforms/powernv/smp.c
> index 12b761a..5e35857 100644
> --- a/arch/powerpc/platforms/powernv/smp.c
> +++ b/arch/powerpc/platforms/powernv/smp.c
> @@ -167,7 +167,9 @@ static void pnv_smp_cpu_kill_self(void)
>       mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
>       while (!generic_check_cpu_restart(cpu)) {
>               ppc64_runlatch_off();
> -             if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
> +             if (idle_states & OPAL_PM_WINKLE_ENABLED)
> +                     power7_winkle();
> +             else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
>                               (idle_states & OPAL_PM_SLEEP_ENABLED_ER1))
>                       power7_sleep();
>               else
> diff --git a/arch/powerpc/platforms/powernv/subcore.c 
> b/arch/powerpc/platforms/powernv/subcore.c
> index c87f96b..f60f80a 100644
> --- a/arch/powerpc/platforms/powernv/subcore.c
> +++ b/arch/powerpc/platforms/powernv/subcore.c
> @@ -160,6 +160,18 @@ static void wait_for_sync_step(int step)
>       mb();
>  }
>  
> +static void update_hid_in_slw(u64 hid0)
> +{
> +     u64 idle_states = pnv_get_supported_cpuidle_states();
> +
> +     if (idle_states & OPAL_PM_WINKLE_ENABLED) {
> +             /* OPAL call to patch slw with the new HID0 value */
> +             u64 cpu_pir = hard_smp_processor_id();
> +
> +             opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
> +     }
> +}
> +
>  static void unsplit_core(void)
>  {
>       u64 hid0, mask;
> @@ -179,6 +191,7 @@ static void unsplit_core(void)
>       hid0 = mfspr(SPRN_HID0);
>       hid0 &= ~HID0_POWER8_DYNLPARDIS;
>       mtspr(SPRN_HID0, hid0);
> +     update_hid_in_slw(hid0);
>  
>       while (mfspr(SPRN_HID0) & mask)
>               cpu_relax();
> @@ -215,6 +228,7 @@ static void split_core(int new_mode)
>       hid0  = mfspr(SPRN_HID0);
>       hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
>       mtspr(SPRN_HID0, hid0);
> +     update_hid_in_slw(hid0);
>  
>       /* Wait for it to happen */
>       while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
> @@ -251,6 +265,25 @@ bool cpu_core_split_required(void)
>       return true;
>  }
>  
> +void update_subcore_sibling_mask(void)
> +{
> +     int cpu;
> +     /*
> +      * sibling mask for the first cpu. Left shift this by required bits
> +      * to get sibling mask for the rest of the cpus.
> +      */
> +     int sibling_mask_first_cpu =  (1 << threads_per_subcore) - 1;
> +
> +     for_each_possible_cpu(cpu) {
> +             int tid = cpu_thread_in_core(cpu);
> +             int offset = (tid / threads_per_subcore) * threads_per_subcore;
> +             int mask = sibling_mask_first_cpu << offset;
> +
> +             paca[cpu].subcore_sibling_mask = mask;
> +
> +     }
> +}
> +
>  static int cpu_update_split_mode(void *data)
>  {
>       int cpu, new_mode = *(int *)data;
> @@ -284,6 +317,7 @@ static int cpu_update_split_mode(void *data)
>               /* Make the new mode public */
>               subcores_per_core = new_mode;
>               threads_per_subcore = threads_per_core / subcores_per_core;
> +             update_subcore_sibling_mask();
>  
>               /* Make sure the new mode is written before we exit */
>               mb();
> diff --git a/arch/powerpc/platforms/powernv/subcore.h 
> b/arch/powerpc/platforms/powernv/subcore.h
> index 148abc9..604eb40 100644
> --- a/arch/powerpc/platforms/powernv/subcore.h
> +++ b/arch/powerpc/platforms/powernv/subcore.h
> @@ -15,4 +15,5 @@
>  
>  #ifndef __ASSEMBLY__
>  void split_core_secondary_loop(u8 *state);
> +extern void update_subcore_sibling_mask(void);
>  #endif


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to