Hi Shreyas,

On Tue, May 03, 2016 at 01:54:36PM +0530, Shreyas B. Prabhu wrote:
> POWER ISA v3 defines a new idle processor core mechanism. In summary,
>  a) new instruction named stop is added. This instruction replaces
>       instructions like nap, sleep, rvwinkle.
>  b) new per thread SPR named PSSCR is added which controls the behavior
>       of stop instruction.
> 
> PSSCR has following key fields
>       Bits 0:3  - Power-Saving Level Status. This field indicates the lowest
>       power-saving state the thread entered since stop instruction was last
>       executed.
> 
>       Bit 42 - Enable State Loss
>       0 - No state is lost irrespective of other fields
>       1 - Allows state loss
> 
>       Bits 44:47 - Power-Saving Level Limit
>       This limits the power-saving level that can be entered into.
> 
>       Bits 60:63 - Requested Level
>       Used to specify which power-saving level must be entered on executing
>       stop instruction
> 
> This patch adds support for stop instruction and PSSCR handling.
> 
> Signed-off-by: Shreyas B. Prabhu <shre...@linux.vnet.ibm.com>
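
For readers following along, the field layout above maps to masks like
these (a sketch in C; the macro names are illustrative, not taken from
the patch, and the shifts assume IBM bit numbering, i.e. bit 0 is the
MSB of the 64-bit SPR):

  #include <stdint.h>

  /* Illustrative PSSCR field masks -- names are not from the patch */
  #define PSSCR_PLS_MASK  0xF000000000000000ULL /* bits 0:3, PSL Status */
  #define PSSCR_ESL       0x0000000000200000ULL /* bit 42, Enable State Loss */
  #define PSSCR_PSLL_MASK 0x00000000000F0000ULL /* bits 44:47, PSL Limit */
  #define PSSCR_RL_MASK   0x000000000000000FULL /* bits 60:63, Requested Level */

  /* lowest power-saving state entered since stop was last executed */
  static inline unsigned int psscr_pls(uint64_t psscr)
  {
          return psscr >> 60;     /* bits 0:3 into the low nibble */
  }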

[..snip..]

> diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
> index 6a24769..d85f834 100644
> --- a/arch/powerpc/kernel/idle_power7.S
> +++ b/arch/powerpc/kernel/idle_power7.S
> @@ -46,7 +46,7 @@ core_idle_lock_held:
>  power7_enter_nap_mode:
>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
>       /* Tell KVM we're napping */
> -     li      r4,KVM_HWTHREAD_IN_NAP
> +     li      r4,KVM_HWTHREAD_IN_IDLE
>       stb     r4,HSTATE_HWTHREAD_STATE(r13)
>  #endif
>       stb     r3,PACA_THREAD_IDLE_STATE(r13)
> diff --git a/arch/powerpc/kernel/idle_power_common.S b/arch/powerpc/kernel/idle_power_common.S
> index ff7a541..f260fa8 100644
> --- a/arch/powerpc/kernel/idle_power_common.S
> +++ b/arch/powerpc/kernel/idle_power_common.S
> @@ -96,11 +96,35 @@ _GLOBAL(power_powersave_common)
>   * back to reset vector.
>   */
>  _GLOBAL(power7_restore_hyp_resource)
> +     GET_PACA(r13)
> +BEGIN_FTR_SECTION_NESTED(888)
> +     /*
> +      * POWER ISA 3. Use PSSCR to determine if we
> +      * are waking up from deep idle state
> +      */
> +     LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
> +     ld      r4,ADDROFF(pnv_first_deep_stop_state)(r5)
> +
> +     mfspr   r5,SPRN_PSSCR
> +     /*
> +      * Bits 0-3 correspond to Power-Saving Level Status
> +      * which indicates the idle state we are waking up from
> +      */
> +     rldicl  r5,r5,4,60
> +     cmpd    r5,r4
> +     bge     power_stop_wakeup_hyp_loss
>       /*
> +      * Waking up without hypervisor state loss. Return to
> +      * reset vector
> +      */
> +     blr
> +
> +END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300,CPU_FTR_ARCH_300,888)
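
FWIW, in C terms the check in the FTR section above is roughly the
following (a sketch; waking_from_deep_stop() is my name, not the
patch's, and pnv_first_deep_stop_state is the variable this series
introduces):

  #include <stdint.h>

  extern uint64_t pnv_first_deep_stop_state;

  static int waking_from_deep_stop(uint64_t psscr)
  {
          /* PLS, bits 0:3, i.e. the rldicl r5,r5,4,60 above */
          unsigned int pls = psscr >> 60;

          return pls >= pnv_first_deep_stop_state;
  }

If it returns true we branch to power_stop_wakeup_hyp_loss, otherwise
we blr back to the reset vector.
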
> +     /*
> +      * POWER ISA 2.07 or less.
>        * Check if last bit of HSPGR0 is set. This indicates whether we are
>        * waking up from winkle.
>        */
> -     GET_PACA(r13)
>       clrldi  r5,r13,63
>       clrrdi  r13,r13,1
>       cmpwi   cr4,r5,1
> diff --git a/arch/powerpc/kernel/idle_power_stop.S b/arch/powerpc/kernel/idle_power_stop.S
> new file mode 100644
> index 0000000..6c86c56
> --- /dev/null
> +++ b/arch/powerpc/kernel/idle_power_stop.S
> @@ -0,0 +1,221 @@
> +#include <linux/threads.h>
> +
> +#include <asm/processor.h>
> +#include <asm/cputable.h>
> +#include <asm/thread_info.h>
> +#include <asm/ppc_asm.h>
> +#include <asm/asm-offsets.h>
> +#include <asm/ppc-opcode.h>
> +#include <asm/hw_irq.h>
> +#include <asm/kvm_book3s_asm.h>
> +#include <asm/opal.h>
> +#include <asm/cpuidle.h>
> +#include <asm/book3s/64/mmu-hash.h>
> +#include <asm/exception-64s.h>
> +
> +#undef DEBUG
> +
> +/*
> + * rA - Requested stop state
> + * rB - Spare reg that can be used
> + */
> +#define PSSCR_REQUEST_STATE(rA, rB)          \
> +     ld      rB, PACA_THREAD_PSSCR(r13);     \
> +     or      rB,rB,rA;                       \
> +     mtspr   SPRN_PSSCR, rB;                 \
> +
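
In C terms the macro above amounts to this (a sketch; the accessor
names are hypothetical, PACA_THREAD_PSSCR is the patch's new PACA
field):

  #include <stdint.h>

  extern uint64_t read_thread_psscr(void);      /* hypothetical */
  extern void write_psscr(uint64_t val);        /* hypothetical */

  static void psscr_request_state(uint64_t requested_level)
  {
          uint64_t psscr = read_thread_psscr(); /* ld rB,PACA_THREAD_PSSCR(r13) */

          psscr |= requested_level;             /* or rB,rB,rA: RL is bits 60:63 */
          write_psscr(psscr);                   /* mtspr SPRN_PSSCR,rB */
  }
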
> +     .text
> +
> +     .globl  power_enter_stop
> +power_enter_stop:
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +     /* Tell KVM we're napping */
> +     li      r4,KVM_HWTHREAD_IN_IDLE
> +     stb     r4,HSTATE_HWTHREAD_STATE(r13)
> +#endif
> +     LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
> +     ld      r4,ADDROFF(pnv_first_deep_stop_state)(r5)
> +     cmpd    cr3,r3,r4

It is not clear what r3 is supposed to contain at this point. I think
it should contain the requested stop state. But I might be wrong!
Perhaps a comment above power_enter_stop can clarify that.
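
Something along these lines would do, assuming my reading of r3 is
right:

  /*
   * power_enter_stop: enter a stop state.
   * r3 - requested stop state (the PSSCR Requested Level)
   */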

> +     bge     2f
> +     IDLE_STATE_ENTER_SEQ(PPC_STOP)
> +2:
> +     lbz     r7,PACA_THREAD_MASK(r13)
> +     ld      r14,PACA_CORE_IDLE_STATE_PTR(r13)
> +
> +lwarx_loop1:
> +     lwarx   r15,0,r14
> +     andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
> +     bnel    core_idle_lock_held

The definition of core_idle_lock_held below jumps to lwarx_loop2
instead of doing a blr once it observes that the LOCK_BIT is no longer
set. This doesn't seem correct, since the purpose of
core_idle_lock_held is to spin until the LOCK_BIT is cleared and then
resume whatever we were supposed to do next.

Can you clarify this part ?

> +     andc    r15,r15,r7                      /* Clear thread bit */
> +
> +     andi.   r15,r15,PNV_CORE_IDLE_THREAD_BITS
> +     stwcx.  r15,0,r14
> +     bne-    lwarx_loop1
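
In C11-atomics terms the loop above is roughly the following sketch
(the PNV_* values are quoted from asm/cpuidle.h; everything else is
illustrative):

  #include <stdatomic.h>

  #define PNV_CORE_IDLE_LOCK_BIT          0x100
  #define PNV_CORE_IDLE_THREAD_BITS       0x0FF

  /* clear our bit in the per-core idle-state word before a deep stop */
  static void clear_thread_bit(atomic_uint *core_state,
                               unsigned int thread_mask)
  {
          unsigned int old, want;

          do {
                  old = atomic_load(core_state);
                  /* core_idle_lock_held: spin while the lock is taken */
                  while (old & PNV_CORE_IDLE_LOCK_BIT)
                          old = atomic_load(core_state);
                  /* andc r15,r15,r7; andi. r15,r15,THREAD_BITS */
                  want = (old & ~thread_mask) & PNV_CORE_IDLE_THREAD_BITS;
          } while (!atomic_compare_exchange_weak(core_state, &old, want));
  }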
> +
> +     /*
> +      * Note that all registers, i.e. per-core, per-subcore or per-thread,
> +      * are saved here since any thread in the core might wake up first
> +      */
> +     mfspr   r3,SPRN_RPR
> +     std     r3,_RPR(r1)
> +     mfspr   r3,SPRN_SPURR
> +     std     r3,_SPURR(r1)
> +     mfspr   r3,SPRN_PURR
> +     std     r3,_PURR(r1)
> +     mfspr   r3,SPRN_TSCR
> +     std     r3,_TSCR(r1)
> +     mfspr   r3,SPRN_DSCR
> +     std     r3,_DSCR(r1)
> +     mfspr   r3,SPRN_AMOR
> +     std     r3,_AMOR(r1)
> +
> +     IDLE_STATE_ENTER_SEQ(PPC_STOP)
> +
> +
> +_GLOBAL(power_stop)
> +     PSSCR_REQUEST_STATE(r3,r4)
> +     li      r4, 1
> +     LOAD_REG_ADDR(r5,power_enter_stop)
> +     b       power_powersave_common
> +
> +_GLOBAL(power_stop0)
> +     li      r3,0
> +     li      r4,1
> +     LOAD_REG_ADDR(r5,power_enter_stop)
> +     PSSCR_REQUEST_STATE(r3,r4)

PSSCR_REQUEST_STATE clobbers r4 at this point, since r4 is passed in
as its scratch register. Move PSSCR_REQUEST_STATE before "li r4,1".

Also, why can't this simply branch to "power_stop" after setting r3
to 0?


> +     b       power_powersave_common
> +
> +_GLOBAL(power_stop_wakeup_hyp_loss)
> +     ld      r2,PACATOC(r13);
> +     ld      r1,PACAR1(r13)
> +     /*
> +      * Before entering any idle state, the NVGPRs are saved in the stack
> +      * and they are restored before switching to the process context. Hence
> +      * until they are restored, they are free to be used.
> +      *
> +      * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
> +      * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
> +      * wakeup reason if we branch to kvm_start_guest.
> +      */

Retain the comment from an earlier patch explaining why LR is being
cached in r17.

> +     mflr    r17
> +     mfspr   r16,SPRN_SRR1
> +BEGIN_FTR_SECTION
> +     CHECK_HMI_INTERRUPT
> +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> +
> +     lbz     r7,PACA_THREAD_MASK(r13)
> +     ld      r14,PACA_CORE_IDLE_STATE_PTR(r13)
> +lwarx_loop2:
> +     lwarx   r15,0,r14
> +     andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
> +     /*
> +      * Lock bit is set in one of the following 2 cases:
> +      * a. In the stop enter path, the last thread is executing
> +      * fastsleep workaround code.
> +      * b. In the wake up path, another thread is resyncing timebase or
> +      * restoring context
> +      * In either case loop until the lock bit is cleared.
> +      */
> +     bne     core_idle_lock_held
> +
> +     cmpwi   cr2,r15,0
> +     lbz     r4,PACA_SUBCORE_SIBLING_MASK(r13)
> +     and     r4,r4,r15
> +     cmpwi   cr1,r4,0        /* Check if first in subcore */
> +
> +     or      r15,r15,r7              /* Set thread bit */
> +
> +     beq     cr1,first_thread_in_subcore
> +
> +     /* Not first thread in subcore to wake up */
> +     stwcx.  r15,0,r14
> +     bne-    lwarx_loop2
> +     isync
> +     b       common_exit
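
As an aside, the cr1/cr2 bookkeeping above boils down to this
(illustrative C; in the real code it all happens on the value read
under lwarx, before the stwcx.):

  #include <stdbool.h>

  static void classify_waker(unsigned int old_state,
                             unsigned int subcore_sibling_mask,
                             bool *first_in_subcore, bool *first_in_core)
  {
          *first_in_core    = (old_state == 0);                    /* cmpwi cr2,r15,0 */
          *first_in_subcore = !(old_state & subcore_sibling_mask); /* cmpwi cr1,r4,0 */
  }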

The code from lwarx_loop2 till the end of common_exit is the same as
the lwarx_loop2-to-common_exit code in idle_power7.S, except for a
minor difference in how the return from core_idle_lock_held is handled
and the fact that we're not defining pnv_fastsleep_workaround_at_exit
immediately in first_thread_in_core. I prefer the original version,
where core_idle_lock_held does a blr instead of explicitly jumping
back to lwarx_loop2, since it can be invoked safely from multiple
places.

Can we move this to a common place and invoke it from these two places
instead of duplicating the code ?

> +
> +core_idle_lock_held:
> +     HMT_LOW
> +core_idle_lock_loop:
> +     lwz     r15,0(r14)
> +     andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
> +     bne     core_idle_lock_loop
> +     HMT_MEDIUM
> +     b       lwarx_loop2
> +
> +first_thread_in_subcore:
> +     /* First thread in subcore to wakeup */
> +     ori     r15,r15,PNV_CORE_IDLE_LOCK_BIT
> +     stwcx.  r15,0,r14
> +     bne-    lwarx_loop2
> +     isync
> +
> +     /*
> +      * If waking up from sleep, subcore state is not lost. Hence
> +      * skip subcore state restore
> +      */
> +     bne     cr4,subcore_state_restored
> +
> +     /* Restore per-subcore state */
> +     ld      r4,_RPR(r1)
> +     mtspr   SPRN_RPR,r4
> +     ld      r4,_AMOR(r1)
> +     mtspr   SPRN_AMOR,r4
> +
> +subcore_state_restored:
> +     /*
> +      * Check if the thread is also the first thread in the core. If not,
> +      * skip to clear_lock.
> +      */
> +     bne     cr2,clear_lock
> +
> +first_thread_in_core:

I suppose we don't need the pnv_fastsleep_workaround_at_exit at this
point anymore.

> +
> +timebase_resync:
> +     /* Do timebase resync if we are waking up from sleep. Use cr3 value
> +      * set in exceptions-64s.S */
> +     ble     cr3,clear_lock
> +     /* Time base re-sync */
> +     li      r0,OPAL_RESYNC_TIMEBASE
> +     bl      opal_call_realmode;
> +
> +     /*
> +      * If waking up from sleep, per core state is not lost, skip to
> +      * clear_lock.
> +      */
> +     bne     cr4,clear_lock
> +
> +     /* Restore per core state */
> +     ld      r4,_TSCR(r1)
> +     mtspr   SPRN_TSCR,r4
> +
> +clear_lock:
> +     andi.   r15,r15,PNV_CORE_IDLE_THREAD_BITS
> +     lwsync
> +     stw     r15,0(r14)
> +
> +common_exit:
> +     /*
> +      * Common to all threads.
> +      *
> +      * If waking up from sleep, hypervisor state is not lost. Hence
> +      * skip hypervisor state restore.
> +      */
> +     bne     cr4,hypervisor_state_restored
> +
> +     /* Waking up from deep idle state */
> +
> +     /* Restore per thread state */
> +     bl      __restore_cpu_power8
> +
> +     ld      r4,_SPURR(r1)
> +     mtspr   SPRN_SPURR,r4
> +     ld      r4,_PURR(r1)
> +     mtspr   SPRN_PURR,r4
> +     ld      r4,_DSCR(r1)
> +     mtspr   SPRN_DSCR,r4
> +
> +hypervisor_state_restored:
> +
> +     mtspr   SPRN_SRR1,r16
> +     mtlr    r17
> +     blr

[..snip..]

> @@ -264,6 +275,30 @@ static int __init pnv_init_idle_states(void)
>               goto out_free;
>       }
> 
> +     if (cpu_has_feature(CPU_FTR_ARCH_300)) {
> +             psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val),
> +                                     GFP_KERNEL);

Need to handle the case where the kcalloc fails to allocate memory for
psscr_val here.
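
Something minimal like the following should do (reusing the existing
out_free label visible earlier in this function):

  psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val),
                      GFP_KERNEL);
  if (!psscr_val)
          goto out_free;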

> +             if (of_property_read_u64_array(power_mgt,
> +                     "ibm,cpu-idle-state-psscr",
> +                     psscr_val, dt_idle_states)) {
> +                     pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n");
> +                     goto out_free_psscr;
> +             }

The remainder of the patch looks ok.

--
Thanks and Regards
gautham.
