On Friday 28 November 2014 06:26 AM, Benjamin Herrenschmidt wrote:
> On Thu, 2014-11-27 at 17:48 +0530, Madhavan Srinivasan wrote:
>> This patch creates the infrastructure to handle CR-based
>> local_* atomic operations. Local atomic operations are fast
>> and highly reentrant per-CPU counters, used for percpu
>> variable updates. They only guarantee atomicity of the
>> variable modification with respect to the CPU which owns the
>> data, and they need to be executed in a preemption-safe way.
>>
>> Here is the design of this patch. Since local_* operations
>> only need to be atomic with respect to interrupts (IIUC), the
>> patch uses one of the Condition Register (CR) fields as a flag
>> variable. On entering a local_* operation, a specific bit in
>> the CR5 field is set, and on exit it is cleared. The CR bit is
>> checked in the interrupt return path: if CR5[EQ] is set and we
>> are returning to the kernel, the NIP is rewound to the start
>> of the local_* operation.
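
To make the design concrete, here is a simplified sketch of how a
local_inc could pair with this machinery (illustrative only, not the
exact code from the series). CR5[EQ] is CR bit 22, and the __ex_table
entries are the <address, fixup> pairs that the return path below
looks up with ld r4,8(r3):

	/* hypothetical local_inc, counter address in r3 */
1:	crset	22		/* CR5[EQ]=1: local_* update in flight */
2:	ld	r4,0(r3)	/* load the counter */
3:	addi	r4,r4,1		/* modify */
4:	std	r4,0(r3)	/* store back: the commit point */
5:	crclr	22		/* CR5[EQ]=0: update done */

	.section __ex_table,"a"
	.llong	2b,1b		/* rewind to the crset so the flag is */
	.llong	3b,1b		/* re-armed after the return path has */
	.llong	4b,1b		/* cleared it; NIP at 4b means the std */
	.llong	5b,5b		/* has not executed yet, so a redo is */
	.previous		/* safe. At 5b the store is done, so */
				/* the NIP is kept and only the flag */
				/* gets cleared (by the return path). */
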
>>
>> The reason for this approach is that local_* operations are
>> currently implemented with the l[w/d]arx/st[w/d]cx. instruction
>> pair, which is heavy on cycle count and has no local variant.
>> To see whether the new implementation helps, I used a modified
>> version of Rusty's benchmark code on local_t.
>>
>> https://lkml.org/lkml/2008/12/16/450
>>
>> Modifications:
>>  - increased the working set size from 1MB to 8MB,
>>  - removed the cpu_local_inc test.
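
For reference, the measurement loop is essentially of this shape (a
rough sketch, not Rusty's exact code; WSS and the timebase-to-ns
conversion here are illustrative):

	#include <linux/module.h>
	#include <asm/local.h>
	#include <asm/time.h>

	#define WSS	(8 * 1024 * 1024 / sizeof(local_t))	/* 8MB set */
	static local_t counters[WSS];

	/* average ns per local_inc over one pass of the working set */
	static unsigned long bench_local_inc(void)
	{
		unsigned long i, tb0 = get_tb();

		for (i = 0; i < WSS; i++)
			local_inc(&counters[i]);
		/* timebase ticks -> ns via tb_ticks_per_usec */
		return (get_tb() - tb0) * 1000 / (tb_ticks_per_usec * WSS);
	}
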
>>
>> The test was run
>>     - on a POWER8 1S Scale Out System at 2.0GHz
>>     - on OPAL v3 with a patched v3.18-rc4 kernel as host
>>
>> Here are the values with the patch.
>>
>> Time in ns per iteration
>>
>>              inc     add     read    add_return
>> atomic_long  67      67      18      69
>> irqsave/rest 39      39      23      39
>> trivalue     39      39      29      49
>> local_t      26      26      24      26
>>
>> Since CR5 is used as a flag, CFLAGS have been added so the
>> compiler avoids CR5 during kernel compilation, and CR5 is
>> zeroed at kernel entry.
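
Since correctness depends on the compiler honouring -ffixed-cr5, one
quick sanity check (not part of the patch) is to scan the disassembly
for stray CR5 references:

	objdump -d vmlinux | grep -w cr5

Any hits beyond the explicit CR5 manipulations this series adds would
mean the compiler is still allocating the field.
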
>>
>> Tested the patch in
>>  - a pSeries LPAR,
>>  - a host with patched/unmodified guest kernels
>>
>> To check whether userspace sees any CR5 corruption, I ran a
>> simple test which does:
>>  - set the CR5 field,
>>  - while(1)
>>    - sleep or gettimeofday
>>    - check that the bit is still set
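
In sketch form, the test is essentially the following (an illustrative
reconstruction, not the exact source; it should be built with
-ffixed-cr5 so the compiler itself stays off the field):

	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		unsigned long cr;

		__asm__ volatile("crset 22");		/* set CR5[EQ] */
		for (;;) {
			sleep(1);			/* kernel entry/exit */
			__asm__ volatile("mfcr %0" : "=r"(cr));
			if (!(cr & 0x200)) {		/* CR bit 22 */
				printf("CR5[EQ] corrupted\n");
				return 1;
			}
		}
	}
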
>>
>> Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
>> ---
>> - I would really appreciate feedback on the patch set.
>> - Kindly comment if I should try any other benchmark or
>>   workload to check the numbers.
>> - Also, kindly recommend any known stress tests for CR.
>>
>>  Makefile                                 |   6 ++
>>  arch/powerpc/include/asm/exception-64s.h |  21 +++++-
>>  arch/powerpc/kernel/entry_64.S           | 106 ++++++++++++++++++++++++++++++-
>>  arch/powerpc/kernel/exceptions-64s.S     |   2 +-
>>  arch/powerpc/kernel/head_64.S            |   8 +++
>>  5 files changed, 138 insertions(+), 5 deletions(-)
>>
>> diff --git a/Makefile b/Makefile
>> index 00d618b..2e271ad 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -706,6 +706,12 @@ endif
>>  
>>  KBUILD_CFLAGS   += $(call cc-option, -fno-var-tracking-assignments)
>>  
>> +ifdef       CONFIG_PPC64
>> +# We need this flag to force the compiler not to use CR5,
>> +# since the local_t code depends on it.
>> +KBUILD_CFLAGS   += -ffixed-cr5
>> +endif
>> +
>>  ifdef CONFIG_DEBUG_INFO
>>  ifdef CONFIG_DEBUG_INFO_SPLIT
>>  KBUILD_CFLAGS   += $(call cc-option, -gsplit-dwarf, -g)
>> diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
>> index 77f52b2..c42919a 100644
>> --- a/arch/powerpc/include/asm/exception-64s.h
>> +++ b/arch/powerpc/include/asm/exception-64s.h
>> @@ -306,7 +306,26 @@ do_kvm_##n:                                             \
>>      std     r10,0(r1);              /* make stack chain pointer     */ \
>>      std     r0,GPR0(r1);            /* save r0 in stackframe        */ \
>>      std     r10,GPR1(r1);           /* save r1 in stackframe        */ \
>> -    beq     4f;                     /* if from kernel mode          */ \
>> +BEGIN_FTR_SECTION;                                                     \
>> +    lis     r9,4096;                /* Create a mask with HV and PR */ \
>> +    rldicr  r9,r9,32,31;            /* bits, AND with the MSR       */ \
>> +    mr      r10,r9;                 /* to check for Hyp state       */ \
>> +    ori     r9,r9,16384;                                               \
>> +    and     r9,r12,r9;                                                 \
>> +    cmpd    cr3,r10,r9;                                                \
>> +    beq     cr3,66f;                /* Jump if we come from Hyp mode*/ \
>> +    mtcrf   0x04,r10;               /* Clear CR5 if coming from usr */ \
>> +FTR_SECTION_ELSE;                                                      \
> 
> Can't we just unconditionally clear at as long as we do that after we've
> saved it ? In that case, it's just a matter for the fixup code to check
> the saved version rather than the actual CR..
> 
I use the CR bit in the interrupt return path to decide whether to
enter the fixup section search. If we cleared it unconditionally, we
would have to search the fixup section for every kernel return NIP,
right?

Regards
Maddy

>> +    beq     4f;                     /* if kernel mode branch        */ \
>> +    li      r10,0;                  /* Clear CR5 in case we come    */ \
>> +    mtcrf   0x04,r10;               /* from user.                   */ \
>> +    nop;                            /* This part of the code is for */ \
>> +    nop;                            /* kernel with MSR[HV]=0,       */ \
>> +    nop;                            /* MSR[PR]=0, so just check for */ \
>> +    nop;                            /* MSR[PR]                      */ \
>> +    nop;                                                               \
>> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE);                             \
>> +66: beq     4f;                     /* if from kernel mode          */ \
>>      ACCOUNT_CPU_USER_ENTRY(r9, r10);                                   \
>>      SAVE_PPR(area, r9, r10);                                           \
>>  4:  EXCEPTION_PROLOG_COMMON_2(area)                                    \
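
For anyone decoding the constants in the hunk above: the sequence
builds an MSR_HV|MSR_PR mask and compares the masked MSR of the
interrupted context (r12) against MSR_HV alone, i.e.

	lis	r9,4096		# r9  = 0x1000_0000
	rldicr	r9,r9,32,31	# r9  = 0x1000_0000_0000_0000 = MSR_HV
	mr	r10,r9		# r10 = expected value for Hyp mode
	ori	r9,r9,16384	# r9 |= 0x4000 (MSR_PR); mask = HV|PR
	and	r9,r12,r9	# keep only HV and PR from the old MSR
	cmpd	cr3,r10,r9	# equal iff HV=1 and PR=0: Hyp kernel

The same pattern repeats in the entry_64.S hunks below.
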
>> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
>> index 0905c8d..e42bb99 100644
>> --- a/arch/powerpc/kernel/entry_64.S
>> +++ b/arch/powerpc/kernel/entry_64.S
>> @@ -68,7 +68,26 @@ system_call_common:
>>  2:  std     r2,GPR2(r1)
>>      std     r3,GPR3(r1)
>>      mfcr    r2
>> -    std     r4,GPR4(r1)
>> +BEGIN_FTR_SECTION
>> +    lis     r10,4096
>> +    rldicr  r10,r10,32,31
>> +    mr      r11,r10
>> +    ori     r10,r10,16384
>> +    and     r10,r12,r10
>> +    cmpd    r11,r10
>> +    beq     67f
>> +    mtcrf   0x04,r11
>> +FTR_SECTION_ELSE
>> +    beq     67f
>> +    li      r11,0
>> +    mtcrf   0x04,r11
>> +    nop
>> +    nop
>> +    nop
>> +    nop
>> +    nop
>> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
>> +67: std     r4,GPR4(r1)
>>      std     r5,GPR5(r1)
>>      std     r6,GPR6(r1)
>>      std     r7,GPR7(r1)
>> @@ -224,8 +243,26 @@ syscall_exit:
>>  BEGIN_FTR_SECTION
>>      stdcx.  r0,0,r1                 /* to clear the reservation */
>>  END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>> +BEGIN_FTR_SECTION
>> +    lis     r4,4096
>> +    rldicr  r4,r4,32,31
>> +    mr      r6,r4
>> +    ori     r4,r4,16384
>> +    and     r4,r8,r4
>> +    cmpd    cr3,r6,r4
>> +    beq     cr3,65f
>> +    mtcr    r5
>> +FTR_SECTION_ELSE
>>      andi.   r6,r8,MSR_PR
>> -    ld      r4,_LINK(r1)
>> +    beq     65f
>> +    mtcr    r5
>> +    nop
>> +    nop
>> +    nop
>> +    nop
>> +    nop
>> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
>> +65: ld      r4,_LINK(r1)
>>  
>>      beq-    1f
>>      ACCOUNT_CPU_USER_EXIT(r11, r12)
>> @@ -234,7 +271,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>>  1:  ld      r2,GPR2(r1)
>>      ld      r1,GPR1(r1)
>>      mtlr    r4
>> +#ifdef      CONFIG_PPC64
>> +    mtcrf   0xFB,r5
>> +#else
>>      mtcr    r5
>> +#endif
>>      mtspr   SPRN_SRR0,r7
>>      mtspr   SPRN_SRR1,r8
>>      RFI
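
On the mtcrf masks: the FXM field selects CR fields most-significant
bit first,

	FXM bit:   0x80 0x40 0x20 0x10 0x08 0x04 0x02 0x01
	CR field:   CR0  CR1  CR2  CR3  CR4  CR5  CR6  CR7

so 0xFB (all bits except 0x04) restores every field but CR5, which
preserves an in-flight local_* flag across the syscall return, while
the mtcrf 0x04 uses elsewhere in the patch touch CR5 alone.
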
>> @@ -804,7 +845,66 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>>       */
>>      .globl  fast_exception_return
>>  fast_exception_return:
>> -    ld      r3,_MSR(r1)
>> +
>> +    /*
>> +     * Now that we are about to exit from the interrupt, let's
>> +     * check the CR5[EQ] bit. If it is set, then we may be in
>> +     * the middle of a local_t update. In that case, we should
>> +     * rewind the NIP accordingly.
>> +     */
>> +    mfcr    r3
>> +    andi.   r4,r3,0x200
>> +    beq     63f
>> +
>> +    /*
>> +     * The bit is set, so check whether we are returning to user
>> +     */
>> +    ld      r4,_MSR(r1)
>> +BEGIN_FTR_SECTION
>> +    li      r3,4096
>> +    rldicr  r3,r3,32,31
>> +    mr      r5,r3
>> +    ori     r3,r3,16384
>> +    and     r3,r4,r3
>> +    cmpd    r5,r3
>> +    bne     63f
>> +FTR_SECTION_ELSE
>> +    andi.   r3,r4,MSR_PR
>> +    bne     63f
>> +    nop
>> +    nop
>> +    nop
>> +    nop
>> +    nop
>> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
>> +
>> +    /*
>> +     * Looks like we are returning to the kernel, so let's get
>> +     * the NIP, search the ex_table, and change the NIP based
>> +     * on the return value
>> +     */
>> +lookup_ex_table:
>> +    ld      r3,_NIP(r1)
>> +    bl      search_exception_tables 
>> +    cmpli   0,1,r3,0
>> +    bne     62f
>> +
>> +    /*
>> +     * This is a panic case: the CR5 bit is set, but we are not
>> +     * in local_* code and we are returning to the kernel.
>> +     */
>> +    ld      r3,_NIP(r1)
>> +    mfcr    r4
>> +    EMIT_BUG_ENTRY lookup_ex_table, __FILE__,__LINE__,BUGFLAG_WARNING
>> +
>> +    /*
>> +     * Now save the return fixup address as NIP
>> +     */
>> +62: ld      r4,8(r3)
>> +    std     r4,_NIP(r1)
>> +    crclr   22
>> +63: ld      r3,_MSR(r1)
>>      ld      r4,_CTR(r1)
>>      ld      r0,_LINK(r1)
>>      mtctr   r4
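
About the 0x200 magic above: mfcr places the 32-bit CR in the low word
of r3 with CR0 leftmost, so CR5[EQ] = CR bit 4*5+2 = 22 sits at bit
31-22 = 9 from the least-significant end, and 1 << 9 = 0x200. The same
bit is what crclr 22 clears once the fixup NIP has been saved.
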
>> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
>> index 72e783e..edb75a9 100644
>> --- a/arch/powerpc/kernel/exceptions-64s.S
>> +++ b/arch/powerpc/kernel/exceptions-64s.S
>> @@ -637,7 +637,7 @@ masked_##_H##interrupt:                         \
>>      rldicl  r10,r10,48,1; /* clear MSR_EE */        \
>>      rotldi  r10,r10,16;                             \
>>      mtspr   SPRN_##_H##SRR1,r10;                    \
>> -2:  mtcrf   0x80,r9;                                \
>> +2:  mtcrf   0x90,r9;                                \
>>      ld      r9,PACA_EXGEN+EX_R9(r13);               \
>>      ld      r10,PACA_EXGEN+EX_R10(r13);             \
>>      ld      r11,PACA_EXGEN+EX_R11(r13);             \
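
(0x90 = 0x80|0x10 restores CR3 along with CR0; cr3 is now used as a
scratch field by the new compare sequences in the entry paths, hence
the wider mask on this exit path.)
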
>> diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
>> index d48125d..02e49b3 100644
>> --- a/arch/powerpc/kernel/head_64.S
>> +++ b/arch/powerpc/kernel/head_64.S
>> @@ -347,6 +347,14 @@ __mmu_off:
>>   *
>>   */
>>  __start_initialization_multiplatform:
>> +
>> +    /*
>> +     * Before we do anything, let's clear the CR5 field
>> +     * so that we have a clean start at entry
>> +     */
>> +    li      r11,0
>> +    mtcrf   0x04,r11
>> +
>>      /* Make sure we are running in 64 bits mode */
>>      bl      enable_64b_mode
>>  
> 
> 
