[offlist]

On Thu, 2016-21-01 at 00:55:41 UTC, Cyril Bur wrote:
> Test that the non volatile floating point and Altivec registers get
> correctly preserved across the fork() syscall.
> 
> fork() works nicely for this purpose, the registers should be the same for
> both parent and child

...

> diff --git a/tools/testing/selftests/powerpc/basic_asm.h 
> b/tools/testing/selftests/powerpc/basic_asm.h
> new file mode 100644
> index 0000000..27aca79
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/basic_asm.h
> @@ -0,0 +1,26 @@
> +#include <ppc-asm.h>
> +#include <asm/unistd.h>
> +
> +#define LOAD_REG_IMMEDIATE(reg,expr) \
> +     lis     reg,(expr)@highest;     \
> +     ori     reg,reg,(expr)@higher;  \
> +     rldicr  reg,reg,32,31;  \
> +     oris    reg,reg,(expr)@high;    \
> +     ori     reg,reg,(expr)@l;
> +
> +#define PUSH_BASIC_STACK(size) \
> +     std     2,24(sp); \
> +     mflr    r0; \
> +     std     r0,16(sp); \
> +     mfcr    r0; \
> +     stw     r0,8(sp); \
> +     stdu    sp,-size(sp);

Unless I'm completely misreading this, that's not ABI compliant.

You are supposed to save LR into your caller's stack frame, but the TOC and CR
go into *your* stack frame.

I know that's not super obvious from the ABI doc.

See for example this code generated by GCC:

extern int global_thing;

int foo(int x, int (*fp)(int x))
{
        int a, b, c;

        a = x;
        b = a + global_thing;
        c = b + 2;

        a = fp(x);

        printf("%d %d %d\n", a, b, c);

        return a;
}

0000000000000000 <foo>:
   0:   00 00 4c 3c     addis   r2,r12,0
                        0: R_PPC64_REL16_HA     .TOC.
   4:   00 00 42 38     addi    r2,r2,0
                        4: R_PPC64_REL16_LO     .TOC.+0x4
   8:   a6 02 08 7c     mflr    r0
   c:   f0 ff c1 fb     std     r30,-16(r1)     <- save r30 into (what will be) 
our stack frame
  10:   f8 ff e1 fb     std     r31,-8(r1)      <- save r31 into (what will be) 
our stack frame
  14:   78 23 8c 7c     mr      r12,r4          <- put fp into r12 because 
we'll be calling its global entry point
  18:   a6 03 89 7c     mtctr   r4              <- put fp into CTR
  1c:   00 00 22 3d     addis   r9,r2,0
                        1c: R_PPC64_TOC16_HA    .toc
  20:   00 00 29 e9     ld      r9,0(r9)
                        20: R_PPC64_TOC16_LO_DS .toc
  24:   10 00 01 f8     std     r0,16(r1)       <- save r0 (LR) into *our 
caller's* stack frame
  28:   91 ff 21 f8     stdu    r1,-112(r1)     <- create our stack frame & 
store the backchain pointer
  2c:   18 00 41 f8     std     r2,24(r1)       <- save r2 (TOC) into *our* 
stack frame
  30:   00 00 e9 83     lwz     r31,0(r9)
  34:   14 1a ff 7f     add     r31,r31,r3
  38:   21 04 80 4e     bctrl                   <- call fp, it will get it's 
TOC pointer using r12
  3c:   18 00 41 e8     ld      r2,24(r1)       <- restore our r2 (TOC) from 
*our* stack frame
  40:   02 00 ff 38     addi    r7,r31,2
  44:   b4 07 e6 7f     extsw   r6,r31
  48:   b4 07 e7 7c     extsw   r7,r7
  4c:   78 1b 7e 7c     mr      r30,r3
  50:   00 00 82 3c     addis   r4,r2,0
                        50: R_PPC64_TOC16_HA    .rodata.str1.8
  54:   78 f3 c5 7f     mr      r5,r30
  58:   00 00 84 38     addi    r4,r4,0
                        58: R_PPC64_TOC16_LO    .rodata.str1.8
  5c:   01 00 60 38     li      r3,1
  60:   01 00 00 48     bl      60 <foo+0x60>
                        60: R_PPC64_REL24       __printf_chk
  64:   00 00 00 60     nop
  68:   70 00 21 38     addi    r1,r1,112       <- pop our stack frame
  6c:   78 f3 c3 7f     mr      r3,r30
  70:   10 00 01 e8     ld      r0,16(r1)       <- restore r0 (LR) from our 
caller's stack frame
  74:   f0 ff c1 eb     ld      r30,-16(r1)     <- restore r30 from (what was) 
our stack frame
  78:   f8 ff e1 eb     ld      r31,-8(r1)      <- restore r31 from (what was) 
our stack frame
  7c:   a6 03 08 7c     mtlr    r0              <- restore LR
  80:   20 00 80 4e     blr                     <- return



So your macro wants to be more like:

#define PUSH_BASIC_STACK(_size) \
        mflr    r0;             \
        std     r0,16(r1);      \
        stdu    r1,-_size(r1);  \
        mfcr    r0;             \
        stw     r0,8(r1);       \
        std     r2,24(r1);

Though untested.

Also you're relying on the caller to ensure size is big enough. So maybe it'd
be better if you ensured that, making the size parameter the *extra* size above
the minimum stack frame. eg:


#define PUSH_BASIC_STACK(_extra)        \
        mflr    r0;                     \
        std     r0,16(r1);              \
        stdu    r1,-(_extra + 32)(r1);  \
        mfcr    r0;                     \
        stw     r0,8(r1);               \
        std     r2,24(r1);


cheers
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to