On Sun, Dec 4, 2022 at 12:51 PM Iain Sandoe <iains....@gmail.com> wrote: > > This is an almost completely Darwin-local patch, but there is one (repeated) > place where a general change is needed - which is in making xmm_regs and > x87_regs extern in the three copies of args.h (this is consistent with the > other saved vars). These failures represent most of the current testsuite noise > on x86 Darwin. > > Tested on x86-64 Darwin and Linux. > > OK for master? > Iain > > -- >8 -- > > These tests have failed since their introduction because they assume that the > assembler output is ELF and that the ABI targeted supports the addressing. > > For Darwin, Mach-O and ABI we need to make several changes: > 1. Use the __USER_LABEL_PREFIX__ > 2. Remove the use of ELF-specific constructs (.size, .type etc.) > 3. We cannot make direct access to common variables in the ABI, so we > must move these to BSS. > > Since that set is quite significant, I elected to make a separate source > section for Darwin. This is introduced by #elif defined(__APPLE__) because > __MACH__ is also used by HURD. > > There are potentially other X86 targets (e.g. XCOFF) that could have yet > more changes, so I added a catchall section that #errors if the object format > is neither ELF nor Mach-O. > > Signed-off-by: Iain Sandoe <i...@sandoe.co.uk> > > gcc/testsuite/ChangeLog: > > * gcc.target/x86_64/abi/bf16/args.h: > * gcc.target/x86_64/abi/bf16/asm-support.S: > * gcc.target/x86_64/abi/bf16/m256bf16/args.h: > * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: > * gcc.target/x86_64/abi/bf16/m512bf16/args.h: > * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S:
Missing descriptions in ChangeLog entry. Uros. > --- > .../gcc.target/x86_64/abi/bf16/args.h | 4 +- > .../gcc.target/x86_64/abi/bf16/asm-support.S | 105 +++++++++++++++ > .../x86_64/abi/bf16/m256bf16/args.h | 4 +- > .../x86_64/abi/bf16/m256bf16/asm-support.S | 107 +++++++++++++++ > .../x86_64/abi/bf16/m512bf16/args.h | 4 +- > .../x86_64/abi/bf16/m512bf16/asm-support.S | 123 ++++++++++++++++++ > 6 files changed, 341 insertions(+), 6 deletions(-) > > diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h > b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h > index 11d7e2b3a1c..95f9a394f2c 100644 > --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h > +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h > @@ -42,8 +42,8 @@ typedef union { > } X87_T; > extern void (*callthis)(void); > extern unsigned long long > rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15; > -XMM_T xmm_regs[16]; > -X87_T x87_regs[8]; > +extern XMM_T xmm_regs[16]; > +extern X87_T x87_regs[8]; > extern volatile unsigned long long volatile_var; > extern void snapshot (void); > extern void snapshot_ret (void); > diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S > b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S > index 7559aa910c4..331bf92d761 100644 > --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S > +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S > @@ -1,3 +1,5 @@ > + > +#ifdef __ELF__ > .text > .p2align 4,,15 > .globl snapshot > @@ -82,3 +84,106 @@ snapshot_ret: > #ifdef __linux__ > .section .note.GNU-stack,"",@progbits > #endif > + > +#elif defined(__APPLE__) > + .text > + .p2align 4,,15 > + .globl _snapshot > +_snapshot: > +LFB3: > + movq %rax, _rax(%rip) > + movq %rbx, _rbx(%rip) > + movq %rcx, _rcx(%rip) > + movq %rdx, _rdx(%rip) > + movq %rdi, _rdi(%rip) > + movq %rsi, _rsi(%rip) > + movq %rbp, _rbp(%rip) > + movq %rsp, _rsp(%rip) > + movq %r8, _r8(%rip) > + movq %r9, _r9(%rip) > + movq %r10, _r10(%rip) > + movq %r11, 
_r11(%rip) > + movq %r12, _r12(%rip) > + movq %r13, _r13(%rip) > + movq %r14, _r14(%rip) > + movq %r15, _r15(%rip) > + movdqu %xmm0, _xmm_regs+0(%rip) > + movdqu %xmm1, _xmm_regs+16(%rip) > + movdqu %xmm2, _xmm_regs+32(%rip) > + movdqu %xmm3, _xmm_regs+48(%rip) > + movdqu %xmm4, _xmm_regs+64(%rip) > + movdqu %xmm5, _xmm_regs+80(%rip) > + movdqu %xmm6, _xmm_regs+96(%rip) > + movdqu %xmm7, _xmm_regs+112(%rip) > + movdqu %xmm8, _xmm_regs+128(%rip) > + movdqu %xmm9, _xmm_regs+144(%rip) > + movdqu %xmm10, _xmm_regs+160(%rip) > + movdqu %xmm11, _xmm_regs+176(%rip) > + movdqu %xmm12, _xmm_regs+192(%rip) > + movdqu %xmm13, _xmm_regs+208(%rip) > + movdqu %xmm14, _xmm_regs+224(%rip) > + movdqu %xmm15, _xmm_regs+240(%rip) > + jmp *_callthis(%rip) > +LFE3: > + > + .p2align 4,,15 > + .globl _snapshot_ret > +_snapshot_ret: > + movq %rdi, _rdi(%rip) > + subq $8, %rsp > + call *_callthis(%rip) > + addq $8, %rsp > + movq %rax, _rax(%rip) > + movq %rdx, _rdx(%rip) > + movdqu %xmm0, _xmm_regs+0(%rip) > + movdqu %xmm1, _xmm_regs+16(%rip) > + fstpt _x87_regs(%rip) > + fstpt _x87_regs+16(%rip) > + fldt _x87_regs+16(%rip) > + fldt _x87_regs(%rip) > + ret > + > + .globl _callthis > + .zerofill __DATA,__bss,_callthis,8,3 > + .globl _rax > + .zerofill __DATA,__bss,_rax,8,3 > + .globl _rbx > + .zerofill __DATA,__bss,_rbx,8,3 > + .globl _rcx > + .zerofill __DATA,__bss,_rcx,8,3 > + .globl _rdx > + .zerofill __DATA,__bss,_rdx,8,3 > + .globl _rsi > + .zerofill __DATA,__bss,_rsi,8,3 > + .globl _rdi > + .zerofill __DATA,__bss,_rdi,8,3 > + .globl _rsp > + .zerofill __DATA,__bss,_rsp,8,3 > + .globl _rbp > + .zerofill __DATA,__bss,_rbp,8,3 > + .globl _r8 > + .zerofill __DATA,__bss,_r8,8,3 > + .globl _r9 > + .zerofill __DATA,__bss,_r9,8,3 > + .globl _r10 > + .zerofill __DATA,__bss,_r10,8,3 > + .globl _r11 > + .zerofill __DATA,__bss,_r11,8,3 > + .globl _r12 > + .zerofill __DATA,__bss,_r12,8,3 > + .globl _r13 > + .zerofill __DATA,__bss,_r13,8,3 > + .globl _r14 > + .zerofill __DATA,__bss,_r14,8,3 > + 
.globl _r15 > + .zerofill __DATA,__bss,_r15,8,3 > + .globl _xmm_regs > + .zerofill __DATA,__bss,_xmm_regs,256,5 > + .globl _x87_regs > + .zerofill __DATA,__bss,_x87_regs,128,5 > + .globl _volatile_var > + .zerofill __DATA,__bss,_volatile_var,8,3 > + > +#else > +#error unknown object format > +#endif > diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h > b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h > index 94627ffbd44..1027742cbb2 100644 > --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h > +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h > @@ -50,8 +50,8 @@ typedef union { > } X87_T; > extern void (*callthis)(void); > extern unsigned long long > rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15; > -YMM_T ymm_regs[16]; > -X87_T x87_regs[8]; > +extern YMM_T ymm_regs[16]; > +extern X87_T x87_regs[8]; > extern volatile unsigned long long volatile_var; > extern void snapshot (void); > extern void snapshot_ret (void); > diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S > b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S > index 24c8b3c9023..171654aa4db 100644 > --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S > +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S > @@ -1,3 +1,5 @@ > + > +#ifdef __ELF__ > .text > .p2align 4,,15 > .globl snapshot > @@ -82,3 +84,108 @@ snapshot_ret: > #ifdef __linux__ > .section .note.GNU-stack,"",@progbits > #endif > + > +#elif defined(__APPLE__) > + > + .text > + .p2align 4,,15 > + .globl _snapshot > +_snapshot: > +.LFB3: > + movq %rax, _rax(%rip) > + movq %rbx, _rbx(%rip) > + movq %rcx, _rcx(%rip) > + movq %rdx, _rdx(%rip) > + movq %rdi, _rdi(%rip) > + movq %rsi, _rsi(%rip) > + movq %rbp, _rbp(%rip) > + movq %rsp, _rsp(%rip) > + movq %r8, _r8(%rip) > + movq %r9, _r9(%rip) > + movq %r10, _r10(%rip) > + movq %r11, _r11(%rip) > + movq %r12, _r12(%rip) > + movq %r13, _r13(%rip) > + movq 
%r14, _r14(%rip) > + movq %r15, _r15(%rip) > + vmovdqu %ymm0, _ymm_regs+0(%rip) > + vmovdqu %ymm1, _ymm_regs+32(%rip) > + vmovdqu %ymm2, _ymm_regs+64(%rip) > + vmovdqu %ymm3, _ymm_regs+96(%rip) > + vmovdqu %ymm4, _ymm_regs+128(%rip) > + vmovdqu %ymm5, _ymm_regs+160(%rip) > + vmovdqu %ymm6, _ymm_regs+192(%rip) > + vmovdqu %ymm7, _ymm_regs+224(%rip) > + vmovdqu %ymm8, _ymm_regs+256(%rip) > + vmovdqu %ymm9, _ymm_regs+288(%rip) > + vmovdqu %ymm10, _ymm_regs+320(%rip) > + vmovdqu %ymm11, _ymm_regs+352(%rip) > + vmovdqu %ymm12, _ymm_regs+384(%rip) > + vmovdqu %ymm13, _ymm_regs+416(%rip) > + vmovdqu %ymm14, _ymm_regs+448(%rip) > + vmovdqu %ymm15, _ymm_regs+480(%rip) > + jmp *_callthis(%rip) > +.LFE3: > + > + .p2align 4,,15 > + .globl _snapshot_ret > +_snapshot_ret: > + movq %rdi, _rdi(%rip) > + subq $8, %rsp > + call *_callthis(%rip) > + addq $8, %rsp > + movq %rax, _rax(%rip) > + movq %rdx, _rdx(%rip) > + vmovdqu %ymm0, _ymm_regs+0(%rip) > + vmovdqu %ymm1, _ymm_regs+32(%rip) > + fstpt _x87_regs(%rip) > + fstpt _x87_regs+16(%rip) > + fldt _x87_regs+16(%rip) > + fldt _x87_regs(%rip) > + ret > + > + .globl _callthis > + .zerofill __DATA,__bss,_callthis,8,3 > + .globl _rax > + .zerofill __DATA,__bss,_rax,8,3 > + .globl _rbx > + .zerofill __DATA,__bss,_rbx,8,3 > + .globl _rcx > + .zerofill __DATA,__bss,_rcx,8,3 > + .globl _rdx > + .zerofill __DATA,__bss,_rdx,8,3 > + .globl _rsi > + .zerofill __DATA,__bss,_rsi,8,3 > + .globl _rdi > + .zerofill __DATA,__bss,_rdi,8,3 > + .globl _rsp > + .zerofill __DATA,__bss,_rsp,8,3 > + .globl _rbp > + .zerofill __DATA,__bss,_rbp,8,3 > + .globl _r8 > + .zerofill __DATA,__bss,_r8,8,3 > + .globl _r9 > + .zerofill __DATA,__bss,_r9,8,3 > + .globl _r10 > + .zerofill __DATA,__bss,_r10,8,3 > + .globl _r11 > + .zerofill __DATA,__bss,_r11,8,3 > + .globl _r12 > + .zerofill __DATA,__bss,_r12,8,3 > + .globl _r13 > + .zerofill __DATA,__bss,_r13,8,3 > + .globl _r14 > + .zerofill __DATA,__bss,_r14,8,3 > + .globl _r15 > + .zerofill __DATA,__bss,_r15,8,3 > + 
.globl _ymm_regs > + .zerofill __DATA,__bss,_ymm_regs,512,5 > + .globl _x87_regs > + .zerofill __DATA,__bss,_x87_regs,128,5 > + .globl _volatile_var > + .zerofill __DATA,__bss,_volatile_var,8,3 > + > +#else > +#error unknown object format > +#endif > + > diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h > b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h > index 64b24783833..f9710bae347 100644 > --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h > +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h > @@ -51,8 +51,8 @@ typedef union { > } X87_T; > extern void (*callthis)(void); > extern unsigned long long > rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15; > -ZMM_T zmm_regs[32]; > -X87_T x87_regs[8]; > +extern ZMM_T zmm_regs[32]; > +extern X87_T x87_regs[8]; > extern volatile unsigned long long volatile_var; > extern void snapshot (void); > extern void snapshot_ret (void); > diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S > b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S > index 86d54d11c58..9dc6d173a61 100644 > --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S > +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S > @@ -1,3 +1,5 @@ > + > +#ifdef __ELF__ > .text > .p2align 4,,15 > .globl snapshot > @@ -98,3 +100,124 @@ snapshot_ret: > #ifdef __linux__ > .section .note.GNU-stack,"",@progbits > #endif > + > +#elif defined(__APPLE__) > + > + .text > + .p2align 4,,15 > + .globl _snapshot > +_snapshot: > +.LFB3: > + movq %rax, _rax(%rip) > + movq %rbx, _rbx(%rip) > + movq %rcx, _rcx(%rip) > + movq %rdx, _rdx(%rip) > + movq %rdi, _rdi(%rip) > + movq %rsi, _rsi(%rip) > + movq %rbp, _rbp(%rip) > + movq %rsp, _rsp(%rip) > + movq %r8, _r8(%rip) > + movq %r9, _r9(%rip) > + movq %r10, _r10(%rip) > + movq %r11, _r11(%rip) > + movq %r12, _r12(%rip) > + movq %r13, _r13(%rip) > + movq %r14, _r14(%rip) > + movq %r15, _r15(%rip) > + 
vmovdqu32 %zmm0, _zmm_regs+0(%rip) > + vmovdqu32 %zmm1, _zmm_regs+64(%rip) > + vmovdqu32 %zmm2, _zmm_regs+128(%rip) > + vmovdqu32 %zmm3, _zmm_regs+192(%rip) > + vmovdqu32 %zmm4, _zmm_regs+256(%rip) > + vmovdqu32 %zmm5, _zmm_regs+320(%rip) > + vmovdqu32 %zmm6, _zmm_regs+384(%rip) > + vmovdqu32 %zmm7, _zmm_regs+448(%rip) > + vmovdqu32 %zmm8, _zmm_regs+512(%rip) > + vmovdqu32 %zmm9, _zmm_regs+576(%rip) > + vmovdqu32 %zmm10, _zmm_regs+640(%rip) > + vmovdqu32 %zmm11, _zmm_regs+704(%rip) > + vmovdqu32 %zmm12, _zmm_regs+768(%rip) > + vmovdqu32 %zmm13, _zmm_regs+832(%rip) > + vmovdqu32 %zmm14, _zmm_regs+896(%rip) > + vmovdqu32 %zmm15, _zmm_regs+960(%rip) > + vmovdqu32 %zmm16, _zmm_regs+1024(%rip) > + vmovdqu32 %zmm17, _zmm_regs+1088(%rip) > + vmovdqu32 %zmm18, _zmm_regs+1152(%rip) > + vmovdqu32 %zmm19, _zmm_regs+1216(%rip) > + vmovdqu32 %zmm20, _zmm_regs+1280(%rip) > + vmovdqu32 %zmm21, _zmm_regs+1344(%rip) > + vmovdqu32 %zmm22, _zmm_regs+1408(%rip) > + vmovdqu32 %zmm23, _zmm_regs+1472(%rip) > + vmovdqu32 %zmm24, _zmm_regs+1536(%rip) > + vmovdqu32 %zmm25, _zmm_regs+1600(%rip) > + vmovdqu32 %zmm26, _zmm_regs+1664(%rip) > + vmovdqu32 %zmm27, _zmm_regs+1728(%rip) > + vmovdqu32 %zmm28, _zmm_regs+1792(%rip) > + vmovdqu32 %zmm29, _zmm_regs+1856(%rip) > + vmovdqu32 %zmm30, _zmm_regs+1920(%rip) > + vmovdqu32 %zmm31, _zmm_regs+1984(%rip) > + jmp *_callthis(%rip) > +.LFE3: > + > + .p2align 4,,15 > + .globl _snapshot_ret > +_snapshot_ret: > + movq %rdi, _rdi(%rip) > + subq $8, %rsp > + call *_callthis(%rip) > + addq $8, %rsp > + movq %rax, _rax(%rip) > + movq %rdx, _rdx(%rip) > + vmovdqu32 %zmm0, _zmm_regs+0(%rip) > + vmovdqu32 %zmm1, _zmm_regs+64(%rip) > + fstpt _x87_regs(%rip) > + fstpt _x87_regs+16(%rip) > + fldt _x87_regs+16(%rip) > + fldt _x87_regs(%rip) > + ret > + > + .globl _callthis > + .zerofill __DATA,__bss,_callthis,8,3 > + .globl _rax > + .zerofill __DATA,__bss,_rax,8,3 > + .globl _rbx > + .zerofill __DATA,__bss,_rbx,8,3 > + .globl _rcx > + .zerofill 
__DATA,__bss,_rcx,8,3 > + .globl _rdx > + .zerofill __DATA,__bss,_rdx,8,3 > + .globl _rsi > + .zerofill __DATA,__bss,_rsi,8,3 > + .globl _rdi > + .zerofill __DATA,__bss,_rdi,8,3 > + .globl _rsp > + .zerofill __DATA,__bss,_rsp,8,3 > + .globl _rbp > + .zerofill __DATA,__bss,_rbp,8,3 > + .globl _r8 > + .zerofill __DATA,__bss,_r8,8,3 > + .globl _r9 > + .zerofill __DATA,__bss,_r9,8,3 > + .globl _r10 > + .zerofill __DATA,__bss,_r10,8,3 > + .globl _r11 > + .zerofill __DATA,__bss,_r11,8,3 > + .globl _r12 > + .zerofill __DATA,__bss,_r12,8,3 > + .globl _r13 > + .zerofill __DATA,__bss,_r13,8,3 > + .globl _r14 > + .zerofill __DATA,__bss,_r14,8,3 > + .globl _r15 > + .zerofill __DATA,__bss,_r15,8,3 > + .globl _zmm_regs > + .zerofill __DATA,__bss,_zmm_regs,2048,6 > + .globl _x87_regs > + .zerofill __DATA,__bss,_x87_regs,128,5 > + .globl _volatile_var > + .zerofill __DATA,__bss,_volatile_var,8,3 > + > +#else > +#error unknown object format > +#endif > + > -- > 2.37.1 (Apple Git-137.1) >