https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91348
Chris Hall changed:
What|Removed |Added
CC||gcc at gmch dot uk
--- Comment #3 from Chris Hall ---
I have another example of this broken-ness...
In qerrst.h:
/* Error-string holder: a 64-byte char buffer wrapped in a struct so that it
 * can be passed and returned by value. */
typedef struct { char s[64] ; } qerrst_t ;
/* Returns a qerrst_t by value for the given error number. */
extern qerrst_t qerrst0(int err) ;
In qerrst.c:
#include "qerrst.h"
#include <stdio.h>
#include
/* Formats "errno=<err>" into a local qerrst_t and returns that struct by value.
 * The by-value return of the 64-byte struct is the operation under discussion. */
extern qerrst_t
qerrst0(int err)
{
qerrst_t st ;
/* Write is bounded by the size of the buffer inside the struct. */
snprintf(st.s, sizeof(st.s), "errno=%d", err) ;
/* Return the local by value. */
return st ;
}
In qmain.c:
#include <stdio.h>
#include "qerrst.h"
/* Reproducer driver: calls qerrst0() twice with the same argument and prints
 * the resulting string each time, once from the returned temporary and once
 * after assigning the result to a local variable. */
int
main(int argc, char* argv[])
{
/* argc is a run-time value -- presumably chosen so the calls cannot be
 * folded away at compile time. */
int err = argc ;
qerrst_t z ;
/* Case 1: the returned struct is used directly as a temporary. */
printf("qerrst0()='%s'\n", qerrst0(err).s) ;
/* Case 2: the returned struct is assigned to the local 'z' before use. */
z = qerrst0(err) ;
printf("qerrst0()='%s'\n", z.s) ;
return 0 ;
}
Compile: gcc -O2 -Wall ../qerrst.c ../qmain.c [gcc 9.2.1]
function main:
push   %rbp
mov    %edi,%esi
mov    %edi,%ebp
sub    $0xc0,%rsp
lea    0x80(%rsp),%rdi # "hidden pointer"
callq  0x4011c0 <qerrst0>
lea    0x80(%rsp),%rsi # use "hidden pointer" for printf
mov    $0x402019,%edi
xor    %eax,%eax
callq  0x401030 <printf@plt>
mov    %ebp,%esi
mov    %rsp,%rdi # another "hidden pointer"
callq  0x4011c0 <qerrst0>
movdqu (%rsp),%xmm0
lea    0x40(%rsp),%rsi # address of 'qerrst_t z'
xor    %eax,%eax
movdqu 0x10(%rsp),%xmm1
movdqu 0x20(%rsp),%xmm2
mov    $0x402019,%edi
movdqu 0x30(%rsp),%xmm3
movaps %xmm0,0x40(%rsp) # copy "hidden" to 'qerrst_t z'
movaps %xmm1,0x50(%rsp)
movaps %xmm2,0x60(%rsp)
movaps %xmm3,0x70(%rsp)
callq  0x401030 <printf@plt>
add    $0xc0,%rsp
xor    %eax,%eax
pop    %rbp
retq
function qerrst0:
push   %r12
mov    %esi,%ecx
mov    %rdi,%r12 # save "hidden pointer"
mov    $0x402010,%edx
mov    $0x40,%esi
xor    %eax,%eax
sub    $0x40,%rsp # allocate 'qerrst_t st'
mov    %rsp,%rdi
callq  0x401040 <snprintf@plt>
movdqa (%rsp),%xmm0
mov    %r12,%rax
movdqa 0x10(%rsp),%xmm1
movdqa 0x20(%rsp),%xmm2
movdqa 0x30(%rsp),%xmm3
movups %xmm0,(%r12) # copy st !!!
movups %xmm1,0x10(%r12)
movups %xmm2,0x20(%r12)
movups %xmm3,0x30(%r12)
add    $0x40,%rsp
pop    %r12
retq
So, I looked at the AMD64 ABI (Draft 0.99.7 – November 17, 2014 – 15:08),
Section 3.2.3 Parameter Passing, p22:
Returning of Values:
2. If the type has class MEMORY, then the caller provides space
for the return value and passes the address of this storage
in %rdi as if it were the first argument to the function.
In effect, this address becomes a “hidden” first argument.
This storage must not overlap any data visible to the callee
through other names than this argument.
So... the ABI appears to say that the callee does *not* need to do any copying
*ever*.
So... why is the qerrst0() function doing a copy ?
This pushes the problem back to the caller. If the caller can be sure that the
final destination is not visible to the callee, it too can avoid copying.
In the case above, gcc fails to spot that 'qerrst_t z' in main() is not visible
to anything beyond main().
FWIW: clang (8.0.0) avoids the spurious copy in qerrst0(), but not the
unnecessary copy to 'qerrst_t z'.
---
I guess that functions returning large(ish) structs are not deemed worth
supporting properly?