On 2022/05/27 1:57, Max Filippov wrote:
is that something that can be addressed in this patch?
seems hard to resolve, because the RTL-generation pass passes only 68
bytes in that case:
void f(char *p);
void g(void)
{
char c[72] = {0};
f(c);
}
without this patch, we would get:
g:
entry sp, 112
movi.n a8, 0
movi.n a12, 0x44 ; 68, not 72
mov.n a11, a8
addi.n a10, sp, 4 ; skipped first 4 bytes
s32i.n a8, sp, 0 ; cleared without using memset()
call8 memset
mov.n a10, sp
call8 f
retw.n
perhaps it can be solved by using a peephole2 pattern... (depends on
whether peephole2 can capture code_label)
this behavior does not occur in configurations without the zero-overhead
loop option, e.g. on xtensa-lx106 (ESP8266 SoC):
g:
addi sp, sp, -96
movi.n a3, 0
s32i a0, sp, 92
s32i.n a3, sp, 0
addi.n a2, sp, 4
addi a4, sp, 72
.L2:
s32i.n a3, a2, 0
addi.n a2, a2, 4
bne a2, a4, .L2
mov.n a2, sp
call0 f
l32i a0, sp, 92
addi sp, sp, 96
ret.n
in x86_64-linux:
g:
.LFB0:
.cfi_startproc
subq $88, %rsp
.cfi_def_cfa_offset 96
pxor %xmm0, %xmm0
movq %rsp, %rdi
movaps %xmm0, (%rsp)
movaps %xmm0, 16(%rsp)
movaps %xmm0, 32(%rsp)
movaps %xmm0, 48(%rsp)
movq $0, 64(%rsp)
call f@PLT
addq $88, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE0:
or, with dword-aligned elements:
void f(int *p);
void g(void)
{
int c[18] = { 0 };
f(c);
}