On 2022/05/27 1:57, Max Filippov wrote:
is that something that can be addressed in this patch?

It seems hard to resolve, because in that case the RTL-generation pass hands over only 68 bytes to be cleared:

void f(char *p);

void g(void)
{
       char c[72] = {0};
       f(c);
}

Without this patch, we would get:

g:
        entry   sp, 112
        movi.n  a8, 0
        movi.n  a12, 0x44       ; 68, not 72
        mov.n   a11, a8
        addi.n  a10, sp, 4      ; skipped first 4 bytes
        s32i.n  a8, sp, 0       ; cleared without using memset()
        call8   memset
        mov.n   a10, sp
        call8   f
        retw.n

Perhaps it can be solved by using a peephole2 pattern... (this depends on whether peephole2 can capture a code_label)

This behavior does not occur in a configuration without the zero-overhead loop, e.g. on xtensa-lx106 (ESP8266 SoC):

g:
        addi    sp, sp, -96
        movi.n  a3, 0
        s32i    a0, sp, 92
        s32i.n  a3, sp, 0
        addi.n  a2, sp, 4
        addi    a4, sp, 72
.L2:
        s32i.n  a3, a2, 0
        addi.n  a2, a2, 4
        bne     a2, a4, .L2
        mov.n   a2, sp
        call0   f
        l32i    a0, sp, 92
        addi    sp, sp, 96
        ret.n

in x86_64-linux:

g:
.LFB0:
        .cfi_startproc
        subq    $88, %rsp
        .cfi_def_cfa_offset 96
        pxor    %xmm0, %xmm0
        movq    %rsp, %rdi
        movaps  %xmm0, (%rsp)
        movaps  %xmm0, 16(%rsp)
        movaps  %xmm0, 32(%rsp)
        movaps  %xmm0, 48(%rsp)
        movq    $0, 64(%rsp)
        call    f@PLT
        addq    $88, %rsp
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE0:

Or, with dword-aligned elements:

void f(int *p);
void g(void)
{
        int c[18] = { 0 };
        f(c);
}

Reply via email to