Re: [PATCH v3 4/5] xtensa: Add setmemsi insn pattern

2022-05-26 Thread Max Filippov via Gcc-patches
On Mon, May 23, 2022 at 8:52 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch introduces setmemsi insn pattern of two kinds, unrolled loop and
> small loop, for fixed small length and constant initialization value.
>
> gcc/ChangeLog:
>
> * gcc/config/xtensa/xtensa-protos.h
> (xtensa_expand_block_set_unrolled_loop,
> xtensa_expand_block_set_small_loop): New prototypes.
> * gcc/config/xtensa/xtensa.cc (xtensa_sizeof_MOVI,
> xtensa_expand_block_set_unrolled_loop,
> xtensa_expand_block_set_small_loop): New functions.
> * gcc/config/xtensa/xtensa.md (setmemsi): New expansion pattern.
> * gcc/config/xtensa/xtensa.opt (mlongcalls): Add target mask.
> ---
>   gcc/config/xtensa/xtensa-protos.h |   2 +
>   gcc/config/xtensa/xtensa.cc   | 211 ++
>   gcc/config/xtensa/xtensa.md   |  16 +++
>   gcc/config/xtensa/xtensa.opt  |   2 +-
>   4 files changed, 230 insertions(+), 1 deletion(-)

With this patch applied for the following test program

void f(char *p);

void g(void)
{
   char c[72] = {0};
   f(c);
}

the following code is generated with -O2:

   .text
   .literal_position
   .literal .LC0, f@PLT
   .align  4
   .global g
   .type   g, @function
g:
   entry   sp, 112
   movi.n  a10, 0
   s32i.n  a10, sp, 0
   addi.n  a9, sp, 4
   movi.n  a8, 0x11
   loopa8, .L2_LEND
.L2:
   s32i.n  a10, a9, 0
   addi.n  a9, a9, 4
   .L2_LEND:
   l32ra8, .LC0
   mov.n   a10, sp
   callx8  a8
   retw.n

The part

   s32i.n  a10, sp, 0
   addi.n  a9, sp, 4
   movi.n  a8, 0x11

looks redundant and could be just

mov a9, sp
movi a8, 0x12

is that something that can be addressed in this patch?

-- 
Thanks.
-- Max


Re: [PATCH v3 4/5] xtensa: Add setmemsi insn pattern

2022-05-26 Thread Takayuki 'January June' Suwa via Gcc-patches

On 2022/05/27 1:57, Max Filippov wrote:

is that something that can be addressed in this patch?


seems hard to resolve, because the RTL-generation pass passes only 68 
bytes in that case:



void f(char *p);

void g(void)
{
   char c[72] = {0};
   f(c);
}


without this patch, we would get as:

g:
entry   sp, 112
movi.n  a8, 0
movi.n  a12, 0x44   ; 68, not 72
mov.n   a11, a8
addi.n  a10, sp, 4  ; skipped first 4 bytes
s32i.n  a8, sp, 0   ; cleared without using memset()
call8   memset
mov.n   a10, sp
call8   f
retw.n

parhaps, it can be solved it by using peephole2 pattern... (depends on 
whether peephole2 can capture code_label)


this behavior does not occur in configuration without zero-overhead 
loop, eg. in xtensa-lx106 (ESP8266 SoC):


g:
addisp, sp, -96
movi.n  a3, 0
s32ia0, sp, 92
s32i.n  a3, sp, 0
addi.n  a2, sp, 4
addia4, sp, 72
.L2:
s32i.n  a3, a2, 0
addi.n  a2, a2, 4
bne a2, a4, .L2
mov.n   a2, sp
call0   f
l32ia0, sp, 92
addisp, sp, 96
ret.n

in x86_64-linux:

g:
.LFB0:
.cfi_startproc
subq$88, %rsp
.cfi_def_cfa_offset 96
pxor%xmm0, %xmm0
movq%rsp, %rdi
movaps  %xmm0, (%rsp)
movaps  %xmm0, 16(%rsp)
movaps  %xmm0, 32(%rsp)
movaps  %xmm0, 48(%rsp)
movq$0, 64(%rsp)
callf@PLT
addq$88, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE0:

or, dword-aligned element:

void f(int *p);
void g(void)
{
int c[18] = { 0 };
f(c);
}


Re: [PATCH v3 4/5] xtensa: Add setmemsi insn pattern

2022-05-26 Thread Max Filippov via Gcc-patches
On Thu, May 26, 2022 at 8:00 PM Takayuki 'January June' Suwa
 wrote:
>
> On 2022/05/27 1:57, Max Filippov wrote:
> > is that something that can be addressed in this patch?
>
> seems hard to resolve, because the RTL-generation pass passes only 68
> bytes in that case:
...
> this behavior does not occur in configuration without zero-overhead
> loop, eg. in xtensa-lx106 (ESP8266 SoC):

Ok, I'll commit it as is then.

-- 
Thanks.
-- Max


Re: [PATCH v3 4/5] xtensa: Add setmemsi insn pattern

2022-05-26 Thread Max Filippov via Gcc-patches
On Mon, May 23, 2022 at 8:52 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch introduces setmemsi insn pattern of two kinds, unrolled loop and
> small loop, for fixed small length and constant initialization value.
>
> gcc/ChangeLog:
>
> * gcc/config/xtensa/xtensa-protos.h
> (xtensa_expand_block_set_unrolled_loop,
> xtensa_expand_block_set_small_loop): New prototypes.
> * gcc/config/xtensa/xtensa.cc (xtensa_sizeof_MOVI,
> xtensa_expand_block_set_unrolled_loop,
> xtensa_expand_block_set_small_loop): New functions.
> * gcc/config/xtensa/xtensa.md (setmemsi): New expansion pattern.
> * gcc/config/xtensa/xtensa.opt (mlongcalls): Add target mask.
> ---
>   gcc/config/xtensa/xtensa-protos.h |   2 +
>   gcc/config/xtensa/xtensa.cc   | 211 ++
>   gcc/config/xtensa/xtensa.md   |  16 +++
>   gcc/config/xtensa/xtensa.opt  |   2 +-
>   4 files changed, 230 insertions(+), 1 deletion(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Changelog has extra 'gcc/' in paths, so I've dropped this part.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH v3 4/5] xtensa: Add setmemsi insn pattern

2022-05-27 Thread Takayuki 'January June' Suwa via Gcc-patches

On 2022/05/27 12:00, Takayuki 'January June' Suwa via Gcc-patches wrote:

On 2022/05/27 1:57, Max Filippov wrote:

is that something that can be addressed in this patch?


seems hard to resolve, because the RTL-generation pass passes only 68 
bytes in that case:


the culprit is here, but i don't know whether it is known regression or not.

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 7197996cec7..be100dd9946 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -6043,13 +6043,19 @@ store_expr (tree exp, rtx target, int call_param_p,
   if (!can_store_by_pieces (str_copy_len, string_cst_read_str,
(void *) str, MEM_ALIGN (target), false))
goto normal_expr;
-
-  dest_mem = store_by_pieces (target, str_copy_len, 
string_cst_read_str,

- (void *) str, MEM_ALIGN (target), false,
- RETURN_END);
-  clear_storage (adjust_address_1 (dest_mem, BLKmode, 0, 1, 1, 0,
-  exp_len - str_copy_len),
-GEN_INT (exp_len - str_copy_len), BLOCK_OP_NORMAL);
+  if (TREE_STRING_LENGTH (str) == 1 && *TREE_STRING_POINTER (str) == 0)
+   clear_storage (adjust_address_1 (target, BLKmode, 0, 1, 1, 0,
+exp_len),
+  GEN_INT (exp_len), BLOCK_OP_NORMAL);
+  else
+   {
+ dest_mem = store_by_pieces (target, str_copy_len, string_cst_read_str,
+ (void *) str, MEM_ALIGN (target), false,
+ RETURN_END);
+ clear_storage (adjust_address_1 (dest_mem, BLKmode, 0, 1, 1, 0,
+  exp_len - str_copy_len),
+GEN_INT (exp_len - str_copy_len), BLOCK_OP_NORMAL);
+   }
   return NULL_RTX;
 }
   else