Re: [PATCH v2] bpf: add inline memmove and memcpy expansion
On 2/20/24 12:37, Jose E. Marchesi wrote: > > Hi Faust. > >> +bool >> +bpf_expand_cpymem (rtx *operands, bool is_move) >> +{ >> + /* Size must be constant for this expansion to work. */ >> + if (!CONST_INT_P (operands[2])) >> +{ >> + const char *name = is_move ? "memmove" : "memcpy"; >> + if (flag_building_libgcc) >> +warning (RTL_LOCATION (operands[2]), >> + "could not expand call to %<__builtin_%s%> inline: " >> + "size must be constant", name); >> + else >> +error ("could not expand call to %<__builtin_%s%> inline: " >> + "size must be constant", name); >> + return false; >> +} > > I think you want to use warning_at and error_at above... the first > argument to `warning' (and second to warning_at) is not a location but > an OPT_W* value. You should pass 0 to the opt argument in this case as > there is no -W option to handle this warning. Oops. Thanks. After fixing the arguments, `warning' and `error' generally give better messages than `warning_at' and `error_at' since the RTL_LOCATION (operands[2]) seems to be NULL for most (all?) cases, which causes the diagnostic message to refer only to the containing function. The non-`_at' versions highlight the builtin call being expanded, which is much nicer. > > Also, I would not mention 'expand' in the error/warning message, as it > is a GCC internal concept. I would use perhaps "could not inline call > to %<__builtin_%s%>". OK. > >> + /* Alignment is a CONST_INT. */ >> + gcc_assert (CONST_INT_P (operands[3])); >> + >> + rtx dst = operands[0]; >> + rtx src = operands[1]; >> + rtx size = operands[2]; >> + unsigned HOST_WIDE_INT size_bytes = UINTVAL (size); >> + unsigned align = UINTVAL (operands[3]); >> + enum machine_mode mode; >> + switch (align) >> +{ >> +case 1: mode = QImode; break; >> +case 2: mode = HImode; break; >> +case 4: mode = SImode; break; >> +case 8: mode = DImode; break; >> +default: >> + gcc_unreachable (); >> +} >> + >> + unsigned iters = size_bytes >> ceil_log2 (align); >> + unsigned remainder = size_bytes & (align - 1); >> + >> + int inc = GET_MODE_SIZE (mode); >> + rtx_code_label *fwd_label, *done_label; >> + if (is_move) >> +{ >> + /* For memmove, be careful of overlap. It is not a concern for >> memcpy. >> + To handle overlap, we check (at runtime) if SRC < DST, and if so do >> + the move "backwards" starting from SRC + SIZE. */ >> + fwd_label = gen_label_rtx (); >> + done_label = gen_label_rtx (); >> + >> + rtx dst_addr = copy_to_mode_reg (Pmode, XEXP (dst, 0)); >> + rtx src_addr = copy_to_mode_reg (Pmode, XEXP (src, 0)); >> + emit_cmp_and_jump_insns (src_addr, dst_addr, GEU, NULL_RTX, Pmode, >> + true, fwd_label, profile_probability::even ()); >> + >> + /* Emit the "backwards" unrolled loop. */ >> + emit_move_loop (src, dst, mode, size_bytes, -inc, iters, remainder); >> + emit_jump_insn (gen_jump (done_label)); >> + emit_barrier (); >> + >> + emit_label (fwd_label); >> +} >> + >> + emit_move_loop (src, dst, mode, 0, inc, iters, remainder); >> + >> + if (is_move) >> +emit_label (done_label); >> + >> + return true; >> +} >> + >> /* Finally, build the GCC target. */ >> >> struct gcc_target targetm = TARGET_INITIALIZER; >> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md >> index 50df1aaa3e2..ca677bc6b50 100644 >> --- a/gcc/config/bpf/bpf.md >> +++ b/gcc/config/bpf/bpf.md >> @@ -627,4 +627,40 @@ (define_insn "ldabs" >>"{ldabs\t%0|r0 = *( *) skb[%0]}" >>[(set_attr "type" "ld")]) >> >> +;;; memmove and memcopy >> + >> +;; 0 is dst >> +;; 1 is src >> +;; 2 is size of copy in bytes >> +;; 3 is alignment >> + >> +(define_expand "cpymemdi" >> + [(match_operand:BLK 0 "memory_operand") >> + (match_operand:BLK 1 "memory_operand") >> + (match_operand:DI 2 "general_operand") >> + (match_operand:DI 3 "immediate_operand")] >> + "" >> +{ >> + if (bpf_expand_cpymem (operands, false)) >> +DONE; >> + FAIL; >> +}) >> + >> +;; 0 is dst >> +;; 1 is src >> +;; 2 is size of copy in bytes >> +;; 3 is alignment >> + >> +(define_expand "movmemdi" >> + [(match_operand:BLK 0 "memory_operand") >> + (match_operand:BLK 1 "memory_operand") >> + (match_operand:DI 2 "general_operand") >> + (match_operand:DI 3 "immediate_operand")] >> + "" >> +{ >> + if (bpf_expand_cpymem (operands, true)) >> +DONE; >> + FAIL; >> +}) >> + >> (include "atomic.md") >> diff --git a/gcc/testsuite/gcc.target/bpf/memcpy-1.c >> b/gcc/testsuite/gcc.target/bpf/memcpy-1.c >> new file mode 100644 >> index 000..6c9707f24e8 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/bpf/memcpy-1.c >> @@ -0,0 +1,26 @@ >> +/* Ensure memcpy is expanded inline rather than emitting a libcall. */ >> + >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +struct context { >> + unsigned int data; >>
Re: [PATCH v2] bpf: add inline memmove and memcpy expansion
Hi Faust. > +bool > +bpf_expand_cpymem (rtx *operands, bool is_move) > +{ > + /* Size must be constant for this expansion to work. */ > + if (!CONST_INT_P (operands[2])) > +{ > + const char *name = is_move ? "memmove" : "memcpy"; > + if (flag_building_libgcc) > + warning (RTL_LOCATION (operands[2]), > + "could not expand call to %<__builtin_%s%> inline: " > + "size must be constant", name); > + else > + error ("could not expand call to %<__builtin_%s%> inline: " > +"size must be constant", name); > + return false; > +} I think you want to use warning_at and error_at above... the first argument to `warning' (and second to warning_at) is not a location but an OPT_W* value. You should pass 0 to the opt argument in this case as there is no -W option to handle this warning. Also, I would not mention 'expand' in the error/warning message, as it is a GCC internal concept. I would use perhaps "could not inline call to %<__builtin_%s%>". > + /* Alignment is a CONST_INT. */ > + gcc_assert (CONST_INT_P (operands[3])); > + > + rtx dst = operands[0]; > + rtx src = operands[1]; > + rtx size = operands[2]; > + unsigned HOST_WIDE_INT size_bytes = UINTVAL (size); > + unsigned align = UINTVAL (operands[3]); > + enum machine_mode mode; > + switch (align) > +{ > +case 1: mode = QImode; break; > +case 2: mode = HImode; break; > +case 4: mode = SImode; break; > +case 8: mode = DImode; break; > +default: > + gcc_unreachable (); > +} > + > + unsigned iters = size_bytes >> ceil_log2 (align); > + unsigned remainder = size_bytes & (align - 1); > + > + int inc = GET_MODE_SIZE (mode); > + rtx_code_label *fwd_label, *done_label; > + if (is_move) > +{ > + /* For memmove, be careful of overlap. It is not a concern for memcpy. > + To handle overlap, we check (at runtime) if SRC < DST, and if so do > + the move "backwards" starting from SRC + SIZE. */ > + fwd_label = gen_label_rtx (); > + done_label = gen_label_rtx (); > + > + rtx dst_addr = copy_to_mode_reg (Pmode, XEXP (dst, 0)); > + rtx src_addr = copy_to_mode_reg (Pmode, XEXP (src, 0)); > + emit_cmp_and_jump_insns (src_addr, dst_addr, GEU, NULL_RTX, Pmode, > +true, fwd_label, profile_probability::even ()); > + > + /* Emit the "backwards" unrolled loop. */ > + emit_move_loop (src, dst, mode, size_bytes, -inc, iters, remainder); > + emit_jump_insn (gen_jump (done_label)); > + emit_barrier (); > + > + emit_label (fwd_label); > +} > + > + emit_move_loop (src, dst, mode, 0, inc, iters, remainder); > + > + if (is_move) > +emit_label (done_label); > + > + return true; > +} > + > /* Finally, build the GCC target. */ > > struct gcc_target targetm = TARGET_INITIALIZER; > diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md > index 50df1aaa3e2..ca677bc6b50 100644 > --- a/gcc/config/bpf/bpf.md > +++ b/gcc/config/bpf/bpf.md > @@ -627,4 +627,40 @@ (define_insn "ldabs" >"{ldabs\t%0|r0 = *( *) skb[%0]}" >[(set_attr "type" "ld")]) > > +;;; memmove and memcopy > + > +;; 0 is dst > +;; 1 is src > +;; 2 is size of copy in bytes > +;; 3 is alignment > + > +(define_expand "cpymemdi" > + [(match_operand:BLK 0 "memory_operand") > + (match_operand:BLK 1 "memory_operand") > + (match_operand:DI 2 "general_operand") > + (match_operand:DI 3 "immediate_operand")] > + "" > +{ > + if (bpf_expand_cpymem (operands, false)) > +DONE; > + FAIL; > +}) > + > +;; 0 is dst > +;; 1 is src > +;; 2 is size of copy in bytes > +;; 3 is alignment > + > +(define_expand "movmemdi" > + [(match_operand:BLK 0 "memory_operand") > + (match_operand:BLK 1 "memory_operand") > + (match_operand:DI 2 "general_operand") > + (match_operand:DI 3 "immediate_operand")] > + "" > +{ > + if (bpf_expand_cpymem (operands, true)) > +DONE; > + FAIL; > +}) > + > (include "atomic.md") > diff --git a/gcc/testsuite/gcc.target/bpf/memcpy-1.c > b/gcc/testsuite/gcc.target/bpf/memcpy-1.c > new file mode 100644 > index 000..6c9707f24e8 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/bpf/memcpy-1.c > @@ -0,0 +1,26 @@ > +/* Ensure memcpy is expanded inline rather than emitting a libcall. */ > + > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +struct context { > + unsigned int data; > + unsigned int data_end; > + unsigned int data_meta; > + unsigned int ingress; > + unsigned int queue_index; > + unsigned int egress; > +}; > + > +void > +cpy_1(struct context *ctx) > +{ > + void *data = (void *)(long)ctx->data; > + char *dest; > + dest = data; > + dest += 16; > + > + __builtin_memcpy (dest, data, 8); > +} > + > +/* { dg-final { scan-assembler-times "call" 0 } } */ > diff --git a/gcc/testsuite/gcc.target/bpf/memmove-1.c > b/gcc/testsuite/gcc.target/bpf/memmove-1.c > new file mode 100644 > index 000..3b8ba82639e > --- /dev/null > +++