Re: [PATCH v2] bpf: add inline memmove and memcpy expansion

2024-02-20 Thread David Faust



On 2/20/24 12:37, Jose E. Marchesi wrote:
> 
> Hi Faust.
> 
>> +bool
>> +bpf_expand_cpymem (rtx *operands, bool is_move)
>> +{
>> +  /* Size must be constant for this expansion to work.  */
>> +  if (!CONST_INT_P (operands[2]))
>> +{
>> +  const char *name = is_move ? "memmove" : "memcpy";
>> +  if (flag_building_libgcc)
>> +warning (RTL_LOCATION (operands[2]),
>> + "could not expand call to %<__builtin_%s%> inline: "
>> + "size must be constant", name);
>> +  else
>> +error ("could not expand call to %<__builtin_%s%> inline: "
>> +   "size must be constant", name);
>> +  return false;
>> +}
> 
> I think you want to use warning_at and error_at above...  the first
> argument to `warning' (and second to warning_at) is not a location but
> an OPT_W* value.  You should pass 0 to the opt argument in this case as
> there is no -W option to handle this warning.

Oops. Thanks.

After fixing the arguments, `warning' and `error' generally give better
messages than `warning_at' and `error_at' since the
RTL_LOCATION (operands[2]) seems to be NULL for most (all?) cases, which
causes the diagnostic message to refer only to the containing function.
The non-`_at' versions highlight the builtin call being expanded, which
is much nicer.

> 
> Also, I would not mention 'expand' in the error/warning message, as it
> is a GCC internal concept.  I would use perhaps "could not inline call
> to %<__builtin_%s%>".

OK.

> 
>> +  /* Alignment is a CONST_INT.  */
>> +  gcc_assert (CONST_INT_P (operands[3]));
>> +
>> +  rtx dst = operands[0];
>> +  rtx src = operands[1];
>> +  rtx size = operands[2];
>> +  unsigned HOST_WIDE_INT size_bytes = UINTVAL (size);
>> +  unsigned align = UINTVAL (operands[3]);
>> +  enum machine_mode mode;
>> +  switch (align)
>> +{
>> +case 1: mode = QImode; break;
>> +case 2: mode = HImode; break;
>> +case 4: mode = SImode; break;
>> +case 8: mode = DImode; break;
>> +default:
>> +  gcc_unreachable ();
>> +}
>> +
>> +  unsigned iters = size_bytes >> ceil_log2 (align);
>> +  unsigned remainder = size_bytes & (align - 1);
>> +
>> +  int inc = GET_MODE_SIZE (mode);
>> +  rtx_code_label *fwd_label, *done_label;
>> +  if (is_move)
>> +{
>> +  /* For memmove, be careful of overlap.  It is not a concern for 
>> memcpy.
>> + To handle overlap, we check (at runtime) if SRC < DST, and if so do
>> + the move "backwards" starting from SRC + SIZE.  */
>> +  fwd_label = gen_label_rtx ();
>> +  done_label = gen_label_rtx ();
>> +
>> +  rtx dst_addr = copy_to_mode_reg (Pmode, XEXP (dst, 0));
>> +  rtx src_addr = copy_to_mode_reg (Pmode, XEXP (src, 0));
>> +  emit_cmp_and_jump_insns (src_addr, dst_addr, GEU, NULL_RTX, Pmode,
>> +   true, fwd_label, profile_probability::even ());
>> +
>> +  /* Emit the "backwards" unrolled loop.  */
>> +  emit_move_loop (src, dst, mode, size_bytes, -inc, iters, remainder);
>> +  emit_jump_insn (gen_jump (done_label));
>> +  emit_barrier ();
>> +
>> +  emit_label (fwd_label);
>> +}
>> +
>> +  emit_move_loop (src, dst, mode, 0, inc, iters, remainder);
>> +
>> +  if (is_move)
>> +emit_label (done_label);
>> +
>> +  return true;
>> +}
>> +
>>  /* Finally, build the GCC target.  */
>>  
>>  struct gcc_target targetm = TARGET_INITIALIZER;
>> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
>> index 50df1aaa3e2..ca677bc6b50 100644
>> --- a/gcc/config/bpf/bpf.md
>> +++ b/gcc/config/bpf/bpf.md
>> @@ -627,4 +627,40 @@ (define_insn "ldabs"
>>"{ldabs\t%0|r0 = *( *) skb[%0]}"
>>[(set_attr "type" "ld")])
>>  
>> +;;; memmove and memcopy
>> +
>> +;; 0 is dst
>> +;; 1 is src
>> +;; 2 is size of copy in bytes
>> +;; 3 is alignment
>> +
>> +(define_expand "cpymemdi"
>> +  [(match_operand:BLK 0 "memory_operand")
>> +   (match_operand:BLK 1 "memory_operand")
>> +   (match_operand:DI 2 "general_operand")
>> +   (match_operand:DI 3 "immediate_operand")]
>> +   ""
>> +{
>> +  if (bpf_expand_cpymem (operands, false))
>> +DONE;
>> +  FAIL;
>> +})
>> +
>> +;; 0 is dst
>> +;; 1 is src
>> +;; 2 is size of copy in bytes
>> +;; 3 is alignment
>> +
>> +(define_expand "movmemdi"
>> +  [(match_operand:BLK 0 "memory_operand")
>> +   (match_operand:BLK 1 "memory_operand")
>> +   (match_operand:DI 2 "general_operand")
>> +   (match_operand:DI 3 "immediate_operand")]
>> +   ""
>> +{
>> +  if (bpf_expand_cpymem (operands, true))
>> +DONE;
>> +  FAIL;
>> +})
>> +
>>  (include "atomic.md")
>> diff --git a/gcc/testsuite/gcc.target/bpf/memcpy-1.c 
>> b/gcc/testsuite/gcc.target/bpf/memcpy-1.c
>> new file mode 100644
>> index 000..6c9707f24e8
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/bpf/memcpy-1.c
>> @@ -0,0 +1,26 @@
>> +/* Ensure memcpy is expanded inline rather than emitting a libcall.  */
>> +
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2" } */
>> +
>> +struct context {
>> + unsigned int data;
>> 

Re: [PATCH v2] bpf: add inline memmove and memcpy expansion

2024-02-20 Thread Jose E. Marchesi


Hi Faust.

> +bool
> +bpf_expand_cpymem (rtx *operands, bool is_move)
> +{
> +  /* Size must be constant for this expansion to work.  */
> +  if (!CONST_INT_P (operands[2]))
> +{
> +  const char *name = is_move ? "memmove" : "memcpy";
> +  if (flag_building_libgcc)
> + warning (RTL_LOCATION (operands[2]),
> +  "could not expand call to %<__builtin_%s%> inline: "
> +  "size must be constant", name);
> +  else
> + error ("could not expand call to %<__builtin_%s%> inline: "
> +"size must be constant", name);
> +  return false;
> +}

I think you want to use warning_at and error_at above...  the first
argument to `warning' (and second to warning_at) is not a location but
an OPT_W* value.  You should pass 0 to the opt argument in this case as
there is no -W option to handle this warning.

Also, I would not mention 'expand' in the error/warning message, as it
is a GCC internal concept.  I would use perhaps "could not inline call
to %<__builtin_%s%>".

> +  /* Alignment is a CONST_INT.  */
> +  gcc_assert (CONST_INT_P (operands[3]));
> +
> +  rtx dst = operands[0];
> +  rtx src = operands[1];
> +  rtx size = operands[2];
> +  unsigned HOST_WIDE_INT size_bytes = UINTVAL (size);
> +  unsigned align = UINTVAL (operands[3]);
> +  enum machine_mode mode;
> +  switch (align)
> +{
> +case 1: mode = QImode; break;
> +case 2: mode = HImode; break;
> +case 4: mode = SImode; break;
> +case 8: mode = DImode; break;
> +default:
> +  gcc_unreachable ();
> +}
> +
> +  unsigned iters = size_bytes >> ceil_log2 (align);
> +  unsigned remainder = size_bytes & (align - 1);
> +
> +  int inc = GET_MODE_SIZE (mode);
> +  rtx_code_label *fwd_label, *done_label;
> +  if (is_move)
> +{
> +  /* For memmove, be careful of overlap.  It is not a concern for memcpy.
> +  To handle overlap, we check (at runtime) if SRC < DST, and if so do
> +  the move "backwards" starting from SRC + SIZE.  */
> +  fwd_label = gen_label_rtx ();
> +  done_label = gen_label_rtx ();
> +
> +  rtx dst_addr = copy_to_mode_reg (Pmode, XEXP (dst, 0));
> +  rtx src_addr = copy_to_mode_reg (Pmode, XEXP (src, 0));
> +  emit_cmp_and_jump_insns (src_addr, dst_addr, GEU, NULL_RTX, Pmode,
> +true, fwd_label, profile_probability::even ());
> +
> +  /* Emit the "backwards" unrolled loop.  */
> +  emit_move_loop (src, dst, mode, size_bytes, -inc, iters, remainder);
> +  emit_jump_insn (gen_jump (done_label));
> +  emit_barrier ();
> +
> +  emit_label (fwd_label);
> +}
> +
> +  emit_move_loop (src, dst, mode, 0, inc, iters, remainder);
> +
> +  if (is_move)
> +emit_label (done_label);
> +
> +  return true;
> +}
> +
>  /* Finally, build the GCC target.  */
>  
>  struct gcc_target targetm = TARGET_INITIALIZER;
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index 50df1aaa3e2..ca677bc6b50 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -627,4 +627,40 @@ (define_insn "ldabs"
>"{ldabs\t%0|r0 = *( *) skb[%0]}"
>[(set_attr "type" "ld")])
>  
> +;;; memmove and memcopy
> +
> +;; 0 is dst
> +;; 1 is src
> +;; 2 is size of copy in bytes
> +;; 3 is alignment
> +
> +(define_expand "cpymemdi"
> +  [(match_operand:BLK 0 "memory_operand")
> +   (match_operand:BLK 1 "memory_operand")
> +   (match_operand:DI 2 "general_operand")
> +   (match_operand:DI 3 "immediate_operand")]
> +   ""
> +{
> +  if (bpf_expand_cpymem (operands, false))
> +DONE;
> +  FAIL;
> +})
> +
> +;; 0 is dst
> +;; 1 is src
> +;; 2 is size of copy in bytes
> +;; 3 is alignment
> +
> +(define_expand "movmemdi"
> +  [(match_operand:BLK 0 "memory_operand")
> +   (match_operand:BLK 1 "memory_operand")
> +   (match_operand:DI 2 "general_operand")
> +   (match_operand:DI 3 "immediate_operand")]
> +   ""
> +{
> +  if (bpf_expand_cpymem (operands, true))
> +DONE;
> +  FAIL;
> +})
> +
>  (include "atomic.md")
> diff --git a/gcc/testsuite/gcc.target/bpf/memcpy-1.c 
> b/gcc/testsuite/gcc.target/bpf/memcpy-1.c
> new file mode 100644
> index 000..6c9707f24e8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/memcpy-1.c
> @@ -0,0 +1,26 @@
> +/* Ensure memcpy is expanded inline rather than emitting a libcall.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +struct context {
> + unsigned int data;
> + unsigned int data_end;
> + unsigned int data_meta;
> + unsigned int ingress;
> + unsigned int queue_index;
> + unsigned int egress;
> +};
> +
> +void
> +cpy_1(struct context *ctx)
> +{
> +  void *data = (void *)(long)ctx->data;
> +  char *dest;
> +  dest = data;
> +  dest += 16;
> +
> +  __builtin_memcpy (dest, data, 8);
> +}
> +
> +/* { dg-final { scan-assembler-times "call" 0 } } */
> diff --git a/gcc/testsuite/gcc.target/bpf/memmove-1.c 
> b/gcc/testsuite/gcc.target/bpf/memmove-1.c
> new file mode 100644
> index 000..3b8ba82639e
> --- /dev/null
> +++