https://gcc.gnu.org/g:54ba8d44bbd703bca6984700b4d6f978890097e2

commit r15-490-g54ba8d44bbd703bca6984700b4d6f978890097e2
Author: Christoph Müllner <christoph.muell...@vrull.eu>
Date:   Tue May 14 09:21:17 2024 -0600

    [PATCH 3/3] RISC-V: Add memset-zero expansion to cbo.zero
    
    The Zicboz extension offers the cbo.zero instruction, which can be used
    to zero a memory region corresponding to a cache block.
    The Zic64b extension defines the cache block size to be 64 bytes.
    If both extensions are available, it is possible to use cbo.zero
    to clear memory, if the alignment and size constraints are met.
    This patch implements this.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-protos.h (riscv_expand_block_clear): New
            prototype.
            * config/riscv/riscv-string.cc
            (riscv_expand_block_clear_zicboz_zic64b):
            New function to expand a block-clear with cbo.zero.
            (riscv_expand_block_clear): New RISC-V block-clear expansion
            function.
            * config/riscv/riscv.md (setmem<mode>): New setmem expansion.

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |  1 +
 gcc/config/riscv/riscv-string.cc                   | 59 ++++++++++++++++++++++
 gcc/config/riscv/riscv.md                          | 24 +++++++++
 .../gcc.target/riscv/cmo-zicboz-zic64-1.c          | 43 ++++++++++++++++
 4 files changed, 127 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index e5aebf3fc3d5..255fd6a0de97 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -189,6 +189,7 @@ rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_expand_block_clear (rtx, rtx);  /* DEST, LENGTH.  */
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 41cb061c746d..87f5fdee3c14 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -794,6 +794,65 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
   return false;
 }
 
+/* Expand a block-clear of LENGTH bytes at DEST via cbo.zero instructions.
+   Return true on success.  Return false, emitting nothing, if LENGTH is not
+   constant, is below 64 or above the limit, or DEST is not 64-byte aligned.  */
+
+static bool
+riscv_expand_block_clear_zicboz_zic64b (rtx dest, rtx length)
+{
+  const unsigned HOST_WIDE_INT cbo_bytes = 64;
+
+  gcc_assert (TARGET_ZICBOZ && TARGET_ZIC64B);
+
+  if (!CONST_INT_P (length))
+    return false;
+
+  unsigned HOST_WIDE_INT hwi_length = UINTVAL (length);
+  if (hwi_length < cbo_bytes)
+    return false;
+
+  unsigned HOST_WIDE_INT align = MEM_ALIGN (dest) / BITS_PER_UNIT;
+  if (align < cbo_bytes)
+    return false;
+
+  /* We don't emit loops.  Instead apply move-bytes limitation.  */
+  unsigned HOST_WIDE_INT max_bytes = RISCV_MAX_MOVE_BYTES_STRAIGHT /
+         UNITS_PER_WORD * cbo_bytes;
+  if (hwi_length > max_bytes)
+    return false;
+
+  unsigned HOST_WIDE_INT offset;
+  for (offset = 0; offset + cbo_bytes <= hwi_length; offset += cbo_bytes)
+    {
+      rtx mem = adjust_address (dest, BLKmode, offset);
+      rtx addr = force_reg (Pmode, XEXP (mem, 0));
+      /* ADDR is in Pmode, so rv32 needs the SI variant of the pattern.  */
+      emit_insn (TARGET_64BIT ? gen_riscv_zero_di (addr)
+                              : gen_riscv_zero_si (addr));
+    }
+
+  /* Clear the tail that does not fill a whole cache block by pieces.  */
+  if (offset < hwi_length)
+    clear_by_pieces (adjust_address (dest, BLKmode, offset),
+                     hwi_length - offset, align);
+
+  return true;
+}
+
+/* Expand a block-clear of LENGTH bytes at DEST; return true if expanded.  */
+bool
+riscv_expand_block_clear (rtx dest, rtx length)
+{
+  /* The only expansion available requires both Zicboz and Zic64b.  */
+  const bool have_cbo_zero = TARGET_ZICBOZ && TARGET_ZIC64B;
+
+  /* Also keep out of the way when optimizing the function for size.  */
+  if (!have_cbo_zero || optimize_function_for_size_p (cfun))
+    return false;
+  return riscv_expand_block_clear_zicboz_zic64b (dest, length);
+}
+
 /* --- Vector expanders --- */
 
 namespace riscv_vector {
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 4d6de9925572..c45b1129b0a0 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2608,6 +2608,30 @@
     FAIL;
 })
 
+;; Fill memory with constant byte.
+;; Argument 0 is the destination
+;; Argument 1 is the length
+;; Argument 2 is the constant byte
+;; Argument 3 is the alignment
+
+(define_expand "setmem<mode>"
+  [(parallel [(set (match_operand:BLK 0 "memory_operand")
+                  (match_operand:QI 2 "const_int_operand"))
+             (use (match_operand:P 1 ""))
+             (use (match_operand:SI 3 "const_int_operand"))])]
+ ""
+ {
+  /* Only a zero fill is expanded here; FAIL for any other value.  */
+  if (operands[2] != const0_rtx)
+    FAIL;
+
+  if (riscv_expand_block_clear (operands[0], operands[1]))
+    DONE;
+  else
+    FAIL;
+})
+
+
 ;; Expand in-line code to clear the instruction cache between operand[0] and
 ;; operand[1].
 (define_expand "clear_cache"
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
new file mode 100644
index 000000000000..c2d79eb7ae68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zic64b_zicboz" { target { rv64 } } } */
+/* { dg-options "-march=rv32gc_zic64b_zicboz" { target { rv32 } } } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-allow-blank-lines-in-output 1 } */
+
+/*
+**clear_buf_123:
+**    ...
+**    cbo\.zero\t0\(a[0-9]+\)
+**    sd\tzero,64\(a[0-9]+\)
+**    sd\tzero,72\(a[0-9]+\)
+**    sd\tzero,80\(a[0-9]+\)
+**    sd\tzero,88\(a[0-9]+\)
+**    sd\tzero,96\(a[0-9]+\)
+**    sd\tzero,104\(a[0-9]+\)
+**    sd\tzero,112\(a[0-9]+\)
+**    sh\tzero,120\(a[0-9]+\)
+**    sb\tzero,122\(a[0-9]+\)
+**    ...
+*/
+int
+clear_buf_123 (void *p)
+{
+  p = __builtin_assume_aligned(p, 64);  /* Promise 64-byte alignment.  */
+  __builtin_memset (p, 0, 123);  /* Expect 1 cbo.zero + 59 bytes of stores.  */
+}  /* NOTE(review): declared int but returns no value -- confirm intended.  */
+
+/*
+**clear_buf_128:
+**    ...
+**    cbo\.zero\t0\(a[0-9]+\)
+**    addi\ta[0-9]+,a[0-9]+,64
+**    cbo\.zero\t0\(a[0-9]+\)
+**    ...
+*/
+int
+clear_buf_128 (void *p)
+{
+  p = __builtin_assume_aligned(p, 64);  /* Promise 64-byte alignment.  */
+  __builtin_memset (p, 0, 128);  /* Expect 2 cbo.zero and no tail stores.  */
+}  /* NOTE(review): declared int but returns no value -- confirm intended.  */

Reply via email to