This fixes a minor code quality issue I found while comparing GCC and LLVM. Essentially we want to do a bit of re-association to generate shNadd.uw instructions.

Combine does the right thing and finds all the necessary instructions, reassociates the operands, combines constants, etc. Where it fails is finding a good split point. The backend can trivially provide guidance on how to split via a define_split pattern.

This has survived both Ventana's internal CI system (rv64gcb) as well as my own (rv64gc, rv32gcv).

I'll wait for the external CI system to give the all-clear before pushing.



jeff

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index ad3ad758959..d76a72d30e0 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -184,6 +184,23 @@ (define_insn "*slliuw"
   [(set_attr "type" "bitmanip")
    (set_attr "mode" "DI")])
 
+;; Combine will reassociate the operands in the most useful way here.  We
+;; just have to give it guidance on where to split the result to facilitate
+;; shNadd.uw generation.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (plus:DI (plus:DI (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
+                                            (match_operand:QI 2 "imm123_operand"))
+                                 (match_operand 3 "consecutive_bits32_operand"))
+                         (match_operand:DI 4 "register_operand"))
+                (match_operand 5 "immediate_operand")))]
+  "TARGET_64BIT && TARGET_ZBA"
+  [(set (match_dup 0)
+       (plus:DI (and:DI (ashift:DI (match_dup 1) (match_dup 2))
+                        (match_dup 3))
+                (match_dup 4)))
+   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 5)))])
+
 ;; ZBB extension.
 
 (define_expand "clzdi2"
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shadduw.c b/gcc/testsuite/gcc.target/riscv/zba-shadduw.c
new file mode 100644
index 00000000000..5b77447e681
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zba-shadduw.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zba -mabi=lp64" } */
+
+typedef struct simple_bitmap_def
+{
+  unsigned char *popcount;
+  unsigned int n_bits;
+  unsigned int size;
+  unsigned long elms[1];
+} *sbitmap;
+typedef const struct simple_bitmap_def *const_sbitmap;
+
+typedef unsigned long *sbitmap_ptr;
+typedef const unsigned long *const_sbitmap_ptr;
+static unsigned long sbitmap_elt_popcount (unsigned long);
+
+void
+sbitmap_a_or_b (sbitmap dst, const_sbitmap a, const_sbitmap b)
+{ /* Store the bitwise OR of a->elms[i] and b->elms[i] into dst->elms[i].  */
+  unsigned int i, n = dst->size; /* Trip count is taken from dst->size.  */
+  sbitmap_ptr dstp = dst->elms;
+  const_sbitmap_ptr ap = a->elms;
+  const_sbitmap_ptr bp = b->elms;
+  unsigned char has_popcount = dst->popcount != ((void *) 0); /* Computed but not read again here.  */
+
+  for (i = 0; i < n; i++) /* i and n are unsigned int; the element pointers advance once per pass.  */
+    {
+      const unsigned long tmp = *ap++ | *bp++; /* OR one element from each input.  */
+      *dstp++ = tmp;
+    }
+}
+
+
+/* { dg-final { scan-assembler "sh3add.uw" } } */
+/* { dg-final { scan-assembler-not {\mslli.uw} } } */

Reply via email to