https://gcc.gnu.org/g:3a915d6ad5fc3e0fadd14e54515b48b1d655c5a4

commit r15-838-g3a915d6ad5fc3e0fadd14e54515b48b1d655c5a4
Author: Jeff Law <j...@ventanamicro.com>
Date:   Sun May 26 17:54:51 2024 -0600

    [to-be-committed] [RISC-V] Try inverting for constant synthesis
    
    So there's another class of constants we're failing to synthesize well.
    Specifically those where we can invert our original constant C into C' and C'
    takes at least 2 fewer instructions to synthesize than C.  In that case we can
    initially generate C', then use xori with the constant -1 to flip all the bits,
    resulting in our target constant.
    
    I've only seen this trigger when the final synthesis is li+srli+xori. The
    original synthesis took on various 4 or 5 instruction forms.
    
    Most of the methods we use to improve constant synthesis are in
    riscv_build_integer_1.  I originally tried to put this code in there. But
    that'll end up with infinite recursion due to some other ADDI related code
    which wants to flip bits and try synthesis.
    
    So this was put into riscv_build_integer and recurses into
    riscv_build_integer.  This isn't unprecedented, just a bit different than
    most of the other synthesis implementation bits.
    
    This doesn't depend on any extensions.  So it should help any rv64 system.
    
    gcc/
    
            * config/riscv/riscv.cc (riscv_build_integer_1): Verify there
            are no bits left to set in the constant when generating bseti.
            (riscv_build_integer): Synthesize ~value and if it's cheap use it
            with a trailing xori with -1.
    
    gcc/testsuite
    
            * gcc.target/riscv/synthesis-8.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc                    | 27 +++++++++++++++++++++-
 gcc/testsuite/gcc.target/riscv/synthesis-8.c | 34 ++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0ebdd696a95..401ddc9eeb7 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1022,7 +1022,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
 
       /* If LUI+ADDI+BSETI resulted in a more efficient
         sequence, then use it.  */
-      if (i < cost)
+      if (value == 0 && i < cost)
        {
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = i;
@@ -1074,6 +1074,31 @@ riscv_build_integer (struct riscv_integer_op *codes, 
HOST_WIDE_INT value,
        }
     }
 
+  /* See if we can generate the inverted constant, then use
+     not to get the desired constant.
+
+     This can't be in riscv_build_integer_1 as it'll mutually
+     recurse with another case in there.  And it has to recurse
+     into riscv_build_integer so we get the trailing 0s case
+     above.  */
+  if (cost > 2 && value < 0)
+    {
+      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
+      int alt_cost;
+
+      HOST_WIDE_INT nval = ~value;
+      alt_cost = 1 + riscv_build_integer (alt_codes, nval, mode);
+      if (alt_cost < cost)
+       {
+         alt_codes[alt_cost - 1].code = XOR;
+         alt_codes[alt_cost - 1].value = -1;
+         alt_codes[alt_cost - 1].use_uw = false;
+         memcpy (codes, alt_codes, sizeof (alt_codes));
+         cost = alt_cost;
+       }
+    }
+
+
   if (!TARGET_64BIT
       && (value > INT32_MAX || value < INT32_MIN))
     {
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-8.c 
b/gcc/testsuite/gcc.target/riscv/synthesis-8.c
new file mode 100644
index 00000000000..2bcdb4e774d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-8.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } 
*/
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times 
"\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli|srli|xori)" 72 } } */
+
+unsigned long foo_0xc0000000000077ff(void) { return 0xc0000000000077ffUL; }
+unsigned long foo_0xc00000000000b7ff(void) { return 0xc00000000000b7ffUL; }
+unsigned long foo_0xc0000000000137ff(void) { return 0xc0000000000137ffUL; }
+unsigned long foo_0xc0000000000237ff(void) { return 0xc0000000000237ffUL; }
+unsigned long foo_0xc0000000000437ff(void) { return 0xc0000000000437ffUL; }
+unsigned long foo_0xc0000000000837ff(void) { return 0xc0000000000837ffUL; }
+unsigned long foo_0xc0000000001037ff(void) { return 0xc0000000001037ffUL; }
+unsigned long foo_0xc0000000002037ff(void) { return 0xc0000000002037ffUL; }
+unsigned long foo_0xc0000000004037ff(void) { return 0xc0000000004037ffUL; }
+unsigned long foo_0xc0000000008037ff(void) { return 0xc0000000008037ffUL; }
+unsigned long foo_0xc0000000010037ff(void) { return 0xc0000000010037ffUL; }
+unsigned long foo_0xc0000000020037ff(void) { return 0xc0000000020037ffUL; }
+unsigned long foo_0xc0000000040037ff(void) { return 0xc0000000040037ffUL; }
+unsigned long foo_0xc0000000080037ff(void) { return 0xc0000000080037ffUL; }
+unsigned long foo_0xc0000000100037ff(void) { return 0xc0000000100037ffUL; }
+unsigned long foo_0xe0000000000037ff(void) { return 0xe0000000000037ffUL; }
+unsigned long foo_0xc00000000000d7ff(void) { return 0xc00000000000d7ffUL; }
+unsigned long foo_0xc0000000000157ff(void) { return 0xc0000000000157ffUL; }

Reply via email to