https://gcc.gnu.org/g:236116068151bbc72aaaf53d0f223fe06f7e3bac

commit r15-864-g236116068151bbc72aaaf53d0f223fe06f7e3bac
Author: Lyut Nersisyan <lyut.nersis...@gmail.com>
Date:   Tue May 28 09:17:50 2024 -0600

    [to-be-committed] [RISC-V] Some basic patterns for zbkb code generation
    
    And here's Lyut's basic Zbkb support.  Essentially it's four new patterns for
    packh, packw, pack plus a bridge pattern needed for packh.
    
    packw is a bit ugly as we need to match a sign extension in an inconvenient
    location.  We pull it out so that the extension is exposed in a convenient
    place for subsequent sign extension elimination.
    
    We need a bridge pattern to get packh.  Thankfully the bridge pattern is a
    degenerate packh where one operand is x0, so it works as-is without splitting
    and provides the bridge to the more general form of packh.
    
    This patch also refines the condition for the constant reassociation patch to
    avoid a few more cases that can be handled efficiently with other preexisting
    patterns, and includes one bugfix to avoid losing bits, particularly in the
    xor/ior case.
    
    Lyut did the core work here.  I think I did some minor cleanups and the bridge
    pattern to work with gcc-15 and beyond.
    
    This is a prerequisite for using zbkb in constant synthesis.  It also stands on
    its own.  I know we've seen it trigger in spec without the constant synthesis
    bits.
    
    It's been through our internal CI and my tester.  I'll obviously wait for the
    upstream CI to finish before taking further action.
    
    gcc/
            * config/riscv/crypto.md: Add new combiner patterns to generate
            pack, packh, packw instructions.
            * config/riscv/iterators.md (HX): New iterator for half X mode.
            * config/riscv/riscv.md (<optab>_shift_reverse<X:mode>): Tighten
            cases to avoid.  Do not lose bits for XOR/IOR.
    
    gcc/testsuite
    
            * gcc.target/riscv/pack32.c: New test.
            * gcc.target/riscv/pack64.c: New test.
            * gcc.target/riscv/packh32.c: New test.
            * gcc.target/riscv/packh64.c: New test.
            * gcc.target/riscv/packw.c: New test.
    
            Co-authored-by: Jeffrey A Law <j...@ventanamicro.com>

Diff:
---
 gcc/config/riscv/crypto.md               | 63 ++++++++++++++++++++++++++++++++
 gcc/config/riscv/iterators.md            |  3 ++
 gcc/config/riscv/riscv.md                |  9 +++--
 gcc/testsuite/gcc.target/riscv/pack32.c  | 18 +++++++++
 gcc/testsuite/gcc.target/riscv/pack64.c  | 17 +++++++++
 gcc/testsuite/gcc.target/riscv/packh32.c | 13 +++++++
 gcc/testsuite/gcc.target/riscv/packh64.c |  6 +++
 gcc/testsuite/gcc.target/riscv/packw.c   | 13 +++++++
 8 files changed, 139 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/crypto.md b/gcc/config/riscv/crypto.md
index dd2bc94ee88..b632312ade2 100644
--- a/gcc/config/riscv/crypto.md
+++ b/gcc/config/riscv/crypto.md
@@ -104,6 +104,19 @@
   "pack\t%0,%1,%2"
   [(set_attr "type" "crypto")])
 
+;; This is slightly more complex than the other pack patterns
+;; that fully expose the RTL as it needs to self-adjust to
+;; rv32 and rv64.  But it's not that hard.
+(define_insn "*riscv_xpack_<X:mode>_2"
+  [(set (match_operand:X 0 "register_operand" "=r")
+       (ior:X (ashift:X (match_operand:X 1 "register_operand" "r")
+                        (match_operand 2 "immediate_operand" "n"))
+              (zero_extend:X
+                (match_operand:HX 3 "register_operand" "r"))))]
+  "TARGET_ZBKB && INTVAL (operands[2]) == BITS_PER_WORD / 2"
+  "pack\t%0,%3,%1"
+  [(set_attr "type" "crypto")])
+
 (define_insn "riscv_packh_<mode>"
   [(set (match_operand:X 0 "register_operand" "=r")
         (unspec:X [(match_operand:QI 1 "register_operand" "r")
@@ -113,6 +126,29 @@
   "packh\t%0,%1,%2"
   [(set_attr "type" "crypto")])
 
+;; So this is both a useful pattern unto itself and a bridge to the
+;; general packh pattern below.
+(define_insn "*riscv_packh_<mode>_2"
+  [(set (match_operand:X 0 "register_operand" "=r")
+       (and:X (ashift:X (match_operand:X 1 "register_operand" "r")
+                        (const_int 8))
+              (const_int 65280)))]
+ "TARGET_ZBKB"
+ "packh\t%0,x0,%1"
+ [(set_attr "type" "crypto")])
+
+;; While the two operands of the IOR could be swapped, this appears
+;; to be the canonical form.  The other form doesn't seem to trigger.
+(define_insn "*riscv_packh_<mode>_3"
+  [(set (match_operand:X 0 "register_operand" "=r")
+       (ior:X (and:X (ashift:X (match_operand:X 1 "register_operand" "r")
+                               (const_int 8))
+                     (const_int 65280))
+              (zero_extend:X (match_operand:QI 2 "register_operand" "r"))))]
+ "TARGET_ZBKB"
+ "packh\t%0,%2,%1"
+ [(set_attr "type" "crypto")])
+
 (define_insn "riscv_packw"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (unspec:DI [(match_operand:HI 1 "register_operand" "r")
@@ -122,6 +158,33 @@
   "packw\t%0,%1,%2"
   [(set_attr "type" "crypto")])
 
+;; Implemented as a splitter for initial recognition.  It generates
+;; new RTL with the extension moved to the outer position.  This
+;; allows later code to eliminate subsequent explicit sign extensions.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (ior:DI (ashift:DI
+                 (sign_extend:DI (match_operand:HI 1 "register_operand"))
+                 (const_int 16))
+               (zero_extend:DI (match_operand:HI 2 "register_operand"))))]
+  "TARGET_ZBKB && TARGET_64BIT"
+  [(set (match_dup 0)
+       (sign_extend:DI (ior:SI (ashift:SI (match_dup 1) (const_int 16))
+                               (zero_extend:SI (match_dup 2)))))]
+  "operands[1] = gen_lowpart (SImode, operands[1]);")
+
+;; And this matches the result of the splitter above.
+(define_insn "*riscv_packw_2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (sign_extend:DI
+         (ior:SI
+           (ashift:SI (match_operand:SI 1 "register_operand" "r")
+                      (const_int 16))
+           (zero_extend:SI (match_operand:HI 2 "register_operand" "r")))))]
+  "TARGET_ZBKB && TARGET_64BIT"
+  "packw\t%0,%2,%1"
+  [(set_attr "type" "crypto")])
+
 ;; ZBKX extension
 
 (define_insn "riscv_xperm4_<mode>"
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 8a9d1986b4a..3c139bc2e30 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -37,6 +37,9 @@
 ;; Likewise, but for XLEN-sized quantities.
 (define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")])
 
+;; Likewise, but for XLEN/2 -sized quantities.
+(define_mode_iterator HX [(HI "!TARGET_64BIT") (SI "TARGET_64BIT")])
+
 ;; Branches operate on XLEN-sized quantities, but for RV64 we accept
 ;; QImode values so we can force zero-extension.
 (define_mode_iterator BR [(QI "TARGET_64BIT") SI (DI "TARGET_64BIT")])
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index fe74b8dcd3b..25d341ec987 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2844,9 +2844,12 @@
                             (match_operand 2 "immediate_operand" "n"))
                   (match_operand 3 "immediate_operand" "n")))]
   "(!SMALL_OPERAND (INTVAL (operands[3]))
-   && SMALL_OPERAND (INTVAL (operands[3]) >> INTVAL (operands[2]))
-   && (popcount_hwi (INTVAL (operands[3]))
-       <= popcount_hwi (INTVAL (operands[3]) >> INTVAL (operands[2]))))"
+    && SMALL_OPERAND (INTVAL (operands[3]) >> INTVAL (operands[2]))
+    && popcount_hwi (INTVAL (operands[3])) > 1
+    && (!TARGET_64BIT
+       || (exact_log2 ((INTVAL (operands[3]) >> INTVAL (operands[2])) + 1)
+            == -1))
+    && (INTVAL (operands[3]) & ((1ULL << INTVAL (operands[2])) - 1)) == 0)"
   "#"
   "&& 1"
   [(set (match_dup 0) (any_bitwise:X (match_dup 1) (match_dup 3)))
diff --git a/gcc/testsuite/gcc.target/riscv/pack32.c b/gcc/testsuite/gcc.target/riscv/pack32.c
new file mode 100644
index 00000000000..24304d6b614
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pack32.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv32gc_zbkb -mabi=ilp32" } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+
+#include <stdint-gcc.h>
+
+uint32_t foo1(uint32_t rs1, uint32_t rs2)
+{
+    return (rs1 << 16) | ((rs2 << 16) >> 16);
+}
+
+uint32_t foo2(uint32_t rs1, uint32_t rs2)
+{
+    return (rs1 << 16) | (rs2 & 65535);
+}
+
+/* { dg-final { scan-assembler-times "\\spack\\s" 2 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/pack64.c b/gcc/testsuite/gcc.target/riscv/pack64.c
new file mode 100644
index 00000000000..7f54baabb2c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pack64.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbkb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+#include <stdint-gcc.h>
+
+uint64_t foo1(uint64_t rs1, uint64_t rs2)
+{
+    return (rs1 << 32) | ((rs2 << 32) >> 32);
+}
+
+uint64_t foo2(uint64_t rs1, uint64_t rs2)
+{
+    return (rs1 << 32) | (rs2 & 4294967295);
+}
+
+/* { dg-final { scan-assembler-times "\\spack\\s" 2 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/packh32.c b/gcc/testsuite/gcc.target/riscv/packh32.c
new file mode 100644
index 00000000000..803224189d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/packh32.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv32gc_zbkb -mabi=ilp32" } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+
+#include <stdint-gcc.h>
+
+int32_t foo1(int32_t rs1, int32_t rs2)
+{
+    return (rs1 & 255) | ((rs2 & 255) << 8);
+}
+
+/* { dg-final { scan-assembler-times "\\spackh\\s" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/packh64.c b/gcc/testsuite/gcc.target/riscv/packh64.c
new file mode 100644
index 00000000000..b91d4014c9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/packh64.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbkb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+#include "packh32.c"
+/* { dg-final { scan-assembler-times "\\spackh\\s" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/packw.c b/gcc/testsuite/gcc.target/riscv/packw.c
new file mode 100644
index 00000000000..c178738a43b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/packw.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbkb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+#include <stdint-gcc.h>
+
+uint32_t foo1(uint32_t rs1, uint32_t rs2)
+{
+    return (rs1 << 16) | ((rs2 << 16) >> 16);
+}
+
+/* { dg-final { scan-assembler-times "\\spackw\\s" 1 } } */
+/* { dg-final { scan-assembler-not "\\ssext\\s" } } */
+

Reply via email to