https://gcc.gnu.org/g:af4bf422a699de0e7af5a26e02997d313e7301a6

commit r15-953-gaf4bf422a699de0e7af5a26e02997d313e7301a6
Author: Robin Dapp <rd...@ventanamicro.com>
Date:   Mon May 13 22:09:35 2024 +0200

    RISC-V: Add vwsll combine helpers.
    
    This patch enables the usage of vwsll in autovec context by adding the
    necessary combine patterns and tests.
    
    gcc/ChangeLog:
    
            * config/riscv/autovec-opt.md (*vwsll_zext1_<mode>): New
            pattern.
            (*vwsll_zext2_<mode>): Ditto.
            (*vwsll_zext1_scalar_<mode>): Ditto.
            (*vwsll_zext1_trunc_<mode>): Ditto.
            (*vwsll_zext2_trunc_<mode>): Ditto.
            (*vwsll_zext1_trunc_scalar_<mode>): Ditto.
            * config/riscv/vector-crypto.md: Make pattern similar to other
            narrowing/widening patterns.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/binop/vwsll-1.c: New test.
            * gcc.target/riscv/rvv/autovec/binop/vwsll-run.c: New test.
            * gcc.target/riscv/rvv/autovec/binop/vwsll-template.h: New test.

Diff:
---
 gcc/config/riscv/autovec-opt.md                    | 126 ++++++++++++++++++++-
 gcc/config/riscv/vector-crypto.md                  |   2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vwsll-1.c   |  10 ++
 .../gcc.target/riscv/rvv/autovec/binop/vwsll-run.c |  67 +++++++++++
 .../riscv/rvv/autovec/binop/vwsll-template.h       |  49 ++++++++
 5 files changed, 251 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 04f85d8e455..bc6af042bcf 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1467,5 +1467,127 @@
        operands, operands[4]);
     DONE;
   }
-  [(set_attr "type" "vector")]
-)
+  [(set_attr "type" "vector")])
+
+;; vzext.vf2 + vsll = vwsll.
+(define_insn_and_split "*vwsll_zext1_<mode>"
+  [(set (match_operand:VWEXTI 0                    "register_operand"     "=vr ")
+      (ashift:VWEXTI
+       (zero_extend:VWEXTI
+         (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"     " vr "))
+         (match_operand:<V_DOUBLE_TRUNC> 2 "vector_shift_operand" "vrvk")))]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"               ;; No assembly template: this insn is always split.
+  "&& 1"            ;; Split as soon as the insn condition above holds.
+  [(const_int 0)]   ;; Placeholder; the C block below emits the real insn.
+  {
+    insn_code icode = code_for_pred_vwsll (<MODE>mode);
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+    DONE;
+  }
+  [(set_attr "type" "vwsll")])
+
+(define_insn_and_split "*vwsll_zext2_<mode>"
+  [(set (match_operand:VWEXTI 0                    "register_operand"     "=vr ")
+      (ashift:VWEXTI
+       (zero_extend:VWEXTI
+         (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"     " vr "))
+       (zero_extend:VWEXTI
+         (match_operand:<V_DOUBLE_TRUNC> 2 "vector_shift_operand" "vrvk"))))]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"               ;; No assembly template: this insn is always split.
+  "&& 1"            ;; Split as soon as the insn condition above holds.
+  [(const_int 0)]   ;; Placeholder; the C block below emits the real insn.
+  {
+    insn_code icode = code_for_pred_vwsll (<MODE>mode);
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+    DONE;
+  }
+  [(set_attr "type" "vwsll")])
+
+
+(define_insn_and_split "*vwsll_zext1_scalar_<mode>"
+  [(set (match_operand:VWEXTI 0                    "register_operand"            "=vr")
+      (ashift:VWEXTI
+       (zero_extend:VWEXTI
+         (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"            " vr"))
+         (match_operand:<VEL>            2 "vector_scalar_shift_operand" " rK")))]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"               ;; No assembly template: this insn is always split.
+  "&& 1"            ;; Split as soon as the insn condition above holds.
+  [(const_int 0)]   ;; Placeholder; the C block below emits the real insn.
+  {
+    if (GET_CODE (operands[2]) == SUBREG)
+      operands[2] = SUBREG_REG (operands[2]);  /* Use the inner reg.  */
+    insn_code icode = code_for_pred_vwsll_scalar (<MODE>mode);
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+    DONE;
+  }
+  [(set_attr "type" "vwsll")])
+
+;; For
+;;   uint16_t dst;
+;;   uint8_t a, b;
+;;   dst = vwsll (a, b)
+;; we seem to create
+;;   aa = (int) a;
+;;   bb = (int) b;
+;;   dst = (short) vwsll (aa, bb);
+;; The following patterns help to combine this idiom into one vwsll.
+
+(define_insn_and_split "*vwsll_zext1_trunc_<mode>"
+  [(set (match_operand:<V_DOUBLE_TRUNC> 0   "register_operand"    "=vr ")
+    (truncate:<V_DOUBLE_TRUNC>
+      (ashift:VQEXTI
+       (zero_extend:VQEXTI
+         (match_operand:<V_QUAD_TRUNC> 1   "register_operand"     " vr "))
+       (match_operand:VQEXTI           2   "vector_shift_operand" "vrvk"))))]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"               ;; No assembly template: this insn is always split.
+  "&& 1"            ;; Split as soon as the insn condition above holds.
+  [(const_int 0)]   ;; Placeholder; the C block below emits the real insn.
+  {  /* The vwsll is emitted in the truncated (operand 0) mode.  */
+    insn_code icode = code_for_pred_vwsll (<V_DOUBLE_TRUNC>mode);
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+    DONE;
+  }
+  [(set_attr "type" "vwsll")])
+
+(define_insn_and_split "*vwsll_zext2_trunc_<mode>"
+  [(set (match_operand:<V_DOUBLE_TRUNC> 0   "register_operand"    "=vr ")
+    (truncate:<V_DOUBLE_TRUNC>
+      (ashift:VQEXTI
+       (zero_extend:VQEXTI
+         (match_operand:<V_QUAD_TRUNC> 1   "register_operand"     " vr "))
+       (zero_extend:VQEXTI
+         (match_operand:<V_QUAD_TRUNC> 2   "vector_shift_operand" "vrvk")))))]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"               ;; No assembly template: this insn is always split.
+  "&& 1"            ;; Split as soon as the insn condition above holds.
+  [(const_int 0)]   ;; Placeholder; the C block below emits the real insn.
+  {  /* The vwsll is emitted in the truncated (operand 0) mode.  */
+    insn_code icode = code_for_pred_vwsll (<V_DOUBLE_TRUNC>mode);
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+    DONE;
+  }
+  [(set_attr "type" "vwsll")])
+
+(define_insn_and_split "*vwsll_zext1_trunc_scalar_<mode>"
+  [(set (match_operand:<V_DOUBLE_TRUNC> 0   "register_operand"           "=vr ")
+    (truncate:<V_DOUBLE_TRUNC>
+      (ashift:VQEXTI
+       (zero_extend:VQEXTI
+         (match_operand:<V_QUAD_TRUNC> 1   "register_operand"            " vr "))
+         (match_operand:<VEL>          2   "vector_scalar_shift_operand" " rK"))))]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"               ;; No assembly template: this insn is always split.
+  "&& 1"            ;; Split as soon as the insn condition above holds.
+  [(const_int 0)]   ;; Placeholder; the C block below emits the real insn.
+  {
+    if (GET_CODE (operands[2]) == SUBREG)
+      operands[2] = SUBREG_REG (operands[2]);  /* Use the inner reg.  */
+    insn_code icode = code_for_pred_vwsll_scalar (<V_DOUBLE_TRUNC>mode);
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+    DONE;
+  }
+  [(set_attr "type" "vwsll")])
diff --git a/gcc/config/riscv/vector-crypto.md b/gcc/config/riscv/vector-crypto.md
index e474ddf5da7..24822e2712c 100755
--- a/gcc/config/riscv/vector-crypto.md
+++ b/gcc/config/riscv/vector-crypto.md
@@ -298,7 +298,7 @@
          (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"  "vr"))
        (match_operand:VWEXTI 2 "vector_merge_operand" "0vu")))]
   "TARGET_ZVBB"
-  "vwsll.vv\t%0,%3,%4%p1"
+  "vwsll.v%o4\t%0,%3,%4%p1"
   [(set_attr "type" "vwsll")
    (set_attr "mode" "<V_DOUBLE_TRUNC>")])
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vwsll-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vwsll-1.c
new file mode 100644
index 00000000000..a2e5b4f5aa1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vwsll-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include "vwsll-template.h"
+
+/* { dg-final { scan-assembler-times {\tvwsll\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwsll\.vx} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwsll\.vi} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vwsll-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vwsll-run.c
new file mode 100644
index 00000000000..ddb84618b50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vwsll-run.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include "vwsll-template.h"
+
+#include <assert.h>
+
+#define SZ 512
+
+#define RUN(TYPE1, TYPE2, VAL) /* Check vwsll_vv: b[i] = i % 4.  */            \
+  TYPE1 dst##TYPE1[SZ];                                                        \
+  TYPE2 a##TYPE2[SZ];                                                          \
+  TYPE2 b##TYPE2[SZ];                                                          \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      dst##TYPE1[i] = 0;                                                       \
+      a##TYPE2[i] = VAL;                                                       \
+      b##TYPE2[i] = i % 4;                                                     \
+    }                                                                          \
+  vwsll_vv##TYPE1 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ);                        \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst##TYPE1[i] == (VAL << (i % 4)));
+
+#define RUN2(TYPE1, TYPE2, VAL) /* Check vwsll_vx: scalar shift of 7.  */      \
+  TYPE1 dst2##TYPE1[SZ];                                                       \
+  TYPE2 a2##TYPE2[SZ];                                                         \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      dst2##TYPE1[i] = 0;                                                      \
+      a2##TYPE2[i] = VAL;                                                      \
+    }                                                                          \
+  TYPE2 b2##TYPE2 = 7;                                                         \
+  vwsll_vx##TYPE1 (dst2##TYPE1, a2##TYPE2, b2##TYPE2, SZ);                     \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst2##TYPE1[i] == (VAL << b2##TYPE2));
+
+#define RUN3(TYPE1, TYPE2, VAL) /* Check vwsll_vi: constant shift 6.  */       \
+  TYPE1 dst3##TYPE1[SZ];                                                       \
+  TYPE2 a3##TYPE2[SZ];                                                         \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      dst3##TYPE1[i] = 0;                                                      \
+      a3##TYPE2[i] = VAL;                                                      \
+    }                                                                          \
+  vwsll_vi##TYPE1 (dst3##TYPE1, a3##TYPE2, SZ);                                \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst3##TYPE1[i] == (VAL << 6));
+
+#define RUN_ALL() /* One case per shift form and element width.  */            \
+  RUN (uint16_t, uint8_t, 2)                                                   \
+  RUN (uint32_t, uint16_t, 2)                                                  \
+  RUN (uint64_t, uint32_t, 4)                                                  \
+  RUN2 (uint16_t, uint8_t, 8)                                                  \
+  RUN2 (uint32_t, uint16_t, 8)                                                 \
+  RUN2 (uint64_t, uint32_t, 10)                                                \
+  RUN3 (uint16_t, uint8_t, 255)                                                \
+  RUN3 (uint32_t, uint16_t, 34853)                                           \
+  RUN3 (uint64_t, uint32_t, 1794394)
+
+int
+main ()
+{
+  RUN_ALL () /* Expands all checks inline; a failed assert aborts the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vwsll-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vwsll-template.h
new file mode 100644
index 00000000000..376cbaee0d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vwsll-template.h
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+#define TEST1_TYPE(TYPE1, TYPE2) /* Vector shift amounts (b[i]).  */           \
+  __attribute__ ((noipa)) void vwsll_vv##TYPE1 (TYPE1 *restrict dst,           \
+                                               TYPE2 *restrict a,             \
+                                               TYPE2 *restrict b, int n)      \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = (TYPE1) a[i] << b[i];                                           \
+  }
+
+#define TEST2_TYPE(TYPE1, TYPE2) /* Loop-invariant scalar shift b.  */         \
+  __attribute__ ((noipa)) void vwsll_vx##TYPE1 (TYPE1 *restrict dst,           \
+                                               TYPE2 *restrict a, TYPE2 b,    \
+                                               int n)                         \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = (TYPE1) a[i] << b;                                              \
+  }
+
+#define TEST3_TYPE(TYPE1, TYPE2) /* Compile-time constant shift 6.  */         \
+  __attribute__ ((noipa)) void vwsll_vi##TYPE1 (TYPE1 *restrict dst,           \
+                                               TYPE2 *restrict a, int n)      \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = (TYPE1) a[i] << 6;                                             \
+  }
+
+#define TEST_ALL() /* Instantiate each form for 8/16/32-bit inputs.  */        \
+  TEST1_TYPE (uint16_t, uint8_t)                                               \
+  TEST1_TYPE (uint32_t, uint16_t)                                              \
+  TEST1_TYPE (uint64_t, uint32_t)                                              \
+  TEST2_TYPE (uint16_t, uint8_t)                                               \
+  TEST2_TYPE (uint32_t, uint16_t)                                              \
+  TEST2_TYPE (uint64_t, uint32_t)                                              \
+  TEST3_TYPE (uint16_t, uint8_t)                                               \
+  TEST3_TYPE (uint32_t, uint16_t)                                              \
+  TEST3_TYPE (uint64_t, uint32_t)
+
+TEST_ALL()
+
+/* { dg-final { scan-assembler-times {\tvwsll\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwsll\.vx} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwsll\.vi} 3 } } */

Reply via email to