[PATCH 4/7] arm: Auto-vectorization for MVE: vshl

Christophe Lyon via Gcc-patches Wed, 25 Nov 2020 05:54:56 -0800

This patch enables MVE vshlq instructions for auto-vectorization.  A
new MVE pattern is introduced that takes a vector of constants as
second operand, all constants being equal.


The existing mve_vshlq_n_<supf><mode> is kept, as it takes a single
immediate as second operand, and is used by arm_mve.h.

The vashl<mode>3 expander is added to vec-common.md.

2020-11-12  Christophe Lyon  <christophe.l...@linaro.org>

        gcc/
        * config/arm/mve.md (mve_vshlq_n_<mode>_imm): New entry.
        * config/arm/neon.md (vashl<mode>3): Rename into vashl<mode>3_neon.
        * config/arm/vec-common.md (vashl<mode>3): New expander.

        gcc/testsuite/
        * gcc.target/arm/simd/mve-vshl.c: Add tests for vshl.
---
 gcc/config/arm/mve.md                        | 19 +++++++++++++++
 gcc/config/arm/neon.md                       |  2 +-
 gcc/config/arm/vec-common.md                 |  7 ++++++
 gcc/testsuite/gcc.target/arm/simd/mve-vshl.c | 35 ++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vshl.c

diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index a5f5d75..ce82258 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1924,6 +1924,7 @@ (define_insn "mve_vrshrq_n_<supf><mode>"
 ;;
 ;; [vshlq_n_u, vshlq_n_s])
 ;;
+;; Version that takes an immediate as operand 2.
 (define_insn "mve_vshlq_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
@@ -1936,6 +1937,24 @@ (define_insn "mve_vshlq_n_<supf><mode>"
   [(set_attr "type" "mve_move")
 ])
 
+;; Version with a vector of immediates as operand 2.
+;; We only emit signed ('s') versions, since it makes no difference.
+(define_insn "mve_vshlq_n_<mode>_imm"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+       (ashift:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+                     (match_operand:MVE_2 2 "imm_for_neon_lshift_operand" "i")))
+  ]
+  "TARGET_HAVE_MVE"
+{
+  return neon_output_shift_immediate ("vshl", 's', &operands[2],
+                                    <MODE>mode,
+                                    VALID_NEON_QREG_MODE (<MODE>mode),
+                                    true);
+}
+  [(set_attr "type" "mve_move")
+])
+
 ;;
 ;; [vshlq_r_s, vshlq_r_u])
 ;;
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index e1263b0..cb7646e 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -870,7 +870,7 @@ (define_insn "*smax<mode>3_neon"
 ; generic vectorizer code.  It ends up creating a V2DI constructor with
 ; SImode elements.
 
-(define_insn "vashl<mode>3"
+(define_insn "vashl<mode>3_neon"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
        (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
                      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 687134a..4d04b0f 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -217,3 +217,10 @@ (define_expand "xor<mode>3"
                      (match_operand:VNINOTM1 2 "s_register_operand" "")))]
   "TARGET_NEON"
 )
+
+(define_expand "vashl<mode>3"
+  [(set (match_operand:VDQIW 0 "s_register_operand" "")
+       (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+                     (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "")))]
+  "TARGET_NEON || TARGET_HAVE_MVE"
+)
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
new file mode 100644
index 0000000..4ccc9a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
@@ -0,0 +1,35 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)                           \
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a) { \
+    int i;                                                             \
+    for (i=0; i<NB; i++) {                                             \
+      dest[i] = a[i] OP 5;                                             \
+    }                                                                  \
+}
+
+/* 64-bit vectors.  */
+FUNC(s, int, 32, 2, <<, vshl)
+FUNC(u, uint, 32, 2, <<, vshl)
+FUNC(s, int, 16, 4, <<, vshl)
+FUNC(u, uint, 16, 4, <<, vshl)
+FUNC(s, int, 8, 8, <<, vshl)
+FUNC(u, uint, 8, 8, <<, vshl)
+
+/* 128-bit vectors.  */
+FUNC(s, int, 32, 4, <<, vshl)
+FUNC(u, uint, 32, 4, <<, vshl)
+FUNC(s, int, 16, 8, <<, vshl)
+FUNC(u, uint, 16, 8, <<, vshl)
+FUNC(s, int, 8, 16, <<, vshl)
+FUNC(u, uint, 8, 16, <<, vshl)
+
+/* MVE has only 128-bit vectors, so we can vectorize only half of the
+   functions above.  */
+/* We only emit vshl.s, which is equivalent to vshl.u anyway.  */
+/* { dg-final { scan-assembler-times {vshl.s[0-9]+\tq[0-9]+, q[0-9]+} 6 } } */
-- 
2.7.4

[PATCH 4/7] arm: Auto-vectorization for MVE: vshl

Reply via email to