Hi,

This patch makes the vector multiply high (part) instructions
newly introduced in Power10 exploitable in vectorized loops by
renaming the existing define_insns to the standard pattern
names.  It depends on the patch which enables the vectorizer
to recognize mul_highpart.

Tested on powerpc64le-linux-gnu P9 with binutils that support
P10; I will test more once the vectorizer patch has
landed.

BR,
Kewen.
-----
gcc/ChangeLog:

        * config/rs6000/vsx.md (mulhs_<mode>): Rename to...
        (smul<mode>3_highpart): ... this.
        (mulhu_<mode>): Rename to...
        (umul<mode>3_highpart): ... this.
        * config/rs6000/rs6000-builtin.def (MULHS_V2DI, MULHS_V4SI,
        MULHU_V2DI, MULHU_V4SI): Adjust.

gcc/testsuite/ChangeLog:

        * gcc.target/powerpc/mul-vectorize-3.c: New test.
        * gcc.target/powerpc/mul-vectorize-4.c: New test.
---
 gcc/config/rs6000/rs6000-builtin.def          |  8 ++---
 gcc/config/rs6000/vsx.md                      |  4 +--
 .../gcc.target/powerpc/mul-vectorize-3.c      | 32 ++++++++++++++++++
 .../gcc.target/powerpc/mul-vectorize-4.c      | 33 +++++++++++++++++++
 4 files changed, 71 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c

diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 592efe31b04..cbacbc6b785 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -3016,10 +3016,10 @@ BU_P10V_AV_2 (MODS_V2DI, "vmodsd", CONST, modv2di3)
 BU_P10V_AV_2 (MODS_V4SI, "vmodsw", CONST, modv4si3)
 BU_P10V_AV_2 (MODU_V2DI, "vmodud", CONST, umodv2di3)
 BU_P10V_AV_2 (MODU_V4SI, "vmoduw", CONST, umodv4si3)
-BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, mulhs_v2di)
-BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, mulhs_v4si)
-BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, mulhu_v2di)
-BU_P10V_AV_2 (MULHU_V4SI, "vmulhuw", CONST, mulhu_v4si)
+BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, smulv2di3_highpart)
+BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, smulv4si3_highpart)
+BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, umulv2di3_highpart)
+BU_P10V_AV_2 (MULHU_V4SI, "vmulhuw", CONST, umulv4si3_highpart)
 BU_P10V_AV_2 (MULLD_V2DI, "vmulld", CONST, mulv2di3)
 
 BU_P10V_VSX_1 (VXXSPLTIW_V4SI, "vxxspltiw_v4si", CONST, xxspltiw_v4si)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f622873d758..6f6fc0bd835 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6351,7 +6351,7 @@ (define_insn "umod<mode>3"
   [(set_attr "type" "vecdiv")
    (set_attr "size" "<bits>")])
 
-(define_insn "mulhs_<mode>"
+(define_insn "smul<mode>3_highpart"
   [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
        (mult:VIlong (ashiftrt
                       (match_operand:VIlong 1 "vsx_register_operand" "v")
@@ -6363,7 +6363,7 @@ (define_insn "mulhs_<mode>"
   "vmulhs<wd> %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "mulhu_<mode>"
+(define_insn "umul<mode>3_highpart"
   [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
        (us_mult:VIlong (ashiftrt
                          (match_operand:VIlong 1 "vsx_register_operand" "v")
diff --git a/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c
new file mode 100644
index 00000000000..2c89c0faec2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c
@@ -0,0 +1,32 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test vectorizer can exploit ISA 3.1 instructions Vector Multiply
+   High Signed/Unsigned Word for both signed and unsigned int high part
+   multiplication.  */
+
+#define N 128
+
+extern signed int si_a[N], si_b[N], si_c[N];
+extern unsigned int ui_a[N], ui_b[N], ui_c[N];
+
+typedef signed long long sLL;
+typedef unsigned long long uLL;
+
+__attribute__ ((noipa)) void
+test_si ()
+{
+  for (int i = 0; i < N; i++)
+    si_c[i] = ((sLL) si_a[i] * (sLL) si_b[i]) >> 32;
+}
+
+__attribute__ ((noipa)) void
+test_ui ()
+{
+  for (int i = 0; i < N; i++)
+    ui_c[i] = ((uLL) ui_a[i] * (uLL) ui_b[i]) >> 32;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvmulhsw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulhuw\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c
new file mode 100644
index 00000000000..265e7588bb6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c
@@ -0,0 +1,33 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test vectorizer can exploit ISA 3.1 instructions Vector Multiply
+   High Signed/Unsigned Doubleword for both signed and unsigned long
+   long high part multiplication.  */
+
+#define N 128
+
+extern signed long long sll_a[N], sll_b[N], sll_c[N];
+extern unsigned long long ull_a[N], ull_b[N], ull_c[N];
+
+typedef signed __int128 s128;
+typedef unsigned __int128 u128;
+
+__attribute__ ((noipa)) void
+test_sll ()
+{
+  for (int i = 0; i < N; i++)
+    sll_c[i] = ((s128) sll_a[i] * (s128) sll_b[i]) >> 64;
+}
+
+__attribute__ ((noipa)) void
+test_ull ()
+{
+  for (int i = 0; i < N; i++)
+    ull_c[i] = ((u128) ull_a[i] * (u128) ull_b[i]) >> 64;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvmulhsd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulhud\M} 1 } } */
-- 
2.17.1

Reply via email to