Hi,

r14-1832 adds a recognition pattern, ifn and optab for ABD
(ABsolute Difference).  We have had some vector absolute
difference unsigned instructions since ISA 3.0, but as the
associated test cases show, they are not exploited well
because we don't define them with a standard name.  So this
patch first renames them to use the standard name.  It also
merges the define_expand and the define_insn, as a separate
define_expand isn't needed.  Besides, it adjusts the RTL
pattern to use the generic umax and umin rather than
UNSPEC_VADU, which is more meaningful and can catch
umin/umax opportunities.

Bootstrapped and regtested on powerpc64-linux-gnu P8/P9
and powerpc64le-linux-gnu P9/P10.

I'm going to push this next week if there are no objections.

BR,
Kewen
-----

gcc/ChangeLog:

        * config/rs6000/altivec.md (p9_vadu<mode>3): Rename to ...
        (uabd<mode>3): ... this.  Update RTL pattern with umin and umax rather
        than UNSPEC_VADU.
        (vadu<mode>3): Remove.
        (UNSPEC_VADU): Remove.
        (usadv16qi): Replace gen_p9_vaduv16qi3 with gen_uabdv16qi3.
        (usadv8hi): Replace gen_p9_vaduv8hi3 with gen_uabdv8hi3.
        * config/rs6000/rs6000-builtins.def (__builtin_altivec_vadub): Replace
        expander with uabdv16qi3.
        (__builtin_altivec_vaduh): Adjust expander with uabdv8hi3.
        (__builtin_altivec_vaduw): Adjust expander with uabdv4si3.

gcc/testsuite/ChangeLog:

        * gcc.target/powerpc/abd-vectorize-1.c: New test.
        * gcc.target/powerpc/abd-vectorize-2.c: New test.
---
 gcc/config/rs6000/altivec.md                  | 25 +++++--------
 gcc/config/rs6000/rs6000-builtins.def         |  6 +--
 .../gcc.target/powerpc/abd-vectorize-1.c      | 27 ++++++++++++++
 .../gcc.target/powerpc/abd-vectorize-2.c      | 37 +++++++++++++++++++
 4 files changed, 77 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 5af9bf920a2..aa9d8fffc90 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -119,7 +119,6 @@ (define_c_enum "unspec"
    UNSPEC_STVLXL
    UNSPEC_STVRX
    UNSPEC_STVRXL
-   UNSPEC_VADU
    UNSPEC_VSLV
    UNSPEC_VSRV
    UNSPEC_VMULWHUB
@@ -4323,19 +4322,15 @@ (define_insn "*p8v_clz<mode>2"
   [(set_attr "type" "vecsimple")])

 ;; Vector absolute difference unsigned
-(define_expand "vadu<mode>3"
-  [(set (match_operand:VI 0 "register_operand")
-        (unspec:VI [(match_operand:VI 1 "register_operand")
-                   (match_operand:VI 2 "register_operand")]
-         UNSPEC_VADU))]
-  "TARGET_P9_VECTOR")
-
-;; Vector absolute difference unsigned
-(define_insn "p9_vadu<mode>3"
+(define_insn "uabd<mode>3"
   [(set (match_operand:VI 0 "register_operand" "=v")
-        (unspec:VI [(match_operand:VI 1 "register_operand" "v")
-                   (match_operand:VI 2 "register_operand" "v")]
-         UNSPEC_VADU))]
+       (minus:VI
+         (umax:VI
+           (match_operand:VI 1 "register_operand" "v")
+           (match_operand:VI 2 "register_operand" "v"))
+         (umin:VI
+           (match_dup 1)
+           (match_dup 2))))]
   "TARGET_P9_VECTOR"
   "vabsdu<wd> %0,%1,%2"
   [(set_attr "type" "vecsimple")])
@@ -4500,7 +4495,7 @@ (define_expand "usadv16qi"
   rtx zero = gen_reg_rtx (V4SImode);
   rtx psum = gen_reg_rtx (V4SImode);

-  emit_insn (gen_p9_vaduv16qi3 (absd, operands[1], operands[2]));
+  emit_insn (gen_uabdv16qi3 (absd, operands[1], operands[2]));
   emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
   emit_insn (gen_altivec_vsum4ubs (psum, absd, zero));
   emit_insn (gen_addv4si3 (operands[0], psum, operands[3]));
@@ -4521,7 +4516,7 @@ (define_expand "usadv8hi"
   rtx zero = gen_reg_rtx (V4SImode);
   rtx psum = gen_reg_rtx (V4SImode);

-  emit_insn (gen_p9_vaduv8hi3 (absd, operands[1], operands[2]));
+  emit_insn (gen_uabdv8hi3 (absd, operands[1], operands[2]));
   emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
   emit_insn (gen_altivec_vsum4shs (psum, absd, zero));
   emit_insn (gen_addv4si3 (operands[0], psum, operands[3]));
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 77eb0f7e406..07d18a8eced 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2377,13 +2377,13 @@
     VFIRSTMISMATCHOREOSINDEX_V4SI first_mismatch_or_eos_index_v4si {}

   const vsc __builtin_altivec_vadub (vsc, vsc);
-    VADUB vaduv16qi3 {}
+    VADUB uabdv16qi3 {}

   const vss __builtin_altivec_vaduh (vss, vss);
-    VADUH vaduv8hi3 {}
+    VADUH uabdv8hi3 {}

   const vsi __builtin_altivec_vaduw (vsi, vsi);
-    VADUW vaduv4si3 {}
+    VADUW uabdv4si3 {}

   const vsll __builtin_altivec_vbpermd (vsll, vsc);
     VBPERMD altivec_vbpermd {}
diff --git a/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c
new file mode 100644
index 00000000000..d63b887b4b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c
@@ -0,0 +1,27 @@
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+/* Exploit vector absolute difference unsigned.  */
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#define N 128
+#define PRAGMA(X) _Pragma (#X)
+#define UNROLL0 PRAGMA (GCC unroll 0)
+
+#define TEST(T)                                                                \
+  void uabd_##T (unsigned T *restrict a, unsigned T *restrict b,               \
+                unsigned T *restrict out)                                     \
+  {                                                                            \
+    UNROLL0                                                                    \
+    for (int i = 0; i < N; i++)                                                \
+      out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]);                            \
+  }
+
+TEST(char)
+TEST(short)
+TEST(int)
+
+/* { dg-final { scan-assembler-times {\mvabsdub\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvabsduh\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvabsduw\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c
new file mode 100644
index 00000000000..f5a80d8fbd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c
@@ -0,0 +1,37 @@
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+/* Exploit vector absolute difference unsigned.  */
+
+#define N 128
+#define PRAGMA(X) _Pragma (#X)
+#define UNROLL0 PRAGMA (GCC unroll 0)
+
+#define TEST1(TYPE)                                                            \
+  void test1_##TYPE (unsigned TYPE *restrict a, unsigned TYPE *restrict b,     \
+                    unsigned TYPE *restrict out)                              \
+  {                                                                            \
+    UNROLL0                                                                    \
+    for (int i = 0; i < N; i++)                                                \
+      out[i] = __builtin_abs (a[i] - b[i]);                                    \
+  }
+
+TEST1(char)
+TEST1(short)
+
+#define TEST2(TYPE1, TYPE2, FUNC)                                              \
+  void test2_##TYPE1 (unsigned TYPE1 *restrict a, unsigned TYPE1 *restrict b,  \
+                     unsigned TYPE1 *restrict out)                            \
+  {                                                                            \
+    UNROLL0                                                                    \
+    for (int i = 0; i < N; i++)                                                \
+      out[i] = __builtin_##FUNC ((TYPE2) a[i] - (TYPE2) b[i]);                 \
+  }
+
+TEST2(char, int, abs)
+TEST2(short, int, abs)
+TEST2(int, long long, llabs)
+
+/* { dg-final { scan-assembler-times {\mvabsdub\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvabsduh\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvabsduw\M} 1 } } */
--
2.39.1

Reply via email to