On Wed, May 26, 2021 at 1:17 PM Hongtao Liu <crazy...@gmail.com> wrote:
>
> On Wed, May 26, 2021 at 12:12 PM Andrew Pinski <pins...@gmail.com> wrote:
> >
> > On Tue, May 25, 2021 at 6:17 PM Hongtao Liu <crazy...@gmail.com> wrote:
> > >
> > > Update patch:
> > >   The new patch simplifies (vec_duplicate (not (nonimmediate_operand)))
> > > to (not (vec_duplicate (nonimmediate_operand))). This is not a
> > > straightforward simplification; it just adds a tendency to pull the
> > > NOT out of vec_duplicate.
> > >
> > >   For i386, it enables the optimization below:
> > >
> > > from
> > >         notl    %edi
> > >         vpbroadcastd    %edi, %xmm0
> > >         vpand   %xmm1, %xmm0, %xmm0
> > > to
> > >         vpbroadcastd    %edi, %xmm0
> > >         vpandn   %xmm1, %xmm0, %xmm0
> > >
> > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > >   Ok for trunk?
> > > gcc/ChangeLog:
> > >
> > >         PR target/100711
> > >         * simplify-rtx.c (simplify_unary_operation_1):
> > >         Simplify (vec_duplicate (not (nonimmediate_operand)))
> > >         to (not (vec_duplicate (nonimmediate_operand))).
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         PR target/100711
> > >         * gcc.target/i386/avx2-pr100711.c: New test.
> > >         * gcc.target/i386/avx512bw-pr100711.c: New test.
> >
> > This patch should not use nonimmediate_operand at all in
> There's no simplification opportunity for nonimmediate_operand, but
> I'm not sure about other (non-constant) cases.
> Reading the code in case NOT of simplify_unary_operation_1, there
> may be (vec_duplicate (not (plus X -1)))?

After reconsidering, I think you're right: (not op) will already have
been simplified first, so the updated patch just pulls the NOT out
of vec_duplicate.

> >
> > Thanks,
> > Andrew
>
>
>
> --
> BR,
> Hongtao



--
BR,
Hongtao
From 99a479145084cb315be223fd7e02876da9938a9a Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Tue, 25 May 2021 17:17:32 +0800
Subject: [PATCH] Simplify (vec_duplicate (not op)) to (not (vec_duplicate
 op)).

This is not a straightforward simplification; it just adds a
tendency to pull the NOT out of vec_duplicate.

For i386, it enables the optimization below:

from
	notl    %edi
	vpbroadcastd    %edi, %xmm0
	vpand   %xmm1, %xmm0, %xmm0
to
	vpbroadcastd    %edi, %xmm0
	vpandn  %xmm1, %xmm0, %xmm0

gcc/ChangeLog:

	PR target/100711
	* simplify-rtx.c (simplify_unary_operation_1):
	Simplify (vec_duplicate (not op)) to (not (vec_duplicate op)).

gcc/testsuite/ChangeLog:

	PR target/100711
	* gcc.target/i386/avx2-pr100711.c: New test.
	* gcc.target/i386/avx512bw-pr100711.c: New test.
---
 gcc/simplify-rtx.c                            |  7 ++
 gcc/testsuite/gcc.target/i386/avx2-pr100711.c | 73 +++++++++++++++++++
 .../gcc.target/i386/avx512bw-pr100711.c       | 48 ++++++++++++
 3 files changed, 128 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr100711.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 04423bbd195..59fd9bebbd5 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1708,6 +1708,13 @@ simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode,
 #endif
       break;
 
+    /* Prefer (not (vec_duplicate op))
+       to (vec_duplicate (not op)).  */
+    case VEC_DUPLICATE:
+      if (GET_CODE (op) == NOT)
+	return gen_rtx_NOT (mode, gen_rtx_VEC_DUPLICATE (mode, XEXP (op, 0)));
+      break;
+
     default:
       break;
     }
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr100711.c b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c
new file mode 100644
index 00000000000..5b144623873
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c
@@ -0,0 +1,73 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "pandn" 8 } } */
+/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */
+typedef char v16qi __attribute__((vector_size(16)));
+typedef char v32qi __attribute__((vector_size(32)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+
+v16qi
+f1 (char a, v16qi c)
+{
+  char b = ~a;
+  return (__extension__(v16qi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v32qi
+f2 (char a, v32qi c)
+{
+  char b = ~a;
+  return (__extension__(v32qi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v8hi
+f3 (short a, v8hi c)
+{
+  short b = ~a;
+  return (__extension__(v8hi) {b, b, b, b, b, b, b, b}) & c;
+}
+
+v16hi
+f4 (short a, v16hi c)
+{
+  short b = ~a;
+  return (__extension__(v16hi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v4si
+f5 (int a, v4si c)
+{
+  int b = ~a;
+  return (__extension__(v4si) {b, b, b, b}) & c;
+}
+
+v8si
+f6 (int a, v8si c)
+{
+  int b = ~a;
+  return (__extension__(v8si) {b, b, b, b, b, b, b, b}) & c;
+}
+
+v2di
+f7 (long long a, v2di c)
+{
+  long long b = ~a;
+  return (__extension__(v2di) {b, b}) & c;
+}
+
+v4di
+f8 (long long a, v4di c)
+{
+  long long b = ~a;
+  return (__extension__(v4di) {b, b, b, b}) & c;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c
new file mode 100644
index 00000000000..f0a103d0bc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "pandn" 4 } } */
+/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */
+
+typedef char v64qi __attribute__((vector_size(64)));
+typedef short v32hi __attribute__((vector_size(64)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef long long v8di __attribute__((vector_size(64)));
+
+v64qi
+f1 (char a, v64qi c)
+{
+  char b = ~a;
+  return (__extension__(v64qi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v32hi
+f2 (short a, v32hi c)
+{
+  short b = ~a;
+  return (__extension__(v32hi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v16si
+f3 (int a, v16si c)
+{
+  int b = ~a;
+  return (__extension__(v16si) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v8di
+f4 (long long a, v8di c)
+{
+  long long b = ~a;
+  return (__extension__(v8di) {b, b, b, b, b, b, b, b}) & c;
+}
-- 
2.18.1

Reply via email to