On Wed, Nov 15, 2017 at 8:09 AM, Uros Bizjak <ubiz...@gmail.com> wrote:
> On Wed, Nov 15, 2017 at 2:37 PM, H.J. Lu <hjl.to...@gmail.com> wrote:
>> -mzeroupper is specified to generate vzeroupper instruction.  If it
>> isn't used, the default should depend on !TARGET_AVX512ER.  Users can
>> always use -mzeroupper or -mno-zeroupper to override it.
>>
>> Sebastian, can you run the full test with it?
>>
>> OK for trunk if there is no regression?
>
> If we want to go this way, please add relevant tune flag (e.g.
> X86_TUNE_EMIT_VZEROUPPER) and use it for ~m_KNL. This tune is the
> property of the processor model, not ISA.

How about this?  OK for trunk if there are no regressions?


-- 
H.J.
From d9388c1b7f36e2310645aed4a4debefa65b5129e Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Tue, 14 Nov 2017 20:49:33 -0800
Subject: [PATCH] i386: Add X86_TUNE_EMIT_VZEROUPPER

Add X86_TUNE_EMIT_VZEROUPPER to indicate if vzeroupper instruction should
be inserted before a transfer of control flow out of the function.  It is
turned on by default unless we are tuning for KNL.  Users can always use
-mzeroupper or -mno-zeroupper to override X86_TUNE_EMIT_VZEROUPPER.

gcc/

	PR target/82990
	* config/i386/i386.c (pass_insert_vzeroupper::gate): Remove
	TARGET_AVX512ER check.
	(ix86_option_override_internal): Set MASK_VZEROUPPER if
	neither -mzeroupper nor -mno-zeroupper is used and
	TARGET_EMIT_VZEROUPPER is set.
	* config/i386/i386.h (TARGET_EMIT_VZEROUPPER): New.
	* config/i386/x86-tune.def: Add X86_TUNE_EMIT_VZEROUPPER.

gcc/testsuite/

	PR target/82990
	* gcc.target/i386/pr82942-2.c: Add -mtune=knl.
	* gcc.target/i386/pr82990-1.c: New test.
	* gcc.target/i386/pr82990-2.c: Likewise.
	* gcc.target/i386/pr82990-3.c: Likewise.
	* gcc.target/i386/pr82990-4.c: Likewise.
	* gcc.target/i386/pr82990-5.c: Likewise.
	* gcc.target/i386/pr82990-6.c: Likewise.
	* gcc.target/i386/pr82990-7.c: Likewise.
---
 gcc/config/i386/i386.c                    |  5 +++--
 gcc/config/i386/i386.h                    |  2 ++
 gcc/config/i386/x86-tune.def              |  4 ++++
 gcc/testsuite/gcc.target/i386/pr82942-2.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr82990-1.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr82990-2.c |  6 ++++++
 gcc/testsuite/gcc.target/i386/pr82990-3.c |  6 ++++++
 gcc/testsuite/gcc.target/i386/pr82990-4.c |  6 ++++++
 gcc/testsuite/gcc.target/i386/pr82990-5.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr82990-6.c |  6 ++++++
 gcc/testsuite/gcc.target/i386/pr82990-7.c |  6 ++++++
 11 files changed, 68 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-7.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c5e84a09954..c6ca0712755 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2497,7 +2497,7 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return TARGET_AVX && !TARGET_AVX512ER
+      return TARGET_AVX
 	     && TARGET_VZEROUPPER && flag_expensive_optimizations
 	     && !optimize_size;
     }
@@ -4666,7 +4666,8 @@ ix86_option_override_internal (bool main_args_p,
   if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
     sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
 
-  if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
+  if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
+      && TARGET_EMIT_VZEROUPPER)
     opts->x_target_flags |= MASK_VZEROUPPER;
   if (!(opts_set->x_target_flags & MASK_STV))
     opts->x_target_flags |= MASK_STV;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e3e55da4232..a45e2df5783 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -517,6 +517,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
 #define TARGET_ONE_IF_CONV_INSN \
 	ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
+#define TARGET_EMIT_VZEROUPPER \
+	ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 99282c88341..19fd2b52b30 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -543,3 +543,7 @@ DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", ~0U)
    arithmetic to 32bit via PROMOTE_MODE macro.  This code generation scheme
    is usually used for RISC targets.  */
 DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0U)
+
+/* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion
+   before a transfer of control flow out of the function.  */
+DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL)
diff --git a/gcc/testsuite/gcc.target/i386/pr82942-2.c b/gcc/testsuite/gcc.target/i386/pr82942-2.c
index cb0e337113e..ddb4e689659 100644
--- a/gcc/testsuite/gcc.target/i386/pr82942-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr82942-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-mavx512f -mavx512er -O2" } */
+/* { dg-options "-mavx512f -mavx512er -mtune=knl -O2" } */
 
 #include "pr82941-1.c"
 
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c
new file mode 100644
index 00000000000..ff1d6d40eb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=knl -mvzeroupper" } */
+
+#include <immintrin.h>
+
+extern __m512d y, z;
+
+void
+pr82941 ()
+{
+  z = y;
+}
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-2.c b/gcc/testsuite/gcc.target/i386/pr82990-2.c
new file mode 100644
index 00000000000..0d3cb2333dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-vzeroupper" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c
new file mode 100644
index 00000000000..201fa98d8d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mavx512er -mvzeroupper -O2" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-4.c b/gcc/testsuite/gcc.target/i386/pr82990-4.c
new file mode 100644
index 00000000000..09f161c7291
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-4.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512er -mno-vzeroupper -O2" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c
new file mode 100644
index 00000000000..9932bdc5375
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+#include <immintrin.h>
+
+extern __m512d y, z;
+
+void
+pr82941 ()
+{
+  z = y;
+}
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-6.c b/gcc/testsuite/gcc.target/i386/pr82990-6.c
new file mode 100644
index 00000000000..063a61c111d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-6.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mtune=knl" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-7.c b/gcc/testsuite/gcc.target/i386/pr82990-7.c
new file mode 100644
index 00000000000..dedde8b854b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-7.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mtune=generic -mtune-ctrl=^emit_vzeroupper" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
-- 
2.14.3

Reply via email to