On Tue, Aug 3, 2021 at 4:47 AM Richard Biener
<richard.guent...@gmail.com> wrote:
>
> On Sun, Jul 18, 2021 at 3:46 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > On Thu, Apr 22, 2021 at 7:30 AM Richard Biener via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > On Thu, Apr 22, 2021 at 2:52 PM Richard Biener
> > > <richard.guent...@gmail.com> wrote:
> > > >
> > > > On Thu, Apr 22, 2021 at 2:22 PM Jakub Jelinek <ja...@redhat.com> wrote:
> > > > >
> > > > > On Thu, Apr 22, 2021 at 01:23:20PM +0200, Richard Biener via 
> > > > > Gcc-patches wrote:
> > > > > > > The question is if the pragma GCC target right now behaves 
> > > > > > > incrementally
> > > > > > > or not, whether
> > > > > > > #pragma GCC target("avx2")
> > > > > > > adds -mavx2 to options if it was missing before and nothing 
> > > > > > > otherwise, or if
> > > > > > > it switches other options off.  If it is incremental, we could 
> > > > > > > e.g. try to
> > > > > > > use the second least significant bit of global_options_set.x_* to 
> > > > > > > mean
> > > > > > > this option has been set explicitly by some surrounding #pragma 
> > > > > > > GCC target.
> > > > > > > The normal tests - global_options_set.x_flag_whatever could still 
> > > > > > > work
> > > > > > > fine because they wouldn't care if the option was explicit from 
> > > > > > > anywhere
> > > > > > > (command line or GCC target or target attribute) and just & 2 
> > > > > > > would mean
> > > > > > > it was explicit from pragma GCC target; though there is the case 
> > > > > > > of
> > > > > > > bitfields... And then the inlining decision could check the & 2 
> > > > > > > flags to
> > > > > > > see what is required and what is just from command line.
> > > > > > > Or we can have some other pragma GCC that would be like target 
> > > > > > > but would
> > > > > > > have flags that are explicit (and could e.g. be more restricted, 
> > > > > > > to ISA
> > > > > > > options only, and let those use in addition to #pragma GCC target.
> > > > > >
> > > > > > I'm still curious as to what you think will break if always-inline 
> > > > > > does what
> > > > > > it is documented to do.
> > > > >
> > > > > We will silently accept calling intrinsics that must be used only in 
> > > > > certain
> > > > > ISA contexts, which will lead to people writing non-portable code.
> > > > >
> > > > > So -O2 -mno-avx
> > > > > #include <x86intrin.h>
> > > > >
> > > > > void
> > > > > foo (__m256 *x)
> > > > > {
> > > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > > }
> > > > > etc. will now be accepted when it shouldn't be.
> > > > > clang rejects it like gcc with:
> > > > > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires 
> > > > > target feature 'avx', but would be inlined into function 'foo' that 
> > > > > is compiled without support for 'avx'
> > > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > >          ^
> > > > >
> > > > > Note, if I do:
> > > > > #include <x86intrin.h>
> > > > >
> > > > > __attribute__((target ("no-sse3"))) void
> > > > > foo (__m256 *x)
> > > > > {
> > > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > > }
> > > > > and compile
> > > > > clang -S -O2 -mavx2 1.c
> > > > > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires 
> > > > > target feature 'avx', but would be inlined into function 'foo' that 
> > > > > is compiled without support for 'avx'
> > > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > >          ^
> > > > > then from the error message it seems that unlike GCC, clang remembers
> > > > > the exact target features that are needed for the intrinsics and 
> > > > > checks just
> > > > > those.
> > > > > Though, looking at the preprocessed source, seems it uses
> > > > > static __inline __m256 __attribute__((__always_inline__, __nodebug__, 
> > > > > __target__("avx"), __min_vector_width__(256)))
> > > > > _mm256_sub_ps(__m256 __a, __m256 __b)
> > > > > {
> > > > >   return (__m256)((__v8sf)__a-(__v8sf)__b);
> > > > > }
> > > > > and not target pragmas.
> > > > >
> > > > > Anyway, if we tweak our intrinsic headers so that
> > > > > -#ifndef __AVX__
> > > > >  #pragma GCC push_options
> > > > >  #pragma GCC target("avx")
> > > > > -#define __DISABLE_AVX__
> > > > > -#endif /* __AVX__ */
> > > > >
> > > > > ...
> > > > > -#ifdef __DISABLE_AVX__
> > > > > -#undef __DISABLE_AVX__
> > > > >  #pragma GCC pop_options
> > > > > -#endif /* __DISABLE_AVX__ */
> > > > > and do the opts_set->x_* & 2 stuff on explicit options coming out of
> > > > > target/optimize pragmas and attributes, perhaps we don't even need
> > > > > to introduce a new attribute and can handle everything magically:
> > >
> > > Oh, and any such changes will likely interact with Martin's ideas to rework
> > > how optimize and target attributes work (aka adding on top of the
> > > commandline options).  That is, attribute target will then not be enough
> > > to remember the exact set of needed ISA features (as opposed to what
> > > likely clang implements?)
> > >
> > > > > 1) if it is gnu_inline extern inline, allow indirect calls, otherwise
> > > > > disallow them for always_inline functions
> > > >
> > > > There are a lot of intrinsics using extern inline __gnu_inline though...
> > > >
> > > > > 2) for the isa flags and option mismatches, only disallow 
> > > > > opts_set->x_* & 2
> > > > > stuff
> > > > > This will keep both intrinsics and glibc fortify macros working fine
> > > > > in all the needed use cases.
> > > >
> > > > Yes, see my example in the other mail.
> > > >
> > > > I think before we add any new attributes we should sort out the
> > > > current mess, eventually adding some testcases for desired
> > > > diagnostic.
> > > >
> > > > Richard.
> > > >
> > > > >         Jakub
> >
> > Here is the v5 patch:
> >
> > 1. Intrinsics in <x86gprintrin.h> only require GPR ISAs.  Add
> >
> >  #if defined __MMX__ || defined __SSE__
> >  #pragma GCC push_options
> >  #pragma GCC target("general-regs-only")
> >  #define __DISABLE_GENERAL_REGS_ONLY__
> >  #endif
> >
> > and
> >
> >  #ifdef __DISABLE_GENERAL_REGS_ONLY__
> >  #undef __DISABLE_GENERAL_REGS_ONLY__
> >  #pragma GCC pop_options
> >  #endif /* __DISABLE_GENERAL_REGS_ONLY__ */
> >
> > to <x86gprintrin.h> to disable non-GPR ISAs so that they can be used in
> > functions with __attribute__ ((target("general-regs-only"))).
> > 2. When checking the always_inline attribute, if the callee only uses GPRs,
> > ignore MASK_80387, since enabling MASK_80387 in the caller has no impact on
> > inlining the callee.
> >
> > OK for master?
>
> +
> +#include <x86intrin.h>
> +
> +#include <x86intrin.h>
> +
>
> there are some cases like the above - intentional?

Fixed in the v6 patch.

> Otherwise I guess I can live with this, hopefully things won't break.
>
> In the end it's a quite narrow solution to a subpart of the overall
> issue of course.
>
> Thus OK unless any other stakeholder has comments.

Thanks.  I will wait for a few days before committing.

> Thanks,
> Richard.
>
> > Thanks.
> >
> > --
> > H.J.



-- 
H.J.
From ef93867816e73a0aae1c526cf8d7999d5a15b6f9 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Sat, 17 Jul 2021 07:44:45 -0700
Subject: [PATCH v6] <x86gprintrin.h>: Add pragma GCC
 target("general-regs-only")

1. Intrinsics in <x86gprintrin.h> only require GPR ISAs.  Add

 #if defined __MMX__ || defined __SSE__
 #pragma GCC push_options
 #pragma GCC target("general-regs-only")
 #define __DISABLE_GENERAL_REGS_ONLY__
 #endif

and

 #ifdef __DISABLE_GENERAL_REGS_ONLY__
 #undef __DISABLE_GENERAL_REGS_ONLY__
 #pragma GCC pop_options
 #endif /* __DISABLE_GENERAL_REGS_ONLY__ */

to <x86gprintrin.h> to disable non-GPR ISAs so that they can be used in
functions with __attribute__ ((target("general-regs-only"))).
2. When checking the always_inline attribute, if the callee only uses GPRs,
ignore MASK_80387, since enabling MASK_80387 in the caller has no impact on
inlining the callee.

gcc/

	PR target/99744
	* config/i386/i386.c (ix86_can_inline_p): Ignore MASK_80387 if
	callee only uses GPRs.
	* config/i386/ia32intrin.h: Revert commit 5463cee2770.
	* config/i386/serializeintrin.h: Revert commit 71958f740f1.
	* config/i386/x86gprintrin.h: Add
	#pragma GCC target("general-regs-only") and #pragma GCC pop_options
	to disable non-GPR ISAs.

gcc/testsuite/

	PR target/99744
	* gcc.target/i386/pr99744-3.c: New test.
	* gcc.target/i386/pr99744-4.c: Likewise.
	* gcc.target/i386/pr99744-5.c: Likewise.
	* gcc.target/i386/pr99744-6.c: Likewise.
	* gcc.target/i386/pr99744-7.c: Likewise.
	* gcc.target/i386/pr99744-8.c: Likewise.
---
 gcc/config/i386/i386.c                    |   6 +-
 gcc/config/i386/ia32intrin.h              |  14 +-
 gcc/config/i386/serializeintrin.h         |   7 +-
 gcc/config/i386/x86gprintrin.h            |  11 +
 gcc/testsuite/gcc.target/i386/pr99744-3.c |  13 +
 gcc/testsuite/gcc.target/i386/pr99744-4.c | 357 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr99744-5.c |  25 ++
 gcc/testsuite/gcc.target/i386/pr99744-6.c |  23 ++
 gcc/testsuite/gcc.target/i386/pr99744-7.c |  12 +
 gcc/testsuite/gcc.target/i386/pr99744-8.c |  13 +
 10 files changed, 477 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-8.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ec0690876b7..aea224ab235 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -554,7 +554,7 @@ ix86_can_inline_p (tree caller, tree callee)
 
   /* Changes of those flags can be tolerated for always inlines. Lets hope
      user knows what he is doing.  */
-  const unsigned HOST_WIDE_INT always_inline_safe_mask
+  unsigned HOST_WIDE_INT always_inline_safe_mask
 	 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
 	    | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
 	    | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
@@ -579,6 +579,10 @@ ix86_can_inline_p (tree caller, tree callee)
        && lookup_attribute ("always_inline",
 			    DECL_ATTRIBUTES (callee)));
 
+  /* If callee only uses GPRs, ignore MASK_80387.  */
+  if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
+    always_inline_safe_mask |= MASK_80387;
+
   cgraph_node *callee_node = cgraph_node::get (callee);
   /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
      function can inline a SSE2 function but a SSE2 function can't inline
diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
index 5422b0fc9e0..df99220ee4f 100644
--- a/gcc/config/i386/ia32intrin.h
+++ b/gcc/config/i386/ia32intrin.h
@@ -107,12 +107,22 @@ __rdpmc (int __S)
 #endif /* __iamcu__ */
 
 /* rdtsc */
-#define __rdtsc()		__builtin_ia32_rdtsc ()
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdtsc (void)
+{
+  return __builtin_ia32_rdtsc ();
+}
 
 #ifndef __iamcu__
 
 /* rdtscp */
-#define __rdtscp(a)		__builtin_ia32_rdtscp (a)
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdtscp (unsigned int *__A)
+{
+  return __builtin_ia32_rdtscp (__A);
+}
 
 #endif /* __iamcu__ */
 
diff --git a/gcc/config/i386/serializeintrin.h b/gcc/config/i386/serializeintrin.h
index e280250b198..89b5b94ea9b 100644
--- a/gcc/config/i386/serializeintrin.h
+++ b/gcc/config/i386/serializeintrin.h
@@ -34,7 +34,12 @@
 #define __DISABLE_SERIALIZE__
 #endif /* __SERIALIZE__ */
 
-#define _serialize()	__builtin_ia32_serialize ()
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_serialize (void)
+{
+  __builtin_ia32_serialize ();
+}
 
 #ifdef __DISABLE_SERIALIZE__
 #undef __DISABLE_SERIALIZE__
diff --git a/gcc/config/i386/x86gprintrin.h b/gcc/config/i386/x86gprintrin.h
index 7793032ba90..b7fefa780a6 100644
--- a/gcc/config/i386/x86gprintrin.h
+++ b/gcc/config/i386/x86gprintrin.h
@@ -24,6 +24,12 @@
 #ifndef _X86GPRINTRIN_H_INCLUDED
 #define _X86GPRINTRIN_H_INCLUDED
 
+#if defined __MMX__ || defined __SSE__
+#pragma GCC push_options
+#pragma GCC target("general-regs-only")
+#define __DISABLE_GENERAL_REGS_ONLY__
+#endif
+
 #include <ia32intrin.h>
 
 #ifndef __iamcu__
@@ -255,4 +261,9 @@ _ptwrite32 (unsigned __B)
 
 #endif /* __iamcu__ */
 
+#ifdef __DISABLE_GENERAL_REGS_ONLY__
+#undef __DISABLE_GENERAL_REGS_ONLY__
+#pragma GCC pop_options
+#endif /* __DISABLE_GENERAL_REGS_ONLY__ */
+
 #endif /* _X86GPRINTRIN_H_INCLUDED.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-3.c b/gcc/testsuite/gcc.target/i386/pr99744-3.c
new file mode 100644
index 00000000000..6c505816ceb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-serialize" } */
+
+#include <x86intrin.h>
+
+__attribute__ ((target("general-regs-only")))
+void
+foo1 (void)
+{
+  _serialize ();
+}
+
+/* { dg-error "target specific option mismatch" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-4.c b/gcc/testsuite/gcc.target/i386/pr99744-4.c
new file mode 100644
index 00000000000..9196e62d955
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-4.c
@@ -0,0 +1,357 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -mcrc32 -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdir64b -mmovdiri -mmwaitx -mpconfig -mpku -mpopcnt -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -msgx -mshstk -mtbm -mtsxldtrk -mxsave -mxsavec -mxsaveopt -mxsaves -mwaitpkg -mwbnoinvd" } */
+/* { dg-additional-options "-muintr" { target { ! ia32 } } }  */
+
+/* Test calling GPR intrinsics from functions with general-regs-only
+   target attribute.  */
+
+#include <x86gprintrin.h>
+
+#define _CONCAT(x,y) x ## y
+
+#define test_0(func, type)						\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (void)						\
+  { return func (); }
+
+#define test_0_i1(func, type, imm)					\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (void)						\
+  { return func (imm); }
+
+#define test_1(func, type, op1_type)					\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A)					\
+  { return func (A); }
+
+#define test_1_i1(func, type, op1_type, imm)				\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A)					\
+  { return func (A, imm); }
+
+#define test_2(func, type, op1_type, op2_type)				\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B)			\
+  { return func (A, B); }
+
+#define test_2_i1(func, type, op1_type, op2_type, imm)			\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B)			\
+  { return func (A, B, imm); }
+
+#define test_3(func, type, op1_type, op2_type, op3_type)		\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C)		\
+  { return func (A, B, C); }
+
+#define test_4(func, type, op1_type, op2_type, op3_type, op4_type)	\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C,		\
+			  op4_type D)					\
+  { return func (A, B, C, D); }
+
+/* ia32intrin.h  */
+test_1 (__bsfd, int, int)
+test_1 (__bsrd, int, int)
+test_1 (__bswapd, int, int)
+test_1 (__popcntd, int, unsigned int)
+test_2 (__rolb, unsigned char, unsigned char, int)
+test_2 (__rolw, unsigned short, unsigned short, int)
+test_2 (__rold, unsigned int, unsigned int, int)
+test_2 (__rorb, unsigned char, unsigned char, int)
+test_2 (__rorw, unsigned short, unsigned short, int)
+test_2 (__rord, unsigned int, unsigned int, int)
+
+#ifndef __iamcu__
+/* adxintrin.h */
+test_4 (_subborrow_u32, unsigned char, unsigned char, unsigned int,
+	unsigned int, unsigned int *)
+test_4 (_addcarry_u32, unsigned char, unsigned char, unsigned int,
+	unsigned int, unsigned int *)
+test_4 (_addcarryx_u32, unsigned char, unsigned char, unsigned int,
+	unsigned int, unsigned int *)
+
+/* bmiintrin.h */
+test_1 (__tzcnt_u16, unsigned short, unsigned short)
+test_2 (__andn_u32, unsigned int, unsigned int, unsigned int)
+test_2 (__bextr_u32, unsigned int, unsigned int, unsigned int)
+test_3 (_bextr_u32, unsigned int, unsigned int, unsigned int,
+	unsigned int)
+test_1 (__blsi_u32, unsigned int, unsigned int)
+test_1 (_blsi_u32, unsigned int, unsigned int)
+test_1 (__blsmsk_u32, unsigned int, unsigned int)
+test_1 (_blsmsk_u32, unsigned int, unsigned int)
+test_1 (__blsr_u32, unsigned int, unsigned int)
+test_1 (_blsr_u32, unsigned int, unsigned int)
+test_1 (__tzcnt_u32, unsigned int, unsigned int)
+test_1 (_tzcnt_u32, unsigned int, unsigned int)
+
+/* bmi2intrin.h */
+test_2 (_bzhi_u32, unsigned int, unsigned int, unsigned int)
+test_2 (_pdep_u32, unsigned int, unsigned int, unsigned int)
+test_2 (_pext_u32, unsigned int, unsigned int, unsigned int)
+
+/* cetintrin.h */
+test_1 (_inc_ssp, void, unsigned int)
+test_0 (_saveprevssp, void)
+test_1 (_rstorssp, void, void *)
+test_2 (_wrssd, void, unsigned int, void *)
+test_2 (_wrussd, void, unsigned int, void *)
+test_0 (_setssbsy, void)
+test_1 (_clrssbsy, void, void *)
+
+/* cldemoteintrin.h */
+test_1 (_cldemote, void, void *)
+
+/* clflushoptintrin.h */
+test_1 (_mm_clflushopt, void, void *)
+
+/* clwbintrin.h */
+test_1 (_mm_clwb, void, void *)
+
+/* clzerointrin.h */
+test_1 (_mm_clzero, void, void *)
+
+/* enqcmdintrin.h */
+test_2 (_enqcmd, int, void *, const void *)
+test_2 (_enqcmds, int, void *, const void *)
+
+/* fxsrintrin.h */
+test_1 (_fxsave, void, void *)
+test_1 (_fxrstor, void, void *)
+
+/* hresetintrin.h */
+test_1 (_hreset, void, unsigned int)
+
+/* ia32intrin.h  */
+test_2 (__crc32b, unsigned int, unsigned char, unsigned char)
+test_2 (__crc32w, unsigned int, unsigned short, unsigned short)
+test_2 (__crc32d, unsigned int, unsigned int, unsigned int)
+test_1 (__rdpmc, unsigned long long, int)
+test_0 (__rdtsc, unsigned long long)
+test_1 (__rdtscp, unsigned long long, unsigned int *)
+test_0 (__pause, void)
+
+/* lzcntintrin.h */
+test_1 (__lzcnt16, unsigned short, unsigned short)
+test_1 (__lzcnt32, unsigned int, unsigned int)
+test_1 (_lzcnt_u32, unsigned int, unsigned int)
+
+/* lwpintrin.h */
+test_1 (__llwpcb, void, void *)
+test_0 (__slwpcb, void *)
+test_2_i1 (__lwpval32, void, unsigned int, unsigned int, 1)
+test_2_i1 (__lwpins32, unsigned char, unsigned int, unsigned int, 1)
+
+/* movdirintrin.h */
+test_2 (_directstoreu_u32, void, void *, unsigned int)
+test_2 (_movdir64b, void, void *, const void *)
+
+/* mwaitxintrin.h */
+test_3 (_mm_monitorx, void, void const *, unsigned int, unsigned int)
+test_3 (_mm_mwaitx, void, unsigned int, unsigned int, unsigned int)
+
+/* pconfigintrin.h */
+test_2 (_pconfig_u32, unsigned int, const unsigned int, size_t *)
+
+/* pkuintrin.h */
+test_0 (_rdpkru_u32, unsigned int)
+test_1 (_wrpkru, void, unsigned int)
+
+/* popcntintrin.h */
+test_1 (_mm_popcnt_u32, int, unsigned int)
+
+/* rdseedintrin.h */
+test_1 (_rdseed16_step, int, unsigned short *)
+test_1 (_rdseed32_step, int, unsigned int *)
+
+/* rtmintrin.h */
+test_0 (_xbegin, unsigned int)
+test_0 (_xend, void)
+test_0_i1 (_xabort, void, 1)
+
+/* sgxintrin.h */
+test_2 (_encls_u32, unsigned int, const unsigned int, size_t *)
+test_2 (_enclu_u32, unsigned int, const unsigned int, size_t *)
+test_2 (_enclv_u32, unsigned int, const unsigned int, size_t *)
+
+/* tbmintrin.h */
+test_1_i1 (__bextri_u32, unsigned int, unsigned int, 1)
+test_1 (__blcfill_u32, unsigned int, unsigned int)
+test_1 (__blci_u32, unsigned int, unsigned int)
+test_1 (__blcic_u32, unsigned int, unsigned int)
+test_1 (__blcmsk_u32, unsigned int, unsigned int)
+test_1 (__blcs_u32, unsigned int, unsigned int)
+test_1 (__blsfill_u32, unsigned int, unsigned int)
+test_1 (__blsic_u32, unsigned int, unsigned int)
+test_1 (__t1mskc_u32, unsigned int, unsigned int)
+test_1 (__tzmsk_u32, unsigned int, unsigned int)
+
+/* tsxldtrkintrin.h */
+test_0 (_xsusldtrk, void)
+test_0 (_xresldtrk, void)
+
+/* x86gprintrin.h */
+test_1 (_ptwrite32, void, unsigned int)
+test_1 (_rdrand16_step, int, unsigned short *)
+test_1 (_rdrand32_step, int, unsigned int *)
+test_0 (_wbinvd, void)
+
+/* xtestintrin.h */
+test_0 (_xtest, int)
+
+/* xsaveintrin.h */
+test_2 (_xsave, void, void *, long long)
+test_2 (_xrstor, void, void *, long long)
+test_2 (_xsetbv, void, unsigned int, long long)
+test_1 (_xgetbv, long long, unsigned int)
+
+/* xsavecintrin.h */
+test_2 (_xsavec, void, void *, long long)
+
+/* xsaveoptintrin.h */
+test_2 (_xsaveopt, void, void *, long long)
+
+/* xsavesintrin.h */
+test_2 (_xsaves, void, void *, long long)
+test_2 (_xrstors, void, void *, long long)
+
+/* wbnoinvdintrin.h */
+test_0 (_wbnoinvd, void)
+
+#ifdef __x86_64__
+/* adxintrin.h */
+test_4 (_subborrow_u64, unsigned char, unsigned char,
+	unsigned long long, unsigned long long,
+	unsigned long long *)
+test_4 (_addcarry_u64, unsigned char, unsigned char,
+	unsigned long long, unsigned long long,
+	unsigned long long *)
+test_4 (_addcarryx_u64, unsigned char, unsigned char,
+	unsigned long long, unsigned long long,
+	unsigned long long *)
+
+/* bmiintrin.h */
+test_2 (__andn_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_2 (__bextr_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_3 (_bextr_u64, unsigned long long, unsigned long long,
+	unsigned long long, unsigned long long)
+test_1 (__blsi_u64, unsigned long long, unsigned long long)
+test_1 (_blsi_u64, unsigned long long, unsigned long long)
+test_1 (__blsmsk_u64, unsigned long long, unsigned long long)
+test_1 (_blsmsk_u64, unsigned long long, unsigned long long)
+test_1 (__blsr_u64, unsigned long long, unsigned long long)
+test_1 (_blsr_u64, unsigned long long, unsigned long long)
+test_1 (__tzcnt_u64, unsigned long long, unsigned long long)
+test_1 (_tzcnt_u64, unsigned long long, unsigned long long)
+
+/* bmi2intrin.h */
+test_2 (_bzhi_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_2 (_pdep_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_2 (_pext_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_3 (_mulx_u64, unsigned long long, unsigned long long,
+	unsigned long long, unsigned long long *)
+
+/* cetintrin.h */
+test_0 (_get_ssp, unsigned long long)
+test_2 (_wrssq, void, unsigned long long, void *)
+test_2 (_wrussq, void, unsigned long long, void *)
+
+/* fxsrintrin.h */
+test_1 (_fxsave64, void, void *)
+test_1 (_fxrstor64, void, void *)
+
+/* ia32intrin.h  */
+test_1 (__bsfq, int, long long)
+test_1 (__bsrq, int, long long)
+test_1 (__bswapq, long long, long long)
+test_2 (__crc32q, unsigned long long, unsigned long long,
+	unsigned long long)
+test_1 (__popcntq, long long, unsigned long long)
+test_2 (__rolq, unsigned long long, unsigned long long, int)
+test_2 (__rorq, unsigned long long, unsigned long long, int)
+test_0 (__readeflags, unsigned long long)
+test_1 (__writeeflags, void, unsigned int)
+
+/* lzcntintrin.h */
+test_1 (__lzcnt64, unsigned long long, unsigned long long)
+test_1 (_lzcnt_u64, unsigned long long, unsigned long long)
+
+/* lwpintrin.h */
+test_2_i1 (__lwpval64, void, unsigned long long, unsigned int, 1)
+test_2_i1 (__lwpins64, unsigned char, unsigned long long,
+	   unsigned int, 1)
+
+/* movdirintrin.h */
+test_2 (_directstoreu_u64, void, void *, unsigned long long)
+
+/* popcntintrin.h */
+test_1 (_mm_popcnt_u64, long long, unsigned long long)
+
+/* rdseedintrin.h */
+test_1 (_rdseed64_step, int, unsigned long long *)
+
+/* tbmintrin.h */
+test_1_i1 (__bextri_u64, unsigned long long, unsigned long long, 1)
+test_1 (__blcfill_u64, unsigned long long, unsigned long long)
+test_1 (__blci_u64, unsigned long long, unsigned long long)
+test_1 (__blcic_u64, unsigned long long, unsigned long long)
+test_1 (__blcmsk_u64, unsigned long long, unsigned long long)
+test_1 (__blcs_u64, unsigned long long, unsigned long long)
+test_1 (__blsfill_u64, unsigned long long, unsigned long long)
+test_1 (__blsic_u64, unsigned long long, unsigned long long)
+test_1 (__t1mskc_u64, unsigned long long, unsigned long long)
+test_1 (__tzmsk_u64, unsigned long long, unsigned long long)
+
+/* uintrintrin.h */
+test_0 (_clui, void)
+test_1 (_senduipi, void, unsigned long long)
+test_0 (_stui, void)
+test_0 (_testui, unsigned char)
+
+/* x86gprintrin.h */
+test_1 (_ptwrite64, void, unsigned long long)
+test_0 (_readfsbase_u32, unsigned int)
+test_0 (_readfsbase_u64, unsigned long long)
+test_0 (_readgsbase_u32, unsigned int)
+test_0 (_readgsbase_u64, unsigned long long)
+test_1 (_rdrand64_step, int, unsigned long long *)
+test_1 (_writefsbase_u32, void, unsigned int)
+test_1 (_writefsbase_u64, void, unsigned long long)
+test_1 (_writegsbase_u32, void, unsigned int)
+test_1 (_writegsbase_u64, void, unsigned long long)
+
+/* xsaveintrin.h */
+test_2 (_xsave64, void, void *, long long)
+test_2 (_xrstor64, void, void *, long long)
+
+/* xsavecintrin.h */
+test_2 (_xsavec64, void, void *, long long)
+
+/* xsaveoptintrin.h */
+test_2 (_xsaveopt64, void, void *, long long)
+
+/* xsavesintrin.h */
+test_2 (_xsaves64, void, void *, long long)
+test_2 (_xrstors64, void, void *, long long)
+
+/* waitpkgintrin.h */
+test_1 (_umonitor, void, void *)
+test_2 (_umwait, unsigned char, unsigned int, unsigned long long)
+test_2 (_tpause, unsigned char, unsigned int, unsigned long long)
+
+#else /* !__x86_64__ */
+/* bmi2intrin.h */
+test_3 (_mulx_u32, unsigned int, unsigned int, unsigned int,
+	unsigned int *)
+
+/* cetintrin.h */
+test_0 (_get_ssp, unsigned int)
+#endif /* __x86_64__ */
+
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-5.c b/gcc/testsuite/gcc.target/i386/pr99744-5.c
new file mode 100644
index 00000000000..9e40e5ef428
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-5.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mmwait" } */
+
+/* Test calling MWAIT intrinsics from functions with general-regs-only
+   target attribute.  */
+
+#include <x86gprintrin.h>
+
+#define _CONCAT(x,y) x ## y
+
+#define test_2(func, type, op1_type, op2_type)				\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B)			\
+  { return func (A, B); }
+
+#define test_3(func, type, op1_type, op2_type, op3_type)		\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C)		\
+  { return func (A, B, C); }
+
+#ifndef __iamcu__
+/* mwaitintrin.h */
+test_3 (_mm_monitor, void, void const *, unsigned int, unsigned int)
+test_2 (_mm_mwait, void, unsigned int, unsigned int)
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-6.c b/gcc/testsuite/gcc.target/i386/pr99744-6.c
new file mode 100644
index 00000000000..4025918a9c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-6.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <x86intrin.h>
+
+extern unsigned long long int curr_deadline;
+extern void bar (void);
+
+void
+foo1 (void)
+{
+  if (__rdtsc () < curr_deadline)
+    return; 
+  bar ();
+}
+
+void
+foo2 (unsigned int *p)
+{
+  if (__rdtscp (p) < curr_deadline)
+    return; 
+  bar ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-7.c b/gcc/testsuite/gcc.target/i386/pr99744-7.c
new file mode 100644
index 00000000000..30b7ca05966
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mno-avx -Wno-psabi" } */
+
+#include <x86intrin.h>
+
+void
+foo (__m256 *x)
+{
+  x[0] = _mm256_sub_ps (x[1], x[2]);
+}
+
+/* { dg-error "target specific option mismatch" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-8.c b/gcc/testsuite/gcc.target/i386/pr99744-8.c
new file mode 100644
index 00000000000..115183eede6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-8.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O -Wno-psabi" } */
+
+#include <x86intrin.h>
+
+__attribute__((target ("no-avx")))
+void
+foo (__m256 *x)
+{
+  x[0] = _mm256_sub_ps (x[1], x[2]);
+}
+
+/* { dg-error "target specific option mismatch" "" { target *-*-* } 0 } */
-- 
2.31.1

Reply via email to