On Sun, Aug 23, 2020 at 5:07 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > On Sun, Aug 23, 2020 at 10:18:28AM +0200, Uros Bizjak wrote: > > On Sat, Aug 22, 2020 at 9:09 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > > Compile CPUID check with "-mno-sse -mfpmath=387" to disable SSE, AVX > > > > > and > > > > > AVX512 during CPUID check to avoid vector and mask register > > > > > operations. > > > > > > > > -mgeneral-regs-only ? > > > > > > > > > > Here is a patch to add target("general-regs-only") function > > > attribute and use it for CPUID check. OK for master if there > > > are no regressions? > > > > Please test it first, then ask for an approval. > > > > Please submit the general-regs-only part as an independent patch. (I > > think this is the option linux should use for compilation). > > > > Tested on Linux/x86-64. OK for master? > > Thanks. > > H.J. > --- > gcc/ > > PR target/96744 > * config/i386/i386-options.c (IX86_ATTR_IX86_YES): New. > (IX86_ATTR_IX86_NO): Likewise. > (ix86_opt_type): Add ix86_opt_ix86_yes and ix86_opt_ix86_no. > (ix86_valid_target_attribute_inner_p): Handle general-regs-only, > ix86_opt_ix86_yes and ix86_opt_ix86_no. > (ix86_option_override_internal): Check opts->x_ix86_target_flags > instead of opts_set->x_ix86_target_flags. > * doc/extend.texi: Document target("general-regs-only") function > attribute. > > gcc/testsuite/ > > PR target/96744 > * gcc.target/i386/pr96744-1.c: New test. > * gcc.target/i386/pr96744-2.c: Likewise. > * gcc.target/i386/pr96744-3a.c: Likewise. > * gcc.target/i386/pr96744-3b.c: Likewise. > * gcc.target/i386/pr96744-4.c: Likewise. > * gcc.target/i386/pr96744-5.c: Likewise. > * gcc.target/i386/pr96744-6.c: Likewise. > * gcc.target/i386/pr96744-7.c: Likewise. > * gcc.target/i386/pr96744-8a.c: Likewise. > * gcc.target/i386/pr96744-8b.c: Likewise. > * gcc.target/i386/pr96744-9.c: Likewise.
OK. Thanks, Uros. > --- > gcc/config/i386/i386-options.c | 44 ++++++++++++++++++++-- > gcc/doc/extend.texi | 4 ++ > gcc/testsuite/gcc.target/i386/pr96744-1.c | 10 +++++ > gcc/testsuite/gcc.target/i386/pr96744-2.c | 11 ++++++ > gcc/testsuite/gcc.target/i386/pr96744-3a.c | 12 ++++++ > gcc/testsuite/gcc.target/i386/pr96744-3b.c | 16 ++++++++ > gcc/testsuite/gcc.target/i386/pr96744-4.c | 11 ++++++ > gcc/testsuite/gcc.target/i386/pr96744-5.c | 17 +++++++++ > gcc/testsuite/gcc.target/i386/pr96744-6.c | 11 ++++++ > gcc/testsuite/gcc.target/i386/pr96744-7.c | 14 +++++++ > gcc/testsuite/gcc.target/i386/pr96744-8a.c | 33 ++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr96744-8b.c | 35 +++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr96744-9.c | 25 ++++++++++++ > 13 files changed, 240 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-3a.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-3b.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-4.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-5.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-6.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-7.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-8a.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-8b.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-9.c > > diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c > index 26d1ea18ef1..e0fc68c27bf 100644 > --- a/gcc/config/i386/i386-options.c > +++ b/gcc/config/i386/i386-options.c > @@ -922,12 +922,18 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree > args, char *p_strings[], > #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 } > #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } > #define IX86_ATTR_NO(S,O,M) { S, 
sizeof (S)-1, ix86_opt_no, O, M } > +#define IX86_ATTR_IX86_YES(S,O,M) \ > + { S, sizeof (S)-1, ix86_opt_ix86_yes, O, M } > +#define IX86_ATTR_IX86_NO(S,O,M) \ > + { S, sizeof (S)-1, ix86_opt_ix86_no, O, M } > > enum ix86_opt_type > { > ix86_opt_unknown, > ix86_opt_yes, > ix86_opt_no, > + ix86_opt_ix86_yes, > + ix86_opt_ix86_no, > ix86_opt_str, > ix86_opt_enum, > ix86_opt_isa > @@ -1062,6 +1068,10 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree > args, char *p_strings[], > IX86_ATTR_YES ("recip", > OPT_mrecip, > MASK_RECIP), > + > + IX86_ATTR_IX86_YES ("general-regs-only", > + OPT_mgeneral_regs_only, > + OPTION_MASK_GENERAL_REGS_ONLY), > }; > > location_t loc > @@ -1175,6 +1185,33 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree > args, char *p_strings[], > opts->x_target_flags &= ~mask; > } > > + else if (type == ix86_opt_ix86_yes || type == ix86_opt_ix86_no) > + { > + if (mask == OPTION_MASK_GENERAL_REGS_ONLY) > + { > + if (type != ix86_opt_ix86_yes) > + gcc_unreachable (); > + > + opts->x_ix86_target_flags |= mask; > + > + struct cl_decoded_option decoded; > + generate_option (opt, NULL, opt_set_p, CL_TARGET, > + &decoded); > + ix86_handle_option (opts, opts_set, &decoded, > + input_location); > + } > + else > + { > + if (type == ix86_opt_ix86_no) > + opt_set_p = !opt_set_p; > + > + if (opt_set_p) > + opts->x_ix86_target_flags |= mask; > + else > + opts->x_ix86_target_flags &= ~mask; > + } > + } > + > else if (type == ix86_opt_str) > { > if (p_strings[opt]) > @@ -2260,9 +2297,10 @@ ix86_option_override_internal (bool main_args_p, > && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU)) > opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU; > > - /* Don't enable x87 instructions if only > - general registers are allowed. 
*/ > - if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY) > + /* Don't enable x87 instructions if only general registers are > + allowed by target("general-regs-only") function attribute or > + -mgeneral-regs-only. */ > + if (!(opts->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY) > && !(opts_set->x_target_flags & MASK_80387)) > { > if (((processor_alias_table[i].flags & PTA_NO_80387) != 0)) > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index fd794961e0a..2bb9b2f72f5 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -6656,6 +6656,10 @@ Enable/disable the generation of RCPSS, RCPPS, RSQRTSS > and RSQRTPS > instructions followed an additional Newton-Raphson step instead of > doing a floating-point division. > > +@item general-regs-only > +@cindex @code{target("general-regs-only")} function attribute, x86 > +Generate code which uses only the general registers. > + > @item arch=@var{ARCH} > @cindex @code{target("arch=@var{ARCH}")} function attribute, x86 > Specify the architecture to generate code for in compiling the function. > diff --git a/gcc/testsuite/gcc.target/i386/pr96744-1.c > b/gcc/testsuite/gcc.target/i386/pr96744-1.c > new file mode 100644 > index 00000000000..46f3ce6ddd4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr96744-1.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-msse2" } */ > + > +typedef int int32x2_t __attribute__ ((__vector_size__ ((8)))); > + > +__attribute__((__target__("general-regs-only"))) > +int32x2_t test (int32x2_t a, int32x2_t b) > +{ /* { dg-error "SSE register return with SSE disabled" } */ > + return a + b; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr96744-2.c > b/gcc/testsuite/gcc.target/i386/pr96744-2.c > new file mode 100644 > index 00000000000..4a436d8ef04 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr96744-2.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile { target ia32 } } */ > +/* { dg-options "-mmmx" } */ > + > +typedef int int32x2_t __attribute__ ((__vector_size__ ((8)))); > + > +__attribute__((__target__("general-regs-only"))) > +int32x2_t > +test (int32x2_t a, int32x2_t b) /* { dg-warning "MMX vector argument without > MMX enabled" } */ > +{ /* { dg-warning "MMX vector return without MMX enabled" } */ > + return a + b; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr96744-3a.c > b/gcc/testsuite/gcc.target/i386/pr96744-3a.c > new file mode 100644 > index 00000000000..79191544eb4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr96744-3a.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-msse2" } */ > + > +typedef int int32x4_t __attribute__ ((__vector_size__ ((16)))); > +extern int32x4_t c; > + > +__attribute__((__target__("general-regs-only"))) > +void > +test (int32x4_t a, int32x4_t b) /* { dg-warning "SSE vector argument without > SSE enabled" } */ > +{ > + c = a + b; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr96744-3b.c > b/gcc/testsuite/gcc.target/i386/pr96744-3b.c > new file mode 100644 > index 00000000000..389b5cf9897 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr96744-3b.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-msse2" } */ > + > +typedef int int32x4_t __attribute__ ((__vector_size__ ((16)))); > +extern int32x4_t c; > + > +#pragma GCC push_options > +#pragma GCC 
target("general-regs-only") > + > +void > +test (int32x4_t a, int32x4_t b) /* { dg-warning "SSE vector argument without > SSE enabled" } */ > +{ > + c = a + b; > +} > + > +#pragma GCC pop_options > diff --git a/gcc/testsuite/gcc.target/i386/pr96744-4.c > b/gcc/testsuite/gcc.target/i386/pr96744-4.c > new file mode 100644 > index 00000000000..005329f95e9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr96744-4.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile { target ia32 } } */ > +/* { dg-options "-msse2" } */ > + > +typedef int int32x4_t __attribute__ ((__vector_size__ ((16)))); > + > +__attribute__((__target__("general-regs-only"))) > +int32x4_t > +test (int32x4_t a, int32x4_t b) /* { dg-warning "SSE vector argument without > SSE enabled" } */ > +{ /* { dg-warning "SSE vector return without SSE enabled" } */ > + return a + b; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr96744-5.c > b/gcc/testsuite/gcc.target/i386/pr96744-5.c > new file mode 100644 > index 00000000000..18f2132aa27 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr96744-5.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-msse2" } */ > + > +#include <stdarg.h> > + > +typedef int int32x2_t __attribute__ ((__vector_size__ ((8)))); > + > +__attribute__((__target__("general-regs-only"))) > +int > +test (int i, ...) > +{ > + va_list argp; > + va_start (argp, i); > + int32x2_t x = (int32x2_t) {0, 1}; > + x += va_arg (argp, int32x2_t); /* { dg-error "SSE register argument with > SSE disabled" } */ > + return x[0] + x[1]; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr96744-6.c > b/gcc/testsuite/gcc.target/i386/pr96744-6.c > new file mode 100644 > index 00000000000..38a3cc7fa92 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr96744-6.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-msse2 -mfpmath=sse" } */ > + > +extern float a, b, c; > + > +__attribute__((__target__("general-regs-only"))) > +void > +foo (void) > +{ > + c = a * b; /* { dg-error "SSE register return with SSE disabled" } */ > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr96744-7.c > b/gcc/testsuite/gcc.target/i386/pr96744-7.c > new file mode 100644 > index 00000000000..5f55b6cbd33 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr96744-7.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile { target { ia32 && { ! *-*-darwin* } } } } */ > +/* { dg-options "-msse2" } */ > + > +extern float a, b, c; > + > +__attribute__((__target__("general-regs-only"))) > +void > +foo (void) > +{ > + c = a * b; > +} > + > +/* { dg-final { scan-assembler-not "mulss" } } */ > +/* { dg-final { scan-assembler "call\[ \t\]__mulsf3" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr96744-8a.c > b/gcc/testsuite/gcc.target/i386/pr96744-8a.c > new file mode 100644 > index 00000000000..d264e1e01d4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr96744-8a.c > @@ -0,0 +1,33 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +extern void abort (); > + > +__attribute__((__target__("general-regs-only"))) > +int > +dec (int a, int b) > +{ > + return a + b; > +} > + > +__attribute__((__target__("general-regs-only"))) > +int > +cal (int a, int b) > +{ > + int sum1 = a * b; > + int sum2 = a / b; > + int sum = dec (sum1, sum2); > + return a + b + sum + sum1 + sum2; > +} > + > +__attribute__((__target__("general-regs-only"))) > +int > +main (int argc, char **argv) > +{ > + int ret = cal (2, 1); > + > + if (ret != 11) > + abort (); > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr96744-8b.c > b/gcc/testsuite/gcc.target/i386/pr96744-8b.c > new file mode 100644 > index 00000000000..30e763d932e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr96744-8b.c > @@ -0,0 +1,35 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +#pragma 
GCC push_options > +#pragma GCC target("general-regs-only") > + > +extern void abort (); > + > +int > +dec (int a, int b) > +{ > + return a + b; > +} > + > +int > +cal (int a, int b) > +{ > + int sum1 = a * b; > + int sum2 = a / b; > + int sum = dec (sum1, sum2); > + return a + b + sum + sum1 + sum2; > +} > + > +int > +main (int argc, char **argv) > +{ > + int ret = cal (2, 1); > + > + if (ret != 11) > + abort (); > + > + return 0; > +} > + > +#pragma GCC pop_options > diff --git a/gcc/testsuite/gcc.target/i386/pr96744-9.c > b/gcc/testsuite/gcc.target/i386/pr96744-9.c > new file mode 100644 > index 00000000000..196e86f08e9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr96744-9.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +extern void abort (); > + > +__attribute__((__target__("general-regs-only"))) > +int > +cal (int a, int b) > +{ > + int sum = a + b; > + int sum1 = a * b; > + return (a + b + sum + sum1); > +} > + > +__attribute__((__target__("general-regs-only"))) > +int > +main (int argc, char **argv) > +{ > + int ret = cal (1, 2); > + > + if (ret != 8) > + abort (); > + > + return 0; > +} > -- > 2.26.2 >