Re: [PATCH] [PR91400] build only one __cpu_model variable
On Mon, May 3, 2021 at 11:31 AM Ivan Sorokin via Gcc-patches wrote: > > Prior to this commit GCC -O2 generated quite bad code for this > function: > > bool f() > { > return __builtin_cpu_supports("popcnt") > && __builtin_cpu_supports("ssse3"); > } > > f: > movl__cpu_model+12(%rip), %eax > xorl%r8d, %r8d > testb $4, %al > je .L1 > shrl$6, %eax > movl%eax, %r8d > andl$1, %r8d > .L1: > movl%r8d, %eax > ret > > The problem was caused by the fact that internally every invocation > of __builtin_cpu_supports built a new variable __cpu_model and a new > type __processor_model. Because of this GIMPLE level optimizers > weren't able to CSE the loads of __cpu_model and optimize > bit-operations properly. > > This commit fixes the problem by caching created __cpu_model > variable and __processor_model type. Now the GCC -O2 generates: > > f: > movl__cpu_model+12(%rip), %eax > andl$68, %eax > cmpl$68, %eax > sete%al > ret The patch looks good, the function could need a comment and the global variables better names, not starting with __ Up to the x86 maintainers - HJ, can you pick up this work? Thanks, Richard. > gcc/ChangeLog: > > PR target/91400 > * config/i386/i386-builtins.c (fold_builtin_cpu): Extract > building of __cpu_model and __processor_model into new > function. > * config/i386/i386-builtins.c (init_cpu_model_var): New. > Cache creation of __cpu_model and __processor_model. > > gcc/testsuite/Changelog: > > PR target/91400 > * gcc.target/i386/pr91400.c: New. > --- > gcc/config/i386/i386-builtins.c | 27 ++--- > gcc/testsuite/gcc.target/i386/pr91400.c | 11 ++ > 2 files changed, 31 insertions(+), 7 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr91400.c > > diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c > index b66911082ab..b7d9dd18b03 100644 > --- a/gcc/config/i386/i386-builtins.c > +++ b/gcc/config/i386/i386-builtins.c > @@ -2103,6 +2103,25 @@ make_var_decl (tree type, const char *name) >return new_decl; > } > > +static GTY(()) tree __cpu_model_var; > +static GTY(()) tree __processor_model_type; > + > +static void > +init_cpu_model_var() > +{ > + if (__cpu_model_var != NULL_TREE) > +{ > + gcc_assert (__processor_model_type != NULL_TREE); > + return; > +} > + > + __processor_model_type = build_processor_model_struct (); > + __cpu_model_var = make_var_decl (__processor_model_type, > + "__cpu_model"); > + > + varpool_node::add (__cpu_model_var); > +} > + > /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is > folded > into an integer defined in libgcc/config/i386/cpuinfo.c */ > > @@ -2114,13 +2133,7 @@ fold_builtin_cpu (tree fndecl, tree *args) > = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); >tree param_string_cst = NULL; > > - tree __processor_model_type = build_processor_model_struct (); > - tree __cpu_model_var = make_var_decl (__processor_model_type, > - "__cpu_model"); > - > - > - varpool_node::add (__cpu_model_var); > - > + init_cpu_model_var (); >gcc_assert ((args != NULL) && (*args != NULL)); > >param_string_cst = *args; > diff --git a/gcc/testsuite/gcc.target/i386/pr91400.c > b/gcc/testsuite/gcc.target/i386/pr91400.c > new file mode 100644 > index 000..e8b7d9285f9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr91400.c > @@ -0,0 +1,11 @@ > +/* PR target/91400 */ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { scan-assembler-times "andl" 1 } } */ > +/* { dg-final { scan-assembler-times "68" 2 } } */ > +/* { dg-final { scan-assembler-not "je" } } */ > + > +_Bool f() > +{ > +return __builtin_cpu_supports("popcnt") && > __builtin_cpu_supports("ssse3"); > +} > -- > 2.25.1 >
Re: [PATCH] [PR91400] build only one __cpu_model variable
As GCC 11 is released and the development of GCC 12 begins I would like to send this patch again. It is basically unchanged since the previous submission in February. The only change I made is fixing formatting by adding a space after gcc_assert. Please note, that the original author of the code no longer actively contribute to GCC, so he is unlikely to review this patch. It would be great if someone else takes a look at it. On 03.05.2021 11:39, Ivan Sorokin wrote: Prior to this commit GCC -O2 generated quite bad code for this function: bool f() { return __builtin_cpu_supports("popcnt") && __builtin_cpu_supports("ssse3"); } f: movl__cpu_model+12(%rip), %eax xorl%r8d, %r8d testb $4, %al je .L1 shrl$6, %eax movl%eax, %r8d andl$1, %r8d .L1: movl%r8d, %eax ret The problem was caused by the fact that internally every invocation of __builtin_cpu_supports built a new variable __cpu_model and a new type __processor_model. Because of this GIMPLE level optimizers weren't able to CSE the loads of __cpu_model and optimize bit-operations properly. This commit fixes the problem by caching created __cpu_model variable and __processor_model type. Now the GCC -O2 generates: f: movl__cpu_model+12(%rip), %eax andl$68, %eax cmpl$68, %eax sete%al ret gcc/ChangeLog: PR target/91400 * config/i386/i386-builtins.c (fold_builtin_cpu): Extract building of __cpu_model and __processor_model into new function. * config/i386/i386-builtins.c (init_cpu_model_var): New. Cache creation of __cpu_model and __processor_model. gcc/testsuite/Changelog: PR target/91400 * gcc.target/i386/pr91400.c: New. --- gcc/config/i386/i386-builtins.c | 27 ++--- gcc/testsuite/gcc.target/i386/pr91400.c | 11 ++ 2 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr91400.c diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c index b66911082ab..b7d9dd18b03 100644 --- a/gcc/config/i386/i386-builtins.c +++ b/gcc/config/i386/i386-builtins.c @@ -2103,6 +2103,25 @@ make_var_decl (tree type, const char *name) return new_decl; } +static GTY(()) tree __cpu_model_var; +static GTY(()) tree __processor_model_type; + +static void +init_cpu_model_var() +{ + if (__cpu_model_var != NULL_TREE) +{ + gcc_assert (__processor_model_type != NULL_TREE); + return; +} + + __processor_model_type = build_processor_model_struct (); + __cpu_model_var = make_var_decl (__processor_model_type, + "__cpu_model"); + + varpool_node::add (__cpu_model_var); +} + /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded into an integer defined in libgcc/config/i386/cpuinfo.c */ @@ -2114,13 +2133,7 @@ fold_builtin_cpu (tree fndecl, tree *args) = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); tree param_string_cst = NULL; - tree __processor_model_type = build_processor_model_struct (); - tree __cpu_model_var = make_var_decl (__processor_model_type, - "__cpu_model"); - - - varpool_node::add (__cpu_model_var); - + init_cpu_model_var (); gcc_assert ((args != NULL) && (*args != NULL)); param_string_cst = *args; diff --git a/gcc/testsuite/gcc.target/i386/pr91400.c b/gcc/testsuite/gcc.target/i386/pr91400.c new file mode 100644 index 000..e8b7d9285f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91400.c @@ -0,0 +1,11 @@ +/* PR target/91400 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "andl" 1 } } */ +/* { dg-final { scan-assembler-times "68" 2 } } */ +/* { dg-final { scan-assembler-not "je" } } */ + +_Bool f() +{ +return __builtin_cpu_supports("popcnt") && __builtin_cpu_supports("ssse3"); +}
[PATCH] [PR91400] build only one __cpu_model variable
Prior to this commit GCC -O2 generated quite bad code for this function: bool f() { return __builtin_cpu_supports("popcnt") && __builtin_cpu_supports("ssse3"); } f: movl__cpu_model+12(%rip), %eax xorl%r8d, %r8d testb $4, %al je .L1 shrl$6, %eax movl%eax, %r8d andl$1, %r8d .L1: movl%r8d, %eax ret The problem was caused by the fact that internally every invocation of __builtin_cpu_supports built a new variable __cpu_model and a new type __processor_model. Because of this GIMPLE level optimizers weren't able to CSE the loads of __cpu_model and optimize bit-operations properly. This commit fixes the problem by caching created __cpu_model variable and __processor_model type. Now the GCC -O2 generates: f: movl__cpu_model+12(%rip), %eax andl$68, %eax cmpl$68, %eax sete%al ret gcc/ChangeLog: PR target/91400 * config/i386/i386-builtins.c (fold_builtin_cpu): Extract building of __cpu_model and __processor_model into new function. * config/i386/i386-builtins.c (init_cpu_model_var): New. Cache creation of __cpu_model and __processor_model. gcc/testsuite/Changelog: PR target/91400 * gcc.target/i386/pr91400.c: New. --- gcc/config/i386/i386-builtins.c | 27 ++--- gcc/testsuite/gcc.target/i386/pr91400.c | 11 ++ 2 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr91400.c diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c index b66911082ab..b7d9dd18b03 100644 --- a/gcc/config/i386/i386-builtins.c +++ b/gcc/config/i386/i386-builtins.c @@ -2103,6 +2103,25 @@ make_var_decl (tree type, const char *name) return new_decl; } +static GTY(()) tree __cpu_model_var; +static GTY(()) tree __processor_model_type; + +static void +init_cpu_model_var() +{ + if (__cpu_model_var != NULL_TREE) +{ + gcc_assert (__processor_model_type != NULL_TREE); + return; +} + + __processor_model_type = build_processor_model_struct (); + __cpu_model_var = make_var_decl (__processor_model_type, + "__cpu_model"); + + varpool_node::add (__cpu_model_var); +} + /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded into an integer defined in libgcc/config/i386/cpuinfo.c */ @@ -2114,13 +2133,7 @@ fold_builtin_cpu (tree fndecl, tree *args) = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); tree param_string_cst = NULL; - tree __processor_model_type = build_processor_model_struct (); - tree __cpu_model_var = make_var_decl (__processor_model_type, - "__cpu_model"); - - - varpool_node::add (__cpu_model_var); - + init_cpu_model_var (); gcc_assert ((args != NULL) && (*args != NULL)); param_string_cst = *args; diff --git a/gcc/testsuite/gcc.target/i386/pr91400.c b/gcc/testsuite/gcc.target/i386/pr91400.c new file mode 100644 index 000..e8b7d9285f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91400.c @@ -0,0 +1,11 @@ +/* PR target/91400 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "andl" 1 } } */ +/* { dg-final { scan-assembler-times "68" 2 } } */ +/* { dg-final { scan-assembler-not "je" } } */ + +_Bool f() +{ +return __builtin_cpu_supports("popcnt") && __builtin_cpu_supports("ssse3"); +} -- 2.25.1
Re: [PATCH] [PR91400] build only one __cpu_model variable
On 02.02.2021 02:00, Ivan Sorokin wrote: > [PATCH] [PR91400] build only one __cpu_model variable This is my first patch to GCC. So I might have done something totally stupid or totally wrong. Caution is required for reviewing. :-) > gcc/ChangeLog: > > PR target/91400 > * config/i386/i386-builtins.c (fold_builtin_cpu): Extract > building of __cpu_model and __processor_model into new > function. > * config/i386/i386-builtins.c (init_cpu_model_var): New. > Cache creation of __cpu_model and __processor_model. > > gcc/testsuite/Changelog: > > PR target/91400 > * gcc.target/i386/pr91400.c: New. I wrote the change log text manually. I hope I didn't mess up the formatting. > +static GTY(()) tree __cpu_model_var; > +static GTY(()) tree __processor_model_type; I felt a bit uneasy writing global variables, but this file contains other global variables already and they are used for similar purpose. > +static void > +init_cpu_model_var() > +{ > + if (__cpu_model_var != NULL_TREE) > +{ > + gcc_assert(__processor_model_type != NULL_TREE); > + return; > +} > + > + __processor_model_type = build_processor_model_struct (); > + __cpu_model_var = make_var_decl (__processor_model_type, > +"__cpu_model"); > + > + varpool_node::add (__cpu_model_var); I have no idea what this line does. But I decided that perhaps we want to do it only once instead of once for each usage of __builtin_cpu_supports. > diff --git a/gcc/testsuite/gcc.target/i386/pr91400.c > b/gcc/testsuite/gcc.target/i386/pr91400.c > new file mode 100644 > index 000..e8b7d9285f9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr91400.c > @@ -0,0 +1,11 @@ > +/* PR target/91400 */ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { scan-assembler-times "andl" 1 } } */ > +/* { dg-final { scan-assembler-times "68" 2 } } */ > +/* { dg-final { scan-assembler-not "je" } } */ > + > +_Bool f() > +{ > +return __builtin_cpu_supports("popcnt") && > __builtin_cpu_supports("ssse3"); > +} The test was the most complicated thing for me. Previous versions of GCC did two instructions andl, so I written check for them. Current master does a conditional jump, so I add test for it too. Any suggestions for better checks are welcomed.
[PATCH] [PR91400] build only one __cpu_model variable
Prior to this commit GCC -O2 generated quite bad code for this function: bool f() { return __builtin_cpu_supports("popcnt") && __builtin_cpu_supports("ssse3"); } f: movl__cpu_model+12(%rip), %eax xorl%r8d, %r8d testb $4, %al je .L1 shrl$6, %eax movl%eax, %r8d andl$1, %r8d .L1: movl%r8d, %eax ret The problem was caused by the fact that internally every invocation of __builtin_cpu_supports built a new variable __cpu_model and a new type __processor_model. Because of this GIMPLE level optimizers weren't able to CSE the loads of __cpu_model and optimize bit-operations properly. This commit fixes the problem by caching created __cpu_model variable and __processor_model type. Now the GCC -O2 generates: f: movl__cpu_model+12(%rip), %eax andl$68, %eax cmpl$68, %eax sete%al ret gcc/ChangeLog: PR target/91400 * config/i386/i386-builtins.c (fold_builtin_cpu): Extract building of __cpu_model and __processor_model into new function. * config/i386/i386-builtins.c (init_cpu_model_var): New. Cache creation of __cpu_model and __processor_model. gcc/testsuite/Changelog: PR target/91400 * gcc.target/i386/pr91400.c: New. --- gcc/config/i386/i386-builtins.c | 27 ++--- gcc/testsuite/gcc.target/i386/pr91400.c | 11 ++ 2 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr91400.c diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c index 4fcdf4b89ee..96534318756 100644 --- a/gcc/config/i386/i386-builtins.c +++ b/gcc/config/i386/i386-builtins.c @@ -2085,6 +2085,25 @@ make_var_decl (tree type, const char *name) return new_decl; } +static GTY(()) tree __cpu_model_var; +static GTY(()) tree __processor_model_type; + +static void +init_cpu_model_var() +{ + if (__cpu_model_var != NULL_TREE) +{ + gcc_assert(__processor_model_type != NULL_TREE); + return; +} + + __processor_model_type = build_processor_model_struct (); + __cpu_model_var = make_var_decl (__processor_model_type, + "__cpu_model"); + + varpool_node::add (__cpu_model_var); +} + /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded into an integer defined in libgcc/config/i386/cpuinfo.c */ @@ -2096,13 +2115,7 @@ fold_builtin_cpu (tree fndecl, tree *args) = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); tree param_string_cst = NULL; - tree __processor_model_type = build_processor_model_struct (); - tree __cpu_model_var = make_var_decl (__processor_model_type, - "__cpu_model"); - - - varpool_node::add (__cpu_model_var); - + init_cpu_model_var (); gcc_assert ((args != NULL) && (*args != NULL)); param_string_cst = *args; diff --git a/gcc/testsuite/gcc.target/i386/pr91400.c b/gcc/testsuite/gcc.target/i386/pr91400.c new file mode 100644 index 000..e8b7d9285f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91400.c @@ -0,0 +1,11 @@ +/* PR target/91400 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "andl" 1 } } */ +/* { dg-final { scan-assembler-times "68" 2 } } */ +/* { dg-final { scan-assembler-not "je" } } */ + +_Bool f() +{ +return __builtin_cpu_supports("popcnt") && __builtin_cpu_supports("ssse3"); +} -- 2.25.1