On Wed, Jun 22, 2022 at 11:03 PM Richard Biener <richard.guent...@gmail.com> wrote: > > On Wed, Jun 22, 2022 at 7:13 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > On Wed, Jun 22, 2022 at 4:39 AM Richard Biener > > <richard.guent...@gmail.com> wrote: > > > > > > On Tue, Jun 21, 2022 at 11:03 PM H.J. Lu via Gcc-patches > > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > > > When memchr is applied on a constant string of no more than the bytes of > > > > a word, inline memchr by checking each byte in the constant string. > > > > > > > > int f (int a) > > > > { > > > > return __builtin_memchr ("eE", a, 2) != 0; > > > > } > > > > > > > > is simplified to > > > > > > > > int f (int a) > > > > { > > > > return (char) a == 'e' || (char) a == 'E'; > > > > } > > > > > > > > gcc/ > > > > > > > > PR tree-optimization/103798 > > > > * match.pd (__builtin_memchr (const_str, a, N)): Inline memchr > > > > with constant strings of no more than the bytes of a word. > > > > > > Please do this in strlenopt or so, with match.pd you will end up moving > > > the memchr loads across possible aliasing stores to the point of the > > > comparison. > > > > strlenopt is run after many other passes. The code won't be well optimized. > > What followup optimizations do you expect? That is, other builtins are only
reassociation and dce turn _5 = a_2(D) == 101; _6 = a_2(D) == 69; _1 = _5 | _6; _4 = (int) _1; into _7 = a_2(D) & -33; _8 = _7 == 69; _1 = _8; _4 = (int) _1; > expanded inline at RTL expansion time? Some high level optimizations will be missed and TARGET_GIMPLE_FOLD_BUILTIN improves builtins codegen. > > Since we are only optimizing > > > > __builtin_memchr ("eE", a, 2) != 0; > > > > I don't see any aliasing store issues here. > > Ah, I failed to see the STRING_CST restriction. Note that when optimizing for > size this doesn't look very good. True. > I would expect a target might produce some vector code for > memchr ("aAbBcCdDeE...", c, 9) != 0 by splatting 'c', doing > a v16qimode compare, masking off excess elements beyond length > and then comparing against zero or for == 0 against all-ones. > > The repetitive pattern result also suggests an implementation elsewhere, > if you think strlenopt is too late there would be forwprop as well. forwprop seems a good place. Thanks. > Richard. > > > > > > Richard. > > > > > > > gcc/testsuite/ > > > > > > > > PR tree-optimization/103798 > > > > * c-c++-common/pr103798-1.c: New test. > > > > * c-c++-common/pr103798-2.c: Likewise. > > > > * c-c++-common/pr103798-3.c: Likewise. > > > > * c-c++-common/pr103798-4.c: Likewise. > > > > * c-c++-common/pr103798-5.c: Likewise. > > > > * c-c++-common/pr103798-6.c: Likewise. > > > > * c-c++-common/pr103798-7.c: Likewise. > > > > * c-c++-common/pr103798-8.c: Likewise. > > > > --- > > > > gcc/match.pd | 136 ++++++++++++++++++++++++ > > > > gcc/testsuite/c-c++-common/pr103798-1.c | 28 +++++ > > > > gcc/testsuite/c-c++-common/pr103798-2.c | 30 ++++++ > > > > gcc/testsuite/c-c++-common/pr103798-3.c | 28 +++++ > > > > gcc/testsuite/c-c++-common/pr103798-4.c | 28 +++++ > > > > gcc/testsuite/c-c++-common/pr103798-5.c | 26 +++++ > > > > gcc/testsuite/c-c++-common/pr103798-6.c | 27 +++++ > > > > gcc/testsuite/c-c++-common/pr103798-7.c | 27 +++++ > > > > gcc/testsuite/c-c++-common/pr103798-8.c | 27 +++++ > > > > 9 files changed, 357 insertions(+) > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-1.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-2.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-3.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-4.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-5.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-6.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-7.c > > > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-8.c > > > > > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > > > index a63b649841b..aa4766749af 100644 > > > > --- a/gcc/match.pd > > > > +++ b/gcc/match.pd > > > > @@ -7976,3 +7976,139 @@ and, > > > > (match (bitwise_induction_p @0 @2 @3) > > > > (bit_not > > > > (nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 @2)) > > > > @3)))) > > > > + > > > > +#if GIMPLE > > > > +/* __builtin_memchr (const_str, a, N) != 0 -> > > > > + a == const_str[0] .. || a == const_str[N-1] > > > > + __builtin_memchr (const_str, a, N) == 0 -> > > > > + a != const_str[0] .. && a != const_str[N-1] > > > > + where N is less than the string size. */ > > > > +(for cmp (eq ne) > > > > + icmp (ne eq) > > > > + bit_op (bit_and bit_ior) > > > > + (simplify (cmp:c @0 (BUILT_IN_MEMCHR ADDR_EXPR@1 @2 INTEGER_CST@3)) > > > > + (if (UNITS_PER_WORD <= 8 > > > > + && CHAR_TYPE_SIZE == 8 > > > > + && BITS_PER_UNIT == 8 > > > > + && CHAR_BIT == 8 > > > > + && integer_zerop (@0) > > > > + && !integer_zerop (@3) > > > > + && TREE_CODE (TREE_OPERAND (@1, 0)) == STRING_CST > > > > + && TREE_STRING_LENGTH (TREE_OPERAND (@1, 0)) >= 2 > > > > + && wi::leu_p (wi::to_wide (@3), UNITS_PER_WORD) > > > > + && wi::ltu_p (wi::to_wide (@3), > > > > + TREE_STRING_LENGTH (TREE_OPERAND (@1, 0)))) > > > > + (with > > > > + { > > > > + const char *p = TREE_STRING_POINTER (TREE_OPERAND (@1, 0)); > > > > + unsigned HOST_WIDE_INT size = TREE_INT_CST_LOW (@3); > > > > + } > > > > + (switch > > > > + (if (size == 1) > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[0]); })) > > > > + (if (size == 2) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[0]); }) > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[1]); }))) > > > > + (if (size == 3) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[0]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[1]); }) > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[2]); })))) > > > > + (if (size == 4) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[0]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[1]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[2]); }) > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[3]); }))))) > > > > + (if (size == 5) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[0]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[1]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[2]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[3]); }) > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[4]); })))))) > > > > + (if (size == 6) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[0]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[1]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[2]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[3]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[4]); }) > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[5]); }))))))) > > > > + (if (size == 7) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[0]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[1]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[2]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[3]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[4]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[5]); }) > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[6]); })))))))) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[0]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[1]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[2]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[3]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[4]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[5]); }) > > > > + (bit_op > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[6]); }) > > > > + (icmp (convert:char_type_node @2) > > > > + { build_int_cst (char_type_node, p[7]); }))))))))))))) > > > > +#endif > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-1.c > > > > b/gcc/testsuite/c-c++-common/pr103798-1.c > > > > new file mode 100644 > > > > index 00000000000..cd3edf569fc > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-1.c > > > > @@ -0,0 +1,28 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +f (char a) > > > > +{ > > > > + return __builtin_memchr ("a", a, 1) == 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +g (char a) > > > > +{ > > > > + return a != 'a'; > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c > > > > b/gcc/testsuite/c-c++-common/pr103798-2.c > > > > new file mode 100644 > > > > index 00000000000..e7e99c3679e > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-2.c > > > > @@ -0,0 +1,30 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +#include <string.h> > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +f (int a) > > > > +{ > > > > + return memchr ("aE", a, 2) != NULL; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +g (char a) > > > > +{ > > > > + return a == 'a' || a == 'E'; > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i + 256) != g (i + 256)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-3.c > > > > b/gcc/testsuite/c-c++-common/pr103798-3.c > > > > new file mode 100644 > > > > index 00000000000..ddcedc7e238 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-3.c > > > > @@ -0,0 +1,28 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +f (char a) > > > > +{ > > > > + return __builtin_memchr ("aEgZ", a, 3) == 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +g (char a) > > > > +{ > > > > + return a != 'a' && a != 'E' && a != 'g'; > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-4.c > > > > b/gcc/testsuite/c-c++-common/pr103798-4.c > > > > new file mode 100644 > > > > index 00000000000..00e8302a833 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-4.c > > > > @@ -0,0 +1,28 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +f (char a) > > > > +{ > > > > + return __builtin_memchr ("aEgi", a, 4) != 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int > > > > +g (char a) > > > > +{ > > > > + return a == 'a' || a == 'E' || a == 'g' || a == 'i'; > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-5.c > > > > b/gcc/testsuite/c-c++-common/pr103798-5.c > > > > new file mode 100644 > > > > index 00000000000..0d6487a13df > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-5.c > > > > @@ -0,0 +1,26 @@ > > > > +/* { dg-do run { target int128 } } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int f(char a) > > > > +{ > > > > + return __builtin_memchr ("aEgiH", a, 5) == 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int g(char a) > > > > +{ > > > > + return a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H'; > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-6.c > > > > b/gcc/testsuite/c-c++-common/pr103798-6.c > > > > new file mode 100644 > > > > index 00000000000..5ccb5ee66e0 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-6.c > > > > @@ -0,0 +1,27 @@ > > > > +/* { dg-do run { target int128 } } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int f(char a) > > > > +{ > > > > + return __builtin_memchr ("aEgiHx", a, 6) != 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int g(char a) > > > > +{ > > > > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > > > > + || a == 'x'); > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-7.c > > > > b/gcc/testsuite/c-c++-common/pr103798-7.c > > > > new file mode 100644 > > > > index 00000000000..40fd38257d1 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-7.c > > > > @@ -0,0 +1,27 @@ > > > > +/* { dg-do run { target int128 } } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int f(char a) > > > > +{ > > > > + return __builtin_memchr ("aEgiHjZ", a, 7) == 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int g(char a) > > > > +{ > > > > + return (a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H' > > > > + && a != 'j' && a != 'Z'); > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i) != g (i)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-8.c > > > > b/gcc/testsuite/c-c++-common/pr103798-8.c > > > > new file mode 100644 > > > > index 00000000000..0841b18cea4 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/c-c++-common/pr103798-8.c > > > > @@ -0,0 +1,27 @@ > > > > +/* { dg-do run { target int128 } } */ > > > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > > > + > > > > +__attribute__ ((weak)) > > > > +int f(int a) > > > > +{ > > > > + return __builtin_memchr ("aEgiHx19ABC", a, 8) != 0; > > > > +} > > > > + > > > > +__attribute__ ((weak)) > > > > +int g(char a) > > > > +{ > > > > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > > > > + || a == 'x' || a == '1' || a == '9'); > > > > +} > > > > + > > > > +int > > > > +main () > > > > +{ > > > > + for (int i = 0; i < 255; i++) > > > > + if (f (i + 256) != g (i + 256)) > > > > + __builtin_abort (); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > > > -- > > > > 2.36.1 > > > > > > > > > > > > -- > > H.J. -- H.J.