Re: [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.
On Wed, May 18, 2022 at 4:45 AM Hongtao Liu wrote: > > On Fri, May 13, 2022 at 7:16 PM Richard Biener > wrote: > > > > On Fri, May 13, 2022 at 5:37 AM Hongtao Liu wrote: > > > > > > On Wed, May 11, 2022 at 4:45 PM Richard Biener via Gcc-patches > > > wrote: > > > > > > > > On Mon, May 9, 2022 at 7:19 AM liuhongt wrote: > > > > > > > > > > This patch will enable below optimization: > > > > > > > > > > { > > > > > - int bit; > > > > > - long long unsigned int _1; > > > > > - long long unsigned int _2; > > > > > - > > > > > [local count: 46707768]: > > > > > - > > > > > - [local count: 1027034057]: > > > > > - # tmp_11 = PHI > > > > > - # bit_13 = PHI > > > > > - _1 = 1 << bit_13; > > > > > - _2 = ~_1; > > > > > - tmp_8 = _2 & tmp_11; > > > > > - bit_9 = bit_13 + -3; > > > > > - if (bit_9 != -3(OVF)) > > > > > -goto ; [95.65%] > > > > > - else > > > > > -goto ; [4.35%] > > > > > - > > > > > - [local count: 46707768]: > > > > > - return tmp_8; > > > > > + tmp_12 = tmp_6(D) & 7905747460161236406; > > > > > + return tmp_12; > > > > > > > > > > } > > > > > > > > > > > > > > > Boostrapped and regtested on x86_64-pc-linux-gnu{-m32,} > > > > > Ok for trunk? > > > > > > > > > > gcc/ChangeLog: > > > > > > > > > > PR middle-end/103462 > > > > > * match.pd (bitwise_induction_p): New match. > > > > > * tree-scalar-evolution.c (gimple_bitwise_induction_p): > > > > > Declare. > > > > > (analyze_and_compute_bitwise_induction_effect): New function. > > > > > (enum bit_op_kind): New enum. > > > > > (final_value_replacement_loop): Enhanced to handle bitwise > > > > > induction. > > > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > > > * gcc.target/i386/pr103462-1.c: New test. > > > > > * gcc.target/i386/pr103462-2.c: New test. > > > > > * gcc.target/i386/pr103462-3.c: New test. > > > > > * gcc.target/i386/pr103462-4.c: New test. > > > > > * gcc.target/i386/pr103462-5.c: New test. > > > > > * gcc.target/i386/pr103462-6.c: New test. 
> > > > > --- > > > > > gcc/match.pd | 7 + > > > > > gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 + > > > > > gcc/testsuite/gcc.target/i386/pr103462-2.c | 45 ++ > > > > > gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 + > > > > > gcc/testsuite/gcc.target/i386/pr103462-4.c | 46 ++ > > > > > gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 + > > > > > gcc/testsuite/gcc.target/i386/pr103462-6.c | 46 ++ > > > > > gcc/tree-scalar-evolution.cc | 178 > > > > > - > > > > > 8 files changed, 654 insertions(+), 1 deletion(-) > > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c > > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c > > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c > > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c > > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c > > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c > > > > > > > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > > > > index 6d691d302b3..24ff5f9e6a8 100644 > > > > > --- a/gcc/match.pd > > > > > +++ b/gcc/match.pd > > > > > @@ -7746,3 +7746,10 @@ and, > > > > >== TYPE_UNSIGNED (TREE_TYPE (@3 > > > > > && single_use (@4) > > > > > && single_use (@5 > > > > > + > > > > > +(for bit_op (bit_and bit_ior bit_xor) > > > > > + (match (bitwise_induction_p @0 @2 @3) > > > > > + (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift > > > > > integer_onep@1 @2 @3))) > > > > > + > > > > > +(match (bitwise_induction_p @0 @2 @3) > > > > > + (bit_not (nop_convert1? (bit_xor@0 (convert2? 
(lshift > > > > > integer_onep@1 @2)) @3 > > > > > diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c > > > > > b/gcc/testsuite/gcc.target/i386/pr103462-1.c > > > > > new file mode 100644 > > > > > index 000..1dc4c2acad6 > > > > > --- /dev/null > > > > > +++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c > > > > > @@ -0,0 +1,111 @@ > > > > > +/* { dg-do compile } */ > > > > > +/* { dg-options "-O1 -fdump-tree-sccp-details" } */ > > > > > +/* { dg-final { scan-tree-dump-times {final value replacement} 12 > > > > > "sccp" } } */ > > > > > + > > > > > +unsigned long long > > > > > +__attribute__((noipa)) > > > > > +foo (unsigned long long tmp) > > > > > +{ > > > > > + for (int bit = 0; bit < 64; bit += 3) > > > > > +tmp &= ~(1ULL << bit); > > > > > + return tmp; > > > > > +} > > > > > + > > > > > +unsigned long long > > > > > +__attribute__((noipa)) > > > > > +foo1 (unsigned long long tmp) > > > > > +{ > > > > > + for (int bit = 63; bit >= 0; bit -= 3) > > > > > +tmp &= ~(1ULL << bit); > > > > > + return tmp; > > > > > +} > > > > > + > > > > > +unsigned long long > > > > >
Re: [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.
On Fri, May 13, 2022 at 7:16 PM Richard Biener wrote: > > On Fri, May 13, 2022 at 5:37 AM Hongtao Liu wrote: > > > > On Wed, May 11, 2022 at 4:45 PM Richard Biener via Gcc-patches > > wrote: > > > > > > On Mon, May 9, 2022 at 7:19 AM liuhongt wrote: > > > > > > > > This patch will enable below optimization: > > > > > > > > { > > > > - int bit; > > > > - long long unsigned int _1; > > > > - long long unsigned int _2; > > > > - > > > > [local count: 46707768]: > > > > - > > > > - [local count: 1027034057]: > > > > - # tmp_11 = PHI > > > > - # bit_13 = PHI > > > > - _1 = 1 << bit_13; > > > > - _2 = ~_1; > > > > - tmp_8 = _2 & tmp_11; > > > > - bit_9 = bit_13 + -3; > > > > - if (bit_9 != -3(OVF)) > > > > -goto ; [95.65%] > > > > - else > > > > -goto ; [4.35%] > > > > - > > > > - [local count: 46707768]: > > > > - return tmp_8; > > > > + tmp_12 = tmp_6(D) & 7905747460161236406; > > > > + return tmp_12; > > > > > > > > } > > > > > > > > > > > > Boostrapped and regtested on x86_64-pc-linux-gnu{-m32,} > > > > Ok for trunk? > > > > > > > > gcc/ChangeLog: > > > > > > > > PR middle-end/103462 > > > > * match.pd (bitwise_induction_p): New match. > > > > * tree-scalar-evolution.c (gimple_bitwise_induction_p): > > > > Declare. > > > > (analyze_and_compute_bitwise_induction_effect): New function. > > > > (enum bit_op_kind): New enum. > > > > (final_value_replacement_loop): Enhanced to handle bitwise > > > > induction. > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > * gcc.target/i386/pr103462-1.c: New test. > > > > * gcc.target/i386/pr103462-2.c: New test. > > > > * gcc.target/i386/pr103462-3.c: New test. > > > > * gcc.target/i386/pr103462-4.c: New test. > > > > * gcc.target/i386/pr103462-5.c: New test. > > > > * gcc.target/i386/pr103462-6.c: New test. 
> > > > --- > > > > gcc/match.pd | 7 + > > > > gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 + > > > > gcc/testsuite/gcc.target/i386/pr103462-2.c | 45 ++ > > > > gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 + > > > > gcc/testsuite/gcc.target/i386/pr103462-4.c | 46 ++ > > > > gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 + > > > > gcc/testsuite/gcc.target/i386/pr103462-6.c | 46 ++ > > > > gcc/tree-scalar-evolution.cc | 178 - > > > > 8 files changed, 654 insertions(+), 1 deletion(-) > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c > > > > > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > > > index 6d691d302b3..24ff5f9e6a8 100644 > > > > --- a/gcc/match.pd > > > > +++ b/gcc/match.pd > > > > @@ -7746,3 +7746,10 @@ and, > > > >== TYPE_UNSIGNED (TREE_TYPE (@3 > > > > && single_use (@4) > > > > && single_use (@5 > > > > + > > > > +(for bit_op (bit_and bit_ior bit_xor) > > > > + (match (bitwise_induction_p @0 @2 @3) > > > > + (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift > > > > integer_onep@1 @2 @3))) > > > > + > > > > +(match (bitwise_induction_p @0 @2 @3) > > > > + (bit_not (nop_convert1? (bit_xor@0 (convert2? 
(lshift integer_onep@1 > > > > @2)) @3 > > > > diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c > > > > b/gcc/testsuite/gcc.target/i386/pr103462-1.c > > > > new file mode 100644 > > > > index 000..1dc4c2acad6 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c > > > > @@ -0,0 +1,111 @@ > > > > +/* { dg-do compile } */ > > > > +/* { dg-options "-O1 -fdump-tree-sccp-details" } */ > > > > +/* { dg-final { scan-tree-dump-times {final value replacement} 12 > > > > "sccp" } } */ > > > > + > > > > +unsigned long long > > > > +__attribute__((noipa)) > > > > +foo (unsigned long long tmp) > > > > +{ > > > > + for (int bit = 0; bit < 64; bit += 3) > > > > +tmp &= ~(1ULL << bit); > > > > + return tmp; > > > > +} > > > > + > > > > +unsigned long long > > > > +__attribute__((noipa)) > > > > +foo1 (unsigned long long tmp) > > > > +{ > > > > + for (int bit = 63; bit >= 0; bit -= 3) > > > > +tmp &= ~(1ULL << bit); > > > > + return tmp; > > > > +} > > > > + > > > > +unsigned long long > > > > +__attribute__((noipa)) > > > > +foo2 (unsigned long long tmp) > > > > +{ > > > > + for (int bit = 0; bit < 64; bit += 3) > > > > +tmp &= (1ULL << bit); > > > > + return tmp; > > > > +} > > > > + > > > > +unsigned long long > > > > +__attribute__((noipa)) > > > > +foo3 (unsigned long long tmp) > > > > +{ > > > > +
Re: [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.
On Fri, May 13, 2022 at 5:37 AM Hongtao Liu wrote: > > On Wed, May 11, 2022 at 4:45 PM Richard Biener via Gcc-patches > wrote: > > > > On Mon, May 9, 2022 at 7:19 AM liuhongt wrote: > > > > > > This patch will enable below optimization: > > > > > > { > > > - int bit; > > > - long long unsigned int _1; > > > - long long unsigned int _2; > > > - > > > [local count: 46707768]: > > > - > > > - [local count: 1027034057]: > > > - # tmp_11 = PHI > > > - # bit_13 = PHI > > > - _1 = 1 << bit_13; > > > - _2 = ~_1; > > > - tmp_8 = _2 & tmp_11; > > > - bit_9 = bit_13 + -3; > > > - if (bit_9 != -3(OVF)) > > > -goto ; [95.65%] > > > - else > > > -goto ; [4.35%] > > > - > > > - [local count: 46707768]: > > > - return tmp_8; > > > + tmp_12 = tmp_6(D) & 7905747460161236406; > > > + return tmp_12; > > > > > > } > > > > > > > > > Boostrapped and regtested on x86_64-pc-linux-gnu{-m32,} > > > Ok for trunk? > > > > > > gcc/ChangeLog: > > > > > > PR middle-end/103462 > > > * match.pd (bitwise_induction_p): New match. > > > * tree-scalar-evolution.c (gimple_bitwise_induction_p): > > > Declare. > > > (analyze_and_compute_bitwise_induction_effect): New function. > > > (enum bit_op_kind): New enum. > > > (final_value_replacement_loop): Enhanced to handle bitwise > > > induction. > > > > > > gcc/testsuite/ChangeLog: > > > > > > * gcc.target/i386/pr103462-1.c: New test. > > > * gcc.target/i386/pr103462-2.c: New test. > > > * gcc.target/i386/pr103462-3.c: New test. > > > * gcc.target/i386/pr103462-4.c: New test. > > > * gcc.target/i386/pr103462-5.c: New test. > > > * gcc.target/i386/pr103462-6.c: New test. 
> > > --- > > > gcc/match.pd | 7 + > > > gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 + > > > gcc/testsuite/gcc.target/i386/pr103462-2.c | 45 ++ > > > gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 + > > > gcc/testsuite/gcc.target/i386/pr103462-4.c | 46 ++ > > > gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 + > > > gcc/testsuite/gcc.target/i386/pr103462-6.c | 46 ++ > > > gcc/tree-scalar-evolution.cc | 178 - > > > 8 files changed, 654 insertions(+), 1 deletion(-) > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c > > > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > > index 6d691d302b3..24ff5f9e6a8 100644 > > > --- a/gcc/match.pd > > > +++ b/gcc/match.pd > > > @@ -7746,3 +7746,10 @@ and, > > >== TYPE_UNSIGNED (TREE_TYPE (@3 > > > && single_use (@4) > > > && single_use (@5 > > > + > > > +(for bit_op (bit_and bit_ior bit_xor) > > > + (match (bitwise_induction_p @0 @2 @3) > > > + (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift > > > integer_onep@1 @2 @3))) > > > + > > > +(match (bitwise_induction_p @0 @2 @3) > > > + (bit_not (nop_convert1? (bit_xor@0 (convert2? 
(lshift integer_onep@1 > > > @2)) @3 > > > diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c > > > b/gcc/testsuite/gcc.target/i386/pr103462-1.c > > > new file mode 100644 > > > index 000..1dc4c2acad6 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c > > > @@ -0,0 +1,111 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O1 -fdump-tree-sccp-details" } */ > > > +/* { dg-final { scan-tree-dump-times {final value replacement} 12 "sccp" > > > } } */ > > > + > > > +unsigned long long > > > +__attribute__((noipa)) > > > +foo (unsigned long long tmp) > > > +{ > > > + for (int bit = 0; bit < 64; bit += 3) > > > +tmp &= ~(1ULL << bit); > > > + return tmp; > > > +} > > > + > > > +unsigned long long > > > +__attribute__((noipa)) > > > +foo1 (unsigned long long tmp) > > > +{ > > > + for (int bit = 63; bit >= 0; bit -= 3) > > > +tmp &= ~(1ULL << bit); > > > + return tmp; > > > +} > > > + > > > +unsigned long long > > > +__attribute__((noipa)) > > > +foo2 (unsigned long long tmp) > > > +{ > > > + for (int bit = 0; bit < 64; bit += 3) > > > +tmp &= (1ULL << bit); > > > + return tmp; > > > +} > > > + > > > +unsigned long long > > > +__attribute__((noipa)) > > > +foo3 (unsigned long long tmp) > > > +{ > > > + for (int bit = 63; bit >= 0; bit -= 3) > > > +tmp &= (1ULL << bit); > > > + return tmp; > > > +} > > > + > > > +unsigned long long > > > +__attribute__((noipa)) > > > +foo4 (unsigned long long tmp) > > > +{ > > > + for (int bit = 0; bit < 64; bit += 3) > > > +tmp |= ~(1ULL << bit); > > > + return tmp; > > > +} > > >
Re: [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.
On Wed, May 11, 2022 at 4:45 PM Richard Biener via Gcc-patches wrote: > > On Mon, May 9, 2022 at 7:19 AM liuhongt wrote: > > > > This patch will enable below optimization: > > > > { > > - int bit; > > - long long unsigned int _1; > > - long long unsigned int _2; > > - > > [local count: 46707768]: > > - > > - [local count: 1027034057]: > > - # tmp_11 = PHI > > - # bit_13 = PHI > > - _1 = 1 << bit_13; > > - _2 = ~_1; > > - tmp_8 = _2 & tmp_11; > > - bit_9 = bit_13 + -3; > > - if (bit_9 != -3(OVF)) > > -goto ; [95.65%] > > - else > > -goto ; [4.35%] > > - > > - [local count: 46707768]: > > - return tmp_8; > > + tmp_12 = tmp_6(D) & 7905747460161236406; > > + return tmp_12; > > > > } > > > > > > Boostrapped and regtested on x86_64-pc-linux-gnu{-m32,} > > Ok for trunk? > > > > gcc/ChangeLog: > > > > PR middle-end/103462 > > * match.pd (bitwise_induction_p): New match. > > * tree-scalar-evolution.c (gimple_bitwise_induction_p): > > Declare. > > (analyze_and_compute_bitwise_induction_effect): New function. > > (enum bit_op_kind): New enum. > > (final_value_replacement_loop): Enhanced to handle bitwise > > induction. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/pr103462-1.c: New test. > > * gcc.target/i386/pr103462-2.c: New test. > > * gcc.target/i386/pr103462-3.c: New test. > > * gcc.target/i386/pr103462-4.c: New test. > > * gcc.target/i386/pr103462-5.c: New test. > > * gcc.target/i386/pr103462-6.c: New test. 
> > --- > > gcc/match.pd | 7 + > > gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 + > > gcc/testsuite/gcc.target/i386/pr103462-2.c | 45 ++ > > gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 + > > gcc/testsuite/gcc.target/i386/pr103462-4.c | 46 ++ > > gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 + > > gcc/testsuite/gcc.target/i386/pr103462-6.c | 46 ++ > > gcc/tree-scalar-evolution.cc | 178 - > > 8 files changed, 654 insertions(+), 1 deletion(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > index 6d691d302b3..24ff5f9e6a8 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -7746,3 +7746,10 @@ and, > >== TYPE_UNSIGNED (TREE_TYPE (@3 > > && single_use (@4) > > && single_use (@5 > > + > > +(for bit_op (bit_and bit_ior bit_xor) > > + (match (bitwise_induction_p @0 @2 @3) > > + (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift integer_onep@1 > > @2 @3))) > > + > > +(match (bitwise_induction_p @0 @2 @3) > > + (bit_not (nop_convert1? (bit_xor@0 (convert2? 
(lshift integer_onep@1 > > @2)) @3 > > diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c > > b/gcc/testsuite/gcc.target/i386/pr103462-1.c > > new file mode 100644 > > index 000..1dc4c2acad6 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c > > @@ -0,0 +1,111 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O1 -fdump-tree-sccp-details" } */ > > +/* { dg-final { scan-tree-dump-times {final value replacement} 12 "sccp" } > > } */ > > + > > +unsigned long long > > +__attribute__((noipa)) > > +foo (unsigned long long tmp) > > +{ > > + for (int bit = 0; bit < 64; bit += 3) > > +tmp &= ~(1ULL << bit); > > + return tmp; > > +} > > + > > +unsigned long long > > +__attribute__((noipa)) > > +foo1 (unsigned long long tmp) > > +{ > > + for (int bit = 63; bit >= 0; bit -= 3) > > +tmp &= ~(1ULL << bit); > > + return tmp; > > +} > > + > > +unsigned long long > > +__attribute__((noipa)) > > +foo2 (unsigned long long tmp) > > +{ > > + for (int bit = 0; bit < 64; bit += 3) > > +tmp &= (1ULL << bit); > > + return tmp; > > +} > > + > > +unsigned long long > > +__attribute__((noipa)) > > +foo3 (unsigned long long tmp) > > +{ > > + for (int bit = 63; bit >= 0; bit -= 3) > > +tmp &= (1ULL << bit); > > + return tmp; > > +} > > + > > +unsigned long long > > +__attribute__((noipa)) > > +foo4 (unsigned long long tmp) > > +{ > > + for (int bit = 0; bit < 64; bit += 3) > > +tmp |= ~(1ULL << bit); > > + return tmp; > > +} > > + > > +unsigned long long > > +__attribute__((noipa)) > > +foo5 (unsigned long long tmp) > > +{ > > + for (int bit = 63; bit >= 0; bit -= 3) > > +tmp |= ~(1ULL << bit); > > + return tmp; > > +} > > + > > +unsigned long long > > +__attribute__((noipa)) > > +foo6 (unsigned long long tmp) > > +{ > > + for (int bit = 0; bit < 64; bit += 3) > >
Re: [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.
On Mon, May 9, 2022 at 7:19 AM liuhongt wrote: > > This patch will enable below optimization: > > { > - int bit; > - long long unsigned int _1; > - long long unsigned int _2; > - > [local count: 46707768]: > - > - [local count: 1027034057]: > - # tmp_11 = PHI > - # bit_13 = PHI > - _1 = 1 << bit_13; > - _2 = ~_1; > - tmp_8 = _2 & tmp_11; > - bit_9 = bit_13 + -3; > - if (bit_9 != -3(OVF)) > -goto ; [95.65%] > - else > -goto ; [4.35%] > - > - [local count: 46707768]: > - return tmp_8; > + tmp_12 = tmp_6(D) & 7905747460161236406; > + return tmp_12; > > } > > > Boostrapped and regtested on x86_64-pc-linux-gnu{-m32,} > Ok for trunk? > > gcc/ChangeLog: > > PR middle-end/103462 > * match.pd (bitwise_induction_p): New match. > * tree-scalar-evolution.c (gimple_bitwise_induction_p): > Declare. > (analyze_and_compute_bitwise_induction_effect): New function. > (enum bit_op_kind): New enum. > (final_value_replacement_loop): Enhanced to handle bitwise > induction. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr103462-1.c: New test. > * gcc.target/i386/pr103462-2.c: New test. > * gcc.target/i386/pr103462-3.c: New test. > * gcc.target/i386/pr103462-4.c: New test. > * gcc.target/i386/pr103462-5.c: New test. > * gcc.target/i386/pr103462-6.c: New test. 
> --- > gcc/match.pd | 7 + > gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 + > gcc/testsuite/gcc.target/i386/pr103462-2.c | 45 ++ > gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 + > gcc/testsuite/gcc.target/i386/pr103462-4.c | 46 ++ > gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 + > gcc/testsuite/gcc.target/i386/pr103462-6.c | 46 ++ > gcc/tree-scalar-evolution.cc | 178 - > 8 files changed, 654 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 6d691d302b3..24ff5f9e6a8 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -7746,3 +7746,10 @@ and, >== TYPE_UNSIGNED (TREE_TYPE (@3 > && single_use (@4) > && single_use (@5 > + > +(for bit_op (bit_and bit_ior bit_xor) > + (match (bitwise_induction_p @0 @2 @3) > + (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift integer_onep@1 > @2 @3))) > + > +(match (bitwise_induction_p @0 @2 @3) > + (bit_not (nop_convert1? (bit_xor@0 (convert2? 
(lshift integer_onep@1 @2)) > @3 > diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c > b/gcc/testsuite/gcc.target/i386/pr103462-1.c > new file mode 100644 > index 000..1dc4c2acad6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c > @@ -0,0 +1,111 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-sccp-details" } */ > +/* { dg-final { scan-tree-dump-times {final value replacement} 12 "sccp" } } > */ > + > +unsigned long long > +__attribute__((noipa)) > +foo (unsigned long long tmp) > +{ > + for (int bit = 0; bit < 64; bit += 3) > +tmp &= ~(1ULL << bit); > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo1 (unsigned long long tmp) > +{ > + for (int bit = 63; bit >= 0; bit -= 3) > +tmp &= ~(1ULL << bit); > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo2 (unsigned long long tmp) > +{ > + for (int bit = 0; bit < 64; bit += 3) > +tmp &= (1ULL << bit); > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo3 (unsigned long long tmp) > +{ > + for (int bit = 63; bit >= 0; bit -= 3) > +tmp &= (1ULL << bit); > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo4 (unsigned long long tmp) > +{ > + for (int bit = 0; bit < 64; bit += 3) > +tmp |= ~(1ULL << bit); > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo5 (unsigned long long tmp) > +{ > + for (int bit = 63; bit >= 0; bit -= 3) > +tmp |= ~(1ULL << bit); > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo6 (unsigned long long tmp) > +{ > + for (int bit = 0; bit < 64; bit += 3) > +tmp |= (1ULL << bit); > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo7 (unsigned long long tmp) > +{ > + for (int bit = 63; bit >= 0; bit -= 3) > +tmp |= (1ULL << bit); > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo8 (unsigned long long tmp) > +{ > + for (int bit = 0; bit < 64; bit += 
3) > +tmp ^= ~(1ULL <<
[PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.
This patch enables the optimization below: { - int bit; - long long unsigned int _1; - long long unsigned int _2; - [local count: 46707768]: - - [local count: 1027034057]: - # tmp_11 = PHI - # bit_13 = PHI - _1 = 1 << bit_13; - _2 = ~_1; - tmp_8 = _2 & tmp_11; - bit_9 = bit_13 + -3; - if (bit_9 != -3(OVF)) -goto ; [95.65%] - else -goto ; [4.35%] - - [local count: 46707768]: - return tmp_8; + tmp_12 = tmp_6(D) & 7905747460161236406; + return tmp_12; } Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ok for trunk? gcc/ChangeLog: PR middle-end/103462 * match.pd (bitwise_induction_p): New match. * tree-scalar-evolution.cc (gimple_bitwise_induction_p): Declare. (analyze_and_compute_bitwise_induction_effect): New function. (enum bit_op_kind): New enum. (final_value_replacement_loop): Enhanced to handle bitwise induction. gcc/testsuite/ChangeLog: * gcc.target/i386/pr103462-1.c: New test. * gcc.target/i386/pr103462-2.c: New test. * gcc.target/i386/pr103462-3.c: New test. * gcc.target/i386/pr103462-4.c: New test. * gcc.target/i386/pr103462-5.c: New test. * gcc.target/i386/pr103462-6.c: New test. 
--- gcc/match.pd | 7 + gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 + gcc/testsuite/gcc.target/i386/pr103462-2.c | 45 ++ gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 + gcc/testsuite/gcc.target/i386/pr103462-4.c | 46 ++ gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 + gcc/testsuite/gcc.target/i386/pr103462-6.c | 46 ++ gcc/tree-scalar-evolution.cc | 178 - 8 files changed, 654 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c diff --git a/gcc/match.pd b/gcc/match.pd index 6d691d302b3..24ff5f9e6a8 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -7746,3 +7746,10 @@ and, == TYPE_UNSIGNED (TREE_TYPE (@3 && single_use (@4) && single_use (@5 + +(for bit_op (bit_and bit_ior bit_xor) + (match (bitwise_induction_p @0 @2 @3) + (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift integer_onep@1 @2 @3))) + +(match (bitwise_induction_p @0 @2 @3) + (bit_not (nop_convert1? (bit_xor@0 (convert2? 
(lshift integer_onep@1 @2)) @3 diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c b/gcc/testsuite/gcc.target/i386/pr103462-1.c new file mode 100644 index 000..1dc4c2acad6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c @@ -0,0 +1,111 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-sccp-details" } */ +/* { dg-final { scan-tree-dump-times {final value replacement} 12 "sccp" } } */ + +unsigned long long +__attribute__((noipa)) +foo (unsigned long long tmp) +{ + for (int bit = 0; bit < 64; bit += 3) +tmp &= ~(1ULL << bit); + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo1 (unsigned long long tmp) +{ + for (int bit = 63; bit >= 0; bit -= 3) +tmp &= ~(1ULL << bit); + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo2 (unsigned long long tmp) +{ + for (int bit = 0; bit < 64; bit += 3) +tmp &= (1ULL << bit); + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo3 (unsigned long long tmp) +{ + for (int bit = 63; bit >= 0; bit -= 3) +tmp &= (1ULL << bit); + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo4 (unsigned long long tmp) +{ + for (int bit = 0; bit < 64; bit += 3) +tmp |= ~(1ULL << bit); + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo5 (unsigned long long tmp) +{ + for (int bit = 63; bit >= 0; bit -= 3) +tmp |= ~(1ULL << bit); + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo6 (unsigned long long tmp) +{ + for (int bit = 0; bit < 64; bit += 3) +tmp |= (1ULL << bit); + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo7 (unsigned long long tmp) +{ + for (int bit = 63; bit >= 0; bit -= 3) +tmp |= (1ULL << bit); + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo8 (unsigned long long tmp) +{ + for (int bit = 0; bit < 64; bit += 3) +tmp ^= ~(1ULL << bit); + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo9 (unsigned long long tmp) +{ + for (int bit = 63; bit >= 0; bit -= 3) +tmp ^= ~(1ULL 
<< bit); + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo10 (unsigned long long tmp) +{ + for (int bit = 0; bit < 64; bit += 3) +tmp ^= (1ULL << bit); + return tmp; +} + +unsigned long long