Re: [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.

2022-05-18 Thread Richard Biener via Gcc-patches
On Wed, May 18, 2022 at 4:45 AM Hongtao Liu  wrote:
>
> On Fri, May 13, 2022 at 7:16 PM Richard Biener
>  wrote:
> >
> > On Fri, May 13, 2022 at 5:37 AM Hongtao Liu  wrote:
> > >
> > > On Wed, May 11, 2022 at 4:45 PM Richard Biener via Gcc-patches
> > >  wrote:
> > > >
> > > > On Mon, May 9, 2022 at 7:19 AM liuhongt  wrote:
> > > > >
> > > > > This patch will enable below optimization:
> > > > >
> > > > >  {
> > > > > -  int bit;
> > > > > -  long long unsigned int _1;
> > > > > -  long long unsigned int _2;
> > > > > -
> > > > > [local count: 46707768]:
> > > > > -
> > > > > -   [local count: 1027034057]:
> > > > > -  # tmp_11 = PHI 
> > > > > -  # bit_13 = PHI 
> > > > > -  _1 = 1 << bit_13;
> > > > > -  _2 = ~_1;
> > > > > -  tmp_8 = _2 & tmp_11;
> > > > > -  bit_9 = bit_13 + -3;
> > > > > -  if (bit_9 != -3(OVF))
> > > > > -goto ; [95.65%]
> > > > > -  else
> > > > > -goto ; [4.35%]
> > > > > -
> > > > > -   [local count: 46707768]:
> > > > > -  return tmp_8;
> > > > > +  tmp_12 = tmp_6(D) & 7905747460161236406;
> > > > > +  return tmp_12;
> > > > >
> > > > >  }
> > > > >
> > > > >
> > > > > Boostrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> > > > > Ok for trunk?
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > > PR middle-end/103462
> > > > > * match.pd (bitwise_induction_p): New match.
> > > > > * tree-scalar-evolution.c (gimple_bitwise_induction_p):
> > > > > Declare.
> > > > > (analyze_and_compute_bitwise_induction_effect): New function.
> > > > > (enum bit_op_kind): New enum.
> > > > > (final_value_replacement_loop): Enhanced to handle bitwise
> > > > > induction.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > > * gcc.target/i386/pr103462-1.c: New test.
> > > > > * gcc.target/i386/pr103462-2.c: New test.
> > > > > * gcc.target/i386/pr103462-3.c: New test.
> > > > > * gcc.target/i386/pr103462-4.c: New test.
> > > > > * gcc.target/i386/pr103462-5.c: New test.
> > > > > * gcc.target/i386/pr103462-6.c: New test.
> > > > > ---
> > > > >  gcc/match.pd   |   7 +
> > > > >  gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 +
> > > > >  gcc/testsuite/gcc.target/i386/pr103462-2.c |  45 ++
> > > > >  gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 +
> > > > >  gcc/testsuite/gcc.target/i386/pr103462-4.c |  46 ++
> > > > >  gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 +
> > > > >  gcc/testsuite/gcc.target/i386/pr103462-6.c |  46 ++
> > > > >  gcc/tree-scalar-evolution.cc   | 178 
> > > > > -
> > > > >  8 files changed, 654 insertions(+), 1 deletion(-)
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c
> > > > >
> > > > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > > > index 6d691d302b3..24ff5f9e6a8 100644
> > > > > --- a/gcc/match.pd
> > > > > +++ b/gcc/match.pd
> > > > > @@ -7746,3 +7746,10 @@ and,
> > > > >== TYPE_UNSIGNED (TREE_TYPE (@3
> > > > > && single_use (@4)
> > > > > && single_use (@5
> > > > > +
> > > > > +(for bit_op (bit_and bit_ior bit_xor)
> > > > > + (match (bitwise_induction_p @0 @2 @3)
> > > > > +   (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift 
> > > > > integer_onep@1 @2 @3)))
> > > > > +
> > > > > +(match (bitwise_induction_p @0 @2 @3)
> > > > > +  (bit_not (nop_convert1? (bit_xor@0 (convert2? (lshift 
> > > > > integer_onep@1 @2)) @3
> > > > > diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c 
> > > > > b/gcc/testsuite/gcc.target/i386/pr103462-1.c
> > > > > new file mode 100644
> > > > > index 000..1dc4c2acad6
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c
> > > > > @@ -0,0 +1,111 @@
> > > > > +/* { dg-do compile } */
> > > > > +/* { dg-options "-O1 -fdump-tree-sccp-details" } */
> > > > > +/* { dg-final { scan-tree-dump-times {final value replacement} 12 
> > > > > "sccp" } } */
> > > > > +
> > > > > +unsigned long long
> > > > > +__attribute__((noipa))
> > > > > +foo (unsigned long long tmp)
> > > > > +{
> > > > > +  for (int bit = 0; bit < 64; bit += 3)
> > > > > +tmp &= ~(1ULL << bit);
> > > > > +  return tmp;
> > > > > +}
> > > > > +
> > > > > +unsigned long long
> > > > > +__attribute__((noipa))
> > > > > +foo1 (unsigned long long tmp)
> > > > > +{
> > > > > +  for (int bit = 63; bit >= 0; bit -= 3)
> > > > > +tmp &= ~(1ULL << bit);
> > > > > +  return tmp;
> > > > > +}
> > > > > +
> > > > > +unsigned long long
> > > > > 

Re: [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.

2022-05-17 Thread Hongtao Liu via Gcc-patches
On Fri, May 13, 2022 at 7:16 PM Richard Biener
 wrote:
>
> On Fri, May 13, 2022 at 5:37 AM Hongtao Liu  wrote:
> >
> > On Wed, May 11, 2022 at 4:45 PM Richard Biener via Gcc-patches
> >  wrote:
> > >
> > > On Mon, May 9, 2022 at 7:19 AM liuhongt  wrote:
> > > >
> > > > This patch will enable below optimization:
> > > >
> > > >  {
> > > > -  int bit;
> > > > -  long long unsigned int _1;
> > > > -  long long unsigned int _2;
> > > > -
> > > > [local count: 46707768]:
> > > > -
> > > > -   [local count: 1027034057]:
> > > > -  # tmp_11 = PHI 
> > > > -  # bit_13 = PHI 
> > > > -  _1 = 1 << bit_13;
> > > > -  _2 = ~_1;
> > > > -  tmp_8 = _2 & tmp_11;
> > > > -  bit_9 = bit_13 + -3;
> > > > -  if (bit_9 != -3(OVF))
> > > > -goto ; [95.65%]
> > > > -  else
> > > > -goto ; [4.35%]
> > > > -
> > > > -   [local count: 46707768]:
> > > > -  return tmp_8;
> > > > +  tmp_12 = tmp_6(D) & 7905747460161236406;
> > > > +  return tmp_12;
> > > >
> > > >  }
> > > >
> > > >
> > > > Boostrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> > > > Ok for trunk?
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > PR middle-end/103462
> > > > * match.pd (bitwise_induction_p): New match.
> > > > * tree-scalar-evolution.c (gimple_bitwise_induction_p):
> > > > Declare.
> > > > (analyze_and_compute_bitwise_induction_effect): New function.
> > > > (enum bit_op_kind): New enum.
> > > > (final_value_replacement_loop): Enhanced to handle bitwise
> > > > induction.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > > * gcc.target/i386/pr103462-1.c: New test.
> > > > * gcc.target/i386/pr103462-2.c: New test.
> > > > * gcc.target/i386/pr103462-3.c: New test.
> > > > * gcc.target/i386/pr103462-4.c: New test.
> > > > * gcc.target/i386/pr103462-5.c: New test.
> > > > * gcc.target/i386/pr103462-6.c: New test.
> > > > ---
> > > >  gcc/match.pd   |   7 +
> > > >  gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 +
> > > >  gcc/testsuite/gcc.target/i386/pr103462-2.c |  45 ++
> > > >  gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 +
> > > >  gcc/testsuite/gcc.target/i386/pr103462-4.c |  46 ++
> > > >  gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 +
> > > >  gcc/testsuite/gcc.target/i386/pr103462-6.c |  46 ++
> > > >  gcc/tree-scalar-evolution.cc   | 178 -
> > > >  8 files changed, 654 insertions(+), 1 deletion(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c
> > > >
> > > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > > index 6d691d302b3..24ff5f9e6a8 100644
> > > > --- a/gcc/match.pd
> > > > +++ b/gcc/match.pd
> > > > @@ -7746,3 +7746,10 @@ and,
> > > >== TYPE_UNSIGNED (TREE_TYPE (@3
> > > > && single_use (@4)
> > > > && single_use (@5
> > > > +
> > > > +(for bit_op (bit_and bit_ior bit_xor)
> > > > + (match (bitwise_induction_p @0 @2 @3)
> > > > +   (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift 
> > > > integer_onep@1 @2 @3)))
> > > > +
> > > > +(match (bitwise_induction_p @0 @2 @3)
> > > > +  (bit_not (nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 
> > > > @2)) @3
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c 
> > > > b/gcc/testsuite/gcc.target/i386/pr103462-1.c
> > > > new file mode 100644
> > > > index 000..1dc4c2acad6
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c
> > > > @@ -0,0 +1,111 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O1 -fdump-tree-sccp-details" } */
> > > > +/* { dg-final { scan-tree-dump-times {final value replacement} 12 
> > > > "sccp" } } */
> > > > +
> > > > +unsigned long long
> > > > +__attribute__((noipa))
> > > > +foo (unsigned long long tmp)
> > > > +{
> > > > +  for (int bit = 0; bit < 64; bit += 3)
> > > > +tmp &= ~(1ULL << bit);
> > > > +  return tmp;
> > > > +}
> > > > +
> > > > +unsigned long long
> > > > +__attribute__((noipa))
> > > > +foo1 (unsigned long long tmp)
> > > > +{
> > > > +  for (int bit = 63; bit >= 0; bit -= 3)
> > > > +tmp &= ~(1ULL << bit);
> > > > +  return tmp;
> > > > +}
> > > > +
> > > > +unsigned long long
> > > > +__attribute__((noipa))
> > > > +foo2 (unsigned long long tmp)
> > > > +{
> > > > +  for (int bit = 0; bit < 64; bit += 3)
> > > > +tmp &= (1ULL << bit);
> > > > +  return tmp;
> > > > +}
> > > > +
> > > > +unsigned long long
> > > > +__attribute__((noipa))
> > > > +foo3 (unsigned long long tmp)
> > > > +{
> > > > +  

Re: [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.

2022-05-13 Thread Richard Biener via Gcc-patches
On Fri, May 13, 2022 at 5:37 AM Hongtao Liu  wrote:
>
> On Wed, May 11, 2022 at 4:45 PM Richard Biener via Gcc-patches
>  wrote:
> >
> > On Mon, May 9, 2022 at 7:19 AM liuhongt  wrote:
> > >
> > > This patch will enable below optimization:
> > >
> > >  {
> > > -  int bit;
> > > -  long long unsigned int _1;
> > > -  long long unsigned int _2;
> > > -
> > > [local count: 46707768]:
> > > -
> > > -   [local count: 1027034057]:
> > > -  # tmp_11 = PHI 
> > > -  # bit_13 = PHI 
> > > -  _1 = 1 << bit_13;
> > > -  _2 = ~_1;
> > > -  tmp_8 = _2 & tmp_11;
> > > -  bit_9 = bit_13 + -3;
> > > -  if (bit_9 != -3(OVF))
> > > -goto ; [95.65%]
> > > -  else
> > > -goto ; [4.35%]
> > > -
> > > -   [local count: 46707768]:
> > > -  return tmp_8;
> > > +  tmp_12 = tmp_6(D) & 7905747460161236406;
> > > +  return tmp_12;
> > >
> > >  }
> > >
> > >
> > > Boostrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> > > Ok for trunk?
> > >
> > > gcc/ChangeLog:
> > >
> > > PR middle-end/103462
> > > * match.pd (bitwise_induction_p): New match.
> > > * tree-scalar-evolution.c (gimple_bitwise_induction_p):
> > > Declare.
> > > (analyze_and_compute_bitwise_induction_effect): New function.
> > > (enum bit_op_kind): New enum.
> > > (final_value_replacement_loop): Enhanced to handle bitwise
> > > induction.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.target/i386/pr103462-1.c: New test.
> > > * gcc.target/i386/pr103462-2.c: New test.
> > > * gcc.target/i386/pr103462-3.c: New test.
> > > * gcc.target/i386/pr103462-4.c: New test.
> > > * gcc.target/i386/pr103462-5.c: New test.
> > > * gcc.target/i386/pr103462-6.c: New test.
> > > ---
> > >  gcc/match.pd   |   7 +
> > >  gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 +
> > >  gcc/testsuite/gcc.target/i386/pr103462-2.c |  45 ++
> > >  gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 +
> > >  gcc/testsuite/gcc.target/i386/pr103462-4.c |  46 ++
> > >  gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 +
> > >  gcc/testsuite/gcc.target/i386/pr103462-6.c |  46 ++
> > >  gcc/tree-scalar-evolution.cc   | 178 -
> > >  8 files changed, 654 insertions(+), 1 deletion(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index 6d691d302b3..24ff5f9e6a8 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -7746,3 +7746,10 @@ and,
> > >== TYPE_UNSIGNED (TREE_TYPE (@3
> > > && single_use (@4)
> > > && single_use (@5
> > > +
> > > +(for bit_op (bit_and bit_ior bit_xor)
> > > + (match (bitwise_induction_p @0 @2 @3)
> > > +   (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift 
> > > integer_onep@1 @2 @3)))
> > > +
> > > +(match (bitwise_induction_p @0 @2 @3)
> > > +  (bit_not (nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 
> > > @2)) @3
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c 
> > > b/gcc/testsuite/gcc.target/i386/pr103462-1.c
> > > new file mode 100644
> > > index 000..1dc4c2acad6
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c
> > > @@ -0,0 +1,111 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O1 -fdump-tree-sccp-details" } */
> > > +/* { dg-final { scan-tree-dump-times {final value replacement} 12 "sccp" 
> > > } } */
> > > +
> > > +unsigned long long
> > > +__attribute__((noipa))
> > > +foo (unsigned long long tmp)
> > > +{
> > > +  for (int bit = 0; bit < 64; bit += 3)
> > > +tmp &= ~(1ULL << bit);
> > > +  return tmp;
> > > +}
> > > +
> > > +unsigned long long
> > > +__attribute__((noipa))
> > > +foo1 (unsigned long long tmp)
> > > +{
> > > +  for (int bit = 63; bit >= 0; bit -= 3)
> > > +tmp &= ~(1ULL << bit);
> > > +  return tmp;
> > > +}
> > > +
> > > +unsigned long long
> > > +__attribute__((noipa))
> > > +foo2 (unsigned long long tmp)
> > > +{
> > > +  for (int bit = 0; bit < 64; bit += 3)
> > > +tmp &= (1ULL << bit);
> > > +  return tmp;
> > > +}
> > > +
> > > +unsigned long long
> > > +__attribute__((noipa))
> > > +foo3 (unsigned long long tmp)
> > > +{
> > > +  for (int bit = 63; bit >= 0; bit -= 3)
> > > +tmp &= (1ULL << bit);
> > > +  return tmp;
> > > +}
> > > +
> > > +unsigned long long
> > > +__attribute__((noipa))
> > > +foo4 (unsigned long long tmp)
> > > +{
> > > +  for (int bit = 0; bit < 64; bit += 3)
> > > +tmp |= ~(1ULL << bit);
> > > +  return tmp;
> > > +}
> > > 

Re: [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.

2022-05-12 Thread Hongtao Liu via Gcc-patches
On Wed, May 11, 2022 at 4:45 PM Richard Biener via Gcc-patches
 wrote:
>
> On Mon, May 9, 2022 at 7:19 AM liuhongt  wrote:
> >
> > This patch will enable below optimization:
> >
> >  {
> > -  int bit;
> > -  long long unsigned int _1;
> > -  long long unsigned int _2;
> > -
> > [local count: 46707768]:
> > -
> > -   [local count: 1027034057]:
> > -  # tmp_11 = PHI 
> > -  # bit_13 = PHI 
> > -  _1 = 1 << bit_13;
> > -  _2 = ~_1;
> > -  tmp_8 = _2 & tmp_11;
> > -  bit_9 = bit_13 + -3;
> > -  if (bit_9 != -3(OVF))
> > -goto ; [95.65%]
> > -  else
> > -goto ; [4.35%]
> > -
> > -   [local count: 46707768]:
> > -  return tmp_8;
> > +  tmp_12 = tmp_6(D) & 7905747460161236406;
> > +  return tmp_12;
> >
> >  }
> >
> >
> > Boostrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> > PR middle-end/103462
> > * match.pd (bitwise_induction_p): New match.
> > * tree-scalar-evolution.c (gimple_bitwise_induction_p):
> > Declare.
> > (analyze_and_compute_bitwise_induction_effect): New function.
> > (enum bit_op_kind): New enum.
> > (final_value_replacement_loop): Enhanced to handle bitwise
> > induction.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/pr103462-1.c: New test.
> > * gcc.target/i386/pr103462-2.c: New test.
> > * gcc.target/i386/pr103462-3.c: New test.
> > * gcc.target/i386/pr103462-4.c: New test.
> > * gcc.target/i386/pr103462-5.c: New test.
> > * gcc.target/i386/pr103462-6.c: New test.
> > ---
> >  gcc/match.pd   |   7 +
> >  gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 +
> >  gcc/testsuite/gcc.target/i386/pr103462-2.c |  45 ++
> >  gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 +
> >  gcc/testsuite/gcc.target/i386/pr103462-4.c |  46 ++
> >  gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 +
> >  gcc/testsuite/gcc.target/i386/pr103462-6.c |  46 ++
> >  gcc/tree-scalar-evolution.cc   | 178 -
> >  8 files changed, 654 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 6d691d302b3..24ff5f9e6a8 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -7746,3 +7746,10 @@ and,
> >== TYPE_UNSIGNED (TREE_TYPE (@3
> > && single_use (@4)
> > && single_use (@5
> > +
> > +(for bit_op (bit_and bit_ior bit_xor)
> > + (match (bitwise_induction_p @0 @2 @3)
> > +   (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift integer_onep@1 
> > @2 @3)))
> > +
> > +(match (bitwise_induction_p @0 @2 @3)
> > +  (bit_not (nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 
> > @2)) @3
> > diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c 
> > b/gcc/testsuite/gcc.target/i386/pr103462-1.c
> > new file mode 100644
> > index 000..1dc4c2acad6
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c
> > @@ -0,0 +1,111 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O1 -fdump-tree-sccp-details" } */
> > +/* { dg-final { scan-tree-dump-times {final value replacement} 12 "sccp" } 
> > } */
> > +
> > +unsigned long long
> > +__attribute__((noipa))
> > +foo (unsigned long long tmp)
> > +{
> > +  for (int bit = 0; bit < 64; bit += 3)
> > +tmp &= ~(1ULL << bit);
> > +  return tmp;
> > +}
> > +
> > +unsigned long long
> > +__attribute__((noipa))
> > +foo1 (unsigned long long tmp)
> > +{
> > +  for (int bit = 63; bit >= 0; bit -= 3)
> > +tmp &= ~(1ULL << bit);
> > +  return tmp;
> > +}
> > +
> > +unsigned long long
> > +__attribute__((noipa))
> > +foo2 (unsigned long long tmp)
> > +{
> > +  for (int bit = 0; bit < 64; bit += 3)
> > +tmp &= (1ULL << bit);
> > +  return tmp;
> > +}
> > +
> > +unsigned long long
> > +__attribute__((noipa))
> > +foo3 (unsigned long long tmp)
> > +{
> > +  for (int bit = 63; bit >= 0; bit -= 3)
> > +tmp &= (1ULL << bit);
> > +  return tmp;
> > +}
> > +
> > +unsigned long long
> > +__attribute__((noipa))
> > +foo4 (unsigned long long tmp)
> > +{
> > +  for (int bit = 0; bit < 64; bit += 3)
> > +tmp |= ~(1ULL << bit);
> > +  return tmp;
> > +}
> > +
> > +unsigned long long
> > +__attribute__((noipa))
> > +foo5 (unsigned long long tmp)
> > +{
> > +  for (int bit = 63; bit >= 0; bit -= 3)
> > +tmp |= ~(1ULL << bit);
> > +  return tmp;
> > +}
> > +
> > +unsigned long long
> > +__attribute__((noipa))
> > +foo6 (unsigned long long tmp)
> > +{
> > +  for (int bit = 0; bit < 64; bit += 3)
> > 

Re: [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.

2022-05-11 Thread Richard Biener via Gcc-patches
On Mon, May 9, 2022 at 7:19 AM liuhongt  wrote:
>
> This patch will enable below optimization:
>
>  {
> -  int bit;
> -  long long unsigned int _1;
> -  long long unsigned int _2;
> -
> [local count: 46707768]:
> -
> -   [local count: 1027034057]:
> -  # tmp_11 = PHI 
> -  # bit_13 = PHI 
> -  _1 = 1 << bit_13;
> -  _2 = ~_1;
> -  tmp_8 = _2 & tmp_11;
> -  bit_9 = bit_13 + -3;
> -  if (bit_9 != -3(OVF))
> -goto ; [95.65%]
> -  else
> -goto ; [4.35%]
> -
> -   [local count: 46707768]:
> -  return tmp_8;
> +  tmp_12 = tmp_6(D) & 7905747460161236406;
> +  return tmp_12;
>
>  }
>
>
> Boostrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> Ok for trunk?
>
> gcc/ChangeLog:
>
> PR middle-end/103462
> * match.pd (bitwise_induction_p): New match.
> * tree-scalar-evolution.c (gimple_bitwise_induction_p):
> Declare.
> (analyze_and_compute_bitwise_induction_effect): New function.
> (enum bit_op_kind): New enum.
> (final_value_replacement_loop): Enhanced to handle bitwise
> induction.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr103462-1.c: New test.
> * gcc.target/i386/pr103462-2.c: New test.
> * gcc.target/i386/pr103462-3.c: New test.
> * gcc.target/i386/pr103462-4.c: New test.
> * gcc.target/i386/pr103462-5.c: New test.
> * gcc.target/i386/pr103462-6.c: New test.
> ---
>  gcc/match.pd   |   7 +
>  gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 +
>  gcc/testsuite/gcc.target/i386/pr103462-2.c |  45 ++
>  gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 +
>  gcc/testsuite/gcc.target/i386/pr103462-4.c |  46 ++
>  gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 +
>  gcc/testsuite/gcc.target/i386/pr103462-6.c |  46 ++
>  gcc/tree-scalar-evolution.cc   | 178 -
>  8 files changed, 654 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 6d691d302b3..24ff5f9e6a8 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -7746,3 +7746,10 @@ and,
>== TYPE_UNSIGNED (TREE_TYPE (@3
> && single_use (@4)
> && single_use (@5
> +
> +(for bit_op (bit_and bit_ior bit_xor)
> + (match (bitwise_induction_p @0 @2 @3)
> +   (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift integer_onep@1 
> @2 @3)))
> +
> +(match (bitwise_induction_p @0 @2 @3)
> +  (bit_not (nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 @2)) 
> @3
> diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c 
> b/gcc/testsuite/gcc.target/i386/pr103462-1.c
> new file mode 100644
> index 000..1dc4c2acad6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c
> @@ -0,0 +1,111 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-sccp-details" } */
> +/* { dg-final { scan-tree-dump-times {final value replacement} 12 "sccp" } } 
> */
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo (unsigned long long tmp)
> +{
> +  for (int bit = 0; bit < 64; bit += 3)
> +tmp &= ~(1ULL << bit);
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo1 (unsigned long long tmp)
> +{
> +  for (int bit = 63; bit >= 0; bit -= 3)
> +tmp &= ~(1ULL << bit);
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo2 (unsigned long long tmp)
> +{
> +  for (int bit = 0; bit < 64; bit += 3)
> +tmp &= (1ULL << bit);
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo3 (unsigned long long tmp)
> +{
> +  for (int bit = 63; bit >= 0; bit -= 3)
> +tmp &= (1ULL << bit);
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo4 (unsigned long long tmp)
> +{
> +  for (int bit = 0; bit < 64; bit += 3)
> +tmp |= ~(1ULL << bit);
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo5 (unsigned long long tmp)
> +{
> +  for (int bit = 63; bit >= 0; bit -= 3)
> +tmp |= ~(1ULL << bit);
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo6 (unsigned long long tmp)
> +{
> +  for (int bit = 0; bit < 64; bit += 3)
> +tmp |= (1ULL << bit);
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo7 (unsigned long long tmp)
> +{
> +  for (int bit = 63; bit >= 0; bit -= 3)
> +tmp |= (1ULL << bit);
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo8 (unsigned long long tmp)
> +{
> +  for (int bit = 0; bit < 64; bit += 3)
> +tmp ^= ~(1ULL << 

[PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.

2022-05-08 Thread liuhongt via Gcc-patches
This patch enables the optimization below:

 {
-  int bit;
-  long long unsigned int _1;
-  long long unsigned int _2;
-
   <bb 2> [local count: 46707768]:
-
-  <bb 3> [local count: 1027034057]:
-  # tmp_11 = PHI <tmp_8(3), tmp_6(D)(2)>
-  # bit_13 = PHI <bit_9(3), 63(2)>
-  _1 = 1 << bit_13;
-  _2 = ~_1;
-  tmp_8 = _2 & tmp_11;
-  bit_9 = bit_13 + -3;
-  if (bit_9 != -3(OVF))
-    goto <bb 3>; [95.65%]
-  else
-    goto <bb 4>; [4.35%]
-
-  <bb 4> [local count: 46707768]:
-  return tmp_8;
+  tmp_12 = tmp_6(D) & 7905747460161236406;
+  return tmp_12;

 }


Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
Ok for trunk?

gcc/ChangeLog:

PR middle-end/103462
* match.pd (bitwise_induction_p): New match.
* tree-scalar-evolution.cc (gimple_bitwise_induction_p):
Declare.
(analyze_and_compute_bitwise_induction_effect): New function.
(enum bit_op_kind): New enum.
(final_value_replacement_loop): Enhanced to handle bitwise
induction.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr103462-1.c: New test.
* gcc.target/i386/pr103462-2.c: New test.
* gcc.target/i386/pr103462-3.c: New test.
* gcc.target/i386/pr103462-4.c: New test.
* gcc.target/i386/pr103462-5.c: New test.
* gcc.target/i386/pr103462-6.c: New test.
---
 gcc/match.pd   |   7 +
 gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 +
 gcc/testsuite/gcc.target/i386/pr103462-2.c |  45 ++
 gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 +
 gcc/testsuite/gcc.target/i386/pr103462-4.c |  46 ++
 gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 +
 gcc/testsuite/gcc.target/i386/pr103462-6.c |  46 ++
 gcc/tree-scalar-evolution.cc   | 178 -
 8 files changed, 654 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 6d691d302b3..24ff5f9e6a8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7746,3 +7746,10 @@ and,
   == TYPE_UNSIGNED (TREE_TYPE (@3))))
&& single_use (@4)
&& single_use (@5))))
+
+(for bit_op (bit_and bit_ior bit_xor)
+ (match (bitwise_induction_p @0 @2 @3)
+   (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift integer_onep@1 @2)))) @3)))
+
+(match (bitwise_induction_p @0 @2 @3)
+  (bit_not (nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 @2)) @3))))
diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c b/gcc/testsuite/gcc.target/i386/pr103462-1.c
new file mode 100644
index 00000000000..1dc4c2acad6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c
@@ -0,0 +1,111 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-sccp-details" } */
+/* { dg-final { scan-tree-dump-times {final value replacement} 12 "sccp" } } */
+
+unsigned long long
+__attribute__((noipa))
+foo (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+tmp &= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo1 (unsigned long long tmp)
+{
+  for (int bit = 63; bit >= 0; bit -= 3)
+tmp &= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo2 (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+tmp &= (1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo3 (unsigned long long tmp)
+{
+  for (int bit = 63; bit >= 0; bit -= 3)
+tmp &= (1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo4 (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+tmp |= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo5 (unsigned long long tmp)
+{
+  for (int bit = 63; bit >= 0; bit -= 3)
+tmp |= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo6 (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+tmp |= (1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo7 (unsigned long long tmp)
+{
+  for (int bit = 63; bit >= 0; bit -= 3)
+tmp |= (1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo8 (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+tmp ^= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo9 (unsigned long long tmp)
+{
+  for (int bit = 63; bit >= 0; bit -= 3)
+tmp ^= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo10 (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+tmp ^= (1ULL << bit);
+  return tmp;
+}
+
+unsigned long long