RE: [PATCH] PR gcc/98350:Handle FMA friendly in reassoc pass

2023-05-24 Thread Cui, Lili via Gcc-patches
> > +rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
> > +const vec
> > +&ops)
> >  {
> >enum tree_code opcode = gimple_assign_rhs_code (stmt);
> >int op_num = ops.length ();
> > @@ -5483,10 +5494,11 @@ rewrite_expr_tree_parallel (gassign *stmt, int
> width,
> >int stmt_num = op_num - 1;
> >gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
> >int op_index = op_num - 1;
> > -  int stmt_index = 0;
> > -  int ready_stmts_end = 0;
> > -  int i = 0;
> > -  gimple *stmt1 = NULL, *stmt2 = NULL;
> > +  int width_count = width;
> > +  int i = 0, j = 0;
> > +  tree tmp_op[2], op1;
> > +  operand_entry *oe;
> > +  gimple *stmt1 = NULL;
> >tree last_rhs1 = gimple_assign_rhs1 (stmt);
> >
> >/* We start expression rewriting from the top statements.
> > @@ -5496,91 +5508,84 @@ rewrite_expr_tree_parallel (gassign *stmt, int
> width,
> >for (i = stmt_num - 2; i >= 0; i--)
> >  stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
> >
> > -  for (i = 0; i < stmt_num; i++)
> > +  /* Build parallel dependency chain according to width.  */  for (i
> > + = 0; i < width; i++)
> >  {
> > -  tree op1, op2;
> > -
> > -  /* Determine whether we should use results of
> > -already handled statements or not.  */
> > -  if (ready_stmts_end == 0
> > - && (i - stmt_index >= width || op_index < 1))
> > -   ready_stmts_end = i;
> > -
> > -  /* Now we choose operands for the next statement.  Non zero
> > -value in ready_stmts_end means here that we should use
> > -the result of already generated statements as new operand.  */
> > -  if (ready_stmts_end > 0)
> > -   {
> > - op1 = gimple_assign_lhs (stmts[stmt_index++]);
> > - if (ready_stmts_end > stmt_index)
> > -   op2 = gimple_assign_lhs (stmts[stmt_index++]);
> > - else if (op_index >= 0)
> > -   {
> > - operand_entry *oe = ops[op_index--];
> > - stmt2 = oe->stmt_to_insert;
> > - op2 = oe->op;
> > -   }
> > - else
> > -   {
> > - gcc_assert (stmt_index < i);
> > - op2 = gimple_assign_lhs (stmts[stmt_index++]);
> > -   }
> > +  /*   */
> 
> empty comment?

Added it, thanks.

> 
> > +  if (op_index > 1 && !has_fma)
> > +   swap_ops_for_binary_stmt (ops, op_index - 2);
> >
> > - if (stmt_index >= ready_stmts_end)
> > -   ready_stmts_end = 0;
> > -   }
> > -  else
> > +  for (j = 0; j < 2; j++)
> > {
> > - if (op_index > 1)
> > -   swap_ops_for_binary_stmt (ops, op_index - 2);
> > - operand_entry *oe2 = ops[op_index--];
> > - operand_entry *oe1 = ops[op_index--];
> > - op2 = oe2->op;
> > - stmt2 = oe2->stmt_to_insert;
> > - op1 = oe1->op;
> > - stmt1 = oe1->stmt_to_insert;
> > + gcc_assert (op_index >= 0);
> > + oe = ops[op_index--];
> > + tmp_op[j] = oe->op;
> > + /* If the stmt that defines operand has to be inserted, insert it
> > +before the use.  */
> > + stmt1 = oe->stmt_to_insert;
> > + if (stmt1)
> > +   insert_stmt_before_use (stmts[i], stmt1);
> > + stmt1 = NULL;
> > }
> > -
> > -  /* If we emit the last statement then we should put
> > -operands into the last statement.  It will also
> > -break the loop.  */
> > -  if (op_index < 0 && stmt_index == i)
> > -   i = stmt_num - 1;
> > +  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), tmp_op[1],
> tmp_op[0], opcode);
> > +  gimple_set_visited (stmts[i], true);
> >
> >if (dump_file && (dump_flags & TDF_DETAILS))
> > {
> > - fprintf (dump_file, "Transforming ");
> > + fprintf (dump_file, " into ");
> >   print_gimple_stmt (dump_file, stmts[i], 0);
> > }
> > +}
> >
> > -  /* If the stmt that defines operand has to be inserted, insert it
> > -before the use.  */
> > -  if (stmt1)
> > -   insert_stmt_before_use (stmts[i], stmt1);
> > -  if (stmt2)
> > -   insert_stmt_before_use (stmts[i], stmt2);
> > -  stmt1 = stmt2 = NULL;
> > -
> > -  /* We keep original statement only for the last one.  All
> > -others are recreated.  */
> > -  if (i == stmt_num - 1)
> > +  for (i = width; i < stmt_num; i++)
> > +{
> > +  /* We keep original statement only for the last one.  All others are
> > +recreated.  */
> > +  if ( op_index < 0)
> > {
> > - gimple_assign_set_rhs1 (stmts[i], op1);
> > - gimple_assign_set_rhs2 (stmts[i], op2);
> > - update_stmt (stmts[i]);
> > + if (width_count == 2)
> > +   {
> > +
> > + /* We keep original statement only for the last one.  All
> > +others are recreated.  */
> > + gimple_assign_set_

Re: [PATCH] PR gcc/98350:Handle FMA friendly in reassoc pass

2023-05-22 Thread Richard Biener via Gcc-patches
On Wed, May 17, 2023 at 3:02 PM Cui, Lili  wrote:
>
> From: Lili Cui 
>
> Make some changes in reassoc pass to make it more friendly to fma pass later.
> Using FMA instead of mult + add reduces register pressure and insruction
> retired.
>
> There are mainly two changes
> 1. Put no-mult ops and mult ops alternately at the end of the queue, which is
> conducive to generating more fma and reducing the loss of FMA when breaking
> the chain.
> 2. Rewrite the rewrite_expr_tree_parallel function to try to build parallel
> chains according to the given correlation width, keeping the FMA chance as
> much as possible.
>
> TEST1:
>
> float
> foo (float a, float b, float c, float d, float *e)
> {
>return  *e  + a * b + c * d ;
> }
>
> For "-Ofast -mfpmath=sse -mfma" GCC generates:
> vmulss  %xmm3, %xmm2, %xmm2
> vfmadd132ss %xmm1, %xmm2, %xmm0
> vaddss  (%rdi), %xmm0, %xmm0
> ret
>
> With this patch GCC generates:
> vfmadd213ss   (%rdi), %xmm1, %xmm0
> vfmadd231ss   %xmm2, %xmm3, %xmm0
> ret
>
> TEST2:
>
> for (int i = 0; i < N; i++)
> {
>   a[i] += b[i]* c[i] + d[i] * e[i] + f[i] * g[i] + h[i] * j[i] + k[i] * l[i] 
> + m[i]* o[i] + p[i];
> }
>
> For "-Ofast -mfpmath=sse -mfma"  GCC generates:
> vmovapd e(%rax), %ymm4
> vmulpd  d(%rax), %ymm4, %ymm3
> addq$32, %rax
> vmovapd c-32(%rax), %ymm5
> vmovapd j-32(%rax), %ymm6
> vmulpd  h-32(%rax), %ymm6, %ymm2
> vmovapd a-32(%rax), %ymm6
> vaddpd  p-32(%rax), %ymm6, %ymm0
> vmovapd g-32(%rax), %ymm7
> vfmadd231pd b-32(%rax), %ymm5, %ymm3
> vmovapd o-32(%rax), %ymm4
> vmulpd  m-32(%rax), %ymm4, %ymm1
> vmovapd l-32(%rax), %ymm5
> vfmadd231pd f-32(%rax), %ymm7, %ymm2
> vfmadd231pd k-32(%rax), %ymm5, %ymm1
> vaddpd  %ymm3, %ymm0, %ymm0
> vaddpd  %ymm2, %ymm0, %ymm0
> vaddpd  %ymm1, %ymm0, %ymm0
> vmovapd %ymm0, a-32(%rax)
> cmpq$8192, %rax
> jne .L4
> vzeroupper
> ret
>
> with this patch applied GCC breaks the chain with width = 2 and generates 6 
> fma:
>
> vmovapd a(%rax), %ymm2
> vmovapd c(%rax), %ymm0
> addq$32, %rax
> vmovapd e-32(%rax), %ymm1
> vmovapd p-32(%rax), %ymm5
> vmovapd g-32(%rax), %ymm3
> vmovapd j-32(%rax), %ymm6
> vmovapd l-32(%rax), %ymm4
> vmovapd o-32(%rax), %ymm7
> vfmadd132pd b-32(%rax), %ymm2, %ymm0
> vfmadd132pd d-32(%rax), %ymm5, %ymm1
> vfmadd231pd f-32(%rax), %ymm3, %ymm0
> vfmadd231pd h-32(%rax), %ymm6, %ymm1
> vfmadd231pd k-32(%rax), %ymm4, %ymm0
> vfmadd231pd m-32(%rax), %ymm7, %ymm1
> vaddpd  %ymm1, %ymm0, %ymm0
> vmovapd %ymm0, a-32(%rax)
> cmpq$8192, %rax
> jne .L2
> vzeroupper
> ret
>
> gcc/ChangeLog:
>
> PR gcc/98350
> * tree-ssa-reassoc.cc
> (rewrite_expr_tree_parallel): Rewrite this function.
> (rank_ops_for_fma): New.
> (reassociate_bb): Handle new function.
>
> gcc/testsuite/ChangeLog:
>
> PR gcc/98350
> * gcc.dg/pr98350-1.c: New test.
> * gcc.dg/pr98350-2.c: Ditto.
> ---
>  gcc/testsuite/gcc.dg/pr98350-1.c |  31 
>  gcc/testsuite/gcc.dg/pr98350-2.c |  11 ++
>  gcc/tree-ssa-reassoc.cc  | 256 +--
>  3 files changed, 215 insertions(+), 83 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr98350-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr98350-2.c
>
> diff --git a/gcc/testsuite/gcc.dg/pr98350-1.c 
> b/gcc/testsuite/gcc.dg/pr98350-1.c
> new file mode 100644
> index 000..185511c5e0a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr98350-1.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -mfpmath=sse -mfma -Wno-attributes " } */
> +
> +/* Test that the compiler properly optimizes multiply and add
> +   to generate more FMA instructions.  */
> +#define N 1024
> +double a[N];
> +double b[N];
> +double c[N];
> +double d[N];
> +double e[N];
> +double f[N];
> +double g[N];
> +double h[N];
> +double j[N];
> +double k[N];
> +double l[N];
> +double m[N];
> +double o[N];
> +double p[N];
> +
> +
> +void
> +foo (void)
> +{
> +  for (int i = 0; i < N; i++)
> +  {
> +a[i] += b[i] * c[i] + d[i] * e[i] + f[i] * g[i] + h[i] * j[i] + k[i] * 
> l[i] + m[i]* o[i] + p[i];
> +  }
> +}
> +/* { dg-final { scan-assembler-times "vfm" 6  } } */
> diff --git a/gcc/testsuite/gcc.dg/pr98350-2.c 
> b/gcc/testsuite/gcc.dg/pr98350-2.c
> new file mode 100644
> index 000..b35d88aead9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr98350-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -mfpmath=sse -mfma -Wno-attributes " } */
> +
> +/* Test that the compiler rearrange the ops to generate more FMA.  */
> +
> +float
> +foo1 (

RE: [PATCH] PR gcc/98350:Handle FMA friendly in reassoc pass

2023-05-18 Thread Cui, Lili via Gcc-patches
Attach CPU2017 3 run results:

On ICX: 
507.cactuBSSN_r: Improved by 1.7% for multi-copy .
503.bwaves_r  : Improved by  0.60% for single copy .
507.cactuBSSN_r : Improved by  1.10% for single copy .
519.lbm_r : Improved by  2.21% for single copy .
no measurable changes for other benchmarks.

On aarch64 
507.cactuBSSN_r: Improved by 1.7% for multi-copy.
503.bwaves_r : Improved by 6.00% for single-copy.
no measurable changes for other benchmarks.

> -Original Message-
> From: Cui, Lili 
> Sent: Wednesday, May 17, 2023 9:02 PM
> To: gcc-patches@gcc.gnu.org
> Cc: richard.guent...@gmail.com; Cui, Lili 
> Subject: [PATCH] PR gcc/98350:Handle FMA friendly in reassoc pass
> 
> From: Lili Cui 
> 
> Make some changes in reassoc pass to make it more friendly to fma pass
> later.
> Using FMA instead of mult + add reduces register pressure and insruction
> retired.
> 
> There are mainly two changes
> 1. Put no-mult ops and mult ops alternately at the end of the queue, which is
> conducive to generating more fma and reducing the loss of FMA when
> breaking the chain.
> 2. Rewrite the rewrite_expr_tree_parallel function to try to build parallel
> chains according to the given correlation width, keeping the FMA chance as
> much as possible.
> 
> TEST1:
> 
> float
> foo (float a, float b, float c, float d, float *e) {
>return  *e  + a * b + c * d ;
> }
> 
> For "-Ofast -mfpmath=sse -mfma" GCC generates:
> vmulss  %xmm3, %xmm2, %xmm2
> vfmadd132ss %xmm1, %xmm2, %xmm0
> vaddss  (%rdi), %xmm0, %xmm0
> ret
> 
> With this patch GCC generates:
> vfmadd213ss   (%rdi), %xmm1, %xmm0
> vfmadd231ss   %xmm2, %xmm3, %xmm0
> ret
> 
> TEST2:
> 
> for (int i = 0; i < N; i++)
> {
>   a[i] += b[i]* c[i] + d[i] * e[i] + f[i] * g[i] + h[i] * j[i] + k[i] * l[i] 
> + m[i]* o[i] +
> p[i]; }
> 
> For "-Ofast -mfpmath=sse -mfma"  GCC generates:
>   vmovapd e(%rax), %ymm4
>   vmulpd  d(%rax), %ymm4, %ymm3
>   addq$32, %rax
>   vmovapd c-32(%rax), %ymm5
>   vmovapd j-32(%rax), %ymm6
>   vmulpd  h-32(%rax), %ymm6, %ymm2
>   vmovapd a-32(%rax), %ymm6
>   vaddpd  p-32(%rax), %ymm6, %ymm0
>   vmovapd g-32(%rax), %ymm7
>   vfmadd231pd b-32(%rax), %ymm5, %ymm3
>   vmovapd o-32(%rax), %ymm4
>   vmulpd  m-32(%rax), %ymm4, %ymm1
>   vmovapd l-32(%rax), %ymm5
>   vfmadd231pd f-32(%rax), %ymm7, %ymm2
>   vfmadd231pd k-32(%rax), %ymm5, %ymm1
>   vaddpd  %ymm3, %ymm0, %ymm0
>   vaddpd  %ymm2, %ymm0, %ymm0
>   vaddpd  %ymm1, %ymm0, %ymm0
>   vmovapd %ymm0, a-32(%rax)
>   cmpq$8192, %rax
>   jne .L4
>   vzeroupper
>   ret
> 
> with this patch applied GCC breaks the chain with width = 2 and generates 6
> fma:
> 
>   vmovapd a(%rax), %ymm2
>   vmovapd c(%rax), %ymm0
>   addq$32, %rax
>   vmovapd e-32(%rax), %ymm1
>   vmovapd p-32(%rax), %ymm5
>   vmovapd g-32(%rax), %ymm3
>   vmovapd j-32(%rax), %ymm6
>   vmovapd l-32(%rax), %ymm4
>   vmovapd o-32(%rax), %ymm7
>   vfmadd132pd b-32(%rax), %ymm2, %ymm0
>   vfmadd132pd d-32(%rax), %ymm5, %ymm1
>   vfmadd231pd f-32(%rax), %ymm3, %ymm0
>   vfmadd231pd h-32(%rax), %ymm6, %ymm1
>   vfmadd231pd k-32(%rax), %ymm4, %ymm0
>   vfmadd231pd m-32(%rax), %ymm7, %ymm1
>   vaddpd  %ymm1, %ymm0, %ymm0
>   vmovapd %ymm0, a-32(%rax)
>   cmpq$8192, %rax
>   jne .L2
>   vzeroupper
>   ret
> 
> gcc/ChangeLog:
> 
>   PR gcc/98350
>   * tree-ssa-reassoc.cc
>   (rewrite_expr_tree_parallel): Rewrite this function.
>   (rank_ops_for_fma): New.
>   (reassociate_bb): Handle new function.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR gcc/98350
>   * gcc.dg/pr98350-1.c: New test.
>   * gcc.dg/pr98350-2.c: Ditto.
> ---
>  gcc/testsuite/gcc.dg/pr98350-1.c |  31   gcc/testsuite/gcc.dg/pr98350-2.c
> |  11 ++
>  gcc/tree-ssa-reassoc.cc  | 256 +--
>  3 files changed, 215 insertions(+), 83 deletions(-)  create mode 100644
> gcc/testsuite/gcc.dg/pr98350-1.c  create mode 100644
> gcc/testsuite/gcc.dg/pr98350-2.c
> 
> diff --git a/gcc/testsuite/gcc.dg/pr98350-1.c b/gcc/testsuite/gcc.dg/pr98350-
> 1.c
> new file mode 100644
> index 000..185511c5e0a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr98350-1.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -mfpmath=sse -mfma -Wno-attributes " } */
> +
> +/* Test that the compiler

[PATCH] PR gcc/98350:Handle FMA friendly in reassoc pass

2023-05-17 Thread Cui, Lili via Gcc-patches
From: Lili Cui 

Make some changes in reassoc pass to make it more friendly to fma pass later.
Using FMA instead of mult + add reduces register pressure and insruction
retired.

There are mainly two changes
1. Put no-mult ops and mult ops alternately at the end of the queue, which is
conducive to generating more fma and reducing the loss of FMA when breaking
the chain.
2. Rewrite the rewrite_expr_tree_parallel function to try to build parallel
chains according to the given correlation width, keeping the FMA chance as
much as possible.

TEST1:

float
foo (float a, float b, float c, float d, float *e)
{
   return  *e  + a * b + c * d ;
}

For "-Ofast -mfpmath=sse -mfma" GCC generates:
vmulss  %xmm3, %xmm2, %xmm2
vfmadd132ss %xmm1, %xmm2, %xmm0
vaddss  (%rdi), %xmm0, %xmm0
ret

With this patch GCC generates:
vfmadd213ss   (%rdi), %xmm1, %xmm0
vfmadd231ss   %xmm2, %xmm3, %xmm0
ret

TEST2:

for (int i = 0; i < N; i++)
{
  a[i] += b[i]* c[i] + d[i] * e[i] + f[i] * g[i] + h[i] * j[i] + k[i] * l[i] + 
m[i]* o[i] + p[i];
}

For "-Ofast -mfpmath=sse -mfma"  GCC generates:
vmovapd e(%rax), %ymm4
vmulpd  d(%rax), %ymm4, %ymm3
addq$32, %rax
vmovapd c-32(%rax), %ymm5
vmovapd j-32(%rax), %ymm6
vmulpd  h-32(%rax), %ymm6, %ymm2
vmovapd a-32(%rax), %ymm6
vaddpd  p-32(%rax), %ymm6, %ymm0
vmovapd g-32(%rax), %ymm7
vfmadd231pd b-32(%rax), %ymm5, %ymm3
vmovapd o-32(%rax), %ymm4
vmulpd  m-32(%rax), %ymm4, %ymm1
vmovapd l-32(%rax), %ymm5
vfmadd231pd f-32(%rax), %ymm7, %ymm2
vfmadd231pd k-32(%rax), %ymm5, %ymm1
vaddpd  %ymm3, %ymm0, %ymm0
vaddpd  %ymm2, %ymm0, %ymm0
vaddpd  %ymm1, %ymm0, %ymm0
vmovapd %ymm0, a-32(%rax)
cmpq$8192, %rax
jne .L4
vzeroupper
ret

with this patch applied GCC breaks the chain with width = 2 and generates 6 fma:

vmovapd a(%rax), %ymm2
vmovapd c(%rax), %ymm0
addq$32, %rax
vmovapd e-32(%rax), %ymm1
vmovapd p-32(%rax), %ymm5
vmovapd g-32(%rax), %ymm3
vmovapd j-32(%rax), %ymm6
vmovapd l-32(%rax), %ymm4
vmovapd o-32(%rax), %ymm7
vfmadd132pd b-32(%rax), %ymm2, %ymm0
vfmadd132pd d-32(%rax), %ymm5, %ymm1
vfmadd231pd f-32(%rax), %ymm3, %ymm0
vfmadd231pd h-32(%rax), %ymm6, %ymm1
vfmadd231pd k-32(%rax), %ymm4, %ymm0
vfmadd231pd m-32(%rax), %ymm7, %ymm1
vaddpd  %ymm1, %ymm0, %ymm0
vmovapd %ymm0, a-32(%rax)
cmpq$8192, %rax
jne .L2
vzeroupper
ret

gcc/ChangeLog:

PR gcc/98350
* tree-ssa-reassoc.cc
(rewrite_expr_tree_parallel): Rewrite this function.
(rank_ops_for_fma): New.
(reassociate_bb): Handle new function.

gcc/testsuite/ChangeLog:

PR gcc/98350
* gcc.dg/pr98350-1.c: New test.
* gcc.dg/pr98350-2.c: Ditto.
---
 gcc/testsuite/gcc.dg/pr98350-1.c |  31 
 gcc/testsuite/gcc.dg/pr98350-2.c |  11 ++
 gcc/tree-ssa-reassoc.cc  | 256 +--
 3 files changed, 215 insertions(+), 83 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr98350-1.c
 create mode 100644 gcc/testsuite/gcc.dg/pr98350-2.c

diff --git a/gcc/testsuite/gcc.dg/pr98350-1.c b/gcc/testsuite/gcc.dg/pr98350-1.c
new file mode 100644
index 000..185511c5e0a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr98350-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mfpmath=sse -mfma -Wno-attributes " } */
+
+/* Test that the compiler properly optimizes multiply and add 
+   to generate more FMA instructions.  */
+#define N 1024
+double a[N];
+double b[N];
+double c[N];
+double d[N];
+double e[N];
+double f[N];
+double g[N];
+double h[N];
+double j[N];
+double k[N];
+double l[N];
+double m[N];
+double o[N];
+double p[N];
+
+
+void
+foo (void)
+{
+  for (int i = 0; i < N; i++)
+  {
+a[i] += b[i] * c[i] + d[i] * e[i] + f[i] * g[i] + h[i] * j[i] + k[i] * 
l[i] + m[i]* o[i] + p[i];
+  }
+}
+/* { dg-final { scan-assembler-times "vfm" 6  } } */
diff --git a/gcc/testsuite/gcc.dg/pr98350-2.c b/gcc/testsuite/gcc.dg/pr98350-2.c
new file mode 100644
index 000..b35d88aead9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr98350-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mfpmath=sse -mfma -Wno-attributes " } */
+
+/* Test that the compiler rearrange the ops to generate more FMA.  */
+
+float
+foo1 (float a, float b, float c, float d, float *e)
+{
+   return   *e + a * b + c * d ;
+}
+/* { dg-final { scan-assembler-times "vfm" 2  } } */
diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
index 067a3f07f7e..52c8aab6033 100644
--- a/gcc/tree-ssa-reassoc.cc
+++ b/gcc/tree-ssa-reassoc.cc
@@ -54,6 +54,7 @@ along with GCC; see the file COPYING3.  If not se