[gcc r15-1796] Small optimization for complex addition, real/imag parts the same

2024-07-02 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:578ccc717937178b50db0e35554da3f77f19e304

commit r15-1796-g578ccc717937178b50db0e35554da3f77f19e304
Author: Andrew Pinski 
Date:   Sun Jun 30 18:39:07 2024 -0700

Small optimization for complex addition, real/imag parts the same

This is just a small optimization for the case where the real and imag
parts are the same when lowering complex addition/subtraction. We only
need to do the addition once when the real and imag parts are the same (on
both sides of the operator). This gets done later on by FRE/PRE/DOM but
having it done sooner allows the cabs lowering to remove the sqrt and
just change it to a multiply by a constant.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* tree-complex.cc (expand_complex_addition): If both
operands have the same real and imag parts, only
do the addition once.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/complex-8.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/complex-8.c | 12 
 gcc/tree-complex.cc   |  7 ++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/complex-8.c 
b/gcc/testsuite/gcc.dg/tree-ssa/complex-8.c
new file mode 100644
index 000..a9636ff9e9a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/complex-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-cplxlower1-raw" } */
+
+_Complex double f(double a, double c)
+{
+  _Complex double d = __builtin_complex (a, a);
+  d+=__builtin_complex(c, c);
+  return d;
+}
+
+/* There should only be one plus as (a+c) is still (a+c) */
+/* { dg-final { scan-tree-dump-times "plus_expr, " 1 "cplxlower1" } } */
diff --git a/gcc/tree-complex.cc b/gcc/tree-complex.cc
index 8a879acffca..dfebec18ec3 100644
--- a/gcc/tree-complex.cc
+++ b/gcc/tree-complex.cc
@@ -984,7 +984,12 @@ expand_complex_addition (gimple_stmt_iterator *gsi, tree 
inner_type,
 case PAIR (VARYING, VARYING):
 general:
   rr = gimple_build (&stmts, loc, code, inner_type, ar, br);
-  ri = gimple_build (&stmts, loc, code, inner_type, ai, bi);
+  /* (a+ai) + (b+bi) -> (a+b)+(a+b)i
+ small optimization to remove one new statement. */
+  if (operand_equal_p (ar, ai) && operand_equal_p (br, bi))
+   ri = rr;
+  else
+   ri = gimple_build (&stmts, loc, code, inner_type, ai, bi);
   break;
 
 default:


[gcc r15-1797] Move cabs expansion from powcabs to complex lowering [PR115710]

2024-07-02 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:d8fe4f05ef448e6a136398d38c0f2170d3c6bb0d

commit r15-1797-gd8fe4f05ef448e6a136398d38c0f2170d3c6bb0d
Author: Andrew Pinski 
Date:   Sun Jun 30 12:57:14 2024 -0700

Move cabs expansion from powcabs to complex lowering [PR115710]

Expanding cabs in powcabs might be too late as forwprop might
recombine the load from a memory with the complex expr. Moving
instead to complex lowering allows us to use directly the real/imag
component from the loads instead. This allows for vectorization too.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115710

gcc/ChangeLog:

* tree-complex.cc (init_dont_simulate_again): Handle CABS.
(gimple_expand_builtin_cabs): New function, moved mostly
from tree-ssa-math-opts.cc.
(expand_complex_operations_1): Call gimple_expand_builtin_cabs.
* tree-ssa-math-opts.cc (gimple_expand_builtin_cabs): Remove.
(build_and_insert_binop): Remove.
(pass_data_expand_powcabs): Update comment.
(pass_expand_powcabs::execute): Don't handle CABS.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/cabs-1.c: New test.
* gcc.dg/tree-ssa/cabs-2.c: New test.
* gfortran.dg/vect/pr115710.f90: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c  | 14 ++
 gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c  | 13 ++
 gcc/testsuite/gfortran.dg/vect/pr115710.f90 | 18 
 gcc/tree-complex.cc | 68 +++-
 gcc/tree-ssa-math-opts.cc   | 70 +
 5 files changed, 113 insertions(+), 70 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c
new file mode 100644
index 000..12ff6049e63
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target sqrt_insn } } */
+/* { dg-options "-Ofast -fdump-tree-cplxlower1" } */
+/* { dg-add-options sqrt_insn } */
+
+
+double f(_Complex double a)
+{
+  a+= 1.0f;
+  return __builtin_cabs(a);
+}
+
+/* Check that cabs is expanded during complex lowering. */
+/* { dg-final { scan-tree-dump-not "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump "__builtin_sqrt " "cplxlower1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c
new file mode 100644
index 000..efe3de90cba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cplxlower1" } */
+
+
+double f(_Complex double a)
+{
+  a+= 1.0f;
+  return __builtin_cabs(a);
+}
+
+/* Check that cabs is not expanded during complex lowering. */
+/* { dg-final { scan-tree-dump "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_sqrt " "cplxlower1" } } */
diff --git a/gcc/testsuite/gfortran.dg/vect/pr115710.f90 
b/gcc/testsuite/gfortran.dg/vect/pr115710.f90
new file mode 100644
index 000..3749210ac80
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/pr115710.f90
@@ -0,0 +1,18 @@
+! { dg-do compile }
+! { dg-additional-options "-Ofast" }
+! { dg-require-effective-target vect_float }
+! { dg-require-effective-target vect_call_sqrtf }
+
+! { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } }
+! CABS expansion should allow for the vectorization to happen.
+
+subroutine foo(a,b,n)
+  complex(kind(1.0))::a(*)
+  real(kind(1.0))::b(*)
+  integer::i,n
+
+  do i=1,n
+ b(i)=abs(a(i))**2
+  end do
+
+end subroutine foo
diff --git a/gcc/tree-complex.cc b/gcc/tree-complex.cc
index dfebec18ec3..d1276dc1c2f 100644
--- a/gcc/tree-complex.cc
+++ b/gcc/tree-complex.cc
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
+#include "target.h"
 #include "rtl.h"
 #include "tree.h"
 #include "gimple.h"
@@ -42,7 +43,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfganal.h"
 #include "gimple-fold.h"
 #include "diagnostic-core.h"
-
+#include "case-cfn-macros.h"
+#include "builtins.h"
+#include "optabs-tree.h"
 
 /* For each complex ssa name, a lattice value.  We're interested in finding
out whether a complex number is degenerate in some way, having only real
@@ -238,7 +241,18 @@ init_dont_simulate_again (void)
{
case GIMPLE_CALL:
  if (gimple_call_lhs (stmt))
-   sim_again_p = is_complex_reg (gimple_call_lhs (stmt));
+   {
+ sim_again_p = is_complex_reg (gimple_call_lhs (stmt));
+ switch (gimple_call_combined_fn (stmt))
+   {
+   CASE_CFN_CABS:
+ /* Expand cabs only if unsafe math and optimizing. */
+ if (optimize && 

[gcc r15-1798] Add some optimizations to gimple_expand_builtin_cabs

2024-07-02 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:a17ce10c97fa1d08c8d40507b6cf05642b5821fb

commit r15-1798-ga17ce10c97fa1d08c8d40507b6cf05642b5821fb
Author: Andrew Pinski 
Date:   Sun Jun 30 18:21:15 2024 -0700

Add some optimizations to gimple_expand_builtin_cabs

While looking into the original folding code for cabs
(moved to match in r6-4111-gabcc43f5323869), I noticed that
`cabs(x+0i)` was optimized even without the need of sqrt.
I also noticed that now the code generation in this case
will be worse if the target had a sqrt. So let's implement
these small optimizations in gimple_expand_builtin_cabs.
Note `cabs(x+0i)` is done without unsafe math optimizations.
This is because the definition of `cabs(x+0i)` is `hypot(x, 0)`
and the definition in the standard says that just returns `abs(x)`.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-complex.cc (gimple_expand_builtin_cabs): Add
`cabs(a+ai)`, `cabs(x+0i)` and `cabs(0+xi)` optimizations.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/cabs-3.c: New test.
* gcc.dg/tree-ssa/cabs-4.c: New test.
* gcc.dg/tree-ssa/cabs-5.c: New test.
* gcc.dg/tree-ssa/cabs-6.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/cabs-3.c | 24 +++
 gcc/testsuite/gcc.dg/tree-ssa/cabs-4.c | 16 ++
 gcc/testsuite/gcc.dg/tree-ssa/cabs-5.c | 22 +
 gcc/testsuite/gcc.dg/tree-ssa/cabs-6.c | 16 ++
 gcc/tree-complex.cc| 56 +++---
 5 files changed, 123 insertions(+), 11 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-3.c
new file mode 100644
index 000..976c0169131
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-3.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target sqrt_insn } } */
+/* { dg-options "-Ofast -fdump-tree-cplxlower1" } */
+/* { dg-add-options sqrt_insn } */
+
+
+double f(double a, double c)
+{
+  _Complex double b = a;
+  b+= c;
+  return __builtin_cabs(b);
+}
+
+double f1(double a, double c)
+{
+  _Complex double b = __builtin_complex(0.0, a);
+  b+= __builtin_complex(0.0, c);
+  return __builtin_cabs(b);
+}
+
+/* Check that cabs is expanded during complex lowering. */
+/* { dg-final { scan-tree-dump-not "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_sqrt " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 2 "cplxlower1" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-4.c
new file mode 100644
index 000..00aa3c9d4e7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-4.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-cplxlower1" } */
+
+double f(double a, double c)
+{
+  _Complex double d = __builtin_complex (a, a);
+  d+=__builtin_complex(1.0, 1.0);
+  return __builtin_cabs(d);
+}
+
+/* Check that cabs is expanded during complex lowering and there is no sqrt 
(since it is a constant). */
+/* { dg-final { scan-tree-dump-not "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_sqrt " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 1 "cplxlower1" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-5.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-5.c
new file mode 100644
index 000..dd794079921
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-5.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cplxlower1" } */
+
+double f(double a, double c)
+{
+  _Complex double b = a;
+  b+= c;
+  return __builtin_cabs(b);
+}
+
+double f1(double a, double c)
+{
+  _Complex double b = __builtin_complex(0.0, a);
+  b+= __builtin_complex(0.0, c);
+  return __builtin_cabs(b);
+}
+
+/* Check that cabs is expanded into ABS for both f and f1 during complex 
lowering. */
+/* { dg-final { scan-tree-dump-not "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_sqrt " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 2 "cplxlower1" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cabs-6.c
new file mode 100644
index 000..bc88932449c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-6.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-cplxlower1" } */
+
+double f(double a, double c)
+{
+  _Complex double d = __builtin_complex (a, 0.0);
+  d+=__builtin_complex(0.0, a);
+  return __builtin_cabs(d);
+}
+
+/* Check that cabs is expanded during complex lowering and there is no sqrt 
(since it is a constant). */
+/* { dg-final { scan-tree-dump-not "__builtin_cabs " "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_sqrt " "cplxlower1" } } */
+/* { dg-final { scan-tre

[gcc r15-1799] Rename expand_powcabs pass to expand_pow

2024-07-02 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:36852a189a1dce1dbbc9a62cc433e890c1b95be1

commit r15-1799-g36852a189a1dce1dbbc9a62cc433e890c1b95be1
Author: Andrew Pinski 
Date:   Sun Jun 30 21:17:00 2024 +

Rename expand_powcabs pass to expand_pow

Since cabs expansion was removed from this pass,
it would be good to rename it.

Bootstrapped and tested on x86_64-linux-gnu

gcc/ChangeLog:

* passes.def (expand_pow): Renamed from expand_powcabs.
* timevar.def (TV_TREE_POWCABS): Remove.
(TV_TREE_POW): Add.
* tree-pass.h (make_pass_expand_powcabs): Rename to ...
(make_pass_expand_pow): This.
* tree-ssa-math-opts.cc (class pass_expand_powcabs): Rename to ...
(class pass_expand_pow): This.
(pass_expand_powcabs::execute): Rename to ...
(pass_expand_pow::execute): This.
(make_pass_expand_powcabs): Rename to ...
(make_pass_expand_pow): This.

gcc/testsuite/ChangeLog:

* gcc.dg/pow-sqrt-synth-1.c: Update testcase for renamed pass.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/passes.def  |  2 +-
 gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c |  4 ++--
 gcc/timevar.def |  2 +-
 gcc/tree-pass.h |  2 +-
 gcc/tree-ssa-math-opts.cc   | 25 -
 5 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index 13c9dc34ddf..b8c21b1e435 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -265,7 +265,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_ccp, true /* nonzero_p */);
   /* After CCP we rewrite no longer addressed locals into SSA
 form if possible.  */
-  NEXT_PASS (pass_expand_powcabs);
+  NEXT_PASS (pass_expand_pow);
   NEXT_PASS (pass_optimize_bswap);
   NEXT_PASS (pass_laddress);
   NEXT_PASS (pass_lim);
diff --git a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c 
b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c
index 484b29a8fc8..be81e43ad9a 100644
--- a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c
+++ b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target sqrt_insn } } */
-/* { dg-options "-fdump-tree-powcabs -Ofast --param max-pow-sqrt-depth=8" } */
+/* { dg-options "-fdump-tree-pow -Ofast --param max-pow-sqrt-depth=8" } */
 /* { dg-additional-options "-mfloat-abi=softfp -mfpu=neon-vfpv4" { target 
arm*-*-* } } */
 
 double
@@ -34,4 +34,4 @@ vecfoo (double *a)
 a[i] = __builtin_pow (a[i], 1.25);
 }
 
-/* { dg-final { scan-tree-dump-times "synthesizing" 7 "powcabs" } } */
+/* { dg-final { scan-tree-dump-times "synthesizing" 7 "pow" } } */
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 6fc36859138..0f9d2c0b032 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -223,7 +223,7 @@ DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch 
conversion")
 DEFTIMEVAR (TV_TREE_SWITCH_LOWERING,   "tree switch lowering")
 DEFTIMEVAR (TV_TREE_RECIP, "gimple CSE reciprocals")
 DEFTIMEVAR (TV_TREE_SINCOS   , "gimple CSE sin/cos")
-DEFTIMEVAR (TV_TREE_POWCABS   , "gimple expand pow/cabs")
+DEFTIMEVAR (TV_TREE_POW  , "gimple expand pow")
 DEFTIMEVAR (TV_TREE_WIDEN_MUL, "gimple widening/fma detection")
 DEFTIMEVAR (TV_TRANS_MEM , "transactional memory")
 DEFTIMEVAR (TV_TREE_STRLEN   , "tree strlen optimization")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 38902b1b01b..9843d189d27 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -451,7 +451,7 @@ extern gimple_opt_pass *make_pass_early_warn_uninitialized 
(gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt);
-extern gimple_opt_pass *make_pass_expand_powcabs (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_expand_pow (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 71f896a9790..a35caf5f058 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -2272,17 +2272,16 @@ make_pass_cse_sincos (gcc::context *ctxt)
   return new pass_cse_sincos (ctxt);
 }
 
-/* Expand powi(x,n) into an optimal number of multiplies, when n is a constant.
-   Note the name is powcabs but cabs expansion was moved to the lower complex
-   pass.  */
+/* Expand powi(x,n) into an optimal number of multiplies, when n is a
+   constant.  */
 namespace {
 
-const pass_data pass_data_expand_powcabs =
+const pass_data pass_data_expand_pow =
 {
   GIMPLE_PASS, /* type */
-  "po

[gcc r15-1800] aarch64: Add testcase for vectorconvert lowering [PR110473]

2024-07-02 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:a7ad9cb813063ddf51269910f33b56116c10462c

commit r15-1800-ga7ad9cb813063ddf51269910f33b56116c10462c
Author: Andrew Pinski 
Date:   Tue Jul 2 15:02:17 2024 -0700

aarch64: Add testcase for vectorconvert lowering [PR110473]

Vectorconvert lowering was changed to use the convert optab directly
starting in r15-1677-gc320a7efcd35ba. I had filed an aarch64 specific
issue for this specific thing and it would make sense to add an aarch64
specific testcase instead of just having x86_64 specific ones for
this.

Pushed as obvious after testing for aarch64-linux-gnu.

PR tree-optimization/110473
PR tree-optimization/107432

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/vect-convert-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.target/aarch64/vect-convert-1.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/vect-convert-1.c 
b/gcc/testsuite/gcc.target/aarch64/vect-convert-1.c
new file mode 100644
index 000..74b3f5095e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-convert-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-veclower2" } */
+/* PR tree-optimization/110473 */
+/* convertvector lowering should produce directly the casts
+   rather than lower them to scalar.  */
+
+typedef unsigned int v4si __attribute__ ((vector_size (4*sizeof(int))));
+typedef unsigned short v4hi __attribute__ ((vector_size (4*sizeof(short))));
+
+v4si f(v4si a, v4si b)
+{
+  v4hi t = __builtin_convertvector (a, v4hi);
+  v4si t1 = __builtin_convertvector (t, v4si);
+  return t1;
+}
+
+/* { dg-final { scan-assembler-times "\txtn\t" 1 } } */
+/* { dg-final { scan-assembler-times "\tuxtl\t" 1 } } */
+/* { dg-final { scan-tree-dump-times " = .v4hi. a_" 1 "veclower21" } } */
+/* { dg-final { scan-tree-dump-times " = .v4si. " 1 "veclower21" } } */


[gcc r15-1933] Remove expanding complex EQ/NE inside a GIMPLE_RETURN [PR115721]

2024-07-09 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:8f8bddb58ad7746b2a19c43e32641d161fa15caf

commit r15-1933-g8f8bddb58ad7746b2a19c43e32641d161fa15caf
Author: Andrew Pinski 
Date:   Tue Jul 9 14:00:34 2024 -0700

Remove expanding complex EQ/NE inside a GIMPLE_RETURN [PR115721]

This code has been dead at least since the move over to tuples
in 0-88576-g726a989a8b74bf, when gimple returns could only have
a simple expression in it. So let's remove it.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR tree-optimization/115721
* tree-complex.cc (expand_complex_comparison): Remove
support for GIMPLE_RETURN.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-complex.cc | 8 
 1 file changed, 8 deletions(-)

diff --git a/gcc/tree-complex.cc b/gcc/tree-complex.cc
index d2d9d76827db..dfb45b9d91cf 100644
--- a/gcc/tree-complex.cc
+++ b/gcc/tree-complex.cc
@@ -1622,14 +1622,6 @@ expand_complex_comparison (gimple_stmt_iterator *gsi, 
tree ar, tree ai,
 
   switch (gimple_code (stmt))
 {
-case GIMPLE_RETURN:
-  {
-   greturn *return_stmt = as_a <greturn *> (stmt);
-   type = TREE_TYPE (gimple_return_retval (return_stmt));
-   gimple_return_set_retval (return_stmt, fold_convert (type, cc));
-  }
-  break;
-
 case GIMPLE_ASSIGN:
   type = TREE_TYPE (gimple_assign_lhs (stmt));
   gimple_assign_set_rhs_from_tree (gsi, fold_convert (type, cc));


[gcc r15-1934] testsuite: Allow matching `{_1, { 0, 0, 0, 0 }}` for vect/slp-gap-1.c

2024-07-09 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:6fce4664d4a2e44843bd1464930696c819906d0f

commit r15-1934-g6fce4664d4a2e44843bd1464930696c819906d0f
Author: Andrew Pinski 
Date:   Tue Jul 9 17:13:24 2024 -0700

testsuite: Allow matching `{_1, { 0,0,0,0 }}` for vect/slp-gap-1.c

While working on adding V4QI support to the aarch64 backend,
vect/slp-gap-1.c started to fail but only because the regex
was failing. Before, it was loading using SI (int) and afterwards,
we started to use V4QI. The generated code was the same and the
generated gimple was almost the same. The regex was searching
for `zero-padding trick` and it was still doing that but instead
of directly 0, it was V4QI 0 (or rather `{ 0, 0, 0, 0 }`).
This extends regex to support both.

Tested on x86_64-linux-gnu and aarch64-linux-gnu (with the support added).

gcc/testsuite/ChangeLog:

* gcc.dg/vect/slp-gap-1.c: Support matching `{_1, { 0, 0, 0, 0 }}`
in addition to `{_1, 0}`.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/vect/slp-gap-1.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-gap-1.c 
b/gcc/testsuite/gcc.dg/vect/slp-gap-1.c
index 9856da7a7f48..74bde8219d9a 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-gap-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-gap-1.c
@@ -14,5 +14,7 @@ void pixel_sub_wxh(int16_t * __restrict diff, uint8_t *pix1, 
uint8_t *pix2) {
 
 /* We can vectorize this without peeling for gaps and thus without epilogue,
but the only thing we can reliably scan is the zero-padding trick for the
-   partial loads.  */
-/* { dg-final { scan-tree-dump-times "\{_\[0-9\]\+, 0" 6 "vect" { target { 
vect64 && { vect_unpack && vect_perm } } } } } */
+   partial loads.  
+   Note this will match `{_1, 0}` or `{_1, {0, 0, 0, 0}}`. Both are the same
+   in the end, the difference is the load is either via SI or V4QI. */
+/* { dg-final { scan-tree-dump-times "\{_\[0-9\]\+, (?:0\|{ 0(?:, 0)\+ )}" 6 
"vect" { target { vect64 && { vect_unpack && vect_perm } } } } } */


[gcc r15-1975] Ranger: Mark a few classes as final

2024-07-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:4e0aa05f67cf79729c2a8c20236de885f2e393d1

commit r15-1975-g4e0aa05f67cf79729c2a8c20236de885f2e393d1
Author: Andrew Pinski 
Date:   Fri Jun 21 21:07:26 2024 -0700

Ranger: Mark a few classes as final

I noticed there was a warning from clang about int_range's
dtor being marked as final saying the class cannot be inherited from.
So let's mark the few ranger classes as final for those which we know
will be final.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* value-range.h (class int_range): Mark as final.
(class prange): Likewise.
(class frange): Likewise.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/value-range.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/value-range.h b/gcc/value-range.h
index 4a8d69f34084..334ea1bc338c 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -363,7 +363,7 @@ protected:
 // HARD_MAX_RANGES.  This new storage is freed upon destruction.
 
 template
-class int_range : public irange
+class int_range final : public irange
 {
 public:
   int_range ();
@@ -380,7 +380,7 @@ private:
   wide_int m_ranges[N*2];
 };
 
-class prange : public vrange
+class prange final : public vrange
 {
   friend class prange_storage;
   friend class vrange_printer;
@@ -523,7 +523,7 @@ nan_state::neg_p () const
 // The representation is a type with a couple of endpoints, unioned
 // with the set of { -NAN, +Nan }.
 
-class frange : public vrange
+class frange final : public vrange
 {
   friend class frange_storage;
   friend class vrange_printer;


[gcc r15-2105] Add debug counter for ext_dce

2024-07-17 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:7c3287f3613210d4f98c8095bc739bea6582bfbb

commit r15-2105-g7c3287f3613210d4f98c8095bc739bea6582bfbb
Author: Andrew Pinski 
Date:   Tue Jul 16 09:53:20 2024 -0700

Add debug counter for ext_dce

Like r15-1610-gb6215065a5b143 (which adds one for late_combine),
adding one for ext_dce is useful to debug some issues with this pass.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* dbgcnt.def (ext_dce): New debug counter.
* ext-dce.cc (ext_dce_try_optimize_insn): Reject the insn
if the debug counter says so.
(ext_dce): Rename to ...
(ext_dce_execute): This.
(pass_ext_dce::execute): Update for the name of ext_dce.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/dbgcnt.def |  1 +
 gcc/ext-dce.cc | 16 +---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
index e0b9b1b2a762..ac1f87098493 100644
--- a/gcc/dbgcnt.def
+++ b/gcc/dbgcnt.def
@@ -162,6 +162,7 @@ DEBUG_COUNTER (dom_unreachable_edges)
 DEBUG_COUNTER (dse)
 DEBUG_COUNTER (dse1)
 DEBUG_COUNTER (dse2)
+DEBUG_COUNTER (ext_dce)
 DEBUG_COUNTER (form_fma)
 DEBUG_COUNTER (gcse2_delete)
 DEBUG_COUNTER (gimple_unroll)
diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
index 6c961feee635..6d4b8858ec63 100644
--- a/gcc/ext-dce.cc
+++ b/gcc/ext-dce.cc
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl-iter.h"
 #include "df.h"
 #include "print-rtl.h"
+#include "dbgcnt.h"
 
 /* These should probably move into a C++ class.  */
 static vec livein;
@@ -312,6 +313,15 @@ ext_dce_try_optimize_insn (rtx_insn *insn, rtx set)
   print_rtl_single (dump_file, SET_SRC (set));
 }
 
+  /* We decided to do the optimization but allow it to be rejected for
+ bisection purposes.  */
+  if (!dbg_cnt (::ext_dce))
+{
+  if (dump_file)
+   fprintf (dump_file, "Rejected due to debug counter.\n");
+  return;
+}
+
   new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
 GET_MODE (inner), 0);
   /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
@@ -881,8 +891,8 @@ static bool ext_dce_rd_confluence_n (edge) { return true; }
are never read.  Turn such extensions into SUBREGs instead which
can often be propagated away.  */
 
-static void
-ext_dce (void)
+void
+ext_dce_execute (void)
 {
   df_analyze ();
   ext_dce_init ();
@@ -929,7 +939,7 @@ public:
   virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
   virtual unsigned int execute (function *)
 {
-  ext_dce ();
+  ext_dce_execute ();
   return 0;
 }


[gcc r15-2106] MATCH: Simplify (a ? x : y) eq/ne (b ? x : y) [PR111150]

2024-07-17 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:44fcc1ca11e7ea35dc9fb25a5317346bc1eaf7b2

commit r15-2106-g44fcc1ca11e7ea35dc9fb25a5317346bc1eaf7b2
Author: Eikansh Gupta 
Date:   Wed May 22 23:28:48 2024 +0530

MATCH: Simplify (a ? x : y) eq/ne (b ? x : y) [PR111150]

This patch adds match pattern for `(a ? x : y) eq/ne (b ? x : y)`.
In forwprop1 pass, depending on the type of `a` and `b`, GCC produces
`vec_cond` or `cond_expr`. Based on the observation that `(x != y)` is
TRUE, the pattern can be optimized to produce `(a^b ? TRUE : FALSE)`.

The patch adds match pattern for a, b:
(a ? x : y) != (b ? x : y) --> (a^b) ? TRUE  : FALSE
(a ? x : y) == (b ? x : y) --> (a^b) ? FALSE : TRUE
(a ? x : y) != (b ? y : x) --> (a^b) ? FALSE : TRUE
(a ? x : y) == (b ? y : x) --> (a^b) ? TRUE  : FALSE

PR tree-optimization/111150

gcc/ChangeLog:

* match.pd (`(a ? x : y) eq/ne (b ? x : y)`): New pattern.
(`(a ? x : y) eq/ne (b ? y : x)`): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr111150.c: New test.
* gcc.dg/tree-ssa/pr111150-1.c: New test.
* g++.dg/tree-ssa/pr111150.C: New test.

Signed-off-by: Eikansh Gupta 

Diff:
---
 gcc/match.pd   | 15 +++
 gcc/testsuite/g++.dg/tree-ssa/pr50.C   | 33 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr50-1.c | 72 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr50.c   | 22 +
 4 files changed, 142 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 24a0bbead3e7..5cb399b87180 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5586,6 +5586,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond (bit_and (bit_not @0) @1) @2 @3)))
 #endif
 
+/* (a ? x : y) != (b ? x : y) --> (a^b) ? TRUE  : FALSE */
+/* (a ? x : y) == (b ? x : y) --> (a^b) ? FALSE : TRUE  */
+/* (a ? x : y) != (b ? y : x) --> (a^b) ? FALSE : TRUE  */
+/* (a ? x : y) == (b ? y : x) --> (a^b) ? TRUE  : FALSE */
+(for cnd (cond vec_cond)
+ (for eqne (eq ne)
+  (simplify
+   (eqne:c (cnd @0 @1 @2) (cnd @3 @1 @2))
+(cnd (bit_xor @0 @3) { constant_boolean_node (eqne == NE_EXPR, type); }
+ { constant_boolean_node (eqne != NE_EXPR, type); }))
+  (simplify
+   (eqne:c (cnd @0 @1 @2) (cnd @3 @2 @1))
+(cnd (bit_xor @0 @3) { constant_boolean_node (eqne != NE_EXPR, type); }
+ { constant_boolean_node (eqne == NE_EXPR, type); }
+
 /* Canonicalize mask ? { 0, ... } : { -1, ...} to ~mask if the mask
types are compatible.  */
 (simplify
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr50.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr50.C
new file mode 100644
index ..ca02d8dc51e3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr50.C
@@ -0,0 +1,33 @@
+/* PR tree-optimization/50 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-forwprop1" } */
+typedef int v4si __attribute((__vector_size__(4 * sizeof(int))));
+
+/* Before the patch, VEC_COND_EXPR was generated for each statement in the
+   function. This resulted in 3 VEC_COND_EXPR. */
+v4si f1_(v4si a, v4si b, v4si c, v4si d, v4si e, v4si f) {
+  v4si X = a == b ? e : f;
+  v4si Y = c == d ? e : f;
+  return (X != Y);
+}
+
+v4si f2_(v4si a, v4si b, v4si c, v4si d, v4si e, v4si f) {
+  v4si X = a == b ? e : f;
+  v4si Y = c == d ? e : f;
+  return (X == Y);
+}
+
+v4si f3_(v4si a, v4si b, v4si c, v4si d, v4si e, v4si f) {
+  v4si X = a == b ? e : f;
+  v4si Y = c == d ? f : e;
+  return (X != Y);
+}
+
+v4si f4_(v4si a, v4si b, v4si c, v4si d, v4si e, v4si f) {
+  v4si X = a == b ? e : f;
+  v4si Y = c == d ? f : e;
+  return (X == Y);
+}
+
+/* For each testcase, should produce only one VEC_COND_EXPR for X^Y. */
+/* { dg-final { scan-tree-dump-times " VEC_COND_EXPR " 4 "forwprop1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr50-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr50-1.c
new file mode 100644
index ..6f4b21ac6bcb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr50-1.c
@@ -0,0 +1,72 @@
+/* PR tree-optimization/50 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -fgimple -fdump-tree-forwprop1-raw" } */
+
+/* Checks if pattern (X ? e : f) == (Y ? e : f) gets optimized. */
+__GIMPLE()
+_Bool f1_(int a, int b, int c, int d, int e, int f) {
+  _Bool X;
+  _Bool Y;
+  _Bool t;
+  int t1;
+  int t2;
+  X = a == b;
+  Y = c == d;
+  /* Before the patch cond_expr was generated for these 2 statements. */
+  t1 = X ? e : f;
+  t2 = Y ? e : f;
+  t = t1 == t2;
+  return t;
+}
+
+/* Checks if pattern (X ? e : f) != (Y ? e : f) gets optimized. */
+__GIMPLE()
+_Bool f2_(int a, int b, int c, int d, int e, int f) {
+  _Bool X;
+  _Bool Y;
+  _Bool t;
+  int t1;
+  int t2;
+  X = a == b;
+  Y = c == d;
+  t1 = X ? e : f;
+  t2 = Y ? e : f;
+  t = t1 != t2;
+  return t;
+}
+
+/* Checks if pattern (X ? e : f) == (Y ? f : e) gets optimized. */
+__GIMPLE()
+_Bool f3_(int a, int b, int c, int d, int e, int f) 

[gcc r15-2258] optabs/rs6000: Rename iorc and andc to iorn and andn

2024-07-24 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:23f195b06ad759939805a36646c724bccc3f3984

commit r15-2258-g23f195b06ad759939805a36646c724bccc3f3984
Author: Andrew Pinski 
Date:   Mon Jul 22 21:23:38 2024 -0700

optabs/rs6000: Rename iorc and andc to iorn and andn

When I was trying to add a scalar version of iorc and andc, the optab that
got matched was for and/ior with the mode of csi and cdi instead of iorc and
andc optabs for si and di modes. Since csi/cdi are the complex integer 
modes,
we need to rename the optabs to be without c there. This changes c to n,
which is neutral and known not to be the first letter of a mode.

Bootstrapped and tested on x86_64 and powerpc64le.

gcc/ChangeLog:

* config/rs6000/rs6000-builtins.def: s/iorc/iorn/. s/andc/andn/
for the code.
* config/rs6000/rs6000-string.cc (expand_cmp_vec_sequence): Update
to iorn.
* config/rs6000/rs6000.md (andc3): Rename to ...
(andn3): This.
(iorc3): Rename to ...
(iorn3): This.
* doc/md.texi: Update documentation for the rename.
* internal-fn.def (BIT_ANDC): Rename to ...
(BIT_ANDN): This.
(BIT_IORC): Rename to ...
(BIT_IORN): This.
* optabs.def (andc_optab): Rename to ...
(andn_optab): This.
(iorc_optab): Rename to ...
(iorn_optab): This.
* gimple-isel.cc (gimple_expand_vec_cond_expr): Update for the
renamed internal functions, ANDC/IORC to ANDN/IORN.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def | 44 +--
 gcc/config/rs6000/rs6000-string.cc|  2 +-
 gcc/config/rs6000/rs6000.md   |  4 ++--
 gcc/doc/md.texi   |  8 +++
 gcc/gimple-isel.cc| 12 +-
 gcc/internal-fn.def   |  4 ++--
 gcc/optabs.def| 10 +---
 7 files changed, 44 insertions(+), 40 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 77eb0f7e4069..ffbeff64d6d6 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -518,25 +518,25 @@
 VAND_V8HI_UNS andv8hi3 {}
 
   const vsc __builtin_altivec_vandc_v16qi (vsc, vsc);
-VANDC_V16QI andcv16qi3 {}
+VANDC_V16QI andnv16qi3 {}
 
   const vuc __builtin_altivec_vandc_v16qi_uns (vuc, vuc);
-VANDC_V16QI_UNS andcv16qi3 {}
+VANDC_V16QI_UNS andnv16qi3 {}
 
   const vf __builtin_altivec_vandc_v4sf (vf, vf);
-VANDC_V4SF andcv4sf3 {}
+VANDC_V4SF andnv4sf3 {}
 
   const vsi __builtin_altivec_vandc_v4si (vsi, vsi);
-VANDC_V4SI andcv4si3 {}
+VANDC_V4SI andnv4si3 {}
 
   const vui __builtin_altivec_vandc_v4si_uns (vui, vui);
-VANDC_V4SI_UNS andcv4si3 {}
+VANDC_V4SI_UNS andnv4si3 {}
 
   const vss __builtin_altivec_vandc_v8hi (vss, vss);
-VANDC_V8HI andcv8hi3 {}
+VANDC_V8HI andnv8hi3 {}
 
   const vus __builtin_altivec_vandc_v8hi_uns (vus, vus);
-VANDC_V8HI_UNS andcv8hi3 {}
+VANDC_V8HI_UNS andnv8hi3 {}
 
   const vsc __builtin_altivec_vavgsb (vsc, vsc);
 VAVGSB avgv16qi3_ceil {}
@@ -1189,13 +1189,13 @@
 VAND_V2DI_UNS andv2di3 {}
 
   const vd __builtin_altivec_vandc_v2df (vd, vd);
-VANDC_V2DF andcv2df3 {}
+VANDC_V2DF andnv2df3 {}
 
   const vsll __builtin_altivec_vandc_v2di (vsll, vsll);
-VANDC_V2DI andcv2di3 {}
+VANDC_V2DI andnv2di3 {}
 
   const vull __builtin_altivec_vandc_v2di_uns (vull, vull);
-VANDC_V2DI_UNS andcv2di3 {}
+VANDC_V2DI_UNS andnv2di3 {}
 
   const vd __builtin_altivec_vnor_v2df (vd, vd);
 VNOR_V2DF norv2df3 {}
@@ -1975,40 +1975,40 @@
 NEG_V2DI negv2di2 {}
 
   const vsc __builtin_altivec_orc_v16qi (vsc, vsc);
-ORC_V16QI iorcv16qi3 {}
+ORC_V16QI iornv16qi3 {}
 
   const vuc __builtin_altivec_orc_v16qi_uns (vuc, vuc);
-ORC_V16QI_UNS iorcv16qi3 {}
+ORC_V16QI_UNS iornv16qi3 {}
 
   const vsq __builtin_altivec_orc_v1ti (vsq, vsq);
-ORC_V1TI iorcv1ti3 {}
+ORC_V1TI iornv1ti3 {}
 
   const vuq __builtin_altivec_orc_v1ti_uns (vuq, vuq);
-ORC_V1TI_UNS iorcv1ti3 {}
+ORC_V1TI_UNS iornv1ti3 {}
 
   const vd __builtin_altivec_orc_v2df (vd, vd);
-ORC_V2DF iorcv2df3 {}
+ORC_V2DF iornv2df3 {}
 
   const vsll __builtin_altivec_orc_v2di (vsll, vsll);
-ORC_V2DI iorcv2di3 {}
+ORC_V2DI iornv2di3 {}
 
   const vull __builtin_altivec_orc_v2di_uns (vull, vull);
-ORC_V2DI_UNS iorcv2di3 {}
+ORC_V2DI_UNS iornv2di3 {}
 
   const vf __builtin_altivec_orc_v4sf (vf, vf);
-ORC_V4SF iorcv4sf3 {}
+ORC_V4SF iornv4sf3 {}
 
   const vsi __builtin_altivec_orc_v4si (vsi, vsi);
-ORC_V4SI iorcv4si3 {}
+ORC_V4SI iornv4si3 {}
 
   const vui __builtin_altivec_orc_v4si_uns (vui, vui);
-ORC_V4SI_UNS iorcv4si3 {}
+ORC_V4SI_UNS iornv4si3 {}
 
   const vss __builtin_altivec_orc_v8hi (vss

[gcc r15-2341] isel: Move duplicate comparisons to its own function

2024-07-26 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:9e7b2ad4abae69e6348220b7c5ad2fb8e3d52c83

commit r15-2341-g9e7b2ad4abae69e6348220b7c5ad2fb8e3d52c83
Author: Andrew Pinski 
Date:   Thu Jul 25 16:17:15 2024 -0700

isel: Move duplicate comparisons to its own function

This is just a small cleanup to isel with no functional changes.
The loop inside pass_gimple_isel::execute was getting too
deep, so let's fix that by moving it to its own function.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* gimple-isel.cc (pass_gimple_isel::execute): Factor out
duplicate comparisons out to ...
(duplicate_comparison): New function.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple-isel.cc | 66 +-
 1 file changed, 35 insertions(+), 31 deletions(-)

diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
index 57f7281bb508..327a78ea408e 100644
--- a/gcc/gimple-isel.cc
+++ b/gcc/gimple-isel.cc
@@ -395,6 +395,40 @@ gimple_expand_vec_cond_expr (struct function *fun, 
gimple_stmt_iterator *gsi,
 5, op0a, op0b, op1, op2, tcode_tree);
 }
 
+/* Duplicate COND_EXPR condition defs of STMT located in BB when they are
+   comparisons so RTL expansion with the help of TER
+   can perform better if conversion.  */
+static void
+duplicate_comparison (gassign *stmt, basic_block bb)
+{
+  imm_use_iterator imm_iter;
+  use_operand_p use_p;
+  auto_vec cond_exprs;
+  unsigned cnt = 0;
+  tree lhs = gimple_assign_lhs (stmt);
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+{
+  if (is_gimple_debug (USE_STMT (use_p)))
+   continue;
+  cnt++;
+  if (gimple_bb (USE_STMT (use_p)) == bb
+ && is_gimple_assign (USE_STMT (use_p))
+ && gimple_assign_rhs1_ptr (USE_STMT (use_p)) == use_p->use
+ && gimple_assign_rhs_code (USE_STMT (use_p)) == COND_EXPR)
+   cond_exprs.safe_push (as_a  (USE_STMT (use_p)));
+  }
+  for (unsigned i = cond_exprs.length () == cnt ? 1 : 0;
+   i < cond_exprs.length (); ++i)
+{
+  gassign *copy = as_a  (gimple_copy (stmt));
+  tree new_def = duplicate_ssa_name (lhs, copy);
+  gimple_assign_set_lhs (copy, new_def);
+  auto gsi2 = gsi_for_stmt (cond_exprs[i]);
+  gsi_insert_before (&gsi2, copy, GSI_SAME_STMT);
+  gimple_assign_set_rhs1 (cond_exprs[i], new_def);
+  update_stmt (cond_exprs[i]);
+}
+}
 
 
 namespace {
@@ -469,37 +503,7 @@ pass_gimple_isel::execute (struct function *fun)
  tree lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE_CLASS (code) == tcc_comparison
  && !has_single_use (lhs))
-   {
- /* Duplicate COND_EXPR condition defs when they are
-comparisons so RTL expansion with the help of TER
-can perform better if conversion.  */
- imm_use_iterator imm_iter;
- use_operand_p use_p;
- auto_vec cond_exprs;
- unsigned cnt = 0;
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
-   {
- if (is_gimple_debug (USE_STMT (use_p)))
-   continue;
- cnt++;
- if (gimple_bb (USE_STMT (use_p)) == bb
- && is_gimple_assign (USE_STMT (use_p))
- && gimple_assign_rhs1_ptr (USE_STMT (use_p)) == use_p->use
- && gimple_assign_rhs_code (USE_STMT (use_p)) == COND_EXPR)
-   cond_exprs.safe_push (as_a  (USE_STMT (use_p)));
-   }
- for (unsigned i = cond_exprs.length () == cnt ? 1 : 0;
-  i < cond_exprs.length (); ++i)
-   {
- gassign *copy = as_a  (gimple_copy (stmt));
- tree new_def = duplicate_ssa_name (lhs, copy);
- gimple_assign_set_lhs (copy, new_def);
- auto gsi2 = gsi_for_stmt (cond_exprs[i]);
- gsi_insert_before (&gsi2, copy, GSI_SAME_STMT);
- gimple_assign_set_rhs1 (cond_exprs[i], new_def);
- update_stmt (cond_exprs[i]);
-   }
-   }
+   duplicate_comparison (stmt, bb);
}
 }


[gcc r15-2342] isel: Small cleanup of duplicating comparisons

2024-07-26 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:db74887097272a8390e33eba47c6fb8f50b64f5c

commit r15-2342-gdb74887097272a8390e33eba47c6fb8f50b64f5c
Author: Andrew Pinski 
Date:   Thu Jul 25 17:07:28 2024 -0700

isel: Small cleanup of duplicating comparisons

This is a small cleanup of the duplicating comparison code.
There is a code generation difference, but only for -O0 and -fno-tree-ter
(both of which will be fixed in a later patch).
The difference is instead of skipping the first use if the
comparison uses are only in cond_expr we skip the last use.
Also we go through the uses list in the opposite order now too.

The cleanups are the following:
* Don't call has_single_use as we will do the loop anyways
* Change the order of the checks slightly, it is better
  to check for cond_expr earlier
* Use cond_exprs as a stack and pop from it.
  Skipping the top if the use is only from cond_expr.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* gimple-isel.cc (duplicate_comparison): Rename to ...
(maybe_duplicate_comparison): This. Add check for use here
rather than in its caller.
(pass_gimple_isel::execute): Don't check how many uses the
comparison had and call maybe_duplicate_comparison instead of
duplicate_comparison.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple-isel.cc | 38 --
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
index 327a78ea408e..99bfc937bd55 100644
--- a/gcc/gimple-isel.cc
+++ b/gcc/gimple-isel.cc
@@ -399,34 +399,46 @@ gimple_expand_vec_cond_expr (struct function *fun, 
gimple_stmt_iterator *gsi,
comparisons so RTL expansion with the help of TER
can perform better if conversion.  */
 static void
-duplicate_comparison (gassign *stmt, basic_block bb)
+maybe_duplicate_comparison (gassign *stmt, basic_block bb)
 {
   imm_use_iterator imm_iter;
   use_operand_p use_p;
   auto_vec cond_exprs;
-  unsigned cnt = 0;
   tree lhs = gimple_assign_lhs (stmt);
+  unsigned cnt = 0;
+
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
 {
   if (is_gimple_debug (USE_STMT (use_p)))
continue;
   cnt++;
+  /* Add the use statement if it was a cond_expr.  */
   if (gimple_bb (USE_STMT (use_p)) == bb
  && is_gimple_assign (USE_STMT (use_p))
- && gimple_assign_rhs1_ptr (USE_STMT (use_p)) == use_p->use
- && gimple_assign_rhs_code (USE_STMT (use_p)) == COND_EXPR)
+ && gimple_assign_rhs_code (USE_STMT (use_p)) == COND_EXPR
+ && gimple_assign_rhs1_ptr (USE_STMT (use_p)) == use_p->use)
cond_exprs.safe_push (as_a  (USE_STMT (use_p)));
-  }
-  for (unsigned i = cond_exprs.length () == cnt ? 1 : 0;
-   i < cond_exprs.length (); ++i)
+}
+
+  /* If the comparison has 0 or 1 uses, no reason to do anything. */
+  if (cnt <= 1)
+return;
+
+  /* If we only use the expression inside cond_exprs in that BB, we don't
+ need to duplicate for one of them so pop the top. */
+  if (cond_exprs.length () == cnt)
+cond_exprs.pop();
+
+  while (!cond_exprs.is_empty())
 {
+  auto old_top = cond_exprs.pop();
   gassign *copy = as_a  (gimple_copy (stmt));
   tree new_def = duplicate_ssa_name (lhs, copy);
   gimple_assign_set_lhs (copy, new_def);
-  auto gsi2 = gsi_for_stmt (cond_exprs[i]);
+  auto gsi2 = gsi_for_stmt (old_top);
   gsi_insert_before (&gsi2, copy, GSI_SAME_STMT);
-  gimple_assign_set_rhs1 (cond_exprs[i], new_def);
-  update_stmt (cond_exprs[i]);
+  gimple_assign_set_rhs1 (old_top, new_def);
+  update_stmt (old_top);
 }
 }
 
@@ -500,10 +512,8 @@ pass_gimple_isel::execute (struct function *fun)
continue;
 
  tree_code code = gimple_assign_rhs_code (stmt);
- tree lhs = gimple_assign_lhs (stmt);
- if (TREE_CODE_CLASS (code) == tcc_comparison
- && !has_single_use (lhs))
-   duplicate_comparison (stmt, bb);
+ if (TREE_CODE_CLASS (code) == tcc_comparison)
+   maybe_duplicate_comparison (stmt, bb);
}
 }


[gcc r15-2343] isel: Don't duplicate comparisons for -O0 nor -fno-tree-ter [PR116101]

2024-07-26 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:9fe53beacfc5c01e24690dc70d7599db084cc8b4

commit r15-2343-g9fe53beacfc5c01e24690dc70d7599db084cc8b4
Author: Andrew Pinski 
Date:   Thu Jul 25 17:43:07 2024 -0700

isel: Don't duplicate comparisons for -O0 nor -fno-tree-ter [PR116101]

While doing cleanups on this code I noticed that we duplicate
comparisons even at -O0. For C and C++ code this makes no difference as
the gimplifier never produces COND_EXPR. But it could make a difference
for other front-ends.
Oh and for -fno-tree-ter, duplicating the comparison is just a waste
as it is never used for expand.

I also decided to add a few testcases so this is checked in the future.
Even added one for the duplication itself.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/116101

gcc/ChangeLog:

* gimple-isel.cc (maybe_duplicate_comparison): Don't
do anything for -O0 or -fno-tree-ter.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/dup_compare_cond-1.c: New test.
* gcc.dg/tree-ssa/dup_compare_cond-2.c: New test.
* gcc.dg/tree-ssa/dup_compare_cond-3.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple-isel.cc |  5 +
 gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-1.c | 19 +++
 gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-2.c | 19 +++
 gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-3.c | 19 +++
 4 files changed, 62 insertions(+)

diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
index 99bfc937bd55..2817ab659af1 100644
--- a/gcc/gimple-isel.cc
+++ b/gcc/gimple-isel.cc
@@ -407,6 +407,11 @@ maybe_duplicate_comparison (gassign *stmt, basic_block bb)
   tree lhs = gimple_assign_lhs (stmt);
   unsigned cnt = 0;
 
+  /* This is should not be used for -O0 nor it is not useful
+ when ter is turned off. */
+  if (!optimize || !flag_tree_ter)
+return;
+
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
 {
   if (is_gimple_debug (USE_STMT (use_p)))
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-1.c
new file mode 100644
index ..0321a60b34f8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-fgimple -O0 -fdump-tree-optimized " } */
+/* PR tree-optimization/116101 */
+
+int __GIMPLE() f(int a, int b, int c, int d, int e)
+{
+  _Bool t;
+  int ff;
+  int gg;
+  int res;
+  t = a == b;
+  ff = t ? a : e;
+  gg = t ? d : b;
+  res = ff+gg;
+  return res;
+}
+
+/* At -O0 we should not duplicate the comparison. */
+/* { dg-final { scan-tree-dump-times " == " 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-2.c
new file mode 100644
index ..07e2175c612e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-fgimple -O2 -fdump-tree-optimized " } */
+/* PR middle-end/105715 */
+
+int __GIMPLE() f(int a, int b, int c, int d, int e)
+{
+  _Bool t;
+  int ff;
+  int gg;
+  int res;
+  t = a == b;
+  ff = t ? a : e;
+  gg = t ? d : b;
+  res = ff+gg;
+  return res;
+}
+
+/* At -O2 we should have duplicate the comparison. */
+/* { dg-final { scan-tree-dump-times " == " 2 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-3.c
new file mode 100644
index ..88bf19795e04
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/dup_compare_cond-3.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-fgimple -O2 -fno-tree-ter -fdump-tree-optimized " } */
+/* PR tree-optimization/116101 */
+
+int __GIMPLE() f(int a, int b, int c, int d, int e)
+{
+  _Bool t;
+  int ff;
+  int gg;
+  int res;
+  t = a == b;
+  ff = t ? a : e;
+  gg = t ? d : b;
+  res = ff+gg;
+  return res;
+}
+
+/* With -fno-tree-ter it is not useful to duplicate the comparison. */
+/* { dg-final { scan-tree-dump-times " == " 1 "optimized" } } */


[gcc r15-2344] aarch64: Fix target/optimize option handling with transiting between O1 to O2

2024-07-26 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:8a5f528fba788f2af40a15a999bb63a2a0f6f455

commit r15-2344-g8a5f528fba788f2af40a15a999bb63a2a0f6f455
Author: Andrew Pinski 
Date:   Thu Jul 25 09:37:49 2024 -0700

aarch64: Fix target/optimize option handling with transiting between O1 to O2

The problem here is that the aarch64 backend enables -mearly-ra at -O2 and
above, but it is not marked as an Optimization in the .opt file, so enabling
it sometimes resets the target options when going from -O1 to -O2 for the
first time.

Built and tested for aarch64-linux-gnu with no regressions.

PR target/116065

gcc/ChangeLog:

* config/aarch64/aarch64.opt (mearly-ra=): Mark as Optimization 
rather
than Save.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/target_optimization-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64.opt   |  2 +-
 .../gcc.target/aarch64/sve/target_optimization-1.c   | 16 
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 2f90f10352af..6229bcb371e3 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -256,7 +256,7 @@ EnumValue
 Enum(early_ra_scope) String(none) Value(AARCH64_EARLY_RA_NONE)
 
 mearly-ra=
-Target RejectNegative Joined Enum(early_ra_scope) Var(aarch64_early_ra) 
Init(AARCH64_EARLY_RA_NONE) Save
+Target RejectNegative Joined Enum(early_ra_scope) Var(aarch64_early_ra) 
Init(AARCH64_EARLY_RA_NONE) Optimization
 Specify when to enable an early register allocation pass.  The possibilities
 are: all functions, functions that have access to strided multi-register
 instructions, and no functions.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/target_optimization-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/target_optimization-1.c
new file mode 100644
index ..3010f0c4189d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/target_optimization-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+#include 
+
+/* Turn off SVE overall */
+#pragma GCC target("+nosve")
+
+/* But the function turns it on again so it should work.
+   Even if changing the optimization level from O1 to O2. */
+int __attribute__((target ("+sve"), optimize(2)))
+bar (void)
+{
+  svfloat32_t xseg;
+  return svlen_f32(xseg);
+}


[gcc r15-2345] aarch64: Rename bic/orn patterns to iorn/andn for vector modes

2024-07-26 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:245187de498887072c20d4d9fa55491b3e947cdf

commit r15-2345-g245187de498887072c20d4d9fa55491b3e947cdf
Author: Andrew Pinski 
Date:   Mon Jul 22 11:19:11 2024 -0700

aarch64: Rename bic/orn patterns to iorn/andn for vector modes

This renames the patterns orn<mode>3 to iorn<mode>3 so it
matches the new optab that was added with r15-1890-gf379596e0ba99d.
Likewise for bic<mode>3 to andn<mode>3.

Note the operand 1 and operand 2 are swapped from the original
patterns to match the optab now.

Built and tested for aarch64-linux-gnu with no regression.

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md
(bic3): Rename to ...
(andn3): This. Also swap operands.
(orn3): Rename to ...
(iorn3): This. Also swap operands.
(vec_cmp): Update orn call to iorn
and swap the last two arguments.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/vect_cmp-1.C: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64-simd.md| 20 +++
 gcc/testsuite/g++.target/aarch64/vect_cmp-1.C | 37 +++
 2 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index bbeee221f37c..459e11b09a19 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -322,21 +322,21 @@
   [(set_attr "length" "4")]
 )
 
-(define_insn "orn3"
+(define_insn "iorn3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-   (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
-   (match_operand:VDQ_I 2 "register_operand" "w")))]
+   (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 2 "register_operand" "w"))
+   (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
- "orn\t%0., %2., %1."
+ "orn\t%0., %1., %2."
   [(set_attr "type" "neon_logic")]
 )
 
-(define_insn "bic3"
+(define_insn "andn3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-   (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
-   (match_operand:VDQ_I 2 "register_operand" "w")))]
+   (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 2 "register_operand" "w"))
+   (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
- "bic\t%0., %2., %1."
+ "bic\t%0., %1., %2."
   [(set_attr "type" "neon_logic")]
 )
 
@@ -4064,7 +4064,7 @@
   tmp0, mode),
   lowpart_subreg (mode,
   tmp1, mode)));
-   emit_insn (gen_orn3 (operands[0], tmp2, operands[0]));
+   emit_insn (gen_iorn3 (operands[0], operands[0], tmp2));
   }
   break;
 
@@ -4111,7 +4111,7 @@
   else if (code == UNEQ)
{
  emit_insn (gen_aarch64_cmeq (tmp, operands[2], operands[3]));
- emit_insn (gen_orn3 (operands[0], operands[0], tmp));
+ emit_insn (gen_iorn3 (operands[0], tmp, operands[0]));
}
   break;
 
diff --git a/gcc/testsuite/g++.target/aarch64/vect_cmp-1.C 
b/gcc/testsuite/g++.target/aarch64/vect_cmp-1.C
new file mode 100644
index ..b82d87827d30
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/vect_cmp-1.C
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#pragma GCC target "+nosve"
+
+#define vect8 __attribute__((vector_size(8) ))
+
+/**
+**bar1:
+** fcmgt   v([0-9]+).2s, v[0-9]+.2s, v[0-9]+.2s
+** bic v0.8b, v2.8b, v\1.8b
+** ret
+*/
+extern "C"
+vect8 int bar1(vect8 float a, vect8 float b, vect8 int c)
+{
+  return (a > b) ? 0 : c;
+}
+
+/**
+**bar2:
+** fcmgt   v([0-9]+).2s, v[0-9]+.2s, v[0-9]+.2s
+** orn v0.8b, v2.8b, v\1.8b
+** ret
+*/
+extern "C"
+vect8 int bar2(vect8 float a, vect8 float b, vect8 int c)
+{
+  return (a > b) ? c : -1;
+}
+
+// We should produce a BIT_ANDC and BIT_IORC here.
+
+// { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } }
+// { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } }
+


[gcc r15-2347] aarch64: sve: Rename aarch64_bic to standard pattern, andn

2024-07-26 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:795021d9bc8546ccc51bacc899b6077c6928067b

commit r15-2347-g795021d9bc8546ccc51bacc899b6077c6928067b
Author: Andrew Pinski 
Date:   Mon Jul 22 15:39:37 2024 -0700

aarch64: sve: Rename aarch64_bic to standard pattern, andn

Now there is an optab for bic, andn since r15-1890-gf379596e0ba99d.
This moves aarch64_bic for sve over to use it instead.

Note unlike the simd bic patterns, the operands were already
in the order that was expected for the optab so no swapping
was needed.

Built and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins-base.cc (svbic_impl::expand):
Update to use andn optab instead of using code_for_aarch64_bic.
* config/aarch64/aarch64-sve.md (@aarch64_bic<mode>): Rename to ...
(andn<mode>3): This.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +-
 gcc/config/aarch64/aarch64-sve.md   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index aa26370d397f..a2268353ae31 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -271,7 +271,7 @@ public:
   }
 
 if (e.pred == PRED_x)
-  return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0)));
+  return e.use_unpred_insn (e.direct_optab_handler (andn_optab));
 
 return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0)));
   }
diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 5331e7121d55..c3ed5075c4ed 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4641,8 +4641,8 @@
 ;; - BIC
 ;; -
 
-;; Unpredicated BIC.
-(define_expand "@aarch64_bic"
+;; Unpredicated BIC; andn named pattern.
+(define_expand "andn3"
   [(set (match_operand:SVE_I 0 "register_operand")
(and:SVE_I
  (unspec:SVE_I


[gcc r15-2346] aarch64: Use iorn and andn standard pattern names for scalar modes

2024-07-26 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:7e8e8a745a0f3389c2ef7de5798932f5ac0f8c9d

commit r15-2346-g7e8e8a745a0f3389c2ef7de5798932f5ac0f8c9d
Author: Andrew Pinski 
Date:   Mon Jul 22 16:18:47 2024 -0700

aarch64: Use iorn and andn standard pattern names for scalar modes

Since r15-1890-gf379596e0ba99d, these are the new optabs.
So let's use these names for them. These will be used to
generate during expand from gimple in the next few patches.

Built and tested for aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

* config/aarch64/aarch64.md (*<NLOGICAL:optab>_one_cmpl<mode>3):
Rename to ...
(<NLOGICAL:optab>n<mode>3): This.
(*<NLOGICAL:optab>_one_cmplsidi3_ze): Rename to ...
(*<NLOGICAL:optab>nsidi3_ze): This.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64.md | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 94ff0eefa77f..ed29127dafbe 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5069,18 +5069,18 @@
 
 ;; Binary logical operators negating one operand, i.e. (a & !b), (a | !b).
 
-(define_insn "*_one_cmpl3"
+(define_insn "n3"
   [(set (match_operand:GPI 0 "register_operand")
-   (NLOGICAL:GPI (not:GPI (match_operand:GPI 1 "register_operand"))
-(match_operand:GPI 2 "register_operand")))]
+   (NLOGICAL:GPI (not:GPI (match_operand:GPI 2 "register_operand"))
+(match_operand:GPI 1 "register_operand")))]
   ""
   {@ [ cons: =0 , 1 , 2 ; attrs: type , arch  ]
- [ r, r , r ; logic_reg   , * ] \t%0, 
%2, %1
- [ w, w , w ; neon_logic  , simd  ] 
\t%0., %2., %1.
+ [ r, r , r ; logic_reg   , * ] \t%0, 
%1, %2
+ [ w, w , w ; neon_logic  , simd  ] 
\t%0., %1., %2.
   }
 )
 
-(define_insn "*_one_cmplsidi3_ze"
+(define_insn "*nsidi3_ze"
   [(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
  (NLOGICAL:SI (not:SI (match_operand:SI 1 "register_operand" "r"))


[gcc r14-9422] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-03-10 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:31ce2e993d09dcad1ce139a2848a28de5931056d

commit r14-9422-g31ce2e993d09dcad1ce139a2848a28de5931056d
Author: Andrew Pinski 
Date:   Sun Mar 10 22:17:09 2024 +

Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

The problem here is that merge_truthop_with_opposite_arm would
use the type of the result of the comparison rather than the operands
of the comparison to figure out if we are honoring NaNs.
This fixes that oversight and now we get the correct results in this
case.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/95351

gcc/ChangeLog:

* fold-const.cc (merge_truthop_with_opposite_arm): Use
the type of the operands of the comparison and not the type
of the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/float_opposite_arm-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/fold-const.cc   |  3 ++-
 gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 43105d20be3..299c22bf391 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -6420,7 +6420,6 @@ static tree
 merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
 bool rhs_only)
 {
-  tree type = TREE_TYPE (cmpop);
   enum tree_code code = TREE_CODE (cmpop);
   enum tree_code truthop_code = TREE_CODE (op);
   tree lhs = TREE_OPERAND (op, 0);
@@ -6436,6 +6435,8 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, 
tree cmpop,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
 return NULL_TREE;
 
+  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
+
   if (rhs_code == truthop_code)
 {
   tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, 
rhs_only);
diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c 
b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
new file mode 100644
index 000..d2dbff35066
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* PR middle-end/95351 */
+
+int Foo(double possiblyNAN, double b, double c)
+{
+return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c));
+}
+
+/* Make sure we don't remove either >/<=  */
+
+/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0" "optimized" 
} } */
+
+/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0" "optimized" 
} } */


[gcc r14-9434] Reject -fno-multiflags [PR114314]

2024-03-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c4e5789cede6974b6483c0f82069ff80b5a547e4

commit r14-9434-gc4e5789cede6974b6483c0f82069ff80b5a547e4
Author: Andrew Pinski 
Date:   Mon Mar 11 17:40:08 2024 -0700

Reject -fno-multiflags [PR114314]

When -fmultiflags option support was added in r13-3693-g6b1a2474f9e422,
it accidentally allowed -fno-multiflags, which would then be passed on to cc1.
This fixes that oversight.

Committed as obvious after bootstrap/test on x86_64-linux-gnu.

gcc/ChangeLog:

PR driver/114314
* common.opt (fmultiflags): Add RejectNegative.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/common.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 51c4a17da83..1ad0169bd6f 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2295,7 +2295,7 @@ Common Var(flag_move_loop_stores) Optimization
 Move stores out of loops.
 
 fmultiflags
-Common Driver
+Common Driver RejectNegative
 Building block for specs-based multilib-aware TFLAGS.
 
 fdce


[gcc r13-8419] Reject -fno-multiflags [PR114314]

2024-03-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:4bd9d097197334e786690ba1566ccf79396da730

commit r13-8419-g4bd9d097197334e786690ba1566ccf79396da730
Author: Andrew Pinski 
Date:   Mon Mar 11 17:40:08 2024 -0700

Reject -fno-multiflags [PR114314]

When -fmultiflags option support was added in r13-3693-g6b1a2474f9e422,
it accidentally allowed -fno-multiflags, which would then be passed on to cc1.
This fixes that oversight.

Committed as obvious after bootstrap/test on x86_64-linux-gnu.

gcc/ChangeLog:

PR driver/114314
* common.opt (fmultiflags): Add RejectNegative.

Signed-off-by: Andrew Pinski 
(cherry picked from commit c4e5789cede6974b6483c0f82069ff80b5a547e4)

Diff:
---
 gcc/common.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 862c474d3c8..b055c7bd9ac 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2182,7 +2182,7 @@ Common Var(flag_move_loop_stores) Optimization
 Move stores out of loops.
 
 fmultiflags
-Common Driver
+Common Driver RejectNegative
 Building block for specs-based multilib-aware TFLAGS.
 
 fdce


[gcc r13-8420] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-03-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:ac96973150b3279fe157f160efd83995077c7590

commit r13-8420-gac96973150b3279fe157f160efd83995077c7590
Author: Andrew Pinski 
Date:   Sun Mar 10 22:17:09 2024 +

Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

The problem here is that merge_truthop_with_opposite_arm would
use the type of the result of the comparison rather than the operands
of the comparison to figure out if we are honoring NaNs.
This fixes that oversight and now we get the correct results in this
case.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/95351

gcc/ChangeLog:

* fold-const.cc (merge_truthop_with_opposite_arm): Use
the type of the operands of the comparison and not the type
of the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/float_opposite_arm-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 31ce2e993d09dcad1ce139a2848a28de5931056d)

Diff:
---
 gcc/fold-const.cc   |  3 ++-
 gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 7ebcac30666..a40b0d98ae7 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -6223,7 +6223,6 @@ static tree
 merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
 bool rhs_only)
 {
-  tree type = TREE_TYPE (cmpop);
   enum tree_code code = TREE_CODE (cmpop);
   enum tree_code truthop_code = TREE_CODE (op);
   tree lhs = TREE_OPERAND (op, 0);
@@ -6239,6 +6238,8 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, 
tree cmpop,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
 return NULL_TREE;
 
+  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
+
   if (rhs_code == truthop_code)
 {
   tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, 
rhs_only);
diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c 
b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
new file mode 100644
index 000..d2dbff35066
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* PR middle-end/95351 */
+
+int Foo(double possiblyNAN, double b, double c)
+{
+return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c));
+}
+
+/* Make sure we don't remove either >/<=  */
+
+/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0" "optimized" 
} } */
+
+/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0" "optimized" 
} } */


[gcc r14-9613] Another ICE after conflicting types of redeclaration [PR109619]

2024-03-22 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:dbe9062ce070c861cd3fa6435187618413b1b3d1

commit r14-9613-gdbe9062ce070c861cd3fa6435187618413b1b3d1
Author: Andrew Pinski 
Date:   Thu Mar 21 16:29:20 2024 -0700

Another ICE after conflicting types of redeclaration [PR109619]

This is another one of these ICE-after-error issues with the
gimplifier and a fallout from r12-3278-g823685221de986af.
This case happens when we are trying to fold memcpy/memmove.
There is already code to try to catch ERROR_MARKs as arguments
to the builtins, so we just need to change those checks to use
error_operand_p, which also checks whether the type of the
expression is an error mark.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR c/109619
* builtins.cc (fold_builtin_1): Use error_operand_p
instead of checking against ERROR_MARK.
(fold_builtin_2): Likewise.
(fold_builtin_3): Likewise.

gcc/testsuite/ChangeLog:

PR c/109619
* gcc.dg/redecl-26.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/builtins.cc  | 12 ++--
 gcc/testsuite/gcc.dg/redecl-26.c | 14 ++
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index eda8bea9c4b..bb74b5cbcd6 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -10461,7 +10461,7 @@ fold_builtin_1 (location_t loc, tree expr, tree fndecl, 
tree arg0)
   tree type = TREE_TYPE (TREE_TYPE (fndecl));
   enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
 
-  if (TREE_CODE (arg0) == ERROR_MARK)
+  if (error_operand_p (arg0))
 return NULL_TREE;
 
   if (tree ret = fold_const_call (as_combined_fn (fcode), type, arg0))
@@ -10601,8 +10601,8 @@ fold_builtin_2 (location_t loc, tree expr, tree fndecl, 
tree arg0, tree arg1)
   tree type = TREE_TYPE (TREE_TYPE (fndecl));
   enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
 
-  if (TREE_CODE (arg0) == ERROR_MARK
-  || TREE_CODE (arg1) == ERROR_MARK)
+  if (error_operand_p (arg0)
+  || error_operand_p (arg1))
 return NULL_TREE;
 
   if (tree ret = fold_const_call (as_combined_fn (fcode), type, arg0, arg1))
@@ -10693,9 +10693,9 @@ fold_builtin_3 (location_t loc, tree fndecl,
   tree type = TREE_TYPE (TREE_TYPE (fndecl));
   enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
 
-  if (TREE_CODE (arg0) == ERROR_MARK
-  || TREE_CODE (arg1) == ERROR_MARK
-  || TREE_CODE (arg2) == ERROR_MARK)
+  if (error_operand_p (arg0)
+  || error_operand_p (arg1)
+  || error_operand_p (arg2))
 return NULL_TREE;
 
   if (tree ret = fold_const_call (as_combined_fn (fcode), type,
diff --git a/gcc/testsuite/gcc.dg/redecl-26.c b/gcc/testsuite/gcc.dg/redecl-26.c
new file mode 100644
index 000..5f8889c4c39
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/redecl-26.c
@@ -0,0 +1,14 @@
+/* We used to ICE while folding memcpy and memmove.
+   PR c/109619. */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+int *a1, *a2;
+
+void foo(__SIZE_TYPE__ a3) /* { dg-note "" }  */
+{
+  __builtin_memcpy(a1, a2, a3);
+  __builtin_memmove(a1, a2, a3);
+  int *a3; /* { dg-error "redeclared as different kind of symbol" } */
+}
+


[gcc r14-9718] Use fatal_error instead of internal_error for when ZSTD is not enabled

2024-03-28 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:22f48d78f033922fd2fbf9252041cd97ce201052

commit r14-9718-g22f48d78f033922fd2fbf9252041cd97ce201052
Author: Andrew Pinski 
Date:   Thu Mar 28 16:46:33 2024 -0700

Use fatal_error instead of internal_error for when ZSTD is not enabled

This changes an internal error to be a fatal error for when the ZSTD
is not enabled but the section was compressed as ZSTD.

Committed as approved after bootstrap/test on x86_64-linux-gnu.

gcc/ChangeLog:

* lto-compress.cc (lto_end_uncompression): Use
fatal_error instead of internal_error when ZSTD
is not enabled.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/lto-compress.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/lto-compress.cc b/gcc/lto-compress.cc
index c167ac967aa..bebf0277ef6 100644
--- a/gcc/lto-compress.cc
+++ b/gcc/lto-compress.cc
@@ -408,7 +408,7 @@ lto_end_uncompression (struct lto_compression_stream 
*stream,
 }
 #endif
   if (compression == ZSTD)
-internal_error ("compiler does not support ZSTD LTO compression");
+fatal_error (UNKNOWN_LOCATION, "compiler does not support ZSTD LTO 
compression");
 
   lto_uncompression_zlib (stream);
 }


[gcc r14-9941] match: Fix `!a?b:c` and `a?~t:t` patterns for signed 1 bit types [PR114666]

2024-04-12 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:245595d72818526e2ca857848831e8afa87ae2de

commit r14-9941-g245595d72818526e2ca857848831e8afa87ae2de
Author: Andrew Pinski 
Date:   Wed Apr 10 13:39:01 2024 -0700

match: Fix `!a?b:c` and `a?~t:t` patterns for signed 1 bit types [PR114666]

The problem is that the `!a?b:c` pattern will create a COND_EXPR whose
condition is a 1-bit signed integer, which breaks patterns like `a?~t:t`.
This rejects a signed condition operand in both patterns.

Note for GCC 15, I am going to look at the canonicalization of `a?~t:t`
where t was a constant, since I think keeping it a COND_EXPR might be more
canonical and is what VRP produces from the same IR; if anything, expand
should handle which one is better.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/114666

gcc/ChangeLog:

* match.pd (`!a?b:c`): Reject signed types for the condition.
(`a?~t:t`): Likewise.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/bitfld-signed1-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd   |  6 +-
 gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c | 13 +
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 15a1e7350d4..d401e7503e6 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5895,7 +5895,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  /* !A ? B : C -> A ? C : B.  */
  (simplify
   (cnd (logical_inverted_value truth_valued_p@0) @1 @2)
-  (cnd @0 @2 @1)))
+  /* For CONDs, don't handle signed values here. */
+  (if (cnd == VEC_COND_EXPR
+   || TYPE_UNSIGNED (TREE_TYPE (@0)))
+   (cnd @0 @2 @1
 
 /* abs/negative simplifications moved from fold_cond_expr_with_comparison.
 
@@ -7095,6 +7098,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (cond @0 @1 @2)
  (with { bool wascmp; }
   (if (INTEGRAL_TYPE_P (type)
+   && TYPE_UNSIGNED (TREE_TYPE (@0))
&& bitwise_inverted_equal_p (@1, @2, wascmp)
&& (!wascmp || TYPE_PRECISION (type) == 1))
(if ((!TYPE_UNSIGNED (type) && TREE_CODE (type) == BOOLEAN_TYPE)
diff --git a/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c 
b/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c
new file mode 100644
index 000..b0ff120ea51
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/114666 */
+/* We used to miscompile this to be always aborting
+   due to the use of the signed 1bit into the COND_EXPR. */
+
+struct {
+  signed a : 1;
+} b = {-1};
+char c;
+int main()
+{
+  if ((b.a ^ 1UL) < 3)
+__builtin_abort();
+}


[gcc r14-9994] Document that vector_size works with typedefs [PR92880]

2024-04-16 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:8eddd87da2dd01c841f9742f973f65ebe0a88e71

commit r14-9994-g8eddd87da2dd01c841f9742f973f65ebe0a88e71
Author: Andrew Pinski 
Date:   Mon Apr 15 17:13:36 2024 -0700

Document that vector_size works with typedefs [PR92880]

This just adds a clause to make it more obvious that the vector_size
attribute extension works with typedefs.
Note this whole section needs a rewrite to be a similar format as other
extensions. But that is for another day.

gcc/ChangeLog:

PR c/92880
* doc/extend.texi (Using Vector Instructions): Add that
the base_types could be a typedef of them.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/doc/extend.texi | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7b54a241a7b..e290265d68d 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12901,12 +12901,13 @@ typedef int v4si __attribute__ ((vector_size (16)));
 @end smallexample
 
 @noindent
-The @code{int} type specifies the @dfn{base type}, while the attribute 
specifies
-the vector size for the variable, measured in bytes.  For example, the
-declaration above causes the compiler to set the mode for the @code{v4si}
-type to be 16 bytes wide and divided into @code{int} sized units.  For
-a 32-bit @code{int} this means a vector of 4 units of 4 bytes, and the
-corresponding mode of @code{foo} is @acronym{V4SI}.
+The @code{int} type specifies the @dfn{base type} (which can be a
+@code{typedef}), while the attribute specifies the vector size for the
+variable, measured in bytes. For example, the declaration above causes
+the compiler to set the mode for the @code{v4si} type to be 16 bytes wide
+and divided into @code{int} sized units.  For a 32-bit @code{int} this
+means a vector of 4 units of 4 bytes, and the corresponding mode of
+@code{foo} is @acronym{V4SI}.
 
 The @code{vector_size} attribute is only applicable to integral and
 floating scalars, although arrays, pointers, and function return values


[gcc r15-12] aarch64: Use vec_perm_indices::new_shrunk_vector in aarch64_evpc_reencode

2024-04-27 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:f91569e779041e2723be23d31c2a79f1861efc7f

commit r15-12-gf91569e779041e2723be23d31c2a79f1861efc7f
Author: Andrew Pinski 
Date:   Mon Feb 12 15:48:48 2024 -0800

aarch64: Use vec_perm_indices::new_shrunk_vector in aarch64_evpc_reencode

While working on PERM related stuff, I came across the fact that
aarch64_evpc_reencode was manually figuring out whether the perm indices
can be shrunk instead of using vec_perm_indices::new_shrunk_vector;
new_shrunk_vector was added after reencode was added.

Built and tested for aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

PR target/113822
* config/aarch64/aarch64.cc (aarch64_evpc_reencode): Use
vec_perm_indices::new_shrunk_vector instead of manually
going through the indices.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64.cc | 24 +---
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index a4b7db62546..662ff5a9b0c 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -25574,7 +25574,6 @@ static bool
 aarch64_evpc_reencode (struct expand_vec_perm_d *d)
 {
   expand_vec_perm_d newd;
-  unsigned HOST_WIDE_INT nelt;
 
   if (d->vec_flags != VEC_ADVSIMD)
 return false;
@@ -25589,24 +25588,10 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d)
   if (new_mode == word_mode)
 return false;
 
-  /* to_constant is safe since this routine is specific to Advanced SIMD
- vectors.  */
-  nelt = d->perm.length ().to_constant ();
-
-  vec_perm_builder newpermconst;
-  newpermconst.new_vector (nelt / 2, nelt / 2, 1);
+  vec_perm_indices newpermindices;
 
-  /* Convert the perm constant if we can.  Require even, odd as the pairs.  */
-  for (unsigned int i = 0; i < nelt; i += 2)
-{
-  poly_int64 elt0 = d->perm[i];
-  poly_int64 elt1 = d->perm[i + 1];
-  poly_int64 newelt;
-  if (!multiple_p (elt0, 2, &newelt) || maybe_ne (elt0 + 1, elt1))
-   return false;
-  newpermconst.quick_push (newelt.to_constant ());
-}
-  newpermconst.finalize ();
+  if (!newpermindices.new_shrunk_vector (d->perm, 2))
+return false;
 
   newd.vmode = new_mode;
   newd.vec_flags = VEC_ADVSIMD;
@@ -25618,7 +25603,8 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d)
   newd.testing_p = d->testing_p;
   newd.one_vector_p = d->one_vector_p;
 
-  newd.perm.new_vector (newpermconst, newd.one_vector_p ? 1 : 2, nelt / 2);
+  newd.perm.new_vector (newpermindices.encoding (), newd.one_vector_p ? 1 : 2,
+   newpermindices.nelts_per_input ());
   return aarch64_expand_vec_perm_const_1 (&newd);
 }


[gcc r15-72] Add verification of gimple_assign_nontemporal_move_p [PR112976]

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:e3a7f359c18bf347f6ac8fcda05e9839fac5bd62

commit r15-72-ge3a7f359c18bf347f6ac8fcda05e9839fac5bd62
Author: Andrew Pinski 
Date:   Wed Apr 17 14:12:17 2024 -0700

Add verification of gimple_assign_nontemporal_move_p [PR112976]

Currently the middle-end only knows how to support nontemporal stores
(the undocumented storent optab), so let's verify that the only time
we set nontemporal_move on an assign is if the lhs is not a
gimple reg.

Bootstrapped and tested on x86_64-linux-gnu no regressions.

gcc/ChangeLog:

PR middle-end/112976
* tree-cfg.cc (verify_gimple_assign): Verify that
nontemporal moves are stores.
* gimple.h (struct gimple): Note that only
nontemporal stores are supported.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple.h|  3 ++-
 gcc/tree-cfg.cc | 11 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple.h b/gcc/gimple.h
index 8a8ca109bbf..bd315ffc2dd 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -236,7 +236,8 @@ struct GTY((desc ("gimple_statement_structure (&%h)"), tag 
("GSS_BASE"),
  for clearing this bit before using it.  */
   unsigned int visited : 1;
 
-  /* Nonzero if this tuple represents a non-temporal move.  */
+  /* Nonzero if this tuple represents a non-temporal move; currently
+ only stores are supported.  */
   unsigned int nontemporal_move: 1;
 
   /* Pass local flags.  These flags are free for any pass to use as
diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index b1ba33018fd..1c5b7df8541 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -4837,6 +4837,17 @@ verify_gimple_assign_single (gassign *stmt)
 static bool
 verify_gimple_assign (gassign *stmt)
 {
+  if (gimple_assign_nontemporal_move_p (stmt))
+{
+  tree lhs = gimple_assign_lhs (stmt);
+  if (is_gimple_reg (lhs))
+   {
+ error ("nontemporal store's lhs cannot be a gimple register");
+ debug_generic_stmt (lhs);
+ return true;
+   }
+}
+
   switch (gimple_assign_rhs_class (stmt))
 {
 case GIMPLE_SINGLE_RHS:


[gcc r15-73] Remove support for nontemporal stores with ssa_names on lhs [PR112976]

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:8614d60233a64afd7e28ae7af2ab74c4a5b06010

commit r15-73-g8614d60233a64afd7e28ae7af2ab74c4a5b06010
Author: Andrew Pinski 
Date:   Wed Apr 17 14:30:06 2024 -0700

Remove support for nontemporal stores with ssa_names on lhs [PR112976]

When cfgexpand was changed to support expanding from tuple gimple
(r0-95521-g28ed065ef9f345), the code was added to support
doing nontemporal stores with LHS of a SSA_NAME but that will
never be a nontemporal store.
This patch removes that and asserts that expanding with a LHS
of a SSA_NAME is not a nontemporal store.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

PR middle-end/112976
* cfgexpand.cc (expand_gimple_stmt_1): Remove
support for expanding nontemporal "moves" with
ssa names on the LHS.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/cfgexpand.cc | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index eef565eddb5..cfc5291aa0c 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -4002,17 +4002,16 @@ expand_gimple_stmt_1 (gimple *stmt)
else
  {
rtx target, temp;
-   bool nontemporal = gimple_assign_nontemporal_move_p (assign_stmt);
+   gcc_assert (!gimple_assign_nontemporal_move_p (assign_stmt));
bool promoted = false;
 
target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
  promoted = true;
 
-  /* If we want to use a nontemporal store, force the value to
- register first.  If we store into a promoted register,
- don't directly expand to target.  */
-   temp = nontemporal || promoted ? NULL_RTX : target;
+  /* If we store into a promoted register, don't directly
+ expand to target.  */
+   temp = promoted ? NULL_RTX : target;
temp = expand_expr_real_gassign (assign_stmt, temp,
 GET_MODE (target), EXPAND_NORMAL);
 
@@ -4034,8 +4033,6 @@ expand_gimple_stmt_1 (gimple *stmt)
 
convert_move (SUBREG_REG (target), temp, unsignedp);
  }
-   else if (nontemporal && emit_storent_insn (target, temp))
- ;
else
  {
temp = force_operand (temp, target);


[gcc r15-75] PHI-OPT: speed up value_replacement slightly

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:611815e0233302e1fa113e6f865fa450b7ae

commit r15-75-g611815e0233302e1fa113e6f865fa450b7ae
Author: Andrew Pinski 
Date:   Sat Apr 27 18:54:45 2024 -0700

PHI-OPT: speed up value_replacement slightly

This adds a few early outs to value_replacement that I noticed
while rewriting this to use match-and-simplify but could be committed
separately.
* virtual operands won't change so return early for them
* special case `A ? B : B` as that is already just `B`

Also moves the check for NE/EQ earlier as calculating 
empty_or_with_defined_p
is an IR walk for a BB and that might be big.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-ssa-phiopt.cc (value_replacement): Move check for
NE/EQ earlier.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-phiopt.cc | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index f1e07502b02..a2bdcb5eae8 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1131,6 +1131,21 @@ value_replacement (basic_block cond_bb, basic_block 
middle_bb,
   enum tree_code code;
   bool empty_or_with_defined_p = true;
 
+  /* Virtual operands don't need to be handled. */
+  if (virtual_operand_p (arg1))
+return 0;
+
+  /* Special case A ? B : B as this will always simplify to B. */
+  if (operand_equal_for_phi_arg_p (arg0, arg1))
+return 0;
+
+  gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
+  code = gimple_cond_code (cond);
+
+  /* This transformation is only valid for equality comparisons.  */
+  if (code != NE_EXPR && code != EQ_EXPR)
+return 0;
+
   /* If the type says honor signed zeros we cannot do this
  optimization.  */
   if (HONOR_SIGNED_ZEROS (arg1))
@@ -1161,13 +1176,6 @@ value_replacement (basic_block cond_bb, basic_block 
middle_bb,
empty_or_with_defined_p = false;
 }
 
-  gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
-  code = gimple_cond_code (cond);
-
-  /* This transformation is only valid for equality comparisons.  */
-  if (code != NE_EXPR && code != EQ_EXPR)
-return 0;
-
   /* We need to know which is the true edge and which is the false
   edge so that we know if have abs or negative abs.  */
   extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);


[gcc r15-76] PHIOPT: Value-replacement check undef

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:a30d2e6bd0b965e7687f58530a767a3c3b079158

commit r15-76-ga30d2e6bd0b965e7687f58530a767a3c3b079158
Author: Andrew Pinski 
Date:   Sun Apr 28 20:21:02 2024 -0700

PHIOPT: Value-replacement check undef

While moving value replacement part of PHIOPT over
to use match-and-simplify, I ran into the case where
we would have an undef use that was conditional become
unconditional. This prevents that. I can't remember at this
point what the testcase was though.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-ssa-phiopt.cc (value_replacement): Reject undef variables
so they don't become unconditionally used.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-phiopt.cc | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index a2bdcb5eae8..f166c3132cb 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1146,6 +1146,13 @@ value_replacement (basic_block cond_bb, basic_block 
middle_bb,
   if (code != NE_EXPR && code != EQ_EXPR)
 return 0;
 
+  /* Do not make conditional undefs unconditional.  */
+  if ((TREE_CODE (arg0) == SSA_NAME
+   && ssa_name_maybe_undef_p (arg0))
+  || (TREE_CODE (arg1) == SSA_NAME
+ && ssa_name_maybe_undef_p (arg1)))
+return false;
+
   /* If the type says honor signed zeros we cannot do this
  optimization.  */
   if (HONOR_SIGNED_ZEROS (arg1))


[gcc r15-74] MATCH: change single_non_singleton_phi_for_edges for singleton phis

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:9c18bdb07e299b25e7526fea16659c7ff8f0d14e

commit r15-74-g9c18bdb07e299b25e7526fea16659c7ff8f0d14e
Author: Andrew Pinski 
Date:   Sat Apr 27 18:54:44 2024 -0700

MATCH: change single_non_singleton_phi_for_edges for singleton phis

I noticed that single_non_singleton_phi_for_edges could
return a phi whose arguments are all the same for the edges.
This happens only if there was a single phi in the first place.
Also gimple_seq_singleton_p walks the sequence to see if there is only
one element in it, so removing that check actually
reduces the number of pointer walks needed.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-ssa-phiopt.cc (single_non_singleton_phi_for_edges):
Remove the special case of gimple_seq_singleton_p.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-phiopt.cc | 8 
 1 file changed, 8 deletions(-)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index d1746c4b468..f1e07502b02 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -62,14 +62,6 @@ single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, 
edge e1)
 {
   gimple_stmt_iterator i;
   gphi *phi = NULL;
-  if (gimple_seq_singleton_p (seq))
-{
-  phi = as_a <gphi *> (gsi_stmt (gsi_start (seq)));
-  /* Never return virtual phis.  */
-  if (virtual_operand_p (gimple_phi_result (phi)))
-   return NULL;
-  return phi;
-}
   for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
 {
   gphi *p = as_a <gphi *> (gsi_stmt (i));


[gcc r15-77] Fix the build: error message `quote`

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:1ff71f71a13f5ed423389d20ed34f3217e632777

commit r15-77-g1ff71f71a13f5ed423389d20ed34f3217e632777
Author: Andrew Pinski 
Date:   Tue Apr 30 09:44:52 2024 -0700

Fix the build: error message `quote`

The problem here is the quote mark is for English's
possessiveness rather than a quote but the error message
format detection is too simple so it warns which causes
-Werror to fail.

Committed as obvious after a quick build.

gcc/ChangeLog:

* tree-cfg.cc (verify_gimple_assign): Remove quote
mark to shut up the warning.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-cfg.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index 1c5b7df8541..b2d47b72084 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -4842,7 +4842,7 @@ verify_gimple_assign (gassign *stmt)
   tree lhs = gimple_assign_lhs (stmt);
   if (is_gimple_reg (lhs))
{
- error ("nontemporal store's lhs cannot be a gimple register");
+ error ("nontemporal store lhs cannot be a gimple register");
  debug_generic_stmt (lhs);
  return true;
}


[gcc r15-778] aarch64: Fold vget_high_* intrinsics to BIT_FIELD_REF [PR102171]

2024-05-22 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:1d1ef1c22752b3e250ee769ae6d79f537471a57f

commit r15-778-g1d1ef1c22752b3e250ee769ae6d79f537471a57f
Author: Pengxuan Zheng 
Date:   Tue May 21 10:55:06 2024 -0700

aarch64: Fold vget_high_* intrinsics to BIT_FIELD_REF [PR102171]

This patch is a follow-up of r15-697-ga2e4fe5a53cf75 to also fold
vget_high_* intrinsics to BIT_FIELD_REF and remove the vget_high_*
definitions from arm_neon.h to use the new intrinsics framework.

PR target/102171

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc 
(AARCH64_SIMD_VGET_HIGH_BUILTINS):
New macro to create definitions for all vget_high intrinsics.
(VGET_HIGH_BUILTIN): Likewise.
(enum aarch64_builtins): Add vget_high function codes.
(AARCH64_SIMD_VGET_LOW_BUILTINS): Delete duplicate macro.
(aarch64_general_fold_builtin): Fold vget_high calls.
* config/aarch64/aarch64-simd-builtins.def: Delete vget_high 
builtins.
* config/aarch64/aarch64-simd.md (aarch64_get_high): Delete.
(aarch64_vget_hi_halfv8bf): Likewise.
* config/aarch64/arm_neon.h (__attribute__): Delete.
(vget_high_f16): Likewise.
(vget_high_f32): Likewise.
(vget_high_f64): Likewise.
(vget_high_p8): Likewise.
(vget_high_p16): Likewise.
(vget_high_p64): Likewise.
(vget_high_s8): Likewise.
(vget_high_s16): Likewise.
(vget_high_s32): Likewise.
(vget_high_s64): Likewise.
(vget_high_u8): Likewise.
(vget_high_u16): Likewise.
(vget_high_u32): Likewise.
(vget_high_u64): Likewise.
(vget_high_bf16): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/vget_high_2.c: New test.
* gcc.target/aarch64/vget_high_2_be.c: New test.

Signed-off-by: Pengxuan Zheng 

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc|  59 
 gcc/config/aarch64/aarch64-simd-builtins.def  |   6 --
 gcc/config/aarch64/aarch64-simd.md|  22 -
 gcc/config/aarch64/arm_neon.h | 105 --
 gcc/testsuite/gcc.target/aarch64/vget_high_2.c|  30 +++
 gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c |  31 +++
 6 files changed, 104 insertions(+), 149 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 11b888016ed..f8eeccb554d 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -675,6 +675,23 @@ static aarch64_simd_builtin_datum 
aarch64_simd_builtin_data[] = {
   VGET_LOW_BUILTIN(u64) \
   VGET_LOW_BUILTIN(bf16)
 
+#define AARCH64_SIMD_VGET_HIGH_BUILTINS \
+  VGET_HIGH_BUILTIN(f16) \
+  VGET_HIGH_BUILTIN(f32) \
+  VGET_HIGH_BUILTIN(f64) \
+  VGET_HIGH_BUILTIN(p8) \
+  VGET_HIGH_BUILTIN(p16) \
+  VGET_HIGH_BUILTIN(p64) \
+  VGET_HIGH_BUILTIN(s8) \
+  VGET_HIGH_BUILTIN(s16) \
+  VGET_HIGH_BUILTIN(s32) \
+  VGET_HIGH_BUILTIN(s64) \
+  VGET_HIGH_BUILTIN(u8) \
+  VGET_HIGH_BUILTIN(u16) \
+  VGET_HIGH_BUILTIN(u32) \
+  VGET_HIGH_BUILTIN(u64) \
+  VGET_HIGH_BUILTIN(bf16)
+
 typedef struct
 {
   const char *name;
@@ -717,6 +734,9 @@ typedef struct
 #define VGET_LOW_BUILTIN(A) \
   AARCH64_SIMD_BUILTIN_VGET_LOW_##A,
 
+#define VGET_HIGH_BUILTIN(A) \
+  AARCH64_SIMD_BUILTIN_VGET_HIGH_##A,
+
 #undef VAR1
 #define VAR1(T, N, MAP, FLAG, A) \
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
@@ -753,6 +773,7 @@ enum aarch64_builtins
   /* SIMD intrinsic builtins.  */
   AARCH64_SIMD_VREINTERPRET_BUILTINS
   AARCH64_SIMD_VGET_LOW_BUILTINS
+  AARCH64_SIMD_VGET_HIGH_BUILTINS
   /* ARMv8.3-A Pointer Authentication Builtins.  */
   AARCH64_PAUTH_BUILTIN_AUTIA1716,
   AARCH64_PAUTH_BUILTIN_PACIA1716,
@@ -855,26 +876,21 @@ static aarch64_fcmla_laneq_builtin_datum 
aarch64_fcmla_lane_builtin_data[] = {
false \
   },
 
-#define AARCH64_SIMD_VGET_LOW_BUILTINS \
-  VGET_LOW_BUILTIN(f16) \
-  VGET_LOW_BUILTIN(f32) \
-  VGET_LOW_BUILTIN(f64) \
-  VGET_LOW_BUILTIN(p8) \
-  VGET_LOW_BUILTIN(p16) \
-  VGET_LOW_BUILTIN(p64) \
-  VGET_LOW_BUILTIN(s8) \
-  VGET_LOW_BUILTIN(s16) \
-  VGET_LOW_BUILTIN(s32) \
-  VGET_LOW_BUILTIN(s64) \
-  VGET_LOW_BUILTIN(u8) \
-  VGET_LOW_BUILTIN(u16) \
-  VGET_LOW_BUILTIN(u32) \
-  VGET_LOW_BUILTIN(u64) \
-  VGET_LOW_BUILTIN(bf16)
+#undef VGET_HIGH_BUILTIN
+#define VGET_HIGH_BUILTIN(A) \
+  {"vget_high_" #A, \
+   AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, \
+   2, \
+   { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
+   { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \
+   FLAG_AUTO_FP, \
+   false \
+  },
 
 static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = {
   AARCH64_SIMD_VREINTERPRET_BUILTINS
   AARCH64_SIMD_VGET_LOW_BUILTINS
+  AARCH64_SIMD_VGET_HIGH_BUILTINS
 };
 
 
@@ -3270,6 +3286,10 @@ aarch64_fold_builtin_lan

[gcc r15-784] AARCH64: Add Qualcomm oryon-1 core

2024-05-22 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:01cfd6018250141a262219c5803c3f2a278d909d

commit r15-784-g01cfd6018250141a262219c5803c3f2a278d909d
Author: Andrew Pinski 
Date:   Fri Apr 5 13:40:35 2024 -0700

AARCH64: Add Qualcomm oryon-1 core

This patch adds Qualcomm's new oryon-1 core; this is enough
to recognize the core, and the tuning structure will be added later on.

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (oryon-1): New entry.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi  (AArch64 Options): Document oryon-1.

Signed-off-by: Andrew Pinski 
Co-authored-by: Joel Jones 
Co-authored-by: Wei Zhao 

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 5 +
 gcc/config/aarch64/aarch64-tune.md   | 2 +-
 gcc/doc/invoke.texi  | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index f69fc212d56..be60929e400 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -151,6 +151,11 @@ AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 
V8_4A,  (SVE, I8MM, B
 /* Qualcomm ('Q') cores. */
 AARCH64_CORE("saphira", saphira,saphira,V8_4A,  (CRYPTO), saphira, 
  0x51, 0xC01, -1)
 
+/* ARMv8.6-A Architecture Processors.  */
+
+/* Qualcomm ('Q') cores. */
+AARCH64_CORE("oryon-1", oryon1, cortexa57, V8_6A, (CRYPTO, SM4, SHA3, F16), 
cortexa72,   0x51, 0x001, -1)
+
 /* ARMv8-A big.LITTLE implementations.  */
 
 AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, V8A,  
(CRC), cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index abd3c9e0822..ba940f1c890 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,demeter,generic,generic_armv8_a,generic_armv9_a"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,demeter,generic,generic_armv8_a,generic_armv9_a"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0625a5ede6f..c9d8f6b37b6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21382,6 +21382,7 @@ performance of the code.  Permissible values for this 
option are:
 @samp{cortex-a65}, @samp{cortex-a65ae}, @samp{cortex-a34},
 @samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c},
 @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
+@samp{oryon-1},
 @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1},
 @samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{qdf24xx},
 @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},


[gcc r15-813] Use simple_dce_from_worklist in phiprop

2024-05-23 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:3e06763a695d97aa46c9de71573ec6a43bb92449

commit r15-813-g3e06763a695d97aa46c9de71573ec6a43bb92449
Author: Andrew Pinski 
Date:   Thu May 23 09:56:37 2024 -0700

Use simple_dce_from_worklist in phiprop

I noticed that phiprop leaves around phi nodes which
define an ssa name which is unused. This just adds a
bitmap to mark those ssa names and then calls
simple_dce_from_worklist at the very end to remove
those phi nodes and all of the dependencies if there
were any. This might allow us to optimize something earlier
due to the removal of the phi which was taking the address
of the variables.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-ssa-phiprop.cc (phiprop_insert_phi): Add
dce_ssa_names argument. Add the phi's result to it.
(propagate_with_phi): Add dce_ssa_names argument.
Update call to phiprop_insert_phi.
(pass_phiprop::execute): Update call to propagate_with_phi.
Call simple_dce_from_worklist if there was a change.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-phiprop.cc | 28 ++--
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/gcc/tree-ssa-phiprop.cc b/gcc/tree-ssa-phiprop.cc
index 041521ef106..2a1cdae46d2 100644
--- a/gcc/tree-ssa-phiprop.cc
+++ b/gcc/tree-ssa-phiprop.cc
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "stor-layout.h"
 #include "tree-ssa-loop.h"
 #include "tree-cfg.h"
+#include "tree-ssa-dce.h"
 
 /* This pass propagates indirect loads through the PHI node for its
address to make the load source possibly non-addressable and to
@@ -132,12 +133,15 @@ phivn_valid_p (struct phiprop_d *phivn, tree name, 
basic_block bb)
 
 static tree
 phiprop_insert_phi (basic_block bb, gphi *phi, gimple *use_stmt,
-   struct phiprop_d *phivn, size_t n)
+   struct phiprop_d *phivn, size_t n,
+   bitmap dce_ssa_names)
 {
   tree res;
   gphi *new_phi = NULL;
   edge_iterator ei;
   edge e;
+  tree phi_result = PHI_RESULT (phi);
+  bitmap_set_bit (dce_ssa_names, SSA_NAME_VERSION (phi_result));
 
   gcc_assert (is_gimple_assign (use_stmt)
  && gimple_assign_rhs_code (use_stmt) == MEM_REF);
@@ -276,7 +280,7 @@ chk_uses (tree, tree *idx, void *data)
 
 static bool
 propagate_with_phi (basic_block bb, gphi *phi, struct phiprop_d *phivn,
-   size_t n)
+   size_t n, bitmap dce_ssa_names)
 {
   tree ptr = PHI_RESULT (phi);
   gimple *use_stmt;
@@ -420,9 +424,10 @@ propagate_with_phi (basic_block bb, gphi *phi, struct 
phiprop_d *phivn,
goto next;
}
 
- phiprop_insert_phi (bb, phi, use_stmt, phivn, n);
+ phiprop_insert_phi (bb, phi, use_stmt, phivn, n, dce_ssa_names);
 
- /* Remove old stmt.  The phi is taken care of by DCE.  */
+ /* Remove old stmt. The phi and all of maybe its depedencies
+will be removed later via simple_dce_from_worklist. */
  gsi = gsi_for_stmt (use_stmt);
  /* Unlinking the VDEF here is fine as we are sure that we process
 stmts in execution order due to aggregate copies having VDEFs
@@ -442,16 +447,15 @@ propagate_with_phi (basic_block bb, gphi *phi, struct 
phiprop_d *phivn,
 is the first load transformation.  */
   else if (!phi_inserted)
{
- res = phiprop_insert_phi (bb, phi, use_stmt, phivn, n);
+ res = phiprop_insert_phi (bb, phi, use_stmt, phivn, n, dce_ssa_names);
  type = TREE_TYPE (res);
 
  /* Remember the value we created for *ptr.  */
  phivn[SSA_NAME_VERSION (ptr)].value = res;
  phivn[SSA_NAME_VERSION (ptr)].vuse = vuse;
 
- /* Remove old stmt.  The phi is taken care of by DCE, if we
-want to delete it here we also have to delete all intermediate
-copies.  */
+ /* Remove old stmt.  The phi and all of maybe its depedencies
+will be removed later via simple_dce_from_worklist. */
  gsi = gsi_for_stmt (use_stmt);
  gsi_remove (&gsi, true);
 
@@ -514,6 +518,7 @@ pass_phiprop::execute (function *fun)
   gphi_iterator gsi;
   unsigned i;
   size_t n;
+  auto_bitmap dce_ssa_names;
 
   calculate_dominance_info (CDI_DOMINATORS);
 
@@ -531,11 +536,14 @@ pass_phiprop::execute (function *fun)
   if (bb_has_abnormal_pred (bb))
continue;
   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-   did_something |= propagate_with_phi (bb, gsi.phi (), phivn, n);
+   did_something |= propagate_with_phi (bb, gsi.phi (), phivn, n, 
dce_ssa_names);
 }
 
   if (did_something)
-gsi_commit_edge_inserts ();
+{
+  gsi_commit_edge_inserts ();
+  simple_dce_from_worklist (dce_ssa_names);
+}
 
   free (phivn);


[gcc r15-853] match: Use uniform_integer_cst_p in bitwise_inverted_equal_p [PR115238]

2024-05-27 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c5a7628470a7fb801ebeea82e16a4549db43bfa5

commit r15-853-gc5a7628470a7fb801ebeea82e16a4549db43bfa5
Author: Andrew Pinski 
Date:   Sun May 26 17:59:21 2024 -0700

match: Use uniform_integer_cst_p in bitwise_inverted_equal_p [PR115238]

I noticed while working on the `a ^ CST` patch, that 
bitwise_inverted_equal_p
would check INTEGER_CST directly and not handle vector csts that are 
uniform.
This moves over to using uniform_integer_cst_p instead of checking 
INTEGER_CST
directly.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115238

gcc/ChangeLog:

* generic-match-head.cc (bitwise_inverted_equal_p): Use
uniform_integer_cst_p instead of checking INTEGER_CST.
* gimple-match-head.cc (gimple_bitwise_inverted_equal_p): Likewise.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bitops-9.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/generic-match-head.cc|  6 --
 gcc/gimple-match-head.cc |  6 --
 gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c | 15 +++
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc
index e2e1e4b2d64..55ba369c6b3 100644
--- a/gcc/generic-match-head.cc
+++ b/gcc/generic-match-head.cc
@@ -146,8 +146,10 @@ bitwise_inverted_equal_p (tree expr1, tree expr2, bool 
&wascmp)
 return false;
   if (!tree_nop_conversion_p (TREE_TYPE (expr1), TREE_TYPE (expr2)))
 return false;
-  if (TREE_CODE (expr1) == INTEGER_CST && TREE_CODE (expr2) == INTEGER_CST)
-return wi::to_wide (expr1) == ~wi::to_wide (expr2);
+  tree cst1 = uniform_integer_cst_p (expr1);
+  tree cst2 = uniform_integer_cst_p (expr2);
+  if (cst1 && cst2)
+return wi::to_wide (cst1) == ~wi::to_wide (cst2);
   if (operand_equal_p (expr1, expr2, 0))
 return false;
   if (TREE_CODE (expr1) == BIT_NOT_EXPR
diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 49b1dde6ae4..6220725b259 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -294,8 +294,10 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, 
bool &wascmp, tree (*va
 return false;
   if (!tree_nop_conversion_p (TREE_TYPE (expr1), TREE_TYPE (expr2)))
 return false;
-  if (TREE_CODE (expr1) == INTEGER_CST && TREE_CODE (expr2) == INTEGER_CST)
-return wi::to_wide (expr1) == ~wi::to_wide (expr2);
+  tree cst1 = uniform_integer_cst_p (expr1);
+  tree cst2 = uniform_integer_cst_p (expr2);
+  if (cst1 && cst2)
+return wi::to_wide (cst1) == ~wi::to_wide (cst2);
   if (operand_equal_p (expr1, expr2, 0))
 return false;
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c
new file mode 100644
index 000..a18b6bf3214
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* PR tree-optimization/115238 */
+
+
+#define vector8 __attribute__((vector_size(2*sizeof(int))))
+
+void f(int a, vector8 int *b)
+{
+a = 1;
+*b = a | ((~a) ^ *b);
+}
+/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "optimized" } } */
+/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "optimized" } } */
+/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "optimized" } } */


[gcc r15-855] Fix bitops-9.c for -m32 and other targets that don't have vector modes

2024-05-27 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:4fcdc37e8856bde847d3b8dd2915b68d56ad1d62

commit r15-855-g4fcdc37e8856bde847d3b8dd2915b68d56ad1d62
Author: Andrew Pinski 
Date:   Mon May 27 17:24:11 2024 -0700

Fix bitops-9.c for -m32 and other targets that don't have vector modes

This just moves the tree scan earlier so we can detect the optimization and 
not
need to detect the vector splitting too.

Committed as obvious after a quick test.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bitops-9.c: Look at cddce1 rather than
optimized.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c
index a18b6bf3214..bcf079ab59d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* { dg-options "-O2 -fdump-tree-cddce1-raw" } */
 /* PR tree-optimization/115238 */
 
 
@@ -10,6 +10,8 @@ void f(int a, vector8 int *b)
 a = 1;
 *b = a | ((~a) ^ *b);
 }
-/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "optimized" } } */
-/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "optimized" } } */
-/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "optimized" } } */
+/* Scan early on in the phases before the vector has possibily been split
+   but late enough after forwprop or other match-simplify has happened though. 
*/
+/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "cddce1" } } */
+/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "cddce1" } } */
+/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "cddce1" } } */


[gcc r15-907] Match: Add maybe_bit_not instead of plain matching

2024-05-29 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:0a9154d154957b21eb2c9e4fbe9869e50fb9742f

commit r15-907-g0a9154d154957b21eb2c9e4fbe9869e50fb9742f
Author: Andrew Pinski 
Date:   Sat May 25 23:29:48 2024 -0700

Match: Add maybe_bit_not instead of plain matching

While working on adding matching of negative expressions of `a - b`,
I noticed that we started to have "duplicated" patterns due to not having
a way to match maybe negative expressions. So I went back to what I did for
bit_not and decided to improve the situation there so for some patterns
where we had 2 operands of an expression where one could have been a 
bit_not,
add back maybe_bit_not.
This does not add maybe_bit_not in every place where bitwise_inverted_equal_p
is used, just the ones where 2 operands of an expression could be swapped.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* match.pd (bit_not_with_nop): Unconditionalize.
(maybe_cmp): Likewise.
(maybe_bit_not): New match pattern.
(`~X & X`): Use maybe_bit_not and add `:c` back.
(`~x ^ x`/`~x | x`): Likewise.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 024e3350465..090ad4e08b0 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -167,7 +167,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   TYPE_VECTOR_SUBPARTS (TREE_TYPE (@0)))
   && tree_nop_conversion_p (TREE_TYPE (type), TREE_TYPE (TREE_TYPE 
(@0))))))
 
-#if GIMPLE
 /* These are used by gimple_bitwise_inverted_equal_p to simplify
detection of BIT_NOT and comparisons. */
 (match (bit_not_with_nop @0)
@@ -188,7 +187,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (bit_xor@0 @1 @2)
  (if (INTEGRAL_TYPE_P (type)
   && TYPE_PRECISION (type) == 1)))
-#endif
+/* maybe_bit_not is used to match what
+   is acceptable for bitwise_inverted_equal_p. */
+(match (maybe_bit_not @0)
+ (bit_not_with_nop@0 @1))
+(match (maybe_bit_not @0)
+ (INTEGER_CST@0))
+(match (maybe_bit_not @0)
+ (maybe_cmp@0 @1))
 
 /* Transform likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR 
ABSU_EXPR returns unsigned absolute value of the operand and the operand
@@ -1332,7 +1338,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* Simplify ~X & X as zero.  */
 (simplify
- (bit_and (convert? @0) (convert? @1))
+ (bit_and:c (convert? @0) (convert? (maybe_bit_not @1)))
  (with { bool wascmp; }
   (if (types_match (TREE_TYPE (@0), TREE_TYPE (@1))
&& bitwise_inverted_equal_p (@0, @1, wascmp))
@@ -1597,7 +1603,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* ~x ^ x -> -1 */
 (for op (bit_ior bit_xor)
  (simplify
-  (op (convert? @0) (convert? @1))
+  (op:c (convert? @0) (convert? (maybe_bit_not @1)))
   (with { bool wascmp; }
(if (types_match (TREE_TYPE (@0), TREE_TYPE (@1))
 && bitwise_inverted_equal_p (@0, @1, wascmp))


[gcc r15-908] match: Add support for `a ^ CST` to bitwise_inverted_equal_p [PR115224]

2024-05-29 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:547143df5aa0960fb149a26933dad7ca1c363afb

commit r15-908-g547143df5aa0960fb149a26933dad7ca1c363afb
Author: Andrew Pinski 
Date:   Sun May 26 17:38:37 2024 -0700

match: Add support for `a ^ CST` to bitwise_inverted_equal_p [PR115224]

While looking into something else, I noticed that `a ^ CST` needed to be
special cased in bitwise_inverted_equal_p as it would simplify to `a ^ 
~CST`
for the bitwise not.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115224

gcc/ChangeLog:

* generic-match-head.cc (bitwise_inverted_equal_p): Add `a ^ CST`
case.
* gimple-match-head.cc (gimple_bit_xor_cst): New declaration.
(gimple_bitwise_inverted_equal_p): Add `a ^ CST` case.
* match.pd (bit_xor_cst): New match.
(maybe_bit_not): Add bit_xor_cst case.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bitops-8.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/generic-match-head.cc| 10 ++
 gcc/gimple-match-head.cc | 13 +
 gcc/match.pd |  4 
 gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c | 15 +++
 4 files changed, 42 insertions(+)

diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc
index 55ba369c6b3..641d8e9b2de 100644
--- a/gcc/generic-match-head.cc
+++ b/gcc/generic-match-head.cc
@@ -158,6 +158,16 @@ bitwise_inverted_equal_p (tree expr1, tree expr2, bool 
&wascmp)
   if (TREE_CODE (expr2) == BIT_NOT_EXPR
   && bitwise_equal_p (expr1, TREE_OPERAND (expr2, 0)))
 return true;
+
+  /* `X ^ CST` and `X ^ ~CST` match for ~. */
+  if (TREE_CODE (expr1) == BIT_XOR_EXPR && TREE_CODE (expr2) == BIT_XOR_EXPR
+  && bitwise_equal_p (TREE_OPERAND (expr1, 0), TREE_OPERAND (expr2, 0)))
+{
+  tree cst1 = uniform_integer_cst_p (TREE_OPERAND (expr1, 1));
+  tree cst2 = uniform_integer_cst_p (TREE_OPERAND (expr2, 1));
+  if (cst1 && cst2 && wi::to_wide (cst1) == ~wi::to_wide (cst2))
+   return true;
+}
   if (COMPARISON_CLASS_P (expr1)
   && COMPARISON_CLASS_P (expr2))
 {
diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 6220725b259..e26fa0860ee 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -283,6 +283,7 @@ gimple_bitwise_equal_p (tree expr1, tree expr2, tree 
(*valueize) (tree))
 
 bool gimple_bit_not_with_nop (tree, tree *, tree (*) (tree));
 bool gimple_maybe_cmp (tree, tree *, tree (*) (tree));
+bool gimple_bit_xor_cst (tree, tree *, tree (*) (tree));
 
 /* Helper function for bitwise_inverted_equal_p macro.  */
 
@@ -301,6 +302,18 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, 
bool &wascmp, tree (*va
   if (operand_equal_p (expr1, expr2, 0))
 return false;
 
+  tree xor1[2];
+  tree xor2[2];
+  /* `X ^ CST` and `X ^ ~CST` match for ~. */
+  if (gimple_bit_xor_cst (expr1, xor1, valueize)
+  && gimple_bit_xor_cst (expr2, xor2, valueize))
+{
+  if (operand_equal_p (xor1[0], xor2[0], 0)
+ && (wi::to_wide (uniform_integer_cst_p (xor1[1]))
+ == ~wi::to_wide (uniform_integer_cst_p (xor2[1]))))
+   return true;
+}
+
   tree other;
   /* Try if EXPR1 was defined as ~EXPR2. */
   if (gimple_bit_not_with_nop (expr1, &other, valueize))
diff --git a/gcc/match.pd b/gcc/match.pd
index 090ad4e08b0..480e36bbbaf 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -174,6 +174,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (bit_not_with_nop @0)
  (convert (bit_not @0))
  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))))
+(match (bit_xor_cst @0 @1)
+ (bit_xor @0 uniform_integer_cst_p@1))
 (for cmp (tcc_comparison)
  (match (maybe_cmp @0)
   (cmp@0 @1 @2))
@@ -195,6 +197,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (INTEGER_CST@0))
 (match (maybe_bit_not @0)
  (maybe_cmp@0 @1))
+(match (maybe_bit_not @0)
+ (bit_xor_cst@0 @1 @2))
 
 /* Transform likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR 
ABSU_EXPR returns unsigned absolute value of the operand and the operand
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
new file mode 100644
index 000..40f756e4455
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* PR tree-optimization/115224 */
+
+int f1(int a, int b)
+{
+a = a ^ 1;
+int c = ~a;
+return c | (a ^ b);
+// ~((a ^ 1) & b) or (a ^ -2) | ~b
+}
+/* { dg-final { scan-tree-dump-times   "bit_xor_expr, "  1  "optimized" } } */
+/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "optimized" } } */
+/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "optimized" } } */
+


[gcc r15-938] Fix some opindex for some options [PR115022]

2024-05-31 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:a0d60660f2aae2d79685f73d568facb2397582d8

commit r15-938-ga0d60660f2aae2d79685f73d568facb2397582d8
Author: Andrew Pinski 
Date:   Wed May 29 20:40:31 2024 -0700

Fix some opindex for some options [PR115022]

While looking at the index I noticed that some options had
`-` in the front for the index which is wrong. And then
I noticed there was no index for `mcmodel=` for targets or had
used `-mcmodel` incorrectly.

This fixes both of those and regenerates the urls files; see that the
`-mcmodel=` option now has a url associated with it.

gcc/ChangeLog:

PR target/115022
* doc/invoke.texi (fstrub=disable): Fix opindex.
(minline-memops-threshold): Fix opindex.
(mcmodel=): Add opindex and fix them.
* common.opt.urls: Regenerate.
* config/aarch64/aarch64.opt.urls: Regenerate.
* config/bpf/bpf.opt.urls: Regenerate.
* config/i386/i386.opt.urls: Regenerate.
* config/loongarch/loongarch.opt.urls: Regenerate.
* config/nds32/nds32-elf.opt.urls: Regenerate.
* config/nds32/nds32-linux.opt.urls: Regenerate.
* config/or1k/or1k.opt.urls: Regenerate.
* config/riscv/riscv.opt.urls: Regenerate.
* config/rs6000/aix64.opt.urls: Regenerate.
* config/rs6000/linux64.opt.urls: Regenerate.
* config/sparc/sparc.opt.urls: Regenerate.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/common.opt.urls |  3 +++
 gcc/config/aarch64/aarch64.opt.urls |  3 ++-
 gcc/config/bpf/bpf.opt.urls |  3 +++
 gcc/config/i386/i386.opt.urls   |  3 ++-
 gcc/config/loongarch/loongarch.opt.urls |  2 +-
 gcc/config/nds32/nds32-elf.opt.urls |  2 +-
 gcc/config/nds32/nds32-linux.opt.urls   |  2 +-
 gcc/config/or1k/or1k.opt.urls   |  3 ++-
 gcc/config/riscv/riscv.opt.urls |  3 ++-
 gcc/config/rs6000/aix64.opt.urls|  3 ++-
 gcc/config/rs6000/linux64.opt.urls  |  3 ++-
 gcc/config/sparc/sparc.opt.urls |  2 +-
 gcc/doc/invoke.texi | 17 +++--
 13 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/gcc/common.opt.urls b/gcc/common.opt.urls
index 10462e40874..1f2eb67c8e0 100644
--- a/gcc/common.opt.urls
+++ b/gcc/common.opt.urls
@@ -1339,6 +1339,9 @@ 
UrlSuffix(gcc/Optimize-Options.html#index-fstrict-aliasing)
 fstrict-overflow
 UrlSuffix(gcc/Code-Gen-Options.html#index-fstrict-overflow)
 
+fstrub=disable
+UrlSuffix(gcc/Instrumentation-Options.html#index-fstrub_003ddisable)
+
 fstrub=strict
 UrlSuffix(gcc/Instrumentation-Options.html#index-fstrub_003dstrict)
 
diff --git a/gcc/config/aarch64/aarch64.opt.urls 
b/gcc/config/aarch64/aarch64.opt.urls
index 993634c52f8..4fa90384378 100644
--- a/gcc/config/aarch64/aarch64.opt.urls
+++ b/gcc/config/aarch64/aarch64.opt.urls
@@ -18,7 +18,8 @@ 
UrlSuffix(gcc/AArch64-Options.html#index-mfix-cortex-a53-843419)
 mlittle-endian
 UrlSuffix(gcc/AArch64-Options.html#index-mlittle-endian)
 
-; skipping UrlSuffix for 'mcmodel=' due to finding no URLs
+mcmodel=
+UrlSuffix(gcc/AArch64-Options.html#index-mcmodel_003d)
 
 mtp=
 UrlSuffix(gcc/AArch64-Options.html#index-mtp)
diff --git a/gcc/config/bpf/bpf.opt.urls b/gcc/config/bpf/bpf.opt.urls
index 8c1e5f86d5c..1e8873a899f 100644
--- a/gcc/config/bpf/bpf.opt.urls
+++ b/gcc/config/bpf/bpf.opt.urls
@@ -33,3 +33,6 @@ UrlSuffix(gcc/eBPF-Options.html#index-msmov)
 mcpu=
 UrlSuffix(gcc/eBPF-Options.html#index-mcpu-5)
 
+minline-memops-threshold=
+UrlSuffix(gcc/eBPF-Options.html#index-minline-memops-threshold)
+
diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls
index 40e8a844936..9384b0b3187 100644
--- a/gcc/config/i386/i386.opt.urls
+++ b/gcc/config/i386/i386.opt.urls
@@ -40,7 +40,8 @@ UrlSuffix(gcc/x86-Options.html#index-march-16)
 mlarge-data-threshold=
 UrlSuffix(gcc/x86-Options.html#index-mlarge-data-threshold)
 
-; skipping UrlSuffix for 'mcmodel=' due to finding no URLs
+mcmodel=
+UrlSuffix(gcc/x86-Options.html#index-mcmodel_003d-7)
 
 mcpu=
 UrlSuffix(gcc/x86-Options.html#index-mcpu-14)
diff --git a/gcc/config/loongarch/loongarch.opt.urls 
b/gcc/config/loongarch/loongarch.opt.urls
index 9ed5d7b5596..f7545f65103 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -58,7 +58,7 @@ mrecip
 UrlSuffix(gcc/LoongArch-Options.html#index-mrecip)
 
 mcmodel=
-UrlSuffix(gcc/LoongArch-Options.html#index-mcmodel)
+UrlSuffix(gcc/LoongArch-Options.html#index-mcmodel_003d-1)
 
 mdirect-extern-access
 UrlSuffix(gcc/LoongArch-Options.html#index-mdirect-extern-access)
diff --git a/gcc/config/nds32/nds32-elf.opt.urls 
b/gcc/config/nds32/nds32-elf.opt.urls
index 3ae1efe7312..e5432b62863 100644
--- a/gcc/config/nds32/nds32-elf.opt.urls
+++ b/gcc/config/nds32/nds32-elf.opt.urls
@@ -1,5 +1,5 @@
 ; Autogenerated by regenerate-opt-urls.py from gcc/config/nds32/nds

[gcc r15-1076] Plugins: Add label-text.h to CPPLIB_H so it will be installed [PR115288]

2024-06-06 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:6e6471806d886bc052d3922d636d49aaf75d5d16

commit r15-1076-g6e6471806d886bc052d3922d636d49aaf75d5d16
Author: Andrew Pinski 
Date:   Thu May 30 07:59:00 2024 -0700

Plugins: Add label-text.h to CPPLIB_H so it will be installed [PR115288]

After r15-874-g9bda2c4c81b668, out of tree plugins won't compile
as the new libcpp header file label-text.h is not installed.

This adds the new header file to CPPLIB_H which is used for
the plugin headers to install.

Committed as obvious after a build and install and make sure
the new header file is installed.

gcc/ChangeLog:

PR plugins/115288
* Makefile.in (CPPLIB_H): Add label-text.h.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/Makefile.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index c983b0c102a..f5adb647d3f 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1038,6 +1038,7 @@ SYSTEM_H = system.h hwint.h 
$(srcdir)/../include/libiberty.h \
 PREDICT_H = predict.h predict.def
 CPPLIB_H = $(srcdir)/../libcpp/include/line-map.h \
$(srcdir)/../libcpp/include/rich-location.h \
+   $(srcdir)/../libcpp/include/label-text.h \
$(srcdir)/../libcpp/include/cpplib.h
 CODYLIB_H = $(srcdir)/../libcody/cody.hh
 INPUT_H = $(srcdir)/../libcpp/include/line-map.h input.h


[gcc r15-1165] Fix pr115388.c: plain char could be unsigned by default [PR115415]

2024-06-10 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c3d1153bc0a2b820e3c373ecf19a5a127703f854

commit r15-1165-gc3d1153bc0a2b820e3c373ecf19a5a127703f854
Author: Andrew Pinski 
Date:   Mon Jun 10 08:23:00 2024 -0700

Fix pr115388.c: plain char could be unsigned by default [PR115415]

This is a simple fix to the testcase as plain `char` could be
unsigned by default on some targets (e.g. aarch64 and powerpc).

Committed as obvious after quick test of the testcase on both aarch64 and 
x86_64.

gcc/testsuite/ChangeLog:

PR testsuite/115415
PR tree-optimization/115388
* gcc.dg/torture/pr115388.c: Use `signed char` directly instead
of plain `char`.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr115388.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr115388.c 
b/gcc/testsuite/gcc.dg/torture/pr115388.c
index c7c902888da..17b3f1bcd90 100644
--- a/gcc/testsuite/gcc.dg/torture/pr115388.c
+++ b/gcc/testsuite/gcc.dg/torture/pr115388.c
@@ -2,7 +2,7 @@
 
 int printf(const char *, ...);
 int a[10], b, c, d[0], h, i, j, k, l;
-char e = -1, g;
+signed char e = -1, g;
 volatile int f;
 static void n() {
   while (e >= 0)


[gcc r12-10546] PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

2024-06-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:d30afaae6764379a63c22459b40aaecfa82b0fc4

commit r12-10546-gd30afaae6764379a63c22459b40aaecfa82b0fc4
Author: Andrew Pinski 
Date:   Sat May 18 11:55:58 2024 -0700

PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

The problem here is even if last_and_only_stmt returns a statement,
the bb might still contain a phi node which defines a ssa name
which is used in that statement so we need to add a check to make sure
that the phi nodes are empty for the middle bbs in both the
`CMP?MINMAX:MINMAX` case and the `CMP?MINMAX:B` cases.

Bootstrapped and tested on x86_64_linux-gnu with no regressions.

PR tree-optimization/115143

gcc/ChangeLog:

* tree-ssa-phiopt.cc (minmax_replacement): Check for empty
phi nodes for middle bbs for the case where middle bb is not empty.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr115143-1.c: New test.
* gcc.c-torture/compile/pr115143-2.c: New test.
* gcc.c-torture/compile/pr115143-3.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 9ff8f041331ef8b56007fb3c4d41d76f9850010d)

Diff:
---
 gcc/testsuite/gcc.c-torture/compile/pr115143-1.c | 21 +
 gcc/testsuite/gcc.c-torture/compile/pr115143-2.c | 30 
 gcc/testsuite/gcc.c-torture/compile/pr115143-3.c | 29 +++
 gcc/tree-ssa-phiopt.cc   |  4 
 4 files changed, 84 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
new file mode 100644
index ..5cb119ea4325
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+short a, d;
+char b;
+long c;
+unsigned long e, f;
+void g(unsigned long h) {
+  if (c ? e : b)
+if (e)
+  if (d) {
+a = f ? ({
+  unsigned long i = d ? f : 0, j = e ? h : 0;
+  i < j ? i : j;
+}) : 0;
+  }
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
new file mode 100644
index ..05c3bbe9738e
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
@@ -0,0 +1,30 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) != 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_11(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
new file mode 100644
index ..53c5fb5588e9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
@@ -0,0 +1,29 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) > 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_7(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index e2dba56383b4..558d5b4b57db 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1973,6 +1973,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There cannot be any phi nodes in the middle bb. */
+  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
+   return false;
+
   lhs = gimple_assign_lhs (assign);
   ass_code = gimple_assign_rhs_code (assign);
   if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)


[gcc r15-1188] Fix building JIT with musl libc [PR115442]

2024-06-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:e4244b88d75124f6957bfa080c8ad34017364e53

commit r15-1188-ge4244b88d75124f6957bfa080c8ad34017364e53
Author: Andrew Pinski 
Date:   Tue Jun 11 12:30:01 2024 -0700

Fix building JIT with musl libc [PR115442]

Just like r13-6662-g0e6f87835ccabf but this time for jit/jit-recording.cc.

Pushed as obvious after a quick build to make sure jit still builds.

gcc/jit/ChangeLog:

PR jit/115442
* jit-recording.cc: Define INCLUDE_SSTREAM before including
system.h and don't directly include sstream.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/jit/jit-recording.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index 68a2e860c1fb..70830e349653 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
 #include "config.h"
+#define INCLUDE_SSTREAM
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
@@ -29,7 +30,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "jit-builtins.h"
 #include "jit-recording.h"
 #include "jit-playback.h"
-#include <sstream>
 
 namespace gcc {
 namespace jit {


[gcc r14-10304] Fix building JIT with musl libc [PR115442]

2024-06-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:e6b1c0820590a1f330099ed7560982b5c6da4e91

commit r14-10304-ge6b1c0820590a1f330099ed7560982b5c6da4e91
Author: Andrew Pinski 
Date:   Tue Jun 11 12:30:01 2024 -0700

Fix building JIT with musl libc [PR115442]

Just like r13-6662-g0e6f87835ccabf but this time for jit/jit-recording.cc.

Pushed as obvious after a quick build to make sure jit still builds.

gcc/jit/ChangeLog:

PR jit/115442
* jit-recording.cc: Define INCLUDE_SSTREAM before including
system.h and don't directly include sstream.

Signed-off-by: Andrew Pinski 
(cherry picked from commit e4244b88d75124f6957bfa080c8ad34017364e53)

Diff:
---
 gcc/jit/jit-recording.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index 68a2e860c1fb..70830e349653 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
 #include "config.h"
+#define INCLUDE_SSTREAM
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
@@ -29,7 +30,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "jit-builtins.h"
 #include "jit-recording.h"
 #include "jit-playback.h"
-#include <sstream>
 
 namespace gcc {
 namespace jit {


[gcc r13-8842] Fix building JIT with musl libc [PR115442]

2024-06-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:6eb0e931097a8fec01591051c9ef582d52fe7f0c

commit r13-8842-g6eb0e931097a8fec01591051c9ef582d52fe7f0c
Author: Andrew Pinski 
Date:   Tue Jun 11 12:30:01 2024 -0700

Fix building JIT with musl libc [PR115442]

Just like r13-6662-g0e6f87835ccabf but this time for jit/jit-recording.cc.

Pushed as obvious after a quick build to make sure jit still builds.

gcc/jit/ChangeLog:

PR jit/115442
* jit-recording.cc: Define INCLUDE_SSTREAM before including
system.h and don't directly include sstream.

Signed-off-by: Andrew Pinski 
(cherry picked from commit e4244b88d75124f6957bfa080c8ad34017364e53)

Diff:
---
 gcc/jit/jit-recording.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index cf734cf7ef5f..914082ae861e 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
 #include "config.h"
+#define INCLUDE_SSTREAM
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"


[gcc r15-1215] match: Improve gimple_bitwise_equal_p and gimple_bitwise_inverted_equal_p for truncating casts [PR115449]

2024-06-12 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:0256121e2f23ac3550e87410c9b1e690c8edfc7c

commit r15-1215-g0256121e2f23ac3550e87410c9b1e690c8edfc7c
Author: Andrew Pinski 
Date:   Tue Jun 11 17:16:42 2024 -0700

match: Improve gimple_bitwise_equal_p and gimple_bitwise_inverted_equal_p 
for truncating casts [PR115449]

As mentioned by Jeff in r15-831-g05daf617ea22e1d818295ed2d037456937e23530, 
we don't handle
`(X | Y) & ~Y` -> `X & ~Y` on the gimple level when there are some 
different signed
(but same precision) types dealing with matching `~Y` with the `Y` part. 
This
improves both gimple_bitwise_equal_p and gimple_bitwise_inverted_equal_p to
be able to say `(truncate)a` and `(truncate)a` are bitwise_equal and
that `~(truncate)a` and `(truncate)a` are bitwise_invert_equal.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115449

gcc/ChangeLog:

* gimple-match-head.cc (gimple_maybe_truncate): New declaration.
(gimple_bitwise_equal_p): Match truncations that differ only
in types with the same precision.
(gimple_bitwise_inverted_equal_p): For matching after 
bit_not_with_nop
call gimple_bitwise_equal_p.
* match.pd (maybe_truncate): New match pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bitops-10.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple-match-head.cc  | 17 +++-
 gcc/match.pd  |  7 +++
 gcc/testsuite/gcc.dg/tree-ssa/bitops-10.c | 34 +++
 3 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index e26fa0860ee9..924d3f1e7103 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -243,6 +243,7 @@ optimize_successive_divisions_p (tree divisor, tree 
inner_div)
   gimple_bitwise_equal_p (expr1, expr2, valueize)
 
 bool gimple_nop_convert (tree, tree *, tree (*) (tree));
+bool gimple_maybe_truncate (tree, tree *, tree (*) (tree));
 
 /* Helper function for bitwise_equal_p macro.  */
 
@@ -271,6 +272,10 @@ gimple_bitwise_equal_p (tree expr1, tree expr2, tree 
(*valueize) (tree))
 }
   if (expr2 != expr4 && operand_equal_p (expr1, expr4, 0))
 return true;
+  if (gimple_maybe_truncate (expr3, &expr3, valueize)
+  && gimple_maybe_truncate (expr4, &expr4, valueize)
+  && operand_equal_p (expr3, expr4, 0))
+return true;
   return false;
 }
 
@@ -318,21 +323,13 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, 
bool &wascmp, tree (*va
   /* Try if EXPR1 was defined as ~EXPR2. */
   if (gimple_bit_not_with_nop (expr1, &other, valueize))
 {
-  if (operand_equal_p (other, expr2, 0))
-   return true;
-  tree expr4;
-  if (gimple_nop_convert (expr2, &expr4, valueize)
- && operand_equal_p (other, expr4, 0))
+  if (gimple_bitwise_equal_p (other, expr2, valueize))
return true;
 }
   /* Try if EXPR2 was defined as ~EXPR1. */
   if (gimple_bit_not_with_nop (expr2, &other, valueize))
 {
-  if (operand_equal_p (other, expr1, 0))
-   return true;
-  tree expr3;
-  if (gimple_nop_convert (expr1, &expr3, valueize)
- && operand_equal_p (other, expr3, 0))
+  if (gimple_bitwise_equal_p (other, expr1, valueize))
return true;
 }
 
diff --git a/gcc/match.pd b/gcc/match.pd
index 5cfe81e80b31..3204cf415387 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -200,6 +200,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (maybe_bit_not @0)
  (bit_xor_cst@0 @1 @2))
 
+#if GIMPLE
+(match (maybe_truncate @0)
+ (convert @0)
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@0)))))
+#endif
+
 /* Transform likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR 
ABSU_EXPR returns unsigned absolute value of the operand and the operand
of the ABSU_EXPR will have the corresponding signed type.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-10.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-10.c
new file mode 100644
index ..000c5aef2377
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-10.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
+/* PR tree-optimization/115449 */
+
+void setBit_un(unsigned char *a, int b) {
+   unsigned char c = 0x1UL << b;
+   *a &= ~c;
+   *a |= c;
+}
+
+void setBit_sign(signed char *a, int b) {
+   signed char c = 0x1UL << b;
+   *a &= ~c;
+   *a |= c;
+}
+
+void setBit(char *a, int b) {
+   char c = 0x1UL << b;
+   *a &= ~c;
+   *a |= c;
+}
+/*
+   All three should produce:
+_1 = 1 << b_4(D);
+c_5 = (cast) _1;
+_2 = *a_7(D);
+_3 = _2 | c_5;
+*a_7(D) = _3;
+   Removing the `&~c` as we are matching `(~x & y) | x` -> `x | y`
+   match pattern even with extra casts are being involved. */
+
+/* { dg-final 

[gcc r15-1216] aarch64: Use bitreverse rtl code instead of unspec [PR115176]

2024-06-12 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c2f0aaf7539c42b024ed6b3fb6909bd2c86bb206

commit r15-1216-gc2f0aaf7539c42b024ed6b3fb6909bd2c86bb206
Author: Andrew Pinski 
Date:   Tue Jun 11 20:36:34 2024 +

aarch64: Use bitreverse rtl code instead of unspec [PR115176]

Bitreverse rtl code was added with r14-1586-g6160572f8d243c. So let's
use it instead of an unspec. This is just a small cleanup but it does
have one small fix with respect to rtx costs which didn't handle vector 
modes
correctly for the UNSPEC and now it does.
This is part of the first step in adding __builtin_bitreverse's builtins
but it is independent of it though.

Bootstrapped and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

PR target/115176
* config/aarch64/aarch64-simd.md 
(aarch64_rbit): Use
bitreverse instead of unspec.
* config/aarch64/aarch64-sve-builtins-base.cc (svrbit): Convert 
over to using
rtx_code_function instead of unspec_based_function.
* config/aarch64/aarch64-sve.md: Update comment where RBIT is 
included.
* config/aarch64/aarch64.cc (aarch64_rtx_costs): Handle BITREVERSE 
like BSWAP.
Remove UNSPEC_RBIT support.
* config/aarch64/aarch64.md (unspec): Remove UNSPEC_RBIT.
(aarch64_rbit): Use bitreverse instead of unspec.
* config/aarch64/iterators.md (SVE_INT_UNARY): Add bitreverse.
(optab): Likewise.
(sve_int_op): Likewise.
(SVE_INT_UNARY): Remove UNSPEC_RBIT.
(optab): Likewise.
(sve_int_op): Likewise.
(min_elem_bits): Likewise.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64-simd.md  |  3 +--
 gcc/config/aarch64/aarch64-sve-builtins-base.cc |  2 +-
 gcc/config/aarch64/aarch64-sve.md   |  2 +-
 gcc/config/aarch64/aarch64.cc   |  9 +
 gcc/config/aarch64/aarch64.md   |  3 +--
 gcc/config/aarch64/iterators.md | 10 +-
 6 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index f644bd1731e5..0bb39091a385 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -377,8 +377,7 @@
 
 (define_insn "aarch64_rbit"
   [(set (match_operand:VB 0 "register_operand" "=w")
-   (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
-  UNSPEC_RBIT))]
+   (bitreverse:VB (match_operand:VB 1 "register_operand" "w")))]
   "TARGET_SIMD"
   "rbit\\t%0., %1."
   [(set_attr "type" "neon_rbit")]
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 0d2edf3f19e1..dea2f6e6bfc4 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -3186,7 +3186,7 @@ FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, 
US_PLUS))
 FUNCTION (svqincw, svqinc_bhwd_impl, (SImode))
 FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode))
 FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1))
-FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1))
+FUNCTION (svrbit, rtx_code_function, (BITREVERSE, BITREVERSE, -1))
 FUNCTION (svrdffr, svrdffr_impl,)
 FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE))
 FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS))
diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index d69db34016a5..5331e7121d55 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3083,6 +3083,7 @@
 ;; - CLS (= clrsb)
 ;; - CLZ
 ;; - CNT (= popcount)
+;; - RBIT (= bitreverse)
 ;; - NEG
 ;; - NOT
 ;; -
@@ -3171,7 +3172,6 @@
 ;;  [INT] General unary arithmetic corresponding to unspecs
 ;; -
 ;; Includes
-;; - RBIT
 ;; - REVB
 ;; - REVH
 ;; - REVW
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 13191ec8e345..149e5b2f69ae 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -14690,6 +14690,7 @@ cost_plus:
return true;
   }
 
+case BITREVERSE:
 case BSWAP:
   *cost = COSTS_N_INSNS (1);
 
@@ -15339,14 +15340,6 @@ cost_plus:
 
   return false;
 }
-
-  if (XINT (x, 1) == UNSPEC_RBIT)
-{
-  if (speed)
-*cost += extra_cost->alu.rev;
-
-  return false;
-}
   break;
 
 case TRUNCATE:
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 389a1906e236..9de6235b1398 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -259,7 +259,6 @@
 UNSPEC_PACIBSP
 UNSPEC_

[gcc r15-1305] expand: constify sepops operand to expand_expr_real_2 and expand_widen_pattern_expr [PR113212]

2024-06-13 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:d8a6de9e2b850b71712e89e8e6026e4ae6284766

commit r15-1305-gd8a6de9e2b850b71712e89e8e6026e4ae6284766
Author: Andrew Pinski 
Date:   Thu Jun 13 13:07:10 2024 -0700

expand: constify sepops operand to expand_expr_real_2 and 
expand_widen_pattern_expr [PR113212]

While working on an expand patch back in January I noticed that
the first argument (of sepops type) of expand_expr_real_2 could be
constified as it was not to be touched by the function (nor should it be).
There is code in internal-fn.cc that depends on expand_expr_real_2 not 
touching
the ops argument so constification makes this more obvious.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR middle-end/113212
* expr.h (const_sepops): New typedef.
(expand_expr_real_2): Constify the first argument.
* optabs.cc (expand_widen_pattern_expr): Likewise.
* optabs.h (expand_widen_pattern_expr): Likewise.
* expr.cc (expand_expr_real_2): Likewise.
(do_store_flag): Likewise. Remove incorrect store to ops->code.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/expr.cc   | 8 
 gcc/expr.h| 4 +++-
 gcc/optabs.cc | 2 +-
 gcc/optabs.h  | 2 +-
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 04bad5e1425d..9cecc1758f5c 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -94,7 +94,7 @@ static unsigned HOST_WIDE_INT highest_pow2_factor_for_target 
(const_tree, const_
 
 static bool is_aligning_offset (const_tree, const_tree);
 static rtx reduce_to_bit_field_precision (rtx, rtx, tree);
-static rtx do_store_flag (sepops, rtx, machine_mode);
+static rtx do_store_flag (const_sepops, rtx, machine_mode);
 #ifdef PUSH_ROUNDING
 static void emit_single_push_insn (machine_mode, rtx, tree);
 #endif
@@ -9643,7 +9643,7 @@ expand_expr_divmod (tree_code code, machine_mode mode, 
tree treeop0,
 }
 
 rtx
-expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
+expand_expr_real_2 (const_sepops ops, rtx target, machine_mode tmode,
enum expand_modifier modifier)
 {
   rtx op0, op1, op2, temp;
@@ -13504,7 +13504,7 @@ expand_single_bit_test (location_t loc, enum tree_code 
code,
set/jump/set sequence.  */
 
 static rtx
-do_store_flag (sepops ops, rtx target, machine_mode mode)
+do_store_flag (const_sepops ops, rtx target, machine_mode mode)
 {
   enum rtx_code code;
   tree arg0, arg1, type;
@@ -13566,7 +13566,7 @@ do_store_flag (sepops ops, rtx target, machine_mode 
mode)
   if (new_code != ops->code)
{
  struct separate_ops nops = *ops;
- nops.code = ops->code = new_code;
+ nops.code = new_code;
  nops.op0 = arg0;
  nops.op1 = arg1;
  nops.type = TREE_TYPE (arg0);
diff --git a/gcc/expr.h b/gcc/expr.h
index 751815841083..533ae0af3871 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -53,6 +53,8 @@ typedef struct separate_ops
   tree type;
   tree op0, op1, op2;
 } *sepops;
+
+typedef const struct separate_ops *const_sepops;
 
 /* This is run during target initialization to set up which modes can be
used directly in memory and to initialize the block move optab.  */
@@ -305,7 +307,7 @@ extern rtx expand_expr_real (tree, rtx, machine_mode,
 enum expand_modifier, rtx *, bool);
 extern rtx expand_expr_real_1 (tree, rtx, machine_mode,
   enum expand_modifier, rtx *, bool);
-extern rtx expand_expr_real_2 (sepops, rtx, machine_mode,
+extern rtx expand_expr_real_2 (const_sepops, rtx, machine_mode,
   enum expand_modifier);
 extern rtx expand_expr_real_gassign (gassign *, rtx, machine_mode,
 enum expand_modifier modifier,
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 78cd9ef34488..c54d275b8b7a 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -253,7 +253,7 @@ widen_operand (rtx op, machine_mode mode, machine_mode 
oldmode,
type-promotion (vec-unpack)  1   oprnd0  -   -  */
 
 rtx
-expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
+expand_widen_pattern_expr (const_sepops ops, rtx op0, rtx op1, rtx wide_op,
   rtx target, int unsignedp)
 {
   class expand_operand eops[4];
diff --git a/gcc/optabs.h b/gcc/optabs.h
index c0b8df5268f6..301847e2186d 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -182,7 +182,7 @@ enum optab_methods
   OPTAB_MUST_WIDEN
 };
 
-extern rtx expand_widen_pattern_expr (struct separate_ops *, rtx , rtx , rtx,
+extern rtx expand_widen_pattern_expr (const struct separate_ops *, rtx , rtx , 
rtx,
   rtx, int);
 extern rtx expand_ternary_op (machine_mode mode, optab ternary_optab,
  rtx op0, rtx op1, rtx op2, rtx target,


[gcc r15-1359] aarch64: Fix reg_is_wrapped_separately array size [PR100211]

2024-06-16 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:33caee556c130b2dcf311480314e942a43d6b368

commit r15-1359-g33caee556c130b2dcf311480314e942a43d6b368
Author: Andrew Pinski 
Date:   Sun Jun 16 10:53:15 2024 -0700

aarch64: Fix reg_is_wrapped_separately array size [PR100211]

Currently the size of the array reg_is_wrapped_separately is 
LAST_SAVED_REGNUM.
But LAST_SAVED_REGNUM could itself be a regno that is being saved, so the size
needs to be `LAST_SAVED_REGNUM + 1`, like aarch64_frame->reg_offset is.

Committed as obvious after a bootstrap/test for aarch64-linux-gnu.

gcc/ChangeLog:

PR target/100211
* config/aarch64/aarch64.h (machine_function): Fix the size
of reg_is_wrapped_separately.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 0997b82dbc0f..2b89f6f88ef0 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1059,7 +1059,7 @@ typedef struct GTY (()) machine_function
 {
   struct aarch64_frame frame;
   /* One entry for each hard register.  */
-  bool reg_is_wrapped_separately[LAST_SAVED_REGNUM];
+  bool reg_is_wrapped_separately[LAST_SAVED_REGNUM + 1];
   /* One entry for each general purpose register.  */
   rtx call_via[SP_REGNUM];


[gcc r15-1387] aarch64: Add testcase for PR97405

2024-06-17 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:17979deb15d34dd4f036ca93d2977d0fc4d556a7

commit r15-1387-g17979deb15d34dd4f036ca93d2977d0fc4d556a7
Author: Andrew Pinski 
Date:   Mon Jun 17 16:45:34 2024 -0700

aarch64: Add testcase for PR97405

This aarch64 SVE-specific code was fixed by r15-917-gc9842f99042454,
which added a riscv-specific testcase, so adding an aarch64 one to test
that the fix does not regress is a good idea.

Committed as obvious after testing the testcase for aarch64-linux-gnu.

PR tree-optimization/97405

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/pr97405-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.target/aarch64/sve/pr97405-1.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr97405-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr97405-1.c
new file mode 100644
index ..5efa32c99280
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr97405-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.2-a+sve -O2" }
+/* PR tree-optimization/97405 */
+#include "arm_sve.h"
+
+void
+a (svuint8x3_t b, unsigned char *p, int c) {
+  if (c)
+svst1_u8(svptrue_pat_b8(SV_VL16), p, svget3_u8(b, 1));
+  else
+svst1_u8(svwhilelt_b8(6, 6), p, svget3_u8(b, 1));
+}
+


[gcc r15-1417] aarch64: make thunderxt88p1 an alias of thunderxt88

2024-06-18 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:79ab7245bea102f2c4ec38bd4b3ba03e7828617f

commit r15-1417-g79ab7245bea102f2c4ec38bd4b3ba03e7828617f
Author: Andrew Pinski 
Date:   Mon Jun 17 13:26:54 2024 -0700

aarch64: make thunderxt88p1 an alias of thunderxt88

Since r7-6575-g71aba51d6460ff, thunderxt88 has been the same as 
thunderxt88p1, so let's make
them a true alias, remove the odd variant handling, and move it below 
thunderxt88.

Bootstrapped and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (thunderxt88p1): Make an alias 
of thunderxt88 and
move below thunderxt88.
* config/aarch64/aarch64-tune.md: Regenerate.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 5 ++---
 gcc/config/aarch64/aarch64-tune.md   | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index be60929e4000..06a8213811ca 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -58,10 +58,9 @@ AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, V8A,  
(CRC), cortexa73, 0x41,
 
 /* Cavium ('C') cores. */
 AARCH64_CORE("thunderx",  thunderx,  thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a0, -1)
-/* Do not swap around "thunderxt88p1" and "thunderxt88",
-   this order is required to handle variant correctly. */
-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  V8A,  (CRC, CRYPTO),   
thunderxt88,  0x43, 0x0a1, 0)
 AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  V8A,  (CRC, CRYPTO), 
thunderxt88,  0x43, 0x0a1, -1)
+/* "thunderxt88p1 is just an alias for thunderxt88 now. */
+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  V8A,  (CRC, CRYPTO), 
thunderxt88,  0x43, 0x0a1, -1)
 
 /* OcteonTX is the official name for T81/T83. */
 AARCH64_CORE("octeontx",  octeontx,  thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a0, -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index ba940f1c8901..9b1f32a0330a 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,demeter,generic,generic_armv8_a,generic_armv9_a"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,demeter,generic,generic_armv8_a,generic_armv9_a"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))


[gcc r15-1418] aarch64: Add comment about thunderxt81/t83 being aliases

2024-06-18 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:adadb5c7ba0922ea77bb9ca695f398de67c11c49

commit r15-1418-gadadb5c7ba0922ea77bb9ca695f398de67c11c49
Author: Andrew Pinski 
Date:   Mon Jun 17 14:20:10 2024 -0700

aarch64: Add comment about thunderxt81/t83 being aliases

Since these were already aliases, just make that clear with a comment.

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def: Add comment
saying thunderxt81/t83 are aliases of octeontx81/83.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 06a8213811ca..0e05e81761cb 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -67,6 +67,7 @@ AARCH64_CORE("octeontx",  octeontx,  thunderx,  V8A,  
(CRC, CRYPTO), thu
 AARCH64_CORE("octeontx81",octeontxt81,   thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a2, -1)
 AARCH64_CORE("octeontx83",octeontxt83,   thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a3, -1)
 
+/* thunderxt81/83 are aliases for octeontxt81/83. */
 AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a2, -1)
 AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a3, -1)


[gcc r15-1508] complex-lowering: Better handling of PAREN_EXPR [PR68855]

2024-06-20 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:59221dc587f369695d9b0c2f73aedf8458931f0f

commit r15-1508-g59221dc587f369695d9b0c2f73aedf8458931f0f
Author: Andrew Pinski 
Date:   Thu Jun 20 15:52:05 2024 -0700

complex-lowering: Better handling of PAREN_EXPR [PR68855]

When the PAREN_EXPR tree code was added in r0-85884-gdedd42d511b6e4,
only a simplified handling was added to complex lowering, which means
we would get:
```
  _9 = COMPLEX_EXPR <_15, _14>;
  _11 = ((_9));
  _19 = REALPART_EXPR <_11>;
  _20 = IMAGPART_EXPR <_11>;
```

In many cases instead of just simply:
```
  _19 = ((_15));
  _20 = ((_14));
```

So this adds full support for PAREN_EXPR to complex lowering.
It is handled very similarly to NEGATE_EXPR, except that it creates PAREN_EXPR
instead of NEGATE_EXPR for the real/imag parts. This allows for
more optimizations including vectorization, especially with
-ffast-math.
gfortran.dg/vect/pr68855.f90 is an example where this could show up.
It also shows up in SPEC CPU 2006's 465.tonto; though I have not done
any benchmarking there.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR tree-optimization/68855
* tree-complex.cc (init_dont_simulate_again): Handle PAREN_EXPR
like NEGATE_EXPR.
(complex_propagate::visit_stmt): Likewise.
(expand_complex_move): Don't handle PAREN_EXPR.
(expand_complex_paren): New function.
(expand_complex_operations_1): Handle PAREN_EXPR like
NEGATE_EXPR. And call expand_complex_paren for PAREN_EXPR.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/pr68855.c: New test.
* gfortran.dg/vect/pr68855.f90: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr68855.c| 17 +
 gcc/testsuite/gfortran.dg/vect/pr68855.f90 | 16 
 gcc/tree-complex.cc| 29 +++--
 3 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr68855.c 
b/gcc/testsuite/gcc.dg/vect/pr68855.c
new file mode 100644
index ..68a3a1cee36e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr68855.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+/* PAREN_EXPR should not cause the vectorization of complex float add to be 
missed. */
+void foo(_Complex float *a, int n)
+{
+  for(int i = 0; i < n; i++)
+  {
+_Complex float t;
+t = a[i];
+t += 6.0;
+t = __builtin_assoc_barrier(t);
+a[i] = t;
+  }
+}
diff --git a/gcc/testsuite/gfortran.dg/vect/pr68855.f90 
b/gcc/testsuite/gfortran.dg/vect/pr68855.f90
new file mode 100644
index ..90d444c86bfa
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/pr68855.f90
@@ -0,0 +1,16 @@
+! { dg-do compile }
+! { dg-require-effective-target vect_float }
+
+! { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } }
+! PAREN_EXPR should not cause the vectorization of complex float add to be 
missed.
+
+subroutine foo(a,n)
+
+  complex (kind(1.0)) :: a(*)
+  integer :: i,n
+
+  do i=1,n
+ a(i)=(a(i)+(6.0,1.0))
+  enddo
+  
+end subroutine foo
diff --git a/gcc/tree-complex.cc b/gcc/tree-complex.cc
index 877913972bdd..8a879acffca8 100644
--- a/gcc/tree-complex.cc
+++ b/gcc/tree-complex.cc
@@ -281,6 +281,7 @@ init_dont_simulate_again (void)
 
  case NEGATE_EXPR:
  case CONJ_EXPR:
+ case PAREN_EXPR:
if (TREE_CODE (TREE_TYPE (op0)) == COMPLEX_TYPE)
  saw_a_complex_op = true;
break;
@@ -391,6 +392,7 @@ complex_propagate::visit_stmt (gimple *stmt, edge 
*taken_edge_p ATTRIBUTE_UNUSED
   break;
 
 case NEGATE_EXPR:
+case PAREN_EXPR:
 case CONJ_EXPR:
   new_l = find_lattice_value (gimple_assign_rhs1 (stmt));
   break;
@@ -852,8 +854,7 @@ expand_complex_move (gimple_stmt_iterator *gsi, tree type)
  update_complex_components_on_edge (e, lhs, r, i);
}
   else if (is_gimple_call (stmt)
-  || gimple_has_side_effects (stmt)
-  || gimple_assign_rhs_code (stmt) == PAREN_EXPR)
+  || gimple_has_side_effects (stmt))
{
  r = build1 (REALPART_EXPR, inner_type, lhs);
  i = build1 (IMAGPART_EXPR, inner_type, lhs);
@@ -1545,6 +1546,25 @@ expand_complex_negation (gimple_stmt_iterator *gsi, tree 
inner_type,
   update_complex_assignment (gsi, rr, ri);
 }
 
+/* Expand complex paren to scalars:
+   ((a)) = ((ar)) + i((ai))
+*/
+
+static void
+expand_complex_paren (gimple_stmt_iterator *gsi, tree inner_type,
+ tree ar, tree ai)
+{
+  tree rr, ri;
+  gimple_seq stmts = NULL;
+  location_t loc = gimple_location (gsi_stmt (*gsi));
+
+  rr = gimple_build (&stmts, loc, PAREN_EXPR

[gcc r15-1590] c-family: Add Warning property to Wnrvo option [PR115624]

2024-06-24 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:f7747210947a7c66e865c6ac571cce39e2b87caf

commit r15-1590-gf7747210947a7c66e865c6ac571cce39e2b87caf
Author: Andrew Pinski 
Date:   Mon Jun 24 18:16:13 2024 -0700

c-family: Add Warning property to Wnrvo option [PR115624]

This was missing when Wnrvo was added in
r14-1594-g2ae5384d457b9c67586de012816dfc71a6943164 .

Pushed after a bootstrap/test on x86_64-linux-gnu.

gcc/c-family/ChangeLog:

PR c++/115624
* c.opt (Wnrvo): Add Warning property.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/c-family/c.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index b067369fa7e..864ef4e3b3d 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1137,7 +1137,7 @@ C ObjC Var(warn_override_init_side_effects) Init(1) 
Warning
 Warn about overriding initializers with side effects.
 
 Wnrvo
-C++ ObjC++ Var(warn_nrvo)
+C++ ObjC++ Var(warn_nrvo) Warning
 Warn if the named return value optimization is not performed although it is 
allowed.
 
 Wpacked-bitfield-compat


[gcc r14-10344] c-family: Add Warning property to Wnrvo option [PR115624]

2024-06-24 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:b7157f3930762097210aa24a3f24ed5cafee6672

commit r14-10344-gb7157f3930762097210aa24a3f24ed5cafee6672
Author: Andrew Pinski 
Date:   Mon Jun 24 18:16:13 2024 -0700

c-family: Add Warning property to Wnrvo option [PR115624]

This was missing when Wnrvo was added in
r14-1594-g2ae5384d457b9c67586de012816dfc71a6943164 .

Pushed after a bootstrap/test on x86_64-linux-gnu.

gcc/c-family/ChangeLog:

PR c++/115624
* c.opt (Wnrvo): Add Warning property.

Signed-off-by: Andrew Pinski 
(cherry picked from commit f7747210947a7c66e865c6ac571cce39e2b87caf)

Diff:
---
 gcc/c-family/c.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 403abc1f26e..b310b40d857 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1133,7 +1133,7 @@ C ObjC Var(warn_override_init_side_effects) Init(1) 
Warning
 Warn about overriding initializers with side effects.
 
 Wnrvo
-C++ ObjC++ Var(warn_nrvo)
+C++ ObjC++ Var(warn_nrvo) Warning
 Warn if the named return value optimization is not performed although it is 
allowed.
 
 Wpacked-bitfield-compat


[gcc r15-2458] match: Fix types matching for `(?:) !=/== (?:)` [PR116134]

2024-07-31 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c65653f5685a106661596a413744953ea9cdbc60

commit r15-2458-gc65653f5685a106661596a413744953ea9cdbc60
Author: Andrew Pinski 
Date:   Mon Jul 29 11:33:58 2024 -0700

match: Fix types matching for `(?:) !=/== (?:)` [PR116134]

The problem here is that in generic, the types of comparisons don't need
to be boolean types (or vector boolean types). This fixes that by making
sure the types of the conditions match before doing the optimization.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR middle-end/116134

gcc/ChangeLog:

* match.pd (`(a ? x : y) eq/ne (b ? x : y)`): Check that
a and b types match.
(`(a ? x : y) eq/ne (b ? y : x)`): Likewise.

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr116134-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd  | 10 ++
 gcc/testsuite/gcc.dg/torture/pr116134-1.c |  9 +
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 1c8601229e3d..881a827860f0 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5640,12 +5640,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (for eqne (eq ne)
   (simplify
(eqne:c (cnd @0 @1 @2) (cnd @3 @1 @2))
-(cnd (bit_xor @0 @3) { constant_boolean_node (eqne == NE_EXPR, type); }
- { constant_boolean_node (eqne != NE_EXPR, type); }))
+(if (types_match (TREE_TYPE (@0), TREE_TYPE (@3)))
+ (cnd (bit_xor @0 @3) { constant_boolean_node (eqne == NE_EXPR, type); }
+  { constant_boolean_node (eqne != NE_EXPR, type); })))
   (simplify
(eqne:c (cnd @0 @1 @2) (cnd @3 @2 @1))
-(cnd (bit_xor @0 @3) { constant_boolean_node (eqne != NE_EXPR, type); }
- { constant_boolean_node (eqne == NE_EXPR, type); }
+(if (types_match (TREE_TYPE (@0), TREE_TYPE (@3)))
+ (cnd (bit_xor @0 @3) { constant_boolean_node (eqne != NE_EXPR, type); }
+  { constant_boolean_node (eqne == NE_EXPR, type); })
 
 /* Canonicalize mask ? { 0, ... } : { -1, ...} to ~mask if the mask
types are compatible.  */
diff --git a/gcc/testsuite/gcc.dg/torture/pr116134-1.c 
b/gcc/testsuite/gcc.dg/torture/pr116134-1.c
new file mode 100644
index ..ab595f996805
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116134-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+
+/* This used to ICE as comparisons on generic can be different types. */
+/* PR middle-end/116134  */
+
+int a;
+int b;
+int d;
+void c() { 1UL <= (d < b) != (1UL & (0 < a | 0L)); }


[gcc r15-2637] match: Fix wrong code due to `(a ? e : f) !=/== (b ? e : f)` patterns [PR116120]

2024-08-01 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c5ccdfdcab0b24afba2a661af861bec1d63f0595

commit r15-2637-gc5ccdfdcab0b24afba2a661af861bec1d63f0595
Author: Andrew Pinski 
Date:   Mon Jul 29 14:00:13 2024 -0700

match: Fix wrong code due to `(a ? e : f) !=/== (b ? e : f)` patterns 
[PR116120]

When this pattern was converted from only dealing with 0/-1, we 
missed that if `e == f` is true
then the optimization is wrong; it needs an extra check for that.

This changes the patterns to be:
/* (a ? x : y) != (b ? x : y) --> (a^b & (x != y)) ? TRUE  : FALSE */
/* (a ? x : y) == (b ? x : y) --> (a^b & (x != y)) ? FALSE : TRUE  */
/* (a ? x : y) != (b ? y : x) --> (a^b | (x == y)) ? FALSE : TRUE  */
/* (a ? x : y) == (b ? y : x) --> (a^b | (x == y)) ? TRUE  : FALSE */

Also this can't be done if x can be a NaN either, since that changes 
the value there too.

This still produces better code than the original case and in many cases (x 
!= y) will
still reduce to either false or true.

With this change we also need to make sure `a`, `b` and the resulting types 
are all
the same for the same reason as the previous patch.

I updated (well added) to the testcases to make sure there are the right 
amount of
comparisons left.

Changes since v1:
* v2: Fixed the testcase names and fixed dg-run to be `dg-do run`. Added a 
check for HONORS_NANS too.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/116120

gcc/ChangeLog:

* match.pd (`(a ? x : y) eq/ne (b ? x : y)`): Add test for `x != y`
in result.
(`(a ? x : y) eq/ne (b ? y : x)`): Add test for `x == y` in result.

gcc/testsuite/ChangeLog:

* g++.dg/tree-ssa/pr50.C: Add extra checks on the test.
* gcc.dg/tree-ssa/pr50-1.c: Likewise.
* gcc.dg/tree-ssa/pr50.c: Likewise.
* g++.dg/torture/pr116120-1.C: New test.
* g++.dg/torture/pr116120-2.C: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd   | 23 -
 gcc/testsuite/g++.dg/torture/pr116120-1.C  | 32 ++
 gcc/testsuite/g++.dg/torture/pr116120-2.C  | 53 ++
 gcc/testsuite/g++.dg/tree-ssa/pr50.C   | 10 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr50-1.c |  9 +
 gcc/testsuite/gcc.dg/tree-ssa/pr50.c   |  1 +
 6 files changed, 120 insertions(+), 8 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 881a827860f0..c9c8478d2865 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5632,21 +5632,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond (bit_and (bit_not @0) @1) @2 @3)))
 #endif
 
-/* (a ? x : y) != (b ? x : y) --> (a^b) ? TRUE  : FALSE */
-/* (a ? x : y) == (b ? x : y) --> (a^b) ? FALSE : TRUE  */
-/* (a ? x : y) != (b ? y : x) --> (a^b) ? FALSE : TRUE  */
-/* (a ? x : y) == (b ? y : x) --> (a^b) ? TRUE  : FALSE */
+/* (a ? x : y) != (b ? x : y) --> (a^b & (x != y)) ? TRUE  : FALSE */
+/* (a ? x : y) == (b ? x : y) --> (a^b & (x != y)) ? FALSE : TRUE  */
+/* (a ? x : y) != (b ? y : x) --> (a^b | (x == y)) ? FALSE : TRUE  */
+/* (a ? x : y) == (b ? y : x) --> (a^b | (x == y)) ? TRUE  : FALSE */
+/* These are only valid if x and y don't have NaNs. */
 (for cnd (cond vec_cond)
  (for eqne (eq ne)
   (simplify
(eqne:c (cnd @0 @1 @2) (cnd @3 @1 @2))
-(if (types_match (TREE_TYPE (@0), TREE_TYPE (@3)))
- (cnd (bit_xor @0 @3) { constant_boolean_node (eqne == NE_EXPR, type); }
+(if (!HONOR_NANS (@1)
+&& types_match (TREE_TYPE (@0), TREE_TYPE (@3))
+ && types_match (type, TREE_TYPE (@0)))
+ (cnd (bit_and (bit_xor @0 @3) (ne:type @1 @2))
+  { constant_boolean_node (eqne == NE_EXPR, type); }
   { constant_boolean_node (eqne != NE_EXPR, type); })))
   (simplify
(eqne:c (cnd @0 @1 @2) (cnd @3 @2 @1))
-(if (types_match (TREE_TYPE (@0), TREE_TYPE (@3)))
- (cnd (bit_xor @0 @3) { constant_boolean_node (eqne != NE_EXPR, type); }
+(if (!HONOR_NANS (@1)
+&& types_match (TREE_TYPE (@0), TREE_TYPE (@3))
+ && types_match (type, TREE_TYPE (@0)))
+ (cnd (bit_ior (bit_xor @0 @3) (eq:type @1 @2))
+  { constant_boolean_node (eqne != NE_EXPR, type); }
   { constant_boolean_node (eqne == NE_EXPR, type); })
 
 /* Canonicalize mask ? { 0, ... } : { -1, ...} to ~mask if the mask
diff --git a/gcc/testsuite/g++.dg/torture/pr116120-1.C 
b/gcc/testsuite/g++.dg/torture/pr116120-1.C
new file mode 100644
index ..209946f17a43
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr116120-1.C
@@ -0,0 +1,32 @@
+// { dg-do run }
+// PR tree-optimization/116120
+
+// The optimization for `(a ? x : y) != (b ? x : y)`
+// missed that x and y could be the same value.
+
+typedef int v4si __attribute((__vector_size__(1 * sizeof(int))));
+v4si f1(v4si a, v4si b, v4si c, v4si d, v4si e, v4

[gcc r15-2691] forwprop: Don't add uses to dce list if debug statement [PR116156]

2024-08-02 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:33baa20c5cdcf5ff8164606115f00aa30f559312

commit r15-2691-g33baa20c5cdcf5ff8164606115f00aa30f559312
Author: Andrew Pinski 
Date:   Thu Aug 1 10:33:34 2024 -0700

forwprop: Don't add uses to dce list if debug statement [PR116156]

The problem here is that when forwprop does a copy prop into a statement,
we mark the uses of that statement as possibly needing to be removed. But if 
that statement happens to be a debug statement, there will be a difference 
when
compiling with debugging info turned on vs off; this is not expected.
So the fix is not to add the old use to the dce list to process if it was a 
debug
statement.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/116156

gcc/ChangeLog:

* tree-ssa-forwprop.cc (pass_forwprop::execute): Don't add
uses if the statement was a debug statement.

gcc/testsuite/ChangeLog:

* c-c++-common/torture/pr116156-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/c-c++-common/torture/pr116156-1.c | 30 +
 gcc/tree-ssa-forwprop.cc| 16 +++--
 2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/torture/pr116156-1.c 
b/gcc/testsuite/c-c++-common/torture/pr116156-1.c
new file mode 100644
index ..10f938ef4e5a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/pr116156-1.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fcompare-debug" } */
+/* PR tree-optimization/116156 */
+
+/* Forwprop used to delete an unused statement
+   but only with debug statements around. */
+
+struct jpeg_compress_struct {
+  int X_density;
+};
+void gg();
+int h(const char*,const char*) __attribute((pure));
+int h1(const char*) __attribute((pure));
+int f1() __attribute__((returns_twice));
+void real_save_jpeg(char **keys, char *values) {
+  struct jpeg_compress_struct cinfo;
+  int x_density = 0;
+  while (*keys)
+  {
+if (h1(*keys) == 0)
+  gg();
+if (h1(*keys) == 0)  {
+  if (!*values)
+x_density = -1;
+  if (x_density <= 0)
+gg();
+}
+  }
+  if (f1())
+cinfo.X_density = x_density;
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 44a6b5d39aa7..2e37642359c9 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -3923,7 +3923,8 @@ pass_forwprop::execute (function *fun)
  tree val = fwprop_ssa_val (use);
  if (val && val != use)
{
- bitmap_set_bit (simple_dce_worklist, SSA_NAME_VERSION (use));
+ if (!is_gimple_debug (stmt))
+   bitmap_set_bit (simple_dce_worklist, SSA_NAME_VERSION 
(use));
  if (may_propagate_copy (use, val))
{
  propagate_value (usep, val);
@@ -3963,12 +3964,13 @@ pass_forwprop::execute (function *fun)
if (gimple_cond_true_p (cond)
|| gimple_cond_false_p (cond))
  cfg_changed = true;
- /* Queue old uses for simple DCE.  */
- for (tree use : uses)
-   if (TREE_CODE (use) == SSA_NAME
-   && !SSA_NAME_IS_DEFAULT_DEF (use))
- bitmap_set_bit (simple_dce_worklist,
- SSA_NAME_VERSION (use));
+ /* Queue old uses for simple DCE if not debug statement.  */
+ if (!is_gimple_debug (stmt))
+   for (tree use : uses)
+ if (TREE_CODE (use) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (use))
+   bitmap_set_bit (simple_dce_worklist,
+   SSA_NAME_VERSION (use));
}
 
  if (changed || substituted_p)


[gcc r14-10552] forwprop: Don't add uses to dce list if debug statement [PR116156]

2024-08-02 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:14fa2b2ae7f49dee5e7e7469243e281e48d925b9

commit r14-10552-g14fa2b2ae7f49dee5e7e7469243e281e48d925b9
Author: Andrew Pinski 
Date:   Thu Aug 1 10:33:34 2024 -0700

forwprop: Don't add uses to dce list if debug statement [PR116156]

The problem here is that when forwprop does a copy prop into a statement,
we mark the uses of that statement as possibly needing to be removed. But if 
that statement happens to be a debug statement, there will be a difference 
when
compiling with debugging info turned on vs off; this is not expected.
So the fix is not to add the old use to the dce list to process if it was a 
debug
statement.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/116156

gcc/ChangeLog:

* tree-ssa-forwprop.cc (pass_forwprop::execute): Don't add
uses if the statement was a debug statement.

gcc/testsuite/ChangeLog:

* c-c++-common/torture/pr116156-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/c-c++-common/torture/pr116156-1.c | 30 +
 gcc/tree-ssa-forwprop.cc| 16 +++--
 2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/torture/pr116156-1.c 
b/gcc/testsuite/c-c++-common/torture/pr116156-1.c
new file mode 100644
index ..10f938ef4e5a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/pr116156-1.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fcompare-debug" } */
+/* PR tree-optimization/116156 */
+
+/* Forwprop used to delete an unused statement
+   but only with debug statements around. */
+
+struct jpeg_compress_struct {
+  int X_density;
+};
+void gg();
+int h(const char*,const char*) __attribute((pure));
+int h1(const char*) __attribute((pure));
+int f1() __attribute__((returns_twice));
+void real_save_jpeg(char **keys, char *values) {
+  struct jpeg_compress_struct cinfo;
+  int x_density = 0;
+  while (*keys)
+  {
+if (h1(*keys) == 0)
+  gg();
+if (h1(*keys) == 0)  {
+  if (!*values)
+x_density = -1;
+  if (x_density <= 0)
+gg();
+}
+  }
+  if (f1())
+cinfo.X_density = x_density;
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index abf71f0d3a03..692e96604b84 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -3919,7 +3919,8 @@ pass_forwprop::execute (function *fun)
  tree val = fwprop_ssa_val (use);
  if (val && val != use)
{
- bitmap_set_bit (simple_dce_worklist, SSA_NAME_VERSION (use));
+ if (!is_gimple_debug (stmt))
+   bitmap_set_bit (simple_dce_worklist, SSA_NAME_VERSION 
(use));
  if (may_propagate_copy (use, val))
{
  propagate_value (usep, val);
@@ -3959,12 +3960,13 @@ pass_forwprop::execute (function *fun)
if (gimple_cond_true_p (cond)
|| gimple_cond_false_p (cond))
  cfg_changed = true;
- /* Queue old uses for simple DCE.  */
- for (tree use : uses)
-   if (TREE_CODE (use) == SSA_NAME
-   && !SSA_NAME_IS_DEFAULT_DEF (use))
- bitmap_set_bit (simple_dce_worklist,
- SSA_NAME_VERSION (use));
+ /* Queue old uses for simple DCE if not debug statement.  */
+ if (!is_gimple_debug (stmt))
+   for (tree use : uses)
+ if (TREE_CODE (use) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (use))
+   bitmap_set_bit (simple_dce_worklist,
+   SSA_NAME_VERSION (use));
}
 
  if (changed || substituted_p)


[gcc r15-2700] genemit: Fix handling of explicit parallels for clobbers [PR116058]

2024-08-02 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:da33ad53bcb57943fa671c745938a53f4de89a1b

commit r15-2700-gda33ad53bcb57943fa671c745938a53f4de89a1b
Author: Andrew Pinski 
Date:   Thu Aug 1 14:22:36 2024 -0700

genemit: Fix handling of explicit parallels for clobbers [PR116058]

In a define_insn, you could use either an explicit parallel for
the insns or genrecog/genemit will add one for you.
The problem is that when genemit is processing the pattern for clobbers
(to create the function add_clobbers), genemit hasn't added the implicit
parallel yet, but at the same time it forgot to account for the fact that
there could be an explicit parallel there.
This means in some cases (like in the sh backend), add_clobbers
and recog had a different idea of whether there were clobbers on the insn.
This fixes the problem by looking through the explicit parallel
for the instruction in genemit.

Bootstrapped and tested on x86_64-linux-gnu.

PR middle-end/116058

gcc/ChangeLog:

* genemit.cc (struct clobber_pat): Change pattern to be rtvec.
Add code field.
(gen_insn): Look through an explicit parallel if there was one.
Update store to new clobber_pat.
(output_add_clobbers): Update call to gen_exp for the changed
clobber_pat.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/genemit.cc | 38 +++---
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/gcc/genemit.cc b/gcc/genemit.cc
index 98d0477424b0..5d3d10f5061a 100644
--- a/gcc/genemit.cc
+++ b/gcc/genemit.cc
@@ -35,10 +35,11 @@ along with GCC; see the file COPYING3.  If not see
 struct clobber_pat
 {
   struct clobber_ent *insns;
-  rtx pattern;
+  rtvec pattern;
   int first_clobber;
   struct clobber_pat *next;
   int has_hard_reg;
+  rtx_code code;
 } *clobber_list;
 
 /* Records one insn that uses the clobber list.  */
@@ -337,19 +338,25 @@ gen_insn (md_rtx_info *info, FILE *file)
   if (XVEC (insn, 1))
 {
   int has_hard_reg = 0;
+  rtvec pattern = XVEC (insn, 1);
 
-  for (i = XVECLEN (insn, 1) - 1; i > 0; i--)
+  /* Look though an explicit parallel. */
+  if (GET_NUM_ELEM (pattern) == 1
+ && GET_CODE (RTVEC_ELT (pattern, 0)) == PARALLEL)
+   pattern = XVEC (RTVEC_ELT (pattern, 0), 0);
+
+  for (i = GET_NUM_ELEM (pattern) - 1; i > 0; i--)
{
- if (GET_CODE (XVECEXP (insn, 1, i)) != CLOBBER)
+ if (GET_CODE (RTVEC_ELT (pattern, i)) != CLOBBER)
break;
 
- if (REG_P (XEXP (XVECEXP (insn, 1, i), 0)))
+ if (REG_P (XEXP (RTVEC_ELT (pattern, i), 0)))
has_hard_reg = 1;
- else if (GET_CODE (XEXP (XVECEXP (insn, 1, i), 0)) != MATCH_SCRATCH)
+ else if (GET_CODE (XEXP (RTVEC_ELT (pattern, i), 0)) != MATCH_SCRATCH)
break;
}
 
-  if (i != XVECLEN (insn, 1) - 1)
+  if (i != GET_NUM_ELEM (pattern) - 1)
{
  struct clobber_pat *p;
  struct clobber_ent *link = XNEW (struct clobber_ent);
@@ -363,13 +370,13 @@ gen_insn (md_rtx_info *info, FILE *file)
  for (p = clobber_list; p; p = p->next)
{
  if (p->first_clobber != i + 1
- || XVECLEN (p->pattern, 1) != XVECLEN (insn, 1))
+ || GET_NUM_ELEM (p->pattern) != GET_NUM_ELEM (pattern))
continue;
 
- for (j = i + 1; j < XVECLEN (insn, 1); j++)
+ for (j = i + 1; j < GET_NUM_ELEM (pattern); j++)
{
- rtx old_rtx = XEXP (XVECEXP (p->pattern, 1, j), 0);
- rtx new_rtx = XEXP (XVECEXP (insn, 1, j), 0);
+ rtx old_rtx = XEXP (RTVEC_ELT (p->pattern, j), 0);
+ rtx new_rtx = XEXP (RTVEC_ELT (pattern, j), 0);
 
  /* OLD and NEW_INSN are the same if both are to be a SCRATCH
 of the same mode,
@@ -383,7 +390,7 @@ gen_insn (md_rtx_info *info, FILE *file)
break;
}
 
- if (j == XVECLEN (insn, 1))
+ if (j == GET_NUM_ELEM (pattern))
break;
}
 
@@ -392,10 +399,11 @@ gen_insn (md_rtx_info *info, FILE *file)
  p = XNEW (struct clobber_pat);
 
  p->insns = 0;
- p->pattern = insn;
+ p->pattern = pattern;
  p->first_clobber = i + 1;
  p->next = clobber_list;
  p->has_hard_reg = has_hard_reg;
+ p->code = GET_CODE (insn);
  clobber_list = p;
}
 
@@ -662,11 +670,11 @@ output_add_clobbers (md_rtx_info *info, FILE *file)
   for (ent = clobber->insns; ent; ent = ent->next)
fprintf (file, "case %d:\n", ent->code_number);
 
-  for (i = clobber->first_clobber; i < XVECLEN (clobber->pattern, 1); i++)
+  for (i = clobber->first_clobber; i < GET_NUM_ELEM (clobber->pattern); 
i++)
{
  fprintf (file, "  XVECEXP 

[gcc r15-2712] IRA: Ignore debug insns for uses in split_live_ranges_for_shrink_wrap. [PR116179]

2024-08-04 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:01cca42f65f25d709264fe277d25d9db3d68c437

commit r15-2712-g01cca42f65f25d709264fe277d25d9db3d68c437
Author: Andrew Pinski 
Date:   Fri Aug 2 10:04:40 2024 -0700

IRA: Ignore debug insns for uses in split_live_ranges_for_shrink_wrap. 
[PR116179]

Late_combine exposed this latent bug in split_live_ranges_for_shrink_wrap.
What it did was copy-prop regno 151 from regno 119 from:
```
(insn 2 264 3 2 (set (reg/f:DI 119 [ thisD.3697 ])
(reg:DI 151)) "/app/example.cpp":19:13 70 {*movdi_aarch64}
 (expr_list:REG_DEAD (reg:DI 151)
(nil)))
```

into these insns:
```
(debug_insn 147 146 148 5 (var_location:DI thisD.3727 (reg/f:DI 119 [ 
thisD.3697 ])) "/app/example.cpp":21:5 -1
 (nil))

(insn 167 166 168 7 (set (reg:DI 1 x1)
(reg/f:DI 119 [ thisD.3697 ])) "/app/example.cpp":14:21 70 
{*movdi_aarch64}
 (nil))
```

Both are valid things to do. The problem is 
split_live_ranges_for_shrink_wrap looks at the
uses of reg 151 and with and without debugging reg 151 have a different 
usage in different BBs.
The function is trying to find a splitting point for reg 151 and they are 
different. In the end
this causes register allocation difference.
The fix is for split_live_ranges_for_shrink_wrap to ignore uses that were 
in debug insns.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR rtl-optimization/116179

gcc/ChangeLog:

* ira.cc (split_live_ranges_for_shrink_wrap): For the uses loop,
only look at non-debug insns.

gcc/testsuite/ChangeLog:

* g++.dg/torture/pr116179-1.C: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/ira.cc|  5 -
 gcc/testsuite/g++.dg/torture/pr116179-1.C | 26 ++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/gcc/ira.cc b/gcc/ira.cc
index 5642aea3caae..156541df4e6c 100644
--- a/gcc/ira.cc
+++ b/gcc/ira.cc
@@ -5144,7 +5144,10 @@ split_live_ranges_for_shrink_wrap (void)
   use = DF_REF_NEXT_REG (use))
{
  int ubbi = DF_REF_BB (use)->index;
- if (bitmap_bit_p (reachable, ubbi))
+
+ /* Only non debug insns should be taken into account.  */
+ if (NONDEBUG_INSN_P (DF_REF_INSN (use))
+ && bitmap_bit_p (reachable, ubbi))
bitmap_set_bit (need_new, ubbi);
}
   last_interesting_insn = insn;
diff --git a/gcc/testsuite/g++.dg/torture/pr116179-1.C 
b/gcc/testsuite/g++.dg/torture/pr116179-1.C
new file mode 100644
index ..85e63c5938f5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr116179-1.C
@@ -0,0 +1,26 @@
+/* { dg-additional-options "-fcompare-debug" } */
+
+/* PR rtl-optimization/116179 */
+
+struct g *b;
+struct g {};
+void *operator new(__SIZE_TYPE__, void *);
+enum c {};
+struct d : g{} * e;
+c f;
+struct h {
+  g *k() {
+d *a;
+c i;
+if (a || i == 0 || i == 1)
+  if (e || f)
+return 0;
+return new (&j) d;
+  }
+  void n();
+  g j;
+};
+void h::n() {
+  for (g *l(b), *m(b); l; l = m, m = 0)
+k();
+}


[gcc r15-2736] sh: Don't call make_insn_raw in sh_recog_treg_set_expr [PR116189]

2024-08-05 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:0355c943b9e954e8f59068971d934f1b91ecb729

commit r15-2736-g0355c943b9e954e8f59068971d934f1b91ecb729
Author: Andrew Pinski 
Date:   Sat Aug 3 09:30:57 2024 -0700

sh: Don't call make_insn_raw in sh_recog_treg_set_expr [PR116189]

This was an interesting compare debug failure to debug. The first symptom
was in gcse which would produce a different order of creating 
pseudo-registers. This
was caused by a different order of a hashtable walk, due to the hash table 
having different
number of entries. Which in turn was due to the number of max insn being 
different between
the 2 runs. The place max insn uid comes from was in sh_recog_treg_set_expr 
which is called
via rtx_costs and fwprop would cause rtx_costs in some cases for debug insn 
related stuff.

Build and tested for sh4-linux-gnu.

PR target/116189

gcc/ChangeLog:

* config/sh/sh.cc (sh_recog_treg_set_expr): Don't call 
make_insn_raw,
make the insn with a fake uid.

gcc/testsuite/ChangeLog:

* c-c++-common/torture/pr116189-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/sh/sh.cc | 12 +-
 gcc/testsuite/c-c++-common/torture/pr116189-1.c | 30 +
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index bc0174203810..7391b8df5830 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -12297,7 +12297,17 @@ sh_recog_treg_set_expr (rtx op, machine_mode mode)
  have to capture its current state and restore it afterwards.  */
   recog_data_d prev_recog_data = recog_data;
 
-  rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
+  /* Note we can't use insn_raw here since that increases the uid
+ and could cause debug compare differences; this insn never leaves
+ this function so create a dummy one. */
+  rtx_insn* i = as_a <rtx_insn *> (rtx_alloc (INSN));
+
+  INSN_UID (i) = 1;
+  PATTERN (i) = gen_rtx_SET (get_t_reg_rtx (), op);
+  INSN_CODE (i) = -1;
+  REG_NOTES (i) = NULL;
+  INSN_LOCATION (i) = curr_insn_location ();
+  BLOCK_FOR_INSN (i) = NULL;
   SET_PREV_INSN (i) = NULL;
   SET_NEXT_INSN (i) = NULL;
 
diff --git a/gcc/testsuite/c-c++-common/torture/pr116189-1.c 
b/gcc/testsuite/c-c++-common/torture/pr116189-1.c
new file mode 100644
index ..055c563f43e5
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/pr116189-1.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fcompare-debug" } */
+
+/* PR target/116189 */
+
+/* In the sh backend, we used to create insn in the path of rtx_costs.
+   This means sometimes the max uid for insns would be different between
+   debugging and non debugging which then would cause gcse's hashtable
+   to have different number of slots which would cause a different walk
+   for that hash table.  */
+
+extern void ff(void);
+extern short nn[8][4];
+typedef unsigned short move_table[4];
+extern signed long long ira_overall_cost;
+extern signed long long ira_load_cost;
+extern move_table *x_ira_register_move_cost[1];
+struct move { struct move *next; };
+unsigned short t;
+void emit_move_list(struct move * list, int freq, unsigned char mode, int 
regno) {
+  int cost;
+  for (; list != 0; list = list->next)
+  {
+ff();
+unsigned short aclass = t;
+cost = (nn)[mode][aclass] ;
+ira_load_cost = cost;
+cost = x_ira_register_move_cost[mode][aclass][aclass] * freq ;
+ira_overall_cost = cost;
+  }
+}


[gcc r14-10564] sh: Don't call make_insn_raw in sh_recog_treg_set_expr [PR116189]

2024-08-05 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:ba45573c8072f06f67af22a2ca60da5f5c12beae

commit r14-10564-gba45573c8072f06f67af22a2ca60da5f5c12beae
Author: Andrew Pinski 
Date:   Sat Aug 3 09:30:57 2024 -0700

sh: Don't call make_insn_raw in sh_recog_treg_set_expr [PR116189]

This was an interesting compare debug failure to debug. The first symptom
was in gcse which would produce a different order of creating 
pseudo-registers. This
was caused by a different order of a hashtable walk, due to the hash table 
having different
number of entries. Which in turn was due to the number of max insn being 
different between
the 2 runs. The place max insn uid comes from was in sh_recog_treg_set_expr 
which is called
via rtx_costs and fwprop would cause rtx_costs in some cases for debug insn 
related stuff.

Build and tested for sh4-linux-gnu.

PR target/116189

gcc/ChangeLog:

* config/sh/sh.cc (sh_recog_treg_set_expr): Don't call 
make_insn_raw,
make the insn with a fake uid.

gcc/testsuite/ChangeLog:

* c-c++-common/torture/pr116189-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 0355c943b9e954e8f59068971d934f1b91ecb729)

Diff:
---
 gcc/config/sh/sh.cc | 12 +-
 gcc/testsuite/c-c++-common/torture/pr116189-1.c | 30 +
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index ef3c2e6791d7..d6a6e5bd88df 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -12279,7 +12279,17 @@ sh_recog_treg_set_expr (rtx op, machine_mode mode)
  have to capture its current state and restore it afterwards.  */
   recog_data_d prev_recog_data = recog_data;
 
-  rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
+  /* Note we can't use insn_raw here since that increases the uid
+ and could cause debug compare differences; this insn never leaves
+ this function so create a dummy one. */
+  rtx_insn* i = as_a <rtx_insn *> (rtx_alloc (INSN));
+
+  INSN_UID (i) = 1;
+  PATTERN (i) = gen_rtx_SET (get_t_reg_rtx (), op);
+  INSN_CODE (i) = -1;
+  REG_NOTES (i) = NULL;
+  INSN_LOCATION (i) = curr_insn_location ();
+  BLOCK_FOR_INSN (i) = NULL;
   SET_PREV_INSN (i) = NULL;
   SET_NEXT_INSN (i) = NULL;
 
diff --git a/gcc/testsuite/c-c++-common/torture/pr116189-1.c 
b/gcc/testsuite/c-c++-common/torture/pr116189-1.c
new file mode 100644
index ..055c563f43e5
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/pr116189-1.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fcompare-debug" } */
+
+/* PR target/116189 */
+
+/* In the sh backend, we used to create insn in the path of rtx_costs.
+   This means sometimes the max uid for insns would be different between
+   debugging and non debugging which then would cause gcse's hashtable
+   to have different number of slots which would cause a different walk
+   for that hash table.  */
+
+extern void ff(void);
+extern short nn[8][4];
+typedef unsigned short move_table[4];
+extern signed long long ira_overall_cost;
+extern signed long long ira_load_cost;
+extern move_table *x_ira_register_move_cost[1];
+struct move { struct move *next; };
+unsigned short t;
+void emit_move_list(struct move * list, int freq, unsigned char mode, int 
regno) {
+  int cost;
+  for (; list != 0; list = list->next)
+  {
+ff();
+unsigned short aclass = t;
+cost = (nn)[mode][aclass] ;
+ira_load_cost = cost;
+cost = x_ira_register_move_cost[mode][aclass][aclass] * freq ;
+ira_overall_cost = cost;
+  }
+}


[gcc r13-8962] sh: Don't call make_insn_raw in sh_recog_treg_set_expr [PR116189]

2024-08-05 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:73064a28689fe3799f4fb5f1bab7634df889a43e

commit r13-8962-g73064a28689fe3799f4fb5f1bab7634df889a43e
Author: Andrew Pinski 
Date:   Sat Aug 3 09:30:57 2024 -0700

sh: Don't call make_insn_raw in sh_recog_treg_set_expr [PR116189]

This was an interesting compare debug failure to debug. The first symptom
was in gcse which would produce a different order of creating 
pseudo-registers. This
was caused by a different order of a hashtable walk, due to the hash table 
having different
number of entries. Which in turn was due to the number of max insn being 
different between
the 2 runs. The place max insn uid comes from was in sh_recog_treg_set_expr 
which is called
via rtx_costs and fwprop would cause rtx_costs in some cases for debug insn 
related stuff.

Build and tested for sh4-linux-gnu.

PR target/116189

gcc/ChangeLog:

* config/sh/sh.cc (sh_recog_treg_set_expr): Don't call 
make_insn_raw,
make the insn with a fake uid.

gcc/testsuite/ChangeLog:

* c-c++-common/torture/pr116189-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 0355c943b9e954e8f59068971d934f1b91ecb729)

Diff:
---
 gcc/config/sh/sh.cc | 12 +-
 gcc/testsuite/c-c++-common/torture/pr116189-1.c | 30 +
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 8ac20dfaa382..717123af6554 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -12279,7 +12279,17 @@ sh_recog_treg_set_expr (rtx op, machine_mode mode)
  have to capture its current state and restore it afterwards.  */
   recog_data_d prev_recog_data = recog_data;
 
-  rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
+  /* Note we can't use insn_raw here since that increases the uid
+ and could cause debug compare differences; this insn never leaves
+ this function so create a dummy one. */
+  rtx_insn* i = as_a <rtx_insn *> (rtx_alloc (INSN));
+
+  INSN_UID (i) = 1;
+  PATTERN (i) = gen_rtx_SET (get_t_reg_rtx (), op);
+  INSN_CODE (i) = -1;
+  REG_NOTES (i) = NULL;
+  INSN_LOCATION (i) = curr_insn_location ();
+  BLOCK_FOR_INSN (i) = NULL;
   SET_PREV_INSN (i) = NULL;
   SET_NEXT_INSN (i) = NULL;
 
diff --git a/gcc/testsuite/c-c++-common/torture/pr116189-1.c 
b/gcc/testsuite/c-c++-common/torture/pr116189-1.c
new file mode 100644
index ..055c563f43e5
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/pr116189-1.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fcompare-debug" } */
+
+/* PR target/116189 */
+
+/* In the sh backend, we used to create insn in the path of rtx_costs.
+   This means sometimes the max uid for insns would be different between
+   debugging and non debugging which then would cause gcse's hashtable
+   to have different number of slots which would cause a different walk
+   for that hash table.  */
+
+extern void ff(void);
+extern short nn[8][4];
+typedef unsigned short move_table[4];
+extern signed long long ira_overall_cost;
+extern signed long long ira_load_cost;
+extern move_table *x_ira_register_move_cost[1];
+struct move { struct move *next; };
+unsigned short t;
+void emit_move_list(struct move * list, int freq, unsigned char mode, int 
regno) {
+  int cost;
+  for (; list != 0; list = list->next)
+  {
+ff();
+unsigned short aclass = t;
+cost = (nn)[mode][aclass] ;
+ira_load_cost = cost;
+cost = x_ira_register_move_cost[mode][aclass][aclass] * freq ;
+ira_overall_cost = cost;
+  }
+}


[gcc r12-10660] sh: Don't call make_insn_raw in sh_recog_treg_set_expr [PR116189]

2024-08-05 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:dfacc021c9775b1563c717cf3f8114d0f874b030

commit r12-10660-gdfacc021c9775b1563c717cf3f8114d0f874b030
Author: Andrew Pinski 
Date:   Sat Aug 3 09:30:57 2024 -0700

sh: Don't call make_insn_raw in sh_recog_treg_set_expr [PR116189]

This was an interesting compare debug failure to debug. The first symptom
was in gcse which would produce a different order of creating 
pseudo-registers. This
was caused by a different order of a hashtable walk, due to the hash table 
having different
number of entries. Which in turn was due to the number of max insn being 
different between
the 2 runs. The place max insn uid comes from was in sh_recog_treg_set_expr 
which is called
via rtx_costs and fwprop would cause rtx_costs in some cases for debug insn 
related stuff.

Build and tested for sh4-linux-gnu.

PR target/116189

gcc/ChangeLog:

* config/sh/sh.cc (sh_recog_treg_set_expr): Don't call 
make_insn_raw,
make the insn with a fake uid.

gcc/testsuite/ChangeLog:

* c-c++-common/torture/pr116189-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 0355c943b9e954e8f59068971d934f1b91ecb729)

Diff:
---
 gcc/config/sh/sh.cc | 12 +-
 gcc/testsuite/c-c++-common/torture/pr116189-1.c | 30 +
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 494e4536251d..5455c3fddaaf 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -12278,7 +12278,17 @@ sh_recog_treg_set_expr (rtx op, machine_mode mode)
  have to capture its current state and restore it afterwards.  */
   recog_data_d prev_recog_data = recog_data;
 
-  rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
+  /* Note we can't use insn_raw here since that increases the uid
+ and could cause debug compare differences; this insn never leaves
+ this function so create a dummy one. */
+  rtx_insn* i = as_a <rtx_insn *> (rtx_alloc (INSN));
+
+  INSN_UID (i) = 1;
+  PATTERN (i) = gen_rtx_SET (get_t_reg_rtx (), op);
+  INSN_CODE (i) = -1;
+  REG_NOTES (i) = NULL;
+  INSN_LOCATION (i) = curr_insn_location ();
+  BLOCK_FOR_INSN (i) = NULL;
   SET_PREV_INSN (i) = NULL;
   SET_NEXT_INSN (i) = NULL;
 
diff --git a/gcc/testsuite/c-c++-common/torture/pr116189-1.c 
b/gcc/testsuite/c-c++-common/torture/pr116189-1.c
new file mode 100644
index ..055c563f43e5
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/pr116189-1.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fcompare-debug" } */
+
+/* PR target/116189 */
+
+/* In the sh backend, we used to create insn in the path of rtx_costs.
+   This means sometimes the max uid for insns would be different between
+   debugging and non debugging which then would cause gcse's hashtable
+   to have different number of slots which would cause a different walk
+   for that hash table.  */
+
+extern void ff(void);
+extern short nn[8][4];
+typedef unsigned short move_table[4];
+extern signed long long ira_overall_cost;
+extern signed long long ira_load_cost;
+extern move_table *x_ira_register_move_cost[1];
+struct move { struct move *next; };
+unsigned short t;
+void emit_move_list(struct move * list, int freq, unsigned char mode, int 
regno) {
+  int cost;
+  for (; list != 0; list = list->next)
+  {
+ff();
+unsigned short aclass = t;
+cost = (nn)[mode][aclass] ;
+ira_load_cost = cost;
+cost = x_ira_register_move_cost[mode][aclass][aclass] * freq ;
+ira_overall_cost = cost;
+  }
+}


[gcc r15-2771] aarch64/testsuite: Fix gcc.target/aarch64/simd/vmmla.c [PR116207]

2024-08-06 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:d4b35dab72b161f699aede157d90446973a71c99

commit r15-2771-gd4b35dab72b161f699aede157d90446973a71c99
Author: Andrew Pinski 
Date:   Tue Aug 6 15:43:21 2024 -0700

aarch64/testsuite: Fix gcc.target/aarch64/simd/vmmla.c [PR116207]

After r15-2414-g2d105efd6f60 which fixed the dg-do directive, the testcase
stopped working because there was a missing -save-temps. This adds that and
now the testcase passes again.

Pushed as obvious.

gcc/testsuite/ChangeLog:

PR testsuite/116207
* gcc.target/aarch64/simd/vmmla.c: Add -save-temps to the
options.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.target/aarch64/simd/vmmla.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c 
b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c
index 777decc56a20..3a599ddb58a4 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c
@@ -1,6 +1,6 @@
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
-/* { dg-additional-options "-march=armv8.2-a+i8mm" } */
+/* { dg-additional-options "-march=armv8.2-a+i8mm -save-temps" } */
 
 #include "arm_neon.h"


[gcc r15-2797] aarch64/testsuite: Add testcases for recently fixed PRs

2024-08-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:2c6174402ea315ecf618cfcba741e8cb18bc5282

commit r15-2797-g2c6174402ea315ecf618cfcba741e8cb18bc5282
Author: Andrew Pinski 
Date:   Wed Aug 7 09:36:38 2024 -0700

aarch64/testsuite: Add testcases for recently fixed PRs

The commit for PR 116258 added an x86_64-specific testcase;
I thought it would be a good idea to add an aarch64 testcase too.
And since it also fixed VLA vectors, add an SVE testcase as well.

Pushed as obvious after a test for aarch64-linux-gnu.

PR middle-end/116258
PR middle-end/116259

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pr116258.c: New test.
* gcc.target/aarch64/sve/pr116259-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.target/aarch64/pr116258.c   | 17 +
 gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c | 12 
 2 files changed, 29 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/pr116258.c 
b/gcc/testsuite/gcc.target/aarch64/pr116258.c
new file mode 100644
index ..e727ad4b72a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116258.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+nosve"
+
+#define vect16 __attribute__((vector_size(16)))
+#define h(a) __builtin_assoc_barrier((a))
+
+ vect16 float  f( vect16 float  x, vect16 float vconstants0)
+{
+  vect16 float  t = (x * (vconstants0[0]));
+  return (x + h(t));
+}
+
+/* { dg-final { scan-assembler-times "\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-not "dup\t" } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c
new file mode 100644
index ..bb2eed4728c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* PR middle-end/116259 */
+
+#include <arm_sve.h>
+
+/* PAREN_EXPR lowering for VLA vectors was ICEing.
+   It should not be lowered in a similar way as moves
+   are not lowered.  */
+svfloat64_t f(svfloat64_t x)
+{
+  return __builtin_assoc_barrier(x);
+}


[gcc r15-2824] vect: Small C++11-ification of vect_vect_recog_func_ptrs

2024-08-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:ad7d4843d452b97686bcc30da11b933759f09a12

commit r15-2824-gad7d4843d452b97686bcc30da11b933759f09a12
Author: Andrew Pinski 
Date:   Wed Aug 7 10:58:45 2024 -0700

vect: Small C++11-ification of vect_vect_recog_func_ptrs

This is a small C++11-ification of the use of vect_vect_recog_func_ptrs.
It changes the loop into a range-based loop, which lets us remove the 
variable
definition of NUM_PATTERNS. It also uses a const reference instead of a pointer.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* tree-vect-patterns.cc (NUM_PATTERNS): Delete.
(vect_pattern_recog_1): Constify and change
recog_func to a reference.
(vect_pattern_recog): Use range-based loop over
vect_vect_recog_func_ptrs.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-vect-patterns.cc | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 87b3dc413b8b..f52de2b6972d 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -7362,8 +7362,6 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   /* These must come after the double widening ones.  */
 };
 
-const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
-
 /* Mark statements that are involved in a pattern.  */
 
 void
@@ -7518,7 +7516,7 @@ vect_mark_pattern_stmts (vec_info *vinfo,
 
 static void
 vect_pattern_recog_1 (vec_info *vinfo,
- vect_recog_func *recog_func, stmt_vec_info stmt_info)
+ const vect_recog_func &recog_func, stmt_vec_info 
stmt_info)
 {
   gimple *pattern_stmt;
   tree pattern_vectype;
@@ -7538,7 +7536,7 @@ vect_pattern_recog_1 (vec_info *vinfo,
 }
 
   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
-  pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
+  pattern_stmt = recog_func.fn (vinfo, stmt_info, &pattern_vectype);
   if (!pattern_stmt)
 {
   /* Clear any half-formed pattern definition sequence.  */
@@ -7550,7 +7548,7 @@ vect_pattern_recog_1 (vec_info *vinfo,
   if (dump_enabled_p ())
 dump_printf_loc (MSG_NOTE, vect_location,
 "%s pattern recognized: %G",
-recog_func->name, pattern_stmt);
+recog_func.name, pattern_stmt);
 
   /* Mark the stmts that are involved in the pattern. */
   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
@@ -7658,8 +7656,8 @@ vect_pattern_recog (vec_info *vinfo)
continue;
 
  /* Scan over all generic vect_recog_xxx_pattern functions.  */
- for (unsigned j = 0; j < NUM_PATTERNS; j++)
-   vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
+ for (const auto &func_ptr : vect_vect_recog_func_ptrs)
+   vect_pattern_recog_1 (vinfo, func_ptr,
  stmt_info);
}
 }


[gcc r15-2840] aarch64/testsuite: Fix if-compare_2.c for removing vcond{, u, eq} patterns [PR116041]

2024-08-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:7223c64745530db102a160d5a1db4c2c8d2b9fe1

commit r15-2840-g7223c64745530db102a160d5a1db4c2c8d2b9fe1
Author: Andrew Pinski 
Date:   Tue Aug 6 16:00:02 2024 -0700

aarch64/testsuite: Fix if-compare_2.c for removing vcond{,u,eq} patterns 
[PR116041]

For bar1 and bar2, we currently expect the bsl instruction to be used, but
with slightly different register allocation inside the loop (which happens 
after
the removal of the vcond{,u,eq} patterns), we get the bit instruction.  The 
pattern that
outputs the bsl instruction will output bit and bif too, depending on register 
allocation.

So let's check for bsl, bit or bif instructions instead of just bsl 
instruction.

Tested on aarch64 both with an unmodified compiler and one which has the 
patch to disable
these optabs.

gcc/testsuite/ChangeLog:

PR testsuite/116041
* gcc.target/aarch64/if-compare_2.c: Support bit and bif for
both bar1 and bar2; add comment on why too.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.target/aarch64/if-compare_2.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/if-compare_2.c 
b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
index 14988abac459..f5a2b1956e38 100644
--- a/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
@@ -8,6 +8,7 @@
 
 typedef int v4si __attribute__ ((vector_size (16)));
 
+
 /*
 **foo1:
 ** cmgtv0.4s, v1.4s, v0.4s
@@ -29,11 +30,13 @@ v4si foo2 (v4si a, v4si b, v4si c, v4si d) {
 }
 
 
+/* The bsl could be bit or bif depending on register
+   allocator inside the loop. */
 /**
 **bar1:
 **...
 ** cmgev[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
-** bsl v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+** (bsl|bit|bif)   v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
 ** and v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
 **...
 */
@@ -44,11 +47,13 @@ void bar1 (int * restrict a, int * restrict b, int * 
restrict c,
 res[i] = ((a[i] < b[i]) & c[i]) | ((a[i] >= b[i]) & d[i]);
 }
 
+/* The bsl could be bit or bif depending on register
+   allocator inside the loop. */
 /**
 **bar2:
 **...
 ** cmgev[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
-** bsl v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+** (bsl|bit|bif)   v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
 **...
 */
 void bar2 (int * restrict a, int * restrict b, int * restrict c,


[gcc r15-2942] PHIOPT: Fix comment before factor_out_conditional_operation

2024-08-15 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:9381d52893a77edf2983d72b41f64063ee7cd4bd

commit r15-2942-g9381d52893a77edf2983d72b41f64063ee7cd4bd
Author: Andrew Pinski 
Date:   Sun Nov 5 19:27:51 2023 -0800

PHIOPT: Fix comment before factor_out_conditional_operation

I didn't update the comment before factor_out_conditional_operation
correctly. This updates it to be correct and mentions unary operations
rather than just conversions.

Pushed as obvious.

gcc/ChangeLog:

* tree-ssa-phiopt.cc (factor_out_conditional_operation): Update
comment.

Diff:
---
 gcc/tree-ssa-phiopt.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index f05ca727503..aa414f6 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -212,7 +212,7 @@ replace_phi_edge_with_variable (basic_block cond_block,
 }
 
 /* PR66726: Factor operations out of COND_EXPR.  If the arguments of the PHI
-   stmt are CONVERT_STMT, factor out the conversion and perform the conversion
+   stmt are Unary operator, factor out the operation and perform the operation
to the result of PHI stmt.  COND_STMT is the controlling predicate.
Return the newly-created PHI, if any.  */


[gcc r15-2946] aarch64: Improve popcount for bytes [PR113042]

2024-08-15 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:fcc3af9949880476c4ed01a98bd7f5d7f29b7b16

commit r15-2946-gfcc3af9949880476c4ed01a98bd7f5d7f29b7b16
Author: Andrew Pinski 
Date:   Mon Jun 10 00:39:54 2024 +

aarch64: Improve popcount for bytes [PR113042]

For popcount for bytes, we don't need the reduction addition
after the vector cnt instruction as we are only counting one
byte's popcount.
This changes the popcount extend to cover all ALLI rather than GPI.

Changes since v1:
* v2 - Use ALLI iterator and combine all into one pattern.
   Add new testcases popcnt[6-8].c.
* v3 - Simplify TARGET_CSSC path.
   Use convert_to_mode instead of gen_zero_extend* directly.
   Some other small cleanups.

Bootstrapped and tested on aarch64-linux-gnu with no regressions.

PR target/113042

gcc/ChangeLog:

* config/aarch64/aarch64.md (popcount<mode>2): Update pattern
to support ALLI modes.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/popcnt5.c: New test.
* gcc.target/aarch64/popcnt6.c: New test.
* gcc.target/aarch64/popcnt7.c: New test.
* gcc.target/aarch64/popcnt8.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64.md  | 37 +++---
 gcc/testsuite/gcc.target/aarch64/popcnt5.c | 19 +++
 gcc/testsuite/gcc.target/aarch64/popcnt6.c | 19 +++
 gcc/testsuite/gcc.target/aarch64/popcnt7.c | 18 +++
 gcc/testsuite/gcc.target/aarch64/popcnt8.c | 18 +++
 5 files changed, 98 insertions(+), 13 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 665a333903c..12dcc16529a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5341,9 +5341,9 @@
 ;; MOV w0, v2.b[0]
 
 (define_expand "popcount2"
-  [(set (match_operand:GPI 0 "register_operand")
-   (popcount:GPI (match_operand:GPI 1 "register_operand")))]
-  "TARGET_CSSC || TARGET_SIMD"
+  [(set (match_operand:ALLI 0 "register_operand")
+   (popcount:ALLI (match_operand:ALLI 1 "register_operand")))]
+  "TARGET_CSSC ? GET_MODE_BITSIZE (mode) >= 32 : TARGET_SIMD"
 {
   if (!TARGET_CSSC)
 {
@@ -5351,18 +5351,29 @@
   rtx v1 = gen_reg_rtx (V8QImode);
   rtx in = operands[1];
   rtx out = operands[0];
-  if(mode == SImode)
-   {
- rtx tmp;
- tmp = gen_reg_rtx (DImode);
- /* If we have SImode, zero extend to DImode, pop count does
-not change if we have extra zeros. */
- emit_insn (gen_zero_extendsidi2 (tmp, in));
- in = tmp;
-   }
+  /* SImode and HImode should be zero extended to DImode.
+popcount does not change if we have extra zeros.  */
+  if (mode == SImode || mode == HImode)
+   in = convert_to_mode (DImode, in, true);
+
   emit_move_insn (v, gen_lowpart (V8QImode, in));
   emit_insn (gen_popcountv8qi2 (v1, v));
-  emit_insn (gen_aarch64_zero_extend_reduc_plus_v8qi (out, v1));
+  /* QImode, just extract from the v8qi vector.  */
+  if (mode == QImode)
+   emit_move_insn (out, gen_lowpart (QImode, v1));
+  /* HI and SI, reduction is zero extended to SImode. */
+  else if (mode == SImode || mode == HImode)
+   {
+ rtx out1 = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_zero_extendsi_reduc_plus_v8qi (out1, v1));
+ emit_move_insn (out, gen_lowpart (mode, out1));
+   }
+  /* DImode, reduction is zero extended to DImode. */
+  else
+   {
+ gcc_assert (mode == DImode);
+ emit_insn (gen_aarch64_zero_extenddi_reduc_plus_v8qi (out, v1));
+   }
   DONE;
 }
 })
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt5.c 
b/gcc/testsuite/gcc.target/aarch64/popcnt5.c
new file mode 100644
index 000..406369d9b29
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt5.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/113042 */
+
+#pragma GCC target "+nocssc"
+
+/*
+** h8:
+** ldr b[0-9]+, \[x0\]
+** cnt v[0-9]+.8b, v[0-9]+.8b
+** smovw0, v[0-9]+.b\[0\]
+** ret
+*/
+/* We should not need the addv here since we only need a byte popcount. */
+
+unsigned h8 (const unsigned char *a) {
+ return __builtin_popcountg (a[0]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt6.c 
b/gcc/testsuite/gcc.target/aarch64/popcnt6.c
new file mode 100644
index 000..e882cb24126
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt6.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/113042 */
+
+#pragma GCC target "+nocssc"
+
+/*
+** h8:
+** ldr h[0-9]+, \[x0\]
+** cnt v[0-9]+.8b, v[0-9]+.8b
+** addvb[0-9]+, v[

[gcc r15-2981] forwprop: Also dce from added statements from gimple_simplify

2024-08-18 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:3ae8794665ee7cbefee755d4b4be8d1ecb8c2a81

commit r15-2981-g3ae8794665ee7cbefee755d4b4be8d1ecb8c2a81
Author: Andrew Pinski 
Date:   Sat Aug 17 12:14:54 2024 -0700

forwprop: Also dce from added statements from gimple_simplify

This extends r14-3982-g9ea74d235c7e78 to also include the newly added 
statements
since some of them might be dead too (due to the way match and simplify 
works).
This was noticed while working on adding a new match and simplify pattern 
where a
new statement that got added was not being used.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* gimple-fold.cc (mark_lhs_in_seq_for_dce): New function.
(replace_stmt_with_simplification): Call mark_lhs_in_seq_for_dce
right before inserting the sequence.
(fold_stmt_1): Add dce_worklist argument, update call to
replace_stmt_with_simplification.
(fold_stmt): Add dce_worklist argument, update call to fold_stmt_1.
(fold_stmt_inplace): Update call to fold_stmt_1.
* gimple-fold.h (fold_stmt): Add bitmap argument.
* tree-ssa-forwprop.cc (pass_forwprop::execute): Update call to 
fold_stmt.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple-fold.cc   | 43 ---
 gcc/gimple-fold.h|  4 ++--
 gcc/tree-ssa-forwprop.cc |  2 +-
 3 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 18d7a6b176d..0bec35d06f6 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -5914,6 +5914,24 @@ has_use_on_stmt (tree name, gimple *stmt)
   return false;
 }
 
+/* Add the lhs of each statement of SEQ to DCE_WORKLIST. */
+
+static void
+mark_lhs_in_seq_for_dce (bitmap dce_worklist, gimple_seq seq)
+{
+  if (!dce_worklist)
+return;
+
+  for (gimple_stmt_iterator i = gsi_start (seq);
+   !gsi_end_p (i); gsi_next (&i))
+{
+  gimple *stmt = gsi_stmt (i);
+  tree name = gimple_get_lhs (stmt);
+  if (name && TREE_CODE (name) == SSA_NAME)
+   bitmap_set_bit (dce_worklist, SSA_NAME_VERSION (name));
+}
+}
+
 /* Worker for fold_stmt_1 dispatch to pattern based folding with
gimple_simplify.
 
@@ -5924,7 +5942,8 @@ has_use_on_stmt (tree name, gimple *stmt)
 static bool
 replace_stmt_with_simplification (gimple_stmt_iterator *gsi,
  gimple_match_op *res_op,
- gimple_seq *seq, bool inplace)
+ gimple_seq *seq, bool inplace,
+ bitmap dce_worklist)
 {
   gimple *stmt = gsi_stmt (*gsi);
   tree *ops = res_op->ops;
@@ -5992,6 +6011,8 @@ replace_stmt_with_simplification (gimple_stmt_iterator 
*gsi,
  print_gimple_stmt (dump_file, gsi_stmt (*gsi),
 0, TDF_SLIM);
}
+  // Mark the lhs of the new statements maybe for dce
+  mark_lhs_in_seq_for_dce (dce_worklist, *seq);
   gsi_insert_seq_before (gsi, *seq, GSI_SAME_STMT);
   return true;
 }
@@ -6015,6 +6036,8 @@ replace_stmt_with_simplification (gimple_stmt_iterator 
*gsi,
  print_gimple_stmt (dump_file, gsi_stmt (*gsi),
 0, TDF_SLIM);
}
+ // Mark the lhs of the new statements maybe for dce
+ mark_lhs_in_seq_for_dce (dce_worklist, *seq);
  gsi_insert_seq_before (gsi, *seq, GSI_SAME_STMT);
  return true;
}
@@ -6032,6 +6055,8 @@ replace_stmt_with_simplification (gimple_stmt_iterator 
*gsi,
print_gimple_seq (dump_file, *seq, 0, TDF_SLIM);
  print_gimple_stmt (dump_file, gsi_stmt (*gsi), 0, TDF_SLIM);
}
+  // Mark the lhs of the new statements maybe for dce
+  mark_lhs_in_seq_for_dce (dce_worklist, *seq);
   gsi_insert_seq_before (gsi, *seq, GSI_SAME_STMT);
   return true;
 }
@@ -6047,6 +6072,8 @@ replace_stmt_with_simplification (gimple_stmt_iterator 
*gsi,
  fprintf (dump_file, "gimple_simplified to ");
  print_gimple_seq (dump_file, *seq, 0, TDF_SLIM);
}
+ // Mark the lhs of the new statements maybe for dce
+ mark_lhs_in_seq_for_dce (dce_worklist, *seq);
  gsi_replace_with_seq_vops (gsi, *seq);
  return true;
}
@@ -6214,7 +6241,8 @@ maybe_canonicalize_mem_ref_addr (tree *t, bool is_debug = 
false)
distinguishes both cases.  */
 
 static bool
-fold_stmt_1 (gimple_stmt_iterator *gsi, bool inplace, tree (*valueize) (tree))
+fold_stmt_1 (gimple_stmt_iterator *gsi, bool inplace, tree (*valueize) (tree),
+bitmap dce_worklist = nullptr)
 {
   bool changed = false;
   gimple *stmt = gsi_stmt (*gsi);
@@ -6382,7 +6410,8 @@ fold_stmt_1 (gimple_stmt_iterator *gsi, bool inplace, 
tree (*valueize) (tree))
   if (gimple_simplify (stmt, &res_op, inplace ? NULL : &seq,
 

[gcc r15-2986] PHIOPT: move factor_out_conditional_operation over to use gimple_match_op

2024-08-18 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:cd2f394418be0cc15d05c97ed72567f2f5e15172

commit r15-2986-gcd2f394418be0cc15d05c97ed72567f2f5e15172
Author: Andrew Pinski 
Date:   Sat Apr 20 00:13:12 2024 -0700

PHIOPT: move factor_out_conditional_operation over to use gimple_match_op

To start working more with expressions that have more than one operand, 
converting
over to use gimple_match_op is needed.
The added side-effect here is that factor_out_conditional_operation can now 
support
builtins/internal calls that have one operand without any extra code added.

Note on the changed testcases:
* pr87007-5.c: the test was testing for avoiding partial register 
stalls
for the sqrt and making sure there is only one zeroing of the register before 
the
branch; phiopt would now merge the sqrts, so disable phiopt.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* gimple-match-exports.cc 
(gimple_match_op::operands_occurs_in_abnormal_phi):
New function.
* gimple-match.h (gimple_match_op): Add 
operands_occurs_in_abnormal_phi.
* tree-ssa-phiopt.cc (factor_out_conditional_operation): Use 
gimple_match_op
instead of manually extracting from/creating the gimple.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr87007-5.c: Disable phi-opt.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple-match-exports.cc   | 14 +++
 gcc/gimple-match.h|  2 +
 gcc/testsuite/gcc.target/i386/pr87007-5.c |  5 ++-
 gcc/tree-ssa-phiopt.cc| 66 ++-
 4 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc
index aacf3ff0414..15d54b7d843 100644
--- a/gcc/gimple-match-exports.cc
+++ b/gcc/gimple-match-exports.cc
@@ -126,6 +126,20 @@ gimple_match_op::resimplify (gimple_seq *seq, tree 
(*valueize)(tree))
 }
 }
 
+/* Returns true if any of the operands of THIS occurs
+   in abnormal phis. */
+bool
+gimple_match_op::operands_occurs_in_abnormal_phi() const
+{
+  for (unsigned int i = 0; i < num_ops; i++)
+{
+   if (TREE_CODE (ops[i]) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ops[i]))
+   return true;
+}
+  return false;
+}
+
 /* Return whether T is a constant that we'll dispatch to fold to
evaluate fully constant expressions.  */
 
diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h
index d710fcbace2..8edff578ba9 100644
--- a/gcc/gimple-match.h
+++ b/gcc/gimple-match.h
@@ -136,6 +136,8 @@ public:
 
   /* The operands to CODE.  Only the first NUM_OPS entries are meaningful.  */
   tree ops[MAX_NUM_OPS];
+
+  bool operands_occurs_in_abnormal_phi() const;
 };
 
 inline
diff --git a/gcc/testsuite/gcc.target/i386/pr87007-5.c 
b/gcc/testsuite/gcc.target/i386/pr87007-5.c
index 8f2dc947f6c..c696827df12 100644
--- a/gcc/testsuite/gcc.target/i386/pr87007-5.c
+++ b/gcc/testsuite/gcc.target/i386/pr87007-5.c
@@ -1,8 +1,11 @@
 /* { dg-do compile } */
-/* { dg-options "-Ofast -march=skylake-avx512 -mfpmath=sse -fno-tree-vectorize 
-fdump-tree-cddce3-details -fdump-tree-lsplit-optimized" } */
+/* { dg-options "-Ofast -march=skylake-avx512 -mfpmath=sse -fno-tree-vectorize 
-fdump-tree-cddce3-details -fdump-tree-lsplit-optimized -fno-ssa-phiopt" } */
 /* Load of d2/d3 is hoisted out, the loop is split, store of d1 and sqrt
are sunk out of the loop and the loop is elided.  One vsqrtsd with
memory operand needs a xor to avoid partial dependence.  */
+/* Phi-OPT needs to be disabled otherwise, sqrt calls are merged which is 
better
+   but we are testing to make sure the partial register stall for SSE is still 
avoided
+   for sqrts.  */
 
 #include
 
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index aa414f6..2d4aba5b087 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -220,13 +220,12 @@ static gphi *
 factor_out_conditional_operation (edge e0, edge e1, gphi *phi,
   tree arg0, tree arg1, gimple *cond_stmt)
 {
-  gimple *arg0_def_stmt = NULL, *arg1_def_stmt = NULL, *new_stmt;
-  tree new_arg0 = NULL_TREE, new_arg1 = NULL_TREE;
+  gimple *arg0_def_stmt = NULL, *arg1_def_stmt = NULL;
   tree temp, result;
   gphi *newphi;
   gimple_stmt_iterator gsi, gsi_for_def;
   location_t locus = gimple_location (phi);
-  enum tree_code op_code;
+  gimple_match_op arg0_op, arg1_op;
 
   /* Handle only PHI statements with two arguments.  TODO: If all
  other arguments to PHI are INTEGER_CST or if their defining
@@ -250,31 +249,31 @@ factor_out_conditional_operation (edge e0, edge e1, gphi 
*phi,
   /* Check if arg0 is an SSA_NAME and the stmt which defines arg0 is
  an unary operation.  */
   arg0_def_stmt = SSA_NAME_DEF_STMT (arg0);
-  if (!is_gimple_assign (arg0_def_stmt)
-  || (gimple_assign_rhs_class (arg0_def_stmt) != GIMPLE_UNARY_RHS

[gcc r15-139] Fix printing COMPOUND_EXPR in .original [PR23872]

2024-05-03 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:04f24e44fb14a22516444f70503719f3fda15d6c

commit r15-139-g04f24e44fb14a22516444f70503719f3fda15d6c
Author: Andrew Pinski 
Date:   Tue Apr 16 17:43:36 2024 -0700

Fix printing COMPOUND_EXPR in .original [PR23872]

Starting with the merge of the openmp branch into the trunk
(r0-73077-g953ff28998b59b), COMPOUND_EXPR started to be printed
as `expr; , expr` which is wrong. This was due to the wrong
conversion of dumping_stmts into `!(flags & TDF_SLIM)`. That is wrong
as we are not dumping stmts at this point (`!(flags & TDF_SLIM)` was always
true for this case as TDF_SLIM case was handled before hand). So switch it
to be always false.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR middle-end/23872
* tree-pretty-print.cc (dump_generic_node <COMPOUND_EXPR>): Fix
calls to dump_generic_node and also remove unreachable code that is 
testing
`flags & TDF_SLIM`.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/atomic-21.f90: Update testcase for the removal 
of `;`.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gfortran.dg/gomp/atomic-21.f90 |  4 ++--
 gcc/tree-pretty-print.cc | 24 +++-
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90 
b/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90
index febcdbbacfb..35099294d7a 100644
--- a/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90
@@ -56,7 +56,7 @@ subroutine foobar()
 endif
 
 !  TARGET_EXPR  = #pragma omp atomic capture acq_rel
-!TARGET_EXPR  = NON_LVALUE_EXPR  = 
*TARGET_EXPR  == oo> ? pp : *TARGET_EXPR ;, if 
(TARGET_EXPR )
+!TARGET_EXPR  = NON_LVALUE_EXPR  = 
*TARGET_EXPR  == oo> ? pp : *TARGET_EXPR , if 
(TARGET_EXPR )
 !{
 !  <<< Unknown tree: void_cst >>>
 !}
@@ -66,7 +66,7 @@ subroutine foobar()
 !};
 !
 ! { dg-final { scan-tree-dump-times "TARGET_EXPR  = #pragma omp 
atomic capture acq_rel" 1 "original" } }
-! { dg-final { scan-tree-dump-times "TARGET_EXPR  = 
NON_LVALUE_EXPR  = \\*TARGET_EXPR  
== oo> \\? pp : \\*TARGET_EXPR ;, if \\(TARGET_EXPR 
\\)" 1 "original" } }
+! { dg-final { scan-tree-dump-times "TARGET_EXPR  = 
NON_LVALUE_EXPR  = \\*TARGET_EXPR  
== oo> \\? pp : \\*TARGET_EXPR , if \\(TARGET_EXPR 
\\)" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "<<< Unknown tree: void_cst >>>" 1 
"original" } }
 ! { dg-final { scan-tree-dump-times "qq = TARGET_EXPR ;" 1 
"original" } }
 
diff --git a/gcc/tree-pretty-print.cc b/gcc/tree-pretty-print.cc
index c935a7da7d1..f9ad8562078 100644
--- a/gcc/tree-pretty-print.cc
+++ b/gcc/tree-pretty-print.cc
@@ -2856,31 +2856,21 @@ dump_generic_node (pretty_printer *pp, tree node, int 
spc, dump_flags_t flags,
  }
 
dump_generic_node (pp, TREE_OPERAND (node, 0),
-  spc, flags, !(flags & TDF_SLIM));
-   if (flags & TDF_SLIM)
- newline_and_indent (pp, spc);
-   else
- {
-   pp_comma (pp);
-   pp_space (pp);
- }
+  spc, flags, false);
+   pp_comma (pp);
+   pp_space (pp);
 
for (tp = &TREE_OPERAND (node, 1);
 TREE_CODE (*tp) == COMPOUND_EXPR;
 tp = &TREE_OPERAND (*tp, 1))
  {
dump_generic_node (pp, TREE_OPERAND (*tp, 0),
-  spc, flags, !(flags & TDF_SLIM));
-   if (flags & TDF_SLIM)
- newline_and_indent (pp, spc);
-   else
- {
-   pp_comma (pp);
-   pp_space (pp);
- }
+  spc, flags, false);
+   pp_comma (pp);
+   pp_space (pp);
  }
 
-   dump_generic_node (pp, *tp, spc, flags, !(flags & TDF_SLIM));
+   dump_generic_node (pp, *tp, spc, flags, false);
   }
   break;


[gcc r15-166] Remove m_nloops field from loop_versioning

2024-05-04 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:ffb521f157fec823907913602ed5cb73dd1c63e8

commit r15-166-gffb521f157fec823907913602ed5cb73dd1c63e8
Author: Andrew Pinski 
Date:   Sat Apr 27 18:54:43 2024 -0700

Remove m_nloops field from loop_versioning

This is a small cleanup of loop_versioning where m_nloops
is only used in the constructor so we can remove the whole
field.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* gimple-loop-versioning.cc (loop_versioning): Remove m_nloops 
field.
(loop_versioning::loop_versioning): Remove initialization of
m_nloops field and move it to be a local variable.
(loop_versioning::analyze_blocks): Fix formatting.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple-loop-versioning.cc | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
index 17877f06921..adea207659b 100644
--- a/gcc/gimple-loop-versioning.cc
+++ b/gcc/gimple-loop-versioning.cc
@@ -322,9 +322,6 @@ private:
   /* An obstack to use for general allocation.  */
   obstack m_obstack;
 
-  /* The number of loops in the function.  */
-  unsigned int m_nloops;
-
   /* The total number of loop version conditions we've found.  */
   unsigned int m_num_conditions;
 
@@ -525,10 +522,10 @@ loop_versioning::name_prop::value_of_expr (tree val, 
gimple *)
 
 loop_versioning::loop_versioning (function *fn)
   : m_fn (fn),
-m_nloops (number_of_loops (fn)),
 m_num_conditions (0),
 m_address_table (31)
 {
+  unsigned m_nloops = number_of_loops (fn);
   bitmap_obstack_initialize (&m_bitmap_obstack);
   gcc_obstack_init (&m_obstack);
 
@@ -1437,7 +1434,7 @@ loop_versioning::analyze_blocks ()
  {
linfo.rejected_p = true;
break;
-   }
+ }
 
  if (!linfo.rejected_p)
{


[gcc r15-212] aarch64: Fix gcc.target/aarch64/sve/loop_add_6.c for LLP64 targets

2024-05-06 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:53026cbf08889d00fed34d8667796d22ef8554cf

commit r15-212-g53026cbf08889d00fed34d8667796d22ef8554cf
Author: Andrew Pinski 
Date:   Mon May 6 12:20:17 2024 -0700

aarch64: Fix gcc.target/aarch64/sve/loop_add_6.c for LLP64 targets

Even though the aarch64-mingw32 support has not been committed yet,
we should fix some of the testcases. In this case 
gcc.target/aarch64/sve/loop_add_6.c
is easy to fix. We should use __SIZETYPE__ instead of `unsigned long` for 
the variables
that will be used for pointer plus.

Committed as obvious after a quick test on aarch64-linux-gnu.

gcc/testsuite/ChangeLog:

PR testsuite/114177
* gcc.target/aarch64/sve/loop_add_6.c: Use __SIZETYPE__ instead
of `unsigned long` for index and offset variables.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.target/aarch64/sve/loop_add_6.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_6.c 
b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_6.c
index e7416ebcded..a530998f54b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_6.c
@@ -5,8 +5,8 @@ double __GIMPLE (ssa, startwith("loop"))
 neg_xi (double *x)
 {
   int i;
-  long unsigned int index;
-  long unsigned int offset;
+  __SIZETYPE__ index;
+  __SIZETYPE__ offset;
   double * xi_ptr;
   double xi;
   double neg_xi;
@@ -20,8 +20,8 @@ neg_xi (double *x)
   res_1 = __PHI (__BB5: 0.0, __BB3: res_2);
   i_4 = __PHI (__BB5: 0, __BB3: i_5);
   ivtmp_6 = __PHI (__BB5: 100U, __BB3: ivtmp_7);
-  index = (long unsigned int) i_4;
-  offset = index * 8UL;
+  index = (__SIZETYPE__ ) i_4;
+  offset = index * _Literal (__SIZETYPE__) 8;
   xi_ptr = x_8(D) + offset;
   xi = *xi_ptr;
   neg_xi = -xi;


[gcc r15-237] Mention that some options are turned on by `-Ofast` in their descriptions [PR97263]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:0920e64ffa55adb063174559b274b0a521f34934

commit r15-237-g0920e64ffa55adb063174559b274b0a521f34934
Author: Andrew Pinski 
Date:   Mon May 6 14:14:41 2024 -0700

Mention that some options are turned on by `-Ofast` in their descriptions 
[PR97263]

As was done for -ffast-math in r0-105946-ga570fc16fa8056, we should
document that -Ofast enables -fno-math-errno, -funsafe-math-optimizations,
-ffinite-math-only, and -fno-trapping-math in their documentation.

Note this changes the stronger "must not" to be "is not" for 
-fno-trapping-math
since we do enable it for -Ofast already.

gcc/ChangeLog:

PR middle-end/97263
* doc/invoke.texi(fmath-errno): Document it is turned on
with -Ofast.
(funsafe-math-optimizations): Likewise.
(ffinite-math-only): Likewise.
(fno-trapping-math): Likewise and use less strong language.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/doc/invoke.texi | 41 ++---
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index dc4c5a3189d..ed03a613b4b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14996,11 +14996,12 @@ with a single instruction, e.g., @code{sqrt}.  A 
program that relies on
 IEEE exceptions for math error handling may want to use this flag
 for speed while maintaining IEEE arithmetic compatibility.
 
-This option is not turned on by any @option{-O} option since
-it can result in incorrect output for programs that depend on
-an exact implementation of IEEE or ISO rules/specifications for
-math functions. It may, however, yield faster code for programs
-that do not require the guarantees of these specifications.
+This option is not turned on by any @option{-O} option  besides
+@option{-Ofast} since it can result in incorrect output for
+programs that depend on an exact implementation of IEEE or
+ISO rules/specifications for math functions. It may, however,
+yield faster code for programs that do not require the guarantees
+of these specifications.
 
 The default is @option{-fmath-errno}.
 
@@ -15017,11 +15018,12 @@ ANSI standards.  When used at link time, it may 
include libraries
 or startup files that change the default FPU control word or other
 similar optimizations.
 
-This option is not turned on by any @option{-O} option since
-it can result in incorrect output for programs that depend on
-an exact implementation of IEEE or ISO rules/specifications for
-math functions. It may, however, yield faster code for programs
-that do not require the guarantees of these specifications.
+This option is not turned on by any @option{-O} option besides
+@option{-Ofast} since it can result in incorrect output
+for programs that depend on an exact implementation of IEEE
+or ISO rules/specifications for math functions. It may, however,
+yield faster code for programs that do not require the guarantees
+of these specifications.
 Enables @option{-fno-signed-zeros}, @option{-fno-trapping-math},
 @option{-fassociative-math} and @option{-freciprocal-math}.
 
@@ -15061,11 +15063,12 @@ The default is @option{-fno-reciprocal-math}.
 Allow optimizations for floating-point arithmetic that assume
 that arguments and results are not NaNs or +-Infs.
 
-This option is not turned on by any @option{-O} option since
-it can result in incorrect output for programs that depend on
-an exact implementation of IEEE or ISO rules/specifications for
-math functions. It may, however, yield faster code for programs
-that do not require the guarantees of these specifications.
+This option is not turned on by any @option{-O} option besides
+@option{-Ofast} since it can result in incorrect output
+for programs that depend on an exact implementation of IEEE or
+ISO rules/specifications for math functions. It may, however,
+yield faster code for programs that do not require the guarantees
+of these specifications.
 
 The default is @option{-fno-finite-math-only}.
 
@@ -15089,10 +15092,10 @@ underflow, inexact result and invalid operation.  
This option requires
 that @option{-fno-signaling-nans} be in effect.  Setting this option may
 allow faster code if one relies on ``non-stop'' IEEE arithmetic, for example.
 
-This option should never be turned on by any @option{-O} option since
-it can result in incorrect output for programs that depend on
-an exact implementation of IEEE or ISO rules/specifications for
-math functions.
+This option is not turned on by any @option{-O} option besides
+@option{-Ofast} since it can result in incorrect output for programs
+that depend on an exact implementation of IEEE or ISO rules/specifications
+for math functions.
 
 The default is @option{-ftrapping-math}.


[gcc r15-307] MATCH: Add some more value_replacement simplifications (a != 0 ? expr : 0) to match

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:e472527c7b45d23e8dfd0fb767a6e663b4bc136e

commit r15-307-ge472527c7b45d23e8dfd0fb767a6e663b4bc136e
Author: Andrew Pinski 
Date:   Tue Apr 30 14:45:26 2024 -0700

MATCH: Add some more value_replacement simplifications (a != 0 ? expr : 0) 
to match

This adds a few more of what is currently done in phiopt's value_replacement
to match. I noticed this when I was hooking up phiopt's value_replacement
code to use match and disabling the old code. But this can be done
independently of hooking up phiopt's value_replacement, as phiopt
is already hooked up for the simplified versions.

/* a != 0 ? a / b : 0  -> a / b iff b is nonzero. */
/* a != 0 ? a * b : 0 -> a * b */
/* a != 0 ? a & b : 0 -> a & b */

We prefer the `cond ? a : 0` forms to allow optimization of `a * cond` which
uses that form.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/114894

gcc/ChangeLog:

* match.pd (`a != 0 ? a / b : 0`): New pattern.
(`a != 0 ? a * b : 0`): New pattern.
(`a != 0 ? a & b : 0`): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-value-5.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd| 18 
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-5.c | 39 +
 2 files changed, 57 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index d401e7503e62..03a03c31233c 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4290,6 +4290,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cond (eq @0 integer_all_onesp) @1 (op:c@2 @1 @0))
@2))
 
+/* a != 0 ? a / b : 0  -> a / b iff b is nonzero. */
+(for op (trunc_div ceil_div floor_div round_div exact_div)
+ (simplify
+  (cond (ne @0 integer_zerop) (op@2 @3 @1) integer_zerop )
+   (if (bitwise_equal_p (@0, @3)
+&& tree_expr_nonzero_p (@1))
+@2)))
+
+/* Note we prefer the != case here
+   as (a != 0) * (a * b) will generate that version. */
+/* a != 0 ? a * b : 0 -> a * b */
+/* a != 0 ? a & b : 0 -> a & b */
+(for op (mult bit_and)
+ (simplify
+  (cond (ne @0 integer_zerop) (op:c@2 @1 @3) integer_zerop)
+  (if (bitwise_equal_p (@0, @3))
+   @2)))
+
 /* Simplifications of shift and rotates.  */
 
 (for rotate (lrotate rrotate)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-5.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-5.c
new file mode 100644
index ..8062eb19b113
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-5.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* PR treee-optimization/114894 */
+/* Phi-OPT should be able to optimize these without sinking being invoked. */
+/* { dg-options "-O -fdump-tree-phiopt2 -fdump-tree-phiopt3 
-fdump-tree-optimized -fno-tree-sink" } */
+
+int fmul1(int a, int b)
+{
+  int c = a * b;
+  if (a != 0)
+return c;
+  return 0;
+}
+
+
+int fand1(int a, int b)
+{
+  int c = a & b;
+  if (a != 0)
+return c;
+  return 0;
+}
+
+
+void g(int);
+
+int fdiv1(int a, int b)
+{
+  int d = b|1;
+  g(d);
+  int c = a / d;
+  return a != 0 ? c : 0;
+}
+
+/* fdiv1 requires until later than phiopt2 to be able to detect that
+   d is non-zero. to be able to remove the conditional.  */
+/* { dg-final { scan-tree-dump-times "goto" 2 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-not "goto" "phiopt3" } } */
+/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
+


[gcc r15-308] DCE __cxa_atexit calls where the function is pure/const [PR19661]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c9dd853680b12d9c9def5de61abde5d057c526ba

commit r15-308-gc9dd853680b12d9c9def5de61abde5d057c526ba
Author: Andrew Pinski 
Date:   Fri Mar 15 16:34:22 2024 -0700

DCE __cxa_atexit calls where the function is pure/const [PR19661]

In C++ sometimes you have a destructor function which is "empty", for
example with unions or with arrays.  The front-end might not know it is
empty either, so this should be done during optimization.
To implement it I added it to DCE where we mark if a statement is necessary
or not.
Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Changes since v1:
  * v2: Add support for __aeabi_atexit for arm-*eabi. Add extra comments.
Add cxa_atexit-5.C testcase for -fPIC case.
  * v3: Fix testcases for the __aeabi_atexit (forgot to do in the v2).

PR tree-optimization/19661

gcc/ChangeLog:

* tree-ssa-dce.cc (is_cxa_atexit): New function.
(is_removable_cxa_atexit_call): New function.
(mark_stmt_if_obviously_necessary): Don't mark removable
cxa_at_exit calls.
(mark_all_reaching_defs_necessary_1): Likewise.
(propagate_necessity): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/tree-ssa/cxa_atexit-1.C: New test.
* g++.dg/tree-ssa/cxa_atexit-2.C: New test.
* g++.dg/tree-ssa/cxa_atexit-3.C: New test.
* g++.dg/tree-ssa/cxa_atexit-4.C: New test.
* g++.dg/tree-ssa/cxa_atexit-5.C: New test.
* g++.dg/tree-ssa/cxa_atexit-6.C: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-1.C | 20 ++
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-2.C | 21 ++
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-3.C | 19 +
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-4.C | 20 ++
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-5.C | 39 +++
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-6.C | 24 
 gcc/tree-ssa-dce.cc  | 58 
 7 files changed, 201 insertions(+)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-1.C 
b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-1.C
new file mode 100644
index ..82ff3d2b7783
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-1.C
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cddce1-details -fdump-tree-optimized" } */
+// { dg-require-effective-target cxa_atexit }
+/* PR tree-optimization/19661 */
+
+/* The call to axexit should be removed as A::~A() is a pure/const function 
call
+   and there is no visible effect if A::~A() call does not happen.  */
+
+struct A { 
+A(); 
+~A() {} 
+}; 
+ 
+void foo () { 
+  static A a; 
+} 
+
+/* { dg-final { scan-tree-dump-times "Deleting : 
(?:__cxxabiv1::__cxa_atexit|__aeabiv1::__aeabi_atexit)" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-not "__cxa_atexit|__aeabi_atexit" "optimized" } 
} */
+
diff --git a/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-2.C 
b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-2.C
new file mode 100644
index ..726b6d7f1561
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-2.C
@@ -0,0 +1,21 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2 -fdump-tree-cddce1-details -fdump-tree-optimized" } */
+// { dg-require-effective-target cxa_atexit }
+/* PR tree-optimization/19661 */
+
+/* The call to axexit should be not removed as A::~A() as it marked with 
noipa.  */
+
+struct A { 
+A(); 
+~A();
+}; 
+
+[[gnu::noipa]] A::~A() {}
+ 
+void foo () { 
+  static A a; 
+} 
+
+/* { dg-final { scan-tree-dump-not "Deleting : 
(?:__cxxabiv1::__cxa_atexit|__aeabiv1::__aeabi_atexit)" "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "(?:__cxa_atexit|__aeabi_atexit)" 1 
"optimized" } } */
+
diff --git a/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-3.C 
b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-3.C
new file mode 100644
index ..42cc7ccb11ba
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-3.C
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cddce1-details -fdump-tree-optimized" } */
+// { dg-require-effective-target cxa_atexit }
+/* PR tree-optimization/19661 */
+
+/* We should not remove the call to atexit as A::~A is unknown.  */
+
+struct A { 
+A(); 
+~A();
+}; 
+
+void foo () { 
+  static A a; 
+} 
+
+/* { dg-final { scan-tree-dump-not "Deleting : 
(?:__cxxabiv1::__cxa_atexit|__aeabiv1::__aeabi_atexit)" "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "(?:__cxa_atexit|__aeabi_atexit)" 1 
"optimized" } } */
+
diff --git a/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-4.C 
b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-4.C
new file mode 100644
index ..591c1c0552a1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-4.C
@@ -0,0 +1,20 @@
+/* { dg-do com

[gcc r15-309] c++/c-common: Fix convert_vector_to_array_for_subscript for qualified vector types [PR89224]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:4421d35167b3083e0f2e4c84c91fded09a30cf22

commit r15-309-g4421d35167b3083e0f2e4c84c91fded09a30cf22
Author: Andrew Pinski 
Date:   Tue Feb 20 13:38:28 2024 -0800

c++/c-common: Fix convert_vector_to_array_for_subscript for qualified 
vector types [PR89224]

After r7-987-gf17a223de829cb, the access for the elements of a vector type 
would lose the qualifiers.
So if we had `constvector[0]`, the type of the element of the array would 
not have const on it.
This was due to a missing build_qualified_type for the inner type of the 
vector when building the array type.
We need to add back the call to build_qualified_type and now the access has 
the correct qualifiers. So overload resolution, and whether the access is an
lvalue or rvalue, is now handled correctly.

Note we correctly now reject the testcase gcc.dg/pr83415.c which was 
incorrectly accepted after r7-987-gf17a223de829cb.

Built and tested for aarch64-linux-gnu.

PR c++/89224

gcc/c-family/ChangeLog:

* c-common.cc (convert_vector_to_array_for_subscript): Call 
build_qualified_type
for the inner type.

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_array_reference): Compare main variants
for the vector/array types instead of the types directly.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-subaccess-1.C: New test.
* gcc.dg/pr83415.c: Change warning to error.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/c-family/c-common.cc  |  7 ++-
 gcc/cp/constexpr.cc   |  3 ++-
 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C | 23 +++
 gcc/testsuite/gcc.dg/pr83415.c|  2 +-
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 032dcb4b41d5..aae998d0f738 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -8964,6 +8964,7 @@ convert_vector_to_array_for_subscript (location_t loc,
   if (gnu_vector_type_p (TREE_TYPE (*vecp)))
 {
   tree type = TREE_TYPE (*vecp);
+  tree newitype;
 
   ret = !lvalue_p (*vecp);
 
@@ -8978,8 +8979,12 @@ convert_vector_to_array_for_subscript (location_t loc,
 for function parameters.  */
   c_common_mark_addressable_vec (*vecp);
 
+  /* Make sure qualifiers are copied from the vector type to the new 
element
+of the array type.  */
+  newitype = build_qualified_type (TREE_TYPE (type), TYPE_QUALS (type));
+
   *vecp = build1 (VIEW_CONVERT_EXPR,
- build_array_type_nelts (TREE_TYPE (type),
+ build_array_type_nelts (newitype,
  TYPE_VECTOR_SUBPARTS (type)),
  *vecp);
 }
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 50f799d7ff7c..bd72533491e5 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -4424,7 +4424,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree 
t,
   if (!lval
   && TREE_CODE (ary) == VIEW_CONVERT_EXPR
   && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (ary, 0)))
-  && TREE_TYPE (t) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0
+  && (TYPE_MAIN_VARIANT (TREE_TYPE (t))
+ == TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0))
 ary = TREE_OPERAND (ary, 0);
 
   tree oldidx = TREE_OPERAND (t, 1);
diff --git a/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C 
b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
new file mode 100644
index ..0c8958a4e034
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
@@ -0,0 +1,23 @@
+/* PR c++/89224 */
+
+/* The access of `vector[i]` has the same qualifiers as the original
+   vector which was missing. */
+
+typedef __attribute__((vector_size(16))) unsigned char  Int8x8_t;
+
+template 
+void g(T &x) {
+__builtin_abort();
+}
+template 
+void g(const T &x) {
+  __builtin_exit(0);
+}
+void f(const Int8x8_t x) {
+  g(x[0]);
+}
+int main(void)
+{
+Int8x8_t x ={};
+f(x);
+}
diff --git a/gcc/testsuite/gcc.dg/pr83415.c b/gcc/testsuite/gcc.dg/pr83415.c
index 5934c16d97cb..2fc85031505d 100644
--- a/gcc/testsuite/gcc.dg/pr83415.c
+++ b/gcc/testsuite/gcc.dg/pr83415.c
@@ -7,6 +7,6 @@ int
 main (int argc, short *argv[])
 {
   int i = argc;
-  y[i] = 7 - i; /* { dg-warning "read-only" } */
+  y[i] = 7 - i; /* { dg-error "read-only" } */
   return 0;
 }


[gcc r14-10183] c++/c-common: Fix convert_vector_to_array_for_subscript for qualified vector types [PR89224]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:cacc48014c7fdb888b4449830b567e5375dfb4e3

commit r14-10183-gcacc48014c7fdb888b4449830b567e5375dfb4e3
Author: Andrew Pinski 
Date:   Tue Feb 20 13:38:28 2024 -0800

c++/c-common: Fix convert_vector_to_array_for_subscript for qualified 
vector types [PR89224]

After r7-987-gf17a223de829cb, the access for the elements of a vector type 
would lose the qualifiers.
So if we had `constvector[0]`, the type of the element of the array would 
not have const on it.
This was due to a missing build_qualified_type for the inner type of the 
vector when building the array type.
We need to add back the call to build_qualified_type and now the access has 
the correct qualifiers. So overload resolution, and whether the access is an
lvalue or rvalue, is now handled correctly.

Note we correctly now reject the testcase gcc.dg/pr83415.c which was 
incorrectly accepted after r7-987-gf17a223de829cb.

Built and tested for aarch64-linux-gnu.

PR c++/89224

gcc/c-family/ChangeLog:

* c-common.cc (convert_vector_to_array_for_subscript): Call 
build_qualified_type
for the inner type.

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_array_reference): Compare main variants
for the vector/array types instead of the types directly.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-subaccess-1.C: New test.
* gcc.dg/pr83415.c: Change warning to error.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 4421d35167b3083e0f2e4c84c91fded09a30cf22)

Diff:
---
 gcc/c-family/c-common.cc  |  7 ++-
 gcc/cp/constexpr.cc   |  3 ++-
 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C | 23 +++
 gcc/testsuite/gcc.dg/pr83415.c|  2 +-
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 01e3d247fc28..d14591c7bd3b 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -8959,6 +8959,7 @@ convert_vector_to_array_for_subscript (location_t loc,
   if (gnu_vector_type_p (TREE_TYPE (*vecp)))
 {
   tree type = TREE_TYPE (*vecp);
+  tree newitype;
 
   ret = !lvalue_p (*vecp);
 
@@ -8973,8 +8974,12 @@ convert_vector_to_array_for_subscript (location_t loc,
 for function parameters.  */
   c_common_mark_addressable_vec (*vecp);
 
+  /* Make sure qualifiers are copied from the vector type to the new 
element
+of the array type.  */
+  newitype = build_qualified_type (TREE_TYPE (type), TYPE_QUALS (type));
+
   *vecp = build1 (VIEW_CONVERT_EXPR,
- build_array_type_nelts (TREE_TYPE (type),
+ build_array_type_nelts (newitype,
  TYPE_VECTOR_SUBPARTS (type)),
  *vecp);
 }
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 8078b31544d1..4a5444e0258a 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -4430,7 +4430,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree 
t,
   if (!lval
   && TREE_CODE (ary) == VIEW_CONVERT_EXPR
   && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (ary, 0)))
-  && TREE_TYPE (t) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0
+  && (TYPE_MAIN_VARIANT (TREE_TYPE (t))
+ == TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0))
 ary = TREE_OPERAND (ary, 0);
 
   tree oldidx = TREE_OPERAND (t, 1);
diff --git a/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C 
b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
new file mode 100644
index ..0c8958a4e034
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
@@ -0,0 +1,23 @@
+/* PR c++/89224 */
+
+/* The access of `vector[i]` has the same qualifiers as the original
+   vector which was missing. */
+
+typedef __attribute__((vector_size(16))) unsigned char  Int8x8_t;
+
+template 
+void g(T &x) {
+__builtin_abort();
+}
+template 
+void g(const T &x) {
+  __builtin_exit(0);
+}
+void f(const Int8x8_t x) {
+  g(x[0]);
+}
+int main(void)
+{
+Int8x8_t x ={};
+f(x);
+}
diff --git a/gcc/testsuite/gcc.dg/pr83415.c b/gcc/testsuite/gcc.dg/pr83415.c
index 5934c16d97cb..2fc85031505d 100644
--- a/gcc/testsuite/gcc.dg/pr83415.c
+++ b/gcc/testsuite/gcc.dg/pr83415.c
@@ -7,6 +7,6 @@ int
 main (int argc, short *argv[])
 {
   int i = argc;
-  y[i] = 7 - i; /* { dg-warning "read-only" } */
+  y[i] = 7 - i; /* { dg-error "read-only" } */
   return 0;
 }


[gcc r13-8713] c++/c-common: Fix convert_vector_to_array_for_subscript for qualified vector types [PR89224]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:f5d9eef6507f36692066c0934d9f8c9d462e698f

commit r13-8713-gf5d9eef6507f36692066c0934d9f8c9d462e698f
Author: Andrew Pinski 
Date:   Tue Feb 20 13:38:28 2024 -0800

c++/c-common: Fix convert_vector_to_array_for_subscript for qualified 
vector types [PR89224]

After r7-987-gf17a223de829cb, the access for the elements of a vector type 
would lose the qualifiers.
So if we had `constvector[0]`, the type of the element of the array would 
not have const on it.
This was due to a missing build_qualified_type for the inner type of the 
vector when building the array type.
We need to add back the call to build_qualified_type and now the access has 
the correct qualifiers. So overload resolution, and whether the access is an
lvalue or rvalue, is now handled correctly.

Note we correctly now reject the testcase gcc.dg/pr83415.c which was 
incorrectly accepted after r7-987-gf17a223de829cb.

Built and tested for aarch64-linux-gnu.

PR c++/89224

gcc/c-family/ChangeLog:

* c-common.cc (convert_vector_to_array_for_subscript): Call 
build_qualified_type
for the inner type.

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_array_reference): Compare main variants
for the vector/array types instead of the types directly.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-subaccess-1.C: New test.
* gcc.dg/pr83415.c: Change warning to error.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 4421d35167b3083e0f2e4c84c91fded09a30cf22)

Diff:
---
 gcc/c-family/c-common.cc  |  7 ++-
 gcc/cp/constexpr.cc   |  3 ++-
 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C | 23 +++
 gcc/testsuite/gcc.dg/pr83415.c|  2 +-
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index d423cbbacaee..303d7f1ef5de 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -8545,6 +8545,7 @@ convert_vector_to_array_for_subscript (location_t loc,
   if (gnu_vector_type_p (TREE_TYPE (*vecp)))
 {
   tree type = TREE_TYPE (*vecp);
+  tree newitype;
 
   ret = !lvalue_p (*vecp);
 
@@ -8559,8 +8560,12 @@ convert_vector_to_array_for_subscript (location_t loc,
 for function parameters.  */
   c_common_mark_addressable_vec (*vecp);
 
+  /* Make sure qualifiers are copied from the vector type to the new 
element
+of the array type.  */
+  newitype = build_qualified_type (TREE_TYPE (type), TYPE_QUALS (type));
+
   *vecp = build1 (VIEW_CONVERT_EXPR,
- build_array_type_nelts (TREE_TYPE (type),
+ build_array_type_nelts (newitype,
  TYPE_VECTOR_SUBPARTS (type)),
  *vecp);
 }
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index a3c21e88e7ba..216b98122007 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -4187,7 +4187,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree 
t,
   if (!lval
   && TREE_CODE (ary) == VIEW_CONVERT_EXPR
   && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (ary, 0)))
-  && TREE_TYPE (t) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0
+  && (TYPE_MAIN_VARIANT (TREE_TYPE (t))
+ == TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0))
 ary = TREE_OPERAND (ary, 0);
 
   tree oldidx = TREE_OPERAND (t, 1);
diff --git a/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C 
b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
new file mode 100644
index ..0c8958a4e034
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
@@ -0,0 +1,23 @@
+/* PR c++/89224 */
+
+/* The access of `vector[i]` has the same qualifiers as the original
+   vector which was missing. */
+
+typedef __attribute__((vector_size(16))) unsigned char  Int8x8_t;
+
+template 
+void g(T &x) {
+__builtin_abort();
+}
+template 
+void g(const T &x) {
+  __builtin_exit(0);
+}
+void f(const Int8x8_t x) {
+  g(x[0]);
+}
+int main(void)
+{
+Int8x8_t x ={};
+f(x);
+}
diff --git a/gcc/testsuite/gcc.dg/pr83415.c b/gcc/testsuite/gcc.dg/pr83415.c
index 5934c16d97cb..2fc85031505d 100644
--- a/gcc/testsuite/gcc.dg/pr83415.c
+++ b/gcc/testsuite/gcc.dg/pr83415.c
@@ -7,6 +7,6 @@ int
 main (int argc, short *argv[])
 {
   int i = argc;
-  y[i] = 7 - i; /* { dg-warning "read-only" } */
+  y[i] = 7 - i; /* { dg-error "read-only" } */
   return 0;
 }


[gcc r12-10420] c++/c-common: Fix convert_vector_to_array_for_subscript for qualified vector types [PR89224]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:315f8a474eb1a9b2d213aa650bdb132c78546264

commit r12-10420-g315f8a474eb1a9b2d213aa650bdb132c78546264
Author: Andrew Pinski 
Date:   Tue Feb 20 13:38:28 2024 -0800

c++/c-common: Fix convert_vector_to_array_for_subscript for qualified 
vector types [PR89224]

After r7-987-gf17a223de829cb, the access for the elements of a vector type 
would lose the qualifiers.
So if we had `constvector[0]`, the type of the element of the array would 
not have const on it.
This was due to a missing build_qualified_type for the inner type of the 
vector when building the array type.
We need to add back the call to build_qualified_type and now the access has 
the correct qualifiers. So overload resolution, and whether the access is an
lvalue or rvalue, is now handled correctly.

Note we correctly now reject the testcase gcc.dg/pr83415.c which was 
incorrectly accepted after r7-987-gf17a223de829cb.

Built and tested for aarch64-linux-gnu.

PR c++/89224

gcc/c-family/ChangeLog:

* c-common.cc (convert_vector_to_array_for_subscript): Call 
build_qualified_type
for the inner type.

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_array_reference): Compare main variants
for the vector/array types instead of the types directly.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-subaccess-1.C: New test.
* gcc.dg/pr83415.c: Change warning to error.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 4421d35167b3083e0f2e4c84c91fded09a30cf22)

Diff:
---
 gcc/c-family/c-common.cc  |  7 ++-
 gcc/cp/constexpr.cc   |  3 ++-
 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C | 23 +++
 gcc/testsuite/gcc.dg/pr83415.c|  2 +-
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 9d1faf8ae167..94bef24220b4 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -8511,6 +8511,7 @@ convert_vector_to_array_for_subscript (location_t loc,
   if (gnu_vector_type_p (TREE_TYPE (*vecp)))
 {
   tree type = TREE_TYPE (*vecp);
+  tree newitype;
 
   ret = !lvalue_p (*vecp);
 
@@ -8525,8 +8526,12 @@ convert_vector_to_array_for_subscript (location_t loc,
 for function parameters.  */
   c_common_mark_addressable_vec (*vecp);
 
+  /* Make sure qualifiers are copied from the vector type to the new 
element
+of the array type.  */
+  newitype = build_qualified_type (TREE_TYPE (type), TYPE_QUALS (type));
+
   *vecp = build1 (VIEW_CONVERT_EXPR,
- build_array_type_nelts (TREE_TYPE (type),
+ build_array_type_nelts (newitype,
  TYPE_VECTOR_SUBPARTS (type)),
  *vecp);
 }
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index d2d02c282cd3..41f862e7056e 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -3932,7 +3932,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree 
t,
   if (!lval
   && TREE_CODE (ary) == VIEW_CONVERT_EXPR
   && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (ary, 0)))
-  && TREE_TYPE (t) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0
+  && (TYPE_MAIN_VARIANT (TREE_TYPE (t))
+ == TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0))
 ary = TREE_OPERAND (ary, 0);
 
   tree oldidx = TREE_OPERAND (t, 1);
diff --git a/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C 
b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
new file mode 100644
index ..0c8958a4e034
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
@@ -0,0 +1,23 @@
+/* PR c++/89224 */
+
+/* The access of `vector[i]` has the same qualifiers as the original
+   vector which was missing. */
+
+typedef __attribute__((vector_size(16))) unsigned char  Int8x8_t;
+
+template 
+void g(T &x) {
+__builtin_abort();
+}
+template 
+void g(const T &x) {
+  __builtin_exit(0);
+}
+void f(const Int8x8_t x) {
+  g(x[0]);
+}
+int main(void)
+{
+Int8x8_t x ={};
+f(x);
+}
diff --git a/gcc/testsuite/gcc.dg/pr83415.c b/gcc/testsuite/gcc.dg/pr83415.c
index 5934c16d97cb..2fc85031505d 100644
--- a/gcc/testsuite/gcc.dg/pr83415.c
+++ b/gcc/testsuite/gcc.dg/pr83415.c
@@ -7,6 +7,6 @@ int
 main (int argc, short *argv[])
 {
   int i = argc;
-  y[i] = 7 - i; /* { dg-warning "read-only" } */
+  y[i] = 7 - i; /* { dg-error "read-only" } */
   return 0;
 }


[gcc r11-11419] c++/c-common: Fix convert_vector_to_array_for_subscript for qualified vector types [PR89224]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:046aeffba336295fbdaf0e1ecf64b582d08f0aa6

commit r11-11419-g046aeffba336295fbdaf0e1ecf64b582d08f0aa6
Author: Andrew Pinski 
Date:   Tue Feb 20 13:38:28 2024 -0800

c++/c-common: Fix convert_vector_to_array_for_subscript for qualified 
vector types [PR89224]

After r7-987-gf17a223de829cb, the access for the elements of a vector type 
would lose the qualifiers.
So if we had `constvector[0]`, the type of the element of the array would 
not have const on it.
This was due to a missing build_qualified_type for the inner type of the 
vector when building the array type.
We need to add back the call to build_qualified_type and now the access has 
the correct qualifiers. So overload resolution, and whether the access is an
lvalue or rvalue, is now handled correctly.

Note we correctly now reject the testcase gcc.dg/pr83415.c which was 
incorrectly accepted after r7-987-gf17a223de829cb.

Built and tested for aarch64-linux-gnu.

PR c++/89224

gcc/c-family/ChangeLog:

* c-common.c (convert_vector_to_array_for_subscript): Call 
build_qualified_type
for the inner type.

gcc/cp/ChangeLog:

* constexpr.c (cxx_eval_array_reference): Compare main variants
for the vector/array types instead of the types directly.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-subaccess-1.C: New test.
* gcc.dg/pr83415.c: Change warning to error.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 4421d35167b3083e0f2e4c84c91fded09a30cf22)

Diff:
---
 gcc/c-family/c-common.c   |  7 ++-
 gcc/cp/constexpr.c|  3 ++-
 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C | 23 +++
 gcc/testsuite/gcc.dg/pr83415.c|  2 +-
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 9417b7fb4d1f..ae3ef89b05cb 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -8274,6 +8274,7 @@ convert_vector_to_array_for_subscript (location_t loc,
   if (gnu_vector_type_p (TREE_TYPE (*vecp)))
 {
   tree type = TREE_TYPE (*vecp);
+  tree newitype;
 
   ret = !lvalue_p (*vecp);
 
@@ -8288,8 +8289,12 @@ convert_vector_to_array_for_subscript (location_t loc,
 for function parameters.  */
   c_common_mark_addressable_vec (*vecp);
 
+  /* Make sure qualifiers are copied from the vector type to the new 
element
+of the array type.  */
+  newitype = build_qualified_type (TREE_TYPE (type), TYPE_QUALS (type));
+
   *vecp = build1 (VIEW_CONVERT_EXPR,
- build_array_type_nelts (TREE_TYPE (type),
+ build_array_type_nelts (newitype,
  TYPE_VECTOR_SUBPARTS (type)),
  *vecp);
 }
diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 38f684144f0c..eb18b5b35378 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -3767,7 +3767,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree 
t,
   if (!lval
   && TREE_CODE (ary) == VIEW_CONVERT_EXPR
   && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (ary, 0)))
-  && TREE_TYPE (t) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0
+  && (TYPE_MAIN_VARIANT (TREE_TYPE (t))
+ == TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0))
 ary = TREE_OPERAND (ary, 0);
 
   tree oldidx = TREE_OPERAND (t, 1);
diff --git a/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C 
b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
new file mode 100644
index ..0c8958a4e034
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
@@ -0,0 +1,23 @@
+/* PR c++/89224 */
+
+/* The access of `vector[i]` has the same qualifiers as the original
+   vector which was missing. */
+
+typedef __attribute__((vector_size(16))) unsigned char  Int8x8_t;
+
+template 
+void g(T &x) {
+__builtin_abort();
+}
+template 
+void g(const T &x) {
+  __builtin_exit(0);
+}
+void f(const Int8x8_t x) {
+  g(x[0]);
+}
+int main(void)
+{
+Int8x8_t x ={};
+f(x);
+}
diff --git a/gcc/testsuite/gcc.dg/pr83415.c b/gcc/testsuite/gcc.dg/pr83415.c
index 5934c16d97cb..2fc85031505d 100644
--- a/gcc/testsuite/gcc.dg/pr83415.c
+++ b/gcc/testsuite/gcc.dg/pr83415.c
@@ -7,6 +7,6 @@ int
 main (int argc, short *argv[])
 {
   int i = argc;
-  y[i] = 7 - i; /* { dg-warning "read-only" } */
+  y[i] = 7 - i; /* { dg-error "read-only" } */
   return 0;
 }


[gcc r15-328] match: `a CMP nonnegative ? a : ABS` simplified to just `ABS` [PR112392]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:5726de79e2154a16d8a045567d2cfad035f7ed19

commit r15-328-g5726de79e2154a16d8a045567d2cfad035f7ed19
Author: Andrew Pinski 
Date:   Mon May 6 23:53:41 2024 -0700

match: `a CMP nonnegative ? a : ABS` simplified to just `ABS` 
[PR112392]

We can optimize `a == nonnegative ? a : ABS`, `a > nonnegative ? a : 
ABS`
and `a >= nonnegative ? a : ABS` into `ABS`. This allows removal of
some extra comparison and extra conditional moves in some cases.
I don't remember where I found this, but it is simple to add, so
let's add it.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Note I have a secondary pattern for the equal case as either a or 
nonnegative
could be used.

PR tree-optimization/112392

gcc/ChangeLog:

* match.pd (`x CMP nonnegative ? x : ABS`): New pattern;
where CMP is ==, > and >=.
(`x CMP nonnegative@y ? y : ABS`): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-41.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd   | 15 +
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c | 34 ++
 2 files changed, 49 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 03a03c31233c..07e743ae464b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5876,6 +5876,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (convert (absu:utype @0)))
 @3
 
+/* X >  Positive ? X : ABS(X) -> ABS(X) */
+/* X >= Positive ? X : ABS(X) -> ABS(X) */
+/* X == Positive ? X : ABS(X) -> ABS(X) */
+(for cmp (eq gt ge)
+ (simplify
+  (cond (cmp:c @0 tree_expr_nonnegative_p@1) @0 (abs@3 @0))
+  (if (INTEGRAL_TYPE_P (type))
+   @3)))
+
+/* X == Positive ? Positive : ABS(X) -> ABS(X) */
+(simplify
+ (cond (eq:c @0 tree_expr_nonnegative_p@1) @1 (abs@3 @0))
+ (if (INTEGRAL_TYPE_P (type))
+  @3))
+
 /* (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when
X is unsigned, as when X + 1 overflows, X is -1, so -X == 1.  */
 (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c
new file mode 100644
index ..9774e283a7ba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-phiopt1" } */
+/* PR tree-optimization/112392 */
+
+int feq_1(int a, unsigned char b)
+{
+  int absb = b;
+  if (a == absb)  return absb;
+  return a > 0 ? a : -a;
+}
+int feq_2(int a, unsigned char b)
+{
+  int absb = b;
+  if (a == absb)  return a;
+  return a > 0 ? a : -a;
+}
+
+int fgt(int a, unsigned char b)
+{
+  int absb = b;
+  if (a > absb)  return a;
+  return a > 0 ? a : -a;
+}
+
+int fge(int a, unsigned char b)
+{
+  int absb = b;
+  if (a >= absb)  return a;
+  return a > 0 ? a : -a;
+}
+
+
+/* { dg-final { scan-tree-dump-not "if " "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 4 "phiopt1" } } */


[gcc r13-8728] Fix PR 110066: crash with -pg -static on riscv

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:929b0fffe4d3d836e07e5a398a8e176e65f8b2c2

commit r13-8728-g929b0fffe4d3d836e07e5a398a8e176e65f8b2c2
Author: Andrew Pinski 
Date:   Sat Jul 22 08:52:42 2023 -0700

Fix PR 110066: crash with -pg -static on riscv

The problem is that -fasynchronous-unwind-tables is on by default for riscv linux.
We need to turn it off for crt*.o because it would make __EH_FRAME_BEGIN__
point to .eh_frame data from crtbeginT.o instead of the user-defined object
during static linking.

This turns it off.

OK?

libgcc/ChangeLog:

* config.host (riscv*-*-linux*): Add t-crtstuff to tmake_file.
(riscv*-*-freebsd*): Likewise.
* config/riscv/t-crtstuff: New file.

(cherry picked from commit bbc1a102735c72e3c5a4dede8ab382813d12b058)

Diff:
---
 libgcc/config.host | 4 ++--
 libgcc/config/riscv/t-crtstuff | 5 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libgcc/config.host b/libgcc/config.host
index 9d7212028d06..c94d69d84b7c 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1304,12 +1304,12 @@ pru-*-*)
tm_file="$tm_file pru/pru-abi.h"
;;
 riscv*-*-linux*)
-   tmake_file="${tmake_file} riscv/t-softfp${host_address} t-softfp 
riscv/t-elf riscv/t-elf${host_address} t-slibgcc-libgcc"
+   tmake_file="${tmake_file} riscv/t-crtstuff 
riscv/t-softfp${host_address} t-softfp riscv/t-elf riscv/t-elf${host_address} 
t-slibgcc-libgcc"
extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o crtendS.o 
crtbeginT.o"
md_unwind_header=riscv/linux-unwind.h
;;
 riscv*-*-freebsd*)
-   tmake_file="${tmake_file} riscv/t-softfp${host_address} t-softfp 
riscv/t-elf riscv/t-elf${host_address} t-slibgcc-libgcc"
+   tmake_file="${tmake_file} riscv/t-crtstuff 
riscv/t-softfp${host_address} t-softfp riscv/t-elf riscv/t-elf${host_address} 
t-slibgcc-libgcc"
extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o crtendS.o 
crtbeginT.o"
;;
 riscv*-*-*)
diff --git a/libgcc/config/riscv/t-crtstuff b/libgcc/config/riscv/t-crtstuff
new file mode 100644
index ..685d11b3e66d
--- /dev/null
+++ b/libgcc/config/riscv/t-crtstuff
@@ -0,0 +1,5 @@
+# -fasynchronous-unwind-tables -funwind-tables is on by default for riscv linux
+# We turn it off for crt*.o because it would make __EH_FRAME_BEGIN__ point
+# to .eh_frame data from crtbeginT.o instead of the user-defined object
+# during static linking.
+CRTSTUFF_T_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables


[gcc r12-10430] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:d88fe8210e4edc2f4ddf722ba788924452c6f6a0

commit r12-10430-gd88fe8210e4edc2f4ddf722ba788924452c6f6a0
Author: Andrew Pinski 
Date:   Sun Mar 10 22:17:09 2024 +

Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

The problem here is that merge_truthop_with_opposite_arm would
use the type of the result of the comparison rather than the operands
of the comparison to figure out if we are honoring NaNs.
This fixes that oversight and now we get the correct results in this
case.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/95351

gcc/ChangeLog:

* fold-const.cc (merge_truthop_with_opposite_arm): Use
the type of the operands of the comparison and not the type
of the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/float_opposite_arm-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 31ce2e993d09dcad1ce139a2848a28de5931056d)

Diff:
---
 gcc/fold-const.cc   |  3 ++-
 gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index cd410e50d779..da96ed34a4c3 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -6188,7 +6188,6 @@ static tree
 merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
 bool rhs_only)
 {
-  tree type = TREE_TYPE (cmpop);
   enum tree_code code = TREE_CODE (cmpop);
   enum tree_code truthop_code = TREE_CODE (op);
   tree lhs = TREE_OPERAND (op, 0);
@@ -6204,6 +6203,8 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, 
tree cmpop,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
 return NULL_TREE;
 
+  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
+
   if (rhs_code == truthop_code)
 {
   tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, 
rhs_only);
diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c 
b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
new file mode 100644
index ..d2dbff350663
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* PR middle-end/95351 */
+
+int Foo(double possiblyNAN, double b, double c)
+{
+return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c));
+}
+
+/* Make sure we don't remove either >/<=  */
+
+/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0" "optimized" 
} } */
+
+/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0" "optimized" 
} } */


[gcc r11-11420] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:6c00c3245e688d00dae3e928f0d03f530640caae

commit r11-11420-g6c00c3245e688d00dae3e928f0d03f530640caae
Author: Andrew Pinski 
Date:   Sun Mar 10 22:17:09 2024 +

Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

The problem here is that merge_truthop_with_opposite_arm would
use the type of the result of the comparison rather than the operands
of the comparison to figure out if we are honoring NaNs.
This fixes that oversight and now we get the correct results in this
case.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/95351

gcc/ChangeLog:

* fold-const.c (merge_truthop_with_opposite_arm): Use
the type of the operands of the comparison and not the type
of the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/float_opposite_arm-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 31ce2e993d09dcad1ce139a2848a28de5931056d)

Diff:
---
 gcc/fold-const.c|  3 ++-
 gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index f4fd980dbbc8..97f77da5b93f 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -6171,7 +6171,6 @@ static tree
 merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
 bool rhs_only)
 {
-  tree type = TREE_TYPE (cmpop);
   enum tree_code code = TREE_CODE (cmpop);
   enum tree_code truthop_code = TREE_CODE (op);
   tree lhs = TREE_OPERAND (op, 0);
@@ -6187,6 +6186,8 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, 
tree cmpop,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
 return NULL_TREE;
 
+  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
+
   if (rhs_code == truthop_code)
 {
   tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, 
rhs_only);
diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c 
b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
new file mode 100644
index ..d2dbff350663
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* PR middle-end/95351 */
+
+int Foo(double possiblyNAN, double b, double c)
+{
+return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c));
+}
+
+/* Make sure we don't remove either >/<=  */
+
+/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0" "optimized" 
} } */
+
+/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0" "optimized" 
} } */


[gcc r11-11421] Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:16e27b6d03756bf1fae22607fa93107787a7b9cb

commit r11-11421-g16e27b6d03756bf1fae22607fa93107787a7b9cb
Author: Andrew Pinski 
Date:   Thu Sep 7 22:13:31 2023 -0700

Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

The problem here is that after r6-7425-ga9fee7cdc3c62d0e51730,
the comparison to see if the transformation could be done was using the
wrong value. Instead of checking whether the inner value was LE (for MIN,
or GE for MAX) the outer value, it was comparing the inner value to the
value used in the comparison, which was wrong.

Committed to GCC 13 branch after bootstrapped and tested on 
x86_64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/111331
* tree-ssa-phiopt.c (minmax_replacement):
Fix the LE/GE comparison for the
`(a CMP CST1) ? max : a` optimization.

gcc/testsuite/ChangeLog:

PR tree-optimization/111331
* gcc.c-torture/execute/pr111331-1.c: New test.
* gcc.c-torture/execute/pr111331-2.c: New test.
* gcc.c-torture/execute/pr111331-3.c: New test.

(cherry picked from commit 30e6ee074588bacefd2dfe745b188bb20c81fe5e)

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr111331-1.c | 17 +
 gcc/testsuite/gcc.c-torture/execute/pr111331-2.c | 19 +++
 gcc/testsuite/gcc.c-torture/execute/pr111331-3.c | 15 +++
 gcc/tree-ssa-phiopt.c|  8 
 4 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
new file mode 100644
index ..4c7f4fdbaa9d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
@@ -0,0 +1,17 @@
+int a;
+int b;
+int c(int d, int e, int f) {
+  if (d < e)
+return e;
+  if (d > f)
+return f;
+  return d;
+}
+int main() {
+  int g = -1;
+  a = c(b + 30, 29, g + 29);
+  volatile t = a;
+  if (t != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
new file mode 100644
index ..5c677f2caa9f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
@@ -0,0 +1,19 @@
+
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+if (d < 29)
+  t =  29;
+else
+  t = (d > 28) ? 28 : d;
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
new file mode 100644
index ..213d9bdd539d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
@@ -0,0 +1,15 @@
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+t = d < 29 ? 29 : ((d > 28) ? 28 : d);
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 5831a7764a49..d26d7889d952 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1676,7 +1676,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= LARGER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_false)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_false, smaller)
@@ -1707,7 +1707,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_false)))
return false;
}
  else
@@ -1747,7 +1747,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= LARGER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_true)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_true, smaller)
@@ -1774,7 +1774,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_true)))
return false;
}
  else


[gcc r12-10431] Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:39d56b96996dd8336652ceac97983c26fd8de4c6

commit r12-10431-g39d56b96996dd8336652ceac97983c26fd8de4c6
Author: Andrew Pinski 
Date:   Thu Sep 7 22:13:31 2023 -0700

Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

The problem here is that after r6-7425-ga9fee7cdc3c62d0e51730,
the comparison to see if the transformation could be done was using the
wrong value. Instead of checking whether the inner value was LE (for MIN,
or GE for MAX) the outer value, it was comparing the inner value to the
value used in the comparison, which was wrong.

Committed to GCC 13 branch after bootstrapped and tested on 
x86_64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/111331
* tree-ssa-phiopt.cc (minmax_replacement):
Fix the LE/GE comparison for the
`(a CMP CST1) ? max : a` optimization.

gcc/testsuite/ChangeLog:

PR tree-optimization/111331
* gcc.c-torture/execute/pr111331-1.c: New test.
* gcc.c-torture/execute/pr111331-2.c: New test.
* gcc.c-torture/execute/pr111331-3.c: New test.

(cherry picked from commit 30e6ee074588bacefd2dfe745b188bb20c81fe5e)

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr111331-1.c | 17 +
 gcc/testsuite/gcc.c-torture/execute/pr111331-2.c | 19 +++
 gcc/testsuite/gcc.c-torture/execute/pr111331-3.c | 15 +++
 gcc/tree-ssa-phiopt.cc   |  8 
 4 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
new file mode 100644
index ..4c7f4fdbaa9d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
@@ -0,0 +1,17 @@
+int a;
+int b;
+int c(int d, int e, int f) {
+  if (d < e)
+return e;
+  if (d > f)
+return f;
+  return d;
+}
+int main() {
+  int g = -1;
+  a = c(b + 30, 29, g + 29);
+  volatile t = a;
+  if (t != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
new file mode 100644
index ..5c677f2caa9f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
@@ -0,0 +1,19 @@
+
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+if (d < 29)
+  t =  29;
+else
+  t = (d > 28) ? 28 : d;
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
new file mode 100644
index ..213d9bdd539d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
@@ -0,0 +1,15 @@
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+t = d < 29 ? 29 : ((d > 28) ? 28 : d);
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index c56d0b9ff151..e2dba56383b4 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -2014,7 +2014,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= LARGER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_false)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_false, smaller)
@@ -2045,7 +2045,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_false)))
return false;
}
  else
@@ -2085,7 +2085,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= LARGER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_true)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_true, smaller)
@@ -2112,7 +2112,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_true)))
return false;
}
  else


[gcc r12-10432] warn-access: Fix handling of unnamed types [PR109804]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:16319f8fba6c049d743046488588f40da2349048

commit r12-10432-g16319f8fba6c049d743046488588f40da2349048
Author: Andrew Pinski 
Date:   Wed Feb 21 20:12:21 2024 -0800

warn-access: Fix handling of unnamed types [PR109804]

This looks like an oversight of handling DEMANGLE_COMPONENT_UNNAMED_TYPE.
DEMANGLE_COMPONENT_UNNAMED_TYPE only has the u.s_number.number set while
the code expected newc.u.s_binary.left would be valid.
So this treats DEMANGLE_COMPONENT_UNNAMED_TYPE like we treat function
parameters (DEMANGLE_COMPONENT_FUNCTION_PARAM) and template parameters
(DEMANGLE_COMPONENT_TEMPLATE_PARAM).

Note the code in the demangler does this when it sets 
DEMANGLE_COMPONENT_UNNAMED_TYPE:
  ret->type = DEMANGLE_COMPONENT_UNNAMED_TYPE;
  ret->u.s_number.number = num;

Committed as obvious after bootstrap/test on x86_64-linux-gnu

PR tree-optimization/109804

gcc/ChangeLog:

* gimple-ssa-warn-access.cc (new_delete_mismatch_p): Handle
DEMANGLE_COMPONENT_UNNAMED_TYPE.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wmismatched-new-delete-8.C: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 1076ffda6ce5e6d5fc9577deaf8233e549e5787a)

Diff:
---
 gcc/gimple-ssa-warn-access.cc  |  1 +
 .../g++.dg/warn/Wmismatched-new-delete-8.C | 42 ++
 2 files changed, 43 insertions(+)

diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
index 8d088ad33f2f..e70a6f1fb877 100644
--- a/gcc/gimple-ssa-warn-access.cc
+++ b/gcc/gimple-ssa-warn-access.cc
@@ -1688,6 +1688,7 @@ new_delete_mismatch_p (const demangle_component &newc,
 
 case DEMANGLE_COMPONENT_FUNCTION_PARAM:
 case DEMANGLE_COMPONENT_TEMPLATE_PARAM:
+case DEMANGLE_COMPONENT_UNNAMED_TYPE:
   return newc.u.s_number.number != delc.u.s_number.number;
 
 case DEMANGLE_COMPONENT_CHARACTER:
diff --git a/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C 
b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
new file mode 100644
index ..0ddc056c6df2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
@@ -0,0 +1,42 @@
+/* PR tree-optimization/109804 */
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-Wall" } */
+
+/* Here we used to ICE in new_delete_mismatch_p because
+   we didn't handle unnamed types from the demangler 
(DEMANGLE_COMPONENT_UNNAMED_TYPE). */
+
+template 
+static inline T * construct_at(void *at, ARGS && args)
+{
+ struct Placeable : T
+ {
+  Placeable(ARGS && args) : T(args) { }
+  void * operator new (long unsigned int, void *ptr) { return ptr; }
+  void operator delete (void *, void *) { }
+ };
+ return new (at) Placeable(static_cast(args));
+}
+template 
+struct Reconstructible
+{
+  char _space[sizeof(MT)];
+  Reconstructible() { }
+};
+template 
+struct Constructible : Reconstructible
+{
+ Constructible(){}
+};
+struct A { };
+struct B
+{
+ Constructible a { };
+ B(int) { }
+};
+Constructible b { };
+void f()
+{
+  enum { ENUM_A = 1 };
+  enum { ENUM_B = 1 };
+  construct_at(b._space, ENUM_B);
+}


[gcc r12-10433] testsuite: fix Wmismatched-new-delete-8.C with -m32

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:58d11bfc27d5412619c0919738158a4c05cca2cf

commit r12-10433-g58d11bfc27d5412619c0919738158a4c05cca2cf
Author: Marek Polacek 
Date:   Thu Feb 22 18:52:32 2024 -0500

testsuite: fix Wmismatched-new-delete-8.C with -m32

This fixes
error: 'operator new' takes type 'size_t' ('unsigned int') as first 
parameter [-fpermissive]

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wmismatched-new-delete-8.C: Use __SIZE_TYPE__.

(cherry picked from commit d34d7c74d51d365a3a4ddcd4383fc7c9f29020a1)

Diff:
---
 gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C 
b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
index 0ddc056c6df2..e8fd7a85b8c9 100644
--- a/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
+++ b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
@@ -11,7 +11,7 @@ static inline T * construct_at(void *at, ARGS && args)
  struct Placeable : T
  {
   Placeable(ARGS && args) : T(args) { }
-  void * operator new (long unsigned int, void *ptr) { return ptr; }
+  void * operator new (__SIZE_TYPE__, void *ptr) { return ptr; }
   void operator delete (void *, void *) { }
  };
  return new (at) Placeable(static_cast(args));


[gcc r12-10434] Fix PR 110386: backprop vs ABSU_EXPR

2024-05-09 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:f5c7306d7f039e5c74c5e82cf06610f0ae07a0e8

commit r12-10434-gf5c7306d7f039e5c74c5e82cf06610f0ae07a0e8
Author: Andrew Pinski 
Date:   Sat Sep 23 21:53:09 2023 -0700

Fix PR 110386: backprop vs ABSU_EXPR

The issue here is that when backprop tries to go
and strip sign ops, it skips over ABSU_EXPR but
ABSU_EXPR not only does an ABS, it also changes the
type to unsigned.
Since strip_sign_op_1 is only supposed to strip off
sign changing operands and not ones that change types,
removing ABSU_EXPR here is correct. We don't handle
nop conversions, so this does not cause any missed optimizations either.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/110386

gcc/ChangeLog:

* gimple-ssa-backprop.cc (strip_sign_op_1): Remove ABSU_EXPR.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr110386-1.c: New test.
* gcc.c-torture/compile/pr110386-2.c: New test.

(cherry picked from commit 2bbac12ea7bd8a3eef5382e1b13f6019df4ec03f)

Diff:
---
 gcc/gimple-ssa-backprop.cc   |  1 -
 gcc/testsuite/gcc.c-torture/compile/pr110386-1.c |  9 +
 gcc/testsuite/gcc.c-torture/compile/pr110386-2.c | 11 +++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-ssa-backprop.cc b/gcc/gimple-ssa-backprop.cc
index 74f981112567..68ea403e847f 100644
--- a/gcc/gimple-ssa-backprop.cc
+++ b/gcc/gimple-ssa-backprop.cc
@@ -688,7 +688,6 @@ strip_sign_op_1 (tree rhs)
 switch (gimple_assign_rhs_code (assign))
   {
   case ABS_EXPR:
-  case ABSU_EXPR:
   case NEGATE_EXPR:
return gimple_assign_rhs1 (assign);
 
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c
new file mode 100644
index ..4fcc977ad16f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c
@@ -0,0 +1,9 @@
+
+int f(int a)
+{
+int c = c < 0 ? c : -c;
+c = -c;
+unsigned b =  c;
+unsigned t = b*a;
+return t*t;
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c
new file mode 100644
index ..c60e1b6994b7
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-mavx" } */
+
+#include 
+
+__m128i do_stuff(__m128i XMM0) {
+   __m128i ABS0 = _mm_abs_epi32(XMM0);
+   __m128i MUL0 = _mm_mullo_epi32(ABS0, XMM0);
+   __m128i MUL1 = _mm_mullo_epi32(MUL0, MUL0);
+   return MUL1;
+}


[gcc r11-11422] Fix PR 110386: backprop vs ABSU_EXPR

2024-05-09 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:dbfc2d075f10149bd94e16c1210ffe4bac7e60c3

commit r11-11422-gdbfc2d075f10149bd94e16c1210ffe4bac7e60c3
Author: Andrew Pinski 
Date:   Sat Sep 23 21:53:09 2023 -0700

Fix PR 110386: backprop vs ABSU_EXPR

The issue here is that when backprop tries to go
and strip sign ops, it skips over ABSU_EXPR but
ABSU_EXPR not only does an ABS, it also changes the
type to unsigned.
Since strip_sign_op_1 is only supposed to strip off
sign changing operands and not ones that change types,
removing ABSU_EXPR here is correct. We don't handle
nop conversions, so this does not cause any missed optimizations either.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/110386

gcc/ChangeLog:

* gimple-ssa-backprop.c (strip_sign_op_1): Remove ABSU_EXPR.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr110386-1.c: New test.
* gcc.c-torture/compile/pr110386-2.c: New test.

(cherry picked from commit 2bbac12ea7bd8a3eef5382e1b13f6019df4ec03f)

Diff:
---
 gcc/gimple-ssa-backprop.c|  1 -
 gcc/testsuite/gcc.c-torture/compile/pr110386-1.c |  9 +
 gcc/testsuite/gcc.c-torture/compile/pr110386-2.c | 11 +++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-ssa-backprop.c b/gcc/gimple-ssa-backprop.c
index 4b62bb92a21d..8c0a37e6e97d 100644
--- a/gcc/gimple-ssa-backprop.c
+++ b/gcc/gimple-ssa-backprop.c
@@ -688,7 +688,6 @@ strip_sign_op_1 (tree rhs)
 switch (gimple_assign_rhs_code (assign))
   {
   case ABS_EXPR:
-  case ABSU_EXPR:
   case NEGATE_EXPR:
return gimple_assign_rhs1 (assign);
 
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c
new file mode 100644
index ..4fcc977ad16f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c
@@ -0,0 +1,9 @@
+
+int f(int a)
+{
+int c = c < 0 ? c : -c;
+c = -c;
+unsigned b =  c;
+unsigned t = b*a;
+return t*t;
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c
new file mode 100644
index ..c60e1b6994b7
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-mavx" } */
+
+#include 
+
+__m128i do_stuff(__m128i XMM0) {
+   __m128i ABS0 = _mm_abs_epi32(XMM0);
+   __m128i MUL0 = _mm_mullo_epi32(ABS0, XMM0);
+   __m128i MUL1 = _mm_mullo_epi32(MUL0, MUL0);
+   return MUL1;
+}


[gcc r15-501] tree-cfg: Move the returns_twice check to be last statement only [PR114301]

2024-05-15 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:642f31d6286b8a342130fbface51530befd975fd

commit r15-501-g642f31d6286b8a342130fbface51530befd975fd
Author: Andrew Pinski 
Date:   Tue May 14 06:29:18 2024 -0700

tree-cfg: Move the returns_twice check to be last statement only [PR114301]

When I was checking to make sure that all of the bugs dealing
with the case where gimple_can_duplicate_bb_p would return false were fixed,
I noticed that the code which was checking if a call statement was
returns_twice was checking all call statements rather than just the
last statement. Since calling gimple_call_flags has a small non-zero
overhead due to a few string comparisons, removing the uses of it
can give a small performance improvement. A returns_twice function
call will always end the basic block due to the check in
stmt_can_terminate_bb_p (and others). So checking only the last statement
is a small optimization and will be safe.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/114301
gcc/ChangeLog:

* tree-cfg.cc (gimple_can_duplicate_bb_p): Check returns_twice
only on the last call statement rather than all.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-cfg.cc | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index b2d47b720847..7fb7b92966be 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -6495,6 +6495,13 @@ gimple_can_duplicate_bb_p (const_basic_block bb)
&& gimple_call_internal_p (last)
&& gimple_call_internal_unique_p (last))
   return false;
+
+/* Prohibit duplication of returns_twice calls, otherwise associated
+   abnormal edges also need to be duplicated properly.
+   return_twice functions will always be the last statement.  */
+if (is_gimple_call (last)
+   && (gimple_call_flags (last) & ECF_RETURNS_TWICE))
+  return false;
   }
 
   for (gimple_stmt_iterator gsi = gsi_start_bb (CONST_CAST_BB (bb));
@@ -6502,15 +6509,12 @@ gimple_can_duplicate_bb_p (const_basic_block bb)
 {
   gimple *g = gsi_stmt (gsi);
 
-  /* Prohibit duplication of returns_twice calls, otherwise associated
-abnormal edges also need to be duplicated properly.
-An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be
+  /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be
 duplicated as part of its group, or not at all.
 The IFN_GOMP_SIMT_VOTE_ANY and IFN_GOMP_SIMT_XCHG_* are part of such a
 group, so the same holds there.  */
   if (is_gimple_call (g)
- && (gimple_call_flags (g) & ECF_RETURNS_TWICE
- || gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC)
+ && (gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC)
  || gimple_call_internal_p (g, IFN_GOMP_SIMT_EXIT)
  || gimple_call_internal_p (g, IFN_GOMP_SIMT_VOTE_ANY)
  || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_BFLY)


[gcc r15-697] aarch64: Fold vget_low_* intrinsics to BIT_FIELD_REF [PR102171]

2024-05-20 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:a2e4fe5a53cf75cd055f64e745ebd51253e42254

commit r15-697-ga2e4fe5a53cf75cd055f64e745ebd51253e42254
Author: Pengxuan Zheng 
Date:   Mon May 13 10:47:10 2024 -0700

aarch64: Fold vget_low_* intrinsics to BIT_FIELD_REF [PR102171]

This patch folds vget_low_* intrinsics to BIT_FIELD_REF to open up more
optimization opportunities for gimple optimizers.

While we are here, we also remove the vget_low_* definitions from 
arm_neon.h and
use the new intrinsics framework.

PR target/102171

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc 
(AARCH64_SIMD_VGET_LOW_BUILTINS):
New macro to create definitions for all vget_low intrinsics.
(VGET_LOW_BUILTIN): Likewise.
(enum aarch64_builtins): Add vget_low function codes.
(aarch64_general_fold_builtin): Fold vget_low calls.
* config/aarch64/aarch64-simd-builtins.def: Delete vget_low 
builtins.
* config/aarch64/aarch64-simd.md (aarch64_get_low): Delete.
(aarch64_vget_lo_halfv8bf): Likewise.
* config/aarch64/arm_neon.h (__attribute__): Delete.
(vget_low_f16): Likewise.
(vget_low_f32): Likewise.
(vget_low_f64): Likewise.
(vget_low_p8): Likewise.
(vget_low_p16): Likewise.
(vget_low_p64): Likewise.
(vget_low_s8): Likewise.
(vget_low_s16): Likewise.
(vget_low_s32): Likewise.
(vget_low_s64): Likewise.
(vget_low_u8): Likewise.
(vget_low_u16): Likewise.
(vget_low_u32): Likewise.
(vget_low_u64): Likewise.
(vget_low_bf16): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pr113573.c: Replace 
__builtin_aarch64_get_lowv8hi
with vget_low_s16.
* gcc.target/aarch64/vget_low_2.c: New test.
* gcc.target/aarch64/vget_low_2_be.c: New test.

Signed-off-by: Pengxuan Zheng 

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc   |  60 +
 gcc/config/aarch64/aarch64-simd-builtins.def |   5 +-
 gcc/config/aarch64/aarch64-simd.md   |  23 +
 gcc/config/aarch64/arm_neon.h| 105 ---
 gcc/testsuite/gcc.target/aarch64/pr113573.c  |   2 +-
 gcc/testsuite/gcc.target/aarch64/vget_low_2.c|  30 +++
 gcc/testsuite/gcc.target/aarch64/vget_low_2_be.c |  31 +++
 7 files changed, 124 insertions(+), 132 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 75d21de14011..11b888016ed7 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -658,6 +658,23 @@ static aarch64_simd_builtin_datum 
aarch64_simd_builtin_data[] = {
   VREINTERPRET_BUILTINS \
   VREINTERPRETQ_BUILTINS
 
+#define AARCH64_SIMD_VGET_LOW_BUILTINS \
+  VGET_LOW_BUILTIN(f16) \
+  VGET_LOW_BUILTIN(f32) \
+  VGET_LOW_BUILTIN(f64) \
+  VGET_LOW_BUILTIN(p8) \
+  VGET_LOW_BUILTIN(p16) \
+  VGET_LOW_BUILTIN(p64) \
+  VGET_LOW_BUILTIN(s8) \
+  VGET_LOW_BUILTIN(s16) \
+  VGET_LOW_BUILTIN(s32) \
+  VGET_LOW_BUILTIN(s64) \
+  VGET_LOW_BUILTIN(u8) \
+  VGET_LOW_BUILTIN(u16) \
+  VGET_LOW_BUILTIN(u32) \
+  VGET_LOW_BUILTIN(u64) \
+  VGET_LOW_BUILTIN(bf16)
+
 typedef struct
 {
   const char *name;
@@ -697,6 +714,9 @@ typedef struct
 #define VREINTERPRET_BUILTIN(A, B, L) \
   AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B,
 
+#define VGET_LOW_BUILTIN(A) \
+  AARCH64_SIMD_BUILTIN_VGET_LOW_##A,
+
 #undef VAR1
 #define VAR1(T, N, MAP, FLAG, A) \
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
@@ -732,6 +752,7 @@ enum aarch64_builtins
   AARCH64_CRC32_BUILTIN_MAX,
   /* SIMD intrinsic builtins.  */
   AARCH64_SIMD_VREINTERPRET_BUILTINS
+  AARCH64_SIMD_VGET_LOW_BUILTINS
   /* ARMv8.3-A Pointer Authentication Builtins.  */
   AARCH64_PAUTH_BUILTIN_AUTIA1716,
   AARCH64_PAUTH_BUILTIN_PACIA1716,
@@ -823,8 +844,37 @@ static aarch64_fcmla_laneq_builtin_datum 
aarch64_fcmla_lane_builtin_data[] = {
  && SIMD_INTR_QUAL(A) == SIMD_INTR_QUAL(B) \
   },
 
+#undef VGET_LOW_BUILTIN
+#define VGET_LOW_BUILTIN(A) \
+  {"vget_low_" #A, \
+   AARCH64_SIMD_BUILTIN_VGET_LOW_##A, \
+   2, \
+   { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
+   { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \
+   FLAG_AUTO_FP, \
+   false \
+  },
+
+#define AARCH64_SIMD_VGET_LOW_BUILTINS \
+  VGET_LOW_BUILTIN(f16) \
+  VGET_LOW_BUILTIN(f32) \
+  VGET_LOW_BUILTIN(f64) \
+  VGET_LOW_BUILTIN(p8) \
+  VGET_LOW_BUILTIN(p16) \
+  VGET_LOW_BUILTIN(p64) \
+  VGET_LOW_BUILTIN(s8) \
+  VGET_LOW_BUILTIN(s16) \
+  VGET_LOW_BUILTIN(s32) \
+  VGET_LOW_BUILTIN(s64) \
+  VGET_LOW_BUILTIN(u8) \
+  VGET_LOW_BUILTIN(u16) \
+  VGET_LOW_BUILTIN(u32) \
+  VGET_LOW_BUILTIN(u64) \
+  VGET_LOW_BUILTIN(bf16)
+
 static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = {
   AARCH64_SIMD_VREI

[gcc r15-699] PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

2024-05-20 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:9ff8f041331ef8b56007fb3c4d41d76f9850010d

commit r15-699-g9ff8f041331ef8b56007fb3c4d41d76f9850010d
Author: Andrew Pinski 
Date:   Sat May 18 11:55:58 2024 -0700

PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

The problem here is even if last_and_only_stmt returns a statement,
the bb might still contain a phi node which defines a ssa name
which is used in that statement so we need to add a check to make sure
that the phi nodes are empty for the middle bbs in both the
`CMP?MINMAX:MINMAX` case and the `CMP?MINMAX:B` case.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115143

gcc/ChangeLog:

* tree-ssa-phiopt.cc (minmax_replacement): Check for empty
phi nodes for middle bbs for the case where middle bb is not empty.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr115143-1.c: New test.
* gcc.c-torture/compile/pr115143-2.c: New test.
* gcc.c-torture/compile/pr115143-3.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.c-torture/compile/pr115143-1.c | 21 +
 gcc/testsuite/gcc.c-torture/compile/pr115143-2.c | 30 
 gcc/testsuite/gcc.c-torture/compile/pr115143-3.c | 29 +++
 gcc/tree-ssa-phiopt.cc   | 12 ++
 4 files changed, 92 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
new file mode 100644
index ..5cb119ea4325
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+short a, d;
+char b;
+long c;
+unsigned long e, f;
+void g(unsigned long h) {
+  if (c ? e : b)
+if (e)
+  if (d) {
+a = f ? ({
+  unsigned long i = d ? f : 0, j = e ? h : 0;
+  i < j ? i : j;
+}) : 0;
+  }
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
new file mode 100644
index ..05c3bbe9738e
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
@@ -0,0 +1,30 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) != 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_11(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
new file mode 100644
index ..53c5fb5588e9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
@@ -0,0 +1,29 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) > 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_7(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index f166c3132cb7..918cf50b5898 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1925,6 +1925,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There cannot be any phi nodes in the middle bb. */
+  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
+   return false;
+
   lhs = gimple_assign_lhs (assign);
   ass_code = gimple_assign_rhs_code (assign);
   if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
@@ -1938,6 +1942,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There cannot be any phi nodes in the alt middle bb. */
+  if (!gimple_seq_empty_p 

[gcc r14-10222] PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

2024-05-20 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:89ab128656b9da1359705bd770ae7d2367b33ec2

commit r14-10222-g89ab128656b9da1359705bd770ae7d2367b33ec2
Author: Andrew Pinski 
Date:   Sat May 18 11:55:58 2024 -0700

PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

The problem here is even if last_and_only_stmt returns a statement,
the bb might still contain a phi node which defines a ssa name
which is used in that statement so we need to add a check to make sure
that the phi nodes are empty for the middle bbs in both the
`CMP?MINMAX:MINMAX` case and the `CMP?MINMAX:B` case.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115143

gcc/ChangeLog:

* tree-ssa-phiopt.cc (minmax_replacement): Check for empty
phi nodes for middle bbs for the case where middle bb is not empty.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr115143-1.c: New test.
* gcc.c-torture/compile/pr115143-2.c: New test.
* gcc.c-torture/compile/pr115143-3.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 9ff8f041331ef8b56007fb3c4d41d76f9850010d)

Diff:
---
 gcc/testsuite/gcc.c-torture/compile/pr115143-1.c | 21 +
 gcc/testsuite/gcc.c-torture/compile/pr115143-2.c | 30 
 gcc/testsuite/gcc.c-torture/compile/pr115143-3.c | 29 +++
 gcc/tree-ssa-phiopt.cc   | 12 ++
 4 files changed, 92 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
new file mode 100644
index ..5cb119ea4325
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+short a, d;
+char b;
+long c;
+unsigned long e, f;
+void g(unsigned long h) {
+  if (c ? e : b)
+if (e)
+  if (d) {
+a = f ? ({
+  unsigned long i = d ? f : 0, j = e ? h : 0;
+  i < j ? i : j;
+}) : 0;
+  }
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
new file mode 100644
index ..05c3bbe9738e
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
@@ -0,0 +1,30 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) != 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_11(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
new file mode 100644
index ..53c5fb5588e9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
@@ -0,0 +1,29 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) > 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_7(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index d1746c4b468a..150e58e39e3f 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1918,6 +1918,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There cannot be any phi nodes in the middle bb. */
+  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
+   return false;
+
   lhs = gimple_assign_lhs (assign);
   ass_code = gimple_assign_rhs_code (assign);
   if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
@@ -1931,6 +1935,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There canno

  1   2   >