date:20240423

Re: [PATCH v1] RISC-V: Add xfail test case for highpart overlap of vext.vf

2024-04-23 Thread juzhe.zh...@rivai.ai

LGTM.



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2024-04-24 10:48
To: gcc-patches
CC: juzhe.zhong; kito.cheng; rdapp.gcc; Pan Li
Subject: [PATCH v1] RISC-V: Add xfail test case for highpart overlap of vext.vf
From: Pan Li 
 
We reverted the patch below for register group overlap; this adds the
related insn tests and marks them as xfail.  We will remove the xfail
once register overlap is supported in GCC-15.
 
62685890d88 RISC-V: Support highpart overlap for vext.vf
 
The following test suites passed for this patch:
* The rv64gcv full regression test with isl build.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/unop_v_constraint-2.c: Adjust asm
check cond.
* gcc.target/riscv/rvv/base/pr112431-4.c: New test.
* gcc.target/riscv/rvv/base/pr112431-5.c: New test.
* gcc.target/riscv/rvv/base/pr112431-6.c: New test.
 
Signed-off-by: Pan Li 
---
.../gcc.target/riscv/rvv/base/pr112431-4.c| 104 ++
.../gcc.target/riscv/rvv/base/pr112431-5.c|  68 
.../gcc.target/riscv/rvv/base/pr112431-6.c|  51 +
.../riscv/rvv/base/unop_v_constraint-2.c  |   2 +-
4 files changed, 224 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-6.c
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c
new file mode 100644
index 000..cecf796e10c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+   size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+   size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+   size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
+ + sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v8 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v9 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v10 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v11 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v12 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v13 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v14 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v15 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vsext_vf2_i16m2 (v0, vl);
+  vint16m2_t vw1 = __riscv_vsext_vf2_i16m2 (v1, vl);
+  vint16m2_t vw2 = __riscv_vsext_vf2_i16m2 (v2, vl);
+  vint16m2_t vw3 = __riscv_vsext_vf2_i16m2 (v3, vl);
+  vint16m2_t vw4 = __riscv_vsext_vf2_i16m2 (v4, vl);
+  vint16m2_t vw5 = __riscv_vsext_vf2_i16m2 (v5, vl);
+  vint16m2_t vw6 = __riscv_vsext_vf2_i16m2 (v6, vl);
+  vint16m2_t vw7 = __riscv_vsext_vf2_i16m2 (v7, vl);
+  vint16m2_t vw8 = __riscv_vsext_vf2_i16m2 (v8, vl);
+  vint16m2_t vw9 = __riscv_vsext_vf2_i16m2 (v9, vl);
+  vint16m2_t vw10 = __riscv_vsext_vf2_i16m2 (v10, vl);
+  vint16m2_t vw11 = __riscv_vsext_vf2_i16m2 (v11, vl);
+  vint16m2_t vw12 = __riscv_vsext_vf2_i16m2 (v12, vl);
+  vint16m2_t vw13 = __riscv_vsext_vf2_i16m2 (v13, vl);
+  vint16m2_t vw14 = __riscv_vsext_vf2_i16m2 (v14, vl);
+  vint16m2_t vw15 = __riscv_vsext_vf2_i16m2 (v15, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0);
+  size_t sum1 = __riscv_vmv_x_s_i16m2_i16 (vw1);
+  size_t sum2 = __riscv_vmv_x_s_i16m2_i16 (vw2);
+  size_t sum3 = __riscv_vmv_x_s_i16m2_i16 (vw3);
+  size_t sum4 = __riscv_vmv_x_s_i16m2_i16 (vw4);
+  size_t sum5 = __riscv_vmv_x_s_i16m2_i16 (vw5);
+  size_t su

Re: [Patch, fortran] PR89462 - [11/12/13/14 Regression] gfortran loops in code generation

2024-04-23 Thread Paul Richard Thomas

PS ignore the chunk in trans-array.cc. It is an attempt to fix PR93678 that
literally did nothing.

Paul

On Wed, 24 Apr 2024 at 07:05, Paul Richard Thomas <
paul.richard.tho...@gmail.com> wrote:

> Hi,
>
> The linaro pre-commit error testing picked up errors for arm and aarch
> since they set the option -pedantic-errors.
> /home/tcwg-build/workspace/tcwg_gnu_4/abe/snapshots/gcc.git~master/gcc/testsuite/gfortran.dg/pr89462.f90:6:14:
> Warning: Obsolescent feature: Old-style character length at (1)
> /home/tcwg-build/workspace/tcwg_gnu_4/abe/snapshots/gcc.git~master/gcc/testsuite/gfortran.dg/pr89462.f90:7:17:
> Warning: Obsolescent feature: Old-style character length at (1)
>
> I have added the option to the testcase together with the corresponding
> warnings as in the attached.
>
> I will wait for 24 hours more.
>
> Paul
>
> On Tue, 23 Apr 2024 at 16:25, Paul Richard Thomas <
> paul.richard.tho...@gmail.com> wrote:
>
>> Hi All,
>>
>> Jakub pinpointed the source of this bug in comment 6 of the PR. The rest
>> was 'obvious' :-)
>>
>> I plan to push the patch to mainline in the next 24 hours unless there
>> are opinions to the contrary. Backporting is proposed to occur a couple of
>> weeks later.
>>
>> Best regards
>>
>> Paul
>>
>> Fortran: Generate new charlens for shared symbol typespecs [PR89462]
>>
>> 2024-04-23  Paul Thomas  
>>Jakub Jelinek  
>>
>> gcc/fortran
>> PR fortran/89462
>> * decl.cc (build_sym): Add an extra argument 'elem'. If 'elem'
>> is greater than 1, gfc_new_charlen is called to generate a new
>> charlen, registered in the symbol namespace.
>> (variable_decl, enumerator_decl): Set the new argument in the
>> calls to build_sym.
>>
>> gcc/testsuite/
>> PR fortran/89462
>> * gfortran.dg/pr89462.f90: New test.
>>
>>

Re: [Patch, fortran] PR89462 - [11/12/13/14 Regression] gfortran loops in code generation

2024-04-23 Thread Paul Richard Thomas

Hi,

The Linaro pre-commit testing picked up errors for arm and aarch64,
since they set the option -pedantic-errors.
/home/tcwg-build/workspace/tcwg_gnu_4/abe/snapshots/gcc.git~master/gcc/testsuite/gfortran.dg/pr89462.f90:6:14:
Warning: Obsolescent feature: Old-style character length at (1)
/home/tcwg-build/workspace/tcwg_gnu_4/abe/snapshots/gcc.git~master/gcc/testsuite/gfortran.dg/pr89462.f90:7:17:
Warning: Obsolescent feature: Old-style character length at (1)

I have added the option to the testcase together with the corresponding
warnings as in the attached.

I will wait for 24 hours more.

Paul

On Tue, 23 Apr 2024 at 16:25, Paul Richard Thomas <
paul.richard.tho...@gmail.com> wrote:

> Hi All,
>
> Jakub pinpointed the source of this bug in comment 6 of the PR. The rest
> was 'obvious' :-)
>
> I plan to push the patch to mainline in the next 24 hours unless there are
> opinions to the contrary. Backporting is proposed to occur a couple of
> weeks later.
>
> Best regards
>
> Paul
>
> Fortran: Generate new charlens for shared symbol typespecs [PR89462]
>
> 2024-04-23  Paul Thomas  
>Jakub Jelinek  
>
> gcc/fortran
> PR fortran/89462
> * decl.cc (build_sym): Add an extra argument 'elem'. If 'elem'
> is greater than 1, gfc_new_charlen is called to generate a new
> charlen, registered in the symbol namespace.
> (variable_decl, enumerator_decl): Set the new argument in the
> calls to build_sym.
>
> gcc/testsuite/
> PR fortran/89462
> * gfortran.dg/pr89462.f90: New test.
>
>
diff --git a/gcc/fortran/decl.cc b/gcc/fortran/decl.cc
index a7576f4bc40..b8308aeee55 100644
--- a/gcc/fortran/decl.cc
+++ b/gcc/fortran/decl.cc
@@ -1713,7 +1713,7 @@ gfc_verify_c_interop_param (gfc_symbol *sym)
 /* Function called by variable_decl() that adds a name to the symbol table.  */
 
 static bool
-build_sym (const char *name, gfc_charlen *cl, bool cl_deferred,
+build_sym (const char *name, int elem, gfc_charlen *cl, bool cl_deferred,
 	   gfc_array_spec **as, locus *var_locus)
 {
   symbol_attribute attr;
@@ -1778,7 +1778,10 @@ build_sym (const char *name, gfc_charlen *cl, bool cl_deferred,
 
   if (sym->ts.type == BT_CHARACTER)
 {
-  sym->ts.u.cl = cl;
+  if (elem > 1)
+	sym->ts.u.cl = gfc_new_charlen (sym->ns, cl);
+  else
+	sym->ts.u.cl = cl;
   sym->ts.deferred = cl_deferred;
 }
 
@@ -2960,7 +2963,7 @@ variable_decl (int elem)
  create a symbol for those yet.  If we fail to create the symbol,
  bail out.  */
   if (!gfc_comp_struct (gfc_current_state ())
-  && !build_sym (name, cl, cl_deferred, &as, &var_locus))
+  && !build_sym (name, elem, cl, cl_deferred, &as, &var_locus))
 {
   m = MATCH_ERROR;
   goto cleanup;
@@ -10938,7 +10941,7 @@ enumerator_decl (void)
   /* OK, we've successfully matched the declaration.  Now put the
  symbol in the current namespace. If we fail to create the symbol,
  bail out.  */
-  if (!build_sym (name, NULL, false, &as, &var_locus))
+  if (!build_sym (name, 1, NULL, false, &as, &var_locus))
 {
   m = MATCH_ERROR;
   goto cleanup;
diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 30b84762346..322ff552813 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -11756,8 +11756,16 @@ gfc_walk_variable_expr (gfc_ss * ss, gfc_expr * expr)
   gfc_fix_class_refs (expr);
 
   for (ref = expr->ref; ref; ref = ref->next)
-if (ref->type == REF_ARRAY && ref->u.ar.type != AR_ELEMENT)
-  break;
+{
+  if (ref->type == REF_COMPONENT
+	  && ref->u.c.component->attr.function)
+	{
+	  ref = NULL;
+	  break;
+	}
+  if (ref->type == REF_ARRAY && ref->u.ar.type != AR_ELEMENT)
+  break;
+}
 
   return gfc_walk_array_ref (ss, expr, ref);
 }
diff --git a/gcc/testsuite/gfortran.dg/pr89462.f90 b/gcc/testsuite/gfortran.dg/pr89462.f90
new file mode 100644
index 000..b2a4912fcc8
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr89462.f90
@@ -0,0 +1,13 @@
+! { dg-do compile }
+! { dg-options "-pedantic-errors" }
+! Test the fix for PR89462 in which the shared 'cl' field of the typespec
+! shared between 'test', 'TR' and 'aTP' caused the compiler to go into an
+! infinite loop.
+! Contributed by Sergei Trofimovich  
+  CHARACTER*1 FUNCTION test(H) ! { dg-warning "Old-style character length" }
+ CHARACTER*1 test2,TR,aTP  ! { dg-warning "Old-style character length" }
+ ENTRY test2(L)
+ CALL ttest3(aTP)
+ test = TR
+ RETURN
+  END

[PATCH] i386: Fix behavior for both using AVX10.1-256 in options and function attribute

2024-04-23 Thread Haochen Jiang

Hi all,

When -mavx10.1-256 is used on the command line together with avx10.1-256
in a target attribute, zmm should never be generated.  However, GCC
currently generates zmm because it wrongly enables EVEX512 even when
AVX512F was not explicitly set.  This patch fixes that issue.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

gcc/ChangeLog:

* config/i386/i386-options.cc (ix86_valid_target_attribute_tree):
Check whether AVX512F is explicitly enabled.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_1-24.c: New test.
---
 gcc/config/i386/i386-options.cc| 1 +
 gcc/testsuite/gcc.target/i386/avx10_1-24.c | 7 +++
 2 files changed, 8 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-24.c

diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 68a2e1c6910..ac48b5c61c4 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1431,6 +1431,7 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
  scenario.  */
   if ((def->x_ix86_isa_flags2 & OPTION_MASK_ISA2_AVX10_1_256)
   && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512F)
+  && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)
   && !(def->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512)
   && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512))
 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_EVEX512;
diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-24.c 
b/gcc/testsuite/gcc.target/i386/avx10_1-24.c
new file mode 100644
index 000..2e93f041760
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_1-24.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */
+/* { dg-final { scan-assembler-not "%zmm" } } */
+
+typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
+
+void __attribute__((target("avx10.1-256"))) callee256(__m512 *a, __m512 *b) { 
*a = *b; }
-- 
2.31.1

[PATCH] tree-optimization/114832 - wrong dominator info with vect peeling

2024-04-23 Thread Richard Biener


When we update the dominator of the redirected exit after peeling
we check whether the immediate dominator was the loop header rather
than the exit source when we later want to just update it to the
new source.  The following fixes this oversight.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

PR tree-optimization/114832
* tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg):
Fix dominance check.

* gcc.dg/vect/pr114832.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr114832.c | 13 +
 gcc/tree-vect-loop-manip.cc  |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr114832.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr114832.c 
b/gcc/testsuite/gcc.dg/vect/pr114832.c
new file mode 100644
index 000..2de07ae22ef
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr114832.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-if-convert 
-fno-tree-loop-distribute-patterns" } */
+
+int a, b, c, d[3];
+void e() {
+  int f, g = 0;
+  for (; g < 3; g++) {
+if (f || a || b && c) {
+  int h, *i = &h, **j = &i;
+}
+d[g] = 0;
+  }
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 8d9b533d50f..43c7881c640 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1523,7 +1523,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,

   exit_dest = exit->dest;
   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
- exit_dest) == loop->header ?
+ exit_dest) == exit->src ?
 true : false);

   /* Also copy the pre-header, this avoids jumping through hoops to
--
2.25.1

[PATCH v1] RISC-V: Add xfail test case for highpart overlap of vext.vf

2024-04-23 Thread pan2 . li

From: Pan Li 

We reverted the patch below for register group overlap; this adds the
related insn tests and marks them as xfail.  We will remove the xfail
once register overlap is supported in GCC-15.

62685890d88 RISC-V: Support highpart overlap for vext.vf

The following test suites passed for this patch:
* The rv64gcv full regression test with isl build.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/unop_v_constraint-2.c: Adjust asm
check cond.
* gcc.target/riscv/rvv/base/pr112431-4.c: New test.
* gcc.target/riscv/rvv/base/pr112431-5.c: New test.
* gcc.target/riscv/rvv/base/pr112431-6.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-4.c| 104 ++
 .../gcc.target/riscv/rvv/base/pr112431-5.c|  68 
 .../gcc.target/riscv/rvv/base/pr112431-6.c|  51 +
 .../riscv/rvv/base/unop_v_constraint-2.c  |   2 +-
 4 files changed, 224 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-6.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c
new file mode 100644
index 000..cecf796e10c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+ size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+ size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v8 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v9 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v10 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v11 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v12 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v13 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v14 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v15 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vsext_vf2_i16m2 (v0, vl);
+  vint16m2_t vw1 = __riscv_vsext_vf2_i16m2 (v1, vl);
+  vint16m2_t vw2 = __riscv_vsext_vf2_i16m2 (v2, vl);
+  vint16m2_t vw3 = __riscv_vsext_vf2_i16m2 (v3, vl);
+  vint16m2_t vw4 = __riscv_vsext_vf2_i16m2 (v4, vl);
+  vint16m2_t vw5 = __riscv_vsext_vf2_i16m2 (v5, vl);
+  vint16m2_t vw6 = __riscv_vsext_vf2_i16m2 (v6, vl);
+  vint16m2_t vw7 = __riscv_vsext_vf2_i16m2 (v7, vl);
+  vint16m2_t vw8 = __riscv_vsext_vf2_i16m2 (v8, vl);
+  vint16m2_t vw9 = __riscv_vsext_vf2_i16m2 (v9, vl);
+  vint16m2_t vw10 = __riscv_vsext_vf2_i16m2 (v10, vl);
+  vint16m2_t vw11 = __riscv_vsext_vf2_i16m2 (v11, vl);
+  vint16m2_t vw12 = __riscv_vsext_vf2_i16m2 (v12, vl);
+  vint16m2_t vw13 = __riscv_vsext_vf2_i16m2 (v13, vl);
+  vint16m2_t vw14 = __riscv_vsext_vf2_i16m2 (v14, vl);
+  vint16m2_t vw15 = __riscv_vsext_vf2_i16m2 (v15, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0);
+  size_t sum1 = __riscv_vmv_x_s_i16m2_i16 (vw1);
+  size_t sum2 = __riscv_vmv_x_s_i16m2_i16 (vw2);
+  size_t sum3 = __riscv_vmv_x_s_i16m2_i16 (vw3);
+  size_t sum4 = __riscv_vmv_x_s_i16m2_i16 (vw4);
+  size_t sum5 = __riscv_vmv_x_s_i16m2_i16 (vw5);
+  size_t sum6 = __riscv_vmv_x_s_i16m2_i16 (vw6);
+  size_t sum7 = __riscv_vmv_x_s_i16m2_i16 (vw7);
+  size_t sum8 = __riscv_vmv_x_s_i16m2_i16 (vw8);

Re: [PATCH v2] [testsuite] require sqrt_insn effective target where needed

2024-04-23 Thread Mike Stump

On Apr 22, 2024, at 2:56 AM, Alexandre Oliva  wrote:
> 
> This patch takes feedback received for 3 earlier patches, and adopts a
> simpler approach to skip the still-failing tests, that I believe to be
> in line with ppc maintainers' expressed preferences.
> https://gcc.gnu.org/pipermail/gcc-patches/2021-February/565939.html
> https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566617.html
> https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566521.html
> Ping?-ish :-)
> 
> 
> Some tests fail on ppc and ppc64 when testing a compiler [with options
> for] for a CPU [emulator] that doesn't support the sqrt insn.
> 
> The gcc.dg/cdce3.c is one in which the expected shrink-wrap
> optimization only takes place when the target CPU supports a sqrt
> insn.
> 
> The gcc.target/powerpc/pr46728-1[0-4].c tests use -mpowerpc-gpopt and
> call sqrt(), which involves the sqrt insn that the target CPU under
> test may not support.
> 
> Require a sqrt_insn effective target for all the affected tests.
> 
> Regstrapped on x86_64-linux-gnu and ppc64el-linux-gnu.  Also testing
> with gcc-13 on ppc64-vx7r2 and ppc-vx7r2.  Ok to install?

Ok.

Re: [PATCH] Value range: Add range op for __builtin_isfinite

2024-04-23 Thread HAO CHEN GUI

Yes, it's my typo.

Thanks.
Gui Haochen

在 2024/4/23 17:10, rep.dot@gmail.com 写道:
> On 12 April 2024 07:30:10 CEST, HAO CHEN GUI  wrote:
> 
> 
>>
>>
>> patch.diff
>> diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
>> index 9de130b4022..99c511728d3 100644
>> --- a/gcc/gimple-range-op.cc
>> +++ b/gcc/gimple-range-op.cc
>> @@ -1192,6 +1192,56 @@ public:
>>   }
>> } op_cfn_isinf;
>>
>> +//Implement range operator for CFN_BUILT_IN_ISFINITE
>> +class cnf_isfinite : public range_operator
>> +{
> 
> 
> s/cnf/cfn/g
> I guess.
> thanks

Re: [PATCH] DOCUMENTATION_ROOT_URL vs. release branches [PR114738]

2024-04-23 Thread David Malcolm

On Tue, 2024-04-23 at 17:45 +0200, Jakub Jelinek wrote:
> On Tue, Apr 23, 2024 at 11:40:55AM -0400, David Malcolm wrote:
> > > So, I think at least for the MAJOR.MINOR.0 releases we want to
> > > use
> > > URLs like above rather than the trunk ones and we can use the
> > > same
> > > process
> > > of updating *.opt.urls as well for that.
> > 
> > Would it make sense to instead update the default value in
> > gcc/configure.ac for DOCUMENTATION_ROOT_URL when branching or
> > releasing, from https://gcc.gnu.org/onlinedocs/ to
> > https://gcc.gnu.org/onlinedocs/gcc-MAJOR-MINOR.0/
> > 
> > ?
> > 
> > Before this patch the DOCUMENTATION_ROOT_URL expresses the location
> > of
> > a built texinfo html tree of docs, and the url suffixes express the
> > path within that tree.
> > 
> > As the patch is written, if a distributor overrides --with-
> > documentation-root-url= at configure time, then they need to mirror
> > the
> > structure of our website on their website, which seems like a
> > burden.
> 
> Sure, that is doable (of course, it shouldn't be done by updating
> gcc/configure.ac but by adjusting the default in there based on
> gcc_version,
> I'll post a patch tomorrow).

That sounds like a better approach; thanks.

> 
> Still, what do you think we should do on the release branches
> (recommend to
> developers and check with the post-commit CI)?

My hope is that the URL suffixes don't change: we shouldn't be adding
new command-line options on the release branches, and I'd hope that
texinfo doesn't change the generated anchors from run to run.

> No regeneration of *.urls except before doing a new release
> candidate,
> or a different make goal that would grab html files from the web and
> regenerate against that?

That sounds overcomplicated. 

If the anchors do change, it's fairly trivial to run "make regenerate-
opt-urls" locally, isn't it?

As mentioned above, I like the idea of having the
DOCUMENTATION_ROOT_URL express the location of a tree of docs built
with texinfo, and for the url suffixes to be relative to that.  We can
update the default in gcc/configure.ac for released branches, and drop
the logic from your previous patch.  So if a distributor wants to
upload their docs for a particular version to their own location,
they're responsible for providing a suitable value for  --with-
documentation-root-url= at configure time.

Or am I missing something here?

Dave

Re: [PATCH] c++/modules testsuite: avoid expensive ggc-min-expand=0

2024-04-23 Thread Jason Merrill


On 4/23/24 11:28, Patrick Palka wrote:

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?


Is the test being run for multiple standard levels?  I'd rather restrict 
it to one and keep fully testing GC-safety.



-- >8 --

The below testcase uses --param=ggc-min-expand=0 which forces a full GC
during every collection point and in turn takes over two minutes to run
and ends up being the main bottleneck of the modules.exp testsuite.

This patch speeds up this test without (hopefully) significantly affecting
its coverage by using =1 instead of =0 which forces a full GC each time the
heap grows by 1%, which means exponentially fewer GCs.  After this patch
the modules.exp testsuite finishes in 2m55s instead of 3m40s with -j8 on
my machine.

gcc/testsuite/ChangeLog:

* g++.dg/modules/pr99023_a.X: Use ggc-min-expand=1 instead of =0.
* g++.dg/modules/pr99023_b.X: Likewise.
---
  gcc/testsuite/g++.dg/modules/pr99023_a.X | 2 +-
  gcc/testsuite/g++.dg/modules/pr99023_b.X | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.dg/modules/pr99023_a.X 
b/gcc/testsuite/g++.dg/modules/pr99023_a.X
index c872d15f792..96bb4a2ab5a 100644
--- a/gcc/testsuite/g++.dg/modules/pr99023_a.X
+++ b/gcc/testsuite/g++.dg/modules/pr99023_a.X
@@ -1,5 +1,5 @@
  // PR c++/99023, ICE
-// { dg-additional-options {-x c++-system-header initializer_list -fmodules-ts 
--param ggc-min-expand=0} }
+// { dg-additional-options {-x c++-system-header initializer_list -fmodules-ts 
--param ggc-min-expand=1} }
  
  // { dg-prune-output {linker input file unused} }
  
diff --git a/gcc/testsuite/g++.dg/modules/pr99023_b.X b/gcc/testsuite/g++.dg/modules/pr99023_b.X

index ca5f32e5bcc..955378ad88f 100644
--- a/gcc/testsuite/g++.dg/modules/pr99023_b.X
+++ b/gcc/testsuite/g++.dg/modules/pr99023_b.X
@@ -1,5 +1,5 @@
  // PR c++/99023, ICE
-// { dg-additional-options {-x c++-system-header iostream -fmodules-ts 
-flang-info-include-translate= --param ggc-min-expand=0} }
+// { dg-additional-options {-x c++-system-header iostream -fmodules-ts 
-flang-info-include-translate= --param ggc-min-expand=1} }
  
  // { dg-prune-output {linker input file unused} }

Re: [RFC][PATCH v1 2/4] C and C++ FE changes to support flexible array members in unions and alone in structures.

2024-04-23 Thread Qing Zhao




> On Apr 23, 2024, at 15:51, Joseph Myers  wrote:
> 
> On Fri, 19 Apr 2024, Qing Zhao wrote:
> 
>> gcc/c/ChangeLog:
>> 
>>  * c-decl.cc (finish_struct): Change errors to pedwarns for the cases
>>  flexible array members in union or alone in structures.
> 
> The C front-end changes are OK for GCC 15 once everything else in the 
> series is ready for inclusion (in particular, the testsuite changes).

Thanks, will update the C FE changes based on your comments.

Qing
> 
> -- 
> Joseph S. Myers
> josmy...@redhat.com
>

Re: [PATCH v9 0/5] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2024-04-23 Thread Qing Zhao

Ping for the middle-end change approval.

And an update on the status of the patch set:

**Approval status:

All C FE changes have been approved.

**Review status:

All Middle-end changes have been reviewed by Sid, no remaining issue. 

Okay for GCC15? 

thanks.

Qing

> On Apr 12, 2024, at 09:54, Qing Zhao  wrote:
> 
> Hi,
> 
> This is the 9th version of the patch.
> 
> Compared with the 8th version, the differences are:
> 
> updates per Joseph's comments:
> 
> 1. in C FE, add checking for counted_by attribute for the new multiple 
> definitions of the same tag for C23 in the routine 
> "tagged_types_tu_compatible_p".
>   Add a new testing case flex-array-counted-by-8.c for this. 
>   This is for Patch 1;
> 
> 2. two minor typo fixes in c-typeck.cc. 
>   This is for Patch 2;
> 
> Approval status:
> 
>   Patch 2's C FE change has been approved with minor typo fixes (the above 2);
>   Patch 4 has been approved; 
>   Patch 5's C FE change has been approved;
> 
> Review status:
> 
>   Patch 3, Patch 2 and Patch 5's Middle-end changes have been reviewed by Sid, 
> No issue.
> 
> More review needed:
> 
>   Patch 1's new change to C FE (the above 1);
>   Patch 2, 3 and 5's middle-end change need to be approved   
> 
> The 8th version is here:
> https://gcc.gnu.org/pipermail/gcc-patches/2024-March/648559.html
> https://gcc.gnu.org/pipermail/gcc-patches/2024-March/648560.html
> https://gcc.gnu.org/pipermail/gcc-patches/2024-March/648561.html
> https://gcc.gnu.org/pipermail/gcc-patches/2024-March/648562.html
> https://gcc.gnu.org/pipermail/gcc-patches/2024-March/648563.html
> 
> It is based on the following original proposal:
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635884.html
> Represent the missing dependence for the "counted_by" attribute and its 
> consumers
> 
> **The summary of the proposal is:
> 
> * Add a new internal function ".ACCESS_WITH_SIZE" to carry the size 
> information for every reference to a FAM field;
> * In C FE, Replace every reference to a FAM field whose TYPE has the 
> "counted_by" attribute with the new internal function ".ACCESS_WITH_SIZE";
> * In every consumer of the size information, for example, BDOS or array bound 
> sanitizer, query the size information or ACCESS_MODE information from the new 
> internal function;
> * When expanding to RTL, replace the internal function with the actual 
> reference to the FAM field;
> * Some adjustment to ipa alias analysis, and other SSA passes to mitigate the 
> impact to the optimizer and code generation.
> 
> 
> **The new internal function
> 
>  .ACCESS_WITH_SIZE (REF_TO_OBJ, REF_TO_SIZE, CLASS_OF_SIZE, TYPE_OF_SIZE, 
> ACCESS_MODE, TYPE_OF_REF)
> 
> INTERNAL_FN (ACCESS_WITH_SIZE, ECF_LEAF | ECF_NOTHROW, NULL)
> 
> which returns the "REF_TO_OBJ" same as the 1st argument;
> 
> Both the return type and the type of the first argument of this function have 
> been converted from the incomplete array type to the corresponding pointer 
> type.
> 
> The call to .ACCESS_WITH_SIZE is wrapped with an INDIRECT_REF, whose type is 
> the original incomplete array type.
> 
> Please see the following link for why:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638793.html
> https://gcc.gnu.org/pipermail/gcc-patches/2023-December/639605.html
> 
> 1st argument "REF_TO_OBJ": The reference to the object;
> 2nd argument "REF_TO_SIZE": The reference to the size of the object,
> 3rd argument "CLASS_OF_SIZE": The size referenced by the REF_TO_SIZE 
> represents
>   0: the number of bytes;
>   1: the number of the elements of the object type;
> 4th argument "TYPE_OF_SIZE": A constant 0 with the TYPE of the object
>  refed by REF_TO_SIZE
> 5th argument "ACCESS_MODE":
>  -1: Unknown access semantics
>   0: none
>   1: read_only
>   2: write_only
>   3: read_write
> 6th argument "TYPE_OF_REF": A constant 0 with the pointer TYPE to
>  the original flexible array type.
> 
> ** The Patch sets included:
> 
> 1. Provide counted_by attribute to flexible array member field;
>  which includes:
>  * "counted_by" attribute documentation;
>  * C FE handling of the new attribute;
>syntax checking, error reporting;
>  * testing cases;
> 
> 2. Convert "counted_by" attribute to/from .ACCESS_WITH_SIZE.
>  which includes:
>  * The definition of the new internal function .ACCESS_WITH_SIZE in 
> internal-fn.def.
>  * C FE converts every reference to a FAM with "counted_by" attribute to 
> a call to the internal function .ACCESS_WITH_SIZE.
>(build_component_ref in c_typeck.cc)
>This includes the case when the object is statically allocated and 
> initialized.
>In order to make this working, we should update 
> initializer_constant_valid_p_1 and output_constant in varasm.cc to include 
> calls to .ACCESS_WITH_SIZE.
> 
>However, for the reference inside "offsetof", ignore the "counted_by" 
> attribute since it's not useful at all. (c_parser_postfix_expression in 
> c/c-parser.cc)

Re: [RFC][PATCH v1 2/4] C and C++ FE changes to support flexible array members in unions and alone in structures.

2024-04-23 Thread Joseph Myers

On Fri, 19 Apr 2024, Qing Zhao wrote:

> gcc/c/ChangeLog:
> 
>   * c-decl.cc (finish_struct): Change errors to pedwarns for the cases
>   flexible array members in union or alone in structures.

The C front-end changes are OK for GCC 15 once everything else in the 
series is ready for inclusion (in particular, the testsuite changes).

-- 
Joseph S. Myers
josmy...@redhat.com

Re: [RFC][PATCH v1 3/4] Add testing cases for flexible array members in unions and alone in structures.

2024-04-23 Thread Qing Zhao




> On Apr 23, 2024, at 14:53, Joseph Myers  wrote:
> 
> On Fri, 19 Apr 2024, Qing Zhao wrote:
> 
>> gcc/testsuite/ChangeLog:
>> 
>>  * gcc.dg/flex-array-in-union-1.c: New test.
>>  * gcc.dg/flex-array-in-union-2.c: New test.
> 
> There should also be a -pedantic-errors test that these constructs get 
> errors with -pedantic-errors.

Okay, will add. 
> 
> The tests mix two cases: flexible arrays in unions, and flexible arrays on 
> their own in structures.  That means the test names are misleading; either 
> they should be renamed, or the struct tests should be split out.
Okay, will update this.
> 
> Note that "no named members" also includes the case where there are 
> unnamed bit-fields together with a flexible array member, so that should 
> be tested as well.
Will add such testing cases.
> 
> Since this patch series involves changes for both C and C++, it would be 
> best for the tests to be c-c++-common tests.  But if that's problematic 
> for some reason - if there's still too much difference in behavior between 
> C and C++ - then there should at least be tests for C++ that are as 
> similar as possible to the tests for C.

I tried to put these two testing cases into c-c++-common, but there were some 
inconsistent behaviors 
I could not resolve at that time. I will try to fix those issues or add C++ testing 
cases. 

Thanks for the review.

Qing
> 
> -- 
> Joseph S. Myers
> josmy...@redhat.com
>

Re: [RFC][PATCH v1 1/4] Documentation change

2024-04-23 Thread Qing Zhao




> On Apr 23, 2024, at 15:03, Joseph Myers  wrote:
> 
> On Tue, 23 Apr 2024, Qing Zhao wrote:
> 
>> However, I am not very confident on the wording of the doc, is the 
>> current wording good enough for this? Or do you have any suggestion on 
>> how to make it better?
> 
> I'm not convinced the statement about size (in relation to a structure 
> with the member omitted) is useful for unions the way it is for 
> structures.  The structure with the member omitted is a relevant concept 
> for thinking about a structure with a flexible array member (the flexible 
> array member essentially goes after that structure); it's much less 
> relevant for thinking about a union with a flexible array member.

Okay, then I will delete that statement about size.
> 
> (The statement that the size is zero when all members are flexible array 
> members still seems a useful one to make.)
And only keep the statement that the size is zero when all members are flexible array members.

Thanks.

Qing
> 
> -- 
> Joseph S. Myers
> josmy...@redhat.com
>

Re: [PATCH v2] gcc-14: Add Ada changes

2024-04-23 Thread Fernando Oleo Blanco

Hi Marc and all that are involved,

On 4/18/24 15:24, Marc Poulhiès wrote:
> Co-authored-by: Fernando Oleo Blanco 
> ---
> Hello Fernando,
> 
> Thanks again for your changes. After consulting other colleagues, I'm 
> proposing this revised version.
> Does that look ok to you?
> 
> As it was simpler I've created a new commit with a Co-authored-by line, but 
> can easily change that if you prefer.
> 
> Marc
> 
I really like your patch. It is a nicely streamlined version of what I 
submitted. I think it is also easier to understand.

You have my green light!

Best regards and thank you all,

Fer

Re: [RFC][PATCH v1 1/4] Documentation change

2024-04-23 Thread Joseph Myers

On Tue, 23 Apr 2024, Qing Zhao wrote:

> However, I am not very confident on the wording of the doc, is the 
> current wording good enough for this? Or do you have any suggestion on 
> how to make it better?

I'm not convinced the statement about size (in relation to a structure 
with the member omitted) is useful for unions the way it is for 
structures.  The structure with the member omitted is a relevant concept 
for thinking about a structure with a flexible array member (the flexible 
array member essentially goes after that structure); it's much less 
relevant for thinking about a union with a flexible array member.

(The statement that the size is zero when all members are flexible array 
members still seems a useful one to make.)

-- 
Joseph S. Myers
josmy...@redhat.com

Re: [RFC][PATCH v1 3/4] Add testing cases for flexible array members in unions and alone in structures.

2024-04-23 Thread Joseph Myers

On Fri, 19 Apr 2024, Qing Zhao wrote:

> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/flex-array-in-union-1.c: New test.
>   * gcc.dg/flex-array-in-union-2.c: New test.

There should also be a -pedantic-errors test that these constructs get 
errors with -pedantic-errors.

The tests mix two cases: flexible arrays in unions, and flexible arrays on 
their own in structures.  That means the test names are misleading; either 
they should be renamed, or the struct tests should be split out.

Note that "no named members" also includes the case where there are 
unnamed bit-fields together with a flexible array member, so that should 
be tested as well.

Since this patch series involves changes for both C and C++, it would be 
best for the tests to be c-c++-common tests.  But if that's problematic 
for some reason - if there's still too much difference in behavior between 
C and C++ - then there should at least be tests for C++ that are as 
similar as possible to the tests for C.

-- 
Joseph S. Myers
josmy...@redhat.com

[PATCH] c++/modules testsuite: avoid expensive ggc-min-expand=0

2024-04-23 Thread Patrick Palka

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?

-- >8 --

The below testcase uses --param=ggc-min-expand=0 which forces a full GC
during every collection point and in turn takes over two minutes to run
and ends up being the main bottleneck of the modules.exp testsuite.

This patch speeds up this test without (hopefully) significantly affecting
its coverage by using =1 instead of =0 which forces a full GC each time the
heap grows by 1%, which means exponentially fewer GCs.  After this patch
the modules.exp testsuite finishes in 2m55s instead of 3m40s with -j8 on
my machine.

gcc/testsuite/ChangeLog:

* g++.dg/modules/pr99023_a.X: Use ggc-min-expand=1 instead of =0.
* g++.dg/modules/pr99023_b.X: Likewise.
---
 gcc/testsuite/g++.dg/modules/pr99023_a.X | 2 +-
 gcc/testsuite/g++.dg/modules/pr99023_b.X | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.dg/modules/pr99023_a.X 
b/gcc/testsuite/g++.dg/modules/pr99023_a.X
index c872d15f792..96bb4a2ab5a 100644
--- a/gcc/testsuite/g++.dg/modules/pr99023_a.X
+++ b/gcc/testsuite/g++.dg/modules/pr99023_a.X
@@ -1,5 +1,5 @@
 // PR c++/99023, ICE
-// { dg-additional-options {-x c++-system-header initializer_list -fmodules-ts 
--param ggc-min-expand=0} }
+// { dg-additional-options {-x c++-system-header initializer_list -fmodules-ts 
--param ggc-min-expand=1} }
 
 // { dg-prune-output {linker input file unused} }
 
diff --git a/gcc/testsuite/g++.dg/modules/pr99023_b.X 
b/gcc/testsuite/g++.dg/modules/pr99023_b.X
index ca5f32e5bcc..955378ad88f 100644
--- a/gcc/testsuite/g++.dg/modules/pr99023_b.X
+++ b/gcc/testsuite/g++.dg/modules/pr99023_b.X
@@ -1,5 +1,5 @@
 // PR c++/99023, ICE
-// { dg-additional-options {-x c++-system-header iostream -fmodules-ts 
-flang-info-include-translate= --param ggc-min-expand=0} }
+// { dg-additional-options {-x c++-system-header iostream -fmodules-ts 
-flang-info-include-translate= --param ggc-min-expand=1} }
 
 // { dg-prune-output {linker input file unused} }
 
-- 
2.45.0.rc0

Re: [RFC][PATCH v1 1/4] Documentation change

2024-04-23 Thread Qing Zhao



> On Apr 23, 2024, at 14:04, Joseph Myers  wrote:
> 
> On Fri, 19 Apr 2024, Qing Zhao wrote:
> 
>> +The size of the union is as if the flexiable array member were omitted
>> +except that it may have more trailing padding than the omission would imply.
> 
> "trailing padding" is more a concept for structures than for unions (where 
> padding depends on which union member is active).  But I suppose it's 
> still true that the union can be larger than without the flexible member, 
> because of alignment considerations.
> 
> union u { char c; int a[]; };
> 
> needs to be sufficiently aligned for int, which means the size is a 
> multiple of the size of int, whereas if the flexible array member weren't 
> present, the size could be 1 byte.

Yes, that’s exactly what I tried to include in the documentation part -:)
And I have a testing case for this in the patch. 

However, I am not very confident on the wording of the doc, is the current 
wording good enough for this?
Or do you have any suggestion on how to make it better?

Thanks a lot!

Qing
> 
> -- 
> Joseph S. Myers
> josmy...@redhat.com
>

Re: [RFC][PATCH v1 1/4] Documentation change

2024-04-23 Thread Joseph Myers

On Fri, 19 Apr 2024, Qing Zhao wrote:

> +The size of the union is as if the flexiable array member were omitted
> +except that it may have more trailing padding than the omission would imply.

"trailing padding" is more a concept for structures than for unions (where 
padding depends on which union member is active).  But I suppose it's 
still true that the union can be larger than without the flexible member, 
because of alignment considerations.

union u { char c; int a[]; };

needs to be sufficiently aligned for int, which means the size is a 
multiple of the size of int, whereas if the flexible array member weren't 
present, the size could be 1 byte.

-- 
Joseph S. Myers
josmy...@redhat.com

Re: [PATCH] i386: Avoid =&r, r, r andn double-word alternative for ia32 [PR114810]

2024-04-23 Thread Uros Bizjak

On Tue, Apr 23, 2024 at 5:50 PM Jakub Jelinek  wrote:
>
> Hi!
>
> As discussed in the PR, on ia32 with its 8 GPRs, where 1 is always fixed
> and other 2 often are as well having an alternative which needs 3
> double-word registers is just too much for RA.
> The following patch splits that alternative into two, one with o is used
> even on ia32, but one with the 3x r is used just for -m64/-mx32.
> Tried to reduce the testcase further, but it wasn't easily possible.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2024-04-23  Jakub Jelinek  
>
> PR target/114810
> * config/i386/i386.md (*andn3_doubleword_bmi): Split the =&r,r,ro
> alternative into =&r,r,r enabled only for x64 and =&r,r,o.
>
> * g++.target/i386/pr114810.C: New test.

OK.

Thanks,
Uros.

>
> --- gcc/config/i386/i386.md.jj  2024-04-15 14:25:58.203322878 +0200
> +++ gcc/config/i386/i386.md 2024-04-23 12:15:47.171956091 +0200
> @@ -12482,10 +12482,10 @@ (define_split
>  })
>
>  (define_insn_and_split "*andn3_doubleword_bmi"
> -  [(set (match_operand: 0 "register_operand" "=&r,r,r")
> +  [(set (match_operand: 0 "register_operand" "=&r,&r,r,r")
> (and:
> - (not: (match_operand: 1 "register_operand" "r,0,r"))
> - (match_operand: 2 "nonimmediate_operand" "ro,ro,0")))
> + (not: (match_operand: 1 "register_operand" "r,r,0,r"))
> + (match_operand: 2 "nonimmediate_operand" "r,o,ro,0")))
> (clobber (reg:CC FLAGS_REG))]
>"TARGET_BMI"
>"#"
> @@ -12496,7 +12496,8 @@ (define_insn_and_split "*andn3_doub
> (parallel [(set (match_dup 3)
>(and:DWIH (not:DWIH (match_dup 4)) (match_dup 5)))
>   (clobber (reg:CC FLAGS_REG))])]
> -  "split_double_mode (mode, &operands[0], 3, &operands[0], 
> &operands[3]);")
> +  "split_double_mode (mode, &operands[0], 3, &operands[0], 
> &operands[3]);"
> +  [(set_attr "isa" "x64,*,*,*")])
>
>  (define_insn_and_split "*andn3_doubleword"
>[(set (match_operand:DWI 0 "register_operand")
> --- gcc/testsuite/g++.target/i386/pr114810.C.jj 2024-04-23 14:21:19.202613799 
> +0200
> +++ gcc/testsuite/g++.target/i386/pr114810.C2024-04-23 14:24:22.813116589 
> +0200
> @@ -0,0 +1,861 @@
> +// PR target/114810
> +// { dg-do compile { target { { { *-*-linux* } && ia32 } && c++17 } } }
> +// { dg-options "-mstackrealign -O2 -mbmi -fno-exceptions -fno-plt 
> -march=x86-64 -w" }
> +// { dg-additional-options "-fpie" { target pie } }
> +
> +enum E1 { a, dp, b, jm, c, dq, d, mj, e, dr, f, jn, h, dt, j, nt, l, du, m, 
> jo, n, dv, o, mk, p, dw, q, jp, s, dx, t, ol, u, dy, v, jq, w };
> +enum dz { x, ml, y };
> +struct ea { short g; } z, jr;
> +long long aa;
> +struct eb { ea ab; ea dp[]; };
> +enum ac { };
> +typedef enum { } nu;
> +struct ad { ac k; };
> +unsigned ec (long);
> +struct ae;
> +int js (ae);
> +unsigned af ();
> +struct ed;
> +template < int ag > struct ee { using ah = ed[ag]; };
> +template < int ag > struct array { typename ee < ag >::ah ai; ed & 
> operator[] (int aj) { return ai[aj]; } };
> +struct { void dp (...); } ak;
> +void ef (int);
> +template < typename al > struct jt { al & operator[] (short); };
> +struct am { void operator= (bool); };
> +struct an { am operator[] (unsigned); };
> +template < typename, unsigned, unsigned >using eg = an;
> +struct ao;
> +struct ae { ae (ao *); };
> +struct mm { mm (); mm (int); };
> +enum ap { };
> +enum eh { };
> +bool aq, ju, ar, ei, nv, as, ej, at;
> +struct jv
> +{
> +  jv (eh au):dp (au) {}
> +  jv ();
> +  operator eh ();
> +  unsigned av ()
> +  {
> +aq = dp & 7;
> +return dp * (aq ? : 4);
> +  }
> +  unsigned ek ()
> +  {
> +int aw;
> +bool mn = dp & 7;
> +aw = dp * (mn ? : 4);
> +return aw + 3 >> 2;
> +  }
> +  eh dp;
> +} ax, el, ay, jw, az, em, ba, om;
> +struct ed
> +{
> +  ed ():bb (), dp () {}
> +  int bc () { return bb; }
> +  jv en () { return (eh) dp; }
> +  unsigned ek ()
> +  {
> +jv bd;
> +bd = (eh) dp;
> +return bd.ek ();
> +  }
> +  ap jx ();
> +  unsigned bb:24;
> +  int dp:8;
> +};
> +struct be { short dp = 0; } bf, eo;
> +struct bg
> +{
> +  bg ();
> +  bg (ed r)
> +  {
> +dp.bh = r;
> +if (r.bc ())
> +  mo = true;
> +else
> +  bi = true;
> +  }
> +  static bg ep (int);
> +  bg (be);
> +  struct { ed bh; } dp;
> +  union { char mo:1; char bi:1; short bj = 0; };
> +} jy, bk, eq, bl, mp, bm, er;
> +struct bn
> +{
> +  explicit bn (ed bo):bh (bo) {}
> +  ed dp ();
> +  ed bh;
> +  be es;
> +  char bj = 0;
> +};
> +struct bp
> +{
> +  eg < int, 6, 4 > dp;
> +};
> +jt < bg > bq;
> +jt < bn > definitions;
> +struct ao
> +{
> +  bp & br ();
> +};
> +enum jz:short;
> +template < typename > using bs = ae;
> +ao *et ();
> +short bt, nw;
> +struct bu
> +{
> +  int dp;
> +};
> +dz bv;
> +unsigned eu;
> +struct bw
> +{
> +  ac k;
> +  unsigned dp;
> +} *bx;
> +bool ka ();
> +struct by
> +{
> +  bool dp;
> +};
> +typedef enum
> +{ bz, ev } ca;
> +typedef enum
> +{
> +  m

Re: [PATCH] c++/modules: deduced return type merging [PR114795]

2024-04-23 Thread Jason Merrill


On 4/23/24 09:41, Patrick Palka wrote:

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?

-- >8 --

When merging an imported function template specialization with an
existing one, if the existing one has an undeduced return type and the
imported one's is already deduced, we need to propagate the deduced type
since once we install the imported definition we won't get a chance to
deduce it by normal means.

This patch makes is_matching_decl propagate the deduced return type
alongside the existing propagation of the exception specification.  I
suppose we could instead propagate it later when installing the imported
definition from read_definition, but it seems best to propagate it
sooner rather than later.

PR c++/114795

gcc/cp/ChangeLog:

* module.cc (trees_in::is_matching_decl): Propagate deduced
function return type.

gcc/testsuite/ChangeLog:

* g++.dg/modules/auto-4_a.H: New test.
* g++.dg/modules/auto-4_b.C: New test.
---
  gcc/cp/module.cc|  5 +
  gcc/testsuite/g++.dg/modules/auto-4_a.H | 14 ++
  gcc/testsuite/g++.dg/modules/auto-4_b.C | 15 +++
  3 files changed, 34 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/modules/auto-4_a.H
  create mode 100644 gcc/testsuite/g++.dg/modules/auto-4_b.C

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index d94d8ff4df9..e10e19ac9f7 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -11537,6 +11537,11 @@ trees_in::is_matching_decl (tree existing, tree decl, 
bool is_typedef)
else if (!DEFERRED_NOEXCEPT_SPEC_P (d_spec)
   && !comp_except_specs (d_spec, e_spec, ce_type))
goto mismatch;
+
+  /* Similarly if EXISTING has an undeduced return type, but DECL's
+is already deduced.  */
+  if (undeduced_auto_decl (existing) && !undeduced_auto_decl (decl))
+   TREE_TYPE (existing) = change_return_type (TREE_TYPE (d_type), e_type);


Perhaps this should dump a note like the noexcept merge does?  OK either 
way.


Jason

Re: [PATCH] libbacktrace: Avoid GNU ld --compress-debug-sections=zlib-gabi

2024-04-23 Thread Ian Lance Taylor

On Tue, Apr 23, 2024 at 7:24 AM Jakub Jelinek  wrote:
>
> What we could do is drop the HAVE_COMPRESSED_DEBUG stuff altogether, and
> instead similarly how we have HAVE_COMPRESSED_DEBUG_ZSTD have
> HAVE_COMPRESSED_DEBUG_{ZLIB,ZLIB_GABI,ZLIB_GNU} and for each of those
> if linker supports them test with that corresponding flag.

I think that's right.  Committed this patch after testing on
x86_64-pc-linux-gnu.  While I was at it I added an _alloc version of
ctestzstd.

Ian

* configure.ac: Test --compress-debug-sections=zlib-gnu and
--compress-debug-sections=zlib-gabi separately, setting new
automake conditionals.
* Makefile.am (ctestg, ctestg_alloc): Only build if
HAVE_COMPRESSED_DEBUG_ZLIB_GNU.
(ctesta, ctesta_alloc): Only build if
HAVE_COMPRESSED_DEBUG_ZLIB_GABI.
(ctestzstd_alloc): New test if HAVE_COMPRESSED_DEBUG_ZSTD.
* configure, Makefile.in: Regenerate.
3943de6986271466652cb619dbc60881060b180c
diff --git a/libbacktrace/Makefile.am b/libbacktrace/Makefile.am
index 5677ecd8865..bed42c29329 100644
--- a/libbacktrace/Makefile.am
+++ b/libbacktrace/Makefile.am
@@ -493,19 +493,37 @@ endif HAVE_OBJCOPY_DEBUGLINK
  $<
$(OBJCOPY) --strip-all $< $@
 
-if HAVE_COMPRESSED_DEBUG
+if HAVE_COMPRESSED_DEBUG_ZLIB_GNU
 
 ctestg_SOURCES = btest.c testlib.c
 ctestg_CFLAGS = $(libbacktrace_TEST_CFLAGS)
 ctestg_LDFLAGS = -Wl,--compress-debug-sections=zlib-gnu 
$(libbacktrace_testing_ldflags)
 ctestg_LDADD = libbacktrace.la
 
+ctestg_alloc_SOURCES = $(ctestg_SOURCES)
+ctestg_alloc_CFLAGS = $(ctestg_CFLAGS)
+ctestg_alloc_LDFLAGS = $(ctestg_LDFLAGS) $(libbacktrace_testing_ldflags)
+ctestg_alloc_LDADD = libbacktrace_alloc.la
+
+BUILDTESTS += ctestg ctestg_alloc
+
+endif
+
+if HAVE_COMPRESSED_DEBUG_ZLIB_GABI
+
 ctesta_SOURCES = btest.c testlib.c
 ctesta_CFLAGS = $(libbacktrace_TEST_CFLAGS)
 ctesta_LDFLAGS = -Wl,--compress-debug-sections=zlib-gabi 
$(libbacktrace_testing_ldflags)
 ctesta_LDADD = libbacktrace.la
 
-BUILDTESTS += ctestg ctesta
+ctesta_alloc_SOURCES = $(ctesta_SOURCES)
+ctesta_alloc_CFLAGS = $(ctesta_CFLAGS)
+ctesta_alloc_LDFLAGS = $(ctesta_LDFLAGS) $(libbacktrace_testing_ldflags)
+ctesta_alloc_LDADD = libbacktrace_alloc.la
+
+BUILDTESTS += ctesta ctesta_alloc
+
+endif
 
 if HAVE_COMPRESSED_DEBUG_ZSTD
 
@@ -514,21 +532,12 @@ ctestzstd_CFLAGS = $(libbacktrace_TEST_CFLAGS)
 ctestzstd_LDFLAGS = -Wl,--compress-debug-sections=zstd 
$(libbacktrace_testing_ldflags)
 ctestzstd_LDADD = libbacktrace.la
 
-BUILDTESTS += ctestzstd
-
-endif
-
-ctestg_alloc_SOURCES = $(ctestg_SOURCES)
-ctestg_alloc_CFLAGS = $(ctestg_CFLAGS)
-ctestg_alloc_LDFLAGS = $(ctestg_LDFLAGS) $(libbacktrace_testing_ldflags)
-ctestg_alloc_LDADD = libbacktrace_alloc.la
-
-ctesta_alloc_SOURCES = $(ctesta_SOURCES)
-ctesta_alloc_CFLAGS = $(ctesta_CFLAGS)
-ctesta_alloc_LDFLAGS = $(ctesta_LDFLAGS) $(libbacktrace_testing_ldflags)
-ctesta_alloc_LDADD = libbacktrace_alloc.la
+ctestzstd_alloc_SOURCES = $(ctestzstd_SOURCES)
+ctestzstd_alloc_CFLAGS = $(ctestzstd_CFLAGS)
+ctestzstd_alloc_LDFLAGS = $(ctestzstd_LDFLAGS) $(libbacktrace_testing_ldflags)
+ctestzstd_alloc_LDADD = libbacktrace_alloc.la
 
-BUILDTESTS += ctestg_alloc ctesta_alloc
+BUILDTESTS += ctestzstd ctestzstd_alloc
 
 endif
 
diff --git a/libbacktrace/configure.ac b/libbacktrace/configure.ac
index 0f61f2b28ab..3e0075a2b79 100644
--- a/libbacktrace/configure.ac
+++ b/libbacktrace/configure.ac
@@ -502,16 +502,27 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM(,)],
 LDFLAGS=$LDFLAGS_hold])
 AM_CONDITIONAL(HAVE_BUILDID, test "$libbacktrace_cv_ld_buildid" = yes)
 
-dnl Test whether the linker supports the --compress-debug-sections option.
-AC_CACHE_CHECK([whether --compress-debug-sections is supported],
-[libgo_cv_ld_compress],
+dnl Test whether the linker supports the --compress-debug-sections=zlib-gnu
+dnl option.
+AC_CACHE_CHECK([whether --compress-debug-sections=zlib-gnu is supported],
+[libgo_cv_ld_compress_zlib_gnu],
 [LDFLAGS_hold=$LDFLAGS
 LDFLAGS="$LDFLAGS -Wl,--compress-debug-sections=zlib-gnu"
 AC_LINK_IFELSE([AC_LANG_PROGRAM(,)],
-[libgo_cv_ld_compress=yes],
-[libgo_cv_ld_compress=no])
+[libgo_cv_ld_compress_zlib_gnu=yes],
+[libgo_cv_ld_compress_zlib_gnu=no])
 LDFLAGS=$LDFLAGS_hold])
-AM_CONDITIONAL(HAVE_COMPRESSED_DEBUG, test "$libgo_cv_ld_compress" = yes)
+AM_CONDITIONAL(HAVE_COMPRESSED_DEBUG_ZLIB_GNU, test 
"$libgo_cv_ld_compress_zlib_gnu" = yes)
+
+AC_CACHE_CHECK([whether --compress-debug-sections=zlib-gabi is supported],
+[libgo_cv_ld_compress_zlib_gabi],
+[LDFLAGS_hold=$LDFLAGS
+LDFLAGS="$LDFLAGS -Wl,--compress-debug-sections=zlib-gabi"
+AC_LINK_IFELSE([AC_LANG_PROGRAM(,)],
+[libgo_cv_ld_compress_zlib_gabi=yes],
+[libgo_cv_ld_compress_zlib_gabi=no])
+LDFLAGS=$LDFLAGS_hold])
+AM_CONDITIONAL(HAVE_COMPRESSED_DEBUG_ZLIB_GABI, test 
"$libgo_cv_ld_compress_zlib_gabi" = yes)
 
 AC_CHECK_LIB([zstd], [ZSTD_compress],
 [AC_DEFINE(HAVE_ZSTD, 1, [Define if -lzstd is available.])])

Re: [PATCH v1] RISC-V: Adjust overlap attr after revert d3544cea63d and e65aaf8efe1

2024-04-23 Thread Palmer Dabbelt


On Tue, 23 Apr 2024 07:45:03 PDT (-0700), Patrick O'Neill wrote:

Hi Pan,

Sorry about that. It looks like there was a difference between my local
machine and the CI machine.

 From the CI it looks like we're back to the failure list we had on friday.

I'll do some local testing to manually confirm this.


Awesome, thanks. In the patchwork meeting, Kito was mentioning possibly 
wanting to revert some more of these widening ops?  If that's still the 
case we should get something on the lists as soon as we can, it's really 
late in the cycle already.




Thanks,
Patrick

On 4/22/24 23:50, Li, Pan2 wrote:


Hi Patrick,

After some investigation and double-checking, I think the 
gcc.dg/graphite/pr111878.c ICE may have nothing to do
with the patches of the revert series, as it has existed for quite a while. It may be related 
to the commit below:

2e7abd09621a4401d44f4513adf126bce4b4828b RISC-V: Block VLSmodes according to 
TARGET_MAX_LMUL and BITS_PER_RISCV_VECTOR

Could you please help to double-check it *manually*? Here are my steps 
for your reference, and I will take care of this failure soon.

../__RISC-V_INSTALL___RV64/bin/riscv64-unknown-elf-gcc --version
riscv64-unknown-elf-gcc (GCC) 14.0.0 20231205 (experimental)
Copyright (C) 2023 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

1. download isl-0.24, let isl -> /some-where/riscv-gnu-toolchain/gcc/isl-0.24
2. mkdir __BUILD__ && cd __BUILD__ && ../configure \
   --target=riscv64-unknown-elf \
   --prefix=${INSTALL_DIR} \
   --disable-shared \
   --enable-threads \
   --enable-tls \
   --enable-languages=c,c++,fortran \
   --with-system-zlib \
   --with-newlib \
   --disable-libmudflap \
   --disable-libssp \
   --disable-libquadmath \
   --disable-libgomp \
   --enable-nls \
   --disable-tm-clone-registry \
   --src=`pwd`/../ \
   --with-abi=lp64d \
   --with-arch=rv64gcv \
   --with-tune=rocket \
   --with-isa-spec=20191213 \
   CFLAGS_FOR_BUILD="-O0 -g" \
   CXXFLAGS_FOR_BUILD="-O0 -g" \
   CFLAGS_FOR_TARGET="-O0  -g" \
   CXXFLAGS_FOR_TARGET="-O0 -g" \
   BOOT_CFLAGS="-O0 -g" \
   CFLAGS="-O0 -g" \
   CXXFLAGS="-O0 -g" \
   GM2FLAGS_FOR_TARGET="-O0 -g" \
   GOCFLAGS_FOR_TARGET="-O0 -g" \
   GDCFLAGS_FOR_TARGET="-O0 -g"
make -j $(nproc) all-gcc && make install-gcc
3. ../__RISC-V_INSTALL___RV64/bin/riscv64-unknown-elf-gcc 
gcc/testsuite/gcc.dg/graphite/pr111878.c -O3 -fgraphite-identity 
-fsave-optimization-record -march=rv64gcv -mabi=lp64d -c -S -o -

Pan

-Original Message-
From: Li, Pan2
Sent: Tuesday, April 23, 2024 10:32 AM
To: Patrick O'Neill ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; rdapp@gmail.com
Subject: RE: [PATCH v1] RISC-V: Adjust overlap attr after revert d3544cea63d 
and e65aaf8efe1

Thanks Patrick.

It turns out that the make report does not show an error like the one below, and so the 
graphite.exp tests never run.
That explains why I missed the test failures; I will take care of it ASAP.

sorry, unimplemented: Graphite loop optimizations cannot be used (isl is not 
available)

Pan

-Original Message-
From: Patrick O'Neill 
Sent: Tuesday, April 23, 2024 8:32 AM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; rdapp@gmail.com
Subject: Re: [PATCH v1] RISC-V: Adjust overlap attr after revert d3544cea63d 
and e65aaf8efe1

This patch in particular does not cause any more regressions. It's one
of the other reverts from the weekend.

Before the reverts [1]:
                      |  gcc |
g++ | gfortran |
      rv64gcv/  lp64d/ medlow |   48/    32 |     12/    3|   12 /    2

After the reverts:
                      |  gcc |
g++ | gfortran |
      rv64gcv/  lp64d/ medlow |   50 /    33 |   12 / 3 |   26 / 7 |


gcc new fails:
FAIL: gcc.dg/graphite/pr111878.c (internal compiler error: in
extract_insn, at recog.cc:2812)
FAIL: gcc.dg/graphite/pr111878.c (test for excess errors)

gfortran new fails:
FAIL: gfortran.dg/graphite/id-27.f90   -O  (internal compiler error: in
extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/id-27.f90   -O  (test for excess errors)
FAIL: gfortran.dg/graphite/pr14741.f90   -O  (internal compiler error:
in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr14741.f90   -O  (test for excess errors)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal
compiler error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -g  (internal compiler
error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/graphite/pr

Re: [PATCH] c++/modules: deduced return type merging [PR114795]

2024-04-23 Thread Patrick Palka

On Tue, 23 Apr 2024, Patrick Palka wrote:
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk?
> 
> -- >8 --
> 
> When merging an imported function template specialization with an
> existing one, if the existing one has an undeduced return type and the
> imported one's is already deduced, we need to propagate the deduced type
> since once we install the imported definition we won't get a chance to
> deduce it by normal means.
> 
> This patch makes is_matching_decl propagate the deduced return type
> alongside the existing propagate of the existing specification.  I

er, "alongside the existing propagation of the exception specification".

> suppose could instead propagate it later when installing the imported
> definition from read_definition, but it seems best to propagate it
> sooner rather than later.
> 
>   PR c++/114795
> 
> gcc/cp/ChangeLog:
> 
>   * module.cc (trees_in::is_matching_decl): Propagate deduced
>   function return type.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/modules/auto-4_a.H: New test.
>   * g++.dg/modules/auto-4_b.C: New test.
> ---
>  gcc/cp/module.cc|  5 +
>  gcc/testsuite/g++.dg/modules/auto-4_a.H | 14 ++
>  gcc/testsuite/g++.dg/modules/auto-4_b.C | 15 +++
>  3 files changed, 34 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/modules/auto-4_a.H
>  create mode 100644 gcc/testsuite/g++.dg/modules/auto-4_b.C
> 
> diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> index d94d8ff4df9..e10e19ac9f7 100644
> --- a/gcc/cp/module.cc
> +++ b/gcc/cp/module.cc
> @@ -11537,6 +11537,11 @@ trees_in::is_matching_decl (tree existing, tree 
> decl, bool is_typedef)
>else if (!DEFERRED_NOEXCEPT_SPEC_P (d_spec)
>  && !comp_except_specs (d_spec, e_spec, ce_type))
>   goto mismatch;
> +
> +  /* Similarly if EXISTING has an undeduced return type, but DECL's
> +  is already deduced.  */
> +  if (undeduced_auto_decl (existing) && !undeduced_auto_decl (decl))
> + TREE_TYPE (existing) = change_return_type (TREE_TYPE (d_type), e_type);
>  }
>else if (is_typedef)
>  {
> diff --git a/gcc/testsuite/g++.dg/modules/auto-4_a.H 
> b/gcc/testsuite/g++.dg/modules/auto-4_a.H
> new file mode 100644
> index 000..52b50533982
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/auto-4_a.H
> @@ -0,0 +1,14 @@
> +// PR c++/114795
> +// { dg-additional-options "-fmodule-header" }
> +// { dg-module-cmi {} }
> +
> +template
> +struct A {
> +  auto f() { return 0; }

Oops, this should be "return T();" to match the other definition below.

> +};
> +
> +template
> +inline void g() {
> +  A a;
> +  a.f();
> +}
> diff --git a/gcc/testsuite/g++.dg/modules/auto-4_b.C 
> b/gcc/testsuite/g++.dg/modules/auto-4_b.C
> new file mode 100644
> index 000..378684ef6d0
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/auto-4_b.C
> @@ -0,0 +1,15 @@
> +// PR c++/114795
> +// { dg-additional-options "-fmodules-ts -fno-module-lazy" }
> +
> +template
> +struct A {
> +  auto f() { return T(); }
> +};
> +
> +A a;
> +
> +import "auto-4_a.H";
> +
> +int main() {
> +  g(); // { dg-bogus "before deduction of 'auto'" "" { target *-*-* } 0 
> }
> +}
> -- 
> 2.45.0.rc0
> 
>

[PATCH] c++/modules: deduced return type merging [PR114795]

2024-04-23 Thread Patrick Palka

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?

-- >8 --

When merging an imported function template specialization with an
existing one, if the existing one has an undeduced return type and the
imported one's is already deduced, we need to propagate the deduced type
since once we install the imported definition we won't get a chance to
deduce it by normal means.

This patch makes is_matching_decl propagate the deduced return type
alongside the existing propagation of the exception specification.  I
suppose we could instead propagate it later when installing the imported
definition from read_definition, but it seems best to propagate it
sooner rather than later.

PR c++/114795

gcc/cp/ChangeLog:

* module.cc (trees_in::is_matching_decl): Propagate deduced
function return type.

gcc/testsuite/ChangeLog:

* g++.dg/modules/auto-4_a.H: New test.
* g++.dg/modules/auto-4_b.C: New test.
---
 gcc/cp/module.cc|  5 +
 gcc/testsuite/g++.dg/modules/auto-4_a.H | 14 ++
 gcc/testsuite/g++.dg/modules/auto-4_b.C | 15 +++
 3 files changed, 34 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/modules/auto-4_a.H
 create mode 100644 gcc/testsuite/g++.dg/modules/auto-4_b.C

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index d94d8ff4df9..e10e19ac9f7 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -11537,6 +11537,11 @@ trees_in::is_matching_decl (tree existing, tree decl, 
bool is_typedef)
   else if (!DEFERRED_NOEXCEPT_SPEC_P (d_spec)
   && !comp_except_specs (d_spec, e_spec, ce_type))
goto mismatch;
+
+  /* Similarly if EXISTING has an undeduced return type, but DECL's
+is already deduced.  */
+  if (undeduced_auto_decl (existing) && !undeduced_auto_decl (decl))
+   TREE_TYPE (existing) = change_return_type (TREE_TYPE (d_type), e_type);
 }
   else if (is_typedef)
 {
diff --git a/gcc/testsuite/g++.dg/modules/auto-4_a.H 
b/gcc/testsuite/g++.dg/modules/auto-4_a.H
new file mode 100644
index 000..52b50533982
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/auto-4_a.H
@@ -0,0 +1,14 @@
+// PR c++/114795
+// { dg-additional-options "-fmodule-header" }
+// { dg-module-cmi {} }
+
+template
+struct A {
+  auto f() { return 0; }
+};
+
+template
+inline void g() {
+  A a;
+  a.f();
+}
diff --git a/gcc/testsuite/g++.dg/modules/auto-4_b.C 
b/gcc/testsuite/g++.dg/modules/auto-4_b.C
new file mode 100644
index 000..378684ef6d0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/auto-4_b.C
@@ -0,0 +1,15 @@
+// PR c++/114795
+// { dg-additional-options "-fmodules-ts -fno-module-lazy" }
+
+template
+struct A {
+  auto f() { return T(); }
+};
+
+A a;
+
+import "auto-4_a.H";
+
+int main() {
+  g(); // { dg-bogus "before deduction of 'auto'" "" { target *-*-* } 0 }
+}
-- 
2.45.0.rc0

Re: [PATCH] c++, v2: Retry the aliasing of base/complete cdtor optimization at import_export_decl time [PR113208]

2024-04-23 Thread Jakub Jelinek

On Mon, Apr 22, 2024 at 11:14:35PM -0400, Jason Merrill wrote:
> > > The following testcase regressed with Marek's r14-5979 change,
> > > when pr113208_0.C is compiled where the ctor is marked constexpr,
> > > we no longer perform this optimization, where
> > > _ZN6vectorI12QualityValueEC2ERKS1_ was emitted in the
> > > _ZN6vectorI12QualityValueEC5ERKS1_ comdat group and
> > > _ZN6vectorI12QualityValueEC1ERKS1_ was made an alias to it,
> > > instead we emit _ZN6vectorI12QualityValueEC2ERKS1_ in
> > > _ZN6vectorI12QualityValueEC2ERKS1_ comdat group and the same
> > > content _ZN6vectorI12QualityValueEC1ERKS1_ as separate symbol in
> > > _ZN6vectorI12QualityValueEC1ERKS1_ comdat group.
> 
> This seems like an ABI bug that could use a non-LTO testcase.

Well, except for the issues it causes to LTO I think it is compatible,
worst case we get the body of the ctor duplicated in the executable
and the linker picks some of the weak symbols as the symbol definitions.
Anyway, I've added a non-LTO testcase for that in the patch below.

> Hmm, cloning the bodies and then discarding them later seems like more extra
> work than creating the cgraph nodes.

So, I've tried to handle that in tentative_decl_linkage, like that function
already handles functions declared inline except for implicit template
instantiations.  If we expect that import_export_decl will do comdat_linkage
for the ctor later on do it right away.

That fixes the testcases too, but seems to regress
+FAIL: libstdc++-abi/abi_check
on both x86_64-linux and i686-linux, in each case 8 symbols disappeared from
libstdc++.so.6:
_ZNSt12__shared_ptrINSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEC1Ev
_ZNSt12__shared_ptrINSt10filesystem4_DirELN9__gnu_cxx12_Lock_policyE2EEC1Ev
_ZNSt12__shared_ptrINSt10filesystem28recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEC1Ev
_ZNSt12__shared_ptrINSt10filesystem4_DirELN9__gnu_cxx12_Lock_policyE2EEC2Ev
_ZNSt12__shared_ptrINSt10filesystem7__cxx114_DirELN9__gnu_cxx12_Lock_policyE2EEC1Ev
_ZNSt12__shared_ptrINSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEC2Ev
_ZNSt12__shared_ptrINSt10filesystem28recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEC2Ev
_ZNSt12__shared_ptrINSt10filesystem7__cxx114_DirELN9__gnu_cxx12_Lock_policyE2EEC2Ev

I will need to study why that happened; it might be that it was ok, because
I think the filesystem stuff is, unlike the rest, compiled with no exported
templates, but it would need at least some hacks in libstdc++ to preserve
the previously exported symbols.
Still, feels like a risky change this late if it wouldn't break ABI of other
libraries.

2024-04-23  Jakub Jelinek  

PR lto/113208
* decl2.cc (tentative_decl_linkage): Use comdat_linkage also
for implicit instantiations of maybe in charge ctors/dtors
if -fimplicit-templates or -fimplicit-inline-templates and
-fweak and target supports aliases.

* g++.dg/abi/comdat2.C: New test.
* g++.dg/lto/pr113208_0.C: New test.
* g++.dg/lto/pr113208_1.C: New file.
* g++.dg/lto/pr113208.h: New file.

--- gcc/cp/decl2.cc.jj  2024-04-22 15:16:55.328548807 +0200
+++ gcc/cp/decl2.cc 2024-04-23 09:52:18.993250442 +0200
@@ -3314,7 +3314,16 @@ tentative_decl_linkage (tree decl)
 to mark the functions at this point.  */
  if (DECL_DECLARED_INLINE_P (decl)
  && (!DECL_IMPLICIT_INSTANTIATION (decl)
- || DECL_DEFAULTED_FN (decl)))
+ || DECL_DEFAULTED_FN (decl)
+ /* For implicit instantiations of cdtors,
+if import_export_decl would use comdat linkage,
+make sure to use it right away, so that maybe_clone_body
+can use aliases.  See PR113208.  */
+ || (DECL_MAYBE_IN_CHARGE_CDTOR_P (decl)
+ && (flag_implicit_templates
+ || flag_implicit_inline_templates)
+ && flag_weak
+ && TARGET_SUPPORTS_ALIASES)))
{
  /* This function must have external linkage, as
 otherwise DECL_INTERFACE_KNOWN would have been
--- gcc/testsuite/g++.dg/abi/comdat2.C.jj   2024-04-23 10:04:28.485964610 
+0200
+++ gcc/testsuite/g++.dg/abi/comdat2.C  2024-04-23 10:05:24.757171194 +0200
@@ -0,0 +1,26 @@
+// PR lto/113208
+// { dg-do compile { target { c++11 && { *-*-*gnu* } } } }
+// { dg-additional-options "-O2 -fkeep-inline-functions" }
+// { dg-final { scan-assembler "_ZN1BI1CEC5ERKS1_,comdat" } }
+// { dg-final { scan-assembler-not "_ZN1BI1CEC1ERKS1_,comdat" } }
+// { dg-final { scan-assembler-not "_ZN1BI1CEC2ERKS1_,comdat" } }
+
+template 
+struct A {
+  int foo () const;
+  A (int, int);
+};
+template 
+struct B : A {
+  constexpr B (const B &x) : A (1, x.foo ()) {}
+  B () : A (1, 2) {}
+};
+struct C;
+struct D : B {};
+voi

[PATCH] c++, v2: Fix constexpr evaluation of parameters passed by invisible reference [PR111284]

2024-04-23 Thread Jakub Jelinek

On Mon, Apr 15, 2024 at 02:19:36PM +0200, Jakub Jelinek wrote:
> They weren't the same, one was trying to evaluate the convert_from_reference
> with vc_glvalue, the other evaluates it without that and with vc_prvalue.
> Now, I guess the
> + /* Undo convert_for_arg_passing work here.  */
> + if (TYPE_REF_P (TREE_TYPE (x))
> + && !same_type_p (type, TREE_TYPE (TREE_TYPE (x
> +   x = cp_fold_convert (build_reference_type (type), x);
> part could be thrown away, given the other !same_type_p check (that one is
> needed because adjust_temp_type can't deal with that), at least
> when I remove that
> GXX_TESTSUITE_STDS=98,11,14,17,20,23,26 make check-g++ 
> RUNTESTFLAGS="dg.exp='constexpr-dtor* pr114426.C constexpr-111284.C 
> constexpr-lifetime7.C'"
> still passes.

I've now tested that version and it passed bootstrap/regtest on x86_64-linux
and i686-linux.  But as I said earlier, trying to tweak the patch further
doesn't work on the constexpr-dtor{5,6}.C tests.

2024-04-23  Jakub Jelinek  

PR c++/111284
* constexpr.cc (cxx_bind_parameters_in_call): For PARM_DECLs with
TREE_ADDRESSABLE types use vc_glvalue rather than vc_prvalue for
cxx_eval_constant_expression and if it doesn't have the same
type as it should, cast the reference type to reference to type
before convert_from_reference and instead of adjust_temp_type
take address of the arg, cast to reference to type and then
convert_from_reference.
(cxx_eval_constant_expression) : For lval case
on parameters with TREE_ADDRESSABLE types lookup result in
ctx->globals if possible.  Otherwise if lookup in ctx->globals
was successful for parameter with TREE_ADDRESSABLE type,
recurse with vc_prvalue on the returned value.

* g++.dg/cpp1z/constexpr-111284.C: New test.
* g++.dg/cpp1y/constexpr-lifetime7.C: Expect one error on a different
line.

--- gcc/cp/constexpr.cc.jj  2024-02-13 10:29:57.979155641 +0100
+++ gcc/cp/constexpr.cc 2024-03-07 19:35:01.032412221 +0100
@@ -1877,13 +1877,18 @@ cxx_bind_parameters_in_call (const const
  x = build_address (x);
}
   if (TREE_ADDRESSABLE (type))
-   /* Undo convert_for_arg_passing work here.  */
-   x = convert_from_reference (x);
-  /* Normally we would strip a TARGET_EXPR in an initialization context
-such as this, but here we do the elision differently: we keep the
-TARGET_EXPR, and use its CONSTRUCTOR as the value of the parm.  */
-  arg = cxx_eval_constant_expression (ctx, x, vc_prvalue,
- non_constant_p, overflow_p);
+   {
+ /* Undo convert_for_arg_passing work here.  */
+ x = convert_from_reference (x);
+ arg = cxx_eval_constant_expression (ctx, x, vc_glvalue,
+ non_constant_p, overflow_p);
+   }
+  else
+   /* Normally we would strip a TARGET_EXPR in an initialization context
+  such as this, but here we do the elision differently: we keep the
+  TARGET_EXPR, and use its CONSTRUCTOR as the value of the parm.  */
+   arg = cxx_eval_constant_expression (ctx, x, vc_prvalue,
+   non_constant_p, overflow_p);
   /* Check we aren't dereferencing a null pointer when calling a non-static
 member function, which is undefined behaviour.  */
   if (i == 0 && DECL_OBJECT_MEMBER_FUNCTION_P (fun)
@@ -1909,7 +1914,16 @@ cxx_bind_parameters_in_call (const const
{
  /* Make sure the binding has the same type as the parm.  But
 only for constant args.  */
- if (!TYPE_REF_P (type))
+ if (TREE_ADDRESSABLE (type))
+   {
+ if (!same_type_p (type, TREE_TYPE (arg)))
+   {
+ arg = build_fold_addr_expr (arg);
+ arg = cp_fold_convert (build_reference_type (type), arg);
+ arg = convert_from_reference (arg);
+   }
+   }
+ else if (!TYPE_REF_P (type))
arg = adjust_temp_type (type, arg);
  if (!TREE_CONSTANT (arg))
*non_constant_args = true;
@@ -7499,9 +7513,19 @@ cxx_eval_constant_expression (const cons
 
 case PARM_DECL:
   if (lval && !TYPE_REF_P (TREE_TYPE (t)))
-   /* glvalue use.  */;
+   {
+ /* glvalue use.  */
+ if (TREE_ADDRESSABLE (TREE_TYPE (t)))
+   if (tree v = ctx->global->get_value (t))
+ r = v;
+   }
   else if (tree v = ctx->global->get_value (t))
-   r = v;
+   {
+ r = v;
+ if (TREE_ADDRESSABLE (TREE_TYPE (t)))
+   r = cxx_eval_constant_expression (ctx, r, vc_prvalue,
+ non_constant_p, overflow_p);
+   }
   else if (lval)
/* Defer in case this is only used for its type.  */;
   else if (ctx->global->

[PATCH] i386: Avoid =&r,r,r andn double-word alternative for ia32 [PR114810]

2024-04-23 Thread Jakub Jelinek

Hi!

As discussed in the PR, on ia32 with its 8 GPRs, where 1 is always fixed
and 2 others often are as well, having an alternative which needs 3
double-word registers is just too much for RA.
The following patch splits that alternative into two: the one with o is
used even on ia32, but the one with the 3x r is used just for -m64/-mx32.
Tried to reduce the testcase further, but it wasn't easily possible.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-04-23  Jakub Jelinek  

PR target/114810
* config/i386/i386.md (*andn3_doubleword_bmi): Split the =&r,r,ro
alternative into =&r,r,r enabled only for x64 and =&r,r,o.

* g++.target/i386/pr114810.C: New test.

--- gcc/config/i386/i386.md.jj  2024-04-15 14:25:58.203322878 +0200
+++ gcc/config/i386/i386.md 2024-04-23 12:15:47.171956091 +0200
@@ -12482,10 +12482,10 @@ (define_split
 })
 
 (define_insn_and_split "*andn3_doubleword_bmi"
-  [(set (match_operand: 0 "register_operand" "=&r,r,r")
+  [(set (match_operand: 0 "register_operand" "=&r,&r,r,r")
(and:
- (not: (match_operand: 1 "register_operand" "r,0,r"))
- (match_operand: 2 "nonimmediate_operand" "ro,ro,0")))
+ (not: (match_operand: 1 "register_operand" "r,r,0,r"))
+ (match_operand: 2 "nonimmediate_operand" "r,o,ro,0")))
(clobber (reg:CC FLAGS_REG))]
   "TARGET_BMI"
   "#"
@@ -12496,7 +12496,8 @@ (define_insn_and_split "*andn3_doub
(parallel [(set (match_dup 3)
   (and:DWIH (not:DWIH (match_dup 4)) (match_dup 5)))
  (clobber (reg:CC FLAGS_REG))])]
-  "split_double_mode (mode, &operands[0], 3, &operands[0], 
&operands[3]);")
+  "split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);"
+  [(set_attr "isa" "x64,*,*,*")])
 
 (define_insn_and_split "*andn3_doubleword"
   [(set (match_operand:DWI 0 "register_operand")
--- gcc/testsuite/g++.target/i386/pr114810.C.jj 2024-04-23 14:21:19.202613799 
+0200
+++ gcc/testsuite/g++.target/i386/pr114810.C2024-04-23 14:24:22.813116589 
+0200
@@ -0,0 +1,861 @@
+// PR target/114810
+// { dg-do compile { target { { { *-*-linux* } && ia32 } && c++17 } } }
+// { dg-options "-mstackrealign -O2 -mbmi -fno-exceptions -fno-plt 
-march=x86-64 -w" }
+// { dg-additional-options "-fpie" { target pie } }
+
+enum E1 { a, dp, b, jm, c, dq, d, mj, e, dr, f, jn, h, dt, j, nt, l, du, m, 
jo, n, dv, o, mk, p, dw, q, jp, s, dx, t, ol, u, dy, v, jq, w };
+enum dz { x, ml, y };
+struct ea { short g; } z, jr;
+long long aa;
+struct eb { ea ab; ea dp[]; };
+enum ac { };
+typedef enum { } nu;
+struct ad { ac k; };
+unsigned ec (long);
+struct ae;
+int js (ae);
+unsigned af ();
+struct ed;
+template < int ag > struct ee { using ah = ed[ag]; };
+template < int ag > struct array { typename ee < ag >::ah ai; ed & operator[] 
(int aj) { return ai[aj]; } };
+struct { void dp (...); } ak;
+void ef (int);
+template < typename al > struct jt { al & operator[] (short); };
+struct am { void operator= (bool); };
+struct an { am operator[] (unsigned); };
+template < typename, unsigned, unsigned >using eg = an;
+struct ao;
+struct ae { ae (ao *); };
+struct mm { mm (); mm (int); };
+enum ap { };
+enum eh { };
+bool aq, ju, ar, ei, nv, as, ej, at;
+struct jv
+{
+  jv (eh au):dp (au) {}
+  jv ();
+  operator eh ();
+  unsigned av ()
+  {
+aq = dp & 7;
+return dp * (aq ? : 4);
+  }
+  unsigned ek ()
+  {
+int aw;
+bool mn = dp & 7;
+aw = dp * (mn ? : 4);
+return aw + 3 >> 2;
+  }
+  eh dp;
+} ax, el, ay, jw, az, em, ba, om;
+struct ed
+{
+  ed ():bb (), dp () {}
+  int bc () { return bb; }
+  jv en () { return (eh) dp; }
+  unsigned ek ()
+  {
+jv bd;
+bd = (eh) dp;
+return bd.ek ();
+  }
+  ap jx ();
+  unsigned bb:24;
+  int dp:8;
+};
+struct be { short dp = 0; } bf, eo;
+struct bg
+{
+  bg ();
+  bg (ed r)
+  {
+dp.bh = r;
+if (r.bc ())
+  mo = true;
+else
+  bi = true;
+  }
+  static bg ep (int);
+  bg (be);
+  struct { ed bh; } dp;
+  union { char mo:1; char bi:1; short bj = 0; };
+} jy, bk, eq, bl, mp, bm, er;
+struct bn
+{
+  explicit bn (ed bo):bh (bo) {}
+  ed dp ();
+  ed bh;
+  be es;
+  char bj = 0;
+};
+struct bp
+{
+  eg < int, 6, 4 > dp;
+};
+jt < bg > bq;
+jt < bn > definitions;
+struct ao
+{
+  bp & br ();
+};
+enum jz:short;
+template < typename > using bs = ae;
+ao *et ();
+short bt, nw;
+struct bu
+{
+  int dp;
+};
+dz bv;
+unsigned eu;
+struct bw
+{
+  ac k;
+  unsigned dp;
+} *bx;
+bool ka ();
+struct by
+{
+  bool dp;
+};
+typedef enum
+{ bz, ev } ca;
+typedef enum
+{
+  mq, cb, ew, cc, kb, cd, ex, ce
+} on;
+typedef struct cf
+{
+  on jx;
+  char dp;
+  char cg;
+} kc;
+struct ch
+{
+  kc *dp;
+};
+typedef enum
+{
+  ci, ey, cj, mr, ck, ez, cl, kd, cm, fa, cn, nx, co, fb, cp, ke, cq, fc,
+  cr, ms, cs, fd, ct, kf, cu, fe, cv, os, cw, ff, cx, kg, cy, fg, cz, mt, da
+} fh;
+typedef struct { cf db; fh dp; kc dc; ch kh[]; } dd;
+nu fi ();
+typedef enum
+{ de, ny } fj;
+typedef struct { fj jx; } df

Re: [PATCH] DOCUMENTATION_ROOT_URL vs. release branches [PR114738]

2024-04-23 Thread Jakub Jelinek

On Tue, Apr 23, 2024 at 11:40:55AM -0400, David Malcolm wrote:
> > So, I think at least for the MAJOR.MINOR.0 releases we want to use
> > URLs like above rather than the trunk ones and we can use the same
> > process
> > of updating *.opt.urls as well for that.
> 
> Would it make sense to instead update the default value in
> gcc/configure.ac for DOCUMENTATION_ROOT_URL when branching or
> releasing, from https://gcc.gnu.org/onlinedocs/ to
> https://gcc.gnu.org/onlinedocs/gcc-MAJOR-MINOR.0/
> 
> ?
> 
> Before this patch the DOCUMENTATION_ROOT_URL expresses the location of
> a built texinfo html tree of docs, and the url suffixes express the
> path within that tree.
> 
> As the patch is written, if a distributor overrides --with-
> documentation-root-url= at configure time, then they need to mirror the
> structure of our website on their website, which seems like a burden.

Sure, that is doable (of course, it shouldn't be done by updating
gcc/configure.ac but by adjusting the default in there based on gcc_version,
I'll post a patch tomorrow).

Still, what do you think we should do on the release branches (recommend to
developers and check with the post-commit CI)?
No regeneration of *.urls except before doing a new release candidate,
or a different make goal that would grab html files from the web and
regenerate against that?

Jakub

[committed] testsuite: Adjust testsuite expectations for diagnostic spelling fixes

2024-04-23 Thread Jakub Jelinek

Hi!

The nullability-00.m* tests unfortunately check the exact spelling of
the diagnostics I've changed earlier today.

Tested on x86_64-linux and i686-linux, committed to trunk as obvious.

2024-04-23  Jakub Jelinek  

* objc.dg/attributes/nullability-00.m: Adjust expected diagnostic
spelling: recognised -> recognized.
* obj-c++.dg/attributes/nullability-00.mm: Likewise.

--- gcc/testsuite/objc.dg/attributes/nullability-00.m.jj2020-11-13 
13:50:23.365551538 +0100
+++ gcc/testsuite/objc.dg/attributes/nullability-00.m   2024-04-23 
17:37:18.978721522 +0200
@@ -8,7 +8,7 @@ __attribute__((objc_nullability("unspeci
 __attribute__((objc_nullability("nullable"))) id c;
 __attribute__((objc_nullability("nonnull"))) id d;
 __attribute__((objc_nullability("resettable"))) id e;
-__attribute__((objc_nullability("nonsense"))) id e_3; /* { dg-error 
{'objc_nullability' attribute argument '"nonsense"' is not recognised} } */
+__attribute__((objc_nullability("nonsense"))) id e_3; /* { dg-error 
{'objc_nullability' attribute argument '"nonsense"' is not recognized} } */
 __attribute__((objc_nullability(noGoingToWork))) id e_4; /* { dg-error 
{'noGoingToWork' undeclared here} } */
 
 @interface MyRoot
--- gcc/testsuite/obj-c++.dg/attributes/nullability-00.mm.jj2020-11-13 
13:50:23.361551584 +0100
+++ gcc/testsuite/obj-c++.dg/attributes/nullability-00.mm   2024-04-23 
17:37:35.717500341 +0200
@@ -8,7 +8,7 @@ __attribute__((objc_nullability("unspeci
 __attribute__((objc_nullability("nullable"))) id c;
 __attribute__((objc_nullability("nonnull"))) id d;
 __attribute__((objc_nullability("resettable"))) id e;
-__attribute__((objc_nullability("nonsense"))) id e_3; /* { dg-error 
{'objc_nullability' attribute argument '"nonsense"' is not recognised} } */
+__attribute__((objc_nullability("nonsense"))) id e_3; /* { dg-error 
{'objc_nullability' attribute argument '"nonsense"' is not recognized} } */
 __attribute__((objc_nullability(noGoingToWork))) id e_4; /* { dg-error 
{'noGoingToWork' was not declared in this scope} } */
 
 @interface MyRoot

Jakub

Re: [PATCH] DOCUMENTATION_ROOT_URL vs. release branches [PR114738]

2024-04-23 Thread David Malcolm

On Wed, 2024-04-17 at 13:16 +0200, Jakub Jelinek wrote:
> Hi!
> 
> Starting with GCC 14 we have the nice URLification of the options
> printed
> in diagnostics, say for in
> test.c:4:23: warning: format ‘%d’ expects argument of type ‘int’, but
> argument 2 has type ‘long int’ [-Wformat=]
> the -Wformat= is underlined in some terminals and hovering on it
> shows
> https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wformat
> link.

That isn't new in GCC 14; we've provided the URLs for the option
guarding a warning since GCC 10, I think.  What's new is that we now
"urlify" any quoted text that mentions an option, and that the option
URLs are now based on the anchors in the generated HTML (and thus more
likely to be correct).

> 
> This works nicely on the GCC trunk, where the online documentation is
> regenerated every day from a cron job and more importantly, people
> rarely
> use the trunk snapshots for too long, so it is unlikely that further
> changes
> in the documentation will make too many links stale, because users
> will
> simply regularly update to newer snapshots.
> 
> I think it doesn't work properly on release branches though.
> Some users only use the released versions (i.e. MAJOR.MINOR.0) from
> tarballs
> but can use them for a couple of years, others use snapshots from the
> release branches, but again they could be in use for months or years
> and
> the above mentioned online docs which represent just the GCC trunk
> might
> diverge significantly.
> 
> Now, for the releases we always publish also online docs for the
> release,
> which unlike the trunk online docs will not change further, under
> e.g.
> https://gcc.gnu.org/onlinedocs/gcc-14.1.0/gcc/Warning-Options.html#index-Wformat
> or
> https://gcc.gnu.org/onlinedocs/gcc-14.2.0/gcc/Warning-Options.html#index-Wformat
> etc.


> 
> So, I think at least for the MAJOR.MINOR.0 releases we want to use
> URLs like above rather than the trunk ones and we can use the same
> process
> of updating *.opt.urls as well for that.

Would it make sense to instead update the default value in
gcc/configure.ac for DOCUMENTATION_ROOT_URL when branching or
releasing, from https://gcc.gnu.org/onlinedocs/ to
https://gcc.gnu.org/onlinedocs/gcc-MAJOR-MINOR.0/

?

Before this patch the DOCUMENTATION_ROOT_URL expresses the location of
a built texinfo html tree of docs, and the url suffixes express the
path within that tree.

As the patch is written, if a distributor overrides --with-
documentation-root-url= at configure time, then they need to mirror the
structure of our website on their website, which seems like a burden.



> 
> For the snapshots from release branches, we don't have such docs.
> One option (implemented in the patch below for the URL printing side)
> is
> point to the MAJOR.MINOR.0 docs even for MAJOR.MINOR.1 snapshots.
> Most of the links will work fine, for options newly added on the
> release
> branches (rare thing but still happens) can have until the next
> release
> no URLs for them and get them with the next point release.
> The question is what to do about make regenerate-opt-urls for the
> release
> branch snapshots.  Either just document that users shouldn't
> make regenerate-opt-urls on release branches (and filter out
> *.opt.urls
> changes from their commits), add make regenerate-opt-urls task be RM
> responsibility before making first release candidate from a branch
> and
> adjust the autoregen CI to know about that.  Or add a separate goal
> which instead of relying on make html created files would download
> copy of the html files from the last release from web (kind of web
> mirroring the https://gcc.gnu.org/onlinedocs/gcc-14.1.0/ subtree
> locally)
> and doing regenerate-opt-urls on top of that?  But how to catch the
> point when first release candidate is made and we want to update to
> what will be the URLs once the release is made (but will be stale
> URLs
> for a week or so)?
> 
> Another option would be to add to cron daily regeneration of the
> online
> docs for the release branches.  I don't think that is a good idea
> though,
> because as I wrote earlier, not all users update to the latest
> snapshot
> frequently, so there can be users that use gcc 13.1.1 20230525 for
> months
> or years, and other users which use gcc 13.1.1 20230615 for years
> etc.
> 
> Another question is what is most sensible for users who want to
> override
> the default root and use the --with-documentation-root-url= configure
> option.  Do we expect them to grab the whole onlinedocs tree or for
> release
> branches at least include gcc-14.1.0/ subdirectory under the root?
> If so, the patch below deals with that.  Or should we just change the
> default documentation root url, so if user doesn't specify
> --with-documentation-root-url= and we are on a release branch,
> default that
> to https://gcc.gnu.org/onlinedocs/gcc-14.1.0/ or
> https://gcc.gnu.org/onlinedocs/gcc-14.2.0/ etc. and don't add any
> infix in
> get_option_url/make_doc_url, bu

Re: enable sqrt insns for cdce3.c

2024-04-23 Thread Hans-Peter Nilsson

On Mon, 22 Apr 2024, Alexandre Oliva wrote:
> [Revamped version of this patch, combined with others, to follow]
> 
> On Mar 10, 2021, Hans-Peter Nilsson  wrote:

Time flies...

> > On Wed, 10 Mar 2021, Alexandre Oliva wrote:

> Is mmix a sqrt_insn effective target?  proc
> check_effective_target_sqrt_insn in
> gcc/testsuite/lib/target-supports.exp suggests it shouldn't pass, so I'm
> surprised it would still try to run the test despite the added
> /* { dg-require-effective-target sqrt_insn } */ directive.

The effective-target sqrt_insn predicate says "supports hardware 
square root instructions" and doesn't make a difference between 
sqrtdf2 (double) and sqrtsf2 (float).  I'm extrapolating that 
the "divine meaning" of the comment is that such an instruction 
must be present for all supported floating-point modes for the 
predicate to yield true (when the predicate is correctly 
implemented).

(We could also fix the predicate description to actually say 
"for all floating-point modes" and/or split the predicate into 
mode-specific variants, etc. ;-)

MMIX has sqrtdf2 but not sqrtsf2, and the latter is what's used 
in cdce3.c.

> cdce3 is supposed to shrink-wrap the sqrtf(x) call into something like
> (x >= 0 ? .SQRT(x) : sqrtf(x)), where .SQRT stands for a square root
> instruction.

...for 32-bit single floats.

> Since we don't know why it still runs for you, I'm keeping the mmix
> explicit skip in the new version of the patch.

Thanks, that does seem like TRT.

brgds, H-P

[Patch, fortran] PR89462 - [11/12/13/14 Regression] gfortran loops in code generation

2024-04-23 Thread Paul Richard Thomas

Hi All,

Jakub pinpointed the source of this bug in comment 6 of the PR. The rest
was 'obvious' :-)

I plan to push the patch to mainline in the next 24 hours unless there are
opinions to the contrary. Backporting is proposed to occur a couple of
weeks later.

Best regards

Paul

Fortran: Generate new charlens for shared symbol typespecs [PR89462]

2024-04-23  Paul Thomas  
   Jakub Jelinek  

gcc/fortran
PR fortran/89462
* decl.cc (build_sym): Add an extra argument 'elem'. If 'elem'
is greater than 1, gfc_new_charlen is called to generate a new
charlen, registered in the symbol namespace.
(variable_decl, enumerator_decl): Set the new argument in the
calls to build_sym.

gcc/testsuite/
PR fortran/89462
* gfortran.dg/pr89462.f90: New test.
diff --git a/gcc/fortran/decl.cc b/gcc/fortran/decl.cc
index a7576f4bc40..b8308aeee55 100644
--- a/gcc/fortran/decl.cc
+++ b/gcc/fortran/decl.cc
@@ -1713,7 +1713,7 @@ gfc_verify_c_interop_param (gfc_symbol *sym)
 /* Function called by variable_decl() that adds a name to the symbol table.  */
 
 static bool
-build_sym (const char *name, gfc_charlen *cl, bool cl_deferred,
+build_sym (const char *name, int elem, gfc_charlen *cl, bool cl_deferred,
 	   gfc_array_spec **as, locus *var_locus)
 {
   symbol_attribute attr;
@@ -1778,7 +1778,10 @@ build_sym (const char *name, gfc_charlen *cl, bool cl_deferred,
 
   if (sym->ts.type == BT_CHARACTER)
 {
-  sym->ts.u.cl = cl;
+  if (elem > 1)
+	sym->ts.u.cl = gfc_new_charlen (sym->ns, cl);
+  else
+	sym->ts.u.cl = cl;
   sym->ts.deferred = cl_deferred;
 }
 
@@ -2960,7 +2963,7 @@ variable_decl (int elem)
  create a symbol for those yet.  If we fail to create the symbol,
  bail out.  */
   if (!gfc_comp_struct (gfc_current_state ())
-  && !build_sym (name, cl, cl_deferred, &as, &var_locus))
+  && !build_sym (name, elem, cl, cl_deferred, &as, &var_locus))
 {
   m = MATCH_ERROR;
   goto cleanup;
@@ -10938,7 +10941,7 @@ enumerator_decl (void)
   /* OK, we've successfully matched the declaration.  Now put the
  symbol in the current namespace. If we fail to create the symbol,
  bail out.  */
-  if (!build_sym (name, NULL, false, &as, &var_locus))
+  if (!build_sym (name, 1, NULL, false, &as, &var_locus))
 {
   m = MATCH_ERROR;
   goto cleanup;
diff --git a/gcc/testsuite/gfortran.dg/pr89462.f90 b/gcc/testsuite/gfortran.dg/pr89462.f90
new file mode 100644
index 000..9efdb1adbc7
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr89462.f90
@@ -0,0 +1,12 @@
+! { dg-do compile }
+! Test the fix for PR89462 in which the shared 'cl' field of the typespec
+! shared between 'test', 'TR' and 'aTP' caused the compiler to go into an
+! infinite loop.
+! Contributed by Sergei Trofimovich  
+  CHARACTER*1 FUNCTION test(H)
+ CHARACTER*1 test2,TR,aTP
+ ENTRY test2(L)
+ CALL ttest3(aTP)
+ test = TR
+ RETURN
+  END

Re: [PATCH v1] RISC-V: Adjust overlap attr after revert d3544cea63d and e65aaf8efe1

2024-04-23 Thread Patrick O'Neill


Hi Pan,

Sorry about that. It looks like there was a difference between my local 
machine and the CI machine.


From the CI it looks like we're back to the failure list we had on Friday.

I'll do some local testing to manually confirm this.

Thanks,
Patrick

On 4/22/24 23:50, Li, Pan2 wrote:


Hi Patrick,

After some investigation and double-checking, I think the 
gcc.dg/graphite/pr111878.c ICE may have nothing to do
with the patches of the revert series, as it has existed for quite a while. It may be related 
to the commit below:

2e7abd09621a4401d44f4513adf126bce4b4828b RISC-V: Block VLSmodes according to 
TARGET_MAX_LMUL and BITS_PER_RISCV_VECTOR

Could you please help to double-check it *manually*? Here are my steps 
for your reference; I will take care of this failure soon.

../__RISC-V_INSTALL___RV64/bin/riscv64-unknown-elf-gcc --version
riscv64-unknown-elf-gcc (GCC) 14.0.0 20231205 (experimental)
Copyright (C) 2023 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

1. download isl-0.24, let isl -> /some-where/riscv-gnu-toolchain/gcc/isl-0.24
2. mkdir __BUILD__ && cd __BUILD__ && ../configure \
   --target=riscv64-unknown-elf \
   --prefix=${INSTALL_DIR} \
   --disable-shared \
   --enable-threads \
   --enable-tls \
   --enable-languages=c,c++,fortran \
   --with-system-zlib \
   --with-newlib \
   --disable-libmudflap \
   --disable-libssp \
   --disable-libquadmath \
   --disable-libgomp \
   --enable-nls \
   --disable-tm-clone-registry \
   --src=`pwd`/../ \
   --with-abi=lp64d \
   --with-arch=rv64gcv \
   --with-tune=rocket \
   --with-isa-spec=20191213 \
   CFLAGS_FOR_BUILD="-O0 -g" \
   CXXFLAGS_FOR_BUILD="-O0 -g" \
   CFLAGS_FOR_TARGET="-O0  -g" \
   CXXFLAGS_FOR_TARGET="-O0 -g" \
   BOOT_CFLAGS="-O0 -g" \
   CFLAGS="-O0 -g" \
   CXXFLAGS="-O0 -g" \
   GM2FLAGS_FOR_TARGET="-O0 -g" \
   GOCFLAGS_FOR_TARGET="-O0 -g" \
   GDCFLAGS_FOR_TARGET="-O0 -g"
make -j $(nproc) all-gcc && make install-gcc
3. ../__RISC-V_INSTALL___RV64/bin/riscv64-unknown-elf-gcc 
gcc/testsuite/gcc.dg/graphite/pr111878.c -O3 -fgraphite-identity 
-fsave-optimization-record -march=rv64gcv -mabi=lp64d -c -S -o -

Pan

-Original Message-
From: Li, Pan2
Sent: Tuesday, April 23, 2024 10:32 AM
To: Patrick O'Neill ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; rdapp@gmail.com
Subject: RE: [PATCH v1] RISC-V: Adjust overlap attr after revert d3544cea63d 
and e65aaf8efe1

Thanks Patrick.

It turns out that the make report does not surface the error below, and so the 
graphite.exp tests never run.
That explains why I missed the test failures; I will take care of it ASAP.

sorry, unimplemented: Graphite loop optimizations cannot be used (isl is not 
available)

Pan

-Original Message-
From: Patrick O'Neill 
Sent: Tuesday, April 23, 2024 8:32 AM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; rdapp@gmail.com
Subject: Re: [PATCH v1] RISC-V: Adjust overlap attr after revert d3544cea63d 
and e65aaf8efe1

This patch in particular does not cause any more regressions. It's one
of the other reverts from the weekend.

Before the reverts [1]:
                      |  gcc |
g++ | gfortran |
      rv64gcv/  lp64d/ medlow |   48/    32 |     12/    3|   12 /    2

After the reverts:
                      |  gcc |
g++ | gfortran |
      rv64gcv/  lp64d/ medlow |   50 /    33 |   12 / 3 |   26 / 7 |


gcc new fails:
FAIL: gcc.dg/graphite/pr111878.c (internal compiler error: in
extract_insn, at recog.cc:2812)
FAIL: gcc.dg/graphite/pr111878.c (test for excess errors)

gfortran new fails:
FAIL: gfortran.dg/graphite/id-27.f90   -O  (internal compiler error: in
extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/id-27.f90   -O  (test for excess errors)
FAIL: gfortran.dg/graphite/pr14741.f90   -O  (internal compiler error:
in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr14741.f90   -O  (test for excess errors)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal
compiler error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -g  (internal compiler
error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/graphite/pr29832.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal
compiler error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29832.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
FAIL: gfortran

Re: [PATCH] c++: Fix ICE with xobj parms and maybe incomplete decl-specifiers

2024-04-23 Thread Jason Merrill


On 4/21/24 19:59, Patrick Palka wrote:

Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

-- >8 --

This fixes a null dereference issue when decl_specifiers.type is not yet
provided.

gcc/cp/ChangeLog:

* parser.cc (cp_parser_parameter_declaration): Check if
decl_specifiers.type is null.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/explicit-obj-basic7.C: New test.


LGTM


Yes, OK.

Re: [PATCH] libbacktrace: Avoid GNU ld --compress-debug-sections=zlib-gabi

2024-04-23 Thread Jakub Jelinek

On Tue, Apr 23, 2024 at 04:18:49PM +0200, Jakub Jelinek wrote:
> Then you have two tests (ctestg and ctesta) doing exactly the same thing,
> that can't be right.
> I guess it might be fine to use zlib when it is an alias to zlib-gabi,
> but zlib-gnu shouldn't be replaced.
> 
> I must say I don't really understand the patch though, because configury
> checks
> AC_CACHE_CHECK([whether --compress-debug-sections is supported],
> [libgo_cv_ld_compress],
> [LDFLAGS_hold=$LDFLAGS
> LDFLAGS="$LDFLAGS -Wl,--compress-debug-sections=zlib-gnu"
> AC_LINK_IFELSE([AC_LANG_PROGRAM(,)],
> [libgo_cv_ld_compress=yes],
> [libgo_cv_ld_compress=no])
> LDFLAGS=$LDFLAGS_hold])
> AM_CONDITIONAL(HAVE_COMPRESSED_DEBUG, test "$libgo_cv_ld_compress" = yes)
> 
> So, if Solaris doesn't support --compress-debug-sections=zlib-gnu, it
> shouldn't be tested.  Or does it support zlib-gnu and zlib?

What we could do is drop the HAVE_COMPRESSED_DEBUG stuff altogether and
instead, similarly to how we have HAVE_COMPRESSED_DEBUG_ZSTD, have
HAVE_COMPRESSED_DEBUG_{ZLIB,ZLIB_GABI,ZLIB_GNU}, and for each of those,
if the linker supports it, test with the corresponding flag.

Jakub

Re: [PATCH] libbacktrace: Avoid GNU ld --compress-debug-sections=zlib-gabi

2024-04-23 Thread Jakub Jelinek

On Tue, Apr 23, 2024 at 04:05:07PM +0200, Rainer Orth wrote:
> I noticed that libbacktrace make check FAILs on Solaris with the native
> ld already when building the tests:
> 
> libtool: link: /var/gcc/regression/master/11.4-gcc/build/./gcc/xgcc 
> -B/var/gcc/r
> egression/master/11.4-gcc/build/./gcc/ -B/vol/gcc/sparc-sun-solaris2.11/bin/ 
> -B/
> vol/gcc/sparc-sun-solaris2.11/lib/ -isystem 
> /vol/gcc/sparc-sun-solaris2.11/inclu
> de -isystem /vol/gcc/sparc-sun-solaris2.11/sys-include -fchecking=1 
> -funwind-tab
> les -frandom-seed=ctesta_alloc -W -Wall -Wwrite-strings -Wstrict-prototypes 
> -Wmi
> ssing-prototypes -Wold-style-definition -Wmissing-format-attribute 
> -Wcast-qual -
> Werror -g -g -O2 -Wl,--compress-debug-sections=zlib-gabi -o ctesta_alloc 
> ctesta_
> alloc-btest.o ctesta_alloc-testlib.o  ./.libs/libbacktrace_alloc.a
> ld: fatal: unrecognized '--compress-debug-sections' cmp-type: zlib-gabi
> collect2: error: ld returned 1 exit status
> make[1]: *** [Makefile:1379: ctesta_alloc] Error 1
> 
> Solaris ld only supports --compress-debug-sections=zlib, while GNU ld
> allows zlib-gabi as an alias for zlib.  gold is the same, it seems,
> while lld doesn't support zlib-gabi at all.
> 
> Therefore the patch uses zlib instead.

Then you have two tests (ctestg and ctesta) doing exactly the same thing,
that can't be right.
I guess it might be fine to use zlib when it is an alias to zlib-gabi,
but zlib-gnu shouldn't be replaced.

I must say I don't really understand the patch though, because configury
checks
AC_CACHE_CHECK([whether --compress-debug-sections is supported],
[libgo_cv_ld_compress],
[LDFLAGS_hold=$LDFLAGS
LDFLAGS="$LDFLAGS -Wl,--compress-debug-sections=zlib-gnu"
AC_LINK_IFELSE([AC_LANG_PROGRAM(,)],
[libgo_cv_ld_compress=yes],
[libgo_cv_ld_compress=no])
LDFLAGS=$LDFLAGS_hold])
AM_CONDITIONAL(HAVE_COMPRESSED_DEBUG, test "$libgo_cv_ld_compress" = yes)

So, if Solaris doesn't support --compress-debug-sections=zlib-gnu, it
shouldn't be tested.  Or does it support zlib-gnu and zlib?

Jakub

[PATCH] libbacktrace: Avoid GNU ld --compress-debug-sections=zlib-gabi

2024-04-23 Thread Rainer Orth

I noticed that libbacktrace make check FAILs on Solaris with the native
ld already when building the tests:

libtool: link: /var/gcc/regression/master/11.4-gcc/build/./gcc/xgcc -B/var/gcc/r
egression/master/11.4-gcc/build/./gcc/ -B/vol/gcc/sparc-sun-solaris2.11/bin/ -B/
vol/gcc/sparc-sun-solaris2.11/lib/ -isystem /vol/gcc/sparc-sun-solaris2.11/inclu
de -isystem /vol/gcc/sparc-sun-solaris2.11/sys-include -fchecking=1 -funwind-tab
les -frandom-seed=ctesta_alloc -W -Wall -Wwrite-strings -Wstrict-prototypes -Wmi
ssing-prototypes -Wold-style-definition -Wmissing-format-attribute -Wcast-qual -
Werror -g -g -O2 -Wl,--compress-debug-sections=zlib-gabi -o ctesta_alloc ctesta_
alloc-btest.o ctesta_alloc-testlib.o  ./.libs/libbacktrace_alloc.a
ld: fatal: unrecognized '--compress-debug-sections' cmp-type: zlib-gabi
collect2: error: ld returned 1 exit status
make[1]: *** [Makefile:1379: ctesta_alloc] Error 1

Solaris ld only supports --compress-debug-sections=zlib, while GNU ld
allows zlib-gabi as an alias for zlib.  gold is the same, it seems,
while lld doesn't support zlib-gabi at all.

Therefore the patch uses zlib instead.

Tested on i386-pc-solaris2.11 with ld and gld and x86_64-pc-linux-gnu
with gld.

With this patch, libbacktrace make check PASSes on Solaris.
Unfortunately, this is quite easy to miss since the make check output
isn't in DejaGnu summary format, thus not picked up by make
mail-report.log.  I mean to adapt the libgo support to produce that
output format for libbacktrace to fix this.

Ok for trunk?

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2024-04-05  Rainer Orth  

libbacktrace:
* Makefile.am (ctestg_LDFLAGS): Use
-Wl,--compress-debug-sections=zlib instead of zlib-gabi.
(ctesta_LDFLAGS): Likewise.
* Makefile.in: Regenerate.

# HG changeset patch
# Parent  3f974c85f19c90c967e272a1ca6523b897cc175f
libbacktrace: Avoid GNU ld --compress-debug-sections=zlib-gabi

diff --git a/libbacktrace/Makefile.am b/libbacktrace/Makefile.am
--- a/libbacktrace/Makefile.am
+++ b/libbacktrace/Makefile.am
@@ -497,12 +497,12 @@ if HAVE_COMPRESSED_DEBUG
 
 ctestg_SOURCES = btest.c testlib.c
 ctestg_CFLAGS = $(libbacktrace_TEST_CFLAGS)
-ctestg_LDFLAGS = -Wl,--compress-debug-sections=zlib-gnu $(libbacktrace_testing_ldflags)
+ctestg_LDFLAGS = -Wl,--compress-debug-sections=zlib $(libbacktrace_testing_ldflags)
 ctestg_LDADD = libbacktrace.la
 
 ctesta_SOURCES = btest.c testlib.c
 ctesta_CFLAGS = $(libbacktrace_TEST_CFLAGS)
-ctesta_LDFLAGS = -Wl,--compress-debug-sections=zlib-gabi $(libbacktrace_testing_ldflags)
+ctesta_LDFLAGS = -Wl,--compress-debug-sections=zlib $(libbacktrace_testing_ldflags)
 ctesta_LDADD = libbacktrace.la
 
 BUILDTESTS += ctestg ctesta
diff --git a/libbacktrace/Makefile.in b/libbacktrace/Makefile.in
--- a/libbacktrace/Makefile.in
+++ b/libbacktrace/Makefile.in
@@ -1177,11 +1177,11 @@ libbacktrace_testing_ldflags = -no-insta
 @HAVE_PTHREAD_TRUE@@NATIVE_TRUE@ttest_alloc_LDADD = libbacktrace_alloc.la
 @HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@ctestg_SOURCES = btest.c testlib.c
 @HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@ctestg_CFLAGS = $(libbacktrace_TEST_CFLAGS)
-@HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@ctestg_LDFLAGS = -Wl,--compress-debug-sections=zlib-gnu $(libbacktrace_testing_ldflags)
+@HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@ctestg_LDFLAGS = -Wl,--compress-debug-sections=zlib $(libbacktrace_testing_ldflags)
 @HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@ctestg_LDADD = libbacktrace.la
 @HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@ctesta_SOURCES = btest.c testlib.c
 @HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@ctesta_CFLAGS = $(libbacktrace_TEST_CFLAGS)
-@HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@ctesta_LDFLAGS = -Wl,--compress-debug-sections=zlib-gabi $(libbacktrace_testing_ldflags)
+@HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@ctesta_LDFLAGS = -Wl,--compress-debug-sections=zlib $(libbacktrace_testing_ldflags)
 @HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@ctesta_LDADD = libbacktrace.la
 @HAVE_COMPRESSED_DEBUG_TRUE@@HAVE_COMPRESSED_DEBUG_ZSTD_TRUE@@NATIVE_TRUE@ctestzstd_SOURCES = btest.c testlib.c
 @HAVE_COMPRESSED_DEBUG_TRUE@@HAVE_COMPRESSED_DEBUG_ZSTD_TRUE@@NATIVE_TRUE@ctestzstd_CFLAGS = $(libbacktrace_TEST_CFLAGS)

Fix documentation of -ftree-loop-distribute-patterns

2024-04-23 Thread Jan Hubicka

Hi,
we have:

   -ftree-loop-distribute-patterns
   Perform loop distribution of patterns that can be code generated 
with calls to a library.  This flag is enabled by default at -O2 and higher, 
and by -fprofile-use and -fauto-profile.

   This pass distributes the initialization loops and generates a call 
to memset zero.  For example, the loop

...

   and the initialization loop is transformed into a call to memset 
zero.  This flag is enabled by default at -O3.  It is also enabled by 
-fprofile-use and -fauto-profile.

Which mentions optimization flags twice, and the repeated mention is out of
date, since we enable this option at -O2 as well.

Regtested x86_64-linux, plan to commit it shortly as obvious.

gcc/ChangeLog:

* doc/invoke.texi (-ftree-loop-distribute-patterns): Remove duplicated
sentence about optimization flags implying this.

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 2a35dc7ac75..27c31ab0c86 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -13852,8 +13852,6 @@ DO I = 1, N
 ENDDO
 @end smallexample
 and the initialization loop is transformed into a call to memset zero.
-This flag is enabled by default at @option{-O3}.
-It is also enabled by @option{-fprofile-use} and @option{-fauto-profile}.
 
 @opindex floop-interchange
 @item -floop-interchange

[committed] Further spelling fixes in translatable strings

2024-04-23 Thread Jakub Jelinek

On Tue, Apr 23, 2024 at 11:32:08AM +0100, Jonathan Wakely wrote:
> On Mon, 22 Apr 2024 at 22:30, Jakub Jelinek  wrote:
> Yup:
> https://gcc.gnu.org/codingconventions.html#Spelling
> 
> That spelling is explicitly mentioned at the link above, so they
> should be "ize" really.

Ok.  I've committed that patch plus the following as obvious too.

I see various similar cases in the m2 and rust FEs where they don't make it into
gcc/po/gcc.pot; I guess those would be nice to get fixed too, and best even find
out how to make those messages translatable.
Talking about e.g.
m2/gm2-compiler/M2Quads.mod:'%a unrecognised builtin 
constant', Id) |
m2/gm2-compiler/M2Quads.mod:  InternalError ('unrecognised value')
...
rust/parse/rust-parse-impl.h:   "unrecognised token %qs for item in 
trait",
rust/parse/rust-parse-impl.h:"unrecognised token %qs for item 
in inherent impl",
but none of that making it into gcc.pot.

2024-04-23  Jakub Jelinek  

* config/darwin.opt (init): Spelling fix: initialiser -> initializer.
gcc/c-family/
* c-attribs.cc (handle_objc_nullability_attribute): Spelling fix:
recognised -> recognized.
gcc/m2/
* lang.opt (fdef=, fmod=): Spelling fix: recognise -> recognize.

--- gcc/config/darwin.opt.jj2024-01-03 11:51:43.137570111 +0100
+++ gcc/config/darwin.opt   2024-04-23 10:34:56.406196449 +0200
@@ -211,7 +211,7 @@ Driver RejectNegative Separate
 
 init
 Driver RejectNegative Separate
--init The symbol  will be used as the first 
initialiser for a dylib.
+-init The symbol  will be used as the first 
initializer for a dylib.
 
 install_name
 Driver RejectNegative Separate
--- gcc/c-family/c-attribs.cc.jj2024-01-09 15:35:43.626688356 +0100
+++ gcc/c-family/c-attribs.cc   2024-04-23 10:30:23.458043442 +0200
@@ -6244,7 +6244,7 @@ handle_objc_nullability_attribute (tree
  || strcmp (TREE_STRING_POINTER (val), "resettable") == 0))
 *no_add_attrs = false; /* OK */
   else if (val != error_mark_node)
-error ("%qE attribute argument %qE is not recognised", name, val);
+error ("%qE attribute argument %qE is not recognized", name, val);
 
   return NULL_TREE;
 }
--- gcc/m2/lang.opt.jj  2024-04-23 08:30:59.312249288 +0200
+++ gcc/m2/lang.opt 2024-04-23 10:34:13.118806549 +0200
@@ -96,7 +96,7 @@ turn on tracing of procedure line number
 
 fdef=
 Modula-2 Joined
-recognise the specified suffix as a definition module filename
+recognize the specified suffix as a definition module filename
 
 fdump-system-exports
 Modula-2
@@ -172,7 +172,7 @@ compile all implementation modules and p
 
 fmod=
 Modula-2 Joined
-recognise the specified suffix as implementation and module filenames
+recognize the specified suffix as implementation and module filenames
 
 fnil
 Modula-2
@@ -278,7 +278,7 @@ static-libgm2
 Driver
 Link the standard Modula-2 libraries statically in the compilation.
 
-; Here are C options that we also recognise, either within the compiler
+; Here are C options that we also recognize, either within the compiler
 ; or to build the preprocessor command lines.
 
 Wall


Jakub

Re: [PATCH v2] ifcvt: Handle multiple rewired regs and refactor noce_convert_multiple_sets

2024-04-23 Thread Manolis Tsamis

On Thu, Nov 23, 2023 at 11:01 PM Richard Sandiford
 wrote:
>
> Manolis Tsamis  writes:
> > The existing implementation of need_cmov_or_rewire and
> > noce_convert_multiple_sets_1 assumes that sets are either REG or SUBREG.
> > This commit enhances them so they can handle/rewire arbitrary set 
> > statements.
> >
> > To do that a new helper struct noce_multiple_sets_info is introduced which 
> > is
> > used by noce_convert_multiple_sets and its helper functions. This results in
> > cleaner function signatures, improved efficiency (a number of vecs and hash
> > set/map are replaced with a single vec of struct) and simplicity.
> >
> > gcc/ChangeLog:
> >
> >   * ifcvt.cc (need_cmov_or_rewire): Renamed 
> > init_noce_multiple_sets_info.
> >   (init_noce_multiple_sets_info): Initialize noce_multiple_sets_info.
> >   (noce_convert_multiple_sets_1): Use noce_multiple_sets_info and handle
> >   rewiring of multiple registers.
> >   (noce_convert_multiple_sets): Updated to use noce_multiple_sets_info.
> >   * ifcvt.h (struct noce_multiple_sets_info): Introduce new struct
> >   noce_multiple_sets_info to store info for noce_convert_multiple_sets.
> >
> > Signed-off-by: Manolis Tsamis 
> > ---
>
> Thanks, this looks like a really nice clean-up.  One comment below:
>
> >
> > Changes in v2:
> > - Made standalone patch.
> > - Better comments, some more checks.
> >
> >  gcc/ifcvt.cc | 252 +++
> >  gcc/ifcvt.h  |  16 
> >  2 files changed, 129 insertions(+), 139 deletions(-)
> >
> > diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> > index a0af553b9ff..9486d54de34 100644
> > --- a/gcc/ifcvt.cc
> > +++ b/gcc/ifcvt.cc
> > @@ -98,14 +98,10 @@ static bool dead_or_predicable (basic_block, 
> > basic_block, basic_block,
> >   edge, bool);
> >  static void noce_emit_move_insn (rtx, rtx);
> >  static rtx_insn *block_has_only_trap (basic_block);
> > -static void need_cmov_or_rewire (basic_block, hash_set *,
> > -  hash_map *);
> > +static void init_noce_multiple_sets_info (basic_block,
> > +  auto_delete_vec &);
> >  static bool noce_convert_multiple_sets_1 (struct noce_if_info *,
> > -   hash_set *,
> > -   hash_map *,
> > -   auto_vec *,
> > -   auto_vec *,
> > -   auto_vec *, int *);
> > +  auto_delete_vec &, int *);
> >
> >  /* Count the number of non-jump active insns in BB.  */
> >
> > @@ -3270,24 +3266,13 @@ noce_convert_multiple_sets (struct noce_if_info 
> > *if_info)
> >rtx x = XEXP (cond, 0);
> >rtx y = XEXP (cond, 1);
> >
> > -  /* The true targets for a conditional move.  */
> > -  auto_vec targets;
> > -  /* The temporaries introduced to allow us to not consider register
> > - overlap.  */
> > -  auto_vec temporaries;
> > -  /* The insns we've emitted.  */
> > -  auto_vec unmodified_insns;
> > -
> > -  hash_set need_no_cmov;
> > -  hash_map rewired_src;
> > -
> > -  need_cmov_or_rewire (then_bb, &need_no_cmov, &rewired_src);
> > +  auto_delete_vec insn_info;
> > +  init_noce_multiple_sets_info (then_bb, insn_info);
> >
> >int last_needs_comparison = -1;
> >
> >bool ok = noce_convert_multiple_sets_1
> > -(if_info, &need_no_cmov, &rewired_src, &targets, &temporaries,
> > - &unmodified_insns, &last_needs_comparison);
> > +(if_info, insn_info, &last_needs_comparison);
> >if (!ok)
> >return false;
> >
> > @@ -3302,8 +3287,7 @@ noce_convert_multiple_sets (struct noce_if_info 
> > *if_info)
> >end_sequence ();
> >start_sequence ();
> >ok = noce_convert_multiple_sets_1
> > - (if_info, &need_no_cmov, &rewired_src, &targets, &temporaries,
> > -  &unmodified_insns, &last_needs_comparison);
> > + (if_info, insn_info, &last_needs_comparison);
> >/* Actually we should not fail anymore if we reached here,
> >but better still check.  */
> >if (!ok)
> > @@ -3312,12 +3296,12 @@ noce_convert_multiple_sets (struct noce_if_info 
> > *if_info)
> >
> >/* We must have seen some sort of insn to insert, otherwise we were
> >   given an empty BB to convert, and we can't handle that.  */
> > -  gcc_assert (!unmodified_insns.is_empty ());
> > +  gcc_assert (!insn_info.is_empty ());
> >
> >/* Now fixup the assignments.  */
> > -  for (unsigned i = 0; i < targets.length (); i++)
> > -if (targets[i] != temporaries[i])
> > -  noce_emit_move_insn (targets[i], temporaries[i]);
> > +  for (unsigned i = 0; i < insn_info.length (); i++)
> > +if (insn_info[i]->target != insn_info[i]->temporary)
> > +  noce_emit_move_insn (insn_info[i]->target, insn_info[i]->temporary);
> >
> >/* Actually emit the sequence if it isn't too expensive.  */
> >rtx_insn *seq = get_insns ();
> > @@ -3332,10 +3316,10 @@ no

Re: [PATCH v3 2/4] ifcvt: Allow more operations in multiple set if conversion

2024-04-23 Thread Manolis Tsamis

On Thu, Oct 19, 2023 at 10:46 PM Richard Sandiford
 wrote:
>
> Manolis Tsamis  writes:
> > Currently the operations allowed for if conversion of a basic block with
> > multiple sets are few, namely REG, SUBREG and CONST_INT (as controlled by
> > bb_ok_for_noce_convert_multiple_sets).
> >
> > This commit allows more operations (arithmetic, compare, etc) to participate
> > in if conversion. The target's profitability hook and ifcvt's costing are
> > expected to reject sequences that are unprofitable.
> >
> > This is especially useful for targets which provide a rich selection of
> > conditional instructions (like aarch64 which has cinc, csneg, csinv, ccmp, 
> > ...)
> > which are currently not used in basic blocks with more than a single set.
> >
> > gcc/ChangeLog:
> >
> >   * ifcvt.cc (try_emit_cmove_seq): Modify comments.
> >   (noce_convert_multiple_sets_1): Modify comments.
> >   (bb_ok_for_noce_convert_multiple_sets): Allow more operations.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * gcc.target/aarch64/ifcvt_multiple_sets_arithm.c: New test.
> >
> > Signed-off-by: Manolis Tsamis 
> > ---
> >
> > Changes in v3:
> > - Add SCALAR_INT_MODE_P check in 
> > bb_ok_for_noce_convert_multiple_sets.
> > - Allow rewiring of multiple regs.
> > - Refactor code with noce_multiple_sets_info.
> > - Remove old code for subregs.
> >
> >  gcc/ifcvt.cc  | 63 ++-
> >  .../aarch64/ifcvt_multiple_sets_arithm.c  | 79 +++
> >  2 files changed, 123 insertions(+), 19 deletions(-)
> >  create mode 100644 
> > gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
> >
> > diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> > index 3273aeca125..efe8ab1577a 100644
> > --- a/gcc/ifcvt.cc
> > +++ b/gcc/ifcvt.cc
> > @@ -3215,13 +3215,13 @@ try_emit_cmove_seq (struct noce_if_info *if_info, 
> > rtx temp,
> >  /* We have something like:
> >
> >   if (x > y)
> > -   { i = a; j = b; k = c; }
> > +   { i = EXPR_A; j = EXPR_B; k = EXPR_C; }
> >
> > Make it:
> >
> > - tmp_i = (x > y) ? a : i;
> > - tmp_j = (x > y) ? b : j;
> > - tmp_k = (x > y) ? c : k;
> > + tmp_i = (x > y) ? EXPR_A : i;
> > + tmp_j = (x > y) ? EXPR_B : j;
> > + tmp_k = (x > y) ? EXPR_C : k;
> >   i = tmp_i;
> >   j = tmp_j;
> >   k = tmp_k;
> > @@ -3637,11 +3637,10 @@ noce_convert_multiple_sets_1 (struct noce_if_info 
> > *if_info,
> >
> >
> >
> > -/* Return true iff basic block TEST_BB is comprised of only
> > -   (SET (REG) (REG)) insns suitable for conversion to a series
> > -   of conditional moves.  Also check that we have more than one set
> > -   (other routines can handle a single set better than we would), and
> > -   fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
> > +/* Return true iff basic block TEST_BB is suitable for conversion to a
> > +   series of conditional moves.  Also check that we have more than one
> > +   set (other routines can handle a single set better than we would),
> > +   and fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
> > through the insns store the sum of their potential costs in COST.  */
> >
> >  static bool
> > @@ -3667,20 +3666,46 @@ bb_ok_for_noce_convert_multiple_sets (basic_block 
> > test_bb, unsigned *cost)
> >rtx dest = SET_DEST (set);
> >rtx src = SET_SRC (set);
> >
> > -  /* We can possibly relax this, but for now only handle REG to REG
> > -  (including subreg) moves.  This avoids any issues that might come
> > -  from introducing loads/stores that might violate data-race-freedom
> > -  guarantees.  */
> > -  if (!REG_P (dest))
> > +  /* Do not handle anything involving memory loads/stores since it 
> > might
> > +  violate data-race-freedom guarantees.  */
> > +  if (!REG_P (dest) || contains_mem_rtx_p (src))
> > + return false;
> > +
> > +  if (!SCALAR_INT_MODE_P (GET_MODE (src)))
> >   return false;
> >
> > -  if (!((REG_P (src) || CONSTANT_P (src))
> > - || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
> > -   && subreg_lowpart_p (src
> > +  /* Allow a wide range of operations and let the costing function 
> > decide
> > +  if the conversion is worth it later.  */
> > +  enum rtx_code code = GET_CODE (src);
> > +  if (!(CONSTANT_P (src)
> > + || code == REG
> > + || code == SUBREG
> > + || code == ZERO_EXTEND
> > + || code == SIGN_EXTEND
> > + || code == NOT
> > + || code == NEG
> > + || code == PLUS
> > + || code == MINUS
> > + || code == AND
> > + || code == IOR
> > + || code == MULT
> > + || code == ASHIFT
> > + || code == ASHIFTRT
> > + || code == NE
> > + || code == EQ
> > + || code == GE
> > + || code == GT
> > + || code == LE
> > + || code == LT
> > + || co

Re: [PATCH v3 1/4] ifcvt: handle sequences that clobber flags in noce_convert_multiple_sets

2024-04-23 Thread Manolis Tsamis

On Thu, Oct 19, 2023 at 10:41 PM Richard Sandiford
 wrote:
>
> Manolis Tsamis  writes:
> > This is an extension of what was done in PR106590.
> >
> > Currently if a sequence generated in noce_convert_multiple_sets clobbers the
> > condition rtx (cc_cmp or rev_cc_cmp) then only seq1 is used afterwards
> > (sequences that emit the comparison itself). Since this applies only from 
> > the
> > next iteration it assumes that the sequences generated (in particular seq2)
> > doesn't clobber the condition rtx itself before using it in the 
> > if_then_else,
> > which is only true in specific cases (currently only register/subregister 
> > moves
> > are allowed).
> >
> > This patch changes this so it also tests if seq2 clobbers cc_cmp/rev_cc_cmp 
> > in
> > the current iteration. This makes it possible to include arithmetic 
> > operations
> > in noce_convert_multiple_sets.
> >
> > gcc/ChangeLog:
> >
> >   * ifcvt.cc (check_for_cc_cmp_clobbers): Use modified_in_p instead.
> >   (noce_convert_multiple_sets_1): Don't use seq2 if it clobbers cc_cmp.
> >
> > Signed-off-by: Manolis Tsamis 
> > ---
> >
> > (no changes since v1)
> >
> >  gcc/ifcvt.cc | 49 +++--
> >  1 file changed, 19 insertions(+), 30 deletions(-)
>
> Sorry for the slow review.  TBH I was hoping someone else would pick
> it up, since (a) I'm not very familiar with this code, and (b) I don't
> really agree with the way that the current code works.  I'm not sure the
> current dependency checking is safe, so I'm nervous about adding even
> more cases to it.  And it feels like the different ifcvt techniques are
> not now well distinguished, so that they're beginning to overlap and
> compete with one another.  None of that is your fault, of course. :)
>
> > diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> > index a0af553b9ff..3273aeca125 100644
> > --- a/gcc/ifcvt.cc
> > +++ b/gcc/ifcvt.cc
> > @@ -3375,20 +3375,6 @@ noce_convert_multiple_sets (struct noce_if_info 
> > *if_info)
> >return true;
> >  }
> >
> > -/* Helper function for noce_convert_multiple_sets_1.  If store to
> > -   DEST can affect P[0] or P[1], clear P[0].  Called via note_stores.  */
> > -
> > -static void
> > -check_for_cc_cmp_clobbers (rtx dest, const_rtx, void *p0)
> > -{
> > -  rtx *p = (rtx *) p0;
> > -  if (p[0] == NULL_RTX)
> > -return;
> > -  if (reg_overlap_mentioned_p (dest, p[0])
> > -  || (p[1] && reg_overlap_mentioned_p (dest, p[1])))
> > -p[0] = NULL_RTX;
> > -}
> > -
> >  /* This goes through all relevant insns of IF_INFO->then_bb and tries to
> > create conditional moves.  In case a simple move sufficis the insn
> > should be listed in NEED_NO_CMOV.  The rewired-src cases should be
> > @@ -3552,9 +3538,17 @@ noce_convert_multiple_sets_1 (struct noce_if_info 
> > *if_info,
> >creating an additional compare for each.  If successful, costing
> >is easier and this sequence is usually preferred.  */
> >if (cc_cmp)
> > - seq2 = try_emit_cmove_seq (if_info, temp, cond,
> > -new_val, old_val, need_cmov,
> > -&cost2, &temp_dest2, cc_cmp, rev_cc_cmp);
> > + {
> > +   seq2 = try_emit_cmove_seq (if_info, temp, cond,
> > +  new_val, old_val, need_cmov,
> > +  &cost2, &temp_dest2, cc_cmp, rev_cc_cmp);
> > +
> > +   /* The if_then_else in SEQ2 may be affected when cc_cmp/rev_cc_cmp 
> > is
> > +  clobbered.  We can't safely use the sequence in this case.  */
> > +   if (seq2 && (modified_in_p (cc_cmp, seq2)
> > +   || (rev_cc_cmp && modified_in_p (rev_cc_cmp, seq2
> > + seq2 = NULL;
>
> modified_in_p only checks the first instruction in seq2, not the whole
> sequence.
>
> I think the unpatched approach is OK in cases where seq2 clobbers
> cc_cmp/rev_cc_cmp in or after the last use, since instructions are
> defined to operate on a read-all/compute/write-all basis.
>
> Soon after the snippet above, the unpatched code has this loop:
>
>   /* The backend might have created a sequence that uses the
>  condition.  Check this.  */
>   rtx_insn *walk = seq2;
>   while (walk)
> {
>   rtx set = single_set (walk);
>
>   if (!set || !SET_SRC (set))
>
> This condition looks odd.  A SET_SRC is never null.  But more importantly,
> continuing means "assume the best", and I don't think we should assume
> the best for (say) an insn with two parallel sets.
>
> It doesn't look like the series addresses this, but !set seems more
> likely to occur if we extend the function to general operations.
>
> {
>   walk = NEXT_INSN (walk);
>   continue;
> }
>
>   rtx src = SET_SRC (set);
>
>   if (XEXP (set, 1) && GET_CODE (XEXP (set, 1)) == IF_THEN_ELSE)
> ; /* We assume that this is the cmove created by the backend that
>

[PATCH v4 2/3] [RFC] ifcvt: Allow more operations in multiple set if conversion

2024-04-23 Thread Manolis Tsamis

Currently the operations allowed for if conversion of a basic block with
multiple sets are few, namely REG, SUBREG and CONST_INT (as controlled by
bb_ok_for_noce_convert_multiple_sets).

This commit allows more operations (arithmetic, compare, etc) to participate
in if conversion. The target's profitability hook and ifcvt's costing are
expected to reject sequences that are unprofitable.

This is especially useful for targets which provide a rich selection of
conditional instructions (like aarch64 which has cinc, csneg, csinv, ccmp, ...)
which are currently not used in basic blocks with more than a single set.

gcc/ChangeLog:

* ifcvt.cc (try_emit_cmove_seq): Modify comments.
(noce_convert_multiple_sets_1): Modify comments.
(bb_ok_for_noce_convert_multiple_sets): Allow more operations.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/ifcvt_multiple_sets_arithm.c: New test.

Signed-off-by: Manolis Tsamis 
---

Changes in v4:
- Remove unnecessary hardcoded list of allowed ops in
bb_ok_for_noce_convert_multiple_sets.
- Set need_cmov based on BB live_out instead of REG_DEAD notes.
- Fix preexisting issues and improve the code that sets read_comparison.

 gcc/ifcvt.cc  | 34 +++-
 .../aarch64/ifcvt_multiple_sets_arithm.c  | 79 +++
 2 files changed, 92 insertions(+), 21 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 763a25f816e..dc00042be81 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -3432,13 +3432,13 @@ try_emit_cmove_seq (struct noce_if_info *if_info, rtx 
temp,
 /* We have something like:
 
  if (x > y)
-   { i = a; j = b; k = c; }
+   { i = EXPR_A; j = EXPR_B; k = EXPR_C; }
 
Make it:
 
- tmp_i = (x > y) ? a : i;
- tmp_j = (x > y) ? b : j;
- tmp_k = (x > y) ? c : k;
+ tmp_i = (x > y) ? EXPR_A : i;
+ tmp_j = (x > y) ? EXPR_B : j;
+ tmp_k = (x > y) ? EXPR_C : k;
  i = tmp_i;
  j = tmp_j;
  k = tmp_k;
@@ -3839,11 +3839,10 @@ noce_convert_multiple_sets_1 (struct noce_if_info 
*if_info,
 
 
 
-/* Return true iff basic block TEST_BB is comprised of only
-   (SET (REG) (REG)) insns suitable for conversion to a series
-   of conditional moves.  Also check that we have more than one set
-   (other routines can handle a single set better than we would), and
-   fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
+/* Return true iff basic block TEST_BB is suitable for conversion to a
+   series of conditional moves.  Also check that we have more than one
+   set (other routines can handle a single set better than we would),
+   and fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
through the insns store the sum of their potential costs in COST.  */
 
 static bool
@@ -3869,20 +3868,13 @@ bb_ok_for_noce_convert_multiple_sets (basic_block 
test_bb, unsigned *cost)
   rtx dest = SET_DEST (set);
   rtx src = SET_SRC (set);
 
-  /* We can possibly relax this, but for now only handle REG to REG
-(including subreg) moves.  This avoids any issues that might come
-from introducing loads/stores that might violate data-race-freedom
-guarantees.  */
-  if (!REG_P (dest))
-   return false;
-
-  if (!((REG_P (src) || CONSTANT_P (src))
-   || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
- && subreg_lowpart_p (src
+  /* Do not handle anything involving memory loads/stores since it might
+violate data-race-freedom guarantees.  */
+  if (!REG_P (dest) || contains_mem_rtx_p (src))
return false;
 
-  /* Destination must be appropriate for a conditional write.  */
-  if (!noce_operand_ok (dest))
+  /* Destination and source must be appropriate.  */
+  if (!noce_operand_ok (dest) || !noce_operand_ok (src))
return false;
 
   /* We must be able to conditionally move in this mode.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c 
b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
new file mode 100644
index 000..d977f4d62ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
@@ -0,0 +1,79 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ce1" } */
+
+void sink2(int, int);
+void sink3(int, int, int);
+
+void cond1(int cond, int x, int y)
+{
+  if (cond)
+{
+  x = x << 4;
+  y = 1;
+}
+
+  sink2(x, y);
+}
+
+void cond2(int cond, int x, int y)
+{
+  if (cond)
+{
+  x++;
+  y++;
+}
+
+  sink2(x, y);
+}
+
+void cond3(int cond, int x1, int x2, int x3)
+{
+  if (cond)
+{
+  x1++;
+  x2++;
+  x3++;
+}
+
+  sink3(x1, x2, x3);
+}
+
+void cond4(int cond, int x, int y)
+{
+  if (cond)
+{
+  x += 2;
+  y += 3;
+}
+
+  sink2(x, y);
+}
+
+void cond5(int cond, int x, int y, int

[PATCH v4 3/3] [RFC] ifcvt: Handle multiple rewired regs and refactor noce_convert_multiple_sets

2024-04-23 Thread Manolis Tsamis

The existing implementation of need_cmov_or_rewire and
noce_convert_multiple_sets_1 assumes that sets are either REG or SUBREG.
This commit enhances them so they can handle/rewire arbitrary set statements.

To do that a new helper struct noce_multiple_sets_info is introduced which is
used by noce_convert_multiple_sets and its helper functions. This results in
cleaner function signatures, improved efficiency (a number of vecs and hash
set/map are replaced with a single vec of struct) and simplicity.

gcc/ChangeLog:

* ifcvt.cc (need_cmov_or_rewire): Renamed init_noce_multiple_sets_info.
(init_noce_multiple_sets_info): Initialize noce_multiple_sets_info.
(noce_convert_multiple_sets_1): Use noce_multiple_sets_info and handle
rewiring of multiple registers.
(noce_convert_multiple_sets): Updated to use noce_multiple_sets_info.
* ifcvt.h (struct noce_multiple_sets_info): Introduce new struct
noce_multiple_sets_info to store info for noce_convert_multiple_sets.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/ifcvt_multiple_sets_rewire.c: New test.

Signed-off-by: Manolis Tsamis 
---

(no changes since v1)

 gcc/ifcvt.cc  | 243 --
 gcc/ifcvt.h   |  16 ++
 .../aarch64/ifcvt_multiple_sets_rewire.c  |  20 ++
 3 files changed, 141 insertions(+), 138 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_rewire.c

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index dc00042be81..8e36c16ee57 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -98,14 +98,10 @@ static bool dead_or_predicable (basic_block, basic_block, 
basic_block,
edge, bool);
 static void noce_emit_move_insn (rtx, rtx);
 static rtx_insn *block_has_only_trap (basic_block);
-static void need_cmov_or_rewire (basic_block, hash_set *,
-hash_map *);
+static void init_noce_multiple_sets_info (basic_block,
+  auto_delete_vec &);
 static bool noce_convert_multiple_sets_1 (struct noce_if_info *,
- hash_set *,
- hash_map *,
- auto_vec *,
- auto_vec *,
- auto_vec *, int *);
+  auto_delete_vec &, int *);
 
 /* Count the number of non-jump active insns in BB.  */
 
@@ -3487,24 +3483,13 @@ noce_convert_multiple_sets (struct noce_if_info 
*if_info)
   rtx x = XEXP (cond, 0);
   rtx y = XEXP (cond, 1);
 
-  /* The true targets for a conditional move.  */
-  auto_vec targets;
-  /* The temporaries introduced to allow us to not consider register
- overlap.  */
-  auto_vec temporaries;
-  /* The insns we've emitted.  */
-  auto_vec unmodified_insns;
-
-  hash_set need_no_cmov;
-  hash_map rewired_src;
-
-  need_cmov_or_rewire (then_bb, &need_no_cmov, &rewired_src);
+  auto_delete_vec insn_info;
+  init_noce_multiple_sets_info (then_bb, insn_info);
 
   int last_needs_comparison = -1;
 
   bool ok = noce_convert_multiple_sets_1
-(if_info, &need_no_cmov, &rewired_src, &targets, &temporaries,
- &unmodified_insns, &last_needs_comparison);
+(if_info, insn_info, &last_needs_comparison);
   if (!ok)
   return false;
 
@@ -3519,8 +3504,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
   end_sequence ();
   start_sequence ();
   ok = noce_convert_multiple_sets_1
-   (if_info, &need_no_cmov, &rewired_src, &targets, &temporaries,
-&unmodified_insns, &last_needs_comparison);
+   (if_info, insn_info, &last_needs_comparison);
   /* Actually we should not fail anymore if we reached here,
 but better still check.  */
   if (!ok)
@@ -3529,12 +3513,12 @@ noce_convert_multiple_sets (struct noce_if_info 
*if_info)
 
   /* We must have seen some sort of insn to insert, otherwise we were
  given an empty BB to convert, and we can't handle that.  */
-  gcc_assert (!unmodified_insns.is_empty ());
+  gcc_assert (!insn_info.is_empty ());
 
   /* Now fixup the assignments.  */
-  for (unsigned i = 0; i < targets.length (); i++)
-if (targets[i] != temporaries[i])
-  noce_emit_move_insn (targets[i], temporaries[i]);
+  for (unsigned i = 0; i < insn_info.length (); i++)
+if (insn_info[i]->target != insn_info[i]->temporary)
+  noce_emit_move_insn (insn_info[i]->target, insn_info[i]->temporary);
 
   /* Actually emit the sequence if it isn't too expensive.  */
   rtx_insn *seq = get_insns ();
@@ -3549,10 +3533,10 @@ noce_convert_multiple_sets (struct noce_if_info 
*if_info)
 set_used_flags (insn);
 
   /* Mark all our temporaries and targets as used.  */
-  for (unsigned i = 0; i < targets.length (); i++)
+  for (unsigned i = 0; i < insn_info.length (); i++)
 {
-  set_used_flags (temporaries[i]);
-  set_used_flags (targets[i]);
+  set_used_flags (insn_info[i]-

[PATCH v4 1/3] [RFC] ifcvt: handle sequences that clobber flags in noce_convert_multiple_sets

2024-04-23 Thread Manolis Tsamis

This is an extension of what was done in PR106590.

Currently if a sequence generated in noce_convert_multiple_sets clobbers the
condition rtx (cc_cmp or rev_cc_cmp) then only seq1 is used afterwards
(sequences that emit the comparison itself). Since this applies only from the
next iteration it assumes that the sequences generated (in particular seq2)
don't clobber the condition rtx itself before using it in the if_then_else,
which is only true in specific cases (currently only register/subregister moves
are allowed).

This patch changes this so it also tests if seq2 clobbers cc_cmp/rev_cc_cmp in
the current iteration. This makes it possible to include arithmetic operations
in noce_convert_multiple_sets.

It also makes the code that checks whether the condition is used outside of the
if_then_else emitted more robust.

gcc/ChangeLog:

* ifcvt.cc (check_for_cc_cmp_clobbers): Use modified_in_p instead.
(noce_convert_multiple_sets_1): Don't use seq2 if it clobbers cc_cmp.
Refactor the code that sets read_comparison.

Signed-off-by: Manolis Tsamis 
---

(no changes since v1)

 gcc/ifcvt.cc | 106 ---
 1 file changed, 59 insertions(+), 47 deletions(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 58ed42673e5..763a25f816e 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -3592,20 +3592,6 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
   return true;
 }
 
-/* Helper function for noce_convert_multiple_sets_1.  If store to
-   DEST can affect P[0] or P[1], clear P[0].  Called via note_stores.  */
-
-static void
-check_for_cc_cmp_clobbers (rtx dest, const_rtx, void *p0)
-{
-  rtx *p = (rtx *) p0;
-  if (p[0] == NULL_RTX)
-return;
-  if (reg_overlap_mentioned_p (dest, p[0])
-  || (p[1] && reg_overlap_mentioned_p (dest, p[1])))
-p[0] = NULL_RTX;
-}
-
 /* This goes through all relevant insns of IF_INFO->then_bb and tries to
create conditional moves.  In case a simple move sufficis the insn
should be listed in NEED_NO_CMOV.  The rewired-src cases should be
@@ -3731,36 +3717,67 @@ noce_convert_multiple_sets_1 (struct noce_if_info 
*if_info,
 creating an additional compare for each.  If successful, costing
 is easier and this sequence is usually preferred.  */
   if (cc_cmp)
-   seq2 = try_emit_cmove_seq (if_info, temp, cond,
-  new_val, old_val, need_cmov,
-  &cost2, &temp_dest2, cc_cmp, rev_cc_cmp);
+   {
+ seq2 = try_emit_cmove_seq (if_info, temp, cond,
+new_val, old_val, need_cmov,
+&cost2, &temp_dest2, cc_cmp, rev_cc_cmp);
+
+ /* The if_then_else in SEQ2 may be affected when cc_cmp/rev_cc_cmp is
+clobbered.  We can't safely use the sequence in this case.  */
+ if (seq2 && (modified_in_p (cc_cmp, seq2)
+ || (rev_cc_cmp && modified_in_p (rev_cc_cmp, seq2
+   seq2 = NULL;
+   }
 
   /* The backend might have created a sequence that uses the
-condition.  Check this.  */
+condition as a value.  Check this.  */
+
+  /* We cannot handle anything more complex than a reg or constant.  */
+  if (!REG_P (XEXP (cond, 0)) && !CONSTANT_P (XEXP (cond, 0)))
+   read_comparison = true;
+
+  if (!REG_P (XEXP (cond, 1)) && !CONSTANT_P (XEXP (cond, 1)))
+   read_comparison = true;
+
   rtx_insn *walk = seq2;
-  while (walk)
+  int if_then_else_count = 0;
+  while (walk && !read_comparison)
{
- rtx set = single_set (walk);
+ rtx exprs_to_check[2];
+ unsigned int exprs_count = 0;
 
- if (!set || !SET_SRC (set))
+ rtx set = single_set (walk);
+ if (set && XEXP (set, 1)
+ && GET_CODE (XEXP (set, 1)) == IF_THEN_ELSE)
{
- walk = NEXT_INSN (walk);
- continue;
+ /* We assume that this is the cmove created by the backend that
+naturally uses the condition.  */
+ exprs_to_check[exprs_count++] = XEXP (XEXP (set, 1), 1);
+ exprs_to_check[exprs_count++] = XEXP (XEXP (set, 1), 2);
+ if_then_else_count++;
}
+ else if (NONDEBUG_INSN_P (walk))
+   exprs_to_check[exprs_count++] = PATTERN (walk);
 
- rtx src = SET_SRC (set);
+ /* Bail if we get more than one if_then_else because the assumption
+above may be incorrect.  */
+ if (if_then_else_count > 1)
+   {
+ read_comparison = true;
+ break;
+   }
 
- if (XEXP (set, 1) && GET_CODE (XEXP (set, 1)) == IF_THEN_ELSE)
-   ; /* We assume that this is the cmove created by the backend that
-naturally uses the condition.  Therefore we ignore it.  */
- else
+ for (unsigned int i = 0; i < exprs_count; i++)
{

[PATCH v4 0/3] ifcvt: Allow if conversion of arithmetic in basic blocks with multiple sets

2024-04-23 Thread Manolis Tsamis



noce_convert_multiple_sets has been introduced and extended over time to handle
if conversion for blocks with multiple sets. Currently this is focused on
register moves and rejects any sort of arithmetic operations.

This series is an extension to allow more sequences to take part in if
conversion. The first patch is a required change to emit correct code and the
second patch whitelists a larger number of operations through
bb_ok_for_noce_convert_multiple_sets. The third patch adds support to rewire
multiple registers in noce_convert_multiple_sets_1 and refactors the code with
a new helper info struct. The fourth patch removes some old code that should
not be needed anymore.

For targets that have a rich selection of conditional instructions,
like aarch64, I have seen an ~5x increase of profitable if conversions for
multiple set blocks in SPEC benchmarks. Also tested with a wide variety of
benchmarks and I have not seen performance regressions on either x64 / aarch64.

Some samples that previously resulted in a branch but now better use these
instructions can be seen in the provided test cases.

Bootstrapped and tested on AArch64 and x86-64.


Changes in v4:
- Remove unnecessary hardcoded list of allowed ops in
bb_ok_for_noce_convert_multiple_sets.
- Set need_cmov based on BB live_out instead of REG_DEAD notes.
- Fix preexisting issues and improve the code that sets read_comparison.

Manolis Tsamis (3):
  [RFC] ifcvt: handle sequences that clobber flags in
noce_convert_multiple_sets
  [RFC] ifcvt: Allow more operations in multiple set if conversion
  [RFC] ifcvt: Handle multiple rewired regs and refactor
noce_convert_multiple_sets

 gcc/ifcvt.cc  | 383 --
 gcc/ifcvt.h   |  16 +
 .../aarch64/ifcvt_multiple_sets_arithm.c  |  79 
 .../aarch64/ifcvt_multiple_sets_rewire.c  |  20 +
 4 files changed, 292 insertions(+), 206 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_rewire.c

-- 
2.34.1

[PATCH] MATCH: Maybe expand (T)(A + C1) * C2 and (T)(A + C1) * C2 + C3 [PR109393]

2024-04-23 Thread Manolis Tsamis

The original motivation for this pattern was that the following function does
not fold to 'return 1':

int foo(int *a, int j)
{
  int k = j - 1;
  return a[j - 1] == a[k];
}

The expression ((unsigned long) (X +- C1) * C2) appears frequently as part of
address calculations (e.g. arrays). These patterns help fold and simplify more
expressions.

PR tree-optimization/109393

gcc/ChangeLog:

* match.pd: Add new patterns for ((T)(A +- CST1)) * CST2 and
  ((T)(A +- CST1)) * CST2 + CST3.

gcc/testsuite/ChangeLog:

* gcc.dg/pr109393.c: New test.

Signed-off-by: Manolis Tsamis 
---

 gcc/match.pd| 30 ++
 gcc/testsuite/gcc.dg/pr109393.c | 16 
 2 files changed, 46 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr109393.c

diff --git a/gcc/match.pd b/gcc/match.pd
index d401e7503e6..13c828ba70d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3650,6 +3650,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(plus (convert @0) (op @2 (convert @1))
 #endif
 
+/* ((T)(A + CST1)) * CST2 + CST3
+ -> ((T)(A) * CST2) + ((T)CST1 * CST2 + CST3)
+   Where (A + CST1) doesn't need to have a single use.  */
+#if GIMPLE
+  (for op (plus minus)
+   (simplify
+(plus (mult (convert:s (op @0 INTEGER_CST@1)) INTEGER_CST@2) INTEGER_CST@3)
+ (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
+ && TREE_CODE (type) == INTEGER_TYPE
+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+ && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_WRAPS (type))
+   (op (mult @2 (convert @0)) (plus (mult @2 (convert @1)) @3)
+#endif
+
+/* ((T)(A + CST1)) * CST2 -> ((T)(A) * CST2) + ((T)CST1 * CST2)  */
+#if GIMPLE
+  (for op (plus minus)
+   (simplify
+(mult (convert:s (op:s @0 INTEGER_CST@1)) INTEGER_CST@2)
+ (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
+ && TREE_CODE (type) == INTEGER_TYPE
+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+ && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_WRAPS (type))
+   (op (mult @2 (convert @0)) (mult @2 (convert @1))
+#endif
+
 /* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be simplified
to a simple value.  */
   (for op (plus minus)
diff --git a/gcc/testsuite/gcc.dg/pr109393.c b/gcc/testsuite/gcc.dg/pr109393.c
new file mode 100644
index 000..e9051273672
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr109393.c
@@ -0,0 +1,16 @@
+/* PR tree-optimization/109393 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "return 1;" 2 "optimized" } } */
+
+int foo(int *a, int j)
+{
+  int k = j - 1;
+  return a[j - 1] == a[k];
+}
+
+int bar(int *a, int j)
+{
+  int k = j - 1;
+  return (&a[j + 1] - 2) == &a[k];
+}
-- 
2.34.1

Re: [PATCH] Spelling fixes for translatable strings

2024-04-23 Thread Jonathan Wakely

On Mon, 22 Apr 2024 at 22:30, Jakub Jelinek  wrote:
>
> Hi!
>
> I've run aspell on gcc.pot (just quickly skimming, so pressing
> I key hundreds of times and just stopping when I catch something that
> looks like a misspelling).
>
> I plan to commit this tomorrow as obvious unless somebody finds some
> issues in it, you know, I'm not a native English speaker.
> Yes, I know favour is valid UK spelling, but we spell the US way I think.

Yup:
https://gcc.gnu.org/codingconventions.html#Spelling

> I've left some *ise* -> *ize* cases (recognise, initialise), those
> had too many hits, though in translatable strings just 4, so maybe
> worth changing too:
> msgid "recognise the specified suffix as a definition module filename"
> msgid "recognise the specified suffix as implementation and module filenames"
> "initialiser for a dylib."
> msgid "%qE attribute argument %qE is not recognised"

That spelling is explicitly mentioned at the link above, so they
should be "ize" really.

> 2024-04-22  Jakub Jelinek  
>
> * config/epiphany/epiphany.opt (may-round-for-trunc): Spelling fix:
> floatig -> floating.
> * config/riscv/riscv.opt (mcsr-check): Spelling fix: CRS -> CSR.
> * params.opt (-param=ipa-cp-profile-count-base=): Spelling fix:
> frequncy -> frequency.
> gcc/c-family/
> * c.opt (Wstrict-flex-arrays): Spelling fix: inproper -> improper.
> gcc/cp/
> * parser.cc (cp_parser_using_declaration): Spelling fix: favour
> -> favor.
> gcc/m2/
> * lang.opt (fuse-list=): Spelling fix: finalializations ->
> finalizations.

LGTM

+Reviewed-by: Jonathan Wakely

Re: [PATCH v2] [testsuite] require sqrt_insn effective target where needed

2024-04-23 Thread Iain Sandoe

Hi Folks,

> On 23 Apr 2024, at 09:59, Kewen.Lin  wrote:
> 
> Hi,
> 
> on 2024/4/22 17:56, Alexandre Oliva wrote:
>> This patch takes feedback received for 3 earlier patches, and adopts a
>> simpler approach to skip the still-failing tests, that I believe to be
>> in line with ppc maintainers' expressed preferences.
>> https://gcc.gnu.org/pipermail/gcc-patches/2021-February/565939.html
>> https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566617.html
>> https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566521.html
>> Ping?-ish :-)
>> 
>> 
>> Some tests fail on ppc and ppc64 when testing a compiler [with options]
>> for a CPU [emulator] that doesn't support the sqrt insn.
>> 
>> The gcc.dg/cdce3.c is one in which the expected shrink-wrap
>> optimization only takes place when the target CPU supports a sqrt
>> insn.
>> 
>> The gcc.target/powerpc/pr46728-1[0-4].c tests use -mpowerpc-gpopt and
>> call sqrt(), which involves the sqrt insn that the target CPU under
>> test may not support.
>> 
>> Require a sqrt_insn effective target for all the affected tests.
>> 
>> Regstrapped on x86_64-linux-gnu and ppc64el-linux-gnu.  Also testing
>> with gcc-13 on ppc64-vx7r2 and ppc-vx7r2.  Ok to install?
>> 
>> 
>> for  gcc/testsuite/ChangeLog
>> 
>>  * gcc.dg/cdce3.c: Require sqrt_insn effective target.
>>  * gcc.target/powerpc/pr46728-10.c: Likewise.
>>  * gcc.target/powerpc/pr46728-11.c: Likewise.
>>  * gcc.target/powerpc/pr46728-13.c: Likewise.
>>  * gcc.target/powerpc/pr46728-14.c: Likewise.
>> ---
>> gcc/testsuite/gcc.dg/cdce3.c  |3 ++-
>> gcc/testsuite/gcc.target/powerpc/pr46728-10.c |1 +
>> gcc/testsuite/gcc.target/powerpc/pr46728-11.c |1 +
>> gcc/testsuite/gcc.target/powerpc/pr46728-13.c |1 +
>> gcc/testsuite/gcc.target/powerpc/pr46728-14.c |1 +
>> 5 files changed, 6 insertions(+), 1 deletion(-)
>> 
>> diff --git a/gcc/testsuite/gcc.dg/cdce3.c b/gcc/testsuite/gcc.dg/cdce3.c
>> index 601ddf055fd71..f759a95972e8b 100644
>> --- a/gcc/testsuite/gcc.dg/cdce3.c
>> +++ b/gcc/testsuite/gcc.dg/cdce3.c
>> @@ -1,7 +1,8 @@
>> /* { dg-do compile } */
>> /* { dg-require-effective-target hard_float } */
>> +/* { dg-require-effective-target sqrt_insn } */
>> /* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details 
>> -fdump-tree-optimized" } */
>> -/* { dg-final { scan-tree-dump "cdce3.c:11: \[^\n\r]* function call is 
>> shrink-wrapped into error conditions\." "cdce" } } */
>> +/* { dg-final { scan-tree-dump "cdce3.c:12: \[^\n\r]* function call is 
>> shrink-wrapped into error conditions\." "cdce" } } */
>> /* { dg-final { scan-tree-dump "sqrtf \\(\[^\n\r]*\\); \\\[tail call\\\]" 
>> "optimized" } } */
>> /* { dg-skip-if "doesn't have a sqrtf insn" { mmix-*-* } } */
>> 
> 
> This change needs an approval from global maintainer as it touches a generic 
> test case?
> 
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr46728-10.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr46728-10.c
>> index 3be4728d333a4..7e9bb638106c2 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr46728-10.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr46728-10.c
>> @@ -1,6 +1,7 @@
>> /* { dg-do run } */
>> /* { dg-skip-if "-mpowerpc-gpopt not supported" { powerpc*-*-darwin* } } */
>> /* { dg-options "-O2 -ffast-math -fno-inline -fno-unroll-loops -lm 
>> -mpowerpc-gpopt" } */
>> +/* { dg-require-effective-target sqrt_insn } */
> 
> This change looks sensible to me.
> 
> Nit: With the proposed change, I'd expect that we can remove the line for 
> powerpc*-*-darwin*.
> 
> CC Iain to confirm.

Indeed, the check for sqrt_insn fails and so the test is unsupported without 
needing the separate
powerpc*-*-darwin* line,

thanks,
Iain

> 
> BR,
> Kewen
> 
>> 
>> #include 
>> 
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr46728-11.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr46728-11.c
>> index 43b6728a4b812..5bfa25925675a 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr46728-11.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr46728-11.c
>> @@ -1,6 +1,7 @@
>> /* { dg-do run } */
>> /* { dg-skip-if "-mpowerpc-gpopt not supported" { powerpc*-*-darwin* } } */
>> /* { dg-options "-O2 -ffast-math -fno-inline -fno-unroll-loops -lm 
>> -mpowerpc-gpopt" } */
>> +/* { dg-require-effective-target sqrt_insn } */
>> 
>> #include 
>> 
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr46728-13.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr46728-13.c
>> index b9fd63973b728..b66d0209a5e54 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr46728-13.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr46728-13.c
>> @@ -1,6 +1,7 @@
>> /* { dg-do run } */
>> /* { dg-skip-if "-mpowerpc-gpopt not supported" { powerpc*-*-darwin* } } */
>> /* { dg-options "-O2 -ffast-math -fno-inline -fno-unroll-loops -lm 
>> -mpowerpc-gpopt" } */
>> +/* { dg-require-effective-target sqrt_insn } */
>> 
>> #include 
>> 
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr46728-14.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr46728-14.c
>> index 5a13bd

Re: [PATCH] Value range: Add range op for __builtin_isfinite

2024-04-23 Thread rep . dot . nop

On 12 April 2024 07:30:10 CEST, HAO CHEN GUI  wrote:


>
>
>patch.diff
>diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
>index 9de130b4022..99c511728d3 100644
>--- a/gcc/gimple-range-op.cc
>+++ b/gcc/gimple-range-op.cc
>@@ -1192,6 +1192,56 @@ public:
>   }
> } op_cfn_isinf;
>
>+//Implement range operator for CFN_BUILT_IN_ISFINITE
>+class cnf_isfinite : public range_operator
>+{


s/cnf/cfn/g
I guess.
thanks

Re: [PATCH v2] [testsuite] require sqrt_insn effective target where needed

2024-04-23 Thread Kewen.Lin

Hi,

on 2024/4/22 17:56, Alexandre Oliva wrote:
> This patch takes feedback received for 3 earlier patches, and adopts a
> simpler approach to skip the still-failing tests, that I believe to be
> in line with ppc maintainers' expressed preferences.
> https://gcc.gnu.org/pipermail/gcc-patches/2021-February/565939.html
> https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566617.html
> https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566521.html
> Ping?-ish :-)
> 
> 
> Some tests fail on ppc and ppc64 when testing a compiler [with options]
> for a CPU [emulator] that doesn't support the sqrt insn.
> 
> The gcc.dg/cdce3.c is one in which the expected shrink-wrap
> optimization only takes place when the target CPU supports a sqrt
> insn.
> 
> The gcc.target/powerpc/pr46728-1[0-4].c tests use -mpowerpc-gpopt and
> call sqrt(), which involves the sqrt insn that the target CPU under
> test may not support.
> 
> Require a sqrt_insn effective target for all the affected tests.
> 
> Regstrapped on x86_64-linux-gnu and ppc64el-linux-gnu.  Also testing
> with gcc-13 on ppc64-vx7r2 and ppc-vx7r2.  Ok to install?
> 
> 
> for  gcc/testsuite/ChangeLog
> 
>   * gcc.dg/cdce3.c: Require sqrt_insn effective target.
>   * gcc.target/powerpc/pr46728-10.c: Likewise.
>   * gcc.target/powerpc/pr46728-11.c: Likewise.
>   * gcc.target/powerpc/pr46728-13.c: Likewise.
>   * gcc.target/powerpc/pr46728-14.c: Likewise.
> ---
>  gcc/testsuite/gcc.dg/cdce3.c  |3 ++-
>  gcc/testsuite/gcc.target/powerpc/pr46728-10.c |1 +
>  gcc/testsuite/gcc.target/powerpc/pr46728-11.c |1 +
>  gcc/testsuite/gcc.target/powerpc/pr46728-13.c |1 +
>  gcc/testsuite/gcc.target/powerpc/pr46728-14.c |1 +
>  5 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/cdce3.c b/gcc/testsuite/gcc.dg/cdce3.c
> index 601ddf055fd71..f759a95972e8b 100644
> --- a/gcc/testsuite/gcc.dg/cdce3.c
> +++ b/gcc/testsuite/gcc.dg/cdce3.c
> @@ -1,7 +1,8 @@
>  /* { dg-do compile } */
>  /* { dg-require-effective-target hard_float } */
> +/* { dg-require-effective-target sqrt_insn } */
>  /* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details 
> -fdump-tree-optimized" } */
> -/* { dg-final { scan-tree-dump "cdce3.c:11: \[^\n\r]* function call is 
> shrink-wrapped into error conditions\." "cdce" } } */
> +/* { dg-final { scan-tree-dump "cdce3.c:12: \[^\n\r]* function call is 
> shrink-wrapped into error conditions\." "cdce" } } */
>  /* { dg-final { scan-tree-dump "sqrtf \\(\[^\n\r]*\\); \\\[tail call\\\]" 
> "optimized" } } */
>  /* { dg-skip-if "doesn't have a sqrtf insn" { mmix-*-* } } */
> 

This change needs an approval from global maintainer as it touches a generic 
test case?

> diff --git a/gcc/testsuite/gcc.target/powerpc/pr46728-10.c 
> b/gcc/testsuite/gcc.target/powerpc/pr46728-10.c
> index 3be4728d333a4..7e9bb638106c2 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr46728-10.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr46728-10.c
> @@ -1,6 +1,7 @@
>  /* { dg-do run } */
>  /* { dg-skip-if "-mpowerpc-gpopt not supported" { powerpc*-*-darwin* } } */
>  /* { dg-options "-O2 -ffast-math -fno-inline -fno-unroll-loops -lm 
> -mpowerpc-gpopt" } */
> +/* { dg-require-effective-target sqrt_insn } */

This change looks sensible to me.

Nit: With the proposed change, I'd expect that we can remove the line for 
powerpc*-*-darwin*.

CC Iain to confirm.

BR,
Kewen

> 
>  #include 
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr46728-11.c 
> b/gcc/testsuite/gcc.target/powerpc/pr46728-11.c
> index 43b6728a4b812..5bfa25925675a 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr46728-11.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr46728-11.c
> @@ -1,6 +1,7 @@
>  /* { dg-do run } */
>  /* { dg-skip-if "-mpowerpc-gpopt not supported" { powerpc*-*-darwin* } } */
>  /* { dg-options "-O2 -ffast-math -fno-inline -fno-unroll-loops -lm 
> -mpowerpc-gpopt" } */
> +/* { dg-require-effective-target sqrt_insn } */
> 
>  #include 
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr46728-13.c 
> b/gcc/testsuite/gcc.target/powerpc/pr46728-13.c
> index b9fd63973b728..b66d0209a5e54 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr46728-13.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr46728-13.c
> @@ -1,6 +1,7 @@
>  /* { dg-do run } */
>  /* { dg-skip-if "-mpowerpc-gpopt not supported" { powerpc*-*-darwin* } } */
>  /* { dg-options "-O2 -ffast-math -fno-inline -fno-unroll-loops -lm 
> -mpowerpc-gpopt" } */
> +/* { dg-require-effective-target sqrt_insn } */
> 
>  #include 
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr46728-14.c 
> b/gcc/testsuite/gcc.target/powerpc/pr46728-14.c
> index 5a13bdb6c..71a1a70c4e7a2 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr46728-14.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr46728-14.c
> @@ -1,6 +1,7 @@
>  /* { dg-do run } */
>  /* { dg-skip-if "-mpowerpc-gpopt not supported" { powerpc*-*-darwin* } } */
>  /* { dg-options "-O2 -ffast-math -fno-inline -fno-unrol

Re: [PATCH v2] xfail fetestexcept test - ppc always uses fcmpu

2024-04-23 Thread Kewen.Lin

Hi,

on 2024/4/22 18:00, Alexandre Oliva wrote:
> On Mar 10, 2021, Joseph Myers  wrote:
> 
>> On Wed, 10 Mar 2021, Alexandre Oliva wrote:
>>> operand exception for quiet NaN.  I couldn't find any evidence that
>>> the rs6000 backend ever outputs fcmpo.  Therefore, I'm adding the same
>>> execution xfail marker to this test.
> 
>> In my view, such an XFAIL (for a GCC bug as opposed to an environmental 
>> issue) should have a comment pointing to a corresponding open bug in GCC 
>> Bugzilla.  In this case, that's bug 58684.
> 
> Thanks for the suggestion, yeah, that makes sense.  Fixed in v2 below.
> https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566523.html
> Ping?-ish
> 
> 
> gcc.dg/torture/pr91323.c tests that a compare with NaNf doesn't set an
> exception using builtin compare intrinsics, and that it does when
> using regular compare operators.
> 
> That doesn't seem to be expected to work on powerpc targets.  It fails
> on GNU/Linux, it's marked to be skipped on AIX, and a similar test,
> gcc.dg/torture/pr93133.c, has the execution test xfailed for all of
> powerpc*-*-*.
> 
> In this test, the functions that use intrinsics for the compare end up
> with the same code as the one that uses compare operators, using
> fcmpu, a floating compare that, unlike fcmpo, does not set the invalid
> operand exception for quiet NaN.  I couldn't find any evidence that
> the rs6000 backend ever outputs fcmpo.  Therefore, I'm adding the same
> execution xfail marker to this test.
> 
> Regstrapped on x86_64-linux-gnu and ppc64el-linux-gnu.  Also tested with
> gcc-13 on ppc64-vx7r2 and ppc-vx7r2.  Ok to install?
> 
> 
> for  gcc/testsuite/ChangeLog
> 
>   PR target/58684
>   * gcc.dg/torture/pr91323.c: Expect execution fail on
>   powerpc*-*-*.
> ---
>  gcc/testsuite/gcc.dg/torture/pr91323.c |3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/torture/pr91323.c 
> b/gcc/testsuite/gcc.dg/torture/pr91323.c
> index 1411fcaa3966c..f188faa3ccf47 100644
> --- a/gcc/testsuite/gcc.dg/torture/pr91323.c
> +++ b/gcc/testsuite/gcc.dg/torture/pr91323.c
> @@ -1,4 +1,5 @@
> -/* { dg-do run } */
> +/* { dg-do run { xfail powerpc*-*-* } } */
> +/* The ppc xfail is because of PR target/58684.  */

OK, though the proposed comment is slightly different from what's in
the related commit r8-6445-g86145a19abf39f. :)  Thanks!

BR,
Kewen

>  /* { dg-add-options ieee } */
>  /* { dg-require-effective-target fenv_exceptions } */
>  /* { dg-skip-if "fenv" { powerpc-ibm-aix* } } */
> 
>

Re: [PATCH] build: Check for cargo when building rust language

2024-04-23 Thread Rainer Orth

Hi Arthur,

> On 4/17/24 10:13, Rainer Orth wrote:
>> Andrew Pinski  writes:
>> 
>>> On Mon, Apr 8, 2024 at 9:39 AM  wrote:

 From: Pierre-Emmanuel Patry 

 Hello,

 The rust frontend requires cargo to build some of its components,
 its presence was not checked during configuration.
>>>
>>> WHY did this go in right before the release of GCC 14?
>>> I don't get why this is considered temporary and it goes in right
>>> before a release.
>>> That seems broken to me.
>> two more questions about this:
>> Right now, the new cargo configure test disables rust on all of my
>> targets (Solaris, Linux, Darwin) which didn't have it installed.  Before
>> (as recent as last Friday), I could successfully build and test
>> crab1/rust on all of them without cargo in sight.  So I wonder if the
>> patch isn't premature.
>
> We already have components depending on Rust libraries in our development
> repository, so this patch is important to ensure errors are emitted early
> during the configure phase rather than later at build time. I don't think
> this is premature, considering that your targets would fail to build the
> Rust frontend next time we upstream commits, which should happen this week
> or early next week.

it would have been very helpful to state this up front: introducing a
dependency that's never used outside of a configure test right now is
still damn confusing.  An alternative might have been to commit this
patch shortly before it's actually used.

>> Besides, while there are packaged versions of cargo for Solaris 11.4 and
>> Linux, Darwin hasn't anything (not checked Homebrew or similar yet).
>> What's worse, rustup only supports macOS 10.12 and up, while I'm still
>> regularly testing 10.7 and 10.11.  I don't really feel like building
>> rust from source here (if it works at all).  This hasn't been an issue
>> for any other languages that require additional tools for bootstrapping
>> (like Ada or D): there are versions of GNAT around that still support
>> those old Darwin releases, and I could use the C++ version of GDC in GCC
>> 11.
>
> Sorry, I'm not too familiar with the Rust situation on macOS. I am reading
> that starting from Rust version 1.74, the minimum macOS version required is
> indeed 10.12, released in 2016 I believe?
>
> We currently depend on Rust version 1.72, so you should be able to install
> it on macOS 10.11. Maybe with rustup? You can try something like the
> following:
>
> curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
> --default-toolchain=1.72.0;

I tried this with mixed success: while the part of the installation that
goes into $RUSTUP_HOME at least can run cargo --version on both 10.7
and 10.11, the other one (for $CARGO_HOME) dies with SIGILL on 10.7
trying the same.

However, when I ignore most of what's installed by rustup and point
$PATH at .../toolchains/1.72.0-x86_64-apple-darwin/bin, I can run the
cargo in there with --version even on 10.7.  For the moment, that's good
enough to get a trunk build/test with rust included working again, but
of course this doesn't prove that this will remain so once cargo is
actually used.

> which is the default installation method for Rustup, with version 1.72 of
> the language specified. I'm not able to test this, sorry, but I'm very
> interested in knowing if it works. I think you can also install Rust using
> Homebrew, but again I am not able to test this and apologize.

I'll go down this route (or try installing rust from source) only if
need be.

> The goal is to reduce that Rust version further soon anyway - we are going
> to target Rust version 1.49, released 3 years ago, as that is the version
> that gccrs aims to compile. This will bring us closer to compiling our
> dependencies with our own frontend.

Good.  At least knowing this it's easier to check what macOS versions
are supported by e.g. 1.49.

>> At the very least, the Rust situation needs to be documented clearly.
>
> I'd love to work on this - what sort of documentation do you have in mind?
> Do you mean something like the online GCC documentation or an in-tree file?
> Let me know what you'd like me to add and I'll be happy to do so.

I think this should go into gcc/doc/install.texi, as for all other
languages and targets.  This way you have all the necessary information
in one place, while some in-tree file is almost guaranteed to be
overlooked.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University

[Committed] s390x: Fix vec_xl/vec_xst type aliasing [PR114676]

2024-04-23 Thread Andreas Krebbel

The requirements of the vec_xl/vec_xst intrinsics wrt aliasing of the
pointer argument are not really documented.  As it turns out, users
are likely to get it wrong.  With this patch we let the pointer
argument alias everything in order to make it more robust for users.

Committed to mainline. Will be cherry-picked for stable branches as well.

gcc/ChangeLog:

PR target/114676
* config/s390/s390-c.cc (s390_expand_overloaded_builtin): Use a
MEM_REF with an addend of type ptr_type_node.

gcc/testsuite/ChangeLog:

PR target/114676
* gcc.target/s390/zvector/pr114676.c: New test.

Suggested-by: Jakub Jelinek 
---
 gcc/config/s390/s390-c.cc | 16 +---
 .../gcc.target/s390/zvector/pr114676.c| 19 +++
 2 files changed, 28 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/pr114676.c

diff --git a/gcc/config/s390/s390-c.cc b/gcc/config/s390/s390-c.cc
index 8d3d1a467a8..1bb6e810766 100644
--- a/gcc/config/s390/s390-c.cc
+++ b/gcc/config/s390/s390-c.cc
@@ -498,11 +498,11 @@ s390_expand_overloaded_builtin (location_t loc,
/* Build a vector type with the alignment of the source
   location in order to enable correct alignment hints to be
   generated for vl.  */
-   tree mem_type = build_aligned_type (return_type,
-   TYPE_ALIGN (TREE_TYPE (TREE_TYPE 
((*arglist)[1]))));
+   unsigned align = TYPE_ALIGN (TREE_TYPE (TREE_TYPE ((*arglist)[1])));
+   tree mem_type = build_aligned_type (return_type, align);
return build2 (MEM_REF, mem_type,
   fold_build_pointer_plus ((*arglist)[1], (*arglist)[0]),
-  build_int_cst (TREE_TYPE ((*arglist)[1]), 0));
+  build_int_cst (ptr_type_node, 0));
   }
 case S390_OVERLOADED_BUILTIN_s390_vec_xst:
 case S390_OVERLOADED_BUILTIN_s390_vec_xstd2:
@@ -511,11 +511,13 @@ s390_expand_overloaded_builtin (location_t loc,
/* Build a vector type with the alignment of the target
   location in order to enable correct alignment hints to be
   generated for vst.  */
-   tree mem_type = build_aligned_type (TREE_TYPE((*arglist)[0]),
-   TYPE_ALIGN (TREE_TYPE (TREE_TYPE 
((*arglist)[2]))));
+   unsigned align = TYPE_ALIGN (TREE_TYPE (TREE_TYPE ((*arglist)[2])));
+   tree mem_type = build_aligned_type (TREE_TYPE ((*arglist)[0]), align);
return build2 (MODIFY_EXPR, mem_type,
-  build1 (INDIRECT_REF, mem_type,
-  fold_build_pointer_plus ((*arglist)[2], 
(*arglist)[1])),
+  build2 (MEM_REF, mem_type,
+  fold_build_pointer_plus ((*arglist)[2],
+   (*arglist)[1]),
+  build_int_cst (ptr_type_node, 0)),
   (*arglist)[0]);
   }
 case S390_OVERLOADED_BUILTIN_s390_vec_load_pair:
diff --git a/gcc/testsuite/gcc.target/s390/zvector/pr114676.c 
b/gcc/testsuite/gcc.target/s390/zvector/pr114676.c
new file mode 100644
index 000..bdc66b2920a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/zvector/pr114676.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
+
+#include <vecintrin.h>
+
+void __attribute__((noinline)) foo (int *mem)
+{
+  vec_xst ((vector float){ 1.0f, 2.0f, 3.0f, 4.0f }, 0, (float*)mem);
+}
+
+int
+main ()
+{
+  int m[4] = { 0 };
+  foo (m);
+  if (m[3] == 0)
+__builtin_abort ();
+  return 0;
+}
-- 
2.44.0

[PATCH] tree-optimization/114799 - SLP and patterns

2024-04-23 Thread Richard Biener

The following plugs a hole with computing whether a SLP node has any
pattern stmts which is important to know when we want to replace it
by a CTOR from external defs.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/114799
* tree-vect-slp.cc (vect_get_and_check_slp_defs): Properly
update ->any_pattern when swapping operands.

* gcc.dg/vect/bb-slp-pr114799.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr114799.c | 16 
 gcc/tree-vect-slp.cc|  6 ++
 2 files changed, 22 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr114799.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr114799.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114799.c
new file mode 100644
index 000..70572fe703b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114799.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-forwprop" } */
+
+unsigned long x;
+unsigned char y;
+
+void
+foo (void)
+{
+  unsigned long tt = y;
+  tt+=255;
+  unsigned short t1 = tt;
+  t1 = 254 - t1;
+  tt += ((unsigned long)t1);
+  x = tt;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 109f318c7d6..3eb326d20b5 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -888,6 +888,12 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
char swap,
 (*oprnds_info)[i+1]->def_stmts[stmt_num]);
  std::swap ((*oprnds_info)[i]->ops[stmt_num],
 (*oprnds_info)[i+1]->ops[stmt_num]);
+ /* After swapping some operands we lost track whether an
+operand has any pattern defs so be conservative here.  */
+ if ((*oprnds_info)[i]->any_pattern
+ || (*oprnds_info)[i+1]->any_pattern)
+   (*oprnds_info)[i]->any_pattern
+ = (*oprnds_info)[i+1]->any_pattern = true;
  swapped = true;
  continue;
}
-- 
2.35.3

58 matches

Mail list logo