date:20240312

Re: [PATCH v4] LoongArch: Add support for TLS descriptors

2024-03-12 Thread mengqinggang




在 2024/3/13 上午6:15, Xi Ruoyao 写道:

On Tue, 2024-03-12 at 17:20 +0800, mengqinggang wrote:

+(define_insn "@got_load_tls_desc"
+  [(set (match_operand:P 0 "register_operand" "=r")
+   (unspec:P
+       [(match_operand:P 1 "symbolic_operand" "")]
+       UNSPEC_TLS_DESC))
+    (clobber (reg:SI FCC0_REGNUM))
+    (clobber (reg:SI FCC1_REGNUM))
+    (clobber (reg:SI FCC2_REGNUM))
+    (clobber (reg:SI FCC3_REGNUM))
+    (clobber (reg:SI FCC4_REGNUM))
+    (clobber (reg:SI FCC5_REGNUM))
+    (clobber (reg:SI FCC6_REGNUM))
+    (clobber (reg:SI FCC7_REGNUM))
+    (clobber (reg:SI RETURN_ADDR_REGNUM))]
+  "TARGET_TLS_DESC"
+{
+  return TARGET_EXPLICIT_RELOCS
+    ? "pcalau12i\t$r4,%%desc_pc_hi20(%1)\n\
+  \taddi.d\t$r4,$r4,%%desc_pc_lo12(%1)\n\
+  \tld.d\t$r1,$r4,%%desc_ld(%1)\n\
+  \tjirl\t$r1,$r1,%%desc_call(%1)"

Use something like

 ? "pcalau12i\t$r4,%%desc_pc_hi20(%1)\n\t"
   "addi.d\t$r4,$r4,%%desc_pc_lo12(%1)\n\t"
   "ld.d\t$r1,$r4,%%desc_ld(%1)\n\t"
   "jirl\t$r1,$r1,%%desc_call(%1)"
 : "la.tls.desc\t%0,%1";

to prevent additional white spaces in the output asm before tabs.


+    : "la.tls.desc\t%0,%1";
+}
+  [(set_attr "got" "load")
+   (set_attr "mode" "")
+   (set_attr "length" "16")])
+
+(define_insn "got_load_tls_desc_off64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (unspec:DI
+       [(match_operand:DI 1 "symbolic_operand" "")]
+       UNSPEC_TLS_DESC_OFF64))
+    (clobber (reg:SI FCC0_REGNUM))
+    (clobber (reg:SI FCC1_REGNUM))
+    (clobber (reg:SI FCC2_REGNUM))
+    (clobber (reg:SI FCC3_REGNUM))
+    (clobber (reg:SI FCC4_REGNUM))
+    (clobber (reg:SI FCC5_REGNUM))
+    (clobber (reg:SI FCC6_REGNUM))
+    (clobber (reg:SI FCC7_REGNUM))
+    (clobber (reg:SI RETURN_ADDR_REGNUM))
+    (clobber (match_operand:DI 2 "register_operand" "="))]
+  "TARGET_TLS_DESC && TARGET_CMODEL_EXTREME"
+{
+  return TARGET_EXPLICIT_RELOCS
+    ? "pcalau12i\t$r4,%%desc_pc_hi20(%1)\n\
+  \taddi.d\t%2,$r0,%%desc_pc_lo12(%1)\n\
+  \tlu32i.d\t%2,%%desc64_pc_lo20(%1)\n\
+  \tlu52i.d\t%2,%2,%%desc64_pc_hi12(%1)\n\
+  \tadd.d\t$r4,$r4,%2\n\
+  \tld.d\t$r1,$r4,%%desc_ld(%1)\n\
+  \tjirl\t$r1,$r1,%%desc_call(%1)"
+    : "la.tls.desc\t%0,%2,%1";

Likewise.


+}
+  [(set_attr "got" "load")
+   (set_attr "length" "28")])

Otherwise OK.

It's better to allow splitting these two instructions but we can do it
in another patch.  And IMO it's better to enable TLS desc by default if
supported by both the assembler and the libc, but we'll have to defer it
until Glibc 2.40 release.



Do we need to wait until LLVM also supports TLS DESC  before setting it 
as default?

Re: [PATCH v4] LoongArch: Add support for TLS descriptors

2024-03-12 Thread Xi Ruoyao

On Wed, 2024-03-13 at 06:56 +0800, Xi Ruoyao wrote:
> On Wed, 2024-03-13 at 06:15 +0800, Xi Ruoyao wrote:
> > > +(define_insn "@got_load_tls_desc"
> > > +  [(set (match_operand:P 0 "register_operand" "=r")
> 
> Hmm, and it looks like we should use (reg:P 4) instead of match_operand
> here, because the instruction does not work for a different register:
> with TARGET_EXPLICIT_RELOCS we are hard coding r4, and without
> TARGET_EXPLICIT_RELOCS the TLS desc function still only puts the return
> value in r4.

Suggested changes:

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 303666bf6d5..8f4d3f36c26 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2954,10 +2954,10 @@ loongarch_legitimize_tls_address (rtx loc)
  tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
 
  if (TARGET_CMODEL_EXTREME)
-   emit_insn (gen_got_load_tls_desc_off64 (a0, loc,
+   emit_insn (gen_got_load_tls_desc_off64 (loc,
gen_reg_rtx (DImode)));
  else
-   emit_insn (gen_got_load_tls_desc (Pmode, a0, loc));
+   emit_insn (gen_got_load_tls_desc (Pmode, loc));
 
  emit_insn (gen_add3_insn (dest, a0, tp));
}
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 0a1a6a24f61..8e8f1012344 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -2772,9 +2772,9 @@ (define_insn "store_word"
 ;; Thread-Local Storage
 
 (define_insn "@got_load_tls_desc"
-  [(set (match_operand:P 0 "register_operand" "=r")
+  [(set (reg:P 4)
(unspec:P
-   [(match_operand:P 1 "symbolic_operand" "")]
+   [(match_operand:P 0 "symbolic_operand" "")]
UNSPEC_TLS_DESC))
 (clobber (reg:SI FCC0_REGNUM))
 (clobber (reg:SI FCC1_REGNUM))
@@ -2788,20 +2788,20 @@ (define_insn "@got_load_tls_desc"
   "TARGET_TLS_DESC"
 {
   return TARGET_EXPLICIT_RELOCS
-? "pcalau12i\t$r4,%%desc_pc_hi20(%1)\n\
-  \taddi.d\t$r4,$r4,%%desc_pc_lo12(%1)\n\
-  \tld.d\t$r1,$r4,%%desc_ld(%1)\n\
-  \tjirl\t$r1,$r1,%%desc_call(%1)"
-: "la.tls.desc\t%0,%1";
+? "pcalau12i\t$r4,%%desc_pc_hi20(%0)\n\t"
+  "addi.d\t$r4,$r4,%%desc_pc_lo12(%0)\n\t"
+  "ld.d\t$r1,$r4,%%desc_ld(%0)\n\t"
+  "jirl\t$r1,$r1,%%desc_call(%0)"
+: "la.tls.desc\t$r4,%0";
 }
   [(set_attr "got" "load")
(set_attr "mode" "")
(set_attr "length" "16")])
 
 (define_insn "got_load_tls_desc_off64"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (reg:DI 4)
(unspec:DI
-   [(match_operand:DI 1 "symbolic_operand" "")]
+   [(match_operand:DI 0 "symbolic_operand" "")]
UNSPEC_TLS_DESC_OFF64))
 (clobber (reg:SI FCC0_REGNUM))
 (clobber (reg:SI FCC1_REGNUM))
@@ -2812,18 +2812,18 @@ (define_insn "got_load_tls_desc_off64"
 (clobber (reg:SI FCC6_REGNUM))
 (clobber (reg:SI FCC7_REGNUM))
 (clobber (reg:SI RETURN_ADDR_REGNUM))
-(clobber (match_operand:DI 2 "register_operand" "="))]
+(clobber (match_operand:DI 1 "register_operand" "="))]
   "TARGET_TLS_DESC && TARGET_CMODEL_EXTREME"
 {
   return TARGET_EXPLICIT_RELOCS
-? "pcalau12i\t$r4,%%desc_pc_hi20(%1)\n\
-  \taddi.d\t%2,$r0,%%desc_pc_lo12(%1)\n\
-  \tlu32i.d\t%2,%%desc64_pc_lo20(%1)\n\
-  \tlu52i.d\t%2,%2,%%desc64_pc_hi12(%1)\n\
-  \tadd.d\t$r4,$r4,%2\n\
-  \tld.d\t$r1,$r4,%%desc_ld(%1)\n\
-  \tjirl\t$r1,$r1,%%desc_call(%1)"
-: "la.tls.desc\t%0,%2,%1";
+? "pcalau12i\t$r4,%%desc_pc_hi20(%0)\n\t"
+  "addi.d\t%1,$r0,%%desc_pc_lo12(%0)\n\t"
+  "lu32i.d\t%1,%%desc64_pc_lo20(%0)\n\t"
+  "lu52i.d\t%1,%2,%%desc64_pc_hi12(%0)\n\t"
+  "add.d\t$r4,$r4,%1\n\t"
+  "ld.d\t$r1,$r4,%%desc_ld(%0)\n\t"
+  "jirl\t$r1,$r1,%%desc_call(%0)"
+: "la.tls.desc\t$r4,%1,%0";
 }
   [(set_attr "got" "load")
(set_attr "length" "28")])

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

Re: [PATCH] sanitizer: [PR110027] Align asan_vec[0] to MAX (alignb, ASAN_RED_ZONE_SIZE)

2024-03-12 Thread Hongtao Liu

On Tue, Mar 12, 2024 at 8:00 PM liuhongt  wrote:
>
> if alignb > ASAN_RED_ZONE_SIZE and offset[0] is not multiple of
> alignb. (base_align_bias - base_offset) may not aligned to alignb, and
> caused segement fault.
>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> Ok for trunk and backport to GCC13?
CC jakub, I see the code was added by
https://gcc.gnu.org/pipermail/gcc-patches/2018-December/512313.html
The issue in the PR is similar, but __m512 requires bigger
alignment(64 > ASAN_RED_ZONE_SIZE(32)), in that case we need to insert
MAX (alignb, ASAN_RED_ZONE_SIZE) instead of ASAN_RED_ZONE_SIZE.
Assume when alignb > ASAN_RED_ZONE_SIZE, it must be multiple of
ASAN_RED_ZONE_SIZE.
>
> gcc/ChangeLog:
>
> PR sanitizer/110027
> * cfgexpand.cc (expand_stack_vars): Align frame offset to
> MAX (alignb, ASAN_RED_ZONE_SIZE).
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/asan/pr110027.C: New test.
> ---
>  gcc/cfgexpand.cc |  2 +-
>  gcc/testsuite/g++.dg/asan/pr110027.C | 20 
>  2 files changed, 21 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.dg/asan/pr110027.C
>
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index 0de299c62e3..92062378d8e 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -1214,7 +1214,7 @@ expand_stack_vars (bool (*pred) (size_t), class 
> stack_vars_data *data)
> {
>   if (data->asan_vec.is_empty ())
> {
> - align_frame_offset (ASAN_RED_ZONE_SIZE);
> + align_frame_offset (MAX (alignb, ASAN_RED_ZONE_SIZE));
>   prev_offset = frame_offset.to_constant ();
> }
>   prev_offset = align_base (prev_offset,
> diff --git a/gcc/testsuite/g++.dg/asan/pr110027.C 
> b/gcc/testsuite/g++.dg/asan/pr110027.C
> new file mode 100644
> index 000..0067781bc89
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/asan/pr110027.C
> @@ -0,0 +1,20 @@
> +/* PR sanitizer/110027 */
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx512f_runtime } */
> +/* { dg-options "-std=gnu++23 -mavx512f -fsanitize=address -O0 -g 
> -fstack-protector-strong" } */
> +
> +#include 
> +#include 
> +
> +template 
> +using Vec [[gnu::vector_size(W * sizeof(T))]] = T;
> +
> +auto foo() {
> +  Vec<8, int64_t> ret{};
> +  return ret;
> +}
> +
> +int main() {
> +  foo();
> +  return 0;
> +}
> --
> 2.31.1
>


-- 
BR,
Hongtao

Re: [PATCH v4] LoongArch: Add support for TLS descriptors

2024-03-12 Thread Xi Ruoyao

On Wed, 2024-03-13 at 06:15 +0800, Xi Ruoyao wrote:
> > +(define_insn "@got_load_tls_desc"
> > +  [(set (match_operand:P 0 "register_operand" "=r")

Hmm, and it looks like we should use (reg:P 4) instead of match_operand
here, because the instruction does not work for a different register:
with TARGET_EXPLICIT_RELOCS we are hard coding r4, and without
TARGET_EXPLICIT_RELOCS the TLS desc function still only puts the return
value in r4.

> > +   (unspec:P
> > +       [(match_operand:P 1 "symbolic_operand" "")]
> > +       UNSPEC_TLS_DESC))
> > +    (clobber (reg:SI FCC0_REGNUM))
> > +    (clobber (reg:SI FCC1_REGNUM))
> > +    (clobber (reg:SI FCC2_REGNUM))
> > +    (clobber (reg:SI FCC3_REGNUM))
> > +    (clobber (reg:SI FCC4_REGNUM))
> > +    (clobber (reg:SI FCC5_REGNUM))
> > +    (clobber (reg:SI FCC6_REGNUM))
> > +    (clobber (reg:SI FCC7_REGNUM))
> > +    (clobber (reg:SI RETURN_ADDR_REGNUM))]
> > +  "TARGET_TLS_DESC"
> > +{
> > +  return TARGET_EXPLICIT_RELOCS
> > +    ? "pcalau12i\t$r4,%%desc_pc_hi20(%1)\n\
> > +  \taddi.d\t$r4,$r4,%%desc_pc_lo12(%1)\n\
> > +  \tld.d\t$r1,$r4,%%desc_ld(%1)\n\
> > +  \tjirl\t$r1,$r1,%%desc_call(%1)"
> 
> Use something like
> 
>     ? "pcalau12i\t$r4,%%desc_pc_hi20(%1)\n\t"
>   "addi.d\t$r4,$r4,%%desc_pc_lo12(%1)\n\t"
>   "ld.d\t$r1,$r4,%%desc_ld(%1)\n\t"
>   "jirl\t$r1,$r1,%%desc_call(%1)"
>     : "la.tls.desc\t%0,%1";
> 
> to prevent additional white spaces in the output asm before tabs.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

Re: [PATCH v2] c++: ICE with temporary of class type in array DMI [PR109966]

2024-03-12 Thread Jason Merrill


On 3/12/24 11:56, Marek Polacek wrote:

On Tue, Mar 12, 2024 at 09:57:14AM -0400, Jason Merrill wrote:

On 3/11/24 19:27, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk/13?

-- >8 --
This ICE started with the fairly complicated r13-765.  We crash in
gimplify_var_or_parm_decl because a stray VAR_DECL leaked there.
The problem is ultimately that potential_prvalue_result_of wasn't
correctly handling arrays and replace_placeholders_for_class_temp_r
replaced a PLACEHOLDER_EXPR in a TARGET_EXPR which is used in the
context of copy elision.  If I have

M m[2] = { M{""}, M{""} };

then we don't invoke the M(const M&) copy-ctor.  I think the fix is
to detect such a case in potential_prvalue_result_of.

PR c++/109966

gcc/cp/ChangeLog:

* typeck2.cc (potential_prvalue_result_of): Add walk_subtrees
parameter.  Handle initializing an array from a
brace-enclosed-initializer.
(replace_placeholders_for_class_temp_r): Pass walk_subtrees down to
potential_prvalue_result_of.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/nsdmi-aggr20.C: New test.
* g++.dg/cpp1y/nsdmi-aggr21.C: New test.
---
   gcc/cp/typeck2.cc | 27 ---
   gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr20.C | 17 +++
   gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr21.C | 59 +++
   3 files changed, 96 insertions(+), 7 deletions(-)
   create mode 100644 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr20.C
   create mode 100644 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr21.C

diff --git a/gcc/cp/typeck2.cc b/gcc/cp/typeck2.cc
index 31198b2f9f5..8b99ce78e9a 100644
--- a/gcc/cp/typeck2.cc
+++ b/gcc/cp/typeck2.cc
@@ -1406,46 +1406,59 @@ digest_init_flags (tree type, tree init, int flags, 
tsubst_flags_t complain)
A a = (A{});  // initializer
A a = (1, A{});   // initializer
A a = true ? A{} : A{};  // initializer
+ A arr[1] = { A{} };  // initializer
auto x = A{}.x;   // temporary materialization
auto x = foo(A{});// temporary materialization
  FULL_EXPR is the whole expression, SUBOB is its TARGET_EXPR subobject.  */
   static bool
-potential_prvalue_result_of (tree subob, tree full_expr)
+potential_prvalue_result_of (tree subob, tree full_expr, int *walk_subtrees)
   {
+#define RECUR(t) potential_prvalue_result_of (subob, t, walk_subtrees)
 if (subob == full_expr)
   return true;
 else if (TREE_CODE (full_expr) == TARGET_EXPR)
   {
 tree init = TARGET_EXPR_INITIAL (full_expr);
 if (TREE_CODE (init) == COND_EXPR)
-   return (potential_prvalue_result_of (subob, TREE_OPERAND (init, 1))
-   || potential_prvalue_result_of (subob, TREE_OPERAND (init, 2)));
+   return (RECUR (TREE_OPERAND (init, 1))
+   || RECUR (TREE_OPERAND (init, 2)));
 else if (TREE_CODE (init) == COMPOUND_EXPR)
-   return potential_prvalue_result_of (subob, TREE_OPERAND (init, 1));
+   return RECUR (TREE_OPERAND (init, 1));
 /* ??? I don't know if this can be hit.  */
 else if (TREE_CODE (init) == PAREN_EXPR)
{
  gcc_checking_assert (false);
- return potential_prvalue_result_of (subob, TREE_OPERAND (init, 0));
+ return RECUR (TREE_OPERAND (init, 0));
}
   }
+  /* The array case listed above.  */
+  else if (TREE_CODE (full_expr) == CONSTRUCTOR
+  && TREE_CODE (TREE_TYPE (full_expr)) == ARRAY_TYPE)
+for (constructor_elt : CONSTRUCTOR_ELTS (full_expr))
+  if (e.value == subob)
+   {
+ *walk_subtrees = 0;


Why clear walk_subtrees?  Won't that mean we fail to replace any
placeholders nested within an array element initializer?


Right.  I couldn't find a testcase where that would cause a problem
but I think I just wasn't inventive enough.

Originally, I was checking same_type_ignoring_top_level_qualifiers_p
but that's not going to work for code like

   struct N { N(M); };
   N arr[2] = { M{""}, M{""} };

or with operator M().  But I suppose I could just use can_convert
like below.  What do you think about that?


Basing this on the type seems unreliable, we're looking for where in the 
expression the TARGET_EXPR occurs, and there could be others of the same 
type elsewhere in the expression.


Why not loop over CONSTRUCTOR_ELTS like you did above, just without 
clearing walk_subtrees?


Jason

Re: [PATCH v4] LoongArch: Add support for TLS descriptors

2024-03-12 Thread Xi Ruoyao

On Tue, 2024-03-12 at 17:20 +0800, mengqinggang wrote:
> +(define_insn "@got_load_tls_desc"
> +  [(set (match_operand:P 0 "register_operand" "=r")
> + (unspec:P
> +     [(match_operand:P 1 "symbolic_operand" "")]
> +     UNSPEC_TLS_DESC))
> +    (clobber (reg:SI FCC0_REGNUM))
> +    (clobber (reg:SI FCC1_REGNUM))
> +    (clobber (reg:SI FCC2_REGNUM))
> +    (clobber (reg:SI FCC3_REGNUM))
> +    (clobber (reg:SI FCC4_REGNUM))
> +    (clobber (reg:SI FCC5_REGNUM))
> +    (clobber (reg:SI FCC6_REGNUM))
> +    (clobber (reg:SI FCC7_REGNUM))
> +    (clobber (reg:SI RETURN_ADDR_REGNUM))]
> +  "TARGET_TLS_DESC"
> +{
> +  return TARGET_EXPLICIT_RELOCS
> +    ? "pcalau12i\t$r4,%%desc_pc_hi20(%1)\n\
> +  \taddi.d\t$r4,$r4,%%desc_pc_lo12(%1)\n\
> +  \tld.d\t$r1,$r4,%%desc_ld(%1)\n\
> +  \tjirl\t$r1,$r1,%%desc_call(%1)"

Use something like

? "pcalau12i\t$r4,%%desc_pc_hi20(%1)\n\t"
  "addi.d\t$r4,$r4,%%desc_pc_lo12(%1)\n\t"
  "ld.d\t$r1,$r4,%%desc_ld(%1)\n\t"
  "jirl\t$r1,$r1,%%desc_call(%1)"
: "la.tls.desc\t%0,%1";

to prevent additional white spaces in the output asm before tabs.

> +    : "la.tls.desc\t%0,%1";
> +}
> +  [(set_attr "got" "load")
> +   (set_attr "mode" "")
> +   (set_attr "length" "16")])
> +
> +(define_insn "got_load_tls_desc_off64"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> + (unspec:DI
> +     [(match_operand:DI 1 "symbolic_operand" "")]
> +     UNSPEC_TLS_DESC_OFF64))
> +    (clobber (reg:SI FCC0_REGNUM))
> +    (clobber (reg:SI FCC1_REGNUM))
> +    (clobber (reg:SI FCC2_REGNUM))
> +    (clobber (reg:SI FCC3_REGNUM))
> +    (clobber (reg:SI FCC4_REGNUM))
> +    (clobber (reg:SI FCC5_REGNUM))
> +    (clobber (reg:SI FCC6_REGNUM))
> +    (clobber (reg:SI FCC7_REGNUM))
> +    (clobber (reg:SI RETURN_ADDR_REGNUM))
> +    (clobber (match_operand:DI 2 "register_operand" "="))]
> +  "TARGET_TLS_DESC && TARGET_CMODEL_EXTREME"
> +{
> +  return TARGET_EXPLICIT_RELOCS
> +    ? "pcalau12i\t$r4,%%desc_pc_hi20(%1)\n\
> +  \taddi.d\t%2,$r0,%%desc_pc_lo12(%1)\n\
> +  \tlu32i.d\t%2,%%desc64_pc_lo20(%1)\n\
> +  \tlu52i.d\t%2,%2,%%desc64_pc_hi12(%1)\n\
> +  \tadd.d\t$r4,$r4,%2\n\
> +  \tld.d\t$r1,$r4,%%desc_ld(%1)\n\
> +  \tjirl\t$r1,$r1,%%desc_call(%1)"
> +    : "la.tls.desc\t%0,%2,%1";

Likewise.

> +}
> +  [(set_attr "got" "load")
> +   (set_attr "length" "28")])

Otherwise OK.

It's better to allow splitting these two instructions but we can do it
in another patch.  And IMO it's better to enable TLS desc by default if
supported by both the assembler and the libc, but we'll have to defer it
until Glibc 2.40 release.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

[PATCH] Fortran: fix IS_CONTIGUOUS for polymorphic dummy arguments [PR114001]

2024-03-12 Thread Harald Anlauf

Dear all,

here's another small fix: IS_CONTIGUOUS did erroneously always
return .true. for CLASS dummy arguments.  The solution was to
adjust the logic in gfc_is_simply_contiguous to also handle
CLASS symbols.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald

From 8f535b19bd0cb6a7c99ac9ba4c07778f86698a1c Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Tue, 12 Mar 2024 22:58:39 +0100
Subject: [PATCH] Fortran: fix IS_CONTIGUOUS for polymorphic dummy arguments
 [PR114001]

gcc/fortran/ChangeLog:

	PR fortran/114001
	* expr.cc (gfc_is_simply_contiguous): Adjust logic so that CLASS
	symbols are also handled.

gcc/testsuite/ChangeLog:

	PR fortran/114001
	* gfortran.dg/is_contiguous_4.f90: New test.
---
 gcc/fortran/expr.cc   | 19 ++---
 gcc/testsuite/gfortran.dg/is_contiguous_4.f90 | 81 +++
 2 files changed, 91 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/is_contiguous_4.f90

diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc
index 37ea95d0185..82a642b01f7 100644
--- a/gcc/fortran/expr.cc
+++ b/gcc/fortran/expr.cc
@@ -6025,15 +6025,16 @@ gfc_is_simply_contiguous (gfc_expr *expr, bool strict, bool permit_element)
 }

   sym = expr->symtree->n.sym;
-  if (expr->ts.type != BT_CLASS
-  && ((part_ref
-	   && !part_ref->u.c.component->attr.contiguous
-	   && part_ref->u.c.component->attr.pointer)
-	  || (!part_ref
-	  && !sym->attr.contiguous
-	  && (sym->attr.pointer
-		  || (sym->as && sym->as->type == AS_ASSUMED_RANK)
-		  || (sym->as && sym->as->type == AS_ASSUMED_SHAPE)
+  if ((part_ref
+   && part_ref->u.c.component
+   && !part_ref->u.c.component->attr.contiguous
+   && IS_POINTER (part_ref->u.c.component))
+  || (!part_ref
+	  && expr->ts.type != BT_CLASS
+	  && !sym->attr.contiguous
+	  && (sym->attr.pointer
+	  || (sym->as && sym->as->type == AS_ASSUMED_RANK)
+	  || (sym->as && sym->as->type == AS_ASSUMED_SHAPE
 return false;

   if (!ar || ar->type == AR_FULL)
diff --git a/gcc/testsuite/gfortran.dg/is_contiguous_4.f90 b/gcc/testsuite/gfortran.dg/is_contiguous_4.f90
new file mode 100644
index 000..cb066f8836b
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/is_contiguous_4.f90
@@ -0,0 +1,81 @@
+! { dg-do run }
+! PR fortran/114001 - IS_CONTIGUOUS and polymorphic dummy
+
+program main
+  implicit none
+  integer :: i, cnt = 0
+  logical :: expect
+  integer, target  :: m(10) = [(i,i=1,size(m))]
+  integer, pointer :: p(:)
+  type t
+ integer :: j
+  end type t
+  type(t),  pointer :: tt(:), tp(:) ! Type pointer
+  class(t), pointer :: ct(:), cp(:) ! Class pointer
+
+  p => m(1:3)
+  expect = is_contiguous (p)
+  print *, "is_contiguous (p)=", expect
+  if (.not. expect) stop 91
+  call sub_star (p, expect)
+  p => m(1::3)
+  expect = is_contiguous (p)
+  print *, "is_contiguous (p)=", expect
+  if (expect) stop 92
+  call sub_star (p, expect)
+
+  allocate (tt(10))
+  tt(:)% j = m
+  tp => tt(4:6)
+  expect = is_contiguous (tp)
+  if (.not. expect) stop 96
+  print *, "is_contiguous (tp)=", expect
+  call sub_t (tp, expect)
+  tp => tt(4::3)
+  expect = is_contiguous (tp)
+  if (expect) stop 97
+  print *, "is_contiguous (tp)=", expect
+  call sub_t (tp, expect)
+
+  allocate (ct(10))
+  ct(:)% j = m
+  cp => ct(7:9)
+  expect = is_contiguous (cp)
+  print *, "is_contiguous (cp)=", expect
+  if (.not. expect) stop 98
+  call sub_t (cp, expect)
+  cp => ct(4::3)
+  expect = is_contiguous (cp)
+  print *, "is_contiguous (cp)=", expect
+  if (expect) stop 99
+  call sub_t (cp, expect)
+
+contains
+
+  subroutine sub_star (x, expect)
+class(*), intent(in) :: x(:)
+logical,  intent(in) :: expect
+cnt = cnt + 10
+if (is_contiguous (x) .neqv. expect) then
+   print *, "sub_star(1): is_contiguous (x)=", is_contiguous (x), expect
+   stop (cnt + 1)
+end if
+select type (x)
+type is (integer)
+   if (is_contiguous (x) .neqv. expect) then
+  print *, "sub_star(2): is_contiguous (x)=", is_contiguous (x), expect
+  stop (cnt + 2)
+   end if
+end select
+  end
+
+  subroutine sub_t (x, expect)
+class(t), intent(in) :: x(:)
+logical,  intent(in) :: expect
+cnt = cnt + 10
+if (is_contiguous (x) .neqv. expect) then
+   print *, "sub_t: is_contiguous (x)=", is_contiguous (x), expect
+   stop (cnt + 3)
+end if
+  end
+end
--
2.35.3

[PATCH V2] RISC-V: Update test expectancies with recent scheduler change

2024-03-12 Thread Edwin Lu

Given the recent change with adding the scheduler pipeline descriptions,
many scan-dump failures emerged. Relax the expected assembler output
conditions on the affected tests to reduce noise.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c: Disable scheduling
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-1.c: Update test expectancies
* gcc.target/riscv/rvv/base/pr108185-2.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-3.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-4.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-5.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-6.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-7.c: Ditto
* gcc.target/riscv/rvv/base/vcreate.c: Disable scheduling and update
test expectancies
* gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-30.c: Disable scheduling
* gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-31.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_single_block-17.c: Update test
expectancies
* gcc.target/riscv/rvv/vsetvl/vlmax_single_block-18.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-11.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-12.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-4.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-5.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-6.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-7.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-8.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-9.c: Ditto

Signed-off-by: Edwin Lu 
---
V1: Change tests to scan for range of vsetvls instead of specific number

V2: Add -fno-schedule-insns and -fno-schedule-insns2 to testcases that
were missing them. Those that had disabled insn scheduling, update
testcases to match current outputs to pass tests
---
 .../vect/costmodel/riscv/rvv/dynamic-lmul4-6.c   |  1 +
 .../vect/costmodel/riscv/rvv/dynamic-lmul4-8.c   |  1 +
 gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-1.c | 12 ++--
 gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-2.c | 12 ++--
 gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-3.c | 12 ++--
 gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-4.c | 12 ++--
 gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-5.c | 12 ++--
 gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-6.c | 12 ++--
 gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-7.c | 12 ++--
 gcc/testsuite/gcc.target/riscv/rvv/base/vcreate.c|  6 --
 .../gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-30.c |  1 +
 .../gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-31.c |  1 +
 .../riscv/rvv/vsetvl/vlmax_single_block-17.c | 12 ++--
 .../riscv/rvv/vsetvl/vlmax_single_block-18.c |  6 +++---
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-10.c |  4 ++--
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-11.c |  2 +-
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-12.c |  2 +-
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-4.c  |  4 ++--
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-5.c  |  4 ++--
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-6.c  |  4 ++--
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-7.c  |  4 ++--
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-8.c  |  4 ++--
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-9.c  |  4 ++--
 23 files changed, 75 insertions(+), 69 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c
index bd7ce23f6b8..b23acebc916 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize --param 
riscv-autovec-lmul=dynamic -mrvv-vector-bits=scalable -fselective-scheduling 
-fdump-tree-vect-details" } */
+/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2" } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c
index 61619a0c879..ef719ee8445 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize --param 
riscv-autovec-lmul=dynamic -mrvv-vector-bits=scalable -fselective-scheduling 
-fdump-tree-vect-details" } */
+/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2" } */
 
 #include 
 
diff --git

Re: [Patch, fortran PR89645/99065 No IMPLICIT type error with: ASSOCIATE( X => function() )

2024-03-12 Thread Paul Richard Thomas

Hi Harald,

Roger that about the comments. The major part of my recent efforts has been
to maximise comments - apparently not always successfully!

The main reason that I want to "fix everything" is that this is it; I will
not work on this approach anymore. The gfortran/g95 founder's approach was
very clever but has found it's limit with the associate construct. The sad
thing is that this is the only blocker that I know of.

Thanks

Paul


On Tue, 12 Mar 2024 at 21:07, Harald Anlauf  wrote:

> Hi Paul,
>
> On 3/12/24 15:54, Paul Richard Thomas wrote:
> > Hi All,
> >
> > This is the last posting of this patch before I push it. Harald is OK
> with
> > it on the grounds that the inferred_type flag guards the whole lot,
> > except for the chunks in trans-stmt.cc.
> >
> > In spite of Harald's off-list admonition not to try to fix everything at
> > once, this version fixes most of the inquiry reference bugs
> > (associate_68.f90) with the exception of character(kind=4) function
> > selectors. The reason for this is that I have some housekeeping to do
> > before release on finalization and then I want to replace this patch in
> > 15-branch with two pass parsing. My first attempts at the latter were a
> > partial success.
>
> you wouldn't stop trying to fix everything, would you?  ;-)
>
> > It regtests OK on x86_64. Unless there are objections, I will commit on
> > Thursday evening.
>
> No objections, just one wish: could you improve the text of the
> following comments so that mere mortals understand them?
>
> diff --git a/gcc/fortran/primary.cc b/gcc/fortran/primary.cc
> index 12e7bf3c873..0ab69bb9dce 100644
> --- a/gcc/fortran/primary.cc
> +++ b/gcc/fortran/primary.cc
> [...]
> +  /* If there is a usable inquiry reference not there are no matching
> +derived types, force the inquiry reference by setting unknown the
> +type of the primary expression.  */
>
>
> I have a hard time parsing the first part of that sentence.
>
> diff --git a/gcc/fortran/symbol.cc b/gcc/fortran/symbol.cc
> index 5d9852c79e0..16adb2a7efb 100644
> --- a/gcc/fortran/symbol.cc
> +++ b/gcc/fortran/symbol.cc
> [...]
> +/* Find all derived types in the uppermost namespace that have a component
> +   a component called name and stash them in the assoc field of an
> +   associate name variable.
>
>
> "a component" too much?
>
> Thanks,
> Harald
>
> > Cheers
> >
> > Paul
>
>

Re: [Patch, fortran PR89645/99065 No IMPLICIT type error with: ASSOCIATE( X => function() )

2024-03-12 Thread Harald Anlauf


Hi Paul,

On 3/12/24 15:54, Paul Richard Thomas wrote:

Hi All,

This is the last posting of this patch before I push it. Harald is OK with
it on the grounds that the inferred_type flag guards the whole lot,
except for the chunks in trans-stmt.cc.

In spite of Harald's off-list admonition not to try to fix everything at
once, this version fixes most of the inquiry reference bugs
(associate_68.f90) with the exception of character(kind=4) function
selectors. The reason for this is that I have some housekeeping to do
before release on finalization and then I want to replace this patch in
15-branch with two pass parsing. My first attempts at the latter were a
partial success.


you wouldn't stop trying to fix everything, would you?  ;-)


It regtests OK on x86_64. Unless there are objections, I will commit on
Thursday evening.


No objections, just one wish: could you improve the text of the
following comments so that mere mortals understand them?

diff --git a/gcc/fortran/primary.cc b/gcc/fortran/primary.cc
index 12e7bf3c873..0ab69bb9dce 100644
--- a/gcc/fortran/primary.cc
+++ b/gcc/fortran/primary.cc
[...]
+  /* If there is a usable inquiry reference not there are no matching
+derived types, force the inquiry reference by setting unknown the
+type of the primary expression.  */


I have a hard time parsing the first part of that sentence.

diff --git a/gcc/fortran/symbol.cc b/gcc/fortran/symbol.cc
index 5d9852c79e0..16adb2a7efb 100644
--- a/gcc/fortran/symbol.cc
+++ b/gcc/fortran/symbol.cc
[...]
+/* Find all derived types in the uppermost namespace that have a component
+   a component called name and stash them in the assoc field of an
+   associate name variable.


"a component" too much?

Thanks,
Harald


Cheers

Paul

Re: Patch ping Re: [PATCH] icf: Reset SSA_NAME_{PTR,RANGE}_INFO in successfully merged functions [PR113907]

2024-03-12 Thread Jakub Jelinek

On Tue, Mar 12, 2024 at 05:21:58PM +0100, Jakub Jelinek wrote:
> On Tue, Mar 12, 2024 at 10:46:42AM +0100, Jan Hubicka wrote:
> > I am sorry for delaying this.  I made the variant that simply compares
> > value range of functions and prevents merging if they diverge and wanted
> > to make some bigger statistics.  This made me notice some performance
> > problems on clang performance and libstdc++ RB-trees which disrailed me
> > from the original PR.  I will finish the statistics today.
> 
> With the posted patch, perhaps if we don't want to union jump_tables etc.,
> all we could punt on is differences in the jump_table VRs rather than just
> any SSA_NAME_RANGE_INFO differences.

To expand on this, I think we need to either union or punt on jump_func
differences in any case, because for LTO we can't really punt on
SSA_NAME_RANGE_INFO differences given that we don't stream that out and in.
So the ipa_jump_func are I think the only thing that actually can differ
on the ICF merging candidates from value range POV.

Jakub

[PATCH] libquadmath: printf: fix misaligned access on args

2024-03-12 Thread Simon Chopin

On x86, this compiles into movdqa which segfaults on unaligned access.

This kind of failure has been seen when running against glibc 2.39,
which incidentally changed the printf implementation to move away from
alloca() for this data to instead append it at the end of an existing
"scratch buffer", with arbitrary alignement, whereas alloca() was
probably more likely to be naturally aligned.

Tested by adding the patch to the Ubuntu gcc-14 package in
https://launchpad.net/~schopin/+archive/ubuntu/libquadmath

Signed-off-by: Simon Chopin 
---
 libquadmath/printf/printf_fp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libquadmath/printf/printf_fp.c b/libquadmath/printf/printf_fp.c
index 8effcee88fa..d86aa650d38 100644
--- a/libquadmath/printf/printf_fp.c
+++ b/libquadmath/printf/printf_fp.c
@@ -363,7 +363,7 @@ __quadmath_printf_fp (struct __quadmath_printf_file *fp,
 
   /* Fetch the argument value. */
 {
-  fpnum = **(const __float128 **) args[0];
+  memcpy(, *(void* const *) args[0], sizeof(fpnum));
 
   /* Check for special values: not a number or infinity.  */
   if (isnanq (fpnum))

base-commit: 39737cdf002637c7a652e9c3e36f369cfce581e5
-- 
2.43.0

Fwd: [PATCH v3] combine: Fix ICE in try_combine on pr112494.c [PR112560]

2024-03-12 Thread Uros Bizjak

Forgot to CC gcc-patches@ ML... sorry for the duplicate...

The compiler, configured with --enable-checking=yes,rtl,extra ICEs with:

internal compiler error: RTL check: expected elt 0 type 'e' or 'u',
have 'E' (rtx unspec) in try_combine, at combine.cc:3237

This is

3236  /* Just replace the CC reg with a new mode.  */
3237  SUBST (XEXP (*cc_use_loc, 0), newpat_dest);
3238  undobuf.other_insn = cc_use_insn;

in combine.cc, where *cc_use_loc is

(unspec:DI [
(reg:CC 17 flags)
] UNSPEC_PUSHFL)

combine assumes CC must be used inside of a comparison and uses XEXP (..., 0)
without checking on the RTX type of the argument.

Replace cc_use_loc with the entire new RTX only in case cc_use_loc satisfies
COMPARISON_P predicate.  Otherwise scan the entire cc_use_loc RTX for CC reg
to be updated with a new mode.

PR rtl-optimization/112560

gcc/ChangeLog:

* combine.cc (try_combine): Replace cc_use_loc with the entire
new RTX only in case cc_use_loc satisfies COMPARISON_P predicate.
Otherwise scan the entire cc_use_loc RTX for CC reg to be updated
with a new mode.
* config/i386/i386.md (@pushf2): Allow all CC modes for
operand 1.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

OK for trunk?

Uros.
diff --git a/gcc/combine.cc b/gcc/combine.cc
index a4479f8d836..92b8d98e6c1 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -3222,8 +3222,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
 #endif
  /* Cases for modifying the CC-using comparison.  */
  if (compare_code != orig_compare_code
- /* ??? Do we need to verify the zero rtx?  */
- && XEXP (*cc_use_loc, 1) == const0_rtx)
+ && COMPARISON_P (*cc_use_loc))
{
  /* Replace cc_use_loc with entire new RTX.  */
  SUBST (*cc_use_loc,
@@ -3233,8 +3232,19 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
}
  else if (compare_mode != orig_compare_mode)
{
+ subrtx_ptr_iterator::array_type array;
+
  /* Just replace the CC reg with a new mode.  */
- SUBST (XEXP (*cc_use_loc, 0), newpat_dest);
+ FOR_EACH_SUBRTX_PTR (iter, array, cc_use_loc, NONCONST)
+   {
+ rtx *loc = *iter;
+ if (REG_P (*loc)
+ && REGNO (*loc) == REGNO (newpat_dest))
+   {
+ SUBST (*loc, newpat_dest);
+ iter.skip_subrtxes ();
+   }
+   }
  undobuf.other_insn = cc_use_insn;
}
}
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index df97a2d6270..9dc33fd239a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2213,9 +2213,9 @@ (define_insn "*pop1_epilogue"
 
 (define_insn "@pushfl2"
   [(set (match_operand:W 0 "push_operand" "=<")
-   (unspec:W [(match_operand:CC 1 "flags_reg_operand")]
+   (unspec:W [(match_operand 1 "flags_reg_operand")]
  UNSPEC_PUSHFL))]
-  ""
+  "GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_CC"
   "pushf{}"
   [(set_attr "type" "push")
(set_attr "mode" "")])

Re: [PATCH] Fix PR ipa/113996

2024-03-12 Thread Eric Botcazou

> Patch is still OK, but ipa-ICF will only identify the functions if
> static chain is unused. Perhaps just picking the winning candidate to be
> version without static chain and making ipa-inline to not ICE when calls
> with static chain lands to function with no static chain would help us
> to optimize better.

I see, thanks for the explanation.  The attached patch appears to work.


PR ipa/113996
* ipa-icf.h (sem_function): Add static_chain_p member.
* ipa-icf.cc (sem_function::init): Initialize it.
(sem_item_optimizer::merge_classes): If the class is made of
functions, pick one without static chain as the target.

-- 
Eric Botcazoudiff --git a/gcc/ipa-icf.cc b/gcc/ipa-icf.cc
index 5d5a42f9c6c..4fc02831798 100644
--- a/gcc/ipa-icf.cc
+++ b/gcc/ipa-icf.cc
@@ -1368,6 +1368,8 @@ sem_function::init (ipa_icf_gimple::func_checker *checker)
   /* iterating all function arguments.  */
   arg_count = count_formal_params (fndecl);
 
+  static_chain_p = func->static_chain_decl != NULL_TREE;
+
   edge_count = n_edges_for_fn (func);
   cgraph_node *cnode = dyn_cast  (node);
   if (!cnode->thunk)
@@ -3399,11 +3401,22 @@ sem_item_optimizer::merge_classes (unsigned int prev_class_count,
 
 	sem_item *source = c->members[0];
 
-	if (DECL_NAME (source->decl)
-	&& MAIN_NAME_P (DECL_NAME (source->decl)))
-	  /* If merge via wrappers, picking main as the target can be
-	 problematic.  */
-	  source = c->members[1];
+	if (source->type == FUNC)
+	  {
+	/* Pick a member without static chain, if any.  */
+	for (unsigned int j = 0; j < c->members.length (); j++)
+	  if (!static_cast (c->members[j])->static_chain_p)
+		{
+		  source = c->members[j];
+		  break;
+		}
+
+	/* If merge via wrappers, picking main as the target can be
+	   problematic.  */
+	if (DECL_NAME (source->decl)
+		&& MAIN_NAME_P (DECL_NAME (source->decl)))
+	  source = c->members[source == c->members[0] ? 1 : 0];
+	  }
 
 	for (unsigned int j = 0; j < c->members.length (); j++)
 	  {
diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h
index ef7e41bfa88..da20862c306 100644
--- a/gcc/ipa-icf.h
+++ b/gcc/ipa-icf.h
@@ -355,6 +355,9 @@ public:
  parameters.  */
   bool compatible_parm_types_p (tree, tree);
 
+  /* Return true if parameter I may be used.  */
+  bool param_used_p (unsigned int i);
+
   /* Exception handling region tree.  */
   eh_region region_tree;
 
@@ -379,6 +382,9 @@ public:
   /* Total number of SSA names used in the function.  */
   unsigned ssa_names_size;
 
+  /* Whether the special PARM_DECL for the static chain is present.  */
+  bool static_chain_p;
+
   /* Array of structures for all basic blocks.  */
   vec  bb_sorted;
 
@@ -386,9 +392,6 @@ public:
  function.  */
   hashval_t m_alias_sets_hash;
 
-  /* Return true if parameter I may be used.  */
-  bool param_used_p (unsigned int i);
-
 private:
   /* Calculates hash value based on a BASIC_BLOCK.  */
   hashval_t get_bb_hash (const ipa_icf_gimple::sem_bb *basic_block);

Re: Patch ping Re: [PATCH] icf: Reset SSA_NAME_{PTR,RANGE}_INFO in successfully merged functions [PR113907]

2024-03-12 Thread Jakub Jelinek

On Tue, Mar 12, 2024 at 10:46:42AM +0100, Jan Hubicka wrote:
> I am sorry for delaying this.  I made the variant that simply compares
> value range of functions and prevents merging if they diverge and wanted
> to make some bigger statistics.  This made me notice some performance
> problems on clang performance and libstdc++ RB-trees which disrailed me
> from the original PR.  I will finish the statistics today.

With the posted patch, perhaps if we don't want to union jump_tables etc.,
all we could punt on is differences in the jump_table VRs rather than just
any SSA_NAME_RANGE_INFO differences.
But I agree that for GCC 15 we really want to merge rather than punt.

> For next stage1 we definitly want to move ahead with merging metadata
> (not only value ranges, but hopefully also TBAA).  Currently the only
> thing we merge aggressively is the edge profile (and we have PR on that
> merging functions which differs only but likely/unlikely attribute).
> However for backportability and for this avoiding merging may be safer
> solution depending on the stats.A
> 
> I was looking into ipa-vrp useability and there are several tests on
> Firefox talos where ipa-prop VRP makes difference. It boils down to
> propagating fact that parameter is alway snon-NULL.  This is commonly
> tested in C++ casts.

Jakub

Re: [PATCH] testsuite: Fix vfprintf-chk-1.c with -fhardened

2024-03-12 Thread Jakub Jelinek

On Thu, Feb 15, 2024 at 10:53:08PM +, Sam James wrote:
> With _FORTIFY_SOURCE >= 2 (enabled by -fhardened), vfprintf-chk-1.c's
> __vfprintf_chk ends up calling __vprintf_chk rather than vprintf.

s/__vprintf_chk/__vfprintf_chk/ above

> 
> ```
> --- a/fortify.s
> +++ b/no-fortify.s
> @@ -8,27 +8,28 @@
> [...]
>  __vfprintf_chk:
> [...]
> movl$1, should_optimize(%rip)
> -   jmp __vfprintf_chk
> +   jmp vfprintf@PLT
> ```
> 
> 2024-02-15Sam James 
> 
> gcc/testsuite/ChangeLog:
>   * gcc.c-torture/execute/vfprintf-chk-1.c (__vfprintf_chk): Undefine 
> _FORTIFY_SOURCE
>   to call the real vfprintf.

I think we should change vprintf-chk-1.c as well.

> ---
> The test, AIUI, is trying to test GCC's own basic _chk bits rather than
> any of e.g. glibc's _FORTIFY_SOURCE handling.
> 
> I'm curious as to why only vfprintf triggers this right now. If this patch is 
> right,
> perhaps we should do printf-chk-1.c, fprintf-chk-1.c, and vprintf-chk-1.

That is easy.  In the printf-chk-1.c case it calls vprintf which results in
__vfprintf_chk or __vprintf_chk calls but redefines just __printf_chk.
Similarly fprintf-chk-1.c case calls vfprintf which results in __vfprintf_chk
call which isn't redefined either.
In the __vfprintf_chk case it calls vfprintf, which the inline results in
__vfprintf_chk call and so it self-recurses instead of calling glibc for the
actual implementation.
In the vprintf-chk-1.c case, it depends.
__fortify_function int
vprintf (const char *__restrict __fmt, __gnuc_va_list __ap)
{
#ifdef __USE_EXTERN_INLINES
  return __vfprintf_chk (stdout, __USE_FORTIFY_LEVEL - 1, __fmt, __ap);
#else
  return __vprintf_chk (__USE_FORTIFY_LEVEL - 1, __fmt, __ap);
#endif
}
So, if __USE_EXTERN_INLINES is defined, it will work fine, if not, it will
also self-recurse like vfprintf-chk-1.c.

Jakub

Summary: [PATCH][committed]AArch64: Do not allow SIMD clones with simdlen 1 [PR113552][GCC 13/12/11 backport]

2024-03-12 Thread Tamar Christina

Hi All,

This is a backport of g:306713c953d509720dc394c43c0890548bb0ae07.

The AArch64 vector PCS does not allow simd calls with simdlen 1,
however due to a bug we currently do allow it for num == 0.

This causes us to emit a symbol that doesn't exist and we fail to link.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Committed to GCC 13,12,11 branches as previously approved.

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/113552
* config/aarch64/aarch64.cc
(aarch64_simd_clone_compute_vecsize_and_simdlen): Block simdlen 1.

gcc/testsuite/ChangeLog:

PR tree-optimization/113552
* gcc.target/aarch64/pr113552.c: New test.
* gcc.target/aarch64/simd_pcs_attribute-3.c: Remove bogus check.

---
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
f546c48ae2d2bad2e34c6b72e5e3e30aba3c3bd6..d19a9c16cc97ae75afd4e29f4339d65d39cfb73a
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -27027,7 +27027,7 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct 
cgraph_node *node,
bool explicit_p)
 {
   tree t, ret_type;
-  unsigned int elt_bits, count;
+  unsigned int elt_bits, count = 0;
   unsigned HOST_WIDE_INT const_simdlen;
   poly_uint64 vec_bits;
 
@@ -27104,7 +27104,7 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct 
cgraph_node *node,
   vec_bits = (num == 0 ? 64 : 128);
   clonei->simdlen = exact_div (vec_bits, elt_bits);
 }
-  else
+  else if (maybe_ne (clonei->simdlen, 1U))
 {
   count = 1;
   vec_bits = clonei->simdlen * elt_bits;
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113552.c 
b/gcc/testsuite/gcc.target/aarch64/pr113552.c
new file mode 100644
index 
..9c96b061ed2b4fcc57e58925277f74d14f79c51f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113552.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=armv8-a" } */
+
+__attribute__ ((__simd__ ("notinbranch"), const))
+double cos (double);
+
+void foo (float *a, double *b)
+{
+for (int i = 0; i < 12; i+=3)
+  {
+b[i] = cos (5.0 * a[i]);
+b[i+1] = cos (5.0 * a[i+1]);
+b[i+2] = cos (5.0 * a[i+2]);
+  }
+}
+
+/* { dg-final { scan-assembler-times {bl\t_ZGVnN2v_cos} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c 
b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
index 
95f6a6803e889c02177ef10972962ed62d2095eb..c6dac6b104c94c9de89ed88dc5a73e185d2be125
 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
@@ -18,7 +18,7 @@ double foo(double x)
 }
 
 /* { dg-final { scan-assembler-not {\.variant_pcs\tfoo} } } */
-/* { dg-final { scan-assembler-times {\.variant_pcs\t_ZGVnM1v_foo} 1 } } */
+/* { dg-final { scan-assembler-not {\.variant_pcs\t_ZGVnM1v_foo} } } */
 /* { dg-final { scan-assembler-times {\.variant_pcs\t_ZGVnM2v_foo} 1 } } */
-/* { dg-final { scan-assembler-times {\.variant_pcs\t_ZGVnN1v_foo} 1 } } */
+/* { dg-final { scan-assembler-not {\.variant_pcs\t_ZGVnN1v_foo} } } */
 /* { dg-final { scan-assembler-times {\.variant_pcs\t_ZGVnN2v_foo} 1 } } */




-- 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f546c48ae2d2bad2e34c6b72e5e3e30aba3c3bd6..d19a9c16cc97ae75afd4e29f4339d65d39cfb73a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -27027,7 +27027,7 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
 	bool explicit_p)
 {
   tree t, ret_type;
-  unsigned int elt_bits, count;
+  unsigned int elt_bits, count = 0;
   unsigned HOST_WIDE_INT const_simdlen;
   poly_uint64 vec_bits;
 
@@ -27104,7 +27104,7 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
   vec_bits = (num == 0 ? 64 : 128);
   clonei->simdlen = exact_div (vec_bits, elt_bits);
 }
-  else
+  else if (maybe_ne (clonei->simdlen, 1U))
 {
   count = 1;
   vec_bits = clonei->simdlen * elt_bits;
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113552.c b/gcc/testsuite/gcc.target/aarch64/pr113552.c
new file mode 100644
index ..9c96b061ed2b4fcc57e58925277f74d14f79c51f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113552.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=armv8-a" } */
+
+__attribute__ ((__simd__ ("notinbranch"), const))
+double cos (double);
+
+void foo (float *a, double *b)
+{
+for (int i = 0; i < 12; i+=3)
+  {
+b[i] = cos (5.0 * a[i]);
+b[i+1] = cos (5.0 * a[i+1]);
+b[i+2] = cos (5.0 * a[i+2]);
+  }
+}
+
+/* { dg-final { scan-assembler-times {bl\t_ZGVnN2v_cos} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
index

Re: [PATCH] testsuite: Fix vfprintf-chk-1.c with -fhardened

2024-03-12 Thread Sam James

Sam James  writes:

> With _FORTIFY_SOURCE >= 2 (enabled by -fhardened), vfprintf-chk-1.c's
> __vfprintf_chk ends up calling __vprintf_chk rather than vprintf.
>
> ```
> --- a/fortify.s
> +++ b/no-fortify.s
> @@ -8,27 +8,28 @@
> [...]
>  __vfprintf_chk:
> [...]
> movl$1, should_optimize(%rip)
> -   jmp __vfprintf_chk
> +   jmp vfprintf@PLT
> ```

Ping.

>
> 2024-02-15Sam James 
>
> gcc/testsuite/ChangeLog:
>   * gcc.c-torture/execute/vfprintf-chk-1.c (__vfprintf_chk): Undefine 
> _FORTIFY_SOURCE
>   to call the real vfprintf.
> ---
> The test, AIUI, is trying to test GCC's own basic _chk bits rather than
> any of e.g. glibc's _FORTIFY_SOURCE handling.
>
> I'm curious as to why only vfprintf triggers this right now. If this patch is 
> right,
> perhaps we should do printf-chk-1.c, fprintf-chk-1.c, and vprintf-chk-1.
>
> Please push if OK as I don't have access.
>
>  gcc/testsuite/gcc.c-torture/execute/vfprintf-chk-1.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/gcc/testsuite/gcc.c-torture/execute/vfprintf-chk-1.c 
> b/gcc/testsuite/gcc.c-torture/execute/vfprintf-chk-1.c
> index 401eaf4304a4..a8e5689e3fe6 100644
> --- a/gcc/testsuite/gcc.c-torture/execute/vfprintf-chk-1.c
> +++ b/gcc/testsuite/gcc.c-torture/execute/vfprintf-chk-1.c
> @@ -1,6 +1,7 @@
>  /* { dg-skip-if "requires io" { freestanding } }  */
>  
>  #ifndef test
> +#undef _FORTIFY_SOURCE
>  #include 
>  #include 
>  #include

[PATCH v2] c++: ICE with temporary of class type in array DMI [PR109966]

2024-03-12 Thread Marek Polacek

On Tue, Mar 12, 2024 at 09:57:14AM -0400, Jason Merrill wrote:
> On 3/11/24 19:27, Marek Polacek wrote:
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk/13?
> > 
> > -- >8 --
> > This ICE started with the fairly complicated r13-765.  We crash in
> > gimplify_var_or_parm_decl because a stray VAR_DECL leaked there.
> > The problem is ultimately that potential_prvalue_result_of wasn't
> > correctly handling arrays and replace_placeholders_for_class_temp_r
> > replaced a PLACEHOLDER_EXPR in a TARGET_EXPR which is used in the
> > context of copy elision.  If I have
> > 
> >M m[2] = { M{""}, M{""} };
> > 
> > then we don't invoke the M(const M&) copy-ctor.  I think the fix is
> > to detect such a case in potential_prvalue_result_of.
> > 
> > PR c++/109966
> > 
> > gcc/cp/ChangeLog:
> > 
> > * typeck2.cc (potential_prvalue_result_of): Add walk_subtrees
> > parameter.  Handle initializing an array from a
> > brace-enclosed-initializer.
> > (replace_placeholders_for_class_temp_r): Pass walk_subtrees down to
> > potential_prvalue_result_of.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp1y/nsdmi-aggr20.C: New test.
> > * g++.dg/cpp1y/nsdmi-aggr21.C: New test.
> > ---
> >   gcc/cp/typeck2.cc | 27 ---
> >   gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr20.C | 17 +++
> >   gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr21.C | 59 +++
> >   3 files changed, 96 insertions(+), 7 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr20.C
> >   create mode 100644 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr21.C
> > 
> > diff --git a/gcc/cp/typeck2.cc b/gcc/cp/typeck2.cc
> > index 31198b2f9f5..8b99ce78e9a 100644
> > --- a/gcc/cp/typeck2.cc
> > +++ b/gcc/cp/typeck2.cc
> > @@ -1406,46 +1406,59 @@ digest_init_flags (tree type, tree init, int flags, 
> > tsubst_flags_t complain)
> >A a = (A{});   // initializer
> >A a = (1, A{});// initializer
> >A a = true ? A{} : A{};  // initializer
> > + A arr[1] = { A{} };  // initializer
> >auto x = A{}.x;// temporary materialization
> >auto x = foo(A{}); // temporary materialization
> >  FULL_EXPR is the whole expression, SUBOB is its TARGET_EXPR subobject. 
> >  */
> >   static bool
> > -potential_prvalue_result_of (tree subob, tree full_expr)
> > +potential_prvalue_result_of (tree subob, tree full_expr, int 
> > *walk_subtrees)
> >   {
> > +#define RECUR(t) potential_prvalue_result_of (subob, t, walk_subtrees)
> > if (subob == full_expr)
> >   return true;
> > else if (TREE_CODE (full_expr) == TARGET_EXPR)
> >   {
> > tree init = TARGET_EXPR_INITIAL (full_expr);
> > if (TREE_CODE (init) == COND_EXPR)
> > -   return (potential_prvalue_result_of (subob, TREE_OPERAND (init, 1))
> > -   || potential_prvalue_result_of (subob, TREE_OPERAND (init, 2)));
> > +   return (RECUR (TREE_OPERAND (init, 1))
> > +   || RECUR (TREE_OPERAND (init, 2)));
> > else if (TREE_CODE (init) == COMPOUND_EXPR)
> > -   return potential_prvalue_result_of (subob, TREE_OPERAND (init, 1));
> > +   return RECUR (TREE_OPERAND (init, 1));
> > /* ??? I don't know if this can be hit.  */
> > else if (TREE_CODE (init) == PAREN_EXPR)
> > {
> >   gcc_checking_assert (false);
> > - return potential_prvalue_result_of (subob, TREE_OPERAND (init, 0));
> > + return RECUR (TREE_OPERAND (init, 0));
> > }
> >   }
> > +  /* The array case listed above.  */
> > +  else if (TREE_CODE (full_expr) == CONSTRUCTOR
> > +  && TREE_CODE (TREE_TYPE (full_expr)) == ARRAY_TYPE)
> > +for (constructor_elt : CONSTRUCTOR_ELTS (full_expr))
> > +  if (e.value == subob)
> > +   {
> > + *walk_subtrees = 0;
> 
> Why clear walk_subtrees?  Won't that mean we fail to replace any
> placeholders nested within an array element initializer?

Right.  I couldn't find a testcase where that would cause a problem
but I think I just wasn't inventive enough.

Originally, I was checking same_type_ignoring_top_level_qualifiers_p
but that's not going to work for code like

  struct N { N(M); };
  N arr[2] = { M{""}, M{""} };

or with operator M().  But I suppose I could just use can_convert
like below.  What do you think about that?

dg.exp passed, full regtest running.

-- >8 --
This ICE started with the fairly complicated r13-765.  We crash in
gimplify_var_or_parm_decl because a stray VAR_DECL leaked there.
The problem is ultimately that potential_prvalue_result_of wasn't
correctly handling arrays and replace_placeholders_for_class_temp_r
replaced a PLACEHOLDER_EXPR in a TARGET_EXPR which is used in the
context of copy elision.  If I have

  M m[2] = { M{""}, M{""} };

then we don't invoke the M(const M&) copy-ctor.  I think the fix is
to detect such a case in potential_prvalue_result_of.

PR c++/109966

gcc/cp/ChangeLog:

* typeck2.cc

[committed] libgomp/libgomp.texi: Fix @node order in @menu

2024-03-12 Thread Tobias Burnus


The ordering problem was reported on #gfortran's IRC.

The warning disappears between texinfo 6.7 and 6.8  – and my bet is that 
it has been caused by the texinfo commit


fa1ee0cf35 Do not warn if external node in menu is not consistent with 
sections


which implies that it was done on purpose in texinfo. It clearly wasn't 
done on purpose in GCC, though. Hence:


Committed as obvious.

Tobias
commit ef79c64cb5762c86ee04ddfcedb7fe31eaa3bac8
Author: Tobias Burnus 
Date:   Tue Mar 12 15:42:50 2024 +0100

libgomp/libgomp.texi: Fix @node order in @menu

While texinfo 7.0.3 does not warn, an older texinfo did complain about:
libgomp.texi:1964: warning: node next `omp_target_memcpy' in menu
`omp_target_memcpy_rect' and in sectioning `omp_target_memcpy_async' differ

libgomp/

* libgomp.texi (Device Memory Routines): Swap item order to match
the order of the '@node's of the '@subsection's.

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index bf5c7a76fc9..57165e0e981 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -1783,8 +1783,8 @@ pointers on devices. They have C linkage and do not throw exceptions.
 * omp_target_is_present:: Check whether storage is mapped
 * omp_target_is_accessible:: Check whether memory is device accessible
 * omp_target_memcpy:: Copy data between devices
-* omp_target_memcpy_rect:: Copy a subvolume of data between devices
 * omp_target_memcpy_async:: Copy data between devices asynchronously
+* omp_target_memcpy_rect:: Copy a subvolume of data between devices
 * omp_target_memcpy_rect_async:: Copy a subvolume of data between devices asynchronously
 @c * omp_target_memset:: /TR12
 @c * omp_target_memset_async:: /TR12

Re: [PATCH] s390: Fix test vector/long-double-to-i64.c

2024-03-12 Thread Stefan Schulze Frielinghaus

On Mon, Mar 11, 2024 at 11:14:04AM +0100, Andreas Krebbel wrote:
> On 2/29/24 13:15, Stefan Schulze Frielinghaus wrote:
> > Starting with r14-8319-g86de9b66480b71 fwprop improved so that vpdi is
> > no longer required.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * gcc.target/s390/vector/long-double-to-i64.c: Fix scan
> > assembler directive.
> 
> Should we perhaps rather turn the scan-assembler directives into something 
> which checks for the
> absence of vpdi then? In order to get notified once this really useful 
> optimization breaks?

I thought about checking for the most optimal code which would be just
two loads and a convert instruction.  Thus if this fails, then we have a
regression.  Speaking of regressions, the old behaviour was restored by
r14-9412-g3e3e4156a5f93e which means we are back using vpdi.  Thus, I
will leave this patch on hold and have a second look.

Cheers,
Stefan

> 
> Andreas
> 
> > ---
> >  .../gcc.target/s390/vector/long-double-to-i64.c | 13 +
> >  1 file changed, 9 insertions(+), 4 deletions(-)
> > 
> > diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-to-i64.c 
> > b/gcc/testsuite/gcc.target/s390/vector/long-double-to-i64.c
> > index 2dbbb5d1c03..ed89878e6ee 100644
> > --- a/gcc/testsuite/gcc.target/s390/vector/long-double-to-i64.c
> > +++ b/gcc/testsuite/gcc.target/s390/vector/long-double-to-i64.c
> > @@ -1,19 +1,24 @@
> >  /* { dg-do compile } */
> >  /* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */
> >  /* { dg-do run { target { s390_z14_hw } } } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } } } */
> > +
> >  #include 
> >  #include 
> >  
> > +/*
> > +** long_double_to_i64:
> > +** ld  %f0,0\(%r2\)
> > +** ld  %f2,8\(%r2\)
> > +** cgxbr   %r2,5,%f0
> > +** br  %r14
> > +*/
> >  __attribute__ ((noipa)) static int64_t
> >  long_double_to_i64 (long double x)
> >  {
> >return x;
> >  }
> >  
> > -/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,1\n} 1 } 
> > } */
> > -/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,5\n} 1 } 
> > } */
> > -/* { dg-final { scan-assembler-times {\n\tcgxbr\t} 1 } } */
> > -
> >  int
> >  main (void)
> >  {
>

Re: [Patch, fortran PR89645/99065 No IMPLICIT type error with: ASSOCIATE( X => function() )

2024-03-12 Thread Paul Richard Thomas

Hi All,

This is the last posting of this patch before I push it. Harald is OK with
it on the grounds that the inferred_type flag guards the whole lot,
except for the chunks in trans-stmt.cc.

In spite of Harald's off-list admonition not to try to fix everything at
once, this version fixes most of the inquiry reference bugs
(associate_68.f90) with the exception of character(kind=4) function
selectors. The reason for this is that I have some housekeeping to do
before release on finalization and then I want to replace this patch in
15-branch with two pass parsing. My first attempts at the latter were a
partial success.

It regtests OK on x86_64. Unless there are objections, I will commit on
Thursday evening.

Cheers

Paul

Fortran: Fix class/derived/complex function associate selectors [PR87477]

2024-03-12  Paul Thomas  

gcc/fortran
PR fortran/87477
PR fortran/89645
PR fortran/99065
PR fortran/114141
PR fortran/114280
* class.cc (gfc_change_class): New function needed for
associate names, when rank changes or a derived type is
produced by resolution
* dump-parse-tree.cc (show_code_node): Make output for SELECT
TYPE more comprehensible.
* expr.cc (find_inquiry_ref): Do not simplify expressions of
an inferred type.
* gfortran.h : Add 'gfc_association_list' to structure
'gfc_association_list'. Add prototypes for
'gfc_find_derived_types', 'gfc_fixup_inferred_type_refs' and
'gfc_change_class'. Add macro IS_INFERRED_TYPE.
* match.cc (copy_ts_from_selector_to_associate): Add bolean arg
'select_type' with default false. If this is a select type name
and the selector is a inferred type, build the class type and
apply it to the associate name.
(build_associate_name): Pass true to 'select_type' in call to
previous.
* parse.cc (parse_associate): If the selector is inferred type
the associate name is too. Make sure that function selector
class and rank, if known, are passed to the associate name. If
a function result exists, pass its typespec to the associate
name.
* primary.cc (resolvable_fcns): New function to check that all
the function references are resolvable.
(gfc_match_varspec): If a scalar derived type select type
temporary has an array reference, match the array reference,
treating this in the same way as an equivalence member. Do not
set 'inquiry' if applied to an unknown type the inquiry name
is ambiguous with the component of an accessible derived type.
Check that resolution of the target expression is OK by testing
if the symbol is declared or is an operator expression, then
using 'resolvable_fcns' recursively. If all is well, resolve
the expression. If this is an inferred type with a component
reference, call 'gfc_find_derived_types' to find a suitable
derived type. If there is an inquiry ref and the symbol either
is of unknown type or is inferred to be a derived type, set the
primary and symbol TKR appropriately.
* resolve.cc (resolve_variable): Call new function below.
(gfc_fixup_inferred_type_refs): New function to ensure that the
expression references for a inferred type are consistent with
the now fixed up selector.
(resolve_assoc_var): Ensure that derived type or class function
selectors transmit the correct arrayspec to the associate name.
(resolve_select_type): If the selector is an associate name of
inferred type and has no component references, the associate
name should have its typespec. Simplify the conversion of a
class array to class scalar by calling 'gfc_change_class'.
Make sure that a class, inferred type selector with an array
ref transfers the typespec from the symbol to the expression.
* symbol.cc (gfc_set_default_type): If an associate name with
unknown type has a selector expression, try resolving the expr.
(find_derived_types, gfc_find_derived_types): New functions
that search for a derived type with a given name.
* trans-expr.cc (gfc_conv_variable): Some inferred type exprs
escape resolution so call 'gfc_fixup_inferred_type_refs'.
* trans-stmt.cc (trans_associate_var): Tidy up expression for
'class_target'. Finalize and free class function results.
Correctly handle selectors that are class functions and class
array references, passed as derived types.

gcc/testsuite/
PR fortran/87477
PR fortran/89645
PR fortran/99065
* gfortran.dg/associate_64.f90 : New test
* gfortran.dg/associate_66.f90 : New test
* gfortran.dg/associate_67.f90 : New test

PR fortran/114141
* gfortran.dg/associate_65.f90 : New test

PR fortran/114280
* gfortran.dg/associate_68.f90 : New test
diff --git a/gcc/fortran/class.cc b/gcc/fortran/class.cc
index ce31a93abcd..abe89630be3 100644
--- a/gcc/fortran/class.cc
+++ b/gcc/fortran/class.cc
@@ -815,6 +815,56 @@ gfc_build_class_symbol (gfc_typespec *ts, symbol_attribute *attr,
 }
 
 
+/* Change class, using gfc_build_class_symbol. This is needed for associate
+   names, when rank changes or a derived type is produced by resolution.  */
+
+void
+gfc_change_class (gfc_typespec *ts, symbol_attribute *sym_attr,
+		  gfc_array_spec *sym_as, int rank, int corank)
+{
+

Re: [PATCH] c++: recalculating local specs via build_extra_args [PR114303]

2024-03-12 Thread Patrick Palka

On Tue, 12 Mar 2024, Patrick Palka wrote:

> On Tue, 12 Mar 2024, Jason Merrill wrote:
> 
> > On 3/11/24 12:53, Patrick Palka wrote:
> > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
> > > OK for trunk and release branches?
> > > 
> > > -- >8 --
> > > 
> > > r13-6452-g341e6cd8d603a3 made build_extra_args walk evaluated contexts
> > > first so that we prefer processing a local specialization in an evaluated
> > > context even if its first use is in an unevaluated context.  But this
> > > means we need to avoid walking a tree that already has extra args/specs
> > > saved because the list of saved specs appears to be an evaluated
> > > context.  It seems then that we should be calculating the saved specs
> > > from scratch each time, rather than potentially walking the saved specs
> > > list from an earlier partial instantiation when calling build_extra_args
> > > a second time around.
> > 
> > Makes sense, but I wonder if we want to approach that by avoiding walking 
> > into
> > *_EXTRA_ARGS in extract_locals_r?  Or do we still want to walk into any 
> > nested
> > extra args?  And if so, will we run into this same problem then?
> 
> I'm not sure totally but I'd expect a nested extra-args tree to always
> have empty *_EXTRA_ARGS since the outer extra-args tree should intercept
> any substitution before the inner extra-args tree can see it?

... and so in extract_locals_r I think we can assume *_EXTRA_ARGS is
empty, and not have to explicitly avoid walking it.

> 
> > 
> > >   PR c++/114303
> > > 
> > > gcc/cp/ChangeLog:
> > > 
> > >   * constraint.cc (tsubst_requires_expr): Clear
> > >   REQUIRES_EXPR_EXTRA_ARGS before calling build_extra_args.
> > >   * pt.cc (tsubst_stmt) : Call build_extra_args
> > >   on the new IF_STMT instead of t which might already have
> > >   IF_STMT_EXTRA_ARGS.
> > > 
> > > gcc/testsuite/ChangeLog:
> > > 
> > >   * g++.dg/cpp1z/constexpr-if-lambda6.C: New test.
> > > ---
> > >   gcc/cp/constraint.cc |  1 +
> > >   gcc/cp/pt.cc |  2 +-
> > >   .../g++.dg/cpp1z/constexpr-if-lambda6.C  | 16 
> > >   3 files changed, 18 insertions(+), 1 deletion(-)
> > >   create mode 100644 gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C
> > > 
> > > diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
> > > index 49de3211d4c..8a3b5d80ba7 100644
> > > --- a/gcc/cp/constraint.cc
> > > +++ b/gcc/cp/constraint.cc
> > > @@ -2362,6 +2362,7 @@ tsubst_requires_expr (tree t, tree args, sat_info
> > > info)
> > >matching or dguide constraint rewriting), in which case we need
> > >to partially substitute.  */
> > > t = copy_node (t);
> > > +  REQUIRES_EXPR_EXTRA_ARGS (t) = NULL_TREE;
> > > REQUIRES_EXPR_EXTRA_ARGS (t) = build_extra_args (t, args,
> > > info.complain);
> > > return t;
> > >   }
> > > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> > > index 8cf0d5b7a8d..37f2392d035 100644
> > > --- a/gcc/cp/pt.cc
> > > +++ b/gcc/cp/pt.cc
> > > @@ -18718,7 +18718,7 @@ tsubst_stmt (tree t, tree args, tsubst_flags_t
> > > complain, tree in_decl)
> > > IF_COND (stmt) = IF_COND (t);
> > > THEN_CLAUSE (stmt) = THEN_CLAUSE (t);
> > > ELSE_CLAUSE (stmt) = ELSE_CLAUSE (t);
> > > -   IF_STMT_EXTRA_ARGS (stmt) = build_extra_args (t, args, complain);
> > > +   IF_STMT_EXTRA_ARGS (stmt) = build_extra_args (stmt, args, complain);
> > > add_stmt (stmt);
> > > break;
> > >   }
> > > diff --git a/gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C
> > > b/gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C
> > > new file mode 100644
> > > index 000..038c2a41210
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C
> > > @@ -0,0 +1,16 @@
> > > +// PR c++/114303
> > > +// { dg-do compile { target c++17 } }
> > > +
> > > +struct A { static constexpr bool value = true; };
> > > +
> > > +int main() {
> > > +  [](auto x1) {
> > > +return [&](auto) {
> > > +  return [&](auto x3) {
> > > +if constexpr (decltype(x3)::value) {
> > > +  static_assert(decltype(x1)::value);
> > > +}
> > > +  }(A{});
> > > +}(0);
> > > +  }(A{});
> > > +}
> > 
> > 
>

Re: [PATCH] c++: recalculating local specs via build_extra_args [PR114303]

2024-03-12 Thread Patrick Palka

On Tue, 12 Mar 2024, Jason Merrill wrote:

> On 3/11/24 12:53, Patrick Palka wrote:
> > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
> > OK for trunk and release branches?
> > 
> > -- >8 --
> > 
> > r13-6452-g341e6cd8d603a3 made build_extra_args walk evaluated contexts
> > first so that we prefer processing a local specialization in an evaluated
> > context even if its first use is in an unevaluated context.  But this
> > means we need to avoid walking a tree that already has extra args/specs
> > saved because the list of saved specs appears to be an evaluated
> > context.  It seems then that we should be calculating the saved specs
> > from scratch each time, rather than potentially walking the saved specs
> > list from an earlier partial instantiation when calling build_extra_args
> > a second time around.
> 
> Makes sense, but I wonder if we want to approach that by avoiding walking into
> *_EXTRA_ARGS in extract_locals_r?  Or do we still want to walk into any nested
> extra args?  And if so, will we run into this same problem then?

I'm not sure totally but I'd expect a nested extra-args tree to always
have empty *_EXTRA_ARGS since the outer extra-args tree should intercept
any substitution before the inner extra-args tree can see it?

> 
> > PR c++/114303
> > 
> > gcc/cp/ChangeLog:
> > 
> > * constraint.cc (tsubst_requires_expr): Clear
> > REQUIRES_EXPR_EXTRA_ARGS before calling build_extra_args.
> > * pt.cc (tsubst_stmt) : Call build_extra_args
> > on the new IF_STMT instead of t which might already have
> > IF_STMT_EXTRA_ARGS.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp1z/constexpr-if-lambda6.C: New test.
> > ---
> >   gcc/cp/constraint.cc |  1 +
> >   gcc/cp/pt.cc |  2 +-
> >   .../g++.dg/cpp1z/constexpr-if-lambda6.C  | 16 
> >   3 files changed, 18 insertions(+), 1 deletion(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C
> > 
> > diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
> > index 49de3211d4c..8a3b5d80ba7 100644
> > --- a/gcc/cp/constraint.cc
> > +++ b/gcc/cp/constraint.cc
> > @@ -2362,6 +2362,7 @@ tsubst_requires_expr (tree t, tree args, sat_info
> > info)
> >  matching or dguide constraint rewriting), in which case we need
> >  to partially substitute.  */
> > t = copy_node (t);
> > +  REQUIRES_EXPR_EXTRA_ARGS (t) = NULL_TREE;
> > REQUIRES_EXPR_EXTRA_ARGS (t) = build_extra_args (t, args,
> > info.complain);
> > return t;
> >   }
> > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> > index 8cf0d5b7a8d..37f2392d035 100644
> > --- a/gcc/cp/pt.cc
> > +++ b/gcc/cp/pt.cc
> > @@ -18718,7 +18718,7 @@ tsubst_stmt (tree t, tree args, tsubst_flags_t
> > complain, tree in_decl)
> >   IF_COND (stmt) = IF_COND (t);
> >   THEN_CLAUSE (stmt) = THEN_CLAUSE (t);
> >   ELSE_CLAUSE (stmt) = ELSE_CLAUSE (t);
> > - IF_STMT_EXTRA_ARGS (stmt) = build_extra_args (t, args, complain);
> > + IF_STMT_EXTRA_ARGS (stmt) = build_extra_args (stmt, args, complain);
> >   add_stmt (stmt);
> >   break;
> > }
> > diff --git a/gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C
> > b/gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C
> > new file mode 100644
> > index 000..038c2a41210
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C
> > @@ -0,0 +1,16 @@
> > +// PR c++/114303
> > +// { dg-do compile { target c++17 } }
> > +
> > +struct A { static constexpr bool value = true; };
> > +
> > +int main() {
> > +  [](auto x1) {
> > +return [&](auto) {
> > +  return [&](auto x3) {
> > +if constexpr (decltype(x3)::value) {
> > +  static_assert(decltype(x1)::value);
> > +}
> > +  }(A{});
> > +}(0);
> > +  }(A{});
> > +}
> 
>

[PATCH] gimple-iterator, ubsan, v3: Fix ICE during instrumentation of returns_twice calls [PR112709]

2024-03-12 Thread Jakub Jelinek

On Tue, Mar 12, 2024 at 02:31:28PM +0100, Richard Biener wrote:
> Ah, yeah, I see :/
> 
> > So, the intention of edge_before_returns_twice_call is just that
> > it in the common case just finds the non-EDGE_ABNORMAL edge if there is one,
> > if there isn't just one, it adjusts the IL such that there is just one.
> > And then the next step is to handle that case.
> 
> So I guess the updated patch is OK then.

For the naming thing, another variant would be to export

void
gsi_safe_insert_before (gimple_stmt_iterator *iter, gimple *g)
{
  gimple *stmt = gsi_stmt (*iter);
  if (stmt
  && is_gimple_call (stmt)
  && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0)
gsi_insert_before_returns_twice_call (gsi_bb (*iter), g);
  else
gsi_insert_before (iter, g, GSI_SAME_STMT);
}

/* Similarly for sequence SEQ.  */

void
gsi_safe_insert_seq_before (gimple_stmt_iterator *iter, gimple_seq seq)
...

and inline the gsi_insert_*before_returns_twice_call calls by hand
in there.
Then asan.cc/ubsan.cc wouldn't need to define those functions.
I could even outline the updating of SSA_NAMEs on a single statement
into a helper inline function.

The patch is even shorter then (the asan patch as well).

Tested again with make check-gcc check-g++ RUNTESTFLAGS='ubsan.exp asan.exp'

2024-03-12  Jakub Jelinek  

PR sanitizer/112709
* gimple-iterator.h (gsi_safe_insert_before,
gsi_safe_insert_seq_before): Declare.
* gimple-iterator.cc: Include gimplify.h.
(edge_before_returns_twice_call, adjust_before_returns_twice_call,
gsi_safe_insert_before, gsi_safe_insert_seq_before): New functions.
* ubsan.cc (instrument_mem_ref, instrument_pointer_overflow,
instrument_nonnull_arg, instrument_nonnull_return): Use
gsi_safe_insert_before instead of gsi_insert_before.
(maybe_instrument_pointer_overflow): Use force_gimple_operand,
gimple_seq_add_seq_without_update and gsi_safe_insert_seq_before
instead of force_gimple_operand_gsi.
(instrument_object_size): Likewise.  Use gsi_safe_insert_before
instead of gsi_insert_before.

* gcc.dg/ubsan/pr112709-1.c: New test.
* gcc.dg/ubsan/pr112709-2.c: New test.

--- gcc/gimple-iterator.h.jj2024-03-12 10:15:41.253529859 +0100
+++ gcc/gimple-iterator.h   2024-03-12 15:10:23.594845422 +0100
@@ -93,6 +93,8 @@ extern void gsi_insert_on_edge (edge, gi
 extern void gsi_insert_seq_on_edge (edge, gimple_seq);
 extern basic_block gsi_insert_on_edge_immediate (edge, gimple *);
 extern basic_block gsi_insert_seq_on_edge_immediate (edge, gimple_seq);
+extern void gsi_safe_insert_before (gimple_stmt_iterator *, gimple *);
+extern void gsi_safe_insert_seq_before (gimple_stmt_iterator *, gimple_seq);
 extern void gsi_commit_edge_inserts (void);
 extern void gsi_commit_one_edge_insert (edge, basic_block *);
 extern gphi_iterator gsi_start_phis (basic_block);
--- gcc/gimple-iterator.cc.jj   2024-03-12 10:15:41.209530471 +0100
+++ gcc/gimple-iterator.cc  2024-03-12 15:29:17.814171376 +0100
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.
 #include "tree-cfg.h"
 #include "tree-ssa.h"
 #include "value-prof.h"
+#include "gimplify.h"
 
 
 /* Mark the statement STMT as modified, and update it.  */
@@ -944,3 +945,137 @@ gsi_start_phis (basic_block bb)
 
   return i;
 }
+
+/* Helper function for gsi_safe_insert_before and gsi_safe_insert_seq_before.
+   Find edge to insert statements before returns_twice call at the start of BB,
+   if there isn't just one, split the bb and adjust PHIs to ensure that.  */
+
+static edge
+edge_before_returns_twice_call (basic_block bb)
+{
+  gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
+  gcc_checking_assert (is_gimple_call (gsi_stmt (gsi))
+  && (gimple_call_flags (gsi_stmt (gsi))
+  & ECF_RETURNS_TWICE) != 0);
+  edge_iterator ei;
+  edge e, ad_edge = NULL, other_edge = NULL;
+  bool split = false;
+  FOR_EACH_EDGE (e, ei, bb->preds)
+{
+  if ((e->flags & (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL)
+   {
+ gimple_stmt_iterator gsi
+   = gsi_start_nondebug_after_labels_bb (e->src);
+ gimple *ad = gsi_stmt (gsi);
+ if (ad && gimple_call_internal_p (ad, IFN_ABNORMAL_DISPATCHER))
+   {
+ gcc_checking_assert (ad_edge == NULL);
+ ad_edge = e;
+ continue;
+   }
+   }
+  if (other_edge || e->flags & (EDGE_ABNORMAL | EDGE_EH))
+   split = true;
+  other_edge = e;
+}
+  gcc_checking_assert (ad_edge);
+  if (other_edge == NULL)
+split = true;
+  if (split)
+{
+  other_edge = split_block_after_labels (bb);
+  e = make_edge (ad_edge->src, other_edge->dest, EDGE_ABNORMAL);
+  for (gphi_iterator gsi = gsi_start_phis (other_edge->src);
+  !gsi_end_p (gsi); gsi_next ())
+   {
+ gphi *phi = gsi.phi ();
+ tree lhs = gimple_phi_result (phi);
+

Re: [PATCH] c++: recalculating local specs via build_extra_args [PR114303]

2024-03-12 Thread Jason Merrill


On 3/11/24 12:53, Patrick Palka wrote:

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
OK for trunk and release branches?

-- >8 --

r13-6452-g341e6cd8d603a3 made build_extra_args walk evaluated contexts
first so that we prefer processing a local specialization in an evaluated
context even if its first use is in an unevaluated context.  But this
means we need to avoid walking a tree that already has extra args/specs
saved because the list of saved specs appears to be an evaluated
context.  It seems then that we should be calculating the saved specs
from scratch each time, rather than potentially walking the saved specs
list from an earlier partial instantiation when calling build_extra_args
a second time around.


Makes sense, but I wonder if we want to approach that by avoiding 
walking into *_EXTRA_ARGS in extract_locals_r?  Or do we still want to 
walk into any nested extra args?  And if so, will we run into this same 
problem then?



PR c++/114303

gcc/cp/ChangeLog:

* constraint.cc (tsubst_requires_expr): Clear
REQUIRES_EXPR_EXTRA_ARGS before calling build_extra_args.
* pt.cc (tsubst_stmt) : Call build_extra_args
on the new IF_STMT instead of t which might already have
IF_STMT_EXTRA_ARGS.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/constexpr-if-lambda6.C: New test.
---
  gcc/cp/constraint.cc |  1 +
  gcc/cp/pt.cc |  2 +-
  .../g++.dg/cpp1z/constexpr-if-lambda6.C  | 16 
  3 files changed, 18 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 49de3211d4c..8a3b5d80ba7 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -2362,6 +2362,7 @@ tsubst_requires_expr (tree t, tree args, sat_info info)
 matching or dguide constraint rewriting), in which case we need
 to partially substitute.  */
t = copy_node (t);
+  REQUIRES_EXPR_EXTRA_ARGS (t) = NULL_TREE;
REQUIRES_EXPR_EXTRA_ARGS (t) = build_extra_args (t, args, 
info.complain);
return t;
  }
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 8cf0d5b7a8d..37f2392d035 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -18718,7 +18718,7 @@ tsubst_stmt (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
  IF_COND (stmt) = IF_COND (t);
  THEN_CLAUSE (stmt) = THEN_CLAUSE (t);
  ELSE_CLAUSE (stmt) = ELSE_CLAUSE (t);
- IF_STMT_EXTRA_ARGS (stmt) = build_extra_args (t, args, complain);
+ IF_STMT_EXTRA_ARGS (stmt) = build_extra_args (stmt, args, complain);
  add_stmt (stmt);
  break;
}
diff --git a/gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C 
b/gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C
new file mode 100644
index 000..038c2a41210
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda6.C
@@ -0,0 +1,16 @@
+// PR c++/114303
+// { dg-do compile { target c++17 } }
+
+struct A { static constexpr bool value = true; };
+
+int main() {
+  [](auto x1) {
+return [&](auto) {
+  return [&](auto x3) {
+if constexpr (decltype(x3)::value) {
+  static_assert(decltype(x1)::value);
+}
+  }(A{});
+}(0);
+  }(A{});
+}

[PATCH][GCC] aarch64: Fix SCHEDULER_IDENT for Cortex-A520

2024-03-12 Thread Richard Ball

The SCHEDULER_IDENT for this CPU was incorrectly
set to cortexa55, which is incorrect. This can cause
sub-optimal asm to be generated.

Ok for trunk?

gcc/ChangeLog:
PR target/114272
* config/aarch64/aarch64-cores.def (AARCH64_CORE):
Change SCHEDULER_IDENT from cortexa55 to cortexa53
for Cortex-A520.diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 
ea84dcb11a2c11fb7d8ada3b66b3caaa39f08daf..f69fc212d56418887e29615f8acdeb02e39750c6
 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -171,7 +171,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), 
cortexa53, 0x41, 0xd15
 /* Arm ('A') cores. */
 AARCH64_CORE("cortex-a510",  cortexa510, cortexa53, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1)
 
-AARCH64_CORE("cortex-a520",  cortexa520, cortexa55, V9_2A,  (SVE2_BITPERM, 
MEMTAG), cortexa53, 0x41, 0xd80, -1)
+AARCH64_CORE("cortex-a520",  cortexa520, cortexa53, V9_2A,  (SVE2_BITPERM, 
MEMTAG), cortexa53, 0x41, 0xd80, -1)
 
 AARCH64_CORE("cortex-a710",  cortexa710, cortexa57, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd47, -1)

[PATCH] tree-optimization/114121 - chrec_fold_{plus,multiply} and recursion

2024-03-12 Thread Richard Biener

The following addresses endless recursion in the
chrec_fold_{plus,multiply} functions when handling sign-conversions.
We only need to apply tricks when we'd fail (there's a chrec in the
converted operand) and we need to make sure to not turn the other
operand into something worse (for the chrec-vs-chrec case).

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/114121
* tree-chrec.cc (chrec_fold_plus_1): Guard recursion with
converted operand properly.
(chrec_fold_multiply): Likewise.  Handle missed recursion.

* gcc.dg/torture/pr114312.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr114312.c |  15 ++
 gcc/tree-chrec.cc   | 176 +---
 2 files changed, 107 insertions(+), 84 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr114312.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr114312.c 
b/gcc/testsuite/gcc.dg/torture/pr114312.c
new file mode 100644
index 000..c508c64ed19
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr114312.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target bitint } */
+
+#if __BITINT_MAXWIDTH__ >= 129
+typedef _BitInt(129) B;
+B b;
+
+B
+foo(void)
+{
+  _BitInt(64) a = 1;
+  a &= b * b;
+  return b << a;
+}
+#endif
diff --git a/gcc/tree-chrec.cc b/gcc/tree-chrec.cc
index 7cd0ebc1010..1b2ed753551 100644
--- a/gcc/tree-chrec.cc
+++ b/gcc/tree-chrec.cc
@@ -251,23 +251,27 @@ chrec_fold_plus_1 (enum tree_code code, tree type,
  return chrec_fold_plus_poly_poly (code, type, op0, op1);
 
CASE_CONVERT:
- {
-   /* We can strip sign-conversions to signed by performing the
-  operation in unsigned.  */
-   tree optype = TREE_TYPE (TREE_OPERAND (op1, 0));
-   if (INTEGRAL_TYPE_P (type)
-   && INTEGRAL_TYPE_P (optype)
-   && tree_nop_conversion_p (type, optype)
-   && TYPE_UNSIGNED (optype))
- return chrec_convert (type,
-   chrec_fold_plus_1 (code, optype,
-  chrec_convert (optype,
- op0, 
NULL),
-  TREE_OPERAND (op1, 0)),
-   NULL);
-   if (tree_contains_chrecs (op1, NULL))
+ if (tree_contains_chrecs (op1, NULL))
+   {
+ /* We can strip sign-conversions to signed by performing the
+operation in unsigned.  */
+ tree optype = TREE_TYPE (TREE_OPERAND (op1, 0));
+ if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (optype)
+ && tree_nop_conversion_p (type, optype)
+ && TYPE_UNSIGNED (optype))
+   {
+ tree tem = chrec_convert (optype, op0, NULL);
+ if (TREE_CODE (tem) == POLYNOMIAL_CHREC)
+   return chrec_convert (type,
+ chrec_fold_plus_1 (code, optype,
+tem,
+TREE_OPERAND
+  (op1, 0)),
+ NULL);
+   }
  return chrec_dont_know;
- }
+   }
  /* FALLTHRU */
 
default:
@@ -284,26 +288,27 @@ chrec_fold_plus_1 (enum tree_code code, tree type,
}
 
 CASE_CONVERT:
-  {
-   /* We can strip sign-conversions to signed by performing the
-  operation in unsigned.  */
-   tree optype = TREE_TYPE (TREE_OPERAND (op0, 0));
-   if (INTEGRAL_TYPE_P (type)
-   && INTEGRAL_TYPE_P (optype)
-   && tree_nop_conversion_p (type, optype)
-   && TYPE_UNSIGNED (optype))
- return chrec_convert (type,
-   chrec_fold_plus_1 (code, optype,
-  TREE_OPERAND (op0, 0),
-  chrec_convert (optype,
- op1, NULL)),
-   NULL);
-   if (tree_contains_chrecs (op0, NULL))
+  if (tree_contains_chrecs (op0, NULL))
+   {
+ /* We can strip sign-conversions to signed by performing the
+operation in unsigned.  */
+ tree optype = TREE_TYPE (TREE_OPERAND (op0, 0));
+ if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (optype)
+ && tree_nop_conversion_p (type, optype)
+ && TYPE_UNSIGNED (optype))
+   return chrec_convert (type,
+ chrec_fold_plus_1 (code, optype,
+TREE_OPERAND (op0, 0),
+

[PATCH][GCC] aarch64: Fix SCHEDULER_IDENT for Cortex-A510

2024-03-12 Thread Richard Ball

The SCHEDULER_IDENT for this CPU was incorrectly
set to cortexa55, which is incorrect. This can cause
sub-optimal asm to be generated.

Ok for trunk?

Can I also backport this to gcc-12 and gcc-13?

gcc/ChangeLog:
PR target/114272
* config/aarch64/aarch64-cores.def (AARCH64_CORE):
Change SCHEDULER_IDENT from cortexa55 to cortexa53
for Cortex-A510.diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 
7ebefcf543b6f84b3df22ab836728111b56fa76f..ea84dcb11a2c11fb7d8ada3b66b3caaa39f08daf
 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -169,7 +169,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), 
cortexa53, 0x41, 0xd15
 /* Armv9.0-A Architecture Processors.  */
 
 /* Arm ('A') cores. */
-AARCH64_CORE("cortex-a510",  cortexa510, cortexa55, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1)
+AARCH64_CORE("cortex-a510",  cortexa510, cortexa53, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1)
 
 AARCH64_CORE("cortex-a520",  cortexa520, cortexa55, V9_2A,  (SVE2_BITPERM, 
MEMTAG), cortexa53, 0x41, 0xd80, -1)

Re: [PATCH] c++: ICE with temporary of class type in array DMI [PR109966]

2024-03-12 Thread Jason Merrill


On 3/11/24 19:27, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk/13?

-- >8 --
This ICE started with the fairly complicated r13-765.  We crash in
gimplify_var_or_parm_decl because a stray VAR_DECL leaked there.
The problem is ultimately that potential_prvalue_result_of wasn't
correctly handling arrays and replace_placeholders_for_class_temp_r
replaced a PLACEHOLDER_EXPR in a TARGET_EXPR which is used in the
context of copy elision.  If I have

   M m[2] = { M{""}, M{""} };

then we don't invoke the M(const M&) copy-ctor.  I think the fix is
to detect such a case in potential_prvalue_result_of.

PR c++/109966

gcc/cp/ChangeLog:

* typeck2.cc (potential_prvalue_result_of): Add walk_subtrees
parameter.  Handle initializing an array from a
brace-enclosed-initializer.
(replace_placeholders_for_class_temp_r): Pass walk_subtrees down to
potential_prvalue_result_of.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/nsdmi-aggr20.C: New test.
* g++.dg/cpp1y/nsdmi-aggr21.C: New test.
---
  gcc/cp/typeck2.cc | 27 ---
  gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr20.C | 17 +++
  gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr21.C | 59 +++
  3 files changed, 96 insertions(+), 7 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr20.C
  create mode 100644 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr21.C

diff --git a/gcc/cp/typeck2.cc b/gcc/cp/typeck2.cc
index 31198b2f9f5..8b99ce78e9a 100644
--- a/gcc/cp/typeck2.cc
+++ b/gcc/cp/typeck2.cc
@@ -1406,46 +1406,59 @@ digest_init_flags (tree type, tree init, int flags, 
tsubst_flags_t complain)
   A a = (A{});   // initializer
   A a = (1, A{});// initializer
   A a = true ? A{} : A{};  // initializer
+ A arr[1] = { A{} };  // initializer
   auto x = A{}.x;// temporary materialization
   auto x = foo(A{}); // temporary materialization
  
 FULL_EXPR is the whole expression, SUBOB is its TARGET_EXPR subobject.  */
  
  static bool

-potential_prvalue_result_of (tree subob, tree full_expr)
+potential_prvalue_result_of (tree subob, tree full_expr, int *walk_subtrees)
  {
+#define RECUR(t) potential_prvalue_result_of (subob, t, walk_subtrees)
if (subob == full_expr)
  return true;
else if (TREE_CODE (full_expr) == TARGET_EXPR)
  {
tree init = TARGET_EXPR_INITIAL (full_expr);
if (TREE_CODE (init) == COND_EXPR)
-   return (potential_prvalue_result_of (subob, TREE_OPERAND (init, 1))
-   || potential_prvalue_result_of (subob, TREE_OPERAND (init, 2)));
+   return (RECUR (TREE_OPERAND (init, 1))
+   || RECUR (TREE_OPERAND (init, 2)));
else if (TREE_CODE (init) == COMPOUND_EXPR)
-   return potential_prvalue_result_of (subob, TREE_OPERAND (init, 1));
+   return RECUR (TREE_OPERAND (init, 1));
/* ??? I don't know if this can be hit.  */
else if (TREE_CODE (init) == PAREN_EXPR)
{
  gcc_checking_assert (false);
- return potential_prvalue_result_of (subob, TREE_OPERAND (init, 0));
+ return RECUR (TREE_OPERAND (init, 0));
}
  }
+  /* The array case listed above.  */
+  else if (TREE_CODE (full_expr) == CONSTRUCTOR
+  && TREE_CODE (TREE_TYPE (full_expr)) == ARRAY_TYPE)
+for (constructor_elt : CONSTRUCTOR_ELTS (full_expr))
+  if (e.value == subob)
+   {
+ *walk_subtrees = 0;


Why clear walk_subtrees?  Won't that mean we fail to replace any 
placeholders nested within an array element initializer?


Jason

Re: [PATCH] asan: Fix ICE during instrumentation of returns_twice calls [PR112709]

2024-03-12 Thread Richard Biener

On Tue, 12 Mar 2024, Jakub Jelinek wrote:

> Hi!
> 
> The following patch on top of the previously posted ubsan/gimple-iterator
> one handles asan the same.  While the case of returning by hidden reference
> is handled differently because of the first recently posted asan patch,
> this deals with instrumentation of the aggregates returned in registers
> case as well as instrumentation of loads from aggregate memory in the
> function arguments of returns_twice calls.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> 2024-03-12  Jakub Jelinek  
> 
>   PR sanitizer/112709
>   * asan.cc (asan_insert_before): New function.
>   (maybe_create_ssa_name, maybe_cast_to_ptrmode, build_check_stmt,
>   maybe_instrument_call, asan_expand_mark_ifn): Use it instead of
>   gsi_insert_before.
> 
>   * gcc.dg/asan/pr112709-2.c: New test.
> 
> --- gcc/asan.cc.jj2024-03-11 13:49:58.931045179 +0100
> +++ gcc/asan.cc   2024-03-11 18:38:29.047330489 +0100
> @@ -2561,6 +2561,21 @@ build_shadow_mem_access (gimple_stmt_ite
>return gimple_assign_lhs (g);
>  }
>  
> +/* Insert G stmt before ITER.  If ITER is a returns_twice call,
> +   insert it on an appropriate edge instead.  */
> +
> +static void
> +asan_insert_before (gimple_stmt_iterator *iter, gimple *g)
> +{
> +  gimple *stmt = gsi_stmt (*iter);
> +  if (stmt
> +  && is_gimple_call (stmt)
> +  && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0)
> +gsi_insert_before_returns_twice_call (gsi_bb (*iter), g);
> +  else
> +gsi_insert_before (iter, g, GSI_SAME_STMT);
> +}
> +
>  /* BASE can already be an SSA_NAME; in that case, do not create a
> new SSA_NAME for it.  */
>  
> @@ -2574,7 +2589,7 @@ maybe_create_ssa_name (location_t loc, t
>gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (base)), base);
>gimple_set_location (g, loc);
>if (before_p)
> -gsi_insert_before (iter, g, GSI_SAME_STMT);
> +asan_insert_before (iter, g);
>else
>  gsi_insert_after (iter, g, GSI_NEW_STMT);
>return gimple_assign_lhs (g);
> @@ -2593,7 +2608,7 @@ maybe_cast_to_ptrmode (location_t loc, t
> NOP_EXPR, len);
>gimple_set_location (g, loc);
>if (before_p)
> -gsi_insert_before (iter, g, GSI_SAME_STMT);
> +asan_insert_before (iter, g);
>else
>  gsi_insert_after (iter, g, GSI_NEW_STMT);
>return gimple_assign_lhs (g);
> @@ -2684,7 +2699,7 @@ build_check_stmt (location_t loc, tree b
>align / BITS_PER_UNIT));
>gimple_set_location (g, loc);
>if (before_p)
> -gsi_insert_before (, g, GSI_SAME_STMT);
> +asan_insert_before (, g);
>else
>  {
>gsi_insert_after (, g, GSI_NEW_STMT);
> @@ -3025,7 +3040,7 @@ maybe_instrument_call (gimple_stmt_itera
> tree decl = builtin_decl_implicit (BUILT_IN_ASAN_HANDLE_NO_RETURN);
> gimple *g = gimple_build_call (decl, 0);
> gimple_set_location (g, gimple_location (stmt));
> -   gsi_insert_before (iter, g, GSI_SAME_STMT);
> +   asan_insert_before (iter, g);
>   }
>  }
>  
> @@ -3852,7 +3867,7 @@ asan_expand_mark_ifn (gimple_stmt_iterat
>g = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
>  NOP_EXPR, len);
>gimple_set_location (g, loc);
> -  gsi_insert_before (iter, g, GSI_SAME_STMT);
> +  asan_insert_before (iter, g);
>tree sz_arg = gimple_assign_lhs (g);
>  
>tree fun
> --- gcc/testsuite/gcc.dg/asan/pr112709-2.c.jj 2024-03-11 18:30:59.813488200 
> +0100
> +++ gcc/testsuite/gcc.dg/asan/pr112709-2.c2024-03-11 18:31:06.506396462 
> +0100
> @@ -0,0 +1,50 @@
> +/* PR sanitizer/112709 */
> +/* { dg-do compile } */
> +/* { dg-options "-fsanitize=address -O2" } */
> +
> +struct S { char c[1024]; } *p;
> +int foo (int);
> +
> +__attribute__((returns_twice, noipa)) int
> +bar (struct S x)
> +{
> +  (void) x.c[0];
> +  return 0;
> +}
> +
> +void
> +baz (int *y)
> +{
> +  foo (1);
> +  *y = bar (*p);
> +}
> +
> +void
> +qux (int x, int *y)
> +{
> +  if (x == 25)
> +x = foo (2);
> +  else if (x == 42)
> +x = foo (foo (3));
> +  *y = bar (*p);
> +}
> +
> +void
> +corge (int x, int *y)
> +{
> +  void *q[] = { &, &, &, & };
> +  if (x == 25)
> +{
> +l1:
> +  x = foo (2);
> +}
> +  else if (x == 42)
> +{
> +l2:
> +  x = foo (foo (3));
> +}
> +l3:
> +  *y = bar (*p);
> +  if (x < 4)
> +goto *q[x & 3];
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH] gimple-iterator, ubsan: Fix ICE during instrumentation of returns_twice calls [PR112709]

2024-03-12 Thread Richard Biener

On Tue, 12 Mar 2024, Jakub Jelinek wrote:

> On Tue, Mar 12, 2024 at 01:47:53PM +0100, Richard Biener wrote:
> > > Admittedly the above is the ugliest part of the patch IMHO.
> > > It isn't needed in all cases, but e.g. for the pr112709-2.c (qux) case
> > > we have before ubsan pass
> > >   # _7(ab) = PHI <_20(9), _8(ab)(11)>
> > >   _22 = bar (*_7(ab));
> > > and want to instrument the *_7(ab) load among the parameters for object
> > > size.
> > > So, it wants to add
> > >   _2 = __builtin_dynamic_object_size (_7(ab), 0);
> > > sequence and later:
> > >   .UBSAN_OBJECT_SIZE (_7(ab), 1024, _2, 0);
> > > before the call insn.  If it isn't a noreturn call, it would just
> > > add those statements into the same basic block using gsi_insert*before.
> > > But because it is a returns_twice call, it needs to be done in a different
> > > block.  And having all of ubsan/asan/bitintlower find that out first and
> > > figure out that it should use _20 instead of _7(ab) seems hard, especially
> > > that for cases like:
> > 
> > I would have expected the result as
> > 
> >   # _7 = PHI <_20(9)>
> >   _2 = __builtin_dynamic_object_size (_7, 0);
> >   .UBSAN_OBJECT_SIZE (_7(ab), 1024, _2, 0);
> >   // fallthru
> > 
> >   # _99(ab) = PHI <_7, _8(ab)(11)>
> >   _22 = bar (*_99(ab));
> > 
> > where all uses of _7 in the IL before splitting get replaced via
> > their immediate uses but the to-be inserted IL can continue using
> > the defs (that requires the not inserted IL to not have update_stmt
> > called on them, of course).
> > 
> > I think split_block would have the PHIs organized that way already,
> > you are adding the _99 defs anyway, right?  That is,
> > 
> > + tree new_lhs = copy_ssa_name (lhs);
> > + gimple_phi_set_result (phi, new_lhs);
> > + gphi *new_phi = create_phi_node (lhs, other_edge->dest);
> > 
> > here you swap the defs, I'd omit that.  And instead essentially do
> > replace_uses_by (lhs, new_lhs) here (without the folding and stuff).
> > You have to clear SSA_NAME_OCCURS_IN_ABNORMAL_PHI on the old
> > and set it on the new LHS if it was set.
> > 
> > I think that should contain the "SSA update" to
> > edge_before_returns_twice_call for the case it splits the block?
> 
> I have considered that, but there are problems with that.
> 
> The most important is that in the most common case, there is no
> block splitting nor any edge splitting, all we have is that
>   # _99(ab) = PHI <_7(6), _8(ab)(11)>
>   _22 = bar (*_99(ab));
> or similar.  And we can do several insertions of statements or sequences
> before the bar call.  If it is not returns_twice or even not a call,
> we want to use _99(ab) in there and insert just before the call/statement.
> If it is returns_twice call, we want to replace the _99(ab) uses with
> something that is actually usable on the 6->10 edge.
> So, if we wanted to keep using the _99(ab) (wouldn't it be even wrong
> that it is (ab)?), we'd need to essentially on every addition
> add statements like
>   _99(ab) = _7;
> (for each PHI node except the virtual one) before the added statement or
> statements (or add it only if used in the statement/sequence?) and
> change the PHI to be
>   # _100(ab) = PHI <_99(ab)(6), _8(ab)(11)>
> and replace all uses:
>   _22 = bar (*_100(ab));
> Right now ubsan can insert something like 3 statements/sequences per
> argument, so there can be thousands of them and the block could have
> thousands of PHIs, so that is millions of useless SSA_NAME copies.

Ah, yeah, I see :/

> So, the intention of edge_before_returns_twice_call is just that
> it in the common case just finds the non-EDGE_ABNORMAL edge if there is one,
> if there isn't just one, it adjusts the IL such that there is just one.
> And then the next step is to handle that case.

So I guess the updated patch is OK then.

Thanks,
Richard.

Re: [PATCH v2] c++: Support target-specific nodes with streaming [PR98645,PR111224]

2024-03-12 Thread Jason Merrill


On 3/12/24 08:21, Nathaniel Shead wrote:

On Tue, Mar 12, 2024 at 11:11:40PM +1100, Nathaniel Shead wrote:

On Mon, Mar 11, 2024 at 10:36:06AM -0400, Patrick Palka wrote:

On Sun, 10 Mar 2024, Nathaniel Shead wrote:


Bootstrapped and regtested on x86_64-pc-linux-gnu and
aarch64-unknown-linux-gnu, OK for trunk?

It's worth noting that the AArch64 machines I had available to test with
didn't have a new enough glibc to reproduce the ICEs in the PR, but this
patch will be necessary (albeit possibly not sufficient) to fix it.

-- >8 --

Some targets make use of POLY_INT_CSTs and other custom builtin types,
which currently violate some assumptions when streaming. This patch adds
support for them, specifically AArch64 SVE types like __fp16.


It seems other built-in types are handled by adding them to the
fixed_trees vector in init_modules (and then we install them first
during streaming).  Could we just add all the target-specific types to
fixed_trees too?



Yes, that works too. Seems cleaner as well, though I had to add it as a
separate loop because the set of builtin types registered is not
determined until runtiem (depending on e.g. ABI flags). I also noticed
that this fixes another PR, on PowerPC, so I've added a test for it.
Thanks!

Bootstrapped and regtested on x86_64-pc-linux-gnu,
aarch64-unknown-linux-gnu, and powerpc64le-unknown-linux-gnu;
OK for trunk?

-- >8 --

Some targets make use of POLY_INT_CSTs and other custom builtin types,
which currently violate some assumptions when streaming. This patch adds
support for them, such as types like Aarch64 __fp16, PowerPC __ibm128,
and vector types thereof.

This patch doesn't provide "full" support of AArch64 SVE, however, since
for that we would need to support 'target' nodes (tracked in PR108080).

Adding the new builtin types means that on Aarch64 we now have 217
global trees created on initialisation (up from 191), so this patch also
slightly bumps the initial size of the fixed_trees allocation to 250.

PR c++/98645
PR c++/111224

gcc/cp/ChangeLog:

* module.cc (enum tree_tag): Add new tag for builtin types.
(trees_out::start): POLY_INT_CSTs can be emitted.
(trees_in::start): Likewise.
(trees_out::core_vals): Stream POLY_INT_CSTs.
(trees_in::core_vals): Likewise.
(trees_out::type_node): Handle vectors with multiple coeffs.
(trees_in::tree_node): Likewise.
(init_modules): Register target-specific builtin types. Bump
initial capacity slightly.

gcc/testsuite/ChangeLog:

* g++.dg/modules/target-aarch64-1_a.C: New test.
* g++.dg/modules/target-aarch64-1_b.C: New test.
* g++.dg/modules/target-powerpc-1_a.C: New test.
* g++.dg/modules/target-powerpc-1_b.C: New test.

Signed-off-by: Nathaniel Shead 
Reviewed-by: Patrick Palka 
---
  gcc/cp/module.cc  | 32 +--
  .../g++.dg/modules/target-aarch64-1_a.C   | 17 ++
  .../g++.dg/modules/target-aarch64-1_b.C   | 13 
  .../g++.dg/modules/target-powerpc-1_a.C   |  7 
  .../g++.dg/modules/target-powerpc-1_b.C   | 10 ++
  5 files changed, 69 insertions(+), 10 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C
  create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C
  create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C
  create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 99055523d91..8aab9ea0bae 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -5173,7 +5173,6 @@ trees_out::start (tree t, bool code_streamed)
break;
  
  case FIXED_CST:

-case POLY_INT_CST:
gcc_unreachable (); /* Not supported in C++.  */
break;
  
@@ -5259,7 +5258,6 @@ trees_in::start (unsigned code)
  
  case FIXED_CST:

  case IDENTIFIER_NODE:
-case POLY_INT_CST:
  case SSA_NAME:
  case TARGET_MEM_REF:
  case TRANSLATION_UNIT_DECL:
@@ -6106,7 +6104,10 @@ trees_out::core_vals (tree t)
break;
  
  case POLY_INT_CST:

-  gcc_unreachable (); /* Not supported in C++.  */
+  if (streaming_p ())
+   for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++)
+ WT (POLY_INT_CST_COEFF (t, ix));
+  break;
  
  case REAL_CST:

if (streaming_p ())
@@ -6615,8 +6616,9 @@ trees_in::core_vals (tree t)
break;
  
  case POLY_INT_CST:

-  /* Not suported in C++.  */
-  return false;
+  for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++)
+   RT (POLY_INT_CST_COEFF (t, ix));
+  break;
  
  case REAL_CST:

if (const void *bytes = buf (sizeof (real_value)))
@@ -9068,8 +9070,8 @@ trees_out::type_node (tree type)
if (streaming_p ())
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (type);
- /* to_constant asserts that only coeff[0] is of interest.  */
-

Re: [PATCH v3] RISC-V: Introduce gcc attribute riscv_rvv_vector_bits for RVV

2024-03-12 Thread Stefan O'Rear

On Tue, Mar 12, 2024, at 2:15 AM, pan2...@intel.com wrote:
> From: Pan Li 
>
> Update in v3:
> * Add pre-defined __riscv_v_fixed_vlen when zvl.
>
> Update in v2:
> * Cleanup some unused code.
> * Fix some typo of commit log.
>
> Original log:
>
> This patch would like to introduce one new gcc attribute for RVV.
> This attribute is used to define fixed-length variants of one
> existing sizeless RVV types.
>
> This attribute is valid if and only if the mrvv-vector-bits=zvl, the only
> one args should be the integer constant and its' value is terminated
> by the LMUL and the vector register bits in zvl*b.  For example:
>
> typedef vint32m2_t fixed_vint32m2_t 
> __attribute__((riscv_rvv_vector_bits(128)));
>
> The above type define is valid when -march=rv64gc_zve64d_zvl64b
> (aka 2(m2) * 64 = 128 for vin32m2_t), and will report error when
> -march=rv64gcv_zvl128b similar to below.
>
> "error: invalid RVV vector size '128', expected size is '256' based on
> LMUL of type and '-mrvv-vector-bits=zvl'"
>
> Meanwhile, a pre-define macro __riscv_v_fixed_vlen is introduced to
> represent the fixed vlen in a RVV vector register.

Shouldn't a major user-facing change like this be discussed in a PR against
https://github.com/riscv-non-isa/riscv-c-api-doc/ or
https://github.com/riscv-non-isa/rvv-intrinsic-doc before or concurrent with
compiler implementation?

-s

> For the vint*m*_t below operations are allowed.
> * The sizeof.
> * The global variable(s).
> * The element of union and struct.
> * The cast to other equalities.
> * CMP: >, <, ==, !=, <=, >=
> * ALU: +, -, *, /, %, &, |, ^, >>, <<, ~, -
>
> For the vfloat*m*_t below operations are allowed.
> * The sizeof.
> * The global variable(s).
> * The element of union and struct.
> * The cast to other equalities.
> * CMP: >, <, ==, !=, <=, >=
> * ALU: +, -, *, /, -
>
> For the vbool*_t types only below operations are allowed except
> the CMP and ALU. The CMP and ALU operations on vbool*_t is not
> well defined currently.
> * The sizeof.
> * The global variable(s).
> * The element of union and struct.
> * The cast to other equalities.
>
> For the vint*x*m*_t tuple types are not suppored in this patch
> which is compatible with clang.
>
> This patch passed the below testsuites.
> * The riscv fully regression tests.
>
> gcc/ChangeLog:
>
>   * config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Add pre-define
>   macro __riscv_v_fixed_vlen when zvl.
>   * config/riscv/riscv.cc (riscv_handle_rvv_vector_bits_attribute):
>   New static func to take care of the RVV types decorated by
>   the attributes.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-11.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-12.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-13.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-14.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-15.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-16.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-17.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-2.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-3.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-4.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-5.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-6.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-7.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-8.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-9.c: New test.
>   * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits.h: New test.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/config/riscv/riscv-c.cc   |   3 +
>  gcc/config/riscv/riscv.cc |  87 +-
>  .../riscv/rvv/base/riscv_rvv_vector_bits-1.c  |   6 +
>  .../riscv/rvv/base/riscv_rvv_vector_bits-10.c |  53 +
>  .../riscv/rvv/base/riscv_rvv_vector_bits-11.c |  76 
>  .../riscv/rvv/base/riscv_rvv_vector_bits-12.c |  14 +++
>  .../riscv/rvv/base/riscv_rvv_vector_bits-13.c |  10 ++
>  .../riscv/rvv/base/riscv_rvv_vector_bits-14.c |  10 ++
>  .../riscv/rvv/base/riscv_rvv_vector_bits-15.c |  10 ++
>  .../riscv/rvv/base/riscv_rvv_vector_bits-16.c |  11 ++
>  .../riscv/rvv/base/riscv_rvv_vector_bits-17.c |  10 ++
>  .../riscv/rvv/base/riscv_rvv_vector_bits-2.c  |   6 +
>  .../riscv/rvv/base/riscv_rvv_vector_bits-3.c  |   6 +
>  .../riscv/rvv/base/riscv_rvv_vector_bits-4.c  |   6 +
>  .../riscv/rvv/base/riscv_rvv_vector_bits-5.c  |   6 +
>  .../riscv/rvv/base/riscv_rvv_vector_bits-6.c  |   6 +
>  .../riscv/rvv/base/riscv_rvv_vector_bits-7.c  |  76

Re: [PATCH] Fix PR ipa/113996

2024-03-12 Thread Jan Hubicka

> 
> 
> On 3/11/24 4:38 PM, Eric Botcazou wrote:
> > Hi,
> > 
> > this is a regression present on all active branches: the attached Ada 
> > testcase
> > triggers an assertion failure when compiled with -O2 -gnatp -flto:
> > 
> >/* Initialize the static chain.  */
> >p = DECL_STRUCT_FUNCTION (fn)->static_chain_decl;
> >gcc_assert (fn != current_function_decl);
> >if (p)
> >  {
> >/* No static chain?  Seems like a bug in tree-nested.cc.  */
> >gcc_assert (static_chain);  <--- here
> > 
> >setup_one_parameter (id, p, static_chain, fn, bb, );
> >  }
> > 
> > The problem is that the ICF pass identifies two functions, one of which has 
> > a
> > static chain but the other does not.  The proposed fix is just to prevent 
> > this
> > identification from occurring.
> > 
> > Tested on x86-64/Linux, OK for all active branches?
> > 
> > 
> > 2024-03-11  Eric Botcazou  
> > 
> > PR ipa/113996
> > * ipa-icf.h (sem_function): Add static_chain_present member.
> > * ipa-icf.cc (sem_function::get_hash): Hash it.
> > (sem_function::equals_wpa): Compare it.
> > (sem_function::equals_private): Likewise.
> > (sem_function::init): Initialize it.
> > 
> > 
> > 2024-03-11  Eric Botcazou  
> > 
> > * gnat.dg/lto27.adb: New test.
> OK.
Patch is still OK, but ipa-ICF will only identify the functions if
static chain is unused. Perhaps just picking the winning candidate to be
version without static chain and making ipa-inline to not ICE when calls
with static chain lands to function with no static chain would help us
to optimize better.

Also IPA-SRA can optimize out unused static chains and ipa-prop can
propagate across them.

Honza
> jeff
>

Re: [PATCH v2] testsuite: xfail test for short_enums

2024-03-12 Thread Jason Merrill


On 3/11/24 06:23, Torbjörn SVENSSON wrote:

Changes compared to v1:
- Added reference to r14-6517-gb7e4a4c626e in dg-bogus comment
- Changed arm-*-* to short_enums in target selector
- Updated commit message to align with above changes


As the entire block generating the warning was removed in
r14-6517-gb7e4a4c626e, does it still make sense to add something to
trunk for the same line?
Do you want me to add the dg-bogus, but change "xfail" to "target" for
trunk?


Sounds good.


Is this patch ok for releases/gcc-13?


OK.


--

On arm-none-eabi, the test case fails with
.../null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c:63:65: warning: 
converting a packed 'enum obj_type' pointer (alignment 1) to a 'struct 
connection' pointer (alignment 4) may result in an unaligned pointer value 
[-Waddress-of-packed-member]

The error was fixed in basepoints/gcc-14-6517-gb7e4a4c626e, but it
was considered to be a too big change to be backported and thus, the
failing test is marked xfail in GCC13.

gcc/testsuite/ChangeLog:

* gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c:
Added dg-bogus with xfail on offending line for short_enums.

Signed-off-by: Torbjörn SVENSSON 
---
  .../null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
 
b/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
index 2a9c715c32c..e8cde7338a0 100644
--- 
a/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
+++ 
b/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
@@ -60,7 +60,7 @@ static inline enum obj_type obj_type(const enum obj_type *t)
  }
  static inline struct connection *__objt_conn(enum obj_type *t)
  {
- return ((struct connection *)(((void *)(t)) - ((long)&((struct connection 
*)0)->obj_type)));
+ return ((struct connection *)(((void *)(t)) - ((long)&((struct connection *)0)->obj_type))); /* { 
dg-bogus "may result in an unaligned pointer value" "Fixed in r14-6517-gb7e4a4c626e" { 
xfail short_enums } */
  }
  static inline struct connection *objt_conn(enum obj_type *t)
  {

Re: [PATCH] gimple-iterator, ubsan: Fix ICE during instrumentation of returns_twice calls [PR112709]

2024-03-12 Thread Jakub Jelinek

On Tue, Mar 12, 2024 at 01:47:53PM +0100, Richard Biener wrote:
> > Admittedly the above is the ugliest part of the patch IMHO.
> > It isn't needed in all cases, but e.g. for the pr112709-2.c (qux) case
> > we have before ubsan pass
> >   # _7(ab) = PHI <_20(9), _8(ab)(11)>
> >   _22 = bar (*_7(ab));
> > and want to instrument the *_7(ab) load among the parameters for object
> > size.
> > So, it wants to add
> >   _2 = __builtin_dynamic_object_size (_7(ab), 0);
> > sequence and later:
> >   .UBSAN_OBJECT_SIZE (_7(ab), 1024, _2, 0);
> > before the call insn.  If it isn't a noreturn call, it would just
> > add those statements into the same basic block using gsi_insert*before.
> > But because it is a returns_twice call, it needs to be done in a different
> > block.  And having all of ubsan/asan/bitintlower find that out first and
> > figure out that it should use _20 instead of _7(ab) seems hard, especially
> > that for cases like:
> 
> I would have expected the result as
> 
>   # _7 = PHI <_20(9)>
>   _2 = __builtin_dynamic_object_size (_7, 0);
>   .UBSAN_OBJECT_SIZE (_7(ab), 1024, _2, 0);
>   // fallthru
> 
>   # _99(ab) = PHI <_7, _8(ab)(11)>
>   _22 = bar (*_99(ab));
> 
> where all uses of _7 in the IL before splitting get replaced via
> their immediate uses but the to-be inserted IL can continue using
> the defs (that requires the not inserted IL to not have update_stmt
> called on them, of course).
> 
> I think split_block would have the PHIs organized that way already,
> you are adding the _99 defs anyway, right?  That is,
> 
> + tree new_lhs = copy_ssa_name (lhs);
> + gimple_phi_set_result (phi, new_lhs);
> + gphi *new_phi = create_phi_node (lhs, other_edge->dest);
> 
> here you swap the defs, I'd omit that.  And instead essentially do
> replace_uses_by (lhs, new_lhs) here (without the folding and stuff).
> You have to clear SSA_NAME_OCCURS_IN_ABNORMAL_PHI on the old
> and set it on the new LHS if it was set.
> 
> I think that should contain the "SSA update" to
> edge_before_returns_twice_call for the case it splits the block?

I have considered that, but there are problems with that.

The most important is that in the most common case, there is no
block splitting nor any edge splitting, all we have is that
  # _99(ab) = PHI <_7(6), _8(ab)(11)>
  _22 = bar (*_99(ab));
or similar.  And we can do several insertions of statements or sequences
before the bar call.  If it is not returns_twice or even not a call,
we want to use _99(ab) in there and insert just before the call/statement.
If it is returns_twice call, we want to replace the _99(ab) uses with
something that is actually usable on the 6->10 edge.
So, if we wanted to keep using the _99(ab) (wouldn't it be even wrong
that it is (ab)?), we'd need to essentially on every addition
add statements like
  _99(ab) = _7;
(for each PHI node except the virtual one) before the added statement or
statements (or add it only if used in the statement/sequence?) and
change the PHI to be
  # _100(ab) = PHI <_99(ab)(6), _8(ab)(11)>
and replace all uses:
  _22 = bar (*_100(ab));
Right now ubsan can insert something like 3 statements/sequences per
argument, so there can be thousands of them and the block could have
thousands of PHIs, so that is millions of useless SSA_NAME copies.

So, the intention of edge_before_returns_twice_call is just that
it in the common case just finds the non-EDGE_ABNORMAL edge if there is one,
if there isn't just one, it adjusts the IL such that there is just one.
And then the next step is to handle that case.

Jakub

Re: [PATCH v2] c++: Support target-specific nodes with streaming [PR98645,PR111224]

2024-03-12 Thread Patrick Palka

On Tue, 12 Mar 2024, Nathaniel Shead wrote:

> On Tue, Mar 12, 2024 at 11:11:40PM +1100, Nathaniel Shead wrote:
> > On Mon, Mar 11, 2024 at 10:36:06AM -0400, Patrick Palka wrote:
> > > On Sun, 10 Mar 2024, Nathaniel Shead wrote:
> > > 
> > > > Bootstrapped and regtested on x86_64-pc-linux-gnu and
> > > > aarch64-unknown-linux-gnu, OK for trunk?
> > > > 
> > > > It's worth noting that the AArch64 machines I had available to test with
> > > > didn't have a new enough glibc to reproduce the ICEs in the PR, but this
> > > > patch will be necessary (albeit possibly not sufficient) to fix it.
> > > > 
> > > > -- >8 --
> > > > 
> > > > Some targets make use of POLY_INT_CSTs and other custom builtin types,
> > > > which currently violate some assumptions when streaming. This patch adds
> > > > support for them, specifically AArch64 SVE types like __fp16.
> > > 
> > > It seems other built-in types are handled by adding them to the
> > > fixed_trees vector in init_modules (and then we install them first
> > > during streaming).  Could we just add all the target-specific types to
> > > fixed_trees too?
> > > 
> > 
> > Yes, that works too. Seems cleaner as well, though I had to add it as a
> > separate loop because the set of builtin types registered is not
> > determined until runtiem (depending on e.g. ABI flags). I also noticed
> > that this fixes another PR, on PowerPC, so I've added a test for it.
> > Thanks!
> > 
> > Bootstrapped and regtested on x86_64-pc-linux-gnu,
> > aarch64-unknown-linux-gnu, and powerpc64le-unknown-linux-gnu;
> > OK for trunk?
> > 
> > -- >8 --
> > 
> > Some targets make use of POLY_INT_CSTs and other custom builtin types,
> > which currently violate some assumptions when streaming. This patch adds
> > support for them, such as types like Aarch64 __fp16, PowerPC __ibm128,
> > and vector types thereof.
> > 
> > This patch doesn't provide "full" support of AArch64 SVE, however, since
> > for that we would need to support 'target' nodes (tracked in PR108080).
> > 
> > Adding the new builtin types means that on Aarch64 we now have 217
> > global trees created on initialisation (up from 191), so this patch also
> > slightly bumps the initial size of the fixed_trees allocation to 250.
> > 
> > PR c++/98645
> > PR c++/111224
> > 
> > gcc/cp/ChangeLog:
> > 
> > * module.cc (enum tree_tag): Add new tag for builtin types.
> > (trees_out::start): POLY_INT_CSTs can be emitted.
> > (trees_in::start): Likewise.
> > (trees_out::core_vals): Stream POLY_INT_CSTs.
> > (trees_in::core_vals): Likewise.
> > (trees_out::type_node): Handle vectors with multiple coeffs.
> > (trees_in::tree_node): Likewise.
> > (init_modules): Register target-specific builtin types. Bump
> > initial capacity slightly.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/modules/target-aarch64-1_a.C: New test.
> > * g++.dg/modules/target-aarch64-1_b.C: New test.
> > * g++.dg/modules/target-powerpc-1_a.C: New test.
> > * g++.dg/modules/target-powerpc-1_b.C: New test.
> > 
> > Signed-off-by: Nathaniel Shead 
> > Reviewed-by: Patrick Palka 
> > ---
> >  gcc/cp/module.cc  | 32 +--
> >  .../g++.dg/modules/target-aarch64-1_a.C   | 17 ++
> >  .../g++.dg/modules/target-aarch64-1_b.C   | 13 
> >  .../g++.dg/modules/target-powerpc-1_a.C   |  7 
> >  .../g++.dg/modules/target-powerpc-1_b.C   | 10 ++
> >  5 files changed, 69 insertions(+), 10 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C
> >  create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C
> >  create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C
> >  create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C
> > 
> > diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> > index 99055523d91..8aab9ea0bae 100644
> > --- a/gcc/cp/module.cc
> > +++ b/gcc/cp/module.cc
> > @@ -5173,7 +5173,6 @@ trees_out::start (tree t, bool code_streamed)
> >break;
> >  
> >  case FIXED_CST:
> > -case POLY_INT_CST:
> >gcc_unreachable (); /* Not supported in C++.  */
> >break;
> >  
> > @@ -5259,7 +5258,6 @@ trees_in::start (unsigned code)
> >  
> >  case FIXED_CST:
> >  case IDENTIFIER_NODE:
> > -case POLY_INT_CST:
> >  case SSA_NAME:
> >  case TARGET_MEM_REF:
> >  case TRANSLATION_UNIT_DECL:
> > @@ -6106,7 +6104,10 @@ trees_out::core_vals (tree t)
> >break;
> >  
> >  case POLY_INT_CST:
> > -  gcc_unreachable (); /* Not supported in C++.  */
> > +  if (streaming_p ())
> > +   for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++)
> > + WT (POLY_INT_CST_COEFF (t, ix));
> > +  break;
> >  
> >  case REAL_CST:
> >if (streaming_p ())
> > @@ -6615,8 +6616,9 @@ trees_in::core_vals (tree t)
> >break;
> >  
> >  case POLY_INT_CST:
> > -  /* Not suported in C++.  */
> > -  return

Re: [PATCH] gimple-iterator, ubsan: Fix ICE during instrumentation of returns_twice calls [PR112709]

2024-03-12 Thread Richard Biener

On Tue, 12 Mar 2024, Jakub Jelinek wrote:

> On Tue, Mar 12, 2024 at 11:42:03AM +0100, Richard Biener wrote:
> > > +static edge
> > > +edge_before_returns_twice_call (basic_block bb)
> > > +{
> > > +  gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
> > > +  gcc_checking_assert (is_gimple_call (gsi_stmt (gsi))
> > > +&& (gimple_call_flags (gsi_stmt (gsi))
> > > +& ECF_RETURNS_TWICE) != 0);
> > > +  edge_iterator ei;
> > > +  edge e, ad_edge = NULL, other_edge = NULL;
> > > +  bool split = false;
> > > +  /* Look for a fallthru and possibly a single backedge.  */
> > 
> > I can't see the backedge handling (I'm not sure it's actually required?)
> 
> Oops, that comment line just shouldn't be there.  Grepping around, I've
> pasted it from tree-ssa-uninit.cc (warn_uninit_phi_uses) - just wanted
> to grab something with the edge_iterator, edge declaration and
> FOR_EACH_EDGE on bb->preds) and forgot to remove that comment line.
> 
> Consider it removed.
> 
> > > +  FOR_EACH_EDGE (e, ei, bb->preds)
> > > +{
> > > +  if ((e->flags & (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL)
> > 
> > EDGE_EH is special because of the landing pad?
> 
> Yes, though I'd think a landing pad would need to start with a label.
> Anyway, the (x & (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL
> test is what is tested for ABNORMAL_DISPATCHER in other places:
> tree-cfgcleanup.cc- if ((ei_edge (ei)->flags
> tree-cfgcleanup.cc:  & (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL)
> tree-cfgcleanup.cc-   {
> tree-cfgcleanup.cc- gimple_stmt_iterator gsi
> tree-cfgcleanup.cc-   = gsi_start_nondebug_after_labels_bb (dest);
> tree-cfgcleanup.cc- gimple *g = gsi_stmt (gsi);
> tree-cfgcleanup.cc- if (g && gimple_call_internal_p (g, 
> IFN_ABNORMAL_DISPATCHER))
> tree-cfgcleanup.cc-   abnormal_dispatchers.safe_push (dest);
> (that is where I copied it from),
> tree-cfg.cc-  FOR_EACH_EDGE (e, ei, bb->succs)
> tree-cfg.cc:if ((e->flags & (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL)
> tree-cfg.cc-  {
> tree-cfg.cc-  gimple_stmt_iterator gsi
> tree-cfg.cc-= gsi_start_nondebug_after_labels_bb (e->dest);
> tree-cfg.cc-  gimple *g = gsi_stmt (gsi);
> tree-cfg.cc-  if (g && gimple_call_internal_p (g, IFN_ABNORMAL_DISPATCHER))
> tree-cfg.cc-return e->dest;
> Though, admittedly, git blame shows both were written by myself, so it
> doesn't count much.  The edge from IFN_ABNORMAL_DISPATCHER has EDGE_ABNORMAL
> flag set:
>   make_edge (*dispatcher, for_bb, EDGE_ABNORMAL);
> and I bet some EDGE_* flags can be set on that edge later on, but EDGE_EH
> certainly shouldn't be.
> Bet in both of those preexisting cases and in this new one it could be done
> just as flags & EDGE_ABNORMAL test, though perhaps the tree-cfgcleanup.cc
> and tree-cfg.cc cases can see there EDGE_EH edges and would query the first
> non-debug stmt in them uselessly, while perhaps among the returns_twice
> predecessor EDGE_EH is very unlikely and not worth testing.
> So, if you want, I can change that line to
>   if ((e->flags & EDGE_ABNORMAL) != 0)
> 
> > 
> > > + {
> > > +   gimple_stmt_iterator gsi
> > > + = gsi_start_nondebug_after_labels_bb (e->src);
> > > +   gimple *ad = gsi_stmt (gsi);
> > > +   if (ad && gimple_call_internal_p (ad, IFN_ABNORMAL_DISPATCHER))
> > > + {
> > > +   gcc_checking_assert (ad_edge == NULL);
> > > +   ad_edge = e;
> > > +   continue;
> > > + }
> > > + }
> > > +  if (other_edge || e->flags & (EDGE_ABNORMAL | EDGE_EH))
> > > + split = true;
> > > +  other_edge = e;
> > > +}
> > > +  gcc_checking_assert (ad_edge);
> > > +  if (other_edge == NULL)
> > > +split = true;
> > > +  if (split)
> > > +{
> > > +  other_edge = split_block_after_labels (bb);
> > > +  e = make_edge (ad_edge->src, other_edge->dest, EDGE_ABNORMAL);
> > 
> > In theory there could be multiple abnormal edges.
> 
> Sure, but a function has at most one .ABNORMAL_DISPATCHER block.
> Edge from that block is the only one that needs to be kept pointing to
> the returns_twice function.  In the testcases I've even tried to
> get other abnormal edges there, but because of labels we actually have
> labels on the block that falls through to the returns_twice block.
> I bet for EH edges it is the same.
> 
> >  Did you try
> > to use make_forwarder_block instead of manually doing it?
> 
> Looking at it, it doesn't a lot of unnecessary work.  We don't need
> to do any redirection of anything but moving the .ABNORMAL_DISPATCHER
> edge after splitting the block.  And indeed, it would fail on redirection
> of the EDGE_ABNORMAL edge.

I'm mostly concerned about maintainability here, but let's leave that
aside because of the edge redirection that doesn't work (I don't
see why we couldn't just make it work, but ...)

> >  Or
> > does that fail because we end up redirecting the abnormal edges
> > which you avoid by re-making it new and scrapping the

Re: [PATCH v2] c++: Support target-specific nodes with streaming [PR98645,PR111224]

2024-03-12 Thread Nathaniel Shead

On Tue, Mar 12, 2024 at 11:11:40PM +1100, Nathaniel Shead wrote:
> On Mon, Mar 11, 2024 at 10:36:06AM -0400, Patrick Palka wrote:
> > On Sun, 10 Mar 2024, Nathaniel Shead wrote:
> > 
> > > Bootstrapped and regtested on x86_64-pc-linux-gnu and
> > > aarch64-unknown-linux-gnu, OK for trunk?
> > > 
> > > It's worth noting that the AArch64 machines I had available to test with
> > > didn't have a new enough glibc to reproduce the ICEs in the PR, but this
> > > patch will be necessary (albeit possibly not sufficient) to fix it.
> > > 
> > > -- >8 --
> > > 
> > > Some targets make use of POLY_INT_CSTs and other custom builtin types,
> > > which currently violate some assumptions when streaming. This patch adds
> > > support for them, specifically AArch64 SVE types like __fp16.
> > 
> > It seems other built-in types are handled by adding them to the
> > fixed_trees vector in init_modules (and then we install them first
> > during streaming).  Could we just add all the target-specific types to
> > fixed_trees too?
> > 
> 
> Yes, that works too. Seems cleaner as well, though I had to add it as a
> separate loop because the set of builtin types registered is not
> determined until runtiem (depending on e.g. ABI flags). I also noticed
> that this fixes another PR, on PowerPC, so I've added a test for it.
> Thanks!
> 
> Bootstrapped and regtested on x86_64-pc-linux-gnu,
> aarch64-unknown-linux-gnu, and powerpc64le-unknown-linux-gnu;
> OK for trunk?
> 
> -- >8 --
> 
> Some targets make use of POLY_INT_CSTs and other custom builtin types,
> which currently violate some assumptions when streaming. This patch adds
> support for them, such as types like Aarch64 __fp16, PowerPC __ibm128,
> and vector types thereof.
> 
> This patch doesn't provide "full" support of AArch64 SVE, however, since
> for that we would need to support 'target' nodes (tracked in PR108080).
> 
> Adding the new builtin types means that on Aarch64 we now have 217
> global trees created on initialisation (up from 191), so this patch also
> slightly bumps the initial size of the fixed_trees allocation to 250.
> 
>   PR c++/98645
>   PR c++/111224
> 
> gcc/cp/ChangeLog:
> 
>   * module.cc (enum tree_tag): Add new tag for builtin types.
>   (trees_out::start): POLY_INT_CSTs can be emitted.
>   (trees_in::start): Likewise.
>   (trees_out::core_vals): Stream POLY_INT_CSTs.
>   (trees_in::core_vals): Likewise.
>   (trees_out::type_node): Handle vectors with multiple coeffs.
>   (trees_in::tree_node): Likewise.
>   (init_modules): Register target-specific builtin types. Bump
>   initial capacity slightly.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/modules/target-aarch64-1_a.C: New test.
>   * g++.dg/modules/target-aarch64-1_b.C: New test.
>   * g++.dg/modules/target-powerpc-1_a.C: New test.
>   * g++.dg/modules/target-powerpc-1_b.C: New test.
> 
> Signed-off-by: Nathaniel Shead 
> Reviewed-by: Patrick Palka 
> ---
>  gcc/cp/module.cc  | 32 +--
>  .../g++.dg/modules/target-aarch64-1_a.C   | 17 ++
>  .../g++.dg/modules/target-aarch64-1_b.C   | 13 
>  .../g++.dg/modules/target-powerpc-1_a.C   |  7 
>  .../g++.dg/modules/target-powerpc-1_b.C   | 10 ++
>  5 files changed, 69 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C
> 
> diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> index 99055523d91..8aab9ea0bae 100644
> --- a/gcc/cp/module.cc
> +++ b/gcc/cp/module.cc
> @@ -5173,7 +5173,6 @@ trees_out::start (tree t, bool code_streamed)
>break;
>  
>  case FIXED_CST:
> -case POLY_INT_CST:
>gcc_unreachable (); /* Not supported in C++.  */
>break;
>  
> @@ -5259,7 +5258,6 @@ trees_in::start (unsigned code)
>  
>  case FIXED_CST:
>  case IDENTIFIER_NODE:
> -case POLY_INT_CST:
>  case SSA_NAME:
>  case TARGET_MEM_REF:
>  case TRANSLATION_UNIT_DECL:
> @@ -6106,7 +6104,10 @@ trees_out::core_vals (tree t)
>break;
>  
>  case POLY_INT_CST:
> -  gcc_unreachable (); /* Not supported in C++.  */
> +  if (streaming_p ())
> + for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++)
> +   WT (POLY_INT_CST_COEFF (t, ix));
> +  break;
>  
>  case REAL_CST:
>if (streaming_p ())
> @@ -6615,8 +6616,9 @@ trees_in::core_vals (tree t)
>break;
>  
>  case POLY_INT_CST:
> -  /* Not suported in C++.  */
> -  return false;
> +  for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++)
> + RT (POLY_INT_CST_COEFF (t, ix));
> +  break;
>  
>  case REAL_CST:
>if (const void *bytes = buf (sizeof (real_value)))
> @@ -9068,8 +9070,8 @@

Re: [PATCH] gimple-iterator, ubsan: Fix ICE during instrumentation of returns_twice calls [PR112709]

2024-03-12 Thread Jakub Jelinek

On Tue, Mar 12, 2024 at 11:42:03AM +0100, Richard Biener wrote:
> > +static edge
> > +edge_before_returns_twice_call (basic_block bb)
> > +{
> > +  gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
> > +  gcc_checking_assert (is_gimple_call (gsi_stmt (gsi))
> > +  && (gimple_call_flags (gsi_stmt (gsi))
> > +  & ECF_RETURNS_TWICE) != 0);
> > +  edge_iterator ei;
> > +  edge e, ad_edge = NULL, other_edge = NULL;
> > +  bool split = false;
> > +  /* Look for a fallthru and possibly a single backedge.  */
> 
> I can't see the backedge handling (I'm not sure it's actually required?)

Oops, that comment line just shouldn't be there.  Grepping around, I've
pasted it from tree-ssa-uninit.cc (warn_uninit_phi_uses) - just wanted
to grab something with the edge_iterator, edge declaration and
FOR_EACH_EDGE on bb->preds) and forgot to remove that comment line.

Consider it removed.

> > +  FOR_EACH_EDGE (e, ei, bb->preds)
> > +{
> > +  if ((e->flags & (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL)
> 
> EDGE_EH is special because of the landing pad?

Yes, though I'd think a landing pad would need to start with a label.
Anyway, the (x & (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL
test is what is tested for ABNORMAL_DISPATCHER in other places:
tree-cfgcleanup.cc-   if ((ei_edge (ei)->flags
tree-cfgcleanup.cc:& (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL)
tree-cfgcleanup.cc- {
tree-cfgcleanup.cc-   gimple_stmt_iterator gsi
tree-cfgcleanup.cc- = gsi_start_nondebug_after_labels_bb (dest);
tree-cfgcleanup.cc-   gimple *g = gsi_stmt (gsi);
tree-cfgcleanup.cc-   if (g && gimple_call_internal_p (g, 
IFN_ABNORMAL_DISPATCHER))
tree-cfgcleanup.cc- abnormal_dispatchers.safe_push (dest);
(that is where I copied it from),
tree-cfg.cc-  FOR_EACH_EDGE (e, ei, bb->succs)
tree-cfg.cc:if ((e->flags & (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL)
tree-cfg.cc-  {
tree-cfg.cc-gimple_stmt_iterator gsi
tree-cfg.cc-  = gsi_start_nondebug_after_labels_bb (e->dest);
tree-cfg.cc-gimple *g = gsi_stmt (gsi);
tree-cfg.cc-if (g && gimple_call_internal_p (g, IFN_ABNORMAL_DISPATCHER))
tree-cfg.cc-  return e->dest;
Though, admittedly, git blame shows both were written by myself, so it
doesn't count much.  The edge from IFN_ABNORMAL_DISPATCHER has EDGE_ABNORMAL
flag set:
  make_edge (*dispatcher, for_bb, EDGE_ABNORMAL);
and I bet some EDGE_* flags can be set on that edge later on, but EDGE_EH
certainly shouldn't be.
Bet in both of those preexisting cases and in this new one it could be done
just as flags & EDGE_ABNORMAL test, though perhaps the tree-cfgcleanup.cc
and tree-cfg.cc cases can see there EDGE_EH edges and would query the first
non-debug stmt in them uselessly, while perhaps among the returns_twice
predecessor EDGE_EH is very unlikely and not worth testing.
So, if you want, I can change that line to
  if ((e->flags & EDGE_ABNORMAL) != 0)

> 
> > +   {
> > + gimple_stmt_iterator gsi
> > +   = gsi_start_nondebug_after_labels_bb (e->src);
> > + gimple *ad = gsi_stmt (gsi);
> > + if (ad && gimple_call_internal_p (ad, IFN_ABNORMAL_DISPATCHER))
> > +   {
> > + gcc_checking_assert (ad_edge == NULL);
> > + ad_edge = e;
> > + continue;
> > +   }
> > +   }
> > +  if (other_edge || e->flags & (EDGE_ABNORMAL | EDGE_EH))
> > +   split = true;
> > +  other_edge = e;
> > +}
> > +  gcc_checking_assert (ad_edge);
> > +  if (other_edge == NULL)
> > +split = true;
> > +  if (split)
> > +{
> > +  other_edge = split_block_after_labels (bb);
> > +  e = make_edge (ad_edge->src, other_edge->dest, EDGE_ABNORMAL);
> 
> In theory there could be multiple abnormal edges.

Sure, but a function has at most one .ABNORMAL_DISPATCHER block.
Edge from that block is the only one that needs to be kept pointing to
the returns_twice function.  In the testcases I've even tried to
get other abnormal edges there, but because of labels we actually have
labels on the block that falls through to the returns_twice block.
I bet for EH edges it is the same.

>  Did you try
> to use make_forwarder_block instead of manually doing it?

Looking at it, it doesn't a lot of unnecessary work.  We don't need
to do any redirection of anything but moving the .ABNORMAL_DISPATCHER
edge after splitting the block.  And indeed, it would fail on redirection
of the EDGE_ABNORMAL edge.

>  Or
> does that fail because we end up redirecting the abnormal edges
> which you avoid by re-making it new and scrapping the old one?
> I fail to remember why we can't redirect abnormal edges - yes,
> there's no flow to be redirected but the process of "redirection"
> still should work.

Seems the redirection always wants to adjust the src blocks of the
edges.  That isn't something that should be done for this particular
abnormal edge, that one actually can be redirected just by replacing it,
though for

[PATCH v2] c++: Support target-specific nodes with streaming [PR98645,PR111224]

2024-03-12 Thread Nathaniel Shead

On Mon, Mar 11, 2024 at 10:36:06AM -0400, Patrick Palka wrote:
> On Sun, 10 Mar 2024, Nathaniel Shead wrote:
> 
> > Bootstrapped and regtested on x86_64-pc-linux-gnu and
> > aarch64-unknown-linux-gnu, OK for trunk?
> > 
> > It's worth noting that the AArch64 machines I had available to test with
> > didn't have a new enough glibc to reproduce the ICEs in the PR, but this
> > patch will be necessary (albeit possibly not sufficient) to fix it.
> > 
> > -- >8 --
> > 
> > Some targets make use of POLY_INT_CSTs and other custom builtin types,
> > which currently violate some assumptions when streaming. This patch adds
> > support for them, specifically AArch64 SVE types like __fp16.
> 
> It seems other built-in types are handled by adding them to the
> fixed_trees vector in init_modules (and then we install them first
> during streaming).  Could we just add all the target-specific types to
> fixed_trees too?
> 

Yes, that works too. Seems cleaner as well, though I had to add it as a
separate loop because the set of builtin types registered is not
determined until runtiem (depending on e.g. ABI flags). I also noticed
that this fixes another PR, on PowerPC, so I've added a test for it.
Thanks!

Bootstrapped and regtested on x86_64-pc-linux-gnu,
aarch64-unknown-linux-gnu, and powerpc64le-unknown-linux-gnu;
OK for trunk?

-- >8 --

Some targets make use of POLY_INT_CSTs and other custom builtin types,
which currently violate some assumptions when streaming. This patch adds
support for them, such as types like Aarch64 __fp16, PowerPC __ibm128,
and vector types thereof.

This patch doesn't provide "full" support of AArch64 SVE, however, since
for that we would need to support 'target' nodes (tracked in PR108080).

Adding the new builtin types means that on Aarch64 we now have 217
global trees created on initialisation (up from 191), so this patch also
slightly bumps the initial size of the fixed_trees allocation to 250.

PR c++/98645
PR c++/111224

gcc/cp/ChangeLog:

* module.cc (enum tree_tag): Add new tag for builtin types.
(trees_out::start): POLY_INT_CSTs can be emitted.
(trees_in::start): Likewise.
(trees_out::core_vals): Stream POLY_INT_CSTs.
(trees_in::core_vals): Likewise.
(trees_out::type_node): Handle vectors with multiple coeffs.
(trees_in::tree_node): Likewise.
(init_modules): Register target-specific builtin types. Bump
initial capacity slightly.

gcc/testsuite/ChangeLog:

* g++.dg/modules/target-aarch64-1_a.C: New test.
* g++.dg/modules/target-aarch64-1_b.C: New test.
* g++.dg/modules/target-powerpc-1_a.C: New test.
* g++.dg/modules/target-powerpc-1_b.C: New test.

Signed-off-by: Nathaniel Shead 
Reviewed-by: Patrick Palka 
---
 gcc/cp/module.cc  | 32 +--
 .../g++.dg/modules/target-aarch64-1_a.C   | 17 ++
 .../g++.dg/modules/target-aarch64-1_b.C   | 13 
 .../g++.dg/modules/target-powerpc-1_a.C   |  7 
 .../g++.dg/modules/target-powerpc-1_b.C   | 10 ++
 5 files changed, 69 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C
 create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C
 create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C
 create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 99055523d91..8aab9ea0bae 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -5173,7 +5173,6 @@ trees_out::start (tree t, bool code_streamed)
   break;

 case FIXED_CST:
-case POLY_INT_CST:
   gcc_unreachable (); /* Not supported in C++.  */
   break;

@@ -5259,7 +5258,6 @@ trees_in::start (unsigned code)

 case FIXED_CST:
 case IDENTIFIER_NODE:
-case POLY_INT_CST:
 case SSA_NAME:
 case TARGET_MEM_REF:
 case TRANSLATION_UNIT_DECL:
@@ -6106,7 +6104,10 @@ trees_out::core_vals (tree t)
   break;

 case POLY_INT_CST:
-  gcc_unreachable (); /* Not supported in C++.  */
+  if (streaming_p ())
+   for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++)
+ WT (POLY_INT_CST_COEFF (t, ix));
+  break;

 case REAL_CST:
   if (streaming_p ())
@@ -6615,8 +6616,9 @@ trees_in::core_vals (tree t)
   break;

 case POLY_INT_CST:
-  /* Not suported in C++.  */
-  return false;
+  for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++)
+   RT (POLY_INT_CST_COEFF (t, ix));
+  break;

 case REAL_CST:
   if (const void *bytes = buf (sizeof (real_value)))
@@ -9068,8 +9070,8 @@ trees_out::type_node (tree type)
   if (streaming_p ())
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (type);
- /* to_constant asserts that only coeff[0] is of interest.  */
- wu (static_cast (nunits.to_constant ()));
+ for (unsigned ix = 0; ix !=

[PATCH] sanitizer: [PR110027] Align asan_vec[0] to MAX (alignb, ASAN_RED_ZONE_SIZE)

2024-03-12 Thread liuhongt

if alignb > ASAN_RED_ZONE_SIZE and offset[0] is not multiple of
alignb. (base_align_bias - base_offset) may not aligned to alignb, and
caused segement fault.

Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
Ok for trunk and backport to GCC13?

gcc/ChangeLog:

PR sanitizer/110027
* cfgexpand.cc (expand_stack_vars): Align frame offset to
MAX (alignb, ASAN_RED_ZONE_SIZE).

gcc/testsuite/ChangeLog:

* g++.dg/asan/pr110027.C: New test.
---
 gcc/cfgexpand.cc |  2 +-
 gcc/testsuite/g++.dg/asan/pr110027.C | 20 
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/asan/pr110027.C

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 0de299c62e3..92062378d8e 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -1214,7 +1214,7 @@ expand_stack_vars (bool (*pred) (size_t), class 
stack_vars_data *data)
{
  if (data->asan_vec.is_empty ())
{
- align_frame_offset (ASAN_RED_ZONE_SIZE);
+ align_frame_offset (MAX (alignb, ASAN_RED_ZONE_SIZE));
  prev_offset = frame_offset.to_constant ();
}
  prev_offset = align_base (prev_offset,
diff --git a/gcc/testsuite/g++.dg/asan/pr110027.C 
b/gcc/testsuite/g++.dg/asan/pr110027.C
new file mode 100644
index 000..0067781bc89
--- /dev/null
+++ b/gcc/testsuite/g++.dg/asan/pr110027.C
@@ -0,0 +1,20 @@
+/* PR sanitizer/110027 */
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f_runtime } */
+/* { dg-options "-std=gnu++23 -mavx512f -fsanitize=address -O0 -g 
-fstack-protector-strong" } */
+
+#include 
+#include 
+
+template 
+using Vec [[gnu::vector_size(W * sizeof(T))]] = T;
+
+auto foo() {
+  Vec<8, int64_t> ret{};
+  return ret;
+}
+
+int main() {
+  foo();
+  return 0;
+}
-- 
2.31.1

Re: [Patch] OpenMP/Fortran: Fix defaultmap(none) issue with dummy procedures [PR114283]

2024-03-12 Thread Tobias Burnus


Jakub Jelinek wrote:


So firstprivate clause handling remaps them then if declare target indirect
is used? If so, the patch looks reasonable to me.


[I have now updated the patch to turn the testcase to ensure
that is also keeps works at runtime.]

OpenMP leaves it a bit open when the remapping has to happen,
but one can construct cases – in particular with unified-shared memory –
where it is not possible to do this upon entry to a target region.

Thus, it has to be done when the function is invoked, e.g.

i = (*g) ();

is turned (in the target region but only on the device side) into

i = (*GOMP_target_map_indirect_ptr (g)) ();

Thus, as long as the host pointer value is transferred to the device,
it works – as the lookup is done on the device side. Directly using a
device address (remap when mapping to the target) will also not shorten
the lookup, i.e. there is no need for it.

Does it still look reasonable to you?

Tobias

PS: The current OpenMP specification, it is listed mainly described via
the glossary (newest change is the addition of dummy procedure):

"indirect device invocation – An indirect call to the _device_ version of 
a _procedure_ on a _device_ other than the _host-device_, through a 
function pointer (C/C++), a pointer to a member function (C++), a dummy 
procedure (Fortran), or a procedure pointer (Fortran) that refers to the 
host version of the _procedure_."
OpenMP/Fortran: Fix defaultmap(none) issue with dummy procedures [PR114283]

Dummy procedures look similar to variables but aren't - neither in Fortran
nor in OpenMP. As the middle end sees PARM_DECLs, mark them as predetermined
firstprivate for mapping (as already done in gfc_omp_predetermined_sharing).

This does not address the isses related to procedure pointers, which are
still discussed on spec level [see PR].

	PR fortran/114283

gcc/fortran/ChangeLog:

	* trans-openmp.cc (gfc_omp_predetermined_mapping): Map dummy
	procedures as firstprivate.

libgomp/ChangeLog:

	* testsuite/libgomp.fortran/declare-target-indirect-4.f90: New test.

 gcc/fortran/trans-openmp.cc|  9 +
 .../libgomp.fortran/declare-target-indirect-4.f90  | 43 ++
 2 files changed, 52 insertions(+)

diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc
index a2bf15665b3..1dba47126ed 100644
--- a/gcc/fortran/trans-openmp.cc
+++ b/gcc/fortran/trans-openmp.cc
@@ -343,6 +343,15 @@ gfc_omp_predetermined_mapping (tree decl)
 	&& GFC_DECL_SAVED_DESCRIPTOR (decl)))
 return OMP_CLAUSE_DEFAULTMAP_TO;
 
+  /* Dummy procedures aren't considered variables by OpenMP, thus are
+ disallowed in OpenMP clauses.  They are represented as PARM_DECLs
+ in the middle-end, so return OMP_CLAUSE_DEFAULTMAP_FIRSTPRIVATE here
+ to avoid complaining about their uses with defaultmap(none).  */
+  if (TREE_CODE (decl) == PARM_DECL
+  && TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE
+  && TREE_CODE (TREE_TYPE (TREE_TYPE (decl))) == FUNCTION_TYPE)
+return OMP_CLAUSE_DEFAULTMAP_FIRSTPRIVATE;
+
   /* These are either array or derived parameters, or vtables.  */
   if (VAR_P (decl) && TREE_READONLY (decl)
   && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
diff --git a/libgomp/testsuite/libgomp.fortran/declare-target-indirect-4.f90 b/libgomp/testsuite/libgomp.fortran/declare-target-indirect-4.f90
new file mode 100644
index 000..43f4295494c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/declare-target-indirect-4.f90
@@ -0,0 +1,43 @@
+! { dg-additional-options "-fdump-tree-gimple" }
+
+! PR fortran/114283
+
+! { dg-final { scan-tree-dump "#pragma omp parallel shared\\(i\\) if\\(0\\) default\\(none\\) firstprivate\\(g\\)" "gimple" } }
+! { dg-final { scan-tree-dump "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) firstprivate\\(h\\) map\\(from:j \\\[len: 4\\\]\\) defaultmap\\(none\\)" "gimple" } }
+
+
+module m
+  implicit none (type, external)
+  !$omp declare target indirect enter(f1, f2)
+contains
+  integer function f1 ()
+f1 = 99
+  end
+  integer function f2 ()
+f2 = 89
+  end
+end module m
+
+use m
+implicit none (type, external)
+call sub1(f1)
+call sub2(f2)
+contains
+  subroutine sub1(g)
+procedure(integer) :: g
+integer :: i
+!$omp parallel default(none) if(.false.) shared(i)
+  i = g ()
+!$omp end parallel
+if (i /= 99) stop 1
+  end
+
+  subroutine sub2(h)
+procedure(integer) :: h
+integer :: j
+!$omp target defaultmap(none) map(from:j)
+  j = h ()
+!$omp end target
+if (j /= 89) stop 1
+  end
+end

RE: [PATCH] [X86_64]: Enable support for next generation AMD Zen5 CPU with znver5 scheduler Model

2024-03-12 Thread Kumar, Venkataramanan

[Public]

Hi Honza,

> -Original Message-
> From: Jan Hubicka 
> Sent: Tuesday, March 12, 2024 4:11 AM
> To: Anbazhagan, Karthiban 
> Cc: gcc-patches@gcc.gnu.org; Kumar, Venkataramanan
> ; Joshi, Tejas Sanjay
> ; Nagarajan, Muthu kumar raj
> ; Gopalasubramanian, Ganesh
> 
> Subject: Re: [PATCH] [X86_64]: Enable support for next generation AMD Zen5
> CPU with znver5 scheduler Model
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> > [Public]
> >
> >
> > Hi all,
> >
> >
> >
> > PFA, the patch that enables support for the next generation AMD Zen5 CPU 
> > via -
> march=znver5 with basic znver5 scheduler Model.
> >
> > We may update the scheduler model going forward.
> >
> >
> >
> > Good for trunk?
> >
> > Thanks and Regards
> > Karthiban
> >
> >
> > Patch is inline here.
> > From 6230938c1420604c8d0af27b0d080970d9b54ac5 Mon Sep 17 00:00:00
> 2001
> > From: karthiban
> karthiban.anbazha...@amd.com
> > Date: Fri, 9 Feb 2024 15:03:09 +0530
> > Subject: [PATCH] Add AMD znver5 processor enablement with scheduler model
> >
> > gcc/ChangeLog:
> > * common/config/i386/cpuinfo.h (get_amd_cpu): Recognize znver5.
> > * common/config/i386/i386-common.cc (processor_names): Add znver5.
> > (processor_alias_table): Likewise.
> > * common/config/i386/i386-cpuinfo.h (processor_types): Add new zen
> > family.
> > (processor_subtypes): Add znver5.
> > * config.gcc (x86_64-*-* |...): Likewise.
> > * config/i386/driver-i386.cc (host_detect_local_cpu): Let
> > march=native detect znver5 cpu's.
> > * config/i386/i386-c.cc (ix86_target_macros_internal): Add znver5.
> > * config/i386/i386-options.cc (m_ZNVER5): New definition
> > (processor_cost_table): Add znver5.
> > * config/i386/i386.cc (ix86_reassociation_width): Likewise.
> > * config/i386/i386.h (processor_type): Add PROCESSOR_ZNVER5
> > (PTA_ZNVER5): New definition.
> > * config/i386/i386.md (define_attr "cpu"): Add znver5.
> > (Scheduling descriptions) Add znver5.md.
> > * config/i386/x86-tune-costs.h (znver5_cost): New definition.
> > * config/i386/x86-tune-sched.cc (ix86_issue_rate): Add znver5.
> > (ix86_adjust_cost): Likewise.
> > * config/i386/x86-tune.def (avx512_move_by_pieces): Add m_ZNVER5.
> > (avx512_store_by_pieces): Add m_ZNVER5.
> > * doc/extend.texi: Add znver5.
> > * doc/invoke.texi: Likewise.
> > * config/i386/znver5.md: New.
> >
> > gcc/testsuite/ChangeLog:
> > * g++.target/i386/mv29.C: Handle znver5 arch.
> > * gcc.target/i386/funcspec-56.inc:Likewise.
>
> Hi,
> I went through the scheduler description and found some places that can
> be commonized.  Most frequently it is the vector path instruction which
> blocks all execution cores so patterns can be shared between znver3 and
> 5 (blocking the new cores for znver3 does not change anything since they
> are not used anyway).  The insn automata growth is now about 5% which I
> hope is acceptable.  I tried the completely separate model and it was
> abour 7%.
>
> I plan to commit the patch tomorrow if htere are no further ideas for
> improvement.

Thank you for working on this.  The patch looks good.

Regards,
Venkat.

>
> Honza
>
> diff --git a/gcc/common/config/i386/cpuinfo.h
> b/gcc/common/config/i386/cpuinfo.h
> index a595ee537a8..017a952a5db 100644
> --- a/gcc/common/config/i386/cpuinfo.h
> +++ b/gcc/common/config/i386/cpuinfo.h
> @@ -310,6 +310,22 @@ get_amd_cpu (struct __processor_model *cpu_model,
>   cpu_model->__cpu_subtype = AMDFAM19H_ZNVER3;
> }
>break;
> +case 0x1a:
> +  cpu_model->__cpu_type = AMDFAM1AH;
> +  if (model <= 0x77)
> +   {
> + cpu = "znver5";
> + CHECK___builtin_cpu_is ("znver5");
> + cpu_model->__cpu_subtype = AMDFAM1AH_ZNVER5;
> +   }
> +  else if (has_cpu_feature (cpu_model, cpu_features2,
> +   FEATURE_AVX512VP2INTERSECT))
> +   {
> + cpu = "znver5";
> + CHECK___builtin_cpu_is ("znver5");
> + cpu_model->__cpu_subtype = AMDFAM1AH_ZNVER5;
> +   }
> +  break;
>  default:
>break;
>  }
> diff --git a/gcc/common/config/i386/i386-common.cc
> b/gcc/common/config/i386/i386-common.cc
> index c35191e6925..f814df8385b 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -2166,7 +2166,8 @@ const char *const processor_names[] =
>"znver1",
>"znver2",
>"znver3",
> -  "znver4"
> +  "znver4",
> +  "znver5"
>  };
>
>  /* Guarantee that the array is aligned with enum processor_type.  */
> @@ -2435,6 +2436,9 @@ const pta processor_alias_table[] =
>{"znver4", PROCESSOR_ZNVER4, CPU_ZNVER4,
>  PTA_ZNVER4,
>  M_CPU_SUBTYPE

Re: [PATCH] gimple-iterator, ubsan: Fix ICE during instrumentation of returns_twice calls [PR112709]

2024-03-12 Thread Richard Biener

On Tue, 12 Mar 2024, Jakub Jelinek wrote:

> Hi!
> 
> ubsan, asan (both PR112709) and _BitInt lowering (PR113466) want to
> insert some instrumentation or adjustment statements before some statement.
> This unfortunately creates invalid IL if inserting before a returns_twice
> call, because we require that such calls are the first statement in a basic
> block and that we have an edge from the .ABNORMAL_DISPATCHER block to
> the block containing the returns_twice call (in addition to other edge(s)).
> 
> The following patch adds helper functions for such insertions and uses it
> for now in ubsan (I'll post a follow up which uses it in asan and will
> work later on the _BitInt lowering PR).
> 
> In particular, if the bb with returns_twice call at the start has just
> 2 edges, one EDGE_ABNORMAL from .ABNORMAL_DISPATCHER and another
> (non-EDGE_ABNORMAL/EDGE_EH) from some other bb, it just inserts the
> statement or sequence on that other edge.
> If the bb has more predecessor edges or the one not from
> .ABNORMAL_DISPATCHER is e.g. an EH edge (this latter case likely shouldn't
> happen, one would need labels or something like that), the patch splits the
> block with returns_twice call such that there is just one edge next to
> .ABNORMAL_DISPATCHER edge and adjusts PHIs as needed to make it happen.
> The functions also replace uses of PHIs from the returns_twice bb with
> the corresponding PHI arguments, because otherwise it would be invalid IL.
> 
> E.g. in ubsan/pr112709-2.c (qux) we have before the ubsan pass
>:
>   # .MEM_5(ab) = PHI <.MEM_4(9), .MEM_25(ab)(11)>
>   # _7(ab) = PHI <_20(9), _8(ab)(11)>
>   # .MEM_21(ab) = VDEF <.MEM_5(ab)>
>   _22 = bar (*_7(ab));
> where bar is returns_twice call and bb 11 has .ABNORMAL_DISPATCHER call,
> this patch instruments it like:
>:
>   # .MEM_4 = PHI <.MEM_17(ab)(4), .MEM_10(D)(5), .MEM_14(ab)(8)>
>   # DEBUG BEGIN_STMT
>   # VUSE <.MEM_4>
>   _20 = p;
>   # .MEM_27 = VDEF <.MEM_4>
>   .UBSAN_NULL (_20, 0B, 0);
>   # VUSE <.MEM_27>
>   _2 = __builtin_dynamic_object_size (_20, 0);
>   # .MEM_28 = VDEF <.MEM_27>
>   .UBSAN_OBJECT_SIZE (_20, 1024, _2, 0);
>   
>:
>   # .MEM_5(ab) = PHI <.MEM_28(9), .MEM_25(ab)(11)>
>   # _7(ab) = PHI <_20(9), _8(ab)(11)>
>   # .MEM_21(ab) = VDEF <.MEM_5(ab)>
>   _22 = bar (*_7(ab));
> The edge from .ABNORMAL_DISPATCHER is there just to represent the
> returning for 2nd and later times, the instrumentation can't be
> done at that point as there is no code executed during that point.
> The ubsan/pr112709-1.c testcase includes non-virtual PHIs to cover
> the handling of those as well.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2024-03-12  Jakub Jelinek  
> 
>   PR sanitizer/112709
>   * gimple-iterator.h (gsi_insert_before_returns_twice_call,
>   gsi_insert_seq_before_returns_twice_call): Declare.
>   * gimple-iterator.cc: Include gimplify.h.
>   (edge_before_returns_twice_call, gsi_insert_before_returns_twice_call,
>   gsi_insert_seq_before_returns_twice_call): New functions.
>   * ubsan.cc (ubsan_insert_before, ubsan_insert_seq_before): New
>   functions.
>   (instrument_mem_ref, instrument_pointer_overflow,
>   instrument_nonnull_arg, instrument_nonnull_return): Use
>   ubsan_insert_before instead of gsi_insert_before.
>   (maybe_instrument_pointer_overflow): Use force_gimple_operand,
>   gimple_seq_add_seq_without_update and ubsan_insert_seq_before
>   instead of force_gimple_operand_gsi.
>   (instrument_object_size): Likewise.  Use ubsan_insert_before instead
>   of gsi_insert_before.
> 
>   * gcc.dg/ubsan/pr112709-1.c: New test.
>   * gcc.dg/ubsan/pr112709-2.c: New test.
> 
> --- gcc/gimple-iterator.h.jj  2024-02-08 11:17:37.144661383 +0100
> +++ gcc/gimple-iterator.h 2024-03-11 16:16:24.670464737 +0100
> @@ -93,6 +93,10 @@ extern void gsi_insert_on_edge (edge, gi
>  extern void gsi_insert_seq_on_edge (edge, gimple_seq);
>  extern basic_block gsi_insert_on_edge_immediate (edge, gimple *);
>  extern basic_block gsi_insert_seq_on_edge_immediate (edge, gimple_seq);
> +extern basic_block gsi_insert_before_returns_twice_call (basic_block,
> +  gimple *);
> +extern basic_block gsi_insert_seq_before_returns_twice_call (basic_block,
> +  gimple_seq);
>  extern void gsi_commit_edge_inserts (void);
>  extern void gsi_commit_one_edge_insert (edge, basic_block *);
>  extern gphi_iterator gsi_start_phis (basic_block);
> --- gcc/gimple-iterator.cc.jj 2024-02-08 11:17:37.144661383 +0100
> +++ gcc/gimple-iterator.cc2024-03-11 18:17:08.994804808 +0100
> @@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.
>  #include "tree-cfg.h"
>  #include "tree-ssa.h"
>  #include "value-prof.h"
> +#include "gimplify.h"
>  
>  
>  /* Mark the statement STMT as modified, and update it.  */
> @@ -944,3 +945,135 @@ gsi_start_phis

[COMMITED] MAINTAINERS: Fix order in Write After Aproval

2024-03-12 Thread Filip Kastl

Hi,

I pushed this patch on Fri 8th and sent this mail to notify that I did so.
I had some trouble with sending the mail though and it didn't arrive to the
mailing list.  I'm sending it now instead.

Filip Kastl

-- 8< --

ChangeLog:

* MAINTAINERS: Fix order of names in Write After Aproval

Signed-off-by: Filip Kastl 
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index a681518d704..8f64ee630b4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -448,9 +448,9 @@ Wei Guozhi  

 Vineet Gupta   
 Naveen H.S 
 Mostafa Hagog  
-Demin Han  
 Jivan Hakobyan 
 Andrew Haley   
+Demin Han  
 Frederik Harwath   
 Stuart Hastings
 Michael Haubenwallner  

-- 
2.43.0

Re: [PATCH] asan: Instrument stores in callees rather than callers [PR112709]

2024-03-12 Thread Richard Biener

On Tue, 12 Mar 2024, Jakub Jelinek wrote:

> Hi!
> 
> asan currently instruments since PR69276 r6-6758 fix calls which store
> the return value into memory on the caller side, before the call it
> verifies the memory is writable.
> Now PR112709 where we ICE on trying to instrument such calls made me
> think about whether that is what we want to do.
> 
> There are 3 different cases.
> 
> One is when a function returns an aggregate which is passed e.g. in
> registers, say like struct S { int a[4]; }; returning on x86_64.
> That would be ideally instrumented in between the actual call and
> storing of the aggregate into memory, but asan currently mostly
> works as a GIMPLE pass and arranging for the instrumentation to happen
> at that spot would be really hard.  We could diagnose after the call
> but generally asan attempts to diagnose stuff before something is
> overwritten rather than after, or keep the current behavior (that is
> what this patch does, which has the disadvantage that it can complain
> about UB even for functions which never return and so never actually store,
> and doesn't check whether the memory wasn't e.g. poisoned during the call)
> or could e.g. instrument both before and after the call (that would have
> the disadvantage the current state has but at least would check post-factum
> the store again afterwards).
> 
> Another case is when a function returns an aggregate through a hidden
> reference, struct T { int a[128]; }; on x86_64 or even the above struct S
> on ia32 as example.  In the actual program such stores happen when storing
> something to  or its parts in the callee, because  there
> expands to *hidden_retval.  So, IMHO we should instrument those in the
> callee rather than caller, that is where the writes are and we can do that
> easily.  This is what the patch below does.
> 
> And the last case is for builtins/internal functions.  Usually those don't
> return aggregates, but in case they'd do and can be expanded inline, it is
> better to instrument them in the caller (as before) rather than not
> instrumenting the return stores at all.
> 
> I had to tweak the expected output on the PR69276 testcase, because
> with the patch it keeps previous behavior on x86_64 (structure returned
> in registers, stored in the caller, so reported as UB in A::A()),
> while on i686 it changed the behavior and is reported as UB in the
> vnull::operator vec which stores the structure, A::A() is then a frame
> above it in the backtrace.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2024-03-12  Jakub Jelinek  
> 
>   PR sanitizer/112709
>   * asan.cc (has_stmt_been_instrumented_p): Don't instrument call
>   stores on the caller side unless it is a call to a builtin or
>   internal function or function doesn't return by hidden reference.
>   (maybe_instrument_call): Likewise.
>   (instrument_derefs): Instrument stores to RESULT_DECL if
>   returning by hidden reference.
> 
>   * gcc.dg/asan/pr112709-1.c: New test.
>   * g++.dg/asan/pr69276.C: Adjust expected output for some targets.
> 
> --- gcc/asan.cc.jj2024-03-06 09:35:04.132894608 +0100
> +++ gcc/asan.cc   2024-03-11 13:49:58.931045179 +0100
> @@ -1372,7 +1372,12 @@ has_stmt_been_instrumented_p (gimple *st
> return true;
>   }
>  }
> -  else if (is_gimple_call (stmt) && gimple_store_p (stmt))
> +  else if (is_gimple_call (stmt)
> +&& gimple_store_p (stmt)
> +&& (gimple_call_builtin_p (stmt)
> +|| gimple_call_internal_p (stmt)
> +|| !aggregate_value_p (TREE_TYPE (gimple_call_lhs (stmt)),
> +   gimple_call_fntype (stmt
>  {
>asan_mem_ref r;
>asan_mem_ref_init (, NULL, 1);
> @@ -2751,7 +2756,9 @@ instrument_derefs (gimple_stmt_iterator
>  return;
>  
>poly_int64 decl_size;
> -  if ((VAR_P (inner) || TREE_CODE (inner) == RESULT_DECL)
> +  if ((VAR_P (inner)
> +   || (TREE_CODE (inner) == RESULT_DECL
> +&& !aggregate_value_p (inner, current_function_decl)))
>&& offset == NULL_TREE
>&& DECL_SIZE (inner)
>&& poly_int_tree_p (DECL_SIZE (inner), _size)
> @@ -3023,7 +3030,11 @@ maybe_instrument_call (gimple_stmt_itera
>  }
>  
>bool instrumented = false;
> -  if (gimple_store_p (stmt))
> +  if (gimple_store_p (stmt)
> +  && (gimple_call_builtin_p (stmt)
> +   || gimple_call_internal_p (stmt)
> +   || !aggregate_value_p (TREE_TYPE (gimple_call_lhs (stmt)),
> +  gimple_call_fntype (stmt
>  {
>tree ref_expr = gimple_call_lhs (stmt);
>instrument_derefs (iter, ref_expr,
> --- gcc/testsuite/gcc.dg/asan/pr112709-1.c.jj 2024-03-11 13:59:15.300408140 
> +0100
> +++ gcc/testsuite/gcc.dg/asan/pr112709-1.c2024-03-11 13:59:58.626813417 
> +0100
> @@ -0,0 +1,52 @@
> +/* PR sanitizer/112709 */
> +/* { dg-do compile } */
> +/* { dg-options

Re: Patch ping Re: [PATCH] icf: Reset SSA_NAME_{PTR,RANGE}_INFO in successfully merged functions [PR113907]

2024-03-12 Thread Jan Hubicka

> Hi!
Hi,
> 
> On Thu, Feb 15, 2024 at 08:29:24AM +0100, Jakub Jelinek wrote:
> > 2024-02-15  Jakub Jelinek  
> > 
> > PR middle-end/113907
> > * ipa-icf.cc (sem_item_optimizer::merge_classes): Reset
> > SSA_NAME_RANGE_INFO and SSA_NAME_PTR_INFO on successfully ICF merged
> > functions.
> > 
> > * gcc.dg/pr113907.c: New test.
> 
> I'd like to ping the
> https://gcc.gnu.org/pipermail/gcc-patches/2024-February/645644.html
> patch.
> After looking at this PR again yesterday, I'm convinced we don't need
> either this patch or some other patch to deal with the jump functions,
> but both, this patch to clear (or other variant would be to union them)
> SSA_NAME_RANGE_INFO in the bodies of IPA-ICF merged functions, and another
> patch that would maybe in sem_function::merge go through all the
> callees cgraph edges and for each of them attempt to merge (for value
> range union) at least the value range/pointer alignment information from
> the corresponding cgraph edge from alias for all jump functions of all
> the arguments.
> 
> Bootstrapped/regtested again last night.

I am sorry for delaying this.  I made the variant that simply compares
value range of functions and prevents merging if they diverge and wanted
to make some bigger statistics.  This made me notice some performance
problems on clang performance and libstdc++ RB-trees which disrailed me
from the original PR.  I will finish the statistics today.

For next stage1 we definitly want to move ahead with merging metadata
(not only value ranges, but hopefully also TBAA).  Currently the only
thing we merge aggressively is the edge profile (and we have PR on that
merging functions which differs only but likely/unlikely attribute).
However for backportability and for this avoiding merging may be safer
solution depending on the stats.A

I was looking into ipa-vrp useability and there are several tests on
Firefox talos where ipa-prop VRP makes difference. It boils down to
propagating fact that parameter is alway snon-NULL.  This is commonly
tested in C++ casts.

Honza

[PATCH v4] LoongArch: Add support for TLS descriptors

2024-03-12 Thread mengqinggang

Add support for TLS descriptors on normal code model and extreme code model.

Normal code model instruction sequence:
  -mno-explicit-relocs:
la.tls.desc $r4, s
add.d   $r12, $r4, $r2
  -mexplicit-relocs:
pcalau12i   $r4,%desc_pc_hi20(s)
addi.d  $r4,$r4,%desc_pc_lo12(s)
ld.d$r1,$r4,%desc_ld(s)
jirl$r1,$r1,%desc_call(s)
add.d   $r12, $r4, $r2

Extreme code model instruction sequence:
  -mno-explicit-relocs:
la.tls.desc $r4, $r12, s
add.d   $r12, $r4, $r2
  -mexplicit-relocs:
pcalau12i   $r4,%desc_pc_hi20(s)
addi.d  $r12,$r0,%desc_pc_lo12(s)
lu32i.d $r12,%desc64_pc_lo20(s)
lu52i.d $r12,$r12,%desc64_pc_hi12(s)
add.d   $r4,$r4,$r12
ld.d$r1,$r4,%desc_ld(s)
jirl$r1,$r1,%desc_call(s)
add.d   $r12, $r4, $r2

The default is still traditional TLS model, but can be configured with
--with-tls={trad,desc}. The default can change to TLS descriptors once
libc and LLVM support this.

gcc/ChangeLog:

* config.gcc: Add --with-tls option to change TLS flavor.
* config/loongarch/genopts/loongarch.opt.in: Add -mtls-dialect to
configure TLS flavor.
* config/loongarch/loongarch-def.h (struct loongarch_target): Add
tls_dialect.
* config/loongarch/loongarch-driver.cc (la_driver_init): Add tls
flavor.
* config/loongarch/loongarch-opts.cc (loongarch_init_target): Add
tls_dialect.
(loongarch_config_target): Ditto.
(loongarch_update_gcc_opt_status): Ditto.
* config/loongarch/loongarch-opts.h (loongarch_init_target):Ditto.
(TARGET_TLS_DESC): New define.
* config/loongarch/loongarch.cc (loongarch_symbol_insns): Add TLS DESC
instructions sequence length.
(loongarch_legitimize_tls_address): New TLS DESC instruction sequence.
(loongarch_option_override_internal): Add la_opt_tls_dialect.
(loongarch_option_restore): Add la_target.tls_dialect.
* config/loongarch/loongarch.md (@got_load_tls_desc): Normal
code model for TLS DESC.
(got_load_tls_desc_off64): Extreme code model for TLS DESC.
* config/loongarch/loongarch.opt: Regenerated.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/cmodel-extreme-1.c: Add -mtls-dialect=trad.
* gcc.target/loongarch/cmodel-extreme-2.c: Ditto.
* gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: Ditto.
* gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c:
Ditto.
* gcc.target/loongarch/func-call-medium-1.c: Ditto.
* gcc.target/loongarch/func-call-medium-2.c: Ditto.
* gcc.target/loongarch/func-call-medium-3.c: Ditto.
* gcc.target/loongarch/func-call-medium-4.c: Ditto.
* gcc.target/loongarch/tls-extreme-macro.c: Ditto.
* gcc.target/loongarch/tls-gd-noplt.c: Ditto.
* gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c: New 
test.
* gcc.target/loongarch/explicit-relocs-auto-tls-desc.c: New test.
* gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c: New test.
* gcc.target/loongarch/explicit-relocs-tls-desc.c: New test.
---
Changes v3 -> v4:
- Add TLS descriptors test cases.

Changes v2 -> v3:
- Set default to traditional TLS model.
- Add support for -mexplicit-relocs and extreme code model.

Changes v1 -> v2:
- Clobber fcc0-fcc7 registers in got_load_tls_desc template.
- Support --with-tls in configure.

v3 link: https://sourceware.org/pipermail/gcc-patches/2024-March/647578.html
v2 link: https://sourceware.org/pipermail/gcc-patches/2024-February/646817.html
v1 link: https://sourceware.org/pipermail/gcc-patches/2023-December/638907.html

 gcc/config.gcc| 19 +-
 gcc/config/loongarch/genopts/loongarch.opt.in | 14 
 gcc/config/loongarch/loongarch-def.h  |  7 ++
 gcc/config/loongarch/loongarch-driver.cc  |  2 +-
 gcc/config/loongarch/loongarch-opts.cc| 12 +++-
 gcc/config/loongarch/loongarch-opts.h |  2 +
 gcc/config/loongarch/loongarch.cc | 48 +
 gcc/config/loongarch/loongarch.md | 68 +++
 gcc/config/loongarch/loongarch.opt| 14 
 .../gcc.target/loongarch/cmodel-extreme-1.c   |  2 +-
 .../gcc.target/loongarch/cmodel-extreme-2.c   |  2 +-
 .../explicit-relocs-auto-extreme-tls-desc.c   | 10 +++
 .../loongarch/explicit-relocs-auto-tls-desc.c | 10 +++
 .../explicit-relocs-auto-tls-ld-gd.c  |  2 +-
 .../explicit-relocs-extreme-tls-desc.c| 16 +
 ...icit-relocs-medium-call36-auto-tls-ld-gd.c |  2 +-
 .../loongarch/explicit-relocs-tls-desc.c  | 13 
 .../gcc.target/loongarch/func-call-medium-1.c |  2 +-
 .../gcc.target/loongarch/func-call-medium-2.c |  2 +-
 .../gcc.target/loongarch/func-call-medium-3.c |  2 +-
 .../gcc.target/loongarch/func-call-medium-4.c |  2 +-
 .../gcc.target/loongarch/tls-extreme-macro.c

[PATCH] asan: Fix ICE during instrumentation of returns_twice calls [PR112709]

2024-03-12 Thread Jakub Jelinek

Hi!

The following patch on top of the previously posted ubsan/gimple-iterator
one handles asan the same.  While the case of returning by hidden reference
is handled differently because of the first recently posted asan patch,
this deals with instrumentation of the aggregates returned in registers
case as well as instrumentation of loads from aggregate memory in the
function arguments of returns_twice calls.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-03-12  Jakub Jelinek  

PR sanitizer/112709
* asan.cc (asan_insert_before): New function.
(maybe_create_ssa_name, maybe_cast_to_ptrmode, build_check_stmt,
maybe_instrument_call, asan_expand_mark_ifn): Use it instead of
gsi_insert_before.

* gcc.dg/asan/pr112709-2.c: New test.

--- gcc/asan.cc.jj  2024-03-11 13:49:58.931045179 +0100
+++ gcc/asan.cc 2024-03-11 18:38:29.047330489 +0100
@@ -2561,6 +2561,21 @@ build_shadow_mem_access (gimple_stmt_ite
   return gimple_assign_lhs (g);
 }
 
+/* Insert G stmt before ITER.  If ITER is a returns_twice call,
+   insert it on an appropriate edge instead.  */
+
+static void
+asan_insert_before (gimple_stmt_iterator *iter, gimple *g)
+{
+  gimple *stmt = gsi_stmt (*iter);
+  if (stmt
+  && is_gimple_call (stmt)
+  && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0)
+gsi_insert_before_returns_twice_call (gsi_bb (*iter), g);
+  else
+gsi_insert_before (iter, g, GSI_SAME_STMT);
+}
+
 /* BASE can already be an SSA_NAME; in that case, do not create a
new SSA_NAME for it.  */
 
@@ -2574,7 +2589,7 @@ maybe_create_ssa_name (location_t loc, t
   gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (base)), base);
   gimple_set_location (g, loc);
   if (before_p)
-gsi_insert_before (iter, g, GSI_SAME_STMT);
+asan_insert_before (iter, g);
   else
 gsi_insert_after (iter, g, GSI_NEW_STMT);
   return gimple_assign_lhs (g);
@@ -2593,7 +2608,7 @@ maybe_cast_to_ptrmode (location_t loc, t
  NOP_EXPR, len);
   gimple_set_location (g, loc);
   if (before_p)
-gsi_insert_before (iter, g, GSI_SAME_STMT);
+asan_insert_before (iter, g);
   else
 gsi_insert_after (iter, g, GSI_NEW_STMT);
   return gimple_assign_lhs (g);
@@ -2684,7 +2699,7 @@ build_check_stmt (location_t loc, tree b
 align / BITS_PER_UNIT));
   gimple_set_location (g, loc);
   if (before_p)
-gsi_insert_before (, g, GSI_SAME_STMT);
+asan_insert_before (, g);
   else
 {
   gsi_insert_after (, g, GSI_NEW_STMT);
@@ -3025,7 +3040,7 @@ maybe_instrument_call (gimple_stmt_itera
  tree decl = builtin_decl_implicit (BUILT_IN_ASAN_HANDLE_NO_RETURN);
  gimple *g = gimple_build_call (decl, 0);
  gimple_set_location (g, gimple_location (stmt));
- gsi_insert_before (iter, g, GSI_SAME_STMT);
+ asan_insert_before (iter, g);
}
 }
 
@@ -3852,7 +3867,7 @@ asan_expand_mark_ifn (gimple_stmt_iterat
   g = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
   NOP_EXPR, len);
   gimple_set_location (g, loc);
-  gsi_insert_before (iter, g, GSI_SAME_STMT);
+  asan_insert_before (iter, g);
   tree sz_arg = gimple_assign_lhs (g);
 
   tree fun
--- gcc/testsuite/gcc.dg/asan/pr112709-2.c.jj   2024-03-11 18:30:59.813488200 
+0100
+++ gcc/testsuite/gcc.dg/asan/pr112709-2.c  2024-03-11 18:31:06.506396462 
+0100
@@ -0,0 +1,50 @@
+/* PR sanitizer/112709 */
+/* { dg-do compile } */
+/* { dg-options "-fsanitize=address -O2" } */
+
+struct S { char c[1024]; } *p;
+int foo (int);
+
+__attribute__((returns_twice, noipa)) int
+bar (struct S x)
+{
+  (void) x.c[0];
+  return 0;
+}
+
+void
+baz (int *y)
+{
+  foo (1);
+  *y = bar (*p);
+}
+
+void
+qux (int x, int *y)
+{
+  if (x == 25)
+x = foo (2);
+  else if (x == 42)
+x = foo (foo (3));
+  *y = bar (*p);
+}
+
+void
+corge (int x, int *y)
+{
+  void *q[] = { &, &, &, & };
+  if (x == 25)
+{
+l1:
+  x = foo (2);
+}
+  else if (x == 42)
+{
+l2:
+  x = foo (foo (3));
+}
+l3:
+  *y = bar (*p);
+  if (x < 4)
+goto *q[x & 3];
+}

Jakub

[PATCH] gimple-iterator, ubsan: Fix ICE during instrumentation of returns_twice calls [PR112709]

2024-03-12 Thread Jakub Jelinek

Hi!

ubsan, asan (both PR112709) and _BitInt lowering (PR113466) want to
insert some instrumentation or adjustment statements before some statement.
This unfortunately creates invalid IL if inserting before a returns_twice
call, because we require that such calls are the first statement in a basic
block and that we have an edge from the .ABNORMAL_DISPATCHER block to
the block containing the returns_twice call (in addition to other edge(s)).

The following patch adds helper functions for such insertions and uses it
for now in ubsan (I'll post a follow up which uses it in asan and will
work later on the _BitInt lowering PR).

In particular, if the bb with returns_twice call at the start has just
2 edges, one EDGE_ABNORMAL from .ABNORMAL_DISPATCHER and another
(non-EDGE_ABNORMAL/EDGE_EH) from some other bb, it just inserts the
statement or sequence on that other edge.
If the bb has more predecessor edges or the one not from
.ABNORMAL_DISPATCHER is e.g. an EH edge (this latter case likely shouldn't
happen, one would need labels or something like that), the patch splits the
block with returns_twice call such that there is just one edge next to
.ABNORMAL_DISPATCHER edge and adjusts PHIs as needed to make it happen.
The functions also replace uses of PHIs from the returns_twice bb with
the corresponding PHI arguments, because otherwise it would be invalid IL.

E.g. in ubsan/pr112709-2.c (qux) we have before the ubsan pass
   :
  # .MEM_5(ab) = PHI <.MEM_4(9), .MEM_25(ab)(11)>
  # _7(ab) = PHI <_20(9), _8(ab)(11)>
  # .MEM_21(ab) = VDEF <.MEM_5(ab)>
  _22 = bar (*_7(ab));
where bar is returns_twice call and bb 11 has .ABNORMAL_DISPATCHER call,
this patch instruments it like:
   :
  # .MEM_4 = PHI <.MEM_17(ab)(4), .MEM_10(D)(5), .MEM_14(ab)(8)>
  # DEBUG BEGIN_STMT
  # VUSE <.MEM_4>
  _20 = p;
  # .MEM_27 = VDEF <.MEM_4>
  .UBSAN_NULL (_20, 0B, 0);
  # VUSE <.MEM_27>
  _2 = __builtin_dynamic_object_size (_20, 0);
  # .MEM_28 = VDEF <.MEM_27>
  .UBSAN_OBJECT_SIZE (_20, 1024, _2, 0);
  
   :
  # .MEM_5(ab) = PHI <.MEM_28(9), .MEM_25(ab)(11)>
  # _7(ab) = PHI <_20(9), _8(ab)(11)>
  # .MEM_21(ab) = VDEF <.MEM_5(ab)>
  _22 = bar (*_7(ab));
The edge from .ABNORMAL_DISPATCHER is there just to represent the
returning for 2nd and later times, the instrumentation can't be
done at that point as there is no code executed during that point.
The ubsan/pr112709-1.c testcase includes non-virtual PHIs to cover
the handling of those as well.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-03-12  Jakub Jelinek  

PR sanitizer/112709
* gimple-iterator.h (gsi_insert_before_returns_twice_call,
gsi_insert_seq_before_returns_twice_call): Declare.
* gimple-iterator.cc: Include gimplify.h.
(edge_before_returns_twice_call, gsi_insert_before_returns_twice_call,
gsi_insert_seq_before_returns_twice_call): New functions.
* ubsan.cc (ubsan_insert_before, ubsan_insert_seq_before): New
functions.
(instrument_mem_ref, instrument_pointer_overflow,
instrument_nonnull_arg, instrument_nonnull_return): Use
ubsan_insert_before instead of gsi_insert_before.
(maybe_instrument_pointer_overflow): Use force_gimple_operand,
gimple_seq_add_seq_without_update and ubsan_insert_seq_before
instead of force_gimple_operand_gsi.
(instrument_object_size): Likewise.  Use ubsan_insert_before instead
of gsi_insert_before.

* gcc.dg/ubsan/pr112709-1.c: New test.
* gcc.dg/ubsan/pr112709-2.c: New test.

--- gcc/gimple-iterator.h.jj2024-02-08 11:17:37.144661383 +0100
+++ gcc/gimple-iterator.h   2024-03-11 16:16:24.670464737 +0100
@@ -93,6 +93,10 @@ extern void gsi_insert_on_edge (edge, gi
 extern void gsi_insert_seq_on_edge (edge, gimple_seq);
 extern basic_block gsi_insert_on_edge_immediate (edge, gimple *);
 extern basic_block gsi_insert_seq_on_edge_immediate (edge, gimple_seq);
+extern basic_block gsi_insert_before_returns_twice_call (basic_block,
+gimple *);
+extern basic_block gsi_insert_seq_before_returns_twice_call (basic_block,
+gimple_seq);
 extern void gsi_commit_edge_inserts (void);
 extern void gsi_commit_one_edge_insert (edge, basic_block *);
 extern gphi_iterator gsi_start_phis (basic_block);
--- gcc/gimple-iterator.cc.jj   2024-02-08 11:17:37.144661383 +0100
+++ gcc/gimple-iterator.cc  2024-03-11 18:17:08.994804808 +0100
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.
 #include "tree-cfg.h"
 #include "tree-ssa.h"
 #include "value-prof.h"
+#include "gimplify.h"
 
 
 /* Mark the statement STMT as modified, and update it.  */
@@ -944,3 +945,135 @@ gsi_start_phis (basic_block bb)
 
   return i;
 }
+
+/* Helper function for gsi_insert_before_returns_twice_call and
+   gsi_insert_seq_before_returns_twice_call.  Find edge to
+   insert statements before

[PATCH] asan: Instrument stores in callees rather than callers [PR112709]

2024-03-12 Thread Jakub Jelinek

Hi!

asan currently instruments since PR69276 r6-6758 fix calls which store
the return value into memory on the caller side, before the call it
verifies the memory is writable.
Now PR112709 where we ICE on trying to instrument such calls made me
think about whether that is what we want to do.

There are 3 different cases.

One is when a function returns an aggregate which is passed e.g. in
registers, say like struct S { int a[4]; }; returning on x86_64.
That would be ideally instrumented in between the actual call and
storing of the aggregate into memory, but asan currently mostly
works as a GIMPLE pass and arranging for the instrumentation to happen
at that spot would be really hard.  We could diagnose after the call
but generally asan attempts to diagnose stuff before something is
overwritten rather than after, or keep the current behavior (that is
what this patch does, which has the disadvantage that it can complain
about UB even for functions which never return and so never actually store,
and doesn't check whether the memory wasn't e.g. poisoned during the call)
or could e.g. instrument both before and after the call (that would have
the disadvantage the current state has but at least would check post-factum
the store again afterwards).

Another case is when a function returns an aggregate through a hidden
reference, struct T { int a[128]; }; on x86_64 or even the above struct S
on ia32 as example.  In the actual program such stores happen when storing
something to  or its parts in the callee, because  there
expands to *hidden_retval.  So, IMHO we should instrument those in the
callee rather than caller, that is where the writes are and we can do that
easily.  This is what the patch below does.

And the last case is for builtins/internal functions.  Usually those don't
return aggregates, but in case they'd do and can be expanded inline, it is
better to instrument them in the caller (as before) rather than not
instrumenting the return stores at all.

I had to tweak the expected output on the PR69276 testcase, because
with the patch it keeps previous behavior on x86_64 (structure returned
in registers, stored in the caller, so reported as UB in A::A()),
while on i686 it changed the behavior and is reported as UB in the
vnull::operator vec which stores the structure, A::A() is then a frame
above it in the backtrace.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-03-12  Jakub Jelinek  

PR sanitizer/112709
* asan.cc (has_stmt_been_instrumented_p): Don't instrument call
stores on the caller side unless it is a call to a builtin or
internal function or function doesn't return by hidden reference.
(maybe_instrument_call): Likewise.
(instrument_derefs): Instrument stores to RESULT_DECL if
returning by hidden reference.

* gcc.dg/asan/pr112709-1.c: New test.
* g++.dg/asan/pr69276.C: Adjust expected output for some targets.

--- gcc/asan.cc.jj  2024-03-06 09:35:04.132894608 +0100
+++ gcc/asan.cc 2024-03-11 13:49:58.931045179 +0100
@@ -1372,7 +1372,12 @@ has_stmt_been_instrumented_p (gimple *st
  return true;
}
 }
-  else if (is_gimple_call (stmt) && gimple_store_p (stmt))
+  else if (is_gimple_call (stmt)
+  && gimple_store_p (stmt)
+  && (gimple_call_builtin_p (stmt)
+  || gimple_call_internal_p (stmt)
+  || !aggregate_value_p (TREE_TYPE (gimple_call_lhs (stmt)),
+ gimple_call_fntype (stmt
 {
   asan_mem_ref r;
   asan_mem_ref_init (, NULL, 1);
@@ -2751,7 +2756,9 @@ instrument_derefs (gimple_stmt_iterator
 return;
 
   poly_int64 decl_size;
-  if ((VAR_P (inner) || TREE_CODE (inner) == RESULT_DECL)
+  if ((VAR_P (inner)
+   || (TREE_CODE (inner) == RESULT_DECL
+  && !aggregate_value_p (inner, current_function_decl)))
   && offset == NULL_TREE
   && DECL_SIZE (inner)
   && poly_int_tree_p (DECL_SIZE (inner), _size)
@@ -3023,7 +3030,11 @@ maybe_instrument_call (gimple_stmt_itera
 }
 
   bool instrumented = false;
-  if (gimple_store_p (stmt))
+  if (gimple_store_p (stmt)
+  && (gimple_call_builtin_p (stmt)
+ || gimple_call_internal_p (stmt)
+ || !aggregate_value_p (TREE_TYPE (gimple_call_lhs (stmt)),
+gimple_call_fntype (stmt
 {
   tree ref_expr = gimple_call_lhs (stmt);
   instrument_derefs (iter, ref_expr,
--- gcc/testsuite/gcc.dg/asan/pr112709-1.c.jj   2024-03-11 13:59:15.300408140 
+0100
+++ gcc/testsuite/gcc.dg/asan/pr112709-1.c  2024-03-11 13:59:58.626813417 
+0100
@@ -0,0 +1,52 @@
+/* PR sanitizer/112709 */
+/* { dg-do compile } */
+/* { dg-options "-fsanitize=address -O2" } */
+
+struct S { char c[1024]; };
+int foo (int);
+
+__attribute__((returns_twice, noipa)) struct S
+bar (int x)
+{
+  (void) x;
+  struct S s = {};
+  s.c[42] = 42;
+  return s;
+}
+
+void
+baz (struct S *p)
+{
+  foo

Re: [PATCH] strlen: Fix another spot that can create invalid ranges [PR114293]

2024-03-12 Thread Richard Biener

On Tue, 12 Mar 2024, Jakub Jelinek wrote:

> Hi!
> 
> This PR is similar to PR110603 fixed with r14-8487, except in a different
> spot.  From the memset with -1 size of non-zero value we determine minimum
> of (size_t) -1 and the code uses PTRDIFF_MAX - 2 (not really sure I
> understand why it is - 2 and not - 1, e.g. heap allocated array
> with PTRDIFF_MAX char elements which contain '\0' in the last element
> should be fine, no?  One can still represent arr[PTRDIFF_MAX] - arr[0]
> and arr[0] - arr[PTRDIFF_MAX] in ptrdiff_t and
> strlen (arr) == PTRDIFF_MAX - 1) as the maximum, so again invalid range.
> As in the other case, it is just UB that can lead to that, and we have
> choice to only keep the min and use +inf for max, or only keep max
> and use 0 for min, or not set the range at all, or use [min, min] or
> [max, max] etc.  The following patch uses [min, +inf].
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2024-03-12  Jakub Jelinek  
> 
>   PR tree-optimization/114293
>   * tree-ssa-strlen.cc (strlen_pass::handle_builtin_strlen): If
>   max is smaller than min, set max to ~(size_t)0.
> 
>   * gcc.dg/pr114293.c: New test.
> 
> --- gcc/tree-ssa-strlen.cc.jj 2024-01-30 09:57:58.361809262 +0100
> +++ gcc/tree-ssa-strlen.cc2024-03-11 10:35:04.305650520 +0100
> @@ -2341,6 +2341,8 @@ strlen_pass::handle_builtin_strlen ()
> wide_int min = wi::to_wide (old);
> wide_int max
>   = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
> +   if (wi::gtu_p (min, max))
> + max = wi::to_wide (TYPE_MAX_VALUE (TREE_TYPE (lhs)));
> set_strlen_range (lhs, min, max);
>   }
> else
> --- gcc/testsuite/gcc.dg/pr114293.c.jj2024-03-11 10:41:04.375708877 
> +0100
> +++ gcc/testsuite/gcc.dg/pr114293.c   2024-03-11 10:40:50.355895196 +0100
> @@ -0,0 +1,10 @@
> +/* PR tree-optimization/114293 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -w" } */
> +
> +int
> +foo (int x)
> +{
> +  __builtin_memset (, 5, -1);
> +  return __builtin_strlen ((char *) );
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

[PATCH] strlen: Fix another spot that can create invalid ranges [PR114293]

2024-03-12 Thread Jakub Jelinek

Hi!

This PR is similar to PR110603 fixed with r14-8487, except in a different
spot.  From the memset with -1 size of non-zero value we determine minimum
of (size_t) -1 and the code uses PTRDIFF_MAX - 2 (not really sure I
understand why it is - 2 and not - 1, e.g. heap allocated array
with PTRDIFF_MAX char elements which contain '\0' in the last element
should be fine, no?  One can still represent arr[PTRDIFF_MAX] - arr[0]
and arr[0] - arr[PTRDIFF_MAX] in ptrdiff_t and
strlen (arr) == PTRDIFF_MAX - 1) as the maximum, so again invalid range.
As in the other case, it is just UB that can lead to that, and we have
choice to only keep the min and use +inf for max, or only keep max
and use 0 for min, or not set the range at all, or use [min, min] or
[max, max] etc.  The following patch uses [min, +inf].

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-03-12  Jakub Jelinek  

PR tree-optimization/114293
* tree-ssa-strlen.cc (strlen_pass::handle_builtin_strlen): If
max is smaller than min, set max to ~(size_t)0.

* gcc.dg/pr114293.c: New test.

--- gcc/tree-ssa-strlen.cc.jj   2024-01-30 09:57:58.361809262 +0100
+++ gcc/tree-ssa-strlen.cc  2024-03-11 10:35:04.305650520 +0100
@@ -2341,6 +2341,8 @@ strlen_pass::handle_builtin_strlen ()
  wide_int min = wi::to_wide (old);
  wide_int max
= wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
+ if (wi::gtu_p (min, max))
+   max = wi::to_wide (TYPE_MAX_VALUE (TREE_TYPE (lhs)));
  set_strlen_range (lhs, min, max);
}
  else
--- gcc/testsuite/gcc.dg/pr114293.c.jj  2024-03-11 10:41:04.375708877 +0100
+++ gcc/testsuite/gcc.dg/pr114293.c 2024-03-11 10:40:50.355895196 +0100
@@ -0,0 +1,10 @@
+/* PR tree-optimization/114293 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -w" } */
+
+int
+foo (int x)
+{
+  __builtin_memset (, 5, -1);
+  return __builtin_strlen ((char *) );
+}

Jakub

Patch ping Re: [PATCH] icf: Reset SSA_NAME_{PTR,RANGE}_INFO in successfully merged functions [PR113907]

2024-03-12 Thread Jakub Jelinek

Hi!

On Thu, Feb 15, 2024 at 08:29:24AM +0100, Jakub Jelinek wrote:
> 2024-02-15  Jakub Jelinek  
> 
>   PR middle-end/113907
>   * ipa-icf.cc (sem_item_optimizer::merge_classes): Reset
>   SSA_NAME_RANGE_INFO and SSA_NAME_PTR_INFO on successfully ICF merged
>   functions.
> 
>   * gcc.dg/pr113907.c: New test.

I'd like to ping the
https://gcc.gnu.org/pipermail/gcc-patches/2024-February/645644.html
patch.
After looking at this PR again yesterday, I'm convinced we don't need
either this patch or some other patch to deal with the jump functions,
but both, this patch to clear (or other variant would be to union them)
SSA_NAME_RANGE_INFO in the bodies of IPA-ICF merged functions, and another
patch that would maybe in sem_function::merge go through all the
callees cgraph edges and for each of them attempt to merge (for value
range union) at least the value range/pointer alignment information from
the corresponding cgraph edge from alias for all jump functions of all
the arguments.

Bootstrapped/regtested again last night.

> --- gcc/ipa-icf.cc.jj 2024-02-14 14:26:11.101933914 +0100
> +++ gcc/ipa-icf.cc2024-02-14 16:49:35.141518117 +0100
> @@ -3396,6 +3397,7 @@ sem_item_optimizer::merge_classes (unsig
> continue;
>  
>   sem_item *source = c->members[0];
> + bool this_merged_p = false;
>  
>   if (DECL_NAME (source->decl)
>   && MAIN_NAME_P (DECL_NAME (source->decl)))
> @@ -3443,7 +3445,7 @@ sem_item_optimizer::merge_classes (unsig
>   if (dbg_cnt (merged_ipa_icf))
> {
>   bool merged = source->merge (alias);
> - merged_p |= merged;
> + this_merged_p |= merged;
>  
>   if (merged && alias->type == VAR)
> {
> @@ -3452,6 +3454,35 @@ sem_item_optimizer::merge_classes (unsig
> }
> }
> }
> +
> + merged_p |= this_merged_p;
> + if (this_merged_p
> + && source->type == FUNC
> + && (!flag_wpa || flag_checking))
> +   {
> + unsigned i;
> + tree name;
> + FOR_EACH_SSA_NAME (i, name, DECL_STRUCT_FUNCTION (source->decl))
> +   {
> + /* We need to either merge or reset SSA_NAME_*_INFO.
> +For merging we don't preserve the mapping between
> +original and alias SSA_NAMEs from successful equals
> +calls.  */
> + if (POINTER_TYPE_P (TREE_TYPE (name)))
> +   {
> + if (SSA_NAME_PTR_INFO (name))
> +   {
> + gcc_checking_assert (!flag_wpa);
> + SSA_NAME_PTR_INFO (name) = NULL;
> +   }
> +   }
> + else if (SSA_NAME_RANGE_INFO (name))
> +   {
> + gcc_checking_assert (!flag_wpa);
> + SSA_NAME_RANGE_INFO (name) = NULL;
> +   }
> +   }
> +   }
>}
>  
>if (!m_merged_variables.is_empty ())
> --- gcc/testsuite/gcc.dg/pr113907.c.jj2024-02-14 16:13:48.486555159 
> +0100
> +++ gcc/testsuite/gcc.dg/pr113907.c   2024-02-14 16:13:29.198825045 +0100
> @@ -0,0 +1,49 @@
> +/* PR middle-end/113907 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +/* { dg-additional-options "-minline-all-stringops" { target i?86-*-* 
> x86_64-*-* } } */
> +
> +static inline int
> +foo (int len, void *indata, void *outdata)
> +{
> +  if (len < 0 || (len & 7) != 0)
> +return 0;
> +  if (len != 0 && indata != outdata)
> +__builtin_memcpy (outdata, indata, len);
> +  return len;
> +}
> +
> +static inline int
> +bar (int len, void *indata, void *outdata)
> +{
> +  if (len < 0 || (len & 1) != 0)
> +return 0;
> +  if (len != 0 && indata != outdata)
> +__builtin_memcpy (outdata, indata, len);
> +  return len;
> +}
> +
> +int (*volatile p1) (int, void *, void *) = foo;
> +int (*volatile p2) (int, void *, void *) = bar;
> +
> +__attribute__((noipa)) int
> +baz (int len, void *indata, void *outdata)
> +{
> +  if ((len & 6) != 0)
> +bar (len, indata, outdata);
> +  else
> +foo (len, indata, outdata);
> +}
> +
> +struct S { char buf[64]; } s __attribute__((aligned (8)));
> +
> +int
> +main ()
> +{
> +  for (int i = 0; i < 64; ++i)
> +s.buf[i] = ' ' + i;
> +  p2 (2, s.buf, s.buf + 33);
> +  for (int i = 0; i < 64; ++i)
> +if (s.buf[i] != ' ' + ((i >= 33 && i < 35) ? i - 33 : i))
> +  __builtin_abort ();
> +}

Jakub

RE: [PATCH v1] RISC-V: Fix some code style issue(s) in riscv-c.cc [NFC]

2024-03-12 Thread Li, Pan2

Committed, thanks Kito.

Pan

-Original Message-
From: Kito Cheng  
Sent: Tuesday, March 12, 2024 3:11 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; Wang, Yanzhang 

Subject: Re: [PATCH v1] RISC-V: Fix some code style issue(s) in riscv-c.cc [NFC]

LGTM :)

On Tue, Mar 12, 2024 at 3:07 PM  wrote:
>
> From: Pan Li 
>
> Notice some code style issue(s) when add __riscv_v_fixed_vlen, includes:
>
> * Meanless empty line.
> * Line greater than 80 chars.
> * Indent with 3 space(s).
> * Argument unalignment.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-c.cc (riscv_ext_version_value): Fix
> code style greater than 80 chars.
> (riscv_cpu_cpp_builtins): Fix useless empty line, indent
> with 3 space(s) and argument unalignment.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/config/riscv/riscv-c.cc | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
> index 3755ec0b8ef..7029ba88186 100644
> --- a/gcc/config/riscv/riscv-c.cc
> +++ b/gcc/config/riscv/riscv-c.cc
> @@ -37,7 +37,8 @@ along with GCC; see the file COPYING3.  If not see
>  static int
>  riscv_ext_version_value (unsigned major, unsigned minor)
>  {
> -  return (major * RISCV_MAJOR_VERSION_BASE) + (minor * 
> RISCV_MINOR_VERSION_BASE);
> +  return (major * RISCV_MAJOR_VERSION_BASE)
> ++ (minor * RISCV_MINOR_VERSION_BASE);
>  }
>
>  /* Implement TARGET_CPU_CPP_BUILTINS.  */
> @@ -110,7 +111,6 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
>  case CM_MEDANY:
>builtin_define ("__riscv_cmodel_medany");
>break;
> -
>  }
>
>if (riscv_user_wants_strict_align)
> @@ -142,9 +142,9 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
>  riscv_ext_version_value (0, 12));
>  }
>
> -   if (TARGET_XTHEADVECTOR)
> - builtin_define_with_int_value ("__riscv_th_v_intrinsic",
> -riscv_ext_version_value (0, 11));
> +  if (TARGET_XTHEADVECTOR)
> +builtin_define_with_int_value ("__riscv_th_v_intrinsic",
> +  riscv_ext_version_value (0, 11));
>
>/* Define architecture extension test macros.  */
>builtin_define_with_int_value ("__riscv_arch_test", 1);
> --
> 2.34.1
>

Re: [PATCH] Fortran: handle procedure pointer component in DT array [PR110826]

2024-03-12 Thread Paul Richard Thomas

Hi Harald,

This looks good to me. OK for mainline and, since it is so straightforward,
for backporting.

Thanks for the patch.

Paul

On Mon, 11 Mar 2024 at 21:20, Harald Anlauf  wrote:

> Dear all,
>
> the attached patch fixes an ICE-on-valid code when assigning
> a procedure pointer that is a component of a DT array and
> the function in question is array-valued.  (The procedure
> pointer itself cannot be an array.)
>
> Regtested on x86_64-pc-linux-gnu.  OK for mainline?
>
> Thanks,
> Harald
>
>

Re: [PATCH v1] RISC-V: Fix some code style issue(s) in riscv-c.cc [NFC]

2024-03-12 Thread Kito Cheng

LGTM :)

On Tue, Mar 12, 2024 at 3:07 PM  wrote:
>
> From: Pan Li 
>
> Notice some code style issue(s) when add __riscv_v_fixed_vlen, includes:
>
> * Meanless empty line.
> * Line greater than 80 chars.
> * Indent with 3 space(s).
> * Argument unalignment.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-c.cc (riscv_ext_version_value): Fix
> code style greater than 80 chars.
> (riscv_cpu_cpp_builtins): Fix useless empty line, indent
> with 3 space(s) and argument unalignment.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/config/riscv/riscv-c.cc | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
> index 3755ec0b8ef..7029ba88186 100644
> --- a/gcc/config/riscv/riscv-c.cc
> +++ b/gcc/config/riscv/riscv-c.cc
> @@ -37,7 +37,8 @@ along with GCC; see the file COPYING3.  If not see
>  static int
>  riscv_ext_version_value (unsigned major, unsigned minor)
>  {
> -  return (major * RISCV_MAJOR_VERSION_BASE) + (minor * 
> RISCV_MINOR_VERSION_BASE);
> +  return (major * RISCV_MAJOR_VERSION_BASE)
> ++ (minor * RISCV_MINOR_VERSION_BASE);
>  }
>
>  /* Implement TARGET_CPU_CPP_BUILTINS.  */
> @@ -110,7 +111,6 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
>  case CM_MEDANY:
>builtin_define ("__riscv_cmodel_medany");
>break;
> -
>  }
>
>if (riscv_user_wants_strict_align)
> @@ -142,9 +142,9 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
>  riscv_ext_version_value (0, 12));
>  }
>
> -   if (TARGET_XTHEADVECTOR)
> - builtin_define_with_int_value ("__riscv_th_v_intrinsic",
> -riscv_ext_version_value (0, 11));
> +  if (TARGET_XTHEADVECTOR)
> +builtin_define_with_int_value ("__riscv_th_v_intrinsic",
> +  riscv_ext_version_value (0, 11));
>
>/* Define architecture extension test macros.  */
>builtin_define_with_int_value ("__riscv_arch_test", 1);
> --
> 2.34.1
>

[PATCH v1] RISC-V: Fix some code style issue(s) in riscv-c.cc [NFC]

2024-03-12 Thread pan2 . li

From: Pan Li 

Notice some code style issue(s) when add __riscv_v_fixed_vlen, includes:

* Meanless empty line.
* Line greater than 80 chars.
* Indent with 3 space(s).
* Argument unalignment.

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_ext_version_value): Fix
code style greater than 80 chars.
(riscv_cpu_cpp_builtins): Fix useless empty line, indent
with 3 space(s) and argument unalignment.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-c.cc | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 3755ec0b8ef..7029ba88186 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -37,7 +37,8 @@ along with GCC; see the file COPYING3.  If not see
 static int
 riscv_ext_version_value (unsigned major, unsigned minor)
 {
-  return (major * RISCV_MAJOR_VERSION_BASE) + (minor * 
RISCV_MINOR_VERSION_BASE);
+  return (major * RISCV_MAJOR_VERSION_BASE)
++ (minor * RISCV_MINOR_VERSION_BASE);
 }
 
 /* Implement TARGET_CPU_CPP_BUILTINS.  */
@@ -110,7 +111,6 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
 case CM_MEDANY:
   builtin_define ("__riscv_cmodel_medany");
   break;
-
 }
 
   if (riscv_user_wants_strict_align)
@@ -142,9 +142,9 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
 riscv_ext_version_value (0, 12));
 }
 
-   if (TARGET_XTHEADVECTOR)
- builtin_define_with_int_value ("__riscv_th_v_intrinsic",
-riscv_ext_version_value (0, 11));
+  if (TARGET_XTHEADVECTOR)
+builtin_define_with_int_value ("__riscv_th_v_intrinsic",
+  riscv_ext_version_value (0, 11));
 
   /* Define architecture extension test macros.  */
   builtin_define_with_int_value ("__riscv_arch_test", 1);
-- 
2.34.1

RE: [COMMITTED] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-03-12 Thread Andrew Pinski (QUIC)

> -Original Message-
> From: Andrew Pinski (QUIC) 
> Sent: Sunday, March 10, 2024 7:58 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Andrew Pinski (QUIC) 
> Subject: [COMMITTED] Fold: Fix up merge_truthop_with_opposite_arm for
> NaNs [PR95351]
> 
> The problem here is that merge_truthop_with_opposite_arm would use the
> type of the result of the comparison rather than the operands of the
> comparison to figure out if we are honoring NaNs.
> This fixes that oversight and now we get the correct results in this case.
> 
> Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

Committed to the GCC 13 branch too.

Thanks,
Andrew

> 
>   PR middle-end/95351
> 
> gcc/ChangeLog:
> 
>   * fold-const.cc (merge_truthop_with_opposite_arm): Use
>   the type of the operands of the comparison and not the type
>   of the comparison.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/float_opposite_arm-1.c: New test.
> 
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/fold-const.cc   |  3 ++-
>  gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
>  2 files changed, 19 insertions(+), 1 deletion(-)  create mode 100644
> gcc/testsuite/gcc.dg/float_opposite_arm-1.c
> 
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index
> 43105d20be3..299c22bf391 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -6420,7 +6420,6 @@ static tree
>  merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
>bool rhs_only)
>  {
> -  tree type = TREE_TYPE (cmpop);
>enum tree_code code = TREE_CODE (cmpop);
>enum tree_code truthop_code = TREE_CODE (op);
>tree lhs = TREE_OPERAND (op, 0);
> @@ -6436,6 +6435,8 @@ merge_truthop_with_opposite_arm (location_t
> loc, tree op, tree cmpop,
>if (TREE_CODE_CLASS (code) != tcc_comparison)
>  return NULL_TREE;
> 
> +  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
> +
>if (rhs_code == truthop_code)
>  {
>tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop,
> rhs_only); diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
> b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
> new file mode 100644
> index 000..d2dbff35066
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
> +/* { dg-add-options ieee } */
> +/* PR middle-end/95351 */
> +
> +int Foo(double possiblyNAN, double b, double c) {
> +return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c)); }
> +
> +/* Make sure we don't remove either >/<=  */
> +
> +/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
> +/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0"
> +"optimized" } } */
> +
> +/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } }
> +*/
> +/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0"
> +"optimized" } } */
> --
> 2.43.0

RE: [Committed] Reject -fno-multiflags [PR114314]

2024-03-12 Thread Andrew Pinski (QUIC)

> -Original Message-
> From: Andrew Pinski (QUIC) 
> Sent: Monday, March 11, 2024 8:59 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Andrew Pinski (QUIC) 
> Subject: [Committed] Reject -fno-multiflags [PR114314]
> 
> When -fmultiflags option support was added in r13-3693-g6b1a2474f9e422,
> it accidently allowed -fno-multiflags which then would pass on to cc1.
> This fixes that oversight.
> 
> Committed as obvious after bootstrap/test on x86_64-linux-gnu.

Note I also committed this to the GCC 13 branch too.

Thanks,
Andrew

> 
> gcc/ChangeLog:
> 
>   PR driver/114314
>   * common.opt (fmultiflags): Add RejectNegative.
> 
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/common.opt | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/common.opt b/gcc/common.opt index
> 51c4a17da83..1ad0169bd6f 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2295,7 +2295,7 @@ Common Var(flag_move_loop_stores)
> Optimization  Move stores out of loops.
> 
>  fmultiflags
> -Common Driver
> +Common Driver RejectNegative
>  Building block for specs-based multilib-aware TFLAGS.
> 
>  fdce
> --
> 2.43.0

[PATCH v3] RISC-V: Introduce gcc attribute riscv_rvv_vector_bits for RVV

2024-03-12 Thread pan2 . li

From: Pan Li 

Update in v3:
* Add pre-defined __riscv_v_fixed_vlen when zvl.

Update in v2:
* Cleanup some unused code.
* Fix some typo of commit log.

Original log:

This patch would like to introduce one new gcc attribute for RVV.
This attribute is used to define fixed-length variants of one
existing sizeless RVV types.

This attribute is valid if and only if the mrvv-vector-bits=zvl, the only
one args should be the integer constant and its' value is terminated
by the LMUL and the vector register bits in zvl*b.  For example:

typedef vint32m2_t fixed_vint32m2_t __attribute__((riscv_rvv_vector_bits(128)));

The above type define is valid when -march=rv64gc_zve64d_zvl64b
(aka 2(m2) * 64 = 128 for vin32m2_t), and will report error when
-march=rv64gcv_zvl128b similar to below.

"error: invalid RVV vector size '128', expected size is '256' based on
LMUL of type and '-mrvv-vector-bits=zvl'"

Meanwhile, a pre-define macro __riscv_v_fixed_vlen is introduced to
represent the fixed vlen in a RVV vector register.

For the vint*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, %, &, |, ^, >>, <<, ~, -

For the vfloat*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, -

For the vbool*_t types only below operations are allowed except
the CMP and ALU. The CMP and ALU operations on vbool*_t is not
well defined currently.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.

For the vint*x*m*_t tuple types are not suppored in this patch
which is compatible with clang.

This patch passed the below testsuites.
* The riscv fully regression tests.

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Add pre-define
macro __riscv_v_fixed_vlen when zvl.
* config/riscv/riscv.cc (riscv_handle_rvv_vector_bits_attribute):
New static func to take care of the RVV types decorated by
the attributes.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-11.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-12.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-13.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-14.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-15.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-16.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-17.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-2.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-3.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-4.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-5.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-6.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-7.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-8.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-9.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-c.cc   |   3 +
 gcc/config/riscv/riscv.cc |  87 +-
 .../riscv/rvv/base/riscv_rvv_vector_bits-1.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-10.c |  53 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-11.c |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits-12.c |  14 +++
 .../riscv/rvv/base/riscv_rvv_vector_bits-13.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-14.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-15.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-16.c |  11 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-17.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-2.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-3.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-4.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-5.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-6.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-7.c  |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits-8.c  |  75 
 .../riscv/rvv/base/riscv_rvv_vector_bits-9.c  |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits.h| 108 ++
 20 files changed, 653 insertions(+), 2 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c
 create mode

63 matches

Mail list logo