Re: Re: [Committed V2] RISC-V: Fix regression (GCC-14 compare with GCC-13.2) of SHA256 from coremark-pro

2024-01-25 Thread juzhe.zh...@rivai.ai
It's fixed by this commit: 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d40b3c1e439db05c835b6bd4fd5bba58fda71dd6




juzhe.zh...@rivai.ai
 
From: Edwin Lu
Date: 2024-01-17 09:45
To: juzhe.zh...@rivai.ai; gcc-patches
CC: Patrick O'Neill
Subject: Re: [Committed V2] RISC-V: Fix regression (GCC-14 compare with 
GCC-13.2) of SHA256 from coremark-pro
On 1/16/2024 5:41 PM, juzhe.zh...@rivai.ai wrote:
> Are you saying you are using glibc? I did the testing with newlib, and I 
> didn't see anything wrong.
> 
Yes, I'm seeing the problem using glibc. Looking at our postcommit ci 
reports, it appears to only affect linux rv32gcv.
> It seems that this patch triggers a latent bug in the VSETVL pass (even though 
> this patch doesn't change anything related to the VSETVL pass).
> 
> I will investigate it.
> 
> Thanks.
> 
Thanks!
 
Edwin
 


[PATCH v4 1/4] LoongArch: Merge template got_load_tls_{ld/gd/le/ie}.

2024-01-25 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_load_tls):
Load all types of tls symbols through one function.
(loongarch_got_load_tls_gd): Delete.
(loongarch_got_load_tls_ld): Delete.
(loongarch_got_load_tls_ie): Delete.
(loongarch_got_load_tls_le): Delete.
(loongarch_call_tls_get_addr): Modify the called function name.
(loongarch_legitimize_tls_address): Likewise.
* config/loongarch/loongarch.md (@got_load_tls_gd): Delete.
(@load_tls): New template.
(@got_load_tls_ld): Delete.
(@got_load_tls_le): Delete.
(@got_load_tls_ie): Delete.
---
 gcc/config/loongarch/loongarch.cc | 47 +---
 gcc/config/loongarch/loongarch.md | 59 ---
 2 files changed, 30 insertions(+), 76 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index dba1252c8f7..2f7de6f94d3 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2736,36 +2736,12 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT 
offset)
 /* The __tls_get_attr symbol.  */
 static GTY (()) rtx loongarch_tls_symbol;
 
-/* Load an entry from the GOT for a TLS GD access.  */
+/* Load an entry for a TLS access.  */
 
 static rtx
-loongarch_got_load_tls_gd (rtx dest, rtx sym)
+loongarch_load_tls (rtx dest, rtx sym)
 {
-  return gen_got_load_tls_gd (Pmode, dest, sym);
-}
-
-/* Load an entry from the GOT for a TLS LD access.  */
-
-static rtx
-loongarch_got_load_tls_ld (rtx dest, rtx sym)
-{
-  return gen_got_load_tls_ld (Pmode, dest, sym);
-}
-
-/* Load an entry from the GOT for a TLS IE access.  */
-
-static rtx
-loongarch_got_load_tls_ie (rtx dest, rtx sym)
-{
-  return gen_got_load_tls_ie (Pmode, dest, sym);
-}
-
-/* Add in the thread pointer for a TLS LE access.  */
-
-static rtx
-loongarch_got_load_tls_le (rtx dest, rtx sym)
-{
-  return gen_got_load_tls_le (Pmode, dest, sym);
+  return gen_load_tls (Pmode, dest, sym);
 }
 
 /* Return an instruction sequence that calls __tls_get_addr.  SYM is
@@ -2809,14 +2785,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
emit_insn (gen_tls_low (Pmode, a0, high, loc));
 }
   else
-{
-  if (type == SYMBOL_TLSLDM)
-   emit_insn (loongarch_got_load_tls_ld (a0, loc));
-  else if (type == SYMBOL_TLSGD)
-   emit_insn (loongarch_got_load_tls_gd (a0, loc));
-  else
-   gcc_unreachable ();
-}
+emit_insn (loongarch_load_tls (a0, loc));
 
   if (flag_plt)
 {
@@ -2953,10 +2922,10 @@ loongarch_legitimize_tls_address (rtx loc)
  /* la.tls.ie; tp-relative add.  */
  tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
  tmp1 = gen_reg_rtx (Pmode);
+ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
  dest = gen_reg_rtx (Pmode);
  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
  high = loongarch_force_temporary (tmp3, high);
@@ -2979,7 +2948,7 @@ loongarch_legitimize_tls_address (rtx loc)
emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
}
  else
-   emit_insn (loongarch_got_load_tls_ie (tmp1, loc));
+   emit_insn (loongarch_load_tls (tmp1, tmp2));
  emit_insn (gen_add3_insn (dest, tmp1, tp));
}
   break;
@@ -3011,11 +2980,11 @@ loongarch_legitimize_tls_address (rtx loc)
 
  tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
  tmp1 = gen_reg_rtx (Pmode);
+ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
  dest = gen_reg_rtx (Pmode);
 
  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
  high = loongarch_force_temporary (tmp3, high);
@@ -3043,7 +3012,7 @@ loongarch_legitimize_tls_address (rtx loc)
}
}
  else
-   emit_insn (loongarch_got_load_tls_le (tmp1, loc));
+   emit_insn (loongarch_load_tls (tmp1, tmp2));
  emit_insn (gen_add3_insn (dest, tmp1, tp));
}
   break;
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index dda3cdf8be5..0b61b013798 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -51,10 +51,7 @@ (define_c_enum "unspec" [
   UNSPEC_BITREV_8B
 
   ;; TLS
-  UNSPEC_TLS_GD
-  UNSPEC_TLS_LD
-  UNSPEC_TLS_LE
-  UNSPEC_TLS_IE
+  UNSPEC_TLS
 
   ;; Stack tie
   UNSPEC_TIE
@@ -2701,45 +2698,33 @@ (define_insn "store_word"
 
 ;; Thread-Local Storage
 
-(define_insn "@got_load_tls_gd"
+(define_insn "@load

[PATCH v4 2/4] LoongArch: Add the macro implementation of mcmodel=extreme.

2024-01-25 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch-protos.h (loongarch_symbol_extreme_p):
Add function declaration.
* config/loongarch/loongarch.cc (loongarch_symbolic_constant_p):
For SYMBOL_PCREL64, non-zero addend of "la.local $rd,$rt,sym+addend"
is not allowed
(loongarch_load_tls): Added macro support in extreme mode.
(loongarch_call_tls_get_addr): Likewise.
(loongarch_legitimize_tls_address): Likewise.
(loongarch_force_address): Likewise.
(loongarch_legitimize_move): Likewise.
(loongarch_output_mi_thunk): Likewise.
(loongarch_option_override_internal): Remove the code that detects
explicit relocs status.
(loongarch_handle_model_attribute): Likewise.
* config/loongarch/loongarch.md (movdi_symbolic_off64): New template.
* config/loongarch/predicates.md (symbolic_off64_operand): New 
predicate.
(symbolic_off64_or_reg_operand): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/attr-model-5.c: New test.
* gcc.target/loongarch/func-call-extreme-5.c: New test.
* gcc.target/loongarch/func-call-extreme-6.c: New test.
* gcc.target/loongarch/tls-extreme-macro.c: New test.
---
 gcc/config/loongarch/loongarch-protos.h   |   1 +
 gcc/config/loongarch/loongarch.cc | 108 +++---
 gcc/config/loongarch/loongarch.md |  42 +++
 gcc/config/loongarch/predicates.md|  12 ++
 .../gcc.target/loongarch/attr-model-5.c   |   8 ++
 .../loongarch/func-call-extreme-5.c   |   7 ++
 .../loongarch/func-call-extreme-6.c   |   7 ++
 .../gcc.target/loongarch/tls-extreme-macro.c  |  35 ++
 8 files changed, 177 insertions(+), 43 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index 9ffc92afead..1fdfda9af01 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -222,4 +222,5 @@ extern rtx loongarch_build_signbit_mask (machine_mode, 
bool, bool);
 extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool);
 extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode);
 extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type);
+extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type);
 #endif /* ! GCC_LOONGARCH_PROTOS_H */
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 2f7de6f94d3..4c64742f78b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1935,8 +1935,13 @@ loongarch_symbolic_constant_p (rtx x, enum 
loongarch_symbol_type *symbol_type)
  relocations.  */
   switch (*symbol_type)
 {
-case SYMBOL_PCREL:
 case SYMBOL_PCREL64:
+  /* When the code model is extreme, the non-zero offset situation
+has not been handled well, so it is disabled here now.  */
+  if (!loongarch_explicit_relocs_p (SYMBOL_PCREL64))
+   return false;
+/* fall through */
+case SYMBOL_PCREL:
   /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
   return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
 
@@ -2739,9 +2744,15 @@ static GTY (()) rtx loongarch_tls_symbol;
 /* Load an entry for a TLS access.  */
 
 static rtx
-loongarch_load_tls (rtx dest, rtx sym)
+loongarch_load_tls (rtx dest, rtx sym, enum loongarch_symbol_type type)
 {
-  return gen_load_tls (Pmode, dest, sym);
+  /* TLS LE gets a 32 or 64 bit offset here, so one register can do it.  */
+  if (type == SYMBOL_TLS_LE)
+return gen_load_tls (Pmode, dest, sym);
+
+  return loongarch_symbol_extreme_p (type) ?
+gen_movdi_symbolic_off64 (dest, sym, gen_reg_rtx (DImode))
+: gen_load_tls (Pmode, dest, sym);
 }
 
 /* Return an instruction sequence that calls __tls_get_addr.  SYM is
@@ -2773,8 +2784,6 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
   if (TARGET_CMODEL_EXTREME)
{
- gcc_assert (TARGET_EXPLICIT_RELOCS);
-
  rtx tmp1 = gen_reg_rtx (Pmode);
  emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc));
  emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc));
@@ -2785,7 +2794,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
emit_insn (gen_tls_low (Pmode, a0, high, loc));
 }
   else
-emit_insn (loongarch_load_tls (a0, loc));
+emit_insn (loongarch_load_tls (a0, loc, type));
 
   if (flag_plt)
 {
@@ -2852,22 +2861,26 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
case CMODEL_EXTREME:
 

[PATCH v4 4/4] LoongArch: Added support for loading __get_tls_addr symbol address using call36.

2024-01-25 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
Add support for call36.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c: 
New test.
---
 gcc/config/loongarch/loongarch.cc | 20 +--
 ...icit-relocs-medium-call36-auto-tls-ld-gd.c |  5 +
 2 files changed, 19 insertions(+), 6 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index b76e201c0ef..19bb37b0c04 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2807,17 +2807,25 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
case CMODEL_MEDIUM:
{
- rtx reg = gen_reg_rtx (Pmode);
  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
- emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol));
- rtx call = gen_call_value_internal_1 (Pmode, v0, reg,
-   loongarch_tls_symbol,
-   const0_rtx);
- insn = emit_call_insn (call);
+ rtx call;
+
+if (HAVE_AS_SUPPORT_CALL36)
+  call = gen_call_value_internal (v0, loongarch_tls_symbol, 
const0_rtx);
+else
+  {
+rtx reg = gen_reg_rtx (Pmode);
+emit_insn (gen_pcalau12i (Pmode, reg, 
loongarch_tls_symbol));
+call = gen_call_value_internal_1 (Pmode, v0, reg,
+  loongarch_tls_symbol,
+  const0_rtx);
+  }
+insn = emit_call_insn (call);
}
  else
{
+ rtx reg = gen_reg_rtx (Pmode);
  emit_move_insn (reg, loongarch_tls_symbol);
  insn = emit_call_insn (gen_call_value_internal (v0,
  reg,
diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
new file mode 100644
index 000..d1a4820834c
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */
+/* { dg-final { scan-assembler 
"pcaddu18i\t\\\$r1,%call36\\\(__tls_get_addr\\\)" { target { tls_native && 
loongarch_call36_support } } } } */
+
+#include "./explicit-relocs-auto-tls-ld-gd.c"
-- 
2.39.3



[PATCH v4 3/4] LoongArch: Enable explicit reloc for extreme TLS GD/LD with -mexplicit-relocs=auto.

2024-01-25 Thread Lulu Cheng
Binutils does not support relaxation using four instructions to obtain
symbol addresses.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_explicit_relocs_p):
When the code model of the symbol is extreme and -mexplicit-relocs=auto,
the macro instruction loading symbol address is not applicable.
(loongarch_call_tls_get_addr): Adjust code.
(loongarch_legitimize_tls_address): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c: New 
test.
* gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c: New 
test.
---
 gcc/config/loongarch/loongarch.cc | 19 +--
 .../explicit-relocs-extreme-auto-tls-ld-gd.c  |  5 +
 .../explicit-relocs-medium-auto-tls-ld-gd.c   |  5 +
 3 files changed, 19 insertions(+), 10 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 4c64742f78b..b76e201c0ef 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1971,6 +1971,10 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type 
type)
   if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
 return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;
 
+  /* The linker don't know how to relax accesses in extreme code model.  */
+  if (loongarch_symbol_extreme_p (type))
+return true;
+
   switch (type)
 {
   case SYMBOL_TLS_IE:
@@ -1982,11 +1986,6 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type 
type)
   does not relax 64-bit pc-relative accesses as at now.  */
return true;
   case SYMBOL_GOT_DISP:
-   /* The linker don't know how to relax GOT accesses in extreme
-  code model.  */
-   if (TARGET_CMODEL_EXTREME)
- return true;
-
/* If we are performing LTO for a final link, and we have the
   linker plugin so we know the resolution of the symbols, then
   all GOT references are binding to external symbols or
@@ -2776,7 +2775,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
   start_sequence ();
 
-  if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
+  if (loongarch_explicit_relocs_p (type))
 {
   /* Split tls symbol to high and low.  */
   rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
@@ -2809,7 +2808,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
case CMODEL_MEDIUM:
{
  rtx reg = gen_reg_rtx (Pmode);
- if (TARGET_EXPLICIT_RELOCS)
+ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
  emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol));
  rtx call = gen_call_value_internal_1 (Pmode, v0, reg,
@@ -2845,7 +2844,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
case CMODEL_NORMAL:
case CMODEL_MEDIUM:
{
- if (TARGET_EXPLICIT_RELOCS)
+ if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP))
{
  rtx high = gen_reg_rtx (Pmode);
  loongarch_emit_move (high,
@@ -2937,7 +2936,7 @@ loongarch_legitimize_tls_address (rtx loc)
  tmp1 = gen_reg_rtx (Pmode);
  tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
  dest = gen_reg_rtx (Pmode);
- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
+ if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE))
{
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
@@ -2994,7 +2993,7 @@ loongarch_legitimize_tls_address (rtx loc)
  tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
  dest = gen_reg_rtx (Pmode);
 
- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
+ if (loongarch_explicit_relocs_p (SYMBOL_TLS_LE))
{
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
new file mode 100644
index 000..27baf4886d6
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=extreme -fno-plt" } 
*/
+/* { dg-final { scan-assembler-not "la.tls.[lg]d" { target tls_native } } } */
+
+#include "./explicit-relocs-auto-tls-ld-gd.c"
diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c 
b/gcc/testsuite/gcc.target/loongarch/explicit-relo

[PATCH v4 0/4] When cmodel=extreme, add macro support and only support macros.

2024-01-25 Thread Lulu Cheng
v3 -> v4:
  1. Add macro support for TLS symbols
  2. Added support for loading __tls_get_addr symbol address using call36.
  3. Merge template got_load_tls_{ld/gd/le/ie}.
  4. Enable explicit reloc for extreme TLS GD/LD with -mexplicit-relocs=auto.


v2 -> v3:
  1. Modify the detection rules of a test case.

v1 -> v2:
  1. Use the temporarily allocated registers as intermediate registers to 
implement the extreme macro.
  2. Fixed bugs in v1 test cases.


Lulu Cheng (4):
  LoongArch: Merge template got_load_tls_{ld/gd/le/ie}.
  LoongArch: Add the macro implementation of mcmodel=extreme.
  LoongArch: Enable explicit reloc for extreme TLS GD/LD with
-mexplicit-relocs=auto.
  LoongArch: Added support for loading __get_tls_addr symbol address
using call36.

 gcc/config/loongarch/loongarch-protos.h   |   1 +
 gcc/config/loongarch/loongarch.cc | 182 +-
 gcc/config/loongarch/loongarch.md | 101 ++
 gcc/config/loongarch/predicates.md|  12 ++
 .../gcc.target/loongarch/attr-model-5.c   |   8 +
 .../explicit-relocs-extreme-auto-tls-ld-gd.c  |   5 +
 .../explicit-relocs-medium-auto-tls-ld-gd.c   |   5 +
 ...icit-relocs-medium-call36-auto-tls-ld-gd.c |   5 +
 .../loongarch/func-call-extreme-5.c   |   7 +
 .../loongarch/func-call-extreme-6.c   |   7 +
 .../gcc.target/loongarch/tls-extreme-macro.c  |  35 
 11 files changed, 239 insertions(+), 129 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-5.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c

-- 
2.39.3



Re: [PATCH v3] LoongArch: testsuite:Added additional vectorization "-mlsx" option.

2024-01-25 Thread Richard Biener
On Fri, Jan 26, 2024 at 7:23 AM chenxiaolong  wrote:
>
> gcc/testsuite/ChangeLog:

OK

> * gcc.dg/signbit-2.c: Added additional "-mlsx" compilation options.
> * gfortran.dg/graphite/vect-pr40979.f90: Ditto.
> * gfortran.dg/vect/fast-math-mgrid-resid.f: Ditto.
> ---
>  gcc/testsuite/gcc.dg/signbit-2.c   | 1 +
>  gcc/testsuite/gfortran.dg/graphite/vect-pr40979.f90| 1 +
>  gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f | 1 +
>  3 files changed, 3 insertions(+)
>
> diff --git a/gcc/testsuite/gcc.dg/signbit-2.c 
> b/gcc/testsuite/gcc.dg/signbit-2.c
> index 62bb4047d74..5511bb78149 100644
> --- a/gcc/testsuite/gcc.dg/signbit-2.c
> +++ b/gcc/testsuite/gcc.dg/signbit-2.c
> @@ -5,6 +5,7 @@
>  /* { dg-additional-options "-msse2 -mno-avx512f" { target { i?86-*-* 
> x86_64-*-* } } } */
>  /* { dg-additional-options "-march=armv8-a" { target aarch64_sve } } */
>  /* { dg-additional-options "-maltivec" { target powerpc_altivec_ok } } */
> +/* { dg-additional-options "-mlsx" { target loongarch_sx } } */
>  /* { dg-skip-if "no fallback for MVE" { arm_mve } } */
>
>  #include 
> diff --git a/gcc/testsuite/gfortran.dg/graphite/vect-pr40979.f90 
> b/gcc/testsuite/gfortran.dg/graphite/vect-pr40979.f90
> index a42290948c4..6f2ad1166a4 100644
> --- a/gcc/testsuite/gfortran.dg/graphite/vect-pr40979.f90
> +++ b/gcc/testsuite/gfortran.dg/graphite/vect-pr40979.f90
> @@ -1,6 +1,7 @@
>  ! { dg-do compile }
>  ! { dg-require-effective-target vect_double }
>  ! { dg-additional-options "-msse2" { target { { i?86-*-* x86_64-*-* } && 
> ilp32 } } }
> +! { dg-additional-options "-mlsx" { target { loongarch*-*-* } } }
>
>  module mqc_m
>  integer, parameter, private :: longreal = selected_real_kind(15,90)
> diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f 
> b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
> index 08965cc5e20..97b88821731 100644
> --- a/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
> +++ b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
> @@ -2,6 +2,7 @@
>  ! { dg-require-effective-target vect_double }
>  ! { dg-options "-O3 --param vect-max-peeling-for-alignment=0 
> -fpredictive-commoning -fdump-tree-pcom-details -std=legacy" }
>  ! { dg-additional-options "-mprefer-avx128" { target { i?86-*-* x86_64-*-* } 
> } }
> +! { dg-additional-options "-mlsx" { target { loongarch*-*-* } } }
>  ! { dg-additional-options "-mzarch" { target { s390*-*-* } } }
>
>  *** RESID COMPUTES THE RESIDUAL:  R = V - AU
> --
> 2.20.1
>


Re: [PATCH] testsuite/vect: Fix pr25413a.c expectations [PR109705]

2024-01-25 Thread Richard Biener
On Fri, Jan 26, 2024 at 6:01 AM Andrew Pinski  wrote:
>
> The 2 loops in octfapg_universe can and will be vectorized now
> after r14-333-g6d4b59a9356ac4 on targets that support multiplication
> in the long type. But the testcase does not check vect_long_mult for
> that, so this patch corrects that error and now the testcase passes correctly
> on aarch64-linux-gnu (with and without SVE).
>
> Built and tested on aarch64-linux-gnu (with and without SVE).

OK

> gcc/testsuite/ChangeLog:
>
> PR testsuite/109705
> * gcc.dg/vect/pr25413a.c: Expect 1 vectorized loops for 
> !vect_long_mult
> and 2 for vect_long_mult.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/testsuite/gcc.dg/vect/pr25413a.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr25413a.c 
> b/gcc/testsuite/gcc.dg/vect/pr25413a.c
> index ffb517c9ce0..905665e5dbe 100644
> --- a/gcc/testsuite/gcc.dg/vect/pr25413a.c
> +++ b/gcc/testsuite/gcc.dg/vect/pr25413a.c
> @@ -123,6 +123,8 @@ int main (void)
>return 0;
>  }
>
> -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
> +/* The second loop in octfapg_universe requires long multiply to do the 
> vectorization. */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target 
> { ! vect_long_mult } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target 
> vect_long_mult  } } } */
>  /* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 
> 1 "vect" { target { ! vector_alignment_reachable  } } } } */
>  /* { dg-final { scan-tree-dump-times "Alignment of access forced using 
> versioning" 1 "vect" { target { ! vector_alignment_reachable } } } } */
> --
> 2.39.3
>


Re: [patch] gcn/gcn-hsa.h: Always pass --amdhsa-code-object-version= in ASM_SPEC

2024-01-25 Thread Richard Biener
On Fri, Jan 26, 2024 at 12:04 AM Tobias Burnus  wrote:
>
> When targeting AMD GPUs, the LLVM assembler (and linker) are used.
>
> Two days ago LLVM changed the default for the AMDHSA code object
> version (COV) from 4 to 5.
>
> In principle, we do not care which COV is used as long as it works;
> unfortunately, "mkoffload.cc" also generates an object file directly,
> bypassing the AMD GPU compiler as it copies debugging data to that
> file. That object file must have the same COV version (ELF ABI version)
> as compiler + llvm-mc assembler generated files.
>
> In order to ensure those are the same, this patch forces the use of
> COV 4 instead of using the default. Once GCC requires LLVM >= 14
> instead of LLVM >= 13.0.1 we could change it. (Assuming that COV 5
> is sufficiently stable in LLVM 14.) - But for now COV 4 will do.
>
> If you wonder how this LLVM issue shows up, simply compile any OpenMP
> or OpenACC program with AMD GPU offloading and enable debugging ("-g"),
> e.g.
>   gcc -fopenmp -g test.f90 -foffload=amdgcn-amdhsa 
> -foffload-options=-march=gfx908
>
> With LLVM main (to become LLVM 18), you will then get the error:
>
>   ld: error: incompatible ABI version: /tmp/ccAKx5cz.mkoffload.dbg.o
>
> OK for mainline?

If you link against prebuilt objects with COV 5 it seems there's no way to
override the COV version GCC uses?  That is, do we want to add
a -mcode-object-version=... option to allow the user to override this
(and ABI_VERSION_SPEC honoring that, if specified and of course
mkoffload following suit)?

Otherwise looks OK in the meantime.

Richard.

> Tobias


Re: [Committed] RISC-V: Add regression test for vsetvl bug pr113429

2024-01-25 Thread juzhe.zh...@rivai.ai
This patch causes the following regression:

FAIL: gcc.target/riscv/rvv/vsetvl/pr113429.c   -O0  (test for excess errors)
FAIL: gcc.target/riscv/rvv/vsetvl/pr113429.c   -O1  (test for excess errors)
FAIL: gcc.target/riscv/rvv/vsetvl/pr113429.c   -O2  (test for excess errors)
FAIL: gcc.target/riscv/rvv/vsetvl/pr113429.c   -O2 -flto -fno-use-linker-plugin 
-flto-partition=none  (test for excess errors)
FAIL: gcc.target/riscv/rvv/vsetvl/pr113429.c   -O2 -flto -fuse-linker-plugin 
-fno-fat-lto-objects  (test for excess errors)
FAIL: gcc.target/riscv/rvv/vsetvl/pr113429.c   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess 
errors)
FAIL: gcc.target/riscv/rvv/vsetvl/pr113429.c   -O3 -g  (test for excess errors)
FAIL: gcc.target/riscv/rvv/vsetvl/pr113429.c   -Os  (test for excess errors)

I suggest you add :

/* { dg-require-effective-target rv64 } */
/* { dg-require-effective-target riscv_v } */



juzhe.zh...@rivai.ai
 
From: Patrick O'Neill
Date: 2024-01-24 09:20
To: juzhe.zh...@rivai.ai; gcc-patches
CC: kito.cheng; law; rdapp; vineetg
Subject: [Committed] RISC-V: Add regression test for vsetvl bug pr113429
The reduced testcase for pr113429 (cam4 failure) needed additional
modules so it wasn't committed.
The fuzzer found a C testcase that was also fixed with pr113429's fix.
Adding it as a regression test.
PR target/113429
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/vsetvl/pr113429.c: New test.
Signed-off-by: Patrick O'Neill 
---
 .../gcc.target/riscv/rvv/vsetvl/pr113429.c| 70 +++
 1 file changed, 70 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr113429.c
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr113429.c 
b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr113429.c
new file mode 100644
index 000..05c3eeecb94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr113429.c
@@ -0,0 +1,70 @@
+/* { dg-do run } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
+
+long a;
+int b, c, d, e, f, g;
+short h, i, j;
+static int k = 3;
+static int l = 6;
+int m[5][7];
+signed char n;
+int *const o = &c;
+
+signed char(p)(signed char p1, signed char q) {
+  return p1 / q;
+}
+
+void s(unsigned p1) {
+  b = (b ^ p1) & 255;
+}
+
+static long t() {
+  long u;
+  signed char v;
+  d = 1;
+  for (; d <= 4; d++) {
+j = 0;
+for (; j <= 4; j++) {
+  v = 0;
+  for (; v <= 4; v++) {
+if (m[v][v])
+  continue;
+c = 0;
+for (; c <= 4; c++) {
+  n = 0;
+  for (; n <= 4; n++) {
+int *w = &e;
+long r = v;
+u = r == 0 ? a : a % r;
+h |= u;
+*w = g;
+--m[n][c];
+f &= *o;
+  }
+}
+if (p((i < 3) ^ 9, k))
+  ;
+else if (v)
+  return 0;
+  }
+}
+  }
+  return 1;
+}
+
+static char x() {
+  for (;;) {
+t();
+if (l)
+  return 0;
+  }
+}
+
+int main() {
+  x();
+  s(e & 255);
+  if (b == 0)
+return 0;
+  else
+return 1;
+}
-- 
2.34.1



[Committed V2] RISC-V: Fix incorrect LCM delete bug [VSETVL PASS]

2024-01-25 Thread Juzhe-Zhong
This patch fixes the recently noticed bug in RV32 glibc.

We incorrectly deleted a vsetvl:

...
and a4,a4,a3
vmv.v.i v1,0 ---> Missing vsetvl causes an illegal 
instruction report.
vse8.v  v1,0(a5)

The root cause is that the laterin in LCM is incorrect.

  BB 358:
avloc: n_bits = 2, set = {}
kill: n_bits = 2, set = {}
antloc: n_bits = 2, set = {}
transp: n_bits = 2, set = {}
avin: n_bits = 2, set = {}
avout: n_bits = 2, set = {}
del: n_bits = 2, set = {}

which causes LCM to let BB 360 delete the vsetvl:

  BB 360:
avloc: n_bits = 2, set = {}
kill: n_bits = 2, set = {}
antloc: n_bits = 2, set = {}
transp: n_bits = 2, set = {0 1 }
avin: n_bits = 2, set = {}
avout: n_bits = 2, set = {}
del: n_bits = 2, set = {1}

Also, remove unknown vsetvl info from the local computation since it is unnecessary.

Tested on both RV32/RV64 no regression.

PR target/113469

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc 
(pre_vsetvl::compute_lcm_local_properties): Fix bug.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr113469.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc  | 19 +++
 .../gcc.target/riscv/rvv/autovec/pr113469.c   | 54 +++
 2 files changed, 64 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index da258b964fc..1a398f02596 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -2543,8 +2543,10 @@ pre_vsetvl::compute_lcm_local_properties ()
   vsetvl_info &header_info = block_info.get_entry_info ();
   vsetvl_info &footer_info = block_info.get_exit_info ();
   gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
-  add_expr (m_exprs, header_info);
-  add_expr (m_exprs, footer_info);
+  if (header_info.valid_p ())
+   add_expr (m_exprs, header_info);
+  if (footer_info.valid_p ())
+   add_expr (m_exprs, footer_info);
 }
 
   int num_exprs = m_exprs.length ();
@@ -2699,13 +2701,6 @@ pre_vsetvl::compute_lcm_local_properties ()
  }
 }
 
-  for (const bb_info *bb : crtl->ssa->bbs ())
-{
-  unsigned bb_index = bb->index ();
-  bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
-  bitmap_not (m_kill[bb_index], m_kill[bb_index]);
-}
-
   for (const bb_info *bb : crtl->ssa->bbs ())
 {
   unsigned bb_index = bb->index ();
@@ -2714,6 +2709,12 @@ pre_vsetvl::compute_lcm_local_properties ()
  bitmap_clear (m_antloc[bb_index]);
  bitmap_clear (m_transp[bb_index]);
}
+  /* Compute ae_kill for each basic block using:
+
+~(TRANSP | COMP)
+  */
+  bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
+  bitmap_not (m_kill[bb_index], m_kill[bb_index]);
 }
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
new file mode 100644
index 000..d1c118c02d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-vect-cost-model" } */
+
+struct a {
+ int b;
+ int c : 1;
+ int : 1;
+} d();
+typedef struct
+{
+ int e;
+ struct {
+   int f;
+ };
+} g;
+int i;
+char k, l, n;
+void *m;
+char *o;
+void h();
+char *j();
+void p(int buf, __builtin_va_list ab, int q) {
+ do {
+   void *r[] = {&&s, &&t, &&u, &&v, &&w};
+   int c;
+   goto *m;
+ s:
+   c = 1;
+   while (1) {
+   t:
+   u:
+   ae:
+ void *af = __builtin_va_arg(ab, void *);
+ h(p);
+ o = j(i);
+ if (o == 0)
+   goto ae;
+ l = 'S';
+ break;
+   v:
+ g ah;
+ __builtin_memset(&ah, '\0', sizeof(g));
+ h(n, __builtin_va_arg(ab, int), &ah);
+ break;
+   w:
+ if (__builtin_expect(q, 0))
+   c = 0;
+ struct a ai = {'S', c};
+ d(buf, ai, af);
+   }
+ } while (k);
+}
+
+/* { dg-final { scan-assembler-times 
{vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 2 } } */
-- 
2.36.3



[PATCH v3] LoongArch: testsuite:Added additional vectorization "-mlsx" option.

2024-01-25 Thread chenxiaolong
gcc/testsuite/ChangeLog:

* gcc.dg/signbit-2.c: Added additional "-mlsx" compilation options.
* gfortran.dg/graphite/vect-pr40979.f90: Ditto.
* gfortran.dg/vect/fast-math-mgrid-resid.f: Ditto.
---
 gcc/testsuite/gcc.dg/signbit-2.c   | 1 +
 gcc/testsuite/gfortran.dg/graphite/vect-pr40979.f90| 1 +
 gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f | 1 +
 3 files changed, 3 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
index 62bb4047d74..5511bb78149 100644
--- a/gcc/testsuite/gcc.dg/signbit-2.c
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -5,6 +5,7 @@
 /* { dg-additional-options "-msse2 -mno-avx512f" { target { i?86-*-* 
x86_64-*-* } } } */
 /* { dg-additional-options "-march=armv8-a" { target aarch64_sve } } */
 /* { dg-additional-options "-maltivec" { target powerpc_altivec_ok } } */
+/* { dg-additional-options "-mlsx" { target loongarch_sx } } */
 /* { dg-skip-if "no fallback for MVE" { arm_mve } } */
 
 #include 
diff --git a/gcc/testsuite/gfortran.dg/graphite/vect-pr40979.f90 
b/gcc/testsuite/gfortran.dg/graphite/vect-pr40979.f90
index a42290948c4..6f2ad1166a4 100644
--- a/gcc/testsuite/gfortran.dg/graphite/vect-pr40979.f90
+++ b/gcc/testsuite/gfortran.dg/graphite/vect-pr40979.f90
@@ -1,6 +1,7 @@
 ! { dg-do compile }
 ! { dg-require-effective-target vect_double }
 ! { dg-additional-options "-msse2" { target { { i?86-*-* x86_64-*-* } && ilp32 
} } }
+! { dg-additional-options "-mlsx" { target { loongarch*-*-* } } }
 
 module mqc_m
 integer, parameter, private :: longreal = selected_real_kind(15,90)
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f 
b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
index 08965cc5e20..97b88821731 100644
--- a/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
+++ b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
@@ -2,6 +2,7 @@
 ! { dg-require-effective-target vect_double }
 ! { dg-options "-O3 --param vect-max-peeling-for-alignment=0 
-fpredictive-commoning -fdump-tree-pcom-details -std=legacy" }
 ! { dg-additional-options "-mprefer-avx128" { target { i?86-*-* x86_64-*-* } } 
}
+! { dg-additional-options "-mlsx" { target { loongarch*-*-* } } }
 ! { dg-additional-options "-mzarch" { target { s390*-*-* } } }
 
 *** RESID COMPUTES THE RESIDUAL:  R = V - AU
-- 
2.20.1



[PATCH] aarch64: Fix/avoid undefinedness in aarch64_classify_index [PR100212]

2024-01-25 Thread Andrew Pinski
The problem here is that we don't check the return value of exact_log2
and always use that result as the shift amount. This fixes the issue by avoiding
the shift if the value was `-1` (which means the value was not an exact power
of 2); in this case we could check either whether the value was equal to -1 or
whether it was not equal to -1, because we then assign -1 to shift if the
constant value was not equal. I chose `!=` as it seemed to make it more obvious
what the code is doing.

Committed as obvious after a build/test for aarch64-linux-gnu.

gcc/ChangeLog:

PR target/100212
* config/aarch64/aarch64.cc (aarch64_classify_index): Avoid
undefined shift after the call to exact_log2.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/aarch64.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d2014ce1527..19c608bc3ed 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -10246,7 +10246,9 @@ aarch64_classify_index (struct aarch64_address_info 
*info, rtx x,
   type = ADDRESS_REG_UXTW;
   index = XEXP (XEXP (x, 0), 0);
   shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
-  if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0x << shift)
+  /* Avoid undefined code dealing with shift being -1. */
+  if (shift != -1
+ && INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0x << shift)
shift = -1;
 }
   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
-- 
2.39.3



[PATCH] testsuite/vect: Fix pr25413a.c expectations [PR109705]

2024-01-25 Thread Andrew Pinski
The 2 loops in octfapg_universe can and will be vectorized now
after r14-333-g6d4b59a9356ac4 on targets that support multiplication
in the long type. But the testcase does not check vect_long_mult for
that, so this patch corrects that error and now the testcase passes correctly
on aarch64-linux-gnu (with and without SVE).

Built and tested on aarch64-linux-gnu (with and without SVE).

gcc/testsuite/ChangeLog:

PR testsuite/109705
* gcc.dg/vect/pr25413a.c: Expect 1 vectorized loop for !vect_long_mult
and 2 for vect_long_mult.

Signed-off-by: Andrew Pinski 
---
 gcc/testsuite/gcc.dg/vect/pr25413a.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr25413a.c 
b/gcc/testsuite/gcc.dg/vect/pr25413a.c
index ffb517c9ce0..905665e5dbe 100644
--- a/gcc/testsuite/gcc.dg/vect/pr25413a.c
+++ b/gcc/testsuite/gcc.dg/vect/pr25413a.c
@@ -123,6 +123,8 @@ int main (void)
   return 0;
 } 
 
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* The second loop in octfapg_universe requires long multiply to do the 
vectorization. */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
! vect_long_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target 
vect_long_mult  } } } */
 /* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 
"vect" { target { ! vector_alignment_reachable  } } } } */
 /* { dg-final { scan-tree-dump-times "Alignment of access forced using 
versioning" 1 "vect" { target { ! vector_alignment_reachable } } } } */
-- 
2.39.3



Re: [PATCH v4] c++/modules: Emit definitions of ODR-used static members imported from modules [PR112899]

2024-01-25 Thread Jason Merrill

On 1/25/24 21:28, Nathaniel Shead wrote:

On Wed, Jan 24, 2024 at 03:24:42PM -0500, Jason Merrill wrote:

On 1/20/24 05:45, Nathaniel Shead wrote:

I also included
your change to only add class variable templates to 'pending_statics'
(and the normal 'static_decl's for non-class otherwise) as otherwise I
could imagine that they would cause issues with this later too.


That seems wrong; the 'static_decls' vec is just for checking that
static/inline variables got defined.

pending_statics has been used for template instantiations for a long time,
for non-module code; let's not mess with that in a modules patch.



OK, makes sense.


I know that there's been discussion about the correct ABI for inline
declarations, but personally I'd like to have this fixed for normal uses
in GCC14 at least, and we can revisit the specific cases where various
kinds of declarations are emitted in stage 1.


Makes sense.


P.S.  As I go to send this, I wonder if maybe something like
'note_static_member_variable' would be a clearer choice of name than
'note_static_storage_variable'?


Let's call it note_vague_linkage_variable, to go with _fn just above.



Sounds good.


-- >8 --

Static data members marked 'inline' should be emitted in TUs where they
are ODR-used.  We need to make sure that statics imported from modules
are correctly added to the 'pending_statics' map so that they get
emitted if needed, otherwise the attached testcase fails to link.


What about non-member variables marked inline, and non-member variable
template instantiations?

Jason



Non-member variables marked inline are already handled by 'static_decls'
via 'add_module_namespace_decl' and 'add_decl_to_level' during
stream-in, and then are later emitted from 'wrapup_namespace_globals'.

I'd assumed that non-member variable template instantiations would also
be handled here, but that turns out not to be the case, since the
instantiations themselves are not (of course) namespace-scope decls.
I've added a case to the tests for this.

Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?


OK.


-- >8 --

Static data members marked 'inline' should be emitted in TUs where they
are ODR-used.  We need to make sure that inlines imported from modules
are correctly added to the 'pending_statics' map so that they get
emitted if needed, otherwise the attached testcase fails to link.

PR c++/112899

gcc/cp/ChangeLog:

* cp-tree.h (note_variable_template_instantiation): Rename to...
(note_vague_linkage_variable): ...this.
* decl2.cc (note_variable_template_instantiation): Rename to...
(note_vague_linkage_variable): ...this.
* pt.cc (instantiate_decl): Rename usage of above function.
* module.cc (trees_in::read_var_def): Remember pending statics
that we stream in.

gcc/testsuite/ChangeLog:

* g++.dg/modules/init-4_a.C: New test.
* g++.dg/modules/init-4_b.C: New test.
* g++.dg/modules/init-6_a.H: New test.
* g++.dg/modules/init-6_b.C: New test.

Signed-off-by: Nathaniel Shead 
Reviewed-by: Patrick Palka 
Reviewed-by: Jason Merrill   
-/* As above, but for variable template instantiations.  */

+/* As above, but for variables.  */
  
  void

-note_variable_template_instantiation (tree decl)
+note_vague_linkage_variable (tree decl)
  {
vec_safe_push (pending_statics, decl);
  }
diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index f26b2265bce..6176801b7a7 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -11789,6 +11789,11 @@ trees_in::read_var_def (tree decl, tree maybe_template)
  DECL_INITIALIZED_P (decl) = true;
  if (maybe_dup && DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P 
(maybe_dup))
DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (decl) = true;
+ if (DECL_IMPLICIT_INSTANTIATION (decl)
+ || (DECL_CLASS_SCOPE_P (decl)
+ && !DECL_VTABLE_OR_VTT_P (decl)
+ && !DECL_TEMPLATE_INFO (decl)))
+   note_vague_linkage_variable (decl);
}
DECL_INITIAL (decl) = init;
if (!dyn_init)
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 74013533b0f..f5bf159a879 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -27300,7 +27300,7 @@ instantiate_decl (tree d, bool defer_ok, bool 
expl_inst_class_mem_p)
  {
set_instantiating_module (d);
if (variable_template_p (gen_tmpl))
-   note_variable_template_instantiation (d);
+   note_vague_linkage_variable (d);
instantiate_body (td, args, d, false);
  }
  
diff --git a/gcc/testsuite/g++.dg/modules/init-4_a.C b/gcc/testsuite/g++.dg/modules/init-4_a.C

new file mode 100644
index 000..e0eb97b474e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/init-4_a.C
@@ -0,0 +1,9 @@
+// PR c++/112899
+// { dg-additional-options "-fmodules-ts" }
+// { dg-module-cmi M }
+
+export module M;
+
+export struct A {
+  static constexpr int x = -1;
+};
diff --git a/gcc/testsuite/g++.dg/modules/init-4_b.C 
b

Re: [PATCH v2] c++: avoid -Wdangling-reference for std::span-like classes [PR110358]

2024-01-25 Thread Jason Merrill

On 1/25/24 20:36, Marek Polacek wrote:

Better version:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
Real-world experience shows that -Wdangling-reference triggers for
user-defined std::span-like classes a lot.  We can easily avoid that
by considering classes like

 template<typename T>
 struct Span {
   T* data_;
   std::size len_;
 };

to be std::span-like, and not warning for them.  Unlike the previous
patch, this one considers a non-union class template that has a pointer
data member and a trivial destructor as std::span-like.

PR c++/110358
PR c++/109640

gcc/cp/ChangeLog:

* call.cc (reference_like_class_p): Don't warn for std::span-like
classes.

gcc/ChangeLog:

* doc/invoke.texi: Update -Wdangling-reference description.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wdangling-reference18.C: New test.
* g++.dg/warn/Wdangling-reference19.C: New test.
* g++.dg/warn/Wdangling-reference20.C: New test.
---
  gcc/cp/call.cc| 18 
  gcc/doc/invoke.texi   | 14 +++
  .../g++.dg/warn/Wdangling-reference18.C   | 24 +++
  .../g++.dg/warn/Wdangling-reference19.C   | 25 +++
  .../g++.dg/warn/Wdangling-reference20.C   | 42 +++
  5 files changed, 123 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/warn/Wdangling-reference18.C
  create mode 100644 gcc/testsuite/g++.dg/warn/Wdangling-reference19.C
  create mode 100644 gcc/testsuite/g++.dg/warn/Wdangling-reference20.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 9de0d77c423..afd3e1ff024 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -14082,6 +14082,24 @@ reference_like_class_p (tree ctype)
return true;
  }
  
+  /* Avoid warning if CTYPE looks like std::span: it's a class template,

+ has a T* member, and a trivial destructor.  For example,
+
+  template
+  struct Span {
+   T* data_;
+   std::size len_;
+  };
+
+ is considered std::span-like.  */
+  if (NON_UNION_CLASS_TYPE_P (ctype)
+  && CLASSTYPE_TEMPLATE_INSTANTIATION (ctype)
+  && TYPE_HAS_TRIVIAL_DESTRUCTOR (ctype))
+for (tree field = next_aggregate_field (TYPE_FIELDS (ctype));
+field; field = next_aggregate_field (DECL_CHAIN (field)))
+  if (TYPE_PTR_P (TREE_TYPE (field)))
+   return true;
+
/* Some classes, such as std::tuple, have the reference member in its
   (non-direct) base class.  */
if (dfs_walk_once (TYPE_BINFO (ctype), class_has_reference_member_p_r,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6ec56493e59..e0ff18a86f5 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -3916,6 +3916,20 @@ where @code{std::minmax} returns @code{std::pair}, and
  both references dangle after the end of the full expression that contains
  the call to @code{std::minmax}.
  
+The warning does not warn for @code{std::span}-like classes.  We consider

+classes of the form:
+
+@smallexample
+template
+struct Span @{
+  T* data_;
+  std::size len_;
+@};
+@end smallexample
+
+as @code{std::span}-like; that is, the class is a non-union class template
+that has a pointer data member and a trivial destructor.
+
  This warning is enabled by @option{-Wall}.
  
  @opindex Wdelete-non-virtual-dtor

diff --git a/gcc/testsuite/g++.dg/warn/Wdangling-reference18.C 
b/gcc/testsuite/g++.dg/warn/Wdangling-reference18.C
new file mode 100644
index 000..e088c177769
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wdangling-reference18.C
@@ -0,0 +1,24 @@
+// PR c++/110358
+// { dg-do compile { target c++11 } }
+// { dg-options "-Wdangling-reference" }
+// Don't warn for std::span-like classes.
+
+template 
+struct Span {
+T* data_;
+int len_;
+
+[[nodiscard]] constexpr auto operator[](int n) const noexcept -> T& { 
return data_[n]; }
+[[nodiscard]] constexpr auto front() const noexcept -> T& { return 
data_[0]; }
+[[nodiscard]] constexpr auto back() const noexcept -> T& { return 
data_[len_ - 1]; }
+};
+
+auto get() -> Span;
+
+auto f() -> int {
+int const& a = get().front(); // { dg-bogus "dangling reference" }
+int const& b = get().back();  // { dg-bogus "dangling reference" }
+int const& c = get()[0];  // { dg-bogus "dangling reference" }
+
+return a + b + c;
+}
diff --git a/gcc/testsuite/g++.dg/warn/Wdangling-reference19.C 
b/gcc/testsuite/g++.dg/warn/Wdangling-reference19.C
new file mode 100644
index 000..053467d822f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wdangling-reference19.C
@@ -0,0 +1,25 @@
+// PR c++/110358
+// { dg-do compile { target c++11 } }
+// { dg-options "-Wdangling-reference" }
+// Like Wdangling-reference18.C but not actually a span-like class.
+
+template 
+struct Span {
+T* data_;
+int len_;
+~Span ();
+
+[[nodiscard]] constexpr auto operator[](int n) const noexcept -> T& { 
return data_[n]; }
+[[nodiscard]] constexp

Re: [PATCH] c++: implement [[gnu::non_owning]] [PR110358]

2024-01-25 Thread Marek Polacek
On Thu, Jan 25, 2024 at 08:37:36PM -0500, Marek Polacek wrote:
> +/* Handle a "non_owning" attribute; arguments as in
> +   struct attribute_spec.handler.  */
> +
> +tree
> +handle_non_owning_attribute (tree *node, tree name, tree args, int,
> +  bool *no_add_attrs)

I overlooked error: unused parameter 'args'.  Fixed.

Marek



[PATCH v4] c++/modules: Emit definitions of ODR-used static members imported from modules [PR112899]

2024-01-25 Thread Nathaniel Shead
On Wed, Jan 24, 2024 at 03:24:42PM -0500, Jason Merrill wrote:
> On 1/20/24 05:45, Nathaniel Shead wrote:
> > I also included
> > your change to only add class variable templates to 'pending_statics'
> > (and the normal 'static_decl's for non-class otherwise) as otherwise I
> > could imagine that they would cause issues with this later too.
> 
> That seems wrong; the 'static_decls' vec is just for checking that
> static/inline variables got defined.
> 
> pending_statics has been used for template instantiations for a long time,
> for non-module code; let's not mess with that in a modules patch.
> 

OK, makes sense.

> > I know that there's been discussion about the correct ABI for inline
> > declarations, but personally I'd like to have this fixed for normal uses
> > in GCC14 at least, and we can revisit the specific cases where various
> > kinds of declarations are emitted in stage 1.
> 
> Makes sense.
> 
> > P.S.  As I go to send this, I wonder if maybe something like
> > 'note_static_member_variable' would be a clearer choice of name than
> > 'note_static_storage_variable'?
> 
> Let's call it note_vague_linkage_variable, to go with _fn just above.
> 

Sounds good.

> > -- >8 --
> > 
> > Static data members marked 'inline' should be emitted in TUs where they
> > are ODR-used.  We need to make sure that statics imported from modules
> > are correctly added to the 'pending_statics' map so that they get
> > emitted if needed, otherwise the attached testcase fails to link.
> 
> What about non-member variables marked inline, and non-member variable
> template instantiations?
> 
> Jason
> 

Non-member variables marked inline are already handled by 'static_decls'
via 'add_module_namespace_decl' and 'add_decl_to_level' during
stream-in, and then are later emitted from 'wrapup_namespace_globals'.

I'd assumed that non-member variable template instantiations would also
be handled here, but that turns out not to be the case, since the
instantiations themselves are not (of course) namespace-scope decls.
I've added a case to the tests for this.

Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

-- >8 --

Static data members marked 'inline' should be emitted in TUs where they
are ODR-used.  We need to make sure that inlines imported from modules
are correctly added to the 'pending_statics' map so that they get
emitted if needed, otherwise the attached testcase fails to link.

PR c++/112899

gcc/cp/ChangeLog:

* cp-tree.h (note_variable_template_instantiation): Rename to...
(note_vague_linkage_variable): ...this.
* decl2.cc (note_variable_template_instantiation): Rename to...
(note_vague_linkage_variable): ...this.
* pt.cc (instantiate_decl): Rename usage of above function.
* module.cc (trees_in::read_var_def): Remember pending statics
that we stream in.

gcc/testsuite/ChangeLog:

* g++.dg/modules/init-4_a.C: New test.
* g++.dg/modules/init-4_b.C: New test.
* g++.dg/modules/init-6_a.H: New test.
* g++.dg/modules/init-6_b.C: New test.

Signed-off-by: Nathaniel Shead 
Reviewed-by: Patrick Palka 
Reviewed-by: Jason Merrill 
+struct __from_chars_alnum_to_val_table {
+  static inline int value = 42;
+};
+
+inline unsigned char
+__from_chars_alnum_to_val() {
+  return __from_chars_alnum_to_val_table::value;
+}
+
+template 
+static inline int nonclass_value = 42;
+
+inline unsigned char
+get_nonclass_val() {
+  return nonclass_value;
+}
diff --git a/gcc/testsuite/g++.dg/modules/init-6_b.C 
b/gcc/testsuite/g++.dg/modules/init-6_b.C
new file mode 100644
index 000..d704968ec37
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/init-6_b.C
@@ -0,0 +1,9 @@
+// { dg-module-do link }
+// { dg-additional-options "-fmodules-ts" }
+
+import "init-6_a.H";
+
+int main() {
+  __from_chars_alnum_to_val();
+  get_nonclass_val();
+}
-- 
2.43.0



[PATCH] c++: #pragma doesn't disable -Wunused-label [PR113582]

2024-01-25 Thread Marek Polacek
Low prio and not a regression.  Feel free to ignore till GCC 15.

Bootstrapped/regtested on x86_64-pc-linux-gnu.

-- >8 --
The PR complains that

  void do_something(){
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-label"
start:;
#pragma GCC diagnostic pop
  } #1

doesn't work.  That's because we call warn_for_unused_label only while we're
in finish_function, meaning we're at #1 where we're outside the #pragma
region.  We can use suppress_warning + warning_suppressed_p to fix this.

Note that I'm not using TREE_USED.  Propagating it in tsubst_stmt/LABEL_EXPR
from decl to label would mean that we don't warn in do_something2, but
I think we want the warning there: we're in a template and the goto is
a discarded statement.

PR c++/113582

gcc/c-family/ChangeLog:

* c-warn.cc (warn_for_unused_label): Don't warn if -Wunused-label has
been suppressed for the label.

gcc/cp/ChangeLog:

* parser.cc (cp_parser_label_for_labeled_statement): suppress_warning
if it's not enabled at input_location.
* pt.cc (tsubst_stmt): Call copy_warning.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wunused-label-4.C: New test.
---
 gcc/c-family/c-warn.cc  |  4 ++-
 gcc/cp/parser.cc|  6 -
 gcc/cp/pt.cc|  9 ---
 gcc/testsuite/g++.dg/warn/Wunused-label-4.C | 29 +
 4 files changed, 42 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/warn/Wunused-label-4.C

diff --git a/gcc/c-family/c-warn.cc b/gcc/c-family/c-warn.cc
index 8168696fa45..fdb5338f6f6 100644
--- a/gcc/c-family/c-warn.cc
+++ b/gcc/c-family/c-warn.cc
@@ -2186,7 +2186,9 @@ warn_for_unused_label (tree label)
 {
   if (!TREE_USED (label))
 {
-  if (DECL_INITIAL (label))
+  if (warning_suppressed_p (label, OPT_Wunused_label))
+   /* Don't warn.  */;
+  else if (DECL_INITIAL (label))
warning (OPT_Wunused_label, "label %q+D defined but not used", label);
   else
warning (OPT_Wunused_label, "label %q+D declared but not defined", 
label);
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 3748ccd49ff..224d47f2f90 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -13093,7 +13093,11 @@ cp_parser_label_for_labeled_statement (cp_parser* 
parser, tree attributes)
   /* Anything else must be an ordinary label.  */
   label = finish_label_stmt (cp_parser_identifier (parser));
   if (label && TREE_CODE (label) == LABEL_DECL)
-   FALLTHROUGH_LABEL_P (label) = fallthrough_p;
+   {
+ FALLTHROUGH_LABEL_P (label) = fallthrough_p;
+ if (!warning_enabled_at (input_location, OPT_Wunused_label))
+   suppress_warning (label, OPT_Wunused_label);
+   }
   break;
 }
 
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 74013533b0f..af9fd8f6f03 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -18796,11 +18796,12 @@ tsubst_stmt (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
 case LABEL_EXPR:
   {
tree decl = LABEL_EXPR_LABEL (t);
-   tree label;
-
-   label = finish_label_stmt (DECL_NAME (decl));
+   tree label = finish_label_stmt (DECL_NAME (decl));
if (TREE_CODE (label) == LABEL_DECL)
- FALLTHROUGH_LABEL_P (label) = FALLTHROUGH_LABEL_P (decl);
+ {
+   FALLTHROUGH_LABEL_P (label) = FALLTHROUGH_LABEL_P (decl);
+   copy_warning (label, decl);
+ }
if (DECL_ATTRIBUTES (decl) != NULL_TREE)
  cplus_decl_attributes (&label, DECL_ATTRIBUTES (decl), 0);
   }
diff --git a/gcc/testsuite/g++.dg/warn/Wunused-label-4.C 
b/gcc/testsuite/g++.dg/warn/Wunused-label-4.C
new file mode 100644
index 000..d194f043d21
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wunused-label-4.C
@@ -0,0 +1,29 @@
+// PR c++/113582
+// { dg-do compile { target c++17 } }
+// { dg-options "-Wunused-label" }
+
+template void
+do_something ()
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-label"
+start:
+  if constexpr(B)
+goto start;
+#pragma GCC diagnostic pop
+}
+
+template void
+do_something2 ()
+{
+start: // { dg-warning "defined but not used" }
+  if constexpr(B)
+goto start;
+}
+
+void
+g ()
+{
+  do_something<0>();
+  do_something2<0>();
+}

base-commit: f22a7ae8a96f7e5e330b12bd5045424619aa4926
-- 
2.43.0



[PATCH] c++: implement [[gnu::non_owning]] [PR110358]

2024-01-25 Thread Marek Polacek
Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
Since -Wdangling-reference has false positives that can't be
prevented, we should offer an easy way to suppress the warning.
Currently, that is only possible by using a #pragma, either around the
enclosing class or around the call site.  But #pragma GCC diagnostic tends
to be onerous.  A better solution would be to have an attribute.  Such
an attribute should not be tied to this particular warning though.  [*]

The warning bogusly triggers for classes that are like std::span,
std::reference_wrapper, and std::ranges::ref_view.  The common property
seems to be that these classes are only wrappers around some data.  So
I chose the name non_owning, but I'm not attached to it.  I hope that
in the future the attribute can be used for something other than this
diagnostic.

[*] As I'm typing this, it's occurring to me that we might consider
having a general attribute allowing users to do [[gnu::ignore("-Wfoo")]].

PR c++/110358
PR c++/109642

gcc/cp/ChangeLog:

* call.cc (do_warn_dangling_reference): Don't warn when the function
or its enclosing class has attribute gnu::non_owning.
* tree.cc (cxx_gnu_attributes): Add gnu::non_owning.
(handle_non_owning_attribute): New.

gcc/ChangeLog:

* doc/extend.texi: Document gnu::non_owning.
* doc/invoke.texi: Mention that gnu::non_owning disables
-Wdangling-reference.

gcc/testsuite/ChangeLog:

* g++.dg/ext/attr-non-owning1.C: New test.
* g++.dg/ext/attr-non-owning2.C: New test.
* g++.dg/ext/attr-non-owning3.C: New test.
* g++.dg/ext/attr-non-owning4.C: New test.
* g++.dg/ext/attr-non-owning5.C: New test.
---
 gcc/cp/call.cc  |  9 -
 gcc/cp/tree.cc  | 20 +++
 gcc/doc/extend.texi | 15 
 gcc/doc/invoke.texi | 21 
 gcc/testsuite/g++.dg/ext/attr-non-owning1.C | 38 +
 gcc/testsuite/g++.dg/ext/attr-non-owning2.C | 28 +++
 gcc/testsuite/g++.dg/ext/attr-non-owning3.C | 24 +
 gcc/testsuite/g++.dg/ext/attr-non-owning4.C | 14 
 gcc/testsuite/g++.dg/ext/attr-non-owning5.C | 29 
 9 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning1.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning2.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning3.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning4.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning5.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 9de0d77c423..88ddba825a9 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -14157,9 +14157,16 @@ do_warn_dangling_reference (tree expr, bool arg_p)
   but probably not to one of its arguments.  */
|| (DECL_OBJECT_MEMBER_FUNCTION_P (fndecl)
&& DECL_OVERLOADED_OPERATOR_P (fndecl)
-   && DECL_OVERLOADED_OPERATOR_IS (fndecl, INDIRECT_REF)))
+   && DECL_OVERLOADED_OPERATOR_IS (fndecl, INDIRECT_REF))
+   || lookup_attribute ("non_owning",
+TYPE_ATTRIBUTES (TREE_TYPE (fndecl
  return NULL_TREE;
 
+   if (tree ctx = CP_DECL_CONTEXT (fndecl))
+ if (TYPE_P (ctx)
+ && lookup_attribute ("non_owning", TYPE_ATTRIBUTES (ctx)))
+   return NULL_TREE;
+
tree rettype = TREE_TYPE (TREE_TYPE (fndecl));
/* If the function doesn't return a reference, don't warn.  This
   can be e.g.
diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc
index 77f57e0f9ac..2adf59b22d4 100644
--- a/gcc/cp/tree.cc
+++ b/gcc/cp/tree.cc
@@ -47,6 +47,7 @@ static tree verify_stmt_tree_r (tree *, int *, void *);
 static tree handle_init_priority_attribute (tree *, tree, tree, int, bool *);
 static tree handle_abi_tag_attribute (tree *, tree, tree, int, bool *);
 static tree handle_contract_attribute (tree *, tree, tree, int, bool *);
+static tree handle_non_owning_attribute (tree *, tree, tree, int, bool *);
 
 /* If REF is an lvalue, returns the kind of lvalue that REF is.
Otherwise, returns clk_none.  */
@@ -5096,6 +5097,8 @@ static const attribute_spec cxx_gnu_attributes[] =
 handle_init_priority_attribute, NULL },
   { "abi_tag", 1, -1, false, false, false, true,
 handle_abi_tag_attribute, NULL },
+  { "non_owning", 0, 0, false, true, false, false,
+handle_non_owning_attribute, NULL },
 };
 
 const scoped_attribute_specs cxx_gnu_attribute_table =
@@ -5385,6 +5388,23 @@ handle_contract_attribute (tree *ARG_UNUSED (node), tree 
ARG_UNUSED (name),
   return NULL_TREE;
 }
 
+/* Handle a "non_owning" attribute; arguments as in
+   struct attribute_spec.handler.  */
+
+tree
+handle_non_owning_attribute (tree *node, tree name, tree args, int,
+bool *no_add_attrs)
+{

Re: [PATCH v2] c++: avoid -Wdangling-reference for std::span-like classes [PR110358]

2024-01-25 Thread Marek Polacek
Better version:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
Real-world experience shows that -Wdangling-reference triggers for
user-defined std::span-like classes a lot.  We can easily avoid that
by considering classes like

template<typename T>
struct Span {
  T* data_;
  std::size len_;
};

to be std::span-like, and not warning for them.  Unlike the previous
patch, this one considers a non-union class template that has a pointer
data member and a trivial destructor as std::span-like.

PR c++/110358
PR c++/109640

gcc/cp/ChangeLog:

* call.cc (reference_like_class_p): Don't warn for std::span-like
classes.

gcc/ChangeLog:

* doc/invoke.texi: Update -Wdangling-reference description.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wdangling-reference18.C: New test.
* g++.dg/warn/Wdangling-reference19.C: New test.
* g++.dg/warn/Wdangling-reference20.C: New test.
---
 gcc/cp/call.cc| 18 
 gcc/doc/invoke.texi   | 14 +++
 .../g++.dg/warn/Wdangling-reference18.C   | 24 +++
 .../g++.dg/warn/Wdangling-reference19.C   | 25 +++
 .../g++.dg/warn/Wdangling-reference20.C   | 42 +++
 5 files changed, 123 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/warn/Wdangling-reference18.C
 create mode 100644 gcc/testsuite/g++.dg/warn/Wdangling-reference19.C
 create mode 100644 gcc/testsuite/g++.dg/warn/Wdangling-reference20.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 9de0d77c423..afd3e1ff024 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -14082,6 +14082,24 @@ reference_like_class_p (tree ctype)
return true;
 }
 
+  /* Avoid warning if CTYPE looks like std::span: it's a class template,
+ has a T* member, and a trivial destructor.  For example,
+
+  template
+  struct Span {
+   T* data_;
+   std::size len_;
+  };
+
+ is considered std::span-like.  */
+  if (NON_UNION_CLASS_TYPE_P (ctype)
+  && CLASSTYPE_TEMPLATE_INSTANTIATION (ctype)
+  && TYPE_HAS_TRIVIAL_DESTRUCTOR (ctype))
+for (tree field = next_aggregate_field (TYPE_FIELDS (ctype));
+field; field = next_aggregate_field (DECL_CHAIN (field)))
+  if (TYPE_PTR_P (TREE_TYPE (field)))
+   return true;
+
   /* Some classes, such as std::tuple, have the reference member in its
  (non-direct) base class.  */
   if (dfs_walk_once (TYPE_BINFO (ctype), class_has_reference_member_p_r,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6ec56493e59..e0ff18a86f5 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -3916,6 +3916,20 @@ where @code{std::minmax} returns @code{std::pair}, and
 both references dangle after the end of the full expression that contains
 the call to @code{std::minmax}.
 
+The warning does not warn for @code{std::span}-like classes.  We consider
+classes of the form:
+
+@smallexample
+template
+struct Span @{
+  T* data_;
+  std::size len_;
+@};
+@end smallexample
+
+as @code{std::span}-like; that is, the class is a non-union class template
+that has a pointer data member and a trivial destructor.
+
 This warning is enabled by @option{-Wall}.
 
 @opindex Wdelete-non-virtual-dtor
diff --git a/gcc/testsuite/g++.dg/warn/Wdangling-reference18.C 
b/gcc/testsuite/g++.dg/warn/Wdangling-reference18.C
new file mode 100644
index 000..e088c177769
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wdangling-reference18.C
@@ -0,0 +1,24 @@
+// PR c++/110358
+// { dg-do compile { target c++11 } }
+// { dg-options "-Wdangling-reference" }
+// Don't warn for std::span-like classes.
+
+template 
+struct Span {
+T* data_;
+int len_;
+
+[[nodiscard]] constexpr auto operator[](int n) const noexcept -> T& { 
return data_[n]; }
+[[nodiscard]] constexpr auto front() const noexcept -> T& { return 
data_[0]; }
+[[nodiscard]] constexpr auto back() const noexcept -> T& { return 
data_[len_ - 1]; }
+};
+
+auto get() -> Span;
+
+auto f() -> int {
+int const& a = get().front(); // { dg-bogus "dangling reference" }
+int const& b = get().back();  // { dg-bogus "dangling reference" }
+int const& c = get()[0];  // { dg-bogus "dangling reference" }
+
+return a + b + c;
+}
diff --git a/gcc/testsuite/g++.dg/warn/Wdangling-reference19.C 
b/gcc/testsuite/g++.dg/warn/Wdangling-reference19.C
new file mode 100644
index 000..053467d822f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wdangling-reference19.C
@@ -0,0 +1,25 @@
+// PR c++/110358
+// { dg-do compile { target c++11 } }
+// { dg-options "-Wdangling-reference" }
+// Like Wdangling-reference18.C but not actually a span-like class.
+
+template <typename T>
+struct Span {
+T* data_;
+int len_;
+~Span ();
+
+[[nodiscard]] constexpr auto operator[](int n) const noexcept -> T& { 
return data_[n]; }
+[[nodiscard]] constexpr auto front() const noexcept -> T& { return 
data_[0]; }
+[[nodi

Re: [x86 PATCH] PR target/106060: Improved SSE vector constant materialization.

2024-01-25 Thread Hongtao Liu
On Fri, Jan 26, 2024 at 3:03 AM Roger Sayle  wrote:
>
>
> Hi Hongtao,
> Many thanks for the review.  Here's a revised version of my patch
> that addresses (most of) the issues you've raised.  Firstly the
> handling of zero and all_ones in this function is mostly for
> completeness/documentation, these standard_sse_constant_p
> values are (currently/normally) handled elsewhere.  But I have
> added an "n_var == 0" optimization to ix86_expand_vector_init.
>
> As you've suggested I've added explicit TARGET_SSE2 tests where
> required, and for consistency I've also added support for AVX512's
> V16SImode.
>
> As you've predicted, the eventual goal is to move this after combine
> (or reload) using define_insn_and_split, but that requires a significant
> restructuring that should be done in steps.  This also interacts with
> a similar planned reorganization of TImode constant handling.  If
> all 128-bit (vector) constants are acceptable before combine, then
> STV has the freedom to choose V1TImode (and this broadcast
> functionality) to implement TImode operations on immediate
> constants.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline (in stage 1)?
Ok, thanks for handling this.
>
>
> 2024-01-25  Roger Sayle  
> Hongtao Liu  
>
> gcc/ChangeLog
> PR target/106060
> * config/i386/i386-expand.cc (enum ix86_vec_bcast_alg): New.
> (struct ix86_vec_bcast_map_simode_t): New type for table below.
> (ix86_vec_bcast_map_simode): Table of SImode constants that may
> be efficiently synthesized by a ix86_vec_bcast_alg method.
> (ix86_vec_bcast_map_simode_cmp): New comparator for bsearch.
> (ix86_vector_duplicate_simode_const): Efficiently synthesize
> V4SImode and V8SImode constants that duplicate special constants.
> (ix86_vector_duplicate_value): Attempt to synthesize "special"
> vector constants using ix86_vector_duplicate_simode_const.
> * config/i386/i386.cc (ix86_rtx_costs) : ABS of a
> vector integer mode costs with a single SSE instruction.
>
> gcc/testsuite/ChangeLog
> PR target/106060
> * gcc.target/i386/auto-init-8.c: Update test case.
> * gcc.target/i386/avx512fp16-3.c: Likewise.
> * gcc.target/i386/pr100865-9a.c: Likewise.
> * gcc.target/i386/pr101796-1.c: Likewise.
> * gcc.target/i386/pr106060-1.c: New test case.
> * gcc.target/i386/pr106060-2.c: Likewise.
> * gcc.target/i386/pr106060-3.c: Likewise.
> * gcc.target/i386/pr70314.c: Update test case.
> * gcc.target/i386/vect-shiftv4qi.c: Likewise.
> * gcc.target/i386/vect-shiftv8qi.c: Likewise.
>
>
> Roger
> --
>
> > -Original Message-
> > From: Hongtao Liu 
> > Sent: 17 January 2024 03:13
> > To: Roger Sayle 
> > Cc: gcc-patches@gcc.gnu.org; Uros Bizjak 
> > Subject: Re: [x86 PATCH] PR target/106060: Improved SSE vector constant
> > materialization.
> >
> > On Wed, Jan 17, 2024 at 5:59 AM Roger Sayle 
> > wrote:
> > >
> > >
> > > I thought I'd just missed the bug fixing season of stage3, but there
> > > appears to be a little latitude in early stage4 (for vector patches), so
> > > I'll post this now.
> > >
> > > This patch resolves PR target/106060 by providing efficient methods
> > > for materializing/synthesizing special "vector" constants on x86.
> > > Currently there are three methods of materializing a vector constant;
> > > the most general is to load a vector from the constant pool, secondly
> > "duplicated"
> > > constants can be synthesized by moving an integer between units and
> > > broadcasting (or shuffling it), and finally the special cases of the
> > > all-zeros vector and all-ones vectors can be loaded via a single SSE
> > > instruction.   This patch handles additional cases that can be synthesized
> > > in two instructions, loading an all-ones vector followed by another
> > > SSE instruction.  Following my recent patch for PR target/112992,
> > > there's conveniently a single place in i386-expand.cc where these
> > > special cases can be handled.
> > >
> > > Two examples are given in the original bugzilla PR for 106060.
> > >
> > > __m256i
> > > should_be_cmpeq_abs ()
> > > {
> > >   return _mm256_set1_epi8 (1);
> > > }
> > >
> > > is now generated (with -O3 -march=x86-64-v3) as:
> > >
> > > vpcmpeqd%ymm0, %ymm0, %ymm0
> > > vpabsb  %ymm0, %ymm0
> > > ret
> > >
> > > and
> > >
> > > __m256i
> > > should_be_cmpeq_add ()
> > > {
> > >   return _mm256_set1_epi8 (-2);
> > > }
> > >
> > > is now generated as:
> > >
> > > vpcmpeqd%ymm0, %ymm0, %ymm0
> > > vpaddb  %ymm0, %ymm0, %ymm0
> > > ret
> > >
> > > This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> > > and make -k check, both with and without --target_board=unix{-m32}
> > > with no ne

[Patch-2, rs6000] Eliminate unnecessary byte swaps for duplicated constant vector store [PR113325]

2024-01-25 Thread HAO CHEN GUI
Hi,
  This patch creates an insn_and_split pattern which helps the duplicated
constant vector replace the source pseudo of store insn in fwprop pass.
Thus the store can be implemented by a single stxvd2x and it eliminates the
unnecessary byte swap insn on P8 LE. The test case shows the optimization.

  The patch depends on the first generic patch which uses insn cost in fwprop.

  Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
regressions.

Thanks
Gui Haochen


ChangeLog
rs6000: Eliminate unnecessary byte swaps for duplicated constant vector store

gcc/
PR target/113325
* config/rs6000/predicates.md (duplicate_easy_altivec_constant): New.
* config/rs6000/vsx.md (vsx_stxvd2x4_le_const_): New.

gcc/testsuite/
PR target/113325
* gcc.target/powerpc/pr113325.c: New.


patch.diff
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index ef7d3f214c4..8ab6db630b7 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -759,6 +759,14 @@ (define_predicate "easy_vector_constant"
   return false;
 })

+;; Return 1 if it's a duplicated easy_altivec_constant.
+(define_predicate "duplicate_easy_altivec_constant"
+  (and (match_code "const_vector")
+   (match_test "easy_altivec_constant (op, mode)"))
+{
+  return const_vec_duplicate_p (op);
+})
+
 ;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF.
 (define_predicate "easy_vector_constant_add_self"
   (and (match_code "const_vector")
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 26fa32829af..98e4be26f64 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3362,6 +3362,29 @@ (define_insn "*vsx_stxvd2x4_le_"
   "stxvd2x %x1,%y0"
   [(set_attr "type" "vecstore")])

+(define_insn_and_split "vsx_stxvd2x4_le_const_"
+  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
+   (match_operand:VSX_W 1 "duplicate_easy_altivec_constant" "W"))]
+  "!BYTES_BIG_ENDIAN
+   && VECTOR_MEM_VSX_P (mode)
+   && !TARGET_P9_VECTOR"
+  "#"
+  "&& 1"
+  [(set (match_dup 2)
+   (match_dup 1))
+   (set (match_dup 0)
+   (vec_select:VSX_W
+ (match_dup 2)
+ (parallel [(const_int 2) (const_int 3)
+(const_int 0) (const_int 1)])))]
+{
+  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
+: operands[1];
+
+}
+  [(set_attr "type" "vecstore")
+   (set_attr "length" "8")])
+
 (define_insn "*vsx_stxvd2x8_le_V8HI"
   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
 (vec_select:V8HI
diff --git a/gcc/testsuite/gcc.target/powerpc/pr113325.c 
b/gcc/testsuite/gcc.target/powerpc/pr113325.c
new file mode 100644
index 000..dff68ac0a51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr113325.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -mvsx" } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-final { scan-assembler-not {\mxxpermdi\M} } } */
+
+void* foo (void* s1)
+{
+  return __builtin_memset (s1, 0, 32);
+}


[PATCH-1] fwprop: Replace rtx_cost with insn_cost in try_fwprop_subst_pattern [PR113325]

2024-01-25 Thread HAO CHEN GUI
Hi,
  This patch replaces rtx_cost with insn_cost in forward propagation.
In the PR, one constant vector should be propagated and replace a
pseudo in a store insn if we know it's a duplicated constant vector.
It reduces the insn cost but not rtx cost. In this case, the kind of
destination operand (memory or pseudo) decides the cost and rtx cost
can't reflect it.

  The test case is added in the second target specific patch.

  Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
regressions. Is it OK for next stage 1?

Thanks
Gui Haochen


ChangeLog
fwprop: Replace rtx_cost with insn_cost in try_fwprop_subst_pattern

gcc/
PR target/113325
* fwprop.cc (try_fwprop_subst_pattern): Replace rtx_cost with
insn_cost.


patch.diff
diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc
index 0707a234726..b05b2538edc 100644
--- a/gcc/fwprop.cc
+++ b/gcc/fwprop.cc
@@ -467,20 +467,17 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, 
insn_change &use_change,
   redo_changes (0);
 }

-  /* ??? In theory, it should be better to use insn costs rather than
- set_src_costs here.  That would involve replacing this code with
- change_is_worthwhile.  */
   bool ok = recog (attempt, use_change);
   if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
-if (rtx use_set = single_set (use_rtl))
+if (single_set (use_rtl))
   {
bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
+   auto new_cost = insn_cost (use_rtl, speed);
temporarily_undo_changes (0);
-   auto old_cost = set_src_cost (SET_SRC (use_set),
- GET_MODE (SET_DEST (use_set)), speed);
+   /* Invalidate recog data.  */
+   INSN_CODE (use_rtl) = -1;
+   auto old_cost = insn_cost (use_rtl, speed);
redo_changes (0);
-   auto new_cost = set_src_cost (SET_SRC (use_set),
- GET_MODE (SET_DEST (use_set)), speed);
if (new_cost > old_cost)
  {
if (dump_file)


Re: [PATCH] c++: Fix up build_m_component_ref [PR113599]

2024-01-25 Thread Jason Merrill

On 1/25/24 14:14, Jakub Jelinek wrote:

Hi!

The following testcase reduced from GDB is miscompiled starting with
r14-5503 PR112427 change.
The problem is in the build_m_component_ref hunk, which changed
-  datum = fold_build_pointer_plus (fold_convert (ptype, datum), component);
+  datum = cp_convert (ptype, datum, complain);
+  if (!processing_template_decl)
+   datum = build2 (POINTER_PLUS_EXPR, ptype,
+   datum, convert_to_ptrofftype (component));
+  datum = cp_fully_fold (datum);
Component is e, (sizetype) e is 16, offset of c inside of C.
ptype is A *, pointer to type of C::c and datum is &d.
Now, previously the above created ((A *) &d) p+ (sizetype) e which is correct,
but in the new code cp_convert sees that C has A as base class and
instead of returning (A *) &d, it returns &d.D.2800 where D.2800 is
the FIELD_DECL for the A base at offset 8 into C.
So, instead of computing ((A *) &d) p+ (sizetype) e it computes
&d.D.2800 p+ (sizetype) e, which is ((A *) &d) p+ 24.

The following patch fixes it by using convert instead of cp_convert which
eventually calls build_nop (ptype, datum).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


OK


2024-01-25  Jakub Jelinek  

PR c++/113599
* typeck2.cc (build_m_component_ref): Use convert instead of
cp_convert for pointer conversion.

* g++.dg/expr/ptrmem11.C: New test.

--- gcc/cp/typeck2.cc.jj2024-01-03 12:01:23.672476417 +0100
+++ gcc/cp/typeck2.cc   2024-01-25 14:11:40.656361310 +0100
@@ -2378,7 +2378,7 @@ build_m_component_ref (tree datum, tree
/* Build an expression for "object + offset" where offset is the
 value stored in the pointer-to-data-member.  */
ptype = build_pointer_type (type);
-  datum = cp_convert (ptype, datum, complain);
+  datum = convert (ptype, datum);
if (!processing_template_decl)
datum = build2 (POINTER_PLUS_EXPR, ptype,
datum, convert_to_ptrofftype (component));
--- gcc/testsuite/g++.dg/expr/ptrmem11.C.jj 2024-01-25 14:13:11.736089567 
+0100
+++ gcc/testsuite/g++.dg/expr/ptrmem11.C2024-01-25 14:18:47.720398222 
+0100
@@ -0,0 +1,17 @@
+// PR c++/113599
+// { dg-do run }
+
+struct A { void *a; };
+struct B { void *b; };
+struct C : public B, public A { A c; };
+static C d;
+
+int
+main ()
+{
+  A C::*e = &C::c;
+  A *f = &(d.*e);
+  A *g = &d.c;
+  if (f != g)
+__builtin_abort ();
+}

Jakub





[patch] gcn/gcn-hsa.h: Always pass --amdhsa-code-object-version= in ASM_SPEC

2024-01-25 Thread Tobias Burnus

When targeting AMD GPUs, the LLVM assembler (and linker) are used.

Two days ago LLVM changed the default for the AMDHSA code object version (COV) from 4 to 5. In principle, we do not 
care which COV is used as long as it works; unfortunately, 
"mkoffload.cc" also generates an object file directly, bypassing the AMD 
GPU compiler as it copies debugging data to that file. That object file 
must have the same COV version (ELF ABI version) as compiler + llvm-mc 
assembler generated files. In order to ensure those are the same, this 
patch forces the use of COV 4 instead of using the default. Once GCC 
requires LLVM >= 14 instead of LLVM >= 13.0.1 we could change it. 
(Assuming that COV 5 is sufficiently stable in LLVM 14.) - But for now 
COV 4 will do.

If you wonder how this LLVM issue shows up, simply compile any OpenMP
or OpenACC program with AMD GPU offloading and enable debugging ("-g"),
e.g.
  gcc -fopenmp -g test.f90 -foffload=amdgcn-amdhsa 
-foffload-options=-march=gfx908

With LLVM main (to become LLVM 18), you will then get the error:

  ld: error: incompatible ABI version: /tmp/ccAKx5cz.mkoffload.dbg.o

OK for mainline?

Tobias
gcn/gcn-hsa.h: Always pass --amdhsa-code-object-version= in ASM_SPEC

Since LLVM commit 082f87c9d418 (Pull Req. #79038; will become LLVM 18)
  "[AMDGPU] Change default AMDHSA Code Object version to 5"
the default - when no --amdhsa-code-object-version= is used - was bumped.

Using --amdhsa-code-object-version=5 is supported (with unknown limitations)
since LLVM 14. GCC required for proper support at least LLVM 13.0.1 such
that explicitly using COV5 is not possible.

Unfortunately, the COV number matters for debugging ("-g") as mkoffload.cc
extracts debugging data from the host's object file and writes into an
AMD GPU object file it creates. And all object files linked together
must have the same ABI version. 

gcc/ChangeLog:

	* config/gcn/gcn-hsa.h (ABI_VERSION_SPEC): New; creates the
	"--amdhsa-code-object-version=" argument.
	(ASM_SPEC): Use it; replace previous version of it.

Signed-off-by: Tobias Burnus 

diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index f5de0d2969f..e5b93f7d9e5 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -75,6 +75,21 @@ extern unsigned int gcn_local_sym_hash (const char *name);
supported for gcn.  */
 #define GOMP_SELF_SPECS ""
 
+/* Explicitly set the ABI version; in principle, we could use just the
+   default; however, when debugging symbols are turned on, mkoffload.cc
+   writes a new AMD GPU object file and the ABI version needs to be the
+   same. - LLVM <= 17 defaults to 4 while LLVM >= 18 defaults to 5.
+   GCC supports LLVM >= 13.0.1 and only LLVM >= 14 supports version 5.
+   Note that Fiji is only supported with LLVM <= 17 as version 3 is no longer
+   supported in LLVM >= 18.  */
+#define ABI_VERSION_SPEC "march=fiji:--amdhsa-code-object-version=3;" \
+			 "!march=*|march=*:--amdhsa-code-object-version=4"
+
+/* Note that the XNACK and SRAM-ECC settings must match those in mkoffload.cc
+   as the latter creates new ELF object file when debugging is enabled and
+   the ELF flags (e_flags) of that generated file must be identical to those
+   generated by the compiler.  */
+
 #define NO_XNACK "march=fiji:;march=gfx1030:;march=gfx1100:;" \
 /* These match the defaults set in gcn.cc.  */ \
 "!mxnack*|mxnack=default:%{march=gfx900|march=gfx906|march=gfx908:-mattr=-xnack};"
@@ -88,7 +103,7 @@ extern unsigned int gcn_local_sym_hash (const char *name);
 /* Use LLVM assembler and linker options.  */
 #define ASM_SPEC  "-triple=amdgcn--amdhsa "  \
 		  "%{march=*:-mcpu=%*} " \
-		  "%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \
+		  "%{" ABI_VERSION_SPEC "} " \
 		  "%{" NO_XNACK XNACKOPT "} " \
 		  "%{" NO_SRAM_ECC SRAMOPT "} " \
 		  "%{march=gfx1030|march=gfx1100:-mattr=+wavefrontsize64} " \


ping: [PATCH] c-family: Fix ICE with large column number after restoring a PCH [PR105608]

2024-01-25 Thread Lewis Hyatt
Hello-

May I please ping this small patch? Thanks
https://gcc.gnu.org/pipermail/gcc-patches/2023-December/639467.html

-Lewis

On Wed, Dec 20, 2023 at 8:02 PM Lewis Hyatt  wrote:
>
> Hello-
>
> May I please ping this PCH patch? Thanks!
> https://gcc.gnu.org/pipermail/gcc-patches/2023-December/639467.html
>
> -Lewis
>
> On Tue, Dec 5, 2023 at 8:52 PM Lewis Hyatt  wrote:
> >
> > Hello-
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105608
> >
> > There are two related issues here really, a regression since GCC 11 where we
> > can ICE after restoring a PCH, and a deeper issue with bogus locations
> > assigned to macros that were defined prior to restoring a PCH.  This patch
> > fixes the ICE regression with a simple change, and I think it's appropriate
> > for GCC 14 as well as backport to 11, 12, 13. The bad locations (wrong, but
> > not generally causing an ICE, and mostly affecting only the output of
> > -Wunused-macros) are not as problematic, and will be harder to fix. I could
> > take a stab at that for GCC 15. In the meantime the patch adds XFAILed
> > tests for the wrong locations (as well as passing tests for the regression
> > fix). Does it look OK please? Bootstrap + regtest all languages on x86-64
> > Linux. Thanks!
> >
> > -Lewis
> >
> > -- >8 --
> >
> > Users are allowed to define macros prior to restoring a precompiled header
> > file, as long as those macros are not defined (or are defined identically)
> > in the PCH.  However, the PCH restoration process destroys all the macro
> > definitions, so libcpp has to record them before restoring the PCH and then
> > redefine them afterward.
> >
> > This process does not currently assign great locations to the macros after
> > redefining them. Some work is needed to also remember the original locations
> > and get the line_maps instance in the right state (since, like all other
> > data structures, the line_maps instance is also reset after restoring a 
> > PCH).
> > The new testcase line-map-3.C contains XFAILed examples where the locations
> > are wrong.
> >
> > This patch addresses a more pressing issue, which is that we ICE in some
> > cases since GCC 11, hitting an assert in line-maps.cc. It happens if the
> > first line encountered after the PCH restore requires an LC_RENAME map, such
> > as will happen if the line is sufficiently long.  This is much easier to
> > fix, since we just need to call linemap_line_start before asking libcpp to
> > redefine the stored macros, instead of afterward, to avoid the unexpected
> > need for an LC_RENAME before an LC_ENTER has been seen.
> >
> > gcc/c-family/ChangeLog:
> >
> > PR preprocessor/105608
> > * c-pch.cc (c_common_read_pch): Start a new line map before asking
> > libcpp to restore macros defined prior to reading the PCH, instead
> > of afterward.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR preprocessor/105608
> > * g++.dg/pch/line-map-1.C: New test.
> > * g++.dg/pch/line-map-1.Hs: New test.
> > * g++.dg/pch/line-map-2.C: New test.
> > * g++.dg/pch/line-map-2.Hs: New test.
> > * g++.dg/pch/line-map-3.C: New test.
> > * g++.dg/pch/line-map-3.Hs: New test.
> > ---
> >  gcc/c-family/c-pch.cc  |  5 ++---
> >  gcc/testsuite/g++.dg/pch/line-map-1.C  |  4 
> >  gcc/testsuite/g++.dg/pch/line-map-1.Hs |  1 +
> >  gcc/testsuite/g++.dg/pch/line-map-2.C  |  6 ++
> >  gcc/testsuite/g++.dg/pch/line-map-2.Hs |  1 +
> >  gcc/testsuite/g++.dg/pch/line-map-3.C  | 23 +++
> >  gcc/testsuite/g++.dg/pch/line-map-3.Hs |  1 +
> >  7 files changed, 38 insertions(+), 3 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.dg/pch/line-map-1.C
> >  create mode 100644 gcc/testsuite/g++.dg/pch/line-map-1.Hs
> >  create mode 100644 gcc/testsuite/g++.dg/pch/line-map-2.C
> >  create mode 100644 gcc/testsuite/g++.dg/pch/line-map-2.Hs
> >  create mode 100644 gcc/testsuite/g++.dg/pch/line-map-3.C
> >  create mode 100644 gcc/testsuite/g++.dg/pch/line-map-3.Hs
> >
> > diff --git a/gcc/c-family/c-pch.cc b/gcc/c-family/c-pch.cc
> > index 2f014fca210..9ee6f179002 100644
> > --- a/gcc/c-family/c-pch.cc
> > +++ b/gcc/c-family/c-pch.cc
> > @@ -342,6 +342,8 @@ c_common_read_pch (cpp_reader *pfile, const char *name,
> >gt_pch_restore (f);
> >cpp_set_line_map (pfile, line_table);
> >rebuild_location_adhoc_htab (line_table);
> > +  line_table->trace_includes = saved_trace_includes;
> > +  linemap_add (line_table, LC_ENTER, 0, saved_loc.file, saved_loc.line);
> >
> >timevar_push (TV_PCH_CPP_RESTORE);
> >if (cpp_read_state (pfile, name, f, smd) != 0)
> > @@ -355,9 +357,6 @@ c_common_read_pch (cpp_reader *pfile, const char *name,
> >
> >fclose (f);
> >
> > -  line_table->trace_includes = saved_trace_includes;
> > -  linemap_add (line_table, LC_ENTER, 0, saved_loc.file, saved_loc.line);
> > -
> >/* Give the front end a chance to take action after a PCH file has
> >   been loaded

Re: [PATCH 5/4] libbacktrace: improve getting debug information for loaded dlls

2024-01-25 Thread Ian Lance Taylor
On Thu, Jan 25, 2024 at 11:53 AM Björn Schäpers  wrote:
>
> Am 23.01.2024 um 23:37 schrieb Ian Lance Taylor:
> > On Thu, Jan 4, 2024 at 2:33 PM Björn Schäpers  wrote:
> >>
> >> Am 03.01.2024 um 00:12 schrieb Björn Schäpers:
> >>> Am 30.11.2023 um 20:53 schrieb Ian Lance Taylor:
>  On Fri, Jan 20, 2023 at 2:55 AM Björn Schäpers  wrote:
> >
> > From: Björn Schäpers 
> >
> > Fixes https://github.com/ianlancetaylor/libbacktrace/issues/53, except
> > that libraries loaded after the backtrace_initialize are not handled.
> > But as far as I can see that's the same for elf.
> 
>  Thanks, but I don't want a patch that loops using goto statements.
>  Please rewrite to avoid that.  It may be simpler to call a function.
> 
>  Also starting with a module count of 1000 seems like a lot.  Do
>  typical Windows programs load that many modules?
> 
>  Ian
> 
> 
> >>>
> >>> Rewritten using a function.
> >>>
> > >>> If that is committed, could you attribute that commit to me 
> >>> (--author="Björn
> >>> Schäpers ")?
> >>>
> >>> Thanks and kind regards,
> >>> Björn.
> >>
> >> I noticed that under 64 bit libraries loaded with LoadLibrary were missing.
> > >> EnumProcessModules stated the correct number of modules, but did not fill
> > >> the HMODULEs, but set them to 0. While trying to investigate I noticed if 
> >> I do
> >> the very same thing from main (in C++) I even got fewer module HMODULEs.
> >>
> >> So I went a different way. This detects all libraries correctly, in 32 and 
> >> 64
> >> bit. The question is, if it should be a patch on top of the previous, or 
> >> should
> >> they be merged, or even only this solution and drop the EnumProcessModules 
> >> variant?
> >
> > Is there any reason to use both patches?  Seems simpler to just use
> > this one if it works.  Thanks.
> >
> > Ian
>
> This one needs the tlhelp32 header and its functions, which (according to
> the Microsoft documentation) are only available since Windows XP and Windows
> Server 2003, respectively.
>
> If that's no problem, and in my opinion it shouldn't be, then I can basically
> drop patch 4 and rebase this one.

I don't see that as a problem.  It seems like the patch will fall back
cleanly on ancient Windows and simply fail to pick up other loaded
DLLs.  I think that is fine.  I'll look for an updated patch.  Thanks.

Ian


ping: [PATCH] diagnostics: Fix behavior of permerror options after diagnostic pop [PR111918]

2024-01-25 Thread Lewis Hyatt
May I please ask again about this one? It's just a couple lines, and I
think it fixes an important gap in the logic for #pragma GCC
diagnostic. The PR was not reported by me so I think at least one
other person does care about it :). Thanks!

https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html

-Lewis

On Mon, Jan 8, 2024 at 6:53 PM Lewis Hyatt  wrote:
>
> Can I please ping this one again? It's 3 lines or so to fix the PR. Thanks!
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
>
> On Tue, Dec 19, 2023 at 6:20 PM Lewis Hyatt  wrote:
> >
> > Hello-
> >
> > May I please ping this one? Thanks...
> > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
> >
> > -Lewis
> >
> > On Wed, Nov 29, 2023 at 7:05 PM Lewis Hyatt  wrote:
> > >
> > > On Thu, Nov 09, 2023 at 04:16:10PM -0500, Lewis Hyatt wrote:
> > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111918
> > > >
> > > > This patch fixes the behavior of `#pragma GCC diagnostic pop' for 
> > > > permissive
> > > > error diagnostics such as -Wnarrowing (in C++11). Those currently do not
> > > > return to the correct state after the last pop; they become effectively
> > > > simple warnings instead. Bootstrap + regtest all languages on x86-64, 
> > > > does
> > > > it look OK please? Thanks!
> > >
> > > Hello-
> > >
> > > May I please ping this bug fix?
> > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635871.html
> > >
> > > Please note, it requires a trivial rebase on top of recent changes to
> > > the class diagnostic_context public interface. I attached the rebased 
> > > patch
> > > here as well. Thanks!
> > >
> > > -Lewis


[PATCH] Fortran: NULL actual to optional dummy with VALUE attribute [PR113377]

2024-01-25 Thread Harald Anlauf
Dear all,

this is the third patch in a series that addresses dummy arguments
with the VALUE attribute, now handling the passing of NULL actual
arguments.  It is based on the refactoring in the previous patch
and reuses the handling of absent arguments.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald

From a0509b34d52b32a2e3511daefcb7dc308c755931 Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Thu, 25 Jan 2024 22:19:10 +0100
Subject: [PATCH] Fortran: NULL actual to optional dummy with VALUE attribute
 [PR113377]

gcc/fortran/ChangeLog:

	PR fortran/113377
	* trans-expr.cc (conv_dummy_value): Treat NULL actual argument to
	optional dummy with the VALUE attribute as not present.
	(gfc_conv_procedure_call): Likewise.

gcc/testsuite/ChangeLog:

	PR fortran/113377
	* gfortran.dg/optional_absent_11.f90: New test.
---
 gcc/fortran/trans-expr.cc | 11 ++-
 .../gfortran.dg/optional_absent_11.f90| 99 +++
 2 files changed, 108 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/optional_absent_11.f90

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 3dc521fab9a..67abca9f6ba 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -6086,7 +6086,7 @@ conv_dummy_value (gfc_se * parmse, gfc_expr * e, gfc_symbol * fsym,
   gcc_assert (fsym && fsym->attr.value && !fsym->attr.dimension);

   /* Absent actual argument for optional scalar dummy.  */
-  if (e == NULL && fsym->attr.optional && !fsym->attr.dimension)
+  if ((e == NULL || e->expr_type == EXPR_NULL) && fsym->attr.optional)
 {
   /* For scalar arguments with VALUE attribute which are passed by
 	 value, pass "0" and a hidden argument for the optional status.  */
@@ -6354,7 +6354,14 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
 	  e->ts = temp_ts;
 	}

-  if (e == NULL)
+  if (e == NULL
+	  || (e->expr_type == EXPR_NULL
+	  && fsym
+	  && fsym->attr.value
+	  && fsym->attr.optional
+	  && !fsym->attr.dimension
+	  && fsym->ts.type != BT_DERIVED
+	  && fsym->ts.type != BT_CLASS))
 	{
 	  if (se->ignore_optional)
 	{
diff --git a/gcc/testsuite/gfortran.dg/optional_absent_11.f90 b/gcc/testsuite/gfortran.dg/optional_absent_11.f90
new file mode 100644
index 000..1f63def46fa
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/optional_absent_11.f90
@@ -0,0 +1,99 @@
+! { dg-do run }
+! PR fortran/113377
+!
+! Test that a NULL actual argument to an optional dummy is not present
+! (see also F2018:15.5.2.12 on argument presence)
+
+program test_null_actual_is_absent
+  implicit none
+  integer   :: k(4) = 1
+  character :: c(4) = "#"
+  call one   (k)
+  call three (c)
+contains
+  subroutine one (i)
+integer, intent(in)  :: i(4)
+integer  :: kk = 2
+integer, allocatable :: aa
+integer, pointer :: pp => NULL()
+print *, "Scalar integer"
+call two (kk, aa)
+call two (kk, pp)
+call two (kk, NULL())
+call two (kk, NULL(aa))
+call two (kk, NULL(pp))
+print *, "Elemental integer"
+call two (i,  aa)
+call two (i,  pp)
+call two (i,  NULL())
+call two (i,  NULL(aa))
+call two (i,  NULL(pp))
+print *, "Scalar integer; value"
+call two_val (kk, aa)
+call two_val (kk, pp)
+call two_val (kk, NULL())
+call two_val (kk, NULL(aa))
+call two_val (kk, NULL(pp))
+print *, "Elemental integer; value"
+call two_val (i,  aa)
+call two_val (i,  pp)
+call two_val (i,  NULL())
+call two_val (i,  NULL(aa))
+call two_val (i,  NULL(pp))
+  end
+
+  elemental subroutine two (i, j)
+integer, intent(in)   :: i
+integer, intent(in), optional :: j
+if (present (j)) error stop 11
+  end
+
+  elemental subroutine two_val (i, j)
+integer, intent(in)   :: i
+integer, value,  optional :: j
+if (present (j)) error stop 12
+  end
+
+  subroutine three (y)
+character, intent(in)  :: y(4)
+character  :: zz = "*"
+character, allocatable :: aa
+character, pointer :: pp => NULL()
+print *, "Scalar character"
+call four (zz, aa)
+call four (zz, pp)
+call four (zz, NULL())
+call four (zz, NULL(aa))
+call four (zz, NULL(pp))
+print *, "Elemental character"
+call four (y,  aa)
+call four (y,  pp)
+call four (y,  NULL())
+call four (y,  NULL(aa))
+call four (y,  NULL(pp))
+print *, "Scalar character; value"
+call four_val (zz, aa)
+call four_val (zz, pp)
+call four_val (zz, NULL())
+call four_val (zz, NULL(aa))
+call four_val (zz, NULL(pp))
+print *, "Elemental character; value"
+call four_val (y,  aa)
+call four_val (y,  pp)
+call four_val (y,  NULL())
+call four_val (y,  NULL(aa))
+call four_val (y,  NULL(pp))
+  end
+
+  elemental subroutine four (i, j)
+   

Re: [PATCH] libgccjit: Fix float playback for cross-compilation

2024-01-25 Thread Antoni Boucher
Thanks for the review!

On Wed, 2024-01-24 at 13:10 -0500, David Malcolm wrote:
> On Thu, 2024-01-11 at 18:42 -0500, Antoni Boucher wrote:
> > Hi.
> > This patch fixes the bug 113343.
> > I'm wondering if there's a better solution than using mpfr.
> > The only other solution I found is real_from_string, but that seems
> > overkill to convert the number to a string.
> > I could not find a better way to create a real value from a host
> > double.
> 
> I took a look, and I don't see a better way; it seems weird to go
> through a string stage.  Ideally there would be a
> real_from_host_double, but I don't see one.
> 
> Is there a cross-platform way to directly access the representation
> of
> a host double?

I have no idea.

> 
> > If there's no solution, do we lose some precision by using mpfr?
> > Running Rust's core library tests, there was a difference of one
> > decimal, so I'm wondering if there's some lost precision, or if
> > it's
> > just because those tests don't work on m68k which was my test
> > target.
> 
> Sorry, can you clarify what you mean by "a difference of one decimal"
> above?

Let's say the Rust core tests expected the value "1.23456789", it
instead got the value "1.2345678" (e.g. without the last decimal).
Not sure if this is expected.
Everything works fine for x86-64; this just happened for m68k which is
not well supported for now in Rust, so that might just be that the test
doesn't work on this platform.

> 
> > Also, I'm not sure how to write a test for this fix. Any ideas?
> 
> I think we don't need cross-compilation-specific tests, we should
> just
> use and/or extend the existing coverage for
> gcc_jit_context_new_rvalue_from_double e.g. in test-constants.c and
> test-types.c
> 
> We probably should have test coverage for "awkward" values; we
> already
> have coverage for DBL_MIN and DBL_MAX, but we don't yet have test
> coverage for:
> * quiet/signaling NaN
> * +ve/-ve inf
> * -ve zero

Is this something you would want for this patch?

> 
> Thanks
> Dave
> 



Re: [PATCH v2 3/5] C: Implement musttail attribute for returns

2024-01-25 Thread Andi Kleen
On Thu, Jan 25, 2024 at 08:08:23PM +, Joseph Myers wrote:
> On Wed, 24 Jan 2024, Andi Kleen wrote:
> 
> > Implement a C23 clang compatible musttail attribute similar to the earlier
> > C++ implementation in the C parser.
> 
> I'd expect diagnostics, and associated tests of those diagnostics, for:
> 
> * musttail attribute used with any arguments, even empty 
> [[gnu::musttail()]], much like e.g. [[fallthrough()]] or 
> [[maybe_unused()]] gets diagnosed.

These happen naturally because the attribute doesn't get removed when
not in front of return, and it gets warned about like any other unknown 
attribute:

tattr.c:5:9: warning: ‘musttail’ attribute ignored [-Wattributes]
5 | [[gnu::musttail]] i++;
  | ^

I don't have tests for that but since it's not new behavior I suppose
that's sufficient.


> For the first one of these, it may help to include the attribute in the 
> c_common_gnu_attributes table so the common attribute parsing code knows 
> that this one doesn't accept arguments (and with an attribute handler that 
> always rejects it on declarations, much like 
> handle_fallthrough_attribute).

I just removed it there based on earlier feedback, which gives the
intended "attribute is ignored" warning for these cases too.

-Andi


[pushed] c++: array of PMF [PR113598]

2024-01-25 Thread Jason Merrill
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

Here AGGREGATE_TYPE_P includes pointers to member functions, which is not
what we want.  Instead we should use class||array, as elsewhere in the
function.

PR c++/113598

gcc/cp/ChangeLog:

* init.cc (build_vec_init): Don't use {} for PMF.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/initlist-pmf2.C: New test.
---
 gcc/cp/init.cc |  4 +++-
 gcc/testsuite/g++.dg/cpp0x/initlist-pmf2.C | 12 
 2 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/initlist-pmf2.C

diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index adbdfc2dbfc..ac37330527e 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -4864,7 +4864,9 @@ build_vec_init (tree base, tree maxindex, tree init,
 But for non-classes, that's the same as value-initialization.  */
   if (empty_list)
{
- if (cxx_dialect >= cxx11 && AGGREGATE_TYPE_P (type))
+ if (cxx_dialect >= cxx11
+ && (CLASS_TYPE_P (type)
+ || TREE_CODE (type) == ARRAY_TYPE))
{
  init = build_constructor (init_list_type_node, NULL);
}
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-pmf2.C 
b/gcc/testsuite/g++.dg/cpp0x/initlist-pmf2.C
new file mode 100644
index 00000000000..0fac8333c75
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist-pmf2.C
@@ -0,0 +1,12 @@
+// PR c++/113598
+// { dg-additional-options -Wno-c++11-extensions }
+
+struct Cpu
+{
+  int op_nop();
+};
+typedef int(Cpu::*OpCode)();
+void f()
+{
+  new OpCode[256]{&Cpu::op_nop};
+}

base-commit: c6c2a1d79eb333a00124bf67820a7f405d0d8641
prerequisite-patch-id: 32204a3e8393a5c133fa74b57979c77cd7742149
-- 
2.39.3



[pushed] c++: co_await and initializer_list [PR109227]

2024-01-25 Thread Jason Merrill
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

Here we end up with an initializer_list of 'aa', a type with a non-trivial
destructor, and need to destroy it.  The code called
build_special_member_call for cleanups, but that doesn't work for arrays, so
use cxx_maybe_build_cleanup instead.  Let's go ahead and do that
everywhere that has been calling the destructor directly.

PR c++/109227

gcc/cp/ChangeLog:

* coroutines.cc (build_co_await): Use cxx_maybe_build_cleanup.
(build_actor_fn, process_conditional, maybe_promote_temps)
(morph_fn_to_coro): Likewise.
(expand_one_await_expression): Use build_cleanup.

gcc/testsuite/ChangeLog:

* g++.dg/coroutines/co-await-initlist2.C: New test.
---
 gcc/cp/coroutines.cc  | 118 ++
 .../g++.dg/coroutines/co-await-initlist2.C|  29 +
 2 files changed, 67 insertions(+), 80 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/coroutines/co-await-initlist2.C

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index ba6e6989d06..3194c911e8c 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -932,19 +932,14 @@ build_co_await (location_t loc, tree a, 
suspend_point_kind suspend_kind)
 
  /* We now know that the final suspend object is distinct from the
 final awaiter, so check for a non-throwing DTOR where needed.  */
- tree a_type = TREE_TYPE (a);
- if (TYPE_HAS_NONTRIVIAL_DESTRUCTOR (a_type))
-   if (tree dummy
-   = build_special_member_call (a, complete_dtor_identifier,
-NULL, a_type, LOOKUP_NORMAL,
-tf_none))
- {
-   if (CONVERT_EXPR_P (dummy))
- dummy = TREE_OPERAND (dummy, 0);
-   dummy = TREE_OPERAND (CALL_EXPR_FN (dummy), 0);
-   if (coro_diagnose_throwing_fn (dummy))
- return error_mark_node;
- }
+ if (tree dummy = cxx_maybe_build_cleanup (a, tf_none))
+   {
+ if (CONVERT_EXPR_P (dummy))
+   dummy = TREE_OPERAND (dummy, 0);
+ dummy = TREE_OPERAND (CALL_EXPR_FN (dummy), 0);
+ if (coro_diagnose_throwing_fn (dummy))
+   return error_mark_node;
+   }
}
 }
   else
@@ -1096,18 +1091,14 @@ build_co_await (location_t loc, tree a, 
suspend_point_kind suspend_kind)
return error_mark_node;
   if (coro_diagnose_throwing_fn (awrs_func))
return error_mark_node;
-  if (TYPE_HAS_NONTRIVIAL_DESTRUCTOR (o_type))
-   if (tree dummy
-   = build_special_member_call (e_proxy, complete_dtor_identifier,
-NULL, o_type, LOOKUP_NORMAL,
-tf_none))
- {
-   if (CONVERT_EXPR_P (dummy))
- dummy = TREE_OPERAND (dummy, 0);
-   dummy = TREE_OPERAND (CALL_EXPR_FN (dummy), 0);
-   if (coro_diagnose_throwing_fn (dummy))
- return error_mark_node;
- }
+  if (tree dummy = cxx_maybe_build_cleanup (e_proxy, tf_none))
+   {
+ if (CONVERT_EXPR_P (dummy))
+   dummy = TREE_OPERAND (dummy, 0);
+ dummy = TREE_OPERAND (CALL_EXPR_FN (dummy), 0);
+ if (coro_diagnose_throwing_fn (dummy))
+   return error_mark_node;
+   }
 }
 
   /* We now have three call expressions, in terms of the promise, handle and
@@ -1662,7 +1653,6 @@ expand_one_await_expression (tree *stmt, tree 
*await_expr, void *d)
   tree resume_label = create_named_label_with_ctx (loc, buf, actor);
   tree empty_list = build_empty_stmt (loc);
 
-  tree await_type = TREE_TYPE (var);
   tree stmt_list = NULL;
   tree r;
   tree *await_init = NULL;
@@ -1791,9 +1781,7 @@ expand_one_await_expression (tree *stmt, tree 
*await_expr, void *d)
   append_to_statement_list (destroy_label, &body_list);
   if (needs_dtor)
 {
-  tree dtor = build_special_member_call (var, complete_dtor_identifier,
-NULL, await_type, LOOKUP_NORMAL,
-tf_warning_or_error);
+  tree dtor = build_cleanup (var);
   append_to_statement_list (dtor, &body_list);
 }
   r = build1_loc (loc, GOTO_EXPR, void_type_node, data->cleanup);
@@ -1821,9 +1809,7 @@ expand_one_await_expression (tree *stmt, tree 
*await_expr, void *d)
   tree *revised = tsi_stmt_ptr (tsi_last (stmt_list));
   if (needs_dtor)
 {
-  tree dtor = build_special_member_call (var, complete_dtor_identifier,
-NULL, await_type, LOOKUP_NORMAL,
-tf_warning_or_error);
+  tree dtor = build_cleanup (var);
   append_to_statement_list (dtor, &stmt_list);
 }
   data->index += 2;
@@ -2330,10 +2316,8 @@ build_actor_fn (location_t loc, tree coro_frame_type, 

Re: [PATCH v4 0/4]New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2024-01-25 Thread Qing Zhao
Thanks a lot for the testing.

Yes, I can repeat the issue with the following small example:

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#define MAX(a, b)  ((a) > (b) ? (a) :  (b))

struct untracked {
   int size;
   int array[] __attribute__((counted_by (size)));
} *a;
struct untracked * alloc_buf (int index)
{
  struct untracked *p;
  p = (struct untracked *) malloc (MAX (sizeof (struct untracked),
(offsetof (struct untracked, array[0])
 + (index) * sizeof (int))));
  p->size = index;
  return p;
}

int main()
{
  a = alloc_buf(10);
 printf ("same_type is %d\n",
  (__builtin_types_compatible_p(typeof (a->array), typeof (&(a->array)[0]))));
  return 0;
}


/home/opc/Install/latest-d/bin/gcc -O2 btcp.c
same_type is 1

It looks like the “typeof” operator needs to be handled specially in the C FE
for the new internal function .ACCESS_WITH_SIZE.

(I have already handled the “offsetof” operator specially in the C FE.)

Will fix this issue.

Thanks.

Qing

> On Jan 24, 2024, at 7:51 PM, Kees Cook  wrote:
> 
> On Wed, Jan 24, 2024 at 12:29:51AM +0000, Qing Zhao wrote:
>> This is the 4th version of the patch.
> 
> Thanks very much for this!
> 
> I tripped over an unexpected behavioral change that the Linux kernel
> depends on:
> 
> __builtin_types_compatible_p() no longer treats an array marked with
> counted_by as different from that array's decayed pointer. Specifically,
> the kernel uses these macros:
> 
> 
> /*
> * Force a compilation error if condition is true, but also produce a
> * result (of value 0 and type int), so the expression can be used
> * e.g. in a structure initializer (or where-ever else comma expressions
> * aren't permitted).
> */
> #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
> 
> #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
> 
> /* &a[0] degrades to a pointer: a different type from an array */
> #define __must_be_array(a)   BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
> 
> 
> This gets used in various places to make sure we're dealing with an
> array for a macro:
> 
> #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + 
> __must_be_array(arr))
> 
> 
> So this builds:
> 
> struct untracked {
>int size;
>int array[];
> } *a;
> 
> __must_be_array(a->array)
> => 0 (as expected)
> __builtin_types_compatible_p(typeof(a->array), typeof(&(a->array)[0]))
> => 0 (as expected, array vs decayed array pointer)
> 
> 
> But if counted_by is added, we get a build failure:
> 
> struct tracked {
>int size;
>int array[] __counted_by(size);
> } *b;
> 
> __must_be_array(b->array)
> => build failure (not expected)
> __builtin_types_compatible_p(typeof(b->array), typeof(&(b->array)[0]))
> => 1 (not expected, both pointers?)
> 
> 
> 
> 
> -- 
> Kees Cook



Re: [PATCH v2 3/5] C: Implement musttail attribute for returns

2024-01-25 Thread Joseph Myers
On Wed, 24 Jan 2024, Andi Kleen wrote:

> Implement a C23 clang compatible musttail attribute similar to the earlier
> C++ implementation in the C parser.

I'd expect diagnostics, and associated tests of those diagnostics, for:

* musttail attribute used with any arguments, even empty 
[[gnu::musttail()]], much like e.g. [[fallthrough()]] or 
[[maybe_unused()]] gets diagnosed.

* musttail attribute used on a declaration, or as part of an attribute 
declaration (attributes on their own before a semicolon - whether musttail 
on its own, or together with the fallthrough attribute that is valid in 
that case).

* musttail attribute used on any statement other than a return statement.

All of these should definitely apply to the gnu:: form and probably to 
clang:: as well.  Some of these might already be diagnosed, but I don't 
see them in the added testcases.

For the first one of these, it may help to include the attribute in the 
c_common_gnu_attributes table so the common attribute parsing code knows 
that this one doesn't accept arguments (and with an attribute handler that 
always rejects it on declarations, much like 
handle_fallthrough_attribute).

-- 
Joseph S. Myers
josmy...@redhat.com



[PATCH] aarch64: Fix undefinedness while testing the J constraint [PR100204]

2024-01-25 Thread Andrew Pinski
The J constraint can invoke undefined behavior due to it taking the
negative of the ival if ival was HWI_MIN. The fix is as simple as casting
to `unsigned HOST_WIDE_INT` before doing the negative of it. This
does that.

Committed as obvious after build/test for aarch64-linux-gnu.

gcc/ChangeLog:

PR target/100204
* config/aarch64/constraints.md (J): Cast to `unsigned HOST_WIDE_INT`
before taking the negative of it.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/constraints.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/constraints.md 
b/gcc/config/aarch64/constraints.md
index 8566befd727..a2569cea510 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -118,7 +118,7 @@ (define_constraint "Uat"
 (define_constraint "J"
  "A constant that can be used with a SUB operation (once negated)."
  (and (match_code "const_int")
-  (match_test "aarch64_uimm12_shift (-ival)")))
+  (match_test "aarch64_uimm12_shift (- (unsigned HOST_WIDE_INT) ival)")))
 
 ;; We can't use the mode of a CONST_INT to determine the context in
 ;; which it is being used, so we must have a separate constraint for
-- 
2.39.3



Re: [PATCH 5/4] libbacktrace: improve getting debug information for loaded dlls

2024-01-25 Thread Björn Schäpers

Am 23.01.2024 um 23:37 schrieb Ian Lance Taylor:

On Thu, Jan 4, 2024 at 2:33 PM Björn Schäpers  wrote:


Am 03.01.2024 um 00:12 schrieb Björn Schäpers:

Am 30.11.2023 um 20:53 schrieb Ian Lance Taylor:

On Fri, Jan 20, 2023 at 2:55 AM Björn Schäpers  wrote:


From: Björn Schäpers 

Fixes https://github.com/ianlancetaylor/libbacktrace/issues/53, except
that libraries loaded after the backtrace_initialize are not handled.
But as far as I can see that's the same for elf.


Thanks, but I don't want a patch that loops using goto statements.
Please rewrite to avoid that.  It may be simpler to call a function.

Also starting with a module count of 1000 seems like a lot.  Do
typical Windows programs load that many modules?

Ian




Rewritten using a function.

If that is committed, could you attribute that commit to me (--author="Björn
Schäpers ")?

Thanks and kind regards,
Björn.


I noticed that, under 64 bit, libraries loaded with LoadLibrary were missing.
EnumProcessModules reported the correct number of modules, but did not fill in
the HMODULEs; it set them to 0 instead. While trying to investigate, I noticed
that if I do the very same thing from main (in C++) I get even fewer HMODULEs.

So I went a different way. This detects all libraries correctly, in 32 and 64
bit. The question is, if it should be a patch on top of the previous, or should
they be merged, or even only this solution and drop the EnumProcessModules 
variant?


Is there any reason to use both patches?  Seems simpler to just use
this one if it works.  Thanks.

Ian


This one needs the tlhelp32 header and its functions, which (according to
the Microsoft documentation) are only available since Windows XP and Windows
Server 2003, respectively.


If that's no problem, and in my opinion it shouldn't be, then I can basically 
drop patch 4 and rebase this one.


Kind regards,
Björn.


[pushed][PR113526][LRA]: Fixing asm-flag-1.c failure on ARM

2024-01-25 Thread Vladimir Makarov

The following patch fixes

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113526

The patch was successfully bootstrapped and tested on x86-64, ppc64le, 
and aarch64.


commit 5c617df410602d0e51d61c84d1ae7e9b3f51efa4
Author: Vladimir N. Makarov 
Date:   Thu Jan 25 14:41:17 2024 -0500

[PR113526][LRA]: Fixing asm-flag-1.c failure on ARM

    My recent patch for PR113356 results in failure of the asm-flag-1.c test on arm.
After the patch LRA treats asm operand pseudos as general regs.  There
are too many such operands and LRA can not assign hard regs to all
operand pseudos.  Actually we should not assign hard regs to the
operand pseudo at all.  The following patch fixes this.

gcc/ChangeLog:

PR target/113526
* lra-constraints.cc (curr_insn_transform): Change class even for
    spilled pseudo successfully matched with NO_REGS.

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 3379b88ff22..0ae81c1ff9c 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -4498,10 +4498,10 @@ curr_insn_transform (bool check_only_p)
 		 registers for other pseudos referenced in the insn.  The most
 		 common case of this is a scratch register which will be
 		 transformed to scratch back at the end of LRA.  */
-	  && lra_get_regno_hard_regno (regno) >= 0
 	  && bitmap_single_bit_set_p (&lra_reg_info[regno].insn_bitmap))
 	{
-	  lra_change_class (regno, NO_REGS, "  Change to", true);
+	  if (lra_get_allocno_class (regno) != NO_REGS)
+		lra_change_class (regno, NO_REGS, "  Change to", true);
 	  reg_renumber[regno] = -1;
 	}
 	  /* We can do an optional reload.  If the pseudo got a hard


Re: [PATCH 2/2] aarch64: Add support for _BitInt

2024-01-25 Thread Richard Sandiford
Andre Vieira  writes:
> This patch adds support for C23's _BitInt for the AArch64 port when compiling
> for little endianness.  Big Endianness requires further target-agnostic
> support and we therefore disable it for now.
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64.cc (TARGET_C_BITINT_TYPE_INFO): Declare MACRO.
>   (aarch64_bitint_type_info): New function.
>   (aarch64_return_in_memory_1): Return large _BitInt's in memory.
>   (aarch64_function_arg_alignment): Adapt to correctly return the ABI
>   mandated alignment of _BitInt(N) where N > 128 as the alignment of
>   TImode.
>   (aarch64_composite_type_p): Return true for _BitInt(N), where N > 128.
>
> libgcc/ChangeLog:
>
>   * config/aarch64/t-softfp: Add fixtfbitint, floatbitinttf and
>   floatbitinthf to the softfp_extras variable to ensure the
>   runtime support is available for _BitInt.
> ---
>  gcc/config/aarch64/aarch64.cc  | 44 +-
>  libgcc/config/aarch64/t-softfp |  3 ++-
>  2 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index e6bd3fd0bb4..48bac51bc7c 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -6534,7 +6534,7 @@ aarch64_return_in_memory_1 (const_tree type)
>machine_mode ag_mode;
>int count;
>  
> -  if (!AGGREGATE_TYPE_P (type)
> +  if (!(AGGREGATE_TYPE_P (type) || TREE_CODE (type) == BITINT_TYPE)
>&& TREE_CODE (type) != COMPLEX_TYPE
>&& TREE_CODE (type) != VECTOR_TYPE)
>  /* Simple scalar types always returned in registers.  */

I guess adding && TREE_CODE (type) != BITINT_TYPE would be more in
keeping with the current code.

> @@ -6618,6 +6618,10 @@ aarch64_function_arg_alignment (machine_mode mode, 
> const_tree type,
>  
>gcc_assert (TYPE_MODE (type) == mode);
>  
> +  if (TREE_CODE (type) == BITINT_TYPE
> +  && int_size_in_bytes (type) > 16)
> +return GET_MODE_ALIGNMENT (TImode);
> +

Does the type have a different alignment from this?  I think a comment
would help.

>if (!AGGREGATE_TYPE_P (type))
>  {
>/* The ABI alignment is the natural alignment of the type, without
> @@ -21793,6 +21797,11 @@ aarch64_composite_type_p (const_tree type,
>if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
>  return true;
>  
> +  if (type
> +  && TREE_CODE (type) == BITINT_TYPE
> +  && int_size_in_bytes (type) > 16)
> +return true;
> +

Just checking: does this have any practical effect as things stand?
It looks like all callers are either in big-endian code (where it
determines padding for <= 16-byte arguments) or in deciding whether
to pass something as a vector.

Seems OK to keep it on a better-safe-than-sorry basis, just wanted
to check.

It'd be good to have some tests.  E.g. maybe one return test for
each of...

>if (mode == BLKmode
>|| GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
>|| GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
> @@ -28330,6 +28339,36 @@ aarch64_excess_precision (enum excess_precision_type 
> type)
>return FLT_EVAL_METHOD_UNPREDICTABLE;
>  }
>  
> +/* Implement TARGET_C_BITINT_TYPE_INFO.
> +   Return true if _BitInt(N) is supported and fill its details into *INFO.  
> */
> +bool
> +aarch64_bitint_type_info (int n, struct bitint_info *info)
> +{
> +  if (TARGET_BIG_END)
> +return false;
> +
> +  if (n <= 8)
> +info->limb_mode = QImode;
> +  else if (n <= 16)
> +info->limb_mode = HImode;
> +  else if (n <= 32)
> +info->limb_mode = SImode;
> +  else if (n <= 64)
> +info->limb_mode = DImode;
> +  else if (n <= 128)
> +info->limb_mode = TImode;
> +  else
> +info->limb_mode = DImode;

...these conditions, and one argument test in which a _BitInt(n) is
passed as a second argument after a single x0 argument, such as in:

void f(int x, _BitInt(N) y) { ... }

Same for when all argument registers are taken, again with a preceding
stack argument:

void f(int x0, int x1, int x2, int x3,
   int x4, int x5, int x6, int x7,
   int stack0, _BitInt(N) y)
{
  ...
}

It'd also be good to have tests for alignof and sizeof.

Can you add a comment explaining why we pick DImode rather than TImode
for the n > 128 case?

Thanks,
Richard

> +
> +  if (n > 128)
> +info->abi_limb_mode = TImode;
> +  else
> +info->abi_limb_mode = info->limb_mode;
> +  info->big_endian = TARGET_BIG_END;
> +  info->extended = false;
> +  return true;
> +}
> +
>  /* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
> scheduled for speculative execution.  Reject the long-running division
> and square-root instructions.  */
> @@ -30439,6 +30478,9 @@ aarch64_run_selftests (void)
>  #undef TARGET_C_EXCESS_PRECISION
>  #define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
>  
> +#undef TARGET_C_BITINT_TYPE_INFO
> +#define TARGET_C_BITINT_TYPE_INFO aarch64_bitint_type_info
> +
>  #undef  TA

Re: [PATCH] c++: Fix up build_m_component_ref [PR113599]

2024-01-25 Thread Patrick Palka
On Thu, 25 Jan 2024, Patrick Palka wrote:

> On Thu, 25 Jan 2024, Jakub Jelinek wrote:
> 
> > Hi!
> > 
> > The following testcase reduced from GDB is miscompiled starting with
> > r14-5503 PR112427 change.
> > The problem is in the build_m_component_ref hunk, which changed
> > -  datum = fold_build_pointer_plus (fold_convert (ptype, datum), 
> > component);
> > +  datum = cp_convert (ptype, datum, complain);
> > +  if (!processing_template_decl)
> > +   datum = build2 (POINTER_PLUS_EXPR, ptype,
> > +   datum, convert_to_ptrofftype (component));
> > +  datum = cp_fully_fold (datum);
> > Component is e, (sizetype) e is 16, offset of c inside of C.
> > ptype is A *, pointer to type of C::c and datum is &d.
> > Now, previously the above created ((A *) &d) p+ (sizetype) e which is 
> > correct,
> > but in the new code cp_convert sees that C has A as base class and
> > instead of returning (A *) &d, it returns &d.D.2800 where D.2800 is
> > the FIELD_DECL for the A base at offset 8 into C.
> > So, instead of computing ((A *) &d) p+ (sizetype) e it computes
> > &d.D.2800 p+ (sizetype) e, which is ((A *) &d) p+ 24.
> > 
> > The following patch fixes it by using convert instead of cp_convert which
> > eventually calls build_nop (ptype, datum).
> 
> LGTM.  IIUC using 'convert' would work here too thanks to its special
> case for indirect types, but I naively went with cp_convert since it has
> a complain parameter :/

D'oh, I completely overlooked this version of the patch already uses
convert instead of build_nop directly :)  LGTM either way

> 
> > 
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > 
> > 2024-01-25  Jakub Jelinek  
> > 
> > PR c++/113599
> > * typeck2.cc (build_m_component_ref): Use convert instead of
> > cp_convert for pointer conversion.
> > 
> > * g++.dg/expr/ptrmem11.C: New test.
> > 
> > --- gcc/cp/typeck2.cc.jj2024-01-03 12:01:23.672476417 +0100
> > +++ gcc/cp/typeck2.cc   2024-01-25 14:11:40.656361310 +0100
> > @@ -2378,7 +2378,7 @@ build_m_component_ref (tree datum, tree
> >/* Build an expression for "object + offset" where offset is the
> >  value stored in the pointer-to-data-member.  */
> >ptype = build_pointer_type (type);
> > -  datum = cp_convert (ptype, datum, complain);
> > +  datum = convert (ptype, datum);
> >if (!processing_template_decl)
> > datum = build2 (POINTER_PLUS_EXPR, ptype,
> > datum, convert_to_ptrofftype (component));
> > --- gcc/testsuite/g++.dg/expr/ptrmem11.C.jj 2024-01-25 14:13:11.736089567 
> > +0100
> > +++ gcc/testsuite/g++.dg/expr/ptrmem11.C2024-01-25 14:18:47.720398222 
> > +0100
> > @@ -0,0 +1,17 @@
> > +// PR c++/113599
> > +// { dg-do run }
> > +
> > +struct A { void *a; };
> > +struct B { void *b; };
> > +struct C : public B, public A { A c; };
> > +static C d;
> > +
> > +int
> > +main ()
> > +{
> > +  A C::*e = &C::c;
> > +  A *f = &(d.*e);
> > +  A *g = &d.c;
> > +  if (f != g)
> > +__builtin_abort ();
> > +}
> > 
> > Jakub
> > 
> > 
> 



Re: [PATCH] c++: Fix up build_m_component_ref [PR113599]

2024-01-25 Thread Patrick Palka
On Thu, 25 Jan 2024, Jakub Jelinek wrote:

> Hi!
> 
> The following testcase reduced from GDB is miscompiled starting with
> r14-5503 PR112427 change.
> The problem is in the build_m_component_ref hunk, which changed
> -  datum = fold_build_pointer_plus (fold_convert (ptype, datum), 
> component);
> +  datum = cp_convert (ptype, datum, complain);
> +  if (!processing_template_decl)
> +   datum = build2 (POINTER_PLUS_EXPR, ptype,
> +   datum, convert_to_ptrofftype (component));
> +  datum = cp_fully_fold (datum);
> Component is e, (sizetype) e is 16, offset of c inside of C.
> ptype is A *, pointer to type of C::c and datum is &d.
> Now, previously the above created ((A *) &d) p+ (sizetype) e which is correct,
> but in the new code cp_convert sees that C has A as base class and
> instead of returning (A *) &d, it returns &d.D.2800 where D.2800 is
> the FIELD_DECL for the A base at offset 8 into C.
> So, instead of computing ((A *) &d) p+ (sizetype) e it computes
> &d.D.2800 p+ (sizetype) e, which is ((A *) &d) p+ 24.
> 
> The following patch fixes it by using convert instead of cp_convert which
> eventually calls build_nop (ptype, datum).

LGTM.  IIUC using 'convert' would work here too thanks to its special
case for indirect types, but I naively went with cp_convert since it has
a complain parameter :/

> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2024-01-25  Jakub Jelinek  
> 
>   PR c++/113599
>   * typeck2.cc (build_m_component_ref): Use convert instead of
>   cp_convert for pointer conversion.
> 
>   * g++.dg/expr/ptrmem11.C: New test.
> 
> --- gcc/cp/typeck2.cc.jj  2024-01-03 12:01:23.672476417 +0100
> +++ gcc/cp/typeck2.cc 2024-01-25 14:11:40.656361310 +0100
> @@ -2378,7 +2378,7 @@ build_m_component_ref (tree datum, tree
>/* Build an expression for "object + offset" where offset is the
>value stored in the pointer-to-data-member.  */
>ptype = build_pointer_type (type);
> -  datum = cp_convert (ptype, datum, complain);
> +  datum = convert (ptype, datum);
>if (!processing_template_decl)
>   datum = build2 (POINTER_PLUS_EXPR, ptype,
>   datum, convert_to_ptrofftype (component));
> --- gcc/testsuite/g++.dg/expr/ptrmem11.C.jj   2024-01-25 14:13:11.736089567 
> +0100
> +++ gcc/testsuite/g++.dg/expr/ptrmem11.C  2024-01-25 14:18:47.720398222 
> +0100
> @@ -0,0 +1,17 @@
> +// PR c++/113599
> +// { dg-do run }
> +
> +struct A { void *a; };
> +struct B { void *b; };
> +struct C : public B, public A { A c; };
> +static C d;
> +
> +int
> +main ()
> +{
> +  A C::*e = &C::c;
> +  A *f = &(d.*e);
> +  A *g = &d.c;
> +  if (f != g)
> +__builtin_abort ();
> +}
> 
>   Jakub
> 
> 



[PATCH] c++: problematic assert in reference_binding [PR113141]

2024-01-25 Thread Patrick Palka
Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
OK for trunk/13?  This isn't a very satisfactory fix, but at least
it safely fixes these testcases I guess.  Note that there's
implementation disagreement about the second testcase, GCC always
accepted it but Clang/MSVC/icc reject it.

-- >8 --

In the bad reference binding shortcutting commit r13-1755-g68f37670eff0b872
I made us check the badness criteria in reference_binding earlier in
the function so that we can fail fast and avoid unnecessary template
instantiation during the first pass of overload resolution.  This was
for the most part obviously safe, except now the badness criteria are
also checked before the recursive case[1] in reference_binding, whereas
before the criteria didn't apply in that case.  So in order to justify
this hoisting I added a sanity check that the badness criteria are still
sound in the recursive case.

Unfortunately the below testcases trigger the sanity check.  I'm not
sure if this means the bad conversion shortcutting is unsound since I
wasn't able to construct a testcase that affects overload resolution.
And if it is unsound, I'm not sure how we can make it sound in light of
this recursive logic for non-direct user-defined conversions.  But we
can at least restore the pre-r13-1755 behavior for the below two
testcases by simply getting rid of this sanity check.

[1]: Added in
https://gcc.gnu.org/pipermail/gcc-patches/2014-April/386365.html

PR c++/113141

gcc/cp/ChangeLog:

* call.cc (reference_binding): Remove badness criteria sanity
check in the recursive case.

gcc/testsuite/ChangeLog:

* g++.dg/conversion/ref10.C: New test.
* g++.dg/conversion/ref11.C: New test.
---
 gcc/cp/call.cc  |  1 -
 gcc/testsuite/g++.dg/conversion/ref10.C | 13 +
 gcc/testsuite/g++.dg/conversion/ref11.C | 16 
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/conversion/ref10.C
 create mode 100644 gcc/testsuite/g++.dg/conversion/ref11.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 9de0d77c423..2dce52bc7b8 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -2034,7 +2034,6 @@ reference_binding (tree rto, tree rfrom, tree expr, bool 
c_cast_p, int flags,
if (!new_second)
  return bad_direct_conv ? bad_direct_conv : nullptr;
conv = merge_conversion_sequences (t, new_second);
-   gcc_assert (maybe_valid_p || conv->bad_p);
return conv;
  }
 }
diff --git a/gcc/testsuite/g++.dg/conversion/ref10.C 
b/gcc/testsuite/g++.dg/conversion/ref10.C
new file mode 100644
index 000..633b7e48e47
--- /dev/null
+++ b/gcc/testsuite/g++.dg/conversion/ref10.C
@@ -0,0 +1,13 @@
+// PR c++/113141
+
+struct Matrix { };
+
+struct TPoint3 { operator const Matrix(); };
+
+void f(Matrix&);
+
+int main() {
+  TPoint3 X;
+  Matrix& m = (Matrix &)X;
+  f((Matrix &)X);
+}
diff --git a/gcc/testsuite/g++.dg/conversion/ref11.C 
b/gcc/testsuite/g++.dg/conversion/ref11.C
new file mode 100644
index 000..f893f12dafa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/conversion/ref11.C
@@ -0,0 +1,16 @@
+// PR c++/113141
+// { dg-do compile { target c++11 } }
+
+struct ConvToRef {
+  operator int&();
+};
+
+struct A { int& r; };
+
+void f(A);
+
+int main() {
+  ConvToRef c;
+  A a{{c}};
+  f({{c}});
+}
-- 
2.43.0.386.ge02ecfcc53



[PATCH] c++: Fix up build_m_component_ref [PR113599]

2024-01-25 Thread Jakub Jelinek
Hi!

The following testcase reduced from GDB is miscompiled starting with
r14-5503 PR112427 change.
The problem is in the build_m_component_ref hunk, which changed
-  datum = fold_build_pointer_plus (fold_convert (ptype, datum), component);
+  datum = cp_convert (ptype, datum, complain);
+  if (!processing_template_decl)
+   datum = build2 (POINTER_PLUS_EXPR, ptype,
+   datum, convert_to_ptrofftype (component));
+  datum = cp_fully_fold (datum);
Component is e, (sizetype) e is 16, offset of c inside of C.
ptype is A *, pointer to type of C::c and datum is &d.
Now, previously the above created ((A *) &d) p+ (sizetype) e which is correct,
but in the new code cp_convert sees that C has A as base class and
instead of returning (A *) &d, it returns &d.D.2800 where D.2800 is
the FIELD_DECL for the A base at offset 8 into C.
So, instead of computing ((A *) &d) p+ (sizetype) e it computes
&d.D.2800 p+ (sizetype) e, which is ((A *) &d) p+ 24.

The following patch fixes it by using convert instead of cp_convert which
eventually calls build_nop (ptype, datum).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-01-25  Jakub Jelinek  

PR c++/113599
* typeck2.cc (build_m_component_ref): Use convert instead of
cp_convert for pointer conversion.

* g++.dg/expr/ptrmem11.C: New test.

--- gcc/cp/typeck2.cc.jj2024-01-03 12:01:23.672476417 +0100
+++ gcc/cp/typeck2.cc   2024-01-25 14:11:40.656361310 +0100
@@ -2378,7 +2378,7 @@ build_m_component_ref (tree datum, tree
   /* Build an expression for "object + offset" where offset is the
 value stored in the pointer-to-data-member.  */
   ptype = build_pointer_type (type);
-  datum = cp_convert (ptype, datum, complain);
+  datum = convert (ptype, datum);
   if (!processing_template_decl)
datum = build2 (POINTER_PLUS_EXPR, ptype,
datum, convert_to_ptrofftype (component));
--- gcc/testsuite/g++.dg/expr/ptrmem11.C.jj 2024-01-25 14:13:11.736089567 
+0100
+++ gcc/testsuite/g++.dg/expr/ptrmem11.C2024-01-25 14:18:47.720398222 
+0100
@@ -0,0 +1,17 @@
+// PR c++/113599
+// { dg-do run }
+
+struct A { void *a; };
+struct B { void *b; };
+struct C : public B, public A { A c; };
+static C d;
+
+int
+main ()
+{
+  A C::*e = &C::c;
+  A *f = &(d.*e);
+  A *g = &d.c;
+  if (f != g)
+__builtin_abort ();
+}

Jakub



RE: [x86 PATCH] PR target/106060: Improved SSE vector constant materialization.

2024-01-25 Thread Roger Sayle

Hi Hongtao,
Many thanks for the review.  Here's a revised version of my patch
that addresses (most of) the issues you've raised.  Firstly the
handling of zero and all_ones in this function is mostly for 
completeness/documentation, these standard_sse_constant_p
values are (currently/normally) handled elsewhere.  But I have
added an "n_var == 0" optimization to ix86_expand_vector_init.

As you've suggested I've added explicit TARGET_SSE2 tests where
required, and for consistency I've also added support for AVX512's
V16SImode.

As you've predicted, the eventual goal is to move this after combine
(or reload) using define_insn_and_split, but that requires a significant
restructuring that should be done in steps.  This also interacts with
a similar planned reorganization of TImode constant handling.  If
all 128-bit (vector) constants are acceptable before combine, then
STV has the freedom to choose V1TImode (and this broadcast
functionality) to implement TImode operations on immediate
constants.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline (in stage 1)?


2024-01-25  Roger Sayle  
Hongtao Liu  

gcc/ChangeLog
PR target/106060
* config/i386/i386-expand.cc (enum ix86_vec_bcast_alg): New.
(struct ix86_vec_bcast_map_simode_t): New type for table below.
(ix86_vec_bcast_map_simode): Table of SImode constants that may
be efficiently synthesized by a ix86_vec_bcast_alg method.
(ix86_vec_bcast_map_simode_cmp): New comparator for bsearch.
(ix86_vector_duplicate_simode_const): Efficiently synthesize
V4SImode and V8SImode constants that duplicate special constants.
(ix86_vector_duplicate_value): Attempt to synthesize "special"
vector constants using ix86_vector_duplicate_simode_const.
* config/i386/i386.cc (ix86_rtx_costs) : ABS of a
vector integer mode costs with a single SSE instruction.

gcc/testsuite/ChangeLog
PR target/106060
* gcc.target/i386/auto-init-8.c: Update test case.
* gcc.target/i386/avx512fp16-3.c: Likewise.
* gcc.target/i386/pr100865-9a.c: Likewise.
* gcc.target/i386/pr101796-1.c: Likewise.
* gcc.target/i386/pr106060-1.c: New test case.
* gcc.target/i386/pr106060-2.c: Likewise.
* gcc.target/i386/pr106060-3.c: Likewise.
* gcc.target/i386/pr70314.c: Update test case.
* gcc.target/i386/vect-shiftv4qi.c: Likewise.
* gcc.target/i386/vect-shiftv8qi.c: Likewise.


Roger
--

> -Original Message-
> From: Hongtao Liu 
> Sent: 17 January 2024 03:13
> To: Roger Sayle 
> Cc: gcc-patches@gcc.gnu.org; Uros Bizjak 
> Subject: Re: [x86 PATCH] PR target/106060: Improved SSE vector constant
> materialization.
> 
> On Wed, Jan 17, 2024 at 5:59 AM Roger Sayle 
> wrote:
> >
> >
> > I thought I'd just missed the bug fixing season of stage3, but there
> > appears to be a little latitude in early stage4 (for vector patches), so
> > I'll post this now.
> >
> > This patch resolves PR target/106060 by providing efficient methods
> > for materializing/synthesizing special "vector" constants on x86.
> > Currently there are three methods of materializing a vector constant;
> > the most general is to load a vector from the constant pool, secondly
> "duplicated"
> > constants can be synthesized by moving an integer between units and
> > broadcasting (or shuffling it), and finally the special cases of the
> > all-zeros vector and all-ones vectors can be loaded via a single SSE
> > instruction.   This patch handles additional cases that can be synthesized
> > in two instructions, loading an all-ones vector followed by another
> > SSE instruction.  Following my recent patch for PR target/112992,
> > there's conveniently a single place in i386-expand.cc where these
> > special cases can be handled.
> >
> > Two examples are given in the original bugzilla PR for 106060.
> >
> > __m256i
> > should_be_cmpeq_abs ()
> > {
> >   return _mm256_set1_epi8 (1);
> > }
> >
> > is now generated (with -O3 -march=x86-64-v3) as:
> >
> > vpcmpeqd%ymm0, %ymm0, %ymm0
> > vpabsb  %ymm0, %ymm0
> > ret
> >
> > and
> >
> > __m256i
> > should_be_cmpeq_add ()
> > {
> >   return _mm256_set1_epi8 (-2);
> > }
> >
> > is now generated as:
> >
> > vpcmpeqd%ymm0, %ymm0, %ymm0
> > vpaddb  %ymm0, %ymm0, %ymm0
> > ret
> >
> > This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> > and make -k check, both with and without --target_board=unix{-m32}
> > with no new failures.  Ok for mainline?
> >
> >
> > 2024-01-16  Roger Sayle  
> >
> > gcc/ChangeLog
> > PR target/106060
> > * config/i386/i386-expand.cc (enum ix86_vec_bcast_alg): New.
> > (struct ix86_vec_bcast_map_simode_t): New type for table below.
> > (ix86_vec_bcast_map_simode): Table 

[committed] MAINTAINERS: Update my email address

2024-01-25 Thread Chung-Lin Tang
Updated my email address.

Thanks,
Chung-Lin

From ffeab69e1ffc0405da3a9222c7b9f7a000252702 Mon Sep 17 00:00:00 2001
From: Chung-Lin Tang 
Date: Thu, 25 Jan 2024 18:20:43 +
Subject: [PATCH] MAINTAINERS: Update my work email address

* MAINTAINERS: Update my work email address.
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 7d3b78d276e..8b11ddbc069 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -99,7 +99,7 @@ moxie portAnthony Green   

 msp430 portNick Clifton
 nds32 port Chung-Ju Wu 
 nds32 port Shiva Chen  
-nios2 port Chung-Lin Tang  
+nios2 port Chung-Lin Tang  
 nios2 port Sandra Loosemore
 nvptx port Tom de Vries
 nvptx port Thomas Schwinge 
-- 
2.34.1



Re: [RFC] Either fix or disable SME feature for `aarch64-w64-mingw32` target?

2024-01-25 Thread Szabolcs Nagy
this patch added double notes on crt*.o and lse derived objects.
(which does not seem to cause build break but some linkers may
not like it)

after #include "aarch64-asm.h" all gnu-stack and gnu-property
related stuff should be removed since the header takes care of it.





Re: [RFC] Either fix or disable SME feature for `aarch64-w64-mingw32` target?

2024-01-25 Thread Szabolcs Nagy
The 01/15/2024 17:21, Radek Barton wrote:


v4-0001-Ifdef-.hidden-.type-and-.size-pseudo-ops-for-aarc.patch
Description: v4-0001-Ifdef-.hidden-.type-and-.size-pseudo-ops-for-aarc.patch


[PATCH 2/2] aarch64: Add support for _BitInt

2024-01-25 Thread Andre Vieira

This patch adds support for C23's _BitInt for the AArch64 port when compiling
for little endianness.  Big Endianness requires further target-agnostic
support and we therefore disable it for now.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (TARGET_C_BITINT_TYPE_INFO): Declare MACRO.
(aarch64_bitint_type_info): New function.
(aarch64_return_in_memory_1): Return large _BitInt's in memory.
(aarch64_function_arg_alignment): Adapt to correctly return the ABI
mandated alignment of _BitInt(N) where N > 128 as the alignment of
TImode.
(aarch64_composite_type_p): Return true for _BitInt(N), where N > 128.

libgcc/ChangeLog:

* config/aarch64/t-softfp: Add fixtfbitint, floatbitinttf and
floatbitinthf to the softfp_extras variable to ensure the
runtime support is available for _BitInt.
---
 gcc/config/aarch64/aarch64.cc  | 44 +-
 libgcc/config/aarch64/t-softfp |  3 ++-
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e6bd3fd0bb4..48bac51bc7c 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -6534,7 +6534,7 @@ aarch64_return_in_memory_1 (const_tree type)
   machine_mode ag_mode;
   int count;
 
-  if (!AGGREGATE_TYPE_P (type)
+  if (!(AGGREGATE_TYPE_P (type) || TREE_CODE (type) == BITINT_TYPE)
   && TREE_CODE (type) != COMPLEX_TYPE
   && TREE_CODE (type) != VECTOR_TYPE)
 /* Simple scalar types always returned in registers.  */
@@ -6618,6 +6618,10 @@ aarch64_function_arg_alignment (machine_mode mode, const_tree type,
 
   gcc_assert (TYPE_MODE (type) == mode);
 
+  if (TREE_CODE (type) == BITINT_TYPE
+  && int_size_in_bytes (type) > 16)
+return GET_MODE_ALIGNMENT (TImode);
+
   if (!AGGREGATE_TYPE_P (type))
 {
   /* The ABI alignment is the natural alignment of the type, without
@@ -21793,6 +21797,11 @@ aarch64_composite_type_p (const_tree type,
   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
 return true;
 
+  if (type
+  && TREE_CODE (type) == BITINT_TYPE
+  && int_size_in_bytes (type) > 16)
+return true;
+
   if (mode == BLKmode
   || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
   || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
@@ -28330,6 +28339,36 @@ aarch64_excess_precision (enum excess_precision_type type)
   return FLT_EVAL_METHOD_UNPREDICTABLE;
 }
 
+/* Implement TARGET_C_BITINT_TYPE_INFO.
+   Return true if _BitInt(N) is supported and fill its details into *INFO.  */
+bool
+aarch64_bitint_type_info (int n, struct bitint_info *info)
+{
+  if (TARGET_BIG_END)
+return false;
+
+  if (n <= 8)
+info->limb_mode = QImode;
+  else if (n <= 16)
+info->limb_mode = HImode;
+  else if (n <= 32)
+info->limb_mode = SImode;
+  else if (n <= 64)
+info->limb_mode = DImode;
+  else if (n <= 128)
+info->limb_mode = TImode;
+  else
+info->limb_mode = DImode;
+
+  if (n > 128)
+info->abi_limb_mode = TImode;
+  else
+info->abi_limb_mode = info->limb_mode;
+  info->big_endian = TARGET_BIG_END;
+  info->extended = false;
+  return true;
+}
+
 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
scheduled for speculative execution.  Reject the long-running division
and square-root instructions.  */
@@ -30439,6 +30478,9 @@ aarch64_run_selftests (void)
 #undef TARGET_C_EXCESS_PRECISION
 #define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
 
+#undef TARGET_C_BITINT_TYPE_INFO
+#define TARGET_C_BITINT_TYPE_INFO aarch64_bitint_type_info
+
 #undef  TARGET_EXPAND_BUILTIN
 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
 
diff --git a/libgcc/config/aarch64/t-softfp b/libgcc/config/aarch64/t-softfp
index 2e32366f891..a335a34c243 100644
--- a/libgcc/config/aarch64/t-softfp
+++ b/libgcc/config/aarch64/t-softfp
@@ -4,7 +4,8 @@ softfp_extensions := sftf dftf hftf bfsf
 softfp_truncations := tfsf tfdf tfhf tfbf dfbf sfbf hfbf
 softfp_exclude_libgcc2 := n
 softfp_extras += fixhfti fixunshfti floattihf floatuntihf \
-		 floatdibf floatundibf floattibf floatuntibf
+		 floatdibf floatundibf floattibf floatuntibf \
+		 fixtfbitint floatbitinttf floatbitinthf
 
 TARGET_LIBGCC2_CFLAGS += -Wno-missing-prototypes
 


[PATCH 1/2] bitint: Use TARGET_ARRAY_MODE for large bitints where target supports it

2024-01-25 Thread Andre Vieira

This patch ensures we use TARGET_ARRAY_MODE to determine the storage mode of
large bitints that are represented as arrays in memory.  This is required to
support such bitints for aarch64 and potential other targets with similar
bitint specifications.  Existing tests like gcc.dg/torture/bitint-25.c are
affected by this for aarch64 targets.

gcc/ChangeLog:
stor-layout.cc (layout_type): Use TARGET_ARRAY_MODE for large bitints
for targets that implement it.
---
 gcc/stor-layout.cc | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/stor-layout.cc b/gcc/stor-layout.cc
index 4cf249133e9..31da2c123ab 100644
--- a/gcc/stor-layout.cc
+++ b/gcc/stor-layout.cc
@@ -2427,8 +2427,16 @@ layout_type (tree type)
 	  }
 	else
 	  {
-	SET_TYPE_MODE (type, BLKmode);
 	cnt = CEIL (TYPE_PRECISION (type), GET_MODE_PRECISION (limb_mode));
+	machine_mode mode;
+	/* Some targets use TARGET_ARRAY_MODE to select the mode they use
+	   for arrays with a specific element mode and a specific element
+	   count and we should use this mode for large bitints that are
+	   stored as such arrays.  */
+	if (!targetm.array_mode (limb_mode, cnt).exists (&mode)
+		|| !targetm.array_mode_supported_p (limb_mode, cnt))
+	  mode = BLKmode;
+	SET_TYPE_MODE (type, mode);
 	gcc_assert (info.abi_limb_mode == info.limb_mode
 			|| !info.big_endian == !WORDS_BIG_ENDIAN);
 	  }


[PATCH 0/2] aarch64, bitint: Add support for _BitInt for AArch64 Little Endian

2024-01-25 Thread Andre Vieira
Hi,

This patch series adds support for _BitInt for AArch64 when compiling for
Little Endian. The first patch in the series fixes an issue that arises with
support for AArch64, the second patch adds the backend support for it.

Andre Vieira (2):
bitint: Use TARGET_ARRAY_MODE for large bitints where target supports it
aarch64: Add support for _BitInt

Patch series bootstrapped and regression tested on aarch64-unknown-linux-gnu and 
x86_64-pc-linux-gnu.

Ok for trunk?

-- 
2.17.1


Re: [PATCH v4 4/4] aarch64: Add explicit checks for implicit LSE/LSE2 requirements.

2024-01-25 Thread Richard Sandiford
Victor Do Nascimento  writes:
> At present, evaluation of both `has_lse2(hwcap)' and
> `has_lse128(hwcap)' may require issuing an `mrs' instruction to query
> a system register.  This instruction, when issued from user-space
> results in a trap by the kernel which then returns the value read in
> by the system register.  Given the undesirable nature of the
> computational expense associated with the context switch, it is
> important to implement mechanisms to, wherever possible, forgo the
> operation.
>
> In light of this, given how other architectural requirements serving
> as prerequisites have long been assigned HWCAP bits by the kernel, we
> can inexpensively query for their availability before attempting to
> read any system registers.  Where one of these early tests fail, we
> can assert that the main feature of interest (be it LSE2 or LSE128)
> cannot be present, allowing us to return from the function early and
> skip the unnecessary expensive kernel-mediated access to system
> registers.
>
> libatomic/ChangeLog:
>
>   * config/linux/aarch64/host-config.h (has_lse2): Add test for LSE.
>   (has_lse128): Add test for LSE2.

FAOD, the previous OK for this patch still stands.

Thanks,
Richard

> ---
>  libatomic/config/linux/aarch64/host-config.h | 13 ++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/libatomic/config/linux/aarch64/host-config.h 
> b/libatomic/config/linux/aarch64/host-config.h
> index 1bc7d839232..4e354124063 100644
> --- a/libatomic/config/linux/aarch64/host-config.h
> +++ b/libatomic/config/linux/aarch64/host-config.h
> @@ -64,8 +64,13 @@ typedef struct __ifunc_arg_t {
>  static inline bool
>  has_lse2 (unsigned long hwcap, const __ifunc_arg_t *features)
>  {
> +  /* Check for LSE2.  */
>if (hwcap & HWCAP_USCAT)
>  return true;
> +  /* No point checking further for atomic 128-bit load/store if LSE
> + prerequisite not met.  */
> +  if (!(hwcap & HWCAP_ATOMICS))
> +return false;
>if (!(hwcap & HWCAP_CPUID))
>  return false;
>  
> @@ -99,9 +104,11 @@ has_lse128 (unsigned long hwcap, const __ifunc_arg_t 
> *features)
>   support in older kernels as it is of CPU feature absence.  Try fallback
>   method to guarantee LSE128 is not implemented.
>  
> - In the absence of HWCAP_CPUID, we are unable to check for LSE128.  */
> -  if (!(hwcap & HWCAP_CPUID))
> -return false;
> + In the absence of HWCAP_CPUID, we are unable to check for LSE128.
> + If feature check available, check LSE2 prerequisite before proceeding.  
> */
> +  if (!(hwcap & HWCAP_CPUID) || !(hwcap & HWCAP_USCAT))
> + return false;
> +
>unsigned long isar0;
>asm volatile ("mrs %0, ID_AA64ISAR0_EL1" : "=r" (isar0));
>if (AT_FEAT_FIELD (isar0) >= 3)


Re: [PATCH v4 3/4] libatomic: Enable LSE128 128-bit atomics for armv9.4-a

2024-01-25 Thread Richard Sandiford
Victor Do Nascimento  writes:
> The armv9.4-a architectural revision adds three new atomic operations
> associated with the LSE128 feature:
>
>   * LDCLRP - Atomic AND NOT (bitclear) of a location with 128-bit
>   value held in a pair of registers, with original data loaded into
>   the same 2 registers.
>   * LDSETP - Atomic OR (bitset) of a location with 128-bit value held
>   in a pair of registers, with original data loaded into the same 2
>   registers.
>   * SWPP - Atomic swap of one 128-bit value with 128-bit value held
>   in a pair of registers.
>
> It is worth noting that in keeping with existing 128-bit atomic
> operations in `atomic_16.S', we have chosen to merge certain
> less-restrictive orderings into more restrictive ones.  This is done
> to minimize the number of branches in the atomic functions, minimizing
> both the likelihood of branch mispredictions and, in keeping code
> small, limit the need for extra fetch cycles.
>
> Past benchmarking has revealed that acquire is typically slightly
> faster than release (5-10%), such that for the most frequently used
> atomics (CAS and SWP) it makes sense to add support for acquire, as
> well as release.
>
> Likewise, it was identified that combining acquire and release typically
> results in little to no penalty, such that it is of negligible benefit
> to distinguish between release and acquire-release, making the
> combining release/acq_rel/seq_cst a worthwhile design choice.

I was thinking more that it would be good to have this as a block
comment within the file itself.  I won't insist though.  At least
having it in the commit message will ensure that it's discoverable
from the git repo.

> This patch adds the logic required to make use of these when the
> architectural feature is present and a suitable assembler available.
>
> In order to do this, the following changes are made:
>
>   1. Add a configure-time check to check for LSE128 support in the
>   assembler.
>   2. Edit host-config.h so that when N == 16, nifunc = 2.
>   3. Where available due to LSE128, implement the second ifunc, making
>   use of the novel instructions.
>   4. For atomic functions unable to make use of these new
>   instructions, define a new alias which causes the _i1 function
>   variant to point ahead to the corresponding _i2 implementation.
>
> libatomic/ChangeLog:
>
>   * Makefile.am (AM_CPPFLAGS): add conditional setting of
>   -DHAVE_FEAT_LSE128.
>   * acinclude.m4 (LIBAT_TEST_FEAT_AARCH64_LSE128): New.
>   * config/linux/aarch64/atomic_16.S (LSE128): New macro
>   definition.
>   (libat_exchange_16): New LSE128 variant.
>   (libat_fetch_or_16): Likewise.
>   (libat_or_fetch_16): Likewise.
>   (libat_fetch_and_16): Likewise.
>   (libat_and_fetch_16): Likewise.
>   * config/linux/aarch64/host-config.h (IFUNC_COND_2): New.
>   (IFUNC_NCOND): Add operand size checking.
>   (has_lse2): Renamed from `ifunc1`.
>   (has_lse128): New.
>   (HWCAP2_LSE128): Likewise.
>   * libatomic/configure.ac: Add call to
>   LIBAT_TEST_FEAT_AARCH64_LSE128.
>   * configure (ac_subst_vars): Regenerated via autoreconf.
>   * libatomic/Makefile.in: Likewise.
>   * libatomic/auto-config.h.in: Likewise.

OK, thanks.

Richard

> ---
>  libatomic/Makefile.am|   3 +
>  libatomic/Makefile.in|   1 +
>  libatomic/acinclude.m4   |  19 +++
>  libatomic/auto-config.h.in   |   3 +
>  libatomic/config/linux/aarch64/atomic_16.S   | 170 ++-
>  libatomic/config/linux/aarch64/host-config.h |  42 -
>  libatomic/configure  |  61 ++-
>  libatomic/configure.ac   |   3 +
>  8 files changed, 293 insertions(+), 9 deletions(-)
>
> diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am
> index cfad90124f9..0623a0bf2d1 100644
> --- a/libatomic/Makefile.am
> +++ b/libatomic/Makefile.am
> @@ -130,6 +130,9 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix 
> _$(s)_.lo,$(SIZEOBJS)))
>  ## On a target-specific basis, include alternates to be selected by IFUNC.
>  if HAVE_IFUNC
>  if ARCH_AARCH64_LINUX
> +if ARCH_AARCH64_HAVE_LSE128
> +AM_CPPFLAGS   = -DHAVE_FEAT_LSE128
> +endif
>  IFUNC_OPTIONS = -march=armv8-a+lse
>  libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix 
> _$(s)_1_.lo,$(SIZEOBJS)))
>  libatomic_la_SOURCES += atomic_16.S
> diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in
> index dc2330b91fd..cd48fa21334 100644
> --- a/libatomic/Makefile.in
> +++ b/libatomic/Makefile.in
> @@ -452,6 +452,7 @@ M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files)))
>  libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
>   _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \
>   $(am__append_4) $(am__append_5)
> +@ARCH_AARCH64_HAVE_LSE128_TRUE@@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@AM_CPPFLAGS
>  = -DHAVE_FEAT_LSE128
>  @ARCH_AARCH

Re: [PATCH v4 2/4] libatomic: Add support for __ifunc_arg_t arg in ifunc resolver

2024-01-25 Thread Richard Sandiford
Victor Do Nascimento  writes:
> With support for new atomic features in Armv9.4-a being indicated by
> HWCAP2 bits, Libatomic's ifunc resolver must now query its second
> argument, of type __ifunc_arg_t*.
>
> We therefore make this argument known to libatomic, allowing us to
> query hwcap2 bits in the following manner:
>
>   bool
>   resolver (unsigned long hwcap, const __ifunc_arg_t *features);
>   {
> return (features->hwcap2 & HWCAP2_);
>   }
>
> libatomic/ChangeLog:
>
>   * config/linux/aarch64/host-config.h (__ifunc_arg_t):
>   Conditionally-defined if `sys/ifunc.h' not found.
>   (_IFUNC_ARG_HWCAP): Likewise.
>   (IFUNC_COND_1): Pass __ifunc_arg_t argument to ifunc.
>   (ifunc1): Modify function signature to accept __ifunc_arg_t
>   argument.
>   * configure.tgt: Add second `const __ifunc_arg_t *features'
>   argument to IFUNC_RESOLVER_ARGS.

OK, thanks.

Richard

> ---
>  libatomic/config/linux/aarch64/host-config.h | 15 +--
>  libatomic/configure.tgt  |  2 +-
>  2 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/libatomic/config/linux/aarch64/host-config.h 
> b/libatomic/config/linux/aarch64/host-config.h
> index 4200293c4e3..8fd4fe3321a 100644
> --- a/libatomic/config/linux/aarch64/host-config.h
> +++ b/libatomic/config/linux/aarch64/host-config.h
> @@ -24,9 +24,20 @@
>  #if HAVE_IFUNC
>  #include 
>  
> +#if __has_include()
> +# include 
> +#else
> +typedef struct __ifunc_arg_t {
> +  unsigned long _size;
> +  unsigned long _hwcap;
> +  unsigned long _hwcap2;
> +} __ifunc_arg_t;
> +# define _IFUNC_ARG_HWCAP (1ULL << 62)
> +#endif
> +
>  #ifdef HWCAP_USCAT
>  # if N == 16
> -#  define IFUNC_COND_1   ifunc1 (hwcap)
> +#  define IFUNC_COND_1   ifunc1 (hwcap, features)
>  # else
>  #  define IFUNC_COND_1   (hwcap & HWCAP_ATOMICS)
>  # endif
> @@ -48,7 +59,7 @@
>  #define MIDR_PARTNUM(midr)   (((midr) >> 4) & 0xfff)
>  
>  static inline bool
> -ifunc1 (unsigned long hwcap)
> +ifunc1 (unsigned long hwcap, const __ifunc_arg_t *features)
>  {
>if (hwcap & HWCAP_USCAT)
>  return true;
> diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt
> index b7609132c58..67a5f2dff80 100644
> --- a/libatomic/configure.tgt
> +++ b/libatomic/configure.tgt
> @@ -194,7 +194,7 @@ esac
>  # The type may be different on different architectures.
>  case "${target}" in
>aarch64*-*-*)
> - IFUNC_RESOLVER_ARGS="uint64_t hwcap"
> + IFUNC_RESOLVER_ARGS="uint64_t hwcap, const __ifunc_arg_t *features"
>   ;;
>*)
>   IFUNC_RESOLVER_ARGS="void"


Re: [libatomic PATCH] Fix testsuite regressions on ARM [raspberry pi].

2024-01-25 Thread Victor Do Nascimento




On 1/11/24 15:55, Roger Sayle wrote:


Hi Richard,
As you've recommended, this issue has now been filed in bugzilla
as PR other/113336.  As explained in the new PR, libatomic's testsuite
used to pass on armv6 (raspberry pi) in previous GCC releases, but
the code was incorrect/non-synchronous; this was reported as
PR target/107567 and PR target/109166.  Now that those issues
have been fixed, we now see that there's a missing dependency in
libatomic that's required to implement this functionality correctly.

I'm more convinced that my fix is correct, but it's perhaps a little
disappointing that libatomic doesn't have a (multi-threaded) run-time
test to search for race conditions, and confirm its implementations
are correctly serializing.

Please let me know what you think.
Best regards,
Roger
--


I do think that if the regression is caused by HAVE_ATOMIC_TAS now being 
detected as false due to a bugfix elsewhere as you kindly pointed out, 
then the fix perhaps ought to change the compile-time behavior for TAS 
alone.


As I point out in Bugzilla, we can get away with replacing the proposed

  libatomic_la_LIBADD += $(addsuffix _1_2_.lo,$(SIZEOBJS))

with

  libatomic_la_LIBADD += tas_1_2_.lo

so that we generate the missing `libat_test_and_set_1_i2' specifically.
I've not managed to detect the need for any other *_1_i2 thus far and 
this alone appears sufficient to fix all observed regressions.


Happy to investigate further, but my initial findings seem to be that 
this may be a better fix.


Let me know if you disagree ;).

Regards,
Victor


-Original Message-
From: Richard Earnshaw 
Sent: 10 January 2024 15:34
To: Roger Sayle ; gcc-patches@gcc.gnu.org
Subject: Re: [libatomic PATCH] Fix testsuite regressions on ARM [raspberry pi].



On 08/01/2024 16:07, Roger Sayle wrote:


Bootstrapping GCC on arm-linux-gnueabihf with --with-arch=armv6
currently has a large number of FAILs in libatomic (regressions since
last time I attempted this).  The failure mode is related to IFUNC
handling with the file tas_8_2_.o containing an unresolved reference
to the function libat_test_and_set_1_i2.

Bearing in mind I've no idea what's going on, the following one line
change, to build tas_1_2_.o when building tas_8_2_.o, resolves the
problem for me and restores the libatomic testsuite to 44 expected
passes and 5 unsupported tests [from 22 unexpected failures and 22 unresolved

testcases].


If this looks like the correct fix, I'm not confident with rebuilding
Makefile.in with correct version of automake, so I'd very much
appreciate it if someone/the reviewer/maintainer could please check this in for

me.

Thanks in advance.


2024-01-08  Roger Sayle  

libatomic/ChangeLog
  * Makefile.am: Build tas_1_2_.o on ARCH_ARM_LINUX
  * Makefile.in: Regenerate.


Roger
--



Hi Roger,

I don't really understand all this make foo :( so I'm not sure if this is the 
right fix
either.  If this is, as you say, a regression, have you been able to track down 
when
it first started to occur?  That might also help me to understand what changed 
to
cause this.

Perhaps we should have a PR for this, to make tracking the fixes easier.

R.




Re: [PATCH v4 1/4] libatomic: atomic_16.S: Improve ENTRY, END and ALIAS macro interface

2024-01-25 Thread Richard Sandiford
Victor Do Nascimento  writes:
> The introduction of further architectural-feature dependent ifuncs
> for AArch64 makes hard-coding ifunc `_i' suffixes to functions
> cumbersome to work with.  It is awkward to remember which ifunc maps
> onto which arch feature and makes the code harder to maintain when new
> ifuncs are added and their suffixes possibly altered.
>
> This patch uses pre-processor `#define' statements to map each suffix to
> a descriptive feature name macro, for example:
>
>   #define LSE(NAME) NAME##_i1
>
> Where we wish to generate ifunc names with the pre-processor's token
> concatenation feature, we add a level of indirection to previous macro
> calls.  If before we would have had `MACRO(_i)', we now have
> `MACRO_FEAT(name, feature)'.  Where we wish to refer to base
> functionality (i.e., functions where ifunc suffixes are absent), the
> original `MACRO()' may be used to bypass suffixing.
>
> Consequently, for base functionality, where the ifunc suffix is
> absent, the macro interface remains the same.  For example, the entry
> and endpoints of `libat_store_16' remain defined by:
>
>   ENTRY (libat_store_16)
>
> and
>
>   END (libat_store_16)
>
> For the LSE2 implementation of the same 16-byte atomic store, we now
> have:
>
>   ENTRY_FEAT (libat_store_16, LSE2)
>
> and
>
>   END_FEAT (libat_store_16, LSE2)
>
> For the aliasing of function names, we define the following new
> implementation of the ALIAS macro:
>
>   ALIAS (FN_BASE_NAME, FROM_SUFFIX, TO_SUFFIX)
>
> Defining the `CORE(NAME)' macro to be the identity operator, it
> returns the base function name unaltered and allows us to alias
> target-specific ifuncs to the corresponding base implementation.
> For example, we'd alias the LSE2 `libat_exchange_16' to its base
> implementation with:
>
>   ALIAS (libat_exchange_16, LSE2, CORE)
>
> libatomic/ChangeLog:
>   * config/linux/aarch64/atomic_16.S (CORE): New macro.
>   (LSE2): Likewise.
>   (ENTRY_FEAT): Likewise.
>   (ENTRY_FEAT1): Likewise.
>   (END_FEAT): Likewise.
>   (END_FEAT1): Likewise.
>   (ALIAS): Modify macro to take in `arch' arguments.
>   (ALIAS1): New.
> ---
>  libatomic/config/linux/aarch64/atomic_16.S | 79 +-
>  1 file changed, 47 insertions(+), 32 deletions(-)
>
> diff --git a/libatomic/config/linux/aarch64/atomic_16.S 
> b/libatomic/config/linux/aarch64/atomic_16.S
> index ad14f8f2e6e..16a42925903 100644
> --- a/libatomic/config/linux/aarch64/atomic_16.S
> +++ b/libatomic/config/linux/aarch64/atomic_16.S
> @@ -40,22 +40,38 @@
>  
>   .arch   armv8-a+lse
>  
> -#define ENTRY(name)  \
> - .global name;   \
> - .hidden name;   \
> - .type name,%function;   \
> +#define LSE2(NAME)   NAME##_i1
> +#define CORE(NAME)   NAME
> +
> +#define ENTRY(NAME) ENTRY_FEAT1 (NAME)
> +
> +#define ENTRY_FEAT(NAME, FEAT)  \
> + ENTRY_FEAT1 (FEAT (NAME))
> +
> +#define ENTRY_FEAT1(NAME)\
> + .global NAME;   \
> + .hidden NAME;   \
> + .type NAME,%function;   \

I don't think ENTRY_FEAT1 is necessary now.  It should be possible
to keep ENTRY as it was and use:

#define ENTRY_FEAT(NAME, FEAT)  \
ENTRY (FEAT (NAME))

Similarly for END/END_FEAT.

OK with those changes, thanks.

Richard

>   .p2align 4; \
> -name:\
> - .cfi_startproc; \
> +NAME:\
> + .cfi_startproc; \
>   hint34  // bti c
>  
> -#define END(name)\
> +#define END(NAME) END_FEAT1 (NAME)
> +
> +#define END_FEAT(NAME, FEAT) \
> + END_FEAT1 (FEAT (NAME))
> +
> +#define END_FEAT1(NAME)  \
>   .cfi_endproc;   \
> - .size name, .-name;
> + .size NAME, .-NAME;
> +
> +#define ALIAS(NAME, FROM, TO)\
> + ALIAS1 (FROM (NAME),TO (NAME))
>  
> -#define ALIAS(alias,name)\
> - .global alias;  \
> - .set alias, name;
> +#define ALIAS1(ALIAS, NAME)  \
> + .global ALIAS;  \
> + .set ALIAS, NAME;
>  
>  #define res0 x0
>  #define res1 x1
> @@ -108,7 +124,7 @@ ENTRY (libat_load_16)
>  END (libat_load_16)
>  
>  
> -ENTRY (libat_load_16_i1)
> +ENTRY_FEAT (libat_load_16, LSE2)
>   cbnzw1, 1f
>  
>   /* RELAXED.  */
> @@ -128,7 +144,7 @@ ENTRY (libat_load_16_i1)
>   ldp res0, res1, [x0]
>   dmb ishld
>   ret
> -END (libat_load_16_i1)
> +END_FEAT (libat_load_16, LSE2)
>  
>  
>  ENTRY (libat_store_16)
> @@ -148,7 +164,7 @@ ENTRY (libat_store_16)
>  END (libat_store_16)
>  
>  
> -ENTRY (libat_store_16_i1)
> +ENTRY_FEAT (libat_store_16, LSE2)
>   cbnzw4, 1f
>  
>   /* RELAXED.  */
> @@ -160,7 +176,7 @@ ENTRY (libat_store_16_i1)
>   stlxp   w4, in0, in1, [x0]
>   cbnzw4, 1b
>   ret
> -END (libat_store_16_i1)
> +END_FEAT (libat_store_16, LSE2)
>  
>  
>  ENTRY (libat_exchange_16)
> @@ -237,7 +253,7 @@ ENTRY (libat_compare_exchange_16)
>  END (libat_compare_exchange_16)
> 

RE: [PATCH] AArch64: Add -mcpu=cobalt-100

2024-01-25 Thread Kyrylo Tkachov



> -Original Message-
> From: Wilco Dijkstra 
> Sent: Thursday, January 25, 2024 5:00 PM
> To: Kyrylo Tkachov ; GCC Patches  patc...@gcc.gnu.org>
> Cc: Richard Earnshaw ; Richard Sandiford
> 
> Subject: Re: [PATCH] AArch64: Add -mcpu=cobalt-100
> 
> Hi,
> 
> >> Add support for -mcpu=cobalt-100 (Neoverse N2 with a different implementer
> >> ID).
> >>
> >> Passes regress, OK for commit?
> >
> > Ok.
> 
> Also OK to backport to GCC 13, 12 and 11?

On the 11 branch at least there is no support for the armv9-a flags, so the 
aarch64-cores.def entry would need to use what the branch-local neoverse-n2 
entry uses (armv8.5-a).
So the trunk patch won't apply as is.
So please ensure the appropriate flags are used in the aarch64-cores.def entry 
(with the usual testing).
But otherwise it's okay.
Thanks,
Kyrill

> 
> Cheers,
> Wilco


Re: [PATCH V3 4/4] RISC-V: Enable assert for insn_has_dfa_reservation

2024-01-25 Thread Robin Dapp
>/* If we ever encounter an insn without an insn reservation, trip
>   an assert so we can find and fix this problem.  */
> -#if 0
> +  if (! insn_has_dfa_reservation_p (insn)) {
> +print_rtl(stderr, insn);
> +fprintf(stderr, "%d", get_attr_type (insn));
> +  }
>gcc_assert (insn_has_dfa_reservation_p (insn));
> -#endif
>  
>return more - 1;
>  }

I was thinking about making the gcc_assert a gcc_checking_assert so that,
in case we accidentally forget something at any point, it would
only gracefully degrade in a release build.  As we already have
a hard assert for the type (and not many people test with checking
enabled anyway), the patch is OK IMHO.

I suppose you tested with all available -mtune options?

Regards
 Robin



Re: [PATCH V3 3/4] RISC-V: Use default cost model for insn scheduling

2024-01-25 Thread Robin Dapp
> Use default cost model scheduling on these test cases. All these tests
> introduce scan dump failures with -mtune generic-ooo. Since the vector
> cost models are the same across all three tunes, some of the tests
> in PR113249 will be fixed with this patch series.

This is OK, thanks.

> 39 additional unique testsuite failures (scan dumps) will still be present.
> I don't know how optimal the new output is compared to the old. Should I 
> update
> the testcase expected output to match the new scan dumps?

Currently, without vector op latency, the output should come close
to what's normally considered "good" (i.e. minimal number of vsetvls
and so on).  Therefore I'd suggest not to change the scan dumps too
much except when there is a real problem.  If you have a specific
example that you're unsure about we can discuss this on or off list.

Regards
 Robin



Re: [PATCH V3 2/4] RISC-V: Add vector related pipelines

2024-01-25 Thread Robin Dapp
Thanks, that looks better IMHO.

> +;; Copyright (C) 2011-2024 Free Software Foundation, Inc.
> +;; Contributed by Andrew Waterman (and...@sifive.com).
> +;; Based on MIPS target for GNU compiler.

You might want to change that, as well as the date.  While at
it you can also fix the broken date in my original file ;)

> +(define_insn_reservation "vec_load" 6
> +  (and (eq_attr "is_inorder" "no")
> +   (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr"))
> +  "vxu_ooo_issue,vxu_ooo_alu")

I would rather ditch the is_inorder attribute for now and define
"low" latencies as well as reservations explicitly once we're
sure rather than falling back to scheduler defaults. 

OK with those changes.

Regards
 Robin


Re: [PATCH V3 1/4] RISC-V: Add non-vector types to dfa pipelines

2024-01-25 Thread Robin Dapp
LGTM, thanks.

Regards
 Robin


Re: [PATCH] AArch64: Add -mcpu=cobalt-100

2024-01-25 Thread Wilco Dijkstra
Hi,

>> Add support for -mcpu=cobalt-100 (Neoverse N2 with a different implementer
>> ID).
>> 
>> Passes regress, OK for commit?
>
> Ok.

Also OK to backport to GCC 13, 12 and 11?

Cheers,
Wilco

Re: [PATCH] aarch64: Fix function multiversioning mangling

2024-01-25 Thread Richard Sandiford
Andrew Carlotti  writes:
> It would be neater if the middle end for target_clones used a target
> hook for version name mangling, so we only do version name mangling
> once.  However, that would require more intrusive refactoring that will
> have to wait till Stage 1.
>
>
> This patch builds upon the testsuite additions in patch 1/5 of the
> previous series. I could commit just the aarch64 tests for now if that's
> preferred. Is this version of the fix ok for master?
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64.cc
>   (get_suffixed_assembler_name): New.
>   (make_resolver_func): Use get_suffixed_assembler_name.
>   (aarch64_mangle_decl_assembler_name): Add ".default" suffix.
>   (aarch64_generate_version_dispatcher_body): Redo name mangling.
>
> gcc/testsuite/ChangeLog:
>
>   * g++.target/aarch64/mv-symbols1.C: Update for mangling fixes.
>   * g++.target/aarch64/mv-symbols2.C: Ditto.
>   * g++.target/aarch64/mv-symbols3.C: Ditto.
>   * g++.target/aarch64/mv-symbols4.C: Ditto.
>   * g++.target/aarch64/mv-symbols5.C: Ditto.
>   * g++.target/aarch64/mvc-symbols1.C: Ditto.
>   * g++.target/aarch64/mvc-symbols2.C: Ditto.
>   * g++.target/aarch64/mvc-symbols3.C: Ditto.
>   * g++.target/aarch64/mvc-symbols4.C: Ditto.

I found this a bit difficult to review (not your fault).  In the
abstract, it seems a bit dangerous to change the names of the clones
while emitting the dispatcher.  But I agree that it looks like it
should work in practice, and that it's probably the least invasive
fix for GCC 14.  The dispatcher is created by the same IPA pass
that creates the clones, so there should be little risk of the
old assembler name's being used before it's changed.  And
symbol_table::change_decl_assembler_name does have some code
to detect when a name is changed after use, in case things are
restructured later.

So this patch and the aarch64 parts of 1/5 are OK from my POV
with some trivial changes below.

> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 
> 7d1f8c65ce41044d6850262300cf08a23d606617..bf698a2c3bb105375a2be37ca032397161bf4334
>  100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -19832,6 +19832,21 @@ build_ifunc_arg_type ()
>return pointer_type;
>  }
>  
> +/* Return an identifier for the base assembler name of a versioned function.
> +   This is computed by taking the default version's assembler name, and
> +   stripping off the ".default" suffix if it's already been appended.  */
> +
> +tree get_suffixed_assembler_name (tree default_decl, const char *suffix)

Probably best to make this static, given that it isn't in the aarch64_
"namespace".

Formatting nit, but: there should be a new line before the function name.

> +{
> +  std::string name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (default_decl));
> +
> +  auto size = name.size ();
> +  if (size >= 8 && name.compare (size - 8, 8, ".default") == 0)
> +name.resize (size - 8);
> +  name += suffix;
> +  return get_identifier (name.c_str());
> +}
> +
>  /* Make the resolver function decl to dispatch the versions of
> a multi-versioned function,  DEFAULT_DECL.  IFUNC_ALIAS_DECL is
> ifunc alias that will point to the created resolver.  Create an
> @@ -19845,8 +19860,9 @@ make_resolver_func (const tree default_decl,
>  {
>tree decl, type, t;
>  
> -  /* Create resolver function name based on default_decl.  */
> -  tree decl_name = clone_function_name (default_decl, "resolver");
> +  /* Create resolver function name based on default_decl.  We need to remove 
> an
> + existing ".default" suffix if this has already been appended.  */
> +  tree decl_name = get_suffixed_assembler_name (default_decl, ".resolver");
>const char *resolver_name = IDENTIFIER_POINTER (decl_name);
>  
>/* The resolver function should have signature
> @@ -20137,6 +20153,8 @@ dispatch_function_versions (tree dispatch_decl,
>return 0;
>  }
>  
> +tree aarch64_mangle_decl_assembler_name (tree, tree);
> +

Please reorder the functions instead of adding the forward declaration.
It'll create a bit of git churn, but that's OK.

>  /* Implement TARGET_GENERATE_VERSION_DISPATCHER_BODY.  */
>  
>  tree
> @@ -20193,6 +20211,28 @@ aarch64_generate_version_dispatcher_body (void 
> *node_p)
>dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
>cgraph_edge::rebuild_edges ();
>pop_cfun ();
> +
> +  /* Fix up symbol names.  First we need to obtain the base name, which may
> + have already been mangled.  */
> +  tree base_name = get_suffixed_assembler_name (default_ver_decl, "");
> +
> +  /* We need to redo the version mangling on the non-default versions for the
> + target_clones case.  Redoing the mangling for the target_version case is
> + redundant but does no harm.  We need to skip the default version, 
> because
> + expand_clones will append ".default" later; fortunately that suffix is 
> the

Re: [PATCH] RISC-V: Fix incorrect LCM delete bug [VSETVL PASS]

2024-01-25 Thread Robin Dapp
The non-test parts are OK IMHO.

Regards
 Robin


Re: [PATCH] [testsuite] Fix pretty printers regexps for GDB output

2024-01-25 Thread Christophe Lyon
On Wed, 24 Jan 2024 at 12:02, Jonathan Wakely  wrote:
>
> On Wed, 24 Jan 2024 at 10:48, Christophe Lyon wrote:
> >
> > GDB emits end of lines as \r\n, we currently match the reverse \n\r,
>
> We currently match [\n\r]+ which should match any of \n, \r, \n\r or \r\n
>

Hmm, right, sorry I had this patch in my tree for quite some time, but
wrote the description just now, so I read a bit too quickly.

>
> > possibly leading to mismatches under racy conditions.
>
> What do we incorrectly match? Is the problem that a \r\n sequence
> might be incompletely printed, due to buffering, and so the regex only
> sees (and matches) the \r which then leaves an unwanted \n in the
> stream, which then interferes with the next match? I don't understand
> why that problem wouldn't just result in a failed match with your new
> regex though.
>
Exactly: READ1 forces read() to return 1 byte at a time, so we leave
an unwanted \r in front of a string that should otherwise match the
"got" case.

>
> >
> > I noticed this while running the GCC testsuite using the equivalent of
> > GDB's READ1 feature [1] which helps detecting bufferization issues.
> >
> > Adjusting the first regexp to match the right order implied fixing the
> > second one, to skip the empty lines.
>
> At the very least, this part of the description is misleading. The
> existing regex matches "the right order" already. The change is to
> match *exactly* \r\n instead of any mix of CR and LF characters.
> That's not about matching "the right order", it's being more precise
> in what we match.
>
> But I'm still confused about what the failure scenario is and how the
> change fixes it.
>

I followed what the GDB testsuite does (it matches \r\n at the end of
many regexps), but in fact it seems it's not needed:
it works if I update the "got" regexp like this (ie. accept any number
of leading \r or \n):
-   -re {^(type|\$([0-9]+)) = ([^\n\r]*)[\n\r]+} {
+   -re {^[\n\r]*(type|\$([0-9]+)) = ([^\n\r]*)[\n\r]+} {
and leave the "skipping" regexp as it is currently.

Is the new attached version OK?

Thanks,

Christophe

> >
> > Tested on aarch64-linux-gnu.
> >
> > [1] 
> > https://github.com/bminor/binutils-gdb/blob/master/gdb/testsuite/README#L269
> >
> > 2024-01-24  Christophe Lyon  
> >
> > libstdc++-v3/
> > * testsuite/lib/gdb-test.exp (gdb-test): Fix regexps.
> > ---
> >  libstdc++-v3/testsuite/lib/gdb-test.exp | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/libstdc++-v3/testsuite/lib/gdb-test.exp 
> > b/libstdc++-v3/testsuite/lib/gdb-test.exp
> > index 31206f2fc32..0de8d9ee153 100644
> > --- a/libstdc++-v3/testsuite/lib/gdb-test.exp
> > +++ b/libstdc++-v3/testsuite/lib/gdb-test.exp
> > @@ -194,7 +194,7 @@ proc gdb-test { marker {selector {}} {load_xmethods 0} 
> > } {
> >
> >  set test_counter 0
> >  remote_expect target [timeout_value] {
> > -   -re {^(type|\$([0-9]+)) = ([^\n\r]*)[\n\r]+} {
> > +   -re {^(type|\$([0-9]+)) = ([^\n\r]*)\r\n} {
> > send_log "got: $expect_out(buffer)"
> >
> > incr test_counter
> > @@ -250,7 +250,7 @@ proc gdb-test { marker {selector {}} {load_xmethods 0} 
> > } {
> > return
> > }
> >
> > -   -re {^[^$][^\n\r]*[\n\r]+} {
> > +   -re {^[\r\n]*[^$][^\n\r]*\r\n} {
> > send_log "skipping: $expect_out(buffer)"
> > exp_continue
> > }
> > --
> > 2.34.1
> >
>
From e7cd0475141921282d5c9044b2450ae8e196efc4 Mon Sep 17 00:00:00 2001
From: Christophe Lyon 
Date: Thu, 25 Jan 2024 15:43:56 +
Subject: [PATCH v2] [testsuite] Fix pretty printers regexp for GDB output

GDB emits end of lines as \r\n, we currently match any >0 number of
either \n or \r, possibly leading to mismatches under racy conditions.

I noticed this while running the GCC testsuite using the equivalent of
GDB's READ1 feature [1] which helps detecting bufferization issues.

We try to match
\n$1 = empty std::tuple\r

against {^(type|\$([0-9]+)) = ([^\n\r]*)[\n\r]+} which fails because
of the leading \n (which was left in the buffer after the previous
"skipping" pattern matched the preceding \r).

This patch accepts any number of leading \n and/or \r in the "got" clause.

Also take this opportunity to quote \r and \n in the logs, to make
debugging such issues easier.

Tested on aarch64-linux-gnu.

[1] https://github.com/bminor/binutils-gdb/blob/master/gdb/testsuite/README#L269

2024-01-24  Christophe Lyon  

	libstdc++-v3/
	* testsuite/lib/gdb-test.exp (gdb-test): Fix regexp.  Quote
	newlines in logs.
---
 libstdc++-v3/testsuite/lib/gdb-test.exp | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/testsuite/lib/gdb-test.exp b/libstdc++-v3/testsuite/lib/gdb-test.exp
index 31206f2fc32..2ec5596983d 100644
--- a/libstdc++-v3/testsuite/lib/gdb-test.exp
+++ b/libstdc++-v3/testsuite/lib/gdb-test.exp
@@ -194,8 +194,11 @@ proc gdb-test { marker {selector {}} {load_xmethods 0} } {
 
 set test_counter 

[pushed] analyzer: fix defaults in compound assignments from non-zero offsets [PR112969]

2024-01-25 Thread David Malcolm
Confusion in binding_cluster::maybe_get_compound_binding about whether
offsets are relative to the start of the region or to the start of the
cluster was leading to incorrect handling of default values, leading
to false positives from -Wanalyzer-use-of-uninitialized-value, from
-Wanalyzer-exposure-through-uninit-copy, and other logic errors.

Fixed thusly.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Successful run of analyzer integration tests on x86_64-pc-linux-gnu.
Pushed to trunk as r14-8428-g6426d466779fa8.

gcc/analyzer/ChangeLog:
PR analyzer/112969
* store.cc (binding_cluster::maybe_get_compound_binding): When
populating default_map, express the bit-range of the default key
for REG relative to REG, rather than to the base region.

gcc/testsuite/ChangeLog:
PR analyzer/112969
* c-c++-common/analyzer/compound-assignment-5.c (test_3): Remove
xfails, reorder tests.
* c-c++-common/analyzer/compound-assignment-pr112969.c: New test.
* gcc.dg/plugin/infoleak-pr112969.c: New test.
* gcc.dg/plugin/plugin.exp: Add infoleak-pr112969.c to
analyzer_kernel_plugin.c tests.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/store.cc | 11 +++-
 .../analyzer/compound-assignment-5.c  |  3 +-
 .../analyzer/compound-assignment-pr112969.c   | 35 +
 .../gcc.dg/plugin/infoleak-pr112969.c | 52 +++
 gcc/testsuite/gcc.dg/plugin/plugin.exp|  1 +
 5 files changed, 99 insertions(+), 3 deletions(-)
 create mode 100644 
gcc/testsuite/c-c++-common/analyzer/compound-assignment-pr112969.c
 create mode 100644 gcc/testsuite/gcc.dg/plugin/infoleak-pr112969.c

diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc
index 67c90b7fce4..e85a19647f7 100644
--- a/gcc/analyzer/store.cc
+++ b/gcc/analyzer/store.cc
@@ -1759,7 +1759,16 @@ binding_cluster::maybe_get_compound_binding 
(store_manager *mgr,
   else
 default_sval = sval_mgr->get_or_create_initial_value (reg);
   const binding_key *default_key = binding_key::make (mgr, reg);
-  default_map.put (default_key, default_sval);
+
+  /* Express the bit-range of the default key for REG relative to REG,
+ rather than to the base region.  */
+  const concrete_binding *concrete_default_key
+= default_key->dyn_cast_concrete_binding ();
+  if (!concrete_default_key)
+return nullptr;
+  const concrete_binding *default_key_relative_to_reg
+ = mgr->get_concrete_binding (0, concrete_default_key->get_size_in_bits 
());
+  default_map.put (default_key_relative_to_reg, default_sval);
 
   for (map_t::iterator iter = m_map.begin (); iter != m_map.end (); ++iter)
 {
diff --git a/gcc/testsuite/c-c++-common/analyzer/compound-assignment-5.c 
b/gcc/testsuite/c-c++-common/analyzer/compound-assignment-5.c
index 3ce2b72c8ff..08f10606d91 100644
--- a/gcc/testsuite/c-c++-common/analyzer/compound-assignment-5.c
+++ b/gcc/testsuite/c-c++-common/analyzer/compound-assignment-5.c
@@ -48,9 +48,8 @@ void test_3 (void)
 
   glob_arr3[7] = arr[3]; // or should the uninit warning be here?
 
-  __analyzer_eval (glob_arr3[7].x); /* { dg-warning "uninitialized" "uninit" { 
xfail *-*-* } } */
-  /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
   __analyzer_eval (glob_arr3[7].y == 6); /* { dg-warning "TRUE" } */
+  __analyzer_eval (glob_arr3[7].x); /* { dg-warning "uninitialized" "uninit" } 
*/
 }
 
 /* Symbolic bindings: copying from one array to another.  */
diff --git a/gcc/testsuite/c-c++-common/analyzer/compound-assignment-pr112969.c 
b/gcc/testsuite/c-c++-common/analyzer/compound-assignment-pr112969.c
new file mode 100644
index 000..4bc037cb7cf
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/analyzer/compound-assignment-pr112969.c
@@ -0,0 +1,35 @@
+/* Reduced from -Wanalyzer-exposure-through-uninit-copy false positives
+   seen in Linux kernel in drivers/net/ethernet/intel/ice/ice_ptp.c  */
+
+#include "analyzer-decls.h"
+
+/* { dg-do compile } */
+
+struct hwtstamp_config
+{
+  int flags;
+  int tx_type;
+  int rx_filter;
+};
+
+struct ice_ptp
+{
+  long placeholder;
+  struct hwtstamp_config tstamp_config;
+};
+
+struct ice_pf
+{
+  struct ice_ptp ptp;
+};
+
+void
+ice_ptp_set_ts_config(struct ice_pf* pf)
+{
+  struct hwtstamp_config config;
+  pf->ptp.tstamp_config.tx_type = 1;
+  pf->ptp.tstamp_config.rx_filter = 2;
+  config = pf->ptp.tstamp_config;
+  __analyzer_eval (config.flags == pf->ptp.tstamp_config.flags); /* { 
dg-warning "TRUE" } */
+  /* { dg-bogus "use of uninitialized value 'config.flags'" "PR 
analyzer/112969" { target *-*-* } .-1 } */
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/infoleak-pr112969.c 
b/gcc/testsuite/gcc.dg/plugin/infoleak-pr112969.c
new file mode 100644
index 000..e78fe365975
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/infoleak-pr112969.c
@@ -0,0 +1,52 @@
+/* Reduced from -Wanalyzer-exposure-through-uninit-copy false positives
+   seen in Linux kernel in driv

Re: [PATCH v3 0/2] RISC-V: Support CORE-V XCVSIMD extension

2024-01-25 Thread Kito Cheng
pushed :)

On Thu, Jan 25, 2024 at 9:53 PM Kito Cheng  wrote:
>
> It's stage 4, so I think it would be great to not disturb the code base
> too much, and adding intrinsics without adding VLS modes should be the
> better way to go.  There is no really serious coding style issue here,
> just a few minor indentation issues, so I'm going to run the regression
> tests to make sure nothing else breaks and then commit to trunk :)


Re: [patch] gcn: Add missing space to ASM_SPEC in gcn-hsa.h

2024-01-25 Thread Andrew Stubbs

On 25/01/2024 12:44, Tobias Burnus wrote:

This patch avoids assembler warnings for gfx908 and gfx90a such as
   '-xnack-mattr=-sramecc' is not a recognized feature for this target(ignoring 
feature)
as we pass   -mattr=-xnack-mattr=-sramecc  to the llvm-mc assembler.

Solution: Add a space before the second '-mattr='.

OK for mainline?


OK.

Andrew


Re: [PATCH v2] aarch64: Fix eh_return for -mtrack-speculation [PR112987]

2024-01-25 Thread Richard Sandiford
Szabolcs Nagy  writes:
> Recent commit introduced a conditional branch in eh_return epilogues
> that is not compatible with speculation tracking:
>
>   commit 426fddcbdad6746fe70e031f707fb07f55dfb405
>   Author: Szabolcs Nagy 
>   CommitDate: 2023-11-27 15:52:48 +
>
>   aarch64: Use br instead of ret for eh_return
>
> Refactor the compare zero and jump pattern and use it to fix the issue.
>
> gcc/ChangeLog:
>
>   PR target/112987
>   * config/aarch64/aarch64.cc (aarch64_gen_compare_zero_and_branch): New.
>   (aarch64_expand_epilogue): Use the new function.
>   (aarch64_split_compare_and_swap): Likewise.
>   (aarch64_split_atomic_op): Likewise.

OK, thanks.

Richard

> ---
> v2: factor out aarch64_gen_compare_zero_and_branch
>
>  gcc/config/aarch64/aarch64.cc | 75 +++
>  1 file changed, 32 insertions(+), 43 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 3d6dd98c5c5..d2014ce1527 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -2637,6 +2637,28 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx 
> x, rtx y,
>return aarch64_gen_compare_reg (code, x, y);
>  }
>  
> +/* Generate conditional branch to LABEL, comparing X to 0 using CODE.
> +   Return the jump instruction.  */
> +
> +static rtx
> +aarch64_gen_compare_zero_and_branch (rtx_code code, rtx x,
> +  rtx_code_label *label)
> +{
> +  if (aarch64_track_speculation)
> +{
> +  /* Emit an explicit compare instruction, so that we can correctly
> +  track the condition codes.  */
> +  rtx cc_reg = aarch64_gen_compare_reg (code, x, const0_rtx);
> +  x = gen_rtx_fmt_ee (code, GET_MODE (cc_reg), cc_reg, const0_rtx);
> +}
> +  else
> +x = gen_rtx_fmt_ee (code, VOIDmode, x, const0_rtx);
> +
> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> + gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
> +  return gen_rtx_SET (pc_rtx, x);
> +}
> +
>  /* Consider the operation:
>  
>   OPERANDS[0] = CODE (OPERANDS[1], OPERANDS[2]) + OPERANDS[3]
> @@ -9882,11 +9904,10 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
>to be SP; letting the CFA move during this adjustment
>is just as correct as retaining the CFA from the body
>of the function.  Therefore, do nothing special.  */
> -  rtx label = gen_label_rtx ();
> -  rtx x = gen_rtx_EQ (VOIDmode, EH_RETURN_TAKEN_RTX, const0_rtx);
> -  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> - gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
> -  rtx jump = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
> +  rtx_code_label *label = gen_label_rtx ();
> +  rtx x = aarch64_gen_compare_zero_and_branch (EQ, EH_RETURN_TAKEN_RTX,
> +label);
> +  rtx jump = emit_jump_insn (x);
>JUMP_LABEL (jump) = label;
>LABEL_NUSES (label)++;
>emit_insn (gen_add2_insn (stack_pointer_rtx,
> @@ -24657,19 +24678,8 @@ aarch64_split_compare_and_swap (rtx operands[])
>  
>if (!is_weak)
>  {
> -  if (aarch64_track_speculation)
> - {
> -   /* Emit an explicit compare instruction, so that we can correctly
> -  track the condition codes.  */
> -   rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
> -   x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
> - }
> -  else
> - x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
> -
> -  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> - gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
> -  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
> +  x = aarch64_gen_compare_zero_and_branch (NE, scratch, label1);
> +  aarch64_emit_unlikely_jump (x);
>  }
>else
>  aarch64_gen_compare_reg (NE, scratch, const0_rtx);
> @@ -24685,18 +24695,8 @@ aarch64_split_compare_and_swap (rtx operands[])
>emit_label (label2);
>aarch64_emit_store_exclusive (mode, scratch, mem, rval, model_rtx);
>  
> -  if (aarch64_track_speculation)
> - {
> -   /* Emit an explicit compare instruction, so that we can correctly
> -  track the condition codes.  */
> -   rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
> -   x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
> - }
> -  else
> - x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
> -  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> - gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
> -  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
> +  x = aarch64_gen_compare_zero_and_branch (NE, scratch, label1);
> +  aarch64_emit_unlikely_jump (x);
>  
>label2 = label3;
>  }
> @@ -24780,19 +24780,8 @@ aarch64_split_atomic_op (enum rtx_code code, rtx 
> old_out, rtx new_out, rtx mem,
>aarch64_emit_store_exclusive (mode, co

[PATCH v4 0/1] RISC-V: Support CORE-V XCVBITMANIP extension

2024-01-25 Thread Mary Bennett
This patch series presents the comprehensive implementation of the BITMANIP
extension for CORE-V.

Tested with riscv-gnu-toolchain on binutils, ld, gas and gcc testsuites to
ensure its correctness and compatibility with the existing codebase.
However, your input, reviews, and suggestions are invaluable in making this
extension even more robust.

The CORE-V builtins are described in the specification [1] and work can be
found in the OpenHW group's Github repository [2].

[1] 
github.com/openhwgroup/core-v-sw/blob/master/specifications/corev-builtin-spec.md

[2] github.com/openhwgroup/corev-gcc

Contributors:
  Mary Bennett 
  Nandni Jamnadas 
  Pietra Ferreira 
  Charlie Keaney
  Jessica Mills
  Craig Blackmore 
  Simon Cook 
  Jeremy Bennett 
  Helene Chelin 

RISC-V: Add support for XCVbitmanip extension in CV32E40P

 gcc/common/config/riscv/riscv-common.cc   |   2 +
 gcc/config/riscv/constraints.md   |  16 ++
 gcc/config/riscv/corev.def|  13 ++
 gcc/config/riscv/corev.md | 182 ++
 gcc/config/riscv/predicates.md|  16 ++
 gcc/config/riscv/riscv-builtins.cc|   1 +
 gcc/config/riscv/riscv-ftypes.def |   5 +
 gcc/config/riscv/riscv.cc |  13 ++
 gcc/config/riscv/riscv.opt|   2 +
 gcc/doc/extend.texi   |  53 +
 gcc/doc/sourcebuild.texi  |   3 +
 .../riscv/cv-bitmanip-compile-bclr.c  |  27 +++
 .../riscv/cv-bitmanip-compile-bclrr.c |  18 ++
 .../riscv/cv-bitmanip-compile-bitrev.c|  30 +++
 .../riscv/cv-bitmanip-compile-bset.c  |  27 +++
 .../riscv/cv-bitmanip-compile-bsetr.c |  18 ++
 .../riscv/cv-bitmanip-compile-clb.c   |  18 ++
 .../riscv/cv-bitmanip-compile-cnt.c   |  18 ++
 .../riscv/cv-bitmanip-compile-extract.c   |  27 +++
 .../riscv/cv-bitmanip-compile-extractr.c  |  18 ++
 .../riscv/cv-bitmanip-compile-extractu.c  |  27 +++
 .../riscv/cv-bitmanip-compile-extractur.c |  18 ++
 .../riscv/cv-bitmanip-compile-ff1.c   |  18 ++
 .../riscv/cv-bitmanip-compile-fl1.c   |  18 ++
 .../riscv/cv-bitmanip-compile-insert.c|  24 +++
 .../riscv/cv-bitmanip-compile-insertr.c   |  18 ++
 .../riscv/cv-bitmanip-compile-ror.c   |  18 ++
 .../riscv/cv-bitmanip-fail-compile-bclr.c |  25 +++
 .../riscv/cv-bitmanip-fail-compile-bitrev.c   |  23 +++
 .../riscv/cv-bitmanip-fail-compile-bset.c |  25 +++
 .../riscv/cv-bitmanip-fail-compile-extract.c  |  25 +++
 .../riscv/cv-bitmanip-fail-compile-extractu.c |  25 +++
 .../riscv/cv-bitmanip-fail-compile-insert.c   |  25 +++
 gcc/testsuite/lib/target-supports.exp |  13 ++
 34 files changed, 809 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-bclr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-bclrr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-bitrev.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-bset.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-bsetr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-clb.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-cnt.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-extract.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-extractr.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-extractu.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-extractur.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-ff1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-fl1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-insert.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-insertr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-ror.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/cv-bitmanip-fail-compile-bclr.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/cv-bitmanip-fail-compile-bitrev.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/cv-bitmanip-fail-compile-bset.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/cv-bitmanip-fail-compile-extract.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/cv-bitmanip-fail-compile-extractu.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/cv-bitmanip-fail-compile-insert.c

-- 
2.34.1



[PATCH v4 1/1] RISC-V: Add support for XCVbitmanip extension in CV32E40P

2024-01-25 Thread Mary Bennett
Spec: 
github.com/openhwgroup/core-v-sw/blob/master/specifications/corev-builtin-spec.md

Contributors:
  Mary Bennett 
  Nandni Jamnadas 
  Pietra Ferreira 
  Charlie Keaney
  Jessica Mills
  Craig Blackmore 
  Simon Cook 
  Jeremy Bennett 
  Helene Chelin 

gcc/ChangeLog:
* common/config/riscv/riscv-common.cc: Add XCVbitmanip.
* config/riscv/constraints.md: Likewise.
* config/riscv/corev.def: Likewise.
* config/riscv/corev.md: Likewise.
* config/riscv/predicates.md: Likewise.
* config/riscv/riscv-builtins.cc (AVAIL): Likewise.
* config/riscv/riscv-ftypes.def: Likewise.
* config/riscv/riscv.opt: Likewise.
* doc/extend.texi: Add XCVbitmanip builtin documentation.
* doc/sourcebuild.texi: Likewise.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/cv-bitmanip-compile-bclr.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-bclrr.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-bitrev.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-bset.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-bsetr.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-clb.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-cnt.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-extract.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-extractr.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-extractu.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-extractur.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-ff1.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-fl1.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-insert.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-insertr.c: New test.
* gcc.target/riscv/cv-bitmanip-compile-ror.c: New test.
* gcc.target/riscv/cv-bitmanip-fail-compile-bclr.c: New test.
* gcc.target/riscv/cv-bitmanip-fail-compile-bitrev.c: New test.
* gcc.target/riscv/cv-bitmanip-fail-compile-bset.c: New test.
* gcc.target/riscv/cv-bitmanip-fail-compile-extract.c: New test.
* gcc.target/riscv/cv-bitmanip-fail-compile-extractu.c: New test.
* gcc.target/riscv/cv-bitmanip-fail-compile-insert.c: New test.
* lib/target-supports.exp: Add proc for the XCVbitmanip extension.
---
 gcc/common/config/riscv/riscv-common.cc   |   2 +
 gcc/config/riscv/constraints.md   |  16 ++
 gcc/config/riscv/corev.def|  13 ++
 gcc/config/riscv/corev.md | 182 ++
 gcc/config/riscv/predicates.md|  16 ++
 gcc/config/riscv/riscv-builtins.cc|   1 +
 gcc/config/riscv/riscv-ftypes.def |   5 +
 gcc/config/riscv/riscv.cc |  13 ++
 gcc/config/riscv/riscv.opt|   2 +
 gcc/doc/extend.texi   |  53 +
 gcc/doc/sourcebuild.texi  |   3 +
 .../riscv/cv-bitmanip-compile-bclr.c  |  27 +++
 .../riscv/cv-bitmanip-compile-bclrr.c |  18 ++
 .../riscv/cv-bitmanip-compile-bitrev.c|  30 +++
 .../riscv/cv-bitmanip-compile-bset.c  |  27 +++
 .../riscv/cv-bitmanip-compile-bsetr.c |  18 ++
 .../riscv/cv-bitmanip-compile-clb.c   |  18 ++
 .../riscv/cv-bitmanip-compile-cnt.c   |  18 ++
 .../riscv/cv-bitmanip-compile-extract.c   |  27 +++
 .../riscv/cv-bitmanip-compile-extractr.c  |  18 ++
 .../riscv/cv-bitmanip-compile-extractu.c  |  27 +++
 .../riscv/cv-bitmanip-compile-extractur.c |  18 ++
 .../riscv/cv-bitmanip-compile-ff1.c   |  18 ++
 .../riscv/cv-bitmanip-compile-fl1.c   |  18 ++
 .../riscv/cv-bitmanip-compile-insert.c|  24 +++
 .../riscv/cv-bitmanip-compile-insertr.c   |  18 ++
 .../riscv/cv-bitmanip-compile-ror.c   |  18 ++
 .../riscv/cv-bitmanip-fail-compile-bclr.c |  25 +++
 .../riscv/cv-bitmanip-fail-compile-bitrev.c   |  23 +++
 .../riscv/cv-bitmanip-fail-compile-bset.c |  25 +++
 .../riscv/cv-bitmanip-fail-compile-extract.c  |  25 +++
 .../riscv/cv-bitmanip-fail-compile-extractu.c |  25 +++
 .../riscv/cv-bitmanip-fail-compile-insert.c   |  25 +++
 gcc/testsuite/lib/target-supports.exp |  13 ++
 34 files changed, 809 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-bclr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-bclrr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-bitrev.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-bset.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-bsetr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-clb.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-cnt.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-bitmanip-compile-extract.c
 create mode 100644 

[PATCH v3 1/1] RISC-V: Add support for XCVmem extension in CV32E40P

2024-01-25 Thread Mary Bennett
XCVmem adds more loads and stores. To prevent non-XCVmem loads and
stores from generating illegal XCVmem specific operands, constraint
'm' was redefined. 'm' does not accept POST_MODIFY or reg + reg
addresses.

Spec: 
github.com/openhwgroup/core-v-sw/blob/master/specifications/corev-builtin-spec.md

Contributors:
  Mary Bennett 
  Nandni Jamnadas 
  Pietra Ferreira 
  Charlie Keaney
  Jessica Mills
  Craig Blackmore 
  Simon Cook 
  Jeremy Bennett 
  Helene Chelin 

gcc/ChangeLog:
* common/config/riscv/riscv-common.cc: Add the XCVmem
  extension.
* config/riscv/riscv.opt: Likewise.
* config/riscv/corev.md: Likewise.
* config/riscv/predicates.md: Likewise.
* config/riscv/riscv-protos.h: Likewise.
* config/riscv/riscv.cc: Add POST_MODIFY.
* config/riscv/riscv.h: Likewise.
* config/riscv/riscv.md: Prevent XCVmem operands being
  used in non-XCVmem loads and stores.
* config/riscv/constraints.md: Likewise.
* config/riscv/predicates.md: Likewise.
* doc/sourcebuild.texi: Add XCVmem documentation.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/cv-mem-operand-compile-1.c: New test.
* gcc.target/riscv/cv-mem-operand-compile-2.c: New test.
* gcc.target/riscv/cv-mem-operand-compile-3.c: New test.
* gcc.target/riscv/cv-mem-operand-compile-4.c: New test.
* gcc.target/riscv/cv-mem-operand-compile-5.c: New test.
* gcc.target/riscv/cv-mem-operand-compile-6.c: New test.
* gcc.target/riscv/cv-mem-operand-compile-7.c: New test.
* gcc.target/riscv/cv-mem-operand-compile-8.c: New test.
* gcc.target/riscv/cv-mem-lb-compile-1.c: New test.
* gcc.target/riscv/cv-mem-lb-compile-2.c: New test.
* gcc.target/riscv/cv-mem-lb-compile-3.c: New test.
* gcc.target/riscv/cv-mem-lbu-compile-1.c: New test.
* gcc.target/riscv/cv-mem-lbu-compile-2.c: New test.
* gcc.target/riscv/cv-mem-lbu-compile-3.c: New test.
* gcc.target/riscv/cv-mem-lh-compile-1.c: New test.
* gcc.target/riscv/cv-mem-lh-compile-2.c: New test.
* gcc.target/riscv/cv-mem-lh-compile-3.c: New test.
* gcc.target/riscv/cv-mem-lhu-compile-1.c: New test.
* gcc.target/riscv/cv-mem-lhu-compile-2.c: New test.
* gcc.target/riscv/cv-mem-lhu-compile-3.c: New test.
* gcc.target/riscv/cv-mem-lw-compile-1.c: New test.
* gcc.target/riscv/cv-mem-lw-compile-2.c: New test.
* gcc.target/riscv/cv-mem-lw-compile-3.c: New test.
* gcc.target/riscv/cv-mem-sb-compile-1.c: New test.
* gcc.target/riscv/cv-mem-sb-compile-2.c: New test.
* gcc.target/riscv/cv-mem-sb-compile-3.c: New test.
* gcc.target/riscv/cv-mem-sh-compile-1.c: New test.
* gcc.target/riscv/cv-mem-sh-compile-2.c: New test.
* gcc.target/riscv/cv-mem-sh-compile-3.c: New test.
* gcc.target/riscv/cv-mem-sw-compile-1.c: New test.
* gcc.target/riscv/cv-mem-sw-compile-2.c: New test.
* gcc.target/riscv/cv-mem-sw-compile-3.c: New test.
* lib/target-supports.exp: Add proc for XCVmem.
---
 gcc/common/config/riscv/riscv-common.cc   |   2 +
 gcc/config/riscv/constraints.md   |  29 ++
 gcc/config/riscv/corev.md | 270 ++
 gcc/config/riscv/predicates.md|  20 +-
 gcc/config/riscv/riscv-protos.h   |  12 +-
 gcc/config/riscv/riscv.cc |  48 +++-
 gcc/config/riscv/riscv.h  |   4 +-
 gcc/config/riscv/riscv.md |  26 +-
 gcc/config/riscv/riscv.opt|   2 +
 gcc/doc/sourcebuild.texi  |   3 +
 .../gcc.target/riscv/cv-mem-lb-compile-1.c|  21 ++
 .../gcc.target/riscv/cv-mem-lb-compile-2.c|  24 ++
 .../gcc.target/riscv/cv-mem-lb-compile-3.c|  16 ++
 .../gcc.target/riscv/cv-mem-lbu-compile-1.c   |  21 ++
 .../gcc.target/riscv/cv-mem-lbu-compile-2.c   |  24 ++
 .../gcc.target/riscv/cv-mem-lbu-compile-3.c   |  16 ++
 .../gcc.target/riscv/cv-mem-lh-compile-1.c|  21 ++
 .../gcc.target/riscv/cv-mem-lh-compile-2.c|  24 ++
 .../gcc.target/riscv/cv-mem-lh-compile-3.c|  16 ++
 .../gcc.target/riscv/cv-mem-lhu-compile-1.c   |  21 ++
 .../gcc.target/riscv/cv-mem-lhu-compile-2.c   |  24 ++
 .../gcc.target/riscv/cv-mem-lhu-compile-3.c   |  16 ++
 .../gcc.target/riscv/cv-mem-lw-compile-1.c|  33 +++
 .../gcc.target/riscv/cv-mem-lw-compile-2.c|  38 +++
 .../gcc.target/riscv/cv-mem-lw-compile-3.c|  22 ++
 .../riscv/cv-mem-operand-compile-1.c  |  19 ++
 .../riscv/cv-mem-operand-compile-2.c  |  20 ++
 .../riscv/cv-mem-operand-compile-3.c  |  28 ++
 .../riscv/cv-mem-operand-compile-4.c  |  21 ++
 .../riscv/cv-mem-operand-compile-5.c  |  25 ++
 .../riscv/cv-mem-operand-compile-6.c  |  21 ++
 .../riscv/cv-mem-operand-compile-7.c  |  24 ++
 .../riscv/cv-mem-operand-co

[PATCH v3 0/1] RISC-V: Support CORE-V XCVMEM extension

2024-01-25 Thread Mary Bennett
This patch series presents the comprehensive implementation of the MEM
extension for CORE-V.

Tested with riscv-gnu-toolchain on binutils, ld, gas and gcc testsuites to
ensure its correctness and compatibility with the existing codebase.
However, your input, reviews, and suggestions are invaluable in making this
extension even more robust.

The CORE-V builtins are described in the specification [1] and work can be
found in the OpenHW group's Github repository [2].

[1] 
github.com/openhwgroup/core-v-sw/blob/master/specifications/corev-builtin-spec.md

[2] github.com/openhwgroup/corev-gcc

Contributors:
  Mary Bennett 
  Nandni Jamnadas 
  Pietra Ferreira 
  Charlie Keaney
  Jessica Mills
  Craig Blackmore 
  Simon Cook 
  Jeremy Bennett 
  Helene Chelin 

RISC-V: Add support for XCVmem extension in CV32E40P

 gcc/common/config/riscv/riscv-common.cc   |   2 +
 gcc/config/riscv/constraints.md   |  29 ++
 gcc/config/riscv/corev.md | 270 ++
 gcc/config/riscv/predicates.md|  20 +-
 gcc/config/riscv/riscv-protos.h   |  12 +-
 gcc/config/riscv/riscv.cc |  48 +++-
 gcc/config/riscv/riscv.h  |   4 +-
 gcc/config/riscv/riscv.md |  26 +-
 gcc/config/riscv/riscv.opt|   2 +
 gcc/doc/sourcebuild.texi  |   3 +
 .../gcc.target/riscv/cv-mem-lb-compile-1.c|  21 ++
 .../gcc.target/riscv/cv-mem-lb-compile-2.c|  24 ++
 .../gcc.target/riscv/cv-mem-lb-compile-3.c|  16 ++
 .../gcc.target/riscv/cv-mem-lbu-compile-1.c   |  21 ++
 .../gcc.target/riscv/cv-mem-lbu-compile-2.c   |  24 ++
 .../gcc.target/riscv/cv-mem-lbu-compile-3.c   |  16 ++
 .../gcc.target/riscv/cv-mem-lh-compile-1.c|  21 ++
 .../gcc.target/riscv/cv-mem-lh-compile-2.c|  24 ++
 .../gcc.target/riscv/cv-mem-lh-compile-3.c|  16 ++
 .../gcc.target/riscv/cv-mem-lhu-compile-1.c   |  21 ++
 .../gcc.target/riscv/cv-mem-lhu-compile-2.c   |  24 ++
 .../gcc.target/riscv/cv-mem-lhu-compile-3.c   |  16 ++
 .../gcc.target/riscv/cv-mem-lw-compile-1.c|  33 +++
 .../gcc.target/riscv/cv-mem-lw-compile-2.c|  38 +++
 .../gcc.target/riscv/cv-mem-lw-compile-3.c|  22 ++
 .../riscv/cv-mem-operand-compile-1.c  |  19 ++
 .../riscv/cv-mem-operand-compile-2.c  |  20 ++
 .../riscv/cv-mem-operand-compile-3.c  |  28 ++
 .../riscv/cv-mem-operand-compile-4.c  |  21 ++
 .../riscv/cv-mem-operand-compile-5.c  |  25 ++
 .../riscv/cv-mem-operand-compile-6.c  |  21 ++
 .../riscv/cv-mem-operand-compile-7.c  |  24 ++
 .../riscv/cv-mem-operand-compile-8.c  |  18 ++
 .../gcc.target/riscv/cv-mem-sb-compile-1.c|  32 +++
 .../gcc.target/riscv/cv-mem-sb-compile-2.c|  38 +++
 .../gcc.target/riscv/cv-mem-sb-compile-3.c|  30 ++
 .../gcc.target/riscv/cv-mem-sh-compile-1.c|  32 +++
 .../gcc.target/riscv/cv-mem-sh-compile-2.c|  38 +++
 .../gcc.target/riscv/cv-mem-sh-compile-3.c|  30 ++
 .../gcc.target/riscv/cv-mem-sw-compile-1.c|  32 +++
 .../gcc.target/riscv/cv-mem-sw-compile-2.c|  38 +++
 .../gcc.target/riscv/cv-mem-sw-compile-3.c|  30 ++
 gcc/testsuite/lib/target-supports.exp |  13 +
 43 files changed, 1222 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lb-compile-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lb-compile-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lb-compile-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lbu-compile-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lbu-compile-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lbu-compile-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lh-compile-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lh-compile-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lh-compile-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lhu-compile-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lhu-compile-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lhu-compile-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lw-compile-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lw-compile-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-lw-compile-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-operand-compile-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-operand-compile-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-operand-compile-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-operand-compile-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-operand-compile-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-operand-compile-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/cv-mem-operand-compile-7.c
 create mode 100644 gcc/testsuite/gcc.target/ri

[PATCH v2] aarch64: Fix eh_return for -mtrack-speculation [PR112987]

2024-01-25 Thread Szabolcs Nagy
A recent commit introduced a conditional branch in eh_return epilogues
that is not compatible with speculation tracking:

  commit 426fddcbdad6746fe70e031f707fb07f55dfb405
  Author: Szabolcs Nagy 
  CommitDate: 2023-11-27 15:52:48 +

  aarch64: Use br instead of ret for eh_return

Refactor the compare zero and jump pattern and use it to fix the issue.

gcc/ChangeLog:

PR target/112987
* config/aarch64/aarch64.cc (aarch64_gen_compare_zero_and_branch): New.
(aarch64_expand_epilogue): Use the new function.
(aarch64_split_compare_and_swap): Likewise.
(aarch64_split_atomic_op): Likewise.
---
v2: factor out aarch64_gen_compare_zero_and_branch

 gcc/config/aarch64/aarch64.cc | 75 +++
 1 file changed, 32 insertions(+), 43 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3d6dd98c5c5..d2014ce1527 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2637,6 +2637,28 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, 
rtx y,
   return aarch64_gen_compare_reg (code, x, y);
 }
 
+/* Generate conditional branch to LABEL, comparing X to 0 using CODE.
+   Return the jump instruction.  */
+
+static rtx
+aarch64_gen_compare_zero_and_branch (rtx_code code, rtx x,
+rtx_code_label *label)
+{
+  if (aarch64_track_speculation)
+{
+  /* Emit an explicit compare instruction, so that we can correctly
+track the condition codes.  */
+  rtx cc_reg = aarch64_gen_compare_reg (code, x, const0_rtx);
+  x = gen_rtx_fmt_ee (code, GET_MODE (cc_reg), cc_reg, const0_rtx);
+}
+  else
+x = gen_rtx_fmt_ee (code, VOIDmode, x, const0_rtx);
+
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+   gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
+  return gen_rtx_SET (pc_rtx, x);
+}
+
 /* Consider the operation:
 
  OPERANDS[0] = CODE (OPERANDS[1], OPERANDS[2]) + OPERANDS[3]
@@ -9882,11 +9904,10 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
 to be SP; letting the CFA move during this adjustment
 is just as correct as retaining the CFA from the body
 of the function.  Therefore, do nothing special.  */
-  rtx label = gen_label_rtx ();
-  rtx x = gen_rtx_EQ (VOIDmode, EH_RETURN_TAKEN_RTX, const0_rtx);
-  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
-   gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
-  rtx jump = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  rtx_code_label *label = gen_label_rtx ();
+  rtx x = aarch64_gen_compare_zero_and_branch (EQ, EH_RETURN_TAKEN_RTX,
+  label);
+  rtx jump = emit_jump_insn (x);
   JUMP_LABEL (jump) = label;
   LABEL_NUSES (label)++;
   emit_insn (gen_add2_insn (stack_pointer_rtx,
@@ -24657,19 +24678,8 @@ aarch64_split_compare_and_swap (rtx operands[])
 
   if (!is_weak)
 {
-  if (aarch64_track_speculation)
-   {
- /* Emit an explicit compare instruction, so that we can correctly
-track the condition codes.  */
- rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
- x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
-   }
-  else
-   x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
-
-  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
-   gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
-  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+  x = aarch64_gen_compare_zero_and_branch (NE, scratch, label1);
+  aarch64_emit_unlikely_jump (x);
 }
   else
 aarch64_gen_compare_reg (NE, scratch, const0_rtx);
@@ -24685,18 +24695,8 @@ aarch64_split_compare_and_swap (rtx operands[])
   emit_label (label2);
   aarch64_emit_store_exclusive (mode, scratch, mem, rval, model_rtx);
 
-  if (aarch64_track_speculation)
-   {
- /* Emit an explicit compare instruction, so that we can correctly
-track the condition codes.  */
- rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
- x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
-   }
-  else
-   x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
-  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
-   gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
-  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+  x = aarch64_gen_compare_zero_and_branch (NE, scratch, label1);
+  aarch64_emit_unlikely_jump (x);
 
   label2 = label3;
 }
@@ -24780,19 +24780,8 @@ aarch64_split_atomic_op (enum rtx_code code, rtx 
old_out, rtx new_out, rtx mem,
   aarch64_emit_store_exclusive (mode, cond, mem,
gen_lowpart (mode, new_out), model_rtx);
 
-  if (aarch64_track_speculation)
-{
-  /* Emit an explicit compare instruction, so that we can correctly
-track the cond

Re: [PATCH v3 0/2] RISC-V: Support CORE-V XCVSIMD extension

2024-01-25 Thread Kito Cheng
It's stage 4, so I think it would be best not to disturb the code base
too much; adding the intrinsics without adding VLS modes is the
better way to go. There are no serious coding style issues here, just a
few minor indentation issues, so I am going to run the regression tests to
make sure this does not break anything else and then commit to trunk :)


Re: [PATCH] RISC-V: Fix incorrect LCM delete bug [VSETVL PASS]

2024-01-25 Thread Kito Cheng
Use this reduced testcase, but please verify it on your end again.
For the code change part, I would like to let others review it :P

struct a {
 int b;
 int c : 1;
 int : 1;
} d();
typedef struct
{
 int e;
 struct {
   int f;
 };
} g;
int i;
char k, l, n;
void *m;
char *o;
void h();
char *j();
void p(int buf, __builtin_va_list ab, int q) {
 do {
   void *r[] = {&&s, &&t, &&u, &&v, &&w};
   int c;
   goto *m;
 s:
   c = 1;
   while (1) {
   t:
   u:
   ae:
 void *af = __builtin_va_arg(ab, void *);
 h(p);
 o = j(i);
 if (o == 0)
   goto ae;
 l = 'S';
 break;
   v:
 g ah;
 __builtin_memset(&ah, '\0', sizeof(g));
 h(n, __builtin_va_arg(ab, int), &ah);
 break;
   w:
 if (__builtin_expect(q, 0))
   c = 0;
 struct a ai = {'S', c};
 d(buf, ai, af);
   }
 } while (k);
}

On Thu, Jan 25, 2024 at 8:34 PM Juzhe-Zhong  wrote:
>
> This patch fixes the recently noticed bug in RV32 glibc.
>
> We incorrectly deleted a vsetvl:
>
> ...
> and a4,a4,a3
> vmv.v.i v1,0 ---> Missing vsetvl causes an illegal
> instruction report.
> vse8.v  v1,0(a5)
>
> The root cause is that the laterin computed by LCM is incorrect.
>
>   BB 358:
> avloc: n_bits = 2, set = {}
> kill: n_bits = 2, set = {}
> antloc: n_bits = 2, set = {}
> transp: n_bits = 2, set = {}
> avin: n_bits = 2, set = {}
> avout: n_bits = 2, set = {}
> del: n_bits = 2, set = {}
>
> which causes LCM to let BB 360 delete the vsetvl:
>
>   BB 360:
> avloc: n_bits = 2, set = {}
> kill: n_bits = 2, set = {}
> antloc: n_bits = 2, set = {}
> transp: n_bits = 2, set = {0 1 }
> avin: n_bits = 2, set = {}
> avout: n_bits = 2, set = {}
> del: n_bits = 2, set = {1}
>
> Also, stop adding unknown vsetvl info to the local computation, since it is
> unnecessary.
>
> Tested on both RV32 and RV64 with no regressions.
>
> PR target/113469
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vsetvl.cc 
> (pre_vsetvl::compute_lcm_local_properties): Fix bug.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/pr113469.c: New test.
>
> ---
>  gcc/config/riscv/riscv-vsetvl.cc  |   21 +-
>  .../gcc.target/riscv/rvv/autovec/pr113469.c   | 1841 +
>  2 files changed, 1853 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
>
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc 
> b/gcc/config/riscv/riscv-vsetvl.cc
> index da258b964fc..f300f00e62a 100644
> --- a/gcc/config/riscv/riscv-vsetvl.cc
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -2543,8 +2543,10 @@ pre_vsetvl::compute_lcm_local_properties ()
>vsetvl_info &header_info = block_info.get_entry_info ();
>vsetvl_info &footer_info = block_info.get_exit_info ();
>gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
> -  add_expr (m_exprs, header_info);
> -  add_expr (m_exprs, footer_info);
> +  if (header_info.valid_p ())
> +   add_expr (m_exprs, header_info);
> +  if (footer_info.valid_p ())
> +   add_expr (m_exprs, footer_info);
>  }
>
>int num_exprs = m_exprs.length ();
> @@ -2699,13 +2701,6 @@ pre_vsetvl::compute_lcm_local_properties ()
>   }
>  }
>
> -  for (const bb_info *bb : crtl->ssa->bbs ())
> -{
> -  unsigned bb_index = bb->index ();
> -  bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
> -  bitmap_not (m_kill[bb_index], m_kill[bb_index]);
> -}
> -
>for (const bb_info *bb : crtl->ssa->bbs ())
>  {
>unsigned bb_index = bb->index ();
> @@ -2713,8 +2708,16 @@ pre_vsetvl::compute_lcm_local_properties ()
> {
>   bitmap_clear (m_antloc[bb_index]);
>   bitmap_clear (m_transp[bb_index]);
> + bitmap_clear (m_avloc[bb_index]);
> }
>  }
> +
> +  for (const bb_info *bb : crtl->ssa->bbs ())
> +{
> +  unsigned bb_index = bb->index ();
> +  bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
> +  bitmap_not (m_kill[bb_index], m_kill[bb_index]);
> +}
>  }
>
>  void
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
> new file mode 100644
> index 000..2502040772b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
> @@ -0,0 +1,1841 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +static int read_int (const unsigned char * *pstr) {};
> +static const char null[] = "(null)";
> +extern size_t __strnlen (const char *__string, size_t __maxlen) 
> __attribute__ ((__pure__));
> +
> +struct printf_info
> +{
> +  int prec;
> +  int width;
> +  wchar_t spec;
> +  unsigned int is_long_double:1;
> +  unsigned int is_short:1;
> +  unsigned int is_long

RE: [PATCH v2] RISC-V: remove param riscv-vector-abi. [PR113538]

2024-01-25 Thread Li, Pan2
Committed, thanks Juzhe.

Pan

From: juzhe.zhong 
Sent: Thursday, January 25, 2024 9:08 PM
To: Wang, Yanzhang 
Cc: gcc-patches@gcc.gnu.org; kito.ch...@sifive.com; Li, Pan2 
; Wang, Yanzhang 
Subject: Re: [PATCH v2] RISC-V: remove param riscv-vector-abi. [PR113538]

lgtm
 Replied Message 
From
yanzhang.w...@intel.com
Date
01/25/2024 21:06
To
gcc-patches@gcc.gnu.org
Cc
juzhe.zh...@rivai.ai,
kito.ch...@sifive.com,
pan2...@intel.com,
yanzhang.w...@intel.com
Subject
[PATCH v2] RISC-V: remove param riscv-vector-abi. [PR113538]



Re: [PATCH v2] RISC-V: remove param riscv-vector-abi. [PR113538]

2024-01-25 Thread juzhe.zhong
lgtm  Replied Message  From: yanzhang.w...@intel.com  Date: 01/25/2024 21:06  To: gcc-patches@gcc.gnu.org  Cc: juzhe.zh...@rivai.ai, kito.ch...@sifive.com, pan2...@intel.com, yanzhang.w...@intel.com  Subject: [PATCH v2] RISC-V: remove param riscv-vector-abi. [PR113538]


[PATCH v2] RISC-V: remove param riscv-vector-abi. [PR113538]

2024-01-25 Thread yanzhang . wang
From: Yanzhang Wang 

Also adjust the scan-assembler checks in some of the tests. The behavior
is now the same as it was with --param=riscv-vector-abi before.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_get_arg_info): Remove the flag.
(riscv_fntype_abi): Ditto.
* config/riscv/riscv.opt: Ditto.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-7.c: Fix the asm
  check.
* gcc.target/riscv/rvv/base/abi-call-args-1-run.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-1.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-2-run.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-2.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-3-run.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-3.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-4-run.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-4.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-error-1.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-return-run.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-return.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-variant_cc.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-1-fixed-1.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-1-fixed-2.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-1-save-restore.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-1-zcmp.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-1.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-2-save-restore.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-2-zcmp.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-2.c: Ditto.
* gcc.target/riscv/rvv/base/float-point-dynamic-frm-69.c: Ditto.
* gcc.target/riscv/rvv/base/float-point-dynamic-frm-70.c: Ditto.
* gcc.target/riscv/rvv/base/float-point-dynamic-frm-71.c: Ditto.
* gcc.target/riscv/rvv/base/misc_vreinterpret_vbool_vint.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv32_vadd.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv32_vfadd.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv32_vget_vset.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv32_vloxseg2ei16.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv32_vreinterpret.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv64_vadd.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv64_vfadd.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv64_vget_vset.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv64_vloxseg2ei16.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv64_vreinterpret.c: Ditto.
* gcc.target/riscv/rvv/base/spill-10.c: Ditto.
* gcc.target/riscv/rvv/base/spill-11.c: Ditto.
* gcc.target/riscv/rvv/base/spill-9.c: Ditto.
* gcc.target/riscv/rvv/base/tuple_vundefined.c: Ditto.
* gcc.target/riscv/rvv/base/vcreate.c: Ditto.
* gcc.target/riscv/rvv/base/vlmul_ext-1.c: Ditto.
* gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c: Ditto.
* gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c: Ditto.
* lib/target-supports.exp: Remove the flag.

Signed-off-by: Yanzhang Wang 
---
 gcc/config/riscv/riscv.cc   |  7 +++
 gcc/config/riscv/riscv.opt  |  5 -
 .../vect/costmodel/riscv/rvv/dynamic-lmul1-7.c  |  2 +-
 .../riscv/rvv/base/abi-call-args-1-run.c|  2 +-
 .../gcc.target/riscv/rvv/base/abi-call-args-1.c |  2 +-
 .../riscv/rvv/base/abi-call-args-2-run.c|  2 +-
 .../gcc.target/riscv/rvv/base/abi-call-args-2.c |  2 +-
 .../riscv/rvv/base/abi-call-args-3-run.c|  2 +-
 .../gcc.target/riscv/rvv/base/abi-call-args-3.c |  2 +-
 .../riscv/rvv/base/abi-call-args-4-run.c|  2 +-
 .../gcc.target/riscv/rvv/base/abi-call-args-4.c |  2 +-
 .../riscv/rvv/base/abi-call-error-1.c   |  2 +-
 .../riscv/rvv/base/abi-call-return-run.c|  2 +-
 .../gcc.target/riscv/rvv/base/abi-call-return.c |  2 +-
 .../riscv/rvv/base/abi-call-variant_cc.c|  2 +-
 .../riscv/rvv/base/abi-callee-saved-1-fixed-1.c |  2 +-
 .../riscv/rvv/base/abi-callee-saved-1-fixed-2.c |  2 +-
 .../rvv/base/abi-callee-saved-1-save-restore.c  |  2 +-
 .../riscv/rvv/base/abi-callee-saved-1-zcmp.c|  2 +-
 .../riscv/rvv/base/abi-callee-saved-1.c |  2 +-
 .../rvv/base/abi-callee-saved-2-save-restore.c  |  2 +-
 .../riscv/rvv/base/abi-callee-saved-2-zcmp.c|  2 +-
 .../riscv/rvv/base/abi-callee-saved-2.c |  2 +-
 .../riscv/rvv/base/float-point-dynamic-frm-69.c |  6 +++---
 .../riscv/rvv/base/float-point-dynamic-frm-70.c |  6 +++---
 .../riscv/rvv/base/float-point-dynamic-frm-71.c |  6 +++---
 .../rvv/base/misc_vreinterpret_vbool_vint.c |  4 +---
 .../riscv/rvv/base/overloaded_rv32_vadd.c   |  2 --
 .../riscv/rvv/base/overloaded_rv32_vfadd.c  |  2 --
 .../ris

[PATCH] RISC-V: remove param riscv-vector-abi. [PR113538]

2024-01-25 Thread yanzhang . wang
From: Yanzhang Wang 

Also adjust the scan-assembler checks in some of the tests. The behavior
is now the same as it was with --param=riscv-vector-abi before.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_get_arg_info): Remove the flag.
(riscv_fntype_abi): Ditto.
* config/riscv/riscv.opt: Ditto.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-7.c: Fix the asm
  check.
* gcc.target/riscv/rvv/base/abi-call-args-1-run.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-1.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-2-run.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-2.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-3-run.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-3.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-4-run.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-args-4.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-error-1.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-return-run.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-return.c: Ditto.
* gcc.target/riscv/rvv/base/abi-call-variant_cc.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-1-fixed-1.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-1-fixed-2.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-1-save-restore.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-1-zcmp.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-1.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-2-save-restore.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-2-zcmp.c: Ditto.
* gcc.target/riscv/rvv/base/abi-callee-saved-2.c: Ditto.
* gcc.target/riscv/rvv/base/float-point-dynamic-frm-69.c: Ditto.
* gcc.target/riscv/rvv/base/float-point-dynamic-frm-70.c: Ditto.
* gcc.target/riscv/rvv/base/float-point-dynamic-frm-71.c: Ditto.
* gcc.target/riscv/rvv/base/misc_vreinterpret_vbool_vint.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv32_vadd.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv32_vfadd.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv32_vget_vset.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv32_vloxseg2ei16.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv32_vreinterpret.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv64_vadd.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv64_vfadd.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv64_vget_vset.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv64_vloxseg2ei16.c: Ditto.
* gcc.target/riscv/rvv/base/overloaded_rv64_vreinterpret.c: Ditto.
* gcc.target/riscv/rvv/base/spill-10.c: Ditto.
* gcc.target/riscv/rvv/base/spill-11.c: Ditto.
* gcc.target/riscv/rvv/base/spill-9.c: Ditto.
* gcc.target/riscv/rvv/base/tuple_vundefined.c: Ditto.
* gcc.target/riscv/rvv/base/vcreate.c: Ditto.
* gcc.target/riscv/rvv/base/vlmul_ext-1.c: Ditto.
* gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c: Ditto.
* gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c: Ditto.
* lib/target-supports.exp: Remove the flag.

Signed-off-by: Yanzhang Wang 
---
 gcc/config/riscv/riscv.cc   |  7 +++
 gcc/config/riscv/riscv.opt  |  5 -
 .../vect/costmodel/riscv/rvv/dynamic-lmul1-7.c  |  2 +-
 .../riscv/rvv/base/abi-call-args-1-run.c|  2 +-
 .../gcc.target/riscv/rvv/base/abi-call-args-1.c |  2 +-
 .../riscv/rvv/base/abi-call-args-2-run.c|  2 +-
 .../gcc.target/riscv/rvv/base/abi-call-args-2.c |  2 +-
 .../riscv/rvv/base/abi-call-args-3-run.c|  2 +-
 .../gcc.target/riscv/rvv/base/abi-call-args-3.c |  2 +-
 .../riscv/rvv/base/abi-call-args-4-run.c|  2 +-
 .../gcc.target/riscv/rvv/base/abi-call-args-4.c |  2 +-
 .../riscv/rvv/base/abi-call-error-1.c   |  2 +-
 .../riscv/rvv/base/abi-call-return-run.c|  2 +-
 .../gcc.target/riscv/rvv/base/abi-call-return.c |  2 +-
 .../riscv/rvv/base/abi-call-variant_cc.c|  2 +-
 .../riscv/rvv/base/abi-callee-saved-1-fixed-1.c |  2 +-
 .../riscv/rvv/base/abi-callee-saved-1-fixed-2.c |  2 +-
 .../rvv/base/abi-callee-saved-1-save-restore.c  |  2 +-
 .../riscv/rvv/base/abi-callee-saved-1-zcmp.c|  2 +-
 .../riscv/rvv/base/abi-callee-saved-1.c |  2 +-
 .../rvv/base/abi-callee-saved-2-save-restore.c  |  2 +-
 .../riscv/rvv/base/abi-callee-saved-2-zcmp.c|  2 +-
 .../riscv/rvv/base/abi-callee-saved-2.c |  2 +-
 .../riscv/rvv/base/float-point-dynamic-frm-69.c |  6 +++---
 .../riscv/rvv/base/float-point-dynamic-frm-70.c |  6 +++---
 .../riscv/rvv/base/float-point-dynamic-frm-71.c |  6 +++---
 .../rvv/base/misc_vreinterpret_vbool_vint.c |  4 +---
 .../riscv/rvv/base/overloaded_rv32_vadd.c   |  2 --
 .../riscv/rvv/base/overloaded_rv32_vfadd.c  |  2 --
 .../ris

Re: [PATCH] libgccjit: Allow comparing array types

2024-01-25 Thread Antoni Boucher
Thanks.
Can we please agree on some wording to use so I know when the patch can
be pushed? Especially since we're now in stage 4, it would help me if
you say something like "you can push to master".
Regards.

On Wed, 2024-01-24 at 12:14 -0500, David Malcolm wrote:
> On Fri, 2024-01-19 at 16:55 -0500, Antoni Boucher wrote:
> > Hi.
> > This patch allows comparing different instances of array types as
> > equal.
> > Thanks for the review.
> 
> Thanks; the patch looks good to me.
> 
> Dave
> 



[patch] gcn: Add missing space to ASM_SPEC in gcn-hsa.h

2024-01-25 Thread Tobias Burnus

This patch avoids assembler warnings for gfx908 and gfx90a such as
  '-xnack-mattr=-sramecc' is not a recognized feature for this target(ignoring 
feature)
as we pass   -mattr=-xnack-mattr=-sramecc  to the llvm-mc assembler.

Solution: Add a space before the second '-mattr='.

OK for mainline?

Tobias
gcn: Add missing space to ASM_SPEC in gcn-hsa.h

gcc/
	* config/gcn/gcn-hsa.h (ASM_SPEC): Add space after -mxnack= argument.

diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index bf7079fbbc6..f5de0d2969f 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -89,7 +89,7 @@ extern unsigned int gcn_local_sym_hash (const char *name);
 #define ASM_SPEC  "-triple=amdgcn--amdhsa "  \
 		  "%{march=*:-mcpu=%*} " \
 		  "%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \
-		  "%{" NO_XNACK XNACKOPT "}" \
+		  "%{" NO_XNACK XNACKOPT "} " \
 		  "%{" NO_SRAM_ECC SRAMOPT "} " \
 		  "%{march=gfx1030|march=gfx1100:-mattr=+wavefrontsize64} " \
 		  "-filetype=obj"


[PATCH] RISC-V: Fix incorrect LCM delete bug [VSETVL PASS]

2024-01-25 Thread Juzhe-Zhong
This patch fixes the recently noticed bug in RV32 glibc.

We incorrectly deleted a vsetvl:

...
and a4,a4,a3
vmv.v.i v1,0 ---> Missing vsetvl causes an illegal 
instruction report.
vse8.v  v1,0(a5)

The root cause is that the laterin in LCM is incorrect.

  BB 358:
avloc: n_bits = 2, set = {}
kill: n_bits = 2, set = {}
antloc: n_bits = 2, set = {}
transp: n_bits = 2, set = {}
avin: n_bits = 2, set = {}
avout: n_bits = 2, set = {}
del: n_bits = 2, set = {}

This causes LCM to let BB 360 delete the vsetvl:

  BB 360:
avloc: n_bits = 2, set = {}
kill: n_bits = 2, set = {}
antloc: n_bits = 2, set = {}
transp: n_bits = 2, set = {0 1 }
avin: n_bits = 2, set = {}
avout: n_bits = 2, set = {}
del: n_bits = 2, set = {1}

Also, remove unknown vsetvl info from the local computation since it is unnecessary.

Tested on both RV32/RV64 no regression.

PR target/113469

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc 
(pre_vsetvl::compute_lcm_local_properties): Fix bug.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr113469.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc  |   21 +-
 .../gcc.target/riscv/rvv/autovec/pr113469.c   | 1841 +
 2 files changed, 1853 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index da258b964fc..f300f00e62a 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -2543,8 +2543,10 @@ pre_vsetvl::compute_lcm_local_properties ()
   vsetvl_info &header_info = block_info.get_entry_info ();
   vsetvl_info &footer_info = block_info.get_exit_info ();
   gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
-  add_expr (m_exprs, header_info);
-  add_expr (m_exprs, footer_info);
+  if (header_info.valid_p ())
+   add_expr (m_exprs, header_info);
+  if (footer_info.valid_p ())
+   add_expr (m_exprs, footer_info);
 }
 
   int num_exprs = m_exprs.length ();
@@ -2699,13 +2701,6 @@ pre_vsetvl::compute_lcm_local_properties ()
  }
 }
 
-  for (const bb_info *bb : crtl->ssa->bbs ())
-{
-  unsigned bb_index = bb->index ();
-  bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
-  bitmap_not (m_kill[bb_index], m_kill[bb_index]);
-}
-
   for (const bb_info *bb : crtl->ssa->bbs ())
 {
   unsigned bb_index = bb->index ();
@@ -2713,8 +2708,16 @@ pre_vsetvl::compute_lcm_local_properties ()
{
  bitmap_clear (m_antloc[bb_index]);
  bitmap_clear (m_transp[bb_index]);
+ bitmap_clear (m_avloc[bb_index]);
}
 }
+
+  for (const bb_info *bb : crtl->ssa->bbs ())
+{
+  unsigned bb_index = bb->index ();
+  bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
+  bitmap_not (m_kill[bb_index], m_kill[bb_index]);
+}
 }
 
 void
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
new file mode 100644
index 000..2502040772b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
@@ -0,0 +1,1841 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static int read_int (const unsigned char * *pstr) {};
+static const char null[] = "(null)";
+extern size_t __strnlen (const char *__string, size_t __maxlen) __attribute__ 
((__pure__));
+
+struct printf_info
+{
+  int prec;
+  int width;
+  wchar_t spec;
+  unsigned int is_long_double:1;
+  unsigned int is_short:1;
+  unsigned int is_long:1;
+  unsigned int alt:1;
+  unsigned int space:1;
+  unsigned int left:1;
+  unsigned int showsign:1;
+  unsigned int group:1;
+  unsigned int extra:1;
+  unsigned int is_char:1;
+  unsigned int wide:1;
+  unsigned int i18n:1;
+  unsigned int is_binary128:1;
+
+  unsigned int __pad:3;
+  unsigned short int user;
+  wchar_t pad;
+};
+
+enum {
+  ABDAY_1 = (((2) << 16) | (0)),
+  ABDAY_2,
+  ABDAY_3,
+  ABDAY_4,
+  ABDAY_5,
+  ABDAY_6,
+  ABDAY_7,
+  DAY_1,
+  DAY_2,
+  DAY_3,
+  DAY_4,
+  DAY_5,
+  DAY_6,
+  DAY_7,
+  ABMON_1,
+  ABMON_2,
+  ABMON_3,
+  ABMON_4,
+  ABMON_5,
+  ABMON_6,
+  ABMON_7,
+  ABMON_8,
+  ABMON_9,
+  ABMON_10,
+  ABMON_11,
+  ABMON_12,
+  MON_1,
+  MON_2,
+  MON_3,
+  MON_4,
+  MON_5,
+  MON_6,
+  MON_7,
+  MON_8,
+  MON_9,
+  MON_10,
+  MON_11,
+  MON_12,
+  AM_STR,
+  PM_STR,
+  D_T_FMT,
+  D_FMT,
+  T_FMT,
+  T_FMT_AMPM,
+  ERA,
+  __ERA_YEAR,
+  ERA_D_FMT,
+
+  ALT_DIGITS,
+
+  ERA_D_T_FMT,
+
+  ERA_T_FMT,
+  _NL_TIME_ERA_NUM_ENTRIES,
+  _NL_TIME_ERA_ENTRIES,
+
+  _NL_WABDAY_1,
+  _NL_WABDAY_2,
+  _NL_WABDAY_3,
+  _NL_WABDAY_4,
+  _NL_WABDAY_5,
+  _NL_WABDAY_6,
+  _NL_WABDAY_7,
+  _NL_WDA

[PATCH v1] C++: Support constexpr strings for asm statements

2024-01-25 Thread Andi Kleen
Some programming styles use a lot of inline assembler, and it is common
to use very complex preprocessor macros to generate the assembler
strings for the asm statements. In C++ there would be a typesafe alternative
using templates and constexpr to generate the assembler strings, but
unfortunately the asm statement requires plain string literals, so this
doesn't work.

This patch modifies the C++ parser to accept strings generated by
constexpr instead of just plain strings. This requires new syntax
because e.g. asm("..." : "r" (expr)) would be ambiguous with a function
call. I chose () to make it unique. For example now you can write

constexpr const char *genasm() { return "insn"; }
constexpr const char *genconstraint() { return "r"; }

asm(genasm() :: (genconstraint()) (input));

The constexpr strings are allowed for the asm template, the
constraints and the clobbers (everywhere the current asm accepts a string).

The drawback of this scheme is that the constexpr doesn't have
full control over the input/output/clobber lists, but that can
usually be handled with a switch statement.  One could imagine
more flexible ways to handle that, for example supporting constexpr
vectors for the clobber list, or similar. But even without
that it is already useful.

Bootstrapped and full test on x86_64-linux.
---
 gcc/cp/parser.cc   | 76 ++
 gcc/doc/extend.texi| 17 +-
 gcc/testsuite/g++.dg/constexpr-asm-1.C | 30 ++
 3 files changed, 99 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/constexpr-asm-1.C

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 3748ccd49ff3..cc323dc8557a 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -22654,6 +22654,43 @@ cp_parser_using_directive (cp_parser* parser)
   cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
 }
 
+/* Parse a string literal or constant expression yielding a string.
+   The constant expression uses extra parens to avoid ambiguity with "x" 
(expr).
+
+   asm-string-expr:
+ string-literal
+ ( constant-expr ) */
+
+static tree
+cp_parser_asm_string_expression (cp_parser *parser)
+{
+  location_t sloc = cp_lexer_peek_token (parser->lexer)->location;
+
+  if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN))
+{
+  matching_parens parens;
+  parens.consume_open (parser);
+  tree string = cp_parser_constant_expression (parser);
+  if (string != error_mark_node)
+   string = cxx_constant_value (string, tf_error);
+  if (TREE_CODE (string) == NOP_EXPR)
+   string = TREE_OPERAND (string, 0);
+  if (TREE_CODE (string) == ADDR_EXPR && TREE_CODE (TREE_OPERAND (string, 
0)) == STRING_CST)
+   string = TREE_OPERAND (string, 0);
+  if (TREE_CODE (string) == VIEW_CONVERT_EXPR)
+   string = TREE_OPERAND (string, 0);
+  if (TREE_CODE (string) != STRING_CST && string != error_mark_node)
+   {
+ error_at (sloc, "Expected string valued constant expression for 
%, not type %qT",
+   TREE_TYPE (string));
+ string = error_mark_node;
+   }
+  parens.require_close (parser);
+  return string;
+}
+  return cp_parser_string_literal (parser, false, false);
+}
+
 /* Parse an asm-definition.
 
   asm-qualifier:
@@ -22666,19 +22703,19 @@ cp_parser_using_directive (cp_parser* parser)
 asm-qualifier-list asm-qualifier
 
asm-definition:
- asm ( string-literal ) ;
+ asm ( constant-expr ) ;
 
GNU Extension:
 
asm-definition:
- asm asm-qualifier-list [opt] ( string-literal ) ;
- asm asm-qualifier-list [opt] ( string-literal : asm-operand-list [opt] ) ;
- asm asm-qualifier-list [opt] ( string-literal : asm-operand-list [opt]
+ asm asm-qualifier-list [opt] ( asm-string-expr ) ;
+ asm asm-qualifier-list [opt] ( asm-string-expr : asm-operand-list [opt] ) 
;
+ asm asm-qualifier-list [opt] ( asm-string-expr : asm-operand-list [opt]
: asm-operand-list [opt] ) ;
- asm asm-qualifier-list [opt] ( string-literal : asm-operand-list [opt]
+ asm asm-qualifier-list [opt] ( asm-string-expr : asm-operand-list [opt]
: asm-operand-list [opt]
  : asm-clobber-list [opt] ) ;
- asm asm-qualifier-list [opt] ( string-literal : : asm-operand-list [opt]
+ asm asm-qualifier-list [opt] ( asm-string-expr : : asm-operand-list [opt]
: asm-clobber-list [opt]
: asm-goto-list ) ;
 
@@ -22797,8 +22834,7 @@ cp_parser_asm_definition (cp_parser* parser)
   if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
 return;
   /* Look for the string.  */
-  tree string = cp_parser_string_literal (parser, /*translate=*/false,
- /*wide_ok=*/false);
+  tree string = cp_parser_asm_string_expression (parser);
   if (string == error_mar

[pushed] aarch64: Fix out-of-bounds ENCODED_ELT access [PR113572]

2024-01-25 Thread Richard Sandiford
When generalising vector_cst_all_same, I'd forgotten to update
VECTOR_CST_ENCODED_ELT to VECTOR_CST_ELT.  The check deliberately
looks at implicitly encoded elements in some cases.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
PR target/113572
* config/aarch64/aarch64-sve-builtins.cc (vector_cst_all_same):
Check VECTOR_CST_ELT instead of VECTOR_CST_ENCODED_ELT

gcc/testsuite/
PR target/113572
* gcc.target/aarch64/sve/pr113572.c: New test.
---
 gcc/config/aarch64/aarch64-sve-builtins.cc  |  2 +-
 gcc/testsuite/gcc.target/aarch64/sve/pr113572.c | 12 
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr113572.c

diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
b/gcc/config/aarch64/aarch64-sve-builtins.cc
index c2f1486315f..11f5c5c500c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -3474,7 +3474,7 @@ vector_cst_all_same (tree v, unsigned int step)
   unsigned int nelts = lcm * VECTOR_CST_NELTS_PER_PATTERN (v);
   tree first_el = VECTOR_CST_ENCODED_ELT (v, 0);
   for (unsigned int i = 0; i < nelts; i += step)
-if (!operand_equal_p (VECTOR_CST_ENCODED_ELT (v, i), first_el, 0))
+if (!operand_equal_p (VECTOR_CST_ELT (v, i), first_el, 0))
   return false;
 
   return true;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr113572.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr113572.c
new file mode 100644
index 000..a5e6b7e9907
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr113572.c
@@ -0,0 +1,12 @@
+#include 
+
+uint64_t u;
+struct S { int i; } he;
+
+void
+foo ()
+{
+  svuint64_t vld_clz = svld1_u64 (svwhilelt_b64 (0, 4), (uint64_t *) &he);
+  vld_clz = svclz_u64_z (svwhilelt_b64 (0, 1), vld_clz);
+  svst1_u64 (svwhilelt_b64 (0, 1), &u, vld_clz);
+}
-- 
2.25.1



Re: [PATCH] aarch64: Fix movv8di for overlapping register and memory load [PR113550]

2024-01-25 Thread Richard Sandiford
Andrew Pinski  writes:
> The split for movv8di is not ready to handle the case where the setting
> register overlaps with the address of the memory that is being loaded.
> Fixing the split than just making the output constraint as an early clobber
> for this alternative. The split would first need to figure out which register
> is overlapping with the address and then only emit that move last.

I was curious how strained that detection would be in practice, and in
the end it didn't seem too bad.  I pushed the following variant after
testing on aarch64-linux-gnu.

Thanks,
Richard


The LS64 movv8di pattern didn't handle loads that overlapped with
the address register (unless the overlap happened to be in the
last subload).

gcc/
PR target/113550
* config/aarch64/aarch64-simd.md: In the movv8di splitter, check
whether each split instruction is a load that clobbers the source
address.  Emit that instruction last if so.

gcc/testsuite/
PR target/113550
* gcc.target/aarch64/pr113550.c: New test.
---
 gcc/config/aarch64/aarch64-simd.md  | 18 ++--
 gcc/testsuite/gcc.target/aarch64/pr113550.c | 48 +
 2 files changed, 62 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113550.c

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 48f0741e7d0..f036f6ce997 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -8221,11 +8221,21 @@ (define_split
   || (memory_operand (operands[0], V8DImode)
   && register_operand (operands[1], V8DImode)))
 {
+  std::pair last_pair = {};
   for (int offset = 0; offset < 64; offset += 16)
-   emit_move_insn (simplify_gen_subreg (TImode, operands[0],
-V8DImode, offset),
-   simplify_gen_subreg (TImode, operands[1],
-V8DImode, offset));
+{
+ std::pair pair = {
+   simplify_gen_subreg (TImode, operands[0], V8DImode, offset),
+   simplify_gen_subreg (TImode, operands[1], V8DImode, offset)
+ };
+ if (register_operand (pair.first, TImode)
+ && reg_overlap_mentioned_p (pair.first, pair.second))
+   last_pair = pair;
+ else
+   emit_move_insn (pair.first, pair.second);
+}
+  if (last_pair.first)
+   emit_move_insn (last_pair.first, last_pair.second);
   DONE;
 }
   else
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113550.c 
b/gcc/testsuite/gcc.target/aarch64/pr113550.c
new file mode 100644
index 000..0ff3c7b5c00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113550.c
@@ -0,0 +1,48 @@
+/* { dg-options "-O" } */
+/* { dg-do run } */
+
+#pragma GCC push_options
+#pragma GCC target "+ls64"
+#pragma GCC aarch64 "arm_acle.h"
+#pragma GCC pop_options
+
+#define DEF_FUNCTION(NAME, ARGS)   \
+  __attribute__((noipa))   \
+  __arm_data512_t  \
+  NAME ARGS\
+  {\
+return *ptr;   \
+  }
+
+DEF_FUNCTION (f0, (__arm_data512_t *ptr))
+DEF_FUNCTION (f1, (int x0, __arm_data512_t *ptr))
+DEF_FUNCTION (f2, (int x0, int x1, __arm_data512_t *ptr))
+DEF_FUNCTION (f3, (int x0, int x1, int x2, __arm_data512_t *ptr))
+DEF_FUNCTION (f4, (int x0, int x1, int x2, int x3, __arm_data512_t *ptr))
+DEF_FUNCTION (f5, (int x0, int x1, int x2, int x3, int x4,
+  __arm_data512_t *ptr))
+DEF_FUNCTION (f6, (int x0, int x1, int x2, int x3, int x4, int x5,
+  __arm_data512_t *ptr))
+DEF_FUNCTION (f7, (int x0, int x1, int x2, int x3, int x4, int x5, int x6,
+  __arm_data512_t *ptr))
+
+int
+main (void)
+{
+  __arm_data512_t x = { 0, 10, 20, 30, 40, 50, 60, 70 };
+  __arm_data512_t res[8] =
+  {
+f0 (&x),
+f1 (0, &x),
+f2 (0, 1, &x),
+f3 (0, 1, 2, &x),
+f4 (0, 1, 2, 3, &x),
+f5 (0, 1, 2, 3, 4, &x),
+f6 (0, 1, 2, 3, 4, 5, &x),
+f7 (0, 1, 2, 3, 4, 5, 6, &x)
+  };
+  for (int i = 0; i < 8; ++i)
+if (__builtin_memcmp (&x, &res[i], sizeof (x)) != 0)
+  __builtin_abort ();
+  return 0;
+}
-- 
2.25.1



[pushed] aarch64: Avoid paradoxical subregs in UXTL split [PR113485]

2024-01-25 Thread Richard Sandiford
g:74e3e839ab2d36841320 handled the UXTL{,2}-ZIP[12] optimisation
in split1.  The UXTL input is a 64-bit vector of N-bit elements
and the result is a 128-bit vector of 2N-bit elements.  The
corresponding ZIP1 operates on 128-bit vectors of N-bit elements.

This meant that the ZIP1 input had to be a 128-bit paradoxical subreg
of the 64-bit UXTL input.  In the PRs, it wasn't possible to generate
this subreg because the inputs were already subregs of a x[234]
structure of 64-bit vectors.

I don't think the same thing can happen for UXTL2->ZIP2 because
UXTL2 input is a 128-bit vector rather than a 64-bit vector.

It isn't really necessary for ZIP1 to take 128-bit inputs,
since the upper 64 bits are ignored.  This patch therefore adds
a pattern for 64-bit → 128-bit ZIP1s.

In principle, we should probably use this form for all ZIP1s.
But in practice, that creates an awkward special case, and
would be quite invasive for stage 4.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
PR target/113485
* config/aarch64/aarch64-simd.md (aarch64_zip1<mode>_low): New
pattern.
(<optab><Vnarrowq><mode>2): Use it instead of generating a
paradoxical subreg for the input.

gcc/testsuite/
PR target/113485
* gcc.target/aarch64/pr113485.c: New test.
* gcc.target/aarch64/pr113573.c: Likewise.
---
 gcc/config/aarch64/aarch64-simd.md  | 17 +++--
 gcc/testsuite/gcc.target/aarch64/pr113485.c | 25 +
 gcc/testsuite/gcc.target/aarch64/pr113573.c | 40 +
 3 files changed, 79 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113485.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113573.c

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 556d0cf359f..48f0741e7d0 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -8505,6 +8505,18 @@ (define_insn 
"aarch64_"
   [(set_attr "type" "neon_permute")]
 )
 
+;; ZIP1 ignores the contents of the upper halves of the registers,
+;; so we can describe 128-bit operations in terms of 64-bit inputs.
+(define_insn "aarch64_zip1_low"
+  [(set (match_operand:VQ 0 "register_operand" "=w")
+   (unspec:VQ [(match_operand: 1 "register_operand" "w")
+   (match_operand: 2 "register_operand" "w")]
+  UNSPEC_ZIP1))]
+  "TARGET_SIMD"
+  "zip1\t%0., %1., %2."
+  [(set_attr "type" "neon_permute_q")]
+)
+
 ;; This instruction's pattern is generated directly by
 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
 ;; need corresponding changes there.  Note that the immediate (third)
@@ -9685,9 +9697,8 @@ (define_insn_and_split "2"
not sufficient uses of the zero to make the split worthwhile.  */
 rtx res = simplify_gen_subreg (mode, operands[0],
   mode, 0);
-rtx zero = aarch64_gen_shareable_zero (mode);
-rtx op = lowpart_subreg (mode, operands[1], mode);
-emit_insn (gen_aarch64_zip1 (res, op, zero));
+rtx zero = aarch64_gen_shareable_zero (mode);
+emit_insn (gen_aarch64_zip1_low (res, operands[1], zero));
 DONE;
   }
   [(set_attr "type" "neon_shift_imm_long")]
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113485.c 
b/gcc/testsuite/gcc.target/aarch64/pr113485.c
new file mode 100644
index 000..c7028245b61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113485.c
@@ -0,0 +1,25 @@
+/* { dg-options "-O" } */
+
+#include 
+
+void test()
+{
+  while (1)
+  {
+static const uint16_t jsimd_rgb_ycc_neon_consts[] = {19595, 0, 0, 0, 0, 0, 
0, 0};
+uint16x8_t consts = vld1q_u16(jsimd_rgb_ycc_neon_consts);
+
+uint8_t tmp_buf[0];
+uint8x8x3_t input_pixels = vld3_u8(tmp_buf);
+uint16x8_t r = vmovl_u8(input_pixels.val[1]);
+uint32x4_t y_l = vmull_laneq_u16(vget_low_u16(r), consts, 0);
+
+uint32x4_t s = vdupq_n_u32(1);
+uint16x4_t a = vrshrn_n_u32(s, 16);
+uint16x4_t y = vrshrn_n_u32(y_l, 16);
+uint16x8_t ay = vcombine_u16(a, y);
+
+unsigned char ***out_buf;
+vst1_u8(out_buf[1][0], vmovn_u16(ay));
+  }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113573.c 
b/gcc/testsuite/gcc.target/aarch64/pr113573.c
new file mode 100644
index 000..a8e445c6e19
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113573.c
@@ -0,0 +1,40 @@
+/* { dg-options "-O2" } */
+
+#pragma GCC aarch64 "arm_neon.h"
+typedef __Uint8x8_t uint8x8_t;
+typedef __Uint16x4_t uint16x4_t;
+typedef __Int16x8_t int16x8_t;
+typedef __Uint16x8_t uint16x8_t;
+int jsimd_extbgrx_ycc_convert_neon_image_width,
+jsimd_extbgrx_ycc_convert_neon___trans_tmp_1;
+uint16x4_t jsimd_extbgrx_ycc_convert_neon___trans_tmp_2;
+uint16x8_t vcombine_u16();
+uint16x8_t vmovl_u8(uint8x8_t __a) {
+  return __builtin_aarch64_uxtlv8hi_uu(__a);
+}
+__inline int __attribute__((__gnu_inline__)) vmull_laneq_u16();
+uint8x8x4_t vld4_u8();
+void jsimd_extbgrx_ycc_convert_neon() {
+  int scaled_128_5 = jsimd

Re: [PATCH] Make gcc.target/arm/bics_3.c testcase a bit more generic [PR113542]

2024-01-25 Thread Richard Earnshaw (lists)
On 25/01/2024 10:29, Maxim Kuvyrkov wrote:
> After fwprop improvement in r14-8319-g86de9b66480, codegen in
> bics_3.c test changed from "bics" to "bic" instruction, with
> the overall instruction stream remaining at the same quality.
> 
> This patch makes the scan-assembler directive accept both
> "bics" and "bic".
> 
> BEFORE r14-8319-g86de9b66480:
>   bicsr0, r0, r1 @ 9  [c=4 l=4]  *andsi_notsi_si_compare0_scratch
>   mov r0, #1  @ 23[c=4 l=4]  *thumb2_movsi_vfp/1
>   it  eq
>   moveq   r0, #0  @ 26[c=8 l=4]  *p *thumb2_movsi_vfp/2
>   bx  lr  @ 29[c=8 l=4]  *thumb2_return
> 
> AFTER r14-8319-g86de9b66480:
>   bic r0, r0, r1  @ 8 [c=4 l=4]  andsi_notsi_si
>   subsr0, r0, #0  @ 22[c=4 l=4]  cmpsi2_addneg/0
>   it  ne
>   movne   r0, #1  @ 23[c=8 l=4]  *p *thumb2_movsi_vfp/2
>   bx  lr  @ 26[c=8 l=4]  *thumb2_return
> 
> gcc/testsuite/ChangeLog:
> 
>   PR target/113542
>   * gcc.target/arm/bics_3.c: Update scan-assembler directive.
> ---
>  gcc/testsuite/gcc.target/arm/bics_3.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/bics_3.c 
> b/gcc/testsuite/gcc.target/arm/bics_3.c
> index e056b264e15..c5bed3c92d2 100644
> --- a/gcc/testsuite/gcc.target/arm/bics_3.c
> +++ b/gcc/testsuite/gcc.target/arm/bics_3.c
> @@ -35,6 +35,6 @@ main (void)
>return 0;
>  }
>  
> -/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 
> 2 } } */
> -/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, 
> .sl #2" 1 } } */
> +/* { dg-final { scan-assembler-times "bics?\tr\[0-9\]+, r\[0-9\]+, 
> r\[0-9\]+" 2 } } */
> +/* { dg-final { scan-assembler-times "bics?\tr\[0-9\]+, r\[0-9\]+, 
> r\[0-9\]+, .sl #2" 1 } } */
>  


The test was added (r6-823-g0454e698401a3e) specifically to check that a BICS 
instruction was being generated.  Whether or not that is right is somewhat 
debatable, but this change seems to be papering over a different issue.

Either we should generate BICS, making this change incorrect, or we should 
disable the test for thumb code on the basis that this isn't really a win.

But really, we should fix the compiler to do better here.  We really want 
something like

BICS  r0, r0, r1  // r0 is 0 or non-zero
MOVNE r0, #1  // convert all non-zero to 1

in Arm state (ie using the BICS instruction to set the result to zero); and in 
thumb2, perhaps something like:

BICS  r0, r0, r1
ITne
MOVNE r0, #1

or maybe even better:

BIC  r0, r0, r1
SUBS r1, r0, #1
SBC  r0, r0, r1

which is slightly better than BICS because SUBS breaks a condition-code chain 
(all the flag bits are set).

There are similar quality issues for other NE(arith-op, 0) cases; we just don't 
have tests for those.

R.


Re: [PATCH] tree-optimization/113576 - non-empty latch and may_be_zero vectorization

2024-01-25 Thread Maxim Kuvyrkov
> On Jan 24, 2024, at 18:40, Richard Biener  wrote:
> 
> We can't support niters with may_be_zero when we end up with a
> non-empty latch due to early exit peeling.  At least not in
> the simplistic way the vectorizer handles this now.  Disallow
> it again for exits that are not the last one.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> 
> PR tree-optimization/113576
> * tree-vect-loop.cc (vec_init_loop_exit_info): Only allow
> exits with may_be_zero niters when it's the last one.
> 
> * gcc.dg/vect/pr113576.c: New testcase.
> ---
> gcc/testsuite/gcc.dg/vect/pr113576.c | 157 +++
> gcc/tree-vect-loop.cc|   9 +-
> 2 files changed, 164 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr113576.c
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/pr113576.c 
> b/gcc/testsuite/gcc.dg/vect/pr113576.c
> new file mode 100644
> index 000..da5ddb09e33
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr113576.c
> @@ -0,0 +1,157 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3" } */
> +/* { dg-additional-options "-march=skylake-avx512" } */

Please adjust the testcase; this fails on non-x86_64 targets, see [1] and [2].

[1] 
https://patchwork.sourceware.org/project/gcc/patch/20240124144159.51c503858...@sourceware.org/
[2] 
https://ci.linaro.org/job/tcwg_gcc_check--master-aarch64-precommit/4765/artifact/artifacts/artifacts.precommit/00-sumfiles/gcc.log.1.xz

Thanks!

--
Maxim Kuvyrkov
https://www.linaro.org




[PATCH] Make gcc.target/arm/bics_3.c testcase a bit more generic [PR113542]

2024-01-25 Thread Maxim Kuvyrkov
After fwprop improvement in r14-8319-g86de9b66480, codegen in
bics_3.c test changed from "bics" to "bic" instruction, with
the overall instruction stream remaining at the same quality.

This patch makes the scan-assembler directive accept both
"bics" and "bic".

BEFORE r14-8319-g86de9b66480:
bicsr0, r0, r1 @ 9  [c=4 l=4]  *andsi_notsi_si_compare0_scratch
mov r0, #1  @ 23[c=4 l=4]  *thumb2_movsi_vfp/1
it  eq
moveq   r0, #0  @ 26[c=8 l=4]  *p *thumb2_movsi_vfp/2
bx  lr  @ 29[c=8 l=4]  *thumb2_return

AFTER r14-8319-g86de9b66480:
bic r0, r0, r1  @ 8 [c=4 l=4]  andsi_notsi_si
subsr0, r0, #0  @ 22[c=4 l=4]  cmpsi2_addneg/0
it  ne
movne   r0, #1  @ 23[c=8 l=4]  *p *thumb2_movsi_vfp/2
bx  lr  @ 26[c=8 l=4]  *thumb2_return

gcc/testsuite/ChangeLog:

PR target/113542
* gcc.target/arm/bics_3.c: Update scan-assembler directive.
---
 gcc/testsuite/gcc.target/arm/bics_3.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/bics_3.c 
b/gcc/testsuite/gcc.target/arm/bics_3.c
index e056b264e15..c5bed3c92d2 100644
--- a/gcc/testsuite/gcc.target/arm/bics_3.c
+++ b/gcc/testsuite/gcc.target/arm/bics_3.c
@@ -35,6 +35,6 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 
} } */
-/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, 
.sl #2" 1 } } */
+/* { dg-final { scan-assembler-times "bics?\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 
2 } } */
+/* { dg-final { scan-assembler-times "bics?\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, 
.sl #2" 1 } } */
 
-- 
2.34.1



Re: [patch] gcn/mkoffload.cc: Fix SRAM_ECC and XNACK handling [PR111966]

2024-01-25 Thread Andrew Stubbs

On 24/01/2024 22:12, Tobias Burnus wrote:

This patch fixes "-g" debug compilation for gfx1100 and gfx1030,
which fail to link when "-g" is specified. The reason is:

When using gfx1100 and compiling with '-g' I was running into an error
because the eflags used for the debugger file has additional eflags
(elf flags) set - contrary to the compiled files; mkoffload writes files
itself, hence, it also needs to get the elf flags right.

It turned out that the ASM_SPEC handling was insufficiently replicated
in mkoffload, leading to issues with gfx1100 and gfx1030. I think in
some corner case, gfx906 also behaved differently; for gfx900 and fiji,
the eflags were different before, but got reset inside
copy_early_debug_info such that those differences did not matter.

OK for mainline?


I've got so confused trying to figure out this stuff and how it works 
with different LLVM, different defaults, different devices.


I think this patch is fine, but we should wait until we can test it on 
all those devices.


Andrew


Tobias

PS: I tried hard to look at the ASM_SPEC and played with different
options, looking at what really got passed to the assembler, but I
might have missed something as the code is somewhat confusing. Naming
wise, there is both UNSUPPORTED and UNSET for the same thing; it should
be a tad more consistent (flag = UNSUPPORTED, SET/TEST functions: UNSET),
still, one could also argue that a single name would do.


Sometimes not passing the -mattr flag gives "any", and sometimes 
"unsupported", and sometimes leaves the flag unset. I think it's changed 
over time as well, but mkoffload has to match precisely or it won't link. :(



PPS: I think the PR is about other things in addition, but it also
kind of covers this "-g" issue and the one of previous commit. Even
if not directly addressing the issue, it is related and having the
commits listed there makes IMHO sense.


Re: [PATCH] jit, Darwin: Implement library exports list.

2024-01-25 Thread Iain Sandoe
Hi David,

> On 24 Jan 2024, at 18:31, David Malcolm  wrote:
> 
> On Tue, 2024-01-16 at 11:10 +, Iain Sandoe wrote:
>> Tested on x86_64, i686 Darwin and x86_64 Linux,
>> OK for trunk? when ?
>> thanks,
>> Iain
> 
> Hi Iain, thanks for the patch.
> 
> I'll have to defer to your Darwin expertise here; given that you've
> tested it on the above configurations I'll assume it's correct, but...
> 
>> 
>> --- 8< ---
>> 
>> Currently, we have no exports list for libgccjit, which means that
>> all symbols are exported, including those from libstdc++ which is
>> linked statically into the lib.  This causes failures when the
>> shared libstdc++ is used but some c++ symbols are satisfied from
>> libgccjit.
>> 
>> This implements an export file for Darwin (which is currently
>> manually created by cross-checking libgccjit.map).
> 
> ...I'm a little nervous about this; Antoyo has a number of out-of-tree
> patches we're working towards merging, and almost all of these touch
> libgccjit.map.
> 
> 
>>   Ideally we'd
>> script this, at some point.  
> 
> Yes.  How about a Python 3 script (inside "contrib", or in "gcc/jit")
> that would do that.  

I’m not sure we want to make a build dependency on Python 3.. 
the reason I say ‘build’ is ...

> Then whenever a patch touches libgccjit.map we'd
> run that script to regenerate libgccjit.exp in the source tree.  I can
> have a go at writing it, if you think that's the best way to go.

… there are two other places in the current sources where ld map files
are converted to Darwin (and Solaris) symbol export files [libgcc, libstdc++].

In these cases, the export file is created on-the-fly at build time by scripts
(IIRC a mixture of awk, sh, perl).

This requires more surgery to the Make stuff and that we have a suitable
script - but it does mean that we do not need to commit the Darwin (and
potentially Solaris) versions to the source tree.

I’m actually happy with either solution - since we do not expect this to
be a daily occurrence, we could have a maintainer’s python3 script to
update a committed export file or we could try the mechanism used in
the other two places (but then the script would need to use awk/sh/perl
to avoid new build deps).

> I take it .exp is the standard extension for these exports file in the
> Darwin world.  If so, it's a shame (but unavoidable) that it clashes
> with the existing uses of .exp in our source tree for our
> expect/Tcl/DejaGnu sources.

I suspect the linker will accept other extensions, although ‘exp’ is a
convention used elsewhere, it is unfortunate that it clashes indeed.
 - let me try an alternate (e.g. .export) and report back.

> I think the patch as-is is OK for trunk now, assuming that you've
> tested it as above.

I’m going to hold off on this for now (but do want some solution before
14 branches, because there are quite a few new fails from it).

Iain


> 
> Dave
> 
> 
>> Update libtool current and age to
>> reflect the current ABI version (we are not bumping the SO name
>> at this stage).
>> 
>> This fixes a number of new failures in jit testing.
>> 
>> gcc/jit/ChangeLog:
>> 
>> * Make-lang.in: Implement exports list, and use a shared
>> libgcc.
>> * libgccjit.exp: New file.
>> 
>> Signed-off-by: Iain Sandoe 
>> ---
>>  gcc/jit/Make-lang.in  |  38 ---
>>  gcc/jit/libgccjit.exp | 229
>> ++
>>  2 files changed, 251 insertions(+), 16 deletions(-)
>>  create mode 100644 gcc/jit/libgccjit.exp
>> 
>> diff --git a/gcc/jit/Make-lang.in b/gcc/jit/Make-lang.in
>> index b1f0ce73e12..52dc2c24908 100644
>> --- a/gcc/jit/Make-lang.in
>> +++ b/gcc/jit/Make-lang.in
>> @@ -55,7 +55,10 @@ else
>>  
>>  ifneq (,$(findstring darwin,$(host)))
>>  
>> -LIBGCCJIT_AGE = 1
>> +LIBGCCJIT_CURRENT = 26
>> +LIBGCCJIT_REVISION = 0
>> +LIBGCCJIT_AGE = 26
>> +LIBGCCJIT_COMPAT = 0
>>  LIBGCCJIT_BASENAME = libgccjit
>>  
>>  LIBGCCJIT_SONAME = \
>> @@ -63,15 +66,15 @@ LIBGCCJIT_SONAME = \
>>  LIBGCCJIT_FILENAME =
>> $(LIBGCCJIT_BASENAME).$(LIBGCCJIT_VERSION_NUM).dylib
>>  LIBGCCJIT_LINKER_NAME = $(LIBGCCJIT_BASENAME).dylib
>>  
>> -# Conditionalize the use of the LD_VERSION_SCRIPT_OPTION and
>> -# LD_SONAME_OPTION depending if configure found them, using $(if)
>> -# We have to define a COMMA here, otherwise the commas in the "true"
>> -# result are treated as separators by the $(if).
>> -COMMA := ,
>> +# Darwin does not have a version script option. Exported symbols are
>> controlled
>> +# by the following, and library versioning is done using libtool.
>>  LIBGCCJIT_VERSION_SCRIPT_OPTION = \
>> -   $(if $(LD_VERSION_SCRIPT_OPTION),\
>> - -
>> Wl$(COMMA)$(LD_VERSION_SCRIPT_OPTION)$(COMMA)$(srcdir)/jit/libgccjit.
>> map)
>> +  -Wl,-exported_symbols_list,$(srcdir)/jit/libgccjit.exp
>>  
>> +# Conditionalize the use of  LD_SONAME_OPTION on configure finding
>> it, using
>> +# $(if).  We have to define a COMMA here, otherwise the commas in
>> the "true"
>> +# result are treated as separat

Re: Repost [PATCH 3/6] PowerPC: Add support for accumulators in DMR registers.

2024-01-25 Thread Kewen.Lin
Hi Mike,

on 2024/1/6 07:38, Michael Meissner wrote:
> The MMA subsystem added the notion of accumulator registers as an optional
> feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped with
> the traditional floating point registers 0..31, but logically the accumulator
> registers were separate from the FPR registers.  In ISA 3.1, it was 
> anticipated

Using VSX register 0..31 rather than traditional floating point registers 0..31
seems more clear, since floating point registers imply 64 bit long registers.

> that in future systems, the accumulator registers may not overlap with the FPR
> registers.  This patch adds the support for dense math registers as separate
> registers.
> 
> This particular patch does not change the MMA support to use the accumulators
> within the dense math registers.  This patch just adds the basic support for
> having separate DMRs.  The next patch will switch the MMA support to use the
> accumulators if -mcpu=future is used.
> 
> For testing purposes, I added an undocumented option '-mdense-math' to enable
> or disable the dense math support.

Can we avoid this and use one macro for it instead?  As you might have noticed
that some previous temporary options like -mpower{8,9}-vector cause ICEs due to
some unexpected combination and we are going to neuter them, so let's try our
best to avoid it if possible.  I guess one macro TARGET_DENSE_MATH defined by
TARGET_FUTURE && TARGET_MMA matches all use places? and specifying -mcpu=future
can enable it while -mcpu=power10 can disable it.

> 
> This patch adds a new constraint (wD).  If MMA is selected but dense math is
> not selected (i.e. -mcpu=power10), the wD constraint will allow access to
> accumulators that overlap with the VSX vector registers 0..31.  If both MMA 
> and

Sorry for nitpicking, it's more accurate with "VSX registers 0..31".

> dense math are selected (i.e. -mcpu=future), the wD constraint will only allow
> dense math registers.
> 
> This patch modifies the existing %A output modifier.  If MMA is selected but
> dense math is not selected, then %A output modifier converts the VSX register
> number to the accumulator number, by dividing it by 4.  If both MMA and dense
> math are selected, then %A will map the separate DMR registers into 0..7.
> 
> The intention is that user code using extended asm can be modified to run on
> both MMA without dense math and MMA with dense math:
> 
> 1)If possible, don't use extended asm, but instead use the MMA 
> built-in
>   functions;
> 
> 2)If you do need to write extended asm, the d constraints
>   targeting accumulators should now use wD;
> 
> 3)Only use the built-in zero, assemble and disassemble functions 
> create
>   move data between vector quad types and dense math accumulators.
>   I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the
>   extended asm code.  The reason is these instructions assume there is a
>   1-to-1 correspondence between 4 adjacent FPR registers and an
>   accumulator that overlaps with those instructions.  With accumulators
>   now being separate registers, there no longer is a 1-to-1
>   correspondence.
> 
> It is possible that the mangling for DMRs and the GDB register numbers may
> change in the future.
> 
> 2024-01-05   Michael Meissner  
> 
> gcc/
> 
>   * config/rs6000/constraints.md (wD constraint): New constraint.
>   * config/rs6000/mma.md (UNSPEC_DM_ASSEMBLE_ACC): New unspec.
>   (movxo): Convert into define_expand.
>   (movxo_vsx): Version of movxo where accumulators overlap with VSX vector
>   registers 0..31.
>   (movxo_dm): Version of movxo that supports separate dense math
>   accumulators.
>   (mma_assemble_acc): Add dense math support to define_expand.
>   (mma_assemble_acc_vsx): Rename from mma_assemble_acc, and restrict it to
>   non dense math systems.
>   (mma_assemble_acc_dm): Dense math version of mma_assemble_acc.
>   (mma_disassemble_acc): Add dense math support to define_expand.
>   (mma_disassemble_acc_vsx): Rename from mma_disassemble_acc, and restrict
>   it to non dense math systems.
>   (mma_disassemble_acc_dm): Dense math version of mma_disassemble_acc.
>   * config/rs6000/predicates.md (dmr_operand): New predicate.
>   (accumulator_operand): Likewise.
>   * config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS): Add -mdense-math.
>   (POWERPC_MASKS): Likewise.
>   * config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
>   (enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
>   (LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
>   constraint.
>   (reload_reg_map): Likewise.
>   (rs6000_reg_names): Likewise.
>   (alt_reg_names): Likewise.
>   (rs6000_hard_regno_nregs_internal): Likewise.
>   (rs6000_hard_regno_mode_ok_uncached): Likewise.
>   (rs6000_debug_reg_global): Like

Re: [middle-end PATCH] Prefer PLUS over IOR in RTL expansion of multi-word shifts/rotates.

2024-01-25 Thread Richard Biener
On Wed, Jan 24, 2024 at 4:50 PM Georg-Johann Lay  wrote:
>
>
>
> Am 22.01.24 um 08:45 schrieb Richard Biener:
> > On Fri, Jan 19, 2024 at 5:06 PM Georg-Johann Lay  wrote:
> >>
> >>
> >>
> >> Am 18.01.24 um 20:54 schrieb Roger Sayle:
> >>>
> >>> This patch tweaks RTL expansion of multi-word shifts and rotates to use
> >>> PLUS rather than IOR for disjunctive operations.  During expansion of
> >>> these operations, the middle-end creates RTL like (X<<C1) | (X>>C2)
> >>> where the constants C1 and C2 guarantee that bits don't overlap.
> >>> Hence the IOR can be performed by any any_or_plus operation, such as
> >>> IOR, XOR or PLUS; for word-size operations where carry chains aren't
> >>> an issue these should all be equally fast (single-cycle) instructions.
> >>> The benefit of this change is that targets with shift-and-add insns,
> >>> like x86's lea, can benefit from the LSHIFT-ADD form.
> >>>
> >>> An example of a backend that benefits is ARC, which is demonstrated
> >>> by these two simple functions:
> >>
> >> But there are also back-ends where this is bad.
> >>
> >> The reason is that with ORI, the back-end needs only to operate on
> >> these sub-words where the sub-mask is non-zero.  But for PLUS this
> >> is not the case because the back-end does not know that intermediate
> >> carry will be zero.  Hence, with PLUS, more instructions are needed.
> >> An example is AVR, but maybe much more target with multi-word operations
> >> are affected in a bad way.
> >>
> >> Take for example the case with 2 words and a value of 1.
> >>
> >> LO |= 1
> >> HI |= 0
> >>
> >> can be optimized to
> >>
> >> LO |= 1
> >>
> >> but for addition this is not the case:
> >>
> >> LO += 1
> >> HI +=c 0 ;; Does not know that always carry = 0.
> >
> > I wonder if the PLUS can be done on the lowpart only to make this
> > detail obvious?
>
> For AVR, word_mode is HImode, but the hardware has only 8-bit registers.
>
> Moreover splitting insns is not wanted or not possible (due to CCmode).

Btw, it would be nice to have test coverage on AVR for the cases we're
talking about (if there isn't already).  That makes sure we don't regress
with whatever solution we end up with.

Richard.

> Johann
>
> >>> unsigned long long foo(unsigned long long x) { return x<<2; }
> >>>
> >>> which with -O2 is currently compiled to:
> >>>
> >>> foo:lsr r2,r0,30
> >>>   asl_s   r1,r1,2
> >>>   asl_s   r0,r0,2
> >>>   j_s.d   [blink]
> >>>   or_sr1,r1,r2
> >>>
> >>> with this patch becomes:
> >>>
> >>> foo:lsr r2,r0,30
> >>>   add2r1,r2,r1
> >>>   j_s.d   [blink]
> >>>   asl_s   r0,r0,2
> >>>
> >>> unsigned long long bar(unsigned long long x) { return (x<<2)|(x>>62); }
> >>>
> >>> which with -O2 is currently compiled to 6 insns + return:
> >>>
> >>> bar:lsr r12,r0,30
> >>>   asl_s   r3,r1,2
> >>>   asl_s   r0,r0,2
> >>>   lsr_s   r1,r1,30
> >>>   or_sr0,r0,r1
> >>>   j_s.d   [blink]
> >>>   or  r1,r12,r3
> >>>
> >>> with this patch becomes 4 insns + return:
> >>>
> >>> bar:lsr r3,r1,30
> >>>   lsr r2,r0,30
> >>>   add2r1,r2,r1
> >>>   j_s.d   [blink]
> >>>   add2r0,r3,r0
> >>>
> >>>
> >>> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> >>> and make -k check, both with and without --target_board=unix{-m32}
> >>> with no new failures.  Ok for mainline?
> >>>
> >>>
> >>> 2024-01-18  Roger Sayle  
> >>>
> >>> gcc/ChangeLog
> >>>   * expmed.cc (expand_shift_1): Use add_optab instead of ior_optab
> >>>   to generate PLUS instead or IOR when unioning disjoint 
> >>> bitfields.
> >>>   * optabs.cc (expand_subword_shift): Likewise.
> >>>   (expand_binop): Likewise for double-word rotate.
> >>>
> >>>
> >>> Thanks in advance,
> >>> Roger


Re: [PATCH] RISC-V: Add LCM delete block predecessors dump information

2024-01-25 Thread Kito Cheng
LGTM, it's always good to improve the debug output for the vsetvli insertion
pass; it's a really non-trivial optimization.

On Thu, Jan 25, 2024 at 4:59 PM Juzhe-Zhong  wrote:
>
> While looking into PR113469, I notice the LCM delete a vsetvl incorrectly.
>
> This patch add dump information of all predecessors for LCM delete vsetvl 
> block
> for better debugging.
>
> Tested no regression.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vsetvl.cc (get_all_predecessors): New function.
> (pre_vsetvl::pre_global_vsetvl_info): Add LCM delete block all 
> predecessors dump information.
>
> ---
>  gcc/config/riscv/riscv-vsetvl.cc | 42 
>  1 file changed, 42 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc 
> b/gcc/config/riscv/riscv-vsetvl.cc
> index 72c5a127d9e..da258b964fc 100644
> --- a/gcc/config/riscv/riscv-vsetvl.cc
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -654,6 +654,31 @@ invalid_opt_bb_p (basic_block cfg_bb)
>return false;
>  }
>
> +/* Get all predecessors of BB.  */
> +static hash_set
> +get_all_predecessors (basic_block bb)
> +{
> +  hash_set blocks;
> +  auto_vec work_list;
> +  hash_set visited_list;
> +  work_list.safe_push (bb);
> +
> +  while (!work_list.is_empty ())
> +{
> +  basic_block new_bb = work_list.pop ();
> +  visited_list.add (new_bb);
> +  edge e;
> +  edge_iterator ei;
> +  FOR_EACH_EDGE (e, ei, new_bb->preds)
> +   {
> + if (!visited_list.contains (e->src))
> +   work_list.safe_push (e->src);
> + blocks.add (e->src);
> +   }
> +}
> +  return blocks;
> +}
> +
>  /* This flags indicates the minimum demand of the vl and vtype values by the
> RVV instruction. For example, DEMAND_RATIO_P indicates that this RVV
> instruction only needs the SEW/LMUL ratio to remain the same, and does not
> @@ -3142,6 +3167,23 @@ pre_vsetvl::pre_global_vsetvl_info ()
>const vsetvl_block_info &block_info = get_block_info (info.get_bb ());
>gcc_assert (block_info.get_entry_info () == info);
>info.set_delete ();
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +   {
> + fprintf (dump_file,
> +  "\nLCM deleting vsetvl of block %d, it has predecessors: 
> \n",
> +  bb->index ());
> + hash_set all_preds
> +   = get_all_predecessors (bb->cfg_bb ());
> + int i = 0;
> + for (const auto pred : all_preds)
> +   {
> + fprintf (dump_file, "%d ", pred->index);
> + i++;
> + if (i % 32 == 0)
> +   fprintf (dump_file, "\n");
> +   }
> + fprintf (dump_file, "\n");
> +   }
>  }
>
>/* Remove vsetvl infos if all precessors are available to the block.  */
> --
> 2.36.3
>


Re: [PATCH] convert: Fix test for out of bounds shift count [PR113574]

2024-01-25 Thread Richard Biener
On Thu, 25 Jan 2024, Jakub Jelinek wrote:

> Hi!
> 
> The following testcase is miscompiled, because convert_to_integer_1 for
> LSHIFT_EXPR tests if the INTEGER_CST shift count is too high, but
> incorrectly compares it against TYPE_SIZE rather than TYPE_PRECISION.
> The type in question is unsigned _BitInt(1), which has TYPE_PRECISION 1,
> TYPE_SIZE 8, and the shift count is 2 in that case.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> 2024-01-25  Jakub Jelinek  
> 
>   PR middle-end/113574
>   * convert.cc (convert_to_integer_1) : Compare shift
>   count against TYPE_PRECISION rather than TYPE_SIZE.
> 
>   * gcc.dg/torture/bitint-52.c: New test.
> 
> --- gcc/convert.cc.jj 2024-01-03 11:51:24.0 +0100
> +++ gcc/convert.cc2024-01-24 17:29:56.328124611 +0100
> @@ -762,7 +762,8 @@ convert_to_integer_1 (tree type, tree ex
> {
>   /* If shift count is less than the width of the truncated type,
>  really shift.  */
> - if (tree_int_cst_lt (TREE_OPERAND (expr, 1), TYPE_SIZE (type)))
> + if (wi::to_widest (TREE_OPERAND (expr, 1))
> + < TYPE_PRECISION (type))
> /* In this case, shifting is like multiplication.  */
> goto trunc1;
>   else
> --- gcc/testsuite/gcc.dg/torture/bitint-52.c.jj   2024-01-24 
> 17:33:42.174986825 +0100
> +++ gcc/testsuite/gcc.dg/torture/bitint-52.c  2024-01-24 17:33:33.395108814 
> +0100
> @@ -0,0 +1,23 @@
> +/* PR middle-end/113574 */
> +/* { dg-do run { target bitint } } */
> +/* { dg-options "-std=c23 -pedantic-errors" } */
> +/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
> +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
> +
> +unsigned _BitInt(1) a;
> +unsigned _BitInt(8) b;
> +
> +void
> +foo (unsigned _BitInt(16) x)
> +{
> +  a += (x << 2) | b;
> +}
> +
> +int
> +main ()
> +{
> +  foo (0xfef1uwb);
> +  if (a)
> +__builtin_abort ();
> +  return 0;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


[PATCH] RISC-V: Add LCM delete block predecessors dump information

2024-01-25 Thread Juzhe-Zhong
While looking into PR113469, I noticed that LCM deletes a vsetvl incorrectly.

This patch adds dump information about all predecessors of the LCM-deleted
vsetvl block, for better debugging.

Tested no regression.

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (get_all_predecessors): New function.
(pre_vsetvl::pre_global_vsetvl_info): Add LCM delete block all 
predecessors dump information.

---
 gcc/config/riscv/riscv-vsetvl.cc | 42 
 1 file changed, 42 insertions(+)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 72c5a127d9e..da258b964fc 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -654,6 +654,31 @@ invalid_opt_bb_p (basic_block cfg_bb)
   return false;
 }
 
+/* Get all predecessors of BB.  */
+static hash_set
+get_all_predecessors (basic_block bb)
+{
+  hash_set blocks;
+  auto_vec work_list;
+  hash_set visited_list;
+  work_list.safe_push (bb);
+
+  while (!work_list.is_empty ())
+{
+  basic_block new_bb = work_list.pop ();
+  visited_list.add (new_bb);
+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE (e, ei, new_bb->preds)
+   {
+ if (!visited_list.contains (e->src))
+   work_list.safe_push (e->src);
+ blocks.add (e->src);
+   }
+}
+  return blocks;
+}
+
 /* This flags indicates the minimum demand of the vl and vtype values by the
RVV instruction. For example, DEMAND_RATIO_P indicates that this RVV
instruction only needs the SEW/LMUL ratio to remain the same, and does not
@@ -3142,6 +3167,23 @@ pre_vsetvl::pre_global_vsetvl_info ()
   const vsetvl_block_info &block_info = get_block_info (info.get_bb ());
   gcc_assert (block_info.get_entry_info () == info);
   info.set_delete ();
+  if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+ fprintf (dump_file,
+  "\nLCM deleting vsetvl of block %d, it has predecessors: \n",
+  bb->index ());
+ hash_set all_preds
+   = get_all_predecessors (bb->cfg_bb ());
+ int i = 0;
+ for (const auto pred : all_preds)
+   {
+ fprintf (dump_file, "%d ", pred->index);
+ i++;
+ if (i % 32 == 0)
+   fprintf (dump_file, "\n");
+   }
+ fprintf (dump_file, "\n");
+   }
 }
 
   /* Remove vsetvl infos if all precessors are available to the block.  */
-- 
2.36.3



Re: [PATCH v2 5/5] Add documentation for musttail attribute

2024-01-25 Thread Andi Kleen
On Thu, Jan 25, 2024 at 07:51:21AM +0100, rep.dot@gmail.com wrote:
> On 24 January 2024 20:30:45 CET, Andi Kleen  wrote:
> >---
> > gcc/doc/extend.texi | 16 
> > 1 file changed, 16 insertions(+)
> >
> >diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> >index 0bc586d120e7..c68d32bed8de 100644
> >--- a/gcc/doc/extend.texi
> >+++ b/gcc/doc/extend.texi
> >@@ -9867,6 +9867,22 @@ foo (int x, int y)
> > @code{y} is not actually incremented and the compiler can but does not
> > have to optimize it to just @code{return 42 + 42;}.
> > 
> >+@cindex @code{musttail} statement attribute
> >+@item musttail
> >+
> >+The @code{gnu::musttail} or @code{clang::hottail} attribute
> 
> AFAICS this patchset does not handle hottail ?

Thanks. Fixed the typo.
-Andi


Re: [PATCH] Fix a few vect gimple testcases for LLP64 targets (e.g. mingw) [PR113548]

2024-01-25 Thread Richard Biener
On Thu, Jan 25, 2024 at 1:46 AM Andrew Pinski  wrote:
>
> This fixes some of the vect testcases which use the gimple FE for LLP64 targets.
> The testcases use directly `unsigned long` for the addition to pointers
> when they should be using `__SIZETYPE__`. This changes to use that instead.

OK

> gcc/testsuite/ChangeLog:
>
> PR testsuite/113548
> * gcc.dg/vect/slp-reduc-10a.c: Use `__SIZETYPE__` instead of 
> `unsigned long`.
> * gcc.dg/vect/slp-reduc-10b.c: Likewise.
> * gcc.dg/vect/slp-reduc-10c.c: Likewise.
> * gcc.dg/vect/slp-reduc-10d.c: Likewise.
> * gcc.dg/vect/slp-reduc-10e.c: Likewise.
> * gcc.dg/vect/vect-cond-arith-2.c: Likewise.
> * gcc.dg/vect/vect-ifcvt-19.c: Likewise.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c | 20 +--
>  gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c | 20 +--
>  gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c |  6 +++---
>  gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c |  6 +++---
>  gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c |  6 +++---
>  gcc/testsuite/gcc.dg/vect/vect-cond-arith-2.c |  8 
>  gcc/testsuite/gcc.dg/vect/vect-ifcvt-19.c |  8 
>  7 files changed, 37 insertions(+), 37 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c 
> b/gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c
> index d3c2c2d7f54..66d3a6c4765 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c
> @@ -8,8 +8,8 @@ foo (int * x, int n)
>int i;
>int sum;
>int _1;
> -  long unsigned int _2;
> -  long unsigned int _3;
> +  __SIZETYPE__ _2;
> +  __SIZETYPE__ _3;
>int * _4;
>int _5;
>__SIZETYPE__ _7;
> @@ -38,23 +38,23 @@ foo (int * x, int n)
>sum_30 = __PHI (__BB5: 0, __BB6: sum_27);
>i_32 = __PHI (__BB5: 0, __BB6: i_28);
>_1 = i_32 * 4;
> -  _2 = (long unsigned int) _1;
> -  _3 = _2 * 4ul;
> +  _2 = (__SIZETYPE__) _1;
> +  _3 = _2 * _Literal(__SIZETYPE__)4;
>_4 = x_23(D) + _3;
>_5 = __MEM  (_4);
>sum_24 = _5 + sum_30;
> -  _7 = _2 + 1ul;
> -  _8 = _7 * 4ul;
> +  _7 = _2 + _Literal(__SIZETYPE__)1;
> +  _8 = _7 * _Literal(__SIZETYPE__)4;
>_9 = x_23(D) + _8;
>_10 = __MEM  (_9);
>sum_25 = _10 + sum_24;
> -  _11 = _2 + 2ul;
> -  _12 = _11 * 4ul;
> +  _11 = _2 + _Literal(__SIZETYPE__)2;
> +  _12 = _11 * _Literal(__SIZETYPE__)4;
>_13 = x_23(D) + _12;
>_14 = __MEM  (_13);
>sum_26 = _14 + sum_25;
> -  _15 = _2 + 3ul;
> -  _16 = _15 * 4ul;
> +  _15 = _2 + _Literal(__SIZETYPE__)3;
> +  _16 = _15 * _Literal(__SIZETYPE__)4;
>_17 = x_23(D) + _16;
>_18 = __MEM  (_17);
>sum_27 = _18 + sum_26;
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c 
> b/gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c
> index 6a0d55def30..2e6d15e1c9d 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c
> @@ -8,8 +8,8 @@ foo (int * x, int n)
>int i;
>int sum;
>int _1;
> -  long unsigned int _2;
> -  long unsigned int _3;
> +  __SIZETYPE__ _2;
> +  __SIZETYPE__ _3;
>int * _4;
>int _5;
>__SIZETYPE__ _7;
> @@ -38,23 +38,23 @@ foo (int * x, int n)
>sum_30 = __PHI (__BB5: 0, __BB6: sum_27);
>i_32 = __PHI (__BB5: 0, __BB6: i_28);
>_1 = i_32 * 4;
> -  _2 = (long unsigned int) _1;
> -  _3 = _2 * 4ul;
> +  _2 = (__SIZETYPE__) _1;
> +  _3 = _2 * _Literal(__SIZETYPE__)4;
>_4 = x_23(D) + _3;
>_5 = __MEM  (_4);
>sum_24 = _5 + sum_30;
> -  _7 = _2 + 1ul;
> -  _8 = _7 * 4ul;
> +  _7 = _2 + _Literal(__SIZETYPE__)1;
> +  _8 = _7 * _Literal(__SIZETYPE__)4;
>_9 = x_23(D) + _8;
>_100 = __MEM  (_9);
>sum_25 = sum_24 + _100;
> -  _11 = _2 + 2ul;
> -  _12 = _11 * 4ul;
> +  _11 = _2 + _Literal(__SIZETYPE__)2;
> +  _12 = _11 * _Literal(__SIZETYPE__)4;
>_13 = x_23(D) + _12;
>_14 = __MEM  (_13);
>sum_26 = _14 + sum_25;
> -  _15 = _2 + 3ul;
> -  _16 = _15 * 4ul;
> +  _15 = _2 + _Literal(__SIZETYPE__)3;
> +  _16 = _15 * _Literal(__SIZETYPE__)4;
>_17 = x_23(D) + _16;
>_18 = __MEM  (_17);
>sum_27 = _18 + sum_26;
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c 
> b/gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c
> index 20df2626764..25a21adb729 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c
> @@ -8,8 +8,8 @@ foo (int * x, int n)
>int i;
>int sum;
>int _1;
> -  long unsigned int _2;
> -  long unsigned int _3;
> +  __SIZETYPE__ _2;
> +  __SIZETYPE__ _3;
>int * _4;
>int _5;
>__SIZETYPE__ _7;
> @@ -38,7 +38,7 @@ foo (int * x, int n)
>sum_30 = __PHI (__BB5: 0, __BB6: sum_27);
>i_32 = __PHI (__BB5: 0, __BB6: i_28);
>_1 = i_32 * 4;
> -  _2 = (long unsigned int) _1;
> +  _2 = (__SIZETYPE__) _1;
>_3 = _2 * 4ul;
>_4 = x_23(D) + _3;
>_5 = __MEM  (_4);
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c 
> b/gcc/testsuite/gcc.dg/vect/s

[Committed] RISC-V: Remove redundant full available computation [NFC]

2024-01-25 Thread Juzhe-Zhong
Notice that full available is computed every round of earliest fusion, which is
redundant.
Actually we only need to compute it once in phase 3.

It's NFC patch and tested no regression. Committed.
 
gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (pre_vsetvl::compute_vsetvl_def_data): 
Remove redundant full available computation.
(pre_vsetvl::pre_global_vsetvl_info): Ditto.

---
 gcc/config/riscv/riscv-vsetvl.cc | 57 +---
 1 file changed, 23 insertions(+), 34 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 53d954e1dff..72c5a127d9e 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1256,9 +1256,7 @@ public:
   vsetvl_info global_info;
   bb_info *bb;
 
-  bool full_available;
-
-  vsetvl_block_info () : bb (nullptr), full_available (false)
+  vsetvl_block_info () : bb (nullptr)
   {
 local_infos.safe_grow_cleared (0);
 global_info.set_empty ();
@@ -2489,34 +2487,6 @@ pre_vsetvl::compute_vsetvl_def_data ()
}
 }
 
-  for (const bb_info *bb : crtl->ssa->bbs ())
-{
-  vsetvl_block_info &block_info = get_block_info (bb);
-  if (block_info.empty_p ())
-   continue;
-  vsetvl_info &curr_info = block_info.get_entry_info ();
-  if (!curr_info.valid_p ())
-   continue;
-
-  unsigned int expr_index;
-  sbitmap_iterator sbi;
-  gcc_assert (
-   !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
-  bool full_available = true;
-  EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index,
-   sbi)
-   {
- vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
- if (!prev_info.valid_p ()
- || !m_dem.available_p (prev_info, curr_info))
-   {
- full_available = false;
- break;
-   }
-   }
-  block_info.full_available = full_available;
-}
-
   sbitmap_vector_free (def_loc);
   sbitmap_vector_free (m_kill);
 }
@@ -3178,11 +3148,30 @@ pre_vsetvl::pre_global_vsetvl_info ()
   for (const bb_info *bb : crtl->ssa->bbs ())
 {
   vsetvl_block_info &block_info = get_block_info (bb);
-  if (block_info.empty_p () || !block_info.full_available)
+  if (block_info.empty_p ())
+   continue;
+  vsetvl_info &curr_info = block_info.get_entry_info ();
+  if (!curr_info.valid_p ())
continue;
 
-  vsetvl_info &info = block_info.get_entry_info ();
-  info.set_delete ();
+  unsigned int expr_index;
+  sbitmap_iterator sbi;
+  gcc_assert (
+   !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
+  bool full_available = true;
+  EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index,
+   sbi)
+   {
+ vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
+ if (!prev_info.valid_p ()
+ || !m_dem.available_p (prev_info, curr_info))
+   {
+ full_available = false;
+ break;
+   }
+   }
+  if (full_available)
+   curr_info.set_delete ();
 }
 
   for (const bb_info *bb : crtl->ssa->bbs ())
-- 
2.36.3



  1   2   >